From 93b7e55bcf1b820e8e4a4d97a73fc990c7075545 Mon Sep 17 00:00:00 2001
From: Ethiraric <ethiraric@gmail.com>
Date: Wed, 10 Jul 2024 22:29:59 +0200
Subject: [PATCH] Move scanning low-level functions to `Input`.

---
 parser/src/input.rs     | 188 +++++++++++++++++++++++++++++++++++++++-
 parser/src/input/str.rs | 130 +++++++++++++++++++++++++--
 parser/src/scanner.rs   | 126 ++++++++++++++-------------
 3 files changed, 375 insertions(+), 69 deletions(-)

diff --git a/parser/src/input.rs b/parser/src/input.rs
index 14b5e95..f8ff3b5 100644
--- a/parser/src/input.rs
+++ b/parser/src/input.rs
@@ -4,7 +4,9 @@ pub mod str;
 #[allow(clippy::module_name_repetitions)]
 pub use buffered::BufferedInput;
 
-use crate::char_traits::{is_blank_or_breakz, is_breakz, is_flow};
+use crate::char_traits::{
+    is_alpha, is_blank, is_blank_or_breakz, is_break, is_breakz, is_digit, is_flow, is_z,
+};
 
 /// Interface for a source of characters.
 ///
@@ -170,7 +172,7 @@ pub trait Input {
     ///
     /// # Return
     /// Return a tuple with the number of characters that were consumed and the result of skipping
-    /// whitespace. The number of characters returned can be used to advance the index and columns,
+    /// whitespace. The number of characters returned can be used to advance the index and column,
     /// since no end-of-line character will be consumed.
     /// See [`SkipTabs`] For more details on the success variant.
     ///
@@ -230,6 +232,188 @@ pub trait Input {
             _ => true,
         }
     }
+
+    /// Check whether the next character is [a blank] or [a break].
+    ///
+    /// The character must have previously been fetched through [`lookahead`].
+    ///
+    /// # Return
+    /// Returns `true` if the next character is [a blank] or [a break], `false` otherwise.
+    ///
+    /// [`lookahead`]: Input::lookahead
+    /// [a blank]: is_blank
+    /// [a break]: is_break
+    #[inline]
+    fn next_is_blank_or_break(&self) -> bool {
+        is_blank(self.peek()) || is_break(self.peek())
+    }
+
+    /// Check whether the next character is [a blank] or [a breakz].
+    ///
+    /// The character must have previously been fetched through [`lookahead`].
+    ///
+    /// # Return
+    /// Returns `true` if the next character is [a blank] or [a breakz], `false` otherwise.
+    ///
+    /// [`lookahead`]: Input::lookahead
+    /// [a blank]: is_blank
+    /// [a breakz]: is_breakz
+    #[inline]
+    fn next_is_blank_or_breakz(&self) -> bool {
+        is_blank(self.peek()) || is_breakz(self.peek())
+    }
+
+    /// Check whether the next character is [a blank].
+    ///
+    /// The character must have previously been fetched through [`lookahead`].
+    ///
+    /// # Return
+    /// Returns `true` if the next character is [a blank], `false` otherwise.
+    ///
+    /// [`lookahead`]: Input::lookahead
+    /// [a blank]: is_blank
+    #[inline]
+    fn next_is_blank(&self) -> bool {
+        is_blank(self.peek())
+    }
+
+    /// Check whether the next character is [a break].
+    ///
+    /// The character must have previously been fetched through [`lookahead`].
+    ///
+    /// # Return
+    /// Returns `true` if the next character is [a break], `false` otherwise.
+    ///
+    /// [`lookahead`]: Input::lookahead
+    /// [a break]: is_break
+    #[inline]
+    fn next_is_break(&self) -> bool {
+        is_break(self.peek())
+    }
+
+    /// Check whether the next character is [a breakz].
+    ///
+    /// The character must have previously been fetched through [`lookahead`].
+    ///
+    /// # Return
+    /// Returns `true` if the next character is [a breakz], `false` otherwise.
+    ///
+    /// [`lookahead`]: Input::lookahead
+    /// [a breakz]: is_breakz
+    #[inline]
+    fn next_is_breakz(&self) -> bool {
+        is_breakz(self.peek())
+    }
+
+    /// Check whether the next character is [a z].
+    ///
+    /// The character must have previously been fetched through [`lookahead`].
+    ///
+    /// # Return
+    /// Returns `true` if the next character is [a z], `false` otherwise.
+    ///
+    /// [`lookahead`]: Input::lookahead
+    /// [a z]: is_z
+    #[inline]
+    fn next_is_z(&self) -> bool {
+        is_z(self.peek())
+    }
+
+    /// Check whether the next character is [a flow].
+    ///
+    /// The character must have previously been fetched through [`lookahead`].
+    ///
+    /// # Return
+    /// Returns `true` if the next character is [a flow], `false` otherwise.
+    ///
+    /// [`lookahead`]: Input::lookahead
+    /// [a flow]: is_flow
+    #[inline]
+    fn next_is_flow(&self) -> bool {
+        is_flow(self.peek())
+    }
+
+    /// Check whether the next character is [a digit].
+    ///
+    /// The character must have previously been fetched through [`lookahead`].
+    ///
+    /// # Return
+    /// Returns `true` if the next character is [a digit], `false` otherwise.
+    ///
+    /// [`lookahead`]: Input::lookahead
+    /// [a digit]: is_digit
+    #[inline]
+    fn next_is_digit(&self) -> bool {
+        is_digit(self.peek())
+    }
+
+    /// Check whether the next character is [a letter].
+    ///
+    /// The character must have previously been fetched through [`lookahead`].
+    ///
+    /// # Return
+    /// Returns `true` if the next character is [a letter], `false` otherwise.
+    ///
+    /// [`lookahead`]: Input::lookahead
+    /// [a letter]: is_alpha
+    #[inline]
+    fn next_is_alpha(&self) -> bool {
+        is_alpha(self.peek())
+    }
+
+    /// Consume characters from the input up to, but not including, the next [breakz].
+    ///
+    /// Every skipped character is removed from the input; the [breakz] is left in place.
+    ///
+    /// # Return
+    /// Returns how many characters were consumed. No end-of-line character is consumed, so
+    /// the caller can add this count to both its index and its column.
+    ///
+    /// [breakz]: is_breakz
+    #[inline]
+    fn skip_while_non_breakz(&mut self) -> usize {
+        let mut n_skipped = 0;
+        while !is_breakz(self.look_ch()) {
+            self.skip();
+            n_skipped += 1;
+        }
+        n_skipped
+    }
+
+    /// Consume the run of [blanks] found at the current position of the input.
+    ///
+    /// Skipping stops at the first non-blank character, which is left in the input.
+    ///
+    /// # Return
+    /// Returns how many characters were consumed. No end-of-line character is consumed, so
+    /// the caller can add this count to both its index and its column.
+    ///
+    /// [blanks]: is_blank
+    fn skip_while_blank(&mut self) -> usize {
+        let mut n_blanks = 0;
+        while is_blank(self.look_ch()) {
+            self.skip();
+            n_blanks += 1;
+        }
+        n_blanks
+    }
+
+    /// Move the leading run of characters accepted by `is_alpha` from the input into `out`.
+    ///
+    /// Each matching character is appended to `out` and then consumed from the input.
+    ///
+    /// # Return
+    /// Returns how many characters were consumed. No end-of-line character is consumed, so
+    /// the caller can add this count to both its index and its column.
+    fn fetch_while_is_alpha(&mut self, out: &mut String) -> usize {
+        let mut count = 0;
+        while is_alpha(self.look_ch()) {
+            out.push(self.peek());
+            self.skip();
+            count += 1;
+        }
+        count
+    }
 }
 
 /// Behavior to adopt regarding treating tabs as whitespace.
diff --git a/parser/src/input/str.rs b/parser/src/input/str.rs
index e50adac..32bd136 100644
--- a/parser/src/input/str.rs
+++ b/parser/src/input/str.rs
@@ -1,5 +1,7 @@
 use crate::{
-    char_traits::{is_blank_or_breakz, is_breakz, is_flow},
+    char_traits::{
+        is_alpha, is_blank, is_blank_or_breakz, is_break, is_breakz, is_digit, is_flow, is_z,
+    },
     input::{Input, SkipTabs},
 };
 
@@ -60,7 +62,9 @@ impl<'a> Input for StrInput<'a> {
 
     #[inline]
     fn push_back(&mut self, c: char) {
-        self.buffer = put_back_in_str(self.buffer, c);
+        // SAFETY: The precondition of this function is that the character we are given is the one
+        // immediately preceding `self.buffer`.
+        self.buffer = unsafe { put_back_in_str(self.buffer, c) };
     }
 
     #[inline]
@@ -270,6 +274,122 @@ impl<'a> Input for StrInput<'a> {
             }
         }
     }
+
+    #[inline]
+    fn next_is_blank_or_break(&self) -> bool {
+        // An empty buffer has no next character, hence neither a blank nor a break.
+        let first = self.buffer.as_bytes().first();
+        matches!(first, Some(&b) if is_blank(b as char) || is_break(b as char))
+    }
+
+    #[inline]
+    fn next_is_blank_or_breakz(&self) -> bool {
+        // An empty buffer is reported as a breakz.
+        let first = self.buffer.as_bytes().first();
+        first.map_or(true, |&b| is_blank(b as char) || is_breakz(b as char))
+    }
+
+    #[inline]
+    fn next_is_blank(&self) -> bool {
+        matches!(self.buffer.as_bytes().first(), Some(&b) if is_blank(b as char))
+    }
+
+    #[inline]
+    fn next_is_break(&self) -> bool {
+        matches!(self.buffer.as_bytes().first(), Some(&b) if is_break(b as char))
+    }
+
+    #[inline]
+    fn next_is_breakz(&self) -> bool {
+        self.buffer.as_bytes().first().map_or(true, |&b| is_breakz(b as char))
+    }
+
+    #[inline]
+    fn next_is_z(&self) -> bool {
+        self.buffer.as_bytes().first().map_or(true, |&b| is_z(b as char))
+    }
+
+    #[inline]
+    fn next_is_flow(&self) -> bool {
+        matches!(self.buffer.as_bytes().first(), Some(&b) if is_flow(b as char))
+    }
+
+    #[inline]
+    fn next_is_digit(&self) -> bool {
+        matches!(self.buffer.as_bytes().first(), Some(&b) if is_digit(b as char))
+    }
+
+    #[inline]
+    fn next_is_alpha(&self) -> bool {
+        matches!(self.buffer.as_bytes().first(), Some(&b) if is_alpha(b as char))
+    }
+
+    /// Skip characters until a breakz is found, without consuming the breakz itself.
+    ///
+    /// The scan works on the string slice directly and re-slices the buffer once at the end.
+    /// If the buffer holds no breakz, the whole buffer is consumed.
+    ///
+    /// # Return
+    /// Returns the number of characters (not bytes) that were skipped.
+    fn skip_while_non_breakz(&mut self) -> usize {
+        let mut count = 0;
+        // Byte offset at which the remaining input starts. Defaults to the end of the buffer,
+        // which is the right value when no breakz is found.
+        let mut end = self.buffer.len();
+
+        for (offset, c) in self.buffer.char_indices() {
+            if is_breakz(c) {
+                // Stop right before the breakz so that it stays in the buffer.
+                end = offset;
+                break;
+            }
+            count += 1;
+        }
+
+        self.buffer = &self.buffer[end..];
+        count
+    }
+
+    fn skip_while_blank(&mut self) -> usize {
+        // The blanks we look for are all ASCII, so the buffer can be scanned byte by byte: each
+        // byte that matches is exactly one character.
+        let n_blanks = self
+            .buffer
+            .bytes()
+            .take_while(|&byte| is_blank(byte as char))
+            .count();
+
+        self.buffer = &self.buffer[n_blanks..];
+        n_blanks
+    }
+
+    /// Append the leading run of alpha characters to `out` and remove it from the buffer.
+    ///
+    /// The run is located first and then copied with a single `push_str`, rather than pushing
+    /// characters one at a time.
+    ///
+    /// # Return
+    /// Returns the number of characters (not bytes) that were moved into `out`.
+    fn fetch_while_is_alpha(&mut self, out: &mut String) -> usize {
+        let mut n_chars = 0;
+        // Byte offset of the first non-alpha character (buffer length if there is none).
+        let mut end = self.buffer.len();
+
+        for (offset, c) in self.buffer.char_indices() {
+            if !is_alpha(c) {
+                end = offset;
+                break;
+            }
+            n_chars += 1;
+        }
+
+        // `end` comes from `char_indices` (or is the buffer length), so it is a char boundary.
+        let (fetched, remaining) = self.buffer.split_at(end);
+        out.push_str(fetched);
+        self.buffer = remaining;
+
+        n_chars
+    }
 }
 
 /// The buffer size we return to the scanner.
@@ -309,13 +429,13 @@ const BUFFER_LEN: usize = 128;
 /// assert_eq!(s1, s3);
 /// assert_eq!(s1.as_ptr(), s3.as_ptr());
 /// ```
-fn put_back_in_str(s: &str, c: char) -> &str {
+unsafe fn put_back_in_str(s: &str, c: char) -> &str {
     let n_bytes = c.len_utf8();
 
     // SAFETY: The character that gets pushed back is guaranteed to be the one that is
     // immediately preceding our buffer. We can compute the length of the character and move
     // our buffer back that many bytes.
-    unsafe { extend_left(s, n_bytes) }
+    extend_left(s, n_bytes)
 }
 
 /// Extend the string by moving the start pointer to the left by `n` bytes.
@@ -369,7 +489,7 @@ mod test {
     pub fn put_back_in_str_example() {
         let s1 = "foo";
         let s2 = &s1[1..];
-        let s3 = put_back_in_str(s2, 'f'); // OK, 'f' is the character immediately preceding
+        let s3 = unsafe { put_back_in_str(s2, 'f') }; // OK, 'f' is the character immediately preceding
         assert_eq!(s1, s3);
         assert_eq!(s1.as_ptr(), s3.as_ptr());
     }
diff --git a/parser/src/scanner.rs b/parser/src/scanner.rs
index e2c92ec..b1cb1f5 100644
--- a/parser/src/scanner.rs
+++ b/parser/src/scanner.rs
@@ -13,8 +13,8 @@ use std::{char, collections::VecDeque, error::Error, fmt};
 
 use crate::{
     char_traits::{
-        as_hex, is_alpha, is_anchor_char, is_blank, is_blank_or_breakz, is_break, is_breakz,
-        is_digit, is_flow, is_hex, is_tag_char, is_uri_char, is_z,
+        as_hex, is_anchor_char, is_blank_or_breakz, is_break, is_breakz, is_flow, is_hex,
+        is_tag_char, is_uri_char,
     },
     input::{Input, SkipTabs},
 };
@@ -533,7 +533,7 @@ impl<T: Input> Scanner<T> {
             // will be reset by `skip_nl`.
             self.skip_blank();
             self.skip_nl();
-        } else if is_break(self.input.peek()) {
+        } else if self.input.next_is_break() {
             self.skip_nl();
         }
     }
@@ -616,7 +616,7 @@ impl<T: Input> Scanner<T> {
 
         self.input.lookahead(4);
 
-        if is_z(self.input.peek()) {
+        if self.input.next_is_z() {
             self.fetch_stream_end()?;
             return Ok(());
         }
@@ -629,7 +629,7 @@ impl<T: Input> Scanner<T> {
             } else if self.input.next_is_document_end() {
                 self.fetch_document_indicator(TokenType::DocumentEnd)?;
                 self.skip_ws_to_eol(SkipTabs::Yes)?;
-                if !is_breakz(self.input.peek()) {
+                if !self.input.next_is_breakz() {
                     return Err(ScanError::new_str(
                         self.mark,
                         "invalid content after document end marker",
@@ -784,7 +784,7 @@ impl<T: Input> Scanner<T> {
                 {
                     self.skip_ws_to_eol(SkipTabs::Yes)?;
                     // If we have content on that line with a tab, return an error.
-                    if !is_breakz(self.input.peek()) {
+                    if !self.input.next_is_breakz() {
                         return Err(ScanError::new_str(
                             self.mark,
                             "tabs disallowed within this context (block indentation)",
@@ -800,9 +800,9 @@ impl<T: Input> Scanner<T> {
                     }
                 }
                 '#' => {
-                    while !is_breakz(self.input.look_ch()) {
-                        self.skip_non_blank();
-                    }
+                    let comment_length = self.input.skip_while_non_breakz();
+                    self.mark.index += comment_length;
+                    self.mark.col += comment_length;
                 }
                 _ => break,
             }
@@ -832,9 +832,9 @@ impl<T: Input> Scanner<T> {
                     need_whitespace = false;
                 }
                 '#' => {
-                    while !is_breakz(self.input.look_ch()) {
-                        self.skip_non_blank();
-                    }
+                    let comment_length = self.input.skip_while_non_breakz();
+                    self.mark.index += comment_length;
+                    self.mark.col += comment_length;
                 }
                 _ => break,
             }
@@ -912,9 +912,9 @@ impl<T: Input> Scanner<T> {
             // XXX This should be a warning instead of an error
             _ => {
                 // skip current line
-                while !is_breakz(self.input.look_ch()) {
-                    self.skip_non_blank();
-                }
+                let line_len = self.input.skip_while_non_breakz();
+                self.mark.index += line_len;
+                self.mark.col += line_len;
                 // XXX return an empty TagDirective token
                 Token(
                     start_mark,
@@ -927,7 +927,7 @@ impl<T: Input> Scanner<T> {
 
         self.skip_ws_to_eol(SkipTabs::Yes)?;
 
-        if is_breakz(self.input.peek()) {
+        if self.input.next_is_breakz() {
             self.input.lookahead(2);
             self.skip_linebreak();
             Ok(tok)
@@ -940,9 +940,9 @@ impl<T: Input> Scanner<T> {
     }
 
     fn scan_version_directive_value(&mut self, mark: &Marker) -> Result<Token, ScanError> {
-        while is_blank(self.input.look_ch()) {
-            self.skip_blank();
-        }
+        let n_blanks = self.input.skip_while_blank();
+        self.mark.index += n_blanks;
+        self.mark.col += n_blanks;
 
         let major = self.scan_version_directive_number(mark)?;
 
@@ -962,10 +962,10 @@ impl<T: Input> Scanner<T> {
     fn scan_directive_name(&mut self) -> Result<String, ScanError> {
         let start_mark = self.mark;
         let mut string = String::new();
-        while is_alpha(self.input.look_ch()) {
-            string.push(self.input.peek());
-            self.skip_non_blank();
-        }
+
+        let n_chars = self.input.fetch_while_is_alpha(&mut string);
+        self.mark.index += n_chars;
+        self.mark.col += n_chars;
 
         if string.is_empty() {
             return Err(ScanError::new_str(
@@ -1010,22 +1010,21 @@ impl<T: Input> Scanner<T> {
     }
 
     fn scan_tag_directive_value(&mut self, mark: &Marker) -> Result<Token, ScanError> {
-        /* Eat whitespaces. */
-        while is_blank(self.input.look_ch()) {
-            self.skip_blank();
-        }
+        let n_blanks = self.input.skip_while_blank();
+        self.mark.index += n_blanks;
+        self.mark.col += n_blanks;
+
         let handle = self.scan_tag_handle(true, mark)?;
 
-        /* Eat whitespaces. */
-        while is_blank(self.input.look_ch()) {
-            self.skip_blank();
-        }
+        let n_blanks = self.input.skip_while_blank();
+        self.mark.index += n_blanks;
+        self.mark.col += n_blanks;
 
         let prefix = self.scan_tag_prefix(mark)?;
 
         self.input.lookahead(1);
 
-        if is_blank_or_breakz(self.input.peek()) {
+        if self.input.next_is_blank_or_breakz() {
             Ok(Token(*mark, TokenType::TagDirective(handle, prefix)))
         } else {
             Err(ScanError::new_str(
@@ -1076,7 +1075,7 @@ impl<T: Input> Scanner<T> {
         }
 
         if is_blank_or_breakz(self.input.look_ch())
-            || (self.flow_level > 0 && is_flow(self.input.peek()))
+            || (self.flow_level > 0 && self.input.next_is_flow())
         {
             // XXX: ex 7.2, an empty scalar can follow a secondary tag
             Ok(Token(start_mark, TokenType::Tag(handle, suffix)))
@@ -1100,10 +1099,9 @@ impl<T: Input> Scanner<T> {
         string.push(self.input.peek());
         self.skip_non_blank();
 
-        while is_alpha(self.input.look_ch()) {
-            string.push(self.input.peek());
-            self.skip_non_blank();
-        }
+        let n_chars = self.input.fetch_while_is_alpha(&mut string);
+        self.mark.index += n_chars;
+        self.mark.col += n_chars;
 
         // Check if the trailing character is '!' and copy it.
         if self.input.peek() == '!' {
@@ -1448,7 +1446,8 @@ impl<T: Input> Scanner<T> {
         }
 
         self.skip_ws_to_eol(SkipTabs::No)?;
-        if is_break(self.input.look_ch()) || is_flow(self.input.peek()) {
+        self.input.lookahead(1);
+        if self.input.next_is_break() || self.input.next_is_flow() {
             self.roll_one_col_indent();
         }
 
@@ -1513,7 +1512,8 @@ impl<T: Input> Scanner<T> {
                 chomping = Chomping::Strip;
             }
             self.skip_non_blank();
-            if is_digit(self.input.look_ch()) {
+            self.input.lookahead(1);
+            if self.input.next_is_digit() {
                 if self.input.peek() == '0' {
                     return Err(ScanError::new_str(
                         start_mark,
@@ -1523,7 +1523,7 @@ impl<T: Input> Scanner<T> {
                 increment = (self.input.peek() as usize) - ('0' as usize);
                 self.skip_non_blank();
             }
-        } else if is_digit(self.input.peek()) {
+        } else if self.input.next_is_digit() {
             if self.input.peek() == '0' {
                 return Err(ScanError::new_str(
                     start_mark,
@@ -1547,14 +1547,15 @@ impl<T: Input> Scanner<T> {
         self.skip_ws_to_eol(SkipTabs::Yes)?;
 
         // Check if we are at the end of the line.
-        if !is_breakz(self.input.look_ch()) {
+        self.input.lookahead(1);
+        if !self.input.next_is_breakz() {
             return Err(ScanError::new_str(
                 start_mark,
                 "while scanning a block scalar, did not find expected comment or line break",
             ));
         }
 
-        if is_break(self.input.peek()) {
+        if self.input.next_is_break() {
             self.input.lookahead(2);
             self.read_break(&mut chomping_break);
         }
@@ -1585,7 +1586,7 @@ impl<T: Input> Scanner<T> {
         // ```yaml
         // - |+
         // ```
-        if is_z(self.input.peek()) {
+        if self.input.next_is_z() {
             let contents = match chomping {
                 // We strip trailing linebreaks. Nothing remain.
                 Chomping::Strip => String::new(),
@@ -1612,7 +1613,7 @@ impl<T: Input> Scanner<T> {
 
         let mut line_buffer = String::with_capacity(100);
         let start_mark = self.mark;
-        while self.mark.col == indent && !is_z(self.input.peek()) {
+        while self.mark.col == indent && !self.input.next_is_z() {
             if indent == 0 {
                 self.input.lookahead(4);
                 if self.input.next_is_document_end() {
@@ -1621,7 +1622,7 @@ impl<T: Input> Scanner<T> {
             }
 
             // We are at the first content character of a content line.
-            trailing_blank = is_blank(self.input.peek());
+            trailing_blank = self.input.next_is_blank();
             if !literal && !leading_break.is_empty() && !leading_blank && !trailing_blank {
                 string.push_str(&trailing_breaks);
                 if trailing_breaks.is_empty() {
@@ -1635,12 +1636,12 @@ impl<T: Input> Scanner<T> {
             leading_break.clear();
             trailing_breaks.clear();
 
-            leading_blank = is_blank(self.input.peek());
+            leading_blank = self.input.next_is_blank();
 
             self.scan_block_scalar_content_line(&mut string, &mut line_buffer);
 
             // break on EOF
-            if is_z(self.input.peek()) {
+            if self.input.next_is_z() {
                 break;
             }
 
@@ -1657,7 +1658,7 @@ impl<T: Input> Scanner<T> {
             // If we had reached an eof but the last character wasn't an end-of-line, check if the
             // last line was indented at least as the rest of the scalar, then we need to consider
             // there is a newline.
-            if is_z(self.input.peek()) && self.mark.col >= indent.max(1) {
+            if self.input.next_is_z() && self.mark.col >= indent.max(1) {
                 string.push('\n');
             }
         }
@@ -1680,7 +1681,7 @@ impl<T: Input> Scanner<T> {
     /// line. This function does not consume the line break character(s) after the line.
     fn scan_block_scalar_content_line(&mut self, string: &mut String, line_buffer: &mut String) {
         // Start by evaluating characters in the buffer.
-        while !self.input.buf_is_empty() && !is_breakz(self.input.peek()) {
+        while !self.input.buf_is_empty() && !self.input.next_is_breakz() {
             string.push(self.input.peek());
             // We may technically skip non-blank characters. However, the only distinction is
             // to determine what is leading whitespace and what is not. Here, we read the
@@ -1752,7 +1753,7 @@ impl<T: Input> Scanner<T> {
             }
 
             // If our current line is empty, skip over the break and continue looping.
-            if is_break(self.input.peek()) {
+            if self.input.next_is_break() {
                 self.read_break(breaks);
             } else {
                 // Otherwise, we have a content line. Return control.
@@ -1777,7 +1778,7 @@ impl<T: Input> Scanner<T> {
                 max_indent = self.mark.col;
             }
 
-            if is_break(self.input.peek()) {
+            if self.input.next_is_break() {
                 // If our current line is empty, skip over the break and continue looping.
                 self.input.lookahead(2);
                 self.read_break(breaks);
@@ -1840,7 +1841,7 @@ impl<T: Input> Scanner<T> {
                 ));
             }
 
-            if is_z(self.input.peek()) {
+            if self.input.next_is_z() {
                 return Err(ScanError::new_str(
                     start_mark,
                     "while scanning a quoted scalar, found unexpected end of stream",
@@ -1869,8 +1870,8 @@ impl<T: Input> Scanner<T> {
             }
 
             // Consume blank characters.
-            while is_blank(self.input.peek()) || is_break(self.input.peek()) {
-                if is_blank(self.input.peek()) {
+            while self.input.next_is_blank() || self.input.next_is_break() {
+                if self.input.next_is_blank() {
                     // Consume a space or a tab character.
                     if leading_blanks {
                         if self.input.peek() == '\t' && (self.mark.col as isize) < self.indent {
@@ -2118,7 +2119,7 @@ impl<T: Input> Scanner<T> {
                 ));
             }
 
-            if !is_blank_or_breakz(self.input.peek())
+            if !self.input.next_is_blank_or_breakz()
                 && self.input.next_can_be_plain_scalar(self.flow_level > 0)
             {
                 if self.leading_whitespace {
@@ -2155,7 +2156,7 @@ impl<T: Input> Scanner<T> {
                     // hence the `for` loop looping `self.input.bufmaxlen() - 1` times.
                     self.input.lookahead(self.input.bufmaxlen());
                     for _ in 0..self.input.bufmaxlen() - 1 {
-                        if is_blank_or_breakz(self.input.peek())
+                        if self.input.next_is_blank_or_breakz()
                             || !self.input.next_can_be_plain_scalar(self.flow_level > 0)
                         {
                             end = true;
@@ -2172,13 +2173,14 @@ impl<T: Input> Scanner<T> {
             //  - We reach eof
             //  - We reach ": "
             //  - We find a flow character in a flow context
-            if !(is_blank(self.input.peek()) || is_break(self.input.peek())) {
+            if !(self.input.next_is_blank() || self.input.next_is_break()) {
                 break;
             }
 
             // Process blank characters.
-            while is_blank(self.input.look_ch()) || is_break(self.input.peek()) {
-                if is_blank(self.input.peek()) {
+            self.input.lookahead(2); // Breaks are read with 2 characters of lookahead.
+            while self.input.next_is_blank_or_break() {
+                if self.input.next_is_blank() {
                     if !self.leading_whitespace {
                         whitespaces.push(self.input.peek());
                         self.skip_blank();
@@ -2186,7 +2188,7 @@ impl<T: Input> Scanner<T> {
                         // Tabs in an indentation columns are allowed if and only if the line is
                         // empty. Skip to the end of the line.
                         self.skip_ws_to_eol(SkipTabs::Yes)?;
-                        if !is_breakz(self.input.peek()) {
+                        if !self.input.next_is_breakz() {
                             return Err(ScanError::new_str(
                                 start_mark,
                                 "while scanning a plain scalar, found a tab",
@@ -2196,7 +2198,6 @@ impl<T: Input> Scanner<T> {
                         self.skip_blank();
                     }
                 } else {
-                    self.input.lookahead(2);
                     // Check if it is a first line break
                     if self.leading_whitespace {
                         self.read_break(&mut trailing_breaks);
@@ -2206,6 +2207,7 @@ impl<T: Input> Scanner<T> {
                         self.leading_whitespace = true;
                     }
                 }
+                self.input.lookahead(2); // Same lookahead as before entering the loop.
             }
 
             // check indentation level
@@ -2309,7 +2311,7 @@ impl<T: Input> Scanner<T> {
         self.skip_non_blank();
         if self.input.look_ch() == '\t'
             && !self.skip_ws_to_eol(SkipTabs::Yes)?.has_valid_yaml_ws()
-            && (self.input.peek() == '-' || is_alpha(self.input.peek()))
+            && (self.input.peek() == '-' || self.input.next_is_alpha())
         {
             return Err(ScanError::new_str(
                 self.mark,