diff --git a/parser/src/input.rs b/parser/src/input.rs index 3fc3d0e..14b5e95 100644 --- a/parser/src/input.rs +++ b/parser/src/input.rs @@ -4,7 +4,7 @@ pub mod str; #[allow(clippy::module_name_repetitions)] pub use buffered::BufferedInput; -use crate::char_traits::{is_blank_or_breakz, is_breakz}; +use crate::char_traits::{is_blank_or_breakz, is_breakz, is_flow}; /// Interface for a source of characters. /// @@ -215,6 +215,21 @@ pub trait Input { Ok(SkipTabs::Result(encountered_tab, has_yaml_ws)), ) } + + /// Check whether the next characters may be part of a plain scalar. + /// + /// This function assumes we are not given a blankz character. + #[allow(clippy::inline_always)] + #[inline(always)] + fn next_can_be_plain_scalar(&self, in_flow: bool) -> bool { + let nc = self.peek_nth(1); + match self.peek() { + // indicators can end a plain scalar, see 7.3.3. Plain Style + ':' if is_blank_or_breakz(nc) || (in_flow && is_flow(nc)) => false, + c if in_flow && is_flow(c) => false, + _ => true, + } + } } /// Behavior to adopt regarding treating tabs as whitespace. diff --git a/parser/src/input/str.rs b/parser/src/input/str.rs index 05e78f6..e50adac 100644 --- a/parser/src/input/str.rs +++ b/parser/src/input/str.rs @@ -1,5 +1,5 @@ use crate::{ - char_traits::{is_blank_or_breakz, is_breakz}, + char_traits::{is_blank_or_breakz, is_breakz, is_flow}, input::{Input, SkipTabs}, }; @@ -248,6 +248,28 @@ impl<'a> Input for StrInput<'a> { Ok(SkipTabs::Result(encountered_tab, has_yaml_ws)), ) } + + #[allow(clippy::inline_always)] + #[inline(always)] + fn next_can_be_plain_scalar(&self, in_flow: bool) -> bool { + let c = self.buffer.as_bytes()[0]; + if self.buffer.len() > 1 { + let nc = self.buffer.as_bytes()[1]; + match c { + // indicators can end a plain scalar, see 7.3.3. Plain Style + b':' if is_blank_or_breakz(nc as char) || (in_flow && is_flow(nc as char)) => false, + c if in_flow && is_flow(c as char) => false, + _ => true, + } + } else { + match c { + // indicators can end a plain scalar, see 7.3.3. Plain Style + b':' => false, + c if in_flow && is_flow(c as char) => false, + _ => true, + } + } + } } /// The buffer size we return to the scanner. diff --git a/parser/src/scanner.rs b/parser/src/scanner.rs index 7c9f69b..e2c92ec 100644 --- a/parser/src/scanner.rs +++ b/parser/src/scanner.rs @@ -2119,11 +2119,7 @@ impl Scanner { } if !is_blank_or_breakz(self.input.peek()) - && next_can_be_plain_scalar( - self.input.peek(), - self.input.peek_nth(1), - self.flow_level > 0, - ) + && self.input.next_can_be_plain_scalar(self.flow_level > 0) { if self.leading_whitespace { if leading_break.is_empty() { @@ -2159,20 +2155,13 @@ impl Scanner { // hence the `for` loop looping `self.input.bufmaxlen() - 1` times. self.input.lookahead(self.input.bufmaxlen()); for _ in 0..self.input.bufmaxlen() - 1 { - // We need to have `c` and `nc`'s assignations at the beginning of the - // loop. If at the end of it, we will peek one index further than we - // looked ahead. On the first iteration of the loop, `c` is a characte we - // already pushed in `string` a bit earlier. if is_blank_or_breakz(self.input.peek()) - || !next_can_be_plain_scalar( - self.input.peek(), - self.input.peek_nth(1), - self.flow_level > 0, - ) + || !self.input.next_can_be_plain_scalar(self.flow_level > 0) { end = true; break; } + assert!(string.len() < string.capacity()); string.push(self.input.peek()); self.skip_non_blank(); } @@ -2531,21 +2520,6 @@ pub enum Chomping { Keep, } -/// Check whether the next characters may be part of a plain scalar. -/// -/// This function assumes we are not given a blankz character. -// For some reason, `#[inline]` is not enough. -#[allow(clippy::inline_always)] -#[inline(always)] -pub fn next_can_be_plain_scalar(c: char, nc: char, in_flow: bool) -> bool { - match c { - // indicators can end a plain scalar, see 7.3.3. Plain Style - ':' if is_blank_or_breakz(nc) || (in_flow && is_flow(nc)) => false, - c if in_flow && is_flow(c) => false, - _ => true, - } -} - #[cfg(test)] mod test { #[test]