From 93a35ab6f71905f62e1d2aadd48691faf9b287da Mon Sep 17 00:00:00 2001 From: Ethiraric Date: Thu, 18 Apr 2024 22:18:29 +0200 Subject: [PATCH] Move document indicator detection to `Input`. --- parser/src/input.rs | 40 ++++++++++++++++++++++++++++++++++++++-- parser/src/scanner.rs | 36 +++++------------------------------- 2 files changed, 43 insertions(+), 33 deletions(-) diff --git a/parser/src/input.rs b/parser/src/input.rs index fe7c662..1e9711b 100644 --- a/parser/src/input.rs +++ b/parser/src/input.rs @@ -1,3 +1,5 @@ +use crate::char_traits::is_blank_or_breakz; + /// Interface for a source of characters. /// /// Hiding the input's implementation behind this trait allows mostly: @@ -104,7 +106,7 @@ pub trait Input { /// Return whether the next 2 characters in the input source match the given characters. /// - /// This function assumes that the next 2 characters in the input has already been fetched + /// This function assumes that the next 2 characters in the input have already been fetched /// through [`Input::lookahead`]. #[inline] #[must_use] @@ -115,7 +117,7 @@ pub trait Input { /// Return whether the next 3 characters in the input source match the given characters. /// - /// This function assumes that the next 3 characters in the input has already been fetched + /// This function assumes that the next 3 characters in the input have already been fetched /// through [`Input::lookahead`]. #[inline] #[must_use] @@ -123,4 +125,38 @@ pub trait Input { assert!(self.buflen() >= 3); self.peek() == c1 && self.peek_nth(1) == c2 && self.peek_nth(2) == c3 } + + /// Check whether the next characters correspond to a document indicator. + /// + /// This function assumes that the next 4 characters in the input have already been fetched + /// through [`Input::lookahead`]. 
+ #[inline] + #[must_use] + fn next_is_document_indicator(&self) -> bool { + assert!(self.buflen() >= 4); + is_blank_or_breakz(self.peek_nth(3)) + && (self.next_3_are('.', '.', '.') || self.next_3_are('-', '-', '-')) + } + + /// Check whether the next characters correspond to a start of document. + /// + /// This function assumes that the next 4 characters in the input have already been fetched + /// through [`Input::lookahead`]. + #[inline] + #[must_use] + fn next_is_document_start(&self) -> bool { + assert!(self.buflen() >= 4); + self.next_3_are('-', '-', '-') && is_blank_or_breakz(self.peek_nth(3)) + } + + /// Check whether the next characters correspond to an end of document. + /// + /// This function assumes that the next 4 characters in the input have already been fetched + /// through [`Input::lookahead`]. + #[inline] + #[must_use] + fn next_is_document_end(&self) -> bool { + assert!(self.buflen() >= 4); + self.next_3_are('.', '.', '.') && is_blank_or_breakz(self.peek_nth(3)) + } } diff --git a/parser/src/scanner.rs b/parser/src/scanner.rs index 971697c..afc290e 100644 --- a/parser/src/scanner.rs +++ b/parser/src/scanner.rs @@ -575,32 +575,6 @@ impl Scanner { s.push('\n'); } - /// Check whether the next characters correspond to a start of document. - /// - /// [`Self::lookahead`] must have been called before calling this function. - fn next_is_document_start(&self) -> bool { - assert!(self.input.buflen() >= 4); - self.input.next_3_are('-', '-', '-') && is_blank_or_breakz(self.input.peek_nth(3)) - } - - /// Check whether the next characters correspond to an end of document. - /// - /// [`Self::lookahead`] must have been called before calling this function. - fn next_is_document_end(&self) -> bool { - assert!(self.input.buflen() >= 4); - self.input.next_3_are('.', '.', '.') && is_blank_or_breakz(self.input.peek_nth(3)) - } - - /// Check whether the next characters correspond to a document indicator. 
- /// - /// [`Self::lookahead`] must have been called before calling this function. - #[inline] - fn next_is_document_indicator(&self) -> bool { - assert!(self.input.buflen() >= 4); - is_blank_or_breakz(self.input.peek_nth(3)) - && (self.input.next_3_are('.', '.', '.') || self.input.next_3_are('-', '-', '-')) - } - /// Insert a token at the given position. fn insert_token(&mut self, pos: usize, tok: Token) { let old_len = self.tokens.len(); @@ -650,9 +624,9 @@ impl Scanner { if self.mark.col == 0 { if self.input.next_char_is('%') { return self.fetch_directive(); - } else if self.next_is_document_start() { + } else if self.input.next_is_document_start() { return self.fetch_document_indicator(TokenType::DocumentStart); - } else if self.next_is_document_end() { + } else if self.input.next_is_document_end() { self.fetch_document_indicator(TokenType::DocumentEnd)?; self.skip_ws_to_eol(SkipTabs::Yes)?; if !is_breakz(self.input.peek()) { @@ -1667,7 +1641,7 @@ impl Scanner { while self.mark.col == indent && !is_z(self.input.peek()) { if indent == 0 { self.input.lookahead(4); - if self.next_is_document_end() { + if self.input.next_is_document_end() { break; } } @@ -1885,7 +1859,7 @@ impl Scanner { /* Check for a document indicator. */ self.input.lookahead(4); - if self.mark.col == 0 && self.next_is_document_indicator() { + if self.mark.col == 0 && self.input.next_is_document_indicator() { return Err(ScanError::new_str( start_mark, "while scanning a quoted scalar, found unexpected document indicator", @@ -2159,7 +2133,7 @@ impl Scanner { loop { self.input.lookahead(4); - if self.next_is_document_indicator() || self.input.peek() == '#' { + if self.input.next_is_document_indicator() || self.input.peek() == '#' { break; }