Move document indicator detection to Input.

This commit is contained in:
Ethiraric 2024-04-18 22:18:29 +02:00
parent afa1b2319f
commit 93a35ab6f7
2 changed files with 43 additions and 33 deletions

View file

@ -1,3 +1,5 @@
use crate::char_traits::is_blank_or_breakz;
/// Interface for a source of characters.
///
/// Hiding the input's implementation behind this trait allows mostly:
@ -104,7 +106,7 @@ pub trait Input {
/// Return whether the next 2 characters in the input source match the given characters.
///
/// This function assumes that the next 2 characters in the input has already been fetched
/// This function assumes that the next 2 characters in the input have already been fetched
/// through [`Input::lookahead`].
#[inline]
#[must_use]
@ -115,7 +117,7 @@ pub trait Input {
/// Return whether the next 3 characters in the input source match the given characters.
///
/// This function assumes that the next 3 characters in the input has already been fetched
/// This function assumes that the next 3 characters in the input have already been fetched
/// through [`Input::lookahead`].
#[inline]
#[must_use]
@ -123,4 +125,38 @@ pub trait Input {
assert!(self.buflen() >= 3);
self.peek() == c1 && self.peek_nth(1) == c2 && self.peek_nth(2) == c3
}
/// Tell whether the input is positioned on a document indicator (`---` or `...`).
///
/// The result is `true` when the character at offset 3 satisfies `is_blank_or_breakz` and the
/// first 3 characters are either `...` or `---`.
///
/// This function assumes that the next 4 characters in the input have already been fetched
/// through [`Input::lookahead`].
///
/// # Panics
/// Panics if `self.buflen()` is less than 4.
#[inline]
#[must_use]
fn next_is_document_indicator(&self) -> bool {
    assert!(self.buflen() >= 4);
    // Same evaluation order as a short-circuiting `&&`: the 4th character is tested first.
    if !is_blank_or_breakz(self.peek_nth(3)) {
        return false;
    }
    self.next_3_are('.', '.', '.') || self.next_3_are('-', '-', '-')
}
/// Tell whether the input is positioned on a start-of-document marker (`---`).
///
/// The result is `true` when the first 3 characters are `---` and the character at offset 3
/// satisfies `is_blank_or_breakz`.
///
/// This function assumes that the next 4 characters in the input have already been fetched
/// through [`Input::lookahead`].
///
/// # Panics
/// Panics if `self.buflen()` is less than 4.
#[inline]
#[must_use]
fn next_is_document_start(&self) -> bool {
    assert!(self.buflen() >= 4);
    let starts_with_dashes = self.next_3_are('-', '-', '-');
    // The 4th character is only inspected once the three dashes have matched.
    starts_with_dashes && is_blank_or_breakz(self.peek_nth(3))
}
/// Tell whether the input is positioned on an end-of-document marker (`...`).
///
/// The result is `true` when the first 3 characters are `...` and the character at offset 3
/// satisfies `is_blank_or_breakz`.
///
/// This function assumes that the next 4 characters in the input have already been fetched
/// through [`Input::lookahead`].
///
/// # Panics
/// Panics if `self.buflen()` is less than 4.
#[inline]
#[must_use]
fn next_is_document_end(&self) -> bool {
    assert!(self.buflen() >= 4);
    // Bail out early unless the three dots are present; only then look at the 4th character.
    if !self.next_3_are('.', '.', '.') {
        return false;
    }
    is_blank_or_breakz(self.peek_nth(3))
}
}

View file

@ -575,32 +575,6 @@ impl<T: Input> Scanner<T> {
s.push('\n');
}
/// Check whether the next characters correspond to a start of document.
///
/// Returns `true` when the next 3 characters of `self.input` are `---` and the character at
/// offset 3 satisfies `is_blank_or_breakz`.
///
/// [`Self::lookahead`] must have been called before calling this function.
///
/// # Panics
/// Panics if `self.input.buflen()` is less than 4.
fn next_is_document_start(&self) -> bool {
assert!(self.input.buflen() >= 4);
self.input.next_3_are('-', '-', '-') && is_blank_or_breakz(self.input.peek_nth(3))
}
/// Check whether the next characters correspond to an end of document.
///
/// Returns `true` when the next 3 characters of `self.input` are `...` and the character at
/// offset 3 satisfies `is_blank_or_breakz`.
///
/// [`Self::lookahead`] must have been called before calling this function.
///
/// # Panics
/// Panics if `self.input.buflen()` is less than 4.
fn next_is_document_end(&self) -> bool {
assert!(self.input.buflen() >= 4);
self.input.next_3_are('.', '.', '.') && is_blank_or_breakz(self.input.peek_nth(3))
}
/// Check whether the next characters correspond to a document indicator.
///
/// Returns `true` when the character at offset 3 of `self.input` satisfies
/// `is_blank_or_breakz` and the next 3 characters are either `...` or `---`.
///
/// [`Self::lookahead`] must have been called before calling this function.
///
/// # Panics
/// Panics if `self.input.buflen()` is less than 4.
#[inline]
fn next_is_document_indicator(&self) -> bool {
assert!(self.input.buflen() >= 4);
// The 4th character is tested first; the 3-character markers are only compared if it passes.
is_blank_or_breakz(self.input.peek_nth(3))
&& (self.input.next_3_are('.', '.', '.') || self.input.next_3_are('-', '-', '-'))
}
/// Insert a token at the given position.
fn insert_token(&mut self, pos: usize, tok: Token) {
let old_len = self.tokens.len();
@ -650,9 +624,9 @@ impl<T: Input> Scanner<T> {
if self.mark.col == 0 {
if self.input.next_char_is('%') {
return self.fetch_directive();
} else if self.next_is_document_start() {
} else if self.input.next_is_document_start() {
return self.fetch_document_indicator(TokenType::DocumentStart);
} else if self.next_is_document_end() {
} else if self.input.next_is_document_end() {
self.fetch_document_indicator(TokenType::DocumentEnd)?;
self.skip_ws_to_eol(SkipTabs::Yes)?;
if !is_breakz(self.input.peek()) {
@ -1667,7 +1641,7 @@ impl<T: Input> Scanner<T> {
while self.mark.col == indent && !is_z(self.input.peek()) {
if indent == 0 {
self.input.lookahead(4);
if self.next_is_document_end() {
if self.input.next_is_document_end() {
break;
}
}
@ -1885,7 +1859,7 @@ impl<T: Input> Scanner<T> {
/* Check for a document indicator. */
self.input.lookahead(4);
if self.mark.col == 0 && self.next_is_document_indicator() {
if self.mark.col == 0 && self.input.next_is_document_indicator() {
return Err(ScanError::new_str(
start_mark,
"while scanning a quoted scalar, found unexpected document indicator",
@ -2159,7 +2133,7 @@ impl<T: Input> Scanner<T> {
loop {
self.input.lookahead(4);
if self.next_is_document_indicator() || self.input.peek() == '#' {
if self.input.next_is_document_indicator() || self.input.peek() == '#' {
break;
}