Move scanning low-level functions to Input
.
This commit is contained in:
parent
696ca59a16
commit
93b7e55bcf
3 changed files with 375 additions and 69 deletions
|
@ -4,7 +4,9 @@ pub mod str;
|
||||||
#[allow(clippy::module_name_repetitions)]
|
#[allow(clippy::module_name_repetitions)]
|
||||||
pub use buffered::BufferedInput;
|
pub use buffered::BufferedInput;
|
||||||
|
|
||||||
use crate::char_traits::{is_blank_or_breakz, is_breakz, is_flow};
|
use crate::char_traits::{
|
||||||
|
is_alpha, is_blank, is_blank_or_breakz, is_break, is_breakz, is_digit, is_flow, is_z,
|
||||||
|
};
|
||||||
|
|
||||||
/// Interface for a source of characters.
|
/// Interface for a source of characters.
|
||||||
///
|
///
|
||||||
|
@ -170,7 +172,7 @@ pub trait Input {
|
||||||
///
|
///
|
||||||
/// # Return
|
/// # Return
|
||||||
/// Return a tuple with the number of characters that were consumed and the result of skipping
|
/// Return a tuple with the number of characters that were consumed and the result of skipping
|
||||||
/// whitespace. The number of characters returned can be used to advance the index and columns,
|
/// whitespace. The number of characters returned can be used to advance the index and column,
|
||||||
/// since no end-of-line character will be consumed.
|
/// since no end-of-line character will be consumed.
|
||||||
/// See [`SkipTabs`] for more details on the success variant.
|
/// See [`SkipTabs`] for more details on the success variant.
|
||||||
///
|
///
|
||||||
|
@ -230,6 +232,188 @@ pub trait Input {
|
||||||
_ => true,
|
_ => true,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Check whether the next character is [a blank] or [a break].
|
||||||
|
///
|
||||||
|
/// The character must have previously been fetched through [`lookahead`].
|
||||||
|
///
|
||||||
|
/// # Return
|
||||||
|
/// Returns true if the character is [a blank] or [a break], false otherwise.
|
||||||
|
///
|
||||||
|
/// [`lookahead`]: Input::lookahead
|
||||||
|
/// [a blank]: is_blank
|
||||||
|
/// [a break]: is_break
|
||||||
|
#[inline]
|
||||||
|
fn next_is_blank_or_break(&self) -> bool {
|
||||||
|
is_blank(self.peek()) || is_break(self.peek())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Check whether the next character is [a blank] or [a breakz].
|
||||||
|
///
|
||||||
|
/// The character must have previously been fetched through [`lookahead`].
|
||||||
|
///
|
||||||
|
/// # Return
|
||||||
|
/// Returns true if the character is [a blank] or [a breakz], false otherwise.
|
||||||
|
///
|
||||||
|
/// [`lookahead`]: Input::lookahead
|
||||||
|
/// [a blank]: is_blank
|
||||||
|
/// [a breakz]: is_breakz
|
||||||
|
#[inline]
|
||||||
|
fn next_is_blank_or_breakz(&self) -> bool {
|
||||||
|
is_blank(self.peek()) || is_breakz(self.peek())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Check whether the next character is [a blank].
|
||||||
|
///
|
||||||
|
/// The character must have previously been fetched through [`lookahead`].
|
||||||
|
///
|
||||||
|
/// # Return
|
||||||
|
/// Returns true if the character is [a blank], false otherwise.
|
||||||
|
///
|
||||||
|
/// [`lookahead`]: Input::lookahead
|
||||||
|
/// [a blank]: is_blank
|
||||||
|
#[inline]
|
||||||
|
fn next_is_blank(&self) -> bool {
|
||||||
|
is_blank(self.peek())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Check whether the next character is [a break].
|
||||||
|
///
|
||||||
|
/// The character must have previously been fetched through [`lookahead`].
|
||||||
|
///
|
||||||
|
/// # Return
|
||||||
|
/// Returns true if the character is [a break], false otherwise.
|
||||||
|
///
|
||||||
|
/// [`lookahead`]: Input::lookahead
|
||||||
|
/// [a break]: is_break
|
||||||
|
#[inline]
|
||||||
|
fn next_is_break(&self) -> bool {
|
||||||
|
is_break(self.peek())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Check whether the next character is [a breakz].
|
||||||
|
///
|
||||||
|
/// The character must have previously been fetched through [`lookahead`].
|
||||||
|
///
|
||||||
|
/// # Return
|
||||||
|
/// Returns true if the character is [a breakz], false otherwise.
|
||||||
|
///
|
||||||
|
/// [`lookahead`]: Input::lookahead
|
||||||
|
/// [a breakz]: is_breakz
|
||||||
|
#[inline]
|
||||||
|
fn next_is_breakz(&self) -> bool {
|
||||||
|
is_breakz(self.peek())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Check whether the next character is [a z].
|
||||||
|
///
|
||||||
|
/// The character must have previously been fetched through [`lookahead`].
|
||||||
|
///
|
||||||
|
/// # Return
|
||||||
|
/// Returns true if the character is [a z], false otherwise.
|
||||||
|
///
|
||||||
|
/// [`lookahead`]: Input::lookahead
|
||||||
|
/// [a z]: is_z
|
||||||
|
#[inline]
|
||||||
|
fn next_is_z(&self) -> bool {
|
||||||
|
is_z(self.peek())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Check whether the next character is [a flow].
|
||||||
|
///
|
||||||
|
/// The character must have previously been fetched through [`lookahead`].
|
||||||
|
///
|
||||||
|
/// # Return
|
||||||
|
/// Returns true if the character is [a flow], false otherwise.
|
||||||
|
///
|
||||||
|
/// [`lookahead`]: Input::lookahead
|
||||||
|
/// [a flow]: is_flow
|
||||||
|
#[inline]
|
||||||
|
fn next_is_flow(&self) -> bool {
|
||||||
|
is_flow(self.peek())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Check whether the next character is [a digit].
|
||||||
|
///
|
||||||
|
/// The character must have previously been fetched through [`lookahead`].
|
||||||
|
///
|
||||||
|
/// # Return
|
||||||
|
/// Returns true if the character is [a digit], false otherwise.
|
||||||
|
///
|
||||||
|
/// [`lookahead`]: Input::lookahead
|
||||||
|
/// [a digit]: is_digit
|
||||||
|
#[inline]
|
||||||
|
fn next_is_digit(&self) -> bool {
|
||||||
|
is_digit(self.peek())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Check whether the next character is [a letter].
|
||||||
|
///
|
||||||
|
/// The character must have previously been fetched through [`lookahead`].
|
||||||
|
///
|
||||||
|
/// # Return
|
||||||
|
/// Returns true if the character is [a letter], false otherwise.
|
||||||
|
///
|
||||||
|
/// [`lookahead`]: Input::lookahead
|
||||||
|
/// [a letter]: is_alpha
|
||||||
|
#[inline]
|
||||||
|
fn next_is_alpha(&self) -> bool {
|
||||||
|
is_alpha(self.peek())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Skip characters from the input until a [breakz] is found.
|
||||||
|
///
|
||||||
|
/// The characters are consumed from the input.
|
||||||
|
///
|
||||||
|
/// # Return
|
||||||
|
/// Return the number of characters that were consumed. The number of characters returned can
|
||||||
|
/// be used to advance the index and column, since no end-of-line character will be consumed.
|
||||||
|
///
|
||||||
|
/// [breakz]: is_breakz
|
||||||
|
#[inline]
|
||||||
|
fn skip_while_non_breakz(&mut self) -> usize {
|
||||||
|
let mut count = 0;
|
||||||
|
while !is_breakz(self.look_ch()) {
|
||||||
|
count += 1;
|
||||||
|
self.skip();
|
||||||
|
}
|
||||||
|
count
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Skip characters from the input while [blanks] are found.
|
||||||
|
///
|
||||||
|
/// The characters are consumed from the input.
|
||||||
|
///
|
||||||
|
/// # Return
|
||||||
|
/// Return the number of characters that were consumed. The number of characters returned can
|
||||||
|
/// be used to advance the index and column, since no end-of-line character will be consumed.
|
||||||
|
///
|
||||||
|
/// [blanks]: is_blank
|
||||||
|
fn skip_while_blank(&mut self) -> usize {
|
||||||
|
let mut n_chars = 0;
|
||||||
|
while is_blank(self.look_ch()) {
|
||||||
|
n_chars += 1;
|
||||||
|
self.skip();
|
||||||
|
}
|
||||||
|
n_chars
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Fetch characters from the input while we encounter letters and store them in `out`.
|
||||||
|
///
|
||||||
|
/// The characters are consumed from the input.
|
||||||
|
///
|
||||||
|
/// # Return
|
||||||
|
/// Return the number of characters that were consumed. The number of characters returned can
|
||||||
|
/// be used to advance the index and column, since no end-of-line character will be consumed.
|
||||||
|
fn fetch_while_is_alpha(&mut self, out: &mut String) -> usize {
|
||||||
|
let mut n_chars = 0;
|
||||||
|
while is_alpha(self.look_ch()) {
|
||||||
|
n_chars += 1;
|
||||||
|
out.push(self.peek());
|
||||||
|
self.skip();
|
||||||
|
}
|
||||||
|
n_chars
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Behavior to adopt regarding treating tabs as whitespace.
|
/// Behavior to adopt regarding treating tabs as whitespace.
|
||||||
|
|
|
@ -1,5 +1,7 @@
|
||||||
use crate::{
|
use crate::{
|
||||||
char_traits::{is_blank_or_breakz, is_breakz, is_flow},
|
char_traits::{
|
||||||
|
is_alpha, is_blank, is_blank_or_breakz, is_break, is_breakz, is_digit, is_flow, is_z,
|
||||||
|
},
|
||||||
input::{Input, SkipTabs},
|
input::{Input, SkipTabs},
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -60,7 +62,9 @@ impl<'a> Input for StrInput<'a> {
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
fn push_back(&mut self, c: char) {
|
fn push_back(&mut self, c: char) {
|
||||||
self.buffer = put_back_in_str(self.buffer, c);
|
// SAFETY: The precondition of this function is that the character we are given is the one
|
||||||
|
// immediately preceding `self.buffer`.
|
||||||
|
self.buffer = unsafe { put_back_in_str(self.buffer, c) };
|
||||||
}
|
}
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
|
@ -270,6 +274,122 @@ impl<'a> Input for StrInput<'a> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
fn next_is_blank_or_break(&self) -> bool {
|
||||||
|
!self.buffer.is_empty()
|
||||||
|
&& (is_blank(self.buffer.as_bytes()[0] as char)
|
||||||
|
|| is_break(self.buffer.as_bytes()[0] as char))
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
fn next_is_blank_or_breakz(&self) -> bool {
|
||||||
|
self.buffer.is_empty()
|
||||||
|
|| (is_blank(self.buffer.as_bytes()[0] as char)
|
||||||
|
|| is_breakz(self.buffer.as_bytes()[0] as char))
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
fn next_is_blank(&self) -> bool {
|
||||||
|
!self.buffer.is_empty() && is_blank(self.buffer.as_bytes()[0] as char)
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
fn next_is_break(&self) -> bool {
|
||||||
|
!self.buffer.is_empty() && is_break(self.buffer.as_bytes()[0] as char)
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
fn next_is_breakz(&self) -> bool {
|
||||||
|
self.buffer.is_empty() || is_breakz(self.buffer.as_bytes()[0] as char)
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
fn next_is_z(&self) -> bool {
|
||||||
|
self.buffer.is_empty() || is_z(self.buffer.as_bytes()[0] as char)
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
fn next_is_flow(&self) -> bool {
|
||||||
|
!self.buffer.is_empty() && is_flow(self.buffer.as_bytes()[0] as char)
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
fn next_is_digit(&self) -> bool {
|
||||||
|
!self.buffer.is_empty() && is_digit(self.buffer.as_bytes()[0] as char)
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
fn next_is_alpha(&self) -> bool {
|
||||||
|
!self.buffer.is_empty() && is_alpha(self.buffer.as_bytes()[0] as char)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn skip_while_non_breakz(&mut self) -> usize {
|
||||||
|
let mut found_breakz = false;
|
||||||
|
let mut count = 0;
|
||||||
|
|
||||||
|
// Skip over all non-breaks.
|
||||||
|
let mut chars = self.buffer.chars();
|
||||||
|
for c in chars.by_ref() {
|
||||||
|
if is_breakz(c) {
|
||||||
|
found_breakz = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
count += 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
self.buffer = if found_breakz {
|
||||||
|
// If we read a breakz, we need to put it back to the buffer.
|
||||||
|
// SAFETY: The last character we extracted is either a '\n', '\r' or '\0', all of which
|
||||||
|
// are 1-byte long.
|
||||||
|
unsafe { extend_left(chars.as_str(), 1) }
|
||||||
|
} else {
|
||||||
|
chars.as_str()
|
||||||
|
};
|
||||||
|
|
||||||
|
count
|
||||||
|
}
|
||||||
|
|
||||||
|
fn skip_while_blank(&mut self) -> usize {
|
||||||
|
// Since all characters we look for are ascii, we can directly use the byte API of str.
|
||||||
|
let mut i = 0;
|
||||||
|
while i < self.buffer.len() {
|
||||||
|
if !is_blank(self.buffer.as_bytes()[i] as char) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
i += 1;
|
||||||
|
}
|
||||||
|
self.buffer = &self.buffer[i..];
|
||||||
|
i
|
||||||
|
}
|
||||||
|
|
||||||
|
fn fetch_while_is_alpha(&mut self, out: &mut String) -> usize {
|
||||||
|
let mut not_alpha = None;
|
||||||
|
|
||||||
|
// Skip while we have alpha characters.
|
||||||
|
let mut chars = self.buffer.chars();
|
||||||
|
for c in chars.by_ref() {
|
||||||
|
if !is_alpha(c) {
|
||||||
|
not_alpha = Some(c);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
let remaining_string = if let Some(c) = not_alpha {
|
||||||
|
let n_bytes_read = chars.as_str().as_ptr() as usize - self.buffer.as_ptr() as usize;
|
||||||
|
let last_char_bytes = c.len_utf8();
|
||||||
|
&self.buffer[n_bytes_read - last_char_bytes..]
|
||||||
|
} else {
|
||||||
|
chars.as_str()
|
||||||
|
};
|
||||||
|
|
||||||
|
let n_bytes_to_append = remaining_string.as_ptr() as usize - self.buffer.as_ptr() as usize;
|
||||||
|
out.reserve(n_bytes_to_append);
|
||||||
|
out.push_str(&self.buffer[..n_bytes_to_append]);
|
||||||
|
self.buffer = remaining_string;
|
||||||
|
|
||||||
|
n_bytes_to_append
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// The buffer size we return to the scanner.
|
/// The buffer size we return to the scanner.
|
||||||
|
@ -309,13 +429,13 @@ const BUFFER_LEN: usize = 128;
|
||||||
/// assert_eq!(s1, s3);
|
/// assert_eq!(s1, s3);
|
||||||
/// assert_eq!(s1.as_ptr(), s3.as_ptr());
|
/// assert_eq!(s1.as_ptr(), s3.as_ptr());
|
||||||
/// ```
|
/// ```
|
||||||
fn put_back_in_str(s: &str, c: char) -> &str {
|
unsafe fn put_back_in_str(s: &str, c: char) -> &str {
|
||||||
let n_bytes = c.len_utf8();
|
let n_bytes = c.len_utf8();
|
||||||
|
|
||||||
// SAFETY: The character that gets pushed back is guaranteed to be the one that is
|
// SAFETY: The character that gets pushed back is guaranteed to be the one that is
|
||||||
// immediately preceding our buffer. We can compute the length of the character and move
|
// immediately preceding our buffer. We can compute the length of the character and move
|
||||||
// our buffer back that many bytes.
|
// our buffer back that many bytes.
|
||||||
unsafe { extend_left(s, n_bytes) }
|
extend_left(s, n_bytes)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Extend the string by moving the start pointer to the left by `n` bytes.
|
/// Extend the string by moving the start pointer to the left by `n` bytes.
|
||||||
|
@ -369,7 +489,7 @@ mod test {
|
||||||
pub fn put_back_in_str_example() {
|
pub fn put_back_in_str_example() {
|
||||||
let s1 = "foo";
|
let s1 = "foo";
|
||||||
let s2 = &s1[1..];
|
let s2 = &s1[1..];
|
||||||
let s3 = put_back_in_str(s2, 'f'); // OK, 'f' is the character immediately preceding
|
let s3 = unsafe { put_back_in_str(s2, 'f') }; // OK, 'f' is the character immediately preceding
|
||||||
assert_eq!(s1, s3);
|
assert_eq!(s1, s3);
|
||||||
assert_eq!(s1.as_ptr(), s3.as_ptr());
|
assert_eq!(s1.as_ptr(), s3.as_ptr());
|
||||||
}
|
}
|
||||||
|
|
|
@ -13,8 +13,8 @@ use std::{char, collections::VecDeque, error::Error, fmt};
|
||||||
|
|
||||||
use crate::{
|
use crate::{
|
||||||
char_traits::{
|
char_traits::{
|
||||||
as_hex, is_alpha, is_anchor_char, is_blank, is_blank_or_breakz, is_break, is_breakz,
|
as_hex, is_anchor_char, is_blank_or_breakz, is_break, is_breakz, is_flow, is_hex,
|
||||||
is_digit, is_flow, is_hex, is_tag_char, is_uri_char, is_z,
|
is_tag_char, is_uri_char,
|
||||||
},
|
},
|
||||||
input::{Input, SkipTabs},
|
input::{Input, SkipTabs},
|
||||||
};
|
};
|
||||||
|
@ -533,7 +533,7 @@ impl<T: Input> Scanner<T> {
|
||||||
// will be reset by `skip_nl`.
|
// will be reset by `skip_nl`.
|
||||||
self.skip_blank();
|
self.skip_blank();
|
||||||
self.skip_nl();
|
self.skip_nl();
|
||||||
} else if is_break(self.input.peek()) {
|
} else if self.input.next_is_break() {
|
||||||
self.skip_nl();
|
self.skip_nl();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -616,7 +616,7 @@ impl<T: Input> Scanner<T> {
|
||||||
|
|
||||||
self.input.lookahead(4);
|
self.input.lookahead(4);
|
||||||
|
|
||||||
if is_z(self.input.peek()) {
|
if self.input.next_is_z() {
|
||||||
self.fetch_stream_end()?;
|
self.fetch_stream_end()?;
|
||||||
return Ok(());
|
return Ok(());
|
||||||
}
|
}
|
||||||
|
@ -629,7 +629,7 @@ impl<T: Input> Scanner<T> {
|
||||||
} else if self.input.next_is_document_end() {
|
} else if self.input.next_is_document_end() {
|
||||||
self.fetch_document_indicator(TokenType::DocumentEnd)?;
|
self.fetch_document_indicator(TokenType::DocumentEnd)?;
|
||||||
self.skip_ws_to_eol(SkipTabs::Yes)?;
|
self.skip_ws_to_eol(SkipTabs::Yes)?;
|
||||||
if !is_breakz(self.input.peek()) {
|
if !self.input.next_is_breakz() {
|
||||||
return Err(ScanError::new_str(
|
return Err(ScanError::new_str(
|
||||||
self.mark,
|
self.mark,
|
||||||
"invalid content after document end marker",
|
"invalid content after document end marker",
|
||||||
|
@ -784,7 +784,7 @@ impl<T: Input> Scanner<T> {
|
||||||
{
|
{
|
||||||
self.skip_ws_to_eol(SkipTabs::Yes)?;
|
self.skip_ws_to_eol(SkipTabs::Yes)?;
|
||||||
// If we have content on that line with a tab, return an error.
|
// If we have content on that line with a tab, return an error.
|
||||||
if !is_breakz(self.input.peek()) {
|
if !self.input.next_is_breakz() {
|
||||||
return Err(ScanError::new_str(
|
return Err(ScanError::new_str(
|
||||||
self.mark,
|
self.mark,
|
||||||
"tabs disallowed within this context (block indentation)",
|
"tabs disallowed within this context (block indentation)",
|
||||||
|
@ -800,9 +800,9 @@ impl<T: Input> Scanner<T> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
'#' => {
|
'#' => {
|
||||||
while !is_breakz(self.input.look_ch()) {
|
let comment_length = self.input.skip_while_non_breakz();
|
||||||
self.skip_non_blank();
|
self.mark.index += comment_length;
|
||||||
}
|
self.mark.col += comment_length;
|
||||||
}
|
}
|
||||||
_ => break,
|
_ => break,
|
||||||
}
|
}
|
||||||
|
@ -832,9 +832,9 @@ impl<T: Input> Scanner<T> {
|
||||||
need_whitespace = false;
|
need_whitespace = false;
|
||||||
}
|
}
|
||||||
'#' => {
|
'#' => {
|
||||||
while !is_breakz(self.input.look_ch()) {
|
let comment_length = self.input.skip_while_non_breakz();
|
||||||
self.skip_non_blank();
|
self.mark.index += comment_length;
|
||||||
}
|
self.mark.col += comment_length;
|
||||||
}
|
}
|
||||||
_ => break,
|
_ => break,
|
||||||
}
|
}
|
||||||
|
@ -912,9 +912,9 @@ impl<T: Input> Scanner<T> {
|
||||||
// XXX This should be a warning instead of an error
|
// XXX This should be a warning instead of an error
|
||||||
_ => {
|
_ => {
|
||||||
// skip current line
|
// skip current line
|
||||||
while !is_breakz(self.input.look_ch()) {
|
let line_len = self.input.skip_while_non_breakz();
|
||||||
self.skip_non_blank();
|
self.mark.index += line_len;
|
||||||
}
|
self.mark.col += line_len;
|
||||||
// XXX return an empty TagDirective token
|
// XXX return an empty TagDirective token
|
||||||
Token(
|
Token(
|
||||||
start_mark,
|
start_mark,
|
||||||
|
@ -927,7 +927,7 @@ impl<T: Input> Scanner<T> {
|
||||||
|
|
||||||
self.skip_ws_to_eol(SkipTabs::Yes)?;
|
self.skip_ws_to_eol(SkipTabs::Yes)?;
|
||||||
|
|
||||||
if is_breakz(self.input.peek()) {
|
if self.input.next_is_breakz() {
|
||||||
self.input.lookahead(2);
|
self.input.lookahead(2);
|
||||||
self.skip_linebreak();
|
self.skip_linebreak();
|
||||||
Ok(tok)
|
Ok(tok)
|
||||||
|
@ -940,9 +940,9 @@ impl<T: Input> Scanner<T> {
|
||||||
}
|
}
|
||||||
|
|
||||||
fn scan_version_directive_value(&mut self, mark: &Marker) -> Result<Token, ScanError> {
|
fn scan_version_directive_value(&mut self, mark: &Marker) -> Result<Token, ScanError> {
|
||||||
while is_blank(self.input.look_ch()) {
|
let n_blanks = self.input.skip_while_blank();
|
||||||
self.skip_blank();
|
self.mark.index += n_blanks;
|
||||||
}
|
self.mark.col += n_blanks;
|
||||||
|
|
||||||
let major = self.scan_version_directive_number(mark)?;
|
let major = self.scan_version_directive_number(mark)?;
|
||||||
|
|
||||||
|
@ -962,10 +962,10 @@ impl<T: Input> Scanner<T> {
|
||||||
fn scan_directive_name(&mut self) -> Result<String, ScanError> {
|
fn scan_directive_name(&mut self) -> Result<String, ScanError> {
|
||||||
let start_mark = self.mark;
|
let start_mark = self.mark;
|
||||||
let mut string = String::new();
|
let mut string = String::new();
|
||||||
while is_alpha(self.input.look_ch()) {
|
|
||||||
string.push(self.input.peek());
|
let n_chars = self.input.fetch_while_is_alpha(&mut string);
|
||||||
self.skip_non_blank();
|
self.mark.index += n_chars;
|
||||||
}
|
self.mark.col += n_chars;
|
||||||
|
|
||||||
if string.is_empty() {
|
if string.is_empty() {
|
||||||
return Err(ScanError::new_str(
|
return Err(ScanError::new_str(
|
||||||
|
@ -1010,22 +1010,21 @@ impl<T: Input> Scanner<T> {
|
||||||
}
|
}
|
||||||
|
|
||||||
fn scan_tag_directive_value(&mut self, mark: &Marker) -> Result<Token, ScanError> {
|
fn scan_tag_directive_value(&mut self, mark: &Marker) -> Result<Token, ScanError> {
|
||||||
/* Eat whitespaces. */
|
let n_blanks = self.input.skip_while_blank();
|
||||||
while is_blank(self.input.look_ch()) {
|
self.mark.index += n_blanks;
|
||||||
self.skip_blank();
|
self.mark.col += n_blanks;
|
||||||
}
|
|
||||||
let handle = self.scan_tag_handle(true, mark)?;
|
let handle = self.scan_tag_handle(true, mark)?;
|
||||||
|
|
||||||
/* Eat whitespaces. */
|
let n_blanks = self.input.skip_while_blank();
|
||||||
while is_blank(self.input.look_ch()) {
|
self.mark.index += n_blanks;
|
||||||
self.skip_blank();
|
self.mark.col += n_blanks;
|
||||||
}
|
|
||||||
|
|
||||||
let prefix = self.scan_tag_prefix(mark)?;
|
let prefix = self.scan_tag_prefix(mark)?;
|
||||||
|
|
||||||
self.input.lookahead(1);
|
self.input.lookahead(1);
|
||||||
|
|
||||||
if is_blank_or_breakz(self.input.peek()) {
|
if self.input.next_is_blank_or_breakz() {
|
||||||
Ok(Token(*mark, TokenType::TagDirective(handle, prefix)))
|
Ok(Token(*mark, TokenType::TagDirective(handle, prefix)))
|
||||||
} else {
|
} else {
|
||||||
Err(ScanError::new_str(
|
Err(ScanError::new_str(
|
||||||
|
@ -1076,7 +1075,7 @@ impl<T: Input> Scanner<T> {
|
||||||
}
|
}
|
||||||
|
|
||||||
if is_blank_or_breakz(self.input.look_ch())
|
if is_blank_or_breakz(self.input.look_ch())
|
||||||
|| (self.flow_level > 0 && is_flow(self.input.peek()))
|
|| (self.flow_level > 0 && self.input.next_is_flow())
|
||||||
{
|
{
|
||||||
// XXX: ex 7.2, an empty scalar can follow a secondary tag
|
// XXX: ex 7.2, an empty scalar can follow a secondary tag
|
||||||
Ok(Token(start_mark, TokenType::Tag(handle, suffix)))
|
Ok(Token(start_mark, TokenType::Tag(handle, suffix)))
|
||||||
|
@ -1100,10 +1099,9 @@ impl<T: Input> Scanner<T> {
|
||||||
string.push(self.input.peek());
|
string.push(self.input.peek());
|
||||||
self.skip_non_blank();
|
self.skip_non_blank();
|
||||||
|
|
||||||
while is_alpha(self.input.look_ch()) {
|
let n_chars = self.input.fetch_while_is_alpha(&mut string);
|
||||||
string.push(self.input.peek());
|
self.mark.index += n_chars;
|
||||||
self.skip_non_blank();
|
self.mark.col += n_chars;
|
||||||
}
|
|
||||||
|
|
||||||
// Check if the trailing character is '!' and copy it.
|
// Check if the trailing character is '!' and copy it.
|
||||||
if self.input.peek() == '!' {
|
if self.input.peek() == '!' {
|
||||||
|
@ -1448,7 +1446,8 @@ impl<T: Input> Scanner<T> {
|
||||||
}
|
}
|
||||||
|
|
||||||
self.skip_ws_to_eol(SkipTabs::No)?;
|
self.skip_ws_to_eol(SkipTabs::No)?;
|
||||||
if is_break(self.input.look_ch()) || is_flow(self.input.peek()) {
|
self.input.lookahead(1);
|
||||||
|
if self.input.next_is_break() || self.input.next_is_flow() {
|
||||||
self.roll_one_col_indent();
|
self.roll_one_col_indent();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1513,7 +1512,8 @@ impl<T: Input> Scanner<T> {
|
||||||
chomping = Chomping::Strip;
|
chomping = Chomping::Strip;
|
||||||
}
|
}
|
||||||
self.skip_non_blank();
|
self.skip_non_blank();
|
||||||
if is_digit(self.input.look_ch()) {
|
self.input.lookahead(1);
|
||||||
|
if self.input.next_is_digit() {
|
||||||
if self.input.peek() == '0' {
|
if self.input.peek() == '0' {
|
||||||
return Err(ScanError::new_str(
|
return Err(ScanError::new_str(
|
||||||
start_mark,
|
start_mark,
|
||||||
|
@ -1523,7 +1523,7 @@ impl<T: Input> Scanner<T> {
|
||||||
increment = (self.input.peek() as usize) - ('0' as usize);
|
increment = (self.input.peek() as usize) - ('0' as usize);
|
||||||
self.skip_non_blank();
|
self.skip_non_blank();
|
||||||
}
|
}
|
||||||
} else if is_digit(self.input.peek()) {
|
} else if self.input.next_is_digit() {
|
||||||
if self.input.peek() == '0' {
|
if self.input.peek() == '0' {
|
||||||
return Err(ScanError::new_str(
|
return Err(ScanError::new_str(
|
||||||
start_mark,
|
start_mark,
|
||||||
|
@ -1547,14 +1547,15 @@ impl<T: Input> Scanner<T> {
|
||||||
self.skip_ws_to_eol(SkipTabs::Yes)?;
|
self.skip_ws_to_eol(SkipTabs::Yes)?;
|
||||||
|
|
||||||
// Check if we are at the end of the line.
|
// Check if we are at the end of the line.
|
||||||
if !is_breakz(self.input.look_ch()) {
|
self.input.lookahead(1);
|
||||||
|
if !self.input.next_is_breakz() {
|
||||||
return Err(ScanError::new_str(
|
return Err(ScanError::new_str(
|
||||||
start_mark,
|
start_mark,
|
||||||
"while scanning a block scalar, did not find expected comment or line break",
|
"while scanning a block scalar, did not find expected comment or line break",
|
||||||
));
|
));
|
||||||
}
|
}
|
||||||
|
|
||||||
if is_break(self.input.peek()) {
|
if self.input.next_is_break() {
|
||||||
self.input.lookahead(2);
|
self.input.lookahead(2);
|
||||||
self.read_break(&mut chomping_break);
|
self.read_break(&mut chomping_break);
|
||||||
}
|
}
|
||||||
|
@ -1585,7 +1586,7 @@ impl<T: Input> Scanner<T> {
|
||||||
// ```yaml
|
// ```yaml
|
||||||
// - |+
|
// - |+
|
||||||
// ```
|
// ```
|
||||||
if is_z(self.input.peek()) {
|
if self.input.next_is_z() {
|
||||||
let contents = match chomping {
|
let contents = match chomping {
|
||||||
// We strip trailing linebreaks. Nothing remains.
|
// We strip trailing linebreaks. Nothing remains.
|
||||||
Chomping::Strip => String::new(),
|
Chomping::Strip => String::new(),
|
||||||
|
@ -1612,7 +1613,7 @@ impl<T: Input> Scanner<T> {
|
||||||
|
|
||||||
let mut line_buffer = String::with_capacity(100);
|
let mut line_buffer = String::with_capacity(100);
|
||||||
let start_mark = self.mark;
|
let start_mark = self.mark;
|
||||||
while self.mark.col == indent && !is_z(self.input.peek()) {
|
while self.mark.col == indent && !self.input.next_is_z() {
|
||||||
if indent == 0 {
|
if indent == 0 {
|
||||||
self.input.lookahead(4);
|
self.input.lookahead(4);
|
||||||
if self.input.next_is_document_end() {
|
if self.input.next_is_document_end() {
|
||||||
|
@ -1621,7 +1622,7 @@ impl<T: Input> Scanner<T> {
|
||||||
}
|
}
|
||||||
|
|
||||||
// We are at the first content character of a content line.
|
// We are at the first content character of a content line.
|
||||||
trailing_blank = is_blank(self.input.peek());
|
trailing_blank = self.input.next_is_blank();
|
||||||
if !literal && !leading_break.is_empty() && !leading_blank && !trailing_blank {
|
if !literal && !leading_break.is_empty() && !leading_blank && !trailing_blank {
|
||||||
string.push_str(&trailing_breaks);
|
string.push_str(&trailing_breaks);
|
||||||
if trailing_breaks.is_empty() {
|
if trailing_breaks.is_empty() {
|
||||||
|
@ -1635,12 +1636,12 @@ impl<T: Input> Scanner<T> {
|
||||||
leading_break.clear();
|
leading_break.clear();
|
||||||
trailing_breaks.clear();
|
trailing_breaks.clear();
|
||||||
|
|
||||||
leading_blank = is_blank(self.input.peek());
|
leading_blank = self.input.next_is_blank();
|
||||||
|
|
||||||
self.scan_block_scalar_content_line(&mut string, &mut line_buffer);
|
self.scan_block_scalar_content_line(&mut string, &mut line_buffer);
|
||||||
|
|
||||||
// break on EOF
|
// break on EOF
|
||||||
if is_z(self.input.peek()) {
|
if self.input.next_is_z() {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1657,7 +1658,7 @@ impl<T: Input> Scanner<T> {
|
||||||
// If we had reached an eof but the last character wasn't an end-of-line, check if the
|
// If we had reached an eof but the last character wasn't an end-of-line, check if the
|
||||||
// last line was indented at least as the rest of the scalar, then we need to consider
|
// last line was indented at least as the rest of the scalar, then we need to consider
|
||||||
// there is a newline.
|
// there is a newline.
|
||||||
if is_z(self.input.peek()) && self.mark.col >= indent.max(1) {
|
if self.input.next_is_z() && self.mark.col >= indent.max(1) {
|
||||||
string.push('\n');
|
string.push('\n');
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1680,7 +1681,7 @@ impl<T: Input> Scanner<T> {
|
||||||
/// line. This function does not consume the line break character(s) after the line.
|
/// line. This function does not consume the line break character(s) after the line.
|
||||||
fn scan_block_scalar_content_line(&mut self, string: &mut String, line_buffer: &mut String) {
|
fn scan_block_scalar_content_line(&mut self, string: &mut String, line_buffer: &mut String) {
|
||||||
// Start by evaluating characters in the buffer.
|
// Start by evaluating characters in the buffer.
|
||||||
while !self.input.buf_is_empty() && !is_breakz(self.input.peek()) {
|
while !self.input.buf_is_empty() && !self.input.next_is_breakz() {
|
||||||
string.push(self.input.peek());
|
string.push(self.input.peek());
|
||||||
// We may technically skip non-blank characters. However, the only distinction is
|
// We may technically skip non-blank characters. However, the only distinction is
|
||||||
// to determine what is leading whitespace and what is not. Here, we read the
|
// to determine what is leading whitespace and what is not. Here, we read the
|
||||||
|
@ -1752,7 +1753,7 @@ impl<T: Input> Scanner<T> {
|
||||||
}
|
}
|
||||||
|
|
||||||
// If our current line is empty, skip over the break and continue looping.
|
// If our current line is empty, skip over the break and continue looping.
|
||||||
if is_break(self.input.peek()) {
|
if self.input.next_is_break() {
|
||||||
self.read_break(breaks);
|
self.read_break(breaks);
|
||||||
} else {
|
} else {
|
||||||
// Otherwise, we have a content line. Return control.
|
// Otherwise, we have a content line. Return control.
|
||||||
|
@ -1777,7 +1778,7 @@ impl<T: Input> Scanner<T> {
|
||||||
max_indent = self.mark.col;
|
max_indent = self.mark.col;
|
||||||
}
|
}
|
||||||
|
|
||||||
if is_break(self.input.peek()) {
|
if self.input.next_is_break() {
|
||||||
// If our current line is empty, skip over the break and continue looping.
|
// If our current line is empty, skip over the break and continue looping.
|
||||||
self.input.lookahead(2);
|
self.input.lookahead(2);
|
||||||
self.read_break(breaks);
|
self.read_break(breaks);
|
||||||
|
@ -1840,7 +1841,7 @@ impl<T: Input> Scanner<T> {
|
||||||
));
|
));
|
||||||
}
|
}
|
||||||
|
|
||||||
if is_z(self.input.peek()) {
|
if self.input.next_is_z() {
|
||||||
return Err(ScanError::new_str(
|
return Err(ScanError::new_str(
|
||||||
start_mark,
|
start_mark,
|
||||||
"while scanning a quoted scalar, found unexpected end of stream",
|
"while scanning a quoted scalar, found unexpected end of stream",
|
||||||
|
@ -1869,8 +1870,8 @@ impl<T: Input> Scanner<T> {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Consume blank characters.
|
// Consume blank characters.
|
||||||
while is_blank(self.input.peek()) || is_break(self.input.peek()) {
|
while self.input.next_is_blank() || self.input.next_is_break() {
|
||||||
if is_blank(self.input.peek()) {
|
if self.input.next_is_blank() {
|
||||||
// Consume a space or a tab character.
|
// Consume a space or a tab character.
|
||||||
if leading_blanks {
|
if leading_blanks {
|
||||||
if self.input.peek() == '\t' && (self.mark.col as isize) < self.indent {
|
if self.input.peek() == '\t' && (self.mark.col as isize) < self.indent {
|
||||||
|
@ -2118,7 +2119,7 @@ impl<T: Input> Scanner<T> {
|
||||||
));
|
));
|
||||||
}
|
}
|
||||||
|
|
||||||
if !is_blank_or_breakz(self.input.peek())
|
if !self.input.next_is_blank_or_breakz()
|
||||||
&& self.input.next_can_be_plain_scalar(self.flow_level > 0)
|
&& self.input.next_can_be_plain_scalar(self.flow_level > 0)
|
||||||
{
|
{
|
||||||
if self.leading_whitespace {
|
if self.leading_whitespace {
|
||||||
|
@ -2155,7 +2156,7 @@ impl<T: Input> Scanner<T> {
|
||||||
// hence the `for` loop looping `self.input.bufmaxlen() - 1` times.
|
// hence the `for` loop looping `self.input.bufmaxlen() - 1` times.
|
||||||
self.input.lookahead(self.input.bufmaxlen());
|
self.input.lookahead(self.input.bufmaxlen());
|
||||||
for _ in 0..self.input.bufmaxlen() - 1 {
|
for _ in 0..self.input.bufmaxlen() - 1 {
|
||||||
if is_blank_or_breakz(self.input.peek())
|
if self.input.next_is_blank_or_breakz()
|
||||||
|| !self.input.next_can_be_plain_scalar(self.flow_level > 0)
|
|| !self.input.next_can_be_plain_scalar(self.flow_level > 0)
|
||||||
{
|
{
|
||||||
end = true;
|
end = true;
|
||||||
|
@ -2172,13 +2173,14 @@ impl<T: Input> Scanner<T> {
|
||||||
// - We reach eof
|
// - We reach eof
|
||||||
// - We reach ": "
|
// - We reach ": "
|
||||||
// - We find a flow character in a flow context
|
// - We find a flow character in a flow context
|
||||||
if !(is_blank(self.input.peek()) || is_break(self.input.peek())) {
|
if !(self.input.next_is_blank() || self.input.next_is_break()) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Process blank characters.
|
// Process blank characters.
|
||||||
while is_blank(self.input.look_ch()) || is_break(self.input.peek()) {
|
self.input.lookahead(1);
|
||||||
if is_blank(self.input.peek()) {
|
while self.input.next_is_blank_or_break() {
|
||||||
|
if self.input.next_is_blank() {
|
||||||
if !self.leading_whitespace {
|
if !self.leading_whitespace {
|
||||||
whitespaces.push(self.input.peek());
|
whitespaces.push(self.input.peek());
|
||||||
self.skip_blank();
|
self.skip_blank();
|
||||||
|
@ -2186,7 +2188,7 @@ impl<T: Input> Scanner<T> {
|
||||||
// Tabs in indentation columns are allowed if and only if the line is
|
// Tabs in indentation columns are allowed if and only if the line is
|
||||||
// empty. Skip to the end of the line.
|
// empty. Skip to the end of the line.
|
||||||
self.skip_ws_to_eol(SkipTabs::Yes)?;
|
self.skip_ws_to_eol(SkipTabs::Yes)?;
|
||||||
if !is_breakz(self.input.peek()) {
|
if !self.input.next_is_breakz() {
|
||||||
return Err(ScanError::new_str(
|
return Err(ScanError::new_str(
|
||||||
start_mark,
|
start_mark,
|
||||||
"while scanning a plain scalar, found a tab",
|
"while scanning a plain scalar, found a tab",
|
||||||
|
@ -2196,7 +2198,6 @@ impl<T: Input> Scanner<T> {
|
||||||
self.skip_blank();
|
self.skip_blank();
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
self.input.lookahead(2);
|
|
||||||
// Check if it is a first line break
|
// Check if it is a first line break
|
||||||
if self.leading_whitespace {
|
if self.leading_whitespace {
|
||||||
self.read_break(&mut trailing_breaks);
|
self.read_break(&mut trailing_breaks);
|
||||||
|
@ -2206,6 +2207,7 @@ impl<T: Input> Scanner<T> {
|
||||||
self.leading_whitespace = true;
|
self.leading_whitespace = true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
self.input.lookahead(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
// check indentation level
|
// check indentation level
|
||||||
|
@ -2309,7 +2311,7 @@ impl<T: Input> Scanner<T> {
|
||||||
self.skip_non_blank();
|
self.skip_non_blank();
|
||||||
if self.input.look_ch() == '\t'
|
if self.input.look_ch() == '\t'
|
||||||
&& !self.skip_ws_to_eol(SkipTabs::Yes)?.has_valid_yaml_ws()
|
&& !self.skip_ws_to_eol(SkipTabs::Yes)?.has_valid_yaml_ws()
|
||||||
&& (self.input.peek() == '-' || is_alpha(self.input.peek()))
|
&& (self.input.peek() == '-' || self.input.next_is_alpha())
|
||||||
{
|
{
|
||||||
return Err(ScanError::new_str(
|
return Err(ScanError::new_str(
|
||||||
self.mark,
|
self.mark,
|
||||||
|
|
Loading…
Reference in a new issue