Minor improvements.

* Doc comments
* Helper functions
* Line breaks for readability
This commit is contained in:
Ethiraric 2023-11-18 20:29:40 +01:00
parent c458fd1248
commit e4c4182020
2 changed files with 50 additions and 22 deletions

View file

@ -264,11 +264,15 @@ impl<T: Iterator<Item = char>> Scanner<T> {
token_available: false, token_available: false,
} }
} }
#[inline] #[inline]
pub fn get_error(&self) -> Option<ScanError> { pub fn get_error(&self) -> Option<ScanError> {
self.error.as_ref().map(std::clone::Clone::clone) self.error.as_ref().map(std::clone::Clone::clone)
} }
/// Fill `self.buffer` with at least `count` characters.
///
/// The characters that are extracted this way are not consumed but only placed in the buffer.
#[inline] #[inline]
fn lookahead(&mut self, count: usize) { fn lookahead(&mut self, count: usize) {
if self.buffer.len() >= count { if self.buffer.len() >= count {
@ -278,6 +282,7 @@ impl<T: Iterator<Item = char>> Scanner<T> {
self.buffer.push_back(self.rdr.next().unwrap_or('\0')); self.buffer.push_back(self.rdr.next().unwrap_or('\0'));
} }
} }
#[inline] #[inline]
fn skip(&mut self) { fn skip(&mut self) {
let c = self.buffer.pop_front().unwrap(); let c = self.buffer.pop_front().unwrap();
@ -290,6 +295,7 @@ impl<T: Iterator<Item = char>> Scanner<T> {
self.mark.col += 1; self.mark.col += 1;
} }
} }
#[inline] #[inline]
fn skip_line(&mut self) { fn skip_line(&mut self) {
if self.buffer[0] == '\r' && self.buffer[1] == '\n' { if self.buffer[0] == '\r' && self.buffer[1] == '\n' {
@ -299,31 +305,62 @@ impl<T: Iterator<Item = char>> Scanner<T> {
self.skip(); self.skip();
} }
} }
/// Return the next character in the buffer.
///
/// The character is not consumed.
#[inline] #[inline]
fn ch(&self) -> char { fn ch(&self) -> char {
self.buffer[0] self.buffer[0]
} }
/// Make sure the next character is loaded into the buffer and return it.
///
/// The character is not consumed.
/// Equivalent to calling [`Self::lookahead`] and [`Self::ch`].
#[inline]
fn look_ch(&mut self) -> char {
self.lookahead(1);
self.ch()
}
/// Consume and return the next character.
///
/// Equivalent to calling [`Self::ch`] and [`Self::skip`].
#[inline]
fn ch_skip(&mut self) -> char {
let ret = self.ch();
self.skip();
ret
}
/// Return whether the next character is `c`.
#[inline] #[inline]
fn ch_is(&self, c: char) -> bool { fn ch_is(&self, c: char) -> bool {
self.buffer[0] == c self.buffer[0] == c
} }
#[allow(dead_code)] #[allow(dead_code)]
#[inline] #[inline]
fn eof(&self) -> bool { fn eof(&self) -> bool {
self.ch_is('\0') self.ch_is('\0')
} }
#[inline] #[inline]
pub fn stream_started(&self) -> bool { pub fn stream_started(&self) -> bool {
self.stream_start_produced self.stream_start_produced
} }
#[inline] #[inline]
pub fn stream_ended(&self) -> bool { pub fn stream_ended(&self) -> bool {
self.stream_end_produced self.stream_end_produced
} }
#[inline] #[inline]
pub fn mark(&self) -> Marker { pub fn mark(&self) -> Marker {
self.mark self.mark
} }
#[inline] #[inline]
fn read_break(&mut self, s: &mut String) { fn read_break(&mut self, s: &mut String) {
if self.buffer[0] == '\r' && self.buffer[1] == '\n' { if self.buffer[0] == '\r' && self.buffer[1] == '\n' {
@ -337,6 +374,7 @@ impl<T: Iterator<Item = char>> Scanner<T> {
unreachable!(); unreachable!();
} }
} }
fn insert_token(&mut self, pos: usize, tok: Token) { fn insert_token(&mut self, pos: usize, tok: Token) {
let old_len = self.tokens.len(); let old_len = self.tokens.len();
assert!(pos <= old_len); assert!(pos <= old_len);
@ -345,9 +383,11 @@ impl<T: Iterator<Item = char>> Scanner<T> {
self.tokens.swap(old_len - i, old_len - i - 1); self.tokens.swap(old_len - i, old_len - i - 1);
} }
} }
fn allow_simple_key(&mut self) { fn allow_simple_key(&mut self) {
self.simple_key_allowed = true; self.simple_key_allowed = true;
} }
fn disallow_simple_key(&mut self) { fn disallow_simple_key(&mut self) {
self.simple_key_allowed = false; self.simple_key_allowed = false;
} }
@ -736,7 +776,6 @@ impl<T: Iterator<Item = char>> Scanner<T> {
let start_mark = self.mark; let start_mark = self.mark;
let mut handle = String::new(); let mut handle = String::new();
let mut suffix; let mut suffix;
let mut secondary = false;
// Check if the tag is in the canonical form (verbatim). // Check if the tag is in the canonical form (verbatim).
self.lookahead(2); self.lookahead(2);
@ -760,10 +799,9 @@ impl<T: Iterator<Item = char>> Scanner<T> {
handle = self.scan_tag_handle(false, &start_mark)?; handle = self.scan_tag_handle(false, &start_mark)?;
// Check if it is, indeed, a handle. // Check if it is, indeed, a handle.
if handle.len() >= 2 && handle.starts_with('!') && handle.ends_with('!') { if handle.len() >= 2 && handle.starts_with('!') && handle.ends_with('!') {
if handle == "!!" { // A tag handle that is exactly "!!" is the secondary tag handle.
secondary = true; let is_secondary_handle = handle == "!!";
} suffix = self.scan_tag_uri(false, is_secondary_handle, "", &start_mark)?;
suffix = self.scan_tag_uri(false, secondary, "", &start_mark)?;
} else { } else {
suffix = self.scan_tag_uri(false, false, &handle, &start_mark)?; suffix = self.scan_tag_uri(false, false, &handle, &start_mark)?;
handle = "!".to_owned(); handle = "!".to_owned();
@ -776,8 +814,7 @@ impl<T: Iterator<Item = char>> Scanner<T> {
} }
} }
self.lookahead(1); if is_blankz(self.look_ch()) {
if is_blankz(self.ch()) {
// XXX: ex 7.2, an empty scalar can follow a secondary tag // XXX: ex 7.2, an empty scalar can follow a secondary tag
Ok(Token(start_mark, TokenType::Tag(handle, suffix))) Ok(Token(start_mark, TokenType::Tag(handle, suffix)))
} else { } else {
@ -790,28 +827,22 @@ impl<T: Iterator<Item = char>> Scanner<T> {
fn scan_tag_handle(&mut self, directive: bool, mark: &Marker) -> Result<String, ScanError> { fn scan_tag_handle(&mut self, directive: bool, mark: &Marker) -> Result<String, ScanError> {
let mut string = String::new(); let mut string = String::new();
self.lookahead(1); if self.look_ch() != '!' {
if self.ch() != '!' {
return Err(ScanError::new( return Err(ScanError::new(
*mark, *mark,
"while scanning a tag, did not find expected '!'", "while scanning a tag, did not find expected '!'",
)); ));
} }
string.push(self.ch()); string.push(self.ch_skip());
self.skip();
self.lookahead(1); while is_alpha(self.look_ch()) {
while is_alpha(self.ch()) { string.push(self.ch_skip());
string.push(self.ch());
self.skip();
self.lookahead(1);
} }
// Check if the trailing character is '!' and copy it. // Check if the trailing character is '!' and copy it.
if self.ch() == '!' { if self.ch() == '!' {
string.push(self.ch()); string.push(self.ch_skip());
self.skip();
} else if directive && string != "!" { } else if directive && string != "!" {
// It's either the '!' tag or not really a tag handle. If it's a %TAG // It's either the '!' tag or not really a tag handle. If it's a %TAG
// directive, it's an error. If it's a tag token, it must be a part of // directive, it's an error. If it's a tag token, it must be a part of
@ -840,7 +871,6 @@ impl<T: Iterator<Item = char>> Scanner<T> {
string.extend(head.chars().skip(1)); string.extend(head.chars().skip(1));
} }
self.lookahead(1);
/* /*
* The set of characters that may appear in URI is as follows: * The set of characters that may appear in URI is as follows:
* *
@ -848,7 +878,7 @@ impl<T: Iterator<Item = char>> Scanner<T> {
* '=', '+', '$', ',', '.', '!', '~', '*', '\'', '(', ')', '[', ']', * '=', '+', '$', ',', '.', '!', '~', '*', '\'', '(', ')', '[', ']',
* '%'. * '%'.
*/ */
while match self.ch() { while match self.look_ch() {
';' | '/' | '?' | ':' | '@' | '&' => true, ';' | '/' | '?' | ':' | '@' | '&' => true,
'=' | '+' | '$' | ',' | '.' | '!' | '~' | '*' | '\'' | '(' | ')' | '[' | ']' => true, '=' | '+' | '$' | ',' | '.' | '!' | '~' | '*' | '\'' | '(' | ')' | '[' | ']' => true,
'%' => true, '%' => true,
@ -864,7 +894,6 @@ impl<T: Iterator<Item = char>> Scanner<T> {
} }
length += 1; length += 1;
self.lookahead(1);
} }
if length == 0 { if length == 0 {

View file

@ -299,7 +299,6 @@ fn expected_events(expected_tree: &str) -> Vec<String> {
static EXPECTED_FAILURES: &[&str] = &[ static EXPECTED_FAILURES: &[&str] = &[
// These seem to be API limited (not enough information on the event stream level) // These seem to be API limited (not enough information on the event stream level)
// No tag available for SEQ and MAP // No tag available for SEQ and MAP
"2XXW",
"35KP", "35KP",
"57H4", "57H4",
"6JWB", "6JWB",