Fix anchor names' character set.

This commit is contained in:
Ethiraric 2023-12-28 01:48:19 +01:00
parent 6308bbe98f
commit e9bcc8a28b
2 changed files with 36 additions and 13 deletions

View file

@ -351,6 +351,31 @@ fn is_flow(c: char) -> bool {
matches!(c, ',' | '[' | ']' | '{' | '}') matches!(c, ',' | '[' | ']' | '{' | '}')
} }
/// Return `true` when `c` is the byte order mark (`U+FEFF`).
#[inline]
fn is_bom(c: char) -> bool {
    matches!(c, '\u{FEFF}')
}
/// Return `true` when `c` is a YAML non-break character.
///
/// As implemented here, that means `c` is neither a break (per `is_break`) nor the BOM
/// (per `is_bom`).
#[inline]
fn is_yaml_non_break(c: char) -> bool {
    // TODO(ethiraric, 28/12/2023): is_printable
    !(is_break(c) || is_bom(c))
}
/// Return `true` when `c` is a YAML non-space character.
///
/// The character must be a non-break character (per `is_yaml_non_break`) and must not be
/// a blank (per `is_blank`, i.e. ` ` / `\t`).
#[inline]
fn is_yaml_non_space(c: char) -> bool {
    // Anything that is not even a non-break character is rejected outright.
    if !is_yaml_non_break(c) {
        return false;
    }
    !is_blank(c)
}
/// Return `true` when `c` may be part of a YAML anchor (or alias) name.
///
/// A character qualifies when it is a non-space character (per `is_yaml_non_space`) and is
/// neither a flow indicator (per `is_flow`) nor matched by `is_z`.
#[inline]
fn is_anchor_char(c: char) -> bool {
    is_yaml_non_space(c) && !(is_flow(c) || is_z(c))
}
pub type ScanResult = Result<(), ScanError>; pub type ScanResult = Result<(), ScanError>;
impl<T: Iterator<Item = char>> Scanner<T> { impl<T: Iterator<Item = char>> Scanner<T> {
@ -1193,20 +1218,12 @@ impl<T: Iterator<Item = char>> Scanner<T> {
let start_mark = self.mark; let start_mark = self.mark;
self.skip(); self.skip();
self.lookahead(1); while is_anchor_char(self.look_ch()) {
while is_alpha(self.ch()) || self.ch_is(':') {
string.push(self.ch()); string.push(self.ch());
self.skip(); self.skip();
self.lookahead(1);
} }
if string.is_empty() if string.is_empty() {
|| match self.ch() {
c if is_blankz(c) => false,
'?' | ',' | ']' | '}' | '%' | '@' | '`' => false,
_ => true,
}
{
return Err(ScanError::new(start_mark, "while scanning an anchor or alias, did not find expected alphabetic or numeric character")); return Err(ScanError::new(start_mark, "while scanning an anchor or alias, did not find expected alphabetic or numeric character"));
} }
@ -2174,3 +2191,12 @@ impl SkipTabs {
matches!(self, SkipTabs::Result(_, true)) matches!(self, SkipTabs::Result(_, true))
} }
} }
#[cfg(test)]
mod test {
    /// Exercise `is_anchor_char` on both accepted and rejected characters.
    #[test]
    fn test_is_anchor_char() {
        use super::is_anchor_char;

        // Plain alphabetic characters are valid in anchor names.
        assert!(is_anchor_char('x'));

        // Punctuation and non-ASCII characters used by the YAML test suite cases that
        // previously failed (unusual characters in anchors/aliases) must be accepted.
        for c in [':', '@', '*', '!', '$', '"', '<', '>', '😁'] {
            assert!(is_anchor_char(c), "{c:?} should be a valid anchor character");
        }

        // Flow indicators terminate an anchor name and must be rejected.
        for c in [',', '[', ']', '{', '}'] {
            assert!(!is_anchor_char(c), "{c:?} should not be a valid anchor character");
        }

        // The byte order mark is never part of an anchor name.
        assert!(!is_anchor_char('\u{FEFF}'));
    }
}

View file

@ -297,9 +297,6 @@ fn expected_events(expected_tree: &str) -> Vec<String> {
#[rustfmt::skip] #[rustfmt::skip]
static EXPECTED_FAILURES: &[&str] = &[ static EXPECTED_FAILURES: &[&str] = &[
// Unusual characters in anchors/aliases
"8XYN", // emoji!!
"W5VH", // :@*!$"<foo>:
// Flow mapping colon on next line / multiline key in flow mapping // Flow mapping colon on next line / multiline key in flow mapping
"4MUZ-00", "4MUZ-00",
"4MUZ-01", "4MUZ-01",