From 23f3a512f0dd413cda70a2ca8056846e293adcb3 Mon Sep 17 00:00:00 2001
From: Ethiraric
Date: Thu, 28 Dec 2023 01:48:19 +0100
Subject: [PATCH] Fix anchor names' character set.

---
 parser/src/scanner.rs           | 46 ++++++++++++++++++++++++++-------
 parser/tests/yaml-test-suite.rs |  3 ---
 2 files changed, 36 insertions(+), 13 deletions(-)

diff --git a/parser/src/scanner.rs b/parser/src/scanner.rs
index 75cc1ab..ce3ed86 100644
--- a/parser/src/scanner.rs
+++ b/parser/src/scanner.rs
@@ -351,6 +351,31 @@ fn is_flow(c: char) -> bool {
     matches!(c, ',' | '[' | ']' | '{' | '}')
 }
 
+/// Check whether the character is the BOM character.
+#[inline]
+fn is_bom(c: char) -> bool {
+    c == '\u{FEFF}'
+}
+
+/// Check whether the character is a YAML non-breaking character.
+#[inline]
+fn is_yaml_non_break(c: char) -> bool {
+    // TODO(ethiraric, 28/12/2023): is_printable
+    !is_break(c) && !is_bom(c)
+}
+
+/// Check whether the character is NOT a YAML whitespace (` ` / `\t`).
+#[inline]
+fn is_yaml_non_space(c: char) -> bool {
+    is_yaml_non_break(c) && !is_blank(c)
+}
+
+/// Check whether the character is a valid YAML anchor name character.
+#[inline]
+fn is_anchor_char(c: char) -> bool {
+    is_yaml_non_space(c) && !is_flow(c) && !is_z(c)
+}
+
 pub type ScanResult = Result<(), ScanError>;
 
 impl<T: Iterator<Item = char>> Scanner<T> {
@@ -1193,20 +1218,12 @@ impl<T: Iterator<Item = char>> Scanner<T> {
         let start_mark = self.mark;
 
         self.skip();
-        self.lookahead(1);
-        while is_alpha(self.ch()) || self.ch_is(':') {
+        while is_anchor_char(self.look_ch()) {
             string.push(self.ch());
             self.skip();
-            self.lookahead(1);
         }
 
-        if string.is_empty()
-            || match self.ch() {
-                c if is_blankz(c) => false,
-                '?' | ',' | ']' | '}' | '%' | '@' | '`' => false,
-                _ => true,
-            }
-        {
+        if string.is_empty() {
             return Err(ScanError::new(start_mark, "while scanning an anchor or alias, did not find expected alphabetic or numeric character"));
         }
 
@@ -2174,3 +2191,12 @@ impl SkipTabs {
         matches!(self, SkipTabs::Result(_, true))
     }
 }
+
+#[cfg(test)]
+mod test {
+    #[test]
+    fn test_is_anchor_char() {
+        use super::is_anchor_char;
+        assert!(is_anchor_char('x'));
+    }
+}
diff --git a/parser/tests/yaml-test-suite.rs b/parser/tests/yaml-test-suite.rs
index 1778e6f..2f11c2a 100644
--- a/parser/tests/yaml-test-suite.rs
+++ b/parser/tests/yaml-test-suite.rs
@@ -297,9 +297,6 @@ fn expected_events(expected_tree: &str) -> Vec<String> {
 
 #[rustfmt::skip]
 static EXPECTED_FAILURES: &[&str] = &[
-    // Unusual characters in anchors/aliases
-    "8XYN", // emoji!!
-    "W5VH", // :@*!$"<foo>:
     // Flow mapping colon on next line / multiline key in flow mapping
     "4MUZ-00",
     "4MUZ-01",