From 84bfd0d3deb215305ebf5c8e713d651e24d7b6f7 Mon Sep 17 00:00:00 2001 From: Ethiraric Date: Tue, 26 Dec 2023 00:34:29 +0100 Subject: [PATCH] More fixes towards invalid tabs. --- parser/src/scanner.rs | 35 +++++++++++++++++++++++++++++---- parser/tests/yaml-test-suite.rs | 1 - 2 files changed, 31 insertions(+), 5 deletions(-) diff --git a/parser/src/scanner.rs b/parser/src/scanner.rs index 03e2a1a..423a930 100644 --- a/parser/src/scanner.rs +++ b/parser/src/scanner.rs @@ -718,9 +718,13 @@ impl> Scanner { /// Skip yaml whitespace at most up to eol. Also skips comments. fn skip_ws_to_eol(&mut self, skip_tabs: SkipTabs) -> SkipTabs { let mut encountered_tab = false; + let mut has_yaml_ws = false; loop { match self.look_ch() { - ' ' => self.skip(), + ' ' => { + has_yaml_ws = true; + self.skip(); + } '\t' if skip_tabs != SkipTabs::No => { encountered_tab = true; self.skip(); @@ -735,7 +739,7 @@ impl> Scanner { } } - SkipTabs::Result(encountered_tab) + SkipTabs::Result(encountered_tab, has_yaml_ws) } fn fetch_stream_start(&mut self) { @@ -1906,6 +1910,19 @@ impl> Scanner { fn fetch_value(&mut self) -> ScanResult { let sk = self.simple_keys.last().unwrap().clone(); let start_mark = self.mark; + + // Skip over ':'. + self.skip(); + if self.look_ch() == '\t' + && !self.skip_ws_to_eol(SkipTabs::Yes).has_valid_yaml_ws() + && self.ch() == '-' + { + return Err(ScanError::new( + self.mark, + "':' must be followed by a valid YAML whitespace", + )); + } + if sk.possible { // insert simple key let tok = Token(sk.mark, TokenType::Key); @@ -1946,7 +1963,6 @@ impl> Scanner { self.disallow_simple_key(); } } - self.skip(); self.tokens.push_back(Token(start_mark, TokenType::Value)); Ok(()) @@ -2041,6 +2057,8 @@ impl> Scanner { } /// Behavior to adopt regarding treating tabs as whitespace. +/// +/// Although tab is a valid yaml whitespace, it doesn't always behave the same as a space. #[derive(Copy, Clone, Eq, PartialEq)] enum SkipTabs { /// Skip all tabs as whitespace. @@ -2051,6 +2069,8 @@ enum SkipTabs { Result( /// Whether tabs were encountered. bool, + /// Whether at least 1 valid yaml whitespace has been encountered. + bool, ), } @@ -2059,6 +2079,13 @@ impl SkipTabs { /// /// This function must be called after a call to `skip_ws_to_eol`. fn found_tabs(self) -> bool { - matches!(self, SkipTabs::Result(true)) + matches!(self, SkipTabs::Result(true, _)) + } + + /// Whether a valid YAML whitespace has been found in skipped-over content. + /// + /// This function must be called after a call to `skip_ws_to_eol`. + fn has_valid_yaml_ws(self) -> bool { + matches!(self, SkipTabs::Result(_, true)) } } diff --git a/parser/tests/yaml-test-suite.rs b/parser/tests/yaml-test-suite.rs index ba9abdb..1ef3b48 100644 --- a/parser/tests/yaml-test-suite.rs +++ b/parser/tests/yaml-test-suite.rs @@ -301,7 +301,6 @@ static EXPECTED_FAILURES: &[&str] = &[ // TAB as start of plain scalar instead of whitespace // TABs in whitespace-only lines // TABs after marker ? or : (space required?) - "Y79Y-07", "Y79Y-09", // Other TABs "DK95-01", // in double-quoted scalar