From a3591b3fefe266d24c7b20908efeb533be627ab4 Mon Sep 17 00:00:00 2001 From: Robin Stocker Date: Wed, 20 Feb 2019 18:23:31 +1100 Subject: [PATCH] Fix handling of indicators in plain scalars to conform to YAML 1.2 YAML 1.2 has special handling of indicators to be compatible with JSON. The following is equivalent to `{"a": "b"}` (note, no space after `:`): {"a":b} But without the quoted key, a space is required. So the `:` here is part of the plain scalar: {a:b} # == {"a:b"} A plain scalar can also start with a `:` as long as it's followed by "safe" characters: {a: :b} # == {"a": ":b"} (Fixes #118) --- parser/src/scanner.rs | 103 +++++++++++++++++++++++++++++++++++++----- parser/src/yaml.rs | 2 +- 2 files changed, 93 insertions(+), 12 deletions(-) diff --git a/parser/src/scanner.rs b/parser/src/scanner.rs index 4eb7912..6f4fa58 100644 --- a/parser/src/scanner.rs +++ b/parser/src/scanner.rs @@ -146,6 +146,7 @@ pub struct Scanner { stream_start_produced: bool, stream_end_produced: bool, + adjacent_value_allowed_at: usize, simple_key_allowed: bool, simple_keys: Vec, indent: isize, @@ -216,6 +217,13 @@ fn as_hex(c: char) -> u32 { _ => unreachable!(), } } +#[inline] +fn is_flow(c: char) -> bool { + match c { + ',' | '[' | ']' | '{' | '}' => true, + _ => false, + } +} pub type ScanResult = Result<(), ScanError>; @@ -231,6 +239,7 @@ impl> Scanner { stream_start_produced: false, stream_end_produced: false, + adjacent_value_allowed_at: 0, simple_key_allowed: true, simple_keys: Vec::new(), indent: -1, @@ -387,8 +396,13 @@ impl> Scanner { '}' => self.fetch_flow_collection_end(TokenType::FlowMappingEnd), ',' => self.fetch_flow_entry(), '-' if is_blankz(nc) => self.fetch_block_entry(), - '?' if self.flow_level > 0 || is_blankz(nc) => self.fetch_key(), - ':' if self.flow_level > 0 || is_blankz(nc) => self.fetch_value(), + '?' 
if is_blankz(nc) => self.fetch_key(), + ':' if is_blankz(nc) + || (self.flow_level > 0 + && (is_flow(nc) || self.mark.index == self.adjacent_value_allowed_at)) => + { + self.fetch_value() + } // Is it an alias? '*' => self.fetch_anchor(true), // Is it an anchor? @@ -1258,6 +1272,10 @@ impl> Scanner { let tok = self.scan_flow_scalar(single)?; + // From spec: To ensure JSON compatibility, if a key inside a flow mapping is JSON-like, + // YAML allows the following value to be specified adjacent to the “:”. + self.adjacent_value_allowed_at = self.mark.index; + self.tokens.push_back(tok); Ok(()) } @@ -1498,16 +1516,14 @@ impl> Scanner { break; } while !is_blankz(self.ch()) { - if self.flow_level > 0 && self.ch() == ':' && is_blankz(self.ch()) { - return Err(ScanError::new( - start_mark, - "while scanning a plain scalar, found unexpected ':'", - )); - } - // indicators ends a plain scalar + // indicators can end a plain scalar, see 7.3.3. Plain Style match self.ch() { - ':' if is_blankz(self.buffer[1]) => break, - ',' | ':' | '?' | '[' | ']' | '{' | '}' if self.flow_level > 0 => break, + ':' if is_blankz(self.buffer[1]) + || (self.flow_level > 0 && is_flow(self.buffer[1])) => + { + break; + } + ',' | '[' | ']' | '{' | '}' if self.flow_level > 0 => break, _ => {} } @@ -2073,6 +2089,71 @@ key: end!(p); } + #[test] + fn test_plain_scalar_starting_with_indicators_in_flow() { + // "Plain scalars must not begin with most indicators, as this would cause ambiguity with + // other YAML constructs. However, the “:”, “?” and “-” indicators may be used as the first + // character if followed by a non-space “safe” character, as this causes no ambiguity." 
+ + let s = "{a: :b}"; + let mut p = Scanner::new(s.chars()); + next!(p, StreamStart(..)); + next!(p, FlowMappingStart); + next!(p, Key); + next_scalar!(p, TScalarStyle::Plain, "a"); + next!(p, Value); + next_scalar!(p, TScalarStyle::Plain, ":b"); + next!(p, FlowMappingEnd); + next!(p, StreamEnd); + end!(p); + + let s = "{a: ?b}"; + let mut p = Scanner::new(s.chars()); + next!(p, StreamStart(..)); + next!(p, FlowMappingStart); + next!(p, Key); + next_scalar!(p, TScalarStyle::Plain, "a"); + next!(p, Value); + next_scalar!(p, TScalarStyle::Plain, "?b"); + next!(p, FlowMappingEnd); + next!(p, StreamEnd); + end!(p); + } + + #[test] + fn test_plain_scalar_starting_with_indicators_in_block() { + let s = ":a"; + let mut p = Scanner::new(s.chars()); + next!(p, StreamStart(..)); + next_scalar!(p, TScalarStyle::Plain, ":a"); + next!(p, StreamEnd); + end!(p); + + let s = "?a"; + let mut p = Scanner::new(s.chars()); + next!(p, StreamStart(..)); + next_scalar!(p, TScalarStyle::Plain, "?a"); + next!(p, StreamEnd); + end!(p); + } + + #[test] + fn test_plain_scalar_containing_indicators_in_block() { + let s = "a:,b"; + let mut p = Scanner::new(s.chars()); + next!(p, StreamStart(..)); + next_scalar!(p, TScalarStyle::Plain, "a:,b"); + next!(p, StreamEnd); + end!(p); + + let s = ":,b"; + let mut p = Scanner::new(s.chars()); + next!(p, StreamStart(..)); + next_scalar!(p, TScalarStyle::Plain, ":,b"); + next!(p, StreamEnd); + end!(p); + } + #[test] fn test_scanner_cr() { let s = "---\r\n- tok1\r\n- tok2"; diff --git a/parser/src/yaml.rs b/parser/src/yaml.rs index 6ccbec3..fe112cc 100644 --- a/parser/src/yaml.rs +++ b/parser/src/yaml.rs @@ -40,7 +40,7 @@ pub enum Yaml { Array(self::Array), /// YAML hash, can be accessed as a `LinkedHashMap`. /// - /// Itertion order will match the order of insertion into the map. + /// Iteration order will match the order of insertion into the map. Hash(self::Hash), /// Alias, not fully supported yet. Alias(usize),