Add support for nested implicit flow mappings.

These are corner cases not tested by the `yaml-test-suite`.
Parsing for the reported input has been fixed, as well as other
similar-looking inputs which make use of nested implicit flow mappings
and implicit null keys and values.

Fixes #1.
This commit is contained in:
Ethiraric 2024-07-02 01:51:07 +02:00
parent 60f8919565
commit 750c992121
2 changed files with 277 additions and 17 deletions

View file

@ -309,6 +309,40 @@ struct Indent {
needs_block_end: bool,
}
/// The knowledge we have about an implicit mapping.
///
/// Implicit mappings occur in flow sequences where the opening `{` for a mapping in a flow
/// sequence is omitted:
/// ```yaml
/// [ a: b, c: d ]
/// # Equivalent to
/// [ { a: b }, { c: d } ]
/// # Equivalent to
/// - a: b
/// - c: d
/// ```
///
/// The state must be carefully tracked for each nested flow sequence since we must emit a
/// [`FlowMappingStart`] event when encountering `a` and `c` in our previous example without a
/// character hinting us. Similarly, we must emit a [`FlowMappingEnd`] event when we reach the `,`
/// or the `]`. If the state is not properly tracked, we may omit to emit these events or emit them
/// out-of-order.
///
/// [`FlowMappingStart`]: TokenType::FlowMappingStart
/// [`FlowMappingEnd`]: TokenType::FlowMappingEnd
#[derive(Debug, PartialEq)]
enum ImplicitMappingState {
/// It is possible there is an implicit mapping.
///
/// This state is the one when we have just encountered the opening `[`. We need more context
/// to know whether an implicit mapping follows.
Possible,
/// We are inside the implcit mapping.
///
/// Note that this state is not set immediately (we need to have encountered the `:` to know).
Inside,
}
/// The size of the [`Scanner`] buffer.
///
/// The buffer is statically allocated to avoid conditions for reallocations each time we
@ -386,8 +420,19 @@ pub struct Scanner<T> {
/// [ : foo ] # { null: "foo" }
/// ```
flow_mapping_started: bool,
/// Whether we currently are in an implicit flow mapping.
implicit_flow_mapping: bool,
/// An array of states, representing whether flow sequences have implicit mappings.
///
/// When a flow mapping is possible (when encountering the first `[` or a `,` in a sequence),
/// the state is set to [`Possible`].
/// When we encounter the `:`, we know we are in an implicit mapping and can set the state to
/// [`Inside`].
///
/// There is one entry in this [`Vec`] for each nested flow sequence that we are in.
/// The entries are created with the opening `]` and popped with the closing `]`.
///
/// [`Possible`]: ImplicitMappingState::Possible
/// [`Inside`]: ImplicitMappingState::Inside
implicit_flow_mapping_states: Vec<ImplicitMappingState>,
}
impl<T: Iterator<Item = char>> Iterator for Scanner<T> {
@ -439,7 +484,7 @@ impl<T: Iterator<Item = char>> Scanner<T> {
token_available: false,
leading_whitespace: true,
flow_mapping_started: false,
implicit_flow_mapping: false,
implicit_flow_mapping_states: vec![],
}
}
@ -705,11 +750,11 @@ impl<T: Iterator<Item = char>> Scanner<T> {
',' => self.fetch_flow_entry(),
'-' if is_blank_or_breakz(nc) => self.fetch_block_entry(),
'?' if is_blank_or_breakz(nc) => self.fetch_key(),
':' if is_blank_or_breakz(nc)
|| (self.flow_level > 0
&& (is_flow(nc) || self.mark.index == self.adjacent_value_allowed_at)) =>
':' if is_blank_or_breakz(nc) => self.fetch_value(),
':' if self.flow_level > 0
&& (is_flow(nc) || self.mark.index == self.adjacent_value_allowed_at) =>
{
self.fetch_value()
self.fetch_flow_value()
}
// Is it an alias?
'*' => self.fetch_anchor(true),
@ -1404,6 +1449,9 @@ impl<T: Iterator<Item = char>> Scanner<T> {
if tok == TokenType::FlowMappingStart {
self.flow_mapping_started = true;
} else {
self.implicit_flow_mapping_states
.push(ImplicitMappingState::Possible);
}
self.skip_ws_to_eol(SkipTabs::Yes)?;
@ -1418,7 +1466,11 @@ impl<T: Iterator<Item = char>> Scanner<T> {
self.disallow_simple_key();
self.end_implicit_mapping(self.mark);
if matches!(tok, TokenType::FlowSequenceEnd) {
self.end_implicit_mapping(self.mark);
// We are out exiting the flow sequence, nesting goes down 1 level.
self.implicit_flow_mapping_states.pop();
}
let start_mark = self.mark;
self.skip_non_blank();
@ -2292,7 +2344,7 @@ impl<T: Iterator<Item = char>> Scanner<T> {
start_mark,
);
} else {
// The parser, upon receiving a `Key`, will insert a `MappingStart` event.
// The scanner, upon emitting a `Key`, will prepend a `MappingStart` event.
self.flow_mapping_started = true;
}
@ -2316,11 +2368,45 @@ impl<T: Iterator<Item = char>> Scanner<T> {
Ok(())
}
/// Fetch a value in a mapping inside of a flow collection.
///
/// This must not be called if [`self.flow_level`] is 0. This ensures the rules surrounding
/// values in flow collections are respected prior to calling [`fetch_value`].
///
/// [`self.flow_level`]: Self::flow_level
/// [`fetch_value`]: Self::fetch_value
fn fetch_flow_value(&mut self) -> ScanResult {
let nc = self.buffer[1];
// If we encounter a ':' inside a flow collection and it is not immediately
// followed by a blank or breakz:
// - We must check whether an adjacent value is allowed
// `["a":[]]` is valid. If the key is double-quoted, no need for a space. This
// is needed for JSON compatibility.
// - If not, we must ensure there is a space after the ':' and before its value.
// `[a: []]` is valid while `[a:[]]` isn't. `[a:b]` is treated as `["a:b"]`.
// - But if the value is empty (null), then it's okay.
// The last line is for YAMLs like `[a:]`. The ':' is followed by a ']' (which is a
// flow character), but the ']' is not the value. The value is an invisible empty
// space which is represented as null ('~').
if self.mark.index != self.adjacent_value_allowed_at && (nc == '[' || nc == '{') {
return Err(ScanError::new_str(
self.mark,
"':' may not precede any of `[{` in flow mapping",
));
}
self.fetch_value()
}
/// Fetch a value from a mapping (after a `:`).
fn fetch_value(&mut self) -> ScanResult {
let sk = self.simple_keys.last().unwrap().clone();
let start_mark = self.mark;
self.implicit_flow_mapping = self.flow_level > 0 && !self.flow_mapping_started;
let is_implicit_flow_mapping = self.flow_level > 0 && !self.flow_mapping_started;
if is_implicit_flow_mapping {
*self.implicit_flow_mapping_states.last_mut().unwrap() = ImplicitMappingState::Inside;
}
// Skip over ':'.
self.skip_non_blank();
@ -2338,7 +2424,7 @@ impl<T: Iterator<Item = char>> Scanner<T> {
// insert simple key
let tok = Token(sk.mark, TokenType::Key);
self.insert_token(sk.token_number - self.tokens_parsed, tok);
if self.implicit_flow_mapping {
if is_implicit_flow_mapping {
if sk.mark.line < start_mark.line {
return Err(ScanError::new_str(
start_mark,
@ -2363,7 +2449,7 @@ impl<T: Iterator<Item = char>> Scanner<T> {
self.simple_keys.last_mut().unwrap().possible = false;
self.disallow_simple_key();
} else {
if self.implicit_flow_mapping {
if is_implicit_flow_mapping {
self.tokens
.push_back(Token(self.mark, TokenType::FlowMappingStart));
}
@ -2527,12 +2613,18 @@ impl<T: Iterator<Item = char>> Scanner<T> {
}
/// If an implicit mapping had started, end it.
///
/// This function does not pop the state in [`implicit_flow_mapping_states`].
///
/// [`implicit_flow_mapping_states`]: Self::implicit_flow_mapping_states
fn end_implicit_mapping(&mut self, mark: Marker) {
if self.implicit_flow_mapping {
self.implicit_flow_mapping = false;
self.flow_mapping_started = false;
self.tokens
.push_back(Token(mark, TokenType::FlowMappingEnd));
if let Some(implicit_mapping) = self.implicit_flow_mapping_states.last_mut() {
if *implicit_mapping == ImplicitMappingState::Inside {
self.flow_mapping_started = false;
*implicit_mapping = ImplicitMappingState::Possible;
self.tokens
.push_back(Token(mark, TokenType::FlowMappingEnd));
}
}
}
}

168
parser/tests/issues.rs Normal file
View file

@ -0,0 +1,168 @@
use saphyr_parser::{Event, Parser, ScanError, TScalarStyle};
/// Run the parser through the string.
///
/// # Returns
/// This functions returns the events if parsing succeeds, the error the parser returned otherwise.
fn run_parser(input: &str) -> Result<Vec<Event>, ScanError> {
let mut events = vec![];
for x in Parser::new_from_str(input) {
events.push(x?.0);
}
Ok(events)
}
#[test]
#[allow(clippy::too_many_lines)]
fn test_issue1() {
// https://github.com/saphyr-rs/saphyr-parser/issues/1
// Implicit mappings with a nested flow sequence prematurely end the mapping.
//
// [a: [42]]
// ^ This closing sequence character
// ^ is interpreted as closing this sequence, where the implicit mapping starts.
//
// The current implementation does not allow for nested implicit mapping:
//
// [a: [b: c]]
// ^ this implicit mapping would be ignored
let reference = r"
- a:
- 42
";
let expected = [
Event::StreamStart,
Event::DocumentStart,
Event::SequenceStart(0, None),
Event::MappingStart(0, None),
Event::Scalar("a".to_string(), TScalarStyle::Plain, 0, None),
Event::SequenceStart(0, None),
Event::Scalar("42".to_string(), TScalarStyle::Plain, 0, None),
Event::SequenceEnd,
Event::MappingEnd,
Event::SequenceEnd,
Event::DocumentEnd,
Event::StreamEnd,
];
assert_eq!(run_parser(reference).unwrap(), expected);
assert_eq!(run_parser("[{a: [42]}]").unwrap(), expected);
assert_eq!(run_parser("[a: [42]]").unwrap(), expected);
// Other test cases derived from the bug
// Implicit mapping in a complex key.
assert_eq!(
run_parser("[foo: [bar]]: baz").unwrap(),
[
Event::StreamStart,
Event::DocumentStart,
Event::MappingStart(0, None),
Event::SequenceStart(0, None),
Event::MappingStart(0, None),
Event::Scalar("foo".to_string(), TScalarStyle::Plain, 0, None),
Event::SequenceStart(0, None),
Event::Scalar("bar".to_string(), TScalarStyle::Plain, 0, None),
Event::SequenceEnd,
Event::MappingEnd,
Event::SequenceEnd,
Event::Scalar("baz".to_string(), TScalarStyle::Plain, 0, None),
Event::MappingEnd,
Event::DocumentEnd,
Event::StreamEnd,
]
);
// Implicit mappings with implicit null keys
assert_eq!(
run_parser("[:]").unwrap(),
[
Event::StreamStart,
Event::DocumentStart,
Event::SequenceStart(0, None),
Event::MappingStart(0, None),
Event::Scalar("~".to_string(), TScalarStyle::Plain, 0, None),
Event::Scalar("~".to_string(), TScalarStyle::Plain, 0, None),
Event::MappingEnd,
Event::SequenceEnd,
Event::DocumentEnd,
Event::StreamEnd,
]
);
// Nested implicit mappings with implicit null keys
assert_eq!(
run_parser("[: [:]]").unwrap(),
[
Event::StreamStart,
Event::DocumentStart,
Event::SequenceStart(0, None),
Event::MappingStart(0, None),
Event::Scalar("~".to_string(), TScalarStyle::Plain, 0, None),
Event::SequenceStart(0, None),
Event::MappingStart(0, None),
Event::Scalar("~".to_string(), TScalarStyle::Plain, 0, None),
Event::Scalar("~".to_string(), TScalarStyle::Plain, 0, None),
Event::MappingEnd,
Event::SequenceEnd,
Event::MappingEnd,
Event::SequenceEnd,
Event::DocumentEnd,
Event::StreamEnd,
]
);
// Interleaved nested implicit and non-implicit mappings.
assert_eq!(
run_parser("[a: [ [b:]]]").unwrap(),
// ^ ^ ^ has an implicit mapping
// | ` has no implicit mapping
// ` has an implicit mapping
// We must make sure that the `MappingEnd` events are correctly issued for the first and
// third nested sequences, but not the second.
[
Event::StreamStart,
Event::DocumentStart,
Event::SequenceStart(0, None),
Event::MappingStart(0, None),
Event::Scalar("a".to_string(), TScalarStyle::Plain, 0, None),
Event::SequenceStart(0, None),
// No `MappingStart` here.
Event::SequenceStart(0, None),
Event::MappingStart(0, None),
Event::Scalar("b".to_string(), TScalarStyle::Plain, 0, None),
Event::Scalar("~".to_string(), TScalarStyle::Plain, 0, None),
Event::MappingEnd,
Event::SequenceEnd,
// No `MappingEnd` here.
Event::SequenceEnd,
Event::MappingEnd,
Event::SequenceEnd,
Event::DocumentEnd,
Event::StreamEnd,
]
);
// There needs to be a space between a `:` in a flow sequence and the value.
assert!(run_parser("[:[:]]").is_err());
assert!(run_parser("[a:[42]]").is_err());
// Double-quoted keys may have a value without a space for JSON-compatibility.
assert_eq!(
run_parser(r#"["a":[]]"#).unwrap(),
[
Event::StreamStart,
Event::DocumentStart,
Event::SequenceStart(0, None),
Event::MappingStart(0, None),
Event::Scalar("a".to_string(), TScalarStyle::DoubleQuoted, 0, None),
Event::SequenceStart(0, None),
Event::SequenceEnd,
Event::MappingEnd,
Event::SequenceEnd,
Event::DocumentEnd,
Event::StreamEnd,
]
);
}