diff --git a/parser/src/scanner.rs b/parser/src/scanner.rs index 2cda77c..3c2690c 100644 --- a/parser/src/scanner.rs +++ b/parser/src/scanner.rs @@ -129,8 +129,15 @@ pub enum TokenType { DocumentStart, /// The end of a YAML document (`...`). DocumentEnd, + /// The start of a block sequence. + /// + /// Block sequences are arrays starting with a `-`. BlockSequenceStart, + /// The start of a block mapping. + /// + /// Block mappings are "dictionaries" with "key: value" entries. BlockMappingStart, + /// End of the corresponding `BlockSequenceStart` or `BlockMappingStart`. BlockEnd, /// Start of an inline array (`[ a, b ]`). FlowSequenceStart, @@ -186,6 +193,9 @@ pub struct Scanner { stream_start_produced: bool, stream_end_produced: bool, adjacent_value_allowed_at: usize, + /// Whether a simple key could potentially start at the current position. + /// + /// Simple keys are the opposite of complex keys, which are keys starting with `?`. simple_key_allowed: bool, simple_keys: Vec, indent: isize, @@ -427,13 +437,11 @@ impl> Scanner { } } + /// Insert a token at the given position. 
fn insert_token(&mut self, pos: usize, tok: Token) { let old_len = self.tokens.len(); assert!(pos <= old_len); - self.tokens.push_back(tok); - for i in 0..old_len - pos { - self.tokens.swap(old_len - i, old_len - i - 1); - } + self.tokens.insert(pos, tok); } fn allow_simple_key(&mut self) { @@ -550,10 +558,10 @@ impl> Scanner { pub fn fetch_more_tokens(&mut self) -> ScanResult { let mut need_more; loop { - need_more = false; if self.tokens.is_empty() { need_more = true; } else { + need_more = false; self.stale_simple_keys()?; for sk in &self.simple_keys { if sk.possible && sk.token_number == self.tokens_parsed { @@ -600,9 +608,9 @@ impl> Scanner { return Err(ScanError::new( self.mark, "tabs disallowed within this context (block indentation)", - )) + )); } - '\t' if self.flow_level > 0 || !self.simple_key_allowed => self.skip(), + '\t' => self.skip(), '\n' | '\r' => { self.lookahead(2); self.skip_line(); @@ -1770,6 +1778,11 @@ impl> Scanner { Ok(()) } + /// Add an indentation level to the stack with the given block token, if needed. + /// + /// An indentation level is added only if: + /// - We are not in a flow-style construct (which doesn't have indentation per se). + /// - The current column is further indented than the last indent we have registered. fn roll_indent(&mut self, col: usize, number: Option, tok: TokenType, mark: Marker) { if self.flow_level > 0 { return; @@ -1786,6 +1799,11 @@ impl> Scanner { } } + /// Pop indentation levels from the stack as much as needed. + /// + /// Indentation levels are popped from the stack while they are further indented than `col`. + /// If we are in a flow-style construct (which doesn't have indentation per se), this + /// function does nothing. 
fn unroll_indent(&mut self, col: isize) { if self.flow_level > 0 { return; diff --git a/parser/src/yaml.rs b/parser/src/yaml.rs index 4c04eb1..22f30eb 100644 --- a/parser/src/yaml.rs +++ b/parser/src/yaml.rs @@ -1,7 +1,7 @@ #![allow(clippy::module_name_repetitions)] use crate::parser::{Event, MarkedEventReceiver, Parser, Tag}; -use crate::scanner::{Marker, ScanError, TScalarStyle, TokenType}; +use crate::scanner::{Marker, ScanError, TScalarStyle}; use linked_hash_map::LinkedHashMap; use std::collections::BTreeMap; use std::mem; diff --git a/parser/tests/yaml-test-suite.rs b/parser/tests/yaml-test-suite.rs index c9e9372..42b1668 100644 --- a/parser/tests/yaml-test-suite.rs +++ b/parser/tests/yaml-test-suite.rs @@ -52,7 +52,7 @@ fn run_yaml_test(test: &Test) -> Outcome { let actual_events = parse_to_events(&desc.yaml); let events_diff = actual_events.map(|events| events_differ(events, &desc.expected_events)); let mut error_text = match (events_diff, desc.expected_error) { - (Ok(_), true) => Some("no error when expected".into()), + (Ok(x), true) => Some(format!("no error when expected: {x:#?}")), (Err(_), true) => None, (Err(e), false) => Some(format!("unexpected error {:?}", e)), (Ok(Some(diff)), false) => Some(format!("events differ: {}", diff)), @@ -299,15 +299,12 @@ fn expected_events(expected_tree: &str) -> Vec { static EXPECTED_FAILURES: &[&str] = &[ // These seem to be plain bugs // TAB as start of plain scalar instead of whitespace - "6CA3", "DK95-00", - "Q5MG", "Y79Y-06", "Y79Y-03", // unexpected pass "Y79Y-04", // unexpected pass "Y79Y-05", // unexpected pass // TABs in whitespace-only lines - "DK95-03", "DK95-04", // TABs after marker ? or : (space required?) "Y79Y-07",