Minor improvements.

This commit is contained in:
Ethiraric 2023-12-21 00:14:08 +01:00
parent 01ecc1ab0f
commit abe5d30b3a
3 changed files with 27 additions and 12 deletions

View file

@ -129,8 +129,15 @@ pub enum TokenType {
DocumentStart, DocumentStart,
/// The end of a YAML document (`...`). /// The end of a YAML document (`...`).
DocumentEnd, DocumentEnd,
/// The start of a block sequence.
///
/// Block sequences are arrays whose entries each start with a `-`.
BlockSequenceStart, BlockSequenceStart,
/// The start of a block mapping.
///
/// Block mappings are "dictionaries" with "key: value" entries.
BlockMappingStart, BlockMappingStart,
/// End of the corresponding `BlockSequenceStart` or `BlockMappingStart`.
BlockEnd, BlockEnd,
/// Start of an inline array (`[ a, b ]`). /// Start of an inline array (`[ a, b ]`).
FlowSequenceStart, FlowSequenceStart,
@ -186,6 +193,9 @@ pub struct Scanner<T> {
stream_start_produced: bool, stream_start_produced: bool,
stream_end_produced: bool, stream_end_produced: bool,
adjacent_value_allowed_at: usize, adjacent_value_allowed_at: usize,
/// Whether a simple key could potentially start at the current position.
///
/// Simple keys are the opposite of complex keys, which are keys starting with `?`.
simple_key_allowed: bool, simple_key_allowed: bool,
simple_keys: Vec<SimpleKey>, simple_keys: Vec<SimpleKey>,
indent: isize, indent: isize,
@ -427,13 +437,11 @@ impl<T: Iterator<Item = char>> Scanner<T> {
} }
} }
/// Insert a token at the given position.
fn insert_token(&mut self, pos: usize, tok: Token) { fn insert_token(&mut self, pos: usize, tok: Token) {
let old_len = self.tokens.len(); let old_len = self.tokens.len();
assert!(pos <= old_len); assert!(pos <= old_len);
self.tokens.push_back(tok); self.tokens.insert(pos, tok);
for i in 0..old_len - pos {
self.tokens.swap(old_len - i, old_len - i - 1);
}
} }
fn allow_simple_key(&mut self) { fn allow_simple_key(&mut self) {
@ -550,10 +558,10 @@ impl<T: Iterator<Item = char>> Scanner<T> {
pub fn fetch_more_tokens(&mut self) -> ScanResult { pub fn fetch_more_tokens(&mut self) -> ScanResult {
let mut need_more; let mut need_more;
loop { loop {
need_more = false;
if self.tokens.is_empty() { if self.tokens.is_empty() {
need_more = true; need_more = true;
} else { } else {
need_more = false;
self.stale_simple_keys()?; self.stale_simple_keys()?;
for sk in &self.simple_keys { for sk in &self.simple_keys {
if sk.possible && sk.token_number == self.tokens_parsed { if sk.possible && sk.token_number == self.tokens_parsed {
@ -600,9 +608,9 @@ impl<T: Iterator<Item = char>> Scanner<T> {
return Err(ScanError::new( return Err(ScanError::new(
self.mark, self.mark,
"tabs disallowed within this context (block indentation)", "tabs disallowed within this context (block indentation)",
)) ));
} }
'\t' if self.flow_level > 0 || !self.simple_key_allowed => self.skip(), '\t' => self.skip(),
'\n' | '\r' => { '\n' | '\r' => {
self.lookahead(2); self.lookahead(2);
self.skip_line(); self.skip_line();
@ -1770,6 +1778,11 @@ impl<T: Iterator<Item = char>> Scanner<T> {
Ok(()) Ok(())
} }
/// Add an indentation level to the stack with the given block token, if needed.
///
/// An indentation level is added only if:
/// - We are not in a flow-style construct (which doesn't have indentation per se).
/// - The current column is further indented than the last indent we have registered.
fn roll_indent(&mut self, col: usize, number: Option<usize>, tok: TokenType, mark: Marker) { fn roll_indent(&mut self, col: usize, number: Option<usize>, tok: TokenType, mark: Marker) {
if self.flow_level > 0 { if self.flow_level > 0 {
return; return;
@ -1786,6 +1799,11 @@ impl<T: Iterator<Item = char>> Scanner<T> {
} }
} }
/// Pop indentation levels from the stack as much as needed.
///
/// Indentation levels are popped from the stack while they are further indented than `col`.
/// If we are in a flow-style construct (which doesn't have indentation per se), this function
/// does nothing.
fn unroll_indent(&mut self, col: isize) { fn unroll_indent(&mut self, col: isize) {
if self.flow_level > 0 { if self.flow_level > 0 {
return; return;

View file

@ -1,7 +1,7 @@
#![allow(clippy::module_name_repetitions)] #![allow(clippy::module_name_repetitions)]
use crate::parser::{Event, MarkedEventReceiver, Parser, Tag}; use crate::parser::{Event, MarkedEventReceiver, Parser, Tag};
use crate::scanner::{Marker, ScanError, TScalarStyle, TokenType}; use crate::scanner::{Marker, ScanError, TScalarStyle};
use linked_hash_map::LinkedHashMap; use linked_hash_map::LinkedHashMap;
use std::collections::BTreeMap; use std::collections::BTreeMap;
use std::mem; use std::mem;

View file

@ -52,7 +52,7 @@ fn run_yaml_test(test: &Test<YamlTest>) -> Outcome {
let actual_events = parse_to_events(&desc.yaml); let actual_events = parse_to_events(&desc.yaml);
let events_diff = actual_events.map(|events| events_differ(events, &desc.expected_events)); let events_diff = actual_events.map(|events| events_differ(events, &desc.expected_events));
let mut error_text = match (events_diff, desc.expected_error) { let mut error_text = match (events_diff, desc.expected_error) {
(Ok(_), true) => Some("no error when expected".into()), (Ok(x), true) => Some(format!("no error when expected: {x:#?}")),
(Err(_), true) => None, (Err(_), true) => None,
(Err(e), false) => Some(format!("unexpected error {:?}", e)), (Err(e), false) => Some(format!("unexpected error {:?}", e)),
(Ok(Some(diff)), false) => Some(format!("events differ: {}", diff)), (Ok(Some(diff)), false) => Some(format!("events differ: {}", diff)),
@ -299,15 +299,12 @@ fn expected_events(expected_tree: &str) -> Vec<String> {
static EXPECTED_FAILURES: &[&str] = &[ static EXPECTED_FAILURES: &[&str] = &[
// These seem to be plain bugs // These seem to be plain bugs
// TAB as start of plain scalar instead of whitespace // TAB as start of plain scalar instead of whitespace
"6CA3",
"DK95-00", "DK95-00",
"Q5MG",
"Y79Y-06", "Y79Y-06",
"Y79Y-03", // unexpected pass "Y79Y-03", // unexpected pass
"Y79Y-04", // unexpected pass "Y79Y-04", // unexpected pass
"Y79Y-05", // unexpected pass "Y79Y-05", // unexpected pass
// TABs in whitespace-only lines // TABs in whitespace-only lines
"DK95-03",
"DK95-04", "DK95-04",
// TABs after marker ? or : (space required?) // TABs after marker ? or : (space required?)
"Y79Y-07", "Y79Y-07",