Better tracking for beginning and ending positions of mappings. (#10)

Previously, we often used the scanner state to infer the positions of
mappings. This is sometimes wrong, because the scanner has already
scanned ahead by the time the mapping is parsed.

This commit adds a check to the test suite, asserting that parser event
positions are all observed in order, and it fixes the scanner and parser
to make the new check pass.
This commit is contained in:
jneem 2024-09-25 22:13:31 +07:00 committed by Ethiraric
parent 833343757a
commit 434f4521dd
3 changed files with 83 additions and 55 deletions

View file

@ -7,6 +7,7 @@
use crate::{ use crate::{
input::{str::StrInput, Input}, input::{str::StrInput, Input},
scanner::{ScanError, Scanner, Span, TScalarStyle, Token, TokenType}, scanner::{ScanError, Scanner, Span, TScalarStyle, Token, TokenType},
Marker,
}; };
use std::collections::HashMap; use std::collections::HashMap;
@ -29,7 +30,7 @@ enum State {
FlowSequenceEntry, FlowSequenceEntry,
FlowSequenceEntryMappingKey, FlowSequenceEntryMappingKey,
FlowSequenceEntryMappingValue, FlowSequenceEntryMappingValue,
FlowSequenceEntryMappingEnd, FlowSequenceEntryMappingEnd(Marker),
FlowMappingFirstKey, FlowMappingFirstKey,
FlowMappingKey, FlowMappingKey,
FlowMappingValue, FlowMappingValue,
@ -559,7 +560,7 @@ impl<T: Input> Parser<T> {
State::FlowSequenceEntryMappingKey => self.flow_sequence_entry_mapping_key(), State::FlowSequenceEntryMappingKey => self.flow_sequence_entry_mapping_key(),
State::FlowSequenceEntryMappingValue => self.flow_sequence_entry_mapping_value(), State::FlowSequenceEntryMappingValue => self.flow_sequence_entry_mapping_value(),
State::FlowSequenceEntryMappingEnd => self.flow_sequence_entry_mapping_end(), State::FlowSequenceEntryMappingEnd(mark) => self.flow_sequence_entry_mapping_end(mark),
State::FlowMappingEmptyValue => self.flow_mapping_value(true), State::FlowMappingEmptyValue => self.flow_mapping_value(true),
/* impossible */ /* impossible */
@ -1070,27 +1071,26 @@ impl<T: Input> Parser<T> {
Token(_, TokenType::Value) => { Token(_, TokenType::Value) => {
self.skip(); self.skip();
self.state = State::FlowSequenceEntryMappingValue; self.state = State::FlowSequenceEntryMappingValue;
if let Token(mark, TokenType::FlowEntry | TokenType::FlowSequenceEnd) = let Token(span, ref tok) = *self.peek_token()?;
*self.peek_token()? if matches!(tok, TokenType::FlowEntry | TokenType::FlowSequenceEnd) {
{ self.state = State::FlowSequenceEntryMappingEnd(span.end);
self.state = State::FlowSequenceEntryMappingEnd; Ok((Event::empty_scalar(), span))
Ok((Event::empty_scalar(), mark))
} else { } else {
self.push_state(State::FlowSequenceEntryMappingEnd); self.push_state(State::FlowSequenceEntryMappingEnd(span.end));
self.parse_node(false, false) self.parse_node(false, false)
} }
} }
Token(mark, _) => { Token(mark, _) => {
self.state = State::FlowSequenceEntryMappingEnd; self.state = State::FlowSequenceEntryMappingEnd(mark.end);
Ok((Event::empty_scalar(), mark)) Ok((Event::empty_scalar(), mark))
} }
} }
} }
#[allow(clippy::unnecessary_wraps)] #[allow(clippy::unnecessary_wraps)]
fn flow_sequence_entry_mapping_end(&mut self) -> ParseResult { fn flow_sequence_entry_mapping_end(&mut self, mark: Marker) -> ParseResult {
self.state = State::FlowSequenceEntry; self.state = State::FlowSequenceEntry;
Ok((Event::MappingEnd, Span::empty(self.scanner.mark()))) Ok((Event::MappingEnd, Span::empty(mark)))
} }
/// Resolve a tag from the handle and the suffix. /// Resolve a tag from the handle and the suffix.

View file

@ -2403,7 +2403,7 @@ impl<T: Input> Scanner<T> {
} }
self.insert_token( self.insert_token(
sk.token_number - self.tokens_parsed, sk.token_number - self.tokens_parsed,
Token(Span::empty(self.mark), TokenType::FlowMappingStart), Token(Span::empty(sk.mark), TokenType::FlowMappingStart),
); );
} }
@ -2412,7 +2412,7 @@ impl<T: Input> Scanner<T> {
sk.mark.col, sk.mark.col,
Some(sk.token_number), Some(sk.token_number),
TokenType::BlockMappingStart, TokenType::BlockMappingStart,
start_mark, sk.mark,
); );
self.roll_one_col_indent(); self.roll_one_col_indent();
@ -2421,7 +2421,7 @@ impl<T: Input> Scanner<T> {
} else { } else {
if is_implicit_flow_mapping { if is_implicit_flow_mapping {
self.tokens self.tokens
.push_back(Token(Span::empty(self.mark), TokenType::FlowMappingStart)); .push_back(Token(Span::empty(start_mark), TokenType::FlowMappingStart));
} }
// The ':' indicator follows a complex key. // The ':' indicator follows a complex key.
if self.flow_level == 0 { if self.flow_level == 0 {

View file

@ -3,7 +3,9 @@ use std::fs::{self, DirEntry};
use libtest_mimic::{run_tests, Arguments, Outcome, Test}; use libtest_mimic::{run_tests, Arguments, Outcome, Test};
use saphyr::{Hash, Yaml}; use saphyr::{Hash, Yaml};
use saphyr_parser::{Event, EventReceiver, Parser, ScanError, TScalarStyle, Tag}; use saphyr_parser::{
Event, Marker, Parser, ScanError, Span, SpannedEventReceiver, TScalarStyle, Tag,
};
type Result<T, E = Box<dyn std::error::Error>> = std::result::Result<T, E>; type Result<T, E = Box<dyn std::error::Error>> = std::result::Result<T, E>;
@ -34,41 +36,56 @@ fn main() -> Result<()> {
fn run_yaml_test(test: &Test<YamlTest>) -> Outcome { fn run_yaml_test(test: &Test<YamlTest>) -> Outcome {
let desc = &test.data; let desc = &test.data;
let actual_events = parse_to_events(&desc.yaml); let reporter = parse_to_events(&desc.yaml);
let events_diff = actual_events.map(|events| events_differ(&events, &desc.expected_events)); let actual_events = reporter.as_ref().map(|reporter| &reporter.events);
let mut error_text = match (&events_diff, desc.expected_error) { let events_diff = actual_events.map(|events| events_differ(events, &desc.expected_events));
let error_text = match (&events_diff, desc.expected_error) {
(Ok(x), true) => Some(format!("no error when expected: {x:#?}")), (Ok(x), true) => Some(format!("no error when expected: {x:#?}")),
(Err(_), true) | (Ok(None), false) => None, (Err(_), true) | (Ok(None), false) => None,
(Err(e), false) => Some(format!("unexpected error {e:?}")), (Err(e), false) => Some(format!("unexpected error {e:?}")),
(Ok(Some(diff)), false) => Some(format!("events differ: {diff}")), (Ok(Some(diff)), false) => Some(format!("events differ: {diff}")),
}; };
// Show a caret on error. if let Some(mut txt) = error_text {
if let Some(text) = &mut error_text { add_error_context(
use std::fmt::Write; &mut txt,
let _ = writeln!(text, "\n### Input:\n{}\n### End", desc.yaml_visual); desc,
if let Err(err) = &events_diff { events_diff.err().map(saphyr::ScanError::marker),
writeln!(text, "### Error position").unwrap(); );
let mut lines = desc.yaml.lines(); Outcome::Failed { msg: Some(txt) }
for _ in 0..(err.marker().line() - 1) { } else if let Some((mut msg, span)) = reporter
let l = lines.next().unwrap(); .as_ref()
writeln!(text, "{l}").unwrap(); .ok()
} .and_then(|reporter| reporter.span_failures.first().cloned())
writeln!(text, "\x1B[91;1m{}", lines.next().unwrap()).unwrap(); {
for _ in 0..err.marker().col() { add_error_context(&mut msg, desc, Some(&span.start));
write!(text, " ").unwrap(); Outcome::Failed { msg: Some(msg) }
} } else {
writeln!(text, "^\x1b[m").unwrap(); Outcome::Passed
for l in lines {
writeln!(text, "{l}").unwrap();
}
writeln!(text, "### End error position").unwrap();
}
} }
}
match error_text { // Enrich the error message with the failing input, and a caret pointing
None => Outcome::Passed, // at the position that errored.
Some(txt) => Outcome::Failed { msg: Some(txt) }, fn add_error_context(text: &mut String, desc: &YamlTest, marker: Option<&Marker>) {
use std::fmt::Write;
let _ = writeln!(text, "\n### Input:\n{}\n### End", desc.yaml_visual);
if let Some(mark) = marker {
writeln!(text, "### Error position").unwrap();
let mut lines = desc.yaml.lines();
for _ in 0..(mark.line() - 1) {
let l = lines.next().unwrap();
writeln!(text, "{l}").unwrap();
}
writeln!(text, "\x1B[91;1m{}", lines.next().unwrap()).unwrap();
for _ in 0..mark.col() {
write!(text, " ").unwrap();
}
writeln!(text, "^\x1b[m").unwrap();
for l in lines {
writeln!(text, "{l}").unwrap();
}
writeln!(text, "### End error position").unwrap();
} }
} }
@ -118,26 +135,37 @@ fn load_tests_from_file(entry: &DirEntry) -> Result<Vec<Test<YamlTest>>> {
Ok(result) Ok(result)
} }
fn parse_to_events(source: &str) -> Result<Vec<String>, ScanError> { fn parse_to_events(source: &str) -> Result<EventReporter, ScanError> {
let mut reporter = EventReporter::new(); let mut reporter = EventReporter::default();
for x in Parser::new_from_str(source) { for x in Parser::new_from_str(source) {
reporter.on_event(x?.0); let x = x?;
reporter.on_event(x.0, x.1);
} }
Ok(reporter.events) Ok(reporter)
} }
struct EventReporter { #[derive(Default)]
events: Vec<String>, /// A [`SpannedEventReceiver`] checking for inconsistencies in event [`Spans`].
pub struct EventReporter {
pub events: Vec<String>,
last_span: Option<(Event, Span)>,
pub span_failures: Vec<(String, Span)>,
} }
impl EventReporter { impl SpannedEventReceiver for EventReporter {
fn new() -> Self { fn on_event(&mut self, ev: Event, span: Span) {
Self { events: vec![] } if let Some((last_ev, last_span)) = self.last_span.take() {
} if span.start.index() < last_span.start.index()
} || span.end.index() < last_span.end.index()
{
self.span_failures.push((
format!("event {ev:?}@{span:?} came before event {last_ev:?}@{last_span:?}"),
span,
));
}
}
self.last_span = Some((ev.clone(), span));
impl EventReceiver for EventReporter {
fn on_event(&mut self, ev: Event) {
let line: String = match ev { let line: String = match ev {
Event::StreamStart => "+STR".into(), Event::StreamStart => "+STR".into(),
Event::StreamEnd => "-STR".into(), Event::StreamEnd => "-STR".into(),