Better tracking for beginning and ending positions of mappings. (#10)

Previously, we often used the scanner state to infer the positions of
mappings. This is sometimes wrong, because the scanner has already
scanned ahead by the time the mapping is parsed.

This commit adds a check to the test suite, asserting that parser event
positions are all observed in order, and it fixes the scanner and parser
to make the new check pass.
This commit is contained in:
jneem 2024-09-25 22:13:31 +07:00 committed by Ethiraric
parent 833343757a
commit 434f4521dd
3 changed files with 83 additions and 55 deletions

View file

@ -7,6 +7,7 @@
use crate::{
input::{str::StrInput, Input},
scanner::{ScanError, Scanner, Span, TScalarStyle, Token, TokenType},
Marker,
};
use std::collections::HashMap;
@ -29,7 +30,7 @@ enum State {
FlowSequenceEntry,
FlowSequenceEntryMappingKey,
FlowSequenceEntryMappingValue,
FlowSequenceEntryMappingEnd,
FlowSequenceEntryMappingEnd(Marker),
FlowMappingFirstKey,
FlowMappingKey,
FlowMappingValue,
@ -559,7 +560,7 @@ impl<T: Input> Parser<T> {
State::FlowSequenceEntryMappingKey => self.flow_sequence_entry_mapping_key(),
State::FlowSequenceEntryMappingValue => self.flow_sequence_entry_mapping_value(),
State::FlowSequenceEntryMappingEnd => self.flow_sequence_entry_mapping_end(),
State::FlowSequenceEntryMappingEnd(mark) => self.flow_sequence_entry_mapping_end(mark),
State::FlowMappingEmptyValue => self.flow_mapping_value(true),
/* impossible */
@ -1070,27 +1071,26 @@ impl<T: Input> Parser<T> {
Token(_, TokenType::Value) => {
self.skip();
self.state = State::FlowSequenceEntryMappingValue;
if let Token(mark, TokenType::FlowEntry | TokenType::FlowSequenceEnd) =
*self.peek_token()?
{
self.state = State::FlowSequenceEntryMappingEnd;
Ok((Event::empty_scalar(), mark))
let Token(span, ref tok) = *self.peek_token()?;
if matches!(tok, TokenType::FlowEntry | TokenType::FlowSequenceEnd) {
self.state = State::FlowSequenceEntryMappingEnd(span.end);
Ok((Event::empty_scalar(), span))
} else {
self.push_state(State::FlowSequenceEntryMappingEnd);
self.push_state(State::FlowSequenceEntryMappingEnd(span.end));
self.parse_node(false, false)
}
}
Token(mark, _) => {
self.state = State::FlowSequenceEntryMappingEnd;
self.state = State::FlowSequenceEntryMappingEnd(mark.end);
Ok((Event::empty_scalar(), mark))
}
}
}
#[allow(clippy::unnecessary_wraps)]
fn flow_sequence_entry_mapping_end(&mut self) -> ParseResult {
fn flow_sequence_entry_mapping_end(&mut self, mark: Marker) -> ParseResult {
self.state = State::FlowSequenceEntry;
Ok((Event::MappingEnd, Span::empty(self.scanner.mark())))
Ok((Event::MappingEnd, Span::empty(mark)))
}
/// Resolve a tag from the handle and the suffix.

View file

@ -2403,7 +2403,7 @@ impl<T: Input> Scanner<T> {
}
self.insert_token(
sk.token_number - self.tokens_parsed,
Token(Span::empty(self.mark), TokenType::FlowMappingStart),
Token(Span::empty(sk.mark), TokenType::FlowMappingStart),
);
}
@ -2412,7 +2412,7 @@ impl<T: Input> Scanner<T> {
sk.mark.col,
Some(sk.token_number),
TokenType::BlockMappingStart,
start_mark,
sk.mark,
);
self.roll_one_col_indent();
@ -2421,7 +2421,7 @@ impl<T: Input> Scanner<T> {
} else {
if is_implicit_flow_mapping {
self.tokens
.push_back(Token(Span::empty(self.mark), TokenType::FlowMappingStart));
.push_back(Token(Span::empty(start_mark), TokenType::FlowMappingStart));
}
// The ':' indicator follows a complex key.
if self.flow_level == 0 {

View file

@ -3,7 +3,9 @@ use std::fs::{self, DirEntry};
use libtest_mimic::{run_tests, Arguments, Outcome, Test};
use saphyr::{Hash, Yaml};
use saphyr_parser::{Event, EventReceiver, Parser, ScanError, TScalarStyle, Tag};
use saphyr_parser::{
Event, Marker, Parser, ScanError, Span, SpannedEventReceiver, TScalarStyle, Tag,
};
type Result<T, E = Box<dyn std::error::Error>> = std::result::Result<T, E>;
@ -34,28 +36,49 @@ fn main() -> Result<()> {
fn run_yaml_test(test: &Test<YamlTest>) -> Outcome {
let desc = &test.data;
let actual_events = parse_to_events(&desc.yaml);
let events_diff = actual_events.map(|events| events_differ(&events, &desc.expected_events));
let mut error_text = match (&events_diff, desc.expected_error) {
let reporter = parse_to_events(&desc.yaml);
let actual_events = reporter.as_ref().map(|reporter| &reporter.events);
let events_diff = actual_events.map(|events| events_differ(events, &desc.expected_events));
let error_text = match (&events_diff, desc.expected_error) {
(Ok(x), true) => Some(format!("no error when expected: {x:#?}")),
(Err(_), true) | (Ok(None), false) => None,
(Err(e), false) => Some(format!("unexpected error {e:?}")),
(Ok(Some(diff)), false) => Some(format!("events differ: {diff}")),
};
// Show a caret on error.
if let Some(text) = &mut error_text {
if let Some(mut txt) = error_text {
add_error_context(
&mut txt,
desc,
events_diff.err().map(saphyr::ScanError::marker),
);
Outcome::Failed { msg: Some(txt) }
} else if let Some((mut msg, span)) = reporter
.as_ref()
.ok()
.and_then(|reporter| reporter.span_failures.first().cloned())
{
add_error_context(&mut msg, desc, Some(&span.start));
Outcome::Failed { msg: Some(msg) }
} else {
Outcome::Passed
}
}
// Enrich the error message with the failing input, and a caret pointing
// at the position that errored.
fn add_error_context(text: &mut String, desc: &YamlTest, marker: Option<&Marker>) {
use std::fmt::Write;
let _ = writeln!(text, "\n### Input:\n{}\n### End", desc.yaml_visual);
if let Err(err) = &events_diff {
if let Some(mark) = marker {
writeln!(text, "### Error position").unwrap();
let mut lines = desc.yaml.lines();
for _ in 0..(err.marker().line() - 1) {
for _ in 0..(mark.line() - 1) {
let l = lines.next().unwrap();
writeln!(text, "{l}").unwrap();
}
writeln!(text, "\x1B[91;1m{}", lines.next().unwrap()).unwrap();
for _ in 0..err.marker().col() {
for _ in 0..mark.col() {
write!(text, " ").unwrap();
}
writeln!(text, "^\x1b[m").unwrap();
@ -64,12 +87,6 @@ fn run_yaml_test(test: &Test<YamlTest>) -> Outcome {
}
writeln!(text, "### End error position").unwrap();
}
}
match error_text {
None => Outcome::Passed,
Some(txt) => Outcome::Failed { msg: Some(txt) },
}
}
fn load_tests_from_file(entry: &DirEntry) -> Result<Vec<Test<YamlTest>>> {
@ -118,26 +135,37 @@ fn load_tests_from_file(entry: &DirEntry) -> Result<Vec<Test<YamlTest>>> {
Ok(result)
}
fn parse_to_events(source: &str) -> Result<Vec<String>, ScanError> {
let mut reporter = EventReporter::new();
fn parse_to_events(source: &str) -> Result<EventReporter, ScanError> {
let mut reporter = EventReporter::default();
for x in Parser::new_from_str(source) {
reporter.on_event(x?.0);
let x = x?;
reporter.on_event(x.0, x.1);
}
Ok(reporter.events)
Ok(reporter)
}
struct EventReporter {
events: Vec<String>,
#[derive(Default)]
/// A [`SpannedEventReceiver`] checking for inconsistencies in event [`Spans`].
pub struct EventReporter {
pub events: Vec<String>,
last_span: Option<(Event, Span)>,
pub span_failures: Vec<(String, Span)>,
}
impl EventReporter {
fn new() -> Self {
Self { events: vec![] }
impl SpannedEventReceiver for EventReporter {
fn on_event(&mut self, ev: Event, span: Span) {
if let Some((last_ev, last_span)) = self.last_span.take() {
if span.start.index() < last_span.start.index()
|| span.end.index() < last_span.end.index()
{
self.span_failures.push((
format!("event {ev:?}@{span:?} came before event {last_ev:?}@{last_span:?}"),
span,
));
}
}
}
self.last_span = Some((ev.clone(), span));
impl EventReceiver for EventReporter {
fn on_event(&mut self, ev: Event) {
let line: String = match ev {
Event::StreamStart => "+STR".into(),
Event::StreamEnd => "-STR".into(),