Use spans instead of markers (#3)

This commit is contained in:
jneem 2024-08-05 22:08:23 +07:00 committed by GitHub
parent 4a5241e0bb
commit 926fdfb01b
7 changed files with 333 additions and 131 deletions

View file

@ -40,5 +40,5 @@ mod parser;
mod scanner; mod scanner;
pub use crate::input::BufferedInput; pub use crate::input::BufferedInput;
pub use crate::parser::{Event, EventReceiver, MarkedEventReceiver, Parser, Tag}; pub use crate::parser::{Event, EventReceiver, Parser, SpannedEventReceiver, Tag};
pub use crate::scanner::{Marker, ScanError, TScalarStyle}; pub use crate::scanner::{Marker, ScanError, Span, TScalarStyle};

View file

@ -6,8 +6,9 @@
use crate::{ use crate::{
input::{str::StrInput, Input}, input::{str::StrInput, Input},
scanner::{Marker, ScanError, Scanner, TScalarStyle, Token, TokenType}, scanner::{ScanError, Scanner, Span, TScalarStyle, Token, TokenType},
}; };
use std::collections::HashMap; use std::collections::HashMap;
#[derive(Clone, Copy, PartialEq, Debug, Eq)] #[derive(Clone, Copy, PartialEq, Debug, Eq)]
@ -116,7 +117,7 @@ pub struct Parser<T: Input> {
/// The next token from the scanner. /// The next token from the scanner.
token: Option<Token>, token: Option<Token>,
/// The next YAML event to emit. /// The next YAML event to emit.
current: Option<(Event, Marker)>, current: Option<(Event, Span)>,
/// Anchors that have been encountered in the YAML document. /// Anchors that have been encountered in the YAML document.
anchors: HashMap<String, usize>, anchors: HashMap<String, usize>,
/// Next ID available for an anchor. /// Next ID available for an anchor.
@ -142,8 +143,8 @@ pub struct Parser<T: Input> {
/// The low-level parsing API is event-based (a push parser), calling [`EventReceiver::on_event`] /// The low-level parsing API is event-based (a push parser), calling [`EventReceiver::on_event`]
/// for each YAML [`Event`] that occurs. /// for each YAML [`Event`] that occurs.
/// The [`EventReceiver`] trait only receives events. In order to receive both events and their /// The [`EventReceiver`] trait only receives events. In order to receive both events and their
/// location in the source, use [`MarkedEventReceiver`]. Note that [`EventReceiver`]s implement /// location in the source, use [`SpannedEventReceiver`]. Note that [`EventReceiver`]s implement
/// [`MarkedEventReceiver`] automatically. /// [`SpannedEventReceiver`] automatically.
/// ///
/// # Event hierarchy /// # Event hierarchy
/// The event stream starts with an [`Event::StreamStart`] event followed by an /// The event stream starts with an [`Event::StreamStart`] event followed by an
@ -213,20 +214,20 @@ pub trait EventReceiver {
/// Trait to be implemented for using the low-level parsing API. /// Trait to be implemented for using the low-level parsing API.
/// ///
/// Functionally similar to [`EventReceiver`], but receives a [`Marker`] as well as the event. /// Functionally similar to [`EventReceiver`], but receives a [`Span`] as well as the event.
pub trait MarkedEventReceiver { pub trait SpannedEventReceiver {
/// Handler called for each event that occurs. /// Handler called for each event that occurs.
fn on_event(&mut self, ev: Event, _mark: Marker); fn on_event(&mut self, ev: Event, span: Span);
} }
impl<R: EventReceiver> MarkedEventReceiver for R { impl<R: EventReceiver> SpannedEventReceiver for R {
fn on_event(&mut self, ev: Event, _mark: Marker) { fn on_event(&mut self, ev: Event, _span: Span) {
self.on_event(ev); self.on_event(ev);
} }
} }
/// A convenience alias for a `Result` of a parser event. /// A convenience alias for a `Result` of a parser event.
pub type ParseResult = Result<(Event, Marker), ScanError>; pub type ParseResult = Result<(Event, Span), ScanError>;
impl<'a> Parser<StrInput<'a>> { impl<'a> Parser<StrInput<'a>> {
/// Create a new instance of a parser from a &str. /// Create a new instance of a parser from a &str.
@ -290,7 +291,7 @@ impl<T: Input> Parser<T> {
/// ///
/// # Errors /// # Errors
/// Returns `ScanError` when loading the next event fails. /// Returns `ScanError` when loading the next event fails.
pub fn peek(&mut self) -> Option<Result<&(Event, Marker), ScanError>> { pub fn peek(&mut self) -> Option<Result<&(Event, Span), ScanError>> {
if let Some(ref x) = self.current { if let Some(ref x) = self.current {
Some(Ok(x)) Some(Ok(x))
} else { } else {
@ -379,7 +380,7 @@ impl<T: Input> Parser<T> {
fn parse(&mut self) -> ParseResult { fn parse(&mut self) -> ParseResult {
if self.state == State::End { if self.state == State::End {
return Ok((Event::StreamEnd, self.scanner.mark())); return Ok((Event::StreamEnd, Span::empty(self.scanner.mark())));
} }
let (ev, mark) = self.state_machine()?; let (ev, mark) = self.state_machine()?;
Ok((ev, mark)) Ok((ev, mark))
@ -393,40 +394,40 @@ impl<T: Input> Parser<T> {
/// If `multi` is set to `true`, the parser will allow parsing of multiple YAML documents /// If `multi` is set to `true`, the parser will allow parsing of multiple YAML documents
/// inside the stream. /// inside the stream.
/// ///
/// Note that any [`EventReceiver`] is also a [`MarkedEventReceiver`], so implementing the /// Note that any [`EventReceiver`] is also a [`SpannedEventReceiver`], so implementing the
/// former is enough to call this function. /// former is enough to call this function.
/// # Errors /// # Errors
/// Returns `ScanError` when loading fails. /// Returns `ScanError` when loading fails.
pub fn load<R: MarkedEventReceiver>( pub fn load<R: SpannedEventReceiver>(
&mut self, &mut self,
recv: &mut R, recv: &mut R,
multi: bool, multi: bool,
) -> Result<(), ScanError> { ) -> Result<(), ScanError> {
if !self.scanner.stream_started() { if !self.scanner.stream_started() {
let (ev, mark) = self.next_event_impl()?; let (ev, span) = self.next_event_impl()?;
if ev != Event::StreamStart { if ev != Event::StreamStart {
return Err(ScanError::new_str( return Err(ScanError::new_str(
mark, span.start,
"did not find expected <stream-start>", "did not find expected <stream-start>",
)); ));
} }
recv.on_event(ev, mark); recv.on_event(ev, span);
} }
if self.scanner.stream_ended() { if self.scanner.stream_ended() {
// XXX has parsed? // XXX has parsed?
recv.on_event(Event::StreamEnd, self.scanner.mark()); recv.on_event(Event::StreamEnd, Span::empty(self.scanner.mark()));
return Ok(()); return Ok(());
} }
loop { loop {
let (ev, mark) = self.next_event_impl()?; let (ev, span) = self.next_event_impl()?;
if ev == Event::StreamEnd { if ev == Event::StreamEnd {
recv.on_event(ev, mark); recv.on_event(ev, span);
return Ok(()); return Ok(());
} }
// clear anchors before a new document // clear anchors before a new document
self.anchors.clear(); self.anchors.clear();
self.load_document(ev, mark, recv)?; self.load_document(ev, span, recv)?;
if !multi { if !multi {
break; break;
} }
@ -434,22 +435,22 @@ impl<T: Input> Parser<T> {
Ok(()) Ok(())
} }
fn load_document<R: MarkedEventReceiver>( fn load_document<R: SpannedEventReceiver>(
&mut self, &mut self,
first_ev: Event, first_ev: Event,
mark: Marker, span: Span,
recv: &mut R, recv: &mut R,
) -> Result<(), ScanError> { ) -> Result<(), ScanError> {
if first_ev != Event::DocumentStart { if first_ev != Event::DocumentStart {
return Err(ScanError::new_str( return Err(ScanError::new_str(
mark, span.start,
"did not find expected <document-start>", "did not find expected <document-start>",
)); ));
} }
recv.on_event(first_ev, mark); recv.on_event(first_ev, span);
let (ev, mark) = self.next_event_impl()?; let (ev, span) = self.next_event_impl()?;
self.load_node(ev, mark, recv)?; self.load_node(ev, span, recv)?;
// DOCUMENT-END is expected. // DOCUMENT-END is expected.
let (ev, mark) = self.next_event_impl()?; let (ev, mark) = self.next_event_impl()?;
@ -459,23 +460,23 @@ impl<T: Input> Parser<T> {
Ok(()) Ok(())
} }
fn load_node<R: MarkedEventReceiver>( fn load_node<R: SpannedEventReceiver>(
&mut self, &mut self,
first_ev: Event, first_ev: Event,
mark: Marker, span: Span,
recv: &mut R, recv: &mut R,
) -> Result<(), ScanError> { ) -> Result<(), ScanError> {
match first_ev { match first_ev {
Event::Alias(..) | Event::Scalar(..) => { Event::Alias(..) | Event::Scalar(..) => {
recv.on_event(first_ev, mark); recv.on_event(first_ev, span);
Ok(()) Ok(())
} }
Event::SequenceStart(..) => { Event::SequenceStart(..) => {
recv.on_event(first_ev, mark); recv.on_event(first_ev, span);
self.load_sequence(recv) self.load_sequence(recv)
} }
Event::MappingStart(..) => { Event::MappingStart(..) => {
recv.on_event(first_ev, mark); recv.on_event(first_ev, span);
self.load_mapping(recv) self.load_mapping(recv)
} }
_ => { _ => {
@ -485,7 +486,7 @@ impl<T: Input> Parser<T> {
} }
} }
fn load_mapping<R: MarkedEventReceiver>(&mut self, recv: &mut R) -> Result<(), ScanError> { fn load_mapping<R: SpannedEventReceiver>(&mut self, recv: &mut R) -> Result<(), ScanError> {
let (mut key_ev, mut key_mark) = self.next_event_impl()?; let (mut key_ev, mut key_mark) = self.next_event_impl()?;
while key_ev != Event::MappingEnd { while key_ev != Event::MappingEnd {
// key // key
@ -504,7 +505,7 @@ impl<T: Input> Parser<T> {
Ok(()) Ok(())
} }
fn load_sequence<R: MarkedEventReceiver>(&mut self, recv: &mut R) -> Result<(), ScanError> { fn load_sequence<R: SpannedEventReceiver>(&mut self, recv: &mut R) -> Result<(), ScanError> {
let (mut ev, mut mark) = self.next_event_impl()?; let (mut ev, mut mark) = self.next_event_impl()?;
while ev != Event::SequenceEnd { while ev != Event::SequenceEnd {
self.load_node(ev, mark, recv)?; self.load_node(ev, mark, recv)?;
@ -562,13 +563,13 @@ impl<T: Input> Parser<T> {
fn stream_start(&mut self) -> ParseResult { fn stream_start(&mut self) -> ParseResult {
match *self.peek_token()? { match *self.peek_token()? {
Token(mark, TokenType::StreamStart(_)) => { Token(span, TokenType::StreamStart(_)) => {
self.state = State::ImplicitDocumentStart; self.state = State::ImplicitDocumentStart;
self.skip(); self.skip();
Ok((Event::StreamStart, mark)) Ok((Event::StreamStart, span))
} }
Token(mark, _) => Err(ScanError::new_str( Token(span, _) => Err(ScanError::new_str(
mark, span.start,
"did not find expected <stream-start>", "did not find expected <stream-start>",
)), )),
} }
@ -580,10 +581,10 @@ impl<T: Input> Parser<T> {
} }
match *self.peek_token()? { match *self.peek_token()? {
Token(mark, TokenType::StreamEnd) => { Token(span, TokenType::StreamEnd) => {
self.state = State::End; self.state = State::End;
self.skip(); self.skip();
Ok((Event::StreamEnd, mark)) Ok((Event::StreamEnd, span))
} }
Token( Token(
_, _,
@ -594,11 +595,11 @@ impl<T: Input> Parser<T> {
// explicit document // explicit document
self.explicit_document_start() self.explicit_document_start()
} }
Token(mark, _) if implicit => { Token(span, _) if implicit => {
self.parser_process_directives()?; self.parser_process_directives()?;
self.push_state(State::DocumentEnd); self.push_state(State::DocumentEnd);
self.state = State::BlockNode; self.state = State::BlockNode;
Ok((Event::DocumentStart, mark)) Ok((Event::DocumentStart, span))
} }
_ => { _ => {
// explicit document // explicit document
@ -612,20 +613,23 @@ impl<T: Input> Parser<T> {
loop { loop {
let mut tags = HashMap::new(); let mut tags = HashMap::new();
match self.peek_token()? { match self.peek_token()? {
Token(mark, TokenType::VersionDirective(_, _)) => { Token(span, TokenType::VersionDirective(_, _)) => {
// XXX parsing with warning according to spec // XXX parsing with warning according to spec
//if major != 1 || minor > 2 { //if major != 1 || minor > 2 {
// return Err(ScanError::new_str(tok.0, // return Err(ScanError::new_str(tok.0,
// "found incompatible YAML document")); // "found incompatible YAML document"));
//} //}
if version_directive_received { if version_directive_received {
return Err(ScanError::new_str(*mark, "duplicate version directive")); return Err(ScanError::new_str(
span.start,
"duplicate version directive",
));
} }
version_directive_received = true; version_directive_received = true;
} }
Token(mark, TokenType::TagDirective(handle, prefix)) => { Token(mark, TokenType::TagDirective(handle, prefix)) => {
if tags.contains_key(handle) { if tags.contains_key(handle) {
return Err(ScanError::new_str(*mark, "the TAG directive must only be given at most once per handle in the same document")); return Err(ScanError::new_str(mark.start, "the TAG directive must only be given at most once per handle in the same document"));
} }
tags.insert(handle.to_string(), prefix.to_string()); tags.insert(handle.to_string(), prefix.to_string());
} }
@ -646,8 +650,8 @@ impl<T: Input> Parser<T> {
self.skip(); self.skip();
Ok((Event::DocumentStart, mark)) Ok((Event::DocumentStart, mark))
} }
Token(mark, _) => Err(ScanError::new_str( Token(span, _) => Err(ScanError::new_str(
mark, span.start,
"did not find expected <document start>", "did not find expected <document start>",
)), )),
} }
@ -673,13 +677,13 @@ impl<T: Input> Parser<T> {
fn document_end(&mut self) -> ParseResult { fn document_end(&mut self) -> ParseResult {
let mut explicit_end = false; let mut explicit_end = false;
let marker: Marker = match *self.peek_token()? { let span: Span = match *self.peek_token()? {
Token(mark, TokenType::DocumentEnd) => { Token(span, TokenType::DocumentEnd) => {
explicit_end = true; explicit_end = true;
self.skip(); self.skip();
mark span
} }
Token(mark, _) => mark, Token(span, _) => span,
}; };
if !self.keep_tags { if !self.keep_tags {
@ -688,21 +692,21 @@ impl<T: Input> Parser<T> {
if explicit_end { if explicit_end {
self.state = State::ImplicitDocumentStart; self.state = State::ImplicitDocumentStart;
} else { } else {
if let Token(mark, TokenType::VersionDirective(..) | TokenType::TagDirective(..)) = if let Token(span, TokenType::VersionDirective(..) | TokenType::TagDirective(..)) =
*self.peek_token()? *self.peek_token()?
{ {
return Err(ScanError::new_str( return Err(ScanError::new_str(
mark, span.start,
"missing explicit document end marker before directive", "missing explicit document end marker before directive",
)); ));
} }
self.state = State::DocumentStart; self.state = State::DocumentStart;
} }
Ok((Event::DocumentEnd, marker)) Ok((Event::DocumentEnd, span))
} }
fn register_anchor(&mut self, name: String, _: &Marker) -> usize { fn register_anchor(&mut self, name: String, _: &Span) -> usize {
// anchors can be overridden/reused // anchors can be overridden/reused
// if self.anchors.contains_key(name) { // if self.anchors.contains_key(name) {
// return Err(ScanError::new_str(*mark, // return Err(ScanError::new_str(*mark,
@ -720,25 +724,25 @@ impl<T: Input> Parser<T> {
match *self.peek_token()? { match *self.peek_token()? {
Token(_, TokenType::Alias(_)) => { Token(_, TokenType::Alias(_)) => {
self.pop_state(); self.pop_state();
if let Token(mark, TokenType::Alias(name)) = self.fetch_token() { if let Token(span, TokenType::Alias(name)) = self.fetch_token() {
match self.anchors.get(&name) { match self.anchors.get(&name) {
None => { None => {
return Err(ScanError::new_str( return Err(ScanError::new_str(
mark, span.start,
"while parsing node, found unknown anchor", "while parsing node, found unknown anchor",
)) ))
} }
Some(id) => return Ok((Event::Alias(*id), mark)), Some(id) => return Ok((Event::Alias(*id), span)),
} }
} }
unreachable!() unreachable!()
} }
Token(_, TokenType::Anchor(_)) => { Token(_, TokenType::Anchor(_)) => {
if let Token(mark, TokenType::Anchor(name)) = self.fetch_token() { if let Token(span, TokenType::Anchor(name)) = self.fetch_token() {
anchor_id = self.register_anchor(name, &mark); anchor_id = self.register_anchor(name, &span);
if let TokenType::Tag(..) = self.peek_token()?.1 { if let TokenType::Tag(..) = self.peek_token()?.1 {
if let TokenType::Tag(handle, suffix) = self.fetch_token().1 { if let TokenType::Tag(handle, suffix) = self.fetch_token().1 {
tag = Some(self.resolve_tag(mark, &handle, suffix)?); tag = Some(self.resolve_tag(span, &handle, suffix)?);
} else { } else {
unreachable!() unreachable!()
} }
@ -797,8 +801,8 @@ impl<T: Input> Parser<T> {
self.pop_state(); self.pop_state();
Ok((Event::empty_scalar_with_anchor(anchor_id, tag), mark)) Ok((Event::empty_scalar_with_anchor(anchor_id, tag), mark))
} }
Token(mark, _) => Err(ScanError::new_str( Token(span, _) => Err(ScanError::new_str(
mark, span.start,
"while parsing a node, did not find expected node content", "while parsing a node, did not find expected node content",
)), )),
} }
@ -835,8 +839,8 @@ impl<T: Input> Parser<T> {
self.skip(); self.skip();
Ok((Event::MappingEnd, mark)) Ok((Event::MappingEnd, mark))
} }
Token(mark, _) => Err(ScanError::new_str( Token(span, _) => Err(ScanError::new_str(
mark, span.start,
"while parsing a block mapping, did not find expected key", "while parsing a block mapping, did not find expected key",
)), )),
} }
@ -870,15 +874,15 @@ impl<T: Input> Parser<T> {
let _ = self.peek_token()?; let _ = self.peek_token()?;
self.skip(); self.skip();
} }
let marker: Marker = { let span: Span = {
match *self.peek_token()? { match *self.peek_token()? {
Token(mark, TokenType::FlowMappingEnd) => mark, Token(mark, TokenType::FlowMappingEnd) => mark,
Token(mark, _) => { Token(mark, _) => {
if !first { if !first {
match *self.peek_token()? { match *self.peek_token()? {
Token(_, TokenType::FlowEntry) => self.skip(), Token(_, TokenType::FlowEntry) => self.skip(),
Token(mark, _) => return Err(ScanError::new_str( Token(span, _) => return Err(ScanError::new_str(
mark, span.start,
"while parsing a flow mapping, did not find expected ',' or '}'", "while parsing a flow mapping, did not find expected ',' or '}'",
)), )),
} }
@ -916,18 +920,18 @@ impl<T: Input> Parser<T> {
self.pop_state(); self.pop_state();
self.skip(); self.skip();
Ok((Event::MappingEnd, marker)) Ok((Event::MappingEnd, span))
} }
fn flow_mapping_value(&mut self, empty: bool) -> ParseResult { fn flow_mapping_value(&mut self, empty: bool) -> ParseResult {
let mark: Marker = { let span: Span = {
if empty { if empty {
let Token(mark, _) = *self.peek_token()?; let Token(mark, _) = *self.peek_token()?;
self.state = State::FlowMappingKey; self.state = State::FlowMappingKey;
return Ok((Event::empty_scalar(), mark)); return Ok((Event::empty_scalar(), mark));
} }
match *self.peek_token()? { match *self.peek_token()? {
Token(marker, TokenType::Value) => { Token(span, TokenType::Value) => {
self.skip(); self.skip();
match self.peek_token()?.1 { match self.peek_token()?.1 {
TokenType::FlowEntry | TokenType::FlowMappingEnd => {} TokenType::FlowEntry | TokenType::FlowMappingEnd => {}
@ -936,14 +940,14 @@ impl<T: Input> Parser<T> {
return self.parse_node(false, false); return self.parse_node(false, false);
} }
} }
marker span
} }
Token(marker, _) => marker, Token(marker, _) => marker,
} }
}; };
self.state = State::FlowMappingKey; self.state = State::FlowMappingKey;
Ok((Event::empty_scalar(), mark)) Ok((Event::empty_scalar(), span))
} }
fn flow_sequence_entry(&mut self, first: bool) -> ParseResult { fn flow_sequence_entry(&mut self, first: bool) -> ParseResult {
@ -962,9 +966,9 @@ impl<T: Input> Parser<T> {
Token(_, TokenType::FlowEntry) if !first => { Token(_, TokenType::FlowEntry) if !first => {
self.skip(); self.skip();
} }
Token(mark, _) if !first => { Token(span, _) if !first => {
return Err(ScanError::new_str( return Err(ScanError::new_str(
mark, span.start,
"while parsing a flow sequence, expected ',' or ']'", "while parsing a flow sequence, expected ',' or ']'",
)); ));
} }
@ -1035,8 +1039,8 @@ impl<T: Input> Parser<T> {
self.parse_node(true, false) self.parse_node(true, false)
} }
} }
Token(mark, _) => Err(ScanError::new_str( Token(span, _) => Err(ScanError::new_str(
mark, span.start,
"while parsing a block collection, did not find expected '-' indicator", "while parsing a block collection, did not find expected '-' indicator",
)), )),
} }
@ -1080,11 +1084,11 @@ impl<T: Input> Parser<T> {
#[allow(clippy::unnecessary_wraps)] #[allow(clippy::unnecessary_wraps)]
fn flow_sequence_entry_mapping_end(&mut self) -> ParseResult { fn flow_sequence_entry_mapping_end(&mut self) -> ParseResult {
self.state = State::FlowSequenceEntry; self.state = State::FlowSequenceEntry;
Ok((Event::MappingEnd, self.scanner.mark())) Ok((Event::MappingEnd, Span::empty(self.scanner.mark())))
} }
/// Resolve a tag from the handle and the suffix. /// Resolve a tag from the handle and the suffix.
fn resolve_tag(&self, mark: Marker, handle: &str, suffix: String) -> Result<Tag, ScanError> { fn resolve_tag(&self, span: Span, handle: &str, suffix: String) -> Result<Tag, ScanError> {
if handle == "!!" { if handle == "!!" {
// "!!" is a shorthand for "tag:yaml.org,2002:". However, that default can be // "!!" is a shorthand for "tag:yaml.org,2002:". However, that default can be
// overridden. // overridden.
@ -1121,7 +1125,7 @@ impl<T: Input> Parser<T> {
// If the handle is of the form "!foo!", this cannot be a local handle and we need // If the handle is of the form "!foo!", this cannot be a local handle and we need
// to error. // to error.
if handle.len() >= 2 && handle.starts_with('!') && handle.ends_with('!') { if handle.len() >= 2 && handle.starts_with('!') && handle.ends_with('!') {
Err(ScanError::new_str(mark, "the handle wasn't declared")) Err(ScanError::new_str(span.start, "the handle wasn't declared"))
} else { } else {
Ok(Tag { Ok(Tag {
handle: handle.to_string(), handle: handle.to_string(),
@ -1134,7 +1138,7 @@ impl<T: Input> Parser<T> {
} }
impl<T: Input> Iterator for Parser<T> { impl<T: Input> Iterator for Parser<T> {
type Item = Result<(Event, Marker), ScanError>; type Item = Result<(Event, Span), ScanError>;
fn next(&mut self) -> Option<Self::Item> { fn next(&mut self) -> Option<Self::Item> {
self.next_event() self.next_event()

View file

@ -79,6 +79,37 @@ impl Marker {
} }
} }
/// A range of locations in a YAML document.
#[derive(Clone, Copy, PartialEq, Debug, Eq, Default)]
pub struct Span {
/// The start (inclusive) of the range.
pub start: Marker,
/// The end (exclusive) of the range.
pub end: Marker,
}
impl Span {
/// Create a new [`Span`] for the given range.
#[must_use]
pub fn new(start: Marker, end: Marker) -> Span {
Span { start, end }
}
/// Create an empty [`Span`] at a given location.
///
/// An empty span doesn't contain any characters, but its position may still be meaningful.
/// For example, for an indented sequence [`SequenceEnd`] has a location but an empty span.
///
/// [`SequenceEnd`]: crate::Event::SequenceEnd
#[must_use]
pub fn empty(mark: Marker) -> Span {
Span {
start: mark,
end: mark,
}
}
}
/// An error that occurred while scanning. /// An error that occurred while scanning.
#[derive(Clone, PartialEq, Debug, Eq)] #[derive(Clone, PartialEq, Debug, Eq)]
pub struct ScanError { pub struct ScanError {
@ -204,7 +235,7 @@ pub enum TokenType {
/// A scanner token. /// A scanner token.
#[derive(Clone, PartialEq, Debug, Eq)] #[derive(Clone, PartialEq, Debug, Eq)]
pub struct Token(pub Marker, pub TokenType); pub struct Token(pub Span, pub TokenType);
/// A scalar that was parsed and may correspond to a simple key. /// A scalar that was parsed and may correspond to a simple key.
/// ///
@ -874,8 +905,10 @@ impl<T: Input> Scanner<T> {
self.indent = -1; self.indent = -1;
self.stream_start_produced = true; self.stream_start_produced = true;
self.allow_simple_key(); self.allow_simple_key();
self.tokens self.tokens.push_back(Token(
.push_back(Token(mark, TokenType::StreamStart(TEncoding::Utf8))); Span::empty(mark),
TokenType::StreamStart(TEncoding::Utf8),
));
self.simple_keys.push(SimpleKey::new(Marker::new(0, 0, 0))); self.simple_keys.push(SimpleKey::new(Marker::new(0, 0, 0)));
} }
@ -900,7 +933,7 @@ impl<T: Input> Scanner<T> {
self.disallow_simple_key(); self.disallow_simple_key();
self.tokens self.tokens
.push_back(Token(self.mark, TokenType::StreamEnd)); .push_back(Token(Span::empty(self.mark), TokenType::StreamEnd));
Ok(()) Ok(())
} }
@ -932,7 +965,7 @@ impl<T: Input> Scanner<T> {
self.mark.col += line_len; self.mark.col += line_len;
// XXX return an empty TagDirective token // XXX return an empty TagDirective token
Token( Token(
start_mark, Span::new(start_mark, self.mark),
TokenType::TagDirective(String::new(), String::new()), TokenType::TagDirective(String::new(), String::new()),
) )
// return Err(ScanError::new_str(start_mark, // return Err(ScanError::new_str(start_mark,
@ -971,7 +1004,10 @@ impl<T: Input> Scanner<T> {
let minor = self.scan_version_directive_number(mark)?; let minor = self.scan_version_directive_number(mark)?;
Ok(Token(*mark, TokenType::VersionDirective(major, minor))) Ok(Token(
Span::new(*mark, self.mark),
TokenType::VersionDirective(major, minor),
))
} }
fn scan_directive_name(&mut self) -> Result<String, ScanError> { fn scan_directive_name(&mut self) -> Result<String, ScanError> {
@ -1040,7 +1076,10 @@ impl<T: Input> Scanner<T> {
self.input.lookahead(1); self.input.lookahead(1);
if self.input.next_is_blank_or_breakz() { if self.input.next_is_blank_or_breakz() {
Ok(Token(*mark, TokenType::TagDirective(handle, prefix))) Ok(Token(
Span::new(*mark, self.mark),
TokenType::TagDirective(handle, prefix),
))
} else { } else {
Err(ScanError::new_str( Err(ScanError::new_str(
*mark, *mark,
@ -1093,7 +1132,10 @@ impl<T: Input> Scanner<T> {
|| (self.flow_level > 0 && self.input.next_is_flow()) || (self.flow_level > 0 && self.input.next_is_flow())
{ {
// XXX: ex 7.2, an empty scalar can follow a secondary tag // XXX: ex 7.2, an empty scalar can follow a secondary tag
Ok(Token(start_mark, TokenType::Tag(handle, suffix))) Ok(Token(
Span::new(start_mark, self.mark),
TokenType::Tag(handle, suffix),
))
} else { } else {
Err(ScanError::new_str( Err(ScanError::new_str(
start_mark, start_mark,
@ -1323,11 +1365,12 @@ impl<T: Input> Scanner<T> {
return Err(ScanError::new_str(start_mark, "while scanning an anchor or alias, did not find expected alphabetic or numeric character")); return Err(ScanError::new_str(start_mark, "while scanning an anchor or alias, did not find expected alphabetic or numeric character"));
} }
if alias { let tok = if alias {
Ok(Token(start_mark, TokenType::Alias(string))) TokenType::Alias(string)
} else { } else {
Ok(Token(start_mark, TokenType::Anchor(string))) TokenType::Anchor(string)
} };
Ok(Token(Span::new(start_mark, self.mark), tok))
} }
fn fetch_flow_collection_start(&mut self, tok: TokenType) -> ScanResult { fn fetch_flow_collection_start(&mut self, tok: TokenType) -> ScanResult {
@ -1351,7 +1394,8 @@ impl<T: Input> Scanner<T> {
self.skip_ws_to_eol(SkipTabs::Yes)?; self.skip_ws_to_eol(SkipTabs::Yes)?;
self.tokens.push_back(Token(start_mark, tok)); self.tokens
.push_back(Token(Span::new(start_mark, self.mark), tok));
Ok(()) Ok(())
} }
@ -1380,7 +1424,8 @@ impl<T: Input> Scanner<T> {
self.adjacent_value_allowed_at = self.mark.index; self.adjacent_value_allowed_at = self.mark.index;
} }
self.tokens.push_back(Token(start_mark, tok)); self.tokens
.push_back(Token(Span::new(start_mark, self.mark), tok));
Ok(()) Ok(())
} }
@ -1395,8 +1440,10 @@ impl<T: Input> Scanner<T> {
self.skip_non_blank(); self.skip_non_blank();
self.skip_ws_to_eol(SkipTabs::Yes)?; self.skip_ws_to_eol(SkipTabs::Yes)?;
self.tokens self.tokens.push_back(Token(
.push_back(Token(start_mark, TokenType::FlowEntry)); Span::new(start_mark, self.mark),
TokenType::FlowEntry,
));
Ok(()) Ok(())
} }
@ -1438,9 +1485,12 @@ impl<T: Input> Scanner<T> {
} }
// ???, fixes test G9HC. // ???, fixes test G9HC.
if let Some(Token(mark, TokenType::Anchor(..) | TokenType::Tag(..))) = self.tokens.back() { if let Some(Token(span, TokenType::Anchor(..) | TokenType::Tag(..))) = self.tokens.back() {
if self.mark.col == 0 && mark.col == 0 && self.indent > -1 { if self.mark.col == 0 && span.start.col == 0 && self.indent > -1 {
return Err(ScanError::new_str(*mark, "invalid indentation for anchor")); return Err(ScanError::new_str(
span.start,
"invalid indentation for anchor",
));
} }
} }
@ -1470,7 +1520,7 @@ impl<T: Input> Scanner<T> {
self.allow_simple_key(); self.allow_simple_key();
self.tokens self.tokens
.push_back(Token(self.mark, TokenType::BlockEntry)); .push_back(Token(Span::empty(self.mark), TokenType::BlockEntry));
Ok(()) Ok(())
} }
@ -1484,7 +1534,7 @@ impl<T: Input> Scanner<T> {
self.skip_n_non_blank(3); self.skip_n_non_blank(3);
self.tokens.push_back(Token(mark, t)); self.tokens.push_back(Token(Span::new(mark, self.mark), t));
Ok(()) Ok(())
} }
@ -1616,7 +1666,10 @@ impl<T: Input> Scanner<T> {
// Otherwise, the newline after chomping is ignored. // Otherwise, the newline after chomping is ignored.
Chomping::Keep => trailing_breaks, Chomping::Keep => trailing_breaks,
}; };
return Ok(Token(start_mark, TokenType::Scalar(style, contents))); return Ok(Token(
Span::new(start_mark, self.mark),
TokenType::Scalar(style, contents),
));
} }
if self.mark.col < indent && (self.mark.col as isize) > self.indent { if self.mark.col < indent && (self.mark.col as isize) > self.indent {
@ -1682,7 +1735,10 @@ impl<T: Input> Scanner<T> {
string.push_str(&trailing_breaks); string.push_str(&trailing_breaks);
} }
Ok(Token(start_mark, TokenType::Scalar(style, string))) Ok(Token(
Span::new(start_mark, self.mark),
TokenType::Scalar(style, string),
))
} }
/// Retrieve the contents of the line, parsing it as a block scalar. /// Retrieve the contents of the line, parsing it as a block scalar.
@ -1963,7 +2019,10 @@ impl<T: Input> Scanner<T> {
} else { } else {
TScalarStyle::DoubleQuoted TScalarStyle::DoubleQuoted
}; };
Ok(Token(start_mark, TokenType::Scalar(style, string))) Ok(Token(
Span::new(start_mark, self.mark),
TokenType::Scalar(style, string),
))
} }
/// Consume successive non-whitespace characters from a flow scalar. /// Consume successive non-whitespace characters from a flow scalar.
@ -2120,6 +2179,7 @@ impl<T: Input> Scanner<T> {
self.buf_whitespaces.clear(); self.buf_whitespaces.clear();
self.buf_leading_break.clear(); self.buf_leading_break.clear();
self.buf_trailing_breaks.clear(); self.buf_trailing_breaks.clear();
let mut end_mark = self.mark;
loop { loop {
self.input.lookahead(4); self.input.lookahead(4);
@ -2182,6 +2242,7 @@ impl<T: Input> Scanner<T> {
self.skip_non_blank(); self.skip_non_blank();
} }
} }
end_mark = self.mark;
} }
// We may reach the end of a plain scalar if: // We may reach the end of a plain scalar if:
@ -2238,7 +2299,7 @@ impl<T: Input> Scanner<T> {
} }
Ok(Token( Ok(Token(
start_mark, Span::new(start_mark, end_mark),
TokenType::Scalar(TScalarStyle::Plain, string), TokenType::Scalar(TScalarStyle::Plain, string),
)) ))
} }
@ -2280,7 +2341,8 @@ impl<T: Input> Scanner<T> {
"tabs disallowed in this context", "tabs disallowed in this context",
)); ));
} }
self.tokens.push_back(Token(start_mark, TokenType::Key)); self.tokens
.push_back(Token(Span::new(start_mark, self.mark), TokenType::Key));
Ok(()) Ok(())
} }
@ -2338,7 +2400,7 @@ impl<T: Input> Scanner<T> {
if sk.possible { if sk.possible {
// insert simple key // insert simple key
let tok = Token(sk.mark, TokenType::Key); let tok = Token(Span::empty(sk.mark), TokenType::Key);
self.insert_token(sk.token_number - self.tokens_parsed, tok); self.insert_token(sk.token_number - self.tokens_parsed, tok);
if is_implicit_flow_mapping { if is_implicit_flow_mapping {
if sk.mark.line < start_mark.line { if sk.mark.line < start_mark.line {
@ -2349,7 +2411,7 @@ impl<T: Input> Scanner<T> {
} }
self.insert_token( self.insert_token(
sk.token_number - self.tokens_parsed, sk.token_number - self.tokens_parsed,
Token(self.mark, TokenType::FlowMappingStart), Token(Span::empty(self.mark), TokenType::FlowMappingStart),
); );
} }
@ -2367,7 +2429,7 @@ impl<T: Input> Scanner<T> {
} else { } else {
if is_implicit_flow_mapping { if is_implicit_flow_mapping {
self.tokens self.tokens
.push_back(Token(self.mark, TokenType::FlowMappingStart)); .push_back(Token(Span::empty(self.mark), TokenType::FlowMappingStart));
} }
// The ':' indicator follows a complex key. // The ':' indicator follows a complex key.
if self.flow_level == 0 { if self.flow_level == 0 {
@ -2393,7 +2455,8 @@ impl<T: Input> Scanner<T> {
self.disallow_simple_key(); self.disallow_simple_key();
} }
} }
self.tokens.push_back(Token(start_mark, TokenType::Value)); self.tokens
.push_back(Token(Span::empty(start_mark), TokenType::Value));
Ok(()) Ok(())
} }
@ -2428,8 +2491,8 @@ impl<T: Input> Scanner<T> {
self.indent = col as isize; self.indent = col as isize;
let tokens_parsed = self.tokens_parsed; let tokens_parsed = self.tokens_parsed;
match number { match number {
Some(n) => self.insert_token(n - tokens_parsed, Token(mark, tok)), Some(n) => self.insert_token(n - tokens_parsed, Token(Span::empty(mark), tok)),
None => self.tokens.push_back(Token(mark, tok)), None => self.tokens.push_back(Token(Span::empty(mark), tok)),
} }
} }
} }
@ -2447,7 +2510,8 @@ impl<T: Input> Scanner<T> {
let indent = self.indents.pop().unwrap(); let indent = self.indents.pop().unwrap();
self.indent = indent.indent; self.indent = indent.indent;
if indent.needs_block_end { if indent.needs_block_end {
self.tokens.push_back(Token(self.mark, TokenType::BlockEnd)); self.tokens
.push_back(Token(Span::empty(self.mark), TokenType::BlockEnd));
} }
} }
} }
@ -2520,7 +2584,7 @@ impl<T: Input> Scanner<T> {
self.flow_mapping_started = false; self.flow_mapping_started = false;
*implicit_mapping = ImplicitMappingState::Possible; *implicit_mapping = ImplicitMappingState::Possible;
self.tokens self.tokens
.push_back(Token(mark, TokenType::FlowMappingEnd)); .push_back(Token(Span::empty(mark), TokenType::FlowMappingEnd));
} }
} }
} }

136
parser/tests/span.rs Normal file
View file

@ -0,0 +1,136 @@
#![allow(clippy::bool_assert_comparison)]
#![allow(clippy::float_cmp)]
use saphyr_parser::{Event, Parser, ScanError};
/// Run the parser through the string, returning all the scalars, and collecting their spans to strings.
fn run_parser_and_deref_scalar_spans(input: &str) -> Result<Vec<(String, String)>, ScanError> {
    let mut scalars = vec![];
    for item in Parser::new_from_str(input) {
        let (event, span) = item?;
        if let Event::Scalar(value, ..) = event {
            let start = span.start.index();
            let end = span.end.index();
            // Slice out the span text by character position (not byte offset).
            let covered: String = input.chars().skip(start).take(end - start).collect();
            scalars.push((value, covered));
        }
    }
    Ok(scalars)
}
/// Run the parser through the string, returning the source text covered by the
/// span of every sequence in the document.
///
/// Inner sequences end before their parents, so they appear earlier in the
/// returned list than the sequences that contain them.
fn run_parser_and_deref_seq_spans(input: &str) -> Result<Vec<String>, ScanError> {
    let mut events = vec![];
    // Stack of start indices for sequences that have started but not yet ended.
    let mut start_stack = vec![];
    for item in Parser::new_from_str(input) {
        let (event, span) = item?;
        match event {
            Event::SequenceStart(_, _) => start_stack.push(span.start.index()),
            Event::SequenceEnd => {
                // The parser guarantees balanced start/end events; an empty
                // stack here would indicate a parser bug.
                let start = start_stack
                    .pop()
                    .expect("SequenceEnd without matching SequenceStart");
                let end = span.end.index();
                // Slice by character position, matching how the scalar helper
                // interprets span indices (see test_plain_utf8).
                let input_s = input.chars().skip(start).take(end - start).collect();
                events.push(input_s);
            }
            _ => {}
        }
    }
    Ok(events)
}
/// Borrow each owned `(String, String)` pair as a `(&str, &str)` pair, so that
/// tests can compare against string-literal slices.
fn deref_pairs(pairs: &[(String, String)]) -> Vec<(&str, &str)> {
    let mut out = Vec::with_capacity(pairs.len());
    for (scalar, span_text) in pairs {
        out.push((scalar.as_str(), span_text.as_str()));
    }
    out
}
#[test]
fn test_plain() {
    // Plain scalars: each case pairs an input document with the expected
    // (scalar value, span text) pairs. Surrounding whitespace is excluded
    // from the span.
    let cases: &[(&str, &[(&str, &str)])] = &[
        ("foo: bar", &[("foo", "foo"), ("bar", "bar")]),
        ("foo: bar ", &[("foo", "foo"), ("bar", "bar")]),
        ("foo : \t bar\t ", &[("foo", "foo"), ("bar", "bar")]),
        (
            "foo : \n - bar\n - baz\n ",
            &[("foo", "foo"), ("bar", "bar"), ("baz", "baz")],
        ),
    ];
    for (input, expected) in cases {
        let pairs = run_parser_and_deref_scalar_spans(input).unwrap();
        assert_eq!(deref_pairs(&pairs), *expected);
    }
}
#[test]
fn test_plain_utf8() {
    // Multi-byte characters: the span text must still match the scalar
    // exactly, since spans are dereferenced by character position.
    let pairs = run_parser_and_deref_scalar_spans("a: 你好").unwrap();
    assert_eq!(deref_pairs(&pairs), [("a", "a"), ("你好", "你好")]);
}
#[test]
fn test_quoted() {
    // Quoted scalars: the span includes the surrounding quote characters,
    // while the scalar value does not.
    let cases: &[(&str, &[(&str, &str)])] = &[
        (r#"foo: "bar""#, &[("foo", "foo"), ("bar", r#""bar""#)]),
        (r#"foo: 'bar'"#, &[("foo", "foo"), ("bar", r#"'bar'"#)]),
        (r#"foo: "bar ""#, &[("foo", "foo"), ("bar ", r#""bar ""#)]),
    ];
    for (input, expected) in cases {
        let pairs = run_parser_and_deref_scalar_spans(input).unwrap();
        assert_eq!(deref_pairs(&pairs), *expected);
    }
}
#[test]
fn test_literal() {
    // Literal block scalars (`|`): the scalar value keeps newlines, while the
    // span covers only the indented block body in the source.
    let cases: &[(&str, &[(&str, &str)])] = &[
        ("foo: |\n bar", &[("foo", "foo"), ("bar\n", "bar")]),
        (
            "foo: |\n bar\n more",
            &[("foo", "foo"), ("bar\nmore\n", "bar\n more")],
        ),
    ];
    for (input, expected) in cases {
        let pairs = run_parser_and_deref_scalar_spans(input).unwrap();
        assert_eq!(deref_pairs(&pairs), *expected);
    }
}
#[test]
fn test_block() {
    // Folded block scalars (`>`): line breaks fold into spaces in the value,
    // while the span covers the raw block body in the source.
    let cases: &[(&str, &[(&str, &str)])] = &[
        ("foo: >\n bar", &[("foo", "foo"), ("bar\n", "bar")]),
        (
            "foo: >\n bar\n more",
            &[("foo", "foo"), ("bar more\n", "bar\n more")],
        ),
    ];
    for (input, expected) in cases {
        let pairs = run_parser_and_deref_scalar_spans(input).unwrap();
        assert_eq!(deref_pairs(&pairs), *expected);
    }
}
#[test]
fn test_seq() {
    // Each case: (input document, expected covered source text for every
    // sequence, innermost sequences listed first).
    let cases: &[(&str, &[&str])] = &[
        ("[a, b]", &["[a, b]"]),
        ("- a\n- b", &["- a\n- b"]),
        ("foo:\n - a\n - b", &["- a\n - b"]),
        (
            "foo:\n - a\n - bar:\n - b\n - c",
            &["b\n - c", "- a\n - bar:\n - b\n - c"],
        ),
    ];
    for (input, expected) in cases {
        assert_eq!(run_parser_and_deref_seq_spans(input).unwrap(), *expected);
    }
}

View file

@ -2,21 +2,21 @@ use std::env;
use std::fs::File; use std::fs::File;
use std::io::prelude::*; use std::io::prelude::*;
use saphyr_parser::{Event, MarkedEventReceiver, Marker, Parser}; use saphyr_parser::{Event, Parser, Span, SpannedEventReceiver};
#[derive(Debug)] #[derive(Debug)]
struct EventSink { struct EventSink {
events: Vec<(Event, Marker)>, events: Vec<(Event, Span)>,
} }
impl MarkedEventReceiver for EventSink { impl SpannedEventReceiver for EventSink {
fn on_event(&mut self, ev: Event, mark: Marker) { fn on_event(&mut self, ev: Event, span: Span) {
eprintln!(" \x1B[;34m\u{21B3} {:?}\x1B[;m", &ev); eprintln!(" \x1B[;34m\u{21B3} {:?}\x1B[;m", &ev);
self.events.push((ev, mark)); self.events.push((ev, span));
} }
} }
fn str_to_events(yaml: &str) -> Vec<(Event, Marker)> { fn str_to_events(yaml: &str) -> Vec<(Event, Span)> {
let mut sink = EventSink { events: Vec::new() }; let mut sink = EventSink { events: Vec::new() };
let mut parser = Parser::new_from_str(yaml); let mut parser = Parser::new_from_str(yaml);
// Load events using our sink as the receiver. // Load events using our sink as the receiver.

View file

@ -1,15 +1,13 @@
#![allow(clippy::cast_possible_truncation, clippy::cast_precision_loss)] #![allow(clippy::cast_possible_truncation, clippy::cast_precision_loss)]
use saphyr_parser::{ use saphyr_parser::{Event, Parser, Span, SpannedEventReceiver};
Event, Marker, {MarkedEventReceiver, Parser},
};
use std::{env, fs::File, io::prelude::*}; use std::{env, fs::File, io::prelude::*};
/// A sink which discards any event sent. /// A sink which discards any event sent.
struct NullSink {} struct NullSink {}
impl MarkedEventReceiver for NullSink { impl SpannedEventReceiver for NullSink {
fn on_event(&mut self, _: Event, _: Marker) {} fn on_event(&mut self, _: Event, _: Span) {}
} }
/// Parse the given input, returning elapsed time in nanoseconds. /// Parse the given input, returning elapsed time in nanoseconds.

View file

@ -2,13 +2,13 @@ use std::env;
use std::fs::File; use std::fs::File;
use std::io::prelude::*; use std::io::prelude::*;
use saphyr_parser::{Event, MarkedEventReceiver, Marker, Parser}; use saphyr_parser::{Event, Parser, Span, SpannedEventReceiver};
/// A sink which discards any event sent. /// A sink which discards any event sent.
struct NullSink {} struct NullSink {}
impl MarkedEventReceiver for NullSink { impl SpannedEventReceiver for NullSink {
fn on_event(&mut self, _: Event, _: Marker) {} fn on_event(&mut self, _: Event, _: Span) {}
} }
fn main() { fn main() {