diff --git a/parser/src/lib.rs b/parser/src/lib.rs
index ac27132..36da373 100644
--- a/parser/src/lib.rs
+++ b/parser/src/lib.rs
@@ -40,5 +40,5 @@ mod parser;
 mod scanner;
 
 pub use crate::input::BufferedInput;
-pub use crate::parser::{Event, EventReceiver, MarkedEventReceiver, Parser, Tag};
-pub use crate::scanner::{Marker, ScanError, TScalarStyle};
+pub use crate::parser::{Event, EventReceiver, Parser, SpannedEventReceiver, Tag};
+pub use crate::scanner::{Marker, ScanError, Span, TScalarStyle};
diff --git a/parser/src/parser.rs b/parser/src/parser.rs
index 76dbe67..6aae44d 100644
--- a/parser/src/parser.rs
+++ b/parser/src/parser.rs
@@ -6,8 +6,9 @@
 use crate::{
     input::{str::StrInput, Input},
-    scanner::{Marker, ScanError, Scanner, TScalarStyle, Token, TokenType},
+    scanner::{ScanError, Scanner, Span, TScalarStyle, Token, TokenType},
 };
+
 use std::collections::HashMap;
 
 #[derive(Clone, Copy, PartialEq, Debug, Eq)]
@@ -116,7 +117,7 @@ pub struct Parser<T: Input> {
     /// The next token from the scanner.
     token: Option<Token>,
    /// The next YAML event to emit.
-    current: Option<(Event, Marker)>,
+    current: Option<(Event, Span)>,
     /// Anchors that have been encountered in the YAML document.
     anchors: HashMap<String, usize>,
     /// Next ID available for an anchor.
@@ -142,8 +143,8 @@
 /// The low-level parsing API is event-based (a push parser), calling [`EventReceiver::on_event`]
 /// for each YAML [`Event`] that occurs.
 /// The [`EventReceiver`] trait only receives events. In order to receive both events and their
-/// location in the source, use [`MarkedEventReceiver`]. Note that [`EventReceiver`]s implement
-/// [`MarkedEventReceiver`] automatically.
+/// location in the source, use [`SpannedEventReceiver`]. Note that [`EventReceiver`]s implement
+/// [`SpannedEventReceiver`] automatically.
/// /// # Event hierarchy /// The event stream starts with an [`Event::StreamStart`] event followed by an @@ -213,20 +214,20 @@ pub trait EventReceiver { /// Trait to be implemented for using the low-level parsing API. /// -/// Functionally similar to [`EventReceiver`], but receives a [`Marker`] as well as the event. -pub trait MarkedEventReceiver { +/// Functionally similar to [`EventReceiver`], but receives a [`Span`] as well as the event. +pub trait SpannedEventReceiver { /// Handler called for each event that occurs. - fn on_event(&mut self, ev: Event, _mark: Marker); + fn on_event(&mut self, ev: Event, span: Span); } -impl MarkedEventReceiver for R { - fn on_event(&mut self, ev: Event, _mark: Marker) { +impl SpannedEventReceiver for R { + fn on_event(&mut self, ev: Event, _span: Span) { self.on_event(ev); } } /// A convenience alias for a `Result` of a parser event. -pub type ParseResult = Result<(Event, Marker), ScanError>; +pub type ParseResult = Result<(Event, Span), ScanError>; impl<'a> Parser> { /// Create a new instance of a parser from a &str. @@ -290,7 +291,7 @@ impl Parser { /// /// # Errors /// Returns `ScanError` when loading the next event fails. - pub fn peek(&mut self) -> Option> { + pub fn peek(&mut self) -> Option> { if let Some(ref x) = self.current { Some(Ok(x)) } else { @@ -379,7 +380,7 @@ impl Parser { fn parse(&mut self) -> ParseResult { if self.state == State::End { - return Ok((Event::StreamEnd, self.scanner.mark())); + return Ok((Event::StreamEnd, Span::empty(self.scanner.mark()))); } let (ev, mark) = self.state_machine()?; Ok((ev, mark)) @@ -393,40 +394,40 @@ impl Parser { /// If `multi` is set to `true`, the parser will allow parsing of multiple YAML documents /// inside the stream. /// - /// Note that any [`EventReceiver`] is also a [`MarkedEventReceiver`], so implementing the + /// Note that any [`EventReceiver`] is also a [`SpannedEventReceiver`], so implementing the /// former is enough to call this function. 
/// # Errors /// Returns `ScanError` when loading fails. - pub fn load( + pub fn load( &mut self, recv: &mut R, multi: bool, ) -> Result<(), ScanError> { if !self.scanner.stream_started() { - let (ev, mark) = self.next_event_impl()?; + let (ev, span) = self.next_event_impl()?; if ev != Event::StreamStart { return Err(ScanError::new_str( - mark, + span.start, "did not find expected ", )); } - recv.on_event(ev, mark); + recv.on_event(ev, span); } if self.scanner.stream_ended() { // XXX has parsed? - recv.on_event(Event::StreamEnd, self.scanner.mark()); + recv.on_event(Event::StreamEnd, Span::empty(self.scanner.mark())); return Ok(()); } loop { - let (ev, mark) = self.next_event_impl()?; + let (ev, span) = self.next_event_impl()?; if ev == Event::StreamEnd { - recv.on_event(ev, mark); + recv.on_event(ev, span); return Ok(()); } // clear anchors before a new document self.anchors.clear(); - self.load_document(ev, mark, recv)?; + self.load_document(ev, span, recv)?; if !multi { break; } @@ -434,22 +435,22 @@ impl Parser { Ok(()) } - fn load_document( + fn load_document( &mut self, first_ev: Event, - mark: Marker, + span: Span, recv: &mut R, ) -> Result<(), ScanError> { if first_ev != Event::DocumentStart { return Err(ScanError::new_str( - mark, + span.start, "did not find expected ", )); } - recv.on_event(first_ev, mark); + recv.on_event(first_ev, span); - let (ev, mark) = self.next_event_impl()?; - self.load_node(ev, mark, recv)?; + let (ev, span) = self.next_event_impl()?; + self.load_node(ev, span, recv)?; // DOCUMENT-END is expected. let (ev, mark) = self.next_event_impl()?; @@ -459,23 +460,23 @@ impl Parser { Ok(()) } - fn load_node( + fn load_node( &mut self, first_ev: Event, - mark: Marker, + span: Span, recv: &mut R, ) -> Result<(), ScanError> { match first_ev { Event::Alias(..) | Event::Scalar(..) => { - recv.on_event(first_ev, mark); + recv.on_event(first_ev, span); Ok(()) } Event::SequenceStart(..) 
=> { - recv.on_event(first_ev, mark); + recv.on_event(first_ev, span); self.load_sequence(recv) } Event::MappingStart(..) => { - recv.on_event(first_ev, mark); + recv.on_event(first_ev, span); self.load_mapping(recv) } _ => { @@ -485,7 +486,7 @@ impl Parser { } } - fn load_mapping(&mut self, recv: &mut R) -> Result<(), ScanError> { + fn load_mapping(&mut self, recv: &mut R) -> Result<(), ScanError> { let (mut key_ev, mut key_mark) = self.next_event_impl()?; while key_ev != Event::MappingEnd { // key @@ -504,7 +505,7 @@ impl Parser { Ok(()) } - fn load_sequence(&mut self, recv: &mut R) -> Result<(), ScanError> { + fn load_sequence(&mut self, recv: &mut R) -> Result<(), ScanError> { let (mut ev, mut mark) = self.next_event_impl()?; while ev != Event::SequenceEnd { self.load_node(ev, mark, recv)?; @@ -562,13 +563,13 @@ impl Parser { fn stream_start(&mut self) -> ParseResult { match *self.peek_token()? { - Token(mark, TokenType::StreamStart(_)) => { + Token(span, TokenType::StreamStart(_)) => { self.state = State::ImplicitDocumentStart; self.skip(); - Ok((Event::StreamStart, mark)) + Ok((Event::StreamStart, span)) } - Token(mark, _) => Err(ScanError::new_str( - mark, + Token(span, _) => Err(ScanError::new_str( + span.start, "did not find expected ", )), } @@ -580,10 +581,10 @@ impl Parser { } match *self.peek_token()? { - Token(mark, TokenType::StreamEnd) => { + Token(span, TokenType::StreamEnd) => { self.state = State::End; self.skip(); - Ok((Event::StreamEnd, mark)) + Ok((Event::StreamEnd, span)) } Token( _, @@ -594,11 +595,11 @@ impl Parser { // explicit document self.explicit_document_start() } - Token(mark, _) if implicit => { + Token(span, _) if implicit => { self.parser_process_directives()?; self.push_state(State::DocumentEnd); self.state = State::BlockNode; - Ok((Event::DocumentStart, mark)) + Ok((Event::DocumentStart, span)) } _ => { // explicit document @@ -612,20 +613,23 @@ impl Parser { loop { let mut tags = HashMap::new(); match self.peek_token()? 
{ - Token(mark, TokenType::VersionDirective(_, _)) => { + Token(span, TokenType::VersionDirective(_, _)) => { // XXX parsing with warning according to spec //if major != 1 || minor > 2 { // return Err(ScanError::new_str(tok.0, // "found incompatible YAML document")); //} if version_directive_received { - return Err(ScanError::new_str(*mark, "duplicate version directive")); + return Err(ScanError::new_str( + span.start, + "duplicate version directive", + )); } version_directive_received = true; } Token(mark, TokenType::TagDirective(handle, prefix)) => { if tags.contains_key(handle) { - return Err(ScanError::new_str(*mark, "the TAG directive must only be given at most once per handle in the same document")); + return Err(ScanError::new_str(mark.start, "the TAG directive must only be given at most once per handle in the same document")); } tags.insert(handle.to_string(), prefix.to_string()); } @@ -646,8 +650,8 @@ impl Parser { self.skip(); Ok((Event::DocumentStart, mark)) } - Token(mark, _) => Err(ScanError::new_str( - mark, + Token(span, _) => Err(ScanError::new_str( + span.start, "did not find expected ", )), } @@ -673,13 +677,13 @@ impl Parser { fn document_end(&mut self) -> ParseResult { let mut explicit_end = false; - let marker: Marker = match *self.peek_token()? { - Token(mark, TokenType::DocumentEnd) => { + let span: Span = match *self.peek_token()? { + Token(span, TokenType::DocumentEnd) => { explicit_end = true; self.skip(); - mark + span } - Token(mark, _) => mark, + Token(span, _) => span, }; if !self.keep_tags { @@ -688,21 +692,21 @@ impl Parser { if explicit_end { self.state = State::ImplicitDocumentStart; } else { - if let Token(mark, TokenType::VersionDirective(..) | TokenType::TagDirective(..)) = + if let Token(span, TokenType::VersionDirective(..) | TokenType::TagDirective(..)) = *self.peek_token()? 
{ return Err(ScanError::new_str( - mark, + span.start, "missing explicit document end marker before directive", )); } self.state = State::DocumentStart; } - Ok((Event::DocumentEnd, marker)) + Ok((Event::DocumentEnd, span)) } - fn register_anchor(&mut self, name: String, _: &Marker) -> usize { + fn register_anchor(&mut self, name: String, _: &Span) -> usize { // anchors can be overridden/reused // if self.anchors.contains_key(name) { // return Err(ScanError::new_str(*mark, @@ -720,25 +724,25 @@ impl Parser { match *self.peek_token()? { Token(_, TokenType::Alias(_)) => { self.pop_state(); - if let Token(mark, TokenType::Alias(name)) = self.fetch_token() { + if let Token(span, TokenType::Alias(name)) = self.fetch_token() { match self.anchors.get(&name) { None => { return Err(ScanError::new_str( - mark, + span.start, "while parsing node, found unknown anchor", )) } - Some(id) => return Ok((Event::Alias(*id), mark)), + Some(id) => return Ok((Event::Alias(*id), span)), } } unreachable!() } Token(_, TokenType::Anchor(_)) => { - if let Token(mark, TokenType::Anchor(name)) = self.fetch_token() { - anchor_id = self.register_anchor(name, &mark); + if let Token(span, TokenType::Anchor(name)) = self.fetch_token() { + anchor_id = self.register_anchor(name, &span); if let TokenType::Tag(..) 
= self.peek_token()?.1 { if let TokenType::Tag(handle, suffix) = self.fetch_token().1 { - tag = Some(self.resolve_tag(mark, &handle, suffix)?); + tag = Some(self.resolve_tag(span, &handle, suffix)?); } else { unreachable!() } @@ -797,8 +801,8 @@ impl Parser { self.pop_state(); Ok((Event::empty_scalar_with_anchor(anchor_id, tag), mark)) } - Token(mark, _) => Err(ScanError::new_str( - mark, + Token(span, _) => Err(ScanError::new_str( + span.start, "while parsing a node, did not find expected node content", )), } @@ -835,8 +839,8 @@ impl Parser { self.skip(); Ok((Event::MappingEnd, mark)) } - Token(mark, _) => Err(ScanError::new_str( - mark, + Token(span, _) => Err(ScanError::new_str( + span.start, "while parsing a block mapping, did not find expected key", )), } @@ -870,15 +874,15 @@ impl Parser { let _ = self.peek_token()?; self.skip(); } - let marker: Marker = { + let span: Span = { match *self.peek_token()? { Token(mark, TokenType::FlowMappingEnd) => mark, Token(mark, _) => { if !first { match *self.peek_token()? { Token(_, TokenType::FlowEntry) => self.skip(), - Token(mark, _) => return Err(ScanError::new_str( - mark, + Token(span, _) => return Err(ScanError::new_str( + span.start, "while parsing a flow mapping, did not find expected ',' or '}'", )), } @@ -916,18 +920,18 @@ impl Parser { self.pop_state(); self.skip(); - Ok((Event::MappingEnd, marker)) + Ok((Event::MappingEnd, span)) } fn flow_mapping_value(&mut self, empty: bool) -> ParseResult { - let mark: Marker = { + let span: Span = { if empty { let Token(mark, _) = *self.peek_token()?; self.state = State::FlowMappingKey; return Ok((Event::empty_scalar(), mark)); } match *self.peek_token()? 
{ - Token(marker, TokenType::Value) => { + Token(span, TokenType::Value) => { self.skip(); match self.peek_token()?.1 { TokenType::FlowEntry | TokenType::FlowMappingEnd => {} @@ -936,14 +940,14 @@ impl Parser { return self.parse_node(false, false); } } - marker + span } Token(marker, _) => marker, } }; self.state = State::FlowMappingKey; - Ok((Event::empty_scalar(), mark)) + Ok((Event::empty_scalar(), span)) } fn flow_sequence_entry(&mut self, first: bool) -> ParseResult { @@ -962,9 +966,9 @@ impl Parser { Token(_, TokenType::FlowEntry) if !first => { self.skip(); } - Token(mark, _) if !first => { + Token(span, _) if !first => { return Err(ScanError::new_str( - mark, + span.start, "while parsing a flow sequence, expected ',' or ']'", )); } @@ -1035,8 +1039,8 @@ impl Parser { self.parse_node(true, false) } } - Token(mark, _) => Err(ScanError::new_str( - mark, + Token(span, _) => Err(ScanError::new_str( + span.start, "while parsing a block collection, did not find expected '-' indicator", )), } @@ -1080,11 +1084,11 @@ impl Parser { #[allow(clippy::unnecessary_wraps)] fn flow_sequence_entry_mapping_end(&mut self) -> ParseResult { self.state = State::FlowSequenceEntry; - Ok((Event::MappingEnd, self.scanner.mark())) + Ok((Event::MappingEnd, Span::empty(self.scanner.mark()))) } /// Resolve a tag from the handle and the suffix. - fn resolve_tag(&self, mark: Marker, handle: &str, suffix: String) -> Result { + fn resolve_tag(&self, span: Span, handle: &str, suffix: String) -> Result { if handle == "!!" { // "!!" is a shorthand for "tag:yaml.org,2002:". However, that default can be // overridden. @@ -1121,7 +1125,7 @@ impl Parser { // If the handle is of the form "!foo!", this cannot be a local handle and we need // to error. 
if handle.len() >= 2 && handle.starts_with('!') && handle.ends_with('!') { - Err(ScanError::new_str(mark, "the handle wasn't declared")) + Err(ScanError::new_str(span.start, "the handle wasn't declared")) } else { Ok(Tag { handle: handle.to_string(), @@ -1134,7 +1138,7 @@ impl Parser { } impl Iterator for Parser { - type Item = Result<(Event, Marker), ScanError>; + type Item = Result<(Event, Span), ScanError>; fn next(&mut self) -> Option { self.next_event() diff --git a/parser/src/scanner.rs b/parser/src/scanner.rs index a639a4b..d488005 100644 --- a/parser/src/scanner.rs +++ b/parser/src/scanner.rs @@ -79,6 +79,37 @@ impl Marker { } } +/// A range of locations in a Yaml document. +#[derive(Clone, Copy, PartialEq, Debug, Eq, Default)] +pub struct Span { + /// The start (inclusive) of the range. + pub start: Marker, + /// The end (exclusive) of the range. + pub end: Marker, +} + +impl Span { + /// Create a new [`Span`] for the given range. + #[must_use] + pub fn new(start: Marker, end: Marker) -> Span { + Span { start, end } + } + + /// Create a empty [`Span`] at a given location. + /// + /// An empty span doesn't contain any characters, but its position may still be meaningful. + /// For example, for an indented sequence [`SequenceEnd`] has a location but an empty span. + /// + /// [`SequenceEnd`]: crate::Event::SequenceEnd + #[must_use] + pub fn empty(mark: Marker) -> Span { + Span { + start: mark, + end: mark, + } + } +} + /// An error that occurred while scanning. #[derive(Clone, PartialEq, Debug, Eq)] pub struct ScanError { @@ -204,7 +235,7 @@ pub enum TokenType { /// A scanner token. #[derive(Clone, PartialEq, Debug, Eq)] -pub struct Token(pub Marker, pub TokenType); +pub struct Token(pub Span, pub TokenType); /// A scalar that was parsed and may correspond to a simple key. 
/// @@ -874,8 +905,10 @@ impl Scanner { self.indent = -1; self.stream_start_produced = true; self.allow_simple_key(); - self.tokens - .push_back(Token(mark, TokenType::StreamStart(TEncoding::Utf8))); + self.tokens.push_back(Token( + Span::empty(mark), + TokenType::StreamStart(TEncoding::Utf8), + )); self.simple_keys.push(SimpleKey::new(Marker::new(0, 0, 0))); } @@ -900,7 +933,7 @@ impl Scanner { self.disallow_simple_key(); self.tokens - .push_back(Token(self.mark, TokenType::StreamEnd)); + .push_back(Token(Span::empty(self.mark), TokenType::StreamEnd)); Ok(()) } @@ -932,7 +965,7 @@ impl Scanner { self.mark.col += line_len; // XXX return an empty TagDirective token Token( - start_mark, + Span::new(start_mark, self.mark), TokenType::TagDirective(String::new(), String::new()), ) // return Err(ScanError::new_str(start_mark, @@ -971,7 +1004,10 @@ impl Scanner { let minor = self.scan_version_directive_number(mark)?; - Ok(Token(*mark, TokenType::VersionDirective(major, minor))) + Ok(Token( + Span::new(*mark, self.mark), + TokenType::VersionDirective(major, minor), + )) } fn scan_directive_name(&mut self) -> Result { @@ -1040,7 +1076,10 @@ impl Scanner { self.input.lookahead(1); if self.input.next_is_blank_or_breakz() { - Ok(Token(*mark, TokenType::TagDirective(handle, prefix))) + Ok(Token( + Span::new(*mark, self.mark), + TokenType::TagDirective(handle, prefix), + )) } else { Err(ScanError::new_str( *mark, @@ -1093,7 +1132,10 @@ impl Scanner { || (self.flow_level > 0 && self.input.next_is_flow()) { // XXX: ex 7.2, an empty scalar can follow a secondary tag - Ok(Token(start_mark, TokenType::Tag(handle, suffix))) + Ok(Token( + Span::new(start_mark, self.mark), + TokenType::Tag(handle, suffix), + )) } else { Err(ScanError::new_str( start_mark, @@ -1323,11 +1365,12 @@ impl Scanner { return Err(ScanError::new_str(start_mark, "while scanning an anchor or alias, did not find expected alphabetic or numeric character")); } - if alias { - Ok(Token(start_mark, 
TokenType::Alias(string))) + let tok = if alias { + TokenType::Alias(string) } else { - Ok(Token(start_mark, TokenType::Anchor(string))) - } + TokenType::Anchor(string) + }; + Ok(Token(Span::new(start_mark, self.mark), tok)) } fn fetch_flow_collection_start(&mut self, tok: TokenType) -> ScanResult { @@ -1351,7 +1394,8 @@ impl Scanner { self.skip_ws_to_eol(SkipTabs::Yes)?; - self.tokens.push_back(Token(start_mark, tok)); + self.tokens + .push_back(Token(Span::new(start_mark, self.mark), tok)); Ok(()) } @@ -1380,7 +1424,8 @@ impl Scanner { self.adjacent_value_allowed_at = self.mark.index; } - self.tokens.push_back(Token(start_mark, tok)); + self.tokens + .push_back(Token(Span::new(start_mark, self.mark), tok)); Ok(()) } @@ -1395,8 +1440,10 @@ impl Scanner { self.skip_non_blank(); self.skip_ws_to_eol(SkipTabs::Yes)?; - self.tokens - .push_back(Token(start_mark, TokenType::FlowEntry)); + self.tokens.push_back(Token( + Span::new(start_mark, self.mark), + TokenType::FlowEntry, + )); Ok(()) } @@ -1438,9 +1485,12 @@ impl Scanner { } // ???, fixes test G9HC. - if let Some(Token(mark, TokenType::Anchor(..) | TokenType::Tag(..))) = self.tokens.back() { - if self.mark.col == 0 && mark.col == 0 && self.indent > -1 { - return Err(ScanError::new_str(*mark, "invalid indentation for anchor")); + if let Some(Token(span, TokenType::Anchor(..) 
| TokenType::Tag(..))) = self.tokens.back() { + if self.mark.col == 0 && span.start.col == 0 && self.indent > -1 { + return Err(ScanError::new_str( + span.start, + "invalid indentation for anchor", + )); } } @@ -1470,7 +1520,7 @@ impl Scanner { self.allow_simple_key(); self.tokens - .push_back(Token(self.mark, TokenType::BlockEntry)); + .push_back(Token(Span::empty(self.mark), TokenType::BlockEntry)); Ok(()) } @@ -1484,7 +1534,7 @@ impl Scanner { self.skip_n_non_blank(3); - self.tokens.push_back(Token(mark, t)); + self.tokens.push_back(Token(Span::new(mark, self.mark), t)); Ok(()) } @@ -1616,7 +1666,10 @@ impl Scanner { // Otherwise, the newline after chomping is ignored. Chomping::Keep => trailing_breaks, }; - return Ok(Token(start_mark, TokenType::Scalar(style, contents))); + return Ok(Token( + Span::new(start_mark, self.mark), + TokenType::Scalar(style, contents), + )); } if self.mark.col < indent && (self.mark.col as isize) > self.indent { @@ -1682,7 +1735,10 @@ impl Scanner { string.push_str(&trailing_breaks); } - Ok(Token(start_mark, TokenType::Scalar(style, string))) + Ok(Token( + Span::new(start_mark, self.mark), + TokenType::Scalar(style, string), + )) } /// Retrieve the contents of the line, parsing it as a block scalar. @@ -1963,7 +2019,10 @@ impl Scanner { } else { TScalarStyle::DoubleQuoted }; - Ok(Token(start_mark, TokenType::Scalar(style, string))) + Ok(Token( + Span::new(start_mark, self.mark), + TokenType::Scalar(style, string), + )) } /// Consume successive non-whitespace characters from a flow scalar. 
@@ -2120,6 +2179,7 @@ impl Scanner { self.buf_whitespaces.clear(); self.buf_leading_break.clear(); self.buf_trailing_breaks.clear(); + let mut end_mark = self.mark; loop { self.input.lookahead(4); @@ -2182,6 +2242,7 @@ impl Scanner { self.skip_non_blank(); } } + end_mark = self.mark; } // We may reach the end of a plain scalar if: @@ -2238,7 +2299,7 @@ impl Scanner { } Ok(Token( - start_mark, + Span::new(start_mark, end_mark), TokenType::Scalar(TScalarStyle::Plain, string), )) } @@ -2280,7 +2341,8 @@ impl Scanner { "tabs disallowed in this context", )); } - self.tokens.push_back(Token(start_mark, TokenType::Key)); + self.tokens + .push_back(Token(Span::new(start_mark, self.mark), TokenType::Key)); Ok(()) } @@ -2338,7 +2400,7 @@ impl Scanner { if sk.possible { // insert simple key - let tok = Token(sk.mark, TokenType::Key); + let tok = Token(Span::empty(sk.mark), TokenType::Key); self.insert_token(sk.token_number - self.tokens_parsed, tok); if is_implicit_flow_mapping { if sk.mark.line < start_mark.line { @@ -2349,7 +2411,7 @@ impl Scanner { } self.insert_token( sk.token_number - self.tokens_parsed, - Token(self.mark, TokenType::FlowMappingStart), + Token(Span::empty(self.mark), TokenType::FlowMappingStart), ); } @@ -2367,7 +2429,7 @@ impl Scanner { } else { if is_implicit_flow_mapping { self.tokens - .push_back(Token(self.mark, TokenType::FlowMappingStart)); + .push_back(Token(Span::empty(self.mark), TokenType::FlowMappingStart)); } // The ':' indicator follows a complex key. 
if self.flow_level == 0 { @@ -2393,7 +2455,8 @@ impl Scanner { self.disallow_simple_key(); } } - self.tokens.push_back(Token(start_mark, TokenType::Value)); + self.tokens + .push_back(Token(Span::empty(start_mark), TokenType::Value)); Ok(()) } @@ -2428,8 +2491,8 @@ impl Scanner { self.indent = col as isize; let tokens_parsed = self.tokens_parsed; match number { - Some(n) => self.insert_token(n - tokens_parsed, Token(mark, tok)), - None => self.tokens.push_back(Token(mark, tok)), + Some(n) => self.insert_token(n - tokens_parsed, Token(Span::empty(mark), tok)), + None => self.tokens.push_back(Token(Span::empty(mark), tok)), } } } @@ -2447,7 +2510,8 @@ impl Scanner { let indent = self.indents.pop().unwrap(); self.indent = indent.indent; if indent.needs_block_end { - self.tokens.push_back(Token(self.mark, TokenType::BlockEnd)); + self.tokens + .push_back(Token(Span::empty(self.mark), TokenType::BlockEnd)); } } } @@ -2520,7 +2584,7 @@ impl Scanner { self.flow_mapping_started = false; *implicit_mapping = ImplicitMappingState::Possible; self.tokens - .push_back(Token(mark, TokenType::FlowMappingEnd)); + .push_back(Token(Span::empty(mark), TokenType::FlowMappingEnd)); } } } diff --git a/parser/tests/span.rs b/parser/tests/span.rs new file mode 100644 index 0000000..0e07662 --- /dev/null +++ b/parser/tests/span.rs @@ -0,0 +1,136 @@ +#![allow(clippy::bool_assert_comparison)] +#![allow(clippy::float_cmp)] + +use saphyr_parser::{Event, Parser, ScanError}; + +/// Run the parser through the string, returning all the scalars, and collecting their spans to strings. +fn run_parser_and_deref_scalar_spans(input: &str) -> Result, ScanError> { + let mut events = vec![]; + for x in Parser::new_from_str(input) { + let x = x?; + if let Event::Scalar(s, ..) 
= x.0 { + let start = x.1.start.index(); + let end = x.1.end.index(); + let input_s = input.chars().skip(start).take(end - start).collect(); + events.push((s, input_s)); + } + } + Ok(events) +} + +/// Run the parser through the string, returning all the scalars, and collecting their spans to strings. +fn run_parser_and_deref_seq_spans(input: &str) -> Result, ScanError> { + let mut events = vec![]; + let mut start_stack = vec![]; + for x in Parser::new_from_str(input) { + let x = x?; + match x.0 { + Event::SequenceStart(_, _) => start_stack.push(x.1.start.index()), + Event::SequenceEnd => { + let start = start_stack.pop().unwrap(); + let end = x.1.end.index(); + let input_s = input.chars().skip(start).take(end - start).collect(); + events.push(input_s); + } + _ => {} + } + } + Ok(events) +} + +fn deref_pairs(pairs: &[(String, String)]) -> Vec<(&str, &str)> { + pairs + .iter() + .map(|(a, b)| (a.as_str(), b.as_str())) + .collect() +} + +#[test] +fn test_plain() { + assert_eq!( + deref_pairs(&run_parser_and_deref_scalar_spans("foo: bar").unwrap()), + [("foo", "foo"), ("bar", "bar"),] + ); + assert_eq!( + deref_pairs(&run_parser_and_deref_scalar_spans("foo: bar ").unwrap()), + [("foo", "foo"), ("bar", "bar"),] + ); + assert_eq!( + deref_pairs(&run_parser_and_deref_scalar_spans("foo : \t bar\t ").unwrap()), + [("foo", "foo"), ("bar", "bar"),] + ); + + assert_eq!( + deref_pairs(&run_parser_and_deref_scalar_spans("foo : \n - bar\n - baz\n ").unwrap()), + [("foo", "foo"), ("bar", "bar"), ("baz", "baz")] + ); +} + +#[test] +fn test_plain_utf8() { + assert_eq!( + deref_pairs(&run_parser_and_deref_scalar_spans("a: 你好").unwrap()), + [("a", "a"), ("你好", "你好")] + ); +} + +#[test] +fn test_quoted() { + assert_eq!( + deref_pairs(&run_parser_and_deref_scalar_spans(r#"foo: "bar""#).unwrap()), + [("foo", "foo"), ("bar", r#""bar""#),] + ); + assert_eq!( + deref_pairs(&run_parser_and_deref_scalar_spans(r#"foo: 'bar'"#).unwrap()), + [("foo", "foo"), ("bar", r#"'bar'"#),] + ); + + 
assert_eq!( + deref_pairs(&run_parser_and_deref_scalar_spans(r#"foo: "bar ""#).unwrap()), + [("foo", "foo"), ("bar ", r#""bar ""#),] + ); +} + +#[test] +fn test_literal() { + assert_eq!( + deref_pairs(&run_parser_and_deref_scalar_spans("foo: |\n bar").unwrap()), + [("foo", "foo"), ("bar\n", "bar"),] + ); + assert_eq!( + deref_pairs(&run_parser_and_deref_scalar_spans("foo: |\n bar\n more").unwrap()), + [("foo", "foo"), ("bar\nmore\n", "bar\n more"),] + ); +} + +#[test] +fn test_block() { + assert_eq!( + deref_pairs(&run_parser_and_deref_scalar_spans("foo: >\n bar").unwrap()), + [("foo", "foo"), ("bar\n", "bar"),] + ); + assert_eq!( + deref_pairs(&run_parser_and_deref_scalar_spans("foo: >\n bar\n more").unwrap()), + [("foo", "foo"), ("bar more\n", "bar\n more"),] + ); +} + +#[test] +fn test_seq() { + assert_eq!( + run_parser_and_deref_seq_spans("[a, b]").unwrap(), + ["[a, b]"] + ); + assert_eq!( + run_parser_and_deref_seq_spans("- a\n- b").unwrap(), + ["- a\n- b"] + ); + assert_eq!( + run_parser_and_deref_seq_spans("foo:\n - a\n - b").unwrap(), + ["- a\n - b"] + ); + assert_eq!( + run_parser_and_deref_seq_spans("foo:\n - a\n - bar:\n - b\n - c").unwrap(), + ["b\n - c", "- a\n - bar:\n - b\n - c"] + ); +} diff --git a/parser/tools/dump_events.rs b/parser/tools/dump_events.rs index 0ec84c9..73d2a23 100644 --- a/parser/tools/dump_events.rs +++ b/parser/tools/dump_events.rs @@ -2,21 +2,21 @@ use std::env; use std::fs::File; use std::io::prelude::*; -use saphyr_parser::{Event, MarkedEventReceiver, Marker, Parser}; +use saphyr_parser::{Event, Parser, Span, SpannedEventReceiver}; #[derive(Debug)] struct EventSink { - events: Vec<(Event, Marker)>, + events: Vec<(Event, Span)>, } -impl MarkedEventReceiver for EventSink { - fn on_event(&mut self, ev: Event, mark: Marker) { +impl SpannedEventReceiver for EventSink { + fn on_event(&mut self, ev: Event, span: Span) { eprintln!(" \x1B[;34m\u{21B3} {:?}\x1B[;m", &ev); - self.events.push((ev, mark)); + self.events.push((ev, span)); 
 }
 }
 
-fn str_to_events(yaml: &str) -> Vec<(Event, Marker)> {
+fn str_to_events(yaml: &str) -> Vec<(Event, Span)> {
     let mut sink = EventSink { events: Vec::new() };
     let mut parser = Parser::new_from_str(yaml);
     // Load events using our sink as the receiver.
diff --git a/parser/tools/run_bench.rs b/parser/tools/run_bench.rs
index 6d9735e..a7cccab 100644
--- a/parser/tools/run_bench.rs
+++ b/parser/tools/run_bench.rs
@@ -1,15 +1,13 @@
 #![allow(clippy::cast_possible_truncation, clippy::cast_precision_loss)]
 
-use saphyr_parser::{
-    Event, Marker, {MarkedEventReceiver, Parser},
-};
+use saphyr_parser::{Event, Parser, Span, SpannedEventReceiver};
 
 use std::{env, fs::File, io::prelude::*};
 
 /// A sink which discards any event sent.
 struct NullSink {}
 
-impl MarkedEventReceiver for NullSink {
-    fn on_event(&mut self, _: Event, _: Marker) {}
+impl SpannedEventReceiver for NullSink {
+    fn on_event(&mut self, _: Event, _: Span) {}
 }
 
 /// Parse the given input, returning elapsed time in nanoseconds.
diff --git a/parser/tools/time_parse.rs b/parser/tools/time_parse.rs
index 154d698..554f620 100644
--- a/parser/tools/time_parse.rs
+++ b/parser/tools/time_parse.rs
@@ -2,13 +2,13 @@
 use std::env;
 use std::fs::File;
 use std::io::prelude::*;
 
-use saphyr_parser::{Event, MarkedEventReceiver, Marker, Parser};
+use saphyr_parser::{Event, Parser, Span, SpannedEventReceiver};
 
 /// A sink which discards any event sent.
 struct NullSink {}
 
-impl MarkedEventReceiver for NullSink {
-    fn on_event(&mut self, _: Event, _: Marker) {}
+impl SpannedEventReceiver for NullSink {
+    fn on_event(&mut self, _: Event, _: Span) {}
 }
 
 fn main() {