Use spans instead of markers (#3)
This commit is contained in:
parent
4a5241e0bb
commit
926fdfb01b
7 changed files with 333 additions and 131 deletions
|
@ -40,5 +40,5 @@ mod parser;
|
|||
mod scanner;
|
||||
|
||||
pub use crate::input::BufferedInput;
|
||||
pub use crate::parser::{Event, EventReceiver, MarkedEventReceiver, Parser, Tag};
|
||||
pub use crate::scanner::{Marker, ScanError, TScalarStyle};
|
||||
pub use crate::parser::{Event, EventReceiver, Parser, SpannedEventReceiver, Tag};
|
||||
pub use crate::scanner::{Marker, ScanError, Span, TScalarStyle};
|
||||
|
|
|
@ -6,8 +6,9 @@
|
|||
|
||||
use crate::{
|
||||
input::{str::StrInput, Input},
|
||||
scanner::{Marker, ScanError, Scanner, TScalarStyle, Token, TokenType},
|
||||
scanner::{ScanError, Scanner, Span, TScalarStyle, Token, TokenType},
|
||||
};
|
||||
|
||||
use std::collections::HashMap;
|
||||
|
||||
#[derive(Clone, Copy, PartialEq, Debug, Eq)]
|
||||
|
@ -116,7 +117,7 @@ pub struct Parser<T: Input> {
|
|||
/// The next token from the scanner.
|
||||
token: Option<Token>,
|
||||
/// The next YAML event to emit.
|
||||
current: Option<(Event, Marker)>,
|
||||
current: Option<(Event, Span)>,
|
||||
/// Anchors that have been encountered in the YAML document.
|
||||
anchors: HashMap<String, usize>,
|
||||
/// Next ID available for an anchor.
|
||||
|
@ -142,8 +143,8 @@ pub struct Parser<T: Input> {
|
|||
/// The low-level parsing API is event-based (a push parser), calling [`EventReceiver::on_event`]
|
||||
/// for each YAML [`Event`] that occurs.
|
||||
/// The [`EventReceiver`] trait only receives events. In order to receive both events and their
|
||||
/// location in the source, use [`MarkedEventReceiver`]. Note that [`EventReceiver`]s implement
|
||||
/// [`MarkedEventReceiver`] automatically.
|
||||
/// location in the source, use [`SpannedEventReceiver`]. Note that [`EventReceiver`]s implement
|
||||
/// [`SpannedEventReceiver`] automatically.
|
||||
///
|
||||
/// # Event hierarchy
|
||||
/// The event stream starts with an [`Event::StreamStart`] event followed by an
|
||||
|
@ -213,20 +214,20 @@ pub trait EventReceiver {
|
|||
|
||||
/// Trait to be implemented for using the low-level parsing API.
|
||||
///
|
||||
/// Functionally similar to [`EventReceiver`], but receives a [`Marker`] as well as the event.
|
||||
pub trait MarkedEventReceiver {
|
||||
/// Functionally similar to [`EventReceiver`], but receives a [`Span`] as well as the event.
|
||||
pub trait SpannedEventReceiver {
|
||||
/// Handler called for each event that occurs.
|
||||
fn on_event(&mut self, ev: Event, _mark: Marker);
|
||||
fn on_event(&mut self, ev: Event, span: Span);
|
||||
}
|
||||
|
||||
impl<R: EventReceiver> MarkedEventReceiver for R {
|
||||
fn on_event(&mut self, ev: Event, _mark: Marker) {
|
||||
impl<R: EventReceiver> SpannedEventReceiver for R {
|
||||
fn on_event(&mut self, ev: Event, _span: Span) {
|
||||
self.on_event(ev);
|
||||
}
|
||||
}
|
||||
|
||||
/// A convenience alias for a `Result` of a parser event.
|
||||
pub type ParseResult = Result<(Event, Marker), ScanError>;
|
||||
pub type ParseResult = Result<(Event, Span), ScanError>;
|
||||
|
||||
impl<'a> Parser<StrInput<'a>> {
|
||||
/// Create a new instance of a parser from a &str.
|
||||
|
@ -290,7 +291,7 @@ impl<T: Input> Parser<T> {
|
|||
///
|
||||
/// # Errors
|
||||
/// Returns `ScanError` when loading the next event fails.
|
||||
pub fn peek(&mut self) -> Option<Result<&(Event, Marker), ScanError>> {
|
||||
pub fn peek(&mut self) -> Option<Result<&(Event, Span), ScanError>> {
|
||||
if let Some(ref x) = self.current {
|
||||
Some(Ok(x))
|
||||
} else {
|
||||
|
@ -379,7 +380,7 @@ impl<T: Input> Parser<T> {
|
|||
|
||||
fn parse(&mut self) -> ParseResult {
|
||||
if self.state == State::End {
|
||||
return Ok((Event::StreamEnd, self.scanner.mark()));
|
||||
return Ok((Event::StreamEnd, Span::empty(self.scanner.mark())));
|
||||
}
|
||||
let (ev, mark) = self.state_machine()?;
|
||||
Ok((ev, mark))
|
||||
|
@ -393,40 +394,40 @@ impl<T: Input> Parser<T> {
|
|||
/// If `multi` is set to `true`, the parser will allow parsing of multiple YAML documents
|
||||
/// inside the stream.
|
||||
///
|
||||
/// Note that any [`EventReceiver`] is also a [`MarkedEventReceiver`], so implementing the
|
||||
/// Note that any [`EventReceiver`] is also a [`SpannedEventReceiver`], so implementing the
|
||||
/// former is enough to call this function.
|
||||
/// # Errors
|
||||
/// Returns `ScanError` when loading fails.
|
||||
pub fn load<R: MarkedEventReceiver>(
|
||||
pub fn load<R: SpannedEventReceiver>(
|
||||
&mut self,
|
||||
recv: &mut R,
|
||||
multi: bool,
|
||||
) -> Result<(), ScanError> {
|
||||
if !self.scanner.stream_started() {
|
||||
let (ev, mark) = self.next_event_impl()?;
|
||||
let (ev, span) = self.next_event_impl()?;
|
||||
if ev != Event::StreamStart {
|
||||
return Err(ScanError::new_str(
|
||||
mark,
|
||||
span.start,
|
||||
"did not find expected <stream-start>",
|
||||
));
|
||||
}
|
||||
recv.on_event(ev, mark);
|
||||
recv.on_event(ev, span);
|
||||
}
|
||||
|
||||
if self.scanner.stream_ended() {
|
||||
// XXX has parsed?
|
||||
recv.on_event(Event::StreamEnd, self.scanner.mark());
|
||||
recv.on_event(Event::StreamEnd, Span::empty(self.scanner.mark()));
|
||||
return Ok(());
|
||||
}
|
||||
loop {
|
||||
let (ev, mark) = self.next_event_impl()?;
|
||||
let (ev, span) = self.next_event_impl()?;
|
||||
if ev == Event::StreamEnd {
|
||||
recv.on_event(ev, mark);
|
||||
recv.on_event(ev, span);
|
||||
return Ok(());
|
||||
}
|
||||
// clear anchors before a new document
|
||||
self.anchors.clear();
|
||||
self.load_document(ev, mark, recv)?;
|
||||
self.load_document(ev, span, recv)?;
|
||||
if !multi {
|
||||
break;
|
||||
}
|
||||
|
@ -434,22 +435,22 @@ impl<T: Input> Parser<T> {
|
|||
Ok(())
|
||||
}
|
||||
|
||||
fn load_document<R: MarkedEventReceiver>(
|
||||
fn load_document<R: SpannedEventReceiver>(
|
||||
&mut self,
|
||||
first_ev: Event,
|
||||
mark: Marker,
|
||||
span: Span,
|
||||
recv: &mut R,
|
||||
) -> Result<(), ScanError> {
|
||||
if first_ev != Event::DocumentStart {
|
||||
return Err(ScanError::new_str(
|
||||
mark,
|
||||
span.start,
|
||||
"did not find expected <document-start>",
|
||||
));
|
||||
}
|
||||
recv.on_event(first_ev, mark);
|
||||
recv.on_event(first_ev, span);
|
||||
|
||||
let (ev, mark) = self.next_event_impl()?;
|
||||
self.load_node(ev, mark, recv)?;
|
||||
let (ev, span) = self.next_event_impl()?;
|
||||
self.load_node(ev, span, recv)?;
|
||||
|
||||
// DOCUMENT-END is expected.
|
||||
let (ev, mark) = self.next_event_impl()?;
|
||||
|
@ -459,23 +460,23 @@ impl<T: Input> Parser<T> {
|
|||
Ok(())
|
||||
}
|
||||
|
||||
fn load_node<R: MarkedEventReceiver>(
|
||||
fn load_node<R: SpannedEventReceiver>(
|
||||
&mut self,
|
||||
first_ev: Event,
|
||||
mark: Marker,
|
||||
span: Span,
|
||||
recv: &mut R,
|
||||
) -> Result<(), ScanError> {
|
||||
match first_ev {
|
||||
Event::Alias(..) | Event::Scalar(..) => {
|
||||
recv.on_event(first_ev, mark);
|
||||
recv.on_event(first_ev, span);
|
||||
Ok(())
|
||||
}
|
||||
Event::SequenceStart(..) => {
|
||||
recv.on_event(first_ev, mark);
|
||||
recv.on_event(first_ev, span);
|
||||
self.load_sequence(recv)
|
||||
}
|
||||
Event::MappingStart(..) => {
|
||||
recv.on_event(first_ev, mark);
|
||||
recv.on_event(first_ev, span);
|
||||
self.load_mapping(recv)
|
||||
}
|
||||
_ => {
|
||||
|
@ -485,7 +486,7 @@ impl<T: Input> Parser<T> {
|
|||
}
|
||||
}
|
||||
|
||||
fn load_mapping<R: MarkedEventReceiver>(&mut self, recv: &mut R) -> Result<(), ScanError> {
|
||||
fn load_mapping<R: SpannedEventReceiver>(&mut self, recv: &mut R) -> Result<(), ScanError> {
|
||||
let (mut key_ev, mut key_mark) = self.next_event_impl()?;
|
||||
while key_ev != Event::MappingEnd {
|
||||
// key
|
||||
|
@ -504,7 +505,7 @@ impl<T: Input> Parser<T> {
|
|||
Ok(())
|
||||
}
|
||||
|
||||
fn load_sequence<R: MarkedEventReceiver>(&mut self, recv: &mut R) -> Result<(), ScanError> {
|
||||
fn load_sequence<R: SpannedEventReceiver>(&mut self, recv: &mut R) -> Result<(), ScanError> {
|
||||
let (mut ev, mut mark) = self.next_event_impl()?;
|
||||
while ev != Event::SequenceEnd {
|
||||
self.load_node(ev, mark, recv)?;
|
||||
|
@ -562,13 +563,13 @@ impl<T: Input> Parser<T> {
|
|||
|
||||
fn stream_start(&mut self) -> ParseResult {
|
||||
match *self.peek_token()? {
|
||||
Token(mark, TokenType::StreamStart(_)) => {
|
||||
Token(span, TokenType::StreamStart(_)) => {
|
||||
self.state = State::ImplicitDocumentStart;
|
||||
self.skip();
|
||||
Ok((Event::StreamStart, mark))
|
||||
Ok((Event::StreamStart, span))
|
||||
}
|
||||
Token(mark, _) => Err(ScanError::new_str(
|
||||
mark,
|
||||
Token(span, _) => Err(ScanError::new_str(
|
||||
span.start,
|
||||
"did not find expected <stream-start>",
|
||||
)),
|
||||
}
|
||||
|
@ -580,10 +581,10 @@ impl<T: Input> Parser<T> {
|
|||
}
|
||||
|
||||
match *self.peek_token()? {
|
||||
Token(mark, TokenType::StreamEnd) => {
|
||||
Token(span, TokenType::StreamEnd) => {
|
||||
self.state = State::End;
|
||||
self.skip();
|
||||
Ok((Event::StreamEnd, mark))
|
||||
Ok((Event::StreamEnd, span))
|
||||
}
|
||||
Token(
|
||||
_,
|
||||
|
@ -594,11 +595,11 @@ impl<T: Input> Parser<T> {
|
|||
// explicit document
|
||||
self.explicit_document_start()
|
||||
}
|
||||
Token(mark, _) if implicit => {
|
||||
Token(span, _) if implicit => {
|
||||
self.parser_process_directives()?;
|
||||
self.push_state(State::DocumentEnd);
|
||||
self.state = State::BlockNode;
|
||||
Ok((Event::DocumentStart, mark))
|
||||
Ok((Event::DocumentStart, span))
|
||||
}
|
||||
_ => {
|
||||
// explicit document
|
||||
|
@ -612,20 +613,23 @@ impl<T: Input> Parser<T> {
|
|||
loop {
|
||||
let mut tags = HashMap::new();
|
||||
match self.peek_token()? {
|
||||
Token(mark, TokenType::VersionDirective(_, _)) => {
|
||||
Token(span, TokenType::VersionDirective(_, _)) => {
|
||||
// XXX parsing with warning according to spec
|
||||
//if major != 1 || minor > 2 {
|
||||
// return Err(ScanError::new_str(tok.0,
|
||||
// "found incompatible YAML document"));
|
||||
//}
|
||||
if version_directive_received {
|
||||
return Err(ScanError::new_str(*mark, "duplicate version directive"));
|
||||
return Err(ScanError::new_str(
|
||||
span.start,
|
||||
"duplicate version directive",
|
||||
));
|
||||
}
|
||||
version_directive_received = true;
|
||||
}
|
||||
Token(mark, TokenType::TagDirective(handle, prefix)) => {
|
||||
if tags.contains_key(handle) {
|
||||
return Err(ScanError::new_str(*mark, "the TAG directive must only be given at most once per handle in the same document"));
|
||||
return Err(ScanError::new_str(mark.start, "the TAG directive must only be given at most once per handle in the same document"));
|
||||
}
|
||||
tags.insert(handle.to_string(), prefix.to_string());
|
||||
}
|
||||
|
@ -646,8 +650,8 @@ impl<T: Input> Parser<T> {
|
|||
self.skip();
|
||||
Ok((Event::DocumentStart, mark))
|
||||
}
|
||||
Token(mark, _) => Err(ScanError::new_str(
|
||||
mark,
|
||||
Token(span, _) => Err(ScanError::new_str(
|
||||
span.start,
|
||||
"did not find expected <document start>",
|
||||
)),
|
||||
}
|
||||
|
@ -673,13 +677,13 @@ impl<T: Input> Parser<T> {
|
|||
|
||||
fn document_end(&mut self) -> ParseResult {
|
||||
let mut explicit_end = false;
|
||||
let marker: Marker = match *self.peek_token()? {
|
||||
Token(mark, TokenType::DocumentEnd) => {
|
||||
let span: Span = match *self.peek_token()? {
|
||||
Token(span, TokenType::DocumentEnd) => {
|
||||
explicit_end = true;
|
||||
self.skip();
|
||||
mark
|
||||
span
|
||||
}
|
||||
Token(mark, _) => mark,
|
||||
Token(span, _) => span,
|
||||
};
|
||||
|
||||
if !self.keep_tags {
|
||||
|
@ -688,21 +692,21 @@ impl<T: Input> Parser<T> {
|
|||
if explicit_end {
|
||||
self.state = State::ImplicitDocumentStart;
|
||||
} else {
|
||||
if let Token(mark, TokenType::VersionDirective(..) | TokenType::TagDirective(..)) =
|
||||
if let Token(span, TokenType::VersionDirective(..) | TokenType::TagDirective(..)) =
|
||||
*self.peek_token()?
|
||||
{
|
||||
return Err(ScanError::new_str(
|
||||
mark,
|
||||
span.start,
|
||||
"missing explicit document end marker before directive",
|
||||
));
|
||||
}
|
||||
self.state = State::DocumentStart;
|
||||
}
|
||||
|
||||
Ok((Event::DocumentEnd, marker))
|
||||
Ok((Event::DocumentEnd, span))
|
||||
}
|
||||
|
||||
fn register_anchor(&mut self, name: String, _: &Marker) -> usize {
|
||||
fn register_anchor(&mut self, name: String, _: &Span) -> usize {
|
||||
// anchors can be overridden/reused
|
||||
// if self.anchors.contains_key(name) {
|
||||
// return Err(ScanError::new_str(*mark,
|
||||
|
@ -720,25 +724,25 @@ impl<T: Input> Parser<T> {
|
|||
match *self.peek_token()? {
|
||||
Token(_, TokenType::Alias(_)) => {
|
||||
self.pop_state();
|
||||
if let Token(mark, TokenType::Alias(name)) = self.fetch_token() {
|
||||
if let Token(span, TokenType::Alias(name)) = self.fetch_token() {
|
||||
match self.anchors.get(&name) {
|
||||
None => {
|
||||
return Err(ScanError::new_str(
|
||||
mark,
|
||||
span.start,
|
||||
"while parsing node, found unknown anchor",
|
||||
))
|
||||
}
|
||||
Some(id) => return Ok((Event::Alias(*id), mark)),
|
||||
Some(id) => return Ok((Event::Alias(*id), span)),
|
||||
}
|
||||
}
|
||||
unreachable!()
|
||||
}
|
||||
Token(_, TokenType::Anchor(_)) => {
|
||||
if let Token(mark, TokenType::Anchor(name)) = self.fetch_token() {
|
||||
anchor_id = self.register_anchor(name, &mark);
|
||||
if let Token(span, TokenType::Anchor(name)) = self.fetch_token() {
|
||||
anchor_id = self.register_anchor(name, &span);
|
||||
if let TokenType::Tag(..) = self.peek_token()?.1 {
|
||||
if let TokenType::Tag(handle, suffix) = self.fetch_token().1 {
|
||||
tag = Some(self.resolve_tag(mark, &handle, suffix)?);
|
||||
tag = Some(self.resolve_tag(span, &handle, suffix)?);
|
||||
} else {
|
||||
unreachable!()
|
||||
}
|
||||
|
@ -797,8 +801,8 @@ impl<T: Input> Parser<T> {
|
|||
self.pop_state();
|
||||
Ok((Event::empty_scalar_with_anchor(anchor_id, tag), mark))
|
||||
}
|
||||
Token(mark, _) => Err(ScanError::new_str(
|
||||
mark,
|
||||
Token(span, _) => Err(ScanError::new_str(
|
||||
span.start,
|
||||
"while parsing a node, did not find expected node content",
|
||||
)),
|
||||
}
|
||||
|
@ -835,8 +839,8 @@ impl<T: Input> Parser<T> {
|
|||
self.skip();
|
||||
Ok((Event::MappingEnd, mark))
|
||||
}
|
||||
Token(mark, _) => Err(ScanError::new_str(
|
||||
mark,
|
||||
Token(span, _) => Err(ScanError::new_str(
|
||||
span.start,
|
||||
"while parsing a block mapping, did not find expected key",
|
||||
)),
|
||||
}
|
||||
|
@ -870,15 +874,15 @@ impl<T: Input> Parser<T> {
|
|||
let _ = self.peek_token()?;
|
||||
self.skip();
|
||||
}
|
||||
let marker: Marker = {
|
||||
let span: Span = {
|
||||
match *self.peek_token()? {
|
||||
Token(mark, TokenType::FlowMappingEnd) => mark,
|
||||
Token(mark, _) => {
|
||||
if !first {
|
||||
match *self.peek_token()? {
|
||||
Token(_, TokenType::FlowEntry) => self.skip(),
|
||||
Token(mark, _) => return Err(ScanError::new_str(
|
||||
mark,
|
||||
Token(span, _) => return Err(ScanError::new_str(
|
||||
span.start,
|
||||
"while parsing a flow mapping, did not find expected ',' or '}'",
|
||||
)),
|
||||
}
|
||||
|
@ -916,18 +920,18 @@ impl<T: Input> Parser<T> {
|
|||
|
||||
self.pop_state();
|
||||
self.skip();
|
||||
Ok((Event::MappingEnd, marker))
|
||||
Ok((Event::MappingEnd, span))
|
||||
}
|
||||
|
||||
fn flow_mapping_value(&mut self, empty: bool) -> ParseResult {
|
||||
let mark: Marker = {
|
||||
let span: Span = {
|
||||
if empty {
|
||||
let Token(mark, _) = *self.peek_token()?;
|
||||
self.state = State::FlowMappingKey;
|
||||
return Ok((Event::empty_scalar(), mark));
|
||||
}
|
||||
match *self.peek_token()? {
|
||||
Token(marker, TokenType::Value) => {
|
||||
Token(span, TokenType::Value) => {
|
||||
self.skip();
|
||||
match self.peek_token()?.1 {
|
||||
TokenType::FlowEntry | TokenType::FlowMappingEnd => {}
|
||||
|
@ -936,14 +940,14 @@ impl<T: Input> Parser<T> {
|
|||
return self.parse_node(false, false);
|
||||
}
|
||||
}
|
||||
marker
|
||||
span
|
||||
}
|
||||
Token(marker, _) => marker,
|
||||
}
|
||||
};
|
||||
|
||||
self.state = State::FlowMappingKey;
|
||||
Ok((Event::empty_scalar(), mark))
|
||||
Ok((Event::empty_scalar(), span))
|
||||
}
|
||||
|
||||
fn flow_sequence_entry(&mut self, first: bool) -> ParseResult {
|
||||
|
@ -962,9 +966,9 @@ impl<T: Input> Parser<T> {
|
|||
Token(_, TokenType::FlowEntry) if !first => {
|
||||
self.skip();
|
||||
}
|
||||
Token(mark, _) if !first => {
|
||||
Token(span, _) if !first => {
|
||||
return Err(ScanError::new_str(
|
||||
mark,
|
||||
span.start,
|
||||
"while parsing a flow sequence, expected ',' or ']'",
|
||||
));
|
||||
}
|
||||
|
@ -1035,8 +1039,8 @@ impl<T: Input> Parser<T> {
|
|||
self.parse_node(true, false)
|
||||
}
|
||||
}
|
||||
Token(mark, _) => Err(ScanError::new_str(
|
||||
mark,
|
||||
Token(span, _) => Err(ScanError::new_str(
|
||||
span.start,
|
||||
"while parsing a block collection, did not find expected '-' indicator",
|
||||
)),
|
||||
}
|
||||
|
@ -1080,11 +1084,11 @@ impl<T: Input> Parser<T> {
|
|||
#[allow(clippy::unnecessary_wraps)]
|
||||
fn flow_sequence_entry_mapping_end(&mut self) -> ParseResult {
|
||||
self.state = State::FlowSequenceEntry;
|
||||
Ok((Event::MappingEnd, self.scanner.mark()))
|
||||
Ok((Event::MappingEnd, Span::empty(self.scanner.mark())))
|
||||
}
|
||||
|
||||
/// Resolve a tag from the handle and the suffix.
|
||||
fn resolve_tag(&self, mark: Marker, handle: &str, suffix: String) -> Result<Tag, ScanError> {
|
||||
fn resolve_tag(&self, span: Span, handle: &str, suffix: String) -> Result<Tag, ScanError> {
|
||||
if handle == "!!" {
|
||||
// "!!" is a shorthand for "tag:yaml.org,2002:". However, that default can be
|
||||
// overridden.
|
||||
|
@ -1121,7 +1125,7 @@ impl<T: Input> Parser<T> {
|
|||
// If the handle is of the form "!foo!", this cannot be a local handle and we need
|
||||
// to error.
|
||||
if handle.len() >= 2 && handle.starts_with('!') && handle.ends_with('!') {
|
||||
Err(ScanError::new_str(mark, "the handle wasn't declared"))
|
||||
Err(ScanError::new_str(span.start, "the handle wasn't declared"))
|
||||
} else {
|
||||
Ok(Tag {
|
||||
handle: handle.to_string(),
|
||||
|
@ -1134,7 +1138,7 @@ impl<T: Input> Parser<T> {
|
|||
}
|
||||
|
||||
impl<T: Input> Iterator for Parser<T> {
|
||||
type Item = Result<(Event, Marker), ScanError>;
|
||||
type Item = Result<(Event, Span), ScanError>;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
self.next_event()
|
||||
|
|
|
@ -79,6 +79,37 @@ impl Marker {
|
|||
}
|
||||
}
|
||||
|
||||
/// A range of locations in a Yaml document.
|
||||
#[derive(Clone, Copy, PartialEq, Debug, Eq, Default)]
|
||||
pub struct Span {
|
||||
/// The start (inclusive) of the range.
|
||||
pub start: Marker,
|
||||
/// The end (exclusive) of the range.
|
||||
pub end: Marker,
|
||||
}
|
||||
|
||||
impl Span {
|
||||
/// Create a new [`Span`] for the given range.
|
||||
#[must_use]
|
||||
pub fn new(start: Marker, end: Marker) -> Span {
|
||||
Span { start, end }
|
||||
}
|
||||
|
||||
/// Create a empty [`Span`] at a given location.
|
||||
///
|
||||
/// An empty span doesn't contain any characters, but its position may still be meaningful.
|
||||
/// For example, for an indented sequence [`SequenceEnd`] has a location but an empty span.
|
||||
///
|
||||
/// [`SequenceEnd`]: crate::Event::SequenceEnd
|
||||
#[must_use]
|
||||
pub fn empty(mark: Marker) -> Span {
|
||||
Span {
|
||||
start: mark,
|
||||
end: mark,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// An error that occurred while scanning.
|
||||
#[derive(Clone, PartialEq, Debug, Eq)]
|
||||
pub struct ScanError {
|
||||
|
@ -204,7 +235,7 @@ pub enum TokenType {
|
|||
|
||||
/// A scanner token.
|
||||
#[derive(Clone, PartialEq, Debug, Eq)]
|
||||
pub struct Token(pub Marker, pub TokenType);
|
||||
pub struct Token(pub Span, pub TokenType);
|
||||
|
||||
/// A scalar that was parsed and may correspond to a simple key.
|
||||
///
|
||||
|
@ -874,8 +905,10 @@ impl<T: Input> Scanner<T> {
|
|||
self.indent = -1;
|
||||
self.stream_start_produced = true;
|
||||
self.allow_simple_key();
|
||||
self.tokens
|
||||
.push_back(Token(mark, TokenType::StreamStart(TEncoding::Utf8)));
|
||||
self.tokens.push_back(Token(
|
||||
Span::empty(mark),
|
||||
TokenType::StreamStart(TEncoding::Utf8),
|
||||
));
|
||||
self.simple_keys.push(SimpleKey::new(Marker::new(0, 0, 0)));
|
||||
}
|
||||
|
||||
|
@ -900,7 +933,7 @@ impl<T: Input> Scanner<T> {
|
|||
self.disallow_simple_key();
|
||||
|
||||
self.tokens
|
||||
.push_back(Token(self.mark, TokenType::StreamEnd));
|
||||
.push_back(Token(Span::empty(self.mark), TokenType::StreamEnd));
|
||||
Ok(())
|
||||
}
|
||||
|
||||
|
@ -932,7 +965,7 @@ impl<T: Input> Scanner<T> {
|
|||
self.mark.col += line_len;
|
||||
// XXX return an empty TagDirective token
|
||||
Token(
|
||||
start_mark,
|
||||
Span::new(start_mark, self.mark),
|
||||
TokenType::TagDirective(String::new(), String::new()),
|
||||
)
|
||||
// return Err(ScanError::new_str(start_mark,
|
||||
|
@ -971,7 +1004,10 @@ impl<T: Input> Scanner<T> {
|
|||
|
||||
let minor = self.scan_version_directive_number(mark)?;
|
||||
|
||||
Ok(Token(*mark, TokenType::VersionDirective(major, minor)))
|
||||
Ok(Token(
|
||||
Span::new(*mark, self.mark),
|
||||
TokenType::VersionDirective(major, minor),
|
||||
))
|
||||
}
|
||||
|
||||
fn scan_directive_name(&mut self) -> Result<String, ScanError> {
|
||||
|
@ -1040,7 +1076,10 @@ impl<T: Input> Scanner<T> {
|
|||
self.input.lookahead(1);
|
||||
|
||||
if self.input.next_is_blank_or_breakz() {
|
||||
Ok(Token(*mark, TokenType::TagDirective(handle, prefix)))
|
||||
Ok(Token(
|
||||
Span::new(*mark, self.mark),
|
||||
TokenType::TagDirective(handle, prefix),
|
||||
))
|
||||
} else {
|
||||
Err(ScanError::new_str(
|
||||
*mark,
|
||||
|
@ -1093,7 +1132,10 @@ impl<T: Input> Scanner<T> {
|
|||
|| (self.flow_level > 0 && self.input.next_is_flow())
|
||||
{
|
||||
// XXX: ex 7.2, an empty scalar can follow a secondary tag
|
||||
Ok(Token(start_mark, TokenType::Tag(handle, suffix)))
|
||||
Ok(Token(
|
||||
Span::new(start_mark, self.mark),
|
||||
TokenType::Tag(handle, suffix),
|
||||
))
|
||||
} else {
|
||||
Err(ScanError::new_str(
|
||||
start_mark,
|
||||
|
@ -1323,11 +1365,12 @@ impl<T: Input> Scanner<T> {
|
|||
return Err(ScanError::new_str(start_mark, "while scanning an anchor or alias, did not find expected alphabetic or numeric character"));
|
||||
}
|
||||
|
||||
if alias {
|
||||
Ok(Token(start_mark, TokenType::Alias(string)))
|
||||
let tok = if alias {
|
||||
TokenType::Alias(string)
|
||||
} else {
|
||||
Ok(Token(start_mark, TokenType::Anchor(string)))
|
||||
}
|
||||
TokenType::Anchor(string)
|
||||
};
|
||||
Ok(Token(Span::new(start_mark, self.mark), tok))
|
||||
}
|
||||
|
||||
fn fetch_flow_collection_start(&mut self, tok: TokenType) -> ScanResult {
|
||||
|
@ -1351,7 +1394,8 @@ impl<T: Input> Scanner<T> {
|
|||
|
||||
self.skip_ws_to_eol(SkipTabs::Yes)?;
|
||||
|
||||
self.tokens.push_back(Token(start_mark, tok));
|
||||
self.tokens
|
||||
.push_back(Token(Span::new(start_mark, self.mark), tok));
|
||||
Ok(())
|
||||
}
|
||||
|
||||
|
@ -1380,7 +1424,8 @@ impl<T: Input> Scanner<T> {
|
|||
self.adjacent_value_allowed_at = self.mark.index;
|
||||
}
|
||||
|
||||
self.tokens.push_back(Token(start_mark, tok));
|
||||
self.tokens
|
||||
.push_back(Token(Span::new(start_mark, self.mark), tok));
|
||||
Ok(())
|
||||
}
|
||||
|
||||
|
@ -1395,8 +1440,10 @@ impl<T: Input> Scanner<T> {
|
|||
self.skip_non_blank();
|
||||
self.skip_ws_to_eol(SkipTabs::Yes)?;
|
||||
|
||||
self.tokens
|
||||
.push_back(Token(start_mark, TokenType::FlowEntry));
|
||||
self.tokens.push_back(Token(
|
||||
Span::new(start_mark, self.mark),
|
||||
TokenType::FlowEntry,
|
||||
));
|
||||
Ok(())
|
||||
}
|
||||
|
||||
|
@ -1438,9 +1485,12 @@ impl<T: Input> Scanner<T> {
|
|||
}
|
||||
|
||||
// ???, fixes test G9HC.
|
||||
if let Some(Token(mark, TokenType::Anchor(..) | TokenType::Tag(..))) = self.tokens.back() {
|
||||
if self.mark.col == 0 && mark.col == 0 && self.indent > -1 {
|
||||
return Err(ScanError::new_str(*mark, "invalid indentation for anchor"));
|
||||
if let Some(Token(span, TokenType::Anchor(..) | TokenType::Tag(..))) = self.tokens.back() {
|
||||
if self.mark.col == 0 && span.start.col == 0 && self.indent > -1 {
|
||||
return Err(ScanError::new_str(
|
||||
span.start,
|
||||
"invalid indentation for anchor",
|
||||
));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1470,7 +1520,7 @@ impl<T: Input> Scanner<T> {
|
|||
self.allow_simple_key();
|
||||
|
||||
self.tokens
|
||||
.push_back(Token(self.mark, TokenType::BlockEntry));
|
||||
.push_back(Token(Span::empty(self.mark), TokenType::BlockEntry));
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
@ -1484,7 +1534,7 @@ impl<T: Input> Scanner<T> {
|
|||
|
||||
self.skip_n_non_blank(3);
|
||||
|
||||
self.tokens.push_back(Token(mark, t));
|
||||
self.tokens.push_back(Token(Span::new(mark, self.mark), t));
|
||||
Ok(())
|
||||
}
|
||||
|
||||
|
@ -1616,7 +1666,10 @@ impl<T: Input> Scanner<T> {
|
|||
// Otherwise, the newline after chomping is ignored.
|
||||
Chomping::Keep => trailing_breaks,
|
||||
};
|
||||
return Ok(Token(start_mark, TokenType::Scalar(style, contents)));
|
||||
return Ok(Token(
|
||||
Span::new(start_mark, self.mark),
|
||||
TokenType::Scalar(style, contents),
|
||||
));
|
||||
}
|
||||
|
||||
if self.mark.col < indent && (self.mark.col as isize) > self.indent {
|
||||
|
@ -1682,7 +1735,10 @@ impl<T: Input> Scanner<T> {
|
|||
string.push_str(&trailing_breaks);
|
||||
}
|
||||
|
||||
Ok(Token(start_mark, TokenType::Scalar(style, string)))
|
||||
Ok(Token(
|
||||
Span::new(start_mark, self.mark),
|
||||
TokenType::Scalar(style, string),
|
||||
))
|
||||
}
|
||||
|
||||
/// Retrieve the contents of the line, parsing it as a block scalar.
|
||||
|
@ -1963,7 +2019,10 @@ impl<T: Input> Scanner<T> {
|
|||
} else {
|
||||
TScalarStyle::DoubleQuoted
|
||||
};
|
||||
Ok(Token(start_mark, TokenType::Scalar(style, string)))
|
||||
Ok(Token(
|
||||
Span::new(start_mark, self.mark),
|
||||
TokenType::Scalar(style, string),
|
||||
))
|
||||
}
|
||||
|
||||
/// Consume successive non-whitespace characters from a flow scalar.
|
||||
|
@ -2120,6 +2179,7 @@ impl<T: Input> Scanner<T> {
|
|||
self.buf_whitespaces.clear();
|
||||
self.buf_leading_break.clear();
|
||||
self.buf_trailing_breaks.clear();
|
||||
let mut end_mark = self.mark;
|
||||
|
||||
loop {
|
||||
self.input.lookahead(4);
|
||||
|
@ -2182,6 +2242,7 @@ impl<T: Input> Scanner<T> {
|
|||
self.skip_non_blank();
|
||||
}
|
||||
}
|
||||
end_mark = self.mark;
|
||||
}
|
||||
|
||||
// We may reach the end of a plain scalar if:
|
||||
|
@ -2238,7 +2299,7 @@ impl<T: Input> Scanner<T> {
|
|||
}
|
||||
|
||||
Ok(Token(
|
||||
start_mark,
|
||||
Span::new(start_mark, end_mark),
|
||||
TokenType::Scalar(TScalarStyle::Plain, string),
|
||||
))
|
||||
}
|
||||
|
@ -2280,7 +2341,8 @@ impl<T: Input> Scanner<T> {
|
|||
"tabs disallowed in this context",
|
||||
));
|
||||
}
|
||||
self.tokens.push_back(Token(start_mark, TokenType::Key));
|
||||
self.tokens
|
||||
.push_back(Token(Span::new(start_mark, self.mark), TokenType::Key));
|
||||
Ok(())
|
||||
}
|
||||
|
||||
|
@ -2338,7 +2400,7 @@ impl<T: Input> Scanner<T> {
|
|||
|
||||
if sk.possible {
|
||||
// insert simple key
|
||||
let tok = Token(sk.mark, TokenType::Key);
|
||||
let tok = Token(Span::empty(sk.mark), TokenType::Key);
|
||||
self.insert_token(sk.token_number - self.tokens_parsed, tok);
|
||||
if is_implicit_flow_mapping {
|
||||
if sk.mark.line < start_mark.line {
|
||||
|
@ -2349,7 +2411,7 @@ impl<T: Input> Scanner<T> {
|
|||
}
|
||||
self.insert_token(
|
||||
sk.token_number - self.tokens_parsed,
|
||||
Token(self.mark, TokenType::FlowMappingStart),
|
||||
Token(Span::empty(self.mark), TokenType::FlowMappingStart),
|
||||
);
|
||||
}
|
||||
|
||||
|
@ -2367,7 +2429,7 @@ impl<T: Input> Scanner<T> {
|
|||
} else {
|
||||
if is_implicit_flow_mapping {
|
||||
self.tokens
|
||||
.push_back(Token(self.mark, TokenType::FlowMappingStart));
|
||||
.push_back(Token(Span::empty(self.mark), TokenType::FlowMappingStart));
|
||||
}
|
||||
// The ':' indicator follows a complex key.
|
||||
if self.flow_level == 0 {
|
||||
|
@ -2393,7 +2455,8 @@ impl<T: Input> Scanner<T> {
|
|||
self.disallow_simple_key();
|
||||
}
|
||||
}
|
||||
self.tokens.push_back(Token(start_mark, TokenType::Value));
|
||||
self.tokens
|
||||
.push_back(Token(Span::empty(start_mark), TokenType::Value));
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
@ -2428,8 +2491,8 @@ impl<T: Input> Scanner<T> {
|
|||
self.indent = col as isize;
|
||||
let tokens_parsed = self.tokens_parsed;
|
||||
match number {
|
||||
Some(n) => self.insert_token(n - tokens_parsed, Token(mark, tok)),
|
||||
None => self.tokens.push_back(Token(mark, tok)),
|
||||
Some(n) => self.insert_token(n - tokens_parsed, Token(Span::empty(mark), tok)),
|
||||
None => self.tokens.push_back(Token(Span::empty(mark), tok)),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -2447,7 +2510,8 @@ impl<T: Input> Scanner<T> {
|
|||
let indent = self.indents.pop().unwrap();
|
||||
self.indent = indent.indent;
|
||||
if indent.needs_block_end {
|
||||
self.tokens.push_back(Token(self.mark, TokenType::BlockEnd));
|
||||
self.tokens
|
||||
.push_back(Token(Span::empty(self.mark), TokenType::BlockEnd));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -2520,7 +2584,7 @@ impl<T: Input> Scanner<T> {
|
|||
self.flow_mapping_started = false;
|
||||
*implicit_mapping = ImplicitMappingState::Possible;
|
||||
self.tokens
|
||||
.push_back(Token(mark, TokenType::FlowMappingEnd));
|
||||
.push_back(Token(Span::empty(mark), TokenType::FlowMappingEnd));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
136
parser/tests/span.rs
Normal file
136
parser/tests/span.rs
Normal file
|
@ -0,0 +1,136 @@
|
|||
#![allow(clippy::bool_assert_comparison)]
|
||||
#![allow(clippy::float_cmp)]
|
||||
|
||||
use saphyr_parser::{Event, Parser, ScanError};
|
||||
|
||||
/// Run the parser through the string, returning all the scalars, and collecting their spans to strings.
|
||||
fn run_parser_and_deref_scalar_spans(input: &str) -> Result<Vec<(String, String)>, ScanError> {
|
||||
let mut events = vec![];
|
||||
for x in Parser::new_from_str(input) {
|
||||
let x = x?;
|
||||
if let Event::Scalar(s, ..) = x.0 {
|
||||
let start = x.1.start.index();
|
||||
let end = x.1.end.index();
|
||||
let input_s = input.chars().skip(start).take(end - start).collect();
|
||||
events.push((s, input_s));
|
||||
}
|
||||
}
|
||||
Ok(events)
|
||||
}
|
||||
|
||||
/// Run the parser through the string, returning all the scalars, and collecting their spans to strings.
|
||||
fn run_parser_and_deref_seq_spans(input: &str) -> Result<Vec<String>, ScanError> {
|
||||
let mut events = vec![];
|
||||
let mut start_stack = vec![];
|
||||
for x in Parser::new_from_str(input) {
|
||||
let x = x?;
|
||||
match x.0 {
|
||||
Event::SequenceStart(_, _) => start_stack.push(x.1.start.index()),
|
||||
Event::SequenceEnd => {
|
||||
let start = start_stack.pop().unwrap();
|
||||
let end = x.1.end.index();
|
||||
let input_s = input.chars().skip(start).take(end - start).collect();
|
||||
events.push(input_s);
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
Ok(events)
|
||||
}
|
||||
|
||||
/// Borrow each owned `(String, String)` pair as a `(&str, &str)` pair, keeping the order.
///
/// This lets callers compare the helpers' owned output against arrays of string literals.
fn deref_pairs(pairs: &[(String, String)]) -> Vec<(&str, &str)> {
    let mut borrowed = Vec::with_capacity(pairs.len());
    for (left, right) in pairs {
        borrowed.push((left.as_str(), right.as_str()));
    }
    borrowed
}
|
||||
|
||||
/// Plain scalars: each span must map back to exactly the scalar's own text, leaving out
/// the blanks, tabs and indicators around it.
#[test]
fn test_plain() {
    // Inputs whose only scalars are the key `foo` and the value `bar`.
    for input in ["foo: bar", "foo: bar ", "foo : \t bar\t "] {
        let spans = run_parser_and_deref_scalar_spans(input).unwrap();
        assert_eq!(deref_pairs(&spans), [("foo", "foo"), ("bar", "bar")]);
    }

    // A block sequence as the value of the key.
    let spans = run_parser_and_deref_scalar_spans("foo : \n - bar\n - baz\n ").unwrap();
    assert_eq!(
        deref_pairs(&spans),
        [("foo", "foo"), ("bar", "bar"), ("baz", "baz")]
    );
}
|
||||
|
||||
#[test]
fn test_plain_utf8() {
    // Multi-byte characters: the span must still select exactly the scalar's characters.
    let pairs = run_parser_and_deref_scalar_spans("a: 你好").unwrap();
    let expected = [("a", "a"), ("你好", "你好")];
    assert_eq!(deref_pairs(&pairs), expected);
}
|
||||
|
||||
#[test]
fn test_quoted() {
    // The span of a quoted scalar includes its quote characters; the value does not.
    let cases = [
        (r#"foo: "bar""#, [("foo", "foo"), ("bar", r#""bar""#)]),
        (r#"foo: 'bar'"#, [("foo", "foo"), ("bar", r#"'bar'"#)]),
        (r#"foo: "bar ""#, [("foo", "foo"), ("bar ", r#""bar ""#)]),
    ];
    for (input, expected) in cases {
        let pairs = run_parser_and_deref_scalar_spans(input).unwrap();
        assert_eq!(deref_pairs(&pairs), expected);
    }
}
|
||||
|
||||
#[test]
fn test_literal() {
    // Literal block scalars (`|`): the span covers the content lines, not the indicator.
    let cases = [
        ("foo: |\n bar", [("foo", "foo"), ("bar\n", "bar")]),
        (
            "foo: |\n bar\n more",
            [("foo", "foo"), ("bar\nmore\n", "bar\n more")],
        ),
    ];
    for (input, expected) in cases {
        let pairs = run_parser_and_deref_scalar_spans(input).unwrap();
        assert_eq!(deref_pairs(&pairs), expected);
    }
}
|
||||
|
||||
#[test]
fn test_block() {
    // Folded block scalars (`>`): the span covers the content lines, not the indicator.
    let cases = [
        ("foo: >\n bar", [("foo", "foo"), ("bar\n", "bar")]),
        (
            "foo: >\n bar\n more",
            [("foo", "foo"), ("bar more\n", "bar\n more")],
        ),
    ];
    for (input, expected) in cases {
        let pairs = run_parser_and_deref_scalar_spans(input).unwrap();
        assert_eq!(deref_pairs(&pairs), expected);
    }
}
|
||||
|
||||
#[test]
fn test_seq() {
    // Each case pairs an input with the expected source text of every sequence, in the order
    // the sequences end (inner sequences first).
    let cases: [(&str, &[&str]); 4] = [
        ("[a, b]", &["[a, b]"]),
        ("- a\n- b", &["- a\n- b"]),
        ("foo:\n - a\n - b", &["- a\n - b"]),
        (
            "foo:\n - a\n - bar:\n - b\n - c",
            &["b\n - c", "- a\n - bar:\n - b\n - c"],
        ),
    ];
    for (input, expected) in cases {
        assert_eq!(run_parser_and_deref_seq_spans(input).unwrap(), expected);
    }
}
|
|
@ -2,21 +2,21 @@ use std::env;
|
|||
use std::fs::File;
|
||||
use std::io::prelude::*;
|
||||
|
||||
use saphyr_parser::{Event, MarkedEventReceiver, Marker, Parser};
|
||||
use saphyr_parser::{Event, Parser, Span, SpannedEventReceiver};
|
||||
|
||||
#[derive(Debug)]
|
||||
struct EventSink {
|
||||
events: Vec<(Event, Marker)>,
|
||||
events: Vec<(Event, Span)>,
|
||||
}
|
||||
|
||||
impl MarkedEventReceiver for EventSink {
|
||||
fn on_event(&mut self, ev: Event, mark: Marker) {
|
||||
impl SpannedEventReceiver for EventSink {
|
||||
fn on_event(&mut self, ev: Event, span: Span) {
|
||||
eprintln!(" \x1B[;34m\u{21B3} {:?}\x1B[;m", &ev);
|
||||
self.events.push((ev, mark));
|
||||
self.events.push((ev, span));
|
||||
}
|
||||
}
|
||||
|
||||
fn str_to_events(yaml: &str) -> Vec<(Event, Marker)> {
|
||||
fn str_to_events(yaml: &str) -> Vec<(Event, Span)> {
|
||||
let mut sink = EventSink { events: Vec::new() };
|
||||
let mut parser = Parser::new_from_str(yaml);
|
||||
// Load events using our sink as the receiver.
|
||||
|
|
|
@ -1,15 +1,13 @@
|
|||
#![allow(clippy::cast_possible_truncation, clippy::cast_precision_loss)]
|
||||
|
||||
use saphyr_parser::{
|
||||
Event, Marker, {MarkedEventReceiver, Parser},
|
||||
};
|
||||
use saphyr_parser::{Event, Parser, Span, SpannedEventReceiver};
|
||||
use std::{env, fs::File, io::prelude::*};
|
||||
|
||||
/// A sink which discards any event sent.
|
||||
struct NullSink {}
|
||||
|
||||
impl MarkedEventReceiver for NullSink {
|
||||
fn on_event(&mut self, _: Event, _: Marker) {}
|
||||
impl SpannedEventReceiver for NullSink {
|
||||
fn on_event(&mut self, _: Event, _: Span) {}
|
||||
}
|
||||
|
||||
/// Parse the given input, returning elapsed time in nanoseconds.
|
||||
|
|
|
@ -2,13 +2,13 @@ use std::env;
|
|||
use std::fs::File;
|
||||
use std::io::prelude::*;
|
||||
|
||||
use saphyr_parser::{Event, MarkedEventReceiver, Marker, Parser};
|
||||
use saphyr_parser::{Event, Parser, Span, SpannedEventReceiver};
|
||||
|
||||
/// A sink which discards any event sent.
|
||||
struct NullSink {}
|
||||
|
||||
impl MarkedEventReceiver for NullSink {
|
||||
fn on_event(&mut self, _: Event, _: Marker) {}
|
||||
impl SpannedEventReceiver for NullSink {
|
||||
fn on_event(&mut self, _: Event, _: Span) {}
|
||||
}
|
||||
|
||||
fn main() {
|
||||
|
|
Loading…
Reference in a new issue