Use spans instead of markers (#3)

This commit is contained in:
jneem 2024-08-05 22:08:23 +07:00 committed by GitHub
parent 4a5241e0bb
commit 926fdfb01b
7 changed files with 333 additions and 131 deletions

View file

@ -40,5 +40,5 @@ mod parser;
mod scanner;
pub use crate::input::BufferedInput;
pub use crate::parser::{Event, EventReceiver, MarkedEventReceiver, Parser, Tag};
pub use crate::scanner::{Marker, ScanError, TScalarStyle};
pub use crate::parser::{Event, EventReceiver, Parser, SpannedEventReceiver, Tag};
pub use crate::scanner::{Marker, ScanError, Span, TScalarStyle};

View file

@ -6,8 +6,9 @@
use crate::{
input::{str::StrInput, Input},
scanner::{Marker, ScanError, Scanner, TScalarStyle, Token, TokenType},
scanner::{ScanError, Scanner, Span, TScalarStyle, Token, TokenType},
};
use std::collections::HashMap;
#[derive(Clone, Copy, PartialEq, Debug, Eq)]
@ -116,7 +117,7 @@ pub struct Parser<T: Input> {
/// The next token from the scanner.
token: Option<Token>,
/// The next YAML event to emit.
current: Option<(Event, Marker)>,
current: Option<(Event, Span)>,
/// Anchors that have been encountered in the YAML document.
anchors: HashMap<String, usize>,
/// Next ID available for an anchor.
@ -142,8 +143,8 @@ pub struct Parser<T: Input> {
/// The low-level parsing API is event-based (a push parser), calling [`EventReceiver::on_event`]
/// for each YAML [`Event`] that occurs.
/// The [`EventReceiver`] trait only receives events. In order to receive both events and their
/// location in the source, use [`MarkedEventReceiver`]. Note that [`EventReceiver`]s implement
/// [`MarkedEventReceiver`] automatically.
/// location in the source, use [`SpannedEventReceiver`]. Note that [`EventReceiver`]s implement
/// [`SpannedEventReceiver`] automatically.
///
/// # Event hierarchy
/// The event stream starts with an [`Event::StreamStart`] event followed by an
@ -213,20 +214,20 @@ pub trait EventReceiver {
/// Trait to be implemented for using the low-level parsing API.
///
/// Functionally similar to [`EventReceiver`], but receives a [`Marker`] as well as the event.
pub trait MarkedEventReceiver {
/// Functionally similar to [`EventReceiver`], but receives a [`Span`] as well as the event.
pub trait SpannedEventReceiver {
/// Handler called for each event that occurs.
fn on_event(&mut self, ev: Event, _mark: Marker);
fn on_event(&mut self, ev: Event, span: Span);
}
impl<R: EventReceiver> MarkedEventReceiver for R {
fn on_event(&mut self, ev: Event, _mark: Marker) {
impl<R: EventReceiver> SpannedEventReceiver for R {
fn on_event(&mut self, ev: Event, _span: Span) {
self.on_event(ev);
}
}
/// A convenience alias for a `Result` of a parser event.
pub type ParseResult = Result<(Event, Marker), ScanError>;
pub type ParseResult = Result<(Event, Span), ScanError>;
impl<'a> Parser<StrInput<'a>> {
/// Create a new instance of a parser from a &str.
@ -290,7 +291,7 @@ impl<T: Input> Parser<T> {
///
/// # Errors
/// Returns `ScanError` when loading the next event fails.
pub fn peek(&mut self) -> Option<Result<&(Event, Marker), ScanError>> {
pub fn peek(&mut self) -> Option<Result<&(Event, Span), ScanError>> {
if let Some(ref x) = self.current {
Some(Ok(x))
} else {
@ -379,7 +380,7 @@ impl<T: Input> Parser<T> {
fn parse(&mut self) -> ParseResult {
if self.state == State::End {
return Ok((Event::StreamEnd, self.scanner.mark()));
return Ok((Event::StreamEnd, Span::empty(self.scanner.mark())));
}
let (ev, mark) = self.state_machine()?;
Ok((ev, mark))
@ -393,40 +394,40 @@ impl<T: Input> Parser<T> {
/// If `multi` is set to `true`, the parser will allow parsing of multiple YAML documents
/// inside the stream.
///
/// Note that any [`EventReceiver`] is also a [`MarkedEventReceiver`], so implementing the
/// Note that any [`EventReceiver`] is also a [`SpannedEventReceiver`], so implementing the
/// former is enough to call this function.
/// # Errors
/// Returns `ScanError` when loading fails.
pub fn load<R: MarkedEventReceiver>(
pub fn load<R: SpannedEventReceiver>(
&mut self,
recv: &mut R,
multi: bool,
) -> Result<(), ScanError> {
if !self.scanner.stream_started() {
let (ev, mark) = self.next_event_impl()?;
let (ev, span) = self.next_event_impl()?;
if ev != Event::StreamStart {
return Err(ScanError::new_str(
mark,
span.start,
"did not find expected <stream-start>",
));
}
recv.on_event(ev, mark);
recv.on_event(ev, span);
}
if self.scanner.stream_ended() {
// XXX has parsed?
recv.on_event(Event::StreamEnd, self.scanner.mark());
recv.on_event(Event::StreamEnd, Span::empty(self.scanner.mark()));
return Ok(());
}
loop {
let (ev, mark) = self.next_event_impl()?;
let (ev, span) = self.next_event_impl()?;
if ev == Event::StreamEnd {
recv.on_event(ev, mark);
recv.on_event(ev, span);
return Ok(());
}
// clear anchors before a new document
self.anchors.clear();
self.load_document(ev, mark, recv)?;
self.load_document(ev, span, recv)?;
if !multi {
break;
}
@ -434,22 +435,22 @@ impl<T: Input> Parser<T> {
Ok(())
}
fn load_document<R: MarkedEventReceiver>(
fn load_document<R: SpannedEventReceiver>(
&mut self,
first_ev: Event,
mark: Marker,
span: Span,
recv: &mut R,
) -> Result<(), ScanError> {
if first_ev != Event::DocumentStart {
return Err(ScanError::new_str(
mark,
span.start,
"did not find expected <document-start>",
));
}
recv.on_event(first_ev, mark);
recv.on_event(first_ev, span);
let (ev, mark) = self.next_event_impl()?;
self.load_node(ev, mark, recv)?;
let (ev, span) = self.next_event_impl()?;
self.load_node(ev, span, recv)?;
// DOCUMENT-END is expected.
let (ev, mark) = self.next_event_impl()?;
@ -459,23 +460,23 @@ impl<T: Input> Parser<T> {
Ok(())
}
fn load_node<R: MarkedEventReceiver>(
fn load_node<R: SpannedEventReceiver>(
&mut self,
first_ev: Event,
mark: Marker,
span: Span,
recv: &mut R,
) -> Result<(), ScanError> {
match first_ev {
Event::Alias(..) | Event::Scalar(..) => {
recv.on_event(first_ev, mark);
recv.on_event(first_ev, span);
Ok(())
}
Event::SequenceStart(..) => {
recv.on_event(first_ev, mark);
recv.on_event(first_ev, span);
self.load_sequence(recv)
}
Event::MappingStart(..) => {
recv.on_event(first_ev, mark);
recv.on_event(first_ev, span);
self.load_mapping(recv)
}
_ => {
@ -485,7 +486,7 @@ impl<T: Input> Parser<T> {
}
}
fn load_mapping<R: MarkedEventReceiver>(&mut self, recv: &mut R) -> Result<(), ScanError> {
fn load_mapping<R: SpannedEventReceiver>(&mut self, recv: &mut R) -> Result<(), ScanError> {
let (mut key_ev, mut key_mark) = self.next_event_impl()?;
while key_ev != Event::MappingEnd {
// key
@ -504,7 +505,7 @@ impl<T: Input> Parser<T> {
Ok(())
}
fn load_sequence<R: MarkedEventReceiver>(&mut self, recv: &mut R) -> Result<(), ScanError> {
fn load_sequence<R: SpannedEventReceiver>(&mut self, recv: &mut R) -> Result<(), ScanError> {
let (mut ev, mut mark) = self.next_event_impl()?;
while ev != Event::SequenceEnd {
self.load_node(ev, mark, recv)?;
@ -562,13 +563,13 @@ impl<T: Input> Parser<T> {
fn stream_start(&mut self) -> ParseResult {
match *self.peek_token()? {
Token(mark, TokenType::StreamStart(_)) => {
Token(span, TokenType::StreamStart(_)) => {
self.state = State::ImplicitDocumentStart;
self.skip();
Ok((Event::StreamStart, mark))
Ok((Event::StreamStart, span))
}
Token(mark, _) => Err(ScanError::new_str(
mark,
Token(span, _) => Err(ScanError::new_str(
span.start,
"did not find expected <stream-start>",
)),
}
@ -580,10 +581,10 @@ impl<T: Input> Parser<T> {
}
match *self.peek_token()? {
Token(mark, TokenType::StreamEnd) => {
Token(span, TokenType::StreamEnd) => {
self.state = State::End;
self.skip();
Ok((Event::StreamEnd, mark))
Ok((Event::StreamEnd, span))
}
Token(
_,
@ -594,11 +595,11 @@ impl<T: Input> Parser<T> {
// explicit document
self.explicit_document_start()
}
Token(mark, _) if implicit => {
Token(span, _) if implicit => {
self.parser_process_directives()?;
self.push_state(State::DocumentEnd);
self.state = State::BlockNode;
Ok((Event::DocumentStart, mark))
Ok((Event::DocumentStart, span))
}
_ => {
// explicit document
@ -612,20 +613,23 @@ impl<T: Input> Parser<T> {
loop {
let mut tags = HashMap::new();
match self.peek_token()? {
Token(mark, TokenType::VersionDirective(_, _)) => {
Token(span, TokenType::VersionDirective(_, _)) => {
// XXX parsing with warning according to spec
//if major != 1 || minor > 2 {
// return Err(ScanError::new_str(tok.0,
// "found incompatible YAML document"));
//}
if version_directive_received {
return Err(ScanError::new_str(*mark, "duplicate version directive"));
return Err(ScanError::new_str(
span.start,
"duplicate version directive",
));
}
version_directive_received = true;
}
Token(mark, TokenType::TagDirective(handle, prefix)) => {
if tags.contains_key(handle) {
return Err(ScanError::new_str(*mark, "the TAG directive must only be given at most once per handle in the same document"));
return Err(ScanError::new_str(mark.start, "the TAG directive must only be given at most once per handle in the same document"));
}
tags.insert(handle.to_string(), prefix.to_string());
}
@ -646,8 +650,8 @@ impl<T: Input> Parser<T> {
self.skip();
Ok((Event::DocumentStart, mark))
}
Token(mark, _) => Err(ScanError::new_str(
mark,
Token(span, _) => Err(ScanError::new_str(
span.start,
"did not find expected <document start>",
)),
}
@ -673,13 +677,13 @@ impl<T: Input> Parser<T> {
fn document_end(&mut self) -> ParseResult {
let mut explicit_end = false;
let marker: Marker = match *self.peek_token()? {
Token(mark, TokenType::DocumentEnd) => {
let span: Span = match *self.peek_token()? {
Token(span, TokenType::DocumentEnd) => {
explicit_end = true;
self.skip();
mark
span
}
Token(mark, _) => mark,
Token(span, _) => span,
};
if !self.keep_tags {
@ -688,21 +692,21 @@ impl<T: Input> Parser<T> {
if explicit_end {
self.state = State::ImplicitDocumentStart;
} else {
if let Token(mark, TokenType::VersionDirective(..) | TokenType::TagDirective(..)) =
if let Token(span, TokenType::VersionDirective(..) | TokenType::TagDirective(..)) =
*self.peek_token()?
{
return Err(ScanError::new_str(
mark,
span.start,
"missing explicit document end marker before directive",
));
}
self.state = State::DocumentStart;
}
Ok((Event::DocumentEnd, marker))
Ok((Event::DocumentEnd, span))
}
fn register_anchor(&mut self, name: String, _: &Marker) -> usize {
fn register_anchor(&mut self, name: String, _: &Span) -> usize {
// anchors can be overridden/reused
// if self.anchors.contains_key(name) {
// return Err(ScanError::new_str(*mark,
@ -720,25 +724,25 @@ impl<T: Input> Parser<T> {
match *self.peek_token()? {
Token(_, TokenType::Alias(_)) => {
self.pop_state();
if let Token(mark, TokenType::Alias(name)) = self.fetch_token() {
if let Token(span, TokenType::Alias(name)) = self.fetch_token() {
match self.anchors.get(&name) {
None => {
return Err(ScanError::new_str(
mark,
span.start,
"while parsing node, found unknown anchor",
))
}
Some(id) => return Ok((Event::Alias(*id), mark)),
Some(id) => return Ok((Event::Alias(*id), span)),
}
}
unreachable!()
}
Token(_, TokenType::Anchor(_)) => {
if let Token(mark, TokenType::Anchor(name)) = self.fetch_token() {
anchor_id = self.register_anchor(name, &mark);
if let Token(span, TokenType::Anchor(name)) = self.fetch_token() {
anchor_id = self.register_anchor(name, &span);
if let TokenType::Tag(..) = self.peek_token()?.1 {
if let TokenType::Tag(handle, suffix) = self.fetch_token().1 {
tag = Some(self.resolve_tag(mark, &handle, suffix)?);
tag = Some(self.resolve_tag(span, &handle, suffix)?);
} else {
unreachable!()
}
@ -797,8 +801,8 @@ impl<T: Input> Parser<T> {
self.pop_state();
Ok((Event::empty_scalar_with_anchor(anchor_id, tag), mark))
}
Token(mark, _) => Err(ScanError::new_str(
mark,
Token(span, _) => Err(ScanError::new_str(
span.start,
"while parsing a node, did not find expected node content",
)),
}
@ -835,8 +839,8 @@ impl<T: Input> Parser<T> {
self.skip();
Ok((Event::MappingEnd, mark))
}
Token(mark, _) => Err(ScanError::new_str(
mark,
Token(span, _) => Err(ScanError::new_str(
span.start,
"while parsing a block mapping, did not find expected key",
)),
}
@ -870,15 +874,15 @@ impl<T: Input> Parser<T> {
let _ = self.peek_token()?;
self.skip();
}
let marker: Marker = {
let span: Span = {
match *self.peek_token()? {
Token(mark, TokenType::FlowMappingEnd) => mark,
Token(mark, _) => {
if !first {
match *self.peek_token()? {
Token(_, TokenType::FlowEntry) => self.skip(),
Token(mark, _) => return Err(ScanError::new_str(
mark,
Token(span, _) => return Err(ScanError::new_str(
span.start,
"while parsing a flow mapping, did not find expected ',' or '}'",
)),
}
@ -916,18 +920,18 @@ impl<T: Input> Parser<T> {
self.pop_state();
self.skip();
Ok((Event::MappingEnd, marker))
Ok((Event::MappingEnd, span))
}
fn flow_mapping_value(&mut self, empty: bool) -> ParseResult {
let mark: Marker = {
let span: Span = {
if empty {
let Token(mark, _) = *self.peek_token()?;
self.state = State::FlowMappingKey;
return Ok((Event::empty_scalar(), mark));
}
match *self.peek_token()? {
Token(marker, TokenType::Value) => {
Token(span, TokenType::Value) => {
self.skip();
match self.peek_token()?.1 {
TokenType::FlowEntry | TokenType::FlowMappingEnd => {}
@ -936,14 +940,14 @@ impl<T: Input> Parser<T> {
return self.parse_node(false, false);
}
}
marker
span
}
Token(marker, _) => marker,
}
};
self.state = State::FlowMappingKey;
Ok((Event::empty_scalar(), mark))
Ok((Event::empty_scalar(), span))
}
fn flow_sequence_entry(&mut self, first: bool) -> ParseResult {
@ -962,9 +966,9 @@ impl<T: Input> Parser<T> {
Token(_, TokenType::FlowEntry) if !first => {
self.skip();
}
Token(mark, _) if !first => {
Token(span, _) if !first => {
return Err(ScanError::new_str(
mark,
span.start,
"while parsing a flow sequence, expected ',' or ']'",
));
}
@ -1035,8 +1039,8 @@ impl<T: Input> Parser<T> {
self.parse_node(true, false)
}
}
Token(mark, _) => Err(ScanError::new_str(
mark,
Token(span, _) => Err(ScanError::new_str(
span.start,
"while parsing a block collection, did not find expected '-' indicator",
)),
}
@ -1080,11 +1084,11 @@ impl<T: Input> Parser<T> {
#[allow(clippy::unnecessary_wraps)]
fn flow_sequence_entry_mapping_end(&mut self) -> ParseResult {
self.state = State::FlowSequenceEntry;
Ok((Event::MappingEnd, self.scanner.mark()))
Ok((Event::MappingEnd, Span::empty(self.scanner.mark())))
}
/// Resolve a tag from the handle and the suffix.
fn resolve_tag(&self, mark: Marker, handle: &str, suffix: String) -> Result<Tag, ScanError> {
fn resolve_tag(&self, span: Span, handle: &str, suffix: String) -> Result<Tag, ScanError> {
if handle == "!!" {
// "!!" is a shorthand for "tag:yaml.org,2002:". However, that default can be
// overridden.
@ -1121,7 +1125,7 @@ impl<T: Input> Parser<T> {
// If the handle is of the form "!foo!", this cannot be a local handle and we need
// to error.
if handle.len() >= 2 && handle.starts_with('!') && handle.ends_with('!') {
Err(ScanError::new_str(mark, "the handle wasn't declared"))
Err(ScanError::new_str(span.start, "the handle wasn't declared"))
} else {
Ok(Tag {
handle: handle.to_string(),
@ -1134,7 +1138,7 @@ impl<T: Input> Parser<T> {
}
impl<T: Input> Iterator for Parser<T> {
type Item = Result<(Event, Marker), ScanError>;
type Item = Result<(Event, Span), ScanError>;
fn next(&mut self) -> Option<Self::Item> {
self.next_event()

View file

@ -79,6 +79,37 @@ impl Marker {
}
}
/// A range of locations in a YAML document.
///
/// The range is right-open: `start` points at the first character covered by
/// the span, `end` at the first character past it.
#[derive(Clone, Copy, PartialEq, Debug, Eq, Default)]
pub struct Span {
    /// The start (inclusive) of the range.
    pub start: Marker,
    /// The end (exclusive) of the range.
    pub end: Marker,
}

impl Span {
    /// Create a new [`Span`] for the given range.
    #[must_use]
    pub fn new(start: Marker, end: Marker) -> Span {
        Span { start, end }
    }

    /// Create an empty [`Span`] at a given location.
    ///
    /// An empty span doesn't contain any characters, but its position may still be meaningful.
    /// For example, for an indented sequence [`SequenceEnd`] has a location but an empty span.
    ///
    /// [`SequenceEnd`]: crate::Event::SequenceEnd
    #[must_use]
    pub fn empty(mark: Marker) -> Span {
        // Start and end coincide: the span covers zero characters.
        Span {
            start: mark,
            end: mark,
        }
    }
}
/// An error that occurred while scanning.
#[derive(Clone, PartialEq, Debug, Eq)]
pub struct ScanError {
@ -204,7 +235,7 @@ pub enum TokenType {
/// A scanner token.
#[derive(Clone, PartialEq, Debug, Eq)]
pub struct Token(pub Marker, pub TokenType);
pub struct Token(pub Span, pub TokenType);
/// A scalar that was parsed and may correspond to a simple key.
///
@ -874,8 +905,10 @@ impl<T: Input> Scanner<T> {
self.indent = -1;
self.stream_start_produced = true;
self.allow_simple_key();
self.tokens
.push_back(Token(mark, TokenType::StreamStart(TEncoding::Utf8)));
self.tokens.push_back(Token(
Span::empty(mark),
TokenType::StreamStart(TEncoding::Utf8),
));
self.simple_keys.push(SimpleKey::new(Marker::new(0, 0, 0)));
}
@ -900,7 +933,7 @@ impl<T: Input> Scanner<T> {
self.disallow_simple_key();
self.tokens
.push_back(Token(self.mark, TokenType::StreamEnd));
.push_back(Token(Span::empty(self.mark), TokenType::StreamEnd));
Ok(())
}
@ -932,7 +965,7 @@ impl<T: Input> Scanner<T> {
self.mark.col += line_len;
// XXX return an empty TagDirective token
Token(
start_mark,
Span::new(start_mark, self.mark),
TokenType::TagDirective(String::new(), String::new()),
)
// return Err(ScanError::new_str(start_mark,
@ -971,7 +1004,10 @@ impl<T: Input> Scanner<T> {
let minor = self.scan_version_directive_number(mark)?;
Ok(Token(*mark, TokenType::VersionDirective(major, minor)))
Ok(Token(
Span::new(*mark, self.mark),
TokenType::VersionDirective(major, minor),
))
}
fn scan_directive_name(&mut self) -> Result<String, ScanError> {
@ -1040,7 +1076,10 @@ impl<T: Input> Scanner<T> {
self.input.lookahead(1);
if self.input.next_is_blank_or_breakz() {
Ok(Token(*mark, TokenType::TagDirective(handle, prefix)))
Ok(Token(
Span::new(*mark, self.mark),
TokenType::TagDirective(handle, prefix),
))
} else {
Err(ScanError::new_str(
*mark,
@ -1093,7 +1132,10 @@ impl<T: Input> Scanner<T> {
|| (self.flow_level > 0 && self.input.next_is_flow())
{
// XXX: ex 7.2, an empty scalar can follow a secondary tag
Ok(Token(start_mark, TokenType::Tag(handle, suffix)))
Ok(Token(
Span::new(start_mark, self.mark),
TokenType::Tag(handle, suffix),
))
} else {
Err(ScanError::new_str(
start_mark,
@ -1323,11 +1365,12 @@ impl<T: Input> Scanner<T> {
return Err(ScanError::new_str(start_mark, "while scanning an anchor or alias, did not find expected alphabetic or numeric character"));
}
if alias {
Ok(Token(start_mark, TokenType::Alias(string)))
let tok = if alias {
TokenType::Alias(string)
} else {
Ok(Token(start_mark, TokenType::Anchor(string)))
}
TokenType::Anchor(string)
};
Ok(Token(Span::new(start_mark, self.mark), tok))
}
fn fetch_flow_collection_start(&mut self, tok: TokenType) -> ScanResult {
@ -1351,7 +1394,8 @@ impl<T: Input> Scanner<T> {
self.skip_ws_to_eol(SkipTabs::Yes)?;
self.tokens.push_back(Token(start_mark, tok));
self.tokens
.push_back(Token(Span::new(start_mark, self.mark), tok));
Ok(())
}
@ -1380,7 +1424,8 @@ impl<T: Input> Scanner<T> {
self.adjacent_value_allowed_at = self.mark.index;
}
self.tokens.push_back(Token(start_mark, tok));
self.tokens
.push_back(Token(Span::new(start_mark, self.mark), tok));
Ok(())
}
@ -1395,8 +1440,10 @@ impl<T: Input> Scanner<T> {
self.skip_non_blank();
self.skip_ws_to_eol(SkipTabs::Yes)?;
self.tokens
.push_back(Token(start_mark, TokenType::FlowEntry));
self.tokens.push_back(Token(
Span::new(start_mark, self.mark),
TokenType::FlowEntry,
));
Ok(())
}
@ -1438,9 +1485,12 @@ impl<T: Input> Scanner<T> {
}
// ???, fixes test G9HC.
if let Some(Token(mark, TokenType::Anchor(..) | TokenType::Tag(..))) = self.tokens.back() {
if self.mark.col == 0 && mark.col == 0 && self.indent > -1 {
return Err(ScanError::new_str(*mark, "invalid indentation for anchor"));
if let Some(Token(span, TokenType::Anchor(..) | TokenType::Tag(..))) = self.tokens.back() {
if self.mark.col == 0 && span.start.col == 0 && self.indent > -1 {
return Err(ScanError::new_str(
span.start,
"invalid indentation for anchor",
));
}
}
@ -1470,7 +1520,7 @@ impl<T: Input> Scanner<T> {
self.allow_simple_key();
self.tokens
.push_back(Token(self.mark, TokenType::BlockEntry));
.push_back(Token(Span::empty(self.mark), TokenType::BlockEntry));
Ok(())
}
@ -1484,7 +1534,7 @@ impl<T: Input> Scanner<T> {
self.skip_n_non_blank(3);
self.tokens.push_back(Token(mark, t));
self.tokens.push_back(Token(Span::new(mark, self.mark), t));
Ok(())
}
@ -1616,7 +1666,10 @@ impl<T: Input> Scanner<T> {
// Otherwise, the newline after chomping is ignored.
Chomping::Keep => trailing_breaks,
};
return Ok(Token(start_mark, TokenType::Scalar(style, contents)));
return Ok(Token(
Span::new(start_mark, self.mark),
TokenType::Scalar(style, contents),
));
}
if self.mark.col < indent && (self.mark.col as isize) > self.indent {
@ -1682,7 +1735,10 @@ impl<T: Input> Scanner<T> {
string.push_str(&trailing_breaks);
}
Ok(Token(start_mark, TokenType::Scalar(style, string)))
Ok(Token(
Span::new(start_mark, self.mark),
TokenType::Scalar(style, string),
))
}
/// Retrieve the contents of the line, parsing it as a block scalar.
@ -1963,7 +2019,10 @@ impl<T: Input> Scanner<T> {
} else {
TScalarStyle::DoubleQuoted
};
Ok(Token(start_mark, TokenType::Scalar(style, string)))
Ok(Token(
Span::new(start_mark, self.mark),
TokenType::Scalar(style, string),
))
}
/// Consume successive non-whitespace characters from a flow scalar.
@ -2120,6 +2179,7 @@ impl<T: Input> Scanner<T> {
self.buf_whitespaces.clear();
self.buf_leading_break.clear();
self.buf_trailing_breaks.clear();
let mut end_mark = self.mark;
loop {
self.input.lookahead(4);
@ -2182,6 +2242,7 @@ impl<T: Input> Scanner<T> {
self.skip_non_blank();
}
}
end_mark = self.mark;
}
// We may reach the end of a plain scalar if:
@ -2238,7 +2299,7 @@ impl<T: Input> Scanner<T> {
}
Ok(Token(
start_mark,
Span::new(start_mark, end_mark),
TokenType::Scalar(TScalarStyle::Plain, string),
))
}
@ -2280,7 +2341,8 @@ impl<T: Input> Scanner<T> {
"tabs disallowed in this context",
));
}
self.tokens.push_back(Token(start_mark, TokenType::Key));
self.tokens
.push_back(Token(Span::new(start_mark, self.mark), TokenType::Key));
Ok(())
}
@ -2338,7 +2400,7 @@ impl<T: Input> Scanner<T> {
if sk.possible {
// insert simple key
let tok = Token(sk.mark, TokenType::Key);
let tok = Token(Span::empty(sk.mark), TokenType::Key);
self.insert_token(sk.token_number - self.tokens_parsed, tok);
if is_implicit_flow_mapping {
if sk.mark.line < start_mark.line {
@ -2349,7 +2411,7 @@ impl<T: Input> Scanner<T> {
}
self.insert_token(
sk.token_number - self.tokens_parsed,
Token(self.mark, TokenType::FlowMappingStart),
Token(Span::empty(self.mark), TokenType::FlowMappingStart),
);
}
@ -2367,7 +2429,7 @@ impl<T: Input> Scanner<T> {
} else {
if is_implicit_flow_mapping {
self.tokens
.push_back(Token(self.mark, TokenType::FlowMappingStart));
.push_back(Token(Span::empty(self.mark), TokenType::FlowMappingStart));
}
// The ':' indicator follows a complex key.
if self.flow_level == 0 {
@ -2393,7 +2455,8 @@ impl<T: Input> Scanner<T> {
self.disallow_simple_key();
}
}
self.tokens.push_back(Token(start_mark, TokenType::Value));
self.tokens
.push_back(Token(Span::empty(start_mark), TokenType::Value));
Ok(())
}
@ -2428,8 +2491,8 @@ impl<T: Input> Scanner<T> {
self.indent = col as isize;
let tokens_parsed = self.tokens_parsed;
match number {
Some(n) => self.insert_token(n - tokens_parsed, Token(mark, tok)),
None => self.tokens.push_back(Token(mark, tok)),
Some(n) => self.insert_token(n - tokens_parsed, Token(Span::empty(mark), tok)),
None => self.tokens.push_back(Token(Span::empty(mark), tok)),
}
}
}
@ -2447,7 +2510,8 @@ impl<T: Input> Scanner<T> {
let indent = self.indents.pop().unwrap();
self.indent = indent.indent;
if indent.needs_block_end {
self.tokens.push_back(Token(self.mark, TokenType::BlockEnd));
self.tokens
.push_back(Token(Span::empty(self.mark), TokenType::BlockEnd));
}
}
}
@ -2520,7 +2584,7 @@ impl<T: Input> Scanner<T> {
self.flow_mapping_started = false;
*implicit_mapping = ImplicitMappingState::Possible;
self.tokens
.push_back(Token(mark, TokenType::FlowMappingEnd));
.push_back(Token(Span::empty(mark), TokenType::FlowMappingEnd));
}
}
}

136
parser/tests/span.rs Normal file
View file

@ -0,0 +1,136 @@
#![allow(clippy::bool_assert_comparison)]
#![allow(clippy::float_cmp)]
use saphyr_parser::{Event, Parser, ScanError};
/// Run the parser over `input`, returning each scalar's parsed value paired
/// with the source text its span covers.
fn run_parser_and_deref_scalar_spans(input: &str) -> Result<Vec<(String, String)>, ScanError> {
    let mut scalars = vec![];
    for event in Parser::new_from_str(input) {
        let (ev, span) = event?;
        if let Event::Scalar(value, ..) = ev {
            // Marker indices are character offsets, so slice by chars rather
            // than bytes.
            let (start, end) = (span.start.index(), span.end.index());
            let covered: String = input.chars().skip(start).take(end - start).collect();
            scalars.push((value, covered));
        }
    }
    Ok(scalars)
}
/// Run the parser over `input`, returning the source text covered by each
/// sequence's span. Nested sequences end first, so they appear innermost-first.
fn run_parser_and_deref_seq_spans(input: &str) -> Result<Vec<String>, ScanError> {
    let mut sequences = vec![];
    // Start offsets of the sequences currently open, innermost last.
    let mut start_stack = vec![];
    for event in Parser::new_from_str(input) {
        let (ev, span) = event?;
        match ev {
            Event::SequenceStart(_, _) => start_stack.push(span.start.index()),
            Event::SequenceEnd => {
                let start = start_stack.pop().unwrap();
                let end = span.end.index();
                // Marker indices are character offsets; slice by chars.
                let covered: String = input.chars().skip(start).take(end - start).collect();
                sequences.push(covered);
            }
            _ => {}
        }
    }
    Ok(sequences)
}
/// Borrow both halves of every `(String, String)` pair as `&str`, so expected
/// values in assertions can be written as plain string literals.
fn deref_pairs(pairs: &[(String, String)]) -> Vec<(&str, &str)> {
    let mut out = Vec::with_capacity(pairs.len());
    for (value, covered) in pairs {
        out.push((value.as_str(), covered.as_str()));
    }
    out
}
#[test]
fn test_plain() {
    // Plain scalar spans cover exactly the scalar's own characters, never the
    // surrounding whitespace or indicators.
    for input in ["foo: bar", "foo: bar ", "foo : \t bar\t "] {
        let pairs = run_parser_and_deref_scalar_spans(input).unwrap();
        assert_eq!(deref_pairs(&pairs), [("foo", "foo"), ("bar", "bar")]);
    }
    // Scalars inside a block sequence still map back to just their own text.
    let pairs = run_parser_and_deref_scalar_spans("foo : \n - bar\n - baz\n ").unwrap();
    assert_eq!(
        deref_pairs(&pairs),
        [("foo", "foo"), ("bar", "bar"), ("baz", "baz")]
    );
}
#[test]
fn test_plain_utf8() {
    // Spans are character-based, so multi-byte UTF-8 scalars must still map
    // back to exactly their own source text.
    let pairs = run_parser_and_deref_scalar_spans("a: 你好").unwrap();
    assert_eq!(deref_pairs(&pairs), [("a", "a"), ("你好", "你好")]);
}
#[test]
fn test_quoted() {
    // For quoted scalars the span includes the quotes, while the parsed value
    // does not.
    let cases = [
        (r#"foo: "bar""#, "bar", r#""bar""#),
        ("foo: 'bar'", "bar", "'bar'"),
        (r#"foo: "bar ""#, "bar ", r#""bar ""#),
    ];
    for (input, value, quoted) in cases {
        let pairs = run_parser_and_deref_scalar_spans(input).unwrap();
        assert_eq!(deref_pairs(&pairs), [("foo", "foo"), (value, quoted)]);
    }
}
#[test]
fn test_literal() {
    // Literal block scalars ('|') keep line breaks in the parsed value; the
    // span covers the raw, still-indented block body.
    let pairs = run_parser_and_deref_scalar_spans("foo: |\n bar").unwrap();
    assert_eq!(deref_pairs(&pairs), [("foo", "foo"), ("bar\n", "bar")]);

    let pairs = run_parser_and_deref_scalar_spans("foo: |\n bar\n more").unwrap();
    assert_eq!(
        deref_pairs(&pairs),
        [("foo", "foo"), ("bar\nmore\n", "bar\n more")]
    );
}
#[test]
fn test_block() {
    // Folded block scalars ('>') join lines with spaces in the parsed value;
    // the span covers the raw block body.
    let pairs = run_parser_and_deref_scalar_spans("foo: >\n bar").unwrap();
    assert_eq!(deref_pairs(&pairs), [("foo", "foo"), ("bar\n", "bar")]);

    let pairs = run_parser_and_deref_scalar_spans("foo: >\n bar\n more").unwrap();
    assert_eq!(
        deref_pairs(&pairs),
        [("foo", "foo"), ("bar more\n", "bar\n more")]
    );
}
#[test]
fn test_seq() {
    // A sequence's span runs from its first entry to the end of its last one;
    // nested sequences finish first and so are reported innermost-first.
    let seq_spans = |input| run_parser_and_deref_seq_spans(input).unwrap();
    assert_eq!(seq_spans("[a, b]"), ["[a, b]"]);
    assert_eq!(seq_spans("- a\n- b"), ["- a\n- b"]);
    assert_eq!(seq_spans("foo:\n - a\n - b"), ["- a\n - b"]);
    assert_eq!(
        seq_spans("foo:\n - a\n - bar:\n - b\n - c"),
        ["b\n - c", "- a\n - bar:\n - b\n - c"]
    );
}

View file

@ -2,21 +2,21 @@ use std::env;
use std::fs::File;
use std::io::prelude::*;
use saphyr_parser::{Event, MarkedEventReceiver, Marker, Parser};
use saphyr_parser::{Event, Parser, Span, SpannedEventReceiver};
#[derive(Debug)]
struct EventSink {
events: Vec<(Event, Marker)>,
events: Vec<(Event, Span)>,
}
impl MarkedEventReceiver for EventSink {
fn on_event(&mut self, ev: Event, mark: Marker) {
impl SpannedEventReceiver for EventSink {
fn on_event(&mut self, ev: Event, span: Span) {
eprintln!(" \x1B[;34m\u{21B3} {:?}\x1B[;m", &ev);
self.events.push((ev, mark));
self.events.push((ev, span));
}
}
fn str_to_events(yaml: &str) -> Vec<(Event, Marker)> {
fn str_to_events(yaml: &str) -> Vec<(Event, Span)> {
let mut sink = EventSink { events: Vec::new() };
let mut parser = Parser::new_from_str(yaml);
// Load events using our sink as the receiver.

View file

@ -1,15 +1,13 @@
#![allow(clippy::cast_possible_truncation, clippy::cast_precision_loss)]
use saphyr_parser::{
Event, Marker, {MarkedEventReceiver, Parser},
};
use saphyr_parser::{Event, Parser, Span, SpannedEventReceiver};
use std::{env, fs::File, io::prelude::*};
/// A sink which discards any event sent.
struct NullSink {}
impl MarkedEventReceiver for NullSink {
fn on_event(&mut self, _: Event, _: Marker) {}
impl SpannedEventReceiver for NullSink {
fn on_event(&mut self, _: Event, _: Span) {}
}
/// Parse the given input, returning elapsed time in nanoseconds.

View file

@ -2,13 +2,13 @@ use std::env;
use std::fs::File;
use std::io::prelude::*;
use saphyr_parser::{Event, MarkedEventReceiver, Marker, Parser};
use saphyr_parser::{Event, Parser, Span, SpannedEventReceiver};
/// A sink which discards any event sent.
struct NullSink {}
impl MarkedEventReceiver for NullSink {
fn on_event(&mut self, _: Event, _: Marker) {}
impl SpannedEventReceiver for NullSink {
fn on_event(&mut self, _: Event, _: Span) {}
}
fn main() {