Improve Parser's next() option handling.

This commit is contained in:
Ethiraric 2024-04-02 23:06:15 +02:00
parent 40670f3c48
commit 737f9d0ab1
3 changed files with 79 additions and 55 deletions

View file

@ -101,17 +101,35 @@ impl Event {
/// A YAML parser.
#[derive(Debug)]
pub struct Parser<T> {
/// The underlying scanner from which we pull tokens.
scanner: Scanner<T>,
/// The stack of _previous_ states we were in.
///
/// States are pushed in the context of subobjects to this stack. The top-most element is the
/// state in which to come back to when exiting the current state.
states: Vec<State>,
/// The state in which we currently are.
state: State,
/// The next token from the scanner.
token: Option<Token>,
/// The next YAML event to emit.
current: Option<(Event, Marker)>,
/// Anchors that have been encountered in the YAML document.
anchors: HashMap<String, usize>,
anchor_id: usize,
/// Next ID available for an anchor.
///
/// Every anchor is given a unique ID. We use an incrementing ID and this is both the ID to
/// return for the next anchor and the count of anchor IDs emitted.
anchor_id_count: usize,
/// The tag directives (`%TAG`) the parser has encountered.
///
/// Key is the handle, and value is the prefix.
tags: HashMap<String, String>,
/// Whether we have emitted [`Event::StreamEnd`].
///
/// Emitted means that it has been returned from [`Self::next_token`]. If it is stored in
/// [`Self::token`], this is set to `false`.
stream_end_emitted: bool,
/// Make tags global across all documents.
keep_tags: bool,
}
@ -227,8 +245,9 @@ impl<T: Iterator<Item = char>> Parser<T> {
anchors: HashMap::new(),
// valid anchor_id starts from 1
anchor_id: 1,
anchor_id_count: 1,
tags: HashMap::new(),
stream_end_emitted: false,
keep_tags: false,
}
}
@ -265,21 +284,46 @@ impl<T: Iterator<Item = char>> Parser<T> {
///
/// Any subsequent call to [`Parser::peek`] will return the same value, until a call to
/// [`Iterator::next`] or [`Parser::load`].
///
/// # Errors
/// Returns `ScanError` when loading the next event fails.
pub fn peek(&mut self) -> Result<&(Event, Marker), ScanError> {
pub fn peek(&mut self) -> Option<Result<&(Event, Marker), ScanError>> {
if let Some(ref x) = self.current {
Ok(x)
Some(Ok(x))
} else {
self.current = Some(self.next_token()?);
self.peek()
if self.stream_end_emitted {
return None;
}
match self.next_event_impl() {
Ok(token) => self.current = Some(token),
Err(e) => return Some(Err(e)),
}
self.current.as_ref().map(Ok)
}
}
/// Try to load the next event and return it, consuming it from `self`.
///
/// # Errors
/// Returns `ScanError` when loading the next event fails.
pub fn next_token(&mut self) -> ParseResult {
pub fn next_event(&mut self) -> Option<ParseResult> {
if self.stream_end_emitted {
return None;
}
let tok = self.next_event_impl();
if matches!(tok, Ok((Event::StreamEnd, _))) {
self.stream_end_emitted = true;
}
Some(tok)
}
/// Implementation function for [`Self::next_event`] without the `Option`.
///
/// [`Self::next_event`] should conform to the expectations of an [`Iterator`] and return an
/// option. This burdens the parser code. This function is used internally when an option is
/// undesirable.
fn next_event_impl(&mut self) -> ParseResult {
match self.current.take() {
None => self.parse(),
Some(v) => Ok(v),
@ -320,7 +364,6 @@ impl<T: Iterator<Item = char>> Parser<T> {
/// Skip the next token from the scanner.
fn skip(&mut self) {
self.token = None;
//self.peek_token();
}
/// Pops the top-most state and make it the current state.
fn pop_state(&mut self) {
@ -336,7 +379,6 @@ impl<T: Iterator<Item = char>> Parser<T> {
return Ok((Event::StreamEnd, self.scanner.mark()));
}
let (ev, mark) = self.state_machine()?;
// println!("EV {:?}", ev);
Ok((ev, mark))
}
@ -358,7 +400,7 @@ impl<T: Iterator<Item = char>> Parser<T> {
multi: bool,
) -> Result<(), ScanError> {
if !self.scanner.stream_started() {
let (ev, mark) = self.next_token()?;
let (ev, mark) = self.next_event_impl()?;
if ev != Event::StreamStart {
return Err(ScanError::new(mark, "did not find expected <stream-start>"));
}
@ -371,7 +413,7 @@ impl<T: Iterator<Item = char>> Parser<T> {
return Ok(());
}
loop {
let (ev, mark) = self.next_token()?;
let (ev, mark) = self.next_event_impl()?;
if ev == Event::StreamEnd {
recv.on_event(ev, mark);
return Ok(());
@ -400,11 +442,11 @@ impl<T: Iterator<Item = char>> Parser<T> {
}
recv.on_event(first_ev, mark);
let (ev, mark) = self.next_token()?;
let (ev, mark) = self.next_event_impl()?;
self.load_node(ev, mark, recv)?;
// DOCUMENT-END is expected.
let (ev, mark) = self.next_token()?;
let (ev, mark) = self.next_event_impl()?;
assert_eq!(ev, Event::DocumentEnd);
recv.on_event(ev, mark);
@ -438,17 +480,17 @@ impl<T: Iterator<Item = char>> Parser<T> {
}
fn load_mapping<R: MarkedEventReceiver>(&mut self, recv: &mut R) -> Result<(), ScanError> {
let (mut key_ev, mut key_mark) = self.next_token()?;
let (mut key_ev, mut key_mark) = self.next_event_impl()?;
while key_ev != Event::MappingEnd {
// key
self.load_node(key_ev, key_mark, recv)?;
// value
let (ev, mark) = self.next_token()?;
let (ev, mark) = self.next_event_impl()?;
self.load_node(ev, mark, recv)?;
// next event
let (ev, mark) = self.next_token()?;
let (ev, mark) = self.next_event_impl()?;
key_ev = ev;
key_mark = mark;
}
@ -457,12 +499,12 @@ impl<T: Iterator<Item = char>> Parser<T> {
}
fn load_sequence<R: MarkedEventReceiver>(&mut self, recv: &mut R) -> Result<(), ScanError> {
let (mut ev, mut mark) = self.next_token()?;
let (mut ev, mut mark) = self.next_event_impl()?;
while ev != Event::SequenceEnd {
self.load_node(ev, mark, recv)?;
// next event
let (next_ev, next_mark) = self.next_token()?;
let (next_ev, next_mark) = self.next_event_impl()?;
ev = next_ev;
mark = next_mark;
}
@ -657,8 +699,8 @@ impl<T: Iterator<Item = char>> Parser<T> {
// return Err(ScanError::new(*mark,
// "while parsing anchor, found duplicated anchor"));
// }
let new_id = self.anchor_id;
self.anchor_id += 1;
let new_id = self.anchor_id_count;
self.anchor_id_count += 1;
self.anchors.insert(name, new_id);
new_id
}
@ -1086,7 +1128,7 @@ impl<T: Iterator<Item = char>> Iterator for Parser<T> {
type Item = Result<(Event, Marker), ScanError>;
fn next(&mut self) -> Option<Self::Item> {
Some(self.next_token())
self.next_event()
}
}
@ -1109,12 +1151,14 @@ a4:
a5: *x
";
let mut p = Parser::new_from_str(s);
while {
let event_peek = p.peek().unwrap().clone();
let event = p.next_token().unwrap();
loop {
let event_peek = p.peek().unwrap().unwrap().clone();
let event = p.next_event().unwrap().unwrap();
assert_eq!(event, event_peek);
event.0 != Event::StreamEnd
} {}
if event.0 == Event::StreamEnd {
break;
}
}
}
#[test]
@ -1129,27 +1173,18 @@ baz: "qux"
"#;
for x in Parser::new_from_str(text).keep_tags(true) {
let x = x.unwrap();
match x.0 {
Event::StreamEnd => break,
Event::MappingStart(_, tag) => {
if let Event::MappingStart(_, tag) = x.0 {
let tag = tag.unwrap();
assert_eq!(tag.handle, "tag:test,2024:");
}
_ => (),
}
}
for x in Parser::new_from_str(text).keep_tags(false) {
match x {
Err(..) => {
if x.is_err() {
// Test successful
return;
}
Ok((Event::StreamEnd, _)) => {
}
panic!("Test failed, did not encounter error")
}
_ => (),
}
}
}
}

View file

@ -10,12 +10,7 @@ use saphyr_parser::{Event, Parser, ScanError, TScalarStyle};
fn run_parser(input: &str) -> Result<Vec<Event>, ScanError> {
let mut events = vec![];
for x in Parser::new_from_str(input) {
let x = x?;
let end = x.0 == Event::StreamEnd;
events.push(x.0);
if end {
break;
}
events.push(x?.0);
}
Ok(events)
}

View file

@ -122,13 +122,7 @@ fn load_tests_from_file(entry: &DirEntry) -> Result<Vec<Test<YamlTest>>> {
fn parse_to_events(source: &str) -> Result<Vec<String>, ScanError> {
let mut reporter = EventReporter::new();
for x in Parser::new_from_str(source) {
match x? {
(Event::StreamEnd, _) => {
reporter.on_event(Event::StreamEnd);
break;
}
(x, _) => reporter.on_event(x),
}
reporter.on_event(x?.0);
}
Ok(reporter.events)
}