From 737f9d0ab1094721b84fd5ab7cf5e01a214c5d22 Mon Sep 17 00:00:00 2001 From: Ethiraric Date: Tue, 2 Apr 2024 23:06:15 +0200 Subject: [PATCH] Improve Parser's `next()` option handling. --- parser/src/parser.rs | 119 +++++++++++++++++++++----------- parser/tests/basic.rs | 7 +- parser/tests/yaml-test-suite.rs | 8 +-- 3 files changed, 79 insertions(+), 55 deletions(-) diff --git a/parser/src/parser.rs b/parser/src/parser.rs index 9211392..ece9ec5 100644 --- a/parser/src/parser.rs +++ b/parser/src/parser.rs @@ -101,17 +101,35 @@ impl Event { /// A YAML parser. #[derive(Debug)] pub struct Parser { + /// The underlying scanner from which we pull tokens. scanner: Scanner, + /// The stack of _previous_ states we were in. + /// + /// States are pushed in the context of subobjects to this stack. The top-most element is the + /// state to come back to when exiting the current state. states: Vec, + /// The state in which we currently are. state: State, + /// The next token from the scanner. token: Option, + /// The next YAML event to emit. current: Option<(Event, Marker)>, + /// Anchors that have been encountered in the YAML document. anchors: HashMap, - anchor_id: usize, + /// Next ID available for an anchor. + /// + /// Every anchor is given a unique ID. IDs are handed out incrementally, starting at 1. This is + /// the ID to return for the next anchor, i.e. one more than the count of anchor IDs emitted. + anchor_id_count: usize, /// The tag directives (`%TAG`) the parser has encountered. /// /// Key is the handle, and value is the prefix. tags: HashMap, + /// Whether we have emitted [`Event::StreamEnd`]. + /// + /// Emitted means that it has been returned from [`Self::next_event`]. If it is only stored in + /// [`Self::current`] (e.g. after a call to [`Self::peek`]), this is still `false`. + stream_end_emitted: bool, /// Make tags global across all documents. 
keep_tags: bool, } @@ -227,8 +245,9 @@ impl> Parser { anchors: HashMap::new(), // valid anchor_id starts from 1 - anchor_id: 1, + anchor_id_count: 1, tags: HashMap::new(), + stream_end_emitted: false, keep_tags: false, } } @@ -265,21 +284,46 @@ impl> Parser { /// /// Any subsequent call to [`Parser::peek`] will return the same value, until a call to /// [`Iterator::next`] or [`Parser::load`]. + /// /// # Errors /// Returns `ScanError` when loading the next event fails. - pub fn peek(&mut self) -> Result<&(Event, Marker), ScanError> { + pub fn peek(&mut self) -> Option> { if let Some(ref x) = self.current { - Ok(x) + Some(Ok(x)) } else { - self.current = Some(self.next_token()?); - self.peek() + if self.stream_end_emitted { + return None; + } + match self.next_event_impl() { + Ok(token) => self.current = Some(token), + Err(e) => return Some(Err(e)), + } + self.current.as_ref().map(Ok) } } /// Try to load the next event and return it, consuming it from `self`. + /// /// # Errors /// Returns `ScanError` when loading the next event fails. - pub fn next_token(&mut self) -> ParseResult { + pub fn next_event(&mut self) -> Option { + if self.stream_end_emitted { + return None; + } + + let tok = self.next_event_impl(); + if matches!(tok, Ok((Event::StreamEnd, _))) { + self.stream_end_emitted = true; + } + Some(tok) + } + + /// Implementation function for [`Self::next_event`] without the `Option`. + /// + /// [`Self::next_event`] should conform to the expectations of an [`Iterator`] and return an + /// option. This burdens the parser code. This function is used internally when an option is + /// undesirable. + fn next_event_impl(&mut self) -> ParseResult { match self.current.take() { None => self.parse(), Some(v) => Ok(v), @@ -320,7 +364,6 @@ impl> Parser { /// Skip the next token from the scanner. fn skip(&mut self) { self.token = None; - //self.peek_token(); } /// Pops the top-most state and make it the current state. 
fn pop_state(&mut self) { @@ -336,7 +379,6 @@ impl> Parser { return Ok((Event::StreamEnd, self.scanner.mark())); } let (ev, mark) = self.state_machine()?; - // println!("EV {:?}", ev); Ok((ev, mark)) } @@ -358,7 +400,7 @@ impl> Parser { multi: bool, ) -> Result<(), ScanError> { if !self.scanner.stream_started() { - let (ev, mark) = self.next_token()?; + let (ev, mark) = self.next_event_impl()?; if ev != Event::StreamStart { return Err(ScanError::new(mark, "did not find expected ")); } @@ -371,7 +413,7 @@ impl> Parser { return Ok(()); } loop { - let (ev, mark) = self.next_token()?; + let (ev, mark) = self.next_event_impl()?; if ev == Event::StreamEnd { recv.on_event(ev, mark); return Ok(()); @@ -400,11 +442,11 @@ impl> Parser { } recv.on_event(first_ev, mark); - let (ev, mark) = self.next_token()?; + let (ev, mark) = self.next_event_impl()?; self.load_node(ev, mark, recv)?; // DOCUMENT-END is expected. - let (ev, mark) = self.next_token()?; + let (ev, mark) = self.next_event_impl()?; assert_eq!(ev, Event::DocumentEnd); recv.on_event(ev, mark); @@ -438,17 +480,17 @@ impl> Parser { } fn load_mapping(&mut self, recv: &mut R) -> Result<(), ScanError> { - let (mut key_ev, mut key_mark) = self.next_token()?; + let (mut key_ev, mut key_mark) = self.next_event_impl()?; while key_ev != Event::MappingEnd { // key self.load_node(key_ev, key_mark, recv)?; // value - let (ev, mark) = self.next_token()?; + let (ev, mark) = self.next_event_impl()?; self.load_node(ev, mark, recv)?; // next event - let (ev, mark) = self.next_token()?; + let (ev, mark) = self.next_event_impl()?; key_ev = ev; key_mark = mark; } @@ -457,12 +499,12 @@ impl> Parser { } fn load_sequence(&mut self, recv: &mut R) -> Result<(), ScanError> { - let (mut ev, mut mark) = self.next_token()?; + let (mut ev, mut mark) = self.next_event_impl()?; while ev != Event::SequenceEnd { self.load_node(ev, mark, recv)?; // next event - let (next_ev, next_mark) = self.next_token()?; + let (next_ev, next_mark) = 
self.next_event_impl()?; ev = next_ev; mark = next_mark; } @@ -657,8 +699,8 @@ impl> Parser { // return Err(ScanError::new(*mark, // "while parsing anchor, found duplicated anchor")); // } - let new_id = self.anchor_id; - self.anchor_id += 1; + let new_id = self.anchor_id_count; + self.anchor_id_count += 1; self.anchors.insert(name, new_id); new_id } @@ -1086,7 +1128,7 @@ impl> Iterator for Parser { type Item = Result<(Event, Marker), ScanError>; fn next(&mut self) -> Option { - Some(self.next_token()) + self.next_event() } } @@ -1109,12 +1151,14 @@ a4: a5: *x "; let mut p = Parser::new_from_str(s); - while { - let event_peek = p.peek().unwrap().clone(); - let event = p.next_token().unwrap(); + loop { + let event_peek = p.peek().unwrap().unwrap().clone(); + let event = p.next_event().unwrap().unwrap(); assert_eq!(event, event_peek); - event.0 != Event::StreamEnd - } {} + if event.0 == Event::StreamEnd { + break; + } + } } #[test] @@ -1129,27 +1173,18 @@ baz: "qux" "#; for x in Parser::new_from_str(text).keep_tags(true) { let x = x.unwrap(); - match x.0 { - Event::StreamEnd => break, - Event::MappingStart(_, tag) => { - let tag = tag.unwrap(); - assert_eq!(tag.handle, "tag:test,2024:"); - } - _ => (), + if let Event::MappingStart(_, tag) = x.0 { + let tag = tag.unwrap(); + assert_eq!(tag.handle, "tag:test,2024:"); } } for x in Parser::new_from_str(text).keep_tags(false) { - match x { - Err(..) 
=> { - // Test successful - return; - } - Ok((Event::StreamEnd, _)) => { - panic!("Test failed, did not encounter error") - } - _ => (), + if x.is_err() { + // Test successful + return; } } + panic!("Test failed, did not encounter error") } } diff --git a/parser/tests/basic.rs b/parser/tests/basic.rs index ebb39cf..b2d40d9 100644 --- a/parser/tests/basic.rs +++ b/parser/tests/basic.rs @@ -10,12 +10,7 @@ use saphyr_parser::{Event, Parser, ScanError, TScalarStyle}; fn run_parser(input: &str) -> Result, ScanError> { let mut events = vec![]; for x in Parser::new_from_str(input) { - let x = x?; - let end = x.0 == Event::StreamEnd; - events.push(x.0); - if end { - break; - } + events.push(x?.0); } Ok(events) } diff --git a/parser/tests/yaml-test-suite.rs b/parser/tests/yaml-test-suite.rs index 5067514..2032ea9 100644 --- a/parser/tests/yaml-test-suite.rs +++ b/parser/tests/yaml-test-suite.rs @@ -122,13 +122,7 @@ fn load_tests_from_file(entry: &DirEntry) -> Result>> { fn parse_to_events(source: &str) -> Result, ScanError> { let mut reporter = EventReporter::new(); for x in Parser::new_from_str(source) { - match x? { - (Event::StreamEnd, _) => { - reporter.on_event(Event::StreamEnd); - break; - } - (x, _) => reporter.on_event(x), - } + reporter.on_event(x?.0); } Ok(reporter.events) }