Improve Parser's next()
option handling.
This commit is contained in:
parent
40670f3c48
commit
737f9d0ab1
3 changed files with 79 additions and 55 deletions
|
@ -101,17 +101,35 @@ impl Event {
|
||||||
/// A YAML parser.
|
/// A YAML parser.
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
pub struct Parser<T> {
|
pub struct Parser<T> {
|
||||||
|
/// The underlying scanner from which we pull tokens.
|
||||||
scanner: Scanner<T>,
|
scanner: Scanner<T>,
|
||||||
|
/// The stack of _previous_ states we were in.
|
||||||
|
///
|
||||||
|
/// States are pushed in the context of subobjects to this stack. The top-most element is the
|
||||||
|
/// state to come back to when exiting the current state.
|
||||||
states: Vec<State>,
|
states: Vec<State>,
|
||||||
|
/// The state in which we currently are.
|
||||||
state: State,
|
state: State,
|
||||||
|
/// The next token from the scanner.
|
||||||
token: Option<Token>,
|
token: Option<Token>,
|
||||||
|
/// The next YAML event to emit.
|
||||||
current: Option<(Event, Marker)>,
|
current: Option<(Event, Marker)>,
|
||||||
|
/// Anchors that have been encountered in the YAML document.
|
||||||
anchors: HashMap<String, usize>,
|
anchors: HashMap<String, usize>,
|
||||||
anchor_id: usize,
|
/// Next ID available for an anchor.
|
||||||
|
///
|
||||||
|
/// Every anchor is given a unique ID. We use an incrementing ID and this is both the ID to
|
||||||
|
/// return for the next anchor and the count of anchor IDs emitted.
|
||||||
|
anchor_id_count: usize,
|
||||||
/// The tag directives (`%TAG`) the parser has encountered.
|
/// The tag directives (`%TAG`) the parser has encountered.
|
||||||
///
|
///
|
||||||
/// Key is the handle, and value is the prefix.
|
/// Key is the handle, and value is the prefix.
|
||||||
tags: HashMap<String, String>,
|
tags: HashMap<String, String>,
|
||||||
|
/// Whether we have emitted [`Event::StreamEnd`].
|
||||||
|
///
|
||||||
|
/// Emitted means that it has been returned from [`Self::next_event`]. If it is stored in
|
||||||
|
/// [`Self::current`], this is set to `false`.
|
||||||
|
stream_end_emitted: bool,
|
||||||
/// Make tags global across all documents.
|
/// Make tags global across all documents.
|
||||||
keep_tags: bool,
|
keep_tags: bool,
|
||||||
}
|
}
|
||||||
|
@ -227,8 +245,9 @@ impl<T: Iterator<Item = char>> Parser<T> {
|
||||||
|
|
||||||
anchors: HashMap::new(),
|
anchors: HashMap::new(),
|
||||||
// valid anchor_id starts from 1
|
// valid anchor_id starts from 1
|
||||||
anchor_id: 1,
|
anchor_id_count: 1,
|
||||||
tags: HashMap::new(),
|
tags: HashMap::new(),
|
||||||
|
stream_end_emitted: false,
|
||||||
keep_tags: false,
|
keep_tags: false,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -265,21 +284,46 @@ impl<T: Iterator<Item = char>> Parser<T> {
|
||||||
///
|
///
|
||||||
/// Any subsequent call to [`Parser::peek`] will return the same value, until a call to
|
/// Any subsequent call to [`Parser::peek`] will return the same value, until a call to
|
||||||
/// [`Iterator::next`] or [`Parser::load`].
|
/// [`Iterator::next`] or [`Parser::load`].
|
||||||
|
///
|
||||||
/// # Errors
|
/// # Errors
|
||||||
/// Returns `ScanError` when loading the next event fails.
|
/// Returns `ScanError` when loading the next event fails.
|
||||||
pub fn peek(&mut self) -> Result<&(Event, Marker), ScanError> {
|
pub fn peek(&mut self) -> Option<Result<&(Event, Marker), ScanError>> {
|
||||||
if let Some(ref x) = self.current {
|
if let Some(ref x) = self.current {
|
||||||
Ok(x)
|
Some(Ok(x))
|
||||||
} else {
|
} else {
|
||||||
self.current = Some(self.next_token()?);
|
if self.stream_end_emitted {
|
||||||
self.peek()
|
return None;
|
||||||
|
}
|
||||||
|
match self.next_event_impl() {
|
||||||
|
Ok(token) => self.current = Some(token),
|
||||||
|
Err(e) => return Some(Err(e)),
|
||||||
|
}
|
||||||
|
self.current.as_ref().map(Ok)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Try to load the next event and return it, consuming it from `self`.
|
/// Try to load the next event and return it, consuming it from `self`.
|
||||||
|
///
|
||||||
/// # Errors
|
/// # Errors
|
||||||
/// Returns `ScanError` when loading the next event fails.
|
/// Returns `ScanError` when loading the next event fails.
|
||||||
pub fn next_token(&mut self) -> ParseResult {
|
pub fn next_event(&mut self) -> Option<ParseResult> {
|
||||||
|
if self.stream_end_emitted {
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
|
||||||
|
let tok = self.next_event_impl();
|
||||||
|
if matches!(tok, Ok((Event::StreamEnd, _))) {
|
||||||
|
self.stream_end_emitted = true;
|
||||||
|
}
|
||||||
|
Some(tok)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Implementation function for [`Self::next_event`] without the `Option`.
|
||||||
|
///
|
||||||
|
/// [`Self::next_event`] should conform to the expectations of an [`Iterator`] and return an
|
||||||
|
/// option. This burdens the parser code. This function is used internally when an option is
|
||||||
|
/// undesirable.
|
||||||
|
fn next_event_impl(&mut self) -> ParseResult {
|
||||||
match self.current.take() {
|
match self.current.take() {
|
||||||
None => self.parse(),
|
None => self.parse(),
|
||||||
Some(v) => Ok(v),
|
Some(v) => Ok(v),
|
||||||
|
@ -320,7 +364,6 @@ impl<T: Iterator<Item = char>> Parser<T> {
|
||||||
/// Skip the next token from the scanner.
|
/// Skip the next token from the scanner.
|
||||||
fn skip(&mut self) {
|
fn skip(&mut self) {
|
||||||
self.token = None;
|
self.token = None;
|
||||||
//self.peek_token();
|
|
||||||
}
|
}
|
||||||
/// Pops the top-most state and makes it the current state.
|
/// Pops the top-most state and makes it the current state.
|
||||||
fn pop_state(&mut self) {
|
fn pop_state(&mut self) {
|
||||||
|
@ -336,7 +379,6 @@ impl<T: Iterator<Item = char>> Parser<T> {
|
||||||
return Ok((Event::StreamEnd, self.scanner.mark()));
|
return Ok((Event::StreamEnd, self.scanner.mark()));
|
||||||
}
|
}
|
||||||
let (ev, mark) = self.state_machine()?;
|
let (ev, mark) = self.state_machine()?;
|
||||||
// println!("EV {:?}", ev);
|
|
||||||
Ok((ev, mark))
|
Ok((ev, mark))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -358,7 +400,7 @@ impl<T: Iterator<Item = char>> Parser<T> {
|
||||||
multi: bool,
|
multi: bool,
|
||||||
) -> Result<(), ScanError> {
|
) -> Result<(), ScanError> {
|
||||||
if !self.scanner.stream_started() {
|
if !self.scanner.stream_started() {
|
||||||
let (ev, mark) = self.next_token()?;
|
let (ev, mark) = self.next_event_impl()?;
|
||||||
if ev != Event::StreamStart {
|
if ev != Event::StreamStart {
|
||||||
return Err(ScanError::new(mark, "did not find expected <stream-start>"));
|
return Err(ScanError::new(mark, "did not find expected <stream-start>"));
|
||||||
}
|
}
|
||||||
|
@ -371,7 +413,7 @@ impl<T: Iterator<Item = char>> Parser<T> {
|
||||||
return Ok(());
|
return Ok(());
|
||||||
}
|
}
|
||||||
loop {
|
loop {
|
||||||
let (ev, mark) = self.next_token()?;
|
let (ev, mark) = self.next_event_impl()?;
|
||||||
if ev == Event::StreamEnd {
|
if ev == Event::StreamEnd {
|
||||||
recv.on_event(ev, mark);
|
recv.on_event(ev, mark);
|
||||||
return Ok(());
|
return Ok(());
|
||||||
|
@ -400,11 +442,11 @@ impl<T: Iterator<Item = char>> Parser<T> {
|
||||||
}
|
}
|
||||||
recv.on_event(first_ev, mark);
|
recv.on_event(first_ev, mark);
|
||||||
|
|
||||||
let (ev, mark) = self.next_token()?;
|
let (ev, mark) = self.next_event_impl()?;
|
||||||
self.load_node(ev, mark, recv)?;
|
self.load_node(ev, mark, recv)?;
|
||||||
|
|
||||||
// DOCUMENT-END is expected.
|
// DOCUMENT-END is expected.
|
||||||
let (ev, mark) = self.next_token()?;
|
let (ev, mark) = self.next_event_impl()?;
|
||||||
assert_eq!(ev, Event::DocumentEnd);
|
assert_eq!(ev, Event::DocumentEnd);
|
||||||
recv.on_event(ev, mark);
|
recv.on_event(ev, mark);
|
||||||
|
|
||||||
|
@ -438,17 +480,17 @@ impl<T: Iterator<Item = char>> Parser<T> {
|
||||||
}
|
}
|
||||||
|
|
||||||
fn load_mapping<R: MarkedEventReceiver>(&mut self, recv: &mut R) -> Result<(), ScanError> {
|
fn load_mapping<R: MarkedEventReceiver>(&mut self, recv: &mut R) -> Result<(), ScanError> {
|
||||||
let (mut key_ev, mut key_mark) = self.next_token()?;
|
let (mut key_ev, mut key_mark) = self.next_event_impl()?;
|
||||||
while key_ev != Event::MappingEnd {
|
while key_ev != Event::MappingEnd {
|
||||||
// key
|
// key
|
||||||
self.load_node(key_ev, key_mark, recv)?;
|
self.load_node(key_ev, key_mark, recv)?;
|
||||||
|
|
||||||
// value
|
// value
|
||||||
let (ev, mark) = self.next_token()?;
|
let (ev, mark) = self.next_event_impl()?;
|
||||||
self.load_node(ev, mark, recv)?;
|
self.load_node(ev, mark, recv)?;
|
||||||
|
|
||||||
// next event
|
// next event
|
||||||
let (ev, mark) = self.next_token()?;
|
let (ev, mark) = self.next_event_impl()?;
|
||||||
key_ev = ev;
|
key_ev = ev;
|
||||||
key_mark = mark;
|
key_mark = mark;
|
||||||
}
|
}
|
||||||
|
@ -457,12 +499,12 @@ impl<T: Iterator<Item = char>> Parser<T> {
|
||||||
}
|
}
|
||||||
|
|
||||||
fn load_sequence<R: MarkedEventReceiver>(&mut self, recv: &mut R) -> Result<(), ScanError> {
|
fn load_sequence<R: MarkedEventReceiver>(&mut self, recv: &mut R) -> Result<(), ScanError> {
|
||||||
let (mut ev, mut mark) = self.next_token()?;
|
let (mut ev, mut mark) = self.next_event_impl()?;
|
||||||
while ev != Event::SequenceEnd {
|
while ev != Event::SequenceEnd {
|
||||||
self.load_node(ev, mark, recv)?;
|
self.load_node(ev, mark, recv)?;
|
||||||
|
|
||||||
// next event
|
// next event
|
||||||
let (next_ev, next_mark) = self.next_token()?;
|
let (next_ev, next_mark) = self.next_event_impl()?;
|
||||||
ev = next_ev;
|
ev = next_ev;
|
||||||
mark = next_mark;
|
mark = next_mark;
|
||||||
}
|
}
|
||||||
|
@ -657,8 +699,8 @@ impl<T: Iterator<Item = char>> Parser<T> {
|
||||||
// return Err(ScanError::new(*mark,
|
// return Err(ScanError::new(*mark,
|
||||||
// "while parsing anchor, found duplicated anchor"));
|
// "while parsing anchor, found duplicated anchor"));
|
||||||
// }
|
// }
|
||||||
let new_id = self.anchor_id;
|
let new_id = self.anchor_id_count;
|
||||||
self.anchor_id += 1;
|
self.anchor_id_count += 1;
|
||||||
self.anchors.insert(name, new_id);
|
self.anchors.insert(name, new_id);
|
||||||
new_id
|
new_id
|
||||||
}
|
}
|
||||||
|
@ -1086,7 +1128,7 @@ impl<T: Iterator<Item = char>> Iterator for Parser<T> {
|
||||||
type Item = Result<(Event, Marker), ScanError>;
|
type Item = Result<(Event, Marker), ScanError>;
|
||||||
|
|
||||||
fn next(&mut self) -> Option<Self::Item> {
|
fn next(&mut self) -> Option<Self::Item> {
|
||||||
Some(self.next_token())
|
self.next_event()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1109,12 +1151,14 @@ a4:
|
||||||
a5: *x
|
a5: *x
|
||||||
";
|
";
|
||||||
let mut p = Parser::new_from_str(s);
|
let mut p = Parser::new_from_str(s);
|
||||||
while {
|
loop {
|
||||||
let event_peek = p.peek().unwrap().clone();
|
let event_peek = p.peek().unwrap().unwrap().clone();
|
||||||
let event = p.next_token().unwrap();
|
let event = p.next_event().unwrap().unwrap();
|
||||||
assert_eq!(event, event_peek);
|
assert_eq!(event, event_peek);
|
||||||
event.0 != Event::StreamEnd
|
if event.0 == Event::StreamEnd {
|
||||||
} {}
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
|
@ -1129,27 +1173,18 @@ baz: "qux"
|
||||||
"#;
|
"#;
|
||||||
for x in Parser::new_from_str(text).keep_tags(true) {
|
for x in Parser::new_from_str(text).keep_tags(true) {
|
||||||
let x = x.unwrap();
|
let x = x.unwrap();
|
||||||
match x.0 {
|
if let Event::MappingStart(_, tag) = x.0 {
|
||||||
Event::StreamEnd => break,
|
|
||||||
Event::MappingStart(_, tag) => {
|
|
||||||
let tag = tag.unwrap();
|
let tag = tag.unwrap();
|
||||||
assert_eq!(tag.handle, "tag:test,2024:");
|
assert_eq!(tag.handle, "tag:test,2024:");
|
||||||
}
|
}
|
||||||
_ => (),
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
for x in Parser::new_from_str(text).keep_tags(false) {
|
for x in Parser::new_from_str(text).keep_tags(false) {
|
||||||
match x {
|
if x.is_err() {
|
||||||
Err(..) => {
|
|
||||||
// Test successful
|
// Test successful
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
Ok((Event::StreamEnd, _)) => {
|
}
|
||||||
panic!("Test failed, did not encounter error")
|
panic!("Test failed, did not encounter error")
|
||||||
}
|
}
|
||||||
_ => (),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -10,12 +10,7 @@ use saphyr_parser::{Event, Parser, ScanError, TScalarStyle};
|
||||||
fn run_parser(input: &str) -> Result<Vec<Event>, ScanError> {
|
fn run_parser(input: &str) -> Result<Vec<Event>, ScanError> {
|
||||||
let mut events = vec![];
|
let mut events = vec![];
|
||||||
for x in Parser::new_from_str(input) {
|
for x in Parser::new_from_str(input) {
|
||||||
let x = x?;
|
events.push(x?.0);
|
||||||
let end = x.0 == Event::StreamEnd;
|
|
||||||
events.push(x.0);
|
|
||||||
if end {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
Ok(events)
|
Ok(events)
|
||||||
}
|
}
|
||||||
|
|
|
@ -122,13 +122,7 @@ fn load_tests_from_file(entry: &DirEntry) -> Result<Vec<Test<YamlTest>>> {
|
||||||
fn parse_to_events(source: &str) -> Result<Vec<String>, ScanError> {
|
fn parse_to_events(source: &str) -> Result<Vec<String>, ScanError> {
|
||||||
let mut reporter = EventReporter::new();
|
let mut reporter = EventReporter::new();
|
||||||
for x in Parser::new_from_str(source) {
|
for x in Parser::new_from_str(source) {
|
||||||
match x? {
|
reporter.on_event(x?.0);
|
||||||
(Event::StreamEnd, _) => {
|
|
||||||
reporter.on_event(Event::StreamEnd);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
(x, _) => reporter.on_event(x),
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
Ok(reporter.events)
|
Ok(reporter.events)
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue