From f8065b79c72dcd57c5e3e0c3dfbee6e6dcc7b4bf Mon Sep 17 00:00:00 2001 From: Yuheng Chen Date: Sun, 24 May 2015 14:27:42 +0800 Subject: [PATCH 001/380] Initial commit --- saphyr/.gitignore | 2 + saphyr/Cargo.toml | 4 + saphyr/src/lib.rs | 6 + saphyr/src/parser.rs | 475 +++++++++++++++++++++++++++ saphyr/src/scanner.rs | 726 ++++++++++++++++++++++++++++++++++++++++++ saphyr/src/yaml.rs | 39 +++ 6 files changed, 1252 insertions(+) create mode 100644 saphyr/.gitignore create mode 100644 saphyr/Cargo.toml create mode 100644 saphyr/src/lib.rs create mode 100644 saphyr/src/parser.rs create mode 100644 saphyr/src/scanner.rs create mode 100644 saphyr/src/yaml.rs diff --git a/saphyr/.gitignore b/saphyr/.gitignore new file mode 100644 index 0000000..a9d37c5 --- /dev/null +++ b/saphyr/.gitignore @@ -0,0 +1,2 @@ +target +Cargo.lock diff --git a/saphyr/Cargo.toml b/saphyr/Cargo.toml new file mode 100644 index 0000000..d7dd718 --- /dev/null +++ b/saphyr/Cargo.toml @@ -0,0 +1,4 @@ +[package] +name = "yaml-rust" +version = "0.1.0" +authors = ["Yuheng Chen "] diff --git a/saphyr/src/lib.rs b/saphyr/src/lib.rs new file mode 100644 index 0000000..a824d7d --- /dev/null +++ b/saphyr/src/lib.rs @@ -0,0 +1,6 @@ +pub mod yaml; +pub mod scanner; +pub mod parser; +#[test] +fn it_works() { +} diff --git a/saphyr/src/parser.rs b/saphyr/src/parser.rs new file mode 100644 index 0000000..104f6a6 --- /dev/null +++ b/saphyr/src/parser.rs @@ -0,0 +1,475 @@ +use scanner::*; + +#[derive(Clone, Copy, PartialEq, Debug, Eq)] +pub enum State { + StreamStart, + ImplicitDocumentStart, + DocumentStart, + DocumentContent, + DocumentEnd, + BlockNode, + BlockNodeOrIndentlessSequence, + FlowNode, + BlockSequenceFirstEntry, + BlockSequenceEntry, + IndentlessSequenceEntry, + BlockMappingFirstKey, + BlockMappingKey, + BlockMappingValue, + FlowSequenceFirstEntry, + FlowSequenceEntry, + FlowSequenceEntryMappingKey, + FlowSequenceEntryMappingValue, + FlowSequenceEntryMappingEnd, + FlowMappingFirstKey, + FlowMappingKey, + FlowMappingValue, + FlowMappingEmptyValue, + End +} + +#[derive(Clone, PartialEq, Debug, Eq)] +pub enum Event { + NoEvent, + StreamStart, + StreamEnd, + DocumentStart, + DocumentEnd, + Alias, + Scalar(String), + SequenceStart, + SequenceEnd, + MappingStart, + MappingEnd +} + +#[derive(Debug)] +pub struct Parser { + scanner: Scanner, + states: Vec, + state: State, + marks: Vec, + token: Option, +} + +pub type ParseResult = Result; + +impl> Parser { + pub fn new(src: T) -> Parser { + Parser { + scanner: Scanner::new(src), + states: Vec::new(), + state: State::StreamStart, + marks: Vec::new(), + token: None + } + } + + fn peek(&mut self) -> Result { + if self.token.is_none() { + self.token = self.scanner.next(); + } + if self.token.is_none() { + return Err(ScanError::new(self.scanner.mark(), + "unexpected eof")); + } + // XXX better? + Ok(self.token.clone().unwrap()) + } + + fn skip(&mut self) { + self.token = None; + self.peek(); + } + fn pop_state(&mut self) { + self.state = self.states.pop().unwrap() + } + fn push_state(&mut self, state: State) { + self.states.push(state); + } + + pub fn parse(&mut self) -> ParseResult { + if self.scanner.stream_ended() + || self.state == State::End { + return Ok(Event::NoEvent); + } + let ev = self.state_machine(); + println!("EV {:?}", ev); + ev + } + + pub fn load(&mut self) -> Result<(), ScanError> { + if !self.scanner.stream_started() { + let ev = try!(self.parse()); + assert_eq!(ev, Event::StreamStart); + } + + if self.scanner.stream_ended() { + return Ok(()); + } + let ev = try!(self.parse()); + if ev == Event::StreamEnd { + return Ok(()); + } + self.load_document(&ev); + Ok(()) + } + + fn load_document(&mut self, first_ev: &Event) -> Result<(), ScanError> { + assert_eq!(first_ev, &Event::DocumentStart); + + let ev = try!(self.parse()); + let ev = try!(self.load_node(&ev)); + + Ok(()) + } + + fn load_node(&mut self, first_ev: &Event) -> Result<(), ScanError> { + match *first_ev { + Event::Scalar(_) => { + // TODO scalar + println!("Scalar: {:?}", first_ev); + Ok(()) + }, + Event::SequenceStart => { + self.load_sequence(first_ev) + }, + Event::MappingStart => { + self.load_mapping(first_ev) + }, + _ => { unreachable!(); } + } + } + + fn load_mapping(&mut self, first_ev: &Event) -> Result<(), ScanError> { + let mut ev = try!(self.parse()); + while ev != Event::MappingEnd { + // key + try!(self.load_node(&ev)); + + // value + ev = try!(self.parse()); + try!(self.load_node(&ev)); + + // next event + ev = try!(self.parse()); + } + Ok(()) + } + + fn load_sequence(&mut self, first_ev: &Event) -> Result<(), ScanError> { + let mut ev = try!(self.parse()); + while ev != Event::SequenceEnd { + try!(self.load_node(&ev)); + + // next event + ev = try!(self.parse()); + } + Ok(()) + } + + fn state_machine(&mut self) -> ParseResult { + let next_tok = self.peek(); + println!("cur_state {:?}, next tok: {:?}", self.state, next_tok); + match self.state { + State::StreamStart => self.stream_start(), + State::ImplicitDocumentStart => self.document_start(true), + State::DocumentStart => self.document_start(false), + State::DocumentContent => self.document_content(), + + State::BlockMappingFirstKey => self.block_mapping_key(true), + State::BlockMappingKey => self.block_mapping_key(false), + State::BlockMappingValue => self.block_mapping_value(), + + State::BlockSequenceFirstEntry => self.block_sequence_entry(true), + State::BlockSequenceEntry => self.block_sequence_entry(false), + + State::FlowSequenceFirstEntry => self.flow_sequence_entry(true), + State::FlowSequenceEntry => self.flow_sequence_entry(false), + + _ => unimplemented!() + } + } + + fn stream_start(&mut self) -> ParseResult { + let tok = try!(self.peek()); + + match tok.1 { + TokenType::StreamStartToken(_) => { + self.state = State::ImplicitDocumentStart; + self.skip(); + Ok(Event::StreamStart) + }, + _ => return Err(ScanError::new(tok.0, + "did not find expected ")), + } + } + + fn document_start(&mut self, implicit: bool) -> ParseResult { + let mut tok = try!(self.peek()); + if !implicit { + loop { + match tok.1 { + TokenType::DocumentEndToken => { + self.skip(); + tok = try!(self.peek()); + }, + _ => break + } + } + } + + match tok.1 { + TokenType::StreamEndToken => { + self.state = State::End; + self.skip(); + return Ok(Event::StreamEnd); + }, + TokenType::VersionDirectiveToken + | TokenType::TagDirectiveToken + | TokenType::DocumentStartToken => { + // explicit document + self._explict_document_start() + }, + _ if implicit => { + self.push_state(State::DocumentEnd); + self.state = State::BlockNode; + self.skip(); + Ok(Event::DocumentStart) + }, + _ => { + // explicit document + self._explict_document_start() + } + } + } + + fn _explict_document_start(&mut self) -> ParseResult { + let mut tok = try!(self.peek()); + if tok.1 != TokenType::DocumentStartToken { + return Err(ScanError::new(tok.0, "did not find expected ")); + } + self.push_state(State::DocumentEnd); + self.state = State::DocumentContent; + self.skip(); + Ok(Event::DocumentStart) + } + + fn document_content(&mut self) -> ParseResult { + let tok = try!(self.peek()); + match tok.1 { + TokenType::VersionDirectiveToken + |TokenType::TagDirectiveToken + |TokenType::DocumentStartToken + |TokenType::DocumentEndToken + |TokenType::StreamEndToken => { + self.pop_state(); + // empty scalar + Ok(Event::Scalar(String::new())) + }, + _ => { + self.parse_node(true, false) + } + } + } + + fn parse_node(&mut self, block: bool, indentless_sequence: bool) -> ParseResult { + let mut tok = try!(self.peek()); + match tok.1 { + TokenType::AliasToken => unimplemented!(), + TokenType::AnchorToken => unimplemented!(), + TokenType::BlockEntryToken if indentless_sequence => { + self.state = State::IndentlessSequenceEntry; + Ok(Event::SequenceStart) + }, + TokenType::ScalarToken(_, v) => { + self.pop_state(); + self.skip(); + Ok(Event::Scalar(v)) + }, + TokenType::FlowSequenceStartToken => { + self.state = State::FlowSequenceFirstEntry; + Ok(Event::SequenceStart) + }, + TokenType::FlowMappingStartToken => { + self.state = State::FlowMappingFirstKey; + Ok(Event::MappingStart) + }, + TokenType::BlockSequenceStartToken if block => { + self.state = State::BlockSequenceFirstEntry; + Ok(Event::SequenceStart) + }, + TokenType::BlockMappingStartToken if block => { + self.state = State::BlockMappingFirstKey; + Ok(Event::MappingStart) + }, + _ => { unimplemented!(); } + } + } + + fn block_mapping_key(&mut self, first: bool) -> ParseResult { + // skip BlockMappingStartToken + if first { + let tok = try!(self.peek()); + //self.marks.push(tok.0); + self.skip(); + } + let tok = try!(self.peek()); + match tok.1 { + TokenType::KeyToken => { + self.skip(); + let tok = try!(self.peek()); + match tok.1 { + TokenType::KeyToken | TokenType::ValueToken | TokenType::BlockEndToken + => { + self.state = State::BlockMappingValue; + // empty scalar + Ok(Event::Scalar(String::new())) + } + _ => { + self.push_state(State::BlockMappingValue); + self.parse_node(true, true) + } + } + }, + TokenType::BlockEndToken => { + self.pop_state(); + self.skip(); + Ok(Event::MappingEnd) + }, + _ => { + Err(ScanError::new(tok.0, "while parsing a block mapping, did not find expected key")) + } + } + } + + fn block_mapping_value(&mut self) -> ParseResult { + let tok = try!(self.peek()); + match tok.1 { + TokenType::ValueToken => { + self.skip(); + let tok = try!(self.peek()); + match tok.1 { + TokenType::KeyToken | TokenType::ValueToken | TokenType::BlockEndToken + => { + self.state = State::BlockMappingValue; + // empty scalar + Ok(Event::Scalar(String::new())) + } + _ => { + self.push_state(State::BlockMappingKey); + self.parse_node(true, true) + } + } + }, + _ => { + self.state = State::BlockMappingKey; + // empty scalar + Ok(Event::Scalar(String::new())) + } + } + } + + fn flow_sequence_entry(&mut self, first: bool) -> ParseResult { + // skip FlowMappingStartToken + if first { + let tok = try!(self.peek()); + //self.marks.push(tok.0); + self.skip(); + } + let mut tok = try!(self.peek()); + match tok.1 { + TokenType::FlowSequenceEndToken => { + self.pop_state(); + self.skip(); + return Ok(Event::SequenceEnd); + }, + TokenType::FlowEntryToken if !first => { + self.skip(); + tok = try!(self.peek()); + }, + _ if !first => { + return Err(ScanError::new(tok.0, + "while parsing a flow sequence, expectd ',' or ']'")); + } + _ => { /* next */ } + } + match tok.1 { + TokenType::FlowMappingEndToken => { + self.pop_state(); + self.skip(); + Ok(Event::SequenceEnd) + }, + TokenType::KeyToken => { + self.state = State::FlowSequenceEntryMappingKey; + self.skip(); + Ok(Event::MappingStart) + } + _ => { + self.push_state(State::FlowSequenceEntry); + self.parse_node(false, false) + } + } + } + + fn block_sequence_entry(&mut self, first: bool) -> ParseResult { + // BLOCK-SEQUENCE-START + if first { + let tok = try!(self.peek()); + //self.marks.push(tok.0); + self.skip(); + } + let mut tok = try!(self.peek()); + match tok.1 { + TokenType::BlockEndToken => { + self.pop_state(); + self.skip(); + Ok(Event::SequenceEnd) + }, + TokenType::BlockEntryToken => { + self.skip(); + tok = try!(self.peek()); + match tok.1 { + TokenType::BlockEntryToken | TokenType::BlockEndToken => { + self.state = State::BlockSequenceEntry; + Ok(Event::Scalar(String::new())) + }, + _ => { + self.push_state(State::BlockSequenceEntry); + self.parse_node(true, false) + } + } + }, + _ => { + Err(ScanError::new(tok.0, + "while parsing a block collection, did not find expected '-' indicator")) + } + } + } + +} + +#[cfg(test)] +mod test { + use super::*; + #[test] + fn test_parser() { + let s: String = "--- +# comment +a0 bb: val +a1: + b1: 4 + b2: d +a2: 4 +a3: [1, 2, 3] +a4: + - - a1 + - a2 + - 2 +".to_string(); + let mut parser = Parser::new(s.chars()); + parser.load().unwrap(); + + } +} + diff --git a/saphyr/src/scanner.rs b/saphyr/src/scanner.rs new file mode 100644 index 0000000..2b10542 --- /dev/null +++ b/saphyr/src/scanner.rs @@ -0,0 +1,726 @@ +use std::collections::VecDeque; +use yaml::*; + +#[derive(Clone, Copy, PartialEq, Debug, Eq)] +pub enum TEncoding { + Utf8 +} + +#[derive(Clone, Copy, PartialEq, Debug, Eq)] +pub enum TScalarStyle { + Any, + Plain, + SingleQuoted, + DoubleQuoted, + + Literal, + Foled +} + +#[derive(Clone, Copy, PartialEq, Debug, Eq)] +pub struct Marker { + index: usize, + line: usize, + col: usize, +} + +impl Marker { + fn new(index: usize, line: usize, col: usize) -> Marker { + Marker { + index: index, + line: line, + col: col + } + } +} + +#[derive(Clone, PartialEq, Debug, Eq)] +pub struct ScanError { + mark: Marker, + info: String, +} + +impl ScanError { + pub fn new(loc: Marker, info: &str) -> ScanError { + ScanError { + mark: loc, + info: info.to_string() + } + } +} + +#[derive(Clone, PartialEq, Debug, Eq)] +pub enum TokenType { + NoToken, + StreamStartToken(TEncoding), + StreamEndToken, + VersionDirectiveToken, + TagDirectiveToken, + DocumentStartToken, + DocumentEndToken, + BlockSequenceStartToken, + BlockMappingStartToken, + BlockEndToken, + FlowSequenceStartToken, + FlowSequenceEndToken, + FlowMappingStartToken, + FlowMappingEndToken, + BlockEntryToken, + FlowEntryToken, + KeyToken, + ValueToken, + AliasToken, + AnchorToken, + TagToken, + ScalarToken(TScalarStyle, String) +} + +#[derive(Clone, PartialEq, Debug, Eq)] +pub struct Token(pub Marker, pub TokenType); + +#[derive(Clone, PartialEq, Debug, Eq)] +struct SimpleKey { + possible: bool, + required: bool, + token_number: usize, + mark: Marker, +} + +impl SimpleKey { + fn new(mark: Marker) -> SimpleKey { + SimpleKey { + possible: false, + required: false, + token_number: 0, + mark: mark, + } + } +} + +#[derive(Debug)] +pub struct Scanner { + rdr: T, + mark: Marker, + tokens: VecDeque, + buffer: VecDeque, + + stream_start_produced: bool, + stream_end_produced: bool, + simple_key_allowed: bool, + simple_keys: Vec, + indent: isize, + indents: Vec, + flow_level: usize, + tokens_parsed: usize, + token_available: bool, +} + +impl> Iterator for Scanner { + type Item = Token; + fn next(&mut self) -> Option { + match self.next_token() { + Ok(tok) => tok, + Err(e) => { + println!("Error: {:?}", e); + None + } + } + } +} + +fn is_z(c: char) -> bool { + c == '\0' +} +fn is_break(c: char) -> bool { + c == '\n' || c == '\r' +} +fn is_breakz(c: char) -> bool { + is_break(c) || is_z(c) +} +fn is_blank(c: char) -> bool { + c == ' ' || c == '\t' +} +fn is_blankz(c: char) -> bool { + is_blank(c) || is_breakz(c) +} + +pub type ScanResult = Result<(), ScanError>; + +impl> Scanner { + /// Creates the YAML tokenizer. + pub fn new(rdr: T) -> Scanner { + let mut p = Scanner { + rdr: rdr, + buffer: VecDeque::new(), + mark: Marker::new(0, 1, 0), + tokens: VecDeque::new(), + + stream_start_produced: false, + stream_end_produced: false, + simple_key_allowed: true, + simple_keys: Vec::new(), + indent: -1, + indents: Vec::new(), + flow_level: 0, + tokens_parsed: 0, + token_available: false, + }; + return p; + } + + fn lookhead(&mut self, count: usize) { + if self.buffer.len() >= count { + return; + } + for i in 0..(count - self.buffer.len()) { + self.buffer.push_back(self.rdr.next().unwrap_or('\0')); + } + } + fn skip(&mut self) { + let c = self.buffer.pop_front().unwrap(); + + self.mark.index += 1; + if c == '\n' { + self.mark.line += 1; + self.mark.col = 0; + } else { + self.mark.col += 1; + } + } + fn ch(&self) -> char { + self.buffer[0] + } + fn ch_is(&self, c: char) -> bool { + self.buffer[0] == c + } + fn eof(&self) -> bool { + self.ch_is('\0') + } + pub fn stream_started(&self) -> bool { + self.stream_start_produced + } + pub fn stream_ended(&self) -> bool { + self.stream_end_produced + } + pub fn mark(&self) -> Marker { + self.mark + } + fn read_break(&mut self, s: &mut String) { + if self.buffer[0] == '\r' && self.buffer[1] == '\n' { + s.push('\n'); + self.skip(); + self.skip(); + } else if self.buffer[0] == '\r' || self.buffer[0] == '\n' { + s.push('\n'); + self.skip(); + } else { + unreachable!(); + } + } + fn insert_token(&mut self, pos: usize, tok: Token) { + let old_len = self.tokens.len(); + assert!(pos <= old_len); + self.tokens.push_back(tok); + for i in 0..old_len - pos { + self.tokens.swap(old_len - i, old_len - i - 1); + } + } + fn allow_simple_key(&mut self) { + self.simple_key_allowed = true; + } + fn disallow_simple_key(&mut self) { + self.simple_key_allowed = false; + } + + pub fn fetch_next_token(&mut self) -> ScanResult { + self.lookhead(1); + // println!("--> fetch_next_token Cur {:?} {:?}", self.mark, self.ch()); + + if !self.stream_start_produced { + self.fetch_stream_start(); + return Ok(()); + } + self.skip_to_next_token(); + + try!(self.stale_simple_keys()); + + let mark = self.mark; + self.unroll_indent(mark.col as isize); + + self.lookhead(4); + + if is_z(self.ch()) { + self.fetch_stream_end(); + return Ok(()); + } + + if self.mark.col == 0 && self.ch_is('%') { + unimplemented!(); + } + + if self.mark.col == 0 + && self.buffer[0] == '-' + && self.buffer[1] == '-' + && self.buffer[2] == '-' + && is_blankz(self.buffer[3]) { + try!(self.fetch_document_indicator(TokenType::DocumentStartToken)); + return Ok(()); + } + + if self.mark.col == 0 + && self.buffer[0] == '.' + && self.buffer[1] == '.' + && self.buffer[2] == '.' + && is_blankz(self.buffer[3]) { + try!(self.fetch_document_indicator(TokenType::DocumentEndToken)); + return Ok(()); + } + + let c = self.buffer[0]; + let nc = self.buffer[1]; + match c { + '[' => try!(self.fetch_flow_collection_start(TokenType::FlowSequenceStartToken)), + '{' => try!(self.fetch_flow_collection_start(TokenType::FlowMappingStartToken)), + ']' => try!(self.fetch_flow_collection_end(TokenType::FlowSequenceEndToken)), + '}' => try!(self.fetch_flow_collection_end(TokenType::FlowMappingEndToken)), + ',' => try!(self.fetch_flow_entry()), + '-' if is_blankz(nc) => try!(self.fetch_block_entry()), + '?' if self.flow_level > 0 || is_blankz(nc) => unimplemented!(), + ':' if self.flow_level > 0 || is_blankz(nc) => try!(self.fetch_value()), + '*' => unimplemented!(), + '&' => unimplemented!(), + '!' => unimplemented!(), + '|' if self.flow_level == 0 => unimplemented!(), + '>' if self.flow_level == 0 => unimplemented!(), + '\'' => unimplemented!(), + '"' => unimplemented!(), + // plain scalar + '-' if !is_blankz(nc) => try!(self.fetch_plain_scalar()), + ':' | '?' if !is_blankz(nc) && self.flow_level == 0 => try!(self.fetch_plain_scalar()), + '%' | '@' | '`' => return Err(ScanError::new(self.mark, + &format!("unexpected character: `{}'", c))), + _ => try!(self.fetch_plain_scalar()), + } + + Ok(()) + } + + pub fn next_token(&mut self) -> Result, ScanError> { + if self.stream_end_produced { + return Ok(None); + } + + if !self.token_available { + try!(self.fetch_more_tokens()); + } + let t = self.tokens.pop_front().unwrap(); + self.token_available = false; + self.tokens_parsed += 1; + + match t.1 { + TokenType::StreamEndToken => self.stream_end_produced = true, + _ => {} + } + Ok(Some(t)) + } + + pub fn fetch_more_tokens(&mut self) -> ScanResult { + let mut need_more = false; + loop { + need_more = false; + if self.tokens.is_empty() { + need_more = true; + } else { + try!(self.stale_simple_keys()); + for sk in &self.simple_keys { + if sk.possible && sk.token_number == self.tokens_parsed { + need_more = true; + break; + } + } + } + + if !need_more { break; } + try!(self.fetch_next_token()); + } + self.token_available = true; + + Ok(()) + } + + fn stale_simple_keys(&mut self) -> ScanResult { + for sk in &mut self.simple_keys { + if sk.possible && (sk.mark.line < self.mark.line + || sk.mark.index + 1024 < self.mark.index) { + if sk.required { + return Err(ScanError::new(self.mark, "simple key expect ':'")); + } + sk.possible = false; + } + } + Ok(()) + } + + fn skip_to_next_token(&mut self) { + loop { + self.lookhead(1); + // TODO(chenyh) BOM + match self.ch() { + ' ' => self.skip(), + '\t' if self.flow_level > 0 || !self.simple_key_allowed => self.skip(), + '\n' | '\r' => { + self.skip(); + if self.flow_level == 0 { + self.allow_simple_key(); + } + }, + '#' => while !is_breakz(self.ch()) { self.skip(); self.lookhead(1); }, + _ => break + } + } + } + + fn fetch_stream_start(&mut self) { + let mark = self.mark; + self.indent = -1; + self.stream_start_produced = true; + self.allow_simple_key(); + self.tokens.push_back(Token(mark, TokenType::StreamStartToken(TEncoding::Utf8))); + self.simple_keys.push(SimpleKey::new(Marker::new(0,0,0))); + } + + fn fetch_stream_end(&mut self) -> ScanResult { + // force new line + if self.mark.col != 0 { + self.mark.col = 0; + self.mark.line += 1; + } + + self.unroll_indent(-1); + try!(self.remove_simple_key()); + self.disallow_simple_key(); + + self.tokens.push_back(Token(self.mark, TokenType::StreamEndToken)); + Ok(()) + } + + fn fetch_flow_collection_start(&mut self, tok :TokenType) -> ScanResult { + // The indicators '[' and '{' may start a simple key. + try!(self.save_simple_key()); + + self.increase_flow_level(); + + self.allow_simple_key(); + + let start_mark = self.mark; + self.skip(); + + self.tokens.push_back(Token(start_mark, tok)); + Ok(()) + } + + fn fetch_flow_collection_end(&mut self, tok :TokenType) -> ScanResult { + try!(self.remove_simple_key()); + self.decrease_flow_level(); + + self.disallow_simple_key(); + + let start_mark = self.mark; + self.skip(); + + self.tokens.push_back(Token(start_mark, tok)); + Ok(()) + } + + fn fetch_flow_entry(&mut self) -> ScanResult { + try!(self.remove_simple_key()); + self.allow_simple_key(); + + let start_mark = self.mark; + self.skip(); + + self.tokens.push_back(Token(start_mark, TokenType::FlowEntryToken)); + Ok(()) + } + + fn increase_flow_level(&mut self) { + self.simple_keys.push(SimpleKey::new(Marker::new(0,0,0))); + self.flow_level += 1; + } + fn decrease_flow_level(&mut self) { + if self.flow_level > 0 { + self.flow_level -= 1; + self.simple_keys.pop().unwrap(); + } + } + + fn fetch_block_entry(&mut self) -> ScanResult { + if self.flow_level == 0 { + // Check if we are allowed to start a new entry. + if !self.simple_key_allowed { + return Err(ScanError::new(self.mark, + "block sequence entries are not allowed in this context")); + } + + let mark = self.mark; + // generate BLOCK-SEQUENCE-START if indented + self.roll_indent(mark.col, None, TokenType::BlockSequenceStartToken, mark); + } else { + // - * only allowed in block + unreachable!(); + } + self.remove_simple_key(); + self.allow_simple_key(); + + let start_mark = self.mark; + self.skip(); + + self.tokens.push_back(Token(start_mark, TokenType::BlockEntryToken)); + Ok(()) + } + + fn fetch_document_indicator(&mut self, t: TokenType) -> ScanResult { + self.unroll_indent(-1); + try!(self.remove_simple_key()); + self.disallow_simple_key(); + + let mark = self.mark; + + self.skip(); + self.skip(); + self.skip(); + + self.tokens.push_back(Token(mark, t)); + Ok(()) + } + + fn fetch_plain_scalar(&mut self) -> Result<(), ScanError> { + try!(self.save_simple_key()); + + self.disallow_simple_key(); + + let tok = try!(self.scan_plain_scalar()); + + self.tokens.push_back(tok); + + Ok(()) + } + + fn scan_plain_scalar(&mut self) -> Result { + let indent = self.indent + 1; + let start_mark = self.mark; + + let mut string = String::new(); + let mut leading_break = String::new(); + let mut trailing_breaks = String::new(); + let mut whitespaces = String::new(); + let mut leading_blanks = false; + + loop { + /* Check for a document indicator. */ + self.lookhead(4); + + if self.mark.col == 0 && + ((self.buffer[0] == '-') && + (self.buffer[1] == '-') && + (self.buffer[2] == '-')) || + ((self.buffer[0] == '.') && + (self.buffer[1] == '.') && + (self.buffer[2] == '.')) && + is_blankz(self.buffer[3]) { + break; + } + + if self.ch() == '#' { break; } + while !is_blankz(self.ch()) { + if self.flow_level > 0 && self.ch() == ':' + && is_blankz(self.ch()) { + return Err(ScanError::new(start_mark, + "while scanning a plain scalar, found unexpected ':'")); + } + // indicators ends a plain scalar + match self.ch() { + ':' if is_blankz(self.buffer[1]) => break, + ',' | ':' | '?' | '[' | ']' |'{' |'}' => break, + _ => {} + } + + if leading_blanks || !whitespaces.is_empty() { + if leading_blanks { + if !leading_break.is_empty() { + if trailing_breaks.is_empty() { + string.push(' '); + } else { + string.extend(trailing_breaks.chars()); + trailing_breaks.clear(); + } + leading_break.clear(); + } else { + string.extend(leading_break.chars()); + string.extend(trailing_breaks.chars()); + trailing_breaks.clear(); + leading_break.clear(); + } + leading_blanks = false; + } else { + string.extend(whitespaces.chars()); + whitespaces.clear(); + } + } + + string.push(self.ch()); + self.skip(); + self.lookhead(2); + } + // is the end? + if !(is_blank(self.ch()) || is_break(self.ch())) { break; } + self.lookhead(1); + + while is_blank(self.ch()) || is_break(self.ch()) { + if is_blank(self.ch()) { + if leading_blanks && (self.mark.col as isize) < indent + && self.ch() == '\t' { + return Err(ScanError::new(start_mark, + "while scanning a plain scalar, found a tab")); + } + + if !leading_blanks { + whitespaces.push(self.ch()); + self.skip(); + } else { + self.skip(); + } + } else { + self.lookhead(2); + // Check if it is a first line break + if !leading_blanks { + whitespaces.clear(); + self.read_break(&mut leading_break); + leading_blanks = true; + } else { + self.read_break(&mut trailing_breaks); + } + } + self.lookhead(1); + } + + // check intendation level + if self.flow_level == 0 && (self.mark.col as isize) < indent { + break; + } + } + + if leading_blanks { + self.allow_simple_key(); + } + + Ok(Token(start_mark, TokenType::ScalarToken(TScalarStyle::Plain, string))) + } + + fn fetch_value(&mut self) -> ScanResult { + let sk = self.simple_keys.last().unwrap().clone(); + let start_mark = self.mark; + if sk.possible { + let tok = Token(start_mark, TokenType::KeyToken); + let tokens_parsed = self.tokens_parsed; + self.insert_token(sk.token_number - tokens_parsed, tok); + + // Add the BLOCK-MAPPING-START token if needed. + self.roll_indent(sk.mark.col, Some(sk.token_number), + TokenType::BlockMappingStartToken, start_mark); + + self.simple_keys.last_mut().unwrap().possible = false; + self.disallow_simple_key(); + } else { + // The ':' indicator follows a complex key. + unimplemented!(); + } + + self.skip(); + self.tokens.push_back(Token(start_mark, TokenType::ValueToken)); + + Ok(()) + } + + fn roll_indent(&mut self, col: usize, number: Option, + tok: TokenType, mark: Marker) { + if self.flow_level > 0 { + return; + } + + if self.indent < col as isize { + self.indents.push(self.indent); + self.indent = col as isize; + let tokens_parsed = self.tokens_parsed; + match number { + Some(n) => self.insert_token(n - tokens_parsed, Token(mark, tok)), + None => self.tokens.push_back(Token(mark, tok)) + } + } + } + + fn unroll_indent(&mut self, col: isize) { + if self.flow_level > 0 { + return; + } + while self.indent > col { + self.tokens.push_back(Token(self.mark, TokenType::BlockEndToken)); + self.indent = self.indents.pop().unwrap(); + } + } + + fn save_simple_key(&mut self) -> Result<(), ScanError> { + let required = self.flow_level > 0 && self.indent == (self.mark.col as isize); + if self.simple_key_allowed { + let mut sk = SimpleKey::new(self.mark); + sk.possible = true; + sk.required = required; + sk.token_number = self.tokens_parsed + self.tokens.len(); + + try!(self.remove_simple_key()); + + self.simple_keys.pop(); + self.simple_keys.push(sk); + } + Ok(()) + } + + fn remove_simple_key(&mut self) -> ScanResult { + let last = self.simple_keys.last_mut().unwrap(); + if last.possible { + if last.required { + return Err(ScanError::new(self.mark, "simple key expected")); + } + } + + last.possible = false; + Ok(()) + } + +} + +#[cfg(test)] +mod test { + use super::*; + #[test] + fn test_tokenizer() { + let s: String = "--- +# comment +a0 bb: val +a1: + b1: 4 + b2: d +a2: 4 +a3: [1, 2, 3] +a4: + - - a1 + - a2 + - 2 +".to_string(); + let p = Scanner::new(s.chars()); + for t in p { + //println!("{:?}", t); + } + } +} + diff --git a/saphyr/src/yaml.rs b/saphyr/src/yaml.rs new file mode 100644 index 0000000..d803bdc --- /dev/null +++ b/saphyr/src/yaml.rs @@ -0,0 +1,39 @@ +use std::collections::{HashMap, BTreeMap}; +use std::string; + +#[derive(Clone, PartialEq, PartialOrd, Debug)] +pub enum Yaml { + I64(i64), + U64(u64), + F64(f64), + String(string::String), + Boolean(bool), + Array(self::Array), + Hash(self::Hash), + Null, +} + +pub type Array = Vec; +pub type Hash = BTreeMap; + +/// The errors that can arise while parsing a YAML stream. +#[derive(Clone, Copy, PartialEq, Debug)] +pub enum ErrorCode { + InvalidSyntax, + InvalidNumber, + EOFWhileParsingObject, + EOFWhileParsingArray, + EOFWhileParsingValue, + EOFWhileParsingString, + KeyMustBeAString, + ExpectedColon, + TrailingCharacters, + TrailingComma, + InvalidEscape, + InvalidUnicodeCodePoint, + LoneLeadingSurrogateInHexEscape, + UnexpectedEndOfHexEscape, + UnrecognizedHex, + NotFourDigit, + NotUtf8, +} From 5c25f78c7fd80b8810b883b894f3d1534e80b088 Mon Sep 17 00:00:00 2001 From: Yuheng Chen Date: Sun, 24 May 2015 14:29:10 +0800 Subject: [PATCH 002/380] Add empty_scalar() --- saphyr/src/parser.rs | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/saphyr/src/parser.rs b/saphyr/src/parser.rs index 104f6a6..242a540 100644 --- a/saphyr/src/parser.rs +++ b/saphyr/src/parser.rs @@ -43,6 +43,12 @@ pub enum Event { MappingEnd } +impl Event { + fn empty_scalar() -> Event { + Event::Scalar(String::new()) + } +} + #[derive(Debug)] pub struct Parser { scanner: Scanner, @@ -265,7 +271,7 @@ impl> Parser { |TokenType::StreamEndToken => { self.pop_state(); // empty scalar - Ok(Event::Scalar(String::new())) + Ok(Event::empty_scalar()) }, _ => { self.parse_node(true, false) @@ -324,7 +330,7 @@ impl> Parser { => { self.state = State::BlockMappingValue; // empty scalar - Ok(Event::Scalar(String::new())) + Ok(Event::empty_scalar()) } _ => { self.push_state(State::BlockMappingValue); @@ -354,7 +360,7 @@ impl> Parser { => { self.state = State::BlockMappingValue; // empty scalar - Ok(Event::Scalar(String::new())) + Ok(Event::empty_scalar()) } _ => { self.push_state(State::BlockMappingKey); @@ -365,7 +371,7 @@ impl> Parser { _ => { self.state = State::BlockMappingKey; // empty scalar - Ok(Event::Scalar(String::new())) + Ok(Event::empty_scalar()) } } } @@ -432,7 +438,7 @@ impl> Parser { match tok.1 { TokenType::BlockEntryToken | TokenType::BlockEndToken => { self.state = State::BlockSequenceEntry; - Ok(Event::Scalar(String::new())) + Ok(Event::empty_scalar()) }, _ => { self.push_state(State::BlockSequenceEntry); From 1014e35108c28cac8ee526b2495edad4559e7a97 Mon Sep 17 00:00:00 2001 From: Yuheng Chen Date: Sun, 24 May 2015 14:37:36 +0800 Subject: [PATCH 003/380] Fix warnings --- saphyr/.gitignore | 1 + saphyr/src/parser.rs | 16 ++++++++-------- saphyr/src/scanner.rs | 17 ++++++++--------- 3 files changed, 17 insertions(+), 17 deletions(-) diff --git a/saphyr/.gitignore b/saphyr/.gitignore index a9d37c5..d4f917d 100644 --- a/saphyr/.gitignore +++ b/saphyr/.gitignore @@ -1,2 +1,3 @@ target Cargo.lock +*.swp diff --git a/saphyr/src/parser.rs b/saphyr/src/parser.rs index 242a540..e1d18dc 100644 --- a/saphyr/src/parser.rs +++ b/saphyr/src/parser.rs @@ -85,7 +85,7 @@ impl> Parser { fn skip(&mut self) { self.token = None; - self.peek(); + //self.peek(); } fn pop_state(&mut self) { self.state = self.states.pop().unwrap() @@ -117,7 +117,7 @@ impl> Parser { if ev == Event::StreamEnd { return Ok(()); } - self.load_document(&ev); + try!(self.load_document(&ev)); Ok(()) } @@ -125,7 +125,7 @@ impl> Parser { assert_eq!(first_ev, &Event::DocumentStart); let ev = try!(self.parse()); - let ev = try!(self.load_node(&ev)); + try!(self.load_node(&ev)); Ok(()) } @@ -251,7 +251,7 @@ impl> Parser { } fn _explict_document_start(&mut self) -> ParseResult { - let mut tok = try!(self.peek()); + let tok = try!(self.peek()); if tok.1 != TokenType::DocumentStartToken { return Err(ScanError::new(tok.0, "did not find expected ")); } @@ -280,7 +280,7 @@ impl> Parser { } fn parse_node(&mut self, block: bool, indentless_sequence: bool) -> ParseResult { - let mut tok = try!(self.peek()); + let tok = try!(self.peek()); match tok.1 { TokenType::AliasToken => unimplemented!(), TokenType::AnchorToken => unimplemented!(), @@ -316,7 +316,7 @@ impl> Parser { fn block_mapping_key(&mut self, first: bool) -> ParseResult { // skip BlockMappingStartToken if first { - let tok = try!(self.peek()); + let _ = try!(self.peek()); //self.marks.push(tok.0); self.skip(); } @@ -379,7 +379,7 @@ impl> Parser { fn flow_sequence_entry(&mut self, first: bool) -> ParseResult { // skip FlowMappingStartToken if first { - let tok = try!(self.peek()); + let _ = try!(self.peek()); //self.marks.push(tok.0); self.skip(); } @@ -421,7 +421,7 @@ impl> Parser { fn block_sequence_entry(&mut self, first: bool) -> ParseResult { // BLOCK-SEQUENCE-START if first { - let tok = try!(self.peek()); + let _ = try!(self.peek()); //self.marks.push(tok.0); self.skip(); } diff --git a/saphyr/src/scanner.rs b/saphyr/src/scanner.rs index 2b10542..dd28f16 100644 --- a/saphyr/src/scanner.rs +++ b/saphyr/src/scanner.rs @@ -1,5 +1,4 @@ use std::collections::VecDeque; -use yaml::*; #[derive(Clone, Copy, PartialEq, Debug, Eq)] pub enum TEncoding { @@ -149,7 +148,7 @@ pub type ScanResult = Result<(), ScanError>; impl> Scanner { /// Creates the YAML tokenizer. pub fn new(rdr: T) -> Scanner { - let mut p = Scanner { + Scanner { rdr: rdr, buffer: VecDeque::new(), mark: Marker::new(0, 1, 0), @@ -164,15 +163,14 @@ impl> Scanner { flow_level: 0, tokens_parsed: 0, token_available: false, - }; - return p; + } } fn lookhead(&mut self, count: usize) { if self.buffer.len() >= count { return; } - for i in 0..(count - self.buffer.len()) { + for _ in 0..(count - self.buffer.len()) { self.buffer.push_back(self.rdr.next().unwrap_or('\0')); } } @@ -193,6 +191,7 @@ impl> Scanner { fn ch_is(&self, c: char) -> bool { self.buffer[0] == c } + #[allow(dead_code)] fn eof(&self) -> bool { self.ch_is('\0') } @@ -250,7 +249,7 @@ impl> Scanner { self.lookhead(4); if is_z(self.ch()) { - self.fetch_stream_end(); + try!(self.fetch_stream_end()); return Ok(()); } @@ -325,7 +324,7 @@ impl> Scanner { } pub fn fetch_more_tokens(&mut self) -> ScanResult { - let mut need_more = false; + let mut need_more; loop { need_more = false; if self.tokens.is_empty() { @@ -469,7 +468,7 @@ impl> Scanner { // - * only allowed in block unreachable!(); } - self.remove_simple_key(); + try!(self.remove_simple_key()); self.allow_simple_key(); let start_mark = self.mark; @@ -719,7 +718,7 @@ a4: ".to_string(); let p = Scanner::new(s.chars()); for t in p { - //println!("{:?}", t); + // println!("{:?}", t); } } } From 9fa4c839fcb6f34503b989bb8a9a7fa8a08293a7 Mon Sep 17 00:00:00 2001 From: Yuheng Chen Date: Sun, 24 May 2015 14:38:54 +0800 Subject: [PATCH 004/380] fix lookahead spelling --- saphyr/src/scanner.rs | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/saphyr/src/scanner.rs b/saphyr/src/scanner.rs index dd28f16..adf6518 100644 --- a/saphyr/src/scanner.rs +++ b/saphyr/src/scanner.rs @@ -166,7 +166,7 @@ impl> Scanner { } } - fn lookhead(&mut self, count: usize) { + fn lookahead(&mut self, count: usize) { if self.buffer.len() >= count { return; } @@ -232,7 +232,7 @@ impl> Scanner { } pub fn fetch_next_token(&mut self) -> ScanResult { - self.lookhead(1); + self.lookahead(1); // println!("--> fetch_next_token Cur {:?} {:?}", self.mark, self.ch()); if !self.stream_start_produced { @@ -246,7 +246,7 @@ impl> Scanner { let mark = self.mark; self.unroll_indent(mark.col as isize); - self.lookhead(4); + self.lookahead(4); if is_z(self.ch()) { try!(self.fetch_stream_end()); @@ -362,7 +362,7 @@ impl> Scanner { fn skip_to_next_token(&mut self) { loop { - self.lookhead(1); + self.lookahead(1); // TODO(chenyh) BOM match self.ch() { ' ' => self.skip(), @@ -373,7 +373,7 @@ impl> Scanner { self.allow_simple_key(); } }, - '#' => while !is_breakz(self.ch()) { self.skip(); self.lookhead(1); }, + '#' => while !is_breakz(self.ch()) { self.skip(); self.lookahead(1); }, _ => break } } @@ -517,7 +517,7 @@ impl> Scanner { loop { /* Check for a document indicator. */ - self.lookhead(4); + self.lookahead(4); if self.mark.col == 0 && ((self.buffer[0] == '-') && @@ -569,11 +569,11 @@ impl> Scanner { string.push(self.ch()); self.skip(); - self.lookhead(2); + self.lookahead(2); } // is the end? if !(is_blank(self.ch()) || is_break(self.ch())) { break; } - self.lookhead(1); + self.lookahead(1); while is_blank(self.ch()) || is_break(self.ch()) { if is_blank(self.ch()) { @@ -590,7 +590,7 @@ impl> Scanner { self.skip(); } } else { - self.lookhead(2); + self.lookahead(2); // Check if it is a first line break if !leading_blanks { whitespaces.clear(); @@ -600,7 +600,7 @@ impl> Scanner { self.read_break(&mut trailing_breaks); } } - self.lookhead(1); + self.lookahead(1); } // check intendation level From 2e1416c509a9c2346468f9e0b2c7ed8d870c1dcf Mon Sep 17 00:00:00 2001 From: Yuheng Chen Date: Mon, 25 May 2015 01:34:18 +0800 Subject: [PATCH 005/380] Add yaml value coerce --- saphyr/Cargo.toml | 3 ++ saphyr/src/lib.rs | 2 ++ saphyr/src/parser.rs | 54 ++++++++++++++++------------- saphyr/src/scanner.rs | 4 +-- saphyr/src/yaml.rs | 80 ++++++++++++++++++++++++++++++++++++++++--- 5 files changed, 113 insertions(+), 30 deletions(-) diff --git a/saphyr/Cargo.toml b/saphyr/Cargo.toml index d7dd718..82b30d2 100644 --- a/saphyr/Cargo.toml +++ b/saphyr/Cargo.toml @@ -2,3 +2,6 @@ name = "yaml-rust" version = "0.1.0" authors = ["Yuheng Chen "] + +[dependencies] +regex = "*" diff --git a/saphyr/src/lib.rs b/saphyr/src/lib.rs index a824d7d..1241d22 100644 --- a/saphyr/src/lib.rs +++ b/saphyr/src/lib.rs @@ -1,3 +1,5 @@ +extern crate regex; + pub mod yaml; pub mod scanner; pub mod parser; diff --git a/saphyr/src/parser.rs b/saphyr/src/parser.rs index e1d18dc..ffe97d9 100644 --- a/saphyr/src/parser.rs +++ b/saphyr/src/parser.rs @@ -1,4 +1,5 @@ use scanner::*; +use yaml::*; #[derive(Clone, Copy, PartialEq, Debug, Eq)] pub enum State { @@ -67,7 +68,7 @@ impl> Parser { states: Vec::new(), state: State::StreamStart, marks: Vec::new(), - token: None + token: None, } } @@ -104,38 +105,35 @@ impl> Parser { ev } - pub fn load(&mut self) -> Result<(), ScanError> { + pub fn load(&mut self) -> Result { if !self.scanner.stream_started() { let ev = try!(self.parse()); assert_eq!(ev, Event::StreamStart); } if self.scanner.stream_ended() { - return Ok(()); + return Ok(Yaml::Null); } let ev = try!(self.parse()); if ev == Event::StreamEnd { - return Ok(()); + return Ok(Yaml::Null); } - try!(self.load_document(&ev)); - Ok(()) + self.load_document(&ev) } - fn load_document(&mut self, first_ev: &Event) -> Result<(), ScanError> { + fn load_document(&mut self, first_ev: &Event) -> Result { assert_eq!(first_ev, &Event::DocumentStart); let ev = try!(self.parse()); - try!(self.load_node(&ev)); - - Ok(()) + self.load_node(&ev) } - fn load_node(&mut self, first_ev: &Event) -> Result<(), ScanError> { + fn load_node(&mut self, first_ev: &Event) -> Result { match *first_ev { - Event::Scalar(_) => { + Event::Scalar(ref v) => { // TODO scalar println!("Scalar: {:?}", first_ev); - Ok(()) + Ok(Yaml::String(v.clone())) }, Event::SequenceStart => { self.load_sequence(first_ev) @@ -147,31 +145,36 @@ impl> Parser { } } - fn load_mapping(&mut self, first_ev: &Event) -> Result<(), ScanError> { + fn load_mapping(&mut self, first_ev: &Event) -> Result { let mut ev = try!(self.parse()); + let mut map = Hash::new(); while ev != Event::MappingEnd { // key - try!(self.load_node(&ev)); + let key = try!(self.load_node(&ev)); // value ev = try!(self.parse()); - try!(self.load_node(&ev)); + let value = try!(self.load_node(&ev)); + + map.insert(key, value); // next event ev = try!(self.parse()); } - Ok(()) + Ok(Yaml::Hash(map)) } - fn load_sequence(&mut self, first_ev: &Event) -> Result<(), ScanError> { + fn load_sequence(&mut self, first_ev: &Event) -> Result { let mut ev = try!(self.parse()); + let mut vec = Vec::new(); while ev != Event::SequenceEnd { - try!(self.load_node(&ev)); + let entry = try!(self.load_node(&ev)); + vec.push(entry); // next event ev = try!(self.parse()); } - Ok(()) + Ok(Yaml::Array(vec)) } fn state_machine(&mut self) -> ParseResult { @@ -183,6 +186,10 @@ impl> Parser { State::DocumentStart => self.document_start(false), State::DocumentContent => self.document_content(), + State::BlockNode => self.parse_node(true, false), + State::BlockNodeOrIndentlessSequence => self.parse_node(true, true), + State::FlowNode => self.parse_node(false, false), + State::BlockMappingFirstKey => self.block_mapping_key(true), State::BlockMappingKey => self.block_mapping_key(false), State::BlockMappingValue => self.block_mapping_value(), @@ -240,7 +247,6 @@ impl> Parser { _ if implicit => { self.push_state(State::DocumentEnd); self.state = State::BlockNode; - self.skip(); Ok(Event::DocumentStart) }, _ => { @@ -460,7 +466,7 @@ mod test { use super::*; #[test] fn test_parser() { - let s: String = "--- + let s: String = " # comment a0 bb: val a1: @@ -474,8 +480,8 @@ a4: - 2 ".to_string(); let mut parser = Parser::new(s.chars()); - parser.load().unwrap(); - + let out = parser.load().unwrap(); + println!("DOC {:?}", out); } } diff --git a/saphyr/src/scanner.rs b/saphyr/src/scanner.rs index adf6518..02b6f65 100644 --- a/saphyr/src/scanner.rs +++ b/saphyr/src/scanner.rs @@ -540,7 +540,7 @@ impl> Scanner { // indicators ends a plain scalar match self.ch() { ':' if is_blankz(self.buffer[1]) => break, - ',' | ':' | '?' | '[' | ']' |'{' |'}' => break, + ',' | ':' | '?' | '[' | ']' |'{' |'}' if self.flow_level > 0 => break, _ => {} } @@ -718,7 +718,7 @@ a4: ".to_string(); let p = Scanner::new(s.chars()); for t in p { - // println!("{:?}", t); + println!("{:?}", t); } } } diff --git a/saphyr/src/yaml.rs b/saphyr/src/yaml.rs index d803bdc..4e5e4a1 100644 --- a/saphyr/src/yaml.rs +++ b/saphyr/src/yaml.rs @@ -1,11 +1,12 @@ use std::collections::{HashMap, BTreeMap}; use std::string; +use regex::Regex; -#[derive(Clone, PartialEq, PartialOrd, Debug)] +#[derive(Clone, PartialEq, PartialOrd, Debug, Eq, Ord)] pub enum Yaml { I64(i64), - U64(u64), - F64(f64), + //U64(u64), + //F64(f64), String(string::String), Boolean(bool), Array(self::Array), @@ -14,7 +15,7 @@ pub enum Yaml { } pub type Array = Vec; -pub type Hash = BTreeMap; +pub type Hash = BTreeMap; /// The errors that can arise while parsing a YAML stream. #[derive(Clone, Copy, PartialEq, Debug)] @@ -37,3 +38,74 @@ pub enum ErrorCode { NotFourDigit, NotUtf8, } + +macro_rules! define_as ( + ($name:ident, $t:ident, $yt:ident) => ( +pub fn $name(&self) -> Option<$t> { + match *self { + Yaml::$yt(v) => Some(v), + _ => None + } +} + ); +); + +macro_rules! define_as_ref ( + ($name:ident, $t:ty, $yt:ident) => ( +pub fn $name(&self) -> Option<$t> { + match *self { + Yaml::$yt(ref v) => Some(v), + _ => None + } +} + ); +); + +// these regex are from libyaml-rust project +macro_rules! regex( + ($s:expr) => (Regex::new($s).unwrap()); +); +impl Yaml { + define_as!(as_i64, i64, I64); + define_as!(as_bool, bool, Boolean); + + define_as_ref!(as_str, &str, String); + define_as_ref!(as_hash, &Hash, Hash); + define_as_ref!(as_vec, &Array, Array); + + pub fn is_null(&self) -> bool { + match *self { + Yaml::Null => true, + _ => false + } + } + + pub fn as_f64(&self) -> Option { + // XXX(chenyh) precompile me + let float_pattern = regex!(r"^([-+]?)(\.[0-9]+|[0-9]+(\.[0-9]*)?([eE][-+]?[0-9]+)?)$"); + match *self { + Yaml::String(ref v) if float_pattern.is_match(v) => { + v.parse::().ok() + }, + _ => None + } + } +} + +#[cfg(test)] +mod test { + use parser::Parser; + use yaml::Yaml; + // #[test] + fn test_coerce() { + let s = "--- +a: 1 +b: 2.2 +c: [1, 2] +"; + let mut parser = Parser::new(s.chars()); + let out = parser.load().unwrap(); + //assert_eq!(out.as_hash().unwrap()[&Yaml::String("a".to_string())].as_i64().unwrap(), 1i64); + } +} + From ae71367f3b337d290fec39fa9b3d7095b9ad9dcd Mon Sep 17 00:00:00 2001 From: Yuheng Chen Date: Mon, 25 May 2015 02:16:28 +0800 Subject: [PATCH 006/380] Add Index trait for YAML node --- saphyr/src/yaml.rs | 46 ++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 44 insertions(+), 2 deletions(-) diff --git a/saphyr/src/yaml.rs b/saphyr/src/yaml.rs index 4e5e4a1..1065421 100644 --- a/saphyr/src/yaml.rs +++ b/saphyr/src/yaml.rs @@ -1,4 +1,5 @@ -use std::collections::{HashMap, BTreeMap}; +use std::collections::BTreeMap; +use std::ops::Index; use std::string; use regex::Regex; @@ -12,6 +13,7 @@ pub enum Yaml { Array(self::Array), Hash(self::Hash), Null, + BadValue, } pub type Array = Vec; @@ -80,6 +82,14 @@ impl Yaml { } } + pub fn is_badvalue(&self) -> bool { + match *self { + Yaml::BadValue => true, + _ => false + } + } + + pub fn as_f64(&self) -> Option { // XXX(chenyh) precompile me let float_pattern = regex!(r"^([-+]?)(\.[0-9]+|[0-9]+(\.[0-9]*)?([eE][-+]?[0-9]+)?)$"); @@ -90,13 +100,42 @@ impl Yaml { _ => None } } + + pub fn from_str(s: &str) -> Yaml { + Yaml::String(s.to_string()) + } } +static BAD_VALUE: Yaml = Yaml::BadValue; +impl<'a> Index<&'a str> for Yaml { + type Output = Yaml; + + fn index(&self, idx: &'a str) -> &Yaml { + let key = Yaml::String(idx.to_string()); + match self.as_hash() { + Some(h) => h.get(&key).unwrap_or(&BAD_VALUE), + None => &BAD_VALUE + } + } +} + +impl Index for Yaml { + type Output = Yaml; + + fn index(&self, idx: usize) -> &Yaml { + match self.as_vec() { + Some(v) => v.get(idx).unwrap_or(&BAD_VALUE), + None => &BAD_VALUE + } + } +} + + #[cfg(test)] mod test { use parser::Parser; use yaml::Yaml; - // #[test] + #[test] fn test_coerce() { let s = "--- a: 1 @@ -105,6 +144,9 @@ c: [1, 2] "; let mut parser = Parser::new(s.chars()); let out = parser.load().unwrap(); + assert_eq!(out["a"].as_str().unwrap(), "1"); + assert_eq!(out["c"][1].as_str().unwrap(), "2"); + assert!(out["d"][0].is_badvalue()); //assert_eq!(out.as_hash().unwrap()[&Yaml::String("a".to_string())].as_i64().unwrap(), 1i64); } } From 2b7380893980ed89cc791c9c5aedd70bde6c795e Mon Sep 17 00:00:00 2001 From: Yuheng Chen Date: Mon, 25 May 2015 03:21:53 +0800 Subject: [PATCH 007/380] Add scan_flow_scalar --- saphyr/src/parser.rs | 15 +-- saphyr/src/scanner.rs | 217 +++++++++++++++++++++++++++++++++++++++--- saphyr/src/yaml.rs | 3 +- 3 files changed, 213 insertions(+), 22 deletions(-) diff --git a/saphyr/src/parser.rs b/saphyr/src/parser.rs index ffe97d9..d90dc43 100644 --- a/saphyr/src/parser.rs +++ b/saphyr/src/parser.rs @@ -37,7 +37,7 @@ pub enum Event { DocumentStart, DocumentEnd, Alias, - Scalar(String), + Scalar(String, TScalarStyle), SequenceStart, SequenceEnd, MappingStart, @@ -46,7 +46,7 @@ pub enum Event { impl Event { fn empty_scalar() -> Event { - Event::Scalar(String::new()) + Event::Scalar(String::new(), TScalarStyle::Plain) } } @@ -130,9 +130,8 @@ impl> Parser { fn load_node(&mut self, first_ev: &Event) -> Result { match *first_ev { - Event::Scalar(ref v) => { + Event::Scalar(ref v, _) => { // TODO scalar - println!("Scalar: {:?}", first_ev); Ok(Yaml::String(v.clone())) }, Event::SequenceStart => { @@ -294,10 +293,10 @@ impl> Parser { self.state = State::IndentlessSequenceEntry; Ok(Event::SequenceStart) }, - TokenType::ScalarToken(_, v) => { + TokenType::ScalarToken(style, v) => { self.pop_state(); self.skip(); - Ok(Event::Scalar(v)) + Ok(Event::Scalar(v, style)) }, TokenType::FlowSequenceStartToken => { self.state = State::FlowSequenceFirstEntry; @@ -472,12 +471,14 @@ a0 bb: val a1: b1: 4 b2: d -a2: 4 +a2: 4 # i'm comment a3: [1, 2, 3] a4: - - a1 - a2 - 2 +a5: 'single_quoted' +a5: \"double_quoted\" ".to_string(); let mut parser = Parser::new(s.chars()); let out = parser.load().unwrap(); diff --git a/saphyr/src/scanner.rs b/saphyr/src/scanner.rs index 02b6f65..13fc932 100644 --- a/saphyr/src/scanner.rs +++ b/saphyr/src/scanner.rs @@ -127,18 +127,23 @@ impl> Iterator for Scanner { } } +#[inline] fn is_z(c: char) -> bool { c == '\0' } +#[inline] fn is_break(c: char) -> bool { c == '\n' || c == '\r' } +#[inline] fn is_breakz(c: char) -> bool { is_break(c) || is_z(c) } +#[inline] fn is_blank(c: char) -> bool { c == ' ' || c == '\t' } +#[inline] fn is_blankz(c: char) -> bool { is_blank(c) || is_breakz(c) } @@ -166,6 +171,7 @@ impl> Scanner { } } + #[inline] fn lookahead(&mut self, count: usize) { if self.buffer.len() >= count { return; @@ -174,6 +180,7 @@ impl> Scanner { self.buffer.push_back(self.rdr.next().unwrap_or('\0')); } } + #[inline] fn skip(&mut self) { let c = self.buffer.pop_front().unwrap(); @@ -185,25 +192,41 @@ impl> Scanner { self.mark.col += 1; } } + #[inline] + fn skip_line(&mut self) { + if self.buffer[0] == '\r' && self.buffer[1] == '\n' { + self.skip(); + self.skip(); + } else if is_break(self.buffer[0]) { + self.skip(); + } + } + #[inline] fn ch(&self) -> char { self.buffer[0] } + #[inline] fn ch_is(&self, c: char) -> bool { self.buffer[0] == c } #[allow(dead_code)] + #[inline] fn eof(&self) -> bool { self.ch_is('\0') } + #[inline] pub fn stream_started(&self) -> bool { self.stream_start_produced } + #[inline] pub fn stream_ended(&self) -> bool { self.stream_end_produced } + #[inline] pub fn mark(&self) -> Marker { self.mark } + #[inline] fn read_break(&mut self, s: &mut String) { if self.buffer[0] == '\r' && self.buffer[1] == '\n' { s.push('\n'); @@ -291,8 +314,8 @@ impl> Scanner { '!' => unimplemented!(), '|' if self.flow_level == 0 => unimplemented!(), '>' if self.flow_level == 0 => unimplemented!(), - '\'' => unimplemented!(), - '"' => unimplemented!(), + '\'' => try!(self.fetch_flow_scalar(true)), + '"' => try!(self.fetch_flow_scalar(false)), // plain scalar '-' if !is_blankz(nc) => try!(self.fetch_plain_scalar()), ':' | '?' if !is_blankz(nc) && self.flow_level == 0 => try!(self.fetch_plain_scalar()), @@ -368,7 +391,8 @@ impl> Scanner { ' ' => self.skip(), '\t' if self.flow_level > 0 || !self.simple_key_allowed => self.skip(), '\n' | '\r' => { - self.skip(); + self.lookahead(2); + self.skip_line(); if self.flow_level == 0 { self.allow_simple_key(); } @@ -493,15 +517,180 @@ impl> Scanner { Ok(()) } - fn fetch_plain_scalar(&mut self) -> Result<(), ScanError> { + fn fetch_flow_scalar(&mut self, single: bool) -> ScanResult { try!(self.save_simple_key()); + self.disallow_simple_key(); + let tok = try!(self.scan_flow_scalar(single)); + + self.tokens.push_back(tok); + Ok(()) + } + + fn scan_flow_scalar(&mut self, single: bool) -> Result { + let start_mark = self.mark; + + let mut string = String::new(); + let mut leading_break = String::new(); + let mut trailing_breaks = String::new(); + let mut whitespaces = String::new(); + let mut leading_blanks = false; + + /* Eat the left quote. */ + self.skip(); + + loop { + /* Check for a document indicator. */ + self.lookahead(4); + + if self.mark.col == 0 && + ((self.buffer[0] == '-') && + (self.buffer[1] == '-') && + (self.buffer[2] == '-')) || + ((self.buffer[0] == '.') && + (self.buffer[1] == '.') && + (self.buffer[2] == '.')) && + is_blankz(self.buffer[3]) { + return Err(ScanError::new(start_mark, + "while scanning a quoted scalar, found unexpected document indicator")); + } + + if is_z(self.ch()) { + return Err(ScanError::new(start_mark, + "while scanning a quoted scalar, found unexpected end of stream")); + } + + self.lookahead(2); + + leading_blanks = false; + // Consume non-blank characters. + + while !is_blankz(self.ch()) { + match self.ch() { + // Check for an escaped single quote. + '\'' if self.buffer[1] == '\'' && single => { + string.push('\''); + self.skip(); + self.skip(); + }, + // Check for the right quote. + '\'' if single => { break; }, + '"' if !single => { break; }, + // Check for an escaped line break. + '\\' if !single && is_break(self.buffer[1]) => { + self.lookahead(3); + self.skip(); + self.skip_line(); + leading_blanks = true; + break; + } + // Check for an escape sequence. + '\\' if !single => { + let mut code_length = 0usize; + match self.buffer[1] { + '0' => string.push('\0'), + 'a' => string.push('\x07'), + 'b' => string.push('\x08'), + 't' | '\t' => string.push('\t'), + 'n' => string.push('\n'), + 'v' => string.push('\x0b'), + 'f' => string.push('\x0c'), + 'r' => string.push('\x0d'), + 'e' => string.push('\x1b'), + ' ' => string.push('\x20'), + '"' => string.push('"'), + '\'' => string.push('\''), + '\\' => string.push('\\'), + //'N' => { string.push('\xc2'); string.push('\x85') }, + 'x' => code_length = 2, + 'u' => code_length = 4, + 'U' => code_length = 8, + _ => return Err(ScanError::new(start_mark, + "while parsing a quoted scalar, found unknown escape character")) + } + self.skip(); + self.skip(); + // Consume an arbitrary escape code. + if code_length > 0 { + let val = 0; + self.lookahead(code_length); + unimplemented!(); + } + }, + c => { string.push(c); self.skip(); } + } + self.lookahead(2); + } + match self.ch() { + '\'' if single => { break; }, + '"' if !single => { break; }, + _ => {} + } + self.lookahead(1); + + // Consume blank characters. + while is_blank(self.ch()) || is_break(self.ch()) { + if is_blank(self.ch()) { + // Consume a space or a tab character. + if !leading_blanks { + whitespaces.push(self.ch()); + self.skip(); + } else { + self.skip(); + } + } else { + self.lookahead(2); + // Check if it is a first line break. + if !leading_blanks { + whitespaces.clear(); + self.read_break(&mut leading_break); + leading_blanks = true; + } else { + self.read_break(&mut trailing_breaks); + } + } + self.lookahead(1); + } + // Join the whitespaces or fold line breaks. + if leading_blanks { + if !leading_break.is_empty() { + if trailing_breaks.is_empty() { + string.push(' '); + } else { + string.extend(trailing_breaks.chars()); + trailing_breaks.clear(); + } + leading_break.clear(); + } else { + string.extend(leading_break.chars()); + string.extend(trailing_breaks.chars()); + trailing_breaks.clear(); + leading_break.clear(); + } + leading_blanks = false; + } else { + string.extend(whitespaces.chars()); + whitespaces.clear(); + } + } // loop + + // Eat the right quote. + self.skip(); + + if single { + Ok(Token(start_mark, TokenType::ScalarToken(TScalarStyle::SingleQuoted, string))) + } else { + Ok(Token(start_mark, TokenType::ScalarToken(TScalarStyle::DoubleQuoted, string))) + } + } + + fn fetch_plain_scalar(&mut self) -> ScanResult { + try!(self.save_simple_key()); self.disallow_simple_key(); let tok = try!(self.scan_plain_scalar()); self.tokens.push_back(tok); - Ok(()) } @@ -521,21 +710,21 @@ impl> Scanner { if self.mark.col == 0 && ((self.buffer[0] == '-') && - (self.buffer[1] == '-') && - (self.buffer[2] == '-')) || - ((self.buffer[0] == '.') && - (self.buffer[1] == '.') && - (self.buffer[2] == '.')) && - is_blankz(self.buffer[3]) { - break; - } + (self.buffer[1] == '-') && + (self.buffer[2] == '-')) || + ((self.buffer[0] == '.') && + (self.buffer[1] == '.') && + (self.buffer[2] == '.')) && + is_blankz(self.buffer[3]) { + break; + } if self.ch() == '#' { break; } while !is_blankz(self.ch()) { if self.flow_level > 0 && self.ch() == ':' && is_blankz(self.ch()) { return Err(ScanError::new(start_mark, - "while scanning a plain scalar, found unexpected ':'")); + "while scanning a plain scalar, found unexpected ':'")); } // indicators ends a plain scalar match self.ch() { diff --git a/saphyr/src/yaml.rs b/saphyr/src/yaml.rs index 1065421..cab65cc 100644 --- a/saphyr/src/yaml.rs +++ b/saphyr/src/yaml.rs @@ -13,6 +13,8 @@ pub enum Yaml { Array(self::Array), Hash(self::Hash), Null, + /// Access non-exist node by Index trait will return BadValue. + /// This simplifies error handling of user. BadValue, } @@ -89,7 +91,6 @@ impl Yaml { } } - pub fn as_f64(&self) -> Option { // XXX(chenyh) precompile me let float_pattern = regex!(r"^([-+]?)(\.[0-9]+|[0-9]+(\.[0-9]*)?([eE][-+]?[0-9]+)?)$"); From b070a75ccb5d99db4b2090c1764eb687bf0c4de4 Mon Sep 17 00:00:00 2001 From: Yuheng Chen Date: Mon, 25 May 2015 03:29:52 +0800 Subject: [PATCH 008/380] Add parse for Yaml --- saphyr/Cargo.toml | 3 --- saphyr/src/lib.rs | 3 +-- saphyr/src/yaml.rs | 15 +++++---------- 3 files changed, 6 insertions(+), 15 deletions(-) diff --git a/saphyr/Cargo.toml b/saphyr/Cargo.toml index 82b30d2..d7dd718 100644 --- a/saphyr/Cargo.toml +++ b/saphyr/Cargo.toml @@ -2,6 +2,3 @@ name = "yaml-rust" version = "0.1.0" authors = ["Yuheng Chen "] - -[dependencies] -regex = "*" diff --git a/saphyr/src/lib.rs b/saphyr/src/lib.rs index 1241d22..6dd2656 100644 --- a/saphyr/src/lib.rs +++ b/saphyr/src/lib.rs @@ -1,8 +1,7 @@ -extern crate regex; - pub mod yaml; pub mod scanner; pub mod parser; + #[test] fn it_works() { } diff --git a/saphyr/src/yaml.rs b/saphyr/src/yaml.rs index cab65cc..05b9fad 100644 --- a/saphyr/src/yaml.rs +++ b/saphyr/src/yaml.rs @@ -1,7 +1,7 @@ use std::collections::BTreeMap; use std::ops::Index; use std::string; -use regex::Regex; +use std::str::FromStr; #[derive(Clone, PartialEq, PartialOrd, Debug, Eq, Ord)] pub enum Yaml { @@ -65,10 +65,6 @@ pub fn $name(&self) -> Option<$t> { ); ); -// these regex are from libyaml-rust project -macro_rules! regex( - ($s:expr) => (Regex::new($s).unwrap()); -); impl Yaml { define_as!(as_i64, i64, I64); define_as!(as_bool, bool, Boolean); @@ -91,12 +87,11 @@ impl Yaml { } } - pub fn as_f64(&self) -> Option { + pub fn parse(&self) -> Option { // XXX(chenyh) precompile me - let float_pattern = regex!(r"^([-+]?)(\.[0-9]+|[0-9]+(\.[0-9]*)?([eE][-+]?[0-9]+)?)$"); match *self { - Yaml::String(ref v) if float_pattern.is_match(v) => { - v.parse::().ok() + Yaml::String(ref v) => { + v.parse::().ok() }, _ => None } @@ -146,7 +141,7 @@ c: [1, 2] let mut parser = Parser::new(s.chars()); let out = parser.load().unwrap(); assert_eq!(out["a"].as_str().unwrap(), "1"); - assert_eq!(out["c"][1].as_str().unwrap(), "2"); + assert_eq!(out["c"][1].parse::().unwrap(), 2); assert!(out["d"][0].is_badvalue()); //assert_eq!(out.as_hash().unwrap()[&Yaml::String("a".to_string())].as_i64().unwrap(), 1i64); } From fe07995ee4b3ae9ac5f04c045b405b3a28f0136a Mon Sep 17 00:00:00 2001 From: Yuheng Chen Date: Mon, 25 May 2015 13:54:39 +0800 Subject: [PATCH 009/380] Add scalar coersion --- saphyr/src/parser.rs | 16 ++++++++++++++-- saphyr/src/yaml.rs | 16 +++++++--------- 2 files changed, 21 insertions(+), 11 deletions(-) diff --git a/saphyr/src/parser.rs b/saphyr/src/parser.rs index d90dc43..caafb45 100644 --- a/saphyr/src/parser.rs +++ b/saphyr/src/parser.rs @@ -130,9 +130,20 @@ impl> Parser { fn load_node(&mut self, first_ev: &Event) -> Result { match *first_ev { - Event::Scalar(ref v, _) => { + Event::Scalar(ref v, style) => { // TODO scalar - Ok(Yaml::String(v.clone())) + if style != TScalarStyle::Plain { + Ok(Yaml::String(v.clone())) + } else { + match v.as_ref() { + "~" => Ok(Yaml::Null), + "true" => Ok(Yaml::Boolean(true)), + "false" => Ok(Yaml::Boolean(false)), + // try parsing as f64 + _ if v.parse::().is_ok() => Ok(Yaml::Number(v.clone())), + _ => Ok(Yaml::String(v.clone())) + } + } }, Event::SequenceStart => { self.load_sequence(first_ev) @@ -140,6 +151,7 @@ impl> Parser { Event::MappingStart => { self.load_mapping(first_ev) }, + // TODO more events _ => { unreachable!(); } } } diff --git a/saphyr/src/yaml.rs b/saphyr/src/yaml.rs index 05b9fad..83ff906 100644 --- a/saphyr/src/yaml.rs +++ b/saphyr/src/yaml.rs @@ -5,9 +5,8 @@ use std::str::FromStr; #[derive(Clone, PartialEq, PartialOrd, Debug, Eq, Ord)] pub enum Yaml { - I64(i64), - //U64(u64), - //F64(f64), + /// number types are stored as String + Number(string::String), String(string::String), Boolean(bool), Array(self::Array), @@ -66,7 +65,6 @@ pub fn $name(&self) -> Option<$t> { ); impl Yaml { - define_as!(as_i64, i64, I64); define_as!(as_bool, bool, Boolean); define_as_ref!(as_str, &str, String); @@ -87,10 +85,9 @@ impl Yaml { } } - pub fn parse(&self) -> Option { - // XXX(chenyh) precompile me + pub fn as_number(&self) -> Option { match *self { - Yaml::String(ref v) => { + Yaml::Number(ref v) => { v.parse::().ok() }, _ => None @@ -140,8 +137,9 @@ c: [1, 2] "; let mut parser = Parser::new(s.chars()); let out = parser.load().unwrap(); - assert_eq!(out["a"].as_str().unwrap(), "1"); - assert_eq!(out["c"][1].parse::().unwrap(), 2); + assert_eq!(out["a"].as_number::().unwrap(), 1); + assert_eq!(out["b"].as_number::().unwrap(), 2.2f32); + assert_eq!(out["c"][1].as_number::().unwrap(), 2); assert!(out["d"][0].is_badvalue()); //assert_eq!(out.as_hash().unwrap()[&Yaml::String("a".to_string())].as_i64().unwrap(), 1i64); } From 998dbbef917fdfe9e5ca053e30645f763f135344 Mon Sep 17 00:00:00 2001 From: Yuheng Chen Date: Mon, 25 May 2015 19:31:33 +0800 Subject: [PATCH 010/380] Add scanner unit tests --- saphyr/src/parser.rs | 4 +- saphyr/src/scanner.rs | 397 +++++++++++++++++++++++++++++++++++++++--- saphyr/src/yaml.rs | 24 +-- 3 files changed, 379 insertions(+), 46 deletions(-) diff --git a/saphyr/src/parser.rs b/saphyr/src/parser.rs index caafb45..3e2b069 100644 --- a/saphyr/src/parser.rs +++ b/saphyr/src/parser.rs @@ -490,11 +490,13 @@ a4: - a2 - 2 a5: 'single_quoted' -a5: \"double_quoted\" +a6: \"double_quoted\" +a7: 你好 ".to_string(); let mut parser = Parser::new(s.chars()); let out = parser.load().unwrap(); println!("DOC {:?}", out); + println!("DOC {}", out["a7"].as_str().unwrap()); } } diff --git a/saphyr/src/scanner.rs b/saphyr/src/scanner.rs index 13fc932..7109bd1 100644 --- a/saphyr/src/scanner.rs +++ b/saphyr/src/scanner.rs @@ -307,7 +307,7 @@ impl> Scanner { '}' => try!(self.fetch_flow_collection_end(TokenType::FlowMappingEndToken)), ',' => try!(self.fetch_flow_entry()), '-' if is_blankz(nc) => try!(self.fetch_block_entry()), - '?' if self.flow_level > 0 || is_blankz(nc) => unimplemented!(), + '?' if self.flow_level > 0 || is_blankz(nc) => try!(self.fetch_key()), ':' if self.flow_level > 0 || is_blankz(nc) => try!(self.fetch_value()), '*' => unimplemented!(), '&' => unimplemented!(), @@ -805,11 +805,36 @@ impl> Scanner { Ok(Token(start_mark, TokenType::ScalarToken(TScalarStyle::Plain, string))) } + fn fetch_key(&mut self) -> ScanResult { + let start_mark = self.mark; + if self.flow_level == 0 { + // Check if we are allowed to start a new key (not nessesary simple). + if !self.simple_key_allowed { + return Err(ScanError::new(self.mark, "mapping keys are not allowed in this context")); + } + self.roll_indent(start_mark.col, None, + TokenType::BlockMappingStartToken, start_mark); + } + + try!(self.remove_simple_key()); + + if self.flow_level == 0 { + self.allow_simple_key(); + } else { + self.disallow_simple_key(); + } + + self.skip(); + self.tokens.push_back(Token(start_mark, TokenType::KeyToken)); + Ok(()) + } + fn fetch_value(&mut self) -> ScanResult { let sk = self.simple_keys.last().unwrap().clone(); let start_mark = self.mark; if sk.possible { - let tok = Token(start_mark, TokenType::KeyToken); + // insert simple key + let tok = Token(sk.mark, TokenType::KeyToken); let tokens_parsed = self.tokens_parsed; self.insert_token(sk.token_number - tokens_parsed, tok); @@ -821,9 +846,22 @@ impl> Scanner { self.disallow_simple_key(); } else { // The ':' indicator follows a complex key. - unimplemented!(); - } + if self.flow_level == 0 { + if !self.simple_key_allowed { + return Err(ScanError::new(start_mark, + "mapping values are not allowed in this context")); + } + self.roll_indent(start_mark.col, None, + TokenType::BlockMappingStartToken, start_mark); + } + + if self.flow_level == 0 { + self.allow_simple_key(); + } else { + self.disallow_simple_key(); + } + } self.skip(); self.tokens.push_back(Token(start_mark, TokenType::ValueToken)); @@ -890,25 +928,340 @@ impl> Scanner { #[cfg(test)] mod test { use super::*; - #[test] - fn test_tokenizer() { - let s: String = "--- -# comment -a0 bb: val -a1: - b1: 4 - b2: d -a2: 4 -a3: [1, 2, 3] -a4: - - - a1 - - a2 - - 2 -".to_string(); - let p = Scanner::new(s.chars()); - for t in p { - println!("{:?}", t); + use super::TokenType::*; + +macro_rules! next { + ($p:ident, $tk:pat) => {{ + let tok = $p.next().unwrap(); + match tok.1 { + $tk => {}, + _ => { panic!("unexpected token: {:?}", + tok) } } + }} +} + +macro_rules! next_scalar { + ($p:ident, $tk:expr, $v:expr) => {{ + let tok = $p.next().unwrap(); + match tok.1 { + ScalarToken(style, ref v) => { + assert_eq!(style, $tk); + assert_eq!(v, $v); + }, + _ => { panic!("unexpected token: {:?}", + tok) } + } + }} +} + +macro_rules! end { + ($p:ident) => {{ + assert_eq!($p.next(), None); + }} +} + /// test cases in libyaml scanner.c + #[test] + fn test_empty() { + let s = ""; + let mut p = Scanner::new(s.chars()); + next!(p, StreamStartToken(..)); + next!(p, StreamEndToken); + end!(p); + } + + #[test] + fn test_scalar() { + let s = "a scalar"; + let mut p = Scanner::new(s.chars()); + next!(p, StreamStartToken(..)); + next!(p, ScalarToken(TScalarStyle::Plain, _)); + next!(p, StreamEndToken); + end!(p); + } + + #[test] + fn test_explicit_scalar() { + let s = +"--- +'a scalar' +... +"; + let mut p = Scanner::new(s.chars()); + next!(p, StreamStartToken(..)); + next!(p, DocumentStartToken); + next!(p, ScalarToken(TScalarStyle::SingleQuoted, _)); + next!(p, DocumentEndToken); + next!(p, StreamEndToken); + end!(p); + } + + #[test] + fn test_multiple_documents() { + let s = +" +'a scalar' +--- +'a scalar' +--- +'a scalar' +"; + let mut p = Scanner::new(s.chars()); + next!(p, StreamStartToken(..)); + next!(p, ScalarToken(TScalarStyle::SingleQuoted, _)); + next!(p, DocumentStartToken); + next!(p, ScalarToken(TScalarStyle::SingleQuoted, _)); + next!(p, DocumentStartToken); + next!(p, ScalarToken(TScalarStyle::SingleQuoted, _)); + next!(p, StreamEndToken); + end!(p); + } + + #[test] + fn test_a_flow_sequence() { + let s = "[item 1, item 2, item 3]"; + let mut p = Scanner::new(s.chars()); + next!(p, StreamStartToken(..)); + next!(p, FlowSequenceStartToken); + next_scalar!(p, TScalarStyle::Plain, "item 1"); + next!(p, FlowEntryToken); + next!(p, ScalarToken(TScalarStyle::Plain, _)); + next!(p, FlowEntryToken); + next!(p, ScalarToken(TScalarStyle::Plain, _)); + next!(p, FlowSequenceEndToken); + next!(p, StreamEndToken); + end!(p); + } + + #[test] + fn test_a_flow_mapping() { + let s = +" +{ + a simple key: a value, # Note that the KEY token is produced. + ? a complex key: another value, +} +"; + let mut p = Scanner::new(s.chars()); + next!(p, StreamStartToken(..)); + next!(p, FlowMappingStartToken); + next!(p, KeyToken); + next!(p, ScalarToken(TScalarStyle::Plain, _)); + next!(p, ValueToken); + next!(p, ScalarToken(TScalarStyle::Plain, _)); + next!(p, FlowEntryToken); + next!(p, KeyToken); + next_scalar!(p, TScalarStyle::Plain, "a complex key"); + next!(p, ValueToken); + next!(p, ScalarToken(TScalarStyle::Plain, _)); + next!(p, FlowEntryToken); + next!(p, FlowMappingEndToken); + next!(p, StreamEndToken); + end!(p); + } + + #[test] + fn test_block_sequences() { + let s = +" +- item 1 +- item 2 +- + - item 3.1 + - item 3.2 +- + key 1: value 1 + key 2: value 2 +"; + let mut p = Scanner::new(s.chars()); + next!(p, StreamStartToken(..)); + next!(p, BlockSequenceStartToken); + next!(p, BlockEntryToken); + next_scalar!(p, TScalarStyle::Plain, "item 1"); + next!(p, BlockEntryToken); + next_scalar!(p, TScalarStyle::Plain, "item 2"); + next!(p, BlockEntryToken); + next!(p, BlockSequenceStartToken); + next!(p, BlockEntryToken); + next_scalar!(p, TScalarStyle::Plain, "item 3.1"); + next!(p, BlockEntryToken); + next_scalar!(p, TScalarStyle::Plain, "item 3.2"); + next!(p, BlockEndToken); + next!(p, BlockEntryToken); + next!(p, BlockMappingStartToken); + next!(p, KeyToken); + next_scalar!(p, TScalarStyle::Plain, "key 1"); + next!(p, ValueToken); + next_scalar!(p, TScalarStyle::Plain, "value 1"); + next!(p, KeyToken); + next_scalar!(p, TScalarStyle::Plain, "key 2"); + next!(p, ValueToken); + next_scalar!(p, TScalarStyle::Plain, "value 2"); + next!(p, BlockEndToken); + next!(p, BlockEndToken); + next!(p, StreamEndToken); + end!(p); + } + + #[test] + fn test_block_mappings() { + let s = +" +a simple key: a value # The KEY token is produced here. +? a complex key +: another value +a mapping: + key 1: value 1 + key 2: value 2 +a sequence: + - item 1 + - item 2 +"; + let mut p = Scanner::new(s.chars()); + next!(p, StreamStartToken(..)); + next!(p, BlockMappingStartToken); + next!(p, KeyToken); + next!(p, ScalarToken(_, _)); + next!(p, ValueToken); + next!(p, ScalarToken(_, _)); + next!(p, KeyToken); + next!(p, ScalarToken(_, _)); + next!(p, ValueToken); + next!(p, ScalarToken(_, _)); + next!(p, KeyToken); + next!(p, ScalarToken(_, _)); + next!(p, ValueToken); // libyaml comment seems to be wrong + next!(p, BlockMappingStartToken); + next!(p, KeyToken); + next!(p, ScalarToken(_, _)); + next!(p, ValueToken); + next!(p, ScalarToken(_, _)); + next!(p, KeyToken); + next!(p, ScalarToken(_, _)); + next!(p, ValueToken); + next!(p, ScalarToken(_, _)); + next!(p, BlockEndToken); + next!(p, KeyToken); + next!(p, ScalarToken(_, _)); + next!(p, ValueToken); + next!(p, BlockSequenceStartToken); + next!(p, BlockEntryToken); + next!(p, ScalarToken(_, _)); + next!(p, BlockEntryToken); + next!(p, ScalarToken(_, _)); + next!(p, BlockEndToken); + next!(p, BlockEndToken); + next!(p, StreamEndToken); + end!(p); + + } + + #[test] + fn test_no_block_sequence_start() { + let s = +" +key: +- item 1 +- item 2 +"; + let mut p = Scanner::new(s.chars()); + next!(p, StreamStartToken(..)); + next!(p, BlockMappingStartToken); + next!(p, KeyToken); + next_scalar!(p, TScalarStyle::Plain, "key"); + next!(p, ValueToken); + next!(p, BlockEntryToken); + next_scalar!(p, TScalarStyle::Plain, "item 1"); + next!(p, BlockEntryToken); + next_scalar!(p, TScalarStyle::Plain, "item 2"); + next!(p, BlockEndToken); + next!(p, StreamEndToken); + end!(p); + } + + #[test] + fn test_collections_in_sequence() { + let s = +" +- - item 1 + - item 2 +- key 1: value 1 + key 2: value 2 +- ? complex key + : complex value +"; + let mut p = Scanner::new(s.chars()); + next!(p, StreamStartToken(..)); + next!(p, BlockSequenceStartToken); + next!(p, BlockEntryToken); + next!(p, BlockSequenceStartToken); + next!(p, BlockEntryToken); + next_scalar!(p, TScalarStyle::Plain, "item 1"); + next!(p, BlockEntryToken); + next_scalar!(p, TScalarStyle::Plain, "item 2"); + next!(p, BlockEndToken); + next!(p, BlockEntryToken); + next!(p, BlockMappingStartToken); + next!(p, KeyToken); + next_scalar!(p, TScalarStyle::Plain, "key 1"); + next!(p, ValueToken); + next_scalar!(p, TScalarStyle::Plain, "value 1"); + next!(p, KeyToken); + next_scalar!(p, TScalarStyle::Plain, "key 2"); + next!(p, ValueToken); + next_scalar!(p, TScalarStyle::Plain, "value 2"); + next!(p, BlockEndToken); + next!(p, BlockEntryToken); + next!(p, BlockMappingStartToken); + next!(p, KeyToken); + next_scalar!(p, TScalarStyle::Plain, "complex key"); + next!(p, ValueToken); + next_scalar!(p, TScalarStyle::Plain, "complex value"); + next!(p, BlockEndToken); + next!(p, BlockEndToken); + next!(p, StreamEndToken); + end!(p); + } + + #[test] + fn test_collections_in_mapping() { + let s = +" +? a sequence +: - item 1 + - item 2 +? a mapping +: key 1: value 1 + key 2: value 2 +"; + let mut p = Scanner::new(s.chars()); + next!(p, StreamStartToken(..)); + next!(p, BlockMappingStartToken); + next!(p, KeyToken); + next_scalar!(p, TScalarStyle::Plain, "a sequence"); + next!(p, ValueToken); + next!(p, BlockSequenceStartToken); + next!(p, BlockEntryToken); + next_scalar!(p, TScalarStyle::Plain, "item 1"); + next!(p, BlockEntryToken); + next_scalar!(p, TScalarStyle::Plain, "item 2"); + next!(p, BlockEndToken); + next!(p, KeyToken); + next_scalar!(p, TScalarStyle::Plain, "a mapping"); + next!(p, ValueToken); + next!(p, BlockMappingStartToken); + next!(p, KeyToken); + next_scalar!(p, TScalarStyle::Plain, "key 1"); + next!(p, ValueToken); + next_scalar!(p, TScalarStyle::Plain, "value 1"); + next!(p, KeyToken); + next_scalar!(p, TScalarStyle::Plain, "key 2"); + next!(p, ValueToken); + next_scalar!(p, TScalarStyle::Plain, "value 2"); + next!(p, BlockEndToken); + next!(p, BlockEndToken); + next!(p, StreamEndToken); + end!(p); } } diff --git a/saphyr/src/yaml.rs b/saphyr/src/yaml.rs index 83ff906..4371a20 100644 --- a/saphyr/src/yaml.rs +++ b/saphyr/src/yaml.rs @@ -5,7 +5,7 @@ use std::str::FromStr; #[derive(Clone, PartialEq, PartialOrd, Debug, Eq, Ord)] pub enum Yaml { - /// number types are stored as String + /// number types are stored as String, and parsed on demand. Number(string::String), String(string::String), Boolean(bool), @@ -20,28 +20,6 @@ pub enum Yaml { pub type Array = Vec; pub type Hash = BTreeMap; -/// The errors that can arise while parsing a YAML stream. -#[derive(Clone, Copy, PartialEq, Debug)] -pub enum ErrorCode { - InvalidSyntax, - InvalidNumber, - EOFWhileParsingObject, - EOFWhileParsingArray, - EOFWhileParsingValue, - EOFWhileParsingString, - KeyMustBeAString, - ExpectedColon, - TrailingCharacters, - TrailingComma, - InvalidEscape, - InvalidUnicodeCodePoint, - LoneLeadingSurrogateInHexEscape, - UnexpectedEndOfHexEscape, - UnrecognizedHex, - NotFourDigit, - NotUtf8, -} - macro_rules! define_as ( ($name:ident, $t:ident, $yt:ident) => ( pub fn $name(&self) -> Option<$t> { From 6233d8cf680121d72bc678010bec892fbc296574 Mon Sep 17 00:00:00 2001 From: Yuheng Chen Date: Tue, 26 May 2015 15:46:19 +0800 Subject: [PATCH 011/380] Add test spectest --- saphyr/tests/spec_test.rs | 63 ++ saphyr/tests/spec_test.rs.inc | 1610 ++++++++++++++++++++++++++++++ saphyr/tests/specexamples.rs.inc | 336 +++++++ 3 files changed, 2009 insertions(+) create mode 100644 saphyr/tests/spec_test.rs create mode 100644 saphyr/tests/spec_test.rs.inc create mode 100644 saphyr/tests/specexamples.rs.inc diff --git a/saphyr/tests/spec_test.rs b/saphyr/tests/spec_test.rs new file mode 100644 index 0000000..7c481b4 --- /dev/null +++ b/saphyr/tests/spec_test.rs @@ -0,0 +1,63 @@ +#![allow(dead_code)] +extern crate yaml_rust; + +use yaml_rust::parser::Parser; +use yaml_rust::yaml::Yaml; + + +#[derive(Clone, PartialEq, PartialOrd, Debug)] +enum TestEvent { + OnDocumentStart, + OnDocumentEnd, + OnSequenceStart, + OnSequenceEnd, + OnMapStart, + OnMapEnd, + OnScalar, + OnAlias, + OnNull, +} + +fn yaml_to_test_events(root :&Yaml) -> Vec { + fn next(root: &Yaml, evs: &mut Vec) { + match *root { + Yaml::BadValue => { panic!("unexpected BadValue"); }, + Yaml::Null => { evs.push(TestEvent::OnNull); }, + Yaml::Array(ref v) => { + evs.push(TestEvent::OnSequenceStart); + for e in v { + next(e, evs); + } + evs.push(TestEvent::OnSequenceEnd); + }, + Yaml::Hash(ref v) => { + evs.push(TestEvent::OnMapStart); + for (k, v) in v { + next(k, evs); + next(v, evs); + } + evs.push(TestEvent::OnMapEnd); + }, + _ => { evs.push(TestEvent::OnScalar); } + } + } + let mut evs: Vec = Vec::new(); + evs.push(TestEvent::OnDocumentStart); + next(&root, &mut evs); + evs.push(TestEvent::OnDocumentEnd); + evs +} + +macro_rules! assert_next { + ($v:expr, $p:pat) => ( + match $v.next().unwrap() { + $p => {}, + e => { panic!("unexpected event: {:?}", e); } + } + ) +} + +// auto generated from handler_spec_test.cpp +include!("specexamples.rs.inc"); +include!("spec_test.rs.inc"); + diff --git a/saphyr/tests/spec_test.rs.inc b/saphyr/tests/spec_test.rs.inc new file mode 100644 index 0000000..e9a68c7 --- /dev/null +++ b/saphyr/tests/spec_test.rs.inc @@ -0,0 +1,1610 @@ +#[test] +fn test_ex2_1_seq_scalars() { + let mut p = Parser::new(EX2_1.chars()); + let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + assert_next!(v, TestEvent::OnDocumentStart); + assert_next!(v, TestEvent::OnSequenceStart); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnSequenceEnd); + assert_next!(v, TestEvent::OnDocumentEnd); +} + +#[test] +fn test_ex2_2_mapping_scalars_to_scalars() { + let mut p = Parser::new(EX2_2.chars()); + let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + assert_next!(v, TestEvent::OnDocumentStart); + assert_next!(v, TestEvent::OnMapStart); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnMapEnd); + assert_next!(v, TestEvent::OnDocumentEnd); +} + +#[test] +fn test_ex2_3_mapping_scalars_to_sequences() { + let mut p = Parser::new(EX2_3.chars()); + let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + assert_next!(v, TestEvent::OnDocumentStart); + assert_next!(v, TestEvent::OnMapStart); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnSequenceStart); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnSequenceEnd); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnSequenceStart); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnSequenceEnd); + assert_next!(v, TestEvent::OnMapEnd); + assert_next!(v, TestEvent::OnDocumentEnd); +} + +#[test] +fn test_ex2_4_sequence_of_mappings() { + let mut p = Parser::new(EX2_4.chars()); + let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + assert_next!(v, TestEvent::OnDocumentStart); + assert_next!(v, TestEvent::OnSequenceStart); + assert_next!(v, TestEvent::OnMapStart); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnMapEnd); + assert_next!(v, TestEvent::OnMapStart); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnMapEnd); + assert_next!(v, TestEvent::OnSequenceEnd); + assert_next!(v, TestEvent::OnDocumentEnd); +} + +#[test] +fn test_ex2_5_sequence_of_sequences() { + let mut p = Parser::new(EX2_5.chars()); + let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + assert_next!(v, TestEvent::OnDocumentStart); + assert_next!(v, TestEvent::OnSequenceStart); + assert_next!(v, TestEvent::OnSequenceStart); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnSequenceEnd); + assert_next!(v, TestEvent::OnSequenceStart); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnSequenceEnd); + assert_next!(v, TestEvent::OnSequenceStart); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnSequenceEnd); + assert_next!(v, TestEvent::OnSequenceEnd); + assert_next!(v, TestEvent::OnDocumentEnd); +} + +#[test] +fn test_ex2_6_mapping_of_mappings() { + let mut p = Parser::new(EX2_6.chars()); + let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + assert_next!(v, TestEvent::OnDocumentStart); + assert_next!(v, TestEvent::OnMapStart); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnMapStart); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnMapEnd); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnMapStart); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnMapEnd); + assert_next!(v, TestEvent::OnMapEnd); + assert_next!(v, TestEvent::OnDocumentEnd); +} + +#[test] +fn test_ex2_7_two_documents_in_a_stream() { + let mut p = Parser::new(EX2_7.chars()); + let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + assert_next!(v, TestEvent::OnDocumentStart); + assert_next!(v, TestEvent::OnSequenceStart); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnSequenceEnd); + assert_next!(v, TestEvent::OnDocumentEnd); + assert_next!(v, TestEvent::OnDocumentStart); + assert_next!(v, TestEvent::OnSequenceStart); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnSequenceEnd); + assert_next!(v, TestEvent::OnDocumentEnd); +} + +#[test] +fn test_ex2_8_play_by_play_feed() { + let mut p = Parser::new(EX2_8.chars()); + let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + assert_next!(v, TestEvent::OnDocumentStart); + assert_next!(v, TestEvent::OnMapStart); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnMapEnd); + assert_next!(v, TestEvent::OnDocumentEnd); + assert_next!(v, TestEvent::OnDocumentStart); + assert_next!(v, TestEvent::OnMapStart); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnMapEnd); + assert_next!(v, TestEvent::OnDocumentEnd); +} + +#[test] +fn test_ex2_9_single_document_with_two_comments() { + let mut p = Parser::new(EX2_9.chars()); + let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + assert_next!(v, TestEvent::OnDocumentStart); + assert_next!(v, TestEvent::OnMapStart); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnSequenceStart); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnSequenceEnd); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnSequenceStart); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnSequenceEnd); + assert_next!(v, TestEvent::OnMapEnd); + assert_next!(v, TestEvent::OnDocumentEnd); +} + +#[test] +fn test_ex2_10_simple_anchor() { + let mut p = Parser::new(EX2_10.chars()); + let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + assert_next!(v, TestEvent::OnDocumentStart); + assert_next!(v, TestEvent::OnMapStart); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnSequenceStart); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnSequenceEnd); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnSequenceStart); + assert_next!(v, TestEvent::OnAlias); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnSequenceEnd); + assert_next!(v, TestEvent::OnMapEnd); + assert_next!(v, TestEvent::OnDocumentEnd); +} + +#[test] +fn test_ex2_11_mapping_between_sequences() { + let mut p = Parser::new(EX2_11.chars()); + let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + assert_next!(v, TestEvent::OnDocumentStart); + assert_next!(v, TestEvent::OnMapStart); + assert_next!(v, TestEvent::OnSequenceStart); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnSequenceEnd); + assert_next!(v, TestEvent::OnSequenceStart); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnSequenceEnd); + assert_next!(v, TestEvent::OnSequenceStart); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnSequenceEnd); + assert_next!(v, TestEvent::OnSequenceStart); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnSequenceEnd); + assert_next!(v, TestEvent::OnMapEnd); + assert_next!(v, TestEvent::OnDocumentEnd); +} + +#[test] +fn test_ex2_12_compact_nested_mapping() { + let mut p = Parser::new(EX2_12.chars()); + let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + assert_next!(v, TestEvent::OnDocumentStart); + assert_next!(v, TestEvent::OnSequenceStart); + assert_next!(v, TestEvent::OnMapStart); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnMapEnd); + assert_next!(v, TestEvent::OnMapStart); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnMapEnd); + assert_next!(v, TestEvent::OnMapStart); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnMapEnd); + assert_next!(v, TestEvent::OnSequenceEnd); + assert_next!(v, TestEvent::OnDocumentEnd); +} + +#[test] +fn test_ex2_13_in_literals_newlines_are_preserved() { + let mut p = Parser::new(EX2_13.chars()); + let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + assert_next!(v, TestEvent::OnDocumentStart); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnDocumentEnd); +} + +#[test] +fn test_ex2_14_in_folded_scalars_newlines_become_spaces() { + let mut p = Parser::new(EX2_14.chars()); + let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + assert_next!(v, TestEvent::OnDocumentStart); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnDocumentEnd); +} + +#[test] +fn test_ex2_15_folded_newlines_are_preserved_for_more_indented_and_blank_lines() { + let mut p = Parser::new(EX2_15.chars()); + let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + assert_next!(v, TestEvent::OnDocumentStart); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnDocumentEnd); +} + +#[test] +fn test_ex2_16_indentation_determines_scope() { + let mut p = Parser::new(EX2_16.chars()); + let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + assert_next!(v, TestEvent::OnDocumentStart); + assert_next!(v, TestEvent::OnMapStart); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnMapEnd); + assert_next!(v, TestEvent::OnDocumentEnd); +} + +#[test] +fn test_ex2_17_quoted_scalars() { + let mut p = Parser::new(EX2_17.chars()); + let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + assert_next!(v, TestEvent::OnDocumentStart); + assert_next!(v, TestEvent::OnMapStart); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnMapEnd); + assert_next!(v, TestEvent::OnDocumentEnd); +} + +#[test] +fn test_ex2_18_multi_line_flow_scalars() { + let mut p = Parser::new(EX2_18.chars()); + let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + assert_next!(v, TestEvent::OnDocumentStart); + assert_next!(v, TestEvent::OnMapStart); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnMapEnd); + assert_next!(v, TestEvent::OnDocumentEnd); +} + +#[test] +fn test_ex2_23_various_explicit_tags() { + let mut p = Parser::new(EX2_23.chars()); + let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + assert_next!(v, TestEvent::OnDocumentStart); + assert_next!(v, TestEvent::OnMapStart); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnMapEnd); + assert_next!(v, TestEvent::OnDocumentEnd); +} + +#[test] +fn test_ex2_24_global_tags() { + let mut p = Parser::new(EX2_24.chars()); + let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + assert_next!(v, TestEvent::OnDocumentStart); + assert_next!(v, TestEvent::OnSequenceStart); + assert_next!(v, TestEvent::OnMapStart); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnMapStart); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnMapEnd); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnMapEnd); + assert_next!(v, TestEvent::OnMapStart); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnAlias); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnMapStart); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnMapEnd); + assert_next!(v, TestEvent::OnMapEnd); + assert_next!(v, TestEvent::OnMapStart); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnAlias); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnMapEnd); + assert_next!(v, TestEvent::OnSequenceEnd); + assert_next!(v, TestEvent::OnDocumentEnd); +} + +#[test] +fn test_ex2_25_unordered_sets() { + let mut p = Parser::new(EX2_25.chars()); + let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + assert_next!(v, TestEvent::OnDocumentStart); + assert_next!(v, TestEvent::OnMapStart); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnNull); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnNull); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnNull); + assert_next!(v, TestEvent::OnMapEnd); + assert_next!(v, TestEvent::OnDocumentEnd); +} + +#[test] +fn test_ex2_26_ordered_mappings() { + let mut p = Parser::new(EX2_26.chars()); + let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + assert_next!(v, TestEvent::OnDocumentStart); + assert_next!(v, TestEvent::OnSequenceStart); + assert_next!(v, TestEvent::OnMapStart); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnMapEnd); + assert_next!(v, TestEvent::OnMapStart); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnMapEnd); + assert_next!(v, TestEvent::OnMapStart); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnMapEnd); + assert_next!(v, TestEvent::OnSequenceEnd); + assert_next!(v, TestEvent::OnDocumentEnd); +} + +#[test] +fn test_ex2_27_invoice() { + let mut p = Parser::new(EX2_27.chars()); + let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + assert_next!(v, TestEvent::OnDocumentStart); + assert_next!(v, TestEvent::OnMapStart); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnMapStart); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnMapStart); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnMapEnd); + assert_next!(v, TestEvent::OnMapEnd); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnAlias); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnSequenceStart); + assert_next!(v, TestEvent::OnMapStart); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnMapEnd); + assert_next!(v, TestEvent::OnMapStart); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnMapEnd); + assert_next!(v, TestEvent::OnSequenceEnd); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnMapEnd); + assert_next!(v, TestEvent::OnDocumentEnd); +} + +#[test] +fn test_ex2_28_log_file() { + let mut p = Parser::new(EX2_28.chars()); + let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + assert_next!(v, TestEvent::OnDocumentStart); + assert_next!(v, TestEvent::OnMapStart); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnMapEnd); + assert_next!(v, TestEvent::OnDocumentEnd); + assert_next!(v, TestEvent::OnDocumentStart); + assert_next!(v, TestEvent::OnMapStart); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnMapEnd); + assert_next!(v, TestEvent::OnDocumentEnd); + assert_next!(v, TestEvent::OnDocumentStart); + assert_next!(v, TestEvent::OnMapStart); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnSequenceStart); + assert_next!(v, TestEvent::OnMapStart); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnMapEnd); + assert_next!(v, TestEvent::OnMapStart); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnMapEnd); + assert_next!(v, TestEvent::OnSequenceEnd); + assert_next!(v, TestEvent::OnMapEnd); + assert_next!(v, TestEvent::OnDocumentEnd); +} + +#[test] +fn test_ex5_3_block_structure_indicators() { + let mut p = Parser::new(EX5_3.chars()); + let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + assert_next!(v, TestEvent::OnDocumentStart); + assert_next!(v, TestEvent::OnMapStart); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnSequenceStart); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnSequenceEnd); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnMapStart); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnMapEnd); + assert_next!(v, TestEvent::OnMapEnd); + assert_next!(v, TestEvent::OnDocumentEnd); +} + +#[test] +fn test_ex5_4_flow_structure_indicators() { + let mut p = Parser::new(EX5_4.chars()); + let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + assert_next!(v, TestEvent::OnDocumentStart); + assert_next!(v, TestEvent::OnMapStart); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnSequenceStart); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnSequenceEnd); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnMapStart); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnMapEnd); + assert_next!(v, TestEvent::OnMapEnd); + assert_next!(v, TestEvent::OnDocumentEnd); +} + +#[test] +fn test_ex5_6_node_property_indicators() { + let mut p = Parser::new(EX5_6.chars()); + let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + assert_next!(v, TestEvent::OnDocumentStart); + assert_next!(v, TestEvent::OnMapStart); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnAlias); + assert_next!(v, TestEvent::OnMapEnd); + assert_next!(v, TestEvent::OnDocumentEnd); +} + +#[test] +fn test_ex5_7_block_scalar_indicators() { + let mut p = Parser::new(EX5_7.chars()); + let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + assert_next!(v, TestEvent::OnDocumentStart); + assert_next!(v, TestEvent::OnMapStart); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnMapEnd); + assert_next!(v, TestEvent::OnDocumentEnd); +} + +#[test] +fn test_ex5_8_quoted_scalar_indicators() { + let mut p = Parser::new(EX5_8.chars()); + let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + assert_next!(v, TestEvent::OnDocumentStart); + assert_next!(v, TestEvent::OnMapStart); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnMapEnd); + assert_next!(v, TestEvent::OnDocumentEnd); +} + +#[test] +fn test_ex5_11_line_break_characters() { + let mut p = Parser::new(EX5_11.chars()); + let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + assert_next!(v, TestEvent::OnDocumentStart); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnDocumentEnd); +} + +#[test] +fn test_ex5_12_tabs_and_spaces() { + let mut p = Parser::new(EX5_12.chars()); + let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + assert_next!(v, TestEvent::OnDocumentStart); + assert_next!(v, TestEvent::OnMapStart); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnMapEnd); + assert_next!(v, TestEvent::OnDocumentEnd); +} + +#[test] +fn test_ex5_13_escaped_characters() { + let mut p = Parser::new(EX5_13.chars()); + let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + assert_next!(v, TestEvent::OnDocumentStart); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnDocumentEnd); +} + +#[test] +fn test_ex6_1_indentation_spaces() { + let mut p = Parser::new(EX6_1.chars()); + let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + assert_next!(v, TestEvent::OnDocumentStart); + assert_next!(v, TestEvent::OnMapStart); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnMapStart); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnSequenceStart); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnSequenceEnd); + assert_next!(v, TestEvent::OnMapEnd); + assert_next!(v, TestEvent::OnMapEnd); + assert_next!(v, TestEvent::OnDocumentEnd); +} + +#[test] +fn test_ex6_2_indentation_indicators() { + let mut p = Parser::new(EX6_2.chars()); + let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + assert_next!(v, TestEvent::OnDocumentStart); + assert_next!(v, TestEvent::OnMapStart); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnSequenceStart); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnSequenceStart); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnSequenceEnd); + assert_next!(v, TestEvent::OnSequenceEnd); + assert_next!(v, TestEvent::OnMapEnd); + assert_next!(v, TestEvent::OnDocumentEnd); +} + +#[test] +fn test_ex6_3_separation_spaces() { + let mut p = Parser::new(EX6_3.chars()); + let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + assert_next!(v, TestEvent::OnDocumentStart); + assert_next!(v, TestEvent::OnSequenceStart); + assert_next!(v, TestEvent::OnMapStart); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnMapEnd); + assert_next!(v, TestEvent::OnSequenceStart); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnSequenceEnd); + assert_next!(v, TestEvent::OnSequenceEnd); + assert_next!(v, TestEvent::OnDocumentEnd); +} + +#[test] +fn test_ex6_4_line_prefixes() { + let mut p = Parser::new(EX6_4.chars()); + let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + assert_next!(v, TestEvent::OnDocumentStart); + assert_next!(v, TestEvent::OnMapStart); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnMapEnd); + assert_next!(v, TestEvent::OnDocumentEnd); +} + +#[test] +fn test_ex6_5_empty_lines() { + let mut p = Parser::new(EX6_5.chars()); + let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + assert_next!(v, TestEvent::OnDocumentStart); + assert_next!(v, TestEvent::OnMapStart); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnMapEnd); + assert_next!(v, TestEvent::OnDocumentEnd); +} + +#[test] +fn test_ex6_6_line_folding() { + let mut p = Parser::new(EX6_6.chars()); + let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + assert_next!(v, TestEvent::OnDocumentStart); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnDocumentEnd); +} + +#[test] +fn test_ex6_7_block_folding() { + let mut p = Parser::new(EX6_7.chars()); + let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + assert_next!(v, TestEvent::OnDocumentStart); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnDocumentEnd); +} + +#[test] +fn test_ex6_8_flow_folding() { + let mut p = Parser::new(EX6_8.chars()); + let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + assert_next!(v, TestEvent::OnDocumentStart); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnDocumentEnd); +} + +#[test] +fn test_ex6_9_separated_comment() { + let mut p = Parser::new(EX6_9.chars()); + let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + assert_next!(v, TestEvent::OnDocumentStart); + assert_next!(v, TestEvent::OnMapStart); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnMapEnd); + assert_next!(v, TestEvent::OnDocumentEnd); +} + +#[test] +fn test_ex6_12_separation_spaces_ii() { + let mut p = Parser::new(EX6_12.chars()); + let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + assert_next!(v, TestEvent::OnDocumentStart); + assert_next!(v, TestEvent::OnMapStart); + assert_next!(v, TestEvent::OnMapStart); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnMapEnd); + assert_next!(v, TestEvent::OnMapStart); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnMapEnd); + assert_next!(v, TestEvent::OnMapEnd); + assert_next!(v, TestEvent::OnDocumentEnd); +} + +#[test] +fn test_ex6_13_reserved_directives() { + let mut p = Parser::new(EX6_13.chars()); + let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + assert_next!(v, TestEvent::OnDocumentStart); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnDocumentEnd); +} + +#[test] +fn test_ex6_14_yaml_directive() { + let mut p = Parser::new(EX6_14.chars()); + let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + assert_next!(v, TestEvent::OnDocumentStart); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnDocumentEnd); +} + +#[test] +fn test_ex6_16_tag_directive() { + let mut p = Parser::new(EX6_16.chars()); + let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + assert_next!(v, TestEvent::OnDocumentStart); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnDocumentEnd); +} + +#[test] +fn test_ex6_18_primary_tag_handle() { + let mut p = Parser::new(EX6_18.chars()); + let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + assert_next!(v, TestEvent::OnDocumentStart); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnDocumentEnd); + assert_next!(v, TestEvent::OnDocumentStart); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnDocumentEnd); +} + +#[test] +fn test_ex6_19_secondary_tag_handle() { + let mut p = Parser::new(EX6_19.chars()); + let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + assert_next!(v, TestEvent::OnDocumentStart); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnDocumentEnd); +} + +#[test] +fn test_ex6_20_tag_handles() { + let mut p = Parser::new(EX6_20.chars()); + let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + assert_next!(v, TestEvent::OnDocumentStart); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnDocumentEnd); +} + +#[test] +fn test_ex6_21_local_tag_prefix() { + let mut p = Parser::new(EX6_21.chars()); + let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + assert_next!(v, TestEvent::OnDocumentStart); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnDocumentEnd); + assert_next!(v, TestEvent::OnDocumentStart); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnDocumentEnd); +} + +#[test] +fn test_ex6_22_global_tag_prefix() { + let mut p = Parser::new(EX6_22.chars()); + let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + assert_next!(v, TestEvent::OnDocumentStart); + assert_next!(v, TestEvent::OnSequenceStart); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnSequenceEnd); + assert_next!(v, TestEvent::OnDocumentEnd); +} + +#[test] +fn test_ex6_23_node_properties() { + let mut p = Parser::new(EX6_23.chars()); + let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + assert_next!(v, TestEvent::OnDocumentStart); + assert_next!(v, TestEvent::OnMapStart); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnAlias); + assert_next!(v, TestEvent::OnMapEnd); + assert_next!(v, TestEvent::OnDocumentEnd); +} + +#[test] +fn test_ex6_24_verbatim_tags() { + let mut p = Parser::new(EX6_24.chars()); + let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + assert_next!(v, TestEvent::OnDocumentStart); + assert_next!(v, TestEvent::OnMapStart); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnMapEnd); + assert_next!(v, TestEvent::OnDocumentEnd); +} + +#[test] +fn test_ex6_26_tag_shorthands() { + let mut p = Parser::new(EX6_26.chars()); + let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + assert_next!(v, TestEvent::OnDocumentStart); + assert_next!(v, TestEvent::OnSequenceStart); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnSequenceEnd); + assert_next!(v, TestEvent::OnDocumentEnd); +} + +#[test] +fn test_ex6_28_non_specific_tags() { + let mut p = Parser::new(EX6_28.chars()); + let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + assert_next!(v, TestEvent::OnDocumentStart); + assert_next!(v, TestEvent::OnSequenceStart); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnSequenceEnd); + assert_next!(v, TestEvent::OnDocumentEnd); +} + +#[test] +fn test_ex6_29_node_anchors() { + let mut p = Parser::new(EX6_29.chars()); + let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + assert_next!(v, TestEvent::OnDocumentStart); + assert_next!(v, TestEvent::OnMapStart); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnAlias); + assert_next!(v, TestEvent::OnMapEnd); + assert_next!(v, TestEvent::OnDocumentEnd); +} + +#[test] +fn test_ex7_1_alias_nodes() { + let mut p = Parser::new(EX7_1.chars()); + let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + assert_next!(v, TestEvent::OnDocumentStart); + assert_next!(v, TestEvent::OnMapStart); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnAlias); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnAlias); + assert_next!(v, TestEvent::OnMapEnd); + assert_next!(v, TestEvent::OnDocumentEnd); +} + +#[test] +fn test_ex7_2_empty_nodes() { + let mut p = Parser::new(EX7_2.chars()); + let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + assert_next!(v, TestEvent::OnDocumentStart); + assert_next!(v, TestEvent::OnMapStart); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnMapEnd); + assert_next!(v, TestEvent::OnDocumentEnd); +} + +#[test] +fn test_ex7_3_completely_empty_nodes() { + let mut p = Parser::new(EX7_3.chars()); + let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + assert_next!(v, TestEvent::OnDocumentStart); + assert_next!(v, TestEvent::OnMapStart); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnNull); + assert_next!(v, TestEvent::OnNull); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnMapEnd); + assert_next!(v, TestEvent::OnDocumentEnd); +} + +#[test] +fn test_ex7_4_double_quoted_implicit_keys() { + let mut p = Parser::new(EX7_4.chars()); + let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + assert_next!(v, TestEvent::OnDocumentStart); + assert_next!(v, TestEvent::OnMapStart); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnSequenceStart); + assert_next!(v, TestEvent::OnMapStart); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnMapEnd); + assert_next!(v, TestEvent::OnSequenceEnd); + assert_next!(v, TestEvent::OnMapEnd); + assert_next!(v, TestEvent::OnDocumentEnd); +} + +#[test] +fn test_ex7_5_double_quoted_line_breaks() { + let mut p = Parser::new(EX7_5.chars()); + let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + assert_next!(v, TestEvent::OnDocumentStart); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnDocumentEnd); +} + +#[test] +fn test_ex7_6_double_quoted_lines() { + let mut p = Parser::new(EX7_6.chars()); + let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + assert_next!(v, TestEvent::OnDocumentStart); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnDocumentEnd); +} + +#[test] +fn test_ex7_7_single_quoted_characters() { + let mut p = Parser::new(EX7_7.chars()); + let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + assert_next!(v, TestEvent::OnDocumentStart); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnDocumentEnd); +} + +#[test] +fn test_ex7_8_single_quoted_implicit_keys() { + let mut p = Parser::new(EX7_8.chars()); + let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + assert_next!(v, TestEvent::OnDocumentStart); + assert_next!(v, TestEvent::OnMapStart); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnSequenceStart); + assert_next!(v, TestEvent::OnMapStart); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnMapEnd); + assert_next!(v, TestEvent::OnSequenceEnd); + assert_next!(v, TestEvent::OnMapEnd); + assert_next!(v, TestEvent::OnDocumentEnd); +} + +#[test] +fn test_ex7_9_single_quoted_lines() { + let mut p = Parser::new(EX7_9.chars()); + let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + assert_next!(v, TestEvent::OnDocumentStart); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnDocumentEnd); +} + +#[test] +fn test_ex7_10_plain_characters() { + let mut p = Parser::new(EX7_10.chars()); + let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + assert_next!(v, TestEvent::OnDocumentStart); + assert_next!(v, TestEvent::OnSequenceStart); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnSequenceStart); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnSequenceEnd); + assert_next!(v, TestEvent::OnSequenceEnd); + assert_next!(v, TestEvent::OnDocumentEnd); +} + +#[test] +fn test_ex7_11_plain_implicit_keys() { + let mut p = Parser::new(EX7_11.chars()); + let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + assert_next!(v, TestEvent::OnDocumentStart); + assert_next!(v, TestEvent::OnMapStart); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnSequenceStart); + assert_next!(v, TestEvent::OnMapStart); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnMapEnd); + assert_next!(v, TestEvent::OnSequenceEnd); + assert_next!(v, TestEvent::OnMapEnd); + assert_next!(v, TestEvent::OnDocumentEnd); +} + +#[test] +fn test_ex7_12_plain_lines() { + let mut p = Parser::new(EX7_12.chars()); + let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + assert_next!(v, TestEvent::OnDocumentStart); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnDocumentEnd); +} + +#[test] +fn test_ex7_13_flow_sequence() { + let mut p = Parser::new(EX7_13.chars()); + let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + assert_next!(v, TestEvent::OnDocumentStart); + assert_next!(v, TestEvent::OnSequenceStart); + assert_next!(v, TestEvent::OnSequenceStart); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnSequenceEnd); + assert_next!(v, TestEvent::OnSequenceStart); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnSequenceEnd); + assert_next!(v, TestEvent::OnSequenceEnd); + assert_next!(v, TestEvent::OnDocumentEnd); +} + +#[test] +fn test_ex7_14_flow_sequence_entries() { + let mut p = Parser::new(EX7_14.chars()); + let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + assert_next!(v, TestEvent::OnDocumentStart); + assert_next!(v, TestEvent::OnSequenceStart); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnSequenceStart); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnSequenceEnd); + assert_next!(v, TestEvent::OnMapStart); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnMapEnd); + assert_next!(v, TestEvent::OnSequenceEnd); + assert_next!(v, TestEvent::OnDocumentEnd); +} + +#[test] +fn test_ex7_15_flow_mappings() { + let mut p = Parser::new(EX7_15.chars()); + let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + assert_next!(v, TestEvent::OnDocumentStart); + assert_next!(v, TestEvent::OnSequenceStart); + assert_next!(v, TestEvent::OnMapStart); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnMapEnd); + assert_next!(v, TestEvent::OnMapStart); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnMapEnd); + assert_next!(v, TestEvent::OnSequenceEnd); + assert_next!(v, TestEvent::OnDocumentEnd); +} + +#[test] +fn test_ex7_16_flow_mapping_entries() { + let mut p = Parser::new(EX7_16.chars()); + let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + assert_next!(v, TestEvent::OnDocumentStart); + assert_next!(v, TestEvent::OnMapStart); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnNull); + assert_next!(v, TestEvent::OnNull); + assert_next!(v, TestEvent::OnMapEnd); + assert_next!(v, TestEvent::OnDocumentEnd); +} + +#[test] +fn test_ex7_17_flow_mapping_separate_values() { + let mut p = Parser::new(EX7_17.chars()); + let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + assert_next!(v, TestEvent::OnDocumentStart); + assert_next!(v, TestEvent::OnMapStart); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnNull); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnNull); + assert_next!(v, TestEvent::OnNull); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnMapEnd); + assert_next!(v, TestEvent::OnDocumentEnd); +} + +#[test] +fn test_ex7_18_flow_mapping_adjacent_values() { + let mut p = Parser::new(EX7_18.chars()); + let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + assert_next!(v, TestEvent::OnDocumentStart); + assert_next!(v, TestEvent::OnMapStart); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnNull); + assert_next!(v, TestEvent::OnMapEnd); + assert_next!(v, TestEvent::OnDocumentEnd); +} + +#[test] +fn test_ex7_19_single_pair_flow_mappings() { + let mut p = Parser::new(EX7_19.chars()); + let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + assert_next!(v, TestEvent::OnDocumentStart); + assert_next!(v, TestEvent::OnSequenceStart); + assert_next!(v, TestEvent::OnMapStart); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnMapEnd); + assert_next!(v, TestEvent::OnSequenceEnd); + assert_next!(v, TestEvent::OnDocumentEnd); +} + +#[test] +fn test_ex7_20_single_pair_explicit_entry() { + let mut p = Parser::new(EX7_20.chars()); + let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + assert_next!(v, TestEvent::OnDocumentStart); + assert_next!(v, TestEvent::OnSequenceStart); + assert_next!(v, TestEvent::OnMapStart); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnMapEnd); + assert_next!(v, TestEvent::OnSequenceEnd); + assert_next!(v, TestEvent::OnDocumentEnd); +} + +#[test] +fn test_ex7_21_single_pair_implicit_entries() { + let mut p = Parser::new(EX7_21.chars()); + let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + assert_next!(v, TestEvent::OnDocumentStart); + assert_next!(v, TestEvent::OnSequenceStart); + assert_next!(v, TestEvent::OnSequenceStart); + assert_next!(v, TestEvent::OnMapStart); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnMapEnd); + assert_next!(v, TestEvent::OnSequenceEnd); + assert_next!(v, TestEvent::OnSequenceStart); + assert_next!(v, TestEvent::OnMapStart); + assert_next!(v, TestEvent::OnNull); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnMapEnd); + assert_next!(v, TestEvent::OnSequenceEnd); + assert_next!(v, TestEvent::OnSequenceStart); + assert_next!(v, TestEvent::OnMapStart); + assert_next!(v, TestEvent::OnMapStart); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnMapEnd); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnMapEnd); + assert_next!(v, TestEvent::OnSequenceEnd); + assert_next!(v, TestEvent::OnSequenceEnd); + assert_next!(v, TestEvent::OnDocumentEnd); +} + +#[test] +fn test_ex7_23_flow_content() { + let mut p = Parser::new(EX7_23.chars()); + let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + assert_next!(v, TestEvent::OnDocumentStart); + assert_next!(v, TestEvent::OnSequenceStart); + assert_next!(v, TestEvent::OnSequenceStart); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnSequenceEnd); + assert_next!(v, TestEvent::OnMapStart); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnMapEnd); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnSequenceEnd); + assert_next!(v, TestEvent::OnDocumentEnd); +} + +#[test] +fn test_ex7_24_flow_nodes() { + let mut p = Parser::new(EX7_24.chars()); + let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + assert_next!(v, TestEvent::OnDocumentStart); + assert_next!(v, TestEvent::OnSequenceStart); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnAlias); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnSequenceEnd); + assert_next!(v, TestEvent::OnDocumentEnd); +} + +#[test] +fn test_ex8_1_block_scalar_header() { + let mut p = Parser::new(EX8_1.chars()); + let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + assert_next!(v, TestEvent::OnDocumentStart); + assert_next!(v, TestEvent::OnSequenceStart); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnSequenceEnd); + assert_next!(v, TestEvent::OnDocumentEnd); +} + +#[test] +fn test_ex8_2_block_indentation_header() { + let mut p = Parser::new(EX8_2.chars()); + let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + assert_next!(v, TestEvent::OnDocumentStart); + assert_next!(v, TestEvent::OnSequenceStart); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnSequenceEnd); + assert_next!(v, TestEvent::OnDocumentEnd); +} + +#[test] +fn test_ex8_4_chomping_final_line_break() { + let mut p = Parser::new(EX8_4.chars()); + let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + assert_next!(v, TestEvent::OnDocumentStart); + assert_next!(v, TestEvent::OnMapStart); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnMapEnd); + assert_next!(v, TestEvent::OnDocumentEnd); +} + +#[test] +fn test_ex8_6_empty_scalar_chomping() { + let mut p = Parser::new(EX8_6.chars()); + let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + assert_next!(v, TestEvent::OnDocumentStart); + assert_next!(v, TestEvent::OnMapStart); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnMapEnd); + assert_next!(v, TestEvent::OnDocumentEnd); +} + +#[test] +fn test_ex8_7_literal_scalar() { + let mut p = Parser::new(EX8_7.chars()); + let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + assert_next!(v, TestEvent::OnDocumentStart); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnDocumentEnd); +} + +#[test] +fn test_ex8_8_literal_content() { + let mut p = Parser::new(EX8_8.chars()); + let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + assert_next!(v, TestEvent::OnDocumentStart); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnDocumentEnd); +} + +#[test] +fn test_ex8_9_folded_scalar() { + let mut p = Parser::new(EX8_9.chars()); + let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + assert_next!(v, TestEvent::OnDocumentStart); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnDocumentEnd); +} + +#[test] +fn test_ex8_10_folded_lines() { + let mut p = Parser::new(EX8_10.chars()); + let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + assert_next!(v, TestEvent::OnDocumentStart); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnDocumentEnd); +} + +#[test] +fn test_ex8_11_more_indented_lines() { + let mut p = Parser::new(EX8_11.chars()); + let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + assert_next!(v, TestEvent::OnDocumentStart); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnDocumentEnd); +} + +#[test] +fn test_ex8_12_empty_separation_lines() { + let mut p = Parser::new(EX8_12.chars()); + let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + assert_next!(v, TestEvent::OnDocumentStart); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnDocumentEnd); +} + +#[test] +fn test_ex8_13_final_empty_lines() { + let mut p = Parser::new(EX8_13.chars()); + let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + assert_next!(v, TestEvent::OnDocumentStart); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnDocumentEnd); +} + +#[test] +fn test_ex8_14_block_sequence() { + let mut p = Parser::new(EX8_14.chars()); + let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + assert_next!(v, TestEvent::OnDocumentStart); + assert_next!(v, TestEvent::OnMapStart); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnSequenceStart); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnMapStart); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnMapEnd); + assert_next!(v, TestEvent::OnSequenceEnd); + assert_next!(v, TestEvent::OnMapEnd); + assert_next!(v, TestEvent::OnDocumentEnd); +} + +#[test] +fn test_ex8_15_block_sequence_entry_types() { + let mut p = Parser::new(EX8_15.chars()); + let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + assert_next!(v, TestEvent::OnDocumentStart); + assert_next!(v, TestEvent::OnSequenceStart); + assert_next!(v, TestEvent::OnNull); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnSequenceStart); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnSequenceEnd); + assert_next!(v, TestEvent::OnMapStart); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnMapEnd); + assert_next!(v, TestEvent::OnSequenceEnd); + assert_next!(v, TestEvent::OnDocumentEnd); +} + +#[test] +fn test_ex8_16_block_mappings() { + let mut p = Parser::new(EX8_16.chars()); + let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + assert_next!(v, TestEvent::OnDocumentStart); + assert_next!(v, TestEvent::OnMapStart); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnMapStart); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnMapEnd); + assert_next!(v, TestEvent::OnMapEnd); + assert_next!(v, TestEvent::OnDocumentEnd); +} + +#[test] +fn test_ex8_17_explicit_block_mapping_entries() { + let mut p = Parser::new(EX8_17.chars()); + let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + assert_next!(v, TestEvent::OnDocumentStart); + assert_next!(v, TestEvent::OnMapStart); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnNull); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnSequenceStart); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnSequenceEnd); + assert_next!(v, TestEvent::OnMapEnd); + assert_next!(v, TestEvent::OnDocumentEnd); +} + +#[test] +fn test_ex8_18_implicit_block_mapping_entries() { + let mut p = Parser::new(EX8_18.chars()); + let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + assert_next!(v, TestEvent::OnDocumentStart); + assert_next!(v, TestEvent::OnMapStart); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnNull); + assert_next!(v, TestEvent::OnNull); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnSequenceStart); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnSequenceEnd); + assert_next!(v, TestEvent::OnMapEnd); + assert_next!(v, TestEvent::OnDocumentEnd); +} + +#[test] +fn test_ex8_19_compact_block_mappings() { + let mut p = Parser::new(EX8_19.chars()); + let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + assert_next!(v, TestEvent::OnDocumentStart); + assert_next!(v, TestEvent::OnSequenceStart); + assert_next!(v, TestEvent::OnMapStart); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnMapEnd); + assert_next!(v, TestEvent::OnMapStart); + assert_next!(v, TestEvent::OnMapStart); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnMapEnd); + assert_next!(v, TestEvent::OnMapStart); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnMapEnd); + assert_next!(v, TestEvent::OnMapEnd); + assert_next!(v, TestEvent::OnSequenceEnd); + assert_next!(v, TestEvent::OnDocumentEnd); +} + +#[test] +fn test_ex8_20_block_node_types() { + let mut p = Parser::new(EX8_20.chars()); + let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + assert_next!(v, TestEvent::OnDocumentStart); + assert_next!(v, TestEvent::OnSequenceStart); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnMapStart); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnMapEnd); + assert_next!(v, TestEvent::OnSequenceEnd); + assert_next!(v, TestEvent::OnDocumentEnd); +} + +#[test] +fn test_ex8_22_block_collection_nodes() { + let mut p = Parser::new(EX8_22.chars()); + let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + assert_next!(v, TestEvent::OnDocumentStart); + assert_next!(v, TestEvent::OnMapStart); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnSequenceStart); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnSequenceStart); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnSequenceEnd); + assert_next!(v, TestEvent::OnSequenceEnd); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnMapStart); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnScalar); + assert_next!(v, TestEvent::OnMapEnd); + assert_next!(v, TestEvent::OnMapEnd); + assert_next!(v, TestEvent::OnDocumentEnd); +} + diff --git a/saphyr/tests/specexamples.rs.inc b/saphyr/tests/specexamples.rs.inc new file mode 100644 index 0000000..0996fda --- /dev/null +++ b/saphyr/tests/specexamples.rs.inc @@ -0,0 +1,336 @@ +const EX2_1 : &'static str = + "- Mark McGwire\n- Sammy Sosa\n- Ken Griffey"; + +const EX2_2 : &'static str = + "hr: 65 # Home runs\navg: 0.278 # Batting average\nrbi: 147 # Runs Batted In"; + +const EX2_3 : &'static str = + "american:\n- Boston Red Sox\n- Detroit Tigers\n- New York Yankees\nnational:\n- New York Mets\n- Chicago Cubs\n- Atlanta Braves"; + +const EX2_4 : &'static str = + "-\n name: Mark McGwire\n hr: 65\n avg: 0.278\n-\n name: Sammy Sosa\n hr: 63\n avg: 0.288"; + +const EX2_5 : &'static str = + "- [name , hr, avg ]\n- [Mark McGwire, 65, 0.278]\n- [Sammy Sosa , 63, 0.288]"; + +const EX2_6 : &'static str = + "Mark McGwire: {hr: 65, avg: 0.278}\nSammy Sosa: {\n hr: 63,\n avg: 0.288\n }"; + +const EX2_7 : &'static str = + "# Ranking of 1998 home runs\n---\n- Mark McGwire\n- Sammy Sosa\n- Ken Griffey\n\n# Team ranking\n---\n- Chicago Cubs\n- St Louis Cardinals"; + +const EX2_8 : &'static str = + "---\ntime: 20:03:20\nplayer: Sammy Sosa\naction: strike (miss)\n...\n---\ntime: 20:03:47\nplayer: Sammy Sosa\naction: grand slam\n..."; + +const EX2_9 : &'static str = + "---\nhr: # 1998 hr ranking\n - Mark McGwire\n - Sammy Sosa\nrbi:\n # 1998 rbi ranking\n - Sammy Sosa\n - Ken Griffey"; + +const EX2_10 : &'static str = + "---\nhr:\n - Mark McGwire\n # Following node labeled SS\n - &SS Sammy Sosa\nrbi:\n - *SS # Subsequent occurrence\n - Ken Griffey"; + +const EX2_11 : &'static str = + "? - Detroit Tigers\n - Chicago cubs\n:\n - 2001-07-23\n\n? [ New York Yankees,\n Atlanta Braves ]\n: [ 2001-07-02, 2001-08-12,\n 2001-08-14 ]"; + +const EX2_12 : &'static str = + "---\n# Products purchased\n- item : Super Hoop\n quantity: 1\n- item : Basketball\n quantity: 4\n- item : Big Shoes\n quantity: 1"; + +const EX2_13 : &'static str = + "# ASCII Art\n--- |\n \\//||\\/||\n // || ||__"; + +const EX2_14 : &'static str = + "--- >\n Mark McGwire's\n year was crippled\n by a knee injury."; + +const EX2_15 : &'static str = + ">\n Sammy Sosa completed another\n fine season with great stats.\n \n 63 Home Runs\n 0.288 Batting Average\n \n What a year!"; + +const EX2_16 : &'static str = + "name: Mark McGwire\naccomplishment: >\n Mark set a major league\n home run record in 1998.\nstats: |\n 65 Home Runs\n 0.278 Batting Average\n"; + +const EX2_17 : &'static str = + "unicode: \"Sosa did fine.\\u263A\"\ncontrol: \"\\b1998\\t1999\\t2000\\n\"\nhex esc: \"\\x0d\\x0a is \\r\\n\"\n\nsingle: '\"Howdy!\" he cried.'\nquoted: ' # Not a ''comment''.'\ntie-fighter: '|\\-*-/|'"; + +const EX2_18 : &'static str = + "plain:\n This unquoted scalar\n spans many lines.\n\nquoted: \"So does this\n quoted scalar.\\n\""; + +// TODO: 2.19 - 2.22 schema tags + +const EX2_23 : &'static str = + "---\nnot-date: !!str 2002-04-28\n\npicture: !!binary |\n R0lGODlhDAAMAIQAAP//9/X\n 17unp5WZmZgAAAOfn515eXv\n Pz7Y6OjuDg4J+fn5OTk6enp\n 56enmleECcgggoBADs=\n\napplication specific tag: !something |\n The semantics of the tag\n above may be different for\n different documents."; + +const EX2_24 : &'static str = + "%TAG ! tag:clarkevans.com,2002:\n--- !shape\n # Use the ! handle for presenting\n # tag:clarkevans.com,2002:circle\n- !circle\n center: &ORIGIN {x: 73, y: 129}\n radius: 7\n- !line\n start: *ORIGIN\n finish: { x: 89, y: 102 }\n- !label\n start: *ORIGIN\n color: 0xFFEEBB\n text: Pretty vector drawing."; + +const EX2_25 : &'static str = + "# Sets are represented as a\n# Mapping where each key is\n# associated with a null value\n--- !!set\n? Mark McGwire\n? Sammy Sosa\n? Ken Griffey"; + +const EX2_26 : &'static str = + "# Ordered maps are represented as\n# A sequence of mappings, with\n# each mapping having one key\n--- !!omap\n- Mark McGwire: 65\n- Sammy Sosa: 63\n- Ken Griffey: 58"; + +const EX2_27 : &'static str = + "--- !\ninvoice: 34843\ndate : 2001-01-23\nbill-to: &id001\n given : Chris\n family : Dumars\n address:\n lines: |\n 458 Walkman Dr.\n Suite #292\n city : Royal Oak\n state : MI\n postal : 48046\nship-to: *id001\nproduct:\n - sku : BL394D\n quantity : 4\n description : Basketball\n price : 450.00\n - sku : BL4438H\n quantity : 1\n description : Super Hoop\n price : 2392.00\ntax : 251.42\ntotal: 4443.52\ncomments:\n Late afternoon is best.\n Backup contact is Nancy\n Billsmer @ 338-4338."; + +const EX2_28 : &'static str = + "---\nTime: 2001-11-23 15:01:42 -5\nUser: ed\nWarning:\n This is an error message\n for the log file\n---\nTime: 2001-11-23 15:02:31 -5\nUser: ed\nWarning:\n A slightly different error\n message.\n---\nDate: 2001-11-23 15:03:17 -5\nUser: ed\nFatal:\n Unknown variable \"bar\"\nStack:\n - file: TopClass.py\n line: 23\n code: |\n x = MoreObject(\"345\\n\")\n - file: MoreClass.py\n line: 58\n code: |-\n foo = bar"; + +// TODO: 5.1 - 5.2 BOM + +const EX5_3 : &'static str = + "sequence:\n- one\n- two\nmapping:\n ? sky\n : blue\n sea : green"; + +const EX5_4 : &'static str = + "sequence: [ one, two, ]\nmapping: { sky: blue, sea: green }"; + +const EX5_5 : &'static str = "# Comment only."; + +const EX5_6 : &'static str = + "anchored: !local &anchor value\nalias: *anchor"; + +const EX5_7 : &'static str = + "literal: |\n some\n text\nfolded: >\n some\n text\n"; + +const EX5_8 : &'static str = + "single: 'text'\ndouble: \"text\""; + +// TODO: 5.9 directive +// TODO: 5.10 reserved indicator + +const EX5_11 : &'static str = + "|\n Line break (no glyph)\n Line break (glyphed)\n"; + +const EX5_12 : &'static str = + "# Tabs and spaces\nquoted: \"Quoted\t\"\nblock: |\n void main() {\n \tprintf(\"Hello, world!\\n\");\n }"; + +const EX5_13 : &'static str = + "\"Fun with \\\\\n\\\" \\a \\b \\e \\f \\\n\\n \\r \\t \\v \\0 \\\n\\ \\_ \\N \\L \\P \\\n\\x41 \\u0041 \\U00000041\""; + +const EX5_14 : &'static str = + "Bad escapes:\n \"\\c\n \\xq-\""; + +const EX6_1 : &'static str = + " # Leading comment line spaces are\n # neither content nor indentation.\n \nNot indented:\n By one space: |\n By four\n spaces\n Flow style: [ # Leading spaces\n By two, # in flow style\n Also by two, # are neither\n \tStill by two # content nor\n ] # indentation."; + +const EX6_2 : &'static str = + "? a\n: -\tb\n - -\tc\n - d"; + +const EX6_3 : &'static str = + "- foo:\t bar\n- - baz\n -\tbaz"; + +const EX6_4 : &'static str = + "plain: text\n lines\nquoted: \"text\n \tlines\"\nblock: |\n text\n \tlines\n"; + +const EX6_5 : &'static str = + "Folding:\n \"Empty line\n \t\n as a line feed\"\nChomping: |\n Clipped empty lines\n "; + +const EX6_6 : &'static str = + ">-\n trimmed\n \n \n\n as\n space"; + +const EX6_7 : &'static str = + ">\n foo \n \n \t bar\n\n baz\n"; + +const EX6_8 : &'static str = + "\"\n foo \n \n \t bar\n\n baz\n\""; + +const EX6_9 : &'static str = + "key: # Comment\n value"; + +const EX6_10 : &'static str = + " # Comment\n \n\n"; + +const EX6_11 : &'static str = + "key: # Comment\n # lines\n value\n\n"; + +const EX6_12 : &'static str = + "{ first: Sammy, last: Sosa }:\n# Statistics:\n hr: # Home runs\n 65\n avg: # Average\n 0.278"; + +const EX6_13 : &'static str = + "%FOO bar baz # Should be ignored\n # with a warning.\n--- \"foo\""; + +const EX6_14 : &'static str = + "%YAML 1.3 # Attempt parsing\n # with a warning\n---\n\"foo\""; + +const EX6_15 : &'static str = + "%YAML 1.2\n%YAML 1.1\nfoo"; + +const EX6_16 : &'static str = + "%TAG !yaml! tag:yaml.org,2002:\n---\n!yaml!str \"foo\""; + +const EX6_17 : &'static str = + "%TAG ! !foo\n%TAG ! !foo\nbar"; + +const EX6_18 : &'static str = + "# Private\n!foo \"bar\"\n...\n# Global\n%TAG ! tag:example.com,2000:app/\n---\n!foo \"bar\""; + +const EX6_19 : &'static str = + "%TAG !! tag:example.com,2000:app/\n---\n!!int 1 - 3 # Interval, not integer"; + +const EX6_20 : &'static str = + "%TAG !e! tag:example.com,2000:app/\n---\n!e!foo \"bar\""; + +const EX6_21 : &'static str = + "%TAG !m! !my-\n--- # Bulb here\n!m!light fluorescent\n...\n%TAG !m! !my-\n--- # Color here\n!m!light green"; + +const EX6_22 : &'static str = + "%TAG !e! tag:example.com,2000:app/\n---\n- !e!foo \"bar\""; + +const EX6_23 : &'static str = + "!!str &a1 \"foo\":\n !!str bar\n&a2 baz : *a1"; + +const EX6_24 : &'static str = + "! foo :\n ! baz"; + +const EX6_25 : &'static str = + "- ! foo\n- !<$:?> bar\n"; + +const EX6_26 : &'static str = + "%TAG !e! tag:example.com,2000:app/\n---\n- !local foo\n- !!str bar\n- !e!tag%21 baz\n"; + +const EX6_27a : &'static str = + "%TAG !e! tag:example,2000:app/\n---\n- !e! foo"; + +const EX6_27b : &'static str = + "%TAG !e! tag:example,2000:app/\n---\n- !h!bar baz"; + +const EX6_28 : &'static str = + "# Assuming conventional resolution:\n- \"12\"\n- 12\n- ! 12"; + +const EX6_29 : &'static str = + "First occurrence: &anchor Value\nSecond occurrence: *anchor"; + +const EX7_1 : &'static str = + "First occurrence: &anchor Foo\nSecond occurrence: *anchor\nOverride anchor: &anchor Bar\nReuse anchor: *anchor"; + +const EX7_2 : &'static str = + "{\n foo : !!str,\n !!str : bar,\n}"; + +const EX7_3 : &'static str = + "{\n ? foo :,\n : bar,\n}\n"; + +const EX7_4 : &'static str = + "\"implicit block key\" : [\n \"implicit flow key\" : value,\n ]"; + +const EX7_5 : &'static str = + "\"folded \nto a space,\t\n \nto a line feed, or \t\\\n \\ \tnon-content\""; + +const EX7_6 : &'static str = + "\" 1st non-empty\n\n 2nd non-empty \n\t3rd non-empty \""; + +const EX7_7 : &'static str = " 'here''s to \"quotes\"'"; + +const EX7_8 : &'static str = + "'implicit block key' : [\n 'implicit flow key' : value,\n ]"; + +const EX7_9 : &'static str = + "' 1st non-empty\n\n 2nd non-empty \n\t3rd non-empty '"; + +const EX7_10 : &'static str = + "# Outside flow collection:\n- ::vector\n- \": - ()\"\n- Up, up, and away!\n- -123\n- http://example.com/foo#bar\n# Inside flow collection:\n- [ ::vector,\n \": - ()\",\n \"Up, up, and away!\",\n -123,\n http://example.com/foo#bar ]"; + +const EX7_11 : &'static str = + "implicit block key : [\n implicit flow key : value,\n ]"; + +const EX7_12 : &'static str = + "1st non-empty\n\n 2nd non-empty \n\t3rd non-empty"; + +const EX7_13 : &'static str = + "- [ one, two, ]\n- [three ,four]"; + +const EX7_14 : &'static str = + "[\n\"double\n quoted\", 'single\n quoted',\nplain\n text, [ nested ],\nsingle: pair,\n]"; + +const EX7_15 : &'static str = + "- { one : two , three: four , }\n- {five: six,seven : eight}"; + +const EX7_16 : &'static str = + "{\n? explicit: entry,\nimplicit: entry,\n?\n}"; + +const EX7_17 : &'static str = + "{\nunquoted : \"separate\",\nhttp://foo.com,\nomitted value:,\n: omitted key,\n}"; + +const EX7_18 : &'static str = + "{\n\"adjacent\":value,\n\"readable\":value,\n\"empty\":\n}"; + +const EX7_19 : &'static str = + "[\nfoo: bar\n]"; + +const EX7_20 : &'static str = + "[\n? foo\n bar : baz\n]"; + +const EX7_21 : &'static str = + "- [ YAML : separate ]\n- [ : empty key entry ]\n- [ {JSON: like}:adjacent ]"; + +const EX7_22 : &'static str = + "[ foo\n bar: invalid,"; // Note: we don't check (on purpose) the >1K chars for an + // implicit key + +const EX7_23 : &'static str = + "- [ a, b ]\n- { a: b }\n- \"a\"\n- 'b'\n- c"; + +const EX7_24 : &'static str = + "- !!str \"a\"\n- 'b'\n- &anchor \"c\"\n- *anchor\n- !!str"; + +const EX8_1 : &'static str = + "- | # Empty header\n literal\n- >1 # Indentation indicator\n folded\n- |+ # Chomping indicator\n keep\n\n- >1- # Both indicators\n strip\n"; + +const EX8_2 : &'static str = + "- |\n detected\n- >\n \n \n # detected\n- |1\n explicit\n- >\n \t\n detected\n"; + +const EX8_3a : &'static str = + "- |\n \n text"; + +const EX8_3b : &'static str = + "- >\n text\n text"; + +const EX8_3c : &'static str = + "- |2\n text"; + +const EX8_4 : &'static str = + "strip: |-\n text\nclip: |\n text\nkeep: |+\n text\n"; + +const EX8_5 : &'static str = + " # Strip\n # Comments:\nstrip: |-\n # text\n \n # Clip\n # comments:\n\nclip: |\n # text\n \n # Keep\n # comments:\n\nkeep: |+\n # text\n\n # Trail\n # Comments\n"; + +const EX8_6 : &'static str = + "strip: >-\n\nclip: >\n\nkeep: |+\n\n"; + +const EX8_7 : &'static str = + "|\n literal\n \ttext\n\n"; + +const EX8_8 : &'static str = + "|\n \n \n literal\n \n \n text\n\n # Comment\n"; + +const EX8_9 : &'static str = + ">\n folded\n text\n\n"; + +const EX8_10 : &'static str = + ">\n\n folded\n line\n\n next\n line\n * bullet\n\n * list\n * lines\n\n last\n line\n\n# Comment\n"; + +const EX8_11 : &'static str = EX8_10; +const EX8_12 : &'static str = EX8_10; +const EX8_13 : &'static str = EX8_10; + +const EX8_14 : &'static str = + "block sequence:\n - one\n - two : three\n"; + +const EX8_15 : &'static str = + "- # Empty\n- |\n block node\n- - one # Compact\n - two # sequence\n- one: two # Compact mapping\n"; + +const EX8_16 : &'static str = + "block mapping:\n key: value\n"; + +const EX8_17 : &'static str = + "? explicit key # Empty value\n? |\n block key\n: - one # Explicit compact\n - two # block value\n"; + +const EX8_18 : &'static str = + "plain key: in-line value\n: # Both empty\n\"quoted key\":\n- entry\n"; + +const EX8_19 : &'static str = + "- sun: yellow\n- ? earth: blue\n : moon: white\n"; + +const EX8_20 : &'static str = + "-\n \"flow in block\"\n- >\n Block scalar\n- !!map # Block collection\n foo : bar\n"; + +const EX8_21 : &'static str = + "literal: |2\n value\nfolded:\n !foo\n >1\n value\n"; + +const EX8_22 : &'static str = + "sequence: !!seq\n- entry\n- !!seq\n - nested\nmapping: !!map\n foo: bar\n"; From 1894d11a26b40928df490138a8406384c0c6aa6a Mon Sep 17 00:00:00 2001 From: Yuheng Chen Date: Tue, 26 May 2015 16:41:35 +0800 Subject: [PATCH 012/380] Add multidoc support --- saphyr/src/parser.rs | 70 +++++++++++- saphyr/tests/spec_test.rs | 10 +- saphyr/tests/spec_test.rs.inc | 194 +++++++++++++++++----------------- 3 files changed, 169 insertions(+), 105 deletions(-) diff --git a/saphyr/src/parser.rs b/saphyr/src/parser.rs index 3e2b069..8fa31f4 100644 --- a/saphyr/src/parser.rs +++ b/saphyr/src/parser.rs @@ -98,7 +98,7 @@ impl> Parser { pub fn parse(&mut self) -> ParseResult { if self.scanner.stream_ended() || self.state == State::End { - return Ok(Event::NoEvent); + return Ok(Event::StreamEnd); } let ev = self.state_machine(); println!("EV {:?}", ev); @@ -112,6 +112,7 @@ impl> Parser { } if self.scanner.stream_ended() { + // XXX has parsed? return Ok(Yaml::Null); } let ev = try!(self.parse()); @@ -121,11 +122,37 @@ impl> Parser { self.load_document(&ev) } + pub fn load_multidoc(&mut self) -> Result, ScanError> { + if !self.scanner.stream_started() { + let ev = try!(self.parse()); + assert_eq!(ev, Event::StreamStart); + } + + if self.scanner.stream_ended() { + // XXX has parsed? + return Ok(Vec::new()); + } + let mut docs: Vec = Vec::new(); + loop { + let ev = try!(self.parse()); + if ev == Event::StreamEnd { + return Ok(docs); + } + docs.push(try!(self.load_document(&ev))); + } + unreachable!(); + } + fn load_document(&mut self, first_ev: &Event) -> Result { assert_eq!(first_ev, &Event::DocumentStart); let ev = try!(self.parse()); - self.load_node(&ev) + let doc = try!(self.load_node(&ev)); + // DOCUMENT-END is expected. + let ev = try!(self.parse()); + assert_eq!(ev, Event::DocumentEnd); + + Ok(doc) } fn load_node(&mut self, first_ev: &Event) -> Result { @@ -193,9 +220,11 @@ impl> Parser { println!("cur_state {:?}, next tok: {:?}", self.state, next_tok); match self.state { State::StreamStart => self.stream_start(), + State::ImplicitDocumentStart => self.document_start(true), State::DocumentStart => self.document_start(false), State::DocumentContent => self.document_content(), + State::DocumentEnd => self.document_end(), State::BlockNode => self.parse_node(true, false), State::BlockNodeOrIndentlessSequence => self.parse_node(true, true), @@ -296,6 +325,24 @@ impl> Parser { } } + fn document_end(&mut self) -> ParseResult { + let mut _implicit = true; + let tok = try!(self.peek()); + let _start_mark = tok.0; + + match tok.1 { + TokenType::DocumentEndToken => { + self.skip(); + _implicit = false; + } + _ => {} + } + + // TODO tag handling + self.state = State::DocumentStart; + Ok(Event::DocumentEnd) + } + fn parse_node(&mut self, block: bool, indentless_sequence: bool) -> ParseResult { let tok = try!(self.peek()); match tok.1 { @@ -495,8 +542,23 @@ a7: 你好 ".to_string(); let mut parser = Parser::new(s.chars()); let out = parser.load().unwrap(); - println!("DOC {:?}", out); - println!("DOC {}", out["a7"].as_str().unwrap()); + assert_eq!(out["a7"].as_str().unwrap(), "你好"); + } + + #[test] + fn test_multi_doc() { + let s = +" +'a scalar' +--- +'a scalar' +--- +'a scalar' +"; + let mut p = Parser::new(s.chars()); + let out = p.load_multidoc().unwrap(); + assert_eq!(out.len(), 3); + } } diff --git a/saphyr/tests/spec_test.rs b/saphyr/tests/spec_test.rs index 7c481b4..cd9f1c1 100644 --- a/saphyr/tests/spec_test.rs +++ b/saphyr/tests/spec_test.rs @@ -18,7 +18,7 @@ enum TestEvent { OnNull, } -fn yaml_to_test_events(root :&Yaml) -> Vec { +fn yaml_to_test_events(docs: &Vec) -> Vec { fn next(root: &Yaml, evs: &mut Vec) { match *root { Yaml::BadValue => { panic!("unexpected BadValue"); }, @@ -42,9 +42,11 @@ fn yaml_to_test_events(root :&Yaml) -> Vec { } } let mut evs: Vec = Vec::new(); - evs.push(TestEvent::OnDocumentStart); - next(&root, &mut evs); - evs.push(TestEvent::OnDocumentEnd); + for doc in docs { + evs.push(TestEvent::OnDocumentStart); + next(doc, &mut evs); + evs.push(TestEvent::OnDocumentEnd); + } evs } diff --git a/saphyr/tests/spec_test.rs.inc b/saphyr/tests/spec_test.rs.inc index e9a68c7..3282454 100644 --- a/saphyr/tests/spec_test.rs.inc +++ b/saphyr/tests/spec_test.rs.inc @@ -1,7 +1,7 @@ #[test] fn test_ex2_1_seq_scalars() { let mut p = Parser::new(EX2_1.chars()); - let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnSequenceStart); assert_next!(v, TestEvent::OnScalar); @@ -14,7 +14,7 @@ fn test_ex2_1_seq_scalars() { #[test] fn test_ex2_2_mapping_scalars_to_scalars() { let mut p = Parser::new(EX2_2.chars()); - let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnMapStart); assert_next!(v, TestEvent::OnScalar); @@ -30,7 +30,7 @@ fn test_ex2_2_mapping_scalars_to_scalars() { #[test] fn test_ex2_3_mapping_scalars_to_sequences() { let mut p = Parser::new(EX2_3.chars()); - let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnMapStart); assert_next!(v, TestEvent::OnScalar); @@ -52,7 +52,7 @@ fn test_ex2_3_mapping_scalars_to_sequences() { #[test] fn test_ex2_4_sequence_of_mappings() { let mut p = Parser::new(EX2_4.chars()); - let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnSequenceStart); assert_next!(v, TestEvent::OnMapStart); @@ -78,7 +78,7 @@ fn test_ex2_4_sequence_of_mappings() { #[test] fn test_ex2_5_sequence_of_sequences() { let mut p = Parser::new(EX2_5.chars()); - let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnSequenceStart); assert_next!(v, TestEvent::OnSequenceStart); @@ -103,7 +103,7 @@ fn test_ex2_5_sequence_of_sequences() { #[test] fn test_ex2_6_mapping_of_mappings() { let mut p = Parser::new(EX2_6.chars()); - let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnMapStart); assert_next!(v, TestEvent::OnScalar); @@ -127,7 +127,7 @@ fn test_ex2_6_mapping_of_mappings() { #[test] fn test_ex2_7_two_documents_in_a_stream() { let mut p = Parser::new(EX2_7.chars()); - let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnSequenceStart); assert_next!(v, TestEvent::OnScalar); @@ -146,7 +146,7 @@ fn test_ex2_7_two_documents_in_a_stream() { #[test] fn test_ex2_8_play_by_play_feed() { let mut p = Parser::new(EX2_8.chars()); - let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnMapStart); assert_next!(v, TestEvent::OnScalar); @@ -172,7 +172,7 @@ fn test_ex2_8_play_by_play_feed() { #[test] fn test_ex2_9_single_document_with_two_comments() { let mut p = Parser::new(EX2_9.chars()); - let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnMapStart); assert_next!(v, TestEvent::OnScalar); @@ -192,7 +192,7 @@ fn test_ex2_9_single_document_with_two_comments() { #[test] fn test_ex2_10_simple_anchor() { let mut p = Parser::new(EX2_10.chars()); - let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnMapStart); assert_next!(v, TestEvent::OnScalar); @@ -212,7 +212,7 @@ fn test_ex2_10_simple_anchor() { #[test] fn test_ex2_11_mapping_between_sequences() { let mut p = Parser::new(EX2_11.chars()); - let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnMapStart); assert_next!(v, TestEvent::OnSequenceStart); @@ -238,7 +238,7 @@ fn test_ex2_11_mapping_between_sequences() { #[test] fn test_ex2_12_compact_nested_mapping() { let mut p = Parser::new(EX2_12.chars()); - let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnSequenceStart); assert_next!(v, TestEvent::OnMapStart); @@ -266,7 +266,7 @@ fn test_ex2_12_compact_nested_mapping() { #[test] fn test_ex2_13_in_literals_newlines_are_preserved() { let mut p = Parser::new(EX2_13.chars()); - let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnScalar); assert_next!(v, TestEvent::OnDocumentEnd); @@ -275,7 +275,7 @@ fn test_ex2_13_in_literals_newlines_are_preserved() { #[test] fn test_ex2_14_in_folded_scalars_newlines_become_spaces() { let mut p = Parser::new(EX2_14.chars()); - let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnScalar); assert_next!(v, TestEvent::OnDocumentEnd); @@ -284,7 +284,7 @@ fn test_ex2_14_in_folded_scalars_newlines_become_spaces() { #[test] fn test_ex2_15_folded_newlines_are_preserved_for_more_indented_and_blank_lines() { let mut p = Parser::new(EX2_15.chars()); - let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnScalar); assert_next!(v, TestEvent::OnDocumentEnd); @@ -293,7 +293,7 @@ fn test_ex2_15_folded_newlines_are_preserved_for_more_indented_and_blank_lines() #[test] fn test_ex2_16_indentation_determines_scope() { let mut p = Parser::new(EX2_16.chars()); - let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnMapStart); assert_next!(v, TestEvent::OnScalar); @@ -309,7 +309,7 @@ fn test_ex2_16_indentation_determines_scope() { #[test] fn test_ex2_17_quoted_scalars() { let mut p = Parser::new(EX2_17.chars()); - let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnMapStart); assert_next!(v, TestEvent::OnScalar); @@ -331,7 +331,7 @@ fn test_ex2_17_quoted_scalars() { #[test] fn test_ex2_18_multi_line_flow_scalars() { let mut p = Parser::new(EX2_18.chars()); - let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnMapStart); assert_next!(v, TestEvent::OnScalar); @@ -345,7 +345,7 @@ fn test_ex2_18_multi_line_flow_scalars() { #[test] fn test_ex2_23_various_explicit_tags() { let mut p = Parser::new(EX2_23.chars()); - let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnMapStart); assert_next!(v, TestEvent::OnScalar); @@ -361,7 +361,7 @@ fn test_ex2_23_various_explicit_tags() { #[test] fn test_ex2_24_global_tags() { let mut p = Parser::new(EX2_24.chars()); - let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnSequenceStart); assert_next!(v, TestEvent::OnMapStart); @@ -401,7 +401,7 @@ fn test_ex2_24_global_tags() { #[test] fn test_ex2_25_unordered_sets() { let mut p = Parser::new(EX2_25.chars()); - let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnMapStart); assert_next!(v, TestEvent::OnScalar); @@ -417,7 +417,7 @@ fn test_ex2_25_unordered_sets() { #[test] fn test_ex2_26_ordered_mappings() { let mut p = Parser::new(EX2_26.chars()); - let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnSequenceStart); assert_next!(v, TestEvent::OnMapStart); @@ -439,7 +439,7 @@ fn test_ex2_26_ordered_mappings() { #[test] fn test_ex2_27_invoice() { let mut p = Parser::new(EX2_27.chars()); - let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnMapStart); assert_next!(v, TestEvent::OnScalar); @@ -502,7 +502,7 @@ fn test_ex2_27_invoice() { #[test] fn test_ex2_28_log_file() { let mut p = Parser::new(EX2_28.chars()); - let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnMapStart); assert_next!(v, TestEvent::OnScalar); @@ -557,7 +557,7 @@ fn test_ex2_28_log_file() { #[test] fn test_ex5_3_block_structure_indicators() { let mut p = Parser::new(EX5_3.chars()); - let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnMapStart); assert_next!(v, TestEvent::OnScalar); @@ -579,7 +579,7 @@ fn test_ex5_3_block_structure_indicators() { #[test] fn test_ex5_4_flow_structure_indicators() { let mut p = Parser::new(EX5_4.chars()); - let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnMapStart); assert_next!(v, TestEvent::OnScalar); @@ -601,7 +601,7 @@ fn test_ex5_4_flow_structure_indicators() { #[test] fn test_ex5_6_node_property_indicators() { let mut p = Parser::new(EX5_6.chars()); - let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnMapStart); assert_next!(v, TestEvent::OnScalar); @@ -615,7 +615,7 @@ fn test_ex5_6_node_property_indicators() { #[test] fn test_ex5_7_block_scalar_indicators() { let mut p = Parser::new(EX5_7.chars()); - let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnMapStart); assert_next!(v, TestEvent::OnScalar); @@ -629,7 +629,7 @@ fn test_ex5_7_block_scalar_indicators() { #[test] fn test_ex5_8_quoted_scalar_indicators() { let mut p = Parser::new(EX5_8.chars()); - let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnMapStart); assert_next!(v, TestEvent::OnScalar); @@ -643,7 +643,7 @@ fn test_ex5_8_quoted_scalar_indicators() { #[test] fn test_ex5_11_line_break_characters() { let mut p = Parser::new(EX5_11.chars()); - let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnScalar); assert_next!(v, TestEvent::OnDocumentEnd); @@ -652,7 +652,7 @@ fn test_ex5_11_line_break_characters() { #[test] fn test_ex5_12_tabs_and_spaces() { let mut p = Parser::new(EX5_12.chars()); - let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnMapStart); assert_next!(v, TestEvent::OnScalar); @@ -666,7 +666,7 @@ fn test_ex5_12_tabs_and_spaces() { #[test] fn test_ex5_13_escaped_characters() { let mut p = Parser::new(EX5_13.chars()); - let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnScalar); assert_next!(v, TestEvent::OnDocumentEnd); @@ -675,7 +675,7 @@ fn test_ex5_13_escaped_characters() { #[test] fn test_ex6_1_indentation_spaces() { let mut p = Parser::new(EX6_1.chars()); - let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnMapStart); assert_next!(v, TestEvent::OnScalar); @@ -696,7 +696,7 @@ fn test_ex6_1_indentation_spaces() { #[test] fn test_ex6_2_indentation_indicators() { let mut p = Parser::new(EX6_2.chars()); - let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnMapStart); assert_next!(v, TestEvent::OnScalar); @@ -714,7 +714,7 @@ fn test_ex6_2_indentation_indicators() { #[test] fn test_ex6_3_separation_spaces() { let mut p = Parser::new(EX6_3.chars()); - let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnSequenceStart); assert_next!(v, TestEvent::OnMapStart); @@ -732,7 +732,7 @@ fn test_ex6_3_separation_spaces() { #[test] fn test_ex6_4_line_prefixes() { let mut p = Parser::new(EX6_4.chars()); - let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnMapStart); assert_next!(v, TestEvent::OnScalar); @@ -748,7 +748,7 @@ fn test_ex6_4_line_prefixes() { #[test] fn test_ex6_5_empty_lines() { let mut p = Parser::new(EX6_5.chars()); - let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnMapStart); assert_next!(v, TestEvent::OnScalar); @@ -762,7 +762,7 @@ fn test_ex6_5_empty_lines() { #[test] fn test_ex6_6_line_folding() { let mut p = Parser::new(EX6_6.chars()); - let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnScalar); assert_next!(v, TestEvent::OnDocumentEnd); @@ -771,7 +771,7 @@ fn test_ex6_6_line_folding() { #[test] fn test_ex6_7_block_folding() { let mut p = Parser::new(EX6_7.chars()); - let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnScalar); assert_next!(v, TestEvent::OnDocumentEnd); @@ -780,7 +780,7 @@ fn test_ex6_7_block_folding() { #[test] fn test_ex6_8_flow_folding() { let mut p = Parser::new(EX6_8.chars()); - let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnScalar); assert_next!(v, TestEvent::OnDocumentEnd); @@ -789,7 +789,7 @@ fn test_ex6_8_flow_folding() { #[test] fn test_ex6_9_separated_comment() { let mut p = Parser::new(EX6_9.chars()); - let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnMapStart); assert_next!(v, TestEvent::OnScalar); @@ -801,7 +801,7 @@ fn test_ex6_9_separated_comment() { #[test] fn test_ex6_12_separation_spaces_ii() { let mut p = Parser::new(EX6_12.chars()); - let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnMapStart); assert_next!(v, TestEvent::OnMapStart); @@ -823,7 +823,7 @@ fn test_ex6_12_separation_spaces_ii() { #[test] fn test_ex6_13_reserved_directives() { let mut p = Parser::new(EX6_13.chars()); - let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnScalar); assert_next!(v, TestEvent::OnDocumentEnd); @@ -832,7 +832,7 @@ fn test_ex6_13_reserved_directives() { #[test] fn test_ex6_14_yaml_directive() { let mut p = Parser::new(EX6_14.chars()); - let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnScalar); assert_next!(v, TestEvent::OnDocumentEnd); @@ -841,7 +841,7 @@ fn test_ex6_14_yaml_directive() { #[test] fn test_ex6_16_tag_directive() { let mut p = Parser::new(EX6_16.chars()); - let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnScalar); assert_next!(v, TestEvent::OnDocumentEnd); @@ -850,7 +850,7 @@ fn test_ex6_16_tag_directive() { #[test] fn test_ex6_18_primary_tag_handle() { let mut p = Parser::new(EX6_18.chars()); - let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnScalar); assert_next!(v, TestEvent::OnDocumentEnd); @@ -862,7 +862,7 @@ fn test_ex6_18_primary_tag_handle() { #[test] fn test_ex6_19_secondary_tag_handle() { let mut p = Parser::new(EX6_19.chars()); - let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnScalar); assert_next!(v, TestEvent::OnDocumentEnd); @@ -871,7 +871,7 @@ fn test_ex6_19_secondary_tag_handle() { #[test] fn test_ex6_20_tag_handles() { let mut p = Parser::new(EX6_20.chars()); - let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnScalar); assert_next!(v, TestEvent::OnDocumentEnd); @@ -880,7 +880,7 @@ fn test_ex6_20_tag_handles() { #[test] fn test_ex6_21_local_tag_prefix() { let mut p = Parser::new(EX6_21.chars()); - let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnScalar); assert_next!(v, TestEvent::OnDocumentEnd); @@ -892,7 +892,7 @@ fn test_ex6_21_local_tag_prefix() { #[test] fn test_ex6_22_global_tag_prefix() { let mut p = Parser::new(EX6_22.chars()); - let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnSequenceStart); assert_next!(v, TestEvent::OnScalar); @@ -903,7 +903,7 @@ fn test_ex6_22_global_tag_prefix() { #[test] fn test_ex6_23_node_properties() { let mut p = Parser::new(EX6_23.chars()); - let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnMapStart); assert_next!(v, TestEvent::OnScalar); @@ -917,7 +917,7 @@ fn test_ex6_23_node_properties() { #[test] fn test_ex6_24_verbatim_tags() { let mut p = Parser::new(EX6_24.chars()); - let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnMapStart); assert_next!(v, TestEvent::OnScalar); @@ -929,7 +929,7 @@ fn test_ex6_24_verbatim_tags() { #[test] fn test_ex6_26_tag_shorthands() { let mut p = Parser::new(EX6_26.chars()); - let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnSequenceStart); assert_next!(v, TestEvent::OnScalar); @@ -942,7 +942,7 @@ fn test_ex6_26_tag_shorthands() { #[test] fn test_ex6_28_non_specific_tags() { let mut p = Parser::new(EX6_28.chars()); - let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnSequenceStart); assert_next!(v, TestEvent::OnScalar); @@ -955,7 +955,7 @@ fn test_ex6_28_non_specific_tags() { #[test] fn test_ex6_29_node_anchors() { let mut p = Parser::new(EX6_29.chars()); - let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnMapStart); assert_next!(v, TestEvent::OnScalar); @@ -969,7 +969,7 @@ fn test_ex6_29_node_anchors() { #[test] fn test_ex7_1_alias_nodes() { let mut p = Parser::new(EX7_1.chars()); - let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnMapStart); assert_next!(v, TestEvent::OnScalar); @@ -987,7 +987,7 @@ fn test_ex7_1_alias_nodes() { #[test] fn test_ex7_2_empty_nodes() { let mut p = Parser::new(EX7_2.chars()); - let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnMapStart); assert_next!(v, TestEvent::OnScalar); @@ -1001,7 +1001,7 @@ fn test_ex7_2_empty_nodes() { #[test] fn test_ex7_3_completely_empty_nodes() { let mut p = Parser::new(EX7_3.chars()); - let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnMapStart); assert_next!(v, TestEvent::OnScalar); @@ -1015,7 +1015,7 @@ fn test_ex7_3_completely_empty_nodes() { #[test] fn test_ex7_4_double_quoted_implicit_keys() { let mut p = Parser::new(EX7_4.chars()); - let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnMapStart); assert_next!(v, TestEvent::OnScalar); @@ -1032,7 +1032,7 @@ fn test_ex7_4_double_quoted_implicit_keys() { #[test] fn test_ex7_5_double_quoted_line_breaks() { let mut p = Parser::new(EX7_5.chars()); - let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnScalar); assert_next!(v, TestEvent::OnDocumentEnd); @@ -1041,7 +1041,7 @@ fn test_ex7_5_double_quoted_line_breaks() { #[test] fn test_ex7_6_double_quoted_lines() { let mut p = Parser::new(EX7_6.chars()); - let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnScalar); assert_next!(v, TestEvent::OnDocumentEnd); @@ -1050,7 +1050,7 @@ fn test_ex7_6_double_quoted_lines() { #[test] fn test_ex7_7_single_quoted_characters() { let mut p = Parser::new(EX7_7.chars()); - let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnScalar); assert_next!(v, TestEvent::OnDocumentEnd); @@ -1059,7 +1059,7 @@ fn test_ex7_7_single_quoted_characters() { #[test] fn test_ex7_8_single_quoted_implicit_keys() { let mut p = Parser::new(EX7_8.chars()); - let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnMapStart); assert_next!(v, TestEvent::OnScalar); @@ -1076,7 +1076,7 @@ fn test_ex7_8_single_quoted_implicit_keys() { #[test] fn test_ex7_9_single_quoted_lines() { let mut p = Parser::new(EX7_9.chars()); - let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnScalar); assert_next!(v, TestEvent::OnDocumentEnd); @@ -1085,7 +1085,7 @@ fn test_ex7_9_single_quoted_lines() { #[test] fn test_ex7_10_plain_characters() { let mut p = Parser::new(EX7_10.chars()); - let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnSequenceStart); assert_next!(v, TestEvent::OnScalar); @@ -1107,7 +1107,7 @@ fn test_ex7_10_plain_characters() { #[test] fn test_ex7_11_plain_implicit_keys() { let mut p = Parser::new(EX7_11.chars()); - let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnMapStart); assert_next!(v, TestEvent::OnScalar); @@ -1124,7 +1124,7 @@ fn test_ex7_11_plain_implicit_keys() { #[test] fn test_ex7_12_plain_lines() { let mut p = Parser::new(EX7_12.chars()); - let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnScalar); assert_next!(v, TestEvent::OnDocumentEnd); @@ -1133,7 +1133,7 @@ fn test_ex7_12_plain_lines() { #[test] fn test_ex7_13_flow_sequence() { let mut p = Parser::new(EX7_13.chars()); - let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnSequenceStart); assert_next!(v, TestEvent::OnSequenceStart); @@ -1151,7 +1151,7 @@ fn test_ex7_13_flow_sequence() { #[test] fn test_ex7_14_flow_sequence_entries() { let mut p = Parser::new(EX7_14.chars()); - let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnSequenceStart); assert_next!(v, TestEvent::OnScalar); @@ -1171,7 +1171,7 @@ fn test_ex7_14_flow_sequence_entries() { #[test] fn test_ex7_15_flow_mappings() { let mut p = Parser::new(EX7_15.chars()); - let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnSequenceStart); assert_next!(v, TestEvent::OnMapStart); @@ -1193,7 +1193,7 @@ fn test_ex7_15_flow_mappings() { #[test] fn test_ex7_16_flow_mapping_entries() { let mut p = Parser::new(EX7_16.chars()); - let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnMapStart); assert_next!(v, TestEvent::OnScalar); @@ -1209,7 +1209,7 @@ fn test_ex7_16_flow_mapping_entries() { #[test] fn test_ex7_17_flow_mapping_separate_values() { let mut p = Parser::new(EX7_17.chars()); - let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnMapStart); assert_next!(v, TestEvent::OnScalar); @@ -1227,7 +1227,7 @@ fn test_ex7_17_flow_mapping_separate_values() { #[test] fn test_ex7_18_flow_mapping_adjacent_values() { let mut p = Parser::new(EX7_18.chars()); - let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnMapStart); assert_next!(v, TestEvent::OnScalar); @@ -1243,7 +1243,7 @@ fn test_ex7_18_flow_mapping_adjacent_values() { #[test] fn test_ex7_19_single_pair_flow_mappings() { let mut p = Parser::new(EX7_19.chars()); - let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnSequenceStart); assert_next!(v, TestEvent::OnMapStart); @@ -1257,7 +1257,7 @@ fn test_ex7_19_single_pair_flow_mappings() { #[test] fn test_ex7_20_single_pair_explicit_entry() { let mut p = Parser::new(EX7_20.chars()); - let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnSequenceStart); assert_next!(v, TestEvent::OnMapStart); @@ -1271,7 +1271,7 @@ fn test_ex7_20_single_pair_explicit_entry() { #[test] fn test_ex7_21_single_pair_implicit_entries() { let mut p = Parser::new(EX7_21.chars()); - let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnSequenceStart); assert_next!(v, TestEvent::OnSequenceStart); @@ -1302,7 +1302,7 @@ fn test_ex7_21_single_pair_implicit_entries() { #[test] fn test_ex7_23_flow_content() { let mut p = Parser::new(EX7_23.chars()); - let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnSequenceStart); assert_next!(v, TestEvent::OnSequenceStart); @@ -1323,7 +1323,7 @@ fn test_ex7_23_flow_content() { #[test] fn test_ex7_24_flow_nodes() { let mut p = Parser::new(EX7_24.chars()); - let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnSequenceStart); assert_next!(v, TestEvent::OnScalar); @@ -1338,7 +1338,7 @@ fn test_ex7_24_flow_nodes() { #[test] fn test_ex8_1_block_scalar_header() { let mut p = Parser::new(EX8_1.chars()); - let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnSequenceStart); assert_next!(v, TestEvent::OnScalar); @@ -1352,7 +1352,7 @@ fn test_ex8_1_block_scalar_header() { #[test] fn test_ex8_2_block_indentation_header() { let mut p = Parser::new(EX8_2.chars()); - let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnSequenceStart); assert_next!(v, TestEvent::OnScalar); @@ -1366,7 +1366,7 @@ fn test_ex8_2_block_indentation_header() { #[test] fn test_ex8_4_chomping_final_line_break() { let mut p = Parser::new(EX8_4.chars()); - let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnMapStart); assert_next!(v, TestEvent::OnScalar); @@ -1382,7 +1382,7 @@ fn test_ex8_4_chomping_final_line_break() { #[test] fn test_ex8_6_empty_scalar_chomping() { let mut p = Parser::new(EX8_6.chars()); - let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnMapStart); assert_next!(v, TestEvent::OnScalar); @@ -1398,7 +1398,7 @@ fn test_ex8_6_empty_scalar_chomping() { #[test] fn test_ex8_7_literal_scalar() { let mut p = Parser::new(EX8_7.chars()); - let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnScalar); assert_next!(v, TestEvent::OnDocumentEnd); @@ -1407,7 +1407,7 @@ fn test_ex8_7_literal_scalar() { #[test] fn test_ex8_8_literal_content() { let mut p = Parser::new(EX8_8.chars()); - let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnScalar); assert_next!(v, TestEvent::OnDocumentEnd); @@ -1416,7 +1416,7 @@ fn test_ex8_8_literal_content() { #[test] fn test_ex8_9_folded_scalar() { let mut p = Parser::new(EX8_9.chars()); - let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnScalar); assert_next!(v, TestEvent::OnDocumentEnd); @@ -1425,7 +1425,7 @@ fn test_ex8_9_folded_scalar() { #[test] fn test_ex8_10_folded_lines() { let mut p = Parser::new(EX8_10.chars()); - let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnScalar); assert_next!(v, TestEvent::OnDocumentEnd); @@ -1434,7 +1434,7 @@ fn test_ex8_10_folded_lines() { #[test] fn test_ex8_11_more_indented_lines() { let mut p = Parser::new(EX8_11.chars()); - let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnScalar); assert_next!(v, TestEvent::OnDocumentEnd); @@ -1443,7 +1443,7 @@ fn test_ex8_11_more_indented_lines() { #[test] fn test_ex8_12_empty_separation_lines() { let mut p = Parser::new(EX8_12.chars()); - let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnScalar); assert_next!(v, TestEvent::OnDocumentEnd); @@ -1452,7 +1452,7 @@ fn test_ex8_12_empty_separation_lines() { #[test] fn test_ex8_13_final_empty_lines() { let mut p = Parser::new(EX8_13.chars()); - let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnScalar); assert_next!(v, TestEvent::OnDocumentEnd); @@ -1461,7 +1461,7 @@ fn test_ex8_13_final_empty_lines() { #[test] fn test_ex8_14_block_sequence() { let mut p = Parser::new(EX8_14.chars()); - let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnMapStart); assert_next!(v, TestEvent::OnScalar); @@ -1479,7 +1479,7 @@ fn test_ex8_14_block_sequence() { #[test] fn test_ex8_15_block_sequence_entry_types() { let mut p = Parser::new(EX8_15.chars()); - let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnSequenceStart); assert_next!(v, TestEvent::OnNull); @@ -1499,7 +1499,7 @@ fn test_ex8_15_block_sequence_entry_types() { #[test] fn test_ex8_16_block_mappings() { let mut p = Parser::new(EX8_16.chars()); - let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnMapStart); assert_next!(v, TestEvent::OnScalar); @@ -1514,7 +1514,7 @@ fn test_ex8_16_block_mappings() { #[test] fn test_ex8_17_explicit_block_mapping_entries() { let mut p = Parser::new(EX8_17.chars()); - let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnMapStart); assert_next!(v, TestEvent::OnScalar); @@ -1531,7 +1531,7 @@ fn test_ex8_17_explicit_block_mapping_entries() { #[test] fn test_ex8_18_implicit_block_mapping_entries() { let mut p = Parser::new(EX8_18.chars()); - let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnMapStart); assert_next!(v, TestEvent::OnScalar); @@ -1549,7 +1549,7 @@ fn test_ex8_18_implicit_block_mapping_entries() { #[test] fn test_ex8_19_compact_block_mappings() { let mut p = Parser::new(EX8_19.chars()); - let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnSequenceStart); assert_next!(v, TestEvent::OnMapStart); @@ -1573,7 +1573,7 @@ fn test_ex8_19_compact_block_mappings() { #[test] fn test_ex8_20_block_node_types() { let mut p = Parser::new(EX8_20.chars()); - let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnSequenceStart); assert_next!(v, TestEvent::OnScalar); @@ -1589,7 +1589,7 @@ fn test_ex8_20_block_node_types() { #[test] fn test_ex8_22_block_collection_nodes() { let mut p = Parser::new(EX8_22.chars()); - let mut v = yaml_to_test_events(&p.load().unwrap()).into_iter(); + let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnMapStart); assert_next!(v, TestEvent::OnScalar); From 89b9c6b1db907f879b32518a24e194e3f84b14d8 Mon Sep 17 00:00:00 2001 From: Yuheng Chen Date: Wed, 27 May 2015 00:29:40 +0800 Subject: [PATCH 013/380] Add block_scalar --- saphyr/src/parser.rs | 4 + saphyr/src/scanner.rs | 198 +++++++++++++++++++++++++++++++++++++- saphyr/tests/spec_test.rs | 1 + 3 files changed, 201 insertions(+), 2 deletions(-) diff --git a/saphyr/src/parser.rs b/saphyr/src/parser.rs index 8fa31f4..cb95203 100644 --- a/saphyr/src/parser.rs +++ b/saphyr/src/parser.rs @@ -59,6 +59,10 @@ pub struct Parser { token: Option, } +pub trait EventReceiver { + fn on_event(&mut self, ev: &Event); +} + pub type ParseResult = Result; impl> Parser { diff --git a/saphyr/src/scanner.rs b/saphyr/src/scanner.rs index 7109bd1..5171f97 100644 --- a/saphyr/src/scanner.rs +++ b/saphyr/src/scanner.rs @@ -147,6 +147,10 @@ fn is_blank(c: char) -> bool { fn is_blankz(c: char) -> bool { is_blank(c) || is_breakz(c) } +#[inline] +fn is_digit(c: char) -> bool { + c >= '0' && c <= '9' +} pub type ScanResult = Result<(), ScanError>; @@ -312,8 +316,10 @@ impl> Scanner { '*' => unimplemented!(), '&' => unimplemented!(), '!' => unimplemented!(), - '|' if self.flow_level == 0 => unimplemented!(), - '>' if self.flow_level == 0 => unimplemented!(), + // Is it a literal scalar? + '|' if self.flow_level == 0 => try!(self.fetch_block_scalar(true)), + // Is it a folded scalar? + '>' if self.flow_level == 0 => try!(self.fetch_block_scalar(false)), '\'' => try!(self.fetch_flow_scalar(true)), '"' => try!(self.fetch_flow_scalar(false)), // plain scalar @@ -517,6 +523,194 @@ impl> Scanner { Ok(()) } + fn fetch_block_scalar(&mut self, literal: bool) -> ScanResult { + try!(self.save_simple_key()); + self.allow_simple_key(); + let tok = try!(self.scan_block_scalar(literal)); + + self.tokens.push_back(tok); + Ok(()) + } + + fn scan_block_scalar(&mut self, literal: bool) -> Result { + let start_mark = self.mark; + let mut chomping: i32 = 0; + let mut increment: usize = 0; + let mut indent: usize = 0; + let mut trailing_blank: bool = false; + let mut leading_blank: bool = false; + + let mut string = String::new(); + let mut leading_break = String::new(); + let mut trailing_breaks = String::new(); + + // skip '|' or '>' + self.skip(); + self.lookahead(1); + + if self.ch() == '+' || self.ch() == '-' { + if self.ch() == '+' { + chomping = 1; + } else { + chomping = -1; + } + self.skip(); + self.lookahead(1); + if is_digit(self.ch()) { + if self.ch() == '0' { + return Err(ScanError::new(start_mark, + "while scanning a block scalar, found an intendation indicator equal to 0")); + } + increment = (self.ch() as usize) - ('0' as usize); + self.skip(); + } + } else if is_digit(self.ch()) { + if self.ch() == '0' { + return Err(ScanError::new(start_mark, + "while scanning a block scalar, found an intendation indicator equal to 0")); + } + + increment = (self.ch() as usize) - ('0' as usize); + self.skip(); + self.lookahead(1); + if self.ch() == '+' || self.ch() == '-' { + if self.ch() == '+' { + chomping = 1; + } else { + chomping = -1; + } + self.skip(); + } + } + + // Eat whitespaces and comments to the end of the line. + self.lookahead(1); + + while is_blank(self.ch()) { + self.skip(); + self.lookahead(1); + } + + if self.ch() == '#' { + while !is_breakz(self.ch()) { + self.skip(); + self.lookahead(1); + } + } + + // Check if we are at the end of the line. + if !is_breakz(self.ch()) { + return Err(ScanError::new(start_mark, + "while scanning a block scalar, did not find expected comment or line break")); + } + + if is_break(self.ch()) { + self.lookahead(2); + self.skip_line(); + } + + if increment > 0 { + indent = if self.indent >= 0 { (self.indent + increment as isize) as usize } else { increment } + } + // Scan the leading line breaks and determine the indentation level if needed. + try!(self.block_scalar_breaks(&mut indent, &mut trailing_breaks)); + + self.lookahead(1); + + let start_mark = self.mark; + + while self.mark.col == indent && !is_z(self.ch()) { + println!("--> {:?}", self.ch()); + // We are at the beginning of a non-empty line. + trailing_blank = is_blank(self.ch()); + if !literal && !leading_break.is_empty() + && !leading_blank && !trailing_blank { + if trailing_breaks.is_empty() { + string.push(' '); + } + leading_break.clear(); + } else { + string.extend(leading_break.chars()); + leading_break.clear(); + } + + string.extend(trailing_breaks.chars()); + trailing_breaks.clear(); + + leading_blank = is_blank(self.ch()); + + while !is_breakz(self.ch()) { + println!("----> {:?}", self.ch()); + string.push(self.ch()); + self.skip(); + self.lookahead(1); + } + + self.lookahead(2); + self.skip_line(); + + // Eat the following intendation spaces and line breaks. + try!(self.block_scalar_breaks(&mut indent, &mut trailing_breaks)); + } + + // Chomp the tail. + if chomping != -1 { + string.extend(leading_break.chars()); + } + + if chomping == 1 { + string.extend(trailing_breaks.chars()); + } + + if literal { + Ok(Token(start_mark, TokenType::ScalarToken(TScalarStyle::Literal, string))) + } else { + Ok(Token(start_mark, TokenType::ScalarToken(TScalarStyle::Foled, string))) + } + } + + fn block_scalar_breaks(&mut self, indent: &mut usize, breaks: &mut String) -> ScanResult { + let mut max_indent = 0; + loop { + self.lookahead(1); + while (*indent == 0 || self.mark.col < *indent) + && self.buffer[0] == ' ' { + self.skip(); + self.lookahead(1); + } + + if self.mark.col > max_indent { + max_indent = self.mark.col; + } + + // Check for a tab character messing the intendation. + if (*indent == 0 || self.mark.col < *indent) + && self.buffer[0] == '\t' { + return Err(ScanError::new(self.mark, + "while scanning a block scalar, found a tab character where an intendation space is expected")); + } + + if !is_break(self.ch()) { + break; + } + + self.lookahead(2); + // Consume the line break. + self.read_break(breaks); + } + + if *indent == 0 { + *indent = max_indent; + if *indent < (self.indent + 1) as usize { + *indent = (self.indent + 1) as usize; + } + if *indent < 1 { + *indent = 1; + } + } + Ok(()) + } + fn fetch_flow_scalar(&mut self, single: bool) -> ScanResult { try!(self.save_simple_key()); self.disallow_simple_key(); diff --git a/saphyr/tests/spec_test.rs b/saphyr/tests/spec_test.rs index cd9f1c1..c5910b0 100644 --- a/saphyr/tests/spec_test.rs +++ b/saphyr/tests/spec_test.rs @@ -19,6 +19,7 @@ enum TestEvent { } fn yaml_to_test_events(docs: &Vec) -> Vec { + println!("DOCS {:?}", docs); fn next(root: &Yaml, evs: &mut Vec) { match *root { Yaml::BadValue => { panic!("unexpected BadValue"); }, From 616ebfa378dc518e5d19b57fe8a345b80ed86d98 Mon Sep 17 00:00:00 2001 From: Yuheng Chen Date: Wed, 27 May 2015 02:50:51 +0800 Subject: [PATCH 014/380] Refactory YAML document loader --- saphyr/src/parser.rs | 154 ++++++------------ saphyr/src/scanner.rs | 2 - saphyr/src/yaml.rs | 155 +++++++++++++++++- saphyr/tests/spec_test.rs | 66 -------- saphyr/tests/spec_test.rs.inc | 291 ++++++++++++---------------------- 5 files changed, 291 insertions(+), 377 deletions(-) delete mode 100644 saphyr/tests/spec_test.rs diff --git a/saphyr/src/parser.rs b/saphyr/src/parser.rs index cb95203..4ffab44 100644 --- a/saphyr/src/parser.rs +++ b/saphyr/src/parser.rs @@ -1,5 +1,5 @@ use scanner::*; -use yaml::*; +// use yaml::*; #[derive(Clone, Copy, PartialEq, Debug, Eq)] pub enum State { @@ -99,129 +99,107 @@ impl> Parser { self.states.push(state); } - pub fn parse(&mut self) -> ParseResult { + fn parse(&mut self, recv: &mut R) + -> ParseResult { if self.scanner.stream_ended() || self.state == State::End { return Ok(Event::StreamEnd); } - let ev = self.state_machine(); - println!("EV {:?}", ev); - ev + let ev = try!(self.state_machine()); + // println!("EV {:?}", ev); + recv.on_event(&ev); + Ok(ev) } - pub fn load(&mut self) -> Result { + pub fn load(&mut self, recv: &mut R, multi: bool) + -> Result<(), ScanError> { if !self.scanner.stream_started() { - let ev = try!(self.parse()); + let ev = try!(self.parse(recv)); assert_eq!(ev, Event::StreamStart); } if self.scanner.stream_ended() { // XXX has parsed? - return Ok(Yaml::Null); + recv.on_event(&Event::StreamEnd); + return Ok(()); } - let ev = try!(self.parse()); - if ev == Event::StreamEnd { - return Ok(Yaml::Null); - } - self.load_document(&ev) - } - - pub fn load_multidoc(&mut self) -> Result, ScanError> { - if !self.scanner.stream_started() { - let ev = try!(self.parse()); - assert_eq!(ev, Event::StreamStart); - } - - if self.scanner.stream_ended() { - // XXX has parsed? - return Ok(Vec::new()); - } - let mut docs: Vec = Vec::new(); loop { - let ev = try!(self.parse()); + let ev = try!(self.parse(recv)); if ev == Event::StreamEnd { - return Ok(docs); + recv.on_event(&Event::StreamEnd); + return Ok(()); + } + try!(self.load_document(&ev, recv)); + if !multi { + break; } - docs.push(try!(self.load_document(&ev))); } - unreachable!(); + Ok(()) } - fn load_document(&mut self, first_ev: &Event) -> Result { + fn load_document(&mut self, first_ev: &Event, recv: &mut R) + -> Result<(), ScanError> { assert_eq!(first_ev, &Event::DocumentStart); - let ev = try!(self.parse()); - let doc = try!(self.load_node(&ev)); + let ev = try!(self.parse(recv)); + try!(self.load_node(&ev, recv)); + // DOCUMENT-END is expected. - let ev = try!(self.parse()); + let ev = try!(self.parse(recv)); assert_eq!(ev, Event::DocumentEnd); - Ok(doc) + Ok(()) } - fn load_node(&mut self, first_ev: &Event) -> Result { + fn load_node(&mut self, first_ev: &Event, recv: &mut R) + -> Result<(), ScanError> { match *first_ev { Event::Scalar(ref v, style) => { - // TODO scalar - if style != TScalarStyle::Plain { - Ok(Yaml::String(v.clone())) - } else { - match v.as_ref() { - "~" => Ok(Yaml::Null), - "true" => Ok(Yaml::Boolean(true)), - "false" => Ok(Yaml::Boolean(false)), - // try parsing as f64 - _ if v.parse::().is_ok() => Ok(Yaml::Number(v.clone())), - _ => Ok(Yaml::String(v.clone())) - } - } + Ok(()) }, Event::SequenceStart => { - self.load_sequence(first_ev) + self.load_sequence(first_ev, recv) }, Event::MappingStart => { - self.load_mapping(first_ev) + self.load_mapping(first_ev, recv) }, // TODO more events _ => { unreachable!(); } } } - fn load_mapping(&mut self, first_ev: &Event) -> Result { - let mut ev = try!(self.parse()); - let mut map = Hash::new(); + fn load_mapping(&mut self, first_ev: &Event, recv: &mut R) + -> Result<(), ScanError> { + let mut ev = try!(self.parse(recv)); while ev != Event::MappingEnd { // key - let key = try!(self.load_node(&ev)); + try!(self.load_node(&ev, recv)); // value - ev = try!(self.parse()); - let value = try!(self.load_node(&ev)); - - map.insert(key, value); + ev = try!(self.parse(recv)); + try!(self.load_node(&ev, recv)); // next event - ev = try!(self.parse()); + ev = try!(self.parse(recv)); } - Ok(Yaml::Hash(map)) + Ok(()) } - fn load_sequence(&mut self, first_ev: &Event) -> Result { - let mut ev = try!(self.parse()); - let mut vec = Vec::new(); + fn load_sequence(&mut self, first_ev: &Event, recv: &mut R) + -> Result<(), ScanError> { + let mut ev = try!(self.parse(recv)); while ev != Event::SequenceEnd { - let entry = try!(self.load_node(&ev)); - vec.push(entry); + let entry = try!(self.load_node(&ev, recv)); // next event - ev = try!(self.parse()); + ev = try!(self.parse(recv)); } - Ok(Yaml::Array(vec)) + Ok(()) } fn state_machine(&mut self) -> ParseResult { let next_tok = self.peek(); - println!("cur_state {:?}, next tok: {:?}", self.state, next_tok); + //println!("cur_state {:?}, next tok: {:?}", self.state, next_tok); match self.state { State::StreamStart => self.stream_start(), @@ -526,43 +504,5 @@ impl> Parser { #[cfg(test)] mod test { use super::*; - #[test] - fn test_parser() { - let s: String = " -# comment -a0 bb: val -a1: - b1: 4 - b2: d -a2: 4 # i'm comment -a3: [1, 2, 3] -a4: - - - a1 - - a2 - - 2 -a5: 'single_quoted' -a6: \"double_quoted\" -a7: 你好 -".to_string(); - let mut parser = Parser::new(s.chars()); - let out = parser.load().unwrap(); - assert_eq!(out["a7"].as_str().unwrap(), "你好"); - } - - #[test] - fn test_multi_doc() { - let s = -" -'a scalar' ---- -'a scalar' ---- -'a scalar' -"; - let mut p = Parser::new(s.chars()); - let out = p.load_multidoc().unwrap(); - assert_eq!(out.len(), 3); - - } } diff --git a/saphyr/src/scanner.rs b/saphyr/src/scanner.rs index 5171f97..3f55eda 100644 --- a/saphyr/src/scanner.rs +++ b/saphyr/src/scanner.rs @@ -620,7 +620,6 @@ impl> Scanner { let start_mark = self.mark; while self.mark.col == indent && !is_z(self.ch()) { - println!("--> {:?}", self.ch()); // We are at the beginning of a non-empty line. trailing_blank = is_blank(self.ch()); if !literal && !leading_break.is_empty() @@ -640,7 +639,6 @@ impl> Scanner { leading_blank = is_blank(self.ch()); while !is_breakz(self.ch()) { - println!("----> {:?}", self.ch()); string.push(self.ch()); self.skip(); self.lookahead(1); diff --git a/saphyr/src/yaml.rs b/saphyr/src/yaml.rs index 4371a20..d6e3abc 100644 --- a/saphyr/src/yaml.rs +++ b/saphyr/src/yaml.rs @@ -2,6 +2,9 @@ use std::collections::BTreeMap; use std::ops::Index; use std::string; use std::str::FromStr; +use std::mem; +use parser::*; +use scanner::{TScalarStyle, ScanError}; #[derive(Clone, PartialEq, PartialOrd, Debug, Eq, Ord)] pub enum Yaml { @@ -20,6 +23,103 @@ pub enum Yaml { pub type Array = Vec; pub type Hash = BTreeMap; +pub struct YamlLoader { + docs: Vec, + // states + doc_stack: Vec, + key_stack: Vec, +} + +impl EventReceiver for YamlLoader { + fn on_event(&mut self, ev: &Event) { + println!("EV {:?}", ev); + match *ev { + Event::DocumentStart => { + // do nothing + }, + Event::DocumentEnd => { + match self.doc_stack.len() { + // empty document + 0 => self.docs.push(Yaml::BadValue), + 1 => self.docs.push(self.doc_stack.pop().unwrap()), + _ => unreachable!() + } + }, + Event::SequenceStart => { + self.doc_stack.push(Yaml::Array(Vec::new())); + }, + Event::SequenceEnd => { + let node = self.doc_stack.pop().unwrap(); + self.insert_new_node(node); + }, + Event::MappingStart => { + self.doc_stack.push(Yaml::Hash(Hash::new())); + self.key_stack.push(Yaml::BadValue); + }, + Event::MappingEnd => { + self.key_stack.pop().unwrap(); + let node = self.doc_stack.pop().unwrap(); + self.insert_new_node(node); + }, + Event::Scalar(ref v, style) => { + let node = if style != TScalarStyle::Plain { + Yaml::String(v.clone()) + } else { + match v.as_ref() { + "~" => Yaml::Null, + "true" => Yaml::Boolean(true), + "false" => Yaml::Boolean(false), + // try parsing as f64 + _ if v.parse::().is_ok() => Yaml::Number(v.clone()), + _ => Yaml::String(v.clone()) + } + }; + + self.insert_new_node(node); + }, + _ => { /* ignore */ } + } + // println!("DOC {:?}", self.doc_stack); + } +} + +impl YamlLoader { + fn insert_new_node(&mut self, node: Yaml) { + if !self.doc_stack.is_empty() { + let parent = self.doc_stack.last_mut().unwrap(); + match *parent { + Yaml::Array(ref mut v) => v.push(node), + Yaml::Hash(ref mut h) => { + let mut cur_key = self.key_stack.last_mut().unwrap(); + // current node is a key + if cur_key.is_badvalue() { + *cur_key = node; + // current node is a value + } else { + let mut newkey = Yaml::BadValue; + mem::swap(&mut newkey, cur_key); + h.insert(newkey, node); + } + }, + _ => unreachable!(), + } + } else { + self.doc_stack.push(node); + } + } + + pub fn load_from_str(source: &str) -> Result, ScanError>{ + let mut loader = YamlLoader { + docs: Vec::new(), + doc_stack: Vec::new(), + key_stack: Vec::new(), + }; + let mut parser = Parser::new(source.chars()); + try!(parser.load(&mut loader, true)); + Ok(loader.docs) + } +} + macro_rules! define_as ( ($name:ident, $t:ident, $yt:ident) => ( pub fn $name(&self) -> Option<$t> { @@ -102,10 +202,11 @@ impl Index for Yaml { } + #[cfg(test)] mod test { use parser::Parser; - use yaml::Yaml; + use yaml::*; #[test] fn test_coerce() { let s = "--- @@ -113,13 +214,51 @@ a: 1 b: 2.2 c: [1, 2] "; - let mut parser = Parser::new(s.chars()); - let out = parser.load().unwrap(); - assert_eq!(out["a"].as_number::().unwrap(), 1); - assert_eq!(out["b"].as_number::().unwrap(), 2.2f32); - assert_eq!(out["c"][1].as_number::().unwrap(), 2); - assert!(out["d"][0].is_badvalue()); - //assert_eq!(out.as_hash().unwrap()[&Yaml::String("a".to_string())].as_i64().unwrap(), 1i64); + let out = YamlLoader::load_from_str(&s).unwrap(); + let doc = &out[0]; + assert_eq!(doc["a"].as_number::().unwrap(), 1); + assert_eq!(doc["b"].as_number::().unwrap(), 2.2f32); + assert_eq!(doc["c"][1].as_number::().unwrap(), 2); + assert!(doc["d"][0].is_badvalue()); } + + #[test] + fn test_parser() { + let s: String = " +# comment +a0 bb: val +a1: + b1: 4 + b2: d +a2: 4 # i'm comment +a3: [1, 2, 3] +a4: + - - a1 + - a2 + - 2 +a5: 'single_quoted' +a6: \"double_quoted\" +a7: 你好 +".to_string(); + let out = YamlLoader::load_from_str(&s).unwrap(); + let doc = &out[0]; + println!("DOC {:?}", doc); + assert_eq!(doc["a7"].as_str().unwrap(), "你好"); + } + + #[test] + fn test_multi_doc() { + let s = +" +'a scalar' +--- +'a scalar' +--- +'a scalar' +"; + let out = YamlLoader::load_from_str(&s).unwrap(); + assert_eq!(out.len(), 3); + } + } diff --git a/saphyr/tests/spec_test.rs b/saphyr/tests/spec_test.rs deleted file mode 100644 index c5910b0..0000000 --- a/saphyr/tests/spec_test.rs +++ /dev/null @@ -1,66 +0,0 @@ -#![allow(dead_code)] -extern crate yaml_rust; - -use yaml_rust::parser::Parser; -use yaml_rust::yaml::Yaml; - - -#[derive(Clone, PartialEq, PartialOrd, Debug)] -enum TestEvent { - OnDocumentStart, - OnDocumentEnd, - OnSequenceStart, - OnSequenceEnd, - OnMapStart, - OnMapEnd, - OnScalar, - OnAlias, - OnNull, -} - -fn yaml_to_test_events(docs: &Vec) -> Vec { - println!("DOCS {:?}", docs); - fn next(root: &Yaml, evs: &mut Vec) { - match *root { - Yaml::BadValue => { panic!("unexpected BadValue"); }, - Yaml::Null => { evs.push(TestEvent::OnNull); }, - Yaml::Array(ref v) => { - evs.push(TestEvent::OnSequenceStart); - for e in v { - next(e, evs); - } - evs.push(TestEvent::OnSequenceEnd); - }, - Yaml::Hash(ref v) => { - evs.push(TestEvent::OnMapStart); - for (k, v) in v { - next(k, evs); - next(v, evs); - } - evs.push(TestEvent::OnMapEnd); - }, - _ => { evs.push(TestEvent::OnScalar); } - } - } - let mut evs: Vec = Vec::new(); - for doc in docs { - evs.push(TestEvent::OnDocumentStart); - next(doc, &mut evs); - evs.push(TestEvent::OnDocumentEnd); - } - evs -} - -macro_rules! assert_next { - ($v:expr, $p:pat) => ( - match $v.next().unwrap() { - $p => {}, - e => { panic!("unexpected event: {:?}", e); } - } - ) -} - -// auto generated from handler_spec_test.cpp -include!("specexamples.rs.inc"); -include!("spec_test.rs.inc"); - diff --git a/saphyr/tests/spec_test.rs.inc b/saphyr/tests/spec_test.rs.inc index 3282454..a359e7b 100644 --- a/saphyr/tests/spec_test.rs.inc +++ b/saphyr/tests/spec_test.rs.inc @@ -1,7 +1,6 @@ #[test] fn test_ex2_1_seq_scalars() { - let mut p = Parser::new(EX2_1.chars()); - let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); + let mut v = str_to_test_events(EX2_1).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnSequenceStart); assert_next!(v, TestEvent::OnScalar); @@ -13,8 +12,7 @@ fn test_ex2_1_seq_scalars() { #[test] fn test_ex2_2_mapping_scalars_to_scalars() { - let mut p = Parser::new(EX2_2.chars()); - let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); + let mut v = str_to_test_events(EX2_2).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnMapStart); assert_next!(v, TestEvent::OnScalar); @@ -29,8 +27,7 @@ fn test_ex2_2_mapping_scalars_to_scalars() { #[test] fn test_ex2_3_mapping_scalars_to_sequences() { - let mut p = Parser::new(EX2_3.chars()); - let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); + let mut v = str_to_test_events(EX2_3).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnMapStart); assert_next!(v, TestEvent::OnScalar); @@ -51,8 +48,7 @@ fn test_ex2_3_mapping_scalars_to_sequences() { #[test] fn test_ex2_4_sequence_of_mappings() { - let mut p = Parser::new(EX2_4.chars()); - let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); + let mut v = str_to_test_events(EX2_4).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnSequenceStart); assert_next!(v, TestEvent::OnMapStart); @@ -77,8 +73,7 @@ fn test_ex2_4_sequence_of_mappings() { #[test] fn test_ex2_5_sequence_of_sequences() { - let mut p = Parser::new(EX2_5.chars()); - let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); + let mut v = str_to_test_events(EX2_5).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnSequenceStart); assert_next!(v, TestEvent::OnSequenceStart); @@ -102,8 +97,7 @@ fn test_ex2_5_sequence_of_sequences() { #[test] fn test_ex2_6_mapping_of_mappings() { - let mut p = Parser::new(EX2_6.chars()); - let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); + let mut v = str_to_test_events(EX2_6).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnMapStart); assert_next!(v, TestEvent::OnScalar); @@ -126,8 +120,7 @@ fn test_ex2_6_mapping_of_mappings() { #[test] fn test_ex2_7_two_documents_in_a_stream() { - let mut p = Parser::new(EX2_7.chars()); - let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); + let mut v = str_to_test_events(EX2_7).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnSequenceStart); assert_next!(v, TestEvent::OnScalar); @@ -145,8 +138,7 @@ fn test_ex2_7_two_documents_in_a_stream() { #[test] fn test_ex2_8_play_by_play_feed() { - let mut p = Parser::new(EX2_8.chars()); - let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); + let mut v = str_to_test_events(EX2_8).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnMapStart); assert_next!(v, TestEvent::OnScalar); @@ -171,8 +163,7 @@ fn test_ex2_8_play_by_play_feed() { #[test] fn test_ex2_9_single_document_with_two_comments() { - let mut p = Parser::new(EX2_9.chars()); - let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); + let mut v = str_to_test_events(EX2_9).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnMapStart); assert_next!(v, TestEvent::OnScalar); @@ -191,8 +182,7 @@ fn test_ex2_9_single_document_with_two_comments() { #[test] fn test_ex2_10_simple_anchor() { - let mut p = Parser::new(EX2_10.chars()); - let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); + let mut v = str_to_test_events(EX2_10).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnMapStart); assert_next!(v, TestEvent::OnScalar); @@ -211,8 +201,7 @@ fn test_ex2_10_simple_anchor() { #[test] fn test_ex2_11_mapping_between_sequences() { - let mut p = Parser::new(EX2_11.chars()); - let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); + let mut v = str_to_test_events(EX2_11).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnMapStart); assert_next!(v, TestEvent::OnSequenceStart); @@ -237,8 +226,7 @@ fn test_ex2_11_mapping_between_sequences() { #[test] fn test_ex2_12_compact_nested_mapping() { - let mut p = Parser::new(EX2_12.chars()); - let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); + let mut v = str_to_test_events(EX2_12).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnSequenceStart); assert_next!(v, TestEvent::OnMapStart); @@ -265,8 +253,7 @@ fn test_ex2_12_compact_nested_mapping() { #[test] fn test_ex2_13_in_literals_newlines_are_preserved() { - let mut p = Parser::new(EX2_13.chars()); - let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); + let mut v = str_to_test_events(EX2_13).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnScalar); assert_next!(v, TestEvent::OnDocumentEnd); @@ -274,8 +261,7 @@ fn test_ex2_13_in_literals_newlines_are_preserved() { #[test] fn test_ex2_14_in_folded_scalars_newlines_become_spaces() { - let mut p = Parser::new(EX2_14.chars()); - let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); + let mut v = str_to_test_events(EX2_14).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnScalar); assert_next!(v, TestEvent::OnDocumentEnd); @@ -283,8 +269,7 @@ fn test_ex2_14_in_folded_scalars_newlines_become_spaces() { #[test] fn test_ex2_15_folded_newlines_are_preserved_for_more_indented_and_blank_lines() { - let mut p = Parser::new(EX2_15.chars()); - let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); + let mut v = str_to_test_events(EX2_15).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnScalar); assert_next!(v, TestEvent::OnDocumentEnd); @@ -292,8 +277,7 @@ fn test_ex2_15_folded_newlines_are_preserved_for_more_indented_and_blank_lines() #[test] fn test_ex2_16_indentation_determines_scope() { - let mut p = Parser::new(EX2_16.chars()); - let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); + let mut v = str_to_test_events(EX2_16).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnMapStart); assert_next!(v, TestEvent::OnScalar); @@ -308,8 +292,7 @@ fn test_ex2_16_indentation_determines_scope() { #[test] fn test_ex2_17_quoted_scalars() { - let mut p = Parser::new(EX2_17.chars()); - let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); + let mut v = str_to_test_events(EX2_17).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnMapStart); assert_next!(v, TestEvent::OnScalar); @@ -330,8 +313,7 @@ fn test_ex2_17_quoted_scalars() { #[test] fn test_ex2_18_multi_line_flow_scalars() { - let mut p = Parser::new(EX2_18.chars()); - let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); + let mut v = str_to_test_events(EX2_18).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnMapStart); assert_next!(v, TestEvent::OnScalar); @@ -344,8 +326,7 @@ fn test_ex2_18_multi_line_flow_scalars() { #[test] fn test_ex2_23_various_explicit_tags() { - let mut p = Parser::new(EX2_23.chars()); - let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); + let mut v = str_to_test_events(EX2_23).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnMapStart); assert_next!(v, TestEvent::OnScalar); @@ -360,8 +341,7 @@ fn test_ex2_23_various_explicit_tags() { #[test] fn test_ex2_24_global_tags() { - let mut p = Parser::new(EX2_24.chars()); - let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); + let mut v = str_to_test_events(EX2_24).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnSequenceStart); assert_next!(v, TestEvent::OnMapStart); @@ -400,8 +380,7 @@ fn test_ex2_24_global_tags() { #[test] fn test_ex2_25_unordered_sets() { - let mut p = Parser::new(EX2_25.chars()); - let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); + let mut v = str_to_test_events(EX2_25).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnMapStart); assert_next!(v, TestEvent::OnScalar); @@ -416,8 +395,7 @@ fn test_ex2_25_unordered_sets() { #[test] fn test_ex2_26_ordered_mappings() { - let mut p = Parser::new(EX2_26.chars()); - let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); + let mut v = str_to_test_events(EX2_26).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnSequenceStart); assert_next!(v, TestEvent::OnMapStart); @@ -438,8 +416,7 @@ fn test_ex2_26_ordered_mappings() { #[test] fn test_ex2_27_invoice() { - let mut p = Parser::new(EX2_27.chars()); - let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); + let mut v = str_to_test_events(EX2_27).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnMapStart); assert_next!(v, TestEvent::OnScalar); @@ -501,8 +478,7 @@ fn test_ex2_27_invoice() { #[test] fn test_ex2_28_log_file() { - let mut p = Parser::new(EX2_28.chars()); - let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); + let mut v = str_to_test_events(EX2_28).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnMapStart); assert_next!(v, TestEvent::OnScalar); @@ -556,8 +532,7 @@ fn test_ex2_28_log_file() { #[test] fn test_ex5_3_block_structure_indicators() { - let mut p = Parser::new(EX5_3.chars()); - let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); + let mut v = str_to_test_events(EX5_3).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnMapStart); assert_next!(v, TestEvent::OnScalar); @@ -578,8 +553,7 @@ fn test_ex5_3_block_structure_indicators() { #[test] fn test_ex5_4_flow_structure_indicators() { - let mut p = Parser::new(EX5_4.chars()); - let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); + let mut v = str_to_test_events(EX5_4).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnMapStart); assert_next!(v, TestEvent::OnScalar); @@ -600,8 +574,7 @@ fn test_ex5_4_flow_structure_indicators() { #[test] fn test_ex5_6_node_property_indicators() { - let mut p = Parser::new(EX5_6.chars()); - let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); + let mut v = str_to_test_events(EX5_6).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnMapStart); assert_next!(v, TestEvent::OnScalar); @@ -614,8 +587,7 @@ fn test_ex5_6_node_property_indicators() { #[test] fn test_ex5_7_block_scalar_indicators() { - let mut p = Parser::new(EX5_7.chars()); - let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); + let mut v = str_to_test_events(EX5_7).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnMapStart); assert_next!(v, TestEvent::OnScalar); @@ -628,8 +600,7 @@ fn test_ex5_7_block_scalar_indicators() { #[test] fn test_ex5_8_quoted_scalar_indicators() { - let mut p = Parser::new(EX5_8.chars()); - let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); + let mut v = str_to_test_events(EX5_8).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnMapStart); assert_next!(v, TestEvent::OnScalar); @@ -642,8 +613,7 @@ fn test_ex5_8_quoted_scalar_indicators() { #[test] fn test_ex5_11_line_break_characters() { - let mut p = Parser::new(EX5_11.chars()); - let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); + let mut v = str_to_test_events(EX5_11).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnScalar); assert_next!(v, TestEvent::OnDocumentEnd); @@ -651,8 +621,7 @@ fn test_ex5_11_line_break_characters() { #[test] fn test_ex5_12_tabs_and_spaces() { - let mut p = Parser::new(EX5_12.chars()); - let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); + let mut v = str_to_test_events(EX5_12).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnMapStart); assert_next!(v, TestEvent::OnScalar); @@ -665,8 +634,7 @@ fn test_ex5_12_tabs_and_spaces() { #[test] fn test_ex5_13_escaped_characters() { - let mut p = Parser::new(EX5_13.chars()); - let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); + let mut v = str_to_test_events(EX5_13).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnScalar); assert_next!(v, TestEvent::OnDocumentEnd); @@ -674,8 +642,7 @@ fn test_ex5_13_escaped_characters() { #[test] fn test_ex6_1_indentation_spaces() { - let mut p = Parser::new(EX6_1.chars()); - let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); + let mut v = str_to_test_events(EX6_1).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnMapStart); assert_next!(v, TestEvent::OnScalar); @@ -695,8 +662,7 @@ fn test_ex6_1_indentation_spaces() { #[test] fn test_ex6_2_indentation_indicators() { - let mut p = Parser::new(EX6_2.chars()); - let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); + let mut v = str_to_test_events(EX6_2).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnMapStart); assert_next!(v, TestEvent::OnScalar); @@ -713,8 +679,7 @@ fn test_ex6_2_indentation_indicators() { #[test] fn test_ex6_3_separation_spaces() { - let mut p = Parser::new(EX6_3.chars()); - let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); + let mut v = str_to_test_events(EX6_3).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnSequenceStart); assert_next!(v, TestEvent::OnMapStart); @@ -731,8 +696,7 @@ fn test_ex6_3_separation_spaces() { #[test] fn test_ex6_4_line_prefixes() { - let mut p = Parser::new(EX6_4.chars()); - let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); + let mut v = str_to_test_events(EX6_4).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnMapStart); assert_next!(v, TestEvent::OnScalar); @@ -747,8 +711,7 @@ fn test_ex6_4_line_prefixes() { #[test] fn test_ex6_5_empty_lines() { - let mut p = Parser::new(EX6_5.chars()); - let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); + let mut v = str_to_test_events(EX6_5).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnMapStart); assert_next!(v, TestEvent::OnScalar); @@ -761,8 +724,7 @@ fn test_ex6_5_empty_lines() { #[test] fn test_ex6_6_line_folding() { - let mut p = Parser::new(EX6_6.chars()); - let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); + let mut v = str_to_test_events(EX6_6).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnScalar); assert_next!(v, TestEvent::OnDocumentEnd); @@ -770,8 +732,7 @@ fn test_ex6_6_line_folding() { #[test] fn test_ex6_7_block_folding() { - let mut p = Parser::new(EX6_7.chars()); - let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); + let mut v = str_to_test_events(EX6_7).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnScalar); assert_next!(v, TestEvent::OnDocumentEnd); @@ -779,8 +740,7 @@ fn test_ex6_7_block_folding() { #[test] fn test_ex6_8_flow_folding() { - let mut p = Parser::new(EX6_8.chars()); - let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); + let mut v = str_to_test_events(EX6_8).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnScalar); assert_next!(v, TestEvent::OnDocumentEnd); @@ -788,8 +748,7 @@ fn test_ex6_8_flow_folding() { #[test] fn test_ex6_9_separated_comment() { - let mut p = Parser::new(EX6_9.chars()); - let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); + let mut v = str_to_test_events(EX6_9).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnMapStart); assert_next!(v, TestEvent::OnScalar); @@ -800,8 +759,7 @@ fn test_ex6_9_separated_comment() { #[test] fn test_ex6_12_separation_spaces_ii() { - let mut p = Parser::new(EX6_12.chars()); - let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); + let mut v = str_to_test_events(EX6_12).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnMapStart); assert_next!(v, TestEvent::OnMapStart); @@ -822,8 +780,7 @@ fn test_ex6_12_separation_spaces_ii() { #[test] fn test_ex6_13_reserved_directives() { - let mut p = Parser::new(EX6_13.chars()); - let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); + let mut v = str_to_test_events(EX6_13).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnScalar); assert_next!(v, TestEvent::OnDocumentEnd); @@ -831,8 +788,7 @@ fn test_ex6_13_reserved_directives() { #[test] fn test_ex6_14_yaml_directive() { - let mut p = Parser::new(EX6_14.chars()); - let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); + let mut v = str_to_test_events(EX6_14).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnScalar); assert_next!(v, TestEvent::OnDocumentEnd); @@ -840,8 +796,7 @@ fn test_ex6_14_yaml_directive() { #[test] fn test_ex6_16_tag_directive() { - let mut p = Parser::new(EX6_16.chars()); - let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); + let mut v = str_to_test_events(EX6_16).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnScalar); assert_next!(v, TestEvent::OnDocumentEnd); @@ -849,8 +804,7 @@ fn test_ex6_16_tag_directive() { #[test] fn test_ex6_18_primary_tag_handle() { - let mut p = Parser::new(EX6_18.chars()); - let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); + let mut v = str_to_test_events(EX6_18).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnScalar); assert_next!(v, TestEvent::OnDocumentEnd); @@ -861,8 +815,7 @@ fn test_ex6_18_primary_tag_handle() { #[test] fn test_ex6_19_secondary_tag_handle() { - let mut p = Parser::new(EX6_19.chars()); - let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); + let mut v = str_to_test_events(EX6_19).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnScalar); assert_next!(v, TestEvent::OnDocumentEnd); @@ -870,8 +823,7 @@ fn test_ex6_19_secondary_tag_handle() { #[test] fn test_ex6_20_tag_handles() { - let mut p = Parser::new(EX6_20.chars()); - let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); + let mut v = str_to_test_events(EX6_20).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnScalar); assert_next!(v, TestEvent::OnDocumentEnd); @@ -879,8 +831,7 @@ fn test_ex6_20_tag_handles() { #[test] fn test_ex6_21_local_tag_prefix() { - let mut p = Parser::new(EX6_21.chars()); - let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); + let mut v = str_to_test_events(EX6_21).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnScalar); assert_next!(v, TestEvent::OnDocumentEnd); @@ -891,8 +842,7 @@ fn test_ex6_21_local_tag_prefix() { #[test] fn test_ex6_22_global_tag_prefix() { - let mut p = Parser::new(EX6_22.chars()); - let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); + let mut v = str_to_test_events(EX6_22).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnSequenceStart); assert_next!(v, TestEvent::OnScalar); @@ -902,8 +852,7 @@ fn test_ex6_22_global_tag_prefix() { #[test] fn test_ex6_23_node_properties() { - let mut p = Parser::new(EX6_23.chars()); - let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); + let mut v = str_to_test_events(EX6_23).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnMapStart); assert_next!(v, TestEvent::OnScalar); @@ -916,8 +865,7 @@ fn test_ex6_23_node_properties() { #[test] fn test_ex6_24_verbatim_tags() { - let mut p = Parser::new(EX6_24.chars()); - let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); + let mut v = str_to_test_events(EX6_24).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnMapStart); assert_next!(v, TestEvent::OnScalar); @@ -928,8 +876,7 @@ fn test_ex6_24_verbatim_tags() { #[test] fn test_ex6_26_tag_shorthands() { - let mut p = Parser::new(EX6_26.chars()); - let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); + let mut v = str_to_test_events(EX6_26).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnSequenceStart); assert_next!(v, TestEvent::OnScalar); @@ -941,8 +888,7 @@ fn test_ex6_26_tag_shorthands() { #[test] fn test_ex6_28_non_specific_tags() { - let mut p = Parser::new(EX6_28.chars()); - let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); + let mut v = str_to_test_events(EX6_28).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnSequenceStart); assert_next!(v, TestEvent::OnScalar); @@ -954,8 +900,7 @@ fn test_ex6_28_non_specific_tags() { #[test] fn test_ex6_29_node_anchors() { - let mut p = Parser::new(EX6_29.chars()); - let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); + let mut v = str_to_test_events(EX6_29).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnMapStart); assert_next!(v, TestEvent::OnScalar); @@ -968,8 +913,7 @@ fn test_ex6_29_node_anchors() { #[test] fn test_ex7_1_alias_nodes() { - let mut p = Parser::new(EX7_1.chars()); - let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); + let mut v = str_to_test_events(EX7_1).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnMapStart); assert_next!(v, TestEvent::OnScalar); @@ -986,8 +930,7 @@ fn test_ex7_1_alias_nodes() { #[test] fn test_ex7_2_empty_nodes() { - let mut p = Parser::new(EX7_2.chars()); - let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); + let mut v = str_to_test_events(EX7_2).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnMapStart); assert_next!(v, TestEvent::OnScalar); @@ -1000,8 +943,7 @@ fn test_ex7_2_empty_nodes() { #[test] fn test_ex7_3_completely_empty_nodes() { - let mut p = Parser::new(EX7_3.chars()); - let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); + let mut v = str_to_test_events(EX7_3).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnMapStart); assert_next!(v, TestEvent::OnScalar); @@ -1014,8 +956,7 @@ fn test_ex7_3_completely_empty_nodes() { #[test] fn test_ex7_4_double_quoted_implicit_keys() { - let mut p = Parser::new(EX7_4.chars()); - let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); + let mut v = str_to_test_events(EX7_4).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnMapStart); assert_next!(v, TestEvent::OnScalar); @@ -1031,8 +972,7 @@ fn test_ex7_4_double_quoted_implicit_keys() { #[test] fn test_ex7_5_double_quoted_line_breaks() { - let mut p = Parser::new(EX7_5.chars()); - let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); + let mut v = str_to_test_events(EX7_5).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnScalar); assert_next!(v, TestEvent::OnDocumentEnd); @@ -1040,8 +980,7 @@ fn test_ex7_5_double_quoted_line_breaks() { #[test] fn test_ex7_6_double_quoted_lines() { - let mut p = Parser::new(EX7_6.chars()); - let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); + let mut v = str_to_test_events(EX7_6).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnScalar); assert_next!(v, TestEvent::OnDocumentEnd); @@ -1049,8 +988,7 @@ fn test_ex7_6_double_quoted_lines() { #[test] fn test_ex7_7_single_quoted_characters() { - let mut p = Parser::new(EX7_7.chars()); - let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); + let mut v = str_to_test_events(EX7_7).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnScalar); assert_next!(v, TestEvent::OnDocumentEnd); @@ -1058,8 +996,7 @@ fn test_ex7_7_single_quoted_characters() { #[test] fn test_ex7_8_single_quoted_implicit_keys() { - let mut p = Parser::new(EX7_8.chars()); - let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); + let mut v = str_to_test_events(EX7_8).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnMapStart); assert_next!(v, TestEvent::OnScalar); @@ -1075,8 +1012,7 @@ fn test_ex7_8_single_quoted_implicit_keys() { #[test] fn test_ex7_9_single_quoted_lines() { - let mut p = Parser::new(EX7_9.chars()); - let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); + let mut v = str_to_test_events(EX7_9).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnScalar); assert_next!(v, TestEvent::OnDocumentEnd); @@ -1084,8 +1020,7 @@ fn test_ex7_9_single_quoted_lines() { #[test] fn test_ex7_10_plain_characters() { - let mut p = Parser::new(EX7_10.chars()); - let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); + let mut v = str_to_test_events(EX7_10).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnSequenceStart); assert_next!(v, TestEvent::OnScalar); @@ -1106,8 +1041,7 @@ fn test_ex7_10_plain_characters() { #[test] fn test_ex7_11_plain_implicit_keys() { - let mut p = Parser::new(EX7_11.chars()); - let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); + let mut v = str_to_test_events(EX7_11).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnMapStart); assert_next!(v, TestEvent::OnScalar); @@ -1123,8 +1057,7 @@ fn test_ex7_11_plain_implicit_keys() { #[test] fn test_ex7_12_plain_lines() { - let mut p = Parser::new(EX7_12.chars()); - let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); + let mut v = str_to_test_events(EX7_12).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnScalar); assert_next!(v, TestEvent::OnDocumentEnd); @@ -1132,8 +1065,7 @@ fn test_ex7_12_plain_lines() { #[test] fn test_ex7_13_flow_sequence() { - let mut p = Parser::new(EX7_13.chars()); - let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); + let mut v = str_to_test_events(EX7_13).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnSequenceStart); assert_next!(v, TestEvent::OnSequenceStart); @@ -1150,8 +1082,7 @@ fn test_ex7_13_flow_sequence() { #[test] fn test_ex7_14_flow_sequence_entries() { - let mut p = Parser::new(EX7_14.chars()); - let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); + let mut v = str_to_test_events(EX7_14).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnSequenceStart); assert_next!(v, TestEvent::OnScalar); @@ -1170,8 +1101,7 @@ fn test_ex7_14_flow_sequence_entries() { #[test] fn test_ex7_15_flow_mappings() { - let mut p = Parser::new(EX7_15.chars()); - let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); + let mut v = str_to_test_events(EX7_15).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnSequenceStart); assert_next!(v, TestEvent::OnMapStart); @@ -1192,8 +1122,7 @@ fn test_ex7_15_flow_mappings() { #[test] fn test_ex7_16_flow_mapping_entries() { - let mut p = Parser::new(EX7_16.chars()); - let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); + let mut v = str_to_test_events(EX7_16).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnMapStart); assert_next!(v, TestEvent::OnScalar); @@ -1208,8 +1137,7 @@ fn test_ex7_16_flow_mapping_entries() { #[test] fn test_ex7_17_flow_mapping_separate_values() { - let mut p = Parser::new(EX7_17.chars()); - let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); + let mut v = str_to_test_events(EX7_17).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnMapStart); assert_next!(v, TestEvent::OnScalar); @@ -1226,8 +1154,7 @@ fn test_ex7_17_flow_mapping_separate_values() { #[test] fn test_ex7_18_flow_mapping_adjacent_values() { - let mut p = Parser::new(EX7_18.chars()); - let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); + let mut v = str_to_test_events(EX7_18).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnMapStart); assert_next!(v, TestEvent::OnScalar); @@ -1242,8 +1169,7 @@ fn test_ex7_18_flow_mapping_adjacent_values() { #[test] fn test_ex7_19_single_pair_flow_mappings() { - let mut p = Parser::new(EX7_19.chars()); - let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); + let mut v = str_to_test_events(EX7_19).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnSequenceStart); assert_next!(v, TestEvent::OnMapStart); @@ -1256,8 +1182,7 @@ fn test_ex7_19_single_pair_flow_mappings() { #[test] fn test_ex7_20_single_pair_explicit_entry() { - let mut p = Parser::new(EX7_20.chars()); - let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); + let mut v = str_to_test_events(EX7_20).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnSequenceStart); assert_next!(v, TestEvent::OnMapStart); @@ -1270,8 +1195,7 @@ fn test_ex7_20_single_pair_explicit_entry() { #[test] fn test_ex7_21_single_pair_implicit_entries() { - let mut p = Parser::new(EX7_21.chars()); - let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); + let mut v = str_to_test_events(EX7_21).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnSequenceStart); assert_next!(v, TestEvent::OnSequenceStart); @@ -1301,8 +1225,7 @@ fn test_ex7_21_single_pair_implicit_entries() { #[test] fn test_ex7_23_flow_content() { - let mut p = Parser::new(EX7_23.chars()); - let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); + let mut v = str_to_test_events(EX7_23).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnSequenceStart); assert_next!(v, TestEvent::OnSequenceStart); @@ -1322,8 +1245,7 @@ fn test_ex7_23_flow_content() { #[test] fn test_ex7_24_flow_nodes() { - let mut p = Parser::new(EX7_24.chars()); - let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); + let mut v = str_to_test_events(EX7_24).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnSequenceStart); assert_next!(v, TestEvent::OnScalar); @@ -1337,8 +1259,7 @@ fn test_ex7_24_flow_nodes() { #[test] fn test_ex8_1_block_scalar_header() { - let mut p = Parser::new(EX8_1.chars()); - let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); + let mut v = str_to_test_events(EX8_1).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnSequenceStart); assert_next!(v, TestEvent::OnScalar); @@ -1351,8 +1272,7 @@ fn test_ex8_1_block_scalar_header() { #[test] fn test_ex8_2_block_indentation_header() { - let mut p = Parser::new(EX8_2.chars()); - let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); + let mut v = str_to_test_events(EX8_2).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnSequenceStart); assert_next!(v, TestEvent::OnScalar); @@ -1365,8 +1285,7 @@ fn test_ex8_2_block_indentation_header() { #[test] fn test_ex8_4_chomping_final_line_break() { - let mut p = Parser::new(EX8_4.chars()); - let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); + let mut v = str_to_test_events(EX8_4).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnMapStart); assert_next!(v, TestEvent::OnScalar); @@ -1381,8 +1300,7 @@ fn test_ex8_4_chomping_final_line_break() { #[test] fn test_ex8_6_empty_scalar_chomping() { - let mut p = Parser::new(EX8_6.chars()); - let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); + let mut v = str_to_test_events(EX8_6).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnMapStart); assert_next!(v, TestEvent::OnScalar); @@ -1397,8 +1315,7 @@ fn test_ex8_6_empty_scalar_chomping() { #[test] fn test_ex8_7_literal_scalar() { - let mut p = Parser::new(EX8_7.chars()); - let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); + let mut v = str_to_test_events(EX8_7).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnScalar); assert_next!(v, TestEvent::OnDocumentEnd); @@ -1406,8 +1323,7 @@ fn test_ex8_7_literal_scalar() { #[test] fn test_ex8_8_literal_content() { - let mut p = Parser::new(EX8_8.chars()); - let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); + let mut v = str_to_test_events(EX8_8).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnScalar); assert_next!(v, TestEvent::OnDocumentEnd); @@ -1415,8 +1331,7 @@ fn test_ex8_8_literal_content() { #[test] fn test_ex8_9_folded_scalar() { - let mut p = Parser::new(EX8_9.chars()); - let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); + let mut v = str_to_test_events(EX8_9).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnScalar); assert_next!(v, TestEvent::OnDocumentEnd); @@ -1424,8 +1339,7 @@ fn test_ex8_9_folded_scalar() { #[test] fn test_ex8_10_folded_lines() { - let mut p = Parser::new(EX8_10.chars()); - let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); + let mut v = str_to_test_events(EX8_10).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnScalar); assert_next!(v, TestEvent::OnDocumentEnd); @@ -1433,8 +1347,7 @@ fn test_ex8_10_folded_lines() { #[test] fn test_ex8_11_more_indented_lines() { - let mut p = Parser::new(EX8_11.chars()); - let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); + let mut v = str_to_test_events(EX8_11).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnScalar); assert_next!(v, TestEvent::OnDocumentEnd); @@ -1442,8 +1355,7 @@ fn test_ex8_11_more_indented_lines() { #[test] fn test_ex8_12_empty_separation_lines() { - let mut p = Parser::new(EX8_12.chars()); - let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); + let mut v = str_to_test_events(EX8_12).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnScalar); assert_next!(v, TestEvent::OnDocumentEnd); @@ -1451,8 +1363,7 @@ fn test_ex8_12_empty_separation_lines() { #[test] fn test_ex8_13_final_empty_lines() { - let mut p = Parser::new(EX8_13.chars()); - let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); + let mut v = str_to_test_events(EX8_13).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnScalar); assert_next!(v, TestEvent::OnDocumentEnd); @@ -1460,8 +1371,7 @@ fn test_ex8_13_final_empty_lines() { #[test] fn test_ex8_14_block_sequence() { - let mut p = Parser::new(EX8_14.chars()); - let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); + let mut v = str_to_test_events(EX8_14).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnMapStart); assert_next!(v, TestEvent::OnScalar); @@ -1478,8 +1388,7 @@ fn test_ex8_14_block_sequence() { #[test] fn test_ex8_15_block_sequence_entry_types() { - let mut p = Parser::new(EX8_15.chars()); - let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); + let mut v = str_to_test_events(EX8_15).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnSequenceStart); assert_next!(v, TestEvent::OnNull); @@ -1498,8 +1407,7 @@ fn test_ex8_15_block_sequence_entry_types() { #[test] fn test_ex8_16_block_mappings() { - let mut p = Parser::new(EX8_16.chars()); - let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); + let mut v = str_to_test_events(EX8_16).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnMapStart); assert_next!(v, TestEvent::OnScalar); @@ -1513,8 +1421,7 @@ fn test_ex8_16_block_mappings() { #[test] fn test_ex8_17_explicit_block_mapping_entries() { - let mut p = Parser::new(EX8_17.chars()); - let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); + let mut v = str_to_test_events(EX8_17).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnMapStart); assert_next!(v, TestEvent::OnScalar); @@ -1530,8 +1437,7 @@ fn test_ex8_17_explicit_block_mapping_entries() { #[test] fn test_ex8_18_implicit_block_mapping_entries() { - let mut p = Parser::new(EX8_18.chars()); - let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); + let mut v = str_to_test_events(EX8_18).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnMapStart); assert_next!(v, TestEvent::OnScalar); @@ -1548,8 +1454,7 @@ fn test_ex8_18_implicit_block_mapping_entries() { #[test] fn test_ex8_19_compact_block_mappings() { - let mut p = Parser::new(EX8_19.chars()); - let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); + let mut v = str_to_test_events(EX8_19).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnSequenceStart); assert_next!(v, TestEvent::OnMapStart); @@ -1572,8 +1477,7 @@ fn test_ex8_19_compact_block_mappings() { #[test] fn test_ex8_20_block_node_types() { - let mut p = Parser::new(EX8_20.chars()); - let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); + let mut v = str_to_test_events(EX8_20).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnSequenceStart); assert_next!(v, TestEvent::OnScalar); @@ -1588,8 +1492,7 @@ fn test_ex8_20_block_node_types() { #[test] fn test_ex8_22_block_collection_nodes() { - let mut p = Parser::new(EX8_22.chars()); - let mut v = yaml_to_test_events(&p.load_multidoc().unwrap()).into_iter(); + let mut v = str_to_test_events(EX8_22).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); assert_next!(v, TestEvent::OnMapStart); assert_next!(v, TestEvent::OnScalar); From 032a595dde5eb03a100d8431b386e29e4e5e3f46 Mon Sep 17 00:00:00 2001 From: Yuheng Chen Date: Wed, 27 May 2015 16:35:13 +0800 Subject: [PATCH 015/380] Add indentless_sequence_entry --- saphyr/src/parser.rs | 28 +++++++++++++++++++++++++++- saphyr/src/scanner.rs | 40 ++++++++++++++++++++++++++++++++++++++-- 2 files changed, 65 insertions(+), 3 deletions(-) diff --git a/saphyr/src/parser.rs b/saphyr/src/parser.rs index 4ffab44..dead8e4 100644 --- a/saphyr/src/parser.rs +++ b/saphyr/src/parser.rs @@ -199,7 +199,7 @@ impl> Parser { fn state_machine(&mut self) -> ParseResult { let next_tok = self.peek(); - //println!("cur_state {:?}, next tok: {:?}", self.state, next_tok); + println!("cur_state {:?}, next tok: {:?}", self.state, next_tok); match self.state { State::StreamStart => self.stream_start(), @@ -222,6 +222,8 @@ impl> Parser { State::FlowSequenceFirstEntry => self.flow_sequence_entry(true), State::FlowSequenceEntry => self.flow_sequence_entry(false), + State::IndentlessSequenceEntry => self.indentless_sequence_entry(), + _ => unimplemented!() } } @@ -464,6 +466,30 @@ impl> Parser { } } + fn indentless_sequence_entry(&mut self) -> ParseResult { + let mut tok = try!(self.peek()); + if tok.1 != TokenType::BlockEntryToken { + self.pop_state(); + return Ok(Event::SequenceEnd); + } + + self.skip(); + tok = try!(self.peek()); + match tok.1 { + TokenType::BlockEntryToken + | TokenType::KeyToken + | TokenType::ValueToken + | TokenType::BlockEndToken => { + self.state = State::IndentlessSequenceEntry; + Ok(Event::empty_scalar()) + }, + _ => { + self.push_state(State::IndentlessSequenceEntry); + self.parse_node(true, false) + } + } + } + fn block_sequence_entry(&mut self, first: bool) -> ParseResult { // BLOCK-SEQUENCE-START if first { diff --git a/saphyr/src/scanner.rs b/saphyr/src/scanner.rs index 3f55eda..e56fa8a 100644 --- a/saphyr/src/scanner.rs +++ b/saphyr/src/scanner.rs @@ -1,4 +1,5 @@ use std::collections::VecDeque; +use std::char; #[derive(Clone, Copy, PartialEq, Debug, Eq)] pub enum TEncoding { @@ -151,6 +152,21 @@ fn is_blankz(c: char) -> bool { fn is_digit(c: char) -> bool { c >= '0' && c <= '9' } +#[inline] +fn is_hex(c: char) -> bool { + (c >= '0' && c <= '9') + || (c >= 'a' && c <= 'f') + || (c >= 'A' && c <= 'F') +} +#[inline] +fn as_hex(c: char) -> u32 { + match c { + '0'...'9' => (c as u32) - ('0' as u32), + 'a'...'f' => (c as u32) - ('a' as u32) + 10, + 'A'...'F' => (c as u32) - ('A' as u32) + 10, + _ => unreachable!() + } +} pub type ScanResult = Result<(), ScanError>; @@ -804,9 +820,29 @@ impl> Scanner { self.skip(); // Consume an arbitrary escape code. if code_length > 0 { - let val = 0; + let val = 0usize; self.lookahead(code_length); - unimplemented!(); + let mut value = 0u32; + for i in 0..code_length { + if !is_hex(self.buffer[i]) { + return Err(ScanError::new(start_mark, + "while parsing a quoted scalar, did not find expected hexdecimal number")); + } + value = (value << 4) + as_hex(self.buffer[i]); + } + + let ch = match char::from_u32(value) { + Some(v) => v, + None => { + return Err(ScanError::new(start_mark, + "while parsing a quoted scalar, found invalid Unicode character escape code")); + } + }; + string.push(ch); + + for i in 0..code_length { + self.skip(); + } } }, c => { string.push(c); self.skip(); } From 44b1b631e22dc90485ac2bcb1355fdebb57fc857 Mon Sep 17 00:00:00 2001 From: Yuheng Chen Date: Wed, 27 May 2015 21:34:52 +0800 Subject: [PATCH 016/380] Add flow_mapping_key --- saphyr/src/parser.rs | 74 +++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 73 insertions(+), 1 deletion(-) diff --git a/saphyr/src/parser.rs b/saphyr/src/parser.rs index dead8e4..644121d 100644 --- a/saphyr/src/parser.rs +++ b/saphyr/src/parser.rs @@ -222,6 +222,10 @@ impl> Parser { State::FlowSequenceFirstEntry => self.flow_sequence_entry(true), State::FlowSequenceEntry => self.flow_sequence_entry(false), + State::FlowMappingFirstKey => self.flow_mapping_key(true), + State::FlowMappingKey => self.flow_mapping_key(false), + State::FlowMappingValue => self.flow_mapping_value(false), + State::IndentlessSequenceEntry => self.indentless_sequence_entry(), _ => unimplemented!() @@ -424,6 +428,74 @@ impl> Parser { } } + fn flow_mapping_key(&mut self, first: bool) -> ParseResult { + if first { + let _ = try!(self.peek()); + self.skip(); + } + let mut tok = try!(self.peek()); + + if tok.1 != TokenType::FlowMappingEndToken { + if !first { + if tok.1 == TokenType::FlowEntryToken { + self.skip(); + tok = try!(self.peek()); + } else { + return Err(ScanError::new(tok.0, + "while parsing a flow mapping, did not find expected ',' or '}'")); + } + } + + if tok.1 == TokenType::KeyToken { + self.skip(); + tok = try!(self.peek()); + match tok.1 { + TokenType::ValueToken + | TokenType::FlowEntryToken + | TokenType::FlowMappingEndToken => { + self.state = State::FlowMappingValue; + return Ok(Event::empty_scalar()); + }, + _ => { + self.push_state(State::FlowMappingValue); + return self.parse_node(false, false); + } + } + } else if (tok.1 != TokenType::FlowMappingEndToken) { + self.push_state(State::FlowMappingEmptyValue); + return self.parse_node(false, false); + } + } + + self.pop_state(); + self.skip(); + Ok(Event::MappingEnd) + } + + fn flow_mapping_value(&mut self, empty: bool) -> ParseResult { + let tok = try!(self.peek()); + if empty { + self.state = State::FlowMappingKey; + return Ok(Event::empty_scalar()); + } + + if tok.1 == TokenType::ValueToken { + self.skip(); + let mut tok = try!(self.peek()); + match tok.1 { + TokenType::FlowEntryToken + | TokenType::FlowMappingEndToken => { }, + _ => { + self.push_state(State::FlowMappingKey); + return self.parse_node(false, false); + } + } + } + + self.state = State::FlowMappingKey; + Ok(Event::empty_scalar()) + } + fn flow_sequence_entry(&mut self, first: bool) -> ParseResult { // skip FlowMappingStartToken if first { @@ -449,7 +521,7 @@ impl> Parser { _ => { /* next */ } } match tok.1 { - TokenType::FlowMappingEndToken => { + TokenType::FlowSequenceEndToken => { self.pop_state(); self.skip(); Ok(Event::SequenceEnd) From ef020f0f95795705130ea5f9111a9f06a47369e1 Mon Sep 17 00:00:00 2001 From: Yuheng Chen Date: Wed, 27 May 2015 21:57:42 +0800 Subject: [PATCH 017/380] Change empty scalar definition --- saphyr/src/parser.rs | 3 +- saphyr/tests/spec_test.rs | 67 + saphyr/tests/specs/cpp2rust.rb | 66 + saphyr/tests/specs/handler_spec_test.cpp | 1532 ++++++++++++++++++++++ 4 files changed, 1667 insertions(+), 1 deletion(-) create mode 100644 saphyr/tests/spec_test.rs create mode 100755 saphyr/tests/specs/cpp2rust.rb create mode 100644 saphyr/tests/specs/handler_spec_test.cpp diff --git a/saphyr/src/parser.rs b/saphyr/src/parser.rs index 644121d..fa8ce0a 100644 --- a/saphyr/src/parser.rs +++ b/saphyr/src/parser.rs @@ -46,7 +46,8 @@ pub enum Event { impl Event { fn empty_scalar() -> Event { - Event::Scalar(String::new(), TScalarStyle::Plain) + // a null scalar + Event::Scalar("~".to_string(), TScalarStyle::Plain) } } diff --git a/saphyr/tests/spec_test.rs b/saphyr/tests/spec_test.rs new file mode 100644 index 0000000..11f5fe1 --- /dev/null +++ b/saphyr/tests/spec_test.rs @@ -0,0 +1,67 @@ +#![allow(dead_code)] +extern crate yaml_rust; + +use yaml_rust::parser::{Parser, EventReceiver, Event}; +use yaml_rust::yaml::Yaml; + +#[derive(Clone, PartialEq, PartialOrd, Debug)] +enum TestEvent { + OnDocumentStart, + OnDocumentEnd, + OnSequenceStart, + OnSequenceEnd, + OnMapStart, + OnMapEnd, + OnScalar, + OnAlias, + OnNull, +} + +struct YamlChecker { + pub evs: Vec +} + +impl EventReceiver for YamlChecker { + fn on_event(&mut self, ev: &Event) { + let tev = match *ev { + Event::DocumentStart => TestEvent::OnDocumentStart, + Event::DocumentEnd => TestEvent::OnDocumentEnd, + Event::SequenceStart => TestEvent::OnSequenceStart, + Event::SequenceEnd => TestEvent::OnSequenceEnd, + Event::MappingStart => TestEvent::OnMapStart, + Event::MappingEnd => TestEvent::OnMapEnd, + Event::Scalar(ref v, style) => { + if v == "~" { + TestEvent::OnNull + } else { + TestEvent::OnScalar + } + }, + _ => { return } // ignore other events + }; + self.evs.push(tev); + } +} + +fn str_to_test_events(docs: &str) -> Vec { + let mut p = YamlChecker { + evs: Vec::new() + }; + let mut parser = Parser::new(docs.chars()); + parser.load(&mut p, true).unwrap(); + p.evs +} + +macro_rules! assert_next { + ($v:expr, $p:pat) => ( + match $v.next().unwrap() { + $p => {}, + e => { panic!("unexpected event: {:?}", e); } + } + ) +} + +// auto generated from handler_spec_test.cpp +include!("specexamples.rs.inc"); +include!("spec_test.rs.inc"); + diff --git a/saphyr/tests/specs/cpp2rust.rb b/saphyr/tests/specs/cpp2rust.rb new file mode 100755 index 0000000..20c8575 --- /dev/null +++ b/saphyr/tests/specs/cpp2rust.rb @@ -0,0 +1,66 @@ +#!/usr/bin/env ruby + +TEST_REGEX = /TEST_F\([a-zA-Z0-9_]+,\s+([a-zA-Z0-9_]+)\)/ + +class Context + attr_accessor :name, :ev, :src + def initialize + @name = "" + @src = "" + @ev = [] + end +end + +class String + def snakecase + self + .gsub(/([A-Z]+)([A-Z][a-z])/, '\1_\2') + .gsub(/([a-z\d])([A-Z])/, '\1_\2') + .tr('-', '_') + .gsub(/\s/, '_') + .gsub(/__+/, '_') + .downcase + end +end + +ctx = nil + +tests = [] +IO.foreach(ARGV[0]) do |line| + line.strip! + if ctx + fail "unexpected TEST_F" if line =~ TEST_REGEX + if line =~ /^}/ + tests << ctx + ctx = nil + end + if line =~ /^EXPECT_CALL/ + fail 'not end with ;' unless line[-1] == ';' + v = line.gsub('(', ' ').gsub(')', ' ').split + ctx.ev << v[2] + end + else + next unless line =~ TEST_REGEX + name = $1 + next unless name =~ /^(Ex\d+_\d+)/ + str = $1.upcase + $stderr.puts "found #{name}" + ctx = Context.new + ctx.name = "test_#{name.snakecase}" + ctx.src = str + end +end + +# code gen +tests.each do |t| + next if t.ev.size == 0 + puts "#[test]" + puts "fn #{t.name}() {" + puts " let mut v = str_to_test_events(#{t.src}).into_iter();" + t.ev.each do |e| + puts " assert_next!(v, TestEvent::#{e});" + end + puts "}" + puts +end + diff --git a/saphyr/tests/specs/handler_spec_test.cpp b/saphyr/tests/specs/handler_spec_test.cpp new file mode 100644 index 0000000..aa4f7ca --- /dev/null +++ b/saphyr/tests/specs/handler_spec_test.cpp @@ -0,0 +1,1532 @@ +#include "handler_test.h" +#include "specexamples.h" // IWYU pragma: keep +#include "yaml-cpp/yaml.h" // IWYU pragma: keep + +#include "gmock/gmock.h" +#include "gtest/gtest.h" + +using ::testing::_; + +#define EXPECT_THROW_PARSER_EXCEPTION(statement, message) \ + ASSERT_THROW(statement, ParserException); \ + try { \ + statement; \ + } catch (const ParserException& e) { \ + EXPECT_EQ(e.msg, message); \ + } + +namespace YAML { +namespace { + +typedef HandlerTest HandlerSpecTest; + +TEST_F(HandlerSpecTest, Ex2_1_SeqScalars) { + EXPECT_CALL(handler, OnDocumentStart(_)); + EXPECT_CALL(handler, OnSequenceStart(_, "?", 0, EmitterStyle::Block)); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "Mark McGwire")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "Sammy Sosa")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "Ken Griffey")); + EXPECT_CALL(handler, OnSequenceEnd()); + EXPECT_CALL(handler, OnDocumentEnd()); + Parse(ex2_1); +} + +TEST_F(HandlerSpecTest, Ex2_2_MappingScalarsToScalars) { + EXPECT_CALL(handler, OnDocumentStart(_)); + EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Block)); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "hr")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "65")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "avg")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "0.278")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "rbi")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "147")); + EXPECT_CALL(handler, OnMapEnd()); + EXPECT_CALL(handler, OnDocumentEnd()); + Parse(ex2_2); +} + +TEST_F(HandlerSpecTest, Ex2_3_MappingScalarsToSequences) { + EXPECT_CALL(handler, OnDocumentStart(_)); + EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Block)); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "american")); + EXPECT_CALL(handler, OnSequenceStart(_, "?", 0, EmitterStyle::Block)); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "Boston Red Sox")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "Detroit Tigers")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "New York Yankees")); + EXPECT_CALL(handler, OnSequenceEnd()); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "national")); + EXPECT_CALL(handler, OnSequenceStart(_, "?", 0, EmitterStyle::Block)); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "New York Mets")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "Chicago Cubs")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "Atlanta Braves")); + EXPECT_CALL(handler, OnSequenceEnd()); + EXPECT_CALL(handler, OnMapEnd()); + EXPECT_CALL(handler, OnDocumentEnd()); + Parse(ex2_3); +} + +TEST_F(HandlerSpecTest, Ex2_4_SequenceOfMappings) { + EXPECT_CALL(handler, OnDocumentStart(_)); + EXPECT_CALL(handler, OnSequenceStart(_, "?", 0, EmitterStyle::Block)); + EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Block)); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "name")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "Mark McGwire")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "hr")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "65")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "avg")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "0.278")); + EXPECT_CALL(handler, OnMapEnd()); + EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Block)); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "name")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "Sammy Sosa")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "hr")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "63")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "avg")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "0.288")); + EXPECT_CALL(handler, OnMapEnd()); + EXPECT_CALL(handler, OnSequenceEnd()); + EXPECT_CALL(handler, OnDocumentEnd()); + Parse(ex2_4); +} + +TEST_F(HandlerSpecTest, Ex2_5_SequenceOfSequences) { + EXPECT_CALL(handler, OnDocumentStart(_)); + EXPECT_CALL(handler, OnSequenceStart(_, "?", 0, EmitterStyle::Block)); + EXPECT_CALL(handler, OnSequenceStart(_, "?", 0, EmitterStyle::Flow)); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "name")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "hr")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "avg")); + EXPECT_CALL(handler, OnSequenceEnd()); + EXPECT_CALL(handler, OnSequenceStart(_, "?", 0, EmitterStyle::Flow)); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "Mark McGwire")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "65")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "0.278")); + EXPECT_CALL(handler, OnSequenceEnd()); + EXPECT_CALL(handler, OnSequenceStart(_, "?", 0, EmitterStyle::Flow)); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "Sammy Sosa")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "63")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "0.288")); + EXPECT_CALL(handler, OnSequenceEnd()); + EXPECT_CALL(handler, OnSequenceEnd()); + EXPECT_CALL(handler, OnDocumentEnd()); + Parse(ex2_5); +} + +TEST_F(HandlerSpecTest, Ex2_6_MappingOfMappings) { + EXPECT_CALL(handler, OnDocumentStart(_)); + EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Block)); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "Mark McGwire")); + EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Flow)); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "hr")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "65")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "avg")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "0.278")); + EXPECT_CALL(handler, OnMapEnd()); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "Sammy Sosa")); + EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Flow)); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "hr")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "63")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "avg")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "0.288")); + EXPECT_CALL(handler, OnMapEnd()); + EXPECT_CALL(handler, OnMapEnd()); + EXPECT_CALL(handler, OnDocumentEnd()); + Parse(ex2_6); +} + +TEST_F(HandlerSpecTest, Ex2_7_TwoDocumentsInAStream) { + EXPECT_CALL(handler, OnDocumentStart(_)); + EXPECT_CALL(handler, OnSequenceStart(_, "?", 0, EmitterStyle::Block)); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "Mark McGwire")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "Sammy Sosa")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "Ken Griffey")); + EXPECT_CALL(handler, OnSequenceEnd()); + EXPECT_CALL(handler, OnDocumentEnd()); + EXPECT_CALL(handler, OnDocumentStart(_)); + EXPECT_CALL(handler, OnSequenceStart(_, "?", 0, EmitterStyle::Block)); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "Chicago Cubs")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "St Louis Cardinals")); + EXPECT_CALL(handler, OnSequenceEnd()); + EXPECT_CALL(handler, OnDocumentEnd()); + Parse(ex2_7); +} + +TEST_F(HandlerSpecTest, Ex2_8_PlayByPlayFeed) { + EXPECT_CALL(handler, OnDocumentStart(_)); + EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Block)); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "time")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "20:03:20")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "player")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "Sammy Sosa")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "action")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "strike (miss)")); + EXPECT_CALL(handler, OnMapEnd()); + EXPECT_CALL(handler, OnDocumentEnd()); + EXPECT_CALL(handler, OnDocumentStart(_)); + EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Block)); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "time")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "20:03:47")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "player")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "Sammy Sosa")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "action")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "grand slam")); + EXPECT_CALL(handler, OnMapEnd()); + EXPECT_CALL(handler, OnDocumentEnd()); + Parse(ex2_8); +} + +TEST_F(HandlerSpecTest, Ex2_9_SingleDocumentWithTwoComments) { + EXPECT_CALL(handler, OnDocumentStart(_)); + EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Block)); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "hr")); + EXPECT_CALL(handler, OnSequenceStart(_, "?", 0, EmitterStyle::Block)); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "Mark McGwire")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "Sammy Sosa")); + EXPECT_CALL(handler, OnSequenceEnd()); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "rbi")); + EXPECT_CALL(handler, OnSequenceStart(_, "?", 0, EmitterStyle::Block)); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "Sammy Sosa")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "Ken Griffey")); + EXPECT_CALL(handler, OnSequenceEnd()); + EXPECT_CALL(handler, OnMapEnd()); + EXPECT_CALL(handler, OnDocumentEnd()); + Parse(ex2_9); +} + +TEST_F(HandlerSpecTest, Ex2_10_SimpleAnchor) { + EXPECT_CALL(handler, OnDocumentStart(_)); + EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Block)); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "hr")); + EXPECT_CALL(handler, OnSequenceStart(_, "?", 0, EmitterStyle::Block)); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "Mark McGwire")); + EXPECT_CALL(handler, OnScalar(_, "?", 1, "Sammy Sosa")); + EXPECT_CALL(handler, OnSequenceEnd()); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "rbi")); + EXPECT_CALL(handler, OnSequenceStart(_, "?", 0, EmitterStyle::Block)); + EXPECT_CALL(handler, OnAlias(_, 1)); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "Ken Griffey")); + EXPECT_CALL(handler, OnSequenceEnd()); + EXPECT_CALL(handler, OnMapEnd()); + EXPECT_CALL(handler, OnDocumentEnd()); + Parse(ex2_10); +} + +TEST_F(HandlerSpecTest, Ex2_11_MappingBetweenSequences) { + EXPECT_CALL(handler, OnDocumentStart(_)); + EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Block)); + EXPECT_CALL(handler, OnSequenceStart(_, "?", 0, EmitterStyle::Block)); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "Detroit Tigers")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "Chicago cubs")); + EXPECT_CALL(handler, OnSequenceEnd()); + EXPECT_CALL(handler, OnSequenceStart(_, "?", 0, EmitterStyle::Block)); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "2001-07-23")); + EXPECT_CALL(handler, OnSequenceEnd()); + EXPECT_CALL(handler, OnSequenceStart(_, "?", 0, EmitterStyle::Flow)); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "New York Yankees")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "Atlanta Braves")); + EXPECT_CALL(handler, OnSequenceEnd()); + EXPECT_CALL(handler, OnSequenceStart(_, "?", 0, EmitterStyle::Flow)); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "2001-07-02")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "2001-08-12")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "2001-08-14")); + EXPECT_CALL(handler, OnSequenceEnd()); + EXPECT_CALL(handler, OnMapEnd()); + EXPECT_CALL(handler, OnDocumentEnd()); + Parse(ex2_11); +} + +TEST_F(HandlerSpecTest, Ex2_12_CompactNestedMapping) { + EXPECT_CALL(handler, OnDocumentStart(_)); + EXPECT_CALL(handler, OnSequenceStart(_, "?", 0, EmitterStyle::Block)); + EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Block)); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "item")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "Super Hoop")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "quantity")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "1")); + EXPECT_CALL(handler, OnMapEnd()); + EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Block)); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "item")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "Basketball")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "quantity")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "4")); + EXPECT_CALL(handler, OnMapEnd()); + EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Block)); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "item")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "Big Shoes")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "quantity")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "1")); + EXPECT_CALL(handler, OnMapEnd()); + EXPECT_CALL(handler, OnSequenceEnd()); + EXPECT_CALL(handler, OnDocumentEnd()); + Parse(ex2_12); +} + +TEST_F(HandlerSpecTest, Ex2_13_InLiteralsNewlinesArePreserved) { + EXPECT_CALL(handler, OnDocumentStart(_)); + EXPECT_CALL(handler, OnScalar(_, "!", 0, "\\//||\\/||\n// || ||__")); + EXPECT_CALL(handler, OnDocumentEnd()); + Parse(ex2_13); +} + +TEST_F(HandlerSpecTest, Ex2_14_InFoldedScalarsNewlinesBecomeSpaces) { + EXPECT_CALL(handler, OnDocumentStart(_)); + EXPECT_CALL(handler, OnScalar(_, "!", 0, "Mark McGwire's year was crippled by a knee injury.")); + EXPECT_CALL(handler, OnDocumentEnd()); + Parse(ex2_14); +} + +TEST_F(HandlerSpecTest, Ex2_15_FoldedNewlinesArePreservedForMoreIndentedAndBlankLines) { + EXPECT_CALL(handler, OnDocumentStart(_)); + EXPECT_CALL(handler, OnScalar(_, "!", 0, "Sammy Sosa completed another fine season with great stats.\n\n 63 Home Runs\n 0.288 Batting Average\n\nWhat a year!")); + EXPECT_CALL(handler, OnDocumentEnd()); + Parse(ex2_15); +} + +TEST_F(HandlerSpecTest, Ex2_16_IndentationDeterminesScope) { + EXPECT_CALL(handler, OnDocumentStart(_)); + EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Block)); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "name")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "Mark McGwire")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "accomplishment")); + EXPECT_CALL(handler, OnScalar(_, "!", 0, "Mark set a major league home run record in 1998.\n")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "stats")); + EXPECT_CALL(handler, OnScalar(_, "!", 0, "65 Home Runs\n0.278 Batting Average\n")); + EXPECT_CALL(handler, OnMapEnd()); + EXPECT_CALL(handler, OnDocumentEnd()); + Parse(ex2_16); +} + +TEST_F(HandlerSpecTest, Ex2_17_QuotedScalars) { + EXPECT_CALL(handler, OnDocumentStart(_)); + EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Block)); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "unicode")); + EXPECT_CALL(handler, OnScalar(_, "!", 0, "Sosa did fine.\xE2\x98\xBA")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "control")); + EXPECT_CALL(handler, OnScalar(_, "!", 0, "\b1998\t1999\t2000\n")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "hex esc")); + EXPECT_CALL(handler, OnScalar(_, "!", 0, "\x0d\x0a is \r\n")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "single")); + EXPECT_CALL(handler, OnScalar(_, "!", 0, "\"Howdy!\" he cried.")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "quoted")); + EXPECT_CALL(handler, OnScalar(_, "!", 0, " # Not a 'comment'.")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "tie-fighter")); + EXPECT_CALL(handler, OnScalar(_, "!", 0, "|\\-*-/|")); + EXPECT_CALL(handler, OnMapEnd()); + EXPECT_CALL(handler, OnDocumentEnd()); + Parse(ex2_17); +} + +TEST_F(HandlerSpecTest, Ex2_18_MultiLineFlowScalars) { + EXPECT_CALL(handler, OnDocumentStart(_)); + EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Block)); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "plain")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "This unquoted scalar spans many lines.")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "quoted")); + EXPECT_CALL(handler, OnScalar(_, "!", 0, "So does this quoted scalar.\n")); + EXPECT_CALL(handler, OnMapEnd()); + EXPECT_CALL(handler, OnDocumentEnd()); + Parse(ex2_18); +} + +// TODO: 2.19 - 2.22 schema tags + +TEST_F(HandlerSpecTest, Ex2_23_VariousExplicitTags) { + EXPECT_CALL(handler, OnDocumentStart(_)); + EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Block)); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "not-date")); + EXPECT_CALL(handler, OnScalar(_, "tag:yaml.org,2002:str", 0, "2002-04-28")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "picture")); + EXPECT_CALL(handler, OnScalar(_, "tag:yaml.org,2002:binary", 0, "R0lGODlhDAAMAIQAAP//9/X\n17unp5WZmZgAAAOfn515eXv\nPz7Y6OjuDg4J+fn5OTk6enp\n56enmleECcgggoBADs=\n")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "application specific tag")); + EXPECT_CALL(handler, OnScalar(_, "!something", 0, "The semantics of the tag\nabove may be different for\ndifferent documents.")); + EXPECT_CALL(handler, OnMapEnd()); + EXPECT_CALL(handler, OnDocumentEnd()); + Parse(ex2_23); +} + +TEST_F(HandlerSpecTest, Ex2_24_GlobalTags) { + EXPECT_CALL(handler, OnDocumentStart(_)); + EXPECT_CALL(handler, OnSequenceStart(_, "tag:clarkevans.com,2002:shape", 0, EmitterStyle::Block)); + EXPECT_CALL(handler, OnMapStart(_, "tag:clarkevans.com,2002:circle", 0, EmitterStyle::Block)); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "center")); + EXPECT_CALL(handler, OnMapStart(_, "?", 1, EmitterStyle::Flow)); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "x")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "73")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "y")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "129")); + EXPECT_CALL(handler, OnMapEnd()); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "radius")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "7")); + EXPECT_CALL(handler, OnMapEnd()); + EXPECT_CALL(handler, OnMapStart(_, "tag:clarkevans.com,2002:line", 0, EmitterStyle::Block)); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "start")); + EXPECT_CALL(handler, OnAlias(_, 1)); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "finish")); + EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Flow)); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "x")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "89")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "y")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "102")); + EXPECT_CALL(handler, OnMapEnd()); + EXPECT_CALL(handler, OnMapEnd()); + EXPECT_CALL(handler, OnMapStart(_, "tag:clarkevans.com,2002:label", 0, EmitterStyle::Block)); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "start")); + EXPECT_CALL(handler, OnAlias(_, 1)); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "color")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "0xFFEEBB")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "text")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "Pretty vector drawing.")); + EXPECT_CALL(handler, OnMapEnd()); + EXPECT_CALL(handler, OnSequenceEnd()); + EXPECT_CALL(handler, OnDocumentEnd()); + Parse(ex2_24); +} + +TEST_F(HandlerSpecTest, Ex2_25_UnorderedSets) { + EXPECT_CALL(handler, OnDocumentStart(_)); + EXPECT_CALL(handler, OnMapStart(_, "tag:yaml.org,2002:set", 0, EmitterStyle::Block)); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "Mark McGwire")); + EXPECT_CALL(handler, OnNull(_, 0)); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "Sammy Sosa")); + EXPECT_CALL(handler, OnNull(_, 0)); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "Ken Griffey")); + EXPECT_CALL(handler, OnNull(_, 0)); + EXPECT_CALL(handler, OnMapEnd()); + EXPECT_CALL(handler, OnDocumentEnd()); + Parse(ex2_25); +} + +TEST_F(HandlerSpecTest, Ex2_26_OrderedMappings) { + EXPECT_CALL(handler, OnDocumentStart(_)); + EXPECT_CALL(handler, OnSequenceStart(_, "tag:yaml.org,2002:omap", 0, EmitterStyle::Block)); + EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Block)); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "Mark McGwire")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "65")); + EXPECT_CALL(handler, OnMapEnd()); + EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Block)); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "Sammy Sosa")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "63")); + EXPECT_CALL(handler, OnMapEnd()); + EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Block)); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "Ken Griffey")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "58")); + EXPECT_CALL(handler, OnMapEnd()); + EXPECT_CALL(handler, OnSequenceEnd()); + EXPECT_CALL(handler, OnDocumentEnd()); + Parse(ex2_26); +} + +TEST_F(HandlerSpecTest, Ex2_27_Invoice) { + EXPECT_CALL(handler, OnDocumentStart(_)); + EXPECT_CALL(handler, OnMapStart(_, "tag:clarkevans.com,2002:invoice", 0, EmitterStyle::Block)); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "invoice")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "34843")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "date")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "2001-01-23")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "bill-to")); + EXPECT_CALL(handler, OnMapStart(_, "?", 1, EmitterStyle::Block)); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "given")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "Chris")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "family")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "Dumars")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "address")); + EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Block)); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "lines")); + EXPECT_CALL(handler, OnScalar(_, "!", 0, "458 Walkman Dr.\nSuite #292\n")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "city")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "Royal Oak")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "state")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "MI")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "postal")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "48046")); + EXPECT_CALL(handler, OnMapEnd()); + EXPECT_CALL(handler, OnMapEnd()); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "ship-to")); + EXPECT_CALL(handler, OnAlias(_, 1)); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "product")); + EXPECT_CALL(handler, OnSequenceStart(_, "?", 0, EmitterStyle::Block)); + EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Block)); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "sku")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "BL394D")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "quantity")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "4")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "description")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "Basketball")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "price")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "450.00")); + EXPECT_CALL(handler, OnMapEnd()); + EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Block)); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "sku")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "BL4438H")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "quantity")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "1")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "description")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "Super Hoop")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "price")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "2392.00")); + EXPECT_CALL(handler, OnMapEnd()); + EXPECT_CALL(handler, OnSequenceEnd()); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "tax")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "251.42")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "total")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "4443.52")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "comments")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "Late afternoon is best. Backup contact is Nancy Billsmer @ 338-4338.")); + EXPECT_CALL(handler, OnMapEnd()); + EXPECT_CALL(handler, OnDocumentEnd()); + Parse(ex2_27); +} + +TEST_F(HandlerSpecTest, Ex2_28_LogFile) { + EXPECT_CALL(handler, OnDocumentStart(_)); + EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Block)); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "Time")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "2001-11-23 15:01:42 -5")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "User")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "ed")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "Warning")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "This is an error message for the log file")); + EXPECT_CALL(handler, OnMapEnd()); + EXPECT_CALL(handler, OnDocumentEnd()); + EXPECT_CALL(handler, OnDocumentStart(_)); + EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Block)); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "Time")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "2001-11-23 15:02:31 -5")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "User")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "ed")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "Warning")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "A slightly different error message.")); + EXPECT_CALL(handler, OnMapEnd()); + EXPECT_CALL(handler, OnDocumentEnd()); + EXPECT_CALL(handler, OnDocumentStart(_)); + EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Block)); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "Date")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "2001-11-23 15:03:17 -5")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "User")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "ed")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "Fatal")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "Unknown variable \"bar\"")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "Stack")); + EXPECT_CALL(handler, OnSequenceStart(_, "?", 0, EmitterStyle::Block)); + EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Block)); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "file")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "TopClass.py")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "line")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "23")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "code")); + EXPECT_CALL(handler, OnScalar(_, "!", 0, "x = MoreObject(\"345\\n\")\n")); + EXPECT_CALL(handler, OnMapEnd()); + EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Block)); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "file")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "MoreClass.py")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "line")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "58")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "code")); + EXPECT_CALL(handler, OnScalar(_, "!", 0, "foo = bar")); + EXPECT_CALL(handler, OnMapEnd()); + EXPECT_CALL(handler, OnSequenceEnd()); + EXPECT_CALL(handler, OnMapEnd()); + EXPECT_CALL(handler, OnDocumentEnd()); + Parse(ex2_28); +} + +// TODO: 5.1 - 5.2 BOM + +TEST_F(HandlerSpecTest, Ex5_3_BlockStructureIndicators) { + EXPECT_CALL(handler, OnDocumentStart(_)); + EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Block)); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "sequence")); + EXPECT_CALL(handler, OnSequenceStart(_, "?", 0, EmitterStyle::Block)); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "one")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "two")); + EXPECT_CALL(handler, OnSequenceEnd()); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "mapping")); + EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Block)); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "sky")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "blue")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "sea")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "green")); + EXPECT_CALL(handler, OnMapEnd()); + EXPECT_CALL(handler, OnMapEnd()); + EXPECT_CALL(handler, OnDocumentEnd()); + Parse(ex5_3); +} + +TEST_F(HandlerSpecTest, Ex5_4_FlowStructureIndicators) { + EXPECT_CALL(handler, OnDocumentStart(_)); + EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Block)); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "sequence")); + EXPECT_CALL(handler, OnSequenceStart(_, "?", 0, EmitterStyle::Flow)); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "one")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "two")); + EXPECT_CALL(handler, OnSequenceEnd()); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "mapping")); + EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Flow)); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "sky")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "blue")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "sea")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "green")); + EXPECT_CALL(handler, OnMapEnd()); + EXPECT_CALL(handler, OnMapEnd()); + EXPECT_CALL(handler, OnDocumentEnd()); + Parse(ex5_4); +} + + +TEST_F(HandlerSpecTest, Ex5_6_NodePropertyIndicators) { + EXPECT_CALL(handler, OnDocumentStart(_)); + EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Block)); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "anchored")); + EXPECT_CALL(handler, OnScalar(_, "!local", 1, "value")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "alias")); + EXPECT_CALL(handler, OnAlias(_, 1)); + EXPECT_CALL(handler, OnMapEnd()); + EXPECT_CALL(handler, OnDocumentEnd()); + Parse(ex5_6); +} + +TEST_F(HandlerSpecTest, Ex5_7_BlockScalarIndicators) { + EXPECT_CALL(handler, OnDocumentStart(_)); + EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Block)); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "literal")); + EXPECT_CALL(handler, OnScalar(_, "!", 0, "some\ntext\n")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "folded")); + EXPECT_CALL(handler, OnScalar(_, "!", 0, "some text\n")); + EXPECT_CALL(handler, OnMapEnd()); + EXPECT_CALL(handler, OnDocumentEnd()); + Parse(ex5_7); +} + +TEST_F(HandlerSpecTest, Ex5_8_QuotedScalarIndicators) { + EXPECT_CALL(handler, OnDocumentStart(_)); + EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Block)); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "single")); + EXPECT_CALL(handler, OnScalar(_, "!", 0, "text")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "double")); + EXPECT_CALL(handler, OnScalar(_, "!", 0, "text")); + EXPECT_CALL(handler, OnMapEnd()); + EXPECT_CALL(handler, OnDocumentEnd()); + Parse(ex5_8); +} + +// TODO: 5.9 directive +// TODO: 5.10 reserved indicator + +TEST_F(HandlerSpecTest, Ex5_11_LineBreakCharacters) { + EXPECT_CALL(handler, OnDocumentStart(_)); + EXPECT_CALL(handler, OnScalar(_, "!", 0, "Line break (no glyph)\nLine break (glyphed)\n")); + EXPECT_CALL(handler, OnDocumentEnd()); + Parse(ex5_11); +} + +TEST_F(HandlerSpecTest, Ex5_12_TabsAndSpaces) { + EXPECT_CALL(handler, OnDocumentStart(_)); + EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Block)); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "quoted")); + EXPECT_CALL(handler, OnScalar(_, "!", 0, "Quoted\t")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "block")); + EXPECT_CALL(handler, OnScalar(_, "!", 0, "void main() {\n\tprintf(\"Hello, world!\\n\");\n}")); + EXPECT_CALL(handler, OnMapEnd()); + EXPECT_CALL(handler, OnDocumentEnd()); + Parse(ex5_12); +} + +TEST_F(HandlerSpecTest, Ex5_13_EscapedCharacters) { + EXPECT_CALL(handler, OnDocumentStart(_)); + EXPECT_CALL(handler, OnScalar(_, "!", 0, "Fun with \x5C \x22 \x07 \x08 \x1B \x0C \x0A \x0D \x09 \x0B \x00 \x20 \xA0 \x85 \xe2\x80\xa8 \xe2\x80\xa9 A A A")); + EXPECT_CALL(handler, OnDocumentEnd()); + Parse(ex5_13); +} + +TEST_F(HandlerSpecTest, Ex5_14_InvalidEscapedCharacters) { + EXPECT_THROW_PARSER_EXCEPTION(IgnoreParse(ex5_14), std::string(ErrorMsg::INVALID_ESCAPE) + "c"); +} + +TEST_F(HandlerSpecTest, Ex6_1_IndentationSpaces) { + EXPECT_CALL(handler, OnDocumentStart(_)); + EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Block)); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "Not indented")); + EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Block)); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "By one space")); + EXPECT_CALL(handler, OnScalar(_, "!", 0, "By four\n spaces\n")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "Flow style")); + EXPECT_CALL(handler, OnSequenceStart(_, "?", 0, EmitterStyle::Flow)); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "By two")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "Also by two")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "Still by two")); + EXPECT_CALL(handler, OnSequenceEnd()); + EXPECT_CALL(handler, OnMapEnd()); + EXPECT_CALL(handler, OnMapEnd()); + EXPECT_CALL(handler, OnDocumentEnd()); + Parse(ex6_1); +} + +TEST_F(HandlerSpecTest, Ex6_2_IndentationIndicators) { + EXPECT_CALL(handler, OnDocumentStart(_)); + EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Block)); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "a")); + EXPECT_CALL(handler, OnSequenceStart(_, "?", 0, EmitterStyle::Block)); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "b")); + EXPECT_CALL(handler, OnSequenceStart(_, "?", 0, EmitterStyle::Block)); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "c")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "d")); + EXPECT_CALL(handler, OnSequenceEnd()); + EXPECT_CALL(handler, OnSequenceEnd()); + EXPECT_CALL(handler, OnMapEnd()); + EXPECT_CALL(handler, OnDocumentEnd()); + Parse(ex6_2); +} + +TEST_F(HandlerSpecTest, Ex6_3_SeparationSpaces) { + EXPECT_CALL(handler, OnDocumentStart(_)); + EXPECT_CALL(handler, OnSequenceStart(_, "?", 0, EmitterStyle::Block)); + EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Block)); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "foo")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "bar")); + EXPECT_CALL(handler, OnMapEnd()); + EXPECT_CALL(handler, OnSequenceStart(_, "?", 0, EmitterStyle::Block)); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "baz")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "baz")); + EXPECT_CALL(handler, OnSequenceEnd()); + EXPECT_CALL(handler, OnSequenceEnd()); + EXPECT_CALL(handler, OnDocumentEnd()); + Parse(ex6_3); +} + +TEST_F(HandlerSpecTest, Ex6_4_LinePrefixes) { + EXPECT_CALL(handler, OnDocumentStart(_)); + EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Block)); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "plain")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "text lines")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "quoted")); + EXPECT_CALL(handler, OnScalar(_, "!", 0, "text lines")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "block")); + EXPECT_CALL(handler, OnScalar(_, "!", 0, "text\n \tlines\n")); + EXPECT_CALL(handler, OnMapEnd()); + EXPECT_CALL(handler, OnDocumentEnd()); + Parse(ex6_4); +} + +TEST_F(HandlerSpecTest, Ex6_5_EmptyLines) { + EXPECT_CALL(handler, OnDocumentStart(_)); + EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Block)); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "Folding")); + EXPECT_CALL(handler, OnScalar(_, "!", 0, "Empty line\nas a line feed")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "Chomping")); + EXPECT_CALL(handler, OnScalar(_, "!", 0, "Clipped empty lines\n")); + EXPECT_CALL(handler, OnMapEnd()); + EXPECT_CALL(handler, OnDocumentEnd()); + Parse(ex6_5); +} + +TEST_F(HandlerSpecTest, Ex6_6_LineFolding) { + EXPECT_CALL(handler, OnDocumentStart(_)); + EXPECT_CALL(handler, OnScalar(_, "!", 0, "trimmed\n\n\nas space")); + EXPECT_CALL(handler, OnDocumentEnd()); + Parse(ex6_6); +} + +TEST_F(HandlerSpecTest, Ex6_7_BlockFolding) { + EXPECT_CALL(handler, OnDocumentStart(_)); + EXPECT_CALL(handler, OnScalar(_, "!", 0, "foo \n\n\t bar\n\nbaz\n")); + EXPECT_CALL(handler, OnDocumentEnd()); + Parse(ex6_7); +} + +TEST_F(HandlerSpecTest, Ex6_8_FlowFolding) { + EXPECT_CALL(handler, OnDocumentStart(_)); + EXPECT_CALL(handler, OnScalar(_, "!", 0, " foo\nbar\nbaz ")); + EXPECT_CALL(handler, OnDocumentEnd()); + Parse(ex6_8); +} + +TEST_F(HandlerSpecTest, Ex6_9_SeparatedComment) { + EXPECT_CALL(handler, OnDocumentStart(_)); + EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Block)); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "key")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "value")); + EXPECT_CALL(handler, OnMapEnd()); + EXPECT_CALL(handler, OnDocumentEnd()); + Parse(ex6_9); +} + + +TEST_F(HandlerSpecTest, _MultiLineComments) { + EXPECT_CALL(handler, OnDocumentStart(_)); + EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Block)); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "key")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "value")); + EXPECT_CALL(handler, OnMapEnd()); + EXPECT_CALL(handler, OnDocumentEnd()); + Parse(ex6_11); +} + +TEST_F(HandlerSpecTest, Ex6_12_SeparationSpacesII) { + EXPECT_CALL(handler, OnDocumentStart(_)); + EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Block)); + EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Flow)); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "first")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "Sammy")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "last")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "Sosa")); + EXPECT_CALL(handler, OnMapEnd()); + EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Block)); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "hr")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "65")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "avg")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "0.278")); + EXPECT_CALL(handler, OnMapEnd()); + EXPECT_CALL(handler, OnMapEnd()); + EXPECT_CALL(handler, OnDocumentEnd()); + Parse(ex6_12); +} + +TEST_F(HandlerSpecTest, Ex6_13_ReservedDirectives) { + EXPECT_CALL(handler, OnDocumentStart(_)); + EXPECT_CALL(handler, OnScalar(_, "!", 0, "foo")); + EXPECT_CALL(handler, OnDocumentEnd()); + Parse(ex6_13); +} + +TEST_F(HandlerSpecTest, Ex6_14_YAMLDirective) { + EXPECT_CALL(handler, OnDocumentStart(_)); + EXPECT_CALL(handler, OnScalar(_, "!", 0, "foo")); + EXPECT_CALL(handler, OnDocumentEnd()); + Parse(ex6_14); +} + +TEST_F(HandlerSpecTest, Ex6_15_InvalidRepeatedYAMLDirective) { + EXPECT_THROW_PARSER_EXCEPTION(IgnoreParse(ex6_15), ErrorMsg::REPEATED_YAML_DIRECTIVE); +} + +TEST_F(HandlerSpecTest, Ex6_16_TagDirective) { + EXPECT_CALL(handler, OnDocumentStart(_)); + EXPECT_CALL(handler, OnScalar(_, "tag:yaml.org,2002:str", 0, "foo")); + EXPECT_CALL(handler, OnDocumentEnd()); + Parse(ex6_16); +} + +TEST_F(HandlerSpecTest, Ex6_17_InvalidRepeatedTagDirective) { + EXPECT_THROW_PARSER_EXCEPTION(IgnoreParse(ex6_17), ErrorMsg::REPEATED_TAG_DIRECTIVE); +} + +TEST_F(HandlerSpecTest, Ex6_18_PrimaryTagHandle) { + EXPECT_CALL(handler, OnDocumentStart(_)); + EXPECT_CALL(handler, OnScalar(_, "!foo", 0, "bar")); + EXPECT_CALL(handler, OnDocumentEnd()); + EXPECT_CALL(handler, OnDocumentStart(_)); + EXPECT_CALL(handler, OnScalar(_, "tag:example.com,2000:app/foo", 0, "bar")); + EXPECT_CALL(handler, OnDocumentEnd()); + Parse(ex6_18); +} + +TEST_F(HandlerSpecTest, Ex6_19_SecondaryTagHandle) { + EXPECT_CALL(handler, OnDocumentStart(_)); + EXPECT_CALL(handler, OnScalar(_, "tag:example.com,2000:app/int", 0, "1 - 3")); + EXPECT_CALL(handler, OnDocumentEnd()); + Parse(ex6_19); +} + +TEST_F(HandlerSpecTest, Ex6_20_TagHandles) { + EXPECT_CALL(handler, OnDocumentStart(_)); + EXPECT_CALL(handler, OnScalar(_, "tag:example.com,2000:app/foo", 0, "bar")); + EXPECT_CALL(handler, OnDocumentEnd()); + Parse(ex6_20); +} + +TEST_F(HandlerSpecTest, Ex6_21_LocalTagPrefix) { + EXPECT_CALL(handler, OnDocumentStart(_)); + EXPECT_CALL(handler, OnScalar(_, "!my-light", 0, "fluorescent")); + EXPECT_CALL(handler, OnDocumentEnd()); + EXPECT_CALL(handler, OnDocumentStart(_)); + EXPECT_CALL(handler, OnScalar(_, "!my-light", 0, "green")); + EXPECT_CALL(handler, OnDocumentEnd()); + Parse(ex6_21); +} + +TEST_F(HandlerSpecTest, Ex6_22_GlobalTagPrefix) { + EXPECT_CALL(handler, OnDocumentStart(_)); + EXPECT_CALL(handler, OnSequenceStart(_, "?", 0, EmitterStyle::Block)); + EXPECT_CALL(handler, OnScalar(_, "tag:example.com,2000:app/foo", 0, "bar")); + EXPECT_CALL(handler, OnSequenceEnd()); + EXPECT_CALL(handler, OnDocumentEnd()); + Parse(ex6_22); +} + +TEST_F(HandlerSpecTest, Ex6_23_NodeProperties) { + EXPECT_CALL(handler, OnDocumentStart(_)); + EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Block)); + EXPECT_CALL(handler, OnScalar(_, "tag:yaml.org,2002:str", 1, "foo")); + EXPECT_CALL(handler, OnScalar(_, "tag:yaml.org,2002:str", 0, "bar")); + EXPECT_CALL(handler, OnScalar(_, "?", 2, "baz")); + EXPECT_CALL(handler, OnAlias(_, 1)); + EXPECT_CALL(handler, OnMapEnd()); + EXPECT_CALL(handler, OnDocumentEnd()); + Parse(ex6_23); +} + +TEST_F(HandlerSpecTest, Ex6_24_VerbatimTags) { + EXPECT_CALL(handler, OnDocumentStart(_)); + EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Block)); + EXPECT_CALL(handler, OnScalar(_, "tag:yaml.org,2002:str", 0, "foo")); + EXPECT_CALL(handler, OnScalar(_, "!bar", 0, "baz")); + EXPECT_CALL(handler, OnMapEnd()); + EXPECT_CALL(handler, OnDocumentEnd()); + Parse(ex6_24); +} + +// TODO: Implement +TEST_F(HandlerSpecTest, DISABLED_Ex6_25_InvalidVerbatimTags) { + Parse(ex6_25); + FAIL() << "not implemented yet"; +} + +TEST_F(HandlerSpecTest, Ex6_26_TagShorthands) { + EXPECT_CALL(handler, OnDocumentStart(_)); + EXPECT_CALL(handler, OnSequenceStart(_, "?", 0, EmitterStyle::Block)); + EXPECT_CALL(handler, OnScalar(_, "!local", 0, "foo")); + EXPECT_CALL(handler, OnScalar(_, "tag:yaml.org,2002:str", 0, "bar")); + EXPECT_CALL(handler, OnScalar(_, "tag:example.com,2000:app/tag%21", 0, "baz")); + EXPECT_CALL(handler, OnSequenceEnd()); + EXPECT_CALL(handler, OnDocumentEnd()); + Parse(ex6_26); +} + +TEST_F(HandlerSpecTest, Ex6_27a_InvalidTagShorthands) { + EXPECT_THROW_PARSER_EXCEPTION(IgnoreParse(ex6_27a), ErrorMsg::TAG_WITH_NO_SUFFIX); +} + +// TODO: should we reject this one (since !h! is not declared)? +TEST_F(HandlerSpecTest, DISABLED_Ex6_27b_InvalidTagShorthands) { + Parse(ex6_27b); + FAIL() << "not implemented yet"; +} + +TEST_F(HandlerSpecTest, Ex6_28_NonSpecificTags) { + EXPECT_CALL(handler, OnDocumentStart(_)); + EXPECT_CALL(handler, OnSequenceStart(_, "?", 0, EmitterStyle::Block)); + EXPECT_CALL(handler, OnScalar(_, "!", 0, "12")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "12")); + EXPECT_CALL(handler, OnScalar(_, "!", 0, "12")); + EXPECT_CALL(handler, OnSequenceEnd()); + EXPECT_CALL(handler, OnDocumentEnd()); + Parse(ex6_28); +} + +TEST_F(HandlerSpecTest, Ex6_29_NodeAnchors) { + EXPECT_CALL(handler, OnDocumentStart(_)); + EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Block)); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "First occurrence")); + EXPECT_CALL(handler, OnScalar(_, "?", 1, "Value")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "Second occurrence")); + EXPECT_CALL(handler, OnAlias(_, 1)); + EXPECT_CALL(handler, OnMapEnd()); + EXPECT_CALL(handler, OnDocumentEnd()); + Parse(ex6_29); +} + +TEST_F(HandlerSpecTest, Ex7_1_AliasNodes) { + EXPECT_CALL(handler, OnDocumentStart(_)); + EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Block)); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "First occurrence")); + EXPECT_CALL(handler, OnScalar(_, "?", 1, "Foo")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "Second occurrence")); + EXPECT_CALL(handler, OnAlias(_, 1)); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "Override anchor")); + EXPECT_CALL(handler, OnScalar(_, "?", 2, "Bar")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "Reuse anchor")); + EXPECT_CALL(handler, OnAlias(_, 2)); + EXPECT_CALL(handler, OnMapEnd()); + EXPECT_CALL(handler, OnDocumentEnd()); + Parse(ex7_1); +} + +TEST_F(HandlerSpecTest, Ex7_2_EmptyNodes) { + EXPECT_CALL(handler, OnDocumentStart(_)); + EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Flow)); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "foo")); + EXPECT_CALL(handler, OnScalar(_, "tag:yaml.org,2002:str", 0, "")); + EXPECT_CALL(handler, OnScalar(_, "tag:yaml.org,2002:str", 0, "")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "bar")); + EXPECT_CALL(handler, OnMapEnd()); + EXPECT_CALL(handler, OnDocumentEnd()); + Parse(ex7_2); +} + +TEST_F(HandlerSpecTest, Ex7_3_CompletelyEmptyNodes) { + EXPECT_CALL(handler, OnDocumentStart(_)); + EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Flow)); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "foo")); + EXPECT_CALL(handler, OnNull(_, 0)); + EXPECT_CALL(handler, OnNull(_, 0)); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "bar")); + EXPECT_CALL(handler, OnMapEnd()); + EXPECT_CALL(handler, OnDocumentEnd()); + Parse(ex7_3); +} + +TEST_F(HandlerSpecTest, Ex7_4_DoubleQuotedImplicitKeys) { + EXPECT_CALL(handler, OnDocumentStart(_)); + EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Block)); + EXPECT_CALL(handler, OnScalar(_, "!", 0, "implicit block key")); + EXPECT_CALL(handler, OnSequenceStart(_, "?", 0, EmitterStyle::Flow)); + EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Flow)); + EXPECT_CALL(handler, OnScalar(_, "!", 0, "implicit flow key")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "value")); + EXPECT_CALL(handler, OnMapEnd()); + EXPECT_CALL(handler, OnSequenceEnd()); + EXPECT_CALL(handler, OnMapEnd()); + EXPECT_CALL(handler, OnDocumentEnd()); + Parse(ex7_4); +} + +TEST_F(HandlerSpecTest, Ex7_5_DoubleQuotedLineBreaks) { + EXPECT_CALL(handler, OnDocumentStart(_)); + EXPECT_CALL(handler, OnScalar(_, "!", 0, "folded to a space,\nto a line feed, or \t \tnon-content")); + EXPECT_CALL(handler, OnDocumentEnd()); + Parse(ex7_5); +} + +TEST_F(HandlerSpecTest, Ex7_6_DoubleQuotedLines) { + EXPECT_CALL(handler, OnDocumentStart(_)); + EXPECT_CALL(handler, OnScalar(_, "!", 0, " 1st non-empty\n2nd non-empty 3rd non-empty ")); + EXPECT_CALL(handler, OnDocumentEnd()); + Parse(ex7_6); +} + +TEST_F(HandlerSpecTest, Ex7_7_SingleQuotedCharacters) { + EXPECT_CALL(handler, OnDocumentStart(_)); + EXPECT_CALL(handler, OnScalar(_, "!", 0, "here's to \"quotes\"")); + EXPECT_CALL(handler, OnDocumentEnd()); + Parse(ex7_7); +} + +TEST_F(HandlerSpecTest, Ex7_8_SingleQuotedImplicitKeys) { + EXPECT_CALL(handler, OnDocumentStart(_)); + EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Block)); + EXPECT_CALL(handler, OnScalar(_, "!", 0, "implicit block key")); + EXPECT_CALL(handler, OnSequenceStart(_, "?", 0, EmitterStyle::Flow)); + EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Flow)); + EXPECT_CALL(handler, OnScalar(_, "!", 0, "implicit flow key")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "value")); + EXPECT_CALL(handler, OnMapEnd()); + EXPECT_CALL(handler, OnSequenceEnd()); + EXPECT_CALL(handler, OnMapEnd()); + EXPECT_CALL(handler, OnDocumentEnd()); + Parse(ex7_8); +} + +TEST_F(HandlerSpecTest, Ex7_9_SingleQuotedLines) { + EXPECT_CALL(handler, OnDocumentStart(_)); + EXPECT_CALL(handler, OnScalar(_, "!", 0, " 1st non-empty\n2nd non-empty 3rd non-empty ")); + EXPECT_CALL(handler, OnDocumentEnd()); + Parse(ex7_9); +} + +TEST_F(HandlerSpecTest, Ex7_10_PlainCharacters) { + EXPECT_CALL(handler, OnDocumentStart(_)); + EXPECT_CALL(handler, OnSequenceStart(_, "?", 0, EmitterStyle::Block)); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "::vector")); + EXPECT_CALL(handler, OnScalar(_, "!", 0, ": - ()")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "Up, up, and away!")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "-123")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "http://example.com/foo#bar")); + EXPECT_CALL(handler, OnSequenceStart(_, "?", 0, EmitterStyle::Flow)); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "::vector")); + EXPECT_CALL(handler, OnScalar(_, "!", 0, ": - ()")); + EXPECT_CALL(handler, OnScalar(_, "!", 0, "Up, up, and away!")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "-123")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "http://example.com/foo#bar")); + EXPECT_CALL(handler, OnSequenceEnd()); + EXPECT_CALL(handler, OnSequenceEnd()); + EXPECT_CALL(handler, OnDocumentEnd()); + Parse(ex7_10); +} + +TEST_F(HandlerSpecTest, Ex7_11_PlainImplicitKeys) { + EXPECT_CALL(handler, OnDocumentStart(_)); + EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Block)); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "implicit block key")); + EXPECT_CALL(handler, OnSequenceStart(_, "?", 0, EmitterStyle::Flow)); + EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Flow)); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "implicit flow key")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "value")); + EXPECT_CALL(handler, OnMapEnd()); + EXPECT_CALL(handler, OnSequenceEnd()); + EXPECT_CALL(handler, OnMapEnd()); + EXPECT_CALL(handler, OnDocumentEnd()); + Parse(ex7_11); +} + +TEST_F(HandlerSpecTest, Ex7_12_PlainLines) { + EXPECT_CALL(handler, OnDocumentStart(_)); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "1st non-empty\n2nd non-empty 3rd non-empty")); + EXPECT_CALL(handler, OnDocumentEnd()); + Parse(ex7_12); +} + +TEST_F(HandlerSpecTest, Ex7_13_FlowSequence) { + EXPECT_CALL(handler, OnDocumentStart(_)); + EXPECT_CALL(handler, OnSequenceStart(_, "?", 0, EmitterStyle::Block)); + EXPECT_CALL(handler, OnSequenceStart(_, "?", 0, EmitterStyle::Flow)); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "one")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "two")); + EXPECT_CALL(handler, OnSequenceEnd()); + EXPECT_CALL(handler, OnSequenceStart(_, "?", 0, EmitterStyle::Flow)); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "three")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "four")); + EXPECT_CALL(handler, OnSequenceEnd()); + EXPECT_CALL(handler, OnSequenceEnd()); + EXPECT_CALL(handler, OnDocumentEnd()); + Parse(ex7_13); +} + +TEST_F(HandlerSpecTest, Ex7_14_FlowSequenceEntries) { + EXPECT_CALL(handler, OnDocumentStart(_)); + EXPECT_CALL(handler, OnSequenceStart(_, "?", 0, EmitterStyle::Flow)); + EXPECT_CALL(handler, OnScalar(_, "!", 0, "double quoted")); + EXPECT_CALL(handler, OnScalar(_, "!", 0, "single quoted")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "plain text")); + EXPECT_CALL(handler, OnSequenceStart(_, "?", 0, EmitterStyle::Flow)); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "nested")); + EXPECT_CALL(handler, OnSequenceEnd()); + EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Flow)); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "single")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "pair")); + EXPECT_CALL(handler, OnMapEnd()); + EXPECT_CALL(handler, OnSequenceEnd()); + EXPECT_CALL(handler, OnDocumentEnd()); + Parse(ex7_14); +} + +TEST_F(HandlerSpecTest, Ex7_15_FlowMappings) { + EXPECT_CALL(handler, OnDocumentStart(_)); + EXPECT_CALL(handler, OnSequenceStart(_, "?", 0, EmitterStyle::Block)); + EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Flow)); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "one")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "two")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "three")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "four")); + EXPECT_CALL(handler, OnMapEnd()); + EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Flow)); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "five")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "six")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "seven")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "eight")); + EXPECT_CALL(handler, OnMapEnd()); + EXPECT_CALL(handler, OnSequenceEnd()); + EXPECT_CALL(handler, OnDocumentEnd()); + Parse(ex7_15); +} + +TEST_F(HandlerSpecTest, Ex7_16_FlowMappingEntries) { + EXPECT_CALL(handler, OnDocumentStart(_)); + EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Flow)); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "explicit")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "entry")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "implicit")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "entry")); + EXPECT_CALL(handler, OnNull(_, 0)); + EXPECT_CALL(handler, OnNull(_, 0)); + EXPECT_CALL(handler, OnMapEnd()); + EXPECT_CALL(handler, OnDocumentEnd()); + Parse(ex7_16); +} + +TEST_F(HandlerSpecTest, Ex7_17_FlowMappingSeparateValues) { + EXPECT_CALL(handler, OnDocumentStart(_)); + EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Flow)); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "unquoted")); + EXPECT_CALL(handler, OnScalar(_, "!", 0, "separate")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "http://foo.com")); + EXPECT_CALL(handler, OnNull(_, 0)); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "omitted value")); + EXPECT_CALL(handler, OnNull(_, 0)); + EXPECT_CALL(handler, OnNull(_, 0)); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "omitted key")); + EXPECT_CALL(handler, OnMapEnd()); + EXPECT_CALL(handler, OnDocumentEnd()); + Parse(ex7_17); +} + +TEST_F(HandlerSpecTest, Ex7_18_FlowMappingAdjacentValues) { + EXPECT_CALL(handler, OnDocumentStart(_)); + EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Flow)); + EXPECT_CALL(handler, OnScalar(_, "!", 0, "adjacent")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "value")); + EXPECT_CALL(handler, OnScalar(_, "!", 0, "readable")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "value")); + EXPECT_CALL(handler, OnScalar(_, "!", 0, "empty")); + EXPECT_CALL(handler, OnNull(_, 0)); + EXPECT_CALL(handler, OnMapEnd()); + EXPECT_CALL(handler, OnDocumentEnd()); + Parse(ex7_18); +} + +TEST_F(HandlerSpecTest, Ex7_19_SinglePairFlowMappings) { + EXPECT_CALL(handler, OnDocumentStart(_)); + EXPECT_CALL(handler, OnSequenceStart(_, "?", 0, EmitterStyle::Flow)); + EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Flow)); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "foo")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "bar")); + EXPECT_CALL(handler, OnMapEnd()); + EXPECT_CALL(handler, OnSequenceEnd()); + EXPECT_CALL(handler, OnDocumentEnd()); + Parse(ex7_19); +} + +TEST_F(HandlerSpecTest, Ex7_20_SinglePairExplicitEntry) { + EXPECT_CALL(handler, OnDocumentStart(_)); + EXPECT_CALL(handler, OnSequenceStart(_, "?", 0, EmitterStyle::Flow)); + EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Flow)); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "foo bar")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "baz")); + EXPECT_CALL(handler, OnMapEnd()); + EXPECT_CALL(handler, OnSequenceEnd()); + EXPECT_CALL(handler, OnDocumentEnd()); + Parse(ex7_20); +} + +TEST_F(HandlerSpecTest, Ex7_21_SinglePairImplicitEntries) { + EXPECT_CALL(handler, OnDocumentStart(_)); + EXPECT_CALL(handler, OnSequenceStart(_, "?", 0, EmitterStyle::Block)); + EXPECT_CALL(handler, OnSequenceStart(_, "?", 0, EmitterStyle::Flow)); + EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Flow)); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "YAML")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "separate")); + EXPECT_CALL(handler, OnMapEnd()); + EXPECT_CALL(handler, OnSequenceEnd()); + EXPECT_CALL(handler, OnSequenceStart(_, "?", 0, EmitterStyle::Flow)); + EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Default)); + EXPECT_CALL(handler, OnNull(_, 0)); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "empty key entry")); + EXPECT_CALL(handler, OnMapEnd()); + EXPECT_CALL(handler, OnSequenceEnd()); + EXPECT_CALL(handler, OnSequenceStart(_, "?", 0, EmitterStyle::Flow)); + EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Flow)); + EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Flow)); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "JSON")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "like")); + EXPECT_CALL(handler, OnMapEnd()); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "adjacent")); + EXPECT_CALL(handler, OnMapEnd()); + EXPECT_CALL(handler, OnSequenceEnd()); + EXPECT_CALL(handler, OnSequenceEnd()); + EXPECT_CALL(handler, OnDocumentEnd()); + Parse(ex7_21); +} + +TEST_F(HandlerSpecTest, Ex7_22_InvalidImplicitKeys) { + EXPECT_THROW_PARSER_EXCEPTION(IgnoreParse(ex7_22), ErrorMsg::END_OF_SEQ_FLOW); +} + +TEST_F(HandlerSpecTest, Ex7_23_FlowContent) { + EXPECT_CALL(handler, OnDocumentStart(_)); + EXPECT_CALL(handler, OnSequenceStart(_, "?", 0, EmitterStyle::Block)); + EXPECT_CALL(handler, OnSequenceStart(_, "?", 0, EmitterStyle::Flow)); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "a")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "b")); + EXPECT_CALL(handler, OnSequenceEnd()); + EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Flow)); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "a")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "b")); + EXPECT_CALL(handler, OnMapEnd()); + EXPECT_CALL(handler, OnScalar(_, "!", 0, "a")); + EXPECT_CALL(handler, OnScalar(_, "!", 0, "b")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "c")); + EXPECT_CALL(handler, OnSequenceEnd()); + EXPECT_CALL(handler, OnDocumentEnd()); + Parse(ex7_23); +} + +TEST_F(HandlerSpecTest, Ex7_24_FlowNodes) { + EXPECT_CALL(handler, OnDocumentStart(_)); + EXPECT_CALL(handler, OnSequenceStart(_, "?", 0, EmitterStyle::Block)); + EXPECT_CALL(handler, OnScalar(_, "tag:yaml.org,2002:str", 0, "a")); + EXPECT_CALL(handler, OnScalar(_, "!", 0, "b")); + EXPECT_CALL(handler, OnScalar(_, "!", 1, "c")); + EXPECT_CALL(handler, OnAlias(_, 1)); + EXPECT_CALL(handler, OnScalar(_, "tag:yaml.org,2002:str", 0, "")); + EXPECT_CALL(handler, OnSequenceEnd()); + EXPECT_CALL(handler, OnDocumentEnd()); + Parse(ex7_24); +} + +TEST_F(HandlerSpecTest, Ex8_1_BlockScalarHeader) { + EXPECT_CALL(handler, OnDocumentStart(_)); + EXPECT_CALL(handler, OnSequenceStart(_, "?", 0, EmitterStyle::Block)); + EXPECT_CALL(handler, OnScalar(_, "!", 0, "literal\n")); + EXPECT_CALL(handler, OnScalar(_, "!", 0, " folded\n")); + EXPECT_CALL(handler, OnScalar(_, "!", 0, "keep\n\n")); + EXPECT_CALL(handler, OnScalar(_, "!", 0, " strip")); + EXPECT_CALL(handler, OnSequenceEnd()); + EXPECT_CALL(handler, OnDocumentEnd()); + Parse(ex8_1); +} + +TEST_F(HandlerSpecTest, Ex8_2_BlockIndentationHeader) { + EXPECT_CALL(handler, OnDocumentStart(_)); + EXPECT_CALL(handler, OnSequenceStart(_, "?", 0, EmitterStyle::Block)); + EXPECT_CALL(handler, OnScalar(_, "!", 0, "detected\n")); + EXPECT_CALL(handler, OnScalar(_, "!", 0, "\n\n# detected\n")); + EXPECT_CALL(handler, OnScalar(_, "!", 0, " explicit\n")); + EXPECT_CALL(handler, OnScalar(_, "!", 0, "\t\ndetected\n")); + EXPECT_CALL(handler, OnSequenceEnd()); + EXPECT_CALL(handler, OnDocumentEnd()); + Parse(ex8_2); +} + +TEST_F(HandlerSpecTest, Ex8_3a_InvalidBlockScalarIndentationIndicators) { + EXPECT_THROW_PARSER_EXCEPTION(IgnoreParse(ex8_3a), ErrorMsg::END_OF_SEQ); +} + +TEST_F(HandlerSpecTest, Ex8_3b_InvalidBlockScalarIndentationIndicators) { + EXPECT_THROW_PARSER_EXCEPTION(IgnoreParse(ex8_3b), ErrorMsg::END_OF_SEQ); +} + +TEST_F(HandlerSpecTest, Ex8_3c_InvalidBlockScalarIndentationIndicators) { + EXPECT_THROW_PARSER_EXCEPTION(IgnoreParse(ex8_3c), ErrorMsg::END_OF_SEQ); +} + +TEST_F(HandlerSpecTest, Ex8_4_ChompingFinalLineBreak) { + EXPECT_CALL(handler, OnDocumentStart(_)); + EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Block)); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "strip")); + EXPECT_CALL(handler, OnScalar(_, "!", 0, "text")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "clip")); + EXPECT_CALL(handler, OnScalar(_, "!", 0, "text\n")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "keep")); + EXPECT_CALL(handler, OnScalar(_, "!", 0, "text\n")); + EXPECT_CALL(handler, OnMapEnd()); + EXPECT_CALL(handler, OnDocumentEnd()); + Parse(ex8_4); +} + +TEST_F(HandlerSpecTest, DISABLED_Ex8_5_ChompingTrailingLines) { + EXPECT_CALL(handler, OnDocumentStart(_)); + EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Block)); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "strip")); + EXPECT_CALL(handler, OnScalar(_, "!", 0, "# text")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "clip")); + EXPECT_CALL(handler, OnScalar(_, "!", 0, "# text\n")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "keep")); + // NOTE: I believe this is a bug in the YAML spec - + // it should be "# text\n\n" + EXPECT_CALL(handler, OnScalar(_, "!", 0, "# text\n")); + EXPECT_CALL(handler, OnMapEnd()); + EXPECT_CALL(handler, OnDocumentEnd()); + Parse(ex8_5); +} + +TEST_F(HandlerSpecTest, Ex8_6_EmptyScalarChomping) { + EXPECT_CALL(handler, OnDocumentStart(_)); + EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Block)); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "strip")); + EXPECT_CALL(handler, OnScalar(_, "!", 0, "")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "clip")); + EXPECT_CALL(handler, OnScalar(_, "!", 0, "")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "keep")); + EXPECT_CALL(handler, OnScalar(_, "!", 0, "\n")); + EXPECT_CALL(handler, OnMapEnd()); + EXPECT_CALL(handler, OnDocumentEnd()); + Parse(ex8_6); +} + +TEST_F(HandlerSpecTest, Ex8_7_LiteralScalar) { + EXPECT_CALL(handler, OnDocumentStart(_)); + EXPECT_CALL(handler, OnScalar(_, "!", 0, "literal\n\ttext\n")); + EXPECT_CALL(handler, OnDocumentEnd()); + Parse(ex8_7); +} + +TEST_F(HandlerSpecTest, Ex8_8_LiteralContent) { + EXPECT_CALL(handler, OnDocumentStart(_)); + EXPECT_CALL(handler, OnScalar(_, "!", 0, "\n\nliteral\n \n\ntext\n")); + EXPECT_CALL(handler, OnDocumentEnd()); + Parse(ex8_8); +} + +TEST_F(HandlerSpecTest, Ex8_9_FoldedScalar) { + EXPECT_CALL(handler, OnDocumentStart(_)); + EXPECT_CALL(handler, OnScalar(_, "!", 0, "folded text\n")); + EXPECT_CALL(handler, OnDocumentEnd()); + Parse(ex8_9); +} + +TEST_F(HandlerSpecTest, Ex8_10_FoldedLines) { + EXPECT_CALL(handler, OnDocumentStart(_)); + EXPECT_CALL(handler, OnScalar(_, "!", 0, "\nfolded line\nnext line\n * bullet\n\n * list\n * lines\n\nlast line\n")); + EXPECT_CALL(handler, OnDocumentEnd()); + Parse(ex8_10); +} + +TEST_F(HandlerSpecTest, Ex8_11_MoreIndentedLines) { + EXPECT_CALL(handler, OnDocumentStart(_)); + EXPECT_CALL(handler, OnScalar(_, "!", 0, "\nfolded line\nnext line\n * bullet\n\n * list\n * lines\n\nlast line\n")); + EXPECT_CALL(handler, OnDocumentEnd()); + Parse(ex8_11); +} + +TEST_F(HandlerSpecTest, Ex8_12_EmptySeparationLines) { + EXPECT_CALL(handler, OnDocumentStart(_)); + EXPECT_CALL(handler, OnScalar(_, "!", 0, "\nfolded line\nnext line\n * bullet\n\n * list\n * lines\n\nlast line\n")); + EXPECT_CALL(handler, OnDocumentEnd()); + Parse(ex8_12); +} + +TEST_F(HandlerSpecTest, Ex8_13_FinalEmptyLines) { + EXPECT_CALL(handler, OnDocumentStart(_)); + EXPECT_CALL(handler, OnScalar(_, "!", 0, "\nfolded line\nnext line\n * bullet\n\n * list\n * lines\n\nlast line\n")); + EXPECT_CALL(handler, OnDocumentEnd()); + Parse(ex8_13); +} + +TEST_F(HandlerSpecTest, Ex8_14_BlockSequence) { + EXPECT_CALL(handler, OnDocumentStart(_)); + EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Block)); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "block sequence")); + EXPECT_CALL(handler, OnSequenceStart(_, "?", 0, EmitterStyle::Block)); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "one")); + EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Block)); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "two")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "three")); + EXPECT_CALL(handler, OnMapEnd()); + EXPECT_CALL(handler, OnSequenceEnd()); + EXPECT_CALL(handler, OnMapEnd()); + EXPECT_CALL(handler, OnDocumentEnd()); + Parse(ex8_14); +} + +TEST_F(HandlerSpecTest, Ex8_15_BlockSequenceEntryTypes) { + EXPECT_CALL(handler, OnDocumentStart(_)); + EXPECT_CALL(handler, OnSequenceStart(_, "?", 0, EmitterStyle::Block)); + EXPECT_CALL(handler, OnNull(_, 0)); + EXPECT_CALL(handler, OnScalar(_, "!", 0, "block node\n")); + EXPECT_CALL(handler, OnSequenceStart(_, "?", 0, EmitterStyle::Block)); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "one")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "two")); + EXPECT_CALL(handler, OnSequenceEnd()); + EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Block)); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "one")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "two")); + EXPECT_CALL(handler, OnMapEnd()); + EXPECT_CALL(handler, OnSequenceEnd()); + EXPECT_CALL(handler, OnDocumentEnd()); + Parse(ex8_15); +} + +TEST_F(HandlerSpecTest, Ex8_16_BlockMappings) { + EXPECT_CALL(handler, OnDocumentStart(_)); + EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Block)); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "block mapping")); + EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Block)); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "key")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "value")); + EXPECT_CALL(handler, OnMapEnd()); + EXPECT_CALL(handler, OnMapEnd()); + EXPECT_CALL(handler, OnDocumentEnd()); + Parse(ex8_16); +} + +TEST_F(HandlerSpecTest, Ex8_17_ExplicitBlockMappingEntries) { + EXPECT_CALL(handler, OnDocumentStart(_)); + EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Block)); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "explicit key")); + EXPECT_CALL(handler, OnNull(_, 0)); + EXPECT_CALL(handler, OnScalar(_, "!", 0, "block key\n")); + EXPECT_CALL(handler, OnSequenceStart(_, "?", 0, EmitterStyle::Block)); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "one")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "two")); + EXPECT_CALL(handler, OnSequenceEnd()); + EXPECT_CALL(handler, OnMapEnd()); + EXPECT_CALL(handler, OnDocumentEnd()); + Parse(ex8_17); +} + +TEST_F(HandlerSpecTest, Ex8_18_ImplicitBlockMappingEntries) { + EXPECT_CALL(handler, OnDocumentStart(_)); + EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Block)); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "plain key")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "in-line value")); + EXPECT_CALL(handler, OnNull(_, 0)); + EXPECT_CALL(handler, OnNull(_, 0)); + EXPECT_CALL(handler, OnScalar(_, "!", 0, "quoted key")); + EXPECT_CALL(handler, OnSequenceStart(_, "?", 0, EmitterStyle::Block)); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "entry")); + EXPECT_CALL(handler, OnSequenceEnd()); + EXPECT_CALL(handler, OnMapEnd()); + EXPECT_CALL(handler, OnDocumentEnd()); + Parse(ex8_18); +} + +TEST_F(HandlerSpecTest, Ex8_19_CompactBlockMappings) { + EXPECT_CALL(handler, OnDocumentStart(_)); + EXPECT_CALL(handler, OnSequenceStart(_, "?", 0, EmitterStyle::Block)); + EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Block)); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "sun")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "yellow")); + EXPECT_CALL(handler, OnMapEnd()); + EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Block)); + EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Block)); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "earth")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "blue")); + EXPECT_CALL(handler, OnMapEnd()); + EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Block)); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "moon")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "white")); + EXPECT_CALL(handler, OnMapEnd()); + EXPECT_CALL(handler, OnMapEnd()); + EXPECT_CALL(handler, OnSequenceEnd()); + EXPECT_CALL(handler, OnDocumentEnd()); + Parse(ex8_19); +} + +TEST_F(HandlerSpecTest, Ex8_20_BlockNodeTypes) { + EXPECT_CALL(handler, OnDocumentStart(_)); + EXPECT_CALL(handler, OnSequenceStart(_, "?", 0, EmitterStyle::Block)); + EXPECT_CALL(handler, OnScalar(_, "!", 0, "flow in block")); + EXPECT_CALL(handler, OnScalar(_, "!", 0, "Block scalar\n")); + EXPECT_CALL(handler, OnMapStart(_, "tag:yaml.org,2002:map", 0, EmitterStyle::Block)); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "foo")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "bar")); + EXPECT_CALL(handler, OnMapEnd()); + EXPECT_CALL(handler, OnSequenceEnd()); + EXPECT_CALL(handler, OnDocumentEnd()); + Parse(ex8_20); +} + +TEST_F(HandlerSpecTest, DISABLED_Ex8_21_BlockScalarNodes) { + EXPECT_CALL(handler, OnDocumentStart(_)); + EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Block)); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "literal")); + // NOTE: I believe this is a bug in the YAML spec + // - it should be "value\n" + EXPECT_CALL(handler, OnScalar(_, "!", 0, "value")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "folded")); + EXPECT_CALL(handler, OnScalar(_, "!foo", 0, "value")); + EXPECT_CALL(handler, OnMapEnd()); + EXPECT_CALL(handler, OnDocumentEnd()); + Parse(ex8_21); +} + +TEST_F(HandlerSpecTest, Ex8_22_BlockCollectionNodes) { + EXPECT_CALL(handler, OnDocumentStart(_)); + EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Block)); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "sequence")); + EXPECT_CALL(handler, OnSequenceStart(_, "tag:yaml.org,2002:seq", 0, EmitterStyle::Block)); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "entry")); + EXPECT_CALL(handler, OnSequenceStart(_, "tag:yaml.org,2002:seq", 0, EmitterStyle::Block)); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "nested")); + EXPECT_CALL(handler, OnSequenceEnd()); + EXPECT_CALL(handler, OnSequenceEnd()); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "mapping")); + EXPECT_CALL(handler, OnMapStart(_, "tag:yaml.org,2002:map", 0, EmitterStyle::Block)); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "foo")); + EXPECT_CALL(handler, OnScalar(_, "?", 0, "bar")); + EXPECT_CALL(handler, OnMapEnd()); + EXPECT_CALL(handler, OnMapEnd()); + EXPECT_CALL(handler, OnDocumentEnd()); + Parse(ex8_22); +} +} +} From b6acd869ebba614309e0adfb2bbfc1ce8f45c9c9 Mon Sep 17 00:00:00 2001 From: Yuheng Chen Date: Wed, 27 May 2015 23:04:03 +0800 Subject: [PATCH 018/380] Fix parsing bugs 1. empty key & value support 2. OnNull handling in test --- saphyr/src/parser.rs | 16 ++++++++++++---- saphyr/tests/spec_test.rs | 3 ++- saphyr/tests/specexamples.rs.inc | 1 + 3 files changed, 15 insertions(+), 5 deletions(-) diff --git a/saphyr/src/parser.rs b/saphyr/src/parser.rs index fa8ce0a..c72b786 100644 --- a/saphyr/src/parser.rs +++ b/saphyr/src/parser.rs @@ -155,6 +155,7 @@ impl> Parser { fn load_node(&mut self, first_ev: &Event, recv: &mut R) -> Result<(), ScanError> { match *first_ev { + Event::Alias => { unimplemented!() }, Event::Scalar(ref v, style) => { Ok(()) }, @@ -164,8 +165,8 @@ impl> Parser { Event::MappingStart => { self.load_mapping(first_ev, recv) }, - // TODO more events - _ => { unreachable!(); } + _ => { println!("UNREACHABLE EVENT: {:?}", first_ev); + unreachable!(); } } } @@ -379,7 +380,9 @@ impl> Parser { self.skip(); let tok = try!(self.peek()); match tok.1 { - TokenType::KeyToken | TokenType::ValueToken | TokenType::BlockEndToken + TokenType::KeyToken + | TokenType::ValueToken + | TokenType::BlockEndToken => { self.state = State::BlockMappingValue; // empty scalar @@ -391,6 +394,11 @@ impl> Parser { } } }, + // XXX(chenyh): libyaml failed to parse spec 1.2, ex8.18 + TokenType::ValueToken => { + self.state = State::BlockMappingValue; + Ok(Event::empty_scalar()) + }, TokenType::BlockEndToken => { self.pop_state(); self.skip(); @@ -411,7 +419,7 @@ impl> Parser { match tok.1 { TokenType::KeyToken | TokenType::ValueToken | TokenType::BlockEndToken => { - self.state = State::BlockMappingValue; + self.state = State::BlockMappingKey; // empty scalar Ok(Event::empty_scalar()) } diff --git a/saphyr/tests/spec_test.rs b/saphyr/tests/spec_test.rs index 11f5fe1..a796403 100644 --- a/saphyr/tests/spec_test.rs +++ b/saphyr/tests/spec_test.rs @@ -2,6 +2,7 @@ extern crate yaml_rust; use yaml_rust::parser::{Parser, EventReceiver, Event}; +use yaml_rust::scanner::TScalarStyle; use yaml_rust::yaml::Yaml; #[derive(Clone, PartialEq, PartialOrd, Debug)] @@ -31,7 +32,7 @@ impl EventReceiver for YamlChecker { Event::MappingStart => TestEvent::OnMapStart, Event::MappingEnd => TestEvent::OnMapEnd, Event::Scalar(ref v, style) => { - if v == "~" { + if v == "~" && style == TScalarStyle::Plain { TestEvent::OnNull } else { TestEvent::OnScalar diff --git a/saphyr/tests/specexamples.rs.inc b/saphyr/tests/specexamples.rs.inc index 0996fda..a5398c3 100644 --- a/saphyr/tests/specexamples.rs.inc +++ b/saphyr/tests/specexamples.rs.inc @@ -320,6 +320,7 @@ const EX8_16 : &'static str = const EX8_17 : &'static str = "? explicit key # Empty value\n? |\n block key\n: - one # Explicit compact\n - two # block value\n"; +// XXX libyaml failed this test const EX8_18 : &'static str = "plain key: in-line value\n: # Both empty\n\"quoted key\":\n- entry\n"; From ce6b057bfe52ff63c09f964170ba5ddbfb2e7cfa Mon Sep 17 00:00:00 2001 From: Yuheng Chen Date: Wed, 27 May 2015 23:18:38 +0800 Subject: [PATCH 019/380] Add flow_sequence_entry_mapping --- saphyr/src/parser.rs | 53 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) diff --git a/saphyr/src/parser.rs b/saphyr/src/parser.rs index c72b786..9bffdad 100644 --- a/saphyr/src/parser.rs +++ b/saphyr/src/parser.rs @@ -230,6 +230,10 @@ impl> Parser { State::IndentlessSequenceEntry => self.indentless_sequence_entry(), + State::FlowSequenceEntryMappingKey => self.flow_sequence_entry_mapping_key(), + State::FlowSequenceEntryMappingValue => self.flow_sequence_entry_mapping_value(), + State::FlowSequenceEntryMappingEnd => self.flow_sequence_entry_mapping_end(), + _ => unimplemented!() } } @@ -606,6 +610,55 @@ impl> Parser { } } + fn flow_sequence_entry_mapping_key(&mut self) -> ParseResult { + let tok = try!(self.peek()); + + match tok.1 { + TokenType::ValueToken + | TokenType::FlowEntryToken + | TokenType::FlowSequenceEndToken => { + self.skip(); + self.state = State::FlowSequenceEntryMappingValue; + Ok(Event::empty_scalar()) + }, + _ => { + self.push_state(State::FlowSequenceEntryMappingValue); + self.parse_node(false, false) + } + } + } + + fn flow_sequence_entry_mapping_value(&mut self) -> ParseResult { + let tok = try!(self.peek()); + + match tok.1 { + TokenType::ValueToken => { + self.skip(); + let tok = try!(self.peek()); + self.state = State::FlowSequenceEntryMappingValue; + match tok.1 { + TokenType::FlowEntryToken + | TokenType::FlowSequenceEndToken => { + self.state = State::FlowSequenceEntryMappingEnd; + Ok(Event::empty_scalar()) + }, + _ => { + self.push_state(State::FlowSequenceEntryMappingEnd); + self.parse_node(false, false) + } + } + }, + _ => { + self.state = State::FlowSequenceEntryMappingEnd; + Ok(Event::empty_scalar()) + } + } + } + + fn flow_sequence_entry_mapping_end(&mut self) -> ParseResult { + self.state = State::FlowSequenceEntry; + Ok(Event::MappingEnd) + } } #[cfg(test)] From b4f94fdca750a72675c3473ef86587be052c86a5 Mon Sep 17 00:00:00 2001 From: Yuheng Chen Date: Thu, 28 May 2015 17:18:20 +0800 Subject: [PATCH 020/380] Fix test_spec_ex7_3 --- saphyr/src/parser.rs | 18 ++++++++------ saphyr/src/scanner.rs | 31 +++++++++++++++++++++---- saphyr/src/yaml.rs | 1 - saphyr/tests/specs/libyaml_fail-01.yaml | 6 +++++ saphyr/tests/specs/libyaml_fail-02.yaml | 7 ++++++ 5 files changed, 51 insertions(+), 12 deletions(-) create mode 100644 saphyr/tests/specs/libyaml_fail-01.yaml create mode 100644 saphyr/tests/specs/libyaml_fail-02.yaml diff --git a/saphyr/src/parser.rs b/saphyr/src/parser.rs index 9bffdad..8bc2918 100644 --- a/saphyr/src/parser.rs +++ b/saphyr/src/parser.rs @@ -156,7 +156,7 @@ impl> Parser { -> Result<(), ScanError> { match *first_ev { Event::Alias => { unimplemented!() }, - Event::Scalar(ref v, style) => { + Event::Scalar(_, _) => { Ok(()) }, Event::SequenceStart => { @@ -170,7 +170,7 @@ impl> Parser { } } - fn load_mapping(&mut self, first_ev: &Event, recv: &mut R) + fn load_mapping(&mut self, _first_ev: &Event, recv: &mut R) -> Result<(), ScanError> { let mut ev = try!(self.parse(recv)); while ev != Event::MappingEnd { @@ -187,11 +187,11 @@ impl> Parser { Ok(()) } - fn load_sequence(&mut self, first_ev: &Event, recv: &mut R) + fn load_sequence(&mut self, _first_ev: &Event, recv: &mut R) -> Result<(), ScanError> { let mut ev = try!(self.parse(recv)); while ev != Event::SequenceEnd { - let entry = try!(self.load_node(&ev, recv)); + try!(self.load_node(&ev, recv)); // next event ev = try!(self.parse(recv)); @@ -367,7 +367,7 @@ impl> Parser { self.state = State::BlockMappingFirstKey; Ok(Event::MappingStart) }, - _ => { unimplemented!(); } + _ => { Err(ScanError::new(tok.0, "while parsing a node, did not find expected node content")) } } } @@ -474,7 +474,11 @@ impl> Parser { return self.parse_node(false, false); } } - } else if (tok.1 != TokenType::FlowMappingEndToken) { + // XXX libyaml fail ex 7.3, empty key + } else if tok.1 == TokenType::ValueToken { + self.state = State::FlowMappingValue; + return Ok(Event::empty_scalar()); + } else if tok.1 != TokenType::FlowMappingEndToken { self.push_state(State::FlowMappingEmptyValue); return self.parse_node(false, false); } @@ -494,7 +498,7 @@ impl> Parser { if tok.1 == TokenType::ValueToken { self.skip(); - let mut tok = try!(self.peek()); + let tok = try!(self.peek()); match tok.1 { TokenType::FlowEntryToken | TokenType::FlowMappingEndToken => { }, diff --git a/saphyr/src/scanner.rs b/saphyr/src/scanner.rs index e56fa8a..4b0cdfa 100644 --- a/saphyr/src/scanner.rs +++ b/saphyr/src/scanner.rs @@ -553,7 +553,7 @@ impl> Scanner { let mut chomping: i32 = 0; let mut increment: usize = 0; let mut indent: usize = 0; - let mut trailing_blank: bool = false; + let mut trailing_blank: bool; let mut leading_blank: bool = false; let mut string = String::new(); @@ -742,7 +742,7 @@ impl> Scanner { let mut leading_break = String::new(); let mut trailing_breaks = String::new(); let mut whitespaces = String::new(); - let mut leading_blanks = false; + let mut leading_blanks; /* Eat the left quote. */ self.skip(); @@ -820,7 +820,6 @@ impl> Scanner { self.skip(); // Consume an arbitrary escape code. if code_length > 0 { - let val = 0usize; self.lookahead(code_length); let mut value = 0u32; for i in 0..code_length { @@ -840,7 +839,7 @@ impl> Scanner { }; string.push(ch); - for i in 0..code_length { + for _ in 0..code_length { self.skip(); } } @@ -1491,5 +1490,29 @@ key: next!(p, StreamEndToken); end!(p); } + + #[test] + fn test_spec_ex7_3() { + let s = +" +{ + ? foo :, + : bar, +} +"; + let mut p = Scanner::new(s.chars()); + next!(p, StreamStartToken(..)); + next!(p, FlowMappingStartToken); + next!(p, KeyToken); + next_scalar!(p, TScalarStyle::Plain, "foo"); + next!(p, ValueToken); + next!(p, FlowEntryToken); + next!(p, ValueToken); + next_scalar!(p, TScalarStyle::Plain, "bar"); + next!(p, FlowEntryToken); + next!(p, FlowMappingEndToken); + next!(p, StreamEndToken); + end!(p); + } } diff --git a/saphyr/src/yaml.rs b/saphyr/src/yaml.rs index d6e3abc..33522b2 100644 --- a/saphyr/src/yaml.rs +++ b/saphyr/src/yaml.rs @@ -205,7 +205,6 @@ impl Index for Yaml { #[cfg(test)] mod test { - use parser::Parser; use yaml::*; #[test] fn test_coerce() { diff --git a/saphyr/tests/specs/libyaml_fail-01.yaml b/saphyr/tests/specs/libyaml_fail-01.yaml new file mode 100644 index 0000000..5e6c0dc --- /dev/null +++ b/saphyr/tests/specs/libyaml_fail-01.yaml @@ -0,0 +1,6 @@ +# Ex 8.18 +plain key: in-line value +: # Both empty +"quoted key": +- entry + diff --git a/saphyr/tests/specs/libyaml_fail-02.yaml b/saphyr/tests/specs/libyaml_fail-02.yaml new file mode 100644 index 0000000..60074de --- /dev/null +++ b/saphyr/tests/specs/libyaml_fail-02.yaml @@ -0,0 +1,7 @@ +# Ex 7.17 +{ +unqoted : "separate", +http://foo.com, +omitted value:, +: omitted key, +} From 4890b7de5bccaa4902bb739572c096145c299ae7 Mon Sep 17 00:00:00 2001 From: Yuheng Chen Date: Thu, 28 May 2015 22:07:59 +0800 Subject: [PATCH 021/380] Add VersionDirectiveToken --- saphyr/src/parser.rs | 38 +++++++++-- saphyr/src/scanner.rs | 148 +++++++++++++++++++++++++++++++++++++++++- 2 files changed, 178 insertions(+), 8 deletions(-) diff --git a/saphyr/src/parser.rs b/saphyr/src/parser.rs index 8bc2918..1108e2d 100644 --- a/saphyr/src/parser.rs +++ b/saphyr/src/parser.rs @@ -82,8 +82,12 @@ impl> Parser { self.token = self.scanner.next(); } if self.token.is_none() { - return Err(ScanError::new(self.scanner.mark(), - "unexpected eof")); + match self.scanner.get_error() { + None => + return Err(ScanError::new(self.scanner.mark(), + "unexpected eof")), + Some(e) => return Err(e), + } } // XXX better? Ok(self.token.clone().unwrap()) @@ -200,7 +204,7 @@ impl> Parser { } fn state_machine(&mut self) -> ParseResult { - let next_tok = self.peek(); + let next_tok = try!(self.peek()); println!("cur_state {:?}, next tok: {:?}", self.state, next_tok); match self.state { State::StreamStart => self.stream_start(), @@ -272,13 +276,14 @@ impl> Parser { self.skip(); return Ok(Event::StreamEnd); }, - TokenType::VersionDirectiveToken + TokenType::VersionDirectiveToken(..) | TokenType::TagDirectiveToken | TokenType::DocumentStartToken => { // explicit document self._explict_document_start() }, _ if implicit => { + try!(self.parser_process_directives()); self.push_state(State::DocumentEnd); self.state = State::BlockNode; Ok(Event::DocumentStart) @@ -290,7 +295,30 @@ impl> Parser { } } + fn parser_process_directives(&mut self) -> Result<(), ScanError> { + loop { + let tok = try!(self.peek()); + match tok.1 { + TokenType::VersionDirectiveToken(_, _) => { + // XXX parsing with warning according to spec + //if major != 1 || minor > 2 { + // return Err(ScanError::new(tok.0, + // "found incompatible YAML document")); + //} + }, + TokenType::TagDirectiveToken => { + unimplemented!(); + }, + _ => break + } + self.skip(); + } + // TODO tag directive + Ok(()) + } + fn _explict_document_start(&mut self) -> ParseResult { + try!(self.parser_process_directives()); let tok = try!(self.peek()); if tok.1 != TokenType::DocumentStartToken { return Err(ScanError::new(tok.0, "did not find expected ")); @@ -304,7 +332,7 @@ impl> Parser { fn document_content(&mut self) -> ParseResult { let tok = try!(self.peek()); match tok.1 { - TokenType::VersionDirectiveToken + TokenType::VersionDirectiveToken(..) |TokenType::TagDirectiveToken |TokenType::DocumentStartToken |TokenType::DocumentEndToken diff --git a/saphyr/src/scanner.rs b/saphyr/src/scanner.rs index 4b0cdfa..00ba30d 100644 --- a/saphyr/src/scanner.rs +++ b/saphyr/src/scanner.rs @@ -54,7 +54,8 @@ pub enum TokenType { NoToken, StreamStartToken(TEncoding), StreamEndToken, - VersionDirectiveToken, + /// major, minor + VersionDirectiveToken(u32, u32), TagDirectiveToken, DocumentStartToken, DocumentEndToken, @@ -103,6 +104,7 @@ pub struct Scanner { mark: Marker, tokens: VecDeque, buffer: VecDeque, + error: Option, stream_start_produced: bool, stream_end_produced: bool, @@ -118,10 +120,13 @@ pub struct Scanner { impl> Iterator for Scanner { type Item = Token; fn next(&mut self) -> Option { + if self.error.is_some() { + return None; + } match self.next_token() { Ok(tok) => tok, Err(e) => { - println!("Error: {:?}", e); + self.error = Some(e); None } } @@ -178,6 +183,7 @@ impl> Scanner { buffer: VecDeque::new(), mark: Marker::new(0, 1, 0), tokens: VecDeque::new(), + error: None, stream_start_produced: false, stream_end_produced: false, @@ -190,6 +196,13 @@ impl> Scanner { token_available: false, } } + #[inline] + pub fn get_error(&self) -> Option { + match self.error { + None => None, + Some(ref e) => Some(e.clone()), + } + } #[inline] fn lookahead(&mut self, count: usize) { @@ -296,8 +309,9 @@ impl> Scanner { return Ok(()); } + // Is it a directive? if self.mark.col == 0 && self.ch_is('%') { - unimplemented!(); + return self.fetch_directive(); } if self.mark.col == 0 @@ -449,6 +463,134 @@ impl> Scanner { Ok(()) } + fn fetch_directive(&mut self) -> ScanResult { + self.unroll_indent(-1); + try!(self.remove_simple_key()); + + self.disallow_simple_key(); + + let tok = try!(self.scan_directive()); + + self.tokens.push_back(tok); + + Ok(()) + } + + fn scan_directive(&mut self) -> Result { + let start_mark = self.mark; + self.skip(); + + let name = try!(self.scan_directive_name()); + let tok = match name.as_ref() { + "YAML" => { + try!(self.scan_version_directive_value(&start_mark)) + }, + "TAG" => { + try!(self.scan_tag_directive_value(&start_mark)) + }, + _ => return Err(ScanError::new(start_mark, + "while scanning a directive, found uknown directive name")) + }; + self.lookahead(1); + + while is_blank(self.ch()) { + self.skip(); + self.lookahead(1); + } + + if self.ch() == '#' { + while !is_breakz(self.ch()) { + self.skip(); + self.lookahead(1); + } + } + + if !is_breakz(self.ch()) { + return Err(ScanError::new(start_mark, + "while scanning a directive, did not find expected comment or line break")); + } + + // Eat a line break + if is_break(self.ch()) { + self.lookahead(2); + self.skip_line(); + } + + Ok(tok) + } + + fn scan_version_directive_value(&mut self, mark: &Marker) -> Result { + self.lookahead(1); + + while is_blank(self.ch()) { + self.skip(); + self.lookahead(1); + } + + let major = try!(self.scan_version_directive_number(mark)); + + if self.ch() != '.' { + return Err(ScanError::new(*mark, + "while scanning a YAML directive, did not find expected digit or '.' character")); + } + + self.skip(); + + let minor = try!(self.scan_version_directive_number(mark)); + + Ok(Token(*mark, TokenType::VersionDirectiveToken(major, minor))) + } + + fn scan_directive_name(&mut self) -> Result { + let start_mark = self.mark; + let mut string = String::new(); + self.lookahead(1); + while self.ch().is_alphabetic() { + string.push(self.ch()); + self.skip(); + self.lookahead(1); + } + + if string.is_empty() { + return Err(ScanError::new(start_mark, + "while scanning a directive, could not find expected directive name")); + } + + if !is_blankz(self.ch()) { + return Err(ScanError::new(start_mark, + "while scanning a directive, found unexpected non-alphabetical character")); + } + + Ok(string) + } + + fn scan_version_directive_number(&mut self, mark: &Marker) -> Result { + let mut val = 0u32; + let mut length = 0usize; + self.lookahead(1); + while is_digit(self.ch()) { + if length + 1 > 9 { + return Err(ScanError::new(*mark, + "while scanning a YAML directive, found extremely long version number")); + } + length += 1; + val = val * 10 + ((self.ch() as u32) - ('0' as u32)); + self.skip(); + self.lookahead(1); + } + + if length == 0 { + return Err(ScanError::new(*mark, + "while scanning a YAML directive, did not find expected version number")); + } + + Ok(val) + } + + fn scan_tag_directive_value(&mut self, mark: &Marker) -> Result { + unimplemented!(); + } + fn fetch_flow_collection_start(&mut self, tok :TokenType) -> ScanResult { // The indicators '[' and '{' may start a simple key. try!(self.save_simple_key()); From 169ec430391a762bc28dc78b47863b51393296ec Mon Sep 17 00:00:00 2001 From: Yuheng Chen Date: Thu, 28 May 2015 22:19:06 +0800 Subject: [PATCH 022/380] Fix unicode escape --- saphyr/src/scanner.rs | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/saphyr/src/scanner.rs b/saphyr/src/scanner.rs index 00ba30d..734c7f8 100644 --- a/saphyr/src/scanner.rs +++ b/saphyr/src/scanner.rs @@ -951,7 +951,14 @@ impl> Scanner { '"' => string.push('"'), '\'' => string.push('\''), '\\' => string.push('\\'), - //'N' => { string.push('\xc2'); string.push('\x85') }, + // NEL (#x85) + 'N' => string.push(char::from_u32(0x85).unwrap()), + // #xA0 + '_' => string.push(char::from_u32(0xA0).unwrap()), + // LS (#x2028) + 'L' => string.push(char::from_u32(0x2028).unwrap()), + // PS (#x2029) + 'P' => string.push(char::from_u32(0x2029).unwrap()), 'x' => code_length = 2, 'u' => code_length = 4, 'U' => code_length = 8, From 3d7a97ad3288476e6c19862bbc0a8dd495dffd53 Mon Sep 17 00:00:00 2001 From: Yuheng Chen Date: Fri, 29 May 2015 01:56:03 +0800 Subject: [PATCH 023/380] Add partial anchor support --- saphyr/Readme.md | 11 ++ saphyr/src/parser.rs | 71 +++++-- saphyr/src/scanner.rs | 238 +++++++++++++++++++++--- saphyr/src/yaml.rs | 11 +- saphyr/tests/spec_test.rs | 7 +- saphyr/tests/specs/libyaml_fail-03.yaml | 5 + 6 files changed, 293 insertions(+), 50 deletions(-) create mode 100644 saphyr/Readme.md create mode 100644 saphyr/tests/specs/libyaml_fail-03.yaml diff --git a/saphyr/Readme.md b/saphyr/Readme.md new file mode 100644 index 0000000..fb9de9f --- /dev/null +++ b/saphyr/Readme.md @@ -0,0 +1,11 @@ +# yaml-rust + +The missing Rust implementation for YAML 1.2. + +## Specification Compliance + +### Missing Feature + +* Tag directive +* Tag data type are ignored +* Alias & Anchor diff --git a/saphyr/src/parser.rs b/saphyr/src/parser.rs index 1108e2d..5608d63 100644 --- a/saphyr/src/parser.rs +++ b/saphyr/src/parser.rs @@ -36,18 +36,21 @@ pub enum Event { StreamEnd, DocumentStart, DocumentEnd, - Alias, - Scalar(String, TScalarStyle), - SequenceStart, + // anchor_id + Alias(usize), + Scalar(String, TScalarStyle, usize), + // anchor_id + SequenceStart(usize), SequenceEnd, - MappingStart, + // anchor_id + MappingStart(usize), MappingEnd } impl Event { fn empty_scalar() -> Event { // a null scalar - Event::Scalar("~".to_string(), TScalarStyle::Plain) + Event::Scalar("~".to_string(), TScalarStyle::Plain, 0) } } @@ -111,7 +114,7 @@ impl> Parser { return Ok(Event::StreamEnd); } let ev = try!(self.state_machine()); - // println!("EV {:?}", ev); + println!("EV {:?}", ev); recv.on_event(&ev); Ok(ev) } @@ -159,14 +162,16 @@ impl> Parser { fn load_node(&mut self, first_ev: &Event, recv: &mut R) -> Result<(), ScanError> { match *first_ev { - Event::Alias => { unimplemented!() }, - Event::Scalar(_, _) => { + Event::Alias(..) => { Ok(()) }, - Event::SequenceStart => { + Event::Scalar(_, _, _) => { + Ok(()) + }, + Event::SequenceStart(_) => { self.load_sequence(first_ev, recv) }, - Event::MappingStart => { + Event::MappingStart(_) => { self.load_mapping(first_ev, recv) }, _ => { println!("UNREACHABLE EVENT: {:?}", first_ev); @@ -366,34 +371,60 @@ impl> Parser { } fn parse_node(&mut self, block: bool, indentless_sequence: bool) -> ParseResult { - let tok = try!(self.peek()); + let mut tok = try!(self.peek()); + let anchor_id = 0; + match tok.1 { + TokenType::AliasToken(v) => { + self.pop_state(); + self.skip(); + // TODO(chenyh): find anchor id + return Ok(Event::Alias(0)); + }, + TokenType::AnchorToken(..) => { + self.skip(); + tok = try!(self.peek()); + if let TokenType::TagToken(_, _) = tok.1 { + self.skip(); + tok = try!(self.peek()); + } + }, + TokenType::TagToken(..) => { + // XXX: ex 7.2, an empty scalar can follow a secondary tag + // but we haven't implemented it + self.skip(); + tok = try!(self.peek()); + if let TokenType::AnchorToken(_) = tok.1 { + self.skip(); + tok = try!(self.peek()); + } + }, + _ => {} + } match tok.1 { - TokenType::AliasToken => unimplemented!(), - TokenType::AnchorToken => unimplemented!(), TokenType::BlockEntryToken if indentless_sequence => { self.state = State::IndentlessSequenceEntry; - Ok(Event::SequenceStart) + Ok(Event::SequenceStart(anchor_id)) }, TokenType::ScalarToken(style, v) => { self.pop_state(); self.skip(); - Ok(Event::Scalar(v, style)) + Ok(Event::Scalar(v, style, anchor_id)) }, TokenType::FlowSequenceStartToken => { self.state = State::FlowSequenceFirstEntry; - Ok(Event::SequenceStart) + Ok(Event::SequenceStart(anchor_id)) }, TokenType::FlowMappingStartToken => { self.state = State::FlowMappingFirstKey; - Ok(Event::MappingStart) + Ok(Event::MappingStart(anchor_id)) }, TokenType::BlockSequenceStartToken if block => { self.state = State::BlockSequenceFirstEntry; - Ok(Event::SequenceStart) + Ok(Event::SequenceStart(anchor_id)) }, TokenType::BlockMappingStartToken if block => { self.state = State::BlockMappingFirstKey; - Ok(Event::MappingStart) + Ok(Event::MappingStart(anchor_id)) }, _ => { Err(ScanError::new(tok.0, "while parsing a node, did not find expected node content")) } } @@ -574,7 +605,7 @@ impl> Parser { TokenType::KeyToken => { self.state = State::FlowSequenceEntryMappingKey; self.skip(); - Ok(Event::MappingStart) + Ok(Event::MappingStart(0)) } _ => { self.push_state(State::FlowSequenceEntry); diff --git a/saphyr/src/scanner.rs b/saphyr/src/scanner.rs index 734c7f8..ddec711 100644 --- a/saphyr/src/scanner.rs +++ b/saphyr/src/scanner.rs @@ -70,9 +70,10 @@ pub enum TokenType { FlowEntryToken, KeyToken, ValueToken, - AliasToken, - AnchorToken, - TagToken, + AliasToken(String), + AnchorToken(String), + // handle, suffix + TagToken(String, String), ScalarToken(TScalarStyle, String) } @@ -158,6 +159,14 @@ fn is_digit(c: char) -> bool { c >= '0' && c <= '9' } #[inline] +fn is_alpha(c: char) -> bool { + match c { + '0'...'9' | 'a'...'z' | 'A'...'Z' => true, + '_' | '-' => true, + _ => false + } +} +#[inline] fn is_hex(c: char) -> bool { (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') @@ -335,32 +344,32 @@ impl> Scanner { let c = self.buffer[0]; let nc = self.buffer[1]; match c { - '[' => try!(self.fetch_flow_collection_start(TokenType::FlowSequenceStartToken)), - '{' => try!(self.fetch_flow_collection_start(TokenType::FlowMappingStartToken)), - ']' => try!(self.fetch_flow_collection_end(TokenType::FlowSequenceEndToken)), - '}' => try!(self.fetch_flow_collection_end(TokenType::FlowMappingEndToken)), - ',' => try!(self.fetch_flow_entry()), - '-' if is_blankz(nc) => try!(self.fetch_block_entry()), - '?' if self.flow_level > 0 || is_blankz(nc) => try!(self.fetch_key()), - ':' if self.flow_level > 0 || is_blankz(nc) => try!(self.fetch_value()), - '*' => unimplemented!(), - '&' => unimplemented!(), - '!' => unimplemented!(), + '[' => self.fetch_flow_collection_start(TokenType::FlowSequenceStartToken), + '{' => self.fetch_flow_collection_start(TokenType::FlowMappingStartToken), + ']' => self.fetch_flow_collection_end(TokenType::FlowSequenceEndToken), + '}' => self.fetch_flow_collection_end(TokenType::FlowMappingEndToken), + ',' => self.fetch_flow_entry(), + '-' if is_blankz(nc) => self.fetch_block_entry(), + '?' if self.flow_level > 0 || is_blankz(nc) => self.fetch_key(), + ':' if self.flow_level > 0 || is_blankz(nc) => self.fetch_value(), + // Is it an alias? + '*' => self.fetch_anchor(true), + // Is it an anchor? + '&' => self.fetch_anchor(false), + '!' => self.fetch_tag(), // Is it a literal scalar? - '|' if self.flow_level == 0 => try!(self.fetch_block_scalar(true)), + '|' if self.flow_level == 0 => self.fetch_block_scalar(true), // Is it a folded scalar? - '>' if self.flow_level == 0 => try!(self.fetch_block_scalar(false)), - '\'' => try!(self.fetch_flow_scalar(true)), - '"' => try!(self.fetch_flow_scalar(false)), + '>' if self.flow_level == 0 => self.fetch_block_scalar(false), + '\'' => self.fetch_flow_scalar(true), + '"' => self.fetch_flow_scalar(false), // plain scalar - '-' if !is_blankz(nc) => try!(self.fetch_plain_scalar()), - ':' | '?' if !is_blankz(nc) && self.flow_level == 0 => try!(self.fetch_plain_scalar()), + '-' if !is_blankz(nc) => self.fetch_plain_scalar(), + ':' | '?' if !is_blankz(nc) && self.flow_level == 0 => self.fetch_plain_scalar(), '%' | '@' | '`' => return Err(ScanError::new(self.mark, &format!("unexpected character: `{}'", c))), - _ => try!(self.fetch_plain_scalar()), + _ => self.fetch_plain_scalar(), } - - Ok(()) } pub fn next_token(&mut self) -> Result, ScanError> { @@ -545,7 +554,7 @@ impl> Scanner { let start_mark = self.mark; let mut string = String::new(); self.lookahead(1); - while self.ch().is_alphabetic() { + while is_alpha(self.ch()) { string.push(self.ch()); self.skip(); self.lookahead(1); @@ -591,6 +600,187 @@ impl> Scanner { unimplemented!(); } + fn fetch_tag(&mut self) -> ScanResult { + try!(self.save_simple_key()); + self.disallow_simple_key(); + + let tok = try!(self.scan_tag()); + self.tokens.push_back(tok); + Ok(()) + } + + fn scan_tag(&mut self) -> Result { + let start_mark = self.mark; + let mut handle = String::new(); + let mut suffix = String::new(); + let mut secondary = false; + + // Check if the tag is in the canonical form (verbatim). + self.lookahead(2); + + if self.buffer[1] == '<' { + // Eat '!<' + self.skip(); + self.skip(); + suffix = try!(self.scan_tag_uri(false, false, &String::new(), &start_mark)); + + if self.ch() != '>' { + return Err(ScanError::new(start_mark, + "while scanning a tag, did not find the expected '>'")); + } + + self.skip(); + } else { + // The tag has either the '!suffix' or the '!handle!suffix' + handle = try!(self.scan_tag_handle(false, &start_mark)); + // Check if it is, indeed, handle. + if handle.len() >= 2 && handle.starts_with('!') && handle.ends_with('!') { + if handle == "!!" { + secondary = true; + } + suffix = try!(self.scan_tag_uri(false, secondary, &String::new(), &start_mark)); + } else { + suffix = try!(self.scan_tag_uri(false, false, &handle, &start_mark)); + handle = "!".to_string(); + // A special case: the '!' tag. Set the handle to '' and the + // suffix to '!'. + if suffix.len() == 0 { + handle.clear(); + suffix = "!".to_string(); + } + } + } + + self.lookahead(1); + if is_blankz(self.ch()) { + // XXX: ex 7.2, an empty scalar can follow a secondary tag + Ok(Token(start_mark, TokenType::TagToken(handle, suffix))) + } else { + Err(ScanError::new(start_mark, + "while scanning a tag, did not find expected whitespace or line break")) + } + } + + fn scan_tag_handle(&mut self, directive: bool, mark: &Marker) -> Result { + let mut string = String::new(); + self.lookahead(1); + if self.ch() != '!' { + return Err(ScanError::new(*mark, + "while scanning a tag, did not find expected '!'")); + } + + string.push(self.ch()); + self.skip(); + + self.lookahead(1); + while is_alpha(self.ch()) { + string.push(self.ch()); + self.skip(); + self.lookahead(1); + } + + // Check if the trailing character is '!' and copy it. + if self.ch() == '!' { + string.push(self.ch()); + self.skip(); + } else { + // It's either the '!' tag or not really a tag handle. If it's a %TAG + // directive, it's an error. If it's a tag token, it must be a part of + // URI. + if directive && string != "!" { + return Err(ScanError::new(*mark, + "while parsing a tag directive, did not find expected '!'")); + } + } + Ok(string) + } + + fn scan_tag_uri(&mut self, directive: bool, is_secondary: bool, + head: &String, mark: &Marker) -> Result { + let mut length = head.len(); + let mut string = String::new(); + + // Copy the head if needed. + // Note that we don't copy the leading '!' character. + if length > 1 { + string.extend(head.chars().skip(1)); + } + + self.lookahead(1); + /* + * The set of characters that may appear in URI is as follows: + * + * '0'-'9', 'A'-'Z', 'a'-'z', '_', '-', ';', '/', '?', ':', '@', '&', + * '=', '+', '$', ',', '.', '!', '~', '*', '\'', '(', ')', '[', ']', + * '%'. + */ + while match self.ch() { + ';' | '/' | '?' | ':' | '@' | '&' if !is_secondary => true, + '=' | '+' | '$' | ',' | '.' | '!' | '~' | '*' | '\'' | '(' | ')' | '[' | ']' if !is_secondary => true, + '%' => true, + c if is_alpha(c) => true, + _ => false + } { + // Check if it is a URI-escape sequence. + if self.ch() == '%' { + unimplemented!(); + } else { + string.push(self.ch()); + self.skip(); + } + + length += 1; + self.lookahead(1); + } + + if length == 0 { + return Err(ScanError::new(*mark, + "while parsing a tag, did not find expected tag URI")); + } + + Ok(string) + } + + fn fetch_anchor(&mut self, alias: bool) -> ScanResult { + try!(self.save_simple_key()); + self.disallow_simple_key(); + + let tok = try!(self.scan_anchor(alias)); + + self.tokens.push_back(tok); + + Ok(()) + } + + fn scan_anchor(&mut self, alias: bool) + -> Result { + let mut string = String::new(); + let start_mark = self.mark; + + self.skip(); + self.lookahead(1); + while is_alpha(self.ch()) { + string.push(self.ch()); + self.skip(); + self.lookahead(1); + } + + if string.is_empty() + || match self.ch() { + c if is_blankz(c) => false, + '?' | ':' | ',' | ']' | '}' | '%' | '@' | '`' => false, + _ => true + } { + return Err(ScanError::new(start_mark, "while scanning an anchor or alias, did not find expected alphabetic or numeric character")); + } + + if alias { + Ok(Token(start_mark, TokenType::AliasToken(string))) + } else { + Ok(Token(start_mark, TokenType::AnchorToken(string))) + } + } + fn fetch_flow_collection_start(&mut self, tok :TokenType) -> ScanResult { // The indicators '[' and '{' may start a simple key. try!(self.save_simple_key()); diff --git a/saphyr/src/yaml.rs b/saphyr/src/yaml.rs index 33522b2..585feb9 100644 --- a/saphyr/src/yaml.rs +++ b/saphyr/src/yaml.rs @@ -14,6 +14,7 @@ pub enum Yaml { Boolean(bool), Array(self::Array), Hash(self::Hash), + Alias(usize), Null, /// Access non-exist node by Index trait will return BadValue. /// This simplifies error handling of user. @@ -45,14 +46,14 @@ impl EventReceiver for YamlLoader { _ => unreachable!() } }, - Event::SequenceStart => { + Event::SequenceStart(_) => { self.doc_stack.push(Yaml::Array(Vec::new())); }, Event::SequenceEnd => { let node = self.doc_stack.pop().unwrap(); self.insert_new_node(node); }, - Event::MappingStart => { + Event::MappingStart(_) => { self.doc_stack.push(Yaml::Hash(Hash::new())); self.key_stack.push(Yaml::BadValue); }, @@ -61,7 +62,7 @@ impl EventReceiver for YamlLoader { let node = self.doc_stack.pop().unwrap(); self.insert_new_node(node); }, - Event::Scalar(ref v, style) => { + Event::Scalar(ref v, style, _) => { let node = if style != TScalarStyle::Plain { Yaml::String(v.clone()) } else { @@ -77,6 +78,10 @@ impl EventReceiver for YamlLoader { self.insert_new_node(node); }, + Event::Alias(id) => { + // XXX(chenyh): how to handle alias? + self.insert_new_node(Yaml::Alias(id)); + } _ => { /* ignore */ } } // println!("DOC {:?}", self.doc_stack); diff --git a/saphyr/tests/spec_test.rs b/saphyr/tests/spec_test.rs index a796403..f6342a3 100644 --- a/saphyr/tests/spec_test.rs +++ b/saphyr/tests/spec_test.rs @@ -27,17 +27,18 @@ impl EventReceiver for YamlChecker { let tev = match *ev { Event::DocumentStart => TestEvent::OnDocumentStart, Event::DocumentEnd => TestEvent::OnDocumentEnd, - Event::SequenceStart => TestEvent::OnSequenceStart, + Event::SequenceStart(..) => TestEvent::OnSequenceStart, Event::SequenceEnd => TestEvent::OnSequenceEnd, - Event::MappingStart => TestEvent::OnMapStart, + Event::MappingStart(..) => TestEvent::OnMapStart, Event::MappingEnd => TestEvent::OnMapEnd, - Event::Scalar(ref v, style) => { + Event::Scalar(ref v, style, _) => { if v == "~" && style == TScalarStyle::Plain { TestEvent::OnNull } else { TestEvent::OnScalar } }, + Event::Alias(_) => TestEvent::OnAlias, _ => { return } // ignore other events }; self.evs.push(tev); diff --git a/saphyr/tests/specs/libyaml_fail-03.yaml b/saphyr/tests/specs/libyaml_fail-03.yaml new file mode 100644 index 0000000..fc821dc --- /dev/null +++ b/saphyr/tests/specs/libyaml_fail-03.yaml @@ -0,0 +1,5 @@ +# ex 7.2 +{ + foo : !!str, + !!str : bar, +} From 68810253e8c08246d744f1731c500fd3ed3c0316 Mon Sep 17 00:00:00 2001 From: Yuheng Chen Date: Fri, 29 May 2015 02:26:37 +0800 Subject: [PATCH 024/380] Add alias event --- saphyr/Readme.md | 2 +- saphyr/src/parser.rs | 41 +++++++++++++++++++++++++++++---------- saphyr/tests/spec_test.rs | 5 +++++ 3 files changed, 37 insertions(+), 11 deletions(-) diff --git a/saphyr/Readme.md b/saphyr/Readme.md index fb9de9f..58ac418 100644 --- a/saphyr/Readme.md +++ b/saphyr/Readme.md @@ -8,4 +8,4 @@ The missing Rust implementation for YAML 1.2. * Tag directive * Tag data type are ignored -* Alias & Anchor +* Alias while desearilization diff --git a/saphyr/src/parser.rs b/saphyr/src/parser.rs index 5608d63..fcbf37c 100644 --- a/saphyr/src/parser.rs +++ b/saphyr/src/parser.rs @@ -1,4 +1,5 @@ use scanner::*; +use std::collections::HashMap; // use yaml::*; #[derive(Clone, Copy, PartialEq, Debug, Eq)] @@ -61,6 +62,8 @@ pub struct Parser { state: State, marks: Vec, token: Option, + anchors: HashMap, + anchor_id: usize, } pub trait EventReceiver { @@ -77,6 +80,10 @@ impl> Parser { state: State::StreamStart, marks: Vec::new(), token: None, + + anchors: HashMap::new(), + // valid anchor_id starts from 1 + anchor_id: 1, } } @@ -137,6 +144,8 @@ impl> Parser { recv.on_event(&Event::StreamEnd); return Ok(()); } + // clear anchors before a new document + self.anchors.clear(); try!(self.load_document(&ev, recv)); if !multi { break; @@ -370,17 +379,33 @@ impl> Parser { Ok(Event::DocumentEnd) } + fn register_anchor(&mut self, name: &String, _: &Marker) -> Result { + // anchors can be overrided/reused + // if self.anchors.contains_key(name) { + // return Err(ScanError::new(*mark, + // "while parsing anchor, found duplicated anchor")); + // } + let new_id = self.anchor_id; + self.anchor_id += 1; + self.anchors.insert(name.clone(), new_id); + Ok(new_id) + } + fn parse_node(&mut self, block: bool, indentless_sequence: bool) -> ParseResult { let mut tok = try!(self.peek()); - let anchor_id = 0; + let mut anchor_id = 0; match tok.1 { - TokenType::AliasToken(v) => { + TokenType::AliasToken(name) => { self.pop_state(); self.skip(); // TODO(chenyh): find anchor id - return Ok(Event::Alias(0)); + match self.anchors.get(&name) { + None => return Err(ScanError::new(tok.0, "while parsing node, found unknown anchor")), + Some(id) => return Ok(Event::Alias(*id)) + } }, - TokenType::AnchorToken(..) => { + TokenType::AnchorToken(name) => { + anchor_id = try!(self.register_anchor(&name, &tok.0)); self.skip(); tok = try!(self.peek()); if let TokenType::TagToken(_, _) = tok.1 { @@ -393,7 +418,8 @@ impl> Parser { // but we haven't implemented it self.skip(); tok = try!(self.peek()); - if let TokenType::AnchorToken(_) = tok.1 { + if let TokenType::AnchorToken(name) = tok.1 { + anchor_id = try!(self.register_anchor(&name, &tok.0)); self.skip(); tok = try!(self.peek()); } @@ -724,8 +750,3 @@ impl> Parser { } } -#[cfg(test)] -mod test { - use super::*; -} - diff --git a/saphyr/tests/spec_test.rs b/saphyr/tests/spec_test.rs index f6342a3..bbb647f 100644 --- a/saphyr/tests/spec_test.rs +++ b/saphyr/tests/spec_test.rs @@ -67,3 +67,8 @@ macro_rules! assert_next { include!("specexamples.rs.inc"); include!("spec_test.rs.inc"); +// hand-crafted tests +//#[test] +//fn test_hc_alias() { +//} + From ec1cf5a223d9a705991efc15d63d9d54603f6728 Mon Sep 17 00:00:00 2001 From: Yuheng Chen Date: Fri, 29 May 2015 02:57:41 +0800 Subject: [PATCH 025/380] Add scan_tag_directive_value --- saphyr/src/parser.rs | 9 ++++---- saphyr/src/scanner.rs | 49 +++++++++++++++++++++++++++++++++++++------ 2 files changed, 48 insertions(+), 10 deletions(-) diff --git a/saphyr/src/parser.rs b/saphyr/src/parser.rs index fcbf37c..b2a35cb 100644 --- a/saphyr/src/parser.rs +++ b/saphyr/src/parser.rs @@ -291,7 +291,7 @@ impl> Parser { return Ok(Event::StreamEnd); }, TokenType::VersionDirectiveToken(..) - | TokenType::TagDirectiveToken + | TokenType::TagDirectiveToken(..) | TokenType::DocumentStartToken => { // explicit document self._explict_document_start() @@ -320,8 +320,9 @@ impl> Parser { // "found incompatible YAML document")); //} }, - TokenType::TagDirectiveToken => { - unimplemented!(); + TokenType::TagDirectiveToken(..) => { + // unimplemented!(); + // TODO add tag directive }, _ => break } @@ -347,7 +348,7 @@ impl> Parser { let tok = try!(self.peek()); match tok.1 { TokenType::VersionDirectiveToken(..) - |TokenType::TagDirectiveToken + |TokenType::TagDirectiveToken(..) |TokenType::DocumentStartToken |TokenType::DocumentEndToken |TokenType::StreamEndToken => { diff --git a/saphyr/src/scanner.rs b/saphyr/src/scanner.rs index ddec711..94b2639 100644 --- a/saphyr/src/scanner.rs +++ b/saphyr/src/scanner.rs @@ -56,7 +56,8 @@ pub enum TokenType { StreamEndToken, /// major, minor VersionDirectiveToken(u32, u32), - TagDirectiveToken, + /// handle, prefix + TagDirectiveToken(String, String), DocumentStartToken, DocumentEndToken, BlockSequenceStartToken, @@ -72,7 +73,7 @@ pub enum TokenType { ValueToken, AliasToken(String), AnchorToken(String), - // handle, suffix + /// handle, suffix TagToken(String, String), ScalarToken(TScalarStyle, String) } @@ -497,6 +498,7 @@ impl> Scanner { "TAG" => { try!(self.scan_tag_directive_value(&start_mark)) }, + // XXX This should be a warning instead of an error _ => return Err(ScanError::new(start_mark, "while scanning a directive, found uknown directive name")) }; @@ -597,7 +599,32 @@ impl> Scanner { } fn scan_tag_directive_value(&mut self, mark: &Marker) -> Result { - unimplemented!(); + self.lookahead(1); + /* Eat whitespaces. */ + while is_blank(self.ch()) { + self.skip(); + self.lookahead(1); + } + let handle = try!(self.scan_tag_handle(true, mark)); + + self.lookahead(1); + /* Eat whitespaces. */ + while is_blank(self.ch()) { + self.skip(); + self.lookahead(1); + } + + let is_secondary = handle == "!!"; + let prefix = try!(self.scan_tag_uri(true, is_secondary, &String::new(), mark)); + + self.lookahead(1); + + if !is_blankz(self.ch()) { + Err(ScanError::new(*mark, + "while scanning TAG, did not find expected whitespace or line break")) + } else { + Ok(Token(*mark, TokenType::TagDirectiveToken(handle, prefix))) + } } fn fetch_tag(&mut self) -> ScanResult { @@ -612,7 +639,7 @@ impl> Scanner { fn scan_tag(&mut self) -> Result { let start_mark = self.mark; let mut handle = String::new(); - let mut suffix = String::new(); + let mut suffix; let mut secondary = false; // Check if the tag is in the canonical form (verbatim). @@ -715,8 +742,8 @@ impl> Scanner { * '%'. */ while match self.ch() { - ';' | '/' | '?' | ':' | '@' | '&' if !is_secondary => true, - '=' | '+' | '$' | ',' | '.' | '!' | '~' | '*' | '\'' | '(' | ')' | '[' | ']' if !is_secondary => true, + ';' | '/' | '?' | ':' | '@' | '&' => true, + '=' | '+' | '$' | ',' | '.' | '!' | '~' | '*' | '\'' | '(' | ')' | '[' | ']' => true, '%' => true, c if is_alpha(c) => true, _ => false @@ -1853,5 +1880,15 @@ key: next!(p, StreamEndToken); end!(p); } + + #[test] + fn test_uri() { + // TODO + } + + #[test] + fn test_uri_escapes() { + // TODO + } } From 5ae4b208b21fbe5ff210b5d50150c15d6ac9032b Mon Sep 17 00:00:00 2001 From: Yuheng Chen Date: Fri, 29 May 2015 03:07:32 +0800 Subject: [PATCH 026/380] Add travis CI config --- saphyr/.travis.yml | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 saphyr/.travis.yml diff --git a/saphyr/.travis.yml b/saphyr/.travis.yml new file mode 100644 index 0000000..3d713d8 --- /dev/null +++ b/saphyr/.travis.yml @@ -0,0 +1,4 @@ +language: rust +rust: + - 1.0.0 + - nightly From 064f10beeec90c46b93207064e0fca5e643a1280 Mon Sep 17 00:00:00 2001 From: Yuheng Chen Date: Sat, 30 May 2015 18:49:54 +0800 Subject: [PATCH 027/380] Add scan_uri_escapes --- saphyr/src/scanner.rs | 70 ++++++++++++++++++++++++++++++++++++--- saphyr/tests/spec_test.rs | 2 +- 2 files changed, 67 insertions(+), 5 deletions(-) diff --git a/saphyr/src/scanner.rs b/saphyr/src/scanner.rs index 94b2639..2887356 100644 --- a/saphyr/src/scanner.rs +++ b/saphyr/src/scanner.rs @@ -499,8 +499,18 @@ impl> Scanner { try!(self.scan_tag_directive_value(&start_mark)) }, // XXX This should be a warning instead of an error - _ => return Err(ScanError::new(start_mark, - "while scanning a directive, found uknown directive name")) + _ => { + // skip current line + self.lookahead(1); + while !is_breakz(self.ch()) { + self.skip(); + self.lookahead(1); + } + // XXX return an empty TagDirective token + Token(start_mark, TokenType::TagDirectiveToken(String::new(), String::new())) + // return Err(ScanError::new(start_mark, + // "while scanning a directive, found unknown directive name")) + } }; self.lookahead(1); @@ -722,7 +732,7 @@ impl> Scanner { Ok(string) } - fn scan_tag_uri(&mut self, directive: bool, is_secondary: bool, + fn scan_tag_uri(&mut self, directive: bool, _is_secondary: bool, head: &String, mark: &Marker) -> Result { let mut length = head.len(); let mut string = String::new(); @@ -750,7 +760,7 @@ impl> Scanner { } { // Check if it is a URI-escape sequence. if self.ch() == '%' { - unimplemented!(); + string.push(try!(self.scan_uri_escapes(directive, mark))); } else { string.push(self.ch()); self.skip(); @@ -768,6 +778,58 @@ impl> Scanner { Ok(string) } + fn scan_uri_escapes(&mut self, _directive: bool, mark: &Marker) + -> Result { + let mut width = 0usize; + let mut code = 0u32; + loop { + self.lookahead(3); + + if !(self.ch() == '%' + && is_hex(self.buffer[1]) + && is_hex(self.buffer[2])) { + return Err(ScanError::new(*mark, + "while parsing a tag, did not find URI escaped octet")); + } + + let octet = (as_hex(self.buffer[1]) << 4) + as_hex(self.buffer[2]); + if width == 0 { + width = match octet { + _ if octet & 0x80 == 0x00 => 1, + _ if octet & 0xE0 == 0xC0 => 2, + _ if octet & 0xF0 == 0xE0 => 3, + _ if octet & 0xF8 == 0xF0 => 4, + _ => { + return Err(ScanError::new(*mark, + "while parsing a tag, found an incorrect leading UTF-8 octet")); + } + }; + code = octet; + } else { + if octet & 0xc0 != 0x80 { + return Err(ScanError::new(*mark, + "while parsing a tag, found an incorrect trailing UTF-8 octet")); + } + code = (code << 8) + octet; + } + + self.skip(); + self.skip(); + self.skip(); + + width -= 1; + if width == 0 { + break; + } + } + + match char::from_u32(code) { + Some(ch) => Ok(ch), + None => Err(ScanError::new(*mark, + "while parsing a tag, found an invalid UTF-8 codepoint")) + } + } + fn fetch_anchor(&mut self, alias: bool) -> ScanResult { try!(self.save_simple_key()); self.disallow_simple_key(); diff --git a/saphyr/tests/spec_test.rs b/saphyr/tests/spec_test.rs index bbb647f..daa38d2 100644 --- a/saphyr/tests/spec_test.rs +++ b/saphyr/tests/spec_test.rs @@ -1,9 +1,9 @@ #![allow(dead_code)] +#![allow(non_upper_case_globals)] extern crate yaml_rust; use yaml_rust::parser::{Parser, EventReceiver, Event}; use yaml_rust::scanner::TScalarStyle; -use yaml_rust::yaml::Yaml; #[derive(Clone, PartialEq, PartialOrd, Debug)] enum TestEvent { From e4862a7c8f3f238b2ef30f62ea1c837030350a3d Mon Sep 17 00:00:00 2001 From: Yuheng Chen Date: Sat, 30 May 2015 22:39:50 +0800 Subject: [PATCH 028/380] Add tag:yaml.org,2002 parsing --- saphyr/src/parser.rs | 33 ++++++---- saphyr/src/scanner.rs | 1 - saphyr/src/yaml.rs | 135 ++++++++++++++++++++++++++++++++------ saphyr/tests/spec_test.rs | 2 +- 4 files changed, 137 insertions(+), 34 deletions(-) diff --git a/saphyr/src/parser.rs b/saphyr/src/parser.rs index b2a35cb..b70f88e 100644 --- a/saphyr/src/parser.rs +++ b/saphyr/src/parser.rs @@ -39,7 +39,8 @@ pub enum Event { DocumentEnd, // anchor_id Alias(usize), - Scalar(String, TScalarStyle, usize), + // value, style, anchor_id, tag + Scalar(String, TScalarStyle, usize, Option), // anchor_id SequenceStart(usize), SequenceEnd, @@ -51,7 +52,11 @@ pub enum Event { impl Event { fn empty_scalar() -> Event { // a null scalar - Event::Scalar("~".to_string(), TScalarStyle::Plain, 0) + Event::Scalar("~".to_string(), TScalarStyle::Plain, 0, None) + } + + fn empty_scalar_with_anchor(anchor: usize, tag: TokenType) -> Event { + Event::Scalar("".to_string(), TScalarStyle::Plain, anchor, Some(tag)) } } @@ -121,7 +126,7 @@ impl> Parser { return Ok(Event::StreamEnd); } let ev = try!(self.state_machine()); - println!("EV {:?}", ev); + // println!("EV {:?}", ev); recv.on_event(&ev); Ok(ev) } @@ -174,7 +179,7 @@ impl> Parser { Event::Alias(..) => { Ok(()) }, - Event::Scalar(_, _, _) => { + Event::Scalar(..) => { Ok(()) }, Event::SequenceStart(_) => { @@ -218,8 +223,8 @@ impl> Parser { } fn state_machine(&mut self) -> ParseResult { - let next_tok = try!(self.peek()); - println!("cur_state {:?}, next tok: {:?}", self.state, next_tok); + // let next_tok = try!(self.peek()); + // println!("cur_state {:?}, next tok: {:?}", self.state, next_tok); match self.state { State::StreamStart => self.stream_start(), @@ -395,11 +400,11 @@ impl> Parser { fn parse_node(&mut self, block: bool, indentless_sequence: bool) -> ParseResult { let mut tok = try!(self.peek()); let mut anchor_id = 0; + let mut tag = None; match tok.1 { TokenType::AliasToken(name) => { self.pop_state(); self.skip(); - // TODO(chenyh): find anchor id match self.anchors.get(&name) { None => return Err(ScanError::new(tok.0, "while parsing node, found unknown anchor")), Some(id) => return Ok(Event::Alias(*id)) @@ -410,13 +415,13 @@ impl> Parser { self.skip(); tok = try!(self.peek()); if let TokenType::TagToken(_, _) = tok.1 { + tag = Some(tok.1); self.skip(); tok = try!(self.peek()); } }, TokenType::TagToken(..) => { - // XXX: ex 7.2, an empty scalar can follow a secondary tag - // but we haven't implemented it + tag = Some(tok.1); self.skip(); tok = try!(self.peek()); if let TokenType::AnchorToken(name) = tok.1 { @@ -435,7 +440,7 @@ impl> Parser { TokenType::ScalarToken(style, v) => { self.pop_state(); self.skip(); - Ok(Event::Scalar(v, style, anchor_id)) + Ok(Event::Scalar(v, style, anchor_id, tag)) }, TokenType::FlowSequenceStartToken => { self.state = State::FlowSequenceFirstEntry; @@ -453,6 +458,11 @@ impl> Parser { self.state = State::BlockMappingFirstKey; Ok(Event::MappingStart(anchor_id)) }, + // ex 7.2, an empty scalar can follow a secondary tag + _ if tag.is_some() || anchor_id > 0 => { + self.pop_state(); + Ok(Event::empty_scalar_with_anchor(anchor_id, tag.unwrap())) + }, _ => { Err(ScanError::new(tok.0, "while parsing a node, did not find expected node content")) } } } @@ -683,7 +693,8 @@ impl> Parser { self.skip(); tok = try!(self.peek()); match tok.1 { - TokenType::BlockEntryToken | TokenType::BlockEndToken => { + TokenType::BlockEntryToken + | TokenType::BlockEndToken => { self.state = State::BlockSequenceEntry; Ok(Event::empty_scalar()) }, diff --git a/saphyr/src/scanner.rs b/saphyr/src/scanner.rs index 2887356..0d2c86e 100644 --- a/saphyr/src/scanner.rs +++ b/saphyr/src/scanner.rs @@ -1322,7 +1322,6 @@ impl> Scanner { trailing_breaks.clear(); leading_break.clear(); } - leading_blanks = false; } else { string.extend(whitespaces.chars()); whitespaces.clear(); diff --git a/saphyr/src/yaml.rs b/saphyr/src/yaml.rs index 585feb9..ec14088 100644 --- a/saphyr/src/yaml.rs +++ b/saphyr/src/yaml.rs @@ -4,12 +4,14 @@ use std::string; use std::str::FromStr; use std::mem; use parser::*; -use scanner::{TScalarStyle, ScanError}; +use scanner::{TScalarStyle, ScanError, TokenType}; #[derive(Clone, PartialEq, PartialOrd, Debug, Eq, Ord)] pub enum Yaml { - /// number types are stored as String, and parsed on demand. - Number(string::String), + /// float types are stored as String, and parsed on demand. + /// Note that f64 does NOT implement Eq trait and can NOT be stored in BTreeMap + Real(string::String), + Integer(i64), String(string::String), Boolean(bool), Array(self::Array), @@ -33,7 +35,7 @@ pub struct YamlLoader { impl EventReceiver for YamlLoader { fn on_event(&mut self, ev: &Event) { - println!("EV {:?}", ev); + // println!("EV {:?}", ev); match *ev { Event::DocumentStart => { // do nothing @@ -62,17 +64,48 @@ impl EventReceiver for YamlLoader { let node = self.doc_stack.pop().unwrap(); self.insert_new_node(node); }, - Event::Scalar(ref v, style, _) => { + Event::Scalar(ref v, style, _, ref tag) => { let node = if style != TScalarStyle::Plain { Yaml::String(v.clone()) } else { - match v.as_ref() { - "~" => Yaml::Null, - "true" => Yaml::Boolean(true), - "false" => Yaml::Boolean(false), - // try parsing as f64 - _ if v.parse::().is_ok() => Yaml::Number(v.clone()), - _ => Yaml::String(v.clone()) + match tag { + &Some(TokenType::TagToken(ref handle, ref suffix)) => { + // XXX tag:yaml.org,2002: + if handle == "!!" { + match suffix.as_ref() { + "bool" => { + // "true" or "false" + match v.parse::() { + Err(_) => Yaml::BadValue, + Ok(v) => Yaml::Boolean(v) + } + }, + "int" => { + match v.parse::() { + Err(_) => Yaml::BadValue, + Ok(v) => Yaml::Integer(v) + } + }, + "float" => { + match v.parse::() { + Err(_) => Yaml::BadValue, + Ok(_) => Yaml::Real(v.clone()) + } + }, + "null" => { + match v.as_ref() { + "~" | "null" => Yaml::Null, + _ => Yaml::BadValue, + } + } + _ => Yaml::String(v.clone()), + } + } else { + Yaml::String(v.clone()) + } + }, + // Datatype is not specified, or unrecognized + _ => { Yaml::from_str(v.as_ref()) } } }; @@ -149,6 +182,7 @@ pub fn $name(&self) -> Option<$t> { impl Yaml { define_as!(as_bool, bool, Boolean); + define_as!(as_i64, i64, Integer); define_as_ref!(as_str, &str, String); define_as_ref!(as_hash, &Hash, Hash); @@ -168,17 +202,25 @@ impl Yaml { } } - pub fn as_number(&self) -> Option { + pub fn as_f64(&self) -> Option { match *self { - Yaml::Number(ref v) => { - v.parse::().ok() + Yaml::Real(ref v) => { + v.parse::().ok() }, _ => None } } - pub fn from_str(s: &str) -> Yaml { - Yaml::String(s.to_string()) + pub fn from_str(v: &str) -> Yaml { + match v { + "~" | "null" => Yaml::Null, + "true" => Yaml::Boolean(true), + "false" => Yaml::Boolean(false), + _ if v.parse::().is_ok() => Yaml::Integer(v.parse::().unwrap()), + // try parsing as f64 + _ if v.parse::().is_ok() => Yaml::Real(v.to_string()), + _ => Yaml::String(v.to_string()) + } } } @@ -220,9 +262,9 @@ c: [1, 2] "; let out = YamlLoader::load_from_str(&s).unwrap(); let doc = &out[0]; - assert_eq!(doc["a"].as_number::().unwrap(), 1); - assert_eq!(doc["b"].as_number::().unwrap(), 2.2f32); - assert_eq!(doc["c"][1].as_number::().unwrap(), 2); + assert_eq!(doc["a"].as_i64().unwrap(), 1i64); + assert_eq!(doc["b"].as_f64().unwrap(), 2.2f64); + assert_eq!(doc["c"][1].as_i64().unwrap(), 2i64); assert!(doc["d"][0].is_badvalue()); } @@ -246,7 +288,6 @@ a7: 你好 ".to_string(); let out = YamlLoader::load_from_str(&s).unwrap(); let doc = &out[0]; - println!("DOC {:?}", doc); assert_eq!(doc["a7"].as_str().unwrap(), "你好"); } @@ -264,5 +305,57 @@ a7: 你好 assert_eq!(out.len(), 3); } + #[test] + fn test_plain_datatype() { + let s = +" +- 'string' +- \"string\" +- string +- 123 +- -321 +- 1.23 +- -1e4 +- ~ +- null +- true +- false +- !!str 0 +- !!int 100 +- !!float 2 +- !!null ~ +- !!bool true +- !!bool false +# bad values +- !!int string +- !!float string +- !!bool null +- !!null val +"; + let out = YamlLoader::load_from_str(&s).unwrap(); + let doc = &out[0]; + + assert_eq!(doc[0].as_str().unwrap(), "string"); + assert_eq!(doc[1].as_str().unwrap(), "string"); + assert_eq!(doc[2].as_str().unwrap(), "string"); + assert_eq!(doc[3].as_i64().unwrap(), 123); + assert_eq!(doc[4].as_i64().unwrap(), -321); + assert_eq!(doc[5].as_f64().unwrap(), 1.23); + assert_eq!(doc[6].as_f64().unwrap(), -1e4); + assert!(doc[7].is_null()); + assert!(doc[8].is_null()); + assert_eq!(doc[9].as_bool().unwrap(), true); + assert_eq!(doc[10].as_bool().unwrap(), false); + assert_eq!(doc[11].as_str().unwrap(), "0"); + assert_eq!(doc[12].as_i64().unwrap(), 100); + assert_eq!(doc[13].as_f64().unwrap(), 2.0); + assert!(doc[14].is_null()); + assert_eq!(doc[15].as_bool().unwrap(), true); + assert_eq!(doc[16].as_bool().unwrap(), false); + assert!(doc[17].is_badvalue()); + assert!(doc[18].is_badvalue()); + assert!(doc[19].is_badvalue()); + assert!(doc[20].is_badvalue()); + } } diff --git a/saphyr/tests/spec_test.rs b/saphyr/tests/spec_test.rs index daa38d2..fa1d065 100644 --- a/saphyr/tests/spec_test.rs +++ b/saphyr/tests/spec_test.rs @@ -31,7 +31,7 @@ impl EventReceiver for YamlChecker { Event::SequenceEnd => TestEvent::OnSequenceEnd, Event::MappingStart(..) => TestEvent::OnMapStart, Event::MappingEnd => TestEvent::OnMapEnd, - Event::Scalar(ref v, style, _) => { + Event::Scalar(ref v, style, _, _)=> { if v == "~" && style == TScalarStyle::Plain { TestEvent::OnNull } else { From f87edaed986cc6eb83c77c54031bf36a2a3a2e0c Mon Sep 17 00:00:00 2001 From: Yuheng Chen Date: Sat, 30 May 2015 23:25:16 +0800 Subject: [PATCH 029/380] Add appveyor and example --- saphyr/appveyor.yml | 13 ++++++++++ saphyr/examples/dump_yaml.rs | 46 ++++++++++++++++++++++++++++++++++++ 2 files changed, 59 insertions(+) create mode 100644 saphyr/appveyor.yml create mode 100644 saphyr/examples/dump_yaml.rs diff --git a/saphyr/appveyor.yml b/saphyr/appveyor.yml new file mode 100644 index 0000000..fc72b1e --- /dev/null +++ b/saphyr/appveyor.yml @@ -0,0 +1,13 @@ +install: + - ps: Start-FileDownload 'https://static.rust-lang.org/dist/rust-nightly-i686-pc-windows-gnu.exe' + - rust-nightly-i686-pc-windows-gnu.exe /VERYSILENT /NORESTART /DIR="C:\Program Files (x86)\Rust" + - SET PATH=%PATH%;C:\Program Files (x86)\Rust\bin + - SET PATH=%PATH%;C:\MinGW\bin + - rustc -V + - cargo -V + - git submodule update --init --recursive + +build: false + +test_script: + - cargo test --verbose diff --git a/saphyr/examples/dump_yaml.rs b/saphyr/examples/dump_yaml.rs new file mode 100644 index 0000000..dc335cf --- /dev/null +++ b/saphyr/examples/dump_yaml.rs @@ -0,0 +1,46 @@ +extern crate yaml_rust; + +use std::env; +use std::fs::File; +use std::io::prelude::*; +use yaml_rust::yaml; + +fn print_indent(indent: usize) { + for _ in 0..indent { + print!(" "); + } +} + +fn dump_node(doc: &yaml::Yaml, indent: usize) { + match doc { + &yaml::Yaml::Array(ref v) => { + for x in v { + dump_node(x, indent + 1) + } + }, + &yaml::Yaml::Hash(ref h) => { + for (k, v) in h { + print_indent(indent); + println!("{:?}:", k); + dump_node(v, indent + 1) + } + }, + _ => { + print_indent(indent); + println!("{:?}", doc); + } + } +} + +fn main() { + let args: Vec<_> = env::args().collect(); + let mut f = File::open(&args[1]).unwrap(); + let mut s = String::new(); + f.read_to_string(&mut s).unwrap(); + + let docs = yaml::YamlLoader::load_from_str(&s).unwrap(); + for doc in &docs { + println!("---"); + dump_node(doc, 0); + } +} From 3af90353f6aa5574318616f9d923060b03f7b53d Mon Sep 17 00:00:00 2001 From: Yuheng Chen Date: Sun, 31 May 2015 00:13:21 +0800 Subject: [PATCH 030/380] Add LICENSE and build status --- saphyr/LICENSE | 21 +++++++++++++++++++++ saphyr/Readme.md | 3 +++ saphyr/examples/dump_yaml.rs | 4 ++-- saphyr/src/scanner.rs | 2 +- saphyr/tests/spec_test.rs.inc | 10 +++++----- saphyr/tests/specs/cpp2rust.rb | 14 +++++++++++++- 6 files changed, 45 insertions(+), 9 deletions(-) create mode 100644 saphyr/LICENSE diff --git a/saphyr/LICENSE b/saphyr/LICENSE new file mode 100644 index 0000000..2526547 --- /dev/null +++ b/saphyr/LICENSE @@ -0,0 +1,21 @@ +The MIT License (MIT) + +Copyright (c) 2015 Chen Yuheng + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/saphyr/Readme.md b/saphyr/Readme.md index 58ac418..639b8d7 100644 --- a/saphyr/Readme.md +++ b/saphyr/Readme.md @@ -2,6 +2,9 @@ The missing Rust implementation for YAML 1.2. +[![Build Status](https://travis-ci.org/chyh1990/yaml-rust.svg?branch=master)](https://travis-ci.org/chyh1990/yaml-rust) +[![Build status](https://ci.appveyor.com/api/projects/status/scf47535ckp4ylg4?svg=true)](https://ci.appveyor.com/project/chyh1990/yaml-rust) + ## Specification Compliance ### Missing Feature diff --git a/saphyr/examples/dump_yaml.rs b/saphyr/examples/dump_yaml.rs index dc335cf..a77d676 100644 --- a/saphyr/examples/dump_yaml.rs +++ b/saphyr/examples/dump_yaml.rs @@ -15,14 +15,14 @@ fn dump_node(doc: &yaml::Yaml, indent: usize) { match doc { &yaml::Yaml::Array(ref v) => { for x in v { - dump_node(x, indent + 1) + dump_node(x, indent + 1); } }, &yaml::Yaml::Hash(ref h) => { for (k, v) in h { print_indent(indent); println!("{:?}:", k); - dump_node(v, indent + 1) + dump_node(v, indent + 1); } }, _ => { diff --git a/saphyr/src/scanner.rs b/saphyr/src/scanner.rs index 0d2c86e..8d514cc 100644 --- a/saphyr/src/scanner.rs +++ b/saphyr/src/scanner.rs @@ -214,7 +214,7 @@ impl> Scanner { } } - #[inline] + #[inline(always)] fn lookahead(&mut self, count: usize) { if self.buffer.len() >= count { return; diff --git a/saphyr/tests/spec_test.rs.inc b/saphyr/tests/spec_test.rs.inc index a359e7b..bb50b3b 100644 --- a/saphyr/tests/spec_test.rs.inc +++ b/saphyr/tests/spec_test.rs.inc @@ -928,7 +928,7 @@ fn test_ex7_1_alias_nodes() { assert_next!(v, TestEvent::OnDocumentEnd); } -#[test] +#[allow(dead_code)] fn test_ex7_2_empty_nodes() { let mut v = str_to_test_events(EX7_2).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); @@ -1018,7 +1018,7 @@ fn test_ex7_9_single_quoted_lines() { assert_next!(v, TestEvent::OnDocumentEnd); } -#[test] +#[allow(dead_code)] fn test_ex7_10_plain_characters() { let mut v = str_to_test_events(EX7_10).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); @@ -1135,7 +1135,7 @@ fn test_ex7_16_flow_mapping_entries() { assert_next!(v, TestEvent::OnDocumentEnd); } -#[test] +#[allow(dead_code)] fn test_ex7_17_flow_mapping_separate_values() { let mut v = str_to_test_events(EX7_17).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); @@ -1193,7 +1193,7 @@ fn test_ex7_20_single_pair_explicit_entry() { assert_next!(v, TestEvent::OnDocumentEnd); } -#[test] +#[allow(dead_code)] fn test_ex7_21_single_pair_implicit_entries() { let mut v = str_to_test_events(EX7_21).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); @@ -1270,7 +1270,7 @@ fn test_ex8_1_block_scalar_header() { assert_next!(v, TestEvent::OnDocumentEnd); } -#[test] +#[allow(dead_code)] fn test_ex8_2_block_indentation_header() { let mut v = str_to_test_events(EX8_2).into_iter(); assert_next!(v, TestEvent::OnDocumentStart); diff --git a/saphyr/tests/specs/cpp2rust.rb b/saphyr/tests/specs/cpp2rust.rb index 20c8575..25813c8 100755 --- a/saphyr/tests/specs/cpp2rust.rb +++ b/saphyr/tests/specs/cpp2rust.rb @@ -2,6 +2,14 @@ TEST_REGEX = /TEST_F\([a-zA-Z0-9_]+,\s+([a-zA-Z0-9_]+)\)/ +DISABLED_TESTS = %w( + test_ex7_10_plain_characters + test_ex7_17_flow_mapping_separate_values + test_ex7_21_single_pair_implicit_entries + test_ex7_2_empty_nodes + test_ex8_2_block_indentation_header +) + class Context attr_accessor :name, :ev, :src def initialize @@ -54,7 +62,11 @@ end # code gen tests.each do |t| next if t.ev.size == 0 - puts "#[test]" + if DISABLED_TESTS.include? t.name + puts "#[allow(dead_code)]" + else + puts "#[test]" + end puts "fn #{t.name}() {" puts " let mut v = str_to_test_events(#{t.src}).into_iter();" t.ev.each do |e| From fc1c15f22504fd48968e22a136c586d91a9ee5a5 Mon Sep 17 00:00:00 2001 From: Chen Yuheng Date: Sun, 31 May 2015 01:58:39 +0800 Subject: [PATCH 031/380] Update Readme.md --- saphyr/Readme.md | 92 ++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 89 insertions(+), 3 deletions(-) diff --git a/saphyr/Readme.md b/saphyr/Readme.md index 639b8d7..b3f8392 100644 --- a/saphyr/Readme.md +++ b/saphyr/Readme.md @@ -1,14 +1,100 @@ # yaml-rust -The missing Rust implementation for YAML 1.2. +The missing YAML 1.2 implementation for Rust. [![Build Status](https://travis-ci.org/chyh1990/yaml-rust.svg?branch=master)](https://travis-ci.org/chyh1990/yaml-rust) [![Build status](https://ci.appveyor.com/api/projects/status/scf47535ckp4ylg4?svg=true)](https://ci.appveyor.com/project/chyh1990/yaml-rust) +`yaml-rust` is a pure Rust YAML 1.2 implementation without +any FFI and crate dependencies, which enjoys the memory safe +property and other benefits from the Rust language. +The parser is havily influenced by `libyaml` and `yaml-cpp`. + +NOTE: This library is still under heavily development. + +## Quick Start + +Adding the following to the Cargo.toml in your project: + +``` +[dependencies.yaml-rust] +git = "https://github.com/chyh1990/yaml-rust.git" +``` + +and import using *extern crate*: + +```.rust +extern crate yaml_rust; +``` + +Use `yaml::YamlLoader` to load the YAML documents and access it +as Vec/HashMap: + +```.rust +extern crate yaml_rust; +use yaml_rust::yaml; + +fn main() { + let s = +" +foo: + - list1 + - list2 +bar: + - 1 + - 2.0 +"; + let docs = yaml::YamlLoader::load_from_str(s).unwrap(); + + // Multi document support, doc is a yaml::Yaml + let doc = &docs[0]; + + // Debug support + println!("{:?}", doc); + + // Index access for map & array + assert_eq!(doc["foo"][0].as_str().unwrap(), "list1"); + assert_eq!(doc["bar"][1].as_f64().unwrap(), 2.0); + + // Chained key/array access is checked and won't panic, + // return BadValue if they are not exist. + assert!(doc["INVALID_KEY"][100].is_badvalue()); +} +``` + +Note that `yaml::Yaml` implements `Index<&'a str>` & `Index`: + +* `Index` assumes the container is an Array +* `Index<&'a str>` assumes the container is a string to value Map +* otherwise, `Yaml::BadValue` is returned + +If your document does not conform to this convention (e.g. map with +complex type key), you can use the `Yaml::as_XXX` family API to access your +documents. + +## Features + +* Pure Rust +* Ruby-like Array/Hash access API +* Low-level YAML events emission + ## Specification Compliance -### Missing Feature +This implementation aims to provide YAML parser fully compatible with +the YAML 1.2 specification. The pasrser can correctly parse almost all +examples in the specification, except for the following known bugs: +* Empty plain scalar in certain contexts + +However, the widely used library `libyaml` also fails to parse these examples, +so it may not be a huge problem for most users. + +## Goals + +* Encoder * Tag directive -* Tag data type are ignored * Alias while desearilization + +## Contribution + +Fork & PR on Github. From 24d4fad5cf391f6044ee8f3c73244cdf70d099f3 Mon Sep 17 00:00:00 2001 From: Yuheng Chen Date: Sun, 31 May 2015 12:56:45 +0800 Subject: [PATCH 032/380] Add basic emitter --- saphyr/Readme.md | 2 +- saphyr/src/emitter.rs | 239 ++++++++++++++++++++++++++++++++++++++++++ saphyr/src/lib.rs | 1 + 3 files changed, 241 insertions(+), 1 deletion(-) create mode 100644 saphyr/src/emitter.rs diff --git a/saphyr/Readme.md b/saphyr/Readme.md index b3f8392..85536ed 100644 --- a/saphyr/Readme.md +++ b/saphyr/Readme.md @@ -8,7 +8,7 @@ The missing YAML 1.2 implementation for Rust. `yaml-rust` is a pure Rust YAML 1.2 implementation without any FFI and crate dependencies, which enjoys the memory safe property and other benefits from the Rust language. -The parser is havily influenced by `libyaml` and `yaml-cpp`. +The parser is heavily influenced by `libyaml` and `yaml-cpp`. NOTE: This library is still under heavily development. diff --git a/saphyr/src/emitter.rs b/saphyr/src/emitter.rs new file mode 100644 index 0000000..650d921 --- /dev/null +++ b/saphyr/src/emitter.rs @@ -0,0 +1,239 @@ +use std::fmt; +use std::convert::From; +use yaml::*; + +#[derive(Copy, Clone, Debug)] +pub enum EmitError { + FmtError(fmt::Error), + BadHashmapKey, +} + +impl From for EmitError { + fn from(f: fmt::Error) -> Self { + EmitError::FmtError(f) + } +} + +pub struct YamlEmitter<'a> { + writer: &'a mut fmt::Write, + best_indent: usize, + + level: isize, +} + +pub type EmitResult = Result<(), EmitError>; + +// from serialize::json +fn escape_str(wr: &mut fmt::Write, v: &str) -> Result<(), fmt::Error> { + try!(wr.write_str("\"")); + + let mut start = 0; + + for (i, byte) in v.bytes().enumerate() { + let escaped = match byte { + b'"' => "\\\"", + b'\\' => "\\\\", + b'\x00' => "\\u0000", + b'\x01' => "\\u0001", + b'\x02' => "\\u0002", + b'\x03' => "\\u0003", + b'\x04' => "\\u0004", + b'\x05' => "\\u0005", + b'\x06' => "\\u0006", + b'\x07' => "\\u0007", + b'\x08' => "\\b", + b'\t' => "\\t", + b'\n' => "\\n", + b'\x0b' => "\\u000b", + b'\x0c' => "\\f", + b'\r' => "\\r", + b'\x0e' => "\\u000e", + b'\x0f' => "\\u000f", + b'\x10' => "\\u0010", + b'\x11' => "\\u0011", + b'\x12' => "\\u0012", + b'\x13' => "\\u0013", + b'\x14' => "\\u0014", + b'\x15' => "\\u0015", + b'\x16' => "\\u0016", + b'\x17' => "\\u0017", + b'\x18' => "\\u0018", + b'\x19' => "\\u0019", + b'\x1a' => "\\u001a", + b'\x1b' => "\\u001b", + b'\x1c' => "\\u001c", + b'\x1d' => "\\u001d", + b'\x1e' => "\\u001e", + b'\x1f' => "\\u001f", + b'\x7f' => "\\u007f", + _ => { continue; } + }; + + if start < i { + try!(wr.write_str(&v[start..i])); + } + + try!(wr.write_str(escaped)); + + start = i + 1; + } + + if start != v.len() { + try!(wr.write_str(&v[start..])); + } + + try!(wr.write_str("\"")); + Ok(()) +} + +impl<'a> YamlEmitter<'a> { + pub fn new(writer: &'a mut fmt::Write) -> YamlEmitter { + YamlEmitter { + writer: writer, + best_indent: 2, + + level: -1 + } + } + + pub fn dump(&mut self, doc: &Yaml) -> EmitResult { + // write DocumentStart + try!(write!(self.writer, "---\n")); + self.level = -1; + self.emit_node(doc) + } + + fn write_indent(&mut self) -> EmitResult { + if self.level <= 0 { return Ok(()); } + for _ in 0..self.level { + for _ in 0..self.best_indent { + try!(write!(self.writer, " ")); + } + } + Ok(()) + } + + fn emit_node(&mut self, node: &Yaml) -> EmitResult { + match node { + &Yaml::Array(ref v) => { + if v.is_empty() { + try!(write!(self.writer, "[]")); + Ok(()) + } else { + if self.level >= 0 { + try!(write!(self.writer, "\n")); + } + self.level += 1; + let mut cnt = 0usize; + for x in v { + try!(self.write_indent()); + try!(write!(self.writer, "- ")); + try!(self.emit_node(x)); + cnt += 1; + if cnt < v.len() { + try!(write!(self.writer, "\n")); + } + } + self.level -= 1; + Ok(()) + } + }, + &Yaml::Hash(ref h) => { + if h.is_empty() { + try!(self.writer.write_str("{}")); + Ok(()) + } else { + if self.level >= 0 { + try!(write!(self.writer, "\n")); + } + self.level += 1; + let mut cnt = 0usize; + for (k, v) in h { + try!(self.write_indent()); + match k { + // complex key is not supported + &Yaml::Array(_) | &Yaml::Hash(_) => { + return Err(EmitError::BadHashmapKey); + }, + _ => { try!(self.emit_node(k)); } + } + try!(write!(self.writer, ": ")); + try!(self.emit_node(v)); + cnt += 1; + if cnt < h.len() { + try!(write!(self.writer, "\n")); + } + } + self.level -= 1; + Ok(()) + } + }, + &Yaml::String(ref v) => { + try!(escape_str(self.writer, v)); + Ok(()) + }, + &Yaml::Boolean(v) => { + if v { + try!(self.writer.write_str("true")); + } else { + try!(self.writer.write_str("false")); + } + Ok(()) + }, + &Yaml::Integer(v) => { + try!(write!(self.writer, "{}", v)); + Ok(()) + }, + &Yaml::Real(ref v) => { + try!(write!(self.writer, "{}", v)); + Ok(()) + }, + &Yaml::Null | &Yaml::BadValue => { + try!(write!(self.writer, "~")); + Ok(()) + }, + // XXX(chenyh) Alias + _ => { Ok(()) } + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use yaml::*; + + #[test] + fn test_emit_simple() { + let s = " +# comment +a0 bb: val +a1: + b1: 4 + b2: d +a2: 4 # i'm comment +a3: [1, 2, 3] +a4: + - - a1 + - a2 + - 2 +a5: 'single_quoted' +a6: \"double_quoted\" +a7: 你好 +'key 1': \"ddd\\\tbbb\" +"; + + let docs = YamlLoader::load_from_str(&s).unwrap(); + let doc = &docs[0]; + let mut writer = String::new(); + { + let mut emitter = YamlEmitter::new(&mut writer); + emitter.dump(doc).unwrap(); + } + let docs_new = YamlLoader::load_from_str(&s).unwrap(); + let doc_new = &docs_new[0]; + + println!("{}", writer); + assert_eq!(doc, doc_new); + } +} diff --git a/saphyr/src/lib.rs b/saphyr/src/lib.rs index 6dd2656..09a6542 100644 --- a/saphyr/src/lib.rs +++ b/saphyr/src/lib.rs @@ -1,6 +1,7 @@ pub mod yaml; pub mod scanner; pub mod parser; +pub mod emitter; #[test] fn it_works() { From 83a57e7ada53cd7ab7cebfb6e59c0b713402ef5b Mon Sep 17 00:00:00 2001 From: Yuheng Chen Date: Sun, 31 May 2015 17:02:22 +0800 Subject: [PATCH 033/380] Export API --- saphyr/src/emitter.rs | 1 - saphyr/src/lib.rs | 64 +++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 62 insertions(+), 3 deletions(-) diff --git a/saphyr/src/emitter.rs b/saphyr/src/emitter.rs index 650d921..4601ba2 100644 --- a/saphyr/src/emitter.rs +++ b/saphyr/src/emitter.rs @@ -233,7 +233,6 @@ a7: 你好 let docs_new = YamlLoader::load_from_str(&s).unwrap(); let doc_new = &docs_new[0]; - println!("{}", writer); assert_eq!(doc, doc_new); } } diff --git a/saphyr/src/lib.rs b/saphyr/src/lib.rs index 09a6542..120adb2 100644 --- a/saphyr/src/lib.rs +++ b/saphyr/src/lib.rs @@ -3,6 +3,66 @@ pub mod scanner; pub mod parser; pub mod emitter; -#[test] -fn it_works() { +// reexport key APIs +pub use scanner::ScanError; +pub use parser::Event; +pub use yaml::{Yaml, YamlLoader}; +pub use emitter::{YamlEmitter, EmitError}; + +#[cfg(test)] +mod tests { + use super::*; + #[test] + fn test_api() { + let s = +" +# from yaml-cpp example +- name: Ogre + position: [0, 5, 0] + powers: + - name: Club + damage: 10 + - name: Fist + damage: 8 +- name: Dragon + position: [1, 0, 10] + powers: + - name: Fire Breath + damage: 25 + - name: Claws + damage: 15 +- name: Wizard + position: [5, -3, 0] + powers: + - name: Acid Rain + damage: 50 + - name: Staff + damage: 3 +"; + let docs = YamlLoader::load_from_str(s).unwrap(); + let doc = &docs[0]; + + assert_eq!(doc[0]["name"].as_str().unwrap(), "Ogre"); + + let mut writer = String::new(); + { + let mut emitter = YamlEmitter::new(&mut writer); + emitter.dump(doc).unwrap(); + } + + assert!(writer.len() > 0); + } + + #[test] + fn test_fail() { + let s = +" +# syntax error +scalar +key: [1, 2]] +key1:a2 +"; + assert!(YamlLoader::load_from_str(s).is_err()); + } + } From 51f3a66dd250d8b1327fac35ed8dfa5b505723a7 Mon Sep 17 00:00:00 2001 From: Yuheng Chen Date: Sun, 31 May 2015 17:59:43 +0800 Subject: [PATCH 034/380] Add document --- saphyr/.travis.yml | 5 +++++ saphyr/src/lib.rs | 39 +++++++++++++++++++++++++++++++++++++++ saphyr/src/parser.rs | 13 ++++++++----- saphyr/src/yaml.rs | 28 ++++++++++++++++++++++++++-- 4 files changed, 78 insertions(+), 7 deletions(-) diff --git a/saphyr/.travis.yml b/saphyr/.travis.yml index 3d713d8..7fbcb0a 100644 --- a/saphyr/.travis.yml +++ b/saphyr/.travis.yml @@ -2,3 +2,8 @@ language: rust rust: - 1.0.0 - nightly +env: + global: + - secure: ZUcdcbS8xbpdII9FSPx7VtoVhEkJhWL2Hb75tDlKDHNhfXqmt1NyB9q/2qXJ5Ulp4MnYXwsI8LsDloR6gvdB4xElay3smuF/neGvMjrqcB15/2p0MSQ+kZjMsNB6mlb5kAlm8ahduXIscppmw/V+m5hn3Vo+RQz/Ng+pzv0nc8KEXPMYrfRFg+a7FaeIbRbb8ir9EfflUSqArLq2hbi2WdhM3hFMcCIAUt6DD4x5ubjEg60OnIof5FDu0mXMXzQvUfHWOeYnsNcD/DLyDnm6FuQEzk37M4EB8op2SdBUeQMQ5abR3i2rd//DZpbTTEjud0PseWohGAwTwL2aoFrqs7uYQMx+vcGlOzAyDUm4VemVUa3F2BECdzU5BiujcKOITJEVUYWongld93arQq34FuXG/TO/T1XrerxfG6LTkTkKS5Vz7W8z6Rloa99WrQLJg1ZJP6itEU7G7KsDFVgRhsg7rz4/dV/2+cV4UvIwd4HlGXKCFlH0SClqvM3/7i/qqCD0689SJW6Zip+ly38MXlGy2s/AmReEasXvFer9JkOEIuPa8QTBNAjDlw7bWXi6neQWBIZU1VhZcSssnrVmEFN8fNklShzpw5DyKCv8jPTx2O6Dw8B/LgIK8uo+eaTXiO6zz/T1c/qEdsYslvxPA2D3F+ONpPU7238ykT4eRog= +after_script: + - curl http://www.rust-ci.org/artifacts/put?t=$RUSTCI_TOKEN | sh diff --git a/saphyr/src/lib.rs b/saphyr/src/lib.rs index 120adb2..c301ab9 100644 --- a/saphyr/src/lib.rs +++ b/saphyr/src/lib.rs @@ -1,3 +1,42 @@ +// Copyright 2015, Yuheng Chen. See the LICENSE file at the top-level +// directory of this distribution. + +//! YAML 1.2 implementation in pure Rust. +//! +//! # Usage +//! +//! This crate is [on github](https://github.com/chyh1990/yaml-rust) and can be +//! used by adding `yaml-rust` to the dependencies in your project's `Cargo.toml`. +//! +//! ```toml +//! [dependencies.yaml-rust] +//! git = "https://github.com/chyh1990/yaml-rust.git" +//! ``` +//! +//! And this in your crate root: +//! +//! ```rust +//! extern crate yaml_rust; +//! ``` +//! +//! Parse a string into `Vec` and then serialize it as a YAML string. +//! +//! # Examples +//! +//! ``` +//! use yaml_rust::{YamlLoader, YamlEmitter}; +//! +//! let docs = YamlLoader::load_from_str("[1, 2, 3]").unwrap(); +//! let doc = &docs[0]; // select the first document +//! assert_eq!(doc[0].as_i64().unwrap(), 1); // access elements by index +//! +//! let mut out_str = String::new(); +//! let mut emitter = YamlEmitter::new(&mut out_str); +//! emitter.dump(doc).unwrap(); // dump the YAML object to a String +//! +//! ``` + + pub mod yaml; pub mod scanner; pub mod parser; diff --git a/saphyr/src/parser.rs b/saphyr/src/parser.rs index b70f88e..51001a8 100644 --- a/saphyr/src/parser.rs +++ b/saphyr/src/parser.rs @@ -3,7 +3,7 @@ use std::collections::HashMap; // use yaml::*; #[derive(Clone, Copy, PartialEq, Debug, Eq)] -pub enum State { +enum State { StreamStart, ImplicitDocumentStart, DocumentStart, @@ -30,21 +30,24 @@ pub enum State { End } +/// `Event` is used with the low-level event base parsing API, +/// see `EventReceiver` trait. #[derive(Clone, PartialEq, Debug, Eq)] pub enum Event { + /// Reserved for internal use NoEvent, StreamStart, StreamEnd, DocumentStart, DocumentEnd, - // anchor_id + /// Refer to an anchor ID Alias(usize), - // value, style, anchor_id, tag + /// Value, style, anchor_id, tag Scalar(String, TScalarStyle, usize, Option), - // anchor_id + /// Anchor ID SequenceStart(usize), SequenceEnd, - // anchor_id + /// Anchor ID MappingStart(usize), MappingEnd } diff --git a/saphyr/src/yaml.rs b/saphyr/src/yaml.rs index ec14088..252a3b4 100644 --- a/saphyr/src/yaml.rs +++ b/saphyr/src/yaml.rs @@ -6,20 +6,44 @@ use std::mem; use parser::*; use scanner::{TScalarStyle, ScanError, TokenType}; +/// An YAML node is store as this `Yaml` enumeration, it provides an easy way to +/// access your YAML document. +/// +/// # Examples +/// +/// ``` +/// use yaml_rust::Yaml; +/// let foo = Yaml::from_str("-123"); // convert the string to the appropriate YAML type +/// assert_eq!(foo.as_i64().unwrap(), -123); +/// +/// // iterator over an Array +/// let vec = Yaml::Array(vec![Yaml::Integer(1), Yaml::Integer(2)]); +/// for v in vec.as_vec().unwrap() { +/// assert!(v.as_i64().is_some()); +/// } +/// ``` #[derive(Clone, PartialEq, PartialOrd, Debug, Eq, Ord)] pub enum Yaml { /// float types are stored as String, and parsed on demand. /// Note that f64 does NOT implement Eq trait and can NOT be stored in BTreeMap Real(string::String), + /// Yaml int is stored as i64. Integer(i64), + /// Yaml scalar. String(string::String), + /// Yaml bool, e.g. `true` or `false`. Boolean(bool), + /// Yaml array, can be access as a `Vec`. Array(self::Array), + /// Yaml hash, can be access as a `BTreeMap`. Hash(self::Hash), + /// Alias, not fully supported yet. Alias(usize), + /// Yaml bool, e.g. `null` or `~`. Null, - /// Access non-exist node by Index trait will return BadValue. - /// This simplifies error handling of user. + /// Access non-exist node by Index trait will return `BadValue`. + /// This simplifies error handling of user. Invalid type conversion + /// also return `BadValue`. BadValue, } From 464200438c829846df143ca4183e98370c01448a Mon Sep 17 00:00:00 2001 From: Chen Yuheng Date: Mon, 1 Jun 2015 23:01:50 +0800 Subject: [PATCH 035/380] Update Readme.md --- saphyr/Readme.md | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/saphyr/Readme.md b/saphyr/Readme.md index 85536ed..43e6f04 100644 --- a/saphyr/Readme.md +++ b/saphyr/Readme.md @@ -10,6 +10,9 @@ any FFI and crate dependencies, which enjoys the memory safe property and other benefits from the Rust language. The parser is heavily influenced by `libyaml` and `yaml-cpp`. +This crate works on all Rust supported platforms and +Rust 1.0.0 and nightly! + NOTE: This library is still under heavily development. ## Quick Start @@ -32,7 +35,7 @@ as Vec/HashMap: ```.rust extern crate yaml_rust; -use yaml_rust::yaml; +use yaml_rust::{YamlLoader, YamlEmitter}; fn main() { let s = @@ -44,7 +47,7 @@ bar: - 1 - 2.0 "; - let docs = yaml::YamlLoader::load_from_str(s).unwrap(); + let docs = YamlLoader::load_from_str(s).unwrap(); // Multi document support, doc is a yaml::Yaml let doc = &docs[0]; @@ -59,6 +62,14 @@ bar: // Chained key/array access is checked and won't panic, // return BadValue if they are not exist. assert!(doc["INVALID_KEY"][100].is_badvalue()); + + // Dump the YAML object + let mut out_str = String::new(); + { + let mut emitter = YamlEmitter::new(&mut out_str); + emitter.dump(doc).unwrap(); // dump the YAML object to a String + } + println!("{}", out_str); } ``` From d5debe4da237ab34a77bae08c240b5f2a57d2546 Mon Sep 17 00:00:00 2001 From: Chen Yuheng Date: Mon, 1 Jun 2015 23:35:31 +0800 Subject: [PATCH 036/380] Update Readme.md --- saphyr/Readme.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/saphyr/Readme.md b/saphyr/Readme.md index 43e6f04..d642c0b 100644 --- a/saphyr/Readme.md +++ b/saphyr/Readme.md @@ -13,6 +13,8 @@ The parser is heavily influenced by `libyaml` and `yaml-cpp`. This crate works on all Rust supported platforms and Rust 1.0.0 and nightly! +See [Document](http://chyh1990.github.io/yaml-rust/doc/yaml_rust/) + NOTE: This library is still under heavily development. ## Quick Start From 97747a719c901edf8c5615a398957573242d6ac8 Mon Sep 17 00:00:00 2001 From: Chen Yuheng Date: Tue, 2 Jun 2015 00:53:32 +0800 Subject: [PATCH 037/380] Update Readme.md --- saphyr/Readme.md | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/saphyr/Readme.md b/saphyr/Readme.md index d642c0b..5758fc2 100644 --- a/saphyr/Readme.md +++ b/saphyr/Readme.md @@ -15,7 +15,10 @@ Rust 1.0.0 and nightly! See [Document](http://chyh1990.github.io/yaml-rust/doc/yaml_rust/) -NOTE: This library is still under heavily development. +> NOTE: This library is still under heavily development. + +> WARNING: This library needs more tests and it is NOT ready for +> parsing arbitrary user input from *untrusted source*. ## Quick Start From d40b7cbdd29a4fc7225da79a2cdfe4489d2ae115 Mon Sep 17 00:00:00 2001 From: Yuheng Chen Date: Thu, 4 Jun 2015 16:10:43 +0800 Subject: [PATCH 038/380] Better code coverage --- saphyr/src/emitter.rs | 2 ++ saphyr/src/parser.rs | 5 +++-- saphyr/src/scanner.rs | 16 ++++++++++++++++ 3 files changed, 21 insertions(+), 2 deletions(-) diff --git a/saphyr/src/emitter.rs b/saphyr/src/emitter.rs index 4601ba2..8810c95 100644 --- a/saphyr/src/emitter.rs +++ b/saphyr/src/emitter.rs @@ -217,6 +217,8 @@ a4: - - a1 - a2 - 2 + - [] + - {} a5: 'single_quoted' a6: \"double_quoted\" a7: 你好 diff --git a/saphyr/src/parser.rs b/saphyr/src/parser.rs index 51001a8..f097a80 100644 --- a/saphyr/src/parser.rs +++ b/saphyr/src/parser.rs @@ -259,8 +259,10 @@ impl> Parser { State::FlowSequenceEntryMappingKey => self.flow_sequence_entry_mapping_key(), State::FlowSequenceEntryMappingValue => self.flow_sequence_entry_mapping_value(), State::FlowSequenceEntryMappingEnd => self.flow_sequence_entry_mapping_end(), + State::FlowMappingEmptyValue => self.flow_mapping_value(true), - _ => unimplemented!() + /* impossible */ + State::End => unreachable!(), } } @@ -329,7 +331,6 @@ impl> Parser { //} }, TokenType::TagDirectiveToken(..) => { - // unimplemented!(); // TODO add tag directive }, _ => break diff --git a/saphyr/src/scanner.rs b/saphyr/src/scanner.rs index 8d514cc..470a25d 100644 --- a/saphyr/src/scanner.rs +++ b/saphyr/src/scanner.rs @@ -1942,6 +1942,22 @@ key: end!(p); } + #[test] + fn test_scanner_cr() { + let s = "---\r\n- tok1\r\n- tok2"; + let mut p = Scanner::new(s.chars()); + next!(p, StreamStartToken(..)); + next!(p, DocumentStartToken); + next!(p, BlockSequenceStartToken); + next!(p, BlockEntryToken); + next_scalar!(p, TScalarStyle::Plain, "tok1"); + next!(p, BlockEntryToken); + next_scalar!(p, TScalarStyle::Plain, "tok2"); + next!(p, BlockEndToken); + next!(p, StreamEndToken); + end!(p); + } + #[test] fn test_uri() { // TODO From aa5eae631849b7ab50d4caa5496c12a0b9618bd7 Mon Sep 17 00:00:00 2001 From: Yuheng Chen Date: Thu, 4 Jun 2015 16:28:01 +0800 Subject: [PATCH 039/380] Bump to 0.2.0 --- saphyr/Cargo.toml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/saphyr/Cargo.toml b/saphyr/Cargo.toml index d7dd718..fa04339 100644 --- a/saphyr/Cargo.toml +++ b/saphyr/Cargo.toml @@ -1,4 +1,6 @@ [package] name = "yaml-rust" -version = "0.1.0" +version = "0.2.0" authors = ["Yuheng Chen "] +homepage = "http://chyh1990.github.io/yaml-rust/" +documentation = "http://chyh1990.github.io/yaml-rust/doc/yaml_rust/" From 5217f95091a7d2ee3c4ce44713d6499ab7b2017b Mon Sep 17 00:00:00 2001 From: Yuheng Chen Date: Thu, 4 Jun 2015 16:30:31 +0800 Subject: [PATCH 040/380] Add license and description to Cargo.toml --- saphyr/Cargo.toml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/saphyr/Cargo.toml b/saphyr/Cargo.toml index fa04339..7f36ff9 100644 --- a/saphyr/Cargo.toml +++ b/saphyr/Cargo.toml @@ -4,3 +4,5 @@ version = "0.2.0" authors = ["Yuheng Chen "] homepage = "http://chyh1990.github.io/yaml-rust/" documentation = "http://chyh1990.github.io/yaml-rust/doc/yaml_rust/" +license-file = "LICENSE" +description = "The missing YAML 1.2 parser for rust" From 2a86545adf7e8f18ab90916c429f55bb76bbb476 Mon Sep 17 00:00:00 2001 From: Yuheng Chen Date: Thu, 4 Jun 2015 16:33:55 +0800 Subject: [PATCH 041/380] Add repository to Cargo.toml --- saphyr/Cargo.toml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/saphyr/Cargo.toml b/saphyr/Cargo.toml index 7f36ff9..1a52afd 100644 --- a/saphyr/Cargo.toml +++ b/saphyr/Cargo.toml @@ -4,5 +4,6 @@ version = "0.2.0" authors = ["Yuheng Chen "] homepage = "http://chyh1990.github.io/yaml-rust/" documentation = "http://chyh1990.github.io/yaml-rust/doc/yaml_rust/" -license-file = "LICENSE" +license = "MIT" description = "The missing YAML 1.2 parser for rust" +repository = "https://github.com/chyh1990/yaml-rust" From 3c9da807e576b0f06fd524efd8da5f98c333b474 Mon Sep 17 00:00:00 2001 From: Yuheng Chen Date: Thu, 4 Jun 2015 16:39:53 +0800 Subject: [PATCH 042/380] Update Readme --- saphyr/Readme.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/saphyr/Readme.md b/saphyr/Readme.md index 5758fc2..73f8a3e 100644 --- a/saphyr/Readme.md +++ b/saphyr/Readme.md @@ -24,6 +24,13 @@ See [Document](http://chyh1990.github.io/yaml-rust/doc/yaml_rust/) Adding the following to the Cargo.toml in your project: +``` +[dependencies] +yaml-rust = "*" +``` + +or + ``` [dependencies.yaml-rust] git = "https://github.com/chyh1990/yaml-rust.git" From 34f81af1013693702f4a9b438ee724b46013e9e7 Mon Sep 17 00:00:00 2001 From: m-r-r Date: Thu, 18 Jun 2015 23:49:28 +0200 Subject: [PATCH 043/380] Implementation of std::scanner::Error for ScanError --- saphyr/src/scanner.rs | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/saphyr/src/scanner.rs b/saphyr/src/scanner.rs index 470a25d..f78ae89 100644 --- a/saphyr/src/scanner.rs +++ b/saphyr/src/scanner.rs @@ -1,5 +1,6 @@ use std::collections::VecDeque; -use std::char; +use std::{char, fmt}; +use std::error::Error; #[derive(Clone, Copy, PartialEq, Debug, Eq)] pub enum TEncoding { @@ -49,6 +50,22 @@ impl ScanError { } } +impl Error for ScanError { + fn description(&self) -> &str { + self.info.as_ref() + } + + fn cause(&self) -> Option<&Error> { + None + } +} + +impl fmt::Display for ScanError { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + self.info.fmt(formatter) + } +} + #[derive(Clone, PartialEq, Debug, Eq)] pub enum TokenType { NoToken, From 2f7195250edb0c8f1bd1158f834ae54486e8299b Mon Sep 17 00:00:00 2001 From: Sergey Pchelincev Date: Tue, 23 Jun 2015 17:09:41 +0300 Subject: [PATCH 044/380] Update Readme.md --- saphyr/Readme.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/saphyr/Readme.md b/saphyr/Readme.md index 73f8a3e..70b5019 100644 --- a/saphyr/Readme.md +++ b/saphyr/Readme.md @@ -104,7 +104,7 @@ documents. ## Specification Compliance This implementation aims to provide YAML parser fully compatible with -the YAML 1.2 specification. The pasrser can correctly parse almost all +the YAML 1.2 specification. The parser can correctly parse almost all examples in the specification, except for the following known bugs: * Empty plain scalar in certain contexts From 87539f852da4b4cd998238d9b6fed617ab0c5f64 Mon Sep 17 00:00:00 2001 From: Yuheng Chen Date: Tue, 30 Jun 2015 00:31:22 +0800 Subject: [PATCH 045/380] remove trailing space --- saphyr/src/lib.rs | 3 +-- saphyr/src/parser.rs | 7 +++---- saphyr/src/scanner.rs | 33 ++++++++++++++++----------------- saphyr/src/yaml.rs | 11 ++++------- saphyr/tests/spec_test.rs | 1 - 5 files changed, 24 insertions(+), 31 deletions(-) diff --git a/saphyr/src/lib.rs b/saphyr/src/lib.rs index c301ab9..d18f85e 100644 --- a/saphyr/src/lib.rs +++ b/saphyr/src/lib.rs @@ -29,14 +29,13 @@ //! let docs = YamlLoader::load_from_str("[1, 2, 3]").unwrap(); //! let doc = &docs[0]; // select the first document //! assert_eq!(doc[0].as_i64().unwrap(), 1); // access elements by index -//! +//! //! let mut out_str = String::new(); //! let mut emitter = YamlEmitter::new(&mut out_str); //! emitter.dump(doc).unwrap(); // dump the YAML object to a String //! //! ``` - pub mod yaml; pub mod scanner; pub mod parser; diff --git a/saphyr/src/parser.rs b/saphyr/src/parser.rs index f097a80..9a11abf 100644 --- a/saphyr/src/parser.rs +++ b/saphyr/src/parser.rs @@ -102,7 +102,7 @@ impl> Parser { if self.token.is_none() { match self.scanner.get_error() { None => - return Err(ScanError::new(self.scanner.mark(), + return Err(ScanError::new(self.scanner.mark(), "unexpected eof")), Some(e) => return Err(e), } @@ -179,7 +179,7 @@ impl> Parser { fn load_node(&mut self, first_ev: &Event, recv: &mut R) -> Result<(), ScanError> { match *first_ev { - Event::Alias(..) => { + Event::Alias(..) => { Ok(()) }, Event::Scalar(..) => { @@ -600,7 +600,7 @@ impl> Parser { self.skip(); let tok = try!(self.peek()); match tok.1 { - TokenType::FlowEntryToken + TokenType::FlowEntryToken | TokenType::FlowMappingEndToken => { }, _ => { self.push_state(State::FlowMappingKey); @@ -765,4 +765,3 @@ impl> Parser { Ok(Event::MappingEnd) } } - diff --git a/saphyr/src/scanner.rs b/saphyr/src/scanner.rs index f78ae89..8811acf 100644 --- a/saphyr/src/scanner.rs +++ b/saphyr/src/scanner.rs @@ -62,7 +62,7 @@ impl Error for ScanError { impl fmt::Display for ScanError { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { - self.info.fmt(formatter) + self.info.fmt(formatter) } } @@ -590,12 +590,12 @@ impl> Scanner { } if string.is_empty() { - return Err(ScanError::new(start_mark, + return Err(ScanError::new(start_mark, "while scanning a directive, could not find expected directive name")); } if !is_blankz(self.ch()) { - return Err(ScanError::new(start_mark, + return Err(ScanError::new(start_mark, "while scanning a directive, found unexpected non-alphabetical character")); } @@ -685,7 +685,7 @@ impl> Scanner { self.skip(); } else { - // The tag has either the '!suffix' or the '!handle!suffix' + // The tag has either the '!suffix' or the '!handle!suffix' handle = try!(self.scan_tag_handle(false, &start_mark)); // Check if it is, indeed, handle. if handle.len() >= 2 && handle.starts_with('!') && handle.ends_with('!') { @@ -802,7 +802,7 @@ impl> Scanner { loop { self.lookahead(3); - if !(self.ch() == '%' + if !(self.ch() == '%' && is_hex(self.buffer[1]) && is_hex(self.buffer[2])) { return Err(ScanError::new(*mark, @@ -961,7 +961,7 @@ impl> Scanner { self.tokens.push_back(Token(start_mark, TokenType::BlockEntryToken)); Ok(()) } - + fn fetch_document_indicator(&mut self, t: TokenType) -> ScanResult { self.unroll_indent(-1); try!(self.remove_simple_key()); @@ -1068,7 +1068,7 @@ impl> Scanner { } // Scan the leading line breaks and determine the indentation level if needed. try!(self.block_scalar_breaks(&mut indent, &mut trailing_breaks)); - + self.lookahead(1); let start_mark = self.mark; @@ -1138,7 +1138,7 @@ impl> Scanner { // Check for a tab character messing the intendation. if (*indent == 0 || self.mark.col < *indent) && self.buffer[0] == '\t' { - return Err(ScanError::new(self.mark, + return Err(ScanError::new(self.mark, "while scanning a block scalar, found a tab character where an intendation space is expected")); } @@ -1197,12 +1197,12 @@ impl> Scanner { (self.buffer[1] == '.') && (self.buffer[2] == '.')) && is_blankz(self.buffer[3]) { - return Err(ScanError::new(start_mark, + return Err(ScanError::new(start_mark, "while scanning a quoted scalar, found unexpected document indicator")); } if is_z(self.ch()) { - return Err(ScanError::new(start_mark, + return Err(ScanError::new(start_mark, "while scanning a quoted scalar, found unexpected end of stream")); } @@ -1507,7 +1507,7 @@ impl> Scanner { // insert simple key let tok = Token(sk.mark, TokenType::KeyToken); let tokens_parsed = self.tokens_parsed; - self.insert_token(sk.token_number - tokens_parsed, tok); + self.insert_token(sk.token_number - tokens_parsed, tok); // Add the BLOCK-MAPPING-START token if needed. self.roll_indent(sk.mark.col, Some(sk.token_number), @@ -1653,7 +1653,7 @@ macro_rules! end { #[test] fn test_explicit_scalar() { - let s = + let s = "--- 'a scalar' ... @@ -1669,7 +1669,7 @@ macro_rules! end { #[test] fn test_multiple_documents() { - let s = + let s = " 'a scalar' --- @@ -1706,7 +1706,7 @@ macro_rules! end { #[test] fn test_a_flow_mapping() { - let s = + let s = " { a simple key: a value, # Note that the KEY token is produced. @@ -1733,7 +1733,7 @@ macro_rules! end { #[test] fn test_block_sequences() { - let s = + let s = " - item 1 - item 2 @@ -1776,7 +1776,7 @@ macro_rules! end { #[test] fn test_block_mappings() { - let s = + let s = " a simple key: a value # The KEY token is produced here. ? a complex key @@ -1985,4 +1985,3 @@ key: // TODO } } - diff --git a/saphyr/src/yaml.rs b/saphyr/src/yaml.rs index 252a3b4..f1f06c4 100644 --- a/saphyr/src/yaml.rs +++ b/saphyr/src/yaml.rs @@ -8,14 +8,14 @@ use scanner::{TScalarStyle, ScanError, TokenType}; /// An YAML node is store as this `Yaml` enumeration, it provides an easy way to /// access your YAML document. -/// +/// /// # Examples -/// +/// /// ``` /// use yaml_rust::Yaml; /// let foo = Yaml::from_str("-123"); // convert the string to the appropriate YAML type /// assert_eq!(foo.as_i64().unwrap(), -123); -/// +/// /// // iterator over an Array /// let vec = Yaml::Array(vec![Yaml::Integer(1), Yaml::Integer(2)]); /// for v in vec.as_vec().unwrap() { @@ -272,8 +272,6 @@ impl Index for Yaml { } } - - #[cfg(test)] mod test { use yaml::*; @@ -317,7 +315,7 @@ a7: 你好 #[test] fn test_multi_doc() { - let s = + let s = " 'a scalar' --- @@ -382,4 +380,3 @@ a7: 你好 assert!(doc[20].is_badvalue()); } } - diff --git a/saphyr/tests/spec_test.rs b/saphyr/tests/spec_test.rs index fa1d065..a79c051 100644 --- a/saphyr/tests/spec_test.rs +++ b/saphyr/tests/spec_test.rs @@ -71,4 +71,3 @@ include!("spec_test.rs.inc"); //#[test] //fn test_hc_alias() { //} - From 79d6b5ec84f352aed90bdfe1ca5532883c2813f3 Mon Sep 17 00:00:00 2001 From: Yuheng Chen Date: Tue, 30 Jun 2015 00:44:40 +0800 Subject: [PATCH 046/380] Add rust 1.1.0 to travis config --- saphyr/.travis.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/saphyr/.travis.yml b/saphyr/.travis.yml index 7fbcb0a..7b1d66e 100644 --- a/saphyr/.travis.yml +++ b/saphyr/.travis.yml @@ -1,6 +1,7 @@ language: rust rust: - 1.0.0 + - 1.1.0 - nightly env: global: From e8f754bb9ed2026fb2e51de6e8700dbddae905c3 Mon Sep 17 00:00:00 2001 From: Yuheng Chen Date: Thu, 2 Jul 2015 23:46:04 +0800 Subject: [PATCH 047/380] Display error line and col number for ScanError --- saphyr/src/lib.rs | 7 +++++++ saphyr/src/scanner.rs | 4 +++- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/saphyr/src/lib.rs b/saphyr/src/lib.rs index d18f85e..1cba925 100644 --- a/saphyr/src/lib.rs +++ b/saphyr/src/lib.rs @@ -50,6 +50,7 @@ pub use emitter::{YamlEmitter, EmitError}; #[cfg(test)] mod tests { use super::*; + #[test] fn test_api() { let s = @@ -91,6 +92,11 @@ mod tests { assert!(writer.len() > 0); } + fn try_fail(s: &str) -> Result, ScanError> { + let t = try!(YamlLoader::load_from_str(s)); + Ok(t) + } + #[test] fn test_fail() { let s = @@ -101,6 +107,7 @@ key: [1, 2]] key1:a2 "; assert!(YamlLoader::load_from_str(s).is_err()); + assert!(try_fail(s).is_err()); } } diff --git a/saphyr/src/scanner.rs b/saphyr/src/scanner.rs index 8811acf..03f717d 100644 --- a/saphyr/src/scanner.rs +++ b/saphyr/src/scanner.rs @@ -61,8 +61,10 @@ impl Error for ScanError { } impl fmt::Display for ScanError { + // col starts from 0 fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { - self.info.fmt(formatter) + write!(formatter, "{} at line {} column {}", self.info, + self.mark.line, self.mark.col + 1) } } From 628cf4dd36a6aa696f56e45f2cf8d23a34153e2f Mon Sep 17 00:00:00 2001 From: Yuheng Chen Date: Fri, 3 Jul 2015 00:03:29 +0800 Subject: [PATCH 048/380] Bump to v0.2.1 --- saphyr/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/saphyr/Cargo.toml b/saphyr/Cargo.toml index 1a52afd..a511b62 100644 --- a/saphyr/Cargo.toml +++ b/saphyr/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "yaml-rust" -version = "0.2.0" +version = "0.2.1" authors = ["Yuheng Chen "] homepage = "http://chyh1990.github.io/yaml-rust/" documentation = "http://chyh1990.github.io/yaml-rust/doc/yaml_rust/" From a29b0c12f64175fdbc3748334d032d5e028b3ef1 Mon Sep 17 00:00:00 2001 From: Yuheng Chen Date: Tue, 15 Sep 2015 15:27:32 +0800 Subject: [PATCH 049/380] Fix CR/LF in literal scalar --- saphyr/src/scanner.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/saphyr/src/scanner.rs b/saphyr/src/scanner.rs index 03f717d..4cfe2bc 100644 --- a/saphyr/src/scanner.rs +++ b/saphyr/src/scanner.rs @@ -1099,9 +1099,11 @@ impl> Scanner { self.skip(); self.lookahead(1); } + // break on EOF + if is_z(self.ch()) { break; } self.lookahead(2); - self.skip_line(); + self.read_break(&mut leading_break); // Eat the following intendation spaces and line breaks. try!(self.block_scalar_breaks(&mut indent, &mut trailing_breaks)); From de8e94ab34924597e9f58a88290b2d5f1c2121e6 Mon Sep 17 00:00:00 2001 From: Yuheng Chen Date: Thu, 17 Sep 2015 18:08:58 +0800 Subject: [PATCH 050/380] Bump to 0.2.2 --- saphyr/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/saphyr/Cargo.toml b/saphyr/Cargo.toml index a511b62..a120413 100644 --- a/saphyr/Cargo.toml +++ b/saphyr/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "yaml-rust" -version = "0.2.1" +version = "0.2.2" authors = ["Yuheng Chen "] homepage = "http://chyh1990.github.io/yaml-rust/" documentation = "http://chyh1990.github.io/yaml-rust/doc/yaml_rust/" From 071d338e0e6c391124389f8d606a338310a4015d Mon Sep 17 00:00:00 2001 From: Yuheng Chen Date: Wed, 16 Dec 2015 15:10:02 +0800 Subject: [PATCH 051/380] Add alias deserialize support --- saphyr/.travis.yml | 1 + saphyr/src/yaml.rs | 69 ++++++++++++++++++++++++++++++++++++---------- 2 files changed, 55 insertions(+), 15 deletions(-) diff --git a/saphyr/.travis.yml b/saphyr/.travis.yml index 7b1d66e..5e1e170 100644 --- a/saphyr/.travis.yml +++ b/saphyr/.travis.yml @@ -2,6 +2,7 @@ language: rust rust: - 1.0.0 - 1.1.0 + - 1.5.0 - nightly env: global: diff --git a/saphyr/src/yaml.rs b/saphyr/src/yaml.rs index f1f06c4..3e8f820 100644 --- a/saphyr/src/yaml.rs +++ b/saphyr/src/yaml.rs @@ -53,8 +53,10 @@ pub type Hash = BTreeMap; pub struct YamlLoader { docs: Vec, // states - doc_stack: Vec, + // (current node, anchor_id) tuple + doc_stack: Vec<(Yaml, usize)>, key_stack: Vec, + anchor_map: BTreeMap, } impl EventReceiver for YamlLoader { @@ -68,19 +70,19 @@ impl EventReceiver for YamlLoader { match self.doc_stack.len() { // empty document 0 => self.docs.push(Yaml::BadValue), - 1 => self.docs.push(self.doc_stack.pop().unwrap()), + 1 => self.docs.push(self.doc_stack.pop().unwrap().0), _ => unreachable!() } }, - Event::SequenceStart(_) => { - self.doc_stack.push(Yaml::Array(Vec::new())); + Event::SequenceStart(aid) => { + self.doc_stack.push((Yaml::Array(Vec::new()), aid)); }, Event::SequenceEnd => { let node = self.doc_stack.pop().unwrap(); self.insert_new_node(node); }, - Event::MappingStart(_) => { - self.doc_stack.push(Yaml::Hash(Hash::new())); + Event::MappingStart(aid) => { + self.doc_stack.push((Yaml::Hash(Hash::new()), aid)); self.key_stack.push(Yaml::BadValue); }, Event::MappingEnd => { @@ -88,7 +90,7 @@ impl EventReceiver for YamlLoader { let node = self.doc_stack.pop().unwrap(); self.insert_new_node(node); }, - Event::Scalar(ref v, style, _, ref tag) => { + Event::Scalar(ref v, style, aid, ref tag) => { let node = if style != TScalarStyle::Plain { Yaml::String(v.clone()) } else { @@ -133,11 +135,14 @@ impl EventReceiver for YamlLoader { } }; - self.insert_new_node(node); + self.insert_new_node((node, aid)); }, Event::Alias(id) => { - // XXX(chenyh): how to handle alias? - self.insert_new_node(Yaml::Alias(id)); + let n = match self.anchor_map.get(&id) { + Some(v) => v.clone(), + None => Yaml::BadValue, + }; + self.insert_new_node((n, 0)); } _ => { /* ignore */ } } @@ -146,21 +151,25 @@ impl EventReceiver for YamlLoader { } impl YamlLoader { - fn insert_new_node(&mut self, node: Yaml) { + fn insert_new_node(&mut self, node: (Yaml, usize)) { + // valid anchor id starts from 1 + if node.1 > 0 { + self.anchor_map.insert(node.1, node.0.clone()); + } if !self.doc_stack.is_empty() { let parent = self.doc_stack.last_mut().unwrap(); match *parent { - Yaml::Array(ref mut v) => v.push(node), - Yaml::Hash(ref mut h) => { + (Yaml::Array(ref mut v), _) => v.push(node.0), + (Yaml::Hash(ref mut h), _) => { let mut cur_key = self.key_stack.last_mut().unwrap(); // current node is a key if cur_key.is_badvalue() { - *cur_key = node; + *cur_key = node.0; // current node is a value } else { let mut newkey = Yaml::BadValue; mem::swap(&mut newkey, cur_key); - h.insert(newkey, node); + h.insert(newkey, node.0); } }, _ => unreachable!(), @@ -175,6 +184,7 @@ impl YamlLoader { docs: Vec::new(), doc_stack: Vec::new(), key_stack: Vec::new(), + anchor_map: BTreeMap::new(), }; let mut parser = Parser::new(source.chars()); try!(parser.load(&mut loader, true)); @@ -327,6 +337,35 @@ a7: 你好 assert_eq!(out.len(), 3); } + #[test] + fn test_anchor() { + let s = +" +a1: &DEFAULT + b1: 4 + b2: d +a2: *DEFAULT +"; + let out = YamlLoader::load_from_str(&s).unwrap(); + let doc = &out[0]; + assert_eq!(doc["a2"]["b1"].as_i64().unwrap(), 4); + } + + #[test] + fn test_bad_anchor() { + let s = +" +a1: &DEFAULT + b1: 4 + b2: *DEFAULT +"; + let out = YamlLoader::load_from_str(&s).unwrap(); + let doc = &out[0]; + assert_eq!(doc["a1"]["b2"], Yaml::BadValue); + + } + + #[test] fn test_plain_datatype() { let s = From 31ace0a89d679a36a03f1a6dc874eaf2324e3b9c Mon Sep 17 00:00:00 2001 From: Yuheng Chen Date: Thu, 24 Dec 2015 14:55:49 +0800 Subject: [PATCH 052/380] Bump to 0.3.0 Add alias support --- saphyr/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/saphyr/Cargo.toml b/saphyr/Cargo.toml index a120413..1552e6b 100644 --- a/saphyr/Cargo.toml +++ b/saphyr/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "yaml-rust" -version = "0.2.2" +version = "0.3.0" authors = ["Yuheng Chen "] homepage = "http://chyh1990.github.io/yaml-rust/" documentation = "http://chyh1990.github.io/yaml-rust/doc/yaml_rust/" From 316cc0e5066948e9ef8f3877f26580333cb66ce6 Mon Sep 17 00:00:00 2001 From: Yuheng Chen Date: Mon, 11 Jan 2016 12:53:19 +0800 Subject: [PATCH 053/380] Relicense under MIT/Apache-2.0 As recommended by https://github.com/chyh1990/yaml-rust/issues/11 --- saphyr/Cargo.toml | 2 +- saphyr/LICENSE-APACHE | 201 ++++++++++++++++++++++++++++++++ saphyr/{LICENSE => LICENSE-MIT} | 0 saphyr/{Readme.md => README.md} | 14 +++ 4 files changed, 216 insertions(+), 1 deletion(-) create mode 100644 saphyr/LICENSE-APACHE rename saphyr/{LICENSE => LICENSE-MIT} (100%) rename saphyr/{Readme.md => README.md} (86%) diff --git a/saphyr/Cargo.toml b/saphyr/Cargo.toml index 1552e6b..56ad109 100644 --- a/saphyr/Cargo.toml +++ b/saphyr/Cargo.toml @@ -4,6 +4,6 @@ version = "0.3.0" authors = ["Yuheng Chen "] homepage = "http://chyh1990.github.io/yaml-rust/" documentation = "http://chyh1990.github.io/yaml-rust/doc/yaml_rust/" -license = "MIT" +license = "MIT/Apache-2.0" description = "The missing YAML 1.2 parser for rust" repository = "https://github.com/chyh1990/yaml-rust" diff --git a/saphyr/LICENSE-APACHE b/saphyr/LICENSE-APACHE new file mode 100644 index 0000000..16fe87b --- /dev/null +++ b/saphyr/LICENSE-APACHE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + +TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + +1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + +2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + +3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + +4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + +5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + +6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + +7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + +8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + +9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + +END OF TERMS AND CONDITIONS + +APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + +Copyright [yyyy] [name of copyright owner] + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. diff --git a/saphyr/LICENSE b/saphyr/LICENSE-MIT similarity index 100% rename from saphyr/LICENSE rename to saphyr/LICENSE-MIT diff --git a/saphyr/Readme.md b/saphyr/README.md similarity index 86% rename from saphyr/Readme.md rename to saphyr/README.md index 70b5019..dd0a709 100644 --- a/saphyr/Readme.md +++ b/saphyr/README.md @@ -118,6 +118,20 @@ so it may not be a huge problem for most users. * Tag directive * Alias while desearilization +## License + +Licensed under either of + + * Apache License, Version 2.0 ([LICENSE-APACHE](LICENSE-APACHE) or http://www.apache.org/licenses/LICENSE-2.0) + * MIT license ([LICENSE-MIT](LICENSE-MIT) or http://opensource.org/licenses/MIT) + +at your option. + ## Contribution Fork & PR on Github. + +Unless you explicitly state otherwise, any contribution intentionally submitted +for inclusion in the work by you, as defined in the Apache-2.0 license, shall be dual licensed as above, without any +additional terms or conditions. + From 663145bcb6c80f0046d78fad0898a0a190f1c9c5 Mon Sep 17 00:00:00 2001 From: Yuheng Chen Date: Mon, 11 Jan 2016 12:55:05 +0800 Subject: [PATCH 054/380] Bump to 0.3.1 No code change, just for relicense. --- saphyr/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/saphyr/Cargo.toml b/saphyr/Cargo.toml index 56ad109..d427e4b 100644 --- a/saphyr/Cargo.toml +++ b/saphyr/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "yaml-rust" -version = "0.3.0" +version = "0.3.1" authors = ["Yuheng Chen "] homepage = "http://chyh1990.github.io/yaml-rust/" documentation = "http://chyh1990.github.io/yaml-rust/doc/yaml_rust/" From 61996a44c4909047f75daf211d530659111217c9 Mon Sep 17 00:00:00 2001 From: GaveUp Date: Sun, 7 Feb 2016 15:52:20 -0600 Subject: [PATCH 055/380] Add hex and octal integer support. --- saphyr/src/yaml.rs | 27 ++++++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/saphyr/src/yaml.rs b/saphyr/src/yaml.rs index 3e8f820..649c271 100644 --- a/saphyr/src/yaml.rs +++ b/saphyr/src/yaml.rs @@ -1,6 +1,7 @@ use std::collections::BTreeMap; use std::ops::Index; use std::string; +use std::i64; use std::str::FromStr; use std::mem; use parser::*; @@ -246,6 +247,18 @@ impl Yaml { } pub fn from_str(v: &str) -> Yaml { + if v.starts_with("0x") { + let n = i64::from_str_radix(&v[2..], 16); + if n.is_ok() { + return Yaml::Integer(n.unwrap()); + } + } + if v.starts_with("0o") { + let n = i64::from_str_radix(&v[2..], 8); + if n.is_ok() { + return Yaml::Integer(n.unwrap()); + } + } match v { "~" | "null" => Yaml::Null, "true" => Yaml::Boolean(true), @@ -387,11 +400,16 @@ a1: &DEFAULT - !!null ~ - !!bool true - !!bool false +- 0xFF # bad values - !!int string - !!float string - !!bool null - !!null val +- 0o77 +- [ 0xF, 0xF ] +- +12345 +- [ true, false ] "; let out = YamlLoader::load_from_str(&s).unwrap(); let doc = &out[0]; @@ -413,9 +431,16 @@ a1: &DEFAULT assert!(doc[14].is_null()); assert_eq!(doc[15].as_bool().unwrap(), true); assert_eq!(doc[16].as_bool().unwrap(), false); - assert!(doc[17].is_badvalue()); + assert_eq!(doc[17].as_i64().unwrap(), 255); assert!(doc[18].is_badvalue()); assert!(doc[19].is_badvalue()); assert!(doc[20].is_badvalue()); + assert!(doc[21].is_badvalue()); + assert_eq!(doc[22].as_i64().unwrap(), 63); + assert_eq!(doc[23][0].as_i64().unwrap(), 15); + assert_eq!(doc[23][1].as_i64().unwrap(), 15); + assert_eq!(doc[24].as_i64().unwrap(), 12345); + assert!(doc[25][0].as_bool().unwrap()); + assert!(!doc[25][1].as_bool().unwrap()); } } From 6b7de98875d1757dde74a2afd2104dcab0bfd25d Mon Sep 17 00:00:00 2001 From: GaveUp Date: Sun, 7 Feb 2016 16:21:05 -0600 Subject: [PATCH 056/380] Fix tests on 1.0.0 and 1.1.0. --- saphyr/src/yaml.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/saphyr/src/yaml.rs b/saphyr/src/yaml.rs index 649c271..45f2fb8 100644 --- a/saphyr/src/yaml.rs +++ b/saphyr/src/yaml.rs @@ -259,6 +259,9 @@ impl Yaml { return Yaml::Integer(n.unwrap()); } } + if v.starts_with("+") && v[1..].parse::().is_ok() { + return Yaml::Integer(v[1..].parse::().unwrap()); + } match v { "~" | "null" => Yaml::Null, "true" => Yaml::Boolean(true), From 956f7625d1e682c92df4b2029e14bf903854c533 Mon Sep 17 00:00:00 2001 From: David Tolnay Date: Sat, 27 Feb 2016 15:29:34 -0800 Subject: [PATCH 057/380] Remove common suffix from TokenType enum See https://github.com/Manishearth/rust-clippy/wiki#enum_variant_names --- saphyr/src/parser.rs | 132 +++++++------- saphyr/src/scanner.rs | 410 +++++++++++++++++++++--------------------- saphyr/src/yaml.rs | 2 +- 3 files changed, 272 insertions(+), 272 deletions(-) diff --git a/saphyr/src/parser.rs b/saphyr/src/parser.rs index 9a11abf..a6cdf73 100644 --- a/saphyr/src/parser.rs +++ b/saphyr/src/parser.rs @@ -270,7 +270,7 @@ impl> Parser { let tok = try!(self.peek()); match tok.1 { - TokenType::StreamStartToken(_) => { + TokenType::StreamStart(_) => { self.state = State::ImplicitDocumentStart; self.skip(); Ok(Event::StreamStart) @@ -285,7 +285,7 @@ impl> Parser { if !implicit { loop { match tok.1 { - TokenType::DocumentEndToken => { + TokenType::DocumentEnd => { self.skip(); tok = try!(self.peek()); }, @@ -295,14 +295,14 @@ impl> Parser { } match tok.1 { - TokenType::StreamEndToken => { + TokenType::StreamEnd => { self.state = State::End; self.skip(); return Ok(Event::StreamEnd); }, - TokenType::VersionDirectiveToken(..) - | TokenType::TagDirectiveToken(..) - | TokenType::DocumentStartToken => { + TokenType::VersionDirective(..) + | TokenType::TagDirective(..) + | TokenType::DocumentStart => { // explicit document self._explict_document_start() }, @@ -323,14 +323,14 @@ impl> Parser { loop { let tok = try!(self.peek()); match tok.1 { - TokenType::VersionDirectiveToken(_, _) => { + TokenType::VersionDirective(_, _) => { // XXX parsing with warning according to spec //if major != 1 || minor > 2 { // return Err(ScanError::new(tok.0, // "found incompatible YAML document")); //} }, - TokenType::TagDirectiveToken(..) => { + TokenType::TagDirective(..) => { // TODO add tag directive }, _ => break @@ -344,7 +344,7 @@ impl> Parser { fn _explict_document_start(&mut self) -> ParseResult { try!(self.parser_process_directives()); let tok = try!(self.peek()); - if tok.1 != TokenType::DocumentStartToken { + if tok.1 != TokenType::DocumentStart { return Err(ScanError::new(tok.0, "did not find expected ")); } self.push_state(State::DocumentEnd); @@ -356,11 +356,11 @@ impl> Parser { fn document_content(&mut self) -> ParseResult { let tok = try!(self.peek()); match tok.1 { - TokenType::VersionDirectiveToken(..) - |TokenType::TagDirectiveToken(..) - |TokenType::DocumentStartToken - |TokenType::DocumentEndToken - |TokenType::StreamEndToken => { + TokenType::VersionDirective(..) + |TokenType::TagDirective(..) + |TokenType::DocumentStart + |TokenType::DocumentEnd + |TokenType::StreamEnd => { self.pop_state(); // empty scalar Ok(Event::empty_scalar()) @@ -377,7 +377,7 @@ impl> Parser { let _start_mark = tok.0; match tok.1 { - TokenType::DocumentEndToken => { + TokenType::DocumentEnd => { self.skip(); _implicit = false; } @@ -406,7 +406,7 @@ impl> Parser { let mut anchor_id = 0; let mut tag = None; match tok.1 { - TokenType::AliasToken(name) => { + TokenType::Alias(name) => { self.pop_state(); self.skip(); match self.anchors.get(&name) { @@ -414,21 +414,21 @@ impl> Parser { Some(id) => return Ok(Event::Alias(*id)) } }, - TokenType::AnchorToken(name) => { + TokenType::Anchor(name) => { anchor_id = try!(self.register_anchor(&name, &tok.0)); self.skip(); tok = try!(self.peek()); - if let TokenType::TagToken(_, _) = tok.1 { + if let TokenType::Tag(_, _) = tok.1 { tag = Some(tok.1); self.skip(); tok = try!(self.peek()); } }, - TokenType::TagToken(..) => { + TokenType::Tag(..) => { tag = Some(tok.1); self.skip(); tok = try!(self.peek()); - if let TokenType::AnchorToken(name) = tok.1 { + if let TokenType::Anchor(name) = tok.1 { anchor_id = try!(self.register_anchor(&name, &tok.0)); self.skip(); tok = try!(self.peek()); @@ -437,28 +437,28 @@ impl> Parser { _ => {} } match tok.1 { - TokenType::BlockEntryToken if indentless_sequence => { + TokenType::BlockEntry if indentless_sequence => { self.state = State::IndentlessSequenceEntry; Ok(Event::SequenceStart(anchor_id)) }, - TokenType::ScalarToken(style, v) => { + TokenType::Scalar(style, v) => { self.pop_state(); self.skip(); Ok(Event::Scalar(v, style, anchor_id, tag)) }, - TokenType::FlowSequenceStartToken => { + TokenType::FlowSequenceStart => { self.state = State::FlowSequenceFirstEntry; Ok(Event::SequenceStart(anchor_id)) }, - TokenType::FlowMappingStartToken => { + TokenType::FlowMappingStart => { self.state = State::FlowMappingFirstKey; Ok(Event::MappingStart(anchor_id)) }, - TokenType::BlockSequenceStartToken if block => { + TokenType::BlockSequenceStart if block => { self.state = State::BlockSequenceFirstEntry; Ok(Event::SequenceStart(anchor_id)) }, - TokenType::BlockMappingStartToken if block => { + TokenType::BlockMappingStart if block => { self.state = State::BlockMappingFirstKey; Ok(Event::MappingStart(anchor_id)) }, @@ -472,7 +472,7 @@ impl> Parser { } fn block_mapping_key(&mut self, first: bool) -> ParseResult { - // skip BlockMappingStartToken + // skip BlockMappingStart if first { let _ = try!(self.peek()); //self.marks.push(tok.0); @@ -480,13 +480,13 @@ impl> Parser { } let tok = try!(self.peek()); match tok.1 { - TokenType::KeyToken => { + TokenType::Key => { self.skip(); let tok = try!(self.peek()); match tok.1 { - TokenType::KeyToken - | TokenType::ValueToken - | TokenType::BlockEndToken + TokenType::Key + | TokenType::Value + | TokenType::BlockEnd => { self.state = State::BlockMappingValue; // empty scalar @@ -499,11 +499,11 @@ impl> Parser { } }, // XXX(chenyh): libyaml failed to parse spec 1.2, ex8.18 - TokenType::ValueToken => { + TokenType::Value => { self.state = State::BlockMappingValue; Ok(Event::empty_scalar()) }, - TokenType::BlockEndToken => { + TokenType::BlockEnd => { self.pop_state(); self.skip(); Ok(Event::MappingEnd) @@ -517,11 +517,11 @@ impl> Parser { fn block_mapping_value(&mut self) -> ParseResult { let tok = try!(self.peek()); match tok.1 { - TokenType::ValueToken => { + TokenType::Value => { self.skip(); let tok = try!(self.peek()); match tok.1 { - TokenType::KeyToken | TokenType::ValueToken | TokenType::BlockEndToken + TokenType::Key | TokenType::Value | TokenType::BlockEnd => { self.state = State::BlockMappingKey; // empty scalar @@ -548,9 +548,9 @@ impl> Parser { } let mut tok = try!(self.peek()); - if tok.1 != TokenType::FlowMappingEndToken { + if tok.1 != TokenType::FlowMappingEnd { if !first { - if tok.1 == TokenType::FlowEntryToken { + if tok.1 == TokenType::FlowEntry { self.skip(); tok = try!(self.peek()); } else { @@ -559,13 +559,13 @@ impl> Parser { } } - if tok.1 == TokenType::KeyToken { + if tok.1 == TokenType::Key { self.skip(); tok = try!(self.peek()); match tok.1 { - TokenType::ValueToken - | TokenType::FlowEntryToken - | TokenType::FlowMappingEndToken => { + TokenType::Value + | TokenType::FlowEntry + | TokenType::FlowMappingEnd => { self.state = State::FlowMappingValue; return Ok(Event::empty_scalar()); }, @@ -575,10 +575,10 @@ impl> Parser { } } // XXX libyaml fail ex 7.3, empty key - } else if tok.1 == TokenType::ValueToken { + } else if tok.1 == TokenType::Value { self.state = State::FlowMappingValue; return Ok(Event::empty_scalar()); - } else if tok.1 != TokenType::FlowMappingEndToken { + } else if tok.1 != TokenType::FlowMappingEnd { self.push_state(State::FlowMappingEmptyValue); return self.parse_node(false, false); } @@ -596,12 +596,12 @@ impl> Parser { return Ok(Event::empty_scalar()); } - if tok.1 == TokenType::ValueToken { + if tok.1 == TokenType::Value { self.skip(); let tok = try!(self.peek()); match tok.1 { - TokenType::FlowEntryToken - | TokenType::FlowMappingEndToken => { }, + TokenType::FlowEntry + | TokenType::FlowMappingEnd => { }, _ => { self.push_state(State::FlowMappingKey); return self.parse_node(false, false); @@ -614,7 +614,7 @@ impl> Parser { } fn flow_sequence_entry(&mut self, first: bool) -> ParseResult { - // skip FlowMappingStartToken + // skip FlowMappingStart if first { let _ = try!(self.peek()); //self.marks.push(tok.0); @@ -622,12 +622,12 @@ impl> Parser { } let mut tok = try!(self.peek()); match tok.1 { - TokenType::FlowSequenceEndToken => { + TokenType::FlowSequenceEnd => { self.pop_state(); self.skip(); return Ok(Event::SequenceEnd); }, - TokenType::FlowEntryToken if !first => { + TokenType::FlowEntry if !first => { self.skip(); tok = try!(self.peek()); }, @@ -638,12 +638,12 @@ impl> Parser { _ => { /* next */ } } match tok.1 { - TokenType::FlowSequenceEndToken => { + TokenType::FlowSequenceEnd => { self.pop_state(); self.skip(); Ok(Event::SequenceEnd) }, - TokenType::KeyToken => { + TokenType::Key => { self.state = State::FlowSequenceEntryMappingKey; self.skip(); Ok(Event::MappingStart(0)) @@ -657,7 +657,7 @@ impl> Parser { fn indentless_sequence_entry(&mut self) -> ParseResult { let mut tok = try!(self.peek()); - if tok.1 != TokenType::BlockEntryToken { + if tok.1 != TokenType::BlockEntry { self.pop_state(); return Ok(Event::SequenceEnd); } @@ -665,10 +665,10 @@ impl> Parser { self.skip(); tok = try!(self.peek()); match tok.1 { - TokenType::BlockEntryToken - | TokenType::KeyToken - | TokenType::ValueToken - | TokenType::BlockEndToken => { + TokenType::BlockEntry + | TokenType::Key + | TokenType::Value + | TokenType::BlockEnd => { self.state = State::IndentlessSequenceEntry; Ok(Event::empty_scalar()) }, @@ -688,17 +688,17 @@ impl> Parser { } let mut tok = try!(self.peek()); match tok.1 { - TokenType::BlockEndToken => { + TokenType::BlockEnd => { self.pop_state(); self.skip(); Ok(Event::SequenceEnd) }, - TokenType::BlockEntryToken => { + TokenType::BlockEntry => { self.skip(); tok = try!(self.peek()); match tok.1 { - TokenType::BlockEntryToken - | TokenType::BlockEndToken => { + TokenType::BlockEntry + | TokenType::BlockEnd => { self.state = State::BlockSequenceEntry; Ok(Event::empty_scalar()) }, @@ -719,9 +719,9 @@ impl> Parser { let tok = try!(self.peek()); match tok.1 { - TokenType::ValueToken - | TokenType::FlowEntryToken - | TokenType::FlowSequenceEndToken => { + TokenType::Value + | TokenType::FlowEntry + | TokenType::FlowSequenceEnd => { self.skip(); self.state = State::FlowSequenceEntryMappingValue; Ok(Event::empty_scalar()) @@ -737,13 +737,13 @@ impl> Parser { let tok = try!(self.peek()); match tok.1 { - TokenType::ValueToken => { + TokenType::Value => { self.skip(); let tok = try!(self.peek()); self.state = State::FlowSequenceEntryMappingValue; match tok.1 { - TokenType::FlowEntryToken - | TokenType::FlowSequenceEndToken => { + TokenType::FlowEntry + | TokenType::FlowSequenceEnd => { self.state = State::FlowSequenceEntryMappingEnd; Ok(Event::empty_scalar()) }, diff --git a/saphyr/src/scanner.rs b/saphyr/src/scanner.rs index 4cfe2bc..16c3b61 100644 --- a/saphyr/src/scanner.rs +++ b/saphyr/src/scanner.rs @@ -71,30 +71,30 @@ impl fmt::Display for ScanError { #[derive(Clone, PartialEq, Debug, Eq)] pub enum TokenType { NoToken, - StreamStartToken(TEncoding), - StreamEndToken, + StreamStart(TEncoding), + StreamEnd, /// major, minor - VersionDirectiveToken(u32, u32), + VersionDirective(u32, u32), /// handle, prefix - TagDirectiveToken(String, String), - DocumentStartToken, - DocumentEndToken, - BlockSequenceStartToken, - BlockMappingStartToken, - BlockEndToken, - FlowSequenceStartToken, - FlowSequenceEndToken, - FlowMappingStartToken, - FlowMappingEndToken, - BlockEntryToken, - FlowEntryToken, - KeyToken, - ValueToken, - AliasToken(String), - AnchorToken(String), + TagDirective(String, String), + DocumentStart, + DocumentEnd, + BlockSequenceStart, + BlockMappingStart, + BlockEnd, + FlowSequenceStart, + FlowSequenceEnd, + FlowMappingStart, + FlowMappingEnd, + BlockEntry, + FlowEntry, + Key, + Value, + Alias(String), + Anchor(String), /// handle, suffix - TagToken(String, String), - ScalarToken(TScalarStyle, String) + Tag(String, String), + Scalar(TScalarStyle, String) } #[derive(Clone, PartialEq, Debug, Eq)] @@ -348,7 +348,7 @@ impl> Scanner { && self.buffer[1] == '-' && self.buffer[2] == '-' && is_blankz(self.buffer[3]) { - try!(self.fetch_document_indicator(TokenType::DocumentStartToken)); + try!(self.fetch_document_indicator(TokenType::DocumentStart)); return Ok(()); } @@ -357,17 +357,17 @@ impl> Scanner { && self.buffer[1] == '.' && self.buffer[2] == '.' && is_blankz(self.buffer[3]) { - try!(self.fetch_document_indicator(TokenType::DocumentEndToken)); + try!(self.fetch_document_indicator(TokenType::DocumentEnd)); return Ok(()); } let c = self.buffer[0]; let nc = self.buffer[1]; match c { - '[' => self.fetch_flow_collection_start(TokenType::FlowSequenceStartToken), - '{' => self.fetch_flow_collection_start(TokenType::FlowMappingStartToken), - ']' => self.fetch_flow_collection_end(TokenType::FlowSequenceEndToken), - '}' => self.fetch_flow_collection_end(TokenType::FlowMappingEndToken), + '[' => self.fetch_flow_collection_start(TokenType::FlowSequenceStart), + '{' => self.fetch_flow_collection_start(TokenType::FlowMappingStart), + ']' => self.fetch_flow_collection_end(TokenType::FlowSequenceEnd), + '}' => self.fetch_flow_collection_end(TokenType::FlowMappingEnd), ',' => self.fetch_flow_entry(), '-' if is_blankz(nc) => self.fetch_block_entry(), '?' if self.flow_level > 0 || is_blankz(nc) => self.fetch_key(), @@ -405,7 +405,7 @@ impl> Scanner { self.tokens_parsed += 1; match t.1 { - TokenType::StreamEndToken => self.stream_end_produced = true, + TokenType::StreamEnd => self.stream_end_produced = true, _ => {} } Ok(Some(t)) @@ -473,7 +473,7 @@ impl> Scanner { self.indent = -1; self.stream_start_produced = true; self.allow_simple_key(); - self.tokens.push_back(Token(mark, TokenType::StreamStartToken(TEncoding::Utf8))); + self.tokens.push_back(Token(mark, TokenType::StreamStart(TEncoding::Utf8))); self.simple_keys.push(SimpleKey::new(Marker::new(0,0,0))); } @@ -488,7 +488,7 @@ impl> Scanner { try!(self.remove_simple_key()); self.disallow_simple_key(); - self.tokens.push_back(Token(self.mark, TokenType::StreamEndToken)); + self.tokens.push_back(Token(self.mark, TokenType::StreamEnd)); Ok(()) } @@ -526,7 +526,7 @@ impl> Scanner { self.lookahead(1); } // XXX return an empty TagDirective token - Token(start_mark, TokenType::TagDirectiveToken(String::new(), String::new())) + Token(start_mark, TokenType::TagDirective(String::new(), String::new())) // return Err(ScanError::new(start_mark, // "while scanning a directive, found unknown directive name")) } @@ -578,7 +578,7 @@ impl> Scanner { let minor = try!(self.scan_version_directive_number(mark)); - Ok(Token(*mark, TokenType::VersionDirectiveToken(major, minor))) + Ok(Token(*mark, TokenType::VersionDirective(major, minor))) } fn scan_directive_name(&mut self) -> Result { @@ -652,7 +652,7 @@ impl> Scanner { Err(ScanError::new(*mark, "while scanning TAG, did not find expected whitespace or line break")) } else { - Ok(Token(*mark, TokenType::TagDirectiveToken(handle, prefix))) + Ok(Token(*mark, TokenType::TagDirective(handle, prefix))) } } @@ -710,7 +710,7 @@ impl> Scanner { self.lookahead(1); if is_blankz(self.ch()) { // XXX: ex 7.2, an empty scalar can follow a secondary tag - Ok(Token(start_mark, TokenType::TagToken(handle, suffix))) + Ok(Token(start_mark, TokenType::Tag(handle, suffix))) } else { Err(ScanError::new(start_mark, "while scanning a tag, did not find expected whitespace or line break")) @@ -883,9 +883,9 @@ impl> Scanner { } if alias { - Ok(Token(start_mark, TokenType::AliasToken(string))) + Ok(Token(start_mark, TokenType::Alias(string))) } else { - Ok(Token(start_mark, TokenType::AnchorToken(string))) + Ok(Token(start_mark, TokenType::Anchor(string))) } } @@ -924,7 +924,7 @@ impl> Scanner { let start_mark = self.mark; self.skip(); - self.tokens.push_back(Token(start_mark, TokenType::FlowEntryToken)); + self.tokens.push_back(Token(start_mark, TokenType::FlowEntry)); Ok(()) } @@ -949,7 +949,7 @@ impl> Scanner { let mark = self.mark; // generate BLOCK-SEQUENCE-START if indented - self.roll_indent(mark.col, None, TokenType::BlockSequenceStartToken, mark); + self.roll_indent(mark.col, None, TokenType::BlockSequenceStart, mark); } else { // - * only allowed in block unreachable!(); @@ -960,7 +960,7 @@ impl> Scanner { let start_mark = self.mark; self.skip(); - self.tokens.push_back(Token(start_mark, TokenType::BlockEntryToken)); + self.tokens.push_back(Token(start_mark, TokenType::BlockEntry)); Ok(()) } @@ -1119,9 +1119,9 @@ impl> Scanner { } if literal { - Ok(Token(start_mark, TokenType::ScalarToken(TScalarStyle::Literal, string))) + Ok(Token(start_mark, TokenType::Scalar(TScalarStyle::Literal, string))) } else { - Ok(Token(start_mark, TokenType::ScalarToken(TScalarStyle::Foled, string))) + Ok(Token(start_mark, TokenType::Scalar(TScalarStyle::Foled, string))) } } @@ -1353,9 +1353,9 @@ impl> Scanner { self.skip(); if single { - Ok(Token(start_mark, TokenType::ScalarToken(TScalarStyle::SingleQuoted, string))) + Ok(Token(start_mark, TokenType::Scalar(TScalarStyle::SingleQuoted, string))) } else { - Ok(Token(start_mark, TokenType::ScalarToken(TScalarStyle::DoubleQuoted, string))) + Ok(Token(start_mark, TokenType::Scalar(TScalarStyle::DoubleQuoted, string))) } } @@ -1477,7 +1477,7 @@ impl> Scanner { self.allow_simple_key(); } - Ok(Token(start_mark, TokenType::ScalarToken(TScalarStyle::Plain, string))) + Ok(Token(start_mark, TokenType::Scalar(TScalarStyle::Plain, string))) } fn fetch_key(&mut self) -> ScanResult { @@ -1488,7 +1488,7 @@ impl> Scanner { return Err(ScanError::new(self.mark, "mapping keys are not allowed in this context")); } self.roll_indent(start_mark.col, None, - TokenType::BlockMappingStartToken, start_mark); + TokenType::BlockMappingStart, start_mark); } try!(self.remove_simple_key()); @@ -1500,7 +1500,7 @@ impl> Scanner { } self.skip(); - self.tokens.push_back(Token(start_mark, TokenType::KeyToken)); + self.tokens.push_back(Token(start_mark, TokenType::Key)); Ok(()) } @@ -1509,13 +1509,13 @@ impl> Scanner { let start_mark = self.mark; if sk.possible { // insert simple key - let tok = Token(sk.mark, TokenType::KeyToken); + let tok = Token(sk.mark, TokenType::Key); let tokens_parsed = self.tokens_parsed; self.insert_token(sk.token_number - tokens_parsed, tok); // Add the BLOCK-MAPPING-START token if needed. self.roll_indent(sk.mark.col, Some(sk.token_number), - TokenType::BlockMappingStartToken, start_mark); + TokenType::BlockMappingStart, start_mark); self.simple_keys.last_mut().unwrap().possible = false; self.disallow_simple_key(); @@ -1528,7 +1528,7 @@ impl> Scanner { } self.roll_indent(start_mark.col, None, - TokenType::BlockMappingStartToken, start_mark); + TokenType::BlockMappingStart, start_mark); } if self.flow_level == 0 { @@ -1538,7 +1538,7 @@ impl> Scanner { } } self.skip(); - self.tokens.push_back(Token(start_mark, TokenType::ValueToken)); + self.tokens.push_back(Token(start_mark, TokenType::Value)); Ok(()) } @@ -1565,7 +1565,7 @@ impl> Scanner { return; } while self.indent > col { - self.tokens.push_back(Token(self.mark, TokenType::BlockEndToken)); + self.tokens.push_back(Token(self.mark, TokenType::BlockEnd)); self.indent = self.indents.pop().unwrap(); } } @@ -1620,7 +1620,7 @@ macro_rules! next_scalar { ($p:ident, $tk:expr, $v:expr) => {{ let tok = $p.next().unwrap(); match tok.1 { - ScalarToken(style, ref v) => { + Scalar(style, ref v) => { assert_eq!(style, $tk); assert_eq!(v, $v); }, @@ -1640,8 +1640,8 @@ macro_rules! end { fn test_empty() { let s = ""; let mut p = Scanner::new(s.chars()); - next!(p, StreamStartToken(..)); - next!(p, StreamEndToken); + next!(p, StreamStart(..)); + next!(p, StreamEnd); end!(p); } @@ -1649,9 +1649,9 @@ macro_rules! end { fn test_scalar() { let s = "a scalar"; let mut p = Scanner::new(s.chars()); - next!(p, StreamStartToken(..)); - next!(p, ScalarToken(TScalarStyle::Plain, _)); - next!(p, StreamEndToken); + next!(p, StreamStart(..)); + next!(p, Scalar(TScalarStyle::Plain, _)); + next!(p, StreamEnd); end!(p); } @@ -1663,11 +1663,11 @@ macro_rules! end { ... "; let mut p = Scanner::new(s.chars()); - next!(p, StreamStartToken(..)); - next!(p, DocumentStartToken); - next!(p, ScalarToken(TScalarStyle::SingleQuoted, _)); - next!(p, DocumentEndToken); - next!(p, StreamEndToken); + next!(p, StreamStart(..)); + next!(p, DocumentStart); + next!(p, Scalar(TScalarStyle::SingleQuoted, _)); + next!(p, DocumentEnd); + next!(p, StreamEnd); end!(p); } @@ -1682,13 +1682,13 @@ macro_rules! end { 'a scalar' "; let mut p = Scanner::new(s.chars()); - next!(p, StreamStartToken(..)); - next!(p, ScalarToken(TScalarStyle::SingleQuoted, _)); - next!(p, DocumentStartToken); - next!(p, ScalarToken(TScalarStyle::SingleQuoted, _)); - next!(p, DocumentStartToken); - next!(p, ScalarToken(TScalarStyle::SingleQuoted, _)); - next!(p, StreamEndToken); + next!(p, StreamStart(..)); + next!(p, Scalar(TScalarStyle::SingleQuoted, _)); + next!(p, DocumentStart); + next!(p, Scalar(TScalarStyle::SingleQuoted, _)); + next!(p, DocumentStart); + next!(p, Scalar(TScalarStyle::SingleQuoted, _)); + next!(p, StreamEnd); end!(p); } @@ -1696,15 +1696,15 @@ macro_rules! end { fn test_a_flow_sequence() { let s = "[item 1, item 2, item 3]"; let mut p = Scanner::new(s.chars()); - next!(p, StreamStartToken(..)); - next!(p, FlowSequenceStartToken); + next!(p, StreamStart(..)); + next!(p, FlowSequenceStart); next_scalar!(p, TScalarStyle::Plain, "item 1"); - next!(p, FlowEntryToken); - next!(p, ScalarToken(TScalarStyle::Plain, _)); - next!(p, FlowEntryToken); - next!(p, ScalarToken(TScalarStyle::Plain, _)); - next!(p, FlowSequenceEndToken); - next!(p, StreamEndToken); + next!(p, FlowEntry); + next!(p, Scalar(TScalarStyle::Plain, _)); + next!(p, FlowEntry); + next!(p, Scalar(TScalarStyle::Plain, _)); + next!(p, FlowSequenceEnd); + next!(p, StreamEnd); end!(p); } @@ -1718,20 +1718,20 @@ macro_rules! end { } "; let mut p = Scanner::new(s.chars()); - next!(p, StreamStartToken(..)); - next!(p, FlowMappingStartToken); - next!(p, KeyToken); - next!(p, ScalarToken(TScalarStyle::Plain, _)); - next!(p, ValueToken); - next!(p, ScalarToken(TScalarStyle::Plain, _)); - next!(p, FlowEntryToken); - next!(p, KeyToken); + next!(p, StreamStart(..)); + next!(p, FlowMappingStart); + next!(p, Key); + next!(p, Scalar(TScalarStyle::Plain, _)); + next!(p, Value); + next!(p, Scalar(TScalarStyle::Plain, _)); + next!(p, FlowEntry); + next!(p, Key); next_scalar!(p, TScalarStyle::Plain, "a complex key"); - next!(p, ValueToken); - next!(p, ScalarToken(TScalarStyle::Plain, _)); - next!(p, FlowEntryToken); - next!(p, FlowMappingEndToken); - next!(p, StreamEndToken); + next!(p, Value); + next!(p, Scalar(TScalarStyle::Plain, _)); + next!(p, FlowEntry); + next!(p, FlowMappingEnd); + next!(p, StreamEnd); end!(p); } @@ -1749,32 +1749,32 @@ macro_rules! end { key 2: value 2 "; let mut p = Scanner::new(s.chars()); - next!(p, StreamStartToken(..)); - next!(p, BlockSequenceStartToken); - next!(p, BlockEntryToken); + next!(p, StreamStart(..)); + next!(p, BlockSequenceStart); + next!(p, BlockEntry); next_scalar!(p, TScalarStyle::Plain, "item 1"); - next!(p, BlockEntryToken); + next!(p, BlockEntry); next_scalar!(p, TScalarStyle::Plain, "item 2"); - next!(p, BlockEntryToken); - next!(p, BlockSequenceStartToken); - next!(p, BlockEntryToken); + next!(p, BlockEntry); + next!(p, BlockSequenceStart); + next!(p, BlockEntry); next_scalar!(p, TScalarStyle::Plain, "item 3.1"); - next!(p, BlockEntryToken); + next!(p, BlockEntry); next_scalar!(p, TScalarStyle::Plain, "item 3.2"); - next!(p, BlockEndToken); - next!(p, BlockEntryToken); - next!(p, BlockMappingStartToken); - next!(p, KeyToken); + next!(p, BlockEnd); + next!(p, BlockEntry); + next!(p, BlockMappingStart); + next!(p, Key); next_scalar!(p, TScalarStyle::Plain, "key 1"); - next!(p, ValueToken); + next!(p, Value); next_scalar!(p, TScalarStyle::Plain, "value 1"); - next!(p, KeyToken); + next!(p, Key); next_scalar!(p, TScalarStyle::Plain, "key 2"); - next!(p, ValueToken); + next!(p, Value); next_scalar!(p, TScalarStyle::Plain, "value 2"); - next!(p, BlockEndToken); - next!(p, BlockEndToken); - next!(p, StreamEndToken); + next!(p, BlockEnd); + next!(p, BlockEnd); + next!(p, StreamEnd); end!(p); } @@ -1793,40 +1793,40 @@ a sequence: - item 2 "; let mut p = Scanner::new(s.chars()); - next!(p, StreamStartToken(..)); - next!(p, BlockMappingStartToken); - next!(p, KeyToken); - next!(p, ScalarToken(_, _)); - next!(p, ValueToken); - next!(p, ScalarToken(_, _)); - next!(p, KeyToken); - next!(p, ScalarToken(_, _)); - next!(p, ValueToken); - next!(p, ScalarToken(_, _)); - next!(p, KeyToken); - next!(p, ScalarToken(_, _)); - next!(p, ValueToken); // libyaml comment seems to be wrong - next!(p, BlockMappingStartToken); - next!(p, KeyToken); - next!(p, ScalarToken(_, _)); - next!(p, ValueToken); - next!(p, ScalarToken(_, _)); - next!(p, KeyToken); - next!(p, ScalarToken(_, _)); - next!(p, ValueToken); - next!(p, ScalarToken(_, _)); - next!(p, BlockEndToken); - next!(p, KeyToken); - next!(p, ScalarToken(_, _)); - next!(p, ValueToken); - next!(p, BlockSequenceStartToken); - next!(p, BlockEntryToken); - next!(p, ScalarToken(_, _)); - next!(p, BlockEntryToken); - next!(p, ScalarToken(_, _)); - next!(p, BlockEndToken); - next!(p, BlockEndToken); - next!(p, StreamEndToken); + next!(p, StreamStart(..)); + next!(p, BlockMappingStart); + next!(p, Key); + next!(p, Scalar(_, _)); + next!(p, Value); + next!(p, Scalar(_, _)); + next!(p, Key); + next!(p, Scalar(_, _)); + next!(p, Value); + next!(p, Scalar(_, _)); + next!(p, Key); + next!(p, Scalar(_, _)); + next!(p, Value); // libyaml comment seems to be wrong + next!(p, BlockMappingStart); + next!(p, Key); + next!(p, Scalar(_, _)); + next!(p, Value); + next!(p, Scalar(_, _)); + next!(p, Key); + next!(p, Scalar(_, _)); + next!(p, Value); + next!(p, Scalar(_, _)); + next!(p, BlockEnd); + next!(p, Key); + next!(p, Scalar(_, _)); + next!(p, Value); + next!(p, BlockSequenceStart); + next!(p, BlockEntry); + next!(p, Scalar(_, _)); + next!(p, BlockEntry); + next!(p, Scalar(_, _)); + next!(p, BlockEnd); + next!(p, BlockEnd); + next!(p, StreamEnd); end!(p); } @@ -1840,17 +1840,17 @@ key: - item 2 "; let mut p = Scanner::new(s.chars()); - next!(p, StreamStartToken(..)); - next!(p, BlockMappingStartToken); - next!(p, KeyToken); + next!(p, StreamStart(..)); + next!(p, BlockMappingStart); + next!(p, Key); next_scalar!(p, TScalarStyle::Plain, "key"); - next!(p, ValueToken); - next!(p, BlockEntryToken); + next!(p, Value); + next!(p, BlockEntry); next_scalar!(p, TScalarStyle::Plain, "item 1"); - next!(p, BlockEntryToken); + next!(p, BlockEntry); next_scalar!(p, TScalarStyle::Plain, "item 2"); - next!(p, BlockEndToken); - next!(p, StreamEndToken); + next!(p, BlockEnd); + next!(p, StreamEnd); end!(p); } @@ -1866,35 +1866,35 @@ key: : complex value "; let mut p = Scanner::new(s.chars()); - next!(p, StreamStartToken(..)); - next!(p, BlockSequenceStartToken); - next!(p, BlockEntryToken); - next!(p, BlockSequenceStartToken); - next!(p, BlockEntryToken); + next!(p, StreamStart(..)); + next!(p, BlockSequenceStart); + next!(p, BlockEntry); + next!(p, BlockSequenceStart); + next!(p, BlockEntry); next_scalar!(p, TScalarStyle::Plain, "item 1"); - next!(p, BlockEntryToken); + next!(p, BlockEntry); next_scalar!(p, TScalarStyle::Plain, "item 2"); - next!(p, BlockEndToken); - next!(p, BlockEntryToken); - next!(p, BlockMappingStartToken); - next!(p, KeyToken); + next!(p, BlockEnd); + next!(p, BlockEntry); + next!(p, BlockMappingStart); + next!(p, Key); next_scalar!(p, TScalarStyle::Plain, "key 1"); - next!(p, ValueToken); + next!(p, Value); next_scalar!(p, TScalarStyle::Plain, "value 1"); - next!(p, KeyToken); + next!(p, Key); next_scalar!(p, TScalarStyle::Plain, "key 2"); - next!(p, ValueToken); + next!(p, Value); next_scalar!(p, TScalarStyle::Plain, "value 2"); - next!(p, BlockEndToken); - next!(p, BlockEntryToken); - next!(p, BlockMappingStartToken); - next!(p, KeyToken); + next!(p, BlockEnd); + next!(p, BlockEntry); + next!(p, BlockMappingStart); + next!(p, Key); next_scalar!(p, TScalarStyle::Plain, "complex key"); - next!(p, ValueToken); + next!(p, Value); next_scalar!(p, TScalarStyle::Plain, "complex value"); - next!(p, BlockEndToken); - next!(p, BlockEndToken); - next!(p, StreamEndToken); + next!(p, BlockEnd); + next!(p, BlockEnd); + next!(p, StreamEnd); end!(p); } @@ -1910,32 +1910,32 @@ key: key 2: value 2 "; let mut p = Scanner::new(s.chars()); - next!(p, StreamStartToken(..)); - next!(p, BlockMappingStartToken); - next!(p, KeyToken); + next!(p, StreamStart(..)); + next!(p, BlockMappingStart); + next!(p, Key); next_scalar!(p, TScalarStyle::Plain, "a sequence"); - next!(p, ValueToken); - next!(p, BlockSequenceStartToken); - next!(p, BlockEntryToken); + next!(p, Value); + next!(p, BlockSequenceStart); + next!(p, BlockEntry); next_scalar!(p, TScalarStyle::Plain, "item 1"); - next!(p, BlockEntryToken); + next!(p, BlockEntry); next_scalar!(p, TScalarStyle::Plain, "item 2"); - next!(p, BlockEndToken); - next!(p, KeyToken); + next!(p, BlockEnd); + next!(p, Key); next_scalar!(p, TScalarStyle::Plain, "a mapping"); - next!(p, ValueToken); - next!(p, BlockMappingStartToken); - next!(p, KeyToken); + next!(p, Value); + next!(p, BlockMappingStart); + next!(p, Key); next_scalar!(p, TScalarStyle::Plain, "key 1"); - next!(p, ValueToken); + next!(p, Value); next_scalar!(p, TScalarStyle::Plain, "value 1"); - next!(p, KeyToken); + next!(p, Key); next_scalar!(p, TScalarStyle::Plain, "key 2"); - next!(p, ValueToken); + next!(p, Value); next_scalar!(p, TScalarStyle::Plain, "value 2"); - next!(p, BlockEndToken); - next!(p, BlockEndToken); - next!(p, StreamEndToken); + next!(p, BlockEnd); + next!(p, BlockEnd); + next!(p, StreamEnd); end!(p); } @@ -1949,17 +1949,17 @@ key: } "; let mut p = Scanner::new(s.chars()); - next!(p, StreamStartToken(..)); - next!(p, FlowMappingStartToken); - next!(p, KeyToken); + next!(p, StreamStart(..)); + next!(p, FlowMappingStart); + next!(p, Key); next_scalar!(p, TScalarStyle::Plain, "foo"); - next!(p, ValueToken); - next!(p, FlowEntryToken); - next!(p, ValueToken); + next!(p, Value); + next!(p, FlowEntry); + next!(p, Value); next_scalar!(p, TScalarStyle::Plain, "bar"); - next!(p, FlowEntryToken); - next!(p, FlowMappingEndToken); - next!(p, StreamEndToken); + next!(p, FlowEntry); + next!(p, FlowMappingEnd); + next!(p, StreamEnd); end!(p); } @@ -1967,15 +1967,15 @@ key: fn test_scanner_cr() { let s = "---\r\n- tok1\r\n- tok2"; let mut p = Scanner::new(s.chars()); - next!(p, StreamStartToken(..)); - next!(p, DocumentStartToken); - next!(p, BlockSequenceStartToken); - next!(p, BlockEntryToken); + next!(p, StreamStart(..)); + next!(p, DocumentStart); + next!(p, BlockSequenceStart); + next!(p, BlockEntry); next_scalar!(p, TScalarStyle::Plain, "tok1"); - next!(p, BlockEntryToken); + next!(p, BlockEntry); next_scalar!(p, TScalarStyle::Plain, "tok2"); - next!(p, BlockEndToken); - next!(p, StreamEndToken); + next!(p, BlockEnd); + next!(p, StreamEnd); end!(p); } diff --git a/saphyr/src/yaml.rs b/saphyr/src/yaml.rs index 45f2fb8..d63e357 100644 --- a/saphyr/src/yaml.rs +++ b/saphyr/src/yaml.rs @@ -96,7 +96,7 @@ impl EventReceiver for YamlLoader { Yaml::String(v.clone()) } else { match tag { - &Some(TokenType::TagToken(ref handle, ref suffix)) => { + &Some(TokenType::Tag(ref handle, ref suffix)) => { // XXX tag:yaml.org,2002: if handle == "!!" { match suffix.as_ref() { From 06732a6b770275901f447e634e15876f26eba1fd Mon Sep 17 00:00:00 2001 From: David Tolnay Date: Sat, 27 Feb 2016 16:30:13 -0800 Subject: [PATCH 058/380] Resolve remaining clippy lints and add to Travis build --- saphyr/.travis.yml | 3 ++ saphyr/Cargo.toml | 3 ++ saphyr/src/emitter.rs | 40 +++++++++----------- saphyr/src/lib.rs | 8 +++- saphyr/src/parser.rs | 37 +++++++----------- saphyr/src/scanner.rs | 33 +++++++--------- saphyr/src/yaml.rs | 88 ++++++++++++++++++++++--------------------- 7 files changed, 103 insertions(+), 109 deletions(-) diff --git a/saphyr/.travis.yml b/saphyr/.travis.yml index 5e1e170..627e527 100644 --- a/saphyr/.travis.yml +++ b/saphyr/.travis.yml @@ -7,5 +7,8 @@ rust: env: global: - secure: ZUcdcbS8xbpdII9FSPx7VtoVhEkJhWL2Hb75tDlKDHNhfXqmt1NyB9q/2qXJ5Ulp4MnYXwsI8LsDloR6gvdB4xElay3smuF/neGvMjrqcB15/2p0MSQ+kZjMsNB6mlb5kAlm8ahduXIscppmw/V+m5hn3Vo+RQz/Ng+pzv0nc8KEXPMYrfRFg+a7FaeIbRbb8ir9EfflUSqArLq2hbi2WdhM3hFMcCIAUt6DD4x5ubjEg60OnIof5FDu0mXMXzQvUfHWOeYnsNcD/DLyDnm6FuQEzk37M4EB8op2SdBUeQMQ5abR3i2rd//DZpbTTEjud0PseWohGAwTwL2aoFrqs7uYQMx+vcGlOzAyDUm4VemVUa3F2BECdzU5BiujcKOITJEVUYWongld93arQq34FuXG/TO/T1XrerxfG6LTkTkKS5Vz7W8z6Rloa99WrQLJg1ZJP6itEU7G7KsDFVgRhsg7rz4/dV/2+cV4UvIwd4HlGXKCFlH0SClqvM3/7i/qqCD0689SJW6Zip+ly38MXlGy2s/AmReEasXvFer9JkOEIuPa8QTBNAjDlw7bWXi6neQWBIZU1VhZcSssnrVmEFN8fNklShzpw5DyKCv8jPTx2O6Dw8B/LgIK8uo+eaTXiO6zz/T1c/qEdsYslvxPA2D3F+ONpPU7238ykT4eRog= +script: + - cargo build --features clippy --verbose + - cargo test --verbose after_script: - curl http://www.rust-ci.org/artifacts/put?t=$RUSTCI_TOKEN | sh diff --git a/saphyr/Cargo.toml b/saphyr/Cargo.toml index d427e4b..46d8c7a 100644 --- a/saphyr/Cargo.toml +++ b/saphyr/Cargo.toml @@ -7,3 +7,6 @@ documentation = "http://chyh1990.github.io/yaml-rust/doc/yaml_rust/" license = "MIT/Apache-2.0" description = "The missing YAML 1.2 parser for rust" repository = "https://github.com/chyh1990/yaml-rust" + +[dependencies] +clippy = { version = "^0.*", optional = true } diff --git a/saphyr/src/emitter.rs b/saphyr/src/emitter.rs index 8810c95..fe9a6e1 100644 --- a/saphyr/src/emitter.rs +++ b/saphyr/src/emitter.rs @@ -114,8 +114,8 @@ impl<'a> YamlEmitter<'a> { } fn emit_node(&mut self, node: &Yaml) -> EmitResult { - match node { - &Yaml::Array(ref v) => { + match *node { + Yaml::Array(ref v) => { if v.is_empty() { try!(write!(self.writer, "[]")); Ok(()) @@ -124,21 +124,19 @@ impl<'a> YamlEmitter<'a> { try!(write!(self.writer, "\n")); } self.level += 1; - let mut cnt = 0usize; - for x in v { + for (cnt, x) in v.iter().enumerate() { + if cnt > 0 { + try!(write!(self.writer, "\n")); + } try!(self.write_indent()); try!(write!(self.writer, "- ")); try!(self.emit_node(x)); - cnt += 1; - if cnt < v.len() { - try!(write!(self.writer, "\n")); - } } self.level -= 1; Ok(()) } }, - &Yaml::Hash(ref h) => { + Yaml::Hash(ref h) => { if h.is_empty() { try!(self.writer.write_str("{}")); Ok(()) @@ -147,32 +145,30 @@ impl<'a> YamlEmitter<'a> { try!(write!(self.writer, "\n")); } self.level += 1; - let mut cnt = 0usize; - for (k, v) in h { + for (cnt, (k, v)) in h.iter().enumerate() { + if cnt > 0 { + try!(write!(self.writer, "\n")); + } try!(self.write_indent()); - match k { + match *k { // complex key is not supported - &Yaml::Array(_) | &Yaml::Hash(_) => { + Yaml::Array(_) | Yaml::Hash(_) => { return Err(EmitError::BadHashmapKey); }, _ => { try!(self.emit_node(k)); } } try!(write!(self.writer, ": ")); try!(self.emit_node(v)); - cnt += 1; - if cnt < h.len() { - try!(write!(self.writer, "\n")); - } } self.level -= 1; Ok(()) } }, - &Yaml::String(ref v) => { + Yaml::String(ref v) => { try!(escape_str(self.writer, v)); Ok(()) }, - &Yaml::Boolean(v) => { + Yaml::Boolean(v) => { if v { try!(self.writer.write_str("true")); } else { @@ -180,15 +176,15 @@ impl<'a> YamlEmitter<'a> { } Ok(()) }, - &Yaml::Integer(v) => { + Yaml::Integer(v) => { try!(write!(self.writer, "{}", v)); Ok(()) }, - &Yaml::Real(ref v) => { + Yaml::Real(ref v) => { try!(write!(self.writer, "{}", v)); Ok(()) }, - &Yaml::Null | &Yaml::BadValue => { + Yaml::Null | Yaml::BadValue => { try!(write!(self.writer, "~")); Ok(()) }, diff --git a/saphyr/src/lib.rs b/saphyr/src/lib.rs index 1cba925..dc66adc 100644 --- a/saphyr/src/lib.rs +++ b/saphyr/src/lib.rs @@ -36,6 +36,12 @@ //! //! ``` +#![cfg_attr(feature="clippy", feature(plugin))] +#![cfg_attr(feature="clippy", plugin(clippy))] +#![cfg_attr(feature="clippy", deny(clippy))] +#![cfg_attr(feature="clippy", warn(cyclomatic_complexity))] +#![cfg_attr(feature="clippy", allow(match_same_arms))] + pub mod yaml; pub mod scanner; pub mod parser; @@ -89,7 +95,7 @@ mod tests { emitter.dump(doc).unwrap(); } - assert!(writer.len() > 0); + assert!(!writer.is_empty()); } fn try_fail(s: &str) -> Result, ScanError> { diff --git a/saphyr/src/parser.rs b/saphyr/src/parser.rs index a6cdf73..65924d7 100644 --- a/saphyr/src/parser.rs +++ b/saphyr/src/parser.rs @@ -55,11 +55,11 @@ pub enum Event { impl Event { fn empty_scalar() -> Event { // a null scalar - Event::Scalar("~".to_string(), TScalarStyle::Plain, 0, None) + Event::Scalar("~".to_owned(), TScalarStyle::Plain, 0, None) } fn empty_scalar_with_anchor(anchor: usize, tag: TokenType) -> Event { - Event::Scalar("".to_string(), TScalarStyle::Plain, anchor, Some(tag)) + Event::Scalar("".to_owned(), TScalarStyle::Plain, anchor, Some(tag)) } } @@ -179,10 +179,7 @@ impl> Parser { fn load_node(&mut self, first_ev: &Event, recv: &mut R) -> Result<(), ScanError> { match *first_ev { - Event::Alias(..) => { - Ok(()) - }, - Event::Scalar(..) => { + Event::Alias(..) | Event::Scalar(..) => { Ok(()) }, Event::SequenceStart(_) => { @@ -275,7 +272,7 @@ impl> Parser { self.skip(); Ok(Event::StreamStart) }, - _ => return Err(ScanError::new(tok.0, + _ => Err(ScanError::new(tok.0, "did not find expected ")), } } @@ -283,14 +280,9 @@ impl> Parser { fn document_start(&mut self, implicit: bool) -> ParseResult { let mut tok = try!(self.peek()); if !implicit { - loop { - match tok.1 { - TokenType::DocumentEnd => { - self.skip(); - tok = try!(self.peek()); - }, - _ => break - } + while let TokenType::DocumentEnd = tok.1 { + self.skip(); + tok = try!(self.peek()); } } @@ -298,7 +290,7 @@ impl> Parser { TokenType::StreamEnd => { self.state = State::End; self.skip(); - return Ok(Event::StreamEnd); + Ok(Event::StreamEnd) }, TokenType::VersionDirective(..) | TokenType::TagDirective(..) @@ -376,12 +368,9 @@ impl> Parser { let tok = try!(self.peek()); let _start_mark = tok.0; - match tok.1 { - TokenType::DocumentEnd => { - self.skip(); - _implicit = false; - } - _ => {} + if let TokenType::DocumentEnd = tok.1 { + self.skip(); + _implicit = false; } // TODO tag handling @@ -389,7 +378,7 @@ impl> Parser { Ok(Event::DocumentEnd) } - fn register_anchor(&mut self, name: &String, _: &Marker) -> Result { + fn register_anchor(&mut self, name: &str, _: &Marker) -> Result { // anchors can be overrided/reused // if self.anchors.contains_key(name) { // return Err(ScanError::new(*mark, @@ -397,7 +386,7 @@ impl> Parser { // } let new_id = self.anchor_id; self.anchor_id += 1; - self.anchors.insert(name.clone(), new_id); + self.anchors.insert(name.to_owned(), new_id); Ok(new_id) } diff --git a/saphyr/src/scanner.rs b/saphyr/src/scanner.rs index 16c3b61..d0b5e2b 100644 --- a/saphyr/src/scanner.rs +++ b/saphyr/src/scanner.rs @@ -45,7 +45,7 @@ impl ScanError { pub fn new(loc: Marker, info: &str) -> ScanError { ScanError { mark: loc, - info: info.to_string() + info: info.to_owned() } } } @@ -233,7 +233,7 @@ impl> Scanner { } } - #[inline(always)] + #[inline] fn lookahead(&mut self, count: usize) { if self.buffer.len() >= count { return; @@ -386,7 +386,7 @@ impl> Scanner { // plain scalar '-' if !is_blankz(nc) => self.fetch_plain_scalar(), ':' | '?' if !is_blankz(nc) && self.flow_level == 0 => self.fetch_plain_scalar(), - '%' | '@' | '`' => return Err(ScanError::new(self.mark, + '%' | '@' | '`' => Err(ScanError::new(self.mark, &format!("unexpected character: `{}'", c))), _ => self.fetch_plain_scalar(), } @@ -404,9 +404,8 @@ impl> Scanner { self.token_available = false; self.tokens_parsed += 1; - match t.1 { - TokenType::StreamEnd => self.stream_end_produced = true, - _ => {} + if let TokenType::StreamEnd = t.1 { + self.stream_end_produced = true; } Ok(Some(t)) } @@ -697,12 +696,12 @@ impl> Scanner { suffix = try!(self.scan_tag_uri(false, secondary, &String::new(), &start_mark)); } else { suffix = try!(self.scan_tag_uri(false, false, &handle, &start_mark)); - handle = "!".to_string(); + handle = "!".to_owned(); // A special case: the '!' tag. Set the handle to '' and the // suffix to '!'. - if suffix.len() == 0 { + if suffix.is_empty() { handle.clear(); - suffix = "!".to_string(); + suffix = "!".to_owned(); } } } @@ -739,20 +738,18 @@ impl> Scanner { if self.ch() == '!' { string.push(self.ch()); self.skip(); - } else { + } else if directive && string != "!" { // It's either the '!' tag or not really a tag handle. If it's a %TAG // directive, it's an error. If it's a tag token, it must be a part of // URI. - if directive && string != "!" { - return Err(ScanError::new(*mark, - "while parsing a tag directive, did not find expected '!'")); - } + return Err(ScanError::new(*mark, + "while parsing a tag directive, did not find expected '!'")); } Ok(string) } fn scan_tag_uri(&mut self, directive: bool, _is_secondary: bool, - head: &String, mark: &Marker) -> Result { + head: &str, mark: &Marker) -> Result { let mut length = head.len(); let mut string = String::new(); @@ -1588,10 +1585,8 @@ impl> Scanner { fn remove_simple_key(&mut self) -> ScanResult { let last = self.simple_keys.last_mut().unwrap(); - if last.possible { - if last.required { - return Err(ScanError::new(self.mark, "simple key expected")); - } + if last.possible && last.required { + return Err(ScanError::new(self.mark, "simple key expected")); } last.possible = false; diff --git a/saphyr/src/yaml.rs b/saphyr/src/yaml.rs index d63e357..6db2a1b 100644 --- a/saphyr/src/yaml.rs +++ b/saphyr/src/yaml.rs @@ -94,46 +94,43 @@ impl EventReceiver for YamlLoader { Event::Scalar(ref v, style, aid, ref tag) => { let node = if style != TScalarStyle::Plain { Yaml::String(v.clone()) - } else { - match tag { - &Some(TokenType::Tag(ref handle, ref suffix)) => { - // XXX tag:yaml.org,2002: - if handle == "!!" { - match suffix.as_ref() { - "bool" => { - // "true" or "false" - match v.parse::() { - Err(_) => Yaml::BadValue, - Ok(v) => Yaml::Boolean(v) - } - }, - "int" => { - match v.parse::() { - Err(_) => Yaml::BadValue, - Ok(v) => Yaml::Integer(v) - } - }, - "float" => { - match v.parse::() { - Err(_) => Yaml::BadValue, - Ok(_) => Yaml::Real(v.clone()) - } - }, - "null" => { - match v.as_ref() { - "~" | "null" => Yaml::Null, - _ => Yaml::BadValue, - } - } - _ => Yaml::String(v.clone()), + } else if let Some(TokenType::Tag(ref handle, ref suffix)) = *tag { + // XXX tag:yaml.org,2002: + if handle == "!!" { + match suffix.as_ref() { + "bool" => { + // "true" or "false" + match v.parse::() { + Err(_) => Yaml::BadValue, + Ok(v) => Yaml::Boolean(v) + } + }, + "int" => { + match v.parse::() { + Err(_) => Yaml::BadValue, + Ok(v) => Yaml::Integer(v) + } + }, + "float" => { + match v.parse::() { + Err(_) => Yaml::BadValue, + Ok(_) => Yaml::Real(v.clone()) + } + }, + "null" => { + match v.as_ref() { + "~" | "null" => Yaml::Null, + _ => Yaml::BadValue, } - } else { - Yaml::String(v.clone()) } - }, - // Datatype is not specified, or unrecognized - _ => { Yaml::from_str(v.as_ref()) } + _ => Yaml::String(v.clone()), + } + } else { + Yaml::String(v.clone()) } + } else { + // Datatype is not specified, or unrecognized + Yaml::from_str(v.as_ref()) }; self.insert_new_node((node, aid)); @@ -245,7 +242,12 @@ impl Yaml { _ => None } } +} +#[cfg_attr(feature="clippy", allow(should_implement_trait))] +impl Yaml { + // Not implementing FromStr because there is no possibility of Error. + // This function falls back to Yaml::String if nothing else matches. pub fn from_str(v: &str) -> Yaml { if v.starts_with("0x") { let n = i64::from_str_radix(&v[2..], 16); @@ -259,8 +261,8 @@ impl Yaml { return Yaml::Integer(n.unwrap()); } } - if v.starts_with("+") && v[1..].parse::().is_ok() { - return Yaml::Integer(v[1..].parse::().unwrap()); + if v.starts_with('+') && v[1..].parse::().is_ok() { + return Yaml::Integer(v[1..].parse::().unwrap()); } match v { "~" | "null" => Yaml::Null, @@ -268,8 +270,8 @@ impl Yaml { "false" => Yaml::Boolean(false), _ if v.parse::().is_ok() => Yaml::Integer(v.parse::().unwrap()), // try parsing as f64 - _ if v.parse::().is_ok() => Yaml::Real(v.to_string()), - _ => Yaml::String(v.to_string()) + _ if v.parse::().is_ok() => Yaml::Real(v.to_owned()), + _ => Yaml::String(v.to_owned()) } } } @@ -279,7 +281,7 @@ impl<'a> Index<&'a str> for Yaml { type Output = Yaml; fn index(&self, idx: &'a str) -> &Yaml { - let key = Yaml::String(idx.to_string()); + let key = Yaml::String(idx.to_owned()); match self.as_hash() { Some(h) => h.get(&key).unwrap_or(&BAD_VALUE), None => &BAD_VALUE @@ -333,7 +335,7 @@ a4: a5: 'single_quoted' a6: \"double_quoted\" a7: 你好 -".to_string(); +".to_owned(); let out = YamlLoader::load_from_str(&s).unwrap(); let doc = &out[0]; assert_eq!(doc["a7"].as_str().unwrap(), "你好"); From bda87f9f96639ff27874946149ea8ce4e2d756d2 Mon Sep 17 00:00:00 2001 From: David Tolnay Date: Sat, 27 Feb 2016 17:01:51 -0800 Subject: [PATCH 059/380] Run clippy for nightly only --- saphyr/.travis.yml | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/saphyr/.travis.yml b/saphyr/.travis.yml index 627e527..3a542b8 100644 --- a/saphyr/.travis.yml +++ b/saphyr/.travis.yml @@ -8,7 +8,12 @@ env: global: - secure: ZUcdcbS8xbpdII9FSPx7VtoVhEkJhWL2Hb75tDlKDHNhfXqmt1NyB9q/2qXJ5Ulp4MnYXwsI8LsDloR6gvdB4xElay3smuF/neGvMjrqcB15/2p0MSQ+kZjMsNB6mlb5kAlm8ahduXIscppmw/V+m5hn3Vo+RQz/Ng+pzv0nc8KEXPMYrfRFg+a7FaeIbRbb8ir9EfflUSqArLq2hbi2WdhM3hFMcCIAUt6DD4x5ubjEg60OnIof5FDu0mXMXzQvUfHWOeYnsNcD/DLyDnm6FuQEzk37M4EB8op2SdBUeQMQ5abR3i2rd//DZpbTTEjud0PseWohGAwTwL2aoFrqs7uYQMx+vcGlOzAyDUm4VemVUa3F2BECdzU5BiujcKOITJEVUYWongld93arQq34FuXG/TO/T1XrerxfG6LTkTkKS5Vz7W8z6Rloa99WrQLJg1ZJP6itEU7G7KsDFVgRhsg7rz4/dV/2+cV4UvIwd4HlGXKCFlH0SClqvM3/7i/qqCD0689SJW6Zip+ly38MXlGy2s/AmReEasXvFer9JkOEIuPa8QTBNAjDlw7bWXi6neQWBIZU1VhZcSssnrVmEFN8fNklShzpw5DyKCv8jPTx2O6Dw8B/LgIK8uo+eaTXiO6zz/T1c/qEdsYslvxPA2D3F+ONpPU7238ykT4eRog= script: - - cargo build --features clippy --verbose + - | + if [ "$TRAVIS_RUST_VERSION" = nightly ]; then + cargo build --features clippy --verbose + else + cargo build --verbose + fi - cargo test --verbose after_script: - curl http://www.rust-ci.org/artifacts/put?t=$RUSTCI_TOKEN | sh From 7e296ce9b447a1c4ecc6aef7d059de00e1d67d82 Mon Sep 17 00:00:00 2001 From: David Tolnay Date: Thu, 10 Mar 2016 01:55:21 -0800 Subject: [PATCH 060/380] Grammar in rustdoc --- saphyr/src/yaml.rs | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/saphyr/src/yaml.rs b/saphyr/src/yaml.rs index 6db2a1b..92e053c 100644 --- a/saphyr/src/yaml.rs +++ b/saphyr/src/yaml.rs @@ -7,7 +7,7 @@ use std::mem; use parser::*; use scanner::{TScalarStyle, ScanError, TokenType}; -/// An YAML node is store as this `Yaml` enumeration, it provides an easy way to +/// A YAML node is stored as this `Yaml` enumeration, which provides an easy way to /// access your YAML document. /// /// # Examples @@ -17,7 +17,7 @@ use scanner::{TScalarStyle, ScanError, TokenType}; /// let foo = Yaml::from_str("-123"); // convert the string to the appropriate YAML type /// assert_eq!(foo.as_i64().unwrap(), -123); /// -/// // iterator over an Array +/// // iterate over an Array /// let vec = Yaml::Array(vec![Yaml::Integer(1), Yaml::Integer(2)]); /// for v in vec.as_vec().unwrap() { /// assert!(v.as_i64().is_some()); @@ -25,26 +25,26 @@ use scanner::{TScalarStyle, ScanError, TokenType}; /// ``` #[derive(Clone, PartialEq, PartialOrd, Debug, Eq, Ord)] pub enum Yaml { - /// float types are stored as String, and parsed on demand. - /// Note that f64 does NOT implement Eq trait and can NOT be stored in BTreeMap + /// Float types are stored as String and parsed on demand. + /// Note that f64 does NOT implement Eq trait and can NOT be stored in BTreeMap. Real(string::String), - /// Yaml int is stored as i64. + /// YAML int is stored as i64. Integer(i64), - /// Yaml scalar. + /// YAML scalar. String(string::String), - /// Yaml bool, e.g. `true` or `false`. + /// YAML bool, e.g. `true` or `false`. Boolean(bool), - /// Yaml array, can be access as a `Vec`. + /// YAML array, can be accessed as a `Vec`. Array(self::Array), - /// Yaml hash, can be access as a `BTreeMap`. + /// YAML hash, can be accessed as a `BTreeMap`. Hash(self::Hash), /// Alias, not fully supported yet. Alias(usize), - /// Yaml bool, e.g. `null` or `~`. + /// YAML null, e.g. `null` or `~`. Null, - /// Access non-exist node by Index trait will return `BadValue`. - /// This simplifies error handling of user. Invalid type conversion - /// also return `BadValue`. + /// Accessing a nonexistent node via the Index trait returns `BadValue`. This + /// simplifies error handling in the calling code. Invalid type conversion also + /// returns `BadValue`. BadValue, } @@ -250,13 +250,13 @@ impl Yaml { // This function falls back to Yaml::String if nothing else matches. pub fn from_str(v: &str) -> Yaml { if v.starts_with("0x") { - let n = i64::from_str_radix(&v[2..], 16); + let n = i64::from_str_radix(&v[2..], 16); if n.is_ok() { return Yaml::Integer(n.unwrap()); } } if v.starts_with("0o") { - let n = i64::from_str_radix(&v[2..], 8); + let n = i64::from_str_radix(&v[2..], 8); if n.is_ok() { return Yaml::Integer(n.unwrap()); } From 5fa1c5d7cc6d9452ac80d68050c3679fb1b3bbfc Mon Sep 17 00:00:00 2001 From: Yuheng Chen Date: Thu, 10 Mar 2016 20:49:02 +0800 Subject: [PATCH 061/380] Fix clippy warnings about `not` --- saphyr/src/scanner.rs | 51 ++++++++++++++++++++++--------------------- saphyr/src/yaml.rs | 6 ++--- 2 files changed, 29 insertions(+), 28 deletions(-) diff --git a/saphyr/src/scanner.rs b/saphyr/src/scanner.rs index d0b5e2b..3d6b3c1 100644 --- a/saphyr/src/scanner.rs +++ b/saphyr/src/scanner.rs @@ -647,11 +647,11 @@ impl> Scanner { self.lookahead(1); - if !is_blankz(self.ch()) { + if is_blankz(self.ch()) { + Ok(Token(*mark, TokenType::TagDirective(handle, prefix))) + } else { Err(ScanError::new(*mark, "while scanning TAG, did not find expected whitespace or line break")) - } else { - Ok(Token(*mark, TokenType::TagDirective(handle, prefix))) } } @@ -1305,28 +1305,33 @@ impl> Scanner { while is_blank(self.ch()) || is_break(self.ch()) { if is_blank(self.ch()) { // Consume a space or a tab character. - if !leading_blanks { - whitespaces.push(self.ch()); + if leading_blanks { self.skip(); } else { + whitespaces.push(self.ch()); self.skip(); } } else { self.lookahead(2); // Check if it is a first line break. - if !leading_blanks { + if leading_blanks { + self.read_break(&mut trailing_breaks); + } else { whitespaces.clear(); self.read_break(&mut leading_break); leading_blanks = true; - } else { - self.read_break(&mut trailing_breaks); } } self.lookahead(1); } // Join the whitespaces or fold line breaks. if leading_blanks { - if !leading_break.is_empty() { + if leading_break.is_empty() { + string.extend(leading_break.chars()); + string.extend(trailing_breaks.chars()); + trailing_breaks.clear(); + leading_break.clear(); + } else { if trailing_breaks.is_empty() { string.push(' '); } else { @@ -1334,11 +1339,6 @@ impl> Scanner { trailing_breaks.clear(); } leading_break.clear(); - } else { - string.extend(leading_break.chars()); - string.extend(trailing_breaks.chars()); - trailing_breaks.clear(); - leading_break.clear(); } } else { string.extend(whitespaces.chars()); @@ -1407,7 +1407,12 @@ impl> Scanner { if leading_blanks || !whitespaces.is_empty() { if leading_blanks { - if !leading_break.is_empty() { + if leading_break.is_empty() { + string.extend(leading_break.chars()); + string.extend(trailing_breaks.chars()); + trailing_breaks.clear(); + leading_break.clear(); + } else { if trailing_breaks.is_empty() { string.push(' '); } else { @@ -1415,11 +1420,7 @@ impl> Scanner { trailing_breaks.clear(); } leading_break.clear(); - } else { - string.extend(leading_break.chars()); - string.extend(trailing_breaks.chars()); - trailing_breaks.clear(); - leading_break.clear(); + } leading_blanks = false; } else { @@ -1444,21 +1445,21 @@ impl> Scanner { "while scanning a plain scalar, found a tab")); } - if !leading_blanks { - whitespaces.push(self.ch()); + if leading_blanks { self.skip(); } else { + whitespaces.push(self.ch()); self.skip(); } } else { self.lookahead(2); // Check if it is a first line break - if !leading_blanks { + if leading_blanks { + self.read_break(&mut trailing_breaks); + } else { whitespaces.clear(); self.read_break(&mut leading_break); leading_blanks = true; - } else { - self.read_break(&mut trailing_breaks); } } self.lookahead(1); diff --git a/saphyr/src/yaml.rs b/saphyr/src/yaml.rs index 6db2a1b..6ea5cb9 100644 --- a/saphyr/src/yaml.rs +++ b/saphyr/src/yaml.rs @@ -154,7 +154,9 @@ impl YamlLoader { if node.1 > 0 { self.anchor_map.insert(node.1, node.0.clone()); } - if !self.doc_stack.is_empty() { + if self.doc_stack.is_empty() { + self.doc_stack.push(node); + } else { let parent = self.doc_stack.last_mut().unwrap(); match *parent { (Yaml::Array(ref mut v), _) => v.push(node.0), @@ -172,8 +174,6 @@ impl YamlLoader { }, _ => unreachable!(), } - } else { - self.doc_stack.push(node); } } From b749378805d202593659bbd21911177593f6168f Mon Sep 17 00:00:00 2001 From: Hendrik Sollich Date: Thu, 10 Mar 2016 14:45:02 +0100 Subject: [PATCH 062/380] added crates.io shields to readme Hi, this shows what versions you currently released on crates.io and under which license. --- saphyr/README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/saphyr/README.md b/saphyr/README.md index dd0a709..7fcd385 100644 --- a/saphyr/README.md +++ b/saphyr/README.md @@ -4,6 +4,8 @@ The missing YAML 1.2 implementation for Rust. [![Build Status](https://travis-ci.org/chyh1990/yaml-rust.svg?branch=master)](https://travis-ci.org/chyh1990/yaml-rust) [![Build status](https://ci.appveyor.com/api/projects/status/scf47535ckp4ylg4?svg=true)](https://ci.appveyor.com/project/chyh1990/yaml-rust) +[![license](https://img.shields.io/crates/l/yaml-rust.svg)](https://crates.io/crates/yaml-rust/) +[![version](https://img.shields.io/crates/v/yaml-rust.svg)](https://crates.io/crates/yaml-rust/) `yaml-rust` is a pure Rust YAML 1.2 implementation without any FFI and crate dependencies, which enjoys the memory safe From 82e2b2f5afb6d37b6d3dee34b67cc0fe192893e6 Mon Sep 17 00:00:00 2001 From: David Tolnay Date: Sat, 19 Mar 2016 22:06:44 -0700 Subject: [PATCH 063/380] Add preserve_order feature to use LinkedHashMap instead of BTreeMap --- saphyr/Cargo.toml | 4 ++++ saphyr/src/lib.rs | 3 +++ saphyr/src/yaml.rs | 15 ++++++++++++++- 3 files changed, 21 insertions(+), 1 deletion(-) diff --git a/saphyr/Cargo.toml b/saphyr/Cargo.toml index 46d8c7a..c5e70fd 100644 --- a/saphyr/Cargo.toml +++ b/saphyr/Cargo.toml @@ -8,5 +8,9 @@ license = "MIT/Apache-2.0" description = "The missing YAML 1.2 parser for rust" repository = "https://github.com/chyh1990/yaml-rust" +[features] +preserve_order = ["linked-hash-map"] + [dependencies] clippy = { version = "^0.*", optional = true } +linked-hash-map = { version = "0.0.9", optional = true } diff --git a/saphyr/src/lib.rs b/saphyr/src/lib.rs index dc66adc..6bdde1f 100644 --- a/saphyr/src/lib.rs +++ b/saphyr/src/lib.rs @@ -42,6 +42,9 @@ #![cfg_attr(feature="clippy", warn(cyclomatic_complexity))] #![cfg_attr(feature="clippy", allow(match_same_arms))] +#[cfg(feature = "preserve_order")] +extern crate linked_hash_map; + pub mod yaml; pub mod scanner; pub mod parser; diff --git a/saphyr/src/yaml.rs b/saphyr/src/yaml.rs index ef98e50..d840a30 100644 --- a/saphyr/src/yaml.rs +++ b/saphyr/src/yaml.rs @@ -23,7 +23,7 @@ use scanner::{TScalarStyle, ScanError, TokenType}; /// assert!(v.as_i64().is_some()); /// } /// ``` -#[derive(Clone, PartialEq, PartialOrd, Debug, Eq, Ord)] +#[derive(Clone, PartialEq, PartialOrd, Debug, Eq, Ord, Hash)] pub enum Yaml { /// Float types are stored as String and parsed on demand. /// Note that f64 does NOT implement Eq trait and can NOT be stored in BTreeMap. @@ -37,6 +37,15 @@ pub enum Yaml { /// YAML array, can be accessed as a `Vec`. Array(self::Array), /// YAML hash, can be accessed as a `BTreeMap`. + /// + /// If the order of keys is meaningful, enable the `preserve_order` feature to + /// store hashes as a `LinkedHashMap` intead of `BTreeMap`. When using a + /// `LinkedHashMap`, the itertion order will match the order of insertion into + /// the map. + /// + /// ```toml + /// yaml-rust = { version = "*", features = ["preserve_order"] } + /// ``` Hash(self::Hash), /// Alias, not fully supported yet. Alias(usize), @@ -49,7 +58,11 @@ pub enum Yaml { } pub type Array = Vec; + +#[cfg(not(feature = "preserve_order"))] pub type Hash = BTreeMap; +#[cfg(feature = "preserve_order")] +pub type Hash = ::linked_hash_map::LinkedHashMap; pub struct YamlLoader { docs: Vec, From 0d0baf9947a7119dc5c433e0cfba50020c746ab7 Mon Sep 17 00:00:00 2001 From: Yuheng Chen Date: Tue, 22 Mar 2016 22:27:13 +0800 Subject: [PATCH 064/380] Bump to 0.3.2 1. Add preserve_order feature 2. Add clippy in nightly build --- saphyr/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/saphyr/Cargo.toml b/saphyr/Cargo.toml index c5e70fd..7bcc411 100644 --- a/saphyr/Cargo.toml +++ b/saphyr/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "yaml-rust" -version = "0.3.1" +version = "0.3.2" authors = ["Yuheng Chen "] homepage = "http://chyh1990.github.io/yaml-rust/" documentation = "http://chyh1990.github.io/yaml-rust/doc/yaml_rust/" From 7cd23fe58d3e0a0b90deb1a27f02583a373b7e10 Mon Sep 17 00:00:00 2001 From: Yuheng Chen Date: Wed, 25 May 2016 13:19:36 +0800 Subject: [PATCH 065/380] Fix assert fail on empty document This closes #20 --- saphyr/src/parser.rs | 3 +-- saphyr/src/yaml.rs | 8 ++++++++ 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/saphyr/src/parser.rs b/saphyr/src/parser.rs index 65924d7..eb574c0 100644 --- a/saphyr/src/parser.rs +++ b/saphyr/src/parser.rs @@ -124,8 +124,7 @@ impl> Parser { fn parse(&mut self, recv: &mut R) -> ParseResult { - if self.scanner.stream_ended() - || self.state == State::End { + if self.state == State::End { return Ok(Event::StreamEnd); } let ev = try!(self.state_machine()); diff --git a/saphyr/src/yaml.rs b/saphyr/src/yaml.rs index d840a30..86730dc 100644 --- a/saphyr/src/yaml.rs +++ b/saphyr/src/yaml.rs @@ -331,6 +331,14 @@ c: [1, 2] assert!(doc["d"][0].is_badvalue()); } + #[test] + fn test_empty_doc() { + let s: String = "".to_owned(); + YamlLoader::load_from_str(&s).unwrap(); + let s: String = "---".to_owned(); + assert_eq!(YamlLoader::load_from_str(&s).unwrap()[0], Yaml::Null); + } + #[test] fn test_parser() { let s: String = " From 3dc63c1965226dd11531855135da985833f5f5cc Mon Sep 17 00:00:00 2001 From: Yuheng Chen Date: Fri, 27 May 2016 13:42:17 +0800 Subject: [PATCH 066/380] Ignore untracked files --- saphyr/.gitignore | 2 ++ 1 file changed, 2 insertions(+) diff --git a/saphyr/.gitignore b/saphyr/.gitignore index d4f917d..b7b38e4 100644 --- a/saphyr/.gitignore +++ b/saphyr/.gitignore @@ -1,3 +1,5 @@ target Cargo.lock *.swp +/perf.* +/coverage.sh From 77b90df939af33a51934e549baf875e6bcd82250 Mon Sep 17 00:00:00 2001 From: Yuheng Chen Date: Wed, 1 Jun 2016 12:14:06 +0800 Subject: [PATCH 067/380] Bump to 0.3.3 --- saphyr/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/saphyr/Cargo.toml b/saphyr/Cargo.toml index 7bcc411..96cd00b 100644 --- a/saphyr/Cargo.toml +++ b/saphyr/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "yaml-rust" -version = "0.3.2" +version = "0.3.3" authors = ["Yuheng Chen "] homepage = "http://chyh1990.github.io/yaml-rust/" documentation = "http://chyh1990.github.io/yaml-rust/doc/yaml_rust/" From a2a12c93a61ea6329d2c17cbd94d70acc71f0495 Mon Sep 17 00:00:00 2001 From: Yuheng Chen Date: Wed, 22 Jun 2016 15:47:02 +0800 Subject: [PATCH 068/380] CI: remove put to rust-ci.org Server down occasionally... --- saphyr/.travis.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/saphyr/.travis.yml b/saphyr/.travis.yml index 3a542b8..f3e89e1 100644 --- a/saphyr/.travis.yml +++ b/saphyr/.travis.yml @@ -15,5 +15,3 @@ script: cargo build --verbose fi - cargo test --verbose -after_script: - - curl http://www.rust-ci.org/artifacts/put?t=$RUSTCI_TOKEN | sh From 27e11273c1a4526e0931fd6938191bfb3f3eab78 Mon Sep 17 00:00:00 2001 From: Hendrik Sollich Date: Tue, 21 Jun 2016 21:50:27 +0200 Subject: [PATCH 069/380] returning ScanError instead of running unreachable!() fixes #23 --- saphyr/src/scanner.rs | 2 +- saphyr/src/yaml.rs | 7 +++++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/saphyr/src/scanner.rs b/saphyr/src/scanner.rs index 3d6b3c1..058a050 100644 --- a/saphyr/src/scanner.rs +++ b/saphyr/src/scanner.rs @@ -949,7 +949,7 @@ impl> Scanner { self.roll_indent(mark.col, None, TokenType::BlockSequenceStart, mark); } else { // - * only allowed in block - unreachable!(); + return Err(ScanError::new(self.mark, r#""-" is only valid inside a block"#)) } try!(self.remove_simple_key()); self.allow_simple_key(); diff --git a/saphyr/src/yaml.rs b/saphyr/src/yaml.rs index 86730dc..15aea91 100644 --- a/saphyr/src/yaml.rs +++ b/saphyr/src/yaml.rs @@ -469,4 +469,11 @@ a1: &DEFAULT assert!(doc[25][0].as_bool().unwrap()); assert!(!doc[25][1].as_bool().unwrap()); } + + #[test] + fn test_bad_hypen() { + // See: https://github.com/chyh1990/yaml-rust/issues/23 + let s = "{-"; + assert!(YamlLoader::load_from_str(&s).is_err()); + } } From fc774f2963101d580062901ee951158ea39fe738 Mon Sep 17 00:00:00 2001 From: Yuheng Chen Date: Wed, 22 Jun 2016 16:05:40 +0800 Subject: [PATCH 070/380] Rename NoEvent to Nothing to make clippy happy --- saphyr/src/parser.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/saphyr/src/parser.rs b/saphyr/src/parser.rs index eb574c0..78408e4 100644 --- a/saphyr/src/parser.rs +++ b/saphyr/src/parser.rs @@ -35,7 +35,7 @@ enum State { #[derive(Clone, PartialEq, Debug, Eq)] pub enum Event { /// Reserved for internal use - NoEvent, + Nothing, StreamStart, StreamEnd, DocumentStart, From ca7956f429cad36f0f47375fecf2d1ef0badf7c4 Mon Sep 17 00:00:00 2001 From: Hendrik Sollich Date: Wed, 22 Jun 2016 23:57:24 +0200 Subject: [PATCH 071/380] Fix logic error that caused infinite loop --- saphyr/src/scanner.rs | 8 ++++---- saphyr/src/yaml.rs | 8 ++++++++ 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/saphyr/src/scanner.rs b/saphyr/src/scanner.rs index 058a050..e7a5290 100644 --- a/saphyr/src/scanner.rs +++ b/saphyr/src/scanner.rs @@ -1191,12 +1191,12 @@ impl> Scanner { self.lookahead(4); if self.mark.col == 0 && - ((self.buffer[0] == '-') && + (((self.buffer[0] == '-') && (self.buffer[1] == '-') && (self.buffer[2] == '-')) || ((self.buffer[0] == '.') && (self.buffer[1] == '.') && - (self.buffer[2] == '.')) && + (self.buffer[2] == '.'))) && is_blankz(self.buffer[3]) { return Err(ScanError::new(start_mark, "while scanning a quoted scalar, found unexpected document indicator")); @@ -1381,12 +1381,12 @@ impl> Scanner { self.lookahead(4); if self.mark.col == 0 && - ((self.buffer[0] == '-') && + (((self.buffer[0] == '-') && (self.buffer[1] == '-') && (self.buffer[2] == '-')) || ((self.buffer[0] == '.') && (self.buffer[1] == '.') && - (self.buffer[2] == '.')) && + (self.buffer[2] == '.'))) && is_blankz(self.buffer[3]) { break; } diff --git a/saphyr/src/yaml.rs b/saphyr/src/yaml.rs index 15aea91..f843491 100644 --- a/saphyr/src/yaml.rs +++ b/saphyr/src/yaml.rs @@ -476,4 +476,12 @@ a1: &DEFAULT let s = "{-"; assert!(YamlLoader::load_from_str(&s).is_err()); } + + #[test] + fn test_bad_docstart() { + assert!(YamlLoader::load_from_str("---This used to cause an infinite loop").is_ok()); + assert_eq!(YamlLoader::load_from_str("----"), Ok(vec![Yaml::String(String::from("----"))])); + assert_eq!(YamlLoader::load_from_str("--- #here goes a comment"), Ok(vec![Yaml::Null])); + assert_eq!(YamlLoader::load_from_str("---- #here goes a comment"), Ok(vec![Yaml::String(String::from("----"))])); + } } From ff90a2127ca6de3130a81f6431efb560b83b00fd Mon Sep 17 00:00:00 2001 From: Yuheng Chen Date: Thu, 28 Jul 2016 17:36:25 +0800 Subject: [PATCH 072/380] Use empty_scalar_with_anchor properly This closes #27. --- saphyr/src/parser.rs | 6 +++--- saphyr/src/yaml.rs | 8 ++++++++ 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/saphyr/src/parser.rs b/saphyr/src/parser.rs index 78408e4..678f841 100644 --- a/saphyr/src/parser.rs +++ b/saphyr/src/parser.rs @@ -58,8 +58,8 @@ impl Event { Event::Scalar("~".to_owned(), TScalarStyle::Plain, 0, None) } - fn empty_scalar_with_anchor(anchor: usize, tag: TokenType) -> Event { - Event::Scalar("".to_owned(), TScalarStyle::Plain, anchor, Some(tag)) + fn empty_scalar_with_anchor(anchor: usize, tag: Option) -> Event { + Event::Scalar("".to_owned(), TScalarStyle::Plain, anchor, tag) } } @@ -453,7 +453,7 @@ impl> Parser { // ex 7.2, an empty scalar can follow a secondary tag _ if tag.is_some() || anchor_id > 0 => { self.pop_state(); - Ok(Event::empty_scalar_with_anchor(anchor_id, tag.unwrap())) + Ok(Event::empty_scalar_with_anchor(anchor_id, tag)) }, _ => { Err(ScanError::new(tok.0, "while parsing a node, did not find expected node content")) } } diff --git a/saphyr/src/yaml.rs b/saphyr/src/yaml.rs index f843491..04a2fca 100644 --- a/saphyr/src/yaml.rs +++ b/saphyr/src/yaml.rs @@ -404,6 +404,14 @@ a1: &DEFAULT } + #[test] + fn test_github_27() { + // https://github.com/chyh1990/yaml-rust/issues/27 + let s = "&a"; + let out = YamlLoader::load_from_str(&s).unwrap(); + let doc = &out[0]; + assert_eq!(doc.as_str().unwrap(), ""); + } #[test] fn test_plain_datatype() { From a3fcf880eaebb744a53a077ee4ccadbdcd36f5ec Mon Sep 17 00:00:00 2001 From: Matthew Piziak Date: Sun, 7 Aug 2016 22:25:30 -0400 Subject: [PATCH 073/380] add `into_` counterparts for all `as_` methods fix #28 --- saphyr/src/yaml.rs | 78 +++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 77 insertions(+), 1 deletion(-) diff --git a/saphyr/src/yaml.rs b/saphyr/src/yaml.rs index 04a2fca..6cc1f1c 100644 --- a/saphyr/src/yaml.rs +++ b/saphyr/src/yaml.rs @@ -225,6 +225,17 @@ pub fn $name(&self) -> Option<$t> { ); ); +macro_rules! define_into ( + ($name:ident, $t:ty, $yt:ident) => ( +pub fn $name(self) -> Option<$t> { + match self { + Yaml::$yt(v) => Some(v), + _ => None + } +} + ); +); + impl Yaml { define_as!(as_bool, bool, Boolean); define_as!(as_i64, i64, Integer); @@ -233,6 +244,12 @@ impl Yaml { define_as_ref!(as_hash, &Hash, Hash); define_as_ref!(as_vec, &Array, Array); + define_into!(into_bool, bool, Boolean); + define_into!(into_i64, i64, Integer); + define_into!(into_string, String, String); + define_into!(into_hash, Hash, Hash); + define_into!(into_vec, Array, Array); + pub fn is_null(&self) -> bool { match *self { Yaml::Null => true, @@ -255,6 +272,15 @@ impl Yaml { _ => None } } + + pub fn into_f64(self) -> Option { + match self { + Yaml::Real(v) => { + v.parse::().ok() + }, + _ => None + } + } } #[cfg_attr(feature="clippy", allow(should_implement_trait))] @@ -490,6 +516,56 @@ a1: &DEFAULT assert!(YamlLoader::load_from_str("---This used to cause an infinite loop").is_ok()); assert_eq!(YamlLoader::load_from_str("----"), Ok(vec![Yaml::String(String::from("----"))])); assert_eq!(YamlLoader::load_from_str("--- #here goes a comment"), Ok(vec![Yaml::Null])); - assert_eq!(YamlLoader::load_from_str("---- #here goes a comment"), Ok(vec![Yaml::String(String::from("----"))])); + assert_eq!(YamlLoader::load_from_str("---- #here goes a comment"), Ok(vec![Yaml::String(String::from("----"))])); + } + + #[test] + fn test_plain_datatype_with_into_methods() { + let s = +" +- 'string' +- \"string\" +- string +- 123 +- -321 +- 1.23 +- -1e4 +- true +- false +- !!str 0 +- !!int 100 +- !!float 2 +- !!bool true +- !!bool false +- 0xFF +- 0o77 +- [ 0xF, 0xF ] +- +12345 +- [ true, false ] +"; + let out = YamlLoader::load_from_str(&s).unwrap(); + let doc = &out[0]; + + assert_eq!(doc[0].clone().into_string().unwrap(), "string"); + assert_eq!(doc[1].clone().into_string().unwrap(), "string"); + assert_eq!(doc[2].clone().into_string().unwrap(), "string"); + assert_eq!(doc[3].clone().into_i64().unwrap(), 123); + assert_eq!(doc[4].clone().into_i64().unwrap(), -321); + assert_eq!(doc[5].clone().into_f64().unwrap(), 1.23); + assert_eq!(doc[6].clone().into_f64().unwrap(), -1e4); + assert_eq!(doc[7].clone().into_bool().unwrap(), true); + assert_eq!(doc[8].clone().into_bool().unwrap(), false); + assert_eq!(doc[9].clone().into_string().unwrap(), "0"); + assert_eq!(doc[10].clone().into_i64().unwrap(), 100); + assert_eq!(doc[11].clone().into_f64().unwrap(), 2.0); + assert_eq!(doc[12].clone().into_bool().unwrap(), true); + assert_eq!(doc[13].clone().into_bool().unwrap(), false); + assert_eq!(doc[14].clone().into_i64().unwrap(), 255); + assert_eq!(doc[15].clone().into_i64().unwrap(), 63); + assert_eq!(doc[16][0].clone().into_i64().unwrap(), 15); + assert_eq!(doc[16][1].clone().into_i64().unwrap(), 15); + assert_eq!(doc[17].clone().into_i64().unwrap(), 12345); + assert!(doc[18][0].clone().into_bool().unwrap()); + assert!(!doc[18][1].clone().into_bool().unwrap()); } } From b600b3bafef4ebcadf5c3369eda4ebb464a128dc Mon Sep 17 00:00:00 2001 From: Matthew Piziak Date: Mon, 8 Aug 2016 17:31:36 -0400 Subject: [PATCH 074/380] implement IntoIterator for Yaml --- saphyr/src/yaml.rs | 68 +++++++++++++++++++++++++++++----------------- 1 file changed, 43 insertions(+), 25 deletions(-) diff --git a/saphyr/src/yaml.rs b/saphyr/src/yaml.rs index 6cc1f1c..96b4cc4 100644 --- a/saphyr/src/yaml.rs +++ b/saphyr/src/yaml.rs @@ -339,6 +339,30 @@ impl Index for Yaml { } } +impl IntoIterator for Yaml { + type Item = Yaml; + type IntoIter = YamlIter; + + fn into_iter(self) -> Self::IntoIter { + YamlIter {yaml: self, index: 0} + } +} + +pub struct YamlIter { + yaml: Yaml, + index: usize, +} + +impl Iterator for YamlIter { + type Item = Yaml; + + fn next(&mut self) -> Option { + let result = self.yaml[self.index].clone(); + self.index += 1; + Some(result) + } +} + #[cfg(test)] mod test { use yaml::*; @@ -539,33 +563,27 @@ a1: &DEFAULT - !!bool false - 0xFF - 0o77 -- [ 0xF, 0xF ] - +12345 -- [ true, false ] "; - let out = YamlLoader::load_from_str(&s).unwrap(); - let doc = &out[0]; + let mut out = YamlLoader::load_from_str(&s).unwrap().into_iter(); + let mut doc = out.next().unwrap().into_iter(); - assert_eq!(doc[0].clone().into_string().unwrap(), "string"); - assert_eq!(doc[1].clone().into_string().unwrap(), "string"); - assert_eq!(doc[2].clone().into_string().unwrap(), "string"); - assert_eq!(doc[3].clone().into_i64().unwrap(), 123); - assert_eq!(doc[4].clone().into_i64().unwrap(), -321); - assert_eq!(doc[5].clone().into_f64().unwrap(), 1.23); - assert_eq!(doc[6].clone().into_f64().unwrap(), -1e4); - assert_eq!(doc[7].clone().into_bool().unwrap(), true); - assert_eq!(doc[8].clone().into_bool().unwrap(), false); - assert_eq!(doc[9].clone().into_string().unwrap(), "0"); - assert_eq!(doc[10].clone().into_i64().unwrap(), 100); - assert_eq!(doc[11].clone().into_f64().unwrap(), 2.0); - assert_eq!(doc[12].clone().into_bool().unwrap(), true); - assert_eq!(doc[13].clone().into_bool().unwrap(), false); - assert_eq!(doc[14].clone().into_i64().unwrap(), 255); - assert_eq!(doc[15].clone().into_i64().unwrap(), 63); - assert_eq!(doc[16][0].clone().into_i64().unwrap(), 15); - assert_eq!(doc[16][1].clone().into_i64().unwrap(), 15); - assert_eq!(doc[17].clone().into_i64().unwrap(), 12345); - assert!(doc[18][0].clone().into_bool().unwrap()); - assert!(!doc[18][1].clone().into_bool().unwrap()); + assert_eq!(doc.next().unwrap().into_string().unwrap(), "string"); + assert_eq!(doc.next().unwrap().into_string().unwrap(), "string"); + assert_eq!(doc.next().unwrap().into_string().unwrap(), "string"); + assert_eq!(doc.next().unwrap().into_i64().unwrap(), 123); + assert_eq!(doc.next().unwrap().into_i64().unwrap(), -321); + assert_eq!(doc.next().unwrap().into_f64().unwrap(), 1.23); + assert_eq!(doc.next().unwrap().into_f64().unwrap(), -1e4); + assert_eq!(doc.next().unwrap().into_bool().unwrap(), true); + assert_eq!(doc.next().unwrap().into_bool().unwrap(), false); + assert_eq!(doc.next().unwrap().into_string().unwrap(), "0"); + assert_eq!(doc.next().unwrap().into_i64().unwrap(), 100); + assert_eq!(doc.next().unwrap().into_f64().unwrap(), 2.0); + assert_eq!(doc.next().unwrap().into_bool().unwrap(), true); + assert_eq!(doc.next().unwrap().into_bool().unwrap(), false); + assert_eq!(doc.next().unwrap().into_i64().unwrap(), 255); + assert_eq!(doc.next().unwrap().into_i64().unwrap(), 63); + assert_eq!(doc.next().unwrap().into_i64().unwrap(), 12345); } } From 9e77a839d32ee810503c0a0b4c8e2d3cf6f16743 Mon Sep 17 00:00:00 2001 From: Matthew Piziak Date: Mon, 8 Aug 2016 17:52:24 -0400 Subject: [PATCH 075/380] remove clone from `into_iter` --- saphyr/src/yaml.rs | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/saphyr/src/yaml.rs b/saphyr/src/yaml.rs index 96b4cc4..e5af16f 100644 --- a/saphyr/src/yaml.rs +++ b/saphyr/src/yaml.rs @@ -234,7 +234,7 @@ pub fn $name(self) -> Option<$t> { } } ); -); +); impl Yaml { define_as!(as_bool, bool, Boolean); @@ -245,7 +245,7 @@ impl Yaml { define_as_ref!(as_vec, &Array, Array); define_into!(into_bool, bool, Boolean); - define_into!(into_i64, i64, Integer); + define_into!(into_i64, i64, Integer); define_into!(into_string, String, String); define_into!(into_hash, Hash, Hash); define_into!(into_vec, Array, Array); @@ -344,22 +344,21 @@ impl IntoIterator for Yaml { type IntoIter = YamlIter; fn into_iter(self) -> Self::IntoIter { - YamlIter {yaml: self, index: 0} + let mut yaml = self.into_vec().unwrap_or(vec![]); + yaml.reverse(); + YamlIter {yaml: yaml} } } pub struct YamlIter { - yaml: Yaml, - index: usize, + yaml: Vec, } impl Iterator for YamlIter { type Item = Yaml; fn next(&mut self) -> Option { - let result = self.yaml[self.index].clone(); - self.index += 1; - Some(result) + self.yaml.pop() } } @@ -540,7 +539,7 @@ a1: &DEFAULT assert!(YamlLoader::load_from_str("---This used to cause an infinite loop").is_ok()); assert_eq!(YamlLoader::load_from_str("----"), Ok(vec![Yaml::String(String::from("----"))])); assert_eq!(YamlLoader::load_from_str("--- #here goes a comment"), Ok(vec![Yaml::Null])); - assert_eq!(YamlLoader::load_from_str("---- #here goes a comment"), Ok(vec![Yaml::String(String::from("----"))])); + assert_eq!(YamlLoader::load_from_str("---- #here goes a comment"), Ok(vec![Yaml::String(String::from("----"))])); } #[test] @@ -561,13 +560,13 @@ a1: &DEFAULT - !!float 2 - !!bool true - !!bool false -- 0xFF +- 0xFF - 0o77 - +12345 "; - let mut out = YamlLoader::load_from_str(&s).unwrap().into_iter(); + let mut out = YamlLoader::load_from_str(&s).unwrap().into_iter(); let mut doc = out.next().unwrap().into_iter(); - + assert_eq!(doc.next().unwrap().into_string().unwrap(), "string"); assert_eq!(doc.next().unwrap().into_string().unwrap(), "string"); assert_eq!(doc.next().unwrap().into_string().unwrap(), "string"); @@ -583,7 +582,7 @@ a1: &DEFAULT assert_eq!(doc.next().unwrap().into_bool().unwrap(), true); assert_eq!(doc.next().unwrap().into_bool().unwrap(), false); assert_eq!(doc.next().unwrap().into_i64().unwrap(), 255); - assert_eq!(doc.next().unwrap().into_i64().unwrap(), 63); - assert_eq!(doc.next().unwrap().into_i64().unwrap(), 12345); + assert_eq!(doc.next().unwrap().into_i64().unwrap(), 63); + assert_eq!(doc.next().unwrap().into_i64().unwrap(), 12345); } } From e8f4fcbb03629bb164080f619e936ddafe55deff Mon Sep 17 00:00:00 2001 From: Matthew Piziak Date: Mon, 8 Aug 2016 18:21:57 -0400 Subject: [PATCH 076/380] properly wrap Vec's IntoIter property --- saphyr/src/yaml.rs | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/saphyr/src/yaml.rs b/saphyr/src/yaml.rs index e5af16f..6ed730d 100644 --- a/saphyr/src/yaml.rs +++ b/saphyr/src/yaml.rs @@ -4,6 +4,7 @@ use std::string; use std::i64; use std::str::FromStr; use std::mem; +use std::vec; use parser::*; use scanner::{TScalarStyle, ScanError, TokenType}; @@ -344,21 +345,19 @@ impl IntoIterator for Yaml { type IntoIter = YamlIter; fn into_iter(self) -> Self::IntoIter { - let mut yaml = self.into_vec().unwrap_or(vec![]); - yaml.reverse(); - YamlIter {yaml: yaml} + YamlIter {yaml: self.into_vec().unwrap_or(vec![]).into_iter()} } } pub struct YamlIter { - yaml: Vec, + yaml: vec::IntoIter, } impl Iterator for YamlIter { type Item = Yaml; fn next(&mut self) -> Option { - self.yaml.pop() + self.yaml.next() } } From d235acc1cc3c714033a71f79c3fbb1eb7cfe35bd Mon Sep 17 00:00:00 2001 From: David Tolnay Date: Mon, 29 Aug 2016 11:39:39 -0700 Subject: [PATCH 077/380] Allow range of linked-hash-map versions --- saphyr/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/saphyr/Cargo.toml b/saphyr/Cargo.toml index 96cd00b..accaceb 100644 --- a/saphyr/Cargo.toml +++ b/saphyr/Cargo.toml @@ -13,4 +13,4 @@ preserve_order = ["linked-hash-map"] [dependencies] clippy = { version = "^0.*", optional = true } -linked-hash-map = { version = "0.0.9", optional = true } +linked-hash-map = { version = ">=0.0.9, <0.4", optional = true } From d0f4fc3abc89108b71832ec8600bff80a24013f5 Mon Sep 17 00:00:00 2001 From: Hendrik Sollich Date: Fri, 16 Sep 2016 20:06:50 +0200 Subject: [PATCH 078/380] fixed: emitter also emits complex keys --- saphyr/src/emitter.rs | 46 +++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 44 insertions(+), 2 deletions(-) diff --git a/saphyr/src/emitter.rs b/saphyr/src/emitter.rs index fe9a6e1..2ee7b98 100644 --- a/saphyr/src/emitter.rs +++ b/saphyr/src/emitter.rs @@ -113,6 +113,48 @@ impl<'a> YamlEmitter<'a> { Ok(()) } + fn emit_node_compact(&mut self, node: &Yaml) -> EmitResult { + match *node { + Yaml::Array(ref v) => { + try!(write!(self.writer, "[")); + if self.level >= 0 { + try!(write!(self.writer, "+ ")); + } + self.level += 1; + for (cnt, x) in v.iter().enumerate() { + try!(self.write_indent()); + if cnt > 0 { try!(write!(self.writer, ", ")); } + try!(self.emit_node(x)); + } + self.level -= 1; + try!(write!(self.writer, "]")); + Ok(()) + }, + Yaml::Hash(ref h) => { + try!(self.writer.write_str("{")); + self.level += 1; + for (cnt, (k, v)) in h.iter().enumerate() { + if cnt > 0 { + try!(write!(self.writer, ", ")); + } + match *k { + // complex key is not supported + Yaml::Array(_) | Yaml::Hash(_) => { + return Err(EmitError::BadHashmapKey); + }, + _ => { try!(self.emit_node(k)); } + } + try!(write!(self.writer, ": ")); + try!(self.emit_node(v)); + } + try!(self.writer.write_str("}")); + self.level -= 1; + Ok(()) + }, + _ => self.emit_node(node) + } + } + fn emit_node(&mut self, node: &Yaml) -> EmitResult { match *node { Yaml::Array(ref v) => { @@ -151,9 +193,9 @@ impl<'a> YamlEmitter<'a> { } try!(self.write_indent()); match *k { - // complex key is not supported Yaml::Array(_) | Yaml::Hash(_) => { - return Err(EmitError::BadHashmapKey); + try!(self.emit_node_compact(k)); + //return Err(EmitError::BadHashmapKey); }, _ => { try!(self.emit_node(k)); } } From 6d238118310cefe2fd21cbdfb40df1ebf73839f3 Mon Sep 17 00:00:00 2001 From: Yuheng Chen Date: Thu, 22 Sep 2016 16:54:51 +0800 Subject: [PATCH 079/380] Fix clippy warning --- saphyr/src/yaml.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/saphyr/src/yaml.rs b/saphyr/src/yaml.rs index 6ed730d..6d0417c 100644 --- a/saphyr/src/yaml.rs +++ b/saphyr/src/yaml.rs @@ -345,7 +345,10 @@ impl IntoIterator for Yaml { type IntoIter = YamlIter; fn into_iter(self) -> Self::IntoIter { - YamlIter {yaml: self.into_vec().unwrap_or(vec![]).into_iter()} + YamlIter { + yaml: self.into_vec() + .unwrap_or_else(Vec::new).into_iter() + } } } From f54a6c3bff20f502b4d34e49ca329dfb503dd7dc Mon Sep 17 00:00:00 2001 From: Hendrik Sollich Date: Thu, 22 Sep 2016 14:17:42 +0200 Subject: [PATCH 080/380] added test for emitting equivalence of complex keys --- saphyr/src/emitter.rs | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/saphyr/src/emitter.rs b/saphyr/src/emitter.rs index 2ee7b98..0631ec2 100644 --- a/saphyr/src/emitter.rs +++ b/saphyr/src/emitter.rs @@ -263,6 +263,7 @@ a7: 你好 'key 1': \"ddd\\\tbbb\" "; + let docs = YamlLoader::load_from_str(&s).unwrap(); let doc = &docs[0]; let mut writer = String::new(); @@ -275,4 +276,37 @@ a7: 你好 assert_eq!(doc, doc_new); } + + #[test] + fn test_emit_complex() { + let s = r#" +cataloge: + product: &coffee { name: Coffee, price: 2.5 , unit: 1l } + product: &cookies { name: Cookies!, price: 3.40 , unit: 400g} + +products: + *coffee: + amount: 4 + *cookies: + amount: 4 + [1,2,3,4]: + array key + 2.4: + real key + true: + bool key + {}: + empty hash key + "#; + let docs = YamlLoader::load_from_str(&s).unwrap(); + let doc = &docs[0]; + let mut writer = String::new(); + { + let mut emitter = YamlEmitter::new(&mut writer); + emitter.dump(doc).unwrap(); + } + let docs_new = YamlLoader::load_from_str(&s).unwrap(); + let doc_new = &docs_new[0]; + assert_eq!(doc, doc_new); + } } From 7e173eebc3473be50bafba2b1e1fa1f5c626b11d Mon Sep 17 00:00:00 2001 From: Jan Likar Date: Sun, 2 Oct 2016 03:41:28 +0200 Subject: [PATCH 081/380] Fix a typo --- saphyr/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/saphyr/README.md b/saphyr/README.md index 7fcd385..4c7d4a8 100644 --- a/saphyr/README.md +++ b/saphyr/README.md @@ -87,7 +87,7 @@ bar: } ``` -Note that `yaml::Yaml` implements `Index<&'a str>` & `Index`: +Note that `yaml_rust::Yaml` implements `Index<&'a str>` & `Index`: * `Index` assumes the container is an Array * `Index<&'a str>` assumes the container is a string to value Map From 9d4b40518ac05ab5ab851d33d03c618093f0afe2 Mon Sep 17 00:00:00 2001 From: Yuheng Chen Date: Fri, 14 Oct 2016 12:41:22 +0800 Subject: [PATCH 082/380] Bump to 0.3.4 --- saphyr/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/saphyr/Cargo.toml b/saphyr/Cargo.toml index accaceb..b0459e2 100644 --- a/saphyr/Cargo.toml +++ b/saphyr/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "yaml-rust" -version = "0.3.3" +version = "0.3.4" authors = ["Yuheng Chen "] homepage = "http://chyh1990.github.io/yaml-rust/" documentation = "http://chyh1990.github.io/yaml-rust/doc/yaml_rust/" From aa80fca2908f4a0b5f05dc44d226c33700cbda63 Mon Sep 17 00:00:00 2001 From: Jan Likar Date: Sat, 22 Oct 2016 01:25:03 +0200 Subject: [PATCH 083/380] Fix spelling mistakes. --- saphyr/README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/saphyr/README.md b/saphyr/README.md index 4c7d4a8..0cfca8d 100644 --- a/saphyr/README.md +++ b/saphyr/README.md @@ -8,7 +8,7 @@ The missing YAML 1.2 implementation for Rust. [![version](https://img.shields.io/crates/v/yaml-rust.svg)](https://crates.io/crates/yaml-rust/) `yaml-rust` is a pure Rust YAML 1.2 implementation without -any FFI and crate dependencies, which enjoys the memory safe +any external dependencies, which enjoys the memory safety property and other benefits from the Rust language. The parser is heavily influenced by `libyaml` and `yaml-cpp`. @@ -17,14 +17,14 @@ Rust 1.0.0 and nightly! See [Document](http://chyh1990.github.io/yaml-rust/doc/yaml_rust/) -> NOTE: This library is still under heavily development. +> NOTE: This library is still under heavy development. > WARNING: This library needs more tests and it is NOT ready for > parsing arbitrary user input from *untrusted source*. ## Quick Start -Adding the following to the Cargo.toml in your project: +Add the following to the Cargo.toml of your project: ``` [dependencies] From ef59ea712aea59e0d1cb36d597b8cd5789a12d56 Mon Sep 17 00:00:00 2001 From: Antoni Boucher Date: Tue, 1 Nov 2016 22:03:35 -0400 Subject: [PATCH 084/380] Added an option to avoid emitting quotes. --- saphyr/src/emitter.rs | 101 +++++++++++++++++++++++++++++++++++++++++- 1 file changed, 100 insertions(+), 1 deletion(-) diff --git a/saphyr/src/emitter.rs b/saphyr/src/emitter.rs index 0631ec2..2bc6cd4 100644 --- a/saphyr/src/emitter.rs +++ b/saphyr/src/emitter.rs @@ -17,6 +17,7 @@ impl From for EmitError { pub struct YamlEmitter<'a> { writer: &'a mut fmt::Write, best_indent: usize, + avoid_quotes: bool, level: isize, } @@ -86,11 +87,39 @@ fn escape_str(wr: &mut fmt::Write, v: &str) -> Result<(), fmt::Error> { Ok(()) } +pub struct YamlEmitterBuilder { + avoid_quotes: bool, +} + +impl YamlEmitterBuilder { + pub fn new() -> Self { + YamlEmitterBuilder { + avoid_quotes: false, + } + } + + pub fn avoid_quotes(mut self) -> Self { + self.avoid_quotes = true; + self + } + + pub fn build<'a>(self, writer: &'a mut fmt::Write) -> YamlEmitter<'a> { + YamlEmitter { + writer: writer, + best_indent: 2, + avoid_quotes: self.avoid_quotes, + + level: -1, + } + } +} + impl<'a> YamlEmitter<'a> { pub fn new(writer: &'a mut fmt::Write) -> YamlEmitter { YamlEmitter { writer: writer, best_indent: 2, + avoid_quotes: false, level: -1 } @@ -207,7 +236,12 @@ impl<'a> YamlEmitter<'a> { } }, Yaml::String(ref v) => { - try!(escape_str(self.writer, v)); + if !self.avoid_quotes || need_quotes(v) { + try!(escape_str(self.writer, v)); + } + else { + try!(write!(self.writer, "{}", v)); + } Ok(()) }, Yaml::Boolean(v) => { @@ -236,6 +270,30 @@ impl<'a> YamlEmitter<'a> { } } +/// Check if the string requires quoting. +/// Strings containing any of the following characters must be quoted. +/// :, {, }, [, ], ,, &, *, #, ?, |, -, <, >, =, !, %, @, ` +/// +/// If the string contains any of the following control characters, it must be escaped with double quotes: +/// \0, \x01, \x02, \x03, \x04, \x05, \x06, \a, \b, \t, \n, \v, \f, \r, \x0e, \x0f, \x10, \x11, \x12, \x13, \x14, \x15, \x16, \x17, \x18, \x19, \x1a, \e, \x1c, \x1d, \x1e, \x1f, \N, \_, \L, \P +/// +/// Finally, there are other cases when the strings must be quoted, no matter if you're using single or double quotes: +/// * When the string is true or false (otherwise, it would be treated as a boolean value); +/// * When the string is null or ~ (otherwise, it would be considered as a null value); +/// * When the string looks like a number, such as integers (e.g. 2, 14, etc.), floats (e.g. 2.6, 14.9) and exponential numbers (e.g. 12e7, etc.) (otherwise, it would be treated as a numeric value); +/// * When the string looks like a date (e.g. 2014-12-31) (otherwise it would be automatically converted into a Unix timestamp). +fn need_quotes(string: &str) -> bool { + string.contains(|character: char| { + match character { + ':' | '{' | '}' | '[' | ']' | ',' | '&' | '*' | '#' | '?' | '|' | '-' | '<' | '>' | '=' | '!' | '%' | '@' | '`' | '\\' | '\0' ... '\x06' | '\t' | '\n' | '\r' | '\x0e' ... '\x1a' | '\x1c' ... '\x1f' => true, + _ => false, + } + }) || + string == "true" || string == "false" || string == "null" || string == "~" || + string.parse::().is_ok() || + string.parse::().is_ok() +} + #[cfg(test)] mod tests { use super::*; @@ -309,4 +367,45 @@ products: let doc_new = &docs_new[0]; assert_eq!(doc, doc_new); } + + #[test] + fn test_emit_avoid_quotes() { + let s = r#"--- +a7: 你好 +boolean: "true" +boolean2: "false" +date: "2014-12-31" +exp: "12e7" +field: ":" +field2: "{" +field3: "\\" +field4: "\n" +float: "2.6" +int: "4" +nullable: "null" +nullable2: "~" +products: + "*coffee": + amount: 4 + "*cookies": + amount: 4 + "2.4": real key + "[1,2,3,4]": array key + "true": bool key + "{}": empty hash key +x: test +y: string with spaces"#; + + let docs = YamlLoader::load_from_str(&s).unwrap(); + let doc = &docs[0]; + let mut writer = String::new(); + { + let mut emitter = YamlEmitterBuilder::new() + .avoid_quotes() + .build(&mut writer); + emitter.dump(doc).unwrap(); + } + + assert_eq!(s, writer); + } } From e1e4ed9c05201abdfdcdc93551360d0108a9f7f4 Mon Sep 17 00:00:00 2001 From: Antoni Boucher Date: Wed, 2 Nov 2016 16:56:46 -0400 Subject: [PATCH 085/380] Fixed to always avoid quoting when possible. --- saphyr/src/emitter.rs | 35 ++--------------------------------- 1 file changed, 2 insertions(+), 33 deletions(-) diff --git a/saphyr/src/emitter.rs b/saphyr/src/emitter.rs index 2bc6cd4..390bf53 100644 --- a/saphyr/src/emitter.rs +++ b/saphyr/src/emitter.rs @@ -17,7 +17,6 @@ impl From for EmitError { pub struct YamlEmitter<'a> { writer: &'a mut fmt::Write, best_indent: usize, - avoid_quotes: bool, level: isize, } @@ -87,39 +86,11 @@ fn escape_str(wr: &mut fmt::Write, v: &str) -> Result<(), fmt::Error> { Ok(()) } -pub struct YamlEmitterBuilder { - avoid_quotes: bool, -} - -impl YamlEmitterBuilder { - pub fn new() -> Self { - YamlEmitterBuilder { - avoid_quotes: false, - } - } - - pub fn avoid_quotes(mut self) -> Self { - self.avoid_quotes = true; - self - } - - pub fn build<'a>(self, writer: &'a mut fmt::Write) -> YamlEmitter<'a> { - YamlEmitter { - writer: writer, - best_indent: 2, - avoid_quotes: self.avoid_quotes, - - level: -1, - } - } -} - impl<'a> YamlEmitter<'a> { pub fn new(writer: &'a mut fmt::Write) -> YamlEmitter { YamlEmitter { writer: writer, best_indent: 2, - avoid_quotes: false, level: -1 } @@ -236,7 +207,7 @@ impl<'a> YamlEmitter<'a> { } }, Yaml::String(ref v) => { - if !self.avoid_quotes || need_quotes(v) { + if need_quotes(v) { try!(escape_str(self.writer, v)); } else { @@ -400,9 +371,7 @@ y: string with spaces"#; let doc = &docs[0]; let mut writer = String::new(); { - let mut emitter = YamlEmitterBuilder::new() - .avoid_quotes() - .build(&mut writer); + let mut emitter = YamlEmitter::new(&mut writer); emitter.dump(doc).unwrap(); } From 439f0e68b29135073974c9e6e130f5fc4c2f3e14 Mon Sep 17 00:00:00 2001 From: Martin Hoffmann Date: Wed, 9 Nov 2016 13:04:48 +0100 Subject: [PATCH 086/380] Pass markers to EventReceiver. --- saphyr/src/parser.rs | 86 +++++++++++++++++++++++--------------------- 1 file changed, 45 insertions(+), 41 deletions(-) diff --git a/saphyr/src/parser.rs b/saphyr/src/parser.rs index 678f841..3a64045 100644 --- a/saphyr/src/parser.rs +++ b/saphyr/src/parser.rs @@ -76,9 +76,13 @@ pub struct Parser { pub trait EventReceiver { fn on_event(&mut self, ev: &Event); + + fn on_event_with_marker(&mut self, ev: &Event, _mark: Marker) { + self.on_event(ev) + } } -pub type ParseResult = Result; +pub type ParseResult = Result<(Event, Marker), ScanError>; impl> Parser { pub fn new(src: T) -> Parser { @@ -123,13 +127,13 @@ impl> Parser { } fn parse(&mut self, recv: &mut R) - -> ParseResult { + -> Result { if self.state == State::End { return Ok(Event::StreamEnd); } - let ev = try!(self.state_machine()); + let (ev, mark) = try!(self.state_machine()); // println!("EV {:?}", ev); - recv.on_event(&ev); + recv.on_event_with_marker(&ev, mark); Ok(ev) } @@ -142,13 +146,13 @@ impl> Parser { if self.scanner.stream_ended() { // XXX has parsed? - recv.on_event(&Event::StreamEnd); + recv.on_event_with_marker(&Event::StreamEnd, self.scanner.mark()); return Ok(()); } loop { let ev = try!(self.parse(recv)); if ev == Event::StreamEnd { - recv.on_event(&Event::StreamEnd); + recv.on_event_with_marker(&Event::StreamEnd, self.scanner.mark()); return Ok(()); } // clear anchors before a new document @@ -269,7 +273,7 @@ impl> Parser { TokenType::StreamStart(_) => { self.state = State::ImplicitDocumentStart; self.skip(); - Ok(Event::StreamStart) + Ok((Event::StreamStart, tok.0)) }, _ => Err(ScanError::new(tok.0, "did not find expected ")), @@ -289,7 +293,7 @@ impl> Parser { TokenType::StreamEnd => { self.state = State::End; self.skip(); - Ok(Event::StreamEnd) + Ok((Event::StreamEnd, tok.0)) }, TokenType::VersionDirective(..) | TokenType::TagDirective(..) @@ -301,7 +305,7 @@ impl> Parser { try!(self.parser_process_directives()); self.push_state(State::DocumentEnd); self.state = State::BlockNode; - Ok(Event::DocumentStart) + Ok((Event::DocumentStart, tok.0)) }, _ => { // explicit document @@ -341,7 +345,7 @@ impl> Parser { self.push_state(State::DocumentEnd); self.state = State::DocumentContent; self.skip(); - Ok(Event::DocumentStart) + Ok((Event::DocumentStart, tok.0)) } fn document_content(&mut self) -> ParseResult { @@ -354,7 +358,7 @@ impl> Parser { |TokenType::StreamEnd => { self.pop_state(); // empty scalar - Ok(Event::empty_scalar()) + Ok((Event::empty_scalar(), tok.0)) }, _ => { self.parse_node(true, false) @@ -374,7 +378,7 @@ impl> Parser { // TODO tag handling self.state = State::DocumentStart; - Ok(Event::DocumentEnd) + Ok((Event::DocumentEnd, tok.0)) } fn register_anchor(&mut self, name: &str, _: &Marker) -> Result { @@ -399,7 +403,7 @@ impl> Parser { self.skip(); match self.anchors.get(&name) { None => return Err(ScanError::new(tok.0, "while parsing node, found unknown anchor")), - Some(id) => return Ok(Event::Alias(*id)) + Some(id) => return Ok((Event::Alias(*id), tok.0)) } }, TokenType::Anchor(name) => { @@ -427,33 +431,33 @@ impl> Parser { match tok.1 { TokenType::BlockEntry if indentless_sequence => { self.state = State::IndentlessSequenceEntry; - Ok(Event::SequenceStart(anchor_id)) + Ok((Event::SequenceStart(anchor_id), tok.0)) }, TokenType::Scalar(style, v) => { self.pop_state(); self.skip(); - Ok(Event::Scalar(v, style, anchor_id, tag)) + Ok((Event::Scalar(v, style, anchor_id, tag), tok.0)) }, TokenType::FlowSequenceStart => { self.state = State::FlowSequenceFirstEntry; - Ok(Event::SequenceStart(anchor_id)) + Ok((Event::SequenceStart(anchor_id), tok.0)) }, TokenType::FlowMappingStart => { self.state = State::FlowMappingFirstKey; - Ok(Event::MappingStart(anchor_id)) + Ok((Event::MappingStart(anchor_id), tok.0)) }, TokenType::BlockSequenceStart if block => { self.state = State::BlockSequenceFirstEntry; - Ok(Event::SequenceStart(anchor_id)) + Ok((Event::SequenceStart(anchor_id), tok.0)) }, TokenType::BlockMappingStart if block => { self.state = State::BlockMappingFirstKey; - Ok(Event::MappingStart(anchor_id)) + Ok((Event::MappingStart(anchor_id), tok.0)) }, // ex 7.2, an empty scalar can follow a secondary tag _ if tag.is_some() || anchor_id > 0 => { self.pop_state(); - Ok(Event::empty_scalar_with_anchor(anchor_id, tag)) + Ok((Event::empty_scalar_with_anchor(anchor_id, tag), tok.0)) }, _ => { Err(ScanError::new(tok.0, "while parsing a node, did not find expected node content")) } } @@ -478,7 +482,7 @@ impl> Parser { => { self.state = State::BlockMappingValue; // empty scalar - Ok(Event::empty_scalar()) + Ok((Event::empty_scalar(), tok.0)) } _ => { self.push_state(State::BlockMappingValue); @@ -489,12 +493,12 @@ impl> Parser { // XXX(chenyh): libyaml failed to parse spec 1.2, ex8.18 TokenType::Value => { self.state = State::BlockMappingValue; - Ok(Event::empty_scalar()) + Ok((Event::empty_scalar(), tok.0)) }, TokenType::BlockEnd => { self.pop_state(); self.skip(); - Ok(Event::MappingEnd) + Ok((Event::MappingEnd, tok.0)) }, _ => { Err(ScanError::new(tok.0, "while parsing a block mapping, did not find expected key")) @@ -513,7 +517,7 @@ impl> Parser { => { self.state = State::BlockMappingKey; // empty scalar - Ok(Event::empty_scalar()) + Ok((Event::empty_scalar(), tok.0)) } _ => { self.push_state(State::BlockMappingKey); @@ -524,7 +528,7 @@ impl> Parser { _ => { self.state = State::BlockMappingKey; // empty scalar - Ok(Event::empty_scalar()) + Ok((Event::empty_scalar(), tok.0)) } } } @@ -555,7 +559,7 @@ impl> Parser { | TokenType::FlowEntry | TokenType::FlowMappingEnd => { self.state = State::FlowMappingValue; - return Ok(Event::empty_scalar()); + return Ok((Event::empty_scalar(), tok.0)); }, _ => { self.push_state(State::FlowMappingValue); @@ -565,7 +569,7 @@ impl> Parser { // XXX libyaml fail ex 7.3, empty key } else if tok.1 == TokenType::Value { self.state = State::FlowMappingValue; - return Ok(Event::empty_scalar()); + return Ok((Event::empty_scalar(), tok.0)); } else if tok.1 != TokenType::FlowMappingEnd { self.push_state(State::FlowMappingEmptyValue); return self.parse_node(false, false); @@ -574,14 +578,14 @@ impl> Parser { self.pop_state(); self.skip(); - Ok(Event::MappingEnd) + Ok((Event::MappingEnd, tok.0)) } fn flow_mapping_value(&mut self, empty: bool) -> ParseResult { let tok = try!(self.peek()); if empty { self.state = State::FlowMappingKey; - return Ok(Event::empty_scalar()); + return Ok((Event::empty_scalar(), tok.0)); } if tok.1 == TokenType::Value { @@ -598,7 +602,7 @@ impl> Parser { } self.state = State::FlowMappingKey; - Ok(Event::empty_scalar()) + Ok((Event::empty_scalar(), tok.0)) } fn flow_sequence_entry(&mut self, first: bool) -> ParseResult { @@ -613,7 +617,7 @@ impl> Parser { TokenType::FlowSequenceEnd => { self.pop_state(); self.skip(); - return Ok(Event::SequenceEnd); + return Ok((Event::SequenceEnd, tok.0)); }, TokenType::FlowEntry if !first => { self.skip(); @@ -629,12 +633,12 @@ impl> Parser { TokenType::FlowSequenceEnd => { self.pop_state(); self.skip(); - Ok(Event::SequenceEnd) + Ok((Event::SequenceEnd, tok.0)) }, TokenType::Key => { self.state = State::FlowSequenceEntryMappingKey; self.skip(); - Ok(Event::MappingStart(0)) + Ok((Event::MappingStart(0), tok.0)) } _ => { self.push_state(State::FlowSequenceEntry); @@ -647,7 +651,7 @@ impl> Parser { let mut tok = try!(self.peek()); if tok.1 != TokenType::BlockEntry { self.pop_state(); - return Ok(Event::SequenceEnd); + return Ok((Event::SequenceEnd, tok.0)); } self.skip(); @@ -658,7 +662,7 @@ impl> Parser { | TokenType::Value | TokenType::BlockEnd => { self.state = State::IndentlessSequenceEntry; - Ok(Event::empty_scalar()) + Ok((Event::empty_scalar(), tok.0)) }, _ => { self.push_state(State::IndentlessSequenceEntry); @@ -679,7 +683,7 @@ impl> Parser { TokenType::BlockEnd => { self.pop_state(); self.skip(); - Ok(Event::SequenceEnd) + Ok((Event::SequenceEnd, tok.0)) }, TokenType::BlockEntry => { self.skip(); @@ -688,7 +692,7 @@ impl> Parser { TokenType::BlockEntry | TokenType::BlockEnd => { self.state = State::BlockSequenceEntry; - Ok(Event::empty_scalar()) + Ok((Event::empty_scalar(), tok.0)) }, _ => { self.push_state(State::BlockSequenceEntry); @@ -712,7 +716,7 @@ impl> Parser { | TokenType::FlowSequenceEnd => { self.skip(); self.state = State::FlowSequenceEntryMappingValue; - Ok(Event::empty_scalar()) + Ok((Event::empty_scalar(), tok.0)) }, _ => { self.push_state(State::FlowSequenceEntryMappingValue); @@ -733,7 +737,7 @@ impl> Parser { TokenType::FlowEntry | TokenType::FlowSequenceEnd => { self.state = State::FlowSequenceEntryMappingEnd; - Ok(Event::empty_scalar()) + Ok((Event::empty_scalar(), tok.0)) }, _ => { self.push_state(State::FlowSequenceEntryMappingEnd); @@ -743,13 +747,13 @@ impl> Parser { }, _ => { self.state = State::FlowSequenceEntryMappingEnd; - Ok(Event::empty_scalar()) + Ok((Event::empty_scalar(), tok.0)) } } } fn flow_sequence_entry_mapping_end(&mut self) -> ParseResult { self.state = State::FlowSequenceEntry; - Ok(Event::MappingEnd) + Ok((Event::MappingEnd, self.scanner.mark())) } } From 51f8fbf0e6e8682ba7d253850f862370ffef6419 Mon Sep 17 00:00:00 2001 From: Yuheng Chen Date: Thu, 10 Nov 2016 15:31:07 +0800 Subject: [PATCH 087/380] always quote empty string --- saphyr/src/emitter.rs | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/saphyr/src/emitter.rs b/saphyr/src/emitter.rs index 390bf53..6354c86 100644 --- a/saphyr/src/emitter.rs +++ b/saphyr/src/emitter.rs @@ -259,10 +259,14 @@ fn need_quotes(string: &str) -> bool { ':' | '{' | '}' | '[' | ']' | ',' | '&' | '*' | '#' | '?' | '|' | '-' | '<' | '>' | '=' | '!' | '%' | '@' | '`' | '\\' | '\0' ... '\x06' | '\t' | '\n' | '\r' | '\x0e' ... '\x1a' | '\x1c' ... '\x1f' => true, _ => false, } - }) || - string == "true" || string == "false" || string == "null" || string == "~" || - string.parse::().is_ok() || - string.parse::().is_ok() + }) + || string == "true" + || string == "false" + || string == "null" + || string == "~" + || string == "" + || string.parse::().is_ok() + || string.parse::().is_ok() } #[cfg(test)] @@ -346,6 +350,7 @@ a7: 你好 boolean: "true" boolean2: "false" date: "2014-12-31" +empty_string: "" exp: "12e7" field: ":" field2: "{" From 685d5ba0438a0a722a4e9d85cb85b55071554275 Mon Sep 17 00:00:00 2001 From: Martin Hoffmann Date: Thu, 10 Nov 2016 11:53:28 +0100 Subject: [PATCH 088/380] Introduce a new trait for event receivers that want markers. --- saphyr/src/parser.rs | 30 ++++++++++++++++++++---------- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/saphyr/src/parser.rs b/saphyr/src/parser.rs index 3a64045..2b7d441 100644 --- a/saphyr/src/parser.rs +++ b/saphyr/src/parser.rs @@ -74,14 +74,24 @@ pub struct Parser { anchor_id: usize, } + pub trait EventReceiver { fn on_event(&mut self, ev: &Event); +} - fn on_event_with_marker(&mut self, ev: &Event, _mark: Marker) { + +pub trait MarkedEventReceiver { + fn on_event(&mut self, ev: &Event, _mark: Marker); +} + +impl MarkedEventReceiver for R { + fn on_event(&mut self, ev: &Event, _mark: Marker) { self.on_event(ev) } } + + pub type ParseResult = Result<(Event, Marker), ScanError>; impl> Parser { @@ -126,18 +136,18 @@ impl> Parser { self.states.push(state); } - fn parse(&mut self, recv: &mut R) + fn parse(&mut self, recv: &mut R) -> Result { if self.state == State::End { return Ok(Event::StreamEnd); } let (ev, mark) = try!(self.state_machine()); // println!("EV {:?}", ev); - recv.on_event_with_marker(&ev, mark); + recv.on_event(&ev, mark); Ok(ev) } - pub fn load(&mut self, recv: &mut R, multi: bool) + pub fn load(&mut self, recv: &mut R, multi: bool) -> Result<(), ScanError> { if !self.scanner.stream_started() { let ev = try!(self.parse(recv)); @@ -146,13 +156,13 @@ impl> Parser { if self.scanner.stream_ended() { // XXX has parsed? - recv.on_event_with_marker(&Event::StreamEnd, self.scanner.mark()); + recv.on_event(&Event::StreamEnd, self.scanner.mark()); return Ok(()); } loop { let ev = try!(self.parse(recv)); if ev == Event::StreamEnd { - recv.on_event_with_marker(&Event::StreamEnd, self.scanner.mark()); + recv.on_event(&Event::StreamEnd, self.scanner.mark()); return Ok(()); } // clear anchors before a new document @@ -165,7 +175,7 @@ impl> Parser { Ok(()) } - fn load_document(&mut self, first_ev: &Event, recv: &mut R) + fn load_document(&mut self, first_ev: &Event, recv: &mut R) -> Result<(), ScanError> { assert_eq!(first_ev, &Event::DocumentStart); @@ -179,7 +189,7 @@ impl> Parser { Ok(()) } - fn load_node(&mut self, first_ev: &Event, recv: &mut R) + fn load_node(&mut self, first_ev: &Event, recv: &mut R) -> Result<(), ScanError> { match *first_ev { Event::Alias(..) | Event::Scalar(..) => { @@ -196,7 +206,7 @@ impl> Parser { } } - fn load_mapping(&mut self, _first_ev: &Event, recv: &mut R) + fn load_mapping(&mut self, _first_ev: &Event, recv: &mut R) -> Result<(), ScanError> { let mut ev = try!(self.parse(recv)); while ev != Event::MappingEnd { @@ -213,7 +223,7 @@ impl> Parser { Ok(()) } - fn load_sequence(&mut self, _first_ev: &Event, recv: &mut R) + fn load_sequence(&mut self, _first_ev: &Event, recv: &mut R) -> Result<(), ScanError> { let mut ev = try!(self.parse(recv)); while ev != Event::SequenceEnd { From ba9dbcfe8f6f4c6a695a48ebf2412bfa321820dc Mon Sep 17 00:00:00 2001 From: Martin Hoffmann Date: Fri, 11 Nov 2016 11:53:31 +0100 Subject: [PATCH 089/380] Make YamlLoader implement MarkedEventReceiver. --- saphyr/src/yaml.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/saphyr/src/yaml.rs b/saphyr/src/yaml.rs index 6d0417c..d9f63d4 100644 --- a/saphyr/src/yaml.rs +++ b/saphyr/src/yaml.rs @@ -6,7 +6,7 @@ use std::str::FromStr; use std::mem; use std::vec; use parser::*; -use scanner::{TScalarStyle, ScanError, TokenType}; +use scanner::{TScalarStyle, ScanError, TokenType, Marker}; /// A YAML node is stored as this `Yaml` enumeration, which provides an easy way to /// access your YAML document. @@ -74,8 +74,8 @@ pub struct YamlLoader { anchor_map: BTreeMap, } -impl EventReceiver for YamlLoader { - fn on_event(&mut self, ev: &Event) { +impl MarkedEventReceiver for YamlLoader { + fn on_event(&mut self, ev: &Event, _: Marker) { // println!("EV {:?}", ev); match *ev { Event::DocumentStart => { From f23be5e1cb3c97ed97b5679202cd59b7795dcc5d Mon Sep 17 00:00:00 2001 From: Yuheng Chen Date: Mon, 14 Nov 2016 11:22:32 +0800 Subject: [PATCH 090/380] Comment out unreachable states --- saphyr/src/parser.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/saphyr/src/parser.rs b/saphyr/src/parser.rs index 2b7d441..7d147e7 100644 --- a/saphyr/src/parser.rs +++ b/saphyr/src/parser.rs @@ -10,8 +10,8 @@ enum State { DocumentContent, DocumentEnd, BlockNode, - BlockNodeOrIndentlessSequence, - FlowNode, + // BlockNodeOrIndentlessSequence, + // FlowNode, BlockSequenceFirstEntry, BlockSequenceEntry, IndentlessSequenceEntry, @@ -247,8 +247,8 @@ impl> Parser { State::DocumentEnd => self.document_end(), State::BlockNode => self.parse_node(true, false), - State::BlockNodeOrIndentlessSequence => self.parse_node(true, true), - State::FlowNode => self.parse_node(false, false), + // State::BlockNodeOrIndentlessSequence => self.parse_node(true, true), + // State::FlowNode => self.parse_node(false, false), State::BlockMappingFirstKey => self.block_mapping_key(true), State::BlockMappingKey => self.block_mapping_key(false), From 6c7a07a18cdd7358e5a3cae2f3eff4b423dad32f Mon Sep 17 00:00:00 2001 From: Yuheng Chen Date: Mon, 14 Nov 2016 11:17:14 +0800 Subject: [PATCH 091/380] Emitter: quote strings starts/ends with spaces --- saphyr/src/emitter.rs | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/saphyr/src/emitter.rs b/saphyr/src/emitter.rs index 6354c86..e56fc14 100644 --- a/saphyr/src/emitter.rs +++ b/saphyr/src/emitter.rs @@ -254,7 +254,14 @@ impl<'a> YamlEmitter<'a> { /// * When the string looks like a number, such as integers (e.g. 2, 14, etc.), floats (e.g. 2.6, 14.9) and exponential numbers (e.g. 12e7, etc.) (otherwise, it would be treated as a numeric value); /// * When the string looks like a date (e.g. 2014-12-31) (otherwise it would be automatically converted into a Unix timestamp). fn need_quotes(string: &str) -> bool { - string.contains(|character: char| { + fn need_quotes_spaces(string: &str) -> bool { + string.starts_with(' ') + || string.ends_with(' ') + } + + string == "" + || need_quotes_spaces(string) + || string.contains(|character: char| { match character { ':' | '{' | '}' | '[' | ']' | ',' | '&' | '*' | '#' | '?' | '|' | '-' | '<' | '>' | '=' | '!' | '%' | '@' | '`' | '\\' | '\0' ... '\x06' | '\t' | '\n' | '\r' | '\x0e' ... '\x1a' | '\x1c' ... '\x1f' => true, _ => false, @@ -264,7 +271,6 @@ fn need_quotes(string: &str) -> bool { || string == "false" || string == "null" || string == "~" - || string == "" || string.parse::().is_ok() || string.parse::().is_ok() } @@ -351,6 +357,9 @@ boolean: "true" boolean2: "false" date: "2014-12-31" empty_string: "" +empty_string1: " " +empty_string2: " a" +empty_string3: " a " exp: "12e7" field: ":" field2: "{" From 864c7e082953f54f96e1a01573d3839c62319863 Mon Sep 17 00:00:00 2001 From: Yuheng Chen Date: Thu, 24 Nov 2016 18:10:49 +0800 Subject: [PATCH 092/380] Use push_str() instead of extend() --- saphyr/src/scanner.rs | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/saphyr/src/scanner.rs b/saphyr/src/scanner.rs index e7a5290..d25ecb8 100644 --- a/saphyr/src/scanner.rs +++ b/saphyr/src/scanner.rs @@ -1082,11 +1082,11 @@ impl> Scanner { } leading_break.clear(); } else { - string.extend(leading_break.chars()); + string.push_str(&leading_break); leading_break.clear(); } - string.extend(trailing_breaks.chars()); + string.push_str(&trailing_breaks); trailing_breaks.clear(); leading_blank = is_blank(self.ch()); @@ -1108,11 +1108,11 @@ impl> Scanner { // Chomp the tail. if chomping != -1 { - string.extend(leading_break.chars()); + string.push_str(&leading_break); } if chomping == 1 { - string.extend(trailing_breaks.chars()); + string.push_str(&trailing_breaks); } if literal { @@ -1327,21 +1327,21 @@ impl> Scanner { // Join the whitespaces or fold line breaks. if leading_blanks { if leading_break.is_empty() { - string.extend(leading_break.chars()); - string.extend(trailing_breaks.chars()); + string.push_str(&leading_break); + string.push_str(&trailing_breaks); trailing_breaks.clear(); leading_break.clear(); } else { if trailing_breaks.is_empty() { string.push(' '); } else { - string.extend(trailing_breaks.chars()); + string.push_str(&trailing_breaks); trailing_breaks.clear(); } leading_break.clear(); } } else { - string.extend(whitespaces.chars()); + string.push_str(&whitespaces); whitespaces.clear(); } } // loop @@ -1408,15 +1408,15 @@ impl> Scanner { if leading_blanks || !whitespaces.is_empty() { if leading_blanks { if leading_break.is_empty() { - string.extend(leading_break.chars()); - string.extend(trailing_breaks.chars()); + string.push_str(&leading_break); + string.push_str(&trailing_breaks); trailing_breaks.clear(); leading_break.clear(); } else { if trailing_breaks.is_empty() { string.push(' '); } else { - string.extend(trailing_breaks.chars()); + string.push_str(&trailing_breaks); trailing_breaks.clear(); } leading_break.clear(); @@ -1424,7 +1424,7 @@ impl> Scanner { } leading_blanks = false; } else { - string.extend(whitespaces.chars()); + string.push_str(&whitespaces); whitespaces.clear(); } } From 4243924689e21214894ceae51f4dece714d3b2e8 Mon Sep 17 00:00:00 2001 From: David Tolnay Date: Fri, 27 Jan 2017 20:50:52 -0800 Subject: [PATCH 093/380] Always preserve order --- saphyr/Cargo.toml | 5 +---- saphyr/src/emitter.rs | 1 - saphyr/src/lib.rs | 1 - saphyr/src/yaml.rs | 35 ++++++++++++++++++++--------------- 4 files changed, 21 insertions(+), 21 deletions(-) diff --git a/saphyr/Cargo.toml b/saphyr/Cargo.toml index b0459e2..ea768ff 100644 --- a/saphyr/Cargo.toml +++ b/saphyr/Cargo.toml @@ -8,9 +8,6 @@ license = "MIT/Apache-2.0" description = "The missing YAML 1.2 parser for rust" repository = "https://github.com/chyh1990/yaml-rust" -[features] -preserve_order = ["linked-hash-map"] - [dependencies] clippy = { version = "^0.*", optional = true } -linked-hash-map = { version = ">=0.0.9, <0.4", optional = true } +linked-hash-map = ">=0.0.9, <0.4" diff --git a/saphyr/src/emitter.rs b/saphyr/src/emitter.rs index e56fc14..9571e80 100644 --- a/saphyr/src/emitter.rs +++ b/saphyr/src/emitter.rs @@ -278,7 +278,6 @@ fn need_quotes(string: &str) -> bool { #[cfg(test)] mod tests { use super::*; - use yaml::*; #[test] fn test_emit_simple() { diff --git a/saphyr/src/lib.rs b/saphyr/src/lib.rs index 6bdde1f..aa35e47 100644 --- a/saphyr/src/lib.rs +++ b/saphyr/src/lib.rs @@ -42,7 +42,6 @@ #![cfg_attr(feature="clippy", warn(cyclomatic_complexity))] #![cfg_attr(feature="clippy", allow(match_same_arms))] -#[cfg(feature = "preserve_order")] extern crate linked_hash_map; pub mod yaml; diff --git a/saphyr/src/yaml.rs b/saphyr/src/yaml.rs index d9f63d4..94f60bf 100644 --- a/saphyr/src/yaml.rs +++ b/saphyr/src/yaml.rs @@ -2,11 +2,11 @@ use std::collections::BTreeMap; use std::ops::Index; use std::string; use std::i64; -use std::str::FromStr; use std::mem; use std::vec; use parser::*; use scanner::{TScalarStyle, ScanError, TokenType, Marker}; +use linked_hash_map::LinkedHashMap; /// A YAML node is stored as this `Yaml` enumeration, which provides an easy way to /// access your YAML document. @@ -37,16 +37,9 @@ pub enum Yaml { Boolean(bool), /// YAML array, can be accessed as a `Vec`. Array(self::Array), - /// YAML hash, can be accessed as a `BTreeMap`. + /// YAML hash, can be accessed as a `LinkedHashMap`. /// - /// If the order of keys is meaningful, enable the `preserve_order` feature to - /// store hashes as a `LinkedHashMap` intead of `BTreeMap`. When using a - /// `LinkedHashMap`, the itertion order will match the order of insertion into - /// the map. - /// - /// ```toml - /// yaml-rust = { version = "*", features = ["preserve_order"] } - /// ``` + /// Itertion order will match the order of insertion into the map. Hash(self::Hash), /// Alias, not fully supported yet. Alias(usize), @@ -59,11 +52,7 @@ pub enum Yaml { } pub type Array = Vec; - -#[cfg(not(feature = "preserve_order"))] -pub type Hash = BTreeMap; -#[cfg(feature = "preserve_order")] -pub type Hash = ::linked_hash_map::LinkedHashMap; +pub type Hash = LinkedHashMap; pub struct YamlLoader { docs: Vec, @@ -587,4 +576,20 @@ a1: &DEFAULT assert_eq!(doc.next().unwrap().into_i64().unwrap(), 63); assert_eq!(doc.next().unwrap().into_i64().unwrap(), 12345); } + + #[test] + fn test_hash_order() { + let s = "--- +b: ~ +a: ~ +c: ~ +"; + let out = YamlLoader::load_from_str(&s).unwrap(); + let first = out.into_iter().next().unwrap(); + let mut iter = first.into_hash().unwrap().into_iter(); + assert_eq!(Some((Yaml::String("b".to_owned()), Yaml::Null)), iter.next()); + assert_eq!(Some((Yaml::String("a".to_owned()), Yaml::Null)), iter.next()); + assert_eq!(Some((Yaml::String("c".to_owned()), Yaml::Null)), iter.next()); + assert_eq!(None, iter.next()); + } } From db7cc35e5c7cfed07cd6ef70e7eac4dc49d37f33 Mon Sep 17 00:00:00 2001 From: Tom Parker Date: Sun, 12 Mar 2017 16:00:10 +0000 Subject: [PATCH 094/380] Remove warnings about unused items --- saphyr/src/emitter.rs | 2 +- saphyr/src/yaml.rs | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/saphyr/src/emitter.rs b/saphyr/src/emitter.rs index e56fc14..9cd6a2d 100644 --- a/saphyr/src/emitter.rs +++ b/saphyr/src/emitter.rs @@ -1,6 +1,6 @@ use std::fmt; use std::convert::From; -use yaml::*; +use yaml::Yaml; #[derive(Copy, Clone, Debug)] pub enum EmitError { diff --git a/saphyr/src/yaml.rs b/saphyr/src/yaml.rs index d9f63d4..cd8ebb1 100644 --- a/saphyr/src/yaml.rs +++ b/saphyr/src/yaml.rs @@ -2,7 +2,6 @@ use std::collections::BTreeMap; use std::ops::Index; use std::string; use std::i64; -use std::str::FromStr; use std::mem; use std::vec; use parser::*; From 5bd0dd33e57e53e576f86e42da523589faf623d4 Mon Sep 17 00:00:00 2001 From: Tom Parker Date: Sat, 18 Mar 2017 14:14:01 +0000 Subject: [PATCH 095/380] Upgrade Rust versions in Travis to working ones... --- saphyr/.travis.yml | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/saphyr/.travis.yml b/saphyr/.travis.yml index f3e89e1..ce92e56 100644 --- a/saphyr/.travis.yml +++ b/saphyr/.travis.yml @@ -1,8 +1,7 @@ language: rust rust: - - 1.0.0 - - 1.1.0 - - 1.5.0 + - 1.8.0 + - 1.16.0 - nightly env: global: From 72040d25de5d224786733a6d19715291a138d15e Mon Sep 17 00:00:00 2001 From: Tom Parker Date: Sat, 18 Mar 2017 14:19:04 +0000 Subject: [PATCH 096/380] Nightly sometimes is completely broken, but not our fault --- saphyr/.travis.yml | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/saphyr/.travis.yml b/saphyr/.travis.yml index ce92e56..89e430a 100644 --- a/saphyr/.travis.yml +++ b/saphyr/.travis.yml @@ -1,8 +1,12 @@ language: rust rust: - - 1.8.0 - - 1.16.0 - - nightly + - 1.8.0 + - 1.16.0 + - beta + - nightly +matrix: + allow_failures: + - rust: nightly env: global: - secure: ZUcdcbS8xbpdII9FSPx7VtoVhEkJhWL2Hb75tDlKDHNhfXqmt1NyB9q/2qXJ5Ulp4MnYXwsI8LsDloR6gvdB4xElay3smuF/neGvMjrqcB15/2p0MSQ+kZjMsNB6mlb5kAlm8ahduXIscppmw/V+m5hn3Vo+RQz/Ng+pzv0nc8KEXPMYrfRFg+a7FaeIbRbb8ir9EfflUSqArLq2hbi2WdhM3hFMcCIAUt6DD4x5ubjEg60OnIof5FDu0mXMXzQvUfHWOeYnsNcD/DLyDnm6FuQEzk37M4EB8op2SdBUeQMQ5abR3i2rd//DZpbTTEjud0PseWohGAwTwL2aoFrqs7uYQMx+vcGlOzAyDUm4VemVUa3F2BECdzU5BiujcKOITJEVUYWongld93arQq34FuXG/TO/T1XrerxfG6LTkTkKS5Vz7W8z6Rloa99WrQLJg1ZJP6itEU7G7KsDFVgRhsg7rz4/dV/2+cV4UvIwd4HlGXKCFlH0SClqvM3/7i/qqCD0689SJW6Zip+ly38MXlGy2s/AmReEasXvFer9JkOEIuPa8QTBNAjDlw7bWXi6neQWBIZU1VhZcSssnrVmEFN8fNklShzpw5DyKCv8jPTx2O6Dw8B/LgIK8uo+eaTXiO6zz/T1c/qEdsYslvxPA2D3F+ONpPU7238ykT4eRog= From 66cf3d23896ee4221087e3be836d2ceb338aa536 Mon Sep 17 00:00:00 2001 From: Tom Parker Date: Sat, 18 Mar 2017 14:25:24 +0000 Subject: [PATCH 097/380] Switch Appveyor to using Rust 1.16 --- saphyr/appveyor.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/saphyr/appveyor.yml b/saphyr/appveyor.yml index fc72b1e..e7c3127 100644 --- a/saphyr/appveyor.yml +++ b/saphyr/appveyor.yml @@ -1,5 +1,5 @@ install: - - ps: Start-FileDownload 'https://static.rust-lang.org/dist/rust-nightly-i686-pc-windows-gnu.exe' + - ps: Start-FileDownload 'https://static.rust-lang.org/dist/rust-1.16.0-i686-pc-windows-gnu.exe' - rust-nightly-i686-pc-windows-gnu.exe /VERYSILENT /NORESTART /DIR="C:\Program Files (x86)\Rust" - SET PATH=%PATH%;C:\Program Files (x86)\Rust\bin - SET PATH=%PATH%;C:\MinGW\bin From e2781dca23af22e8debf2c5e3d28a3fb70a58a0d Mon Sep 17 00:00:00 2001 From: Tom Parker Date: Sat, 18 Mar 2017 14:26:27 +0000 Subject: [PATCH 098/380] Actually fix Rust command line for AppVeyor --- saphyr/appveyor.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/saphyr/appveyor.yml b/saphyr/appveyor.yml index e7c3127..5ee211c 100644 --- a/saphyr/appveyor.yml +++ b/saphyr/appveyor.yml @@ -1,6 +1,6 @@ install: - ps: Start-FileDownload 'https://static.rust-lang.org/dist/rust-1.16.0-i686-pc-windows-gnu.exe' - - rust-nightly-i686-pc-windows-gnu.exe /VERYSILENT /NORESTART /DIR="C:\Program Files (x86)\Rust" + - rust-1.16.0-i686-pc-windows-gnu.exe /VERYSILENT /NORESTART /DIR="C:\Program Files (x86)\Rust" - SET PATH=%PATH%;C:\Program Files (x86)\Rust\bin - SET PATH=%PATH%;C:\MinGW\bin - rustc -V From d0423912bb7a7e45a2387a30a5a346652c125d83 Mon Sep 17 00:00:00 2001 From: Isobel Redelmeier Date: Tue, 2 May 2017 00:54:46 -0700 Subject: [PATCH 099/380] Do not emit trailing whitespace --- saphyr/src/emitter.rs | 151 +++++++++++++++++++++++++++--------------- 1 file changed, 97 insertions(+), 54 deletions(-) diff --git a/saphyr/src/emitter.rs b/saphyr/src/emitter.rs index 9cd6a2d..f754070 100644 --- a/saphyr/src/emitter.rs +++ b/saphyr/src/emitter.rs @@ -1,6 +1,6 @@ use std::fmt; use std::convert::From; -use yaml::Yaml; +use yaml::{Array, Hash, Yaml}; #[derive(Copy, Clone, Debug)] pub enum EmitError { @@ -151,61 +151,14 @@ impl<'a> YamlEmitter<'a> { self.level -= 1; Ok(()) }, - _ => self.emit_node(node) + _ => self.emit_node(node), } } fn emit_node(&mut self, node: &Yaml) -> EmitResult { match *node { - Yaml::Array(ref v) => { - if v.is_empty() { - try!(write!(self.writer, "[]")); - Ok(()) - } else { - if self.level >= 0 { - try!(write!(self.writer, "\n")); - } - self.level += 1; - for (cnt, x) in v.iter().enumerate() { - if cnt > 0 { - try!(write!(self.writer, "\n")); - } - try!(self.write_indent()); - try!(write!(self.writer, "- ")); - try!(self.emit_node(x)); - } - self.level -= 1; - Ok(()) - } - }, - Yaml::Hash(ref h) => { - if h.is_empty() { - try!(self.writer.write_str("{}")); - Ok(()) - } else { - if self.level >= 0 { - try!(write!(self.writer, "\n")); - } - self.level += 1; - for (cnt, (k, v)) in h.iter().enumerate() { - if cnt > 0 { - try!(write!(self.writer, "\n")); - } - try!(self.write_indent()); - match *k { - Yaml::Array(_) | Yaml::Hash(_) => { - try!(self.emit_node_compact(k)); - //return Err(EmitError::BadHashmapKey); - }, - _ => { try!(self.emit_node(k)); } - } - try!(write!(self.writer, ": ")); - try!(self.emit_node(v)); - } - self.level -= 1; - Ok(()) - } - }, + Yaml::Array(ref v) => self.emit_array(v), + Yaml::Hash(ref h) => self.emit_hash(h), Yaml::String(ref v) => { if need_quotes(v) { try!(escape_str(self.writer, v)); @@ -239,6 +192,73 @@ impl<'a> YamlEmitter<'a> { _ => { Ok(()) } } } + + fn emit_array(&mut self, v: &Array) -> EmitResult { + if v.is_empty() { + try!(write!(self.writer, "[]")); + } else { + for (cnt, x) in v.iter().enumerate() { + if cnt > 0 { + try!(write!(self.writer, "\n")); + } + try!(self.write_indent()); + self.level += 1; + try!(write!(self.writer, "- ")); + try!(self.emit_node(x)); + self.level -= 1; + } + } + Ok(()) + } + + fn emit_hash(&mut self, h: &Hash) -> EmitResult { + if h.is_empty() { + try!(self.writer.write_str("{}")); + } else { + self.level += 1; + for (cnt, (k, v)) in h.iter().enumerate() { + if cnt > 0 { + try!(write!(self.writer, "\n")); + try!(self.write_indent()); + } + match *k { + Yaml::Array(_) | Yaml::Hash(_) => { + try!(self.emit_node_compact(k)); + } + _ => { + try!(self.emit_node(k)); + } + } + match *v { + Yaml::Array(ref v) => { + if v.is_empty() { + try!(write!(self.writer, ": ")); + } else { + try!(write!(self.writer, ":\n")); + } + try!(self.emit_array(v)); + } + Yaml::Hash(ref h) => { + if h.is_empty() { + try!(write!(self.writer, ": ")); + } else { + try!(write!(self.writer, ":\n")); + self.level += 1; + try!(self.write_indent()); + self.level -= 1; + } + try!(self.emit_hash(h)); + } + _ => { + try!(write!(self.writer, ": ")); + try!(self.emit_node(v)); + } + } + } + self.level -= 1; + } + Ok(()) + } } /// Check if the string requires quoting. @@ -369,10 +389,10 @@ float: "2.6" int: "4" nullable: "null" nullable2: "~" -products: - "*coffee": +products: + "*coffee": amount: 4 - "*cookies": + "*cookies": amount: 4 "2.4": real key "[1,2,3,4]": array key @@ -389,6 +409,29 @@ y: string with spaces"#; emitter.dump(doc).unwrap(); } + assert_eq!(s, writer, "actual:\n\n{}\n", writer); + } + + #[test] + fn test_empty_and_nested() { + let s = r#"--- +a: + b: + c: hello + d: {} +e: +- f +- g +- h: []"#; + + let docs = YamlLoader::load_from_str(&s).unwrap(); + let doc = &docs[0]; + let mut writer = String::new(); + { + let mut emitter = YamlEmitter::new(&mut writer); + emitter.dump(doc).unwrap(); + } + assert_eq!(s, writer); } } From d6e14acbe65fbca150d0b6c810f517726db2368d Mon Sep 17 00:00:00 2001 From: Iazel Date: Sat, 6 May 2017 19:06:10 +0200 Subject: [PATCH 100/380] Quotes string starting with a dot To understand why we want to do this, please refer to this issue on ktmpl repository (a way to have templates for kubernetes): https://github.com/InQuicker/ktmpl/issues/15 The problem applies only to string used as keys, but given that the emitters right now doesn't differentiate the two cases for string, I think is better to do it also for values instead of introducing branching and/or flag variables (can't think of cleaner solution for now). --- saphyr/src/emitter.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/saphyr/src/emitter.rs b/saphyr/src/emitter.rs index 9cd6a2d..51366e8 100644 --- a/saphyr/src/emitter.rs +++ b/saphyr/src/emitter.rs @@ -271,6 +271,7 @@ fn need_quotes(string: &str) -> bool { || string == "false" || string == "null" || string == "~" + || string.starts_with('.') || string.parse::().is_ok() || string.parse::().is_ok() } @@ -374,6 +375,8 @@ products: amount: 4 "*cookies": amount: 4 + ".milk": + amount: 1 "2.4": real key "[1,2,3,4]": array key "true": bool key From ae207a6f203a31d454542e922be4d95ebd811403 Mon Sep 17 00:00:00 2001 From: Iazel Date: Sat, 6 May 2017 19:16:00 +0200 Subject: [PATCH 101/380] fix test: add missing end space --- saphyr/src/emitter.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/saphyr/src/emitter.rs b/saphyr/src/emitter.rs index 51366e8..493236a 100644 --- a/saphyr/src/emitter.rs +++ b/saphyr/src/emitter.rs @@ -375,7 +375,7 @@ products: amount: 4 "*cookies": amount: 4 - ".milk": + ".milk": amount: 1 "2.4": real key "[1,2,3,4]": array key From 89f28185ec37eb05aa96a112115574ed6e39d93b Mon Sep 17 00:00:00 2001 From: David Tolnay Date: Sat, 29 Apr 2017 10:00:26 -0700 Subject: [PATCH 102/380] Bump to 0.3.5 --- saphyr/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/saphyr/Cargo.toml b/saphyr/Cargo.toml index b0459e2..ad3ed81 100644 --- a/saphyr/Cargo.toml +++ b/saphyr/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "yaml-rust" -version = "0.3.4" +version = "0.3.5" authors = ["Yuheng Chen "] homepage = "http://chyh1990.github.io/yaml-rust/" documentation = "http://chyh1990.github.io/yaml-rust/doc/yaml_rust/" From 84ffcafbc2f9e88fee6dc75fd0a7ce5ae591bd87 Mon Sep 17 00:00:00 2001 From: Tom Parker Date: Sun, 12 Mar 2017 16:02:18 +0000 Subject: [PATCH 103/380] Add quickcheck to find broken exports --- saphyr/Cargo.toml | 3 +++ saphyr/tests/quickcheck.rs | 21 +++++++++++++++++++++ 2 files changed, 24 insertions(+) create mode 100644 saphyr/tests/quickcheck.rs diff --git a/saphyr/Cargo.toml b/saphyr/Cargo.toml index c4a4780..1e63db6 100644 --- a/saphyr/Cargo.toml +++ b/saphyr/Cargo.toml @@ -11,3 +11,6 @@ repository = "https://github.com/chyh1990/yaml-rust" [dependencies] clippy = { version = "^0.*", optional = true } linked-hash-map = ">=0.0.9, <0.4" + +[dev-dependencies] +quickcheck = "0.4" diff --git a/saphyr/tests/quickcheck.rs b/saphyr/tests/quickcheck.rs new file mode 100644 index 0000000..62db056 --- /dev/null +++ b/saphyr/tests/quickcheck.rs @@ -0,0 +1,21 @@ +extern crate yaml_rust; +#[macro_use] +extern crate quickcheck; + +use quickcheck::TestResult; +use yaml_rust::{Yaml, YamlLoader, YamlEmitter}; +use std::error::Error; + +quickcheck! { + fn test_check_weird_keys(xs: Vec) -> TestResult { + let mut out_str = String::new(); + { + let mut emitter = YamlEmitter::new(&mut out_str); + emitter.dump(&Yaml::Array(xs.into_iter().map(|s| Yaml::String(s)).collect())).unwrap(); + } + if let Err(err) = YamlLoader::load_from_str(&out_str) { + return TestResult::error(err.description()); + } + return TestResult::passed(); + } +} \ No newline at end of file From 21049c8d6a4f9318f9c3b657fa5712a6c7d282a6 Mon Sep 17 00:00:00 2001 From: Tom Parker Date: Sun, 12 Mar 2017 16:02:47 +0000 Subject: [PATCH 104/380] Add quoting for " and ' --- saphyr/src/emitter.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/saphyr/src/emitter.rs b/saphyr/src/emitter.rs index 308bd93..0d2defa 100644 --- a/saphyr/src/emitter.rs +++ b/saphyr/src/emitter.rs @@ -263,7 +263,7 @@ fn need_quotes(string: &str) -> bool { || need_quotes_spaces(string) || string.contains(|character: char| { match character { - ':' | '{' | '}' | '[' | ']' | ',' | '&' | '*' | '#' | '?' | '|' | '-' | '<' | '>' | '=' | '!' | '%' | '@' | '`' | '\\' | '\0' ... '\x06' | '\t' | '\n' | '\r' | '\x0e' ... '\x1a' | '\x1c' ... '\x1f' => true, + ':' | '{' | '}' | '[' | ']' | ',' | '&' | '*' | '#' | '?' | '|' | '-' | '<' | '>' | '=' | '!' | '%' | '@' | '`' | '\"' | '\'' | '\\' | '\0' ... '\x06' | '\t' | '\n' | '\r' | '\x0e' ... '\x1a' | '\x1c' ... '\x1f' => true, _ => false, } }) From dd3d169c1a1e7fde026d4b7710f39d50e4661e05 Mon Sep 17 00:00:00 2001 From: Tom Parker Date: Mon, 8 May 2017 18:31:06 +0100 Subject: [PATCH 105/380] Quickcheck needs Rust at least 1.9 --- saphyr/.travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/saphyr/.travis.yml b/saphyr/.travis.yml index 89e430a..e203825 100644 --- a/saphyr/.travis.yml +++ b/saphyr/.travis.yml @@ -1,6 +1,6 @@ language: rust rust: - - 1.8.0 + - 1.9.0 - 1.16.0 - beta - nightly From b3e287b21e557a6321dfdf044326606258237cd5 Mon Sep 17 00:00:00 2001 From: David Tolnay Date: Mon, 8 May 2017 10:44:17 -0700 Subject: [PATCH 106/380] Restore import that was broken in #57 --- saphyr/src/emitter.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/saphyr/src/emitter.rs b/saphyr/src/emitter.rs index 308bd93..cd83994 100644 --- a/saphyr/src/emitter.rs +++ b/saphyr/src/emitter.rs @@ -278,6 +278,7 @@ fn need_quotes(string: &str) -> bool { #[cfg(test)] mod tests { use super::*; + use YamlLoader; #[test] fn test_emit_simple() { From f06d24fc7d10d1e391728ca214907633be179eaf Mon Sep 17 00:00:00 2001 From: David Tolnay Date: Mon, 8 May 2017 11:01:37 -0700 Subject: [PATCH 107/380] Rust 1.11.0 is required for some of the tests --- saphyr/.travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/saphyr/.travis.yml b/saphyr/.travis.yml index e203825..6e6d706 100644 --- a/saphyr/.travis.yml +++ b/saphyr/.travis.yml @@ -1,6 +1,6 @@ language: rust rust: - - 1.9.0 + - 1.11.0 - 1.16.0 - beta - nightly From 1618a53e0891e60286ea96baabf055582fa650df Mon Sep 17 00:00:00 2001 From: David Tolnay Date: Mon, 8 May 2017 11:02:03 -0700 Subject: [PATCH 108/380] Remember to publish a breaking release --- saphyr/Cargo.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/saphyr/Cargo.toml b/saphyr/Cargo.toml index 1e63db6..651a11f 100644 --- a/saphyr/Cargo.toml +++ b/saphyr/Cargo.toml @@ -7,6 +7,7 @@ documentation = "http://chyh1990.github.io/yaml-rust/doc/yaml_rust/" license = "MIT/Apache-2.0" description = "The missing YAML 1.2 parser for rust" repository = "https://github.com/chyh1990/yaml-rust" +publish = false # this branch contains breaking changes [dependencies] clippy = { version = "^0.*", optional = true } From d6743ab0c7213593e2f879816da03f8cce14276d Mon Sep 17 00:00:00 2001 From: David Tolnay Date: Mon, 8 May 2017 11:03:03 -0700 Subject: [PATCH 109/380] Allow the newest linked-hash-map Closes #52 and closes #55 and closes #63. --- saphyr/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/saphyr/Cargo.toml b/saphyr/Cargo.toml index 651a11f..00688e7 100644 --- a/saphyr/Cargo.toml +++ b/saphyr/Cargo.toml @@ -11,7 +11,7 @@ publish = false # this branch contains breaking changes [dependencies] clippy = { version = "^0.*", optional = true } -linked-hash-map = ">=0.0.9, <0.4" +linked-hash-map = ">=0.0.9, <0.5" [dev-dependencies] quickcheck = "0.4" From 200d33cb4a0f7fa777d74477b806d88f5402a404 Mon Sep 17 00:00:00 2001 From: David Tolnay Date: Mon, 8 May 2017 11:17:30 -0700 Subject: [PATCH 110/380] Revert "fix test: add missing end space" This reverts commit ae207a6f203a31d454542e922be4d95ebd811403. --- saphyr/src/emitter.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/saphyr/src/emitter.rs b/saphyr/src/emitter.rs index baf9412..3ea2f2d 100644 --- a/saphyr/src/emitter.rs +++ b/saphyr/src/emitter.rs @@ -395,7 +395,7 @@ products: amount: 4 "*cookies": amount: 4 - ".milk": + ".milk": amount: 1 "2.4": real key "[1,2,3,4]": array key From 70795865c92059613eb2c9f833773d06685be482 Mon Sep 17 00:00:00 2001 From: David Tolnay Date: Mon, 8 May 2017 11:30:51 -0700 Subject: [PATCH 111/380] Switch from clippy dependency to cargo-clippy --- saphyr/.travis.yml | 6 +++--- saphyr/Cargo.toml | 1 - saphyr/src/lib.rs | 7 ++----- saphyr/src/yaml.rs | 2 +- 4 files changed, 6 insertions(+), 10 deletions(-) diff --git a/saphyr/.travis.yml b/saphyr/.travis.yml index 6e6d706..c7043aa 100644 --- a/saphyr/.travis.yml +++ b/saphyr/.travis.yml @@ -11,10 +11,10 @@ env: global: - secure: ZUcdcbS8xbpdII9FSPx7VtoVhEkJhWL2Hb75tDlKDHNhfXqmt1NyB9q/2qXJ5Ulp4MnYXwsI8LsDloR6gvdB4xElay3smuF/neGvMjrqcB15/2p0MSQ+kZjMsNB6mlb5kAlm8ahduXIscppmw/V+m5hn3Vo+RQz/Ng+pzv0nc8KEXPMYrfRFg+a7FaeIbRbb8ir9EfflUSqArLq2hbi2WdhM3hFMcCIAUt6DD4x5ubjEg60OnIof5FDu0mXMXzQvUfHWOeYnsNcD/DLyDnm6FuQEzk37M4EB8op2SdBUeQMQ5abR3i2rd//DZpbTTEjud0PseWohGAwTwL2aoFrqs7uYQMx+vcGlOzAyDUm4VemVUa3F2BECdzU5BiujcKOITJEVUYWongld93arQq34FuXG/TO/T1XrerxfG6LTkTkKS5Vz7W8z6Rloa99WrQLJg1ZJP6itEU7G7KsDFVgRhsg7rz4/dV/2+cV4UvIwd4HlGXKCFlH0SClqvM3/7i/qqCD0689SJW6Zip+ly38MXlGy2s/AmReEasXvFer9JkOEIuPa8QTBNAjDlw7bWXi6neQWBIZU1VhZcSssnrVmEFN8fNklShzpw5DyKCv8jPTx2O6Dw8B/LgIK8uo+eaTXiO6zz/T1c/qEdsYslvxPA2D3F+ONpPU7238ykT4eRog= script: + - cargo build --verbose - | if [ "$TRAVIS_RUST_VERSION" = nightly ]; then - cargo build --features clippy --verbose - else - cargo build --verbose + cargo install clippy --debug + cargo clippy -- -Dclippy fi - cargo test --verbose diff --git a/saphyr/Cargo.toml b/saphyr/Cargo.toml index 00688e7..0c79598 100644 --- a/saphyr/Cargo.toml +++ b/saphyr/Cargo.toml @@ -10,7 +10,6 @@ repository = "https://github.com/chyh1990/yaml-rust" publish = false # this branch contains breaking changes [dependencies] -clippy = { version = "^0.*", optional = true } linked-hash-map = ">=0.0.9, <0.5" [dev-dependencies] diff --git a/saphyr/src/lib.rs b/saphyr/src/lib.rs index aa35e47..e16449c 100644 --- a/saphyr/src/lib.rs +++ b/saphyr/src/lib.rs @@ -36,11 +36,8 @@ //! //! ``` -#![cfg_attr(feature="clippy", feature(plugin))] -#![cfg_attr(feature="clippy", plugin(clippy))] -#![cfg_attr(feature="clippy", deny(clippy))] -#![cfg_attr(feature="clippy", warn(cyclomatic_complexity))] -#![cfg_attr(feature="clippy", allow(match_same_arms))] +#![cfg_attr(feature = "cargo-clippy", warn(cyclomatic_complexity))] +#![cfg_attr(feature = "cargo-clippy", allow(match_same_arms))] extern crate linked_hash_map; diff --git a/saphyr/src/yaml.rs b/saphyr/src/yaml.rs index 94f60bf..b8d8163 100644 --- a/saphyr/src/yaml.rs +++ b/saphyr/src/yaml.rs @@ -273,7 +273,7 @@ impl Yaml { } } -#[cfg_attr(feature="clippy", allow(should_implement_trait))] +#[cfg_attr(feature = "cargo-clippy", allow(should_implement_trait))] impl Yaml { // Not implementing FromStr because there is no possibility of Error. // This function falls back to Yaml::String if nothing else matches. From 37dc15badb764497c884b50efa60fe1a85e4f590 Mon Sep 17 00:00:00 2001 From: David Tolnay Date: Mon, 8 May 2017 11:35:53 -0700 Subject: [PATCH 112/380] Fix clippy complaints --- saphyr/examples/dump_yaml.rs | 6 +++--- saphyr/src/emitter.rs | 4 ++-- saphyr/tests/quickcheck.rs | 6 +++--- saphyr/tests/spec_test.rs | 2 ++ 4 files changed, 10 insertions(+), 8 deletions(-) diff --git a/saphyr/examples/dump_yaml.rs b/saphyr/examples/dump_yaml.rs index a77d676..5f2e306 100644 --- a/saphyr/examples/dump_yaml.rs +++ b/saphyr/examples/dump_yaml.rs @@ -12,13 +12,13 @@ fn print_indent(indent: usize) { } fn dump_node(doc: &yaml::Yaml, indent: usize) { - match doc { - &yaml::Yaml::Array(ref v) => { + match *doc { + yaml::Yaml::Array(ref v) => { for x in v { dump_node(x, indent + 1); } }, - &yaml::Yaml::Hash(ref h) => { + yaml::Yaml::Hash(ref h) => { for (k, v) in h { print_indent(indent); println!("{:?}:", k); diff --git a/saphyr/src/emitter.rs b/saphyr/src/emitter.rs index 3ea2f2d..7d2ec0a 100644 --- a/saphyr/src/emitter.rs +++ b/saphyr/src/emitter.rs @@ -1,6 +1,6 @@ use std::fmt; use std::convert::From; -use yaml::{Array, Hash, Yaml}; +use yaml::{Hash, Yaml}; #[derive(Copy, Clone, Debug)] pub enum EmitError { @@ -193,7 +193,7 @@ impl<'a> YamlEmitter<'a> { } } - fn emit_array(&mut self, v: &Array) -> EmitResult { + fn emit_array(&mut self, v: &[Yaml]) -> EmitResult { if v.is_empty() { try!(write!(self.writer, "[]")); } else { diff --git a/saphyr/tests/quickcheck.rs b/saphyr/tests/quickcheck.rs index 62db056..54be25d 100644 --- a/saphyr/tests/quickcheck.rs +++ b/saphyr/tests/quickcheck.rs @@ -11,11 +11,11 @@ quickcheck! { let mut out_str = String::new(); { let mut emitter = YamlEmitter::new(&mut out_str); - emitter.dump(&Yaml::Array(xs.into_iter().map(|s| Yaml::String(s)).collect())).unwrap(); + emitter.dump(&Yaml::Array(xs.into_iter().map(Yaml::String).collect())).unwrap(); } if let Err(err) = YamlLoader::load_from_str(&out_str) { return TestResult::error(err.description()); } - return TestResult::passed(); + TestResult::passed() } -} \ No newline at end of file +} diff --git a/saphyr/tests/spec_test.rs b/saphyr/tests/spec_test.rs index a79c051..b7316fc 100644 --- a/saphyr/tests/spec_test.rs +++ b/saphyr/tests/spec_test.rs @@ -5,6 +5,8 @@ extern crate yaml_rust; use yaml_rust::parser::{Parser, EventReceiver, Event}; use yaml_rust::scanner::TScalarStyle; +// These names match the names used in the C++ test suite. +#[cfg_attr(feature = "cargo-clippy", allow(enum_variant_names))] #[derive(Clone, PartialEq, PartialOrd, Debug)] enum TestEvent { OnDocumentStart, From 45eae6fd07336fb0342f2bfa5a866ba7fd5e9feb Mon Sep 17 00:00:00 2001 From: David Tolnay Date: Mon, 8 May 2017 11:43:42 -0700 Subject: [PATCH 113/380] Implement Error for EmitError Fixes #50. --- saphyr/src/emitter.rs | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/saphyr/src/emitter.rs b/saphyr/src/emitter.rs index 7d2ec0a..45f2024 100644 --- a/saphyr/src/emitter.rs +++ b/saphyr/src/emitter.rs @@ -1,5 +1,6 @@ -use std::fmt; +use std::fmt::{self, Display}; use std::convert::From; +use std::error::Error; use yaml::{Hash, Yaml}; #[derive(Copy, Clone, Debug)] @@ -8,6 +9,24 @@ pub enum EmitError { BadHashmapKey, } +impl Error for EmitError { + fn description(&self) -> &str { + match *self { + EmitError::FmtError(ref err) => err.description(), + EmitError::BadHashmapKey => "bad hashmap key", + } + } +} + +impl Display for EmitError { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + match *self { + EmitError::FmtError(ref err) => Display::fmt(err, formatter), + EmitError::BadHashmapKey => formatter.write_str("bad hashmap key"), + } + } +} + impl From for EmitError { fn from(f: fmt::Error) -> Self { EmitError::FmtError(f) From 900ab5d2ab795a8c9d25c51367a04e0b29036b19 Mon Sep 17 00:00:00 2001 From: Alyssa Ross Date: Wed, 10 May 2017 22:09:30 +0100 Subject: [PATCH 114/380] Add/fix README syntax highlighting Adds the missing syntax mode for TOML, and fixes the Rust syntax highlighting syntax --- saphyr/README.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/saphyr/README.md b/saphyr/README.md index 0cfca8d..6a334df 100644 --- a/saphyr/README.md +++ b/saphyr/README.md @@ -26,28 +26,28 @@ See [Document](http://chyh1990.github.io/yaml-rust/doc/yaml_rust/) Add the following to the Cargo.toml of your project: -``` +```toml [dependencies] yaml-rust = "*" ``` or -``` +```toml [dependencies.yaml-rust] git = "https://github.com/chyh1990/yaml-rust.git" ``` and import using *extern crate*: -```.rust +```rust extern crate yaml_rust; ``` Use `yaml::YamlLoader` to load the YAML documents and access it as Vec/HashMap: -```.rust +```rust extern crate yaml_rust; use yaml_rust::{YamlLoader, YamlEmitter}; From ff8572352dc3dbd403a65212673fd85606eeaa7d Mon Sep 17 00:00:00 2001 From: Charlie Ozinga Date: Thu, 11 May 2017 12:36:38 -0600 Subject: [PATCH 115/380] Rebased and resolved conflicts with the following: https://github.com/chyh1990/yaml-rust/pull/66 https://github.com/chyh1990/yaml-rust/pull/62 (closed in favor of 66) --- saphyr/src/emitter.rs | 68 +++++++++++---------------------------- saphyr/tests/spec_test.rs | 66 +++++++++++++++++++++++++++++++++++++ 2 files changed, 85 insertions(+), 49 deletions(-) diff --git a/saphyr/src/emitter.rs b/saphyr/src/emitter.rs index 45f2024..ba0bf7e 100644 --- a/saphyr/src/emitter.rs +++ b/saphyr/src/emitter.rs @@ -132,48 +132,6 @@ impl<'a> YamlEmitter<'a> { Ok(()) } - fn emit_node_compact(&mut self, node: &Yaml) -> EmitResult { - match *node { - Yaml::Array(ref v) => { - try!(write!(self.writer, "[")); - if self.level >= 0 { - try!(write!(self.writer, "+ ")); - } - self.level += 1; - for (cnt, x) in v.iter().enumerate() { - try!(self.write_indent()); - if cnt > 0 { try!(write!(self.writer, ", ")); } - try!(self.emit_node(x)); - } - self.level -= 1; - try!(write!(self.writer, "]")); - Ok(()) - }, - Yaml::Hash(ref h) => { - try!(self.writer.write_str("{")); - self.level += 1; - for (cnt, (k, v)) in h.iter().enumerate() { - if cnt > 0 { - try!(write!(self.writer, ", ")); - } - match *k { - // complex key is not supported - Yaml::Array(_) | Yaml::Hash(_) => { - return Err(EmitError::BadHashmapKey); - }, - _ => { try!(self.emit_node(k)); } - } - try!(write!(self.writer, ": ")); - try!(self.emit_node(v)); - } - try!(self.writer.write_str("}")); - self.level -= 1; - Ok(()) - }, - _ => self.emit_node(node), - } - } - fn emit_node(&mut self, node: &Yaml) -> EmitResult { match *node { Yaml::Array(ref v) => self.emit_array(v), @@ -236,28 +194,37 @@ impl<'a> YamlEmitter<'a> { } else { self.level += 1; for (cnt, (k, v)) in h.iter().enumerate() { + let complex_key = match *k { + Yaml::Hash(_) | Yaml::Array(_) => true, + _ => false, + }; if cnt > 0 { try!(write!(self.writer, "\n")); try!(self.write_indent()); } - match *k { - Yaml::Array(_) | Yaml::Hash(_) => { - try!(self.emit_node_compact(k)); - } - _ => { - try!(self.emit_node(k)); - } + if complex_key { + try!(write!(self.writer, "? ")); + self.level += 1; + try!(self.emit_node(k)); + self.level -= 1; + try!(write!(self.writer, "\n")); + try!(self.write_indent()); + } else { + try!(self.emit_node(k)); } match *v { Yaml::Array(ref v) => { + if complex_key { self.level += 1; } if v.is_empty() { try!(write!(self.writer, ": ")); } else { try!(write!(self.writer, ":\n")); } try!(self.emit_array(v)); + if complex_key { self.level -= 1; } } Yaml::Hash(ref h) => { + if complex_key { self.level += 1; } if h.is_empty() { try!(write!(self.writer, ": ")); } else { @@ -267,10 +234,13 @@ impl<'a> YamlEmitter<'a> { self.level -= 1; } try!(self.emit_hash(h)); + if complex_key { self.level -= 1; } } _ => { + if complex_key { self.level += 1; } try!(write!(self.writer, ": ")); try!(self.emit_node(v)); + if complex_key { self.level -= 1; } } } } diff --git a/saphyr/tests/spec_test.rs b/saphyr/tests/spec_test.rs index b7316fc..3e5d043 100644 --- a/saphyr/tests/spec_test.rs +++ b/saphyr/tests/spec_test.rs @@ -73,3 +73,69 @@ include!("spec_test.rs.inc"); //#[test] //fn test_hc_alias() { //} + +#[test] +fn test_mapvec_legal() { + use yaml_rust::yaml::{Array, Hash, Yaml}; + use yaml_rust::{YamlLoader, YamlEmitter}; + + // Emitting a `map>, _>` should result in legal yaml that + // we can parse. + + let mut key = Array::new(); + key.push(Yaml::Integer(1)); + key.push(Yaml::Integer(2)); + key.push(Yaml::Integer(3)); + + let mut keyhash = Hash::new(); + keyhash.insert(Yaml::String("key".into()), Yaml::Array(key)); + + let mut val = Array::new(); + val.push(Yaml::Integer(4)); + val.push(Yaml::Integer(5)); + val.push(Yaml::Integer(6)); + + let mut hash = Hash::new(); + hash.insert(Yaml::Hash(keyhash), Yaml::Array(val)); + + let mut out_str = String::new(); + { + let mut emitter = YamlEmitter::new(&mut out_str); + emitter.dump(&Yaml::Hash(hash)).unwrap(); + } + + // At this point, we are tempted to naively render like this: + // + // ```yaml + // --- + // {key: + // - 1 + // - 2 + // - 3}: + // - 4 + // - 5 + // - 6 + // ``` + // + // However, this doesn't work, because the key sequence [1, 2, 3] is + // rendered in block mode, which is not legal (as far as I can tell) + // inside the flow mode of the key. We need to either fully render + // everything that's in a key in flow mode (which may make for some + // long lines), or use the explicit map identifier '?': + // + // ```yaml + // --- + // ? + // key: + // - 1 + // - 2 + // - 3 + // : + // - 4 + // - 5 + // - 6 + // ``` + + YamlLoader::load_from_str(&out_str).unwrap(); +} + From 06c9b22357002425c6cbb077b1a815c21ae1cea2 Mon Sep 17 00:00:00 2001 From: Charlie Ozinga Date: Thu, 11 May 2017 23:29:41 -0600 Subject: [PATCH 116/380] Fix nested arrays, emit compact in-line --- saphyr/src/emitter.rs | 112 +++++++++++++++++++++++--------------- saphyr/tests/spec_test.rs | 2 + 2 files changed, 70 insertions(+), 44 deletions(-) diff --git a/saphyr/src/emitter.rs b/saphyr/src/emitter.rs index ba0bf7e..065d46b 100644 --- a/saphyr/src/emitter.rs +++ b/saphyr/src/emitter.rs @@ -3,6 +3,15 @@ use std::convert::From; use std::error::Error; use yaml::{Hash, Yaml}; +/// If the emitter should output in 'compact inline notation' form, as +/// described for block +/// [sequences](http://www.yaml.org/spec/1.2/spec.html#id2797382) and +/// [mappings](http://www.yaml.org/spec/1.2/spec.html#id2798057). In +/// this form, blocks cannot have any properties (such as anchors or +/// tags), which should be OK, because this emitter doesn't (currently) +/// emit those anyways. +pub const COMPACT: bool = true; + #[derive(Copy, Clone, Debug)] pub enum EmitError { FmtError(fmt::Error), @@ -174,16 +183,16 @@ impl<'a> YamlEmitter<'a> { if v.is_empty() { try!(write!(self.writer, "[]")); } else { + self.level += 1; for (cnt, x) in v.iter().enumerate() { if cnt > 0 { try!(write!(self.writer, "\n")); + try!(self.write_indent()); } - try!(self.write_indent()); - self.level += 1; - try!(write!(self.writer, "- ")); - try!(self.emit_node(x)); - self.level -= 1; + try!(write!(self.writer, "-")); + try!(self.emit_val(true, x)); } + self.level -= 1; } Ok(()) } @@ -203,51 +212,57 @@ impl<'a> YamlEmitter<'a> { try!(self.write_indent()); } if complex_key { - try!(write!(self.writer, "? ")); - self.level += 1; - try!(self.emit_node(k)); - self.level -= 1; + try!(write!(self.writer, "?")); + try!(self.emit_val(true, k)); try!(write!(self.writer, "\n")); try!(self.write_indent()); + try!(write!(self.writer, ":")); + try!(self.emit_val(true, v)); } else { try!(self.emit_node(k)); - } - match *v { - Yaml::Array(ref v) => { - if complex_key { self.level += 1; } - if v.is_empty() { - try!(write!(self.writer, ": ")); - } else { - try!(write!(self.writer, ":\n")); - } - try!(self.emit_array(v)); - if complex_key { self.level -= 1; } - } - Yaml::Hash(ref h) => { - if complex_key { self.level += 1; } - if h.is_empty() { - try!(write!(self.writer, ": ")); - } else { - try!(write!(self.writer, ":\n")); - self.level += 1; - try!(self.write_indent()); - self.level -= 1; - } - try!(self.emit_hash(h)); - if complex_key { self.level -= 1; } - } - _ => { - if complex_key { self.level += 1; } - try!(write!(self.writer, ": ")); - try!(self.emit_node(v)); - if complex_key { self.level -= 1; } - } + try!(write!(self.writer, ":")); + try!(self.emit_val(false, v)); } } self.level -= 1; } Ok(()) } + + /// Emit a yaml as a hash or array value: i.e., which should appear + /// following a ":" or "-", either after a space, or on a new line. + /// If `inline` is true, then the preceeding characters are distinct + /// and short enough to respects the COMPACT constant. + fn emit_val(&mut self, inline: bool, val: &Yaml) -> EmitResult { + match *val { + Yaml::Array(ref v) => { + if (inline && COMPACT) || v.is_empty() { + try!(write!(self.writer, " ")); + } else { + try!(write!(self.writer, "\n")); + self.level += 1; + try!(self.write_indent()); + self.level -= 1; + } + self.emit_array(v) + }, + Yaml::Hash(ref h) => { + if (inline && COMPACT) || h.is_empty() { + try!(write!(self.writer, " ")); + } else { + try!(write!(self.writer, "\n")); + self.level += 1; + try!(self.write_indent()); + self.level -= 1; + } + self.emit_hash(h) + }, + _ => { + try!(write!(self.writer, " ")); + self.emit_node(val) + } + } + } } /// Check if the string requires quoting. @@ -406,15 +421,24 @@ y: string with spaces"#; #[test] fn test_empty_and_nested() { - let s = r#"--- + let s = if COMPACT { r#"--- a: b: c: hello d: {} e: -- f -- g -- h: []"#; + - f + - g + - h: []"# } else { r#"--- +a: + b: + c: hello + d: {} +e: + - f + - g + - + h: []"# }; let docs = YamlLoader::load_from_str(&s).unwrap(); let doc = &docs[0]; diff --git a/saphyr/tests/spec_test.rs b/saphyr/tests/spec_test.rs index 3e5d043..1896d17 100644 --- a/saphyr/tests/spec_test.rs +++ b/saphyr/tests/spec_test.rs @@ -136,6 +136,8 @@ fn test_mapvec_legal() { // - 6 // ``` + println!("{}", out_str); + YamlLoader::load_from_str(&out_str).unwrap(); } From 80f967bc09bf2396af7770d4bf02d6d7ad2b51a2 Mon Sep 17 00:00:00 2001 From: Charlie Ozinga Date: Thu, 11 May 2017 23:36:51 -0600 Subject: [PATCH 117/380] Remove extraneous debug output from test --- saphyr/tests/spec_test.rs | 2 -- 1 file changed, 2 deletions(-) diff --git a/saphyr/tests/spec_test.rs b/saphyr/tests/spec_test.rs index 1896d17..3e5d043 100644 --- a/saphyr/tests/spec_test.rs +++ b/saphyr/tests/spec_test.rs @@ -136,8 +136,6 @@ fn test_mapvec_legal() { // - 6 // ``` - println!("{}", out_str); - YamlLoader::load_from_str(&out_str).unwrap(); } From e5a2439494ad62670ab05a47017fec375ad9bce7 Mon Sep 17 00:00:00 2001 From: Yuheng Chen Date: Sat, 13 May 2017 20:48:48 +0800 Subject: [PATCH 118/380] Fix #65 --- saphyr/src/scanner.rs | 2 +- saphyr/src/yaml.rs | 7 +++++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/saphyr/src/scanner.rs b/saphyr/src/scanner.rs index d25ecb8..619e2d9 100644 --- a/saphyr/src/scanner.rs +++ b/saphyr/src/scanner.rs @@ -1294,12 +1294,12 @@ impl> Scanner { } self.lookahead(2); } + self.lookahead(1); match self.ch() { '\'' if single => { break; }, '"' if !single => { break; }, _ => {} } - self.lookahead(1); // Consume blank characters. while is_blank(self.ch()) || is_break(self.ch()) { diff --git a/saphyr/src/yaml.rs b/saphyr/src/yaml.rs index b8d8163..d67d6a2 100644 --- a/saphyr/src/yaml.rs +++ b/saphyr/src/yaml.rs @@ -525,6 +525,13 @@ a1: &DEFAULT assert!(YamlLoader::load_from_str(&s).is_err()); } + #[test] + fn test_issue_65() { + // See: https://github.com/chyh1990/yaml-rust/issues/65 + let b = "\n\"ll\\\"ll\\\r\n\"ll\\\"ll\\\r\r\r\rU\r\r\rU"; + assert!(YamlLoader::load_from_str(&b).is_err()); + } + #[test] fn test_bad_docstart() { assert!(YamlLoader::load_from_str("---This used to cause an infinite loop").is_ok()); From f43b50bbce80bd9b04dfd2d5d038794f2d2c1c3e Mon Sep 17 00:00:00 2001 From: Yuheng Chen Date: Sat, 13 May 2017 21:17:35 +0800 Subject: [PATCH 119/380] Add special f64 parsing Fix #51 --- saphyr/src/yaml.rs | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/saphyr/src/yaml.rs b/saphyr/src/yaml.rs index d67d6a2..99000df 100644 --- a/saphyr/src/yaml.rs +++ b/saphyr/src/yaml.rs @@ -2,6 +2,7 @@ use std::collections::BTreeMap; use std::ops::Index; use std::string; use std::i64; +use std::f64; use std::mem; use std::vec; use parser::*; @@ -226,6 +227,15 @@ pub fn $name(self) -> Option<$t> { ); ); +fn parse_f64(v: &str) -> Option { + match v { + ".inf" | ".Inf" | ".INF" | "+.inf" | "+.Inf" | "+.INF" => Some(f64::INFINITY), + "-.inf" | "-.Inf" | "-.INF" => Some(f64::NEG_INFINITY), + ".nan" | "NaN" | ".NAN" => Some(f64::NAN), + _ => v.parse::().ok() + } +} + impl Yaml { define_as!(as_bool, bool, Boolean); define_as!(as_i64, i64, Integer); @@ -256,18 +266,14 @@ impl Yaml { pub fn as_f64(&self) -> Option { match *self { - Yaml::Real(ref v) => { - v.parse::().ok() - }, + Yaml::Real(ref v) => parse_f64(v), _ => None } } pub fn into_f64(self) -> Option { match self { - Yaml::Real(v) => { - v.parse::().ok() - }, + Yaml::Real(ref v) => parse_f64(v), _ => None } } @@ -299,7 +305,7 @@ impl Yaml { "false" => Yaml::Boolean(false), _ if v.parse::().is_ok() => Yaml::Integer(v.parse::().unwrap()), // try parsing as f64 - _ if v.parse::().is_ok() => Yaml::Real(v.to_owned()), + _ if parse_f64(v).is_some() => Yaml::Real(v.to_owned()), _ => Yaml::String(v.to_owned()) } } @@ -356,6 +362,7 @@ impl Iterator for YamlIter { #[cfg(test)] mod test { use yaml::*; + use std::f64; #[test] fn test_coerce() { let s = "--- @@ -561,6 +568,8 @@ a1: &DEFAULT - 0xFF - 0o77 - +12345 +- -.INF +- .NAN "; let mut out = YamlLoader::load_from_str(&s).unwrap().into_iter(); let mut doc = out.next().unwrap().into_iter(); @@ -582,6 +591,8 @@ a1: &DEFAULT assert_eq!(doc.next().unwrap().into_i64().unwrap(), 255); assert_eq!(doc.next().unwrap().into_i64().unwrap(), 63); assert_eq!(doc.next().unwrap().into_i64().unwrap(), 12345); + assert_eq!(doc.next().unwrap().into_f64().unwrap(), f64::NEG_INFINITY); + assert!(doc.next().unwrap().into_f64().is_some()); } #[test] From 6ba376563bcf3ce3ab83d068722d58aefd69049b Mon Sep 17 00:00:00 2001 From: Yuheng Chen Date: Sat, 13 May 2017 21:22:19 +0800 Subject: [PATCH 120/380] Parse special f64 in tag --- saphyr/src/yaml.rs | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/saphyr/src/yaml.rs b/saphyr/src/yaml.rs index 99000df..657de55 100644 --- a/saphyr/src/yaml.rs +++ b/saphyr/src/yaml.rs @@ -55,6 +55,17 @@ pub enum Yaml { pub type Array = Vec; pub type Hash = LinkedHashMap; +// parse f64 as Core schema +// See: https://github.com/chyh1990/yaml-rust/issues/51 +fn parse_f64(v: &str) -> Option { + match v { + ".inf" | ".Inf" | ".INF" | "+.inf" | "+.Inf" | "+.INF" => Some(f64::INFINITY), + "-.inf" | "-.Inf" | "-.INF" => Some(f64::NEG_INFINITY), + ".nan" | "NaN" | ".NAN" => Some(f64::NAN), + _ => v.parse::().ok() + } +} + pub struct YamlLoader { docs: Vec, // states @@ -116,9 +127,9 @@ impl MarkedEventReceiver for YamlLoader { } }, "float" => { - match v.parse::() { - Err(_) => Yaml::BadValue, - Ok(_) => Yaml::Real(v.clone()) + match parse_f64(v) { + Some(_) => Yaml::Real(v.clone()), + None => Yaml::BadValue, } }, "null" => { @@ -227,15 +238,6 @@ pub fn $name(self) -> Option<$t> { ); ); -fn parse_f64(v: &str) -> Option { - match v { - ".inf" | ".Inf" | ".INF" | "+.inf" | "+.Inf" | "+.INF" => Some(f64::INFINITY), - "-.inf" | "-.Inf" | "-.INF" => Some(f64::NEG_INFINITY), - ".nan" | "NaN" | ".NAN" => Some(f64::NAN), - _ => v.parse::().ok() - } -} - impl Yaml { define_as!(as_bool, bool, Boolean); define_as!(as_i64, i64, Integer); @@ -570,6 +572,7 @@ a1: &DEFAULT - +12345 - -.INF - .NAN +- !!float .INF "; let mut out = YamlLoader::load_from_str(&s).unwrap().into_iter(); let mut doc = out.next().unwrap().into_iter(); @@ -593,6 +596,7 @@ a1: &DEFAULT assert_eq!(doc.next().unwrap().into_i64().unwrap(), 12345); assert_eq!(doc.next().unwrap().into_f64().unwrap(), f64::NEG_INFINITY); assert!(doc.next().unwrap().into_f64().is_some()); + assert_eq!(doc.next().unwrap().into_f64().unwrap(), f64::INFINITY); } #[test] From 1bbe109b90302dd7014ca362d54b46d19b4e9a2e Mon Sep 17 00:00:00 2001 From: Yuheng Chen Date: Sat, 13 May 2017 21:55:32 +0800 Subject: [PATCH 121/380] Allow use integer as key to access HashMap Fix #61 --- saphyr/src/yaml.rs | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/saphyr/src/yaml.rs b/saphyr/src/yaml.rs index 657de55..d745dc6 100644 --- a/saphyr/src/yaml.rs +++ b/saphyr/src/yaml.rs @@ -330,9 +330,13 @@ impl Index for Yaml { type Output = Yaml; fn index(&self, idx: usize) -> &Yaml { - match self.as_vec() { - Some(v) => v.get(idx).unwrap_or(&BAD_VALUE), - None => &BAD_VALUE + if let Some(v) = self.as_vec() { + v.get(idx).unwrap_or(&BAD_VALUE) + } else if let Some(v) = self.as_hash() { + let key = Yaml::Integer(idx as i64); + v.get(&key).unwrap_or(&BAD_VALUE) + } else { + &BAD_VALUE } } } @@ -614,4 +618,17 @@ c: ~ assert_eq!(Some((Yaml::String("c".to_owned()), Yaml::Null)), iter.next()); assert_eq!(None, iter.next()); } + + #[test] + fn test_integer_key() { + let s = " +0: + important: true +1: + important: false +"; + let out = YamlLoader::load_from_str(&s).unwrap(); + let first = out.into_iter().next().unwrap(); + assert_eq!(first[0]["important"].as_bool().unwrap(), true); + } } From aa40cb2ffd4ed2833b3ac90c7afa2ca5feea1b5b Mon Sep 17 00:00:00 2001 From: Hendrik Sollich Date: Mon, 22 May 2017 19:30:01 +0200 Subject: [PATCH 122/380] quoting possible booleans fixes #53 --- saphyr/Cargo.toml | 2 +- saphyr/src/emitter.rs | 58 +++++++++++++++++++++++++++++++++++-------- 2 files changed, 48 insertions(+), 12 deletions(-) diff --git a/saphyr/Cargo.toml b/saphyr/Cargo.toml index 0c79598..a4e2fda 100644 --- a/saphyr/Cargo.toml +++ b/saphyr/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "yaml-rust" -version = "0.3.5" +version = "0.3.6" authors = ["Yuheng Chen "] homepage = "http://chyh1990.github.io/yaml-rust/" documentation = "http://chyh1990.github.io/yaml-rust/doc/yaml_rust/" diff --git a/saphyr/src/emitter.rs b/saphyr/src/emitter.rs index 45f2024..91c94b2 100644 --- a/saphyr/src/emitter.rs +++ b/saphyr/src/emitter.rs @@ -137,15 +137,12 @@ impl<'a> YamlEmitter<'a> { Yaml::Array(ref v) => { try!(write!(self.writer, "[")); if self.level >= 0 { - try!(write!(self.writer, "+ ")); + try!(write!(self.writer, "")); } - self.level += 1; for (cnt, x) in v.iter().enumerate() { - try!(self.write_indent()); if cnt > 0 { try!(write!(self.writer, ", ")); } try!(self.emit_node(x)); } - self.level -= 1; try!(write!(self.writer, "]")); Ok(()) }, @@ -181,8 +178,7 @@ impl<'a> YamlEmitter<'a> { Yaml::String(ref v) => { if need_quotes(v) { try!(escape_str(self.writer, v)); - } - else { + } else { try!(write!(self.writer, "{}", v)); } Ok(()) @@ -306,10 +302,13 @@ fn need_quotes(string: &str) -> bool { _ => false, } }) - || string == "true" - || string == "false" - || string == "null" - || string == "~" + || [// http://yaml.org/type/bool.html + "y","Y","yes","Yes","YES","n","N","no","No","NO", + "True", "TRUE", "true", "False", "FALSE", "false", + "on","On","ON","off","Off","OFF", + // http://yaml.org/type/null.html + "null","Null","NULL", "~" + ].contains(&string) || string.starts_with('.') || string.parse::().is_ok() || string.parse::().is_ok() @@ -421,7 +420,8 @@ products: "true": bool key "{}": empty hash key x: test -y: string with spaces"#; +"y": "can't avoid quoting here" +z: string with spaces"#; let docs = YamlLoader::load_from_str(&s).unwrap(); let doc = &docs[0]; @@ -434,6 +434,42 @@ y: string with spaces"#; assert_eq!(s, writer, "actual:\n\n{}\n", writer); } + #[test] + fn emit_quoted_bools() { + let input = r#"--- +string0: yes +string1: no +string2: "true" +string3: "false" +string4: "~" +null0: ~ +[true, false]: real_bools +[True, TRUE, False, FALSE, y,Y,yes,Yes,YES,n,N,no,No,NO,on,On,ON,off,Off,OFF]: false_bools +bool0: true +bool1: false"#; + let expected = r#"--- +string0: "yes" +string1: "no" +string2: "true" +string3: "false" +string4: "~" +null0: ~ +[true, false]: real_bools +["True", "TRUE", "False", "FALSE", "y", "Y", "yes", "Yes", "YES", "n", "N", "no", "No", "NO", "on", "On", "ON", "off", "Off", "OFF"]: false_bools +bool0: true +bool1: false"#; + + let docs = YamlLoader::load_from_str(&input).unwrap(); + let doc = &docs[0]; + let mut writer = String::new(); + { + let mut emitter = YamlEmitter::new(&mut writer); + emitter.dump(doc).unwrap(); + } + + assert_eq!(expected, writer, "actual:\n\n{}\n", writer); + } + #[test] fn test_empty_and_nested() { let s = r#"--- From 1cfd356df8a1511bc50362e543d3f951385be15f Mon Sep 17 00:00:00 2001 From: Charlie Ozinga Date: Tue, 23 May 2017 12:17:50 -0600 Subject: [PATCH 123/380] Move the compact flag into the emitter itself --- saphyr/src/emitter.rs | 45 +++++++++++++++++++++++++++++++------------ 1 file changed, 33 insertions(+), 12 deletions(-) diff --git a/saphyr/src/emitter.rs b/saphyr/src/emitter.rs index 065d46b..2ed20c2 100644 --- a/saphyr/src/emitter.rs +++ b/saphyr/src/emitter.rs @@ -3,14 +3,6 @@ use std::convert::From; use std::error::Error; use yaml::{Hash, Yaml}; -/// If the emitter should output in 'compact inline notation' form, as -/// described for block -/// [sequences](http://www.yaml.org/spec/1.2/spec.html#id2797382) and -/// [mappings](http://www.yaml.org/spec/1.2/spec.html#id2798057). In -/// this form, blocks cannot have any properties (such as anchors or -/// tags), which should be OK, because this emitter doesn't (currently) -/// emit those anyways. -pub const COMPACT: bool = true; #[derive(Copy, Clone, Debug)] pub enum EmitError { @@ -45,6 +37,7 @@ impl From for EmitError { pub struct YamlEmitter<'a> { writer: &'a mut fmt::Write, best_indent: usize, + compact: bool, level: isize, } @@ -119,11 +112,29 @@ impl<'a> YamlEmitter<'a> { YamlEmitter { writer: writer, best_indent: 2, + compact: true, level: -1 } } + /// Set 'compact inline notation' on or off, as described for block + /// [sequences](http://www.yaml.org/spec/1.2/spec.html#id2797382) + /// and + /// [mappings](http://www.yaml.org/spec/1.2/spec.html#id2798057). + /// + /// In this form, blocks cannot have any properties (such as anchors + /// or tags), which should be OK, because this emitter doesn't + /// (currently) emit those anyways. + pub fn compact(&mut self, compact: bool) { + self.compact = compact; + } + + /// Determine if this emitter is using 'compact inline notation'. + pub fn is_compact(&self) -> bool { + self.compact + } + pub fn dump(&mut self, doc: &Yaml) -> EmitResult { // write DocumentStart try!(write!(self.writer, "---\n")); @@ -232,11 +243,11 @@ impl<'a> YamlEmitter<'a> { /// Emit a yaml as a hash or array value: i.e., which should appear /// following a ":" or "-", either after a space, or on a new line. /// If `inline` is true, then the preceeding characters are distinct - /// and short enough to respects the COMPACT constant. + /// and short enough to respect the compact flag. fn emit_val(&mut self, inline: bool, val: &Yaml) -> EmitResult { match *val { Yaml::Array(ref v) => { - if (inline && COMPACT) || v.is_empty() { + if (inline && self.compact) || v.is_empty() { try!(write!(self.writer, " ")); } else { try!(write!(self.writer, "\n")); @@ -247,7 +258,7 @@ impl<'a> YamlEmitter<'a> { self.emit_array(v) }, Yaml::Hash(ref h) => { - if (inline && COMPACT) || h.is_empty() { + if (inline && self.compact) || h.is_empty() { try!(write!(self.writer, " ")); } else { try!(write!(self.writer, "\n")); @@ -421,7 +432,16 @@ y: string with spaces"#; #[test] fn test_empty_and_nested() { - let s = if COMPACT { r#"--- + test_empty_and_nested_flag(false) + } + + #[test] + fn test_empty_and_nested_compact() { + test_empty_and_nested_flag(true) + } + + fn test_empty_and_nested_flag(compact: bool) { + let s = if compact { r#"--- a: b: c: hello @@ -445,6 +465,7 @@ e: let mut writer = String::new(); { let mut emitter = YamlEmitter::new(&mut writer); + emitter.compact(compact); emitter.dump(doc).unwrap(); } From a2c9349417458e8024aee9ae5a17f055db31ab4e Mon Sep 17 00:00:00 2001 From: Hendrik Sollich Date: Sat, 10 Jun 2017 23:39:07 +0200 Subject: [PATCH 124/380] correctly emitting nested arrays fixed #70 too --- saphyr/src/emitter.rs | 111 +++++++++++++++++++++++------------------- saphyr/src/yaml.rs | 7 +++ 2 files changed, 69 insertions(+), 49 deletions(-) diff --git a/saphyr/src/emitter.rs b/saphyr/src/emitter.rs index 91c94b2..c7bc154 100644 --- a/saphyr/src/emitter.rs +++ b/saphyr/src/emitter.rs @@ -134,39 +134,8 @@ impl<'a> YamlEmitter<'a> { fn emit_node_compact(&mut self, node: &Yaml) -> EmitResult { match *node { - Yaml::Array(ref v) => { - try!(write!(self.writer, "[")); - if self.level >= 0 { - try!(write!(self.writer, "")); - } - for (cnt, x) in v.iter().enumerate() { - if cnt > 0 { try!(write!(self.writer, ", ")); } - try!(self.emit_node(x)); - } - try!(write!(self.writer, "]")); - Ok(()) - }, - Yaml::Hash(ref h) => { - try!(self.writer.write_str("{")); - self.level += 1; - for (cnt, (k, v)) in h.iter().enumerate() { - if cnt > 0 { - try!(write!(self.writer, ", ")); - } - match *k { - // complex key is not supported - Yaml::Array(_) | Yaml::Hash(_) => { - return Err(EmitError::BadHashmapKey); - }, - _ => { try!(self.emit_node(k)); } - } - try!(write!(self.writer, ": ")); - try!(self.emit_node(v)); - } - try!(self.writer.write_str("}")); - self.level -= 1; - Ok(()) - }, + Yaml::Array(ref v) => self.emit_array_compact(v), + Yaml::Hash(ref h) => self.emit_hash_compact(h), _ => self.emit_node(node), } } @@ -213,19 +182,36 @@ impl<'a> YamlEmitter<'a> { try!(write!(self.writer, "[]")); } else { for (cnt, x) in v.iter().enumerate() { + self.level += 1; if cnt > 0 { try!(write!(self.writer, "\n")); } try!(self.write_indent()); - self.level += 1; try!(write!(self.writer, "- ")); - try!(self.emit_node(x)); + if self.level >= 1 && x.is_array() { + try!(self.emit_node_compact(x)); + } else { + try!(self.emit_node(x)); + } self.level -= 1; } } Ok(()) } + fn emit_array_compact(&mut self, v: &[Yaml]) -> EmitResult { + try!(write!(self.writer, "[")); + if self.level >= 0 { + try!(write!(self.writer, "")); + } + for (cnt, x) in v.iter().enumerate() { + if cnt > 0 { try!(write!(self.writer, ", ")); } + try!(self.emit_node(x)); + } + try!(write!(self.writer, "]")); + Ok(()) + } + fn emit_hash(&mut self, h: &Hash) -> EmitResult { if h.is_empty() { try!(self.writer.write_str("{}")); @@ -274,6 +260,29 @@ impl<'a> YamlEmitter<'a> { } Ok(()) } + + fn emit_hash_compact(&mut self, h: &Hash) -> EmitResult { + try!(self.writer.write_str("{")); + self.level += 1; + for (cnt, (k, v)) in h.iter().enumerate() { + if cnt > 0 { + try!(write!(self.writer, ", ")); + } + match *k { + // complex key is not supported + Yaml::Array(_) | Yaml::Hash(_) => { + return Err(EmitError::BadHashmapKey); + }, + _ => { try!(self.emit_node(k)); } + } + try!(write!(self.writer, ": ")); + try!(self.emit_node(v)); + } + try!(self.writer.write_str("}")); + self.level -= 1; + Ok(()) + } + } /// Check if the string requires quoting. @@ -315,7 +324,7 @@ fn need_quotes(string: &str) -> bool { } #[cfg(test)] -mod tests { +mod test { use super::*; use YamlLoader; @@ -330,15 +339,8 @@ a1: a2: 4 # i'm comment a3: [1, 2, 3] a4: - - - a1 - - a2 + - [a1, a2] - 2 - - [] - - {} -a5: 'single_quoted' -a6: \"double_quoted\" -a7: 你好 -'key 1': \"ddd\\\tbbb\" "; @@ -349,7 +351,12 @@ a7: 你好 let mut emitter = YamlEmitter::new(&mut writer); emitter.dump(doc).unwrap(); } - let docs_new = YamlLoader::load_from_str(&s).unwrap(); + println!("original:\n{}", s); + println!("emitted:\n{}", writer); + let docs_new = match YamlLoader::load_from_str(&writer) { + Ok(y) => y, + Err(e) => panic!(format!("{}", e)) + }; let doc_new = &docs_new[0]; assert_eq!(doc, doc_new); @@ -383,7 +390,10 @@ products: let mut emitter = YamlEmitter::new(&mut writer); emitter.dump(doc).unwrap(); } - let docs_new = YamlLoader::load_from_str(&s).unwrap(); + let docs_new = match YamlLoader::load_from_str(&writer) { + Ok(y) => y, + Err(e) => panic!(format!("{}", e)) + }; let doc_new = &docs_new[0]; assert_eq!(doc, doc_new); } @@ -470,6 +480,9 @@ bool1: false"#; assert_eq!(expected, writer, "actual:\n\n{}\n", writer); } +//(left: `"---\na:\n b:\n c: hello\n d: {}\ne:\n- f\n- g\n- h: []"`, +//right: `"---\na:\n b:\n c: hello\n d: {}\ne:\n - f\n - g\n - h: []"`) + #[test] fn test_empty_and_nested() { let s = r#"--- @@ -478,9 +491,9 @@ a: c: hello d: {} e: -- f -- g -- h: []"#; + - f + - g + - h: []"#; let docs = YamlLoader::load_from_str(&s).unwrap(); let doc = &docs[0]; diff --git a/saphyr/src/yaml.rs b/saphyr/src/yaml.rs index d745dc6..b8ccdbd 100644 --- a/saphyr/src/yaml.rs +++ b/saphyr/src/yaml.rs @@ -266,6 +266,13 @@ impl Yaml { } } + pub fn is_array(&self) -> bool { + match *self { + Yaml::Array(_) => true, + _ => false + } + } + pub fn as_f64(&self) -> Option { match *self { Yaml::Real(ref v) => parse_f64(v), From da68aaff6d454e7308906c4e58cdd1d7525906e0 Mon Sep 17 00:00:00 2001 From: Hendrik Sollich Date: Sun, 11 Jun 2017 00:19:20 +0200 Subject: [PATCH 125/380] correctly emitting deeply nested arrays --- saphyr/src/emitter.rs | 86 ++++++++++++++++++++++++++++++++++++++----- 1 file changed, 77 insertions(+), 9 deletions(-) diff --git a/saphyr/src/emitter.rs b/saphyr/src/emitter.rs index c7bc154..7a5662e 100644 --- a/saphyr/src/emitter.rs +++ b/saphyr/src/emitter.rs @@ -142,7 +142,7 @@ impl<'a> YamlEmitter<'a> { fn emit_node(&mut self, node: &Yaml) -> EmitResult { match *node { - Yaml::Array(ref v) => self.emit_array(v), + Yaml::Array(ref v) => self.emit_array(v, !node.is_array()), Yaml::Hash(ref h) => self.emit_hash(h), Yaml::String(ref v) => { if need_quotes(v) { @@ -177,7 +177,7 @@ impl<'a> YamlEmitter<'a> { } } - fn emit_array(&mut self, v: &[Yaml]) -> EmitResult { + fn emit_array(&mut self, v: &[Yaml], indent_first: bool) -> EmitResult { if v.is_empty() { try!(write!(self.writer, "[]")); } else { @@ -186,12 +186,14 @@ impl<'a> YamlEmitter<'a> { if cnt > 0 { try!(write!(self.writer, "\n")); } - try!(self.write_indent()); + if cnt > 0 || indent_first { + try!(self.write_indent()); + } try!(write!(self.writer, "- ")); - if self.level >= 1 && x.is_array() { + if self.level > 2 { try!(self.emit_node_compact(x)); } else { - try!(self.emit_node(x)); + try!(self.emit_node(x)); } self.level -= 1; } @@ -237,7 +239,7 @@ impl<'a> YamlEmitter<'a> { } else { try!(write!(self.writer, ":\n")); } - try!(self.emit_array(v)); + try!(self.emit_array(v, true)); } Yaml::Hash(ref h) => { if h.is_empty() { @@ -480,9 +482,6 @@ bool1: false"#; assert_eq!(expected, writer, "actual:\n\n{}\n", writer); } -//(left: `"---\na:\n b:\n c: hello\n d: {}\ne:\n- f\n- g\n- h: []"`, -//right: `"---\na:\n b:\n c: hello\n d: {}\ne:\n - f\n - g\n - h: []"`) - #[test] fn test_empty_and_nested() { let s = r#"--- @@ -505,4 +504,73 @@ e: assert_eq!(s, writer); } + + #[test] + fn test_nested_arrays() { + let s = r#"--- +a: + - b + - - c + - d + - - e + - f"#; + + let docs = YamlLoader::load_from_str(&s).unwrap(); + let doc = &docs[0]; + let mut writer = String::new(); + { + let mut emitter = YamlEmitter::new(&mut writer); + emitter.dump(doc).unwrap(); + } + println!("original:\n{}", s); + println!("emitted:\n{}", writer); + + assert_eq!(s, writer); + } + + #[test] + fn test_deeply_nested_arrays() { + let s = r#"--- +a: + - b + - - c + - d + - - e + - [f, e]"#; + + let docs = YamlLoader::load_from_str(&s).unwrap(); + let doc = &docs[0]; + let mut writer = String::new(); + { + let mut emitter = YamlEmitter::new(&mut writer); + emitter.dump(doc).unwrap(); + } + println!("original:\n{}", s); + println!("emitted:\n{}", writer); + + assert_eq!(s, writer); + } + + #[test] + fn test_nested_hashes() { + let s = r#"--- +a: + b: + c: + d: + e: f"#; + + let docs = YamlLoader::load_from_str(&s).unwrap(); + let doc = &docs[0]; + let mut writer = String::new(); + { + let mut emitter = YamlEmitter::new(&mut writer); + emitter.dump(doc).unwrap(); + } + println!("original:\n{}", s); + println!("emitted:\n{}", writer); + + assert_eq!(s, writer); + } + } From f94a1deabe393eb8b3ce64c7a41c7d2a32cac396 Mon Sep 17 00:00:00 2001 From: Christian Hofer Date: Wed, 14 Jun 2017 10:29:27 +0200 Subject: [PATCH 126/380] Allow clients ownership of events Also: Optimize built-in Yaml deserializer to avoid one scalar value cloning step. --- saphyr/src/parser.rs | 82 +++++++++++++++++++++++++++----------------- saphyr/src/yaml.rs | 22 ++++++------ 2 files changed, 62 insertions(+), 42 deletions(-) diff --git a/saphyr/src/parser.rs b/saphyr/src/parser.rs index 7d147e7..e2bbbd3 100644 --- a/saphyr/src/parser.rs +++ b/saphyr/src/parser.rs @@ -90,6 +90,15 @@ impl MarkedEventReceiver for R { } } +pub trait MarkedOwnedEventReceiver { + fn on_owned_event(&mut self, ev: Event, _mark: Marker); +} + +impl MarkedOwnedEventReceiver for R { + fn on_owned_event(&mut self, ev: Event, _mark: Marker) { + self.on_event(&ev, _mark); + } +} pub type ParseResult = Result<(Event, Marker), ScanError>; @@ -136,38 +145,37 @@ impl> Parser { self.states.push(state); } - fn parse(&mut self, recv: &mut R) - -> Result { + fn parse(&mut self) -> Result<(Event, Marker), ScanError> { if self.state == State::End { - return Ok(Event::StreamEnd); + return Ok((Event::StreamEnd, self.scanner.mark())); } let (ev, mark) = try!(self.state_machine()); // println!("EV {:?}", ev); - recv.on_event(&ev, mark); - Ok(ev) + Ok((ev, mark)) } - pub fn load(&mut self, recv: &mut R, multi: bool) + pub fn load(&mut self, recv: &mut R, multi: bool) -> Result<(), ScanError> { if !self.scanner.stream_started() { - let ev = try!(self.parse(recv)); + let (ev, mark) = try!(self.parse()); assert_eq!(ev, Event::StreamStart); + recv.on_owned_event(ev, mark); } if self.scanner.stream_ended() { // XXX has parsed? - recv.on_event(&Event::StreamEnd, self.scanner.mark()); + recv.on_owned_event(Event::StreamEnd, self.scanner.mark()); return Ok(()); } loop { - let ev = try!(self.parse(recv)); + let (ev, mark) = try!(self.parse()); if ev == Event::StreamEnd { - recv.on_event(&Event::StreamEnd, self.scanner.mark()); + recv.on_owned_event(ev, mark); return Ok(()); } // clear anchors before a new document self.anchors.clear(); - try!(self.load_document(&ev, recv)); + try!(self.load_document(ev, mark, recv)); if !multi { break; } @@ -175,63 +183,75 @@ impl> Parser { Ok(()) } - fn load_document(&mut self, first_ev: &Event, recv: &mut R) + fn load_document(&mut self, first_ev: Event, mark: Marker, recv: &mut R) -> Result<(), ScanError> { - assert_eq!(first_ev, &Event::DocumentStart); + assert_eq!(first_ev, Event::DocumentStart); + recv.on_owned_event(first_ev, mark); - let ev = try!(self.parse(recv)); - try!(self.load_node(&ev, recv)); + let (ev, mark) = try!(self.parse()); + try!(self.load_node(ev, mark, recv)); // DOCUMENT-END is expected. - let ev = try!(self.parse(recv)); + let (ev, mark) = try!(self.parse()); assert_eq!(ev, Event::DocumentEnd); + recv.on_owned_event(ev, mark); Ok(()) } - fn load_node(&mut self, first_ev: &Event, recv: &mut R) + fn load_node(&mut self, first_ev: Event, mark: Marker, recv: &mut R) -> Result<(), ScanError> { - match *first_ev { + match first_ev { Event::Alias(..) | Event::Scalar(..) => { + recv.on_owned_event(first_ev, mark); Ok(()) }, Event::SequenceStart(_) => { - self.load_sequence(first_ev, recv) + recv.on_owned_event(first_ev, mark); + self.load_sequence(recv) }, Event::MappingStart(_) => { - self.load_mapping(first_ev, recv) + recv.on_owned_event(first_ev, mark); + self.load_mapping(recv) }, _ => { println!("UNREACHABLE EVENT: {:?}", first_ev); unreachable!(); } } } - fn load_mapping(&mut self, _first_ev: &Event, recv: &mut R) + fn load_mapping(&mut self, recv: &mut R) -> Result<(), ScanError> { - let mut ev = try!(self.parse(recv)); - while ev != Event::MappingEnd { + let (mut key_ev, mut key_mark) = try!(self.parse()); + while key_ev != Event::MappingEnd { // key - try!(self.load_node(&ev, recv)); + try!(self.load_node(key_ev, key_mark, recv)); // value - ev = try!(self.parse(recv)); - try!(self.load_node(&ev, recv)); + let (ev, mark) = try!(self.parse()); + try!(self.load_node(ev, mark, recv)); // next event - ev = try!(self.parse(recv)); + let (ev, mark) = try!(self.parse()); + key_ev = ev; + key_mark = mark; + } + recv.on_owned_event(key_ev, key_mark); Ok(()) } - fn load_sequence(&mut self, _first_ev: &Event, recv: &mut R) + fn load_sequence(&mut self, recv: &mut R) -> Result<(), ScanError> { - let mut ev = try!(self.parse(recv)); + let (mut ev, mut mark) = try!(self.parse()); while ev != Event::SequenceEnd { - try!(self.load_node(&ev, recv)); + try!(self.load_node(ev, mark, recv)); // next event - ev = try!(self.parse(recv)); + let (next_ev, next_mark) = try!(self.parse()); + ev = next_ev; + mark = next_mark; } + recv.on_owned_event(ev, mark); Ok(()) } diff --git a/saphyr/src/yaml.rs b/saphyr/src/yaml.rs index b8ccdbd..b9d64f4 100644 --- a/saphyr/src/yaml.rs +++ b/saphyr/src/yaml.rs @@ -75,10 +75,10 @@ pub struct YamlLoader { anchor_map: BTreeMap, } -impl MarkedEventReceiver for YamlLoader { - fn on_event(&mut self, ev: &Event, _: Marker) { +impl MarkedOwnedEventReceiver for YamlLoader { + fn on_owned_event(&mut self, ev: Event, _: Marker) { // println!("EV {:?}", ev); - match *ev { + match ev { Event::DocumentStart => { // do nothing }, @@ -106,10 +106,10 @@ impl MarkedEventReceiver for YamlLoader { let node = self.doc_stack.pop().unwrap(); self.insert_new_node(node); }, - Event::Scalar(ref v, style, aid, ref tag) => { + Event::Scalar(v, style, aid, tag) => { let node = if style != TScalarStyle::Plain { - Yaml::String(v.clone()) - } else if let Some(TokenType::Tag(ref handle, ref suffix)) = *tag { + Yaml::String(v) + } else if let Some(TokenType::Tag(ref handle, ref suffix)) = tag { // XXX tag:yaml.org,2002: if handle == "!!" { match suffix.as_ref() { @@ -127,8 +127,8 @@ impl MarkedEventReceiver for YamlLoader { } }, "float" => { - match parse_f64(v) { - Some(_) => Yaml::Real(v.clone()), + match parse_f64(&v) { + Some(_) => Yaml::Real(v), None => Yaml::BadValue, } }, @@ -138,14 +138,14 @@ impl MarkedEventReceiver for YamlLoader { _ => Yaml::BadValue, } } - _ => Yaml::String(v.clone()), + _ => Yaml::String(v), } } else { - Yaml::String(v.clone()) + Yaml::String(v) } } else { // Datatype is not specified, or unrecognized - Yaml::from_str(v.as_ref()) + Yaml::from_str(&v) }; self.insert_new_node((node, aid)); From 505b1d6ec11847de3bb812d4e4a4d450b46cd90a Mon Sep 17 00:00:00 2001 From: Christian Hofer Date: Mon, 19 Jun 2017 15:41:26 +0200 Subject: [PATCH 127/380] Always pass events by value - The EventReceiver gets ownership of events - Breaks compatilibility with previous interface --- saphyr/src/parser.rs | 49 +++++++++++++++------------------------ saphyr/src/yaml.rs | 4 ++-- saphyr/tests/spec_test.rs | 4 ++-- 3 files changed, 23 insertions(+), 34 deletions(-) diff --git a/saphyr/src/parser.rs b/saphyr/src/parser.rs index e2bbbd3..8b511e9 100644 --- a/saphyr/src/parser.rs +++ b/saphyr/src/parser.rs @@ -76,31 +76,20 @@ pub struct Parser { pub trait EventReceiver { - fn on_event(&mut self, ev: &Event); + fn on_event(&mut self, ev: Event); } pub trait MarkedEventReceiver { - fn on_event(&mut self, ev: &Event, _mark: Marker); + fn on_event(&mut self, ev: Event, _mark: Marker); } impl MarkedEventReceiver for R { - fn on_event(&mut self, ev: &Event, _mark: Marker) { + fn on_event(&mut self, ev: Event, _mark: Marker) { self.on_event(ev) } } -pub trait MarkedOwnedEventReceiver { - fn on_owned_event(&mut self, ev: Event, _mark: Marker); -} - -impl MarkedOwnedEventReceiver for R { - fn on_owned_event(&mut self, ev: Event, _mark: Marker) { - self.on_event(&ev, _mark); - } -} - - pub type ParseResult = Result<(Event, Marker), ScanError>; impl> Parser { @@ -145,7 +134,7 @@ impl> Parser { self.states.push(state); } - fn parse(&mut self) -> Result<(Event, Marker), ScanError> { + fn parse(&mut self) -> ParseResult { if self.state == State::End { return Ok((Event::StreamEnd, self.scanner.mark())); } @@ -154,23 +143,23 @@ impl> Parser { Ok((ev, mark)) } - pub fn load(&mut self, recv: &mut R, multi: bool) + pub fn load(&mut self, recv: &mut R, multi: bool) -> Result<(), ScanError> { if !self.scanner.stream_started() { let (ev, mark) = try!(self.parse()); assert_eq!(ev, Event::StreamStart); - recv.on_owned_event(ev, mark); + recv.on_event(ev, mark); } if self.scanner.stream_ended() { // XXX has parsed? - recv.on_owned_event(Event::StreamEnd, self.scanner.mark()); + recv.on_event(Event::StreamEnd, self.scanner.mark()); return Ok(()); } loop { let (ev, mark) = try!(self.parse()); if ev == Event::StreamEnd { - recv.on_owned_event(ev, mark); + recv.on_event(ev, mark); return Ok(()); } // clear anchors before a new document @@ -183,10 +172,10 @@ impl> Parser { Ok(()) } - fn load_document(&mut self, first_ev: Event, mark: Marker, recv: &mut R) + fn load_document(&mut self, first_ev: Event, mark: Marker, recv: &mut R) -> Result<(), ScanError> { assert_eq!(first_ev, Event::DocumentStart); - recv.on_owned_event(first_ev, mark); + recv.on_event(first_ev, mark); let (ev, mark) = try!(self.parse()); try!(self.load_node(ev, mark, recv)); @@ -194,24 +183,24 @@ impl> Parser { // DOCUMENT-END is expected. let (ev, mark) = try!(self.parse()); assert_eq!(ev, Event::DocumentEnd); - recv.on_owned_event(ev, mark); + recv.on_event(ev, mark); Ok(()) } - fn load_node(&mut self, first_ev: Event, mark: Marker, recv: &mut R) + fn load_node(&mut self, first_ev: Event, mark: Marker, recv: &mut R) -> Result<(), ScanError> { match first_ev { Event::Alias(..) | Event::Scalar(..) => { - recv.on_owned_event(first_ev, mark); + recv.on_event(first_ev, mark); Ok(()) }, Event::SequenceStart(_) => { - recv.on_owned_event(first_ev, mark); + recv.on_event(first_ev, mark); self.load_sequence(recv) }, Event::MappingStart(_) => { - recv.on_owned_event(first_ev, mark); + recv.on_event(first_ev, mark); self.load_mapping(recv) }, _ => { println!("UNREACHABLE EVENT: {:?}", first_ev); @@ -219,7 +208,7 @@ impl> Parser { } } - fn load_mapping(&mut self, recv: &mut R) + fn load_mapping(&mut self, recv: &mut R) -> Result<(), ScanError> { let (mut key_ev, mut key_mark) = try!(self.parse()); while key_ev != Event::MappingEnd { @@ -236,11 +225,11 @@ impl> Parser { key_mark = mark; } - recv.on_owned_event(key_ev, key_mark); + recv.on_event(key_ev, key_mark); Ok(()) } - fn load_sequence(&mut self, recv: &mut R) + fn load_sequence(&mut self, recv: &mut R) -> Result<(), ScanError> { let (mut ev, mut mark) = try!(self.parse()); while ev != Event::SequenceEnd { @@ -251,7 +240,7 @@ impl> Parser { ev = next_ev; mark = next_mark; } - recv.on_owned_event(ev, mark); + recv.on_event(ev, mark); Ok(()) } diff --git a/saphyr/src/yaml.rs b/saphyr/src/yaml.rs index b9d64f4..5d7ef36 100644 --- a/saphyr/src/yaml.rs +++ b/saphyr/src/yaml.rs @@ -75,8 +75,8 @@ pub struct YamlLoader { anchor_map: BTreeMap, } -impl MarkedOwnedEventReceiver for YamlLoader { - fn on_owned_event(&mut self, ev: Event, _: Marker) { +impl MarkedEventReceiver for YamlLoader { + fn on_event(&mut self, ev: Event, _: Marker) { // println!("EV {:?}", ev); match ev { Event::DocumentStart => { diff --git a/saphyr/tests/spec_test.rs b/saphyr/tests/spec_test.rs index b7316fc..61f059d 100644 --- a/saphyr/tests/spec_test.rs +++ b/saphyr/tests/spec_test.rs @@ -25,8 +25,8 @@ struct YamlChecker { } impl EventReceiver for YamlChecker { - fn on_event(&mut self, ev: &Event) { - let tev = match *ev { + fn on_event(&mut self, ev: Event) { + let tev = match ev { Event::DocumentStart => TestEvent::OnDocumentStart, Event::DocumentEnd => TestEvent::OnDocumentEnd, Event::SequenceStart(..) => TestEvent::OnSequenceStart, From 75d1b53914d2e3be73a2516f1ad80786eb078638 Mon Sep 17 00:00:00 2001 From: Christian Hofer Date: Tue, 20 Jun 2017 15:47:19 +0200 Subject: [PATCH 128/380] Parser: Do not clone on peeking This eliminates calls to clone() and to_owned() in the parser - Peeking now returns reference only - To obtain value, fetch_token needs to be called - The parser was adapted accordingly - Also: Pass anchor name by value to register_anchor --- saphyr/src/parser.rs | 536 ++++++++++++++++++++++--------------------- 1 file changed, 279 insertions(+), 257 deletions(-) diff --git a/saphyr/src/parser.rs b/saphyr/src/parser.rs index 8b511e9..5b95f24 100644 --- a/saphyr/src/parser.rs +++ b/saphyr/src/parser.rs @@ -1,5 +1,7 @@ use scanner::*; use std::collections::HashMap; +use std::mem::swap; + // use yaml::*; #[derive(Clone, Copy, PartialEq, Debug, Eq)] @@ -107,22 +109,35 @@ impl> Parser { } } - fn peek(&mut self) -> Result { - if self.token.is_none() { - self.token = self.scanner.next(); + fn peek(&mut self) -> Result<&Token, ScanError> { + match self.token { + None => { + self.token = Some(self.scan_next_token()?); + Ok(self.token.as_ref().unwrap()) + }, + Some(ref tok) => Ok(tok) } - if self.token.is_none() { - match self.scanner.get_error() { - None => - return Err(ScanError::new(self.scanner.mark(), - "unexpected eof")), - Some(e) => return Err(e), - } - } - // XXX better? - Ok(self.token.clone().unwrap()) } + fn scan_next_token(&mut self) -> Result { + let token = self.scanner.next(); + match token { + None => + match self.scanner.get_error() { + None => return Err(ScanError::new(self.scanner.mark(), "unexpected eof")), + Some(e) => return Err(e), + }, + Some(tok) => Ok(tok) + } + } + + fn fetch_token(&mut self) -> Token { + let mut token = None; + swap(&mut token, &mut self.token); + token.expect("fetch_token needs to be preceded by peek") + } + + fn skip(&mut self) { self.token = None; //self.peek(); @@ -286,45 +301,41 @@ impl> Parser { } fn stream_start(&mut self) -> ParseResult { - let tok = try!(self.peek()); - - match tok.1 { - TokenType::StreamStart(_) => { + match *self.peek()? { + Token(mark, TokenType::StreamStart(_)) => { self.state = State::ImplicitDocumentStart; self.skip(); - Ok((Event::StreamStart, tok.0)) + Ok((Event::StreamStart, mark)) }, - _ => Err(ScanError::new(tok.0, - "did not find expected ")), + Token(mark, _) => Err(ScanError::new(mark, + "did not find expected ")), } } fn document_start(&mut self, implicit: bool) -> ParseResult { - let mut tok = try!(self.peek()); if !implicit { - while let TokenType::DocumentEnd = tok.1 { + while let TokenType::DocumentEnd = self.peek()?.1 { self.skip(); - tok = try!(self.peek()); } } - match tok.1 { - TokenType::StreamEnd => { + match *self.peek()? { + Token(mark, TokenType::StreamEnd) => { self.state = State::End; self.skip(); - Ok((Event::StreamEnd, tok.0)) + Ok((Event::StreamEnd, mark)) }, - TokenType::VersionDirective(..) - | TokenType::TagDirective(..) - | TokenType::DocumentStart => { - // explicit document - self._explict_document_start() - }, - _ if implicit => { + Token(_, TokenType::VersionDirective(..)) + | Token(_, TokenType::TagDirective(..)) + | Token(_, TokenType::DocumentStart) => { + // explicit document + self._explict_document_start() + }, + Token(mark, _) if implicit => { try!(self.parser_process_directives()); self.push_state(State::DocumentEnd); self.state = State::BlockNode; - Ok((Event::DocumentStart, tok.0)) + Ok((Event::DocumentStart, mark)) }, _ => { // explicit document @@ -335,8 +346,7 @@ impl> Parser { fn parser_process_directives(&mut self) -> Result<(), ScanError> { loop { - let tok = try!(self.peek()); - match tok.1 { + match self.peek()?.1 { TokenType::VersionDirective(_, _) => { // XXX parsing with warning according to spec //if major != 1 || minor > 2 { @@ -357,28 +367,28 @@ impl> Parser { fn _explict_document_start(&mut self) -> ParseResult { try!(self.parser_process_directives()); - let tok = try!(self.peek()); - if tok.1 != TokenType::DocumentStart { - return Err(ScanError::new(tok.0, "did not find expected ")); - } - self.push_state(State::DocumentEnd); - self.state = State::DocumentContent; - self.skip(); - Ok((Event::DocumentStart, tok.0)) + match *self.peek()? { + Token(mark, TokenType::DocumentStart) => { + self.push_state(State::DocumentEnd); + self.state = State::DocumentContent; + self.skip(); + Ok((Event::DocumentStart, mark)) + } + Token(mark, _) => Err(ScanError::new(mark, "did not find expected ")) + } } fn document_content(&mut self) -> ParseResult { - let tok = try!(self.peek()); - match tok.1 { - TokenType::VersionDirective(..) - |TokenType::TagDirective(..) - |TokenType::DocumentStart - |TokenType::DocumentEnd - |TokenType::StreamEnd => { - self.pop_state(); - // empty scalar - Ok((Event::empty_scalar(), tok.0)) - }, + match *self.peek()? { + Token(mark, TokenType::VersionDirective(..)) + | Token(mark, TokenType::TagDirective(..)) + | Token(mark, TokenType::DocumentStart) + | Token(mark, TokenType::DocumentEnd) + | Token(mark, TokenType::StreamEnd) => { + self.pop_state(); + // empty scalar + Ok((Event::empty_scalar(), mark)) + }, _ => { self.parse_node(true, false) } @@ -387,20 +397,21 @@ impl> Parser { fn document_end(&mut self) -> ParseResult { let mut _implicit = true; - let tok = try!(self.peek()); - let _start_mark = tok.0; - - if let TokenType::DocumentEnd = tok.1 { - self.skip(); - _implicit = false; - } - + let marker: Marker = match *self.peek()? { + Token(mark, TokenType::DocumentEnd) => { + self.skip(); + _implicit = false; + mark + }, + Token(mark, _) => mark + }; + // TODO tag handling self.state = State::DocumentStart; - Ok((Event::DocumentEnd, tok.0)) + Ok((Event::DocumentEnd, marker)) } - fn register_anchor(&mut self, name: &str, _: &Marker) -> Result { + fn register_anchor(&mut self, name: String, _: &Marker) -> Result { // anchors can be overrided/reused // if self.anchors.contains_key(name) { // return Err(ScanError::new(*mark, @@ -408,77 +419,90 @@ impl> Parser { // } let new_id = self.anchor_id; self.anchor_id += 1; - self.anchors.insert(name.to_owned(), new_id); + self.anchors.insert(name, new_id); Ok(new_id) } fn parse_node(&mut self, block: bool, indentless_sequence: bool) -> ParseResult { - let mut tok = try!(self.peek()); let mut anchor_id = 0; let mut tag = None; - match tok.1 { - TokenType::Alias(name) => { + match *self.peek()? { + Token(_, TokenType::Alias(_)) => { self.pop_state(); - self.skip(); - match self.anchors.get(&name) { - None => return Err(ScanError::new(tok.0, "while parsing node, found unknown anchor")), - Some(id) => return Ok((Event::Alias(*id), tok.0)) + if let Token(mark, TokenType::Alias(name)) = self.fetch_token() { + match self.anchors.get(&name) { + None => return Err(ScanError::new(mark, "while parsing node, found unknown anchor")), + Some(id) => return Ok((Event::Alias(*id), mark)) + } + } else { + unreachable!() } }, - TokenType::Anchor(name) => { - anchor_id = try!(self.register_anchor(&name, &tok.0)); - self.skip(); - tok = try!(self.peek()); - if let TokenType::Tag(_, _) = tok.1 { - tag = Some(tok.1); - self.skip(); - tok = try!(self.peek()); + Token(_, TokenType::Anchor(_)) => { + if let Token(mark, TokenType::Anchor(name)) = self.fetch_token() { + anchor_id = try!(self.register_anchor(name, &mark)); + if let TokenType::Tag(..) = self.peek()?.1 { + if let tg @ TokenType::Tag(..) = self.fetch_token().1 { + tag = Some(tg); + } else { + unreachable!() + } + } + } else { + unreachable!() } }, - TokenType::Tag(..) => { - tag = Some(tok.1); - self.skip(); - tok = try!(self.peek()); - if let TokenType::Anchor(name) = tok.1 { - anchor_id = try!(self.register_anchor(&name, &tok.0)); - self.skip(); - tok = try!(self.peek()); + Token(_, TokenType::Tag(..)) => { + if let tg @ TokenType::Tag(..) = self.fetch_token().1 { + tag = Some(tg); + if let TokenType::Anchor(_) = self.peek()?.1 { + if let Token(mark, TokenType::Anchor(name)) = self.fetch_token() { + anchor_id = try!(self.register_anchor(name, &mark)); + } else { + unreachable!() + } + } + } else { + unreachable!() } }, _ => {} } - match tok.1 { - TokenType::BlockEntry if indentless_sequence => { + match *self.peek()? { + Token(mark, TokenType::BlockEntry) if indentless_sequence => { self.state = State::IndentlessSequenceEntry; - Ok((Event::SequenceStart(anchor_id), tok.0)) + Ok((Event::SequenceStart(anchor_id), mark)) }, - TokenType::Scalar(style, v) => { + Token(_, TokenType::Scalar(..)) => { self.pop_state(); - self.skip(); - Ok((Event::Scalar(v, style, anchor_id, tag), tok.0)) + if let Token(mark, TokenType::Scalar(style, v)) = self.fetch_token() { + Ok((Event::Scalar(v, style, anchor_id, tag), mark)) + } else { + unreachable!() + } }, - TokenType::FlowSequenceStart => { + Token(mark, TokenType::FlowSequenceStart) => { self.state = State::FlowSequenceFirstEntry; - Ok((Event::SequenceStart(anchor_id), tok.0)) + Ok((Event::SequenceStart(anchor_id), mark)) }, - TokenType::FlowMappingStart => { + Token(mark, TokenType::FlowMappingStart) => { self.state = State::FlowMappingFirstKey; - Ok((Event::MappingStart(anchor_id), tok.0)) + Ok((Event::MappingStart(anchor_id), mark)) }, - TokenType::BlockSequenceStart if block => { + Token(mark, TokenType::BlockSequenceStart) if block => { self.state = State::BlockSequenceFirstEntry; - Ok((Event::SequenceStart(anchor_id), tok.0)) + Ok((Event::SequenceStart(anchor_id), mark)) }, - TokenType::BlockMappingStart if block => { + Token(mark, TokenType::BlockMappingStart) if block => { self.state = State::BlockMappingFirstKey; - Ok((Event::MappingStart(anchor_id), tok.0)) + Ok((Event::MappingStart(anchor_id), mark)) }, // ex 7.2, an empty scalar can follow a secondary tag - _ if tag.is_some() || anchor_id > 0 => { + Token(mark, _) if tag.is_some() || anchor_id > 0 => { self.pop_state(); - Ok((Event::empty_scalar_with_anchor(anchor_id, tag), tok.0)) + Ok((Event::empty_scalar_with_anchor(anchor_id, tag), mark)) }, - _ => { Err(ScanError::new(tok.0, "while parsing a node, did not find expected node content")) } + Token(mark, _) => { Err(ScanError::new(mark, "while parsing a node, did not find expected node content")) } } } @@ -489,20 +513,17 @@ impl> Parser { //self.marks.push(tok.0); self.skip(); } - let tok = try!(self.peek()); - match tok.1 { - TokenType::Key => { + match *self.peek()? { + Token(_, TokenType::Key) => { self.skip(); - let tok = try!(self.peek()); - match tok.1 { - TokenType::Key - | TokenType::Value - | TokenType::BlockEnd - => { - self.state = State::BlockMappingValue; - // empty scalar - Ok((Event::empty_scalar(), tok.0)) - } + match *self.peek()? { + Token(mark, TokenType::Key) + | Token(mark, TokenType::Value) + | Token(mark, TokenType::BlockEnd) => { + self.state = State::BlockMappingValue; + // empty scalar + Ok((Event::empty_scalar(), mark)) + } _ => { self.push_state(State::BlockMappingValue); self.parse_node(true, true) @@ -510,46 +531,45 @@ impl> Parser { } }, // XXX(chenyh): libyaml failed to parse spec 1.2, ex8.18 - TokenType::Value => { + Token(mark, TokenType::Value) => { self.state = State::BlockMappingValue; - Ok((Event::empty_scalar(), tok.0)) + Ok((Event::empty_scalar(), mark)) }, - TokenType::BlockEnd => { + Token(mark, TokenType::BlockEnd) => { self.pop_state(); self.skip(); - Ok((Event::MappingEnd, tok.0)) + Ok((Event::MappingEnd, mark)) }, - _ => { - Err(ScanError::new(tok.0, "while parsing a block mapping, did not find expected key")) + Token(mark, _) => { + Err(ScanError::new(mark, "while parsing a block mapping, did not find expected key")) } } } fn block_mapping_value(&mut self) -> ParseResult { - let tok = try!(self.peek()); - match tok.1 { - TokenType::Value => { - self.skip(); - let tok = try!(self.peek()); - match tok.1 { - TokenType::Key | TokenType::Value | TokenType::BlockEnd - => { - self.state = State::BlockMappingKey; - // empty scalar - Ok((Event::empty_scalar(), tok.0)) - } - _ => { - self.push_state(State::BlockMappingKey); - self.parse_node(true, true) - } + match *self.peek()? { + Token(_, TokenType::Value) => { + self.skip(); + match *self.peek()? { + Token(mark, TokenType::Key) + | Token(mark, TokenType::Value) + | Token(mark, TokenType::BlockEnd) => { + self.state = State::BlockMappingKey; + // empty scalar + Ok((Event::empty_scalar(), mark)) + }, + _ => { + self.push_state(State::BlockMappingKey); + self.parse_node(true, true) } - }, - _ => { - self.state = State::BlockMappingKey; - // empty scalar - Ok((Event::empty_scalar(), tok.0)) } + }, + Token(mark, _) => { + self.state = State::BlockMappingKey; + // empty scalar + Ok((Event::empty_scalar(), mark)) } + } } fn flow_mapping_key(&mut self, first: bool) -> ParseResult { @@ -557,71 +577,82 @@ impl> Parser { let _ = try!(self.peek()); self.skip(); } - let mut tok = try!(self.peek()); - - if tok.1 != TokenType::FlowMappingEnd { - if !first { - if tok.1 == TokenType::FlowEntry { - self.skip(); - tok = try!(self.peek()); - } else { - return Err(ScanError::new(tok.0, - "while parsing a flow mapping, did not find expected ',' or '}'")); - } - } - - if tok.1 == TokenType::Key { - self.skip(); - tok = try!(self.peek()); - match tok.1 { - TokenType::Value - | TokenType::FlowEntry - | TokenType::FlowMappingEnd => { - self.state = State::FlowMappingValue; - return Ok((Event::empty_scalar(), tok.0)); - }, - _ => { - self.push_state(State::FlowMappingValue); - return self.parse_node(false, false); + let marker: Marker = { + match *self.peek()? { + Token(mark, TokenType::FlowMappingEnd) => mark, + Token(mark, _) => { + if !first { + match *self.peek()? { + Token(_, TokenType::FlowEntry) => self.skip(), + Token(mark, _) => return Err(ScanError::new(mark, + "while parsing a flow mapping, did not find expected ',' or '}'")) + } } + + match *self.peek()? { + Token(_, TokenType::Key) => { + self.skip(); + match *self.peek()? { + Token(mark, TokenType::Value) + | Token(mark, TokenType::FlowEntry) + | Token(mark, TokenType::FlowMappingEnd) => { + self.state = State::FlowMappingValue; + return Ok((Event::empty_scalar(), mark)); + }, + _ => { + self.push_state(State::FlowMappingValue); + return self.parse_node(false, false); + } + } + }, + Token(marker, TokenType::Value) => { + self.state = State::FlowMappingValue; + return Ok((Event::empty_scalar(), marker)); + }, + Token(_, TokenType::FlowMappingEnd) => (), + _ => { + self.push_state(State::FlowMappingEmptyValue); + return self.parse_node(false, false); + } + } + + mark } - // XXX libyaml fail ex 7.3, empty key - } else if tok.1 == TokenType::Value { - self.state = State::FlowMappingValue; - return Ok((Event::empty_scalar(), tok.0)); - } else if tok.1 != TokenType::FlowMappingEnd { - self.push_state(State::FlowMappingEmptyValue); - return self.parse_node(false, false); } - } + }; self.pop_state(); self.skip(); - Ok((Event::MappingEnd, tok.0)) + Ok((Event::MappingEnd, marker)) } fn flow_mapping_value(&mut self, empty: bool) -> ParseResult { - let tok = try!(self.peek()); - if empty { - self.state = State::FlowMappingKey; - return Ok((Event::empty_scalar(), tok.0)); - } - - if tok.1 == TokenType::Value { - self.skip(); - let tok = try!(self.peek()); - match tok.1 { - TokenType::FlowEntry - | TokenType::FlowMappingEnd => { }, - _ => { - self.push_state(State::FlowMappingKey); - return self.parse_node(false, false); + let mark: Marker = { + if empty { + let Token(mark, _) = *self.peek()?; + self.state = State::FlowMappingKey; + return Ok((Event::empty_scalar(), mark)); + } else { + match *self.peek()? { + Token(marker, TokenType::Value) => { + self.skip(); + match self.peek()?.1 { + TokenType::FlowEntry + | TokenType::FlowMappingEnd => { }, + _ => { + self.push_state(State::FlowMappingKey); + return self.parse_node(false, false); + } + } + marker + }, + Token(marker, _) => marker } } - } - + }; + self.state = State::FlowMappingKey; - Ok((Event::empty_scalar(), tok.0)) + Ok((Event::empty_scalar(), mark)) } fn flow_sequence_entry(&mut self, first: bool) -> ParseResult { @@ -631,33 +662,31 @@ impl> Parser { //self.marks.push(tok.0); self.skip(); } - let mut tok = try!(self.peek()); - match tok.1 { - TokenType::FlowSequenceEnd => { + match *self.peek()? { + Token(mark, TokenType::FlowSequenceEnd) => { self.pop_state(); self.skip(); - return Ok((Event::SequenceEnd, tok.0)); + return Ok((Event::SequenceEnd, mark)); }, - TokenType::FlowEntry if !first => { + Token(_, TokenType::FlowEntry) if !first => { self.skip(); - tok = try!(self.peek()); }, - _ if !first => { - return Err(ScanError::new(tok.0, + Token(mark, _) if !first => { + return Err(ScanError::new(mark, "while parsing a flow sequence, expectd ',' or ']'")); } _ => { /* next */ } } - match tok.1 { - TokenType::FlowSequenceEnd => { + match *self.peek()? { + Token(mark, TokenType::FlowSequenceEnd) => { self.pop_state(); self.skip(); - Ok((Event::SequenceEnd, tok.0)) + Ok((Event::SequenceEnd, mark)) }, - TokenType::Key => { + Token(mark, TokenType::Key) => { self.state = State::FlowSequenceEntryMappingKey; self.skip(); - Ok((Event::MappingStart(0), tok.0)) + Ok((Event::MappingStart(0), mark)) } _ => { self.push_state(State::FlowSequenceEntry); @@ -667,21 +696,21 @@ impl> Parser { } fn indentless_sequence_entry(&mut self) -> ParseResult { - let mut tok = try!(self.peek()); - if tok.1 != TokenType::BlockEntry { - self.pop_state(); - return Ok((Event::SequenceEnd, tok.0)); + match *self.peek()? { + Token(_, TokenType::BlockEntry) => (), + Token(mark, _) => { + self.pop_state(); + return Ok((Event::SequenceEnd, mark)); + } } - self.skip(); - tok = try!(self.peek()); - match tok.1 { - TokenType::BlockEntry - | TokenType::Key - | TokenType::Value - | TokenType::BlockEnd => { + match *self.peek()? { + Token(mark, TokenType::BlockEntry) + | Token(mark, TokenType::Key) + | Token(mark, TokenType::Value) + | Token(mark, TokenType::BlockEnd) => { self.state = State::IndentlessSequenceEntry; - Ok((Event::empty_scalar(), tok.0)) + Ok((Event::empty_scalar(), mark)) }, _ => { self.push_state(State::IndentlessSequenceEntry); @@ -697,21 +726,19 @@ impl> Parser { //self.marks.push(tok.0); self.skip(); } - let mut tok = try!(self.peek()); - match tok.1 { - TokenType::BlockEnd => { + match *self.peek()? { + Token(mark, TokenType::BlockEnd) => { self.pop_state(); self.skip(); - Ok((Event::SequenceEnd, tok.0)) + Ok((Event::SequenceEnd, mark)) }, - TokenType::BlockEntry => { + Token(_, TokenType::BlockEntry) => { self.skip(); - tok = try!(self.peek()); - match tok.1 { - TokenType::BlockEntry - | TokenType::BlockEnd => { + match *self.peek()? { + Token(mark, TokenType::BlockEntry) + | Token(mark, TokenType::BlockEnd) => { self.state = State::BlockSequenceEntry; - Ok((Event::empty_scalar(), tok.0)) + Ok((Event::empty_scalar(), mark)) }, _ => { self.push_state(State::BlockSequenceEntry); @@ -719,23 +746,21 @@ impl> Parser { } } }, - _ => { - Err(ScanError::new(tok.0, + Token(mark, _) => { + Err(ScanError::new(mark, "while parsing a block collection, did not find expected '-' indicator")) } } } fn flow_sequence_entry_mapping_key(&mut self) -> ParseResult { - let tok = try!(self.peek()); - - match tok.1 { - TokenType::Value - | TokenType::FlowEntry - | TokenType::FlowSequenceEnd => { - self.skip(); - self.state = State::FlowSequenceEntryMappingValue; - Ok((Event::empty_scalar(), tok.0)) + match *self.peek()? { + Token(mark, TokenType::Value) + | Token(mark, TokenType::FlowEntry) + | Token(mark, TokenType::FlowSequenceEnd) => { + self.skip(); + self.state = State::FlowSequenceEntryMappingValue; + Ok((Event::empty_scalar(), mark)) }, _ => { self.push_state(State::FlowSequenceEntryMappingValue); @@ -745,18 +770,15 @@ impl> Parser { } fn flow_sequence_entry_mapping_value(&mut self) -> ParseResult { - let tok = try!(self.peek()); - - match tok.1 { - TokenType::Value => { + match *self.peek()? { + Token(_, TokenType::Value) => { self.skip(); - let tok = try!(self.peek()); self.state = State::FlowSequenceEntryMappingValue; - match tok.1 { - TokenType::FlowEntry - | TokenType::FlowSequenceEnd => { - self.state = State::FlowSequenceEntryMappingEnd; - Ok((Event::empty_scalar(), tok.0)) + match *self.peek()? { + Token(mark, TokenType::FlowEntry) + | Token(mark, TokenType::FlowSequenceEnd) => { + self.state = State::FlowSequenceEntryMappingEnd; + Ok((Event::empty_scalar(), mark)) }, _ => { self.push_state(State::FlowSequenceEntryMappingEnd); @@ -764,9 +786,9 @@ impl> Parser { } } }, - _ => { + Token(mark, _) => { self.state = State::FlowSequenceEntryMappingEnd; - Ok((Event::empty_scalar(), tok.0)) + Ok((Event::empty_scalar(), mark)) } } } From 2bc73c90a389def01c82bab5a4e054db0572788c Mon Sep 17 00:00:00 2001 From: Christian Hofer Date: Thu, 22 Jun 2017 09:54:13 +0200 Subject: [PATCH 129/380] Fix version incompatibilities Also: Fix clippy errors --- saphyr/src/parser.rs | 68 ++++++++++++++++++++++---------------------- 1 file changed, 34 insertions(+), 34 deletions(-) diff --git a/saphyr/src/parser.rs b/saphyr/src/parser.rs index 5b95f24..ffaab6b 100644 --- a/saphyr/src/parser.rs +++ b/saphyr/src/parser.rs @@ -112,7 +112,7 @@ impl> Parser { fn peek(&mut self) -> Result<&Token, ScanError> { match self.token { None => { - self.token = Some(self.scan_next_token()?); + self.token = Some(try!(self.scan_next_token())); Ok(self.token.as_ref().unwrap()) }, Some(ref tok) => Ok(tok) @@ -124,8 +124,8 @@ impl> Parser { match token { None => match self.scanner.get_error() { - None => return Err(ScanError::new(self.scanner.mark(), "unexpected eof")), - Some(e) => return Err(e), + None => Err(ScanError::new(self.scanner.mark(), "unexpected eof")), + Some(e) => Err(e), }, Some(tok) => Ok(tok) } @@ -301,7 +301,7 @@ impl> Parser { } fn stream_start(&mut self) -> ParseResult { - match *self.peek()? { + match *try!(self.peek()) { Token(mark, TokenType::StreamStart(_)) => { self.state = State::ImplicitDocumentStart; self.skip(); @@ -314,12 +314,12 @@ impl> Parser { fn document_start(&mut self, implicit: bool) -> ParseResult { if !implicit { - while let TokenType::DocumentEnd = self.peek()?.1 { + while let TokenType::DocumentEnd = try!(self.peek()).1 { self.skip(); } } - match *self.peek()? { + match *try!(self.peek()) { Token(mark, TokenType::StreamEnd) => { self.state = State::End; self.skip(); @@ -346,7 +346,7 @@ impl> Parser { fn parser_process_directives(&mut self) -> Result<(), ScanError> { loop { - match self.peek()?.1 { + match try!(self.peek()).1 { TokenType::VersionDirective(_, _) => { // XXX parsing with warning according to spec //if major != 1 || minor > 2 { @@ -367,7 +367,7 @@ impl> Parser { fn _explict_document_start(&mut self) -> ParseResult { try!(self.parser_process_directives()); - match *self.peek()? { + match *try!(self.peek()) { Token(mark, TokenType::DocumentStart) => { self.push_state(State::DocumentEnd); self.state = State::DocumentContent; @@ -379,7 +379,7 @@ impl> Parser { } fn document_content(&mut self) -> ParseResult { - match *self.peek()? { + match *try!(self.peek()) { Token(mark, TokenType::VersionDirective(..)) | Token(mark, TokenType::TagDirective(..)) | Token(mark, TokenType::DocumentStart) @@ -397,7 +397,7 @@ impl> Parser { fn document_end(&mut self) -> ParseResult { let mut _implicit = true; - let marker: Marker = match *self.peek()? { + let marker: Marker = match *try!(self.peek()) { Token(mark, TokenType::DocumentEnd) => { self.skip(); _implicit = false; @@ -426,7 +426,7 @@ impl> Parser { fn parse_node(&mut self, block: bool, indentless_sequence: bool) -> ParseResult { let mut anchor_id = 0; let mut tag = None; - match *self.peek()? { + match *try!(self.peek()) { Token(_, TokenType::Alias(_)) => { self.pop_state(); if let Token(mark, TokenType::Alias(name)) = self.fetch_token() { @@ -441,7 +441,7 @@ impl> Parser { Token(_, TokenType::Anchor(_)) => { if let Token(mark, TokenType::Anchor(name)) = self.fetch_token() { anchor_id = try!(self.register_anchor(name, &mark)); - if let TokenType::Tag(..) = self.peek()?.1 { + if let TokenType::Tag(..) = try!(self.peek()).1 { if let tg @ TokenType::Tag(..) = self.fetch_token().1 { tag = Some(tg); } else { @@ -455,7 +455,7 @@ impl> Parser { Token(_, TokenType::Tag(..)) => { if let tg @ TokenType::Tag(..) = self.fetch_token().1 { tag = Some(tg); - if let TokenType::Anchor(_) = self.peek()?.1 { + if let TokenType::Anchor(_) = try!(self.peek()).1 { if let Token(mark, TokenType::Anchor(name)) = self.fetch_token() { anchor_id = try!(self.register_anchor(name, &mark)); } else { @@ -468,7 +468,7 @@ impl> Parser { }, _ => {} } - match *self.peek()? { + match *try!(self.peek()) { Token(mark, TokenType::BlockEntry) if indentless_sequence => { self.state = State::IndentlessSequenceEntry; Ok((Event::SequenceStart(anchor_id), mark)) @@ -513,10 +513,10 @@ impl> Parser { //self.marks.push(tok.0); self.skip(); } - match *self.peek()? { + match *try!(self.peek()) { Token(_, TokenType::Key) => { self.skip(); - match *self.peek()? { + match *try!(self.peek()) { Token(mark, TokenType::Key) | Token(mark, TokenType::Value) | Token(mark, TokenType::BlockEnd) => { @@ -547,10 +547,10 @@ impl> Parser { } fn block_mapping_value(&mut self) -> ParseResult { - match *self.peek()? { + match *try!(self.peek()) { Token(_, TokenType::Value) => { self.skip(); - match *self.peek()? { + match *try!(self.peek()) { Token(mark, TokenType::Key) | Token(mark, TokenType::Value) | Token(mark, TokenType::BlockEnd) => { @@ -578,21 +578,21 @@ impl> Parser { self.skip(); } let marker: Marker = { - match *self.peek()? { + match *try!(self.peek()) { Token(mark, TokenType::FlowMappingEnd) => mark, Token(mark, _) => { if !first { - match *self.peek()? { + match *try!(self.peek()) { Token(_, TokenType::FlowEntry) => self.skip(), Token(mark, _) => return Err(ScanError::new(mark, "while parsing a flow mapping, did not find expected ',' or '}'")) } } - match *self.peek()? { + match *try!(self.peek()) { Token(_, TokenType::Key) => { self.skip(); - match *self.peek()? { + match *try!(self.peek()) { Token(mark, TokenType::Value) | Token(mark, TokenType::FlowEntry) | Token(mark, TokenType::FlowMappingEnd) => { @@ -629,14 +629,14 @@ impl> Parser { fn flow_mapping_value(&mut self, empty: bool) -> ParseResult { let mark: Marker = { if empty { - let Token(mark, _) = *self.peek()?; + let Token(mark, _) = *try!(self.peek()); self.state = State::FlowMappingKey; return Ok((Event::empty_scalar(), mark)); } else { - match *self.peek()? { + match *try!(self.peek()) { Token(marker, TokenType::Value) => { self.skip(); - match self.peek()?.1 { + match try!(self.peek()).1 { TokenType::FlowEntry | TokenType::FlowMappingEnd => { }, _ => { @@ -662,7 +662,7 @@ impl> Parser { //self.marks.push(tok.0); self.skip(); } - match *self.peek()? { + match *try!(self.peek()) { Token(mark, TokenType::FlowSequenceEnd) => { self.pop_state(); self.skip(); @@ -677,7 +677,7 @@ impl> Parser { } _ => { /* next */ } } - match *self.peek()? { + match *try!(self.peek()) { Token(mark, TokenType::FlowSequenceEnd) => { self.pop_state(); self.skip(); @@ -696,7 +696,7 @@ impl> Parser { } fn indentless_sequence_entry(&mut self) -> ParseResult { - match *self.peek()? { + match *try!(self.peek()) { Token(_, TokenType::BlockEntry) => (), Token(mark, _) => { self.pop_state(); @@ -704,7 +704,7 @@ impl> Parser { } } self.skip(); - match *self.peek()? { + match *try!(self.peek()) { Token(mark, TokenType::BlockEntry) | Token(mark, TokenType::Key) | Token(mark, TokenType::Value) @@ -726,7 +726,7 @@ impl> Parser { //self.marks.push(tok.0); self.skip(); } - match *self.peek()? { + match *try!(self.peek()) { Token(mark, TokenType::BlockEnd) => { self.pop_state(); self.skip(); @@ -734,7 +734,7 @@ impl> Parser { }, Token(_, TokenType::BlockEntry) => { self.skip(); - match *self.peek()? { + match *try!(self.peek()) { Token(mark, TokenType::BlockEntry) | Token(mark, TokenType::BlockEnd) => { self.state = State::BlockSequenceEntry; @@ -754,7 +754,7 @@ impl> Parser { } fn flow_sequence_entry_mapping_key(&mut self) -> ParseResult { - match *self.peek()? { + match *try!(self.peek()) { Token(mark, TokenType::Value) | Token(mark, TokenType::FlowEntry) | Token(mark, TokenType::FlowSequenceEnd) => { @@ -770,11 +770,11 @@ impl> Parser { } fn flow_sequence_entry_mapping_value(&mut self) -> ParseResult { - match *self.peek()? { + match *try!(self.peek()) { Token(_, TokenType::Value) => { self.skip(); self.state = State::FlowSequenceEntryMappingValue; - match *self.peek()? { + match *try!(self.peek()) { Token(mark, TokenType::FlowEntry) | Token(mark, TokenType::FlowSequenceEnd) => { self.state = State::FlowSequenceEntryMappingEnd; From 9687ac3cc3025a7034dadcce46af60bd25975482 Mon Sep 17 00:00:00 2001 From: Tshepang Lekhonkhobe Date: Mon, 26 Jun 2017 12:32:52 +0200 Subject: [PATCH 130/380] readme: nit-picking --- saphyr/README.md | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/saphyr/README.md b/saphyr/README.md index 6a334df..eb747b0 100644 --- a/saphyr/README.md +++ b/saphyr/README.md @@ -7,12 +7,12 @@ The missing YAML 1.2 implementation for Rust. [![license](https://img.shields.io/crates/l/yaml-rust.svg)](https://crates.io/crates/yaml-rust/) [![version](https://img.shields.io/crates/v/yaml-rust.svg)](https://crates.io/crates/yaml-rust/) -`yaml-rust` is a pure Rust YAML 1.2 implementation without -any external dependencies, which enjoys the memory safety -property and other benefits from the Rust language. +`yaml-rust` is a pure Rust YAML 1.2 implementation, +which enjoys the memory safety +property and other benefits from the Rust language. The parser is heavily influenced by `libyaml` and `yaml-cpp`. -This crate works on all Rust supported platforms and +This crate works on all Rust-supported platforms. It also works on Rust 1.0.0 and nightly! See [Document](http://chyh1990.github.io/yaml-rust/doc/yaml_rust/) @@ -28,7 +28,7 @@ Add the following to the Cargo.toml of your project: ```toml [dependencies] -yaml-rust = "*" +yaml-rust = "0.3" ``` or @@ -38,7 +38,7 @@ or git = "https://github.com/chyh1990/yaml-rust.git" ``` -and import using *extern crate*: +and import: ```rust extern crate yaml_rust; @@ -76,7 +76,7 @@ bar: // Chained key/array access is checked and won't panic, // return BadValue if they are not exist. assert!(doc["INVALID_KEY"][100].is_badvalue()); - + // Dump the YAML object let mut out_str = String::new(); { @@ -112,7 +112,7 @@ examples in the specification, except for the following known bugs: * Empty plain scalar in certain contexts However, the widely used library `libyaml` also fails to parse these examples, -so it may not be a huge problem for most users. +so it may not be a huge problem for most users. ## Goals @@ -136,4 +136,3 @@ Fork & PR on Github. Unless you explicitly state otherwise, any contribution intentionally submitted for inclusion in the work by you, as defined in the Apache-2.0 license, shall be dual licensed as above, without any additional terms or conditions. - From 078e1e882d6fe6252439c809da8eb19411028f15 Mon Sep 17 00:00:00 2001 From: Charlie Ozinga Date: Mon, 10 Jul 2017 10:08:20 -0600 Subject: [PATCH 131/380] Version bump --- saphyr/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/saphyr/Cargo.toml b/saphyr/Cargo.toml index a4e2fda..91dac70 100644 --- a/saphyr/Cargo.toml +++ b/saphyr/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "yaml-rust" -version = "0.3.6" +version = "0.3.7" authors = ["Yuheng Chen "] homepage = "http://chyh1990.github.io/yaml-rust/" documentation = "http://chyh1990.github.io/yaml-rust/doc/yaml_rust/" From b2f0df8e00af78756acfed93bc4bc55987955102 Mon Sep 17 00:00:00 2001 From: Christian Hofer Date: Thu, 22 Jun 2017 08:44:46 +0200 Subject: [PATCH 132/380] Extract pull parser. --- saphyr/src/parser.rs | 150 ++++++++++++++++++++++++++++--------------- 1 file changed, 97 insertions(+), 53 deletions(-) diff --git a/saphyr/src/parser.rs b/saphyr/src/parser.rs index ffaab6b..b59987b 100644 --- a/saphyr/src/parser.rs +++ b/saphyr/src/parser.rs @@ -1,8 +1,5 @@ use scanner::*; use std::collections::HashMap; -use std::mem::swap; - -// use yaml::*; #[derive(Clone, Copy, PartialEq, Debug, Eq)] enum State { @@ -72,6 +69,7 @@ pub struct Parser { state: State, marks: Vec, token: Option, + current: Option<(Event, Marker)>, anchors: HashMap, anchor_id: usize, } @@ -102,6 +100,7 @@ impl> Parser { state: State::StreamStart, marks: Vec::new(), token: None, + current: None, anchors: HashMap::new(), // valid anchor_id starts from 1 @@ -109,7 +108,26 @@ impl> Parser { } } - fn peek(&mut self) -> Result<&Token, ScanError> { + pub fn peek(&mut self) -> Result<&(Event, Marker), ScanError> { + match self.current { + Some(ref x) => Ok(x), + None => { + self.current = Some(self.next()?); + self.peek() + } + } + } + + pub fn next(&mut self) -> ParseResult { + match self.current { + None => self.parse(), + Some(_) => { + Ok(self.current.take().unwrap()) + } + } + } + + fn peek_token(&mut self) -> Result<&Token, ScanError> { match self.token { None => { self.token = Some(try!(self.scan_next_token())); @@ -132,15 +150,13 @@ impl> Parser { } fn fetch_token(&mut self) -> Token { - let mut token = None; - swap(&mut token, &mut self.token); - token.expect("fetch_token needs to be preceded by peek") + self.token.take().expect("fetch_token needs to be preceded by peek_token") } fn skip(&mut self) { self.token = None; - //self.peek(); + //self.peek_token(); } fn pop_state(&mut self) { self.state = self.states.pop().unwrap() @@ -161,7 +177,7 @@ impl> Parser { pub fn load(&mut self, recv: &mut R, multi: bool) -> Result<(), ScanError> { if !self.scanner.stream_started() { - let (ev, mark) = try!(self.parse()); + let (ev, mark) = try!(self.next()); assert_eq!(ev, Event::StreamStart); recv.on_event(ev, mark); } @@ -172,7 +188,7 @@ impl> Parser { return Ok(()); } loop { - let (ev, mark) = try!(self.parse()); + let (ev, mark) = try!(self.next()); if ev == Event::StreamEnd { recv.on_event(ev, mark); return Ok(()); @@ -192,11 +208,11 @@ impl> Parser { assert_eq!(first_ev, Event::DocumentStart); recv.on_event(first_ev, mark); - let (ev, mark) = try!(self.parse()); + let (ev, mark) = try!(self.next()); try!(self.load_node(ev, mark, recv)); // DOCUMENT-END is expected. - let (ev, mark) = try!(self.parse()); + let (ev, mark) = try!(self.next()); assert_eq!(ev, Event::DocumentEnd); recv.on_event(ev, mark); @@ -225,17 +241,17 @@ impl> Parser { fn load_mapping(&mut self, recv: &mut R) -> Result<(), ScanError> { - let (mut key_ev, mut key_mark) = try!(self.parse()); + let (mut key_ev, mut key_mark) = try!(self.next()); while key_ev != Event::MappingEnd { // key try!(self.load_node(key_ev, key_mark, recv)); // value - let (ev, mark) = try!(self.parse()); + let (ev, mark) = try!(self.next()); try!(self.load_node(ev, mark, recv)); // next event - let (ev, mark) = try!(self.parse()); + let (ev, mark) = try!(self.next()); key_ev = ev; key_mark = mark; @@ -246,12 +262,12 @@ impl> Parser { fn load_sequence(&mut self, recv: &mut R) -> Result<(), ScanError> { - let (mut ev, mut mark) = try!(self.parse()); + let (mut ev, mut mark) = try!(self.next()); while ev != Event::SequenceEnd { try!(self.load_node(ev, mark, recv)); // next event - let (next_ev, next_mark) = try!(self.parse()); + let (next_ev, next_mark) = try!(self.next()); ev = next_ev; mark = next_mark; } @@ -260,7 +276,7 @@ impl> Parser { } fn state_machine(&mut self) -> ParseResult { - // let next_tok = try!(self.peek()); + // let next_tok = try!(self.peek_token()); // println!("cur_state {:?}, next tok: {:?}", self.state, next_tok); match self.state { State::StreamStart => self.stream_start(), @@ -301,7 +317,7 @@ impl> Parser { } fn stream_start(&mut self) -> ParseResult { - match *try!(self.peek()) { + match *try!(self.peek_token()) { Token(mark, TokenType::StreamStart(_)) => { self.state = State::ImplicitDocumentStart; self.skip(); @@ -314,12 +330,12 @@ impl> Parser { fn document_start(&mut self, implicit: bool) -> ParseResult { if !implicit { - while let TokenType::DocumentEnd = try!(self.peek()).1 { + while let TokenType::DocumentEnd = try!(self.peek_token()).1 { self.skip(); } } - match *try!(self.peek()) { + match *try!(self.peek_token()) { Token(mark, TokenType::StreamEnd) => { self.state = State::End; self.skip(); @@ -346,7 +362,7 @@ impl> Parser { fn parser_process_directives(&mut self) -> Result<(), ScanError> { loop { - match try!(self.peek()).1 { + match try!(self.peek_token()).1 { TokenType::VersionDirective(_, _) => { // XXX parsing with warning according to spec //if major != 1 || minor > 2 { @@ -367,7 +383,7 @@ impl> Parser { fn _explict_document_start(&mut self) -> ParseResult { try!(self.parser_process_directives()); - match *try!(self.peek()) { + match *try!(self.peek_token()) { Token(mark, TokenType::DocumentStart) => { self.push_state(State::DocumentEnd); self.state = State::DocumentContent; @@ -379,7 +395,7 @@ impl> Parser { } fn document_content(&mut self) -> ParseResult { - match *try!(self.peek()) { + match *try!(self.peek_token()) { Token(mark, TokenType::VersionDirective(..)) | Token(mark, TokenType::TagDirective(..)) | Token(mark, TokenType::DocumentStart) @@ -397,7 +413,7 @@ impl> Parser { fn document_end(&mut self) -> ParseResult { let mut _implicit = true; - let marker: Marker = match *try!(self.peek()) { + let marker: Marker = match *try!(self.peek_token()) { Token(mark, TokenType::DocumentEnd) => { self.skip(); _implicit = false; @@ -426,7 +442,7 @@ impl> Parser { fn parse_node(&mut self, block: bool, indentless_sequence: bool) -> ParseResult { let mut anchor_id = 0; let mut tag = None; - match *try!(self.peek()) { + match *try!(self.peek_token()) { Token(_, TokenType::Alias(_)) => { self.pop_state(); if let Token(mark, TokenType::Alias(name)) = self.fetch_token() { @@ -441,7 +457,7 @@ impl> Parser { Token(_, TokenType::Anchor(_)) => { if let Token(mark, TokenType::Anchor(name)) = self.fetch_token() { anchor_id = try!(self.register_anchor(name, &mark)); - if let TokenType::Tag(..) = try!(self.peek()).1 { + if let TokenType::Tag(..) = try!(self.peek_token()).1 { if let tg @ TokenType::Tag(..) = self.fetch_token().1 { tag = Some(tg); } else { @@ -455,7 +471,7 @@ impl> Parser { Token(_, TokenType::Tag(..)) => { if let tg @ TokenType::Tag(..) = self.fetch_token().1 { tag = Some(tg); - if let TokenType::Anchor(_) = try!(self.peek()).1 { + if let TokenType::Anchor(_) = try!(self.peek_token()).1 { if let Token(mark, TokenType::Anchor(name)) = self.fetch_token() { anchor_id = try!(self.register_anchor(name, &mark)); } else { @@ -468,7 +484,7 @@ impl> Parser { }, _ => {} } - match *try!(self.peek()) { + match *try!(self.peek_token()) { Token(mark, TokenType::BlockEntry) if indentless_sequence => { self.state = State::IndentlessSequenceEntry; Ok((Event::SequenceStart(anchor_id), mark)) @@ -509,14 +525,14 @@ impl> Parser { fn block_mapping_key(&mut self, first: bool) -> ParseResult { // skip BlockMappingStart if first { - let _ = try!(self.peek()); + let _ = try!(self.peek_token()); //self.marks.push(tok.0); self.skip(); } - match *try!(self.peek()) { + match *try!(self.peek_token()) { Token(_, TokenType::Key) => { self.skip(); - match *try!(self.peek()) { + match *try!(self.peek_token()) { Token(mark, TokenType::Key) | Token(mark, TokenType::Value) | Token(mark, TokenType::BlockEnd) => { @@ -547,10 +563,10 @@ impl> Parser { } fn block_mapping_value(&mut self) -> ParseResult { - match *try!(self.peek()) { + match *try!(self.peek_token()) { Token(_, TokenType::Value) => { self.skip(); - match *try!(self.peek()) { + match *try!(self.peek_token()) { Token(mark, TokenType::Key) | Token(mark, TokenType::Value) | Token(mark, TokenType::BlockEnd) => { @@ -574,25 +590,25 @@ impl> Parser { fn flow_mapping_key(&mut self, first: bool) -> ParseResult { if first { - let _ = try!(self.peek()); + let _ = try!(self.peek_token()); self.skip(); } let marker: Marker = { - match *try!(self.peek()) { + match *try!(self.peek_token()) { Token(mark, TokenType::FlowMappingEnd) => mark, Token(mark, _) => { if !first { - match *try!(self.peek()) { + match *try!(self.peek_token()) { Token(_, TokenType::FlowEntry) => self.skip(), Token(mark, _) => return Err(ScanError::new(mark, "while parsing a flow mapping, did not find expected ',' or '}'")) } } - match *try!(self.peek()) { + match *try!(self.peek_token()) { Token(_, TokenType::Key) => { self.skip(); - match *try!(self.peek()) { + match *try!(self.peek_token()) { Token(mark, TokenType::Value) | Token(mark, TokenType::FlowEntry) | Token(mark, TokenType::FlowMappingEnd) => { @@ -629,14 +645,14 @@ impl> Parser { fn flow_mapping_value(&mut self, empty: bool) -> ParseResult { let mark: Marker = { if empty { - let Token(mark, _) = *try!(self.peek()); + let Token(mark, _) = *try!(self.peek_token()); self.state = State::FlowMappingKey; return Ok((Event::empty_scalar(), mark)); } else { - match *try!(self.peek()) { + match *try!(self.peek_token()) { Token(marker, TokenType::Value) => { self.skip(); - match try!(self.peek()).1 { + match try!(self.peek_token()).1 { TokenType::FlowEntry | TokenType::FlowMappingEnd => { }, _ => { @@ -658,11 +674,11 @@ impl> Parser { fn flow_sequence_entry(&mut self, first: bool) -> ParseResult { // skip FlowMappingStart if first { - let _ = try!(self.peek()); + let _ = try!(self.peek_token()); //self.marks.push(tok.0); self.skip(); } - match *try!(self.peek()) { + match *try!(self.peek_token()) { Token(mark, TokenType::FlowSequenceEnd) => { self.pop_state(); self.skip(); @@ -677,7 +693,7 @@ impl> Parser { } _ => { /* next */ } } - match *try!(self.peek()) { + match *try!(self.peek_token()) { Token(mark, TokenType::FlowSequenceEnd) => { self.pop_state(); self.skip(); @@ -696,7 +712,7 @@ impl> Parser { } fn indentless_sequence_entry(&mut self) -> ParseResult { - match *try!(self.peek()) { + match *try!(self.peek_token()) { Token(_, TokenType::BlockEntry) => (), Token(mark, _) => { self.pop_state(); @@ -704,7 +720,7 @@ impl> Parser { } } self.skip(); - match *try!(self.peek()) { + match *try!(self.peek_token()) { Token(mark, TokenType::BlockEntry) | Token(mark, TokenType::Key) | Token(mark, TokenType::Value) @@ -722,11 +738,11 @@ impl> Parser { fn block_sequence_entry(&mut self, first: bool) -> ParseResult { // BLOCK-SEQUENCE-START if first { - let _ = try!(self.peek()); + let _ = try!(self.peek_token()); //self.marks.push(tok.0); self.skip(); } - match *try!(self.peek()) { + match *try!(self.peek_token()) { Token(mark, TokenType::BlockEnd) => { self.pop_state(); self.skip(); @@ -734,7 +750,7 @@ impl> Parser { }, Token(_, TokenType::BlockEntry) => { self.skip(); - match *try!(self.peek()) { + match *try!(self.peek_token()) { Token(mark, TokenType::BlockEntry) | Token(mark, TokenType::BlockEnd) => { self.state = State::BlockSequenceEntry; @@ -754,7 +770,7 @@ impl> Parser { } fn flow_sequence_entry_mapping_key(&mut self) -> ParseResult { - match *try!(self.peek()) { + match *try!(self.peek_token()) { Token(mark, TokenType::Value) | Token(mark, TokenType::FlowEntry) | Token(mark, TokenType::FlowSequenceEnd) => { @@ -770,11 +786,11 @@ impl> Parser { } fn flow_sequence_entry_mapping_value(&mut self) -> ParseResult { - match *try!(self.peek()) { + match *try!(self.peek_token()) { Token(_, TokenType::Value) => { self.skip(); self.state = State::FlowSequenceEntryMappingValue; - match *try!(self.peek()) { + match *try!(self.peek_token()) { Token(mark, TokenType::FlowEntry) | Token(mark, TokenType::FlowSequenceEnd) => { self.state = State::FlowSequenceEntryMappingEnd; @@ -798,3 +814,31 @@ impl> Parser { Ok((Event::MappingEnd, self.scanner.mark())) } } + +#[cfg(test)] +mod test { + use super::{Event, Parser}; + + #[test] + fn test_peek_eq_parse() { + let s = " +a0 bb: val +a1: &x + b1: 4 + b2: d +a2: 4 +a3: [1, 2, 3] +a4: + - [a1, a2] + - 2 +a5: *x +"; + let mut p = Parser::new(s.chars()); + while { + let event_peek = p.peek().unwrap().clone(); + let event = p.next().unwrap(); + assert_eq!(event, event_peek); + event.0 != Event::StreamEnd + } {} + } +} From 7471f27194a0e9e8be96b345ea482c8fa345c978 Mon Sep 17 00:00:00 2001 From: Christian Hofer Date: Sun, 16 Jul 2017 18:00:50 +0200 Subject: [PATCH 133/380] Replace ? by try! --- saphyr/src/parser.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/saphyr/src/parser.rs b/saphyr/src/parser.rs index b59987b..f8b04b5 100644 --- a/saphyr/src/parser.rs +++ b/saphyr/src/parser.rs @@ -112,7 +112,7 @@ impl> Parser { match self.current { Some(ref x) => Ok(x), None => { - self.current = Some(self.next()?); + self.current = Some(try!(self.next())); self.peek() } } From 14fd2e2e2ee9ef375a89a12d7bffc1d3b303802e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pedro=20C=C3=B4rte-Real?= Date: Fri, 18 Aug 2017 21:42:02 +0100 Subject: [PATCH 134/380] Upgrade to latest linked-hash-map --- saphyr/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/saphyr/Cargo.toml b/saphyr/Cargo.toml index 91dac70..9515208 100644 --- a/saphyr/Cargo.toml +++ b/saphyr/Cargo.toml @@ -10,7 +10,7 @@ repository = "https://github.com/chyh1990/yaml-rust" publish = false # this branch contains breaking changes [dependencies] -linked-hash-map = ">=0.0.9, <0.5" +linked-hash-map = ">=0.0.9, <0.6" [dev-dependencies] quickcheck = "0.4" From 94510e7e68b5e872e0b0c3bb9c496f3cb1283459 Mon Sep 17 00:00:00 2001 From: Yuheng Chen Date: Mon, 21 Aug 2017 13:58:49 +0800 Subject: [PATCH 135/380] Bump to 0.3.8 --- saphyr/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/saphyr/Cargo.toml b/saphyr/Cargo.toml index 9515208..99d9798 100644 --- a/saphyr/Cargo.toml +++ b/saphyr/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "yaml-rust" -version = "0.3.7" +version = "0.3.8" authors = ["Yuheng Chen "] homepage = "http://chyh1990.github.io/yaml-rust/" documentation = "http://chyh1990.github.io/yaml-rust/doc/yaml_rust/" From b9b79a5cf87b21f569fcbd6531fad51ce401877e Mon Sep 17 00:00:00 2001 From: Martin Hoffmann Date: Sat, 2 Sep 2017 13:49:53 +0200 Subject: [PATCH 136/380] =?UTF-8?q?Allow=20access=20to=20Marker=E2=80=99s?= =?UTF-8?q?=20content.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- saphyr/src/scanner.rs | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/saphyr/src/scanner.rs b/saphyr/src/scanner.rs index 619e2d9..4663b9f 100644 --- a/saphyr/src/scanner.rs +++ b/saphyr/src/scanner.rs @@ -33,6 +33,18 @@ impl Marker { col: col } } + + pub fn index(&self) -> usize { + self.index + } + + pub fn line(&self) -> usize { + self.line + } + + pub fn col(&self) -> usize { + self.col + } } #[derive(Clone, PartialEq, Debug, Eq)] From bccfe355878dd7a231de70fd993e2eb866a2cf03 Mon Sep 17 00:00:00 2001 From: Marc Addeo Date: Sun, 12 Nov 2017 12:01:39 -0500 Subject: [PATCH 137/380] Add a cause() method to EmitError For compatibility with error_chain --- saphyr/src/emitter.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/saphyr/src/emitter.rs b/saphyr/src/emitter.rs index ef3cdc7..332ff9e 100644 --- a/saphyr/src/emitter.rs +++ b/saphyr/src/emitter.rs @@ -17,6 +17,10 @@ impl Error for EmitError { EmitError::BadHashmapKey => "bad hashmap key", } } + + fn cause(&self) -> Option<&Error> { + None + } } impl Display for EmitError { From 201c16217a64bf7d5eaf7508c7a256f8906c72ca Mon Sep 17 00:00:00 2001 From: cetra3 Date: Wed, 15 Nov 2017 12:06:16 +1030 Subject: [PATCH 138/380] Allow the Marker from ScanError --- saphyr/src/scanner.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/saphyr/src/scanner.rs b/saphyr/src/scanner.rs index 4663b9f..516f6e5 100644 --- a/saphyr/src/scanner.rs +++ b/saphyr/src/scanner.rs @@ -60,6 +60,10 @@ impl ScanError { info: info.to_owned() } } + + pub fn marker(&self) -> Marker { + self.mark + } } impl Error for ScanError { From 652e42d1c430928b57d12c15f30d68ce7bcadf6e Mon Sep 17 00:00:00 2001 From: cetra3 Date: Wed, 15 Nov 2017 14:11:39 +1030 Subject: [PATCH 139/380] Include Scanner --- saphyr/src/scanner.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/saphyr/src/scanner.rs b/saphyr/src/scanner.rs index 516f6e5..8919c4a 100644 --- a/saphyr/src/scanner.rs +++ b/saphyr/src/scanner.rs @@ -61,8 +61,8 @@ impl ScanError { } } - pub fn marker(&self) -> Marker { - self.mark + pub fn marker(&self) -> &Marker { + &self.mark } } From d370a2192b1eede677da5044c6f51adb396eba87 Mon Sep 17 00:00:00 2001 From: Yuheng Chen Date: Wed, 22 Nov 2017 11:02:17 +0800 Subject: [PATCH 140/380] Bump to 0.3.9 --- saphyr/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/saphyr/Cargo.toml b/saphyr/Cargo.toml index 99d9798..b2e495f 100644 --- a/saphyr/Cargo.toml +++ b/saphyr/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "yaml-rust" -version = "0.3.8" +version = "0.3.9" authors = ["Yuheng Chen "] homepage = "http://chyh1990.github.io/yaml-rust/" documentation = "http://chyh1990.github.io/yaml-rust/doc/yaml_rust/" From f15fe186f843c2a486872e7e21db01425343bcde Mon Sep 17 00:00:00 2001 From: Yuheng Chen Date: Wed, 22 Nov 2017 15:53:12 +0800 Subject: [PATCH 141/380] Bump to 0.4.0 This version contains breaking API changes in low level event-based API: - The EventReceiver gets ownership of events --- saphyr/Cargo.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/saphyr/Cargo.toml b/saphyr/Cargo.toml index b2e495f..1fa1293 100644 --- a/saphyr/Cargo.toml +++ b/saphyr/Cargo.toml @@ -1,13 +1,13 @@ [package] name = "yaml-rust" -version = "0.3.9" +version = "0.4.0" authors = ["Yuheng Chen "] homepage = "http://chyh1990.github.io/yaml-rust/" documentation = "http://chyh1990.github.io/yaml-rust/doc/yaml_rust/" license = "MIT/Apache-2.0" description = "The missing YAML 1.2 parser for rust" repository = "https://github.com/chyh1990/yaml-rust" -publish = false # this branch contains breaking changes +# publish = false # this branch contains breaking changes [dependencies] linked-hash-map = ">=0.0.9, <0.6" From 907692cc3ef293f1e8825d8f920cd30ca4564fdd Mon Sep 17 00:00:00 2001 From: Igor Gnatenko Date: Mon, 1 Jan 2018 12:33:30 +0100 Subject: [PATCH 142/380] bump quickcheck to 0.6 --- saphyr/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/saphyr/Cargo.toml b/saphyr/Cargo.toml index 1fa1293..bf82d9d 100644 --- a/saphyr/Cargo.toml +++ b/saphyr/Cargo.toml @@ -13,4 +13,4 @@ repository = "https://github.com/chyh1990/yaml-rust" linked-hash-map = ">=0.0.9, <0.6" [dev-dependencies] -quickcheck = "0.4" +quickcheck = "0.6" From 6df45317d32dc719dcf93804c4a06b49a79443f0 Mon Sep 17 00:00:00 2001 From: Chen Yuheng Date: Tue, 2 Jan 2018 14:49:37 +0800 Subject: [PATCH 143/380] Update .travis.yml --- saphyr/.travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/saphyr/.travis.yml b/saphyr/.travis.yml index c7043aa..1a2be7b 100644 --- a/saphyr/.travis.yml +++ b/saphyr/.travis.yml @@ -1,6 +1,6 @@ language: rust rust: - - 1.11.0 + - 1.22.1 - 1.16.0 - beta - nightly From e14465c6a28fc38fea666a12824f79a39fec0b10 Mon Sep 17 00:00:00 2001 From: Yuheng Chen Date: Tue, 2 Jan 2018 14:57:27 +0800 Subject: [PATCH 144/380] Fix warning --- saphyr/src/yaml.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/saphyr/src/yaml.rs b/saphyr/src/yaml.rs index 5d7ef36..b7b2b7c 100644 --- a/saphyr/src/yaml.rs +++ b/saphyr/src/yaml.rs @@ -176,7 +176,7 @@ impl YamlLoader { match *parent { (Yaml::Array(ref mut v), _) => v.push(node.0), (Yaml::Hash(ref mut h), _) => { - let mut cur_key = self.key_stack.last_mut().unwrap(); + let cur_key = self.key_stack.last_mut().unwrap(); // current node is a key if cur_key.is_badvalue() { *cur_key = node.0; From 6761ff9b48be001bef5380299528558f1d04d945 Mon Sep 17 00:00:00 2001 From: Yuheng Chen Date: Tue, 2 Jan 2018 15:55:39 +0800 Subject: [PATCH 145/380] fix boolean quotes Fix #92 --- saphyr/src/emitter.rs | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/saphyr/src/emitter.rs b/saphyr/src/emitter.rs index 332ff9e..c77263c 100644 --- a/saphyr/src/emitter.rs +++ b/saphyr/src/emitter.rs @@ -307,7 +307,10 @@ fn need_quotes(string: &str) -> bool { } }) || [// http://yaml.org/type/bool.html - "y","Y","yes","Yes","YES","n","N","no","No","NO", + // Note: 'y', 'Y', 'n', 'N', is not quoted deliberately, as in libyaml. PyYAML also parse + // them as string, not booleans, although it is volating the YAML 1.1 specification. + // See https://github.com/dtolnay/serde-yaml/pull/83#discussion_r152628088. + "yes","Yes","YES","no","No","NO", "True", "TRUE", "true", "False", "FALSE", "false", "on","On","ON","off","Off","OFF", // http://yaml.org/type/null.html @@ -409,6 +412,7 @@ field: ":" field2: "{" field3: "\\" field4: "\n" +field5: "can't avoid quote" float: "2.6" int: "4" nullable: "null" @@ -425,7 +429,7 @@ products: "true": bool key "{}": empty hash key x: test -"y": "can't avoid quoting here" +y: avoid quoting here z: string with spaces"#; let docs = YamlLoader::load_from_str(&s).unwrap(); @@ -466,13 +470,13 @@ null0: ~ - "TRUE" - "False" - "FALSE" - - "y" - - "Y" + - y + - Y - "yes" - "Yes" - "YES" - - "n" - - "N" + - n + - N - "no" - "No" - "NO" @@ -494,7 +498,7 @@ bool1: false"#; emitter.dump(doc).unwrap(); } - assert_eq!(expected, writer, "actual:\n\n{}\n", writer); + assert_eq!(expected, writer, "expected:\n{}\nactual:\n{}\n", expected, writer); } #[test] From aaf50d6c51fcda0a190759a7d0bdeffb3ada25c1 Mon Sep 17 00:00:00 2001 From: Chen Yuheng Date: Sat, 6 Jan 2018 15:27:42 +0800 Subject: [PATCH 146/380] Update document link Fix #94 --- saphyr/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/saphyr/Cargo.toml b/saphyr/Cargo.toml index bf82d9d..f00d0a9 100644 --- a/saphyr/Cargo.toml +++ b/saphyr/Cargo.toml @@ -3,7 +3,7 @@ name = "yaml-rust" version = "0.4.0" authors = ["Yuheng Chen "] homepage = "http://chyh1990.github.io/yaml-rust/" -documentation = "http://chyh1990.github.io/yaml-rust/doc/yaml_rust/" +documentation = "https://docs.rs/crate/yaml-rust/" license = "MIT/Apache-2.0" description = "The missing YAML 1.2 parser for rust" repository = "https://github.com/chyh1990/yaml-rust" From 410e43f07d15dac81f224773050b9f7d514d9293 Mon Sep 17 00:00:00 2001 From: Chen Yuheng Date: Sat, 6 Jan 2018 15:28:14 +0800 Subject: [PATCH 147/380] Update README.md --- saphyr/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/saphyr/README.md b/saphyr/README.md index eb747b0..75d8393 100644 --- a/saphyr/README.md +++ b/saphyr/README.md @@ -28,7 +28,7 @@ Add the following to the Cargo.toml of your project: ```toml [dependencies] -yaml-rust = "0.3" +yaml-rust = "0.4" ``` or From 2ad74a791b3b3b3abad90cbb4096d80c076dc538 Mon Sep 17 00:00:00 2001 From: Hannes De Valkeneer Date: Fri, 20 Apr 2018 08:06:55 +0200 Subject: [PATCH 148/380] Do not quote hyphenated strings unnecessarily. fixes #54 --- saphyr/src/emitter.rs | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/saphyr/src/emitter.rs b/saphyr/src/emitter.rs index c77263c..68fe8a4 100644 --- a/saphyr/src/emitter.rs +++ b/saphyr/src/emitter.rs @@ -281,8 +281,10 @@ impl<'a> YamlEmitter<'a> { } /// Check if the string requires quoting. +/// Strings starting with any of the following characters must be quoted. +/// :, &, *, ?, |, -, <, >, =, !, %, @ /// Strings containing any of the following characters must be quoted. -/// :, {, }, [, ], ,, &, *, #, ?, |, -, <, >, =, !, %, @, ` +/// {, }, [, ], ,, #, ` /// /// If the string contains any of the following control characters, it must be escaped with double quotes: /// \0, \x01, \x02, \x03, \x04, \x05, \x06, \a, \b, \t, \n, \v, \f, \r, \x0e, \x0f, \x10, \x11, \x12, \x13, \x14, \x15, \x16, \x17, \x18, \x19, \x1a, \e, \x1c, \x1d, \x1e, \x1f, \N, \_, \L, \P @@ -300,9 +302,15 @@ fn need_quotes(string: &str) -> bool { string == "" || need_quotes_spaces(string) + || string.starts_with(|character: char| { + match character { + ':' | '&' | '*' | '?' | '|' | '-' | '<' | '>' | '=' | '!' | '%' | '@' => true, + _ => false, + } + }) || string.contains(|character: char| { match character { - ':' | '{' | '}' | '[' | ']' | ',' | '&' | '*' | '#' | '?' | '|' | '-' | '<' | '>' | '=' | '!' | '%' | '@' | '`' | '\"' | '\'' | '\\' | '\0' ... '\x06' | '\t' | '\n' | '\r' | '\x0e' ... '\x1a' | '\x1c' ... '\x1f' => true, + '{' | '}' | '[' | ']' | ',' | '#' | '`' | '\"' | '\'' | '\\' | '\0' ... '\x06' | '\t' | '\n' | '\r' | '\x0e' ... '\x1a' | '\x1c' ... '\x1f' => true, _ => false, } }) @@ -402,7 +410,7 @@ products: a7: 你好 boolean: "true" boolean2: "false" -date: "2014-12-31" +date: 2014-12-31 empty_string: "" empty_string1: " " empty_string2: " a" From 82377440914dbde55c527275bd8bb0a764eecb03 Mon Sep 17 00:00:00 2001 From: Hannes De Valkeneer Date: Fri, 20 Apr 2018 21:29:50 +0200 Subject: [PATCH 149/380] update minimum version to 1.18.0 because of env_logger --- saphyr/.travis.yml | 2 +- saphyr/appveyor.yml | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/saphyr/.travis.yml b/saphyr/.travis.yml index 1a2be7b..75f44f7 100644 --- a/saphyr/.travis.yml +++ b/saphyr/.travis.yml @@ -1,7 +1,7 @@ language: rust rust: - 1.22.1 - - 1.16.0 + - 1.18.0 - beta - nightly matrix: diff --git a/saphyr/appveyor.yml b/saphyr/appveyor.yml index 5ee211c..240c3d9 100644 --- a/saphyr/appveyor.yml +++ b/saphyr/appveyor.yml @@ -1,6 +1,6 @@ install: - - ps: Start-FileDownload 'https://static.rust-lang.org/dist/rust-1.16.0-i686-pc-windows-gnu.exe' - - rust-1.16.0-i686-pc-windows-gnu.exe /VERYSILENT /NORESTART /DIR="C:\Program Files (x86)\Rust" + - ps: Start-FileDownload 'https://static.rust-lang.org/dist/rust-1.18.0-i686-pc-windows-gnu.exe' + - rust-1.18.0-i686-pc-windows-gnu.exe /VERYSILENT /NORESTART /DIR="C:\Program Files (x86)\Rust" - SET PATH=%PATH%;C:\Program Files (x86)\Rust\bin - SET PATH=%PATH%;C:\MinGW\bin - rustc -V From 2249926d8638bb98ae00158b5040d4d677d74cf8 Mon Sep 17 00:00:00 2001 From: Hendrik Sollich Date: Fri, 22 Jun 2018 20:56:54 +0200 Subject: [PATCH 150/380] Add tests to show expected indentation interpretation --- saphyr/src/yaml.rs | 57 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) diff --git a/saphyr/src/yaml.rs b/saphyr/src/yaml.rs index b7b2b7c..a7ed23c 100644 --- a/saphyr/src/yaml.rs +++ b/saphyr/src/yaml.rs @@ -638,4 +638,61 @@ c: ~ let first = out.into_iter().next().unwrap(); assert_eq!(first[0]["important"].as_bool().unwrap(), true); } + + #[test] + fn test_indentation_equality() { + + let four_spaces = YamlLoader::load_from_str(r#" +hash: + with: + indentations +"#).unwrap().into_iter().next().unwrap(); + + let two_spaces = YamlLoader::load_from_str(r#" +hash: + with: + indentations +"#).unwrap().into_iter().next().unwrap(); + + let one_space = YamlLoader::load_from_str(r#" +hash: + with: + indentations +"#).unwrap().into_iter().next().unwrap(); + + let mixed_spaces = YamlLoader::load_from_str(r#" +hash: + with: + indentations +"#).unwrap().into_iter().next().unwrap(); + + assert_eq!(four_spaces, two_spaces); + assert_eq!(two_spaces, one_space); + assert_eq!(four_spaces, mixed_spaces); + } + + #[test] + fn test_two_space_indentations() { + // https://github.com/kbknapp/clap-rs/issues/965 + + let s = r#" +subcommands: + - server: + about: server related commands +subcommands2: + - server: + about: server related commands +subcommands3: + - server: + about: server related commands + "#; + + let out = YamlLoader::load_from_str(&s).unwrap(); + let doc = &out.into_iter().next().unwrap(); + + println!("{:#?}", doc); + assert_eq!(doc["subcommands"][0]["server"], Yaml::Null); + assert!(doc["subcommands2"][0]["server"].as_hash().is_some()); + assert!(doc["subcommands3"][0]["server"].as_hash().is_some()); + } } From 6cc05040381dd177316462652481c171ad7558b9 Mon Sep 17 00:00:00 2001 From: Chen Yuheng Date: Thu, 30 Aug 2018 19:23:41 +0800 Subject: [PATCH 151/380] README: remove warning Fix #106 This library is production-ready. --- saphyr/README.md | 5 ----- 1 file changed, 5 deletions(-) diff --git a/saphyr/README.md b/saphyr/README.md index 75d8393..a012264 100644 --- a/saphyr/README.md +++ b/saphyr/README.md @@ -17,11 +17,6 @@ Rust 1.0.0 and nightly! See [Document](http://chyh1990.github.io/yaml-rust/doc/yaml_rust/) -> NOTE: This library is still under heavy development. - -> WARNING: This library needs more tests and it is NOT ready for -> parsing arbitrary user input from *untrusted source*. - ## Quick Start Add the following to the Cargo.toml of your project: From 1577b9c2248f6e9b327b46e86110af81f7667c43 Mon Sep 17 00:00:00 2001 From: Igor Gnatenko Date: Sun, 2 Sep 2018 18:13:50 +0200 Subject: [PATCH 152/380] Update quickcheck to 0.7 --- saphyr/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/saphyr/Cargo.toml b/saphyr/Cargo.toml index f00d0a9..545c20d 100644 --- a/saphyr/Cargo.toml +++ b/saphyr/Cargo.toml @@ -13,4 +13,4 @@ repository = "https://github.com/chyh1990/yaml-rust" linked-hash-map = ">=0.0.9, <0.6" [dev-dependencies] -quickcheck = "0.6" +quickcheck = "0.7" From d3dadda7042981029dc84112ff5afdef33e1f0f7 Mon Sep 17 00:00:00 2001 From: Chen Yuheng Date: Tue, 4 Sep 2018 16:16:37 +0800 Subject: [PATCH 153/380] Update .travis.yml update ci to rust 1.19 for regex. --- saphyr/.travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/saphyr/.travis.yml b/saphyr/.travis.yml index 75f44f7..dbeac58 100644 --- a/saphyr/.travis.yml +++ b/saphyr/.travis.yml @@ -1,7 +1,7 @@ language: rust rust: - 1.22.1 - - 1.18.0 + - 1.19.0 - beta - nightly matrix: From b17fbe703c9a7c3095b3760c0ad74a8caee2877e Mon Sep 17 00:00:00 2001 From: Konrad Borowski Date: Sat, 15 Sep 2018 12:33:26 +0200 Subject: [PATCH 154/380] Prevent too deep recursion --- saphyr/src/scanner.rs | 10 ++++++---- saphyr/src/yaml.rs | 12 ++++++++++++ 2 files changed, 18 insertions(+), 4 deletions(-) diff --git a/saphyr/src/scanner.rs b/saphyr/src/scanner.rs index 8919c4a..56496d3 100644 --- a/saphyr/src/scanner.rs +++ b/saphyr/src/scanner.rs @@ -149,7 +149,7 @@ pub struct Scanner { simple_keys: Vec, indent: isize, indents: Vec, - flow_level: usize, + flow_level: u8, tokens_parsed: usize, token_available: bool, } @@ -906,7 +906,7 @@ impl> Scanner { // The indicators '[' and '{' may start a simple key. try!(self.save_simple_key()); - self.increase_flow_level(); + self.increase_flow_level()?; self.allow_simple_key(); @@ -941,9 +941,11 @@ impl> Scanner { Ok(()) } - fn increase_flow_level(&mut self) { + fn increase_flow_level(&mut self) -> ScanResult { self.simple_keys.push(SimpleKey::new(Marker::new(0,0,0))); - self.flow_level += 1; + self.flow_level = self.flow_level.checked_add(1) + .ok_or_else(|| ScanError::new(self.mark, "Recursion limit exceeded"))?; + Ok(()) } fn decrease_flow_level(&mut self) { if self.flow_level > 0 { diff --git a/saphyr/src/yaml.rs b/saphyr/src/yaml.rs index b7b2b7c..6652abb 100644 --- a/saphyr/src/yaml.rs +++ b/saphyr/src/yaml.rs @@ -638,4 +638,16 @@ c: ~ let first = out.into_iter().next().unwrap(); assert_eq!(first[0]["important"].as_bool().unwrap(), true); } + + #[test] + fn test_recursion_depth_check_objects() { + let s = "{a:".repeat(10_000) + &"}".repeat(10_000); + assert!(YamlLoader::load_from_str(&s).is_err()); + } + + #[test] + fn test_recursion_depth_check_arrays() { + let s = "[".repeat(10_000) + &"]".repeat(10_000); + assert!(YamlLoader::load_from_str(&s).is_err()); + } } From ce1831ec39d7ed3e93499a102597898098897c2b Mon Sep 17 00:00:00 2001 From: David Tolnay Date: Sat, 15 Sep 2018 09:24:52 -0700 Subject: [PATCH 155/380] Raise the tested rustc version to 1.22.1 --- saphyr/.travis.yml | 1 - saphyr/appveyor.yml | 4 ++-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/saphyr/.travis.yml b/saphyr/.travis.yml index dbeac58..ca4a4de 100644 --- a/saphyr/.travis.yml +++ b/saphyr/.travis.yml @@ -1,7 +1,6 @@ language: rust rust: - 1.22.1 - - 1.19.0 - beta - nightly matrix: diff --git a/saphyr/appveyor.yml b/saphyr/appveyor.yml index 240c3d9..8f2e88c 100644 --- a/saphyr/appveyor.yml +++ b/saphyr/appveyor.yml @@ -1,6 +1,6 @@ install: - - ps: Start-FileDownload 'https://static.rust-lang.org/dist/rust-1.18.0-i686-pc-windows-gnu.exe' - - rust-1.18.0-i686-pc-windows-gnu.exe /VERYSILENT /NORESTART /DIR="C:\Program Files (x86)\Rust" + - ps: Start-FileDownload 'https://static.rust-lang.org/dist/rust-1.22.1-i686-pc-windows-gnu.exe' + - rust-1.22.1-i686-pc-windows-gnu.exe /VERYSILENT /NORESTART /DIR="C:\Program Files (x86)\Rust" - SET PATH=%PATH%;C:\Program Files (x86)\Rust\bin - SET PATH=%PATH%;C:\MinGW\bin - rustc -V From 5911cb864eef0aff160ad48b09fa2dbe107e8270 Mon Sep 17 00:00:00 2001 From: David Tolnay Date: Sat, 15 Sep 2018 09:27:26 -0700 Subject: [PATCH 156/380] Switch to matrix style CI target list To allow specifying a different script for each. --- saphyr/.travis.yml | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/saphyr/.travis.yml b/saphyr/.travis.yml index ca4a4de..e45406c 100644 --- a/saphyr/.travis.yml +++ b/saphyr/.travis.yml @@ -1,14 +1,17 @@ language: rust -rust: - - 1.22.1 - - beta - - nightly + matrix: + include: + - rust: 1.22.1 + - rust: beta + - rust: nightly allow_failures: - rust: nightly + env: global: - secure: ZUcdcbS8xbpdII9FSPx7VtoVhEkJhWL2Hb75tDlKDHNhfXqmt1NyB9q/2qXJ5Ulp4MnYXwsI8LsDloR6gvdB4xElay3smuF/neGvMjrqcB15/2p0MSQ+kZjMsNB6mlb5kAlm8ahduXIscppmw/V+m5hn3Vo+RQz/Ng+pzv0nc8KEXPMYrfRFg+a7FaeIbRbb8ir9EfflUSqArLq2hbi2WdhM3hFMcCIAUt6DD4x5ubjEg60OnIof5FDu0mXMXzQvUfHWOeYnsNcD/DLyDnm6FuQEzk37M4EB8op2SdBUeQMQ5abR3i2rd//DZpbTTEjud0PseWohGAwTwL2aoFrqs7uYQMx+vcGlOzAyDUm4VemVUa3F2BECdzU5BiujcKOITJEVUYWongld93arQq34FuXG/TO/T1XrerxfG6LTkTkKS5Vz7W8z6Rloa99WrQLJg1ZJP6itEU7G7KsDFVgRhsg7rz4/dV/2+cV4UvIwd4HlGXKCFlH0SClqvM3/7i/qqCD0689SJW6Zip+ly38MXlGy2s/AmReEasXvFer9JkOEIuPa8QTBNAjDlw7bWXi6neQWBIZU1VhZcSssnrVmEFN8fNklShzpw5DyKCv8jPTx2O6Dw8B/LgIK8uo+eaTXiO6zz/T1c/qEdsYslvxPA2D3F+ONpPU7238ykT4eRog= + script: - cargo build --verbose - | From 5ae7c6d11917ec824c6d0a305e5f07bf35fc856e Mon Sep 17 00:00:00 2001 From: David Tolnay Date: Sat, 15 Sep 2018 09:28:02 -0700 Subject: [PATCH 157/380] Remove travis secure token Since we are no longer publishing docs from Travis. --- saphyr/.travis.yml | 4 ---- 1 file changed, 4 deletions(-) diff --git a/saphyr/.travis.yml b/saphyr/.travis.yml index e45406c..50efccd 100644 --- a/saphyr/.travis.yml +++ b/saphyr/.travis.yml @@ -8,10 +8,6 @@ matrix: allow_failures: - rust: nightly -env: - global: - - secure: ZUcdcbS8xbpdII9FSPx7VtoVhEkJhWL2Hb75tDlKDHNhfXqmt1NyB9q/2qXJ5Ulp4MnYXwsI8LsDloR6gvdB4xElay3smuF/neGvMjrqcB15/2p0MSQ+kZjMsNB6mlb5kAlm8ahduXIscppmw/V+m5hn3Vo+RQz/Ng+pzv0nc8KEXPMYrfRFg+a7FaeIbRbb8ir9EfflUSqArLq2hbi2WdhM3hFMcCIAUt6DD4x5ubjEg60OnIof5FDu0mXMXzQvUfHWOeYnsNcD/DLyDnm6FuQEzk37M4EB8op2SdBUeQMQ5abR3i2rd//DZpbTTEjud0PseWohGAwTwL2aoFrqs7uYQMx+vcGlOzAyDUm4VemVUa3F2BECdzU5BiujcKOITJEVUYWongld93arQq34FuXG/TO/T1XrerxfG6LTkTkKS5Vz7W8z6Rloa99WrQLJg1ZJP6itEU7G7KsDFVgRhsg7rz4/dV/2+cV4UvIwd4HlGXKCFlH0SClqvM3/7i/qqCD0689SJW6Zip+ly38MXlGy2s/AmReEasXvFer9JkOEIuPa8QTBNAjDlw7bWXi6neQWBIZU1VhZcSssnrVmEFN8fNklShzpw5DyKCv8jPTx2O6Dw8B/LgIK8uo+eaTXiO6zz/T1c/qEdsYslvxPA2D3F+ONpPU7238ykT4eRog= - script: - cargo build --verbose - | From e359ad16de540db9f72727054d143000692a3054 Mon Sep 17 00:00:00 2001 From: David Tolnay Date: Sat, 15 Sep 2018 09:28:56 -0700 Subject: [PATCH 158/380] Add a build-only build on rustc 1.13.0 --- saphyr/.travis.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/saphyr/.travis.yml b/saphyr/.travis.yml index 50efccd..3526957 100644 --- a/saphyr/.travis.yml +++ b/saphyr/.travis.yml @@ -5,6 +5,8 @@ matrix: - rust: 1.22.1 - rust: beta - rust: nightly + - rust: 1.13.0 + script: cargo build allow_failures: - rust: nightly From 389ffe24a1440ba9db8dc3d7860cc7481eda4e03 Mon Sep 17 00:00:00 2001 From: David Tolnay Date: Sat, 15 Sep 2018 09:29:35 -0700 Subject: [PATCH 159/380] Add a travis build on rust stable --- saphyr/.travis.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/saphyr/.travis.yml b/saphyr/.travis.yml index 3526957..00c1406 100644 --- a/saphyr/.travis.yml +++ b/saphyr/.travis.yml @@ -3,6 +3,7 @@ language: rust matrix: include: - rust: 1.22.1 + - rust: stable - rust: beta - rust: nightly - rust: 1.13.0 From decf69540de7e43b3fbaf9b1aaa679519ed5eb0c Mon Sep 17 00:00:00 2001 From: David Tolnay Date: Sat, 15 Sep 2018 09:31:41 -0700 Subject: [PATCH 160/380] Add a dedicated clippy builder --- saphyr/.travis.yml | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/saphyr/.travis.yml b/saphyr/.travis.yml index 00c1406..fe31ed0 100644 --- a/saphyr/.travis.yml +++ b/saphyr/.travis.yml @@ -8,14 +8,12 @@ matrix: - rust: nightly - rust: 1.13.0 script: cargo build + - rust: nightly + script: rustup component add clippy-preview && cargo clippy -- -Dclippy allow_failures: - rust: nightly + script: rustup component add clippy-preview && cargo clippy -- -Dclippy script: - cargo build --verbose - - | - if [ "$TRAVIS_RUST_VERSION" = nightly ]; then - cargo install clippy --debug - cargo clippy -- -Dclippy - fi - cargo test --verbose From 5929fdbd9007ed6deb9e61076b65b1a50dc77977 Mon Sep 17 00:00:00 2001 From: David Tolnay Date: Sat, 15 Sep 2018 09:32:38 -0700 Subject: [PATCH 161/380] Hide verbose output in travis --- saphyr/.travis.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/saphyr/.travis.yml b/saphyr/.travis.yml index fe31ed0..e2333c1 100644 --- a/saphyr/.travis.yml +++ b/saphyr/.travis.yml @@ -2,12 +2,12 @@ language: rust matrix: include: - - rust: 1.22.1 - rust: stable - rust: beta - rust: nightly - rust: 1.13.0 script: cargo build + - rust: 1.22.1 - rust: nightly script: rustup component add clippy-preview && cargo clippy -- -Dclippy allow_failures: @@ -15,5 +15,5 @@ matrix: script: rustup component add clippy-preview && cargo clippy -- -Dclippy script: - - cargo build --verbose - - cargo test --verbose + - cargo build + - cargo test From f26a44bcdc9a9388f58714ab86ca27f4023912d6 Mon Sep 17 00:00:00 2001 From: David Tolnay Date: Sat, 15 Sep 2018 09:49:04 -0700 Subject: [PATCH 162/380] Format with rustfmt 0.99.4 --- saphyr/examples/dump_yaml.rs | 4 +- saphyr/src/emitter.rs | 146 +++++---- saphyr/src/lib.rs | 16 +- saphyr/src/parser.rs | 294 ++++++++++--------- saphyr/src/scanner.rs | 552 ++++++++++++++++++++--------------- saphyr/src/yaml.rs | 172 ++++++----- saphyr/tests/quickcheck.rs | 2 +- saphyr/tests/spec_test.rs | 131 +++++---- 8 files changed, 733 insertions(+), 584 deletions(-) diff --git a/saphyr/examples/dump_yaml.rs b/saphyr/examples/dump_yaml.rs index 5f2e306..8fce0f3 100644 --- a/saphyr/examples/dump_yaml.rs +++ b/saphyr/examples/dump_yaml.rs @@ -17,14 +17,14 @@ fn dump_node(doc: &yaml::Yaml, indent: usize) { for x in v { dump_node(x, indent + 1); } - }, + } yaml::Yaml::Hash(ref h) => { for (k, v) in h { print_indent(indent); println!("{:?}:", k); dump_node(v, indent + 1); } - }, + } _ => { print_indent(indent); println!("{:?}", doc); diff --git a/saphyr/src/emitter.rs b/saphyr/src/emitter.rs index 68fe8a4..cb238be 100644 --- a/saphyr/src/emitter.rs +++ b/saphyr/src/emitter.rs @@ -1,13 +1,12 @@ -use std::fmt::{self, Display}; use std::convert::From; use std::error::Error; +use std::fmt::{self, Display}; use yaml::{Hash, Yaml}; - #[derive(Copy, Clone, Debug)] pub enum EmitError { - FmtError(fmt::Error), - BadHashmapKey, + FmtError(fmt::Error), + BadHashmapKey, } impl Error for EmitError { @@ -91,7 +90,7 @@ fn escape_str(wr: &mut fmt::Write, v: &str) -> Result<(), fmt::Error> { b'\x1e' => "\\u001e", b'\x1f' => "\\u001f", b'\x7f' => "\\u007f", - _ => { continue; } + _ => continue, }; if start < i { @@ -118,7 +117,7 @@ impl<'a> YamlEmitter<'a> { best_indent: 2, compact: true, - level: -1 + level: -1, } } @@ -131,12 +130,12 @@ impl<'a> YamlEmitter<'a> { /// or tags), which should be OK, because this emitter doesn't /// (currently) emit those anyways. pub fn compact(&mut self, compact: bool) { - self.compact = compact; + self.compact = compact; } /// Determine if this emitter is using 'compact inline notation'. pub fn is_compact(&self) -> bool { - self.compact + self.compact } pub fn dump(&mut self, doc: &Yaml) -> EmitResult { @@ -147,7 +146,9 @@ impl<'a> YamlEmitter<'a> { } fn write_indent(&mut self) -> EmitResult { - if self.level <= 0 { return Ok(()); } + if self.level <= 0 { + return Ok(()); + } for _ in 0..self.level { for _ in 0..self.best_indent { try!(write!(self.writer, " ")); @@ -163,12 +164,11 @@ impl<'a> YamlEmitter<'a> { Yaml::String(ref v) => { if need_quotes(v) { try!(escape_str(self.writer, v)); - } - else { + } else { try!(write!(self.writer, "{}", v)); } Ok(()) - }, + } Yaml::Boolean(v) => { if v { try!(self.writer.write_str("true")); @@ -176,21 +176,21 @@ impl<'a> YamlEmitter<'a> { try!(self.writer.write_str("false")); } Ok(()) - }, + } Yaml::Integer(v) => { try!(write!(self.writer, "{}", v)); Ok(()) - }, + } Yaml::Real(ref v) => { try!(write!(self.writer, "{}", v)); Ok(()) - }, + } Yaml::Null | Yaml::BadValue => { try!(write!(self.writer, "~")); Ok(()) - }, + } // XXX(chenyh) Alias - _ => { Ok(()) } + _ => Ok(()), } } @@ -219,24 +219,24 @@ impl<'a> YamlEmitter<'a> { self.level += 1; for (cnt, (k, v)) in h.iter().enumerate() { let complex_key = match *k { - Yaml::Hash(_) | Yaml::Array(_) => true, - _ => false, + Yaml::Hash(_) | Yaml::Array(_) => true, + _ => false, }; if cnt > 0 { try!(write!(self.writer, "\n")); try!(self.write_indent()); } if complex_key { - try!(write!(self.writer, "?")); - try!(self.emit_val(true, k)); - try!(write!(self.writer, "\n")); - try!(self.write_indent()); - try!(write!(self.writer, ":")); - try!(self.emit_val(true, v)); + try!(write!(self.writer, "?")); + try!(self.emit_val(true, k)); + try!(write!(self.writer, "\n")); + try!(self.write_indent()); + try!(write!(self.writer, ":")); + try!(self.emit_val(true, v)); } else { - try!(self.emit_node(k)); - try!(write!(self.writer, ":")); - try!(self.emit_val(false, v)); + try!(self.emit_node(k)); + try!(write!(self.writer, ":")); + try!(self.emit_val(false, v)); } } self.level -= 1; @@ -260,7 +260,7 @@ impl<'a> YamlEmitter<'a> { self.level -= 1; } self.emit_array(v) - }, + } Yaml::Hash(ref h) => { if (inline && self.compact) || h.is_empty() { try!(write!(self.writer, " ")); @@ -271,7 +271,7 @@ impl<'a> YamlEmitter<'a> { self.level -= 1; } self.emit_hash(h) - }, + } _ => { try!(write!(self.writer, " ")); self.emit_node(val) @@ -296,37 +296,48 @@ impl<'a> YamlEmitter<'a> { /// * When the string looks like a date (e.g. 2014-12-31) (otherwise it would be automatically converted into a Unix timestamp). fn need_quotes(string: &str) -> bool { fn need_quotes_spaces(string: &str) -> bool { - string.starts_with(' ') - || string.ends_with(' ') + string.starts_with(' ') || string.ends_with(' ') } string == "" - || need_quotes_spaces(string) - || string.starts_with(|character: char| { - match character { + || need_quotes_spaces(string) + || string.starts_with(|character: char| match character { ':' | '&' | '*' | '?' | '|' | '-' | '<' | '>' | '=' | '!' | '%' | '@' => true, _ => false, - } - }) - || string.contains(|character: char| { - match character { - '{' | '}' | '[' | ']' | ',' | '#' | '`' | '\"' | '\'' | '\\' | '\0' ... '\x06' | '\t' | '\n' | '\r' | '\x0e' ... '\x1a' | '\x1c' ... '\x1f' => true, + }) + || string.contains(|character: char| match character { + '{' + | '}' + | '[' + | ']' + | ',' + | '#' + | '`' + | '\"' + | '\'' + | '\\' + | '\0'...'\x06' + | '\t' + | '\n' + | '\r' + | '\x0e'...'\x1a' + | '\x1c'...'\x1f' => true, _ => false, - } - }) - || [// http://yaml.org/type/bool.html - // Note: 'y', 'Y', 'n', 'N', is not quoted deliberately, as in libyaml. PyYAML also parse - // them as string, not booleans, although it is volating the YAML 1.1 specification. - // See https://github.com/dtolnay/serde-yaml/pull/83#discussion_r152628088. - "yes","Yes","YES","no","No","NO", - "True", "TRUE", "true", "False", "FALSE", "false", - "on","On","ON","off","Off","OFF", - // http://yaml.org/type/null.html - "null","Null","NULL", "~" - ].contains(&string) - || string.starts_with('.') - || string.parse::().is_ok() - || string.parse::().is_ok() + }) + || [ + // http://yaml.org/type/bool.html + // Note: 'y', 'Y', 'n', 'N', is not quoted deliberately, as in libyaml. PyYAML also parse + // them as string, not booleans, although it is volating the YAML 1.1 specification. + // See https://github.com/dtolnay/serde-yaml/pull/83#discussion_r152628088. + "yes", "Yes", "YES", "no", "No", "NO", "True", "TRUE", "true", "False", "FALSE", + "false", "on", "On", "ON", "off", "Off", "OFF", + // http://yaml.org/type/null.html + "null", "Null", "NULL", "~", + ] + .contains(&string) + || string.starts_with('.') + || string.parse::().is_ok() + || string.parse::().is_ok() } #[cfg(test)] @@ -349,7 +360,6 @@ a4: - 2 "; - let docs = YamlLoader::load_from_str(&s).unwrap(); let doc = &docs[0]; let mut writer = String::new(); @@ -361,7 +371,7 @@ a4: println!("emitted:\n{}", writer); let docs_new = match YamlLoader::load_from_str(&writer) { Ok(y) => y, - Err(e) => panic!(format!("{}", e)) + Err(e) => panic!(format!("{}", e)), }; let doc_new = &docs_new[0]; @@ -398,7 +408,7 @@ products: } let docs_new = match YamlLoader::load_from_str(&writer) { Ok(y) => y, - Err(e) => panic!(format!("{}", e)) + Err(e) => panic!(format!("{}", e)), }; let doc_new = &docs_new[0]; assert_eq!(doc, doc_new); @@ -506,21 +516,26 @@ bool1: false"#; emitter.dump(doc).unwrap(); } - assert_eq!(expected, writer, "expected:\n{}\nactual:\n{}\n", expected, writer); + assert_eq!( + expected, writer, + "expected:\n{}\nactual:\n{}\n", + expected, writer + ); } #[test] fn test_empty_and_nested() { - test_empty_and_nested_flag(false) + test_empty_and_nested_flag(false) } #[test] fn test_empty_and_nested_compact() { - test_empty_and_nested_flag(true) + test_empty_and_nested_flag(true) } fn test_empty_and_nested_flag(compact: bool) { - let s = if compact { r#"--- + let s = if compact { + r#"--- a: b: c: hello @@ -528,7 +543,9 @@ a: e: - f - g - - h: []"# } else { r#"--- + - h: []"# + } else { + r#"--- a: b: c: hello @@ -537,7 +554,8 @@ e: - f - g - - h: []"# }; + h: []"# + }; let docs = YamlLoader::load_from_str(&s).unwrap(); let doc = &docs[0]; diff --git a/saphyr/src/lib.rs b/saphyr/src/lib.rs index e16449c..36932d9 100644 --- a/saphyr/src/lib.rs +++ b/saphyr/src/lib.rs @@ -41,16 +41,16 @@ extern crate linked_hash_map; -pub mod yaml; -pub mod scanner; -pub mod parser; pub mod emitter; +pub mod parser; +pub mod scanner; +pub mod yaml; // reexport key APIs -pub use scanner::ScanError; +pub use emitter::{EmitError, YamlEmitter}; pub use parser::Event; +pub use scanner::ScanError; pub use yaml::{Yaml, YamlLoader}; -pub use emitter::{YamlEmitter, EmitError}; #[cfg(test)] mod tests { @@ -58,8 +58,7 @@ mod tests { #[test] fn test_api() { - let s = -" + let s = " # from yaml-cpp example - name: Ogre position: [0, 5, 0] @@ -104,8 +103,7 @@ mod tests { #[test] fn test_fail() { - let s = -" + let s = " # syntax error scalar key: [1, 2]] diff --git a/saphyr/src/parser.rs b/saphyr/src/parser.rs index f8b04b5..190d84a 100644 --- a/saphyr/src/parser.rs +++ b/saphyr/src/parser.rs @@ -26,7 +26,7 @@ enum State { FlowMappingKey, FlowMappingValue, FlowMappingEmptyValue, - End + End, } /// `Event` is used with the low-level event base parsing API, @@ -48,7 +48,7 @@ pub enum Event { SequenceEnd, /// Anchor ID MappingStart(usize), - MappingEnd + MappingEnd, } impl Event { @@ -74,12 +74,10 @@ pub struct Parser { anchor_id: usize, } - pub trait EventReceiver { fn on_event(&mut self, ev: Event); } - pub trait MarkedEventReceiver { fn on_event(&mut self, ev: Event, _mark: Marker); } @@ -92,7 +90,7 @@ impl MarkedEventReceiver for R { pub type ParseResult = Result<(Event, Marker), ScanError>; -impl> Parser { +impl> Parser { pub fn new(src: T) -> Parser { Parser { scanner: Scanner::new(src), @@ -121,39 +119,37 @@ impl> Parser { pub fn next(&mut self) -> ParseResult { match self.current { None => self.parse(), - Some(_) => { - Ok(self.current.take().unwrap()) - } + Some(_) => Ok(self.current.take().unwrap()), } } fn peek_token(&mut self) -> Result<&Token, ScanError> { match self.token { - None => { + None => { self.token = Some(try!(self.scan_next_token())); Ok(self.token.as_ref().unwrap()) - }, - Some(ref tok) => Ok(tok) + } + Some(ref tok) => Ok(tok), } } fn scan_next_token(&mut self) -> Result { let token = self.scanner.next(); match token { - None => - match self.scanner.get_error() { - None => Err(ScanError::new(self.scanner.mark(), "unexpected eof")), - Some(e) => Err(e), - }, - Some(tok) => Ok(tok) + None => match self.scanner.get_error() { + None => Err(ScanError::new(self.scanner.mark(), "unexpected eof")), + Some(e) => Err(e), + }, + Some(tok) => Ok(tok), } } fn fetch_token(&mut self) -> Token { - self.token.take().expect("fetch_token needs to be preceded by peek_token") + self.token + .take() + .expect("fetch_token needs to be preceded by peek_token") } - fn skip(&mut self) { self.token = None; //self.peek_token(); @@ -174,8 +170,11 @@ impl> Parser { Ok((ev, mark)) } - pub fn load(&mut self, recv: &mut R, multi: bool) - -> Result<(), ScanError> { + pub fn load( + &mut self, + recv: &mut R, + multi: bool, + ) -> Result<(), ScanError> { if !self.scanner.stream_started() { let (ev, mark) = try!(self.next()); assert_eq!(ev, Event::StreamStart); @@ -203,8 +202,12 @@ impl> Parser { Ok(()) } - fn load_document(&mut self, first_ev: Event, mark: Marker, recv: &mut R) - -> Result<(), ScanError> { + fn load_document( + &mut self, + first_ev: Event, + mark: Marker, + recv: &mut R, + ) -> Result<(), ScanError> { assert_eq!(first_ev, Event::DocumentStart); recv.on_event(first_ev, mark); @@ -219,28 +222,33 @@ impl> Parser { Ok(()) } - fn load_node(&mut self, first_ev: Event, mark: Marker, recv: &mut R) - -> Result<(), ScanError> { + fn load_node( + &mut self, + first_ev: Event, + mark: Marker, + recv: &mut R, + ) -> Result<(), ScanError> { match first_ev { Event::Alias(..) | Event::Scalar(..) => { recv.on_event(first_ev, mark); Ok(()) - }, + } Event::SequenceStart(_) => { recv.on_event(first_ev, mark); self.load_sequence(recv) - }, + } Event::MappingStart(_) => { recv.on_event(first_ev, mark); self.load_mapping(recv) - }, - _ => { println!("UNREACHABLE EVENT: {:?}", first_ev); - unreachable!(); } + } + _ => { + println!("UNREACHABLE EVENT: {:?}", first_ev); + unreachable!(); + } } } - fn load_mapping(&mut self, recv: &mut R) - -> Result<(), ScanError> { + fn load_mapping(&mut self, recv: &mut R) -> Result<(), ScanError> { let (mut key_ev, mut key_mark) = try!(self.next()); while key_ev != Event::MappingEnd { // key @@ -254,14 +262,12 @@ impl> Parser { let (ev, mark) = try!(self.next()); key_ev = ev; key_mark = mark; - } recv.on_event(key_ev, key_mark); Ok(()) } - fn load_sequence(&mut self, recv: &mut R) - -> Result<(), ScanError> { + fn load_sequence(&mut self, recv: &mut R) -> Result<(), ScanError> { let (mut ev, mut mark) = try!(self.next()); while ev != Event::SequenceEnd { try!(self.load_node(ev, mark, recv)); @@ -289,7 +295,6 @@ impl> Parser { State::BlockNode => self.parse_node(true, false), // State::BlockNodeOrIndentlessSequence => self.parse_node(true, true), // State::FlowNode => self.parse_node(false, false), - State::BlockMappingFirstKey => self.block_mapping_key(true), State::BlockMappingKey => self.block_mapping_key(false), State::BlockMappingValue => self.block_mapping_value(), @@ -322,9 +327,8 @@ impl> Parser { self.state = State::ImplicitDocumentStart; self.skip(); Ok((Event::StreamStart, mark)) - }, - Token(mark, _) => Err(ScanError::new(mark, - "did not find expected ")), + } + Token(mark, _) => Err(ScanError::new(mark, "did not find expected ")), } } @@ -340,19 +344,19 @@ impl> Parser { self.state = State::End; self.skip(); Ok((Event::StreamEnd, mark)) - }, + } Token(_, TokenType::VersionDirective(..)) | Token(_, TokenType::TagDirective(..)) | Token(_, TokenType::DocumentStart) => { // explicit document self._explict_document_start() - }, + } Token(mark, _) if implicit => { try!(self.parser_process_directives()); self.push_state(State::DocumentEnd); self.state = State::BlockNode; Ok((Event::DocumentStart, mark)) - }, + } _ => { // explicit document self._explict_document_start() @@ -369,11 +373,11 @@ impl> Parser { // return Err(ScanError::new(tok.0, // "found incompatible YAML document")); //} - }, + } TokenType::TagDirective(..) => { // TODO add tag directive - }, - _ => break + } + _ => break, } self.skip(); } @@ -389,9 +393,12 @@ impl> Parser { self.state = State::DocumentContent; self.skip(); Ok((Event::DocumentStart, mark)) - } - Token(mark, _) => Err(ScanError::new(mark, "did not find expected ")) - } + } + Token(mark, _) => Err(ScanError::new( + mark, + "did not find expected ", + )), + } } fn document_content(&mut self) -> ParseResult { @@ -404,10 +411,8 @@ impl> Parser { self.pop_state(); // empty scalar Ok((Event::empty_scalar(), mark)) - }, - _ => { - self.parse_node(true, false) } + _ => self.parse_node(true, false), } } @@ -418,10 +423,10 @@ impl> Parser { self.skip(); _implicit = false; mark - }, - Token(mark, _) => mark + } + Token(mark, _) => mark, }; - + // TODO tag handling self.state = State::DocumentStart; Ok((Event::DocumentEnd, marker)) @@ -447,13 +452,18 @@ impl> Parser { self.pop_state(); if let Token(mark, TokenType::Alias(name)) = self.fetch_token() { match self.anchors.get(&name) { - None => return Err(ScanError::new(mark, "while parsing node, found unknown anchor")), - Some(id) => return Ok((Event::Alias(*id), mark)) + None => { + return Err(ScanError::new( + mark, + "while parsing node, found unknown anchor", + )) + } + Some(id) => return Ok((Event::Alias(*id), mark)), } } else { unreachable!() } - }, + } Token(_, TokenType::Anchor(_)) => { if let Token(mark, TokenType::Anchor(name)) = self.fetch_token() { anchor_id = try!(self.register_anchor(name, &mark)); @@ -467,7 +477,7 @@ impl> Parser { } else { unreachable!() } - }, + } Token(_, TokenType::Tag(..)) => { if let tg @ TokenType::Tag(..) = self.fetch_token().1 { tag = Some(tg); @@ -481,14 +491,14 @@ impl> Parser { } else { unreachable!() } - }, + } _ => {} } match *try!(self.peek_token()) { Token(mark, TokenType::BlockEntry) if indentless_sequence => { self.state = State::IndentlessSequenceEntry; Ok((Event::SequenceStart(anchor_id), mark)) - }, + } Token(_, TokenType::Scalar(..)) => { self.pop_state(); if let Token(mark, TokenType::Scalar(style, v)) = self.fetch_token() { @@ -496,29 +506,32 @@ impl> Parser { } else { unreachable!() } - }, + } Token(mark, TokenType::FlowSequenceStart) => { self.state = State::FlowSequenceFirstEntry; Ok((Event::SequenceStart(anchor_id), mark)) - }, + } Token(mark, TokenType::FlowMappingStart) => { self.state = State::FlowMappingFirstKey; Ok((Event::MappingStart(anchor_id), mark)) - }, + } Token(mark, TokenType::BlockSequenceStart) if block => { self.state = State::BlockSequenceFirstEntry; Ok((Event::SequenceStart(anchor_id), mark)) - }, + } Token(mark, TokenType::BlockMappingStart) if block => { self.state = State::BlockMappingFirstKey; Ok((Event::MappingStart(anchor_id), mark)) - }, + } // ex 7.2, an empty scalar can follow a secondary tag Token(mark, _) if tag.is_some() || anchor_id > 0 => { self.pop_state(); Ok((Event::empty_scalar_with_anchor(anchor_id, tag), mark)) - }, - Token(mark, _) => { Err(ScanError::new(mark, "while parsing a node, did not find expected node content")) } + } + Token(mark, _) => Err(ScanError::new( + mark, + "while parsing a node, did not find expected node content", + )), } } @@ -545,20 +558,21 @@ impl> Parser { self.parse_node(true, true) } } - }, + } // XXX(chenyh): libyaml failed to parse spec 1.2, ex8.18 Token(mark, TokenType::Value) => { self.state = State::BlockMappingValue; Ok((Event::empty_scalar(), mark)) - }, + } Token(mark, TokenType::BlockEnd) => { self.pop_state(); self.skip(); Ok((Event::MappingEnd, mark)) - }, - Token(mark, _) => { - Err(ScanError::new(mark, "while parsing a block mapping, did not find expected key")) } + Token(mark, _) => Err(ScanError::new( + mark, + "while parsing a block mapping, did not find expected key", + )), } } @@ -573,13 +587,13 @@ impl> Parser { self.state = State::BlockMappingKey; // empty scalar Ok((Event::empty_scalar(), mark)) - }, + } _ => { self.push_state(State::BlockMappingKey); self.parse_node(true, true) } } - }, + } Token(mark, _) => { self.state = State::BlockMappingKey; // empty scalar @@ -593,49 +607,50 @@ impl> Parser { let _ = try!(self.peek_token()); self.skip(); } - let marker: Marker = { - match *try!(self.peek_token()) { - Token(mark, TokenType::FlowMappingEnd) => mark, - Token(mark, _) => { - if !first { - match *try!(self.peek_token()) { + let marker: Marker = + { + match *try!(self.peek_token()) { + Token(mark, TokenType::FlowMappingEnd) => mark, + Token(mark, _) => { + if !first { + match *try!(self.peek_token()) { Token(_, TokenType::FlowEntry) => self.skip(), Token(mark, _) => return Err(ScanError::new(mark, "while parsing a flow mapping, did not find expected ',' or '}'")) } - } + } - match *try!(self.peek_token()) { - Token(_, TokenType::Key) => { - self.skip(); - match *try!(self.peek_token()) { - Token(mark, TokenType::Value) - | Token(mark, TokenType::FlowEntry) - | Token(mark, TokenType::FlowMappingEnd) => { - self.state = State::FlowMappingValue; - return Ok((Event::empty_scalar(), mark)); - }, - _ => { - self.push_state(State::FlowMappingValue); - return self.parse_node(false, false); + match *try!(self.peek_token()) { + Token(_, TokenType::Key) => { + self.skip(); + match *try!(self.peek_token()) { + Token(mark, TokenType::Value) + | Token(mark, TokenType::FlowEntry) + | Token(mark, TokenType::FlowMappingEnd) => { + self.state = State::FlowMappingValue; + return Ok((Event::empty_scalar(), mark)); + } + _ => { + self.push_state(State::FlowMappingValue); + return self.parse_node(false, false); + } } } - }, - Token(marker, TokenType::Value) => { - self.state = State::FlowMappingValue; - return Ok((Event::empty_scalar(), marker)); - }, - Token(_, TokenType::FlowMappingEnd) => (), - _ => { - self.push_state(State::FlowMappingEmptyValue); - return self.parse_node(false, false); + Token(marker, TokenType::Value) => { + self.state = State::FlowMappingValue; + return Ok((Event::empty_scalar(), marker)); + } + Token(_, TokenType::FlowMappingEnd) => (), + _ => { + self.push_state(State::FlowMappingEmptyValue); + return self.parse_node(false, false); + } } - } - mark + mark + } } - } - }; + }; self.pop_state(); self.skip(); @@ -653,20 +668,19 @@ impl> Parser { Token(marker, TokenType::Value) => { self.skip(); match try!(self.peek_token()).1 { - TokenType::FlowEntry - | TokenType::FlowMappingEnd => { }, + TokenType::FlowEntry | TokenType::FlowMappingEnd => {} _ => { self.push_state(State::FlowMappingKey); return self.parse_node(false, false); } } marker - }, - Token(marker, _) => marker + } + Token(marker, _) => marker, } } }; - + self.state = State::FlowMappingKey; Ok((Event::empty_scalar(), mark)) } @@ -683,13 +697,15 @@ impl> Parser { self.pop_state(); self.skip(); return Ok((Event::SequenceEnd, mark)); - }, + } Token(_, TokenType::FlowEntry) if !first => { self.skip(); - }, + } Token(mark, _) if !first => { - return Err(ScanError::new(mark, - "while parsing a flow sequence, expectd ',' or ']'")); + return Err(ScanError::new( + mark, + "while parsing a flow sequence, expectd ',' or ']'", + )); } _ => { /* next */ } } @@ -698,7 +714,7 @@ impl> Parser { self.pop_state(); self.skip(); Ok((Event::SequenceEnd, mark)) - }, + } Token(mark, TokenType::Key) => { self.state = State::FlowSequenceEntryMappingKey; self.skip(); @@ -727,7 +743,7 @@ impl> Parser { | Token(mark, TokenType::BlockEnd) => { self.state = State::IndentlessSequenceEntry; Ok((Event::empty_scalar(), mark)) - }, + } _ => { self.push_state(State::IndentlessSequenceEntry); self.parse_node(true, false) @@ -747,25 +763,24 @@ impl> Parser { self.pop_state(); self.skip(); Ok((Event::SequenceEnd, mark)) - }, + } Token(_, TokenType::BlockEntry) => { self.skip(); match *try!(self.peek_token()) { - Token(mark, TokenType::BlockEntry) - | Token(mark, TokenType::BlockEnd) => { + Token(mark, TokenType::BlockEntry) | Token(mark, TokenType::BlockEnd) => { self.state = State::BlockSequenceEntry; Ok((Event::empty_scalar(), mark)) - }, + } _ => { self.push_state(State::BlockSequenceEntry); self.parse_node(true, false) } } - }, - Token(mark, _) => { - Err(ScanError::new(mark, - "while parsing a block collection, did not find expected '-' indicator")) } + Token(mark, _) => Err(ScanError::new( + mark, + "while parsing a block collection, did not find expected '-' indicator", + )), } } @@ -777,7 +792,7 @@ impl> Parser { self.skip(); self.state = State::FlowSequenceEntryMappingValue; Ok((Event::empty_scalar(), mark)) - }, + } _ => { self.push_state(State::FlowSequenceEntryMappingValue); self.parse_node(false, false) @@ -788,20 +803,19 @@ impl> Parser { fn flow_sequence_entry_mapping_value(&mut self) -> ParseResult { match *try!(self.peek_token()) { Token(_, TokenType::Value) => { - self.skip(); - self.state = State::FlowSequenceEntryMappingValue; - match *try!(self.peek_token()) { - Token(mark, TokenType::FlowEntry) - | Token(mark, TokenType::FlowSequenceEnd) => { - self.state = State::FlowSequenceEntryMappingEnd; - Ok((Event::empty_scalar(), mark)) - }, - _ => { - self.push_state(State::FlowSequenceEntryMappingEnd); - self.parse_node(false, false) - } + self.skip(); + self.state = State::FlowSequenceEntryMappingValue; + match *try!(self.peek_token()) { + Token(mark, TokenType::FlowEntry) | Token(mark, TokenType::FlowSequenceEnd) => { + self.state = State::FlowSequenceEntryMappingEnd; + Ok((Event::empty_scalar(), mark)) } - }, + _ => { + self.push_state(State::FlowSequenceEntryMappingEnd); + self.parse_node(false, false) + } + } + } Token(mark, _) => { self.state = State::FlowSequenceEntryMappingEnd; Ok((Event::empty_scalar(), mark)) diff --git a/saphyr/src/scanner.rs b/saphyr/src/scanner.rs index 56496d3..366eee5 100644 --- a/saphyr/src/scanner.rs +++ b/saphyr/src/scanner.rs @@ -1,10 +1,10 @@ use std::collections::VecDeque; -use std::{char, fmt}; use std::error::Error; +use std::{char, fmt}; #[derive(Clone, Copy, PartialEq, Debug, Eq)] pub enum TEncoding { - Utf8 + Utf8, } #[derive(Clone, Copy, PartialEq, Debug, Eq)] @@ -15,7 +15,7 @@ pub enum TScalarStyle { DoubleQuoted, Literal, - Foled + Foled, } #[derive(Clone, Copy, PartialEq, Debug, Eq)] @@ -30,7 +30,7 @@ impl Marker { Marker { index: index, line: line, - col: col + col: col, } } @@ -57,7 +57,7 @@ impl ScanError { pub fn new(loc: Marker, info: &str) -> ScanError { ScanError { mark: loc, - info: info.to_owned() + info: info.to_owned(), } } @@ -79,8 +79,13 @@ impl Error for ScanError { impl fmt::Display for ScanError { // col starts from 0 fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { - write!(formatter, "{} at line {} column {}", self.info, - self.mark.line, self.mark.col + 1) + write!( + formatter, + "{} at line {} column {}", + self.info, + self.mark.line, + self.mark.col + 1 + ) } } @@ -110,7 +115,7 @@ pub enum TokenType { Anchor(String), /// handle, suffix Tag(String, String), - Scalar(TScalarStyle, String) + Scalar(TScalarStyle, String), } #[derive(Clone, PartialEq, Debug, Eq)] @@ -154,7 +159,7 @@ pub struct Scanner { token_available: bool, } -impl> Iterator for Scanner { +impl> Iterator for Scanner { type Item = Token; fn next(&mut self) -> Option { if self.error.is_some() { @@ -199,14 +204,12 @@ fn is_alpha(c: char) -> bool { match c { '0'...'9' | 'a'...'z' | 'A'...'Z' => true, '_' | '-' => true, - _ => false + _ => false, } } #[inline] fn is_hex(c: char) -> bool { - (c >= '0' && c <= '9') - || (c >= 'a' && c <= 'f') - || (c >= 'A' && c <= 'F') + (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F') } #[inline] fn as_hex(c: char) -> u32 { @@ -214,13 +217,13 @@ fn as_hex(c: char) -> u32 { '0'...'9' => (c as u32) - ('0' as u32), 'a'...'f' => (c as u32) - ('a' as u32) + 10, 'A'...'F' => (c as u32) - ('A' as u32) + 10, - _ => unreachable!() + _ => unreachable!(), } } pub type ScanResult = Result<(), ScanError>; -impl> Scanner { +impl> Scanner { /// Creates the YAML tokenizer. pub fn new(rdr: T) -> Scanner { Scanner { @@ -326,10 +329,10 @@ impl> Scanner { } } fn allow_simple_key(&mut self) { - self.simple_key_allowed = true; + self.simple_key_allowed = true; } fn disallow_simple_key(&mut self) { - self.simple_key_allowed = false; + self.simple_key_allowed = false; } pub fn fetch_next_token(&mut self) -> ScanResult { @@ -363,7 +366,8 @@ impl> Scanner { && self.buffer[0] == '-' && self.buffer[1] == '-' && self.buffer[2] == '-' - && is_blankz(self.buffer[3]) { + && is_blankz(self.buffer[3]) + { try!(self.fetch_document_indicator(TokenType::DocumentStart)); return Ok(()); } @@ -372,7 +376,8 @@ impl> Scanner { && self.buffer[0] == '.' && self.buffer[1] == '.' && self.buffer[2] == '.' - && is_blankz(self.buffer[3]) { + && is_blankz(self.buffer[3]) + { try!(self.fetch_document_indicator(TokenType::DocumentEnd)); return Ok(()); } @@ -402,8 +407,10 @@ impl> Scanner { // plain scalar '-' if !is_blankz(nc) => self.fetch_plain_scalar(), ':' | '?' if !is_blankz(nc) && self.flow_level == 0 => self.fetch_plain_scalar(), - '%' | '@' | '`' => Err(ScanError::new(self.mark, - &format!("unexpected character: `{}'", c))), + '%' | '@' | '`' => Err(ScanError::new( + self.mark, + &format!("unexpected character: `{}'", c), + )), _ => self.fetch_plain_scalar(), } } @@ -442,7 +449,9 @@ impl> Scanner { } } - if !need_more { break; } + if !need_more { + break; + } try!(self.fetch_next_token()); } self.token_available = true; @@ -452,13 +461,14 @@ impl> Scanner { fn stale_simple_keys(&mut self) -> ScanResult { for sk in &mut self.simple_keys { - if sk.possible && (sk.mark.line < self.mark.line - || sk.mark.index + 1024 < self.mark.index) { - if sk.required { - return Err(ScanError::new(self.mark, "simple key expect ':'")); - } - sk.possible = false; + if sk.possible + && (sk.mark.line < self.mark.line || sk.mark.index + 1024 < self.mark.index) + { + if sk.required { + return Err(ScanError::new(self.mark, "simple key expect ':'")); } + sk.possible = false; + } } Ok(()) } @@ -476,9 +486,12 @@ impl> Scanner { if self.flow_level == 0 { self.allow_simple_key(); } + } + '#' => while !is_breakz(self.ch()) { + self.skip(); + self.lookahead(1); }, - '#' => while !is_breakz(self.ch()) { self.skip(); self.lookahead(1); }, - _ => break + _ => break, } } } @@ -488,8 +501,9 @@ impl> Scanner { self.indent = -1; self.stream_start_produced = true; self.allow_simple_key(); - self.tokens.push_back(Token(mark, TokenType::StreamStart(TEncoding::Utf8))); - self.simple_keys.push(SimpleKey::new(Marker::new(0,0,0))); + self.tokens + .push_back(Token(mark, TokenType::StreamStart(TEncoding::Utf8))); + self.simple_keys.push(SimpleKey::new(Marker::new(0, 0, 0))); } fn fetch_stream_end(&mut self) -> ScanResult { @@ -503,7 +517,8 @@ impl> Scanner { try!(self.remove_simple_key()); self.disallow_simple_key(); - self.tokens.push_back(Token(self.mark, TokenType::StreamEnd)); + self.tokens + .push_back(Token(self.mark, TokenType::StreamEnd)); Ok(()) } @@ -526,12 +541,8 @@ impl> Scanner { let name = try!(self.scan_directive_name()); let tok = match name.as_ref() { - "YAML" => { - try!(self.scan_version_directive_value(&start_mark)) - }, - "TAG" => { - try!(self.scan_tag_directive_value(&start_mark)) - }, + "YAML" => try!(self.scan_version_directive_value(&start_mark)), + "TAG" => try!(self.scan_tag_directive_value(&start_mark)), // XXX This should be a warning instead of an error _ => { // skip current line @@ -541,7 +552,10 @@ impl> Scanner { self.lookahead(1); } // XXX return an empty TagDirective token - Token(start_mark, TokenType::TagDirective(String::new(), String::new())) + Token( + start_mark, + TokenType::TagDirective(String::new(), String::new()), + ) // return Err(ScanError::new(start_mark, // "while scanning a directive, found unknown directive name")) } @@ -561,8 +575,10 @@ impl> Scanner { } if !is_breakz(self.ch()) { - return Err(ScanError::new(start_mark, - "while scanning a directive, did not find expected comment or line break")); + return Err(ScanError::new( + start_mark, + "while scanning a directive, did not find expected comment or line break", + )); } // Eat a line break @@ -585,8 +601,10 @@ impl> Scanner { let major = try!(self.scan_version_directive_number(mark)); if self.ch() != '.' { - return Err(ScanError::new(*mark, - "while scanning a YAML directive, did not find expected digit or '.' character")); + return Err(ScanError::new( + *mark, + "while scanning a YAML directive, did not find expected digit or '.' character", + )); } self.skip(); @@ -607,13 +625,17 @@ impl> Scanner { } if string.is_empty() { - return Err(ScanError::new(start_mark, - "while scanning a directive, could not find expected directive name")); + return Err(ScanError::new( + start_mark, + "while scanning a directive, could not find expected directive name", + )); } if !is_blankz(self.ch()) { - return Err(ScanError::new(start_mark, - "while scanning a directive, found unexpected non-alphabetical character")); + return Err(ScanError::new( + start_mark, + "while scanning a directive, found unexpected non-alphabetical character", + )); } Ok(string) @@ -625,8 +647,10 @@ impl> Scanner { self.lookahead(1); while is_digit(self.ch()) { if length + 1 > 9 { - return Err(ScanError::new(*mark, - "while scanning a YAML directive, found extremely long version number")); + return Err(ScanError::new( + *mark, + "while scanning a YAML directive, found extremely long version number", + )); } length += 1; val = val * 10 + ((self.ch() as u32) - ('0' as u32)); @@ -635,8 +659,10 @@ impl> Scanner { } if length == 0 { - return Err(ScanError::new(*mark, - "while scanning a YAML directive, did not find expected version number")); + return Err(ScanError::new( + *mark, + "while scanning a YAML directive, did not find expected version number", + )); } Ok(val) @@ -666,8 +692,10 @@ impl> Scanner { if is_blankz(self.ch()) { Ok(Token(*mark, TokenType::TagDirective(handle, prefix))) } else { - Err(ScanError::new(*mark, - "while scanning TAG, did not find expected whitespace or line break")) + Err(ScanError::new( + *mark, + "while scanning TAG, did not find expected whitespace or line break", + )) } } @@ -696,8 +724,10 @@ impl> Scanner { suffix = try!(self.scan_tag_uri(false, false, &String::new(), &start_mark)); if self.ch() != '>' { - return Err(ScanError::new(start_mark, - "while scanning a tag, did not find the expected '>'")); + return Err(ScanError::new( + start_mark, + "while scanning a tag, did not find the expected '>'", + )); } self.skip(); @@ -727,8 +757,10 @@ impl> Scanner { // XXX: ex 7.2, an empty scalar can follow a secondary tag Ok(Token(start_mark, TokenType::Tag(handle, suffix))) } else { - Err(ScanError::new(start_mark, - "while scanning a tag, did not find expected whitespace or line break")) + Err(ScanError::new( + start_mark, + "while scanning a tag, did not find expected whitespace or line break", + )) } } @@ -736,8 +768,10 @@ impl> Scanner { let mut string = String::new(); self.lookahead(1); if self.ch() != '!' { - return Err(ScanError::new(*mark, - "while scanning a tag, did not find expected '!'")); + return Err(ScanError::new( + *mark, + "while scanning a tag, did not find expected '!'", + )); } string.push(self.ch()); @@ -758,14 +792,21 @@ impl> Scanner { // It's either the '!' tag or not really a tag handle. If it's a %TAG // directive, it's an error. If it's a tag token, it must be a part of // URI. - return Err(ScanError::new(*mark, - "while parsing a tag directive, did not find expected '!'")); + return Err(ScanError::new( + *mark, + "while parsing a tag directive, did not find expected '!'", + )); } Ok(string) } - fn scan_tag_uri(&mut self, directive: bool, _is_secondary: bool, - head: &str, mark: &Marker) -> Result { + fn scan_tag_uri( + &mut self, + directive: bool, + _is_secondary: bool, + head: &str, + mark: &Marker, + ) -> Result { let mut length = head.len(); let mut string = String::new(); @@ -788,7 +829,7 @@ impl> Scanner { '=' | '+' | '$' | ',' | '.' | '!' | '~' | '*' | '\'' | '(' | ')' | '[' | ']' => true, '%' => true, c if is_alpha(c) => true, - _ => false + _ => false, } { // Check if it is a URI-escape sequence. if self.ch() == '%' { @@ -803,25 +844,26 @@ impl> Scanner { } if length == 0 { - return Err(ScanError::new(*mark, - "while parsing a tag, did not find expected tag URI")); + return Err(ScanError::new( + *mark, + "while parsing a tag, did not find expected tag URI", + )); } Ok(string) } - fn scan_uri_escapes(&mut self, _directive: bool, mark: &Marker) - -> Result { + fn scan_uri_escapes(&mut self, _directive: bool, mark: &Marker) -> Result { let mut width = 0usize; let mut code = 0u32; loop { self.lookahead(3); - if !(self.ch() == '%' - && is_hex(self.buffer[1]) - && is_hex(self.buffer[2])) { - return Err(ScanError::new(*mark, - "while parsing a tag, did not find URI escaped octet")); + if !(self.ch() == '%' && is_hex(self.buffer[1]) && is_hex(self.buffer[2])) { + return Err(ScanError::new( + *mark, + "while parsing a tag, did not find URI escaped octet", + )); } let octet = (as_hex(self.buffer[1]) << 4) + as_hex(self.buffer[2]); @@ -832,15 +874,19 @@ impl> Scanner { _ if octet & 0xF0 == 0xE0 => 3, _ if octet & 0xF8 == 0xF0 => 4, _ => { - return Err(ScanError::new(*mark, - "while parsing a tag, found an incorrect leading UTF-8 octet")); + return Err(ScanError::new( + *mark, + "while parsing a tag, found an incorrect leading UTF-8 octet", + )); } }; code = octet; } else { if octet & 0xc0 != 0x80 { - return Err(ScanError::new(*mark, - "while parsing a tag, found an incorrect trailing UTF-8 octet")); + return Err(ScanError::new( + *mark, + "while parsing a tag, found an incorrect trailing UTF-8 octet", + )); } code = (code << 8) + octet; } @@ -857,8 +903,10 @@ impl> Scanner { match char::from_u32(code) { Some(ch) => Ok(ch), - None => Err(ScanError::new(*mark, - "while parsing a tag, found an invalid UTF-8 codepoint")) + None => Err(ScanError::new( + *mark, + "while parsing a tag, found an invalid UTF-8 codepoint", + )), } } @@ -873,8 +921,7 @@ impl> Scanner { Ok(()) } - fn scan_anchor(&mut self, alias: bool) - -> Result { + fn scan_anchor(&mut self, alias: bool) -> Result { let mut string = String::new(); let start_mark = self.mark; @@ -886,12 +933,11 @@ impl> Scanner { self.lookahead(1); } - if string.is_empty() - || match self.ch() { - c if is_blankz(c) => false, - '?' | ':' | ',' | ']' | '}' | '%' | '@' | '`' => false, - _ => true - } { + if string.is_empty() || match self.ch() { + c if is_blankz(c) => false, + '?' | ':' | ',' | ']' | '}' | '%' | '@' | '`' => false, + _ => true, + } { return Err(ScanError::new(start_mark, "while scanning an anchor or alias, did not find expected alphabetic or numeric character")); } @@ -902,7 +948,7 @@ impl> Scanner { } } - fn fetch_flow_collection_start(&mut self, tok :TokenType) -> ScanResult { + fn fetch_flow_collection_start(&mut self, tok: TokenType) -> ScanResult { // The indicators '[' and '{' may start a simple key. try!(self.save_simple_key()); @@ -917,7 +963,7 @@ impl> Scanner { Ok(()) } - fn fetch_flow_collection_end(&mut self, tok :TokenType) -> ScanResult { + fn fetch_flow_collection_end(&mut self, tok: TokenType) -> ScanResult { try!(self.remove_simple_key()); self.decrease_flow_level(); @@ -937,13 +983,16 @@ impl> Scanner { let start_mark = self.mark; self.skip(); - self.tokens.push_back(Token(start_mark, TokenType::FlowEntry)); + self.tokens + .push_back(Token(start_mark, TokenType::FlowEntry)); Ok(()) } fn increase_flow_level(&mut self) -> ScanResult { - self.simple_keys.push(SimpleKey::new(Marker::new(0,0,0))); - self.flow_level = self.flow_level.checked_add(1) + self.simple_keys.push(SimpleKey::new(Marker::new(0, 0, 0))); + self.flow_level = self + .flow_level + .checked_add(1) .ok_or_else(|| ScanError::new(self.mark, "Recursion limit exceeded"))?; Ok(()) } @@ -958,8 +1007,10 @@ impl> Scanner { if self.flow_level == 0 { // Check if we are allowed to start a new entry. if !self.simple_key_allowed { - return Err(ScanError::new(self.mark, - "block sequence entries are not allowed in this context")); + return Err(ScanError::new( + self.mark, + "block sequence entries are not allowed in this context", + )); } let mark = self.mark; @@ -967,7 +1018,10 @@ impl> Scanner { self.roll_indent(mark.col, None, TokenType::BlockSequenceStart, mark); } else { // - * only allowed in block - return Err(ScanError::new(self.mark, r#""-" is only valid inside a block"#)) + return Err(ScanError::new( + self.mark, + r#""-" is only valid inside a block"#, + )); } try!(self.remove_simple_key()); self.allow_simple_key(); @@ -975,7 +1029,8 @@ impl> Scanner { let start_mark = self.mark; self.skip(); - self.tokens.push_back(Token(start_mark, TokenType::BlockEntry)); + self.tokens + .push_back(Token(start_mark, TokenType::BlockEntry)); Ok(()) } @@ -1029,16 +1084,20 @@ impl> Scanner { self.lookahead(1); if is_digit(self.ch()) { if self.ch() == '0' { - return Err(ScanError::new(start_mark, - "while scanning a block scalar, found an intendation indicator equal to 0")); + return Err(ScanError::new( + start_mark, + "while scanning a block scalar, found an intendation indicator equal to 0", + )); } increment = (self.ch() as usize) - ('0' as usize); self.skip(); } } else if is_digit(self.ch()) { if self.ch() == '0' { - return Err(ScanError::new(start_mark, - "while scanning a block scalar, found an intendation indicator equal to 0")); + return Err(ScanError::new( + start_mark, + "while scanning a block scalar, found an intendation indicator equal to 0", + )); } increment = (self.ch() as usize) - ('0' as usize); @@ -1071,8 +1130,10 @@ impl> Scanner { // Check if we are at the end of the line. if !is_breakz(self.ch()) { - return Err(ScanError::new(start_mark, - "while scanning a block scalar, did not find expected comment or line break")); + return Err(ScanError::new( + start_mark, + "while scanning a block scalar, did not find expected comment or line break", + )); } if is_break(self.ch()) { @@ -1081,7 +1142,11 @@ impl> Scanner { } if increment > 0 { - indent = if self.indent >= 0 { (self.indent + increment as isize) as usize } else { increment } + indent = if self.indent >= 0 { + (self.indent + increment as isize) as usize + } else { + increment + } } // Scan the leading line breaks and determine the indentation level if needed. try!(self.block_scalar_breaks(&mut indent, &mut trailing_breaks)); @@ -1093,12 +1158,11 @@ impl> Scanner { while self.mark.col == indent && !is_z(self.ch()) { // We are at the beginning of a non-empty line. trailing_blank = is_blank(self.ch()); - if !literal && !leading_break.is_empty() - && !leading_blank && !trailing_blank { - if trailing_breaks.is_empty() { - string.push(' '); - } - leading_break.clear(); + if !literal && !leading_break.is_empty() && !leading_blank && !trailing_blank { + if trailing_breaks.is_empty() { + string.push(' '); + } + leading_break.clear(); } else { string.push_str(&leading_break); leading_break.clear(); @@ -1115,7 +1179,9 @@ impl> Scanner { self.lookahead(1); } // break on EOF - if is_z(self.ch()) { break; } + if is_z(self.ch()) { + break; + } self.lookahead(2); self.read_break(&mut leading_break); @@ -1134,9 +1200,15 @@ impl> Scanner { } if literal { - Ok(Token(start_mark, TokenType::Scalar(TScalarStyle::Literal, string))) + Ok(Token( + start_mark, + TokenType::Scalar(TScalarStyle::Literal, string), + )) } else { - Ok(Token(start_mark, TokenType::Scalar(TScalarStyle::Foled, string))) + Ok(Token( + start_mark, + TokenType::Scalar(TScalarStyle::Foled, string), + )) } } @@ -1144,10 +1216,9 @@ impl> Scanner { let mut max_indent = 0; loop { self.lookahead(1); - while (*indent == 0 || self.mark.col < *indent) - && self.buffer[0] == ' ' { - self.skip(); - self.lookahead(1); + while (*indent == 0 || self.mark.col < *indent) && self.buffer[0] == ' ' { + self.skip(); + self.lookahead(1); } if self.mark.col > max_indent { @@ -1155,8 +1226,7 @@ impl> Scanner { } // Check for a tab character messing the intendation. - if (*indent == 0 || self.mark.col < *indent) - && self.buffer[0] == '\t' { + if (*indent == 0 || self.mark.col < *indent) && self.buffer[0] == '\t' { return Err(ScanError::new(self.mark, "while scanning a block scalar, found a tab character where an intendation space is expected")); } @@ -1208,21 +1278,24 @@ impl> Scanner { /* Check for a document indicator. */ self.lookahead(4); - if self.mark.col == 0 && - (((self.buffer[0] == '-') && - (self.buffer[1] == '-') && - (self.buffer[2] == '-')) || - ((self.buffer[0] == '.') && - (self.buffer[1] == '.') && - (self.buffer[2] == '.'))) && - is_blankz(self.buffer[3]) { - return Err(ScanError::new(start_mark, - "while scanning a quoted scalar, found unexpected document indicator")); - } + if self.mark.col == 0 + && (((self.buffer[0] == '-') && (self.buffer[1] == '-') && (self.buffer[2] == '-')) + || ((self.buffer[0] == '.') + && (self.buffer[1] == '.') + && (self.buffer[2] == '.'))) + && is_blankz(self.buffer[3]) + { + return Err(ScanError::new( + start_mark, + "while scanning a quoted scalar, found unexpected document indicator", + )); + } if is_z(self.ch()) { - return Err(ScanError::new(start_mark, - "while scanning a quoted scalar, found unexpected end of stream")); + return Err(ScanError::new( + start_mark, + "while scanning a quoted scalar, found unexpected end of stream", + )); } self.lookahead(2); @@ -1237,10 +1310,10 @@ impl> Scanner { string.push('\''); self.skip(); self.skip(); - }, + } // Check for the right quote. - '\'' if single => { break; }, - '"' if !single => { break; }, + '\'' if single => break, + '"' if !single => break, // Check for an escaped line break. '\\' if !single && is_break(self.buffer[1]) => { self.lookahead(3); @@ -1277,8 +1350,12 @@ impl> Scanner { 'x' => code_length = 2, 'u' => code_length = 4, 'U' => code_length = 8, - _ => return Err(ScanError::new(start_mark, - "while parsing a quoted scalar, found unknown escape character")) + _ => { + return Err(ScanError::new( + start_mark, + "while parsing a quoted scalar, found unknown escape character", + )) + } } self.skip(); self.skip(); @@ -1307,15 +1384,18 @@ impl> Scanner { self.skip(); } } - }, - c => { string.push(c); self.skip(); } + } + c => { + string.push(c); + self.skip(); + } } self.lookahead(2); } self.lookahead(1); match self.ch() { - '\'' if single => { break; }, - '"' if !single => { break; }, + '\'' if single => break, + '"' if !single => break, _ => {} } @@ -1368,9 +1448,15 @@ impl> Scanner { self.skip(); if single { - Ok(Token(start_mark, TokenType::Scalar(TScalarStyle::SingleQuoted, string))) + Ok(Token( + start_mark, + TokenType::Scalar(TScalarStyle::SingleQuoted, string), + )) } else { - Ok(Token(start_mark, TokenType::Scalar(TScalarStyle::DoubleQuoted, string))) + Ok(Token( + start_mark, + TokenType::Scalar(TScalarStyle::DoubleQuoted, string), + )) } } @@ -1398,28 +1484,30 @@ impl> Scanner { /* Check for a document indicator. */ self.lookahead(4); - if self.mark.col == 0 && - (((self.buffer[0] == '-') && - (self.buffer[1] == '-') && - (self.buffer[2] == '-')) || - ((self.buffer[0] == '.') && - (self.buffer[1] == '.') && - (self.buffer[2] == '.'))) && - is_blankz(self.buffer[3]) { - break; - } + if self.mark.col == 0 + && (((self.buffer[0] == '-') && (self.buffer[1] == '-') && (self.buffer[2] == '-')) + || ((self.buffer[0] == '.') + && (self.buffer[1] == '.') + && (self.buffer[2] == '.'))) + && is_blankz(self.buffer[3]) + { + break; + } - if self.ch() == '#' { break; } + if self.ch() == '#' { + break; + } while !is_blankz(self.ch()) { - if self.flow_level > 0 && self.ch() == ':' - && is_blankz(self.ch()) { - return Err(ScanError::new(start_mark, - "while scanning a plain scalar, found unexpected ':'")); - } + if self.flow_level > 0 && self.ch() == ':' && is_blankz(self.ch()) { + return Err(ScanError::new( + start_mark, + "while scanning a plain scalar, found unexpected ':'", + )); + } // indicators ends a plain scalar match self.ch() { ':' if is_blankz(self.buffer[1]) => break, - ',' | ':' | '?' | '[' | ']' |'{' |'}' if self.flow_level > 0 => break, + ',' | ':' | '?' | '[' | ']' | '{' | '}' if self.flow_level > 0 => break, _ => {} } @@ -1438,7 +1526,6 @@ impl> Scanner { trailing_breaks.clear(); } leading_break.clear(); - } leading_blanks = false; } else { @@ -1452,15 +1539,18 @@ impl> Scanner { self.lookahead(2); } // is the end? - if !(is_blank(self.ch()) || is_break(self.ch())) { break; } + if !(is_blank(self.ch()) || is_break(self.ch())) { + break; + } self.lookahead(1); while is_blank(self.ch()) || is_break(self.ch()) { if is_blank(self.ch()) { - if leading_blanks && (self.mark.col as isize) < indent - && self.ch() == '\t' { - return Err(ScanError::new(start_mark, - "while scanning a plain scalar, found a tab")); + if leading_blanks && (self.mark.col as isize) < indent && self.ch() == '\t' { + return Err(ScanError::new( + start_mark, + "while scanning a plain scalar, found a tab", + )); } if leading_blanks { @@ -1493,7 +1583,10 @@ impl> Scanner { self.allow_simple_key(); } - Ok(Token(start_mark, TokenType::Scalar(TScalarStyle::Plain, string))) + Ok(Token( + start_mark, + TokenType::Scalar(TScalarStyle::Plain, string), + )) } fn fetch_key(&mut self) -> ScanResult { @@ -1501,10 +1594,17 @@ impl> Scanner { if self.flow_level == 0 { // Check if we are allowed to start a new key (not nessesary simple). if !self.simple_key_allowed { - return Err(ScanError::new(self.mark, "mapping keys are not allowed in this context")); + return Err(ScanError::new( + self.mark, + "mapping keys are not allowed in this context", + )); } - self.roll_indent(start_mark.col, None, - TokenType::BlockMappingStart, start_mark); + self.roll_indent( + start_mark.col, + None, + TokenType::BlockMappingStart, + start_mark, + ); } try!(self.remove_simple_key()); @@ -1530,8 +1630,12 @@ impl> Scanner { self.insert_token(sk.token_number - tokens_parsed, tok); // Add the BLOCK-MAPPING-START token if needed. - self.roll_indent(sk.mark.col, Some(sk.token_number), - TokenType::BlockMappingStart, start_mark); + self.roll_indent( + sk.mark.col, + Some(sk.token_number), + TokenType::BlockMappingStart, + start_mark, + ); self.simple_keys.last_mut().unwrap().possible = false; self.disallow_simple_key(); @@ -1539,12 +1643,18 @@ impl> Scanner { // The ':' indicator follows a complex key. if self.flow_level == 0 { if !self.simple_key_allowed { - return Err(ScanError::new(start_mark, - "mapping values are not allowed in this context")); + return Err(ScanError::new( + start_mark, + "mapping values are not allowed in this context", + )); } - self.roll_indent(start_mark.col, None, - TokenType::BlockMappingStart, start_mark); + self.roll_indent( + start_mark.col, + None, + TokenType::BlockMappingStart, + start_mark, + ); } if self.flow_level == 0 { @@ -1559,8 +1669,7 @@ impl> Scanner { Ok(()) } - fn roll_indent(&mut self, col: usize, number: Option, - tok: TokenType, mark: Marker) { + fn roll_indent(&mut self, col: usize, number: Option, tok: TokenType, mark: Marker) { if self.flow_level > 0 { return; } @@ -1571,7 +1680,7 @@ impl> Scanner { let tokens_parsed = self.tokens_parsed; match number { Some(n) => self.insert_token(n - tokens_parsed, Token(mark, tok)), - None => self.tokens.push_back(Token(mark, tok)) + None => self.tokens.push_back(Token(mark, tok)), } } } @@ -1611,44 +1720,41 @@ impl> Scanner { last.possible = false; Ok(()) } - } #[cfg(test)] mod test { - use super::*; use super::TokenType::*; + use super::*; -macro_rules! next { - ($p:ident, $tk:pat) => {{ - let tok = $p.next().unwrap(); - match tok.1 { - $tk => {}, - _ => { panic!("unexpected token: {:?}", - tok) } - } - }} -} + macro_rules! next { + ($p:ident, $tk:pat) => {{ + let tok = $p.next().unwrap(); + match tok.1 { + $tk => {} + _ => panic!("unexpected token: {:?}", tok), + } + }}; + } -macro_rules! next_scalar { - ($p:ident, $tk:expr, $v:expr) => {{ - let tok = $p.next().unwrap(); - match tok.1 { - Scalar(style, ref v) => { - assert_eq!(style, $tk); - assert_eq!(v, $v); - }, - _ => { panic!("unexpected token: {:?}", - tok) } - } - }} -} + macro_rules! next_scalar { + ($p:ident, $tk:expr, $v:expr) => {{ + let tok = $p.next().unwrap(); + match tok.1 { + Scalar(style, ref v) => { + assert_eq!(style, $tk); + assert_eq!(v, $v); + } + _ => panic!("unexpected token: {:?}", tok), + } + }}; + } -macro_rules! end { - ($p:ident) => {{ - assert_eq!($p.next(), None); - }} -} + macro_rules! end { + ($p:ident) => {{ + assert_eq!($p.next(), None); + }}; + } /// test cases in libyaml scanner.c #[test] fn test_empty() { @@ -1671,8 +1777,7 @@ macro_rules! end { #[test] fn test_explicit_scalar() { - let s = -"--- + let s = "--- 'a scalar' ... "; @@ -1687,8 +1792,7 @@ macro_rules! end { #[test] fn test_multiple_documents() { - let s = -" + let s = " 'a scalar' --- 'a scalar' @@ -1724,8 +1828,7 @@ macro_rules! end { #[test] fn test_a_flow_mapping() { - let s = -" + let s = " { a simple key: a value, # Note that the KEY token is produced. ? a complex key: another value, @@ -1751,8 +1854,7 @@ macro_rules! end { #[test] fn test_block_sequences() { - let s = -" + let s = " - item 1 - item 2 - @@ -1794,8 +1896,7 @@ macro_rules! end { #[test] fn test_block_mappings() { - let s = -" + let s = " a simple key: a value # The KEY token is produced here. ? a complex key : another value @@ -1842,13 +1943,11 @@ a sequence: next!(p, BlockEnd); next!(p, StreamEnd); end!(p); - } #[test] fn test_no_block_sequence_start() { - let s = -" + let s = " key: - item 1 - item 2 @@ -1870,8 +1969,7 @@ key: #[test] fn test_collections_in_sequence() { - let s = -" + let s = " - - item 1 - item 2 - key 1: value 1 @@ -1914,8 +2012,7 @@ key: #[test] fn test_collections_in_mapping() { - let s = -" + let s = " ? a sequence : - item 1 - item 2 @@ -1955,8 +2052,7 @@ key: #[test] fn test_spec_ex7_3() { - let s = -" + let s = " { ? foo :, : bar, diff --git a/saphyr/src/yaml.rs b/saphyr/src/yaml.rs index f5967b0..7e14435 100644 --- a/saphyr/src/yaml.rs +++ b/saphyr/src/yaml.rs @@ -1,13 +1,13 @@ +use linked_hash_map::LinkedHashMap; +use parser::*; +use scanner::{Marker, ScanError, TScalarStyle, TokenType}; use std::collections::BTreeMap; +use std::f64; +use std::i64; +use std::mem; use std::ops::Index; use std::string; -use std::i64; -use std::f64; -use std::mem; use std::vec; -use parser::*; -use scanner::{TScalarStyle, ScanError, TokenType, Marker}; -use linked_hash_map::LinkedHashMap; /// A YAML node is stored as this `Yaml` enumeration, which provides an easy way to /// access your YAML document. @@ -62,7 +62,7 @@ fn parse_f64(v: &str) -> Option { ".inf" | ".Inf" | ".INF" | "+.inf" | "+.Inf" | "+.INF" => Some(f64::INFINITY), "-.inf" | "-.Inf" | "-.INF" => Some(f64::NEG_INFINITY), ".nan" | "NaN" | ".NAN" => Some(f64::NAN), - _ => v.parse::().ok() + _ => v.parse::().ok(), } } @@ -81,31 +81,31 @@ impl MarkedEventReceiver for YamlLoader { match ev { Event::DocumentStart => { // do nothing - }, + } Event::DocumentEnd => { match self.doc_stack.len() { // empty document 0 => self.docs.push(Yaml::BadValue), 1 => self.docs.push(self.doc_stack.pop().unwrap().0), - _ => unreachable!() + _ => unreachable!(), } - }, + } Event::SequenceStart(aid) => { self.doc_stack.push((Yaml::Array(Vec::new()), aid)); - }, + } Event::SequenceEnd => { let node = self.doc_stack.pop().unwrap(); self.insert_new_node(node); - }, + } Event::MappingStart(aid) => { self.doc_stack.push((Yaml::Hash(Hash::new()), aid)); self.key_stack.push(Yaml::BadValue); - }, + } Event::MappingEnd => { self.key_stack.pop().unwrap(); let node = self.doc_stack.pop().unwrap(); self.insert_new_node(node); - }, + } Event::Scalar(v, style, aid, tag) => { let node = if style != TScalarStyle::Plain { Yaml::String(v) @@ -117,28 +117,22 @@ impl MarkedEventReceiver for YamlLoader { // "true" or "false" match v.parse::() { Err(_) => Yaml::BadValue, - Ok(v) => Yaml::Boolean(v) - } - }, - "int" => { - match v.parse::() { - Err(_) => Yaml::BadValue, - Ok(v) => Yaml::Integer(v) - } - }, - "float" => { - match parse_f64(&v) { - Some(_) => Yaml::Real(v), - None => Yaml::BadValue, - } - }, - "null" => { - match v.as_ref() { - "~" | "null" => Yaml::Null, - _ => Yaml::BadValue, + Ok(v) => Yaml::Boolean(v), } } - _ => Yaml::String(v), + "int" => match v.parse::() { + Err(_) => Yaml::BadValue, + Ok(v) => Yaml::Integer(v), + }, + "float" => match parse_f64(&v) { + Some(_) => Yaml::Real(v), + None => Yaml::BadValue, + }, + "null" => match v.as_ref() { + "~" | "null" => Yaml::Null, + _ => Yaml::BadValue, + }, + _ => Yaml::String(v), } } else { Yaml::String(v) @@ -149,7 +143,7 @@ impl MarkedEventReceiver for YamlLoader { }; self.insert_new_node((node, aid)); - }, + } Event::Alias(id) => { let n = match self.anchor_map.get(&id) { Some(v) => v.clone(), @@ -186,13 +180,13 @@ impl YamlLoader { mem::swap(&mut newkey, cur_key); h.insert(newkey, node.0); } - }, + } _ => unreachable!(), } } } - pub fn load_from_str(source: &str) -> Result, ScanError>{ + pub fn load_from_str(source: &str) -> Result, ScanError> { let mut loader = YamlLoader { docs: Vec::new(), doc_stack: Vec::new(), @@ -255,35 +249,35 @@ impl Yaml { pub fn is_null(&self) -> bool { match *self { Yaml::Null => true, - _ => false + _ => false, } } pub fn is_badvalue(&self) -> bool { match *self { Yaml::BadValue => true, - _ => false + _ => false, } } pub fn is_array(&self) -> bool { match *self { Yaml::Array(_) => true, - _ => false + _ => false, } } pub fn as_f64(&self) -> Option { match *self { Yaml::Real(ref v) => parse_f64(v), - _ => None + _ => None, } } pub fn into_f64(self) -> Option { match self { Yaml::Real(ref v) => parse_f64(v), - _ => None + _ => None, } } } @@ -315,7 +309,7 @@ impl Yaml { _ if v.parse::().is_ok() => Yaml::Integer(v.parse::().unwrap()), // try parsing as f64 _ if parse_f64(v).is_some() => Yaml::Real(v.to_owned()), - _ => Yaml::String(v.to_owned()) + _ => Yaml::String(v.to_owned()), } } } @@ -328,7 +322,7 @@ impl<'a> Index<&'a str> for Yaml { let key = Yaml::String(idx.to_owned()); match self.as_hash() { Some(h) => h.get(&key).unwrap_or(&BAD_VALUE), - None => &BAD_VALUE + None => &BAD_VALUE, } } } @@ -354,8 +348,7 @@ impl IntoIterator for Yaml { fn into_iter(self) -> Self::IntoIter { YamlIter { - yaml: self.into_vec() - .unwrap_or_else(Vec::new).into_iter() + yaml: self.into_vec().unwrap_or_else(Vec::new).into_iter(), } } } @@ -374,8 +367,8 @@ impl Iterator for YamlIter { #[cfg(test)] mod test { - use yaml::*; use std::f64; + use yaml::*; #[test] fn test_coerce() { let s = "--- @@ -424,8 +417,7 @@ a7: 你好 #[test] fn test_multi_doc() { - let s = -" + let s = " 'a scalar' --- 'a scalar' @@ -438,8 +430,7 @@ a7: 你好 #[test] fn test_anchor() { - let s = -" + let s = " a1: &DEFAULT b1: 4 b2: d @@ -452,8 +443,7 @@ a2: *DEFAULT #[test] fn test_bad_anchor() { - let s = -" + let s = " a1: &DEFAULT b1: 4 b2: *DEFAULT @@ -461,7 +451,6 @@ a1: &DEFAULT let out = YamlLoader::load_from_str(&s).unwrap(); let doc = &out[0]; assert_eq!(doc["a1"]["b2"], Yaml::BadValue); - } #[test] @@ -475,8 +464,7 @@ a1: &DEFAULT #[test] fn test_plain_datatype() { - let s = -" + let s = " - 'string' - \"string\" - string @@ -555,15 +543,23 @@ a1: &DEFAULT #[test] fn test_bad_docstart() { assert!(YamlLoader::load_from_str("---This used to cause an infinite loop").is_ok()); - assert_eq!(YamlLoader::load_from_str("----"), Ok(vec![Yaml::String(String::from("----"))])); - assert_eq!(YamlLoader::load_from_str("--- #here goes a comment"), Ok(vec![Yaml::Null])); - assert_eq!(YamlLoader::load_from_str("---- #here goes a comment"), Ok(vec![Yaml::String(String::from("----"))])); + assert_eq!( + YamlLoader::load_from_str("----"), + Ok(vec![Yaml::String(String::from("----"))]) + ); + assert_eq!( + YamlLoader::load_from_str("--- #here goes a comment"), + Ok(vec![Yaml::Null]) + ); + assert_eq!( + YamlLoader::load_from_str("---- #here goes a comment"), + Ok(vec![Yaml::String(String::from("----"))]) + ); } #[test] fn test_plain_datatype_with_into_methods() { - let s = -" + let s = " - 'string' - \"string\" - string @@ -620,9 +616,18 @@ c: ~ let out = YamlLoader::load_from_str(&s).unwrap(); let first = out.into_iter().next().unwrap(); let mut iter = first.into_hash().unwrap().into_iter(); - assert_eq!(Some((Yaml::String("b".to_owned()), Yaml::Null)), iter.next()); - assert_eq!(Some((Yaml::String("a".to_owned()), Yaml::Null)), iter.next()); - assert_eq!(Some((Yaml::String("c".to_owned()), Yaml::Null)), iter.next()); + assert_eq!( + Some((Yaml::String("b".to_owned()), Yaml::Null)), + iter.next() + ); + assert_eq!( + Some((Yaml::String("a".to_owned()), Yaml::Null)), + iter.next() + ); + assert_eq!( + Some((Yaml::String("c".to_owned()), Yaml::Null)), + iter.next() + ); assert_eq!(None, iter.next()); } @@ -641,30 +646,49 @@ c: ~ #[test] fn test_indentation_equality() { - - let four_spaces = YamlLoader::load_from_str(r#" + let four_spaces = YamlLoader::load_from_str( + r#" hash: with: indentations -"#).unwrap().into_iter().next().unwrap(); +"#, + ).unwrap() + .into_iter() + .next() + .unwrap(); - let two_spaces = YamlLoader::load_from_str(r#" + let two_spaces = YamlLoader::load_from_str( + r#" hash: with: indentations -"#).unwrap().into_iter().next().unwrap(); +"#, + ).unwrap() + .into_iter() + .next() + .unwrap(); - let one_space = YamlLoader::load_from_str(r#" + let one_space = YamlLoader::load_from_str( + r#" hash: with: indentations -"#).unwrap().into_iter().next().unwrap(); +"#, + ).unwrap() + .into_iter() + .next() + .unwrap(); - let mixed_spaces = YamlLoader::load_from_str(r#" + let mixed_spaces = YamlLoader::load_from_str( + r#" hash: with: indentations -"#).unwrap().into_iter().next().unwrap(); +"#, + ).unwrap() + .into_iter() + .next() + .unwrap(); assert_eq!(four_spaces, two_spaces); assert_eq!(two_spaces, one_space); @@ -691,7 +715,7 @@ subcommands3: let doc = &out.into_iter().next().unwrap(); println!("{:#?}", doc); - assert_eq!(doc["subcommands"][0]["server"], Yaml::Null); + assert_eq!(doc["subcommands"][0]["server"], Yaml::Null); assert!(doc["subcommands2"][0]["server"].as_hash().is_some()); assert!(doc["subcommands3"][0]["server"].as_hash().is_some()); } diff --git a/saphyr/tests/quickcheck.rs b/saphyr/tests/quickcheck.rs index 54be25d..43fd254 100644 --- a/saphyr/tests/quickcheck.rs +++ b/saphyr/tests/quickcheck.rs @@ -3,8 +3,8 @@ extern crate yaml_rust; extern crate quickcheck; use quickcheck::TestResult; -use yaml_rust::{Yaml, YamlLoader, YamlEmitter}; use std::error::Error; +use yaml_rust::{Yaml, YamlEmitter, YamlLoader}; quickcheck! { fn test_check_weird_keys(xs: Vec) -> TestResult { diff --git a/saphyr/tests/spec_test.rs b/saphyr/tests/spec_test.rs index 5e881fc..442728f 100644 --- a/saphyr/tests/spec_test.rs +++ b/saphyr/tests/spec_test.rs @@ -2,7 +2,7 @@ #![allow(non_upper_case_globals)] extern crate yaml_rust; -use yaml_rust::parser::{Parser, EventReceiver, Event}; +use yaml_rust::parser::{Event, EventReceiver, Parser}; use yaml_rust::scanner::TScalarStyle; // These names match the names used in the C++ test suite. @@ -21,7 +21,7 @@ enum TestEvent { } struct YamlChecker { - pub evs: Vec + pub evs: Vec, } impl EventReceiver for YamlChecker { @@ -33,36 +33,36 @@ impl EventReceiver for YamlChecker { Event::SequenceEnd => TestEvent::OnSequenceEnd, Event::MappingStart(..) => TestEvent::OnMapStart, Event::MappingEnd => TestEvent::OnMapEnd, - Event::Scalar(ref v, style, _, _)=> { + Event::Scalar(ref v, style, _, _) => { if v == "~" && style == TScalarStyle::Plain { TestEvent::OnNull } else { TestEvent::OnScalar } - }, + } Event::Alias(_) => TestEvent::OnAlias, - _ => { return } // ignore other events + _ => return, // ignore other events }; self.evs.push(tev); } } fn str_to_test_events(docs: &str) -> Vec { - let mut p = YamlChecker { - evs: Vec::new() - }; + let mut p = YamlChecker { evs: Vec::new() }; let mut parser = Parser::new(docs.chars()); parser.load(&mut p, true).unwrap(); p.evs } macro_rules! assert_next { - ($v:expr, $p:pat) => ( + ($v:expr, $p:pat) => { match $v.next().unwrap() { - $p => {}, - e => { panic!("unexpected event: {:?}", e); } + $p => {} + e => { + panic!("unexpected event: {:?}", e); + } } - ) + }; } // auto generated from handler_spec_test.cpp @@ -76,66 +76,65 @@ include!("spec_test.rs.inc"); #[test] fn test_mapvec_legal() { - use yaml_rust::yaml::{Array, Hash, Yaml}; - use yaml_rust::{YamlLoader, YamlEmitter}; + use yaml_rust::yaml::{Array, Hash, Yaml}; + use yaml_rust::{YamlEmitter, YamlLoader}; - // Emitting a `map>, _>` should result in legal yaml that - // we can parse. + // Emitting a `map>, _>` should result in legal yaml that + // we can parse. - let mut key = Array::new(); - key.push(Yaml::Integer(1)); - key.push(Yaml::Integer(2)); - key.push(Yaml::Integer(3)); + let mut key = Array::new(); + key.push(Yaml::Integer(1)); + key.push(Yaml::Integer(2)); + key.push(Yaml::Integer(3)); - let mut keyhash = Hash::new(); - keyhash.insert(Yaml::String("key".into()), Yaml::Array(key)); + let mut keyhash = Hash::new(); + keyhash.insert(Yaml::String("key".into()), Yaml::Array(key)); - let mut val = Array::new(); - val.push(Yaml::Integer(4)); - val.push(Yaml::Integer(5)); - val.push(Yaml::Integer(6)); + let mut val = Array::new(); + val.push(Yaml::Integer(4)); + val.push(Yaml::Integer(5)); + val.push(Yaml::Integer(6)); - let mut hash = Hash::new(); - hash.insert(Yaml::Hash(keyhash), Yaml::Array(val)); + let mut hash = Hash::new(); + hash.insert(Yaml::Hash(keyhash), Yaml::Array(val)); - let mut out_str = String::new(); - { - let mut emitter = YamlEmitter::new(&mut out_str); - emitter.dump(&Yaml::Hash(hash)).unwrap(); - } + let mut out_str = String::new(); + { + let mut emitter = YamlEmitter::new(&mut out_str); + emitter.dump(&Yaml::Hash(hash)).unwrap(); + } - // At this point, we are tempted to naively render like this: - // - // ```yaml - // --- - // {key: - // - 1 - // - 2 - // - 3}: - // - 4 - // - 5 - // - 6 - // ``` - // - // However, this doesn't work, because the key sequence [1, 2, 3] is - // rendered in block mode, which is not legal (as far as I can tell) - // inside the flow mode of the key. We need to either fully render - // everything that's in a key in flow mode (which may make for some - // long lines), or use the explicit map identifier '?': - // - // ```yaml - // --- - // ? - // key: - // - 1 - // - 2 - // - 3 - // : - // - 4 - // - 5 - // - 6 - // ``` + // At this point, we are tempted to naively render like this: + // + // ```yaml + // --- + // {key: + // - 1 + // - 2 + // - 3}: + // - 4 + // - 5 + // - 6 + // ``` + // + // However, this doesn't work, because the key sequence [1, 2, 3] is + // rendered in block mode, which is not legal (as far as I can tell) + // inside the flow mode of the key. We need to either fully render + // everything that's in a key in flow mode (which may make for some + // long lines), or use the explicit map identifier '?': + // + // ```yaml + // --- + // ? + // key: + // - 1 + // - 2 + // - 3 + // : + // - 4 + // - 5 + // - 6 + // ``` - YamlLoader::load_from_str(&out_str).unwrap(); + YamlLoader::load_from_str(&out_str).unwrap(); } - From 760f4f86d0e1d6755fcf0b08ffed01ca544468a8 Mon Sep 17 00:00:00 2001 From: David Tolnay Date: Sat, 15 Sep 2018 10:02:56 -0700 Subject: [PATCH 163/380] Don't yet use tool attrs because they are not stable --- saphyr/src/lib.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/saphyr/src/lib.rs b/saphyr/src/lib.rs index 36932d9..28cee86 100644 --- a/saphyr/src/lib.rs +++ b/saphyr/src/lib.rs @@ -36,6 +36,7 @@ //! //! ``` +#![cfg_attr(feature = "cargo-clippy", allow(renamed_and_removed_lints))] #![cfg_attr(feature = "cargo-clippy", warn(cyclomatic_complexity))] #![cfg_attr(feature = "cargo-clippy", allow(match_same_arms))] From 1f0d33259b0b89b0b29d3bb0e0ed3bbb442fb67b Mon Sep 17 00:00:00 2001 From: David Tolnay Date: Sat, 15 Sep 2018 10:03:55 -0700 Subject: [PATCH 164/380] Address redundant_field_names lint --- saphyr/.travis.yml | 2 +- saphyr/src/emitter.rs | 3 +-- saphyr/src/scanner.rs | 10 +++------- 3 files changed, 5 insertions(+), 10 deletions(-) diff --git a/saphyr/.travis.yml b/saphyr/.travis.yml index e2333c1..513b2ef 100644 --- a/saphyr/.travis.yml +++ b/saphyr/.travis.yml @@ -5,7 +5,7 @@ matrix: - rust: stable - rust: beta - rust: nightly - - rust: 1.13.0 + - rust: 1.17.0 script: cargo build - rust: 1.22.1 - rust: nightly diff --git a/saphyr/src/emitter.rs b/saphyr/src/emitter.rs index cb238be..ad7882f 100644 --- a/saphyr/src/emitter.rs +++ b/saphyr/src/emitter.rs @@ -113,10 +113,9 @@ fn escape_str(wr: &mut fmt::Write, v: &str) -> Result<(), fmt::Error> { impl<'a> YamlEmitter<'a> { pub fn new(writer: &'a mut fmt::Write) -> YamlEmitter { YamlEmitter { - writer: writer, + writer, best_indent: 2, compact: true, - level: -1, } } diff --git a/saphyr/src/scanner.rs b/saphyr/src/scanner.rs index 366eee5..da7e4ef 100644 --- a/saphyr/src/scanner.rs +++ b/saphyr/src/scanner.rs @@ -27,11 +27,7 @@ pub struct Marker { impl Marker { fn new(index: usize, line: usize, col: usize) -> Marker { - Marker { - index: index, - line: line, - col: col, - } + Marker { index, line, col } } pub fn index(&self) -> usize { @@ -135,7 +131,7 @@ impl SimpleKey { possible: false, required: false, token_number: 0, - mark: mark, + mark, } } } @@ -227,7 +223,7 @@ impl> Scanner { /// Creates the YAML tokenizer. pub fn new(rdr: T) -> Scanner { Scanner { - rdr: rdr, + rdr, buffer: VecDeque::new(), mark: Marker::new(0, 1, 0), tokens: VecDeque::new(), From 3250426de75ca15b139a0d2614affc10bb9547de Mon Sep 17 00:00:00 2001 From: Konrad Borowski Date: Sat, 15 Sep 2018 18:49:17 +0200 Subject: [PATCH 165/380] Add testing whether load(dump(x)) is an identity in quickcheck --- saphyr/tests/quickcheck.rs | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/saphyr/tests/quickcheck.rs b/saphyr/tests/quickcheck.rs index 43fd254..c2c89bc 100644 --- a/saphyr/tests/quickcheck.rs +++ b/saphyr/tests/quickcheck.rs @@ -9,13 +9,14 @@ use yaml_rust::{Yaml, YamlEmitter, YamlLoader}; quickcheck! { fn test_check_weird_keys(xs: Vec) -> TestResult { let mut out_str = String::new(); + let input = Yaml::Array(xs.into_iter().map(Yaml::String).collect()); { let mut emitter = YamlEmitter::new(&mut out_str); - emitter.dump(&Yaml::Array(xs.into_iter().map(Yaml::String).collect())).unwrap(); + emitter.dump(&input).unwrap(); } - if let Err(err) = YamlLoader::load_from_str(&out_str) { - return TestResult::error(err.description()); + match YamlLoader::load_from_str(&out_str) { + Ok(output) => TestResult::from_bool(output.len() == 1 && input == output[0]), + Err(err) => TestResult::error(err.description()), } - TestResult::passed() } } From ef9b526d59186a040d3bf2f0780d87873a9e78b8 Mon Sep 17 00:00:00 2001 From: David Tolnay Date: Sat, 15 Sep 2018 10:06:46 -0700 Subject: [PATCH 166/380] Ignore should_implement_trait lint The Parser::next method is already a public API. --- saphyr/src/lib.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/saphyr/src/lib.rs b/saphyr/src/lib.rs index 28cee86..e3c9f64 100644 --- a/saphyr/src/lib.rs +++ b/saphyr/src/lib.rs @@ -38,7 +38,10 @@ #![cfg_attr(feature = "cargo-clippy", allow(renamed_and_removed_lints))] #![cfg_attr(feature = "cargo-clippy", warn(cyclomatic_complexity))] -#![cfg_attr(feature = "cargo-clippy", allow(match_same_arms))] +#![cfg_attr( + feature = "cargo-clippy", + allow(match_same_arms, should_implement_trait) +)] extern crate linked_hash_map; From 261942922f5e2f21ab48e91a0d2293f598c9aa82 Mon Sep 17 00:00:00 2001 From: David Tolnay Date: Sat, 15 Sep 2018 10:11:00 -0700 Subject: [PATCH 167/380] Require clippy to pass when it can be installed --- saphyr/.travis.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/saphyr/.travis.yml b/saphyr/.travis.yml index 513b2ef..b31f2fd 100644 --- a/saphyr/.travis.yml +++ b/saphyr/.travis.yml @@ -9,10 +9,10 @@ matrix: script: cargo build - rust: 1.22.1 - rust: nightly - script: rustup component add clippy-preview && cargo clippy -- -Dclippy - allow_failures: - - rust: nightly - script: rustup component add clippy-preview && cargo clippy -- -Dclippy + script: | + if rustup component add clippy-preview; then + cargo clippy -- -Dclippy + fi script: - cargo build From 51566fafce2ae27a6b2075eae91f9d14f30f8c6b Mon Sep 17 00:00:00 2001 From: David Tolnay Date: Sat, 15 Sep 2018 10:12:16 -0700 Subject: [PATCH 168/380] Tag clippy build in travis build list --- saphyr/.travis.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/saphyr/.travis.yml b/saphyr/.travis.yml index b31f2fd..873ded1 100644 --- a/saphyr/.travis.yml +++ b/saphyr/.travis.yml @@ -9,6 +9,7 @@ matrix: script: cargo build - rust: 1.22.1 - rust: nightly + env: CLIPPY script: | if rustup component add clippy-preview; then cargo clippy -- -Dclippy From cedb185876076e0f8562238744e3cda22552ba15 Mon Sep 17 00:00:00 2001 From: David Tolnay Date: Sat, 15 Sep 2018 12:20:11 -0700 Subject: [PATCH 169/380] Escape string containing colon I don't know whether this is always necessary but it is required for correctly serializing `["x: %"]`. If we serialize this without quotes to `[x: %]` then the result is not valid YAML. --- saphyr/src/emitter.rs | 5 +++-- saphyr/tests/test_round_trip.rs | 23 +++++++++++++++++++++++ 2 files changed, 26 insertions(+), 2 deletions(-) create mode 100644 saphyr/tests/test_round_trip.rs diff --git a/saphyr/src/emitter.rs b/saphyr/src/emitter.rs index ad7882f..10f43ab 100644 --- a/saphyr/src/emitter.rs +++ b/saphyr/src/emitter.rs @@ -301,11 +301,12 @@ fn need_quotes(string: &str) -> bool { string == "" || need_quotes_spaces(string) || string.starts_with(|character: char| match character { - ':' | '&' | '*' | '?' | '|' | '-' | '<' | '>' | '=' | '!' | '%' | '@' => true, + '&' | '*' | '?' | '|' | '-' | '<' | '>' | '=' | '!' | '%' | '@' => true, _ => false, }) || string.contains(|character: char| match character { - '{' + ':' + | '{' | '}' | '[' | ']' diff --git a/saphyr/tests/test_round_trip.rs b/saphyr/tests/test_round_trip.rs new file mode 100644 index 0000000..bfa9602 --- /dev/null +++ b/saphyr/tests/test_round_trip.rs @@ -0,0 +1,23 @@ +extern crate yaml_rust; + +use yaml_rust::{Yaml, YamlEmitter, YamlLoader}; + +fn test_round_trip(original: &Yaml) { + let mut out = String::new(); + YamlEmitter::new(&mut out).dump(original).unwrap(); + let documents = YamlLoader::load_from_str(&out).unwrap(); + assert_eq!(documents.len(), 1); + assert_eq!(documents[0], *original); +} + +#[test] +fn test_escape_character() { + let y = Yaml::String("\x1b".to_owned()); + test_round_trip(&y); +} + +#[test] +fn test_colon_in_string() { + let y = Yaml::String("x: %".to_owned()); + test_round_trip(&y); +} From b30f9a0a30465024335640f2d726242051d0c6ff Mon Sep 17 00:00:00 2001 From: David Tolnay Date: Sat, 15 Sep 2018 12:24:47 -0700 Subject: [PATCH 170/380] Remove commented key in Cargo.toml --- saphyr/Cargo.toml | 1 - 1 file changed, 1 deletion(-) diff --git a/saphyr/Cargo.toml b/saphyr/Cargo.toml index 545c20d..7907a15 100644 --- a/saphyr/Cargo.toml +++ b/saphyr/Cargo.toml @@ -7,7 +7,6 @@ documentation = "https://docs.rs/crate/yaml-rust/" license = "MIT/Apache-2.0" description = "The missing YAML 1.2 parser for rust" repository = "https://github.com/chyh1990/yaml-rust" -# publish = false # this branch contains breaking changes [dependencies] linked-hash-map = ">=0.0.9, <0.6" From f5afa3315056b6371f27bc2f4f7d80fd47058e62 Mon Sep 17 00:00:00 2001 From: David Tolnay Date: Sat, 15 Sep 2018 12:25:15 -0700 Subject: [PATCH 171/380] Render readme on crates.io --- saphyr/Cargo.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/saphyr/Cargo.toml b/saphyr/Cargo.toml index 7907a15..0d88bf1 100644 --- a/saphyr/Cargo.toml +++ b/saphyr/Cargo.toml @@ -7,6 +7,7 @@ documentation = "https://docs.rs/crate/yaml-rust/" license = "MIT/Apache-2.0" description = "The missing YAML 1.2 parser for rust" repository = "https://github.com/chyh1990/yaml-rust" +readme = "README.md" [dependencies] linked-hash-map = ">=0.0.9, <0.6" From cd4ad696963e0b6803c30967fe97c4933c82219c Mon Sep 17 00:00:00 2001 From: David Tolnay Date: Sat, 15 Sep 2018 12:25:55 -0700 Subject: [PATCH 172/380] Fix docs.rs link --- saphyr/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/saphyr/Cargo.toml b/saphyr/Cargo.toml index 0d88bf1..a0f8559 100644 --- a/saphyr/Cargo.toml +++ b/saphyr/Cargo.toml @@ -3,7 +3,7 @@ name = "yaml-rust" version = "0.4.0" authors = ["Yuheng Chen "] homepage = "http://chyh1990.github.io/yaml-rust/" -documentation = "https://docs.rs/crate/yaml-rust/" +documentation = "https://docs.rs/yaml-rust" license = "MIT/Apache-2.0" description = "The missing YAML 1.2 parser for rust" repository = "https://github.com/chyh1990/yaml-rust" From 9fe981bc69dbd3291d8ff2fc47c37d0b77302efc Mon Sep 17 00:00:00 2001 From: David Tolnay Date: Sat, 15 Sep 2018 12:28:45 -0700 Subject: [PATCH 173/380] Reorganize readme links --- saphyr/README.md | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/saphyr/README.md b/saphyr/README.md index a012264..f410525 100644 --- a/saphyr/README.md +++ b/saphyr/README.md @@ -2,10 +2,10 @@ The missing YAML 1.2 implementation for Rust. -[![Build Status](https://travis-ci.org/chyh1990/yaml-rust.svg?branch=master)](https://travis-ci.org/chyh1990/yaml-rust) -[![Build status](https://ci.appveyor.com/api/projects/status/scf47535ckp4ylg4?svg=true)](https://ci.appveyor.com/project/chyh1990/yaml-rust) -[![license](https://img.shields.io/crates/l/yaml-rust.svg)](https://crates.io/crates/yaml-rust/) -[![version](https://img.shields.io/crates/v/yaml-rust.svg)](https://crates.io/crates/yaml-rust/) +[![Travis](https://travis-ci.org/chyh1990/yaml-rust.svg?branch=master)](https://travis-ci.org/chyh1990/yaml-rust) +[![AppVeyor](https://ci.appveyor.com/api/projects/status/scf47535ckp4ylg4?svg=true)](https://ci.appveyor.com/project/chyh1990/yaml-rust) +[![crates.io](https://img.shields.io/crates/v/yaml-rust.svg)](https://crates.io/crates/yaml-rust) +[![docs.rs](https://img.shields.io/badge/api-rustdoc-blue.svg)](https://docs.rs/yaml-rust) `yaml-rust` is a pure Rust YAML 1.2 implementation, which enjoys the memory safety @@ -15,8 +15,6 @@ The parser is heavily influenced by `libyaml` and `yaml-cpp`. This crate works on all Rust-supported platforms. It also works on Rust 1.0.0 and nightly! -See [Document](http://chyh1990.github.io/yaml-rust/doc/yaml_rust/) - ## Quick Start Add the following to the Cargo.toml of your project: From e7b5041443be43ee19afd72d9460c081e7cdb240 Mon Sep 17 00:00:00 2001 From: David Tolnay Date: Sat, 15 Sep 2018 12:29:02 -0700 Subject: [PATCH 174/380] Remove outdated note about version support --- saphyr/README.md | 3 --- 1 file changed, 3 deletions(-) diff --git a/saphyr/README.md b/saphyr/README.md index f410525..d3fb5bd 100644 --- a/saphyr/README.md +++ b/saphyr/README.md @@ -12,9 +12,6 @@ which enjoys the memory safety property and other benefits from the Rust language. The parser is heavily influenced by `libyaml` and `yaml-cpp`. -This crate works on all Rust-supported platforms. It also works on -Rust 1.0.0 and nightly! - ## Quick Start Add the following to the Cargo.toml of your project: From ddad3c762c9e7039cf41a640dc76093abaf3041c Mon Sep 17 00:00:00 2001 From: David Tolnay Date: Sat, 15 Sep 2018 12:29:20 -0700 Subject: [PATCH 175/380] Stop recommending a git dependency --- saphyr/README.md | 7 ------- 1 file changed, 7 deletions(-) diff --git a/saphyr/README.md b/saphyr/README.md index d3fb5bd..8517ad3 100644 --- a/saphyr/README.md +++ b/saphyr/README.md @@ -21,13 +21,6 @@ Add the following to the Cargo.toml of your project: yaml-rust = "0.4" ``` -or - -```toml -[dependencies.yaml-rust] -git = "https://github.com/chyh1990/yaml-rust.git" -``` - and import: ```rust From 5443ce4368da1e46155a3ff3489a0f1a83580741 Mon Sep 17 00:00:00 2001 From: David Tolnay Date: Sat, 15 Sep 2018 12:30:27 -0700 Subject: [PATCH 176/380] Set html_root_url --- saphyr/Cargo.toml | 2 +- saphyr/src/lib.rs | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/saphyr/Cargo.toml b/saphyr/Cargo.toml index a0f8559..c389dea 100644 --- a/saphyr/Cargo.toml +++ b/saphyr/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "yaml-rust" -version = "0.4.0" +version = "0.4.0" # remember to update html_root_url authors = ["Yuheng Chen "] homepage = "http://chyh1990.github.io/yaml-rust/" documentation = "https://docs.rs/yaml-rust" diff --git a/saphyr/src/lib.rs b/saphyr/src/lib.rs index e3c9f64..d7967d1 100644 --- a/saphyr/src/lib.rs +++ b/saphyr/src/lib.rs @@ -36,6 +36,7 @@ //! //! ``` +#![doc(html_root_url = "https://docs.rs/yaml-rust/0.4.0")] #![cfg_attr(feature = "cargo-clippy", allow(renamed_and_removed_lints))] #![cfg_attr(feature = "cargo-clippy", warn(cyclomatic_complexity))] #![cfg_attr( From fd169229b0dfa0aa1e798fb12ad478a570dd7575 Mon Sep 17 00:00:00 2001 From: David Tolnay Date: Sat, 15 Sep 2018 12:31:11 -0700 Subject: [PATCH 177/380] Release 0.4.1 --- saphyr/Cargo.toml | 2 +- saphyr/src/lib.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/saphyr/Cargo.toml b/saphyr/Cargo.toml index c389dea..cdab262 100644 --- a/saphyr/Cargo.toml +++ b/saphyr/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "yaml-rust" -version = "0.4.0" # remember to update html_root_url +version = "0.4.1" # remember to update html_root_url authors = ["Yuheng Chen "] homepage = "http://chyh1990.github.io/yaml-rust/" documentation = "https://docs.rs/yaml-rust" diff --git a/saphyr/src/lib.rs b/saphyr/src/lib.rs index d7967d1..aa56a8e 100644 --- a/saphyr/src/lib.rs +++ b/saphyr/src/lib.rs @@ -36,7 +36,7 @@ //! //! ``` -#![doc(html_root_url = "https://docs.rs/yaml-rust/0.4.0")] +#![doc(html_root_url = "https://docs.rs/yaml-rust/0.4.1")] #![cfg_attr(feature = "cargo-clippy", allow(renamed_and_removed_lints))] #![cfg_attr(feature = "cargo-clippy", warn(cyclomatic_complexity))] #![cfg_attr( From dc1ed3d39650b81f584b480ddd3a91a12d84a5c6 Mon Sep 17 00:00:00 2001 From: David Tolnay Date: Sat, 15 Sep 2018 13:09:07 -0700 Subject: [PATCH 178/380] Lowercase the recursion limit error message To be consistent with the other errors in this crate. --- saphyr/src/scanner.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/saphyr/src/scanner.rs b/saphyr/src/scanner.rs index da7e4ef..d4b8774 100644 --- a/saphyr/src/scanner.rs +++ b/saphyr/src/scanner.rs @@ -989,7 +989,7 @@ impl> Scanner { self.flow_level = self .flow_level .checked_add(1) - .ok_or_else(|| ScanError::new(self.mark, "Recursion limit exceeded"))?; + .ok_or_else(|| ScanError::new(self.mark, "recursion limit exceeded"))?; Ok(()) } fn decrease_flow_level(&mut self) { From e73d5d02565cad3c3607a52f6c4db74f6f6c27ed Mon Sep 17 00:00:00 2001 From: David Tolnay Date: Sat, 15 Sep 2018 13:09:48 -0700 Subject: [PATCH 179/380] Release 0.4.2 --- saphyr/Cargo.toml | 2 +- saphyr/src/lib.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/saphyr/Cargo.toml b/saphyr/Cargo.toml index cdab262..d251b41 100644 --- a/saphyr/Cargo.toml +++ b/saphyr/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "yaml-rust" -version = "0.4.1" # remember to update html_root_url +version = "0.4.2" # remember to update html_root_url authors = ["Yuheng Chen "] homepage = "http://chyh1990.github.io/yaml-rust/" documentation = "https://docs.rs/yaml-rust" diff --git a/saphyr/src/lib.rs b/saphyr/src/lib.rs index aa56a8e..3ff03c9 100644 --- a/saphyr/src/lib.rs +++ b/saphyr/src/lib.rs @@ -36,7 +36,7 @@ //! //! ``` -#![doc(html_root_url = "https://docs.rs/yaml-rust/0.4.1")] +#![doc(html_root_url = "https://docs.rs/yaml-rust/0.4.2")] #![cfg_attr(feature = "cargo-clippy", allow(renamed_and_removed_lints))] #![cfg_attr(feature = "cargo-clippy", warn(cyclomatic_complexity))] #![cfg_attr( From 20190538af15f606e33c14818e57fe82781914ad Mon Sep 17 00:00:00 2001 From: David Tolnay Date: Sat, 15 Sep 2018 23:58:48 -0700 Subject: [PATCH 180/380] Replace try! with question mark --- saphyr/src/emitter.rs | 76 ++++++++++++++-------------- saphyr/src/lib.rs | 2 +- saphyr/src/parser.rs | 114 +++++++++++++++++++++--------------------- saphyr/src/scanner.rs | 82 +++++++++++++++--------------- saphyr/src/yaml.rs | 2 +- 5 files changed, 138 insertions(+), 138 deletions(-) diff --git a/saphyr/src/emitter.rs b/saphyr/src/emitter.rs index 10f43ab..7806922 100644 --- a/saphyr/src/emitter.rs +++ b/saphyr/src/emitter.rs @@ -49,7 +49,7 @@ pub type EmitResult = Result<(), EmitError>; // from serialize::json fn escape_str(wr: &mut fmt::Write, v: &str) -> Result<(), fmt::Error> { - try!(wr.write_str("\"")); + wr.write_str("\"")?; let mut start = 0; @@ -94,19 +94,19 @@ fn escape_str(wr: &mut fmt::Write, v: &str) -> Result<(), fmt::Error> { }; if start < i { - try!(wr.write_str(&v[start..i])); + wr.write_str(&v[start..i])?; } - try!(wr.write_str(escaped)); + wr.write_str(escaped)?; start = i + 1; } if start != v.len() { - try!(wr.write_str(&v[start..])); + wr.write_str(&v[start..])?; } - try!(wr.write_str("\"")); + wr.write_str("\"")?; Ok(()) } @@ -139,7 +139,7 @@ impl<'a> YamlEmitter<'a> { pub fn dump(&mut self, doc: &Yaml) -> EmitResult { // write DocumentStart - try!(write!(self.writer, "---\n")); + write!(self.writer, "---\n")?; self.level = -1; self.emit_node(doc) } @@ -150,7 +150,7 @@ impl<'a> YamlEmitter<'a> { } for _ in 0..self.level { for _ in 0..self.best_indent { - try!(write!(self.writer, " ")); + write!(self.writer, " ")?; } } Ok(()) @@ -162,30 +162,30 @@ impl<'a> YamlEmitter<'a> { Yaml::Hash(ref h) => self.emit_hash(h), Yaml::String(ref v) => { if need_quotes(v) { - try!(escape_str(self.writer, v)); + escape_str(self.writer, v)?; } else { - try!(write!(self.writer, "{}", v)); + write!(self.writer, "{}", v)?; } Ok(()) } Yaml::Boolean(v) => { if v { - try!(self.writer.write_str("true")); + self.writer.write_str("true")?; } else { - try!(self.writer.write_str("false")); + self.writer.write_str("false")?; } Ok(()) } Yaml::Integer(v) => { - try!(write!(self.writer, "{}", v)); + write!(self.writer, "{}", v)?; Ok(()) } Yaml::Real(ref v) => { - try!(write!(self.writer, "{}", v)); + write!(self.writer, "{}", v)?; Ok(()) } Yaml::Null | Yaml::BadValue => { - try!(write!(self.writer, "~")); + write!(self.writer, "~")?; Ok(()) } // XXX(chenyh) Alias @@ -195,16 +195,16 @@ impl<'a> YamlEmitter<'a> { fn emit_array(&mut self, v: &[Yaml]) -> EmitResult { if v.is_empty() { - try!(write!(self.writer, "[]")); + write!(self.writer, "[]")?; } else { self.level += 1; for (cnt, x) in v.iter().enumerate() { if cnt > 0 { - try!(write!(self.writer, "\n")); - try!(self.write_indent()); + write!(self.writer, "\n")?; + self.write_indent()?; } - try!(write!(self.writer, "-")); - try!(self.emit_val(true, x)); + write!(self.writer, "-")?; + self.emit_val(true, x)?; } self.level -= 1; } @@ -213,7 +213,7 @@ impl<'a> YamlEmitter<'a> { fn emit_hash(&mut self, h: &Hash) -> EmitResult { if h.is_empty() { - try!(self.writer.write_str("{}")); + self.writer.write_str("{}")?; } else { self.level += 1; for (cnt, (k, v)) in h.iter().enumerate() { @@ -222,20 +222,20 @@ impl<'a> YamlEmitter<'a> { _ => false, }; if cnt > 0 { - try!(write!(self.writer, "\n")); - try!(self.write_indent()); + write!(self.writer, "\n")?; + self.write_indent()?; } if complex_key { - try!(write!(self.writer, "?")); - try!(self.emit_val(true, k)); - try!(write!(self.writer, "\n")); - try!(self.write_indent()); - try!(write!(self.writer, ":")); - try!(self.emit_val(true, v)); + write!(self.writer, "?")?; + self.emit_val(true, k)?; + write!(self.writer, "\n")?; + self.write_indent()?; + write!(self.writer, ":")?; + self.emit_val(true, v)?; } else { - try!(self.emit_node(k)); - try!(write!(self.writer, ":")); - try!(self.emit_val(false, v)); + self.emit_node(k)?; + write!(self.writer, ":")?; + self.emit_val(false, v)?; } } self.level -= 1; @@ -251,28 +251,28 @@ impl<'a> YamlEmitter<'a> { match *val { Yaml::Array(ref v) => { if (inline && self.compact) || v.is_empty() { - try!(write!(self.writer, " ")); + write!(self.writer, " ")?; } else { - try!(write!(self.writer, "\n")); + write!(self.writer, "\n")?; self.level += 1; - try!(self.write_indent()); + self.write_indent()?; self.level -= 1; } self.emit_array(v) } Yaml::Hash(ref h) => { if (inline && self.compact) || h.is_empty() { - try!(write!(self.writer, " ")); + write!(self.writer, " ")?; } else { - try!(write!(self.writer, "\n")); + write!(self.writer, "\n")?; self.level += 1; - try!(self.write_indent()); + self.write_indent()?; self.level -= 1; } self.emit_hash(h) } _ => { - try!(write!(self.writer, " ")); + write!(self.writer, " ")?; self.emit_node(val) } } diff --git a/saphyr/src/lib.rs b/saphyr/src/lib.rs index 3ff03c9..b65c973 100644 --- a/saphyr/src/lib.rs +++ b/saphyr/src/lib.rs @@ -102,7 +102,7 @@ mod tests { } fn try_fail(s: &str) -> Result, ScanError> { - let t = try!(YamlLoader::load_from_str(s)); + let t = YamlLoader::load_from_str(s)?; Ok(t) } diff --git a/saphyr/src/parser.rs b/saphyr/src/parser.rs index 190d84a..22692ff 100644 --- a/saphyr/src/parser.rs +++ b/saphyr/src/parser.rs @@ -110,7 +110,7 @@ impl> Parser { match self.current { Some(ref x) => Ok(x), None => { - self.current = Some(try!(self.next())); + self.current = Some(self.next()?); self.peek() } } @@ -126,7 +126,7 @@ impl> Parser { fn peek_token(&mut self) -> Result<&Token, ScanError> { match self.token { None => { - self.token = Some(try!(self.scan_next_token())); + self.token = Some(self.scan_next_token()?); Ok(self.token.as_ref().unwrap()) } Some(ref tok) => Ok(tok), @@ -165,7 +165,7 @@ impl> Parser { if self.state == State::End { return Ok((Event::StreamEnd, self.scanner.mark())); } - let (ev, mark) = try!(self.state_machine()); + let (ev, mark) = self.state_machine()?; // println!("EV {:?}", ev); Ok((ev, mark)) } @@ -176,7 +176,7 @@ impl> Parser { multi: bool, ) -> Result<(), ScanError> { if !self.scanner.stream_started() { - let (ev, mark) = try!(self.next()); + let (ev, mark) = self.next()?; assert_eq!(ev, Event::StreamStart); recv.on_event(ev, mark); } @@ -187,14 +187,14 @@ impl> Parser { return Ok(()); } loop { - let (ev, mark) = try!(self.next()); + let (ev, mark) = self.next()?; if ev == Event::StreamEnd { recv.on_event(ev, mark); return Ok(()); } // clear anchors before a new document self.anchors.clear(); - try!(self.load_document(ev, mark, recv)); + self.load_document(ev, mark, recv)?; if !multi { break; } @@ -211,11 +211,11 @@ impl> Parser { assert_eq!(first_ev, Event::DocumentStart); recv.on_event(first_ev, mark); - let (ev, mark) = try!(self.next()); - try!(self.load_node(ev, mark, recv)); + let (ev, mark) = self.next()?; + self.load_node(ev, mark, recv)?; // DOCUMENT-END is expected. - let (ev, mark) = try!(self.next()); + let (ev, mark) = self.next()?; assert_eq!(ev, Event::DocumentEnd); recv.on_event(ev, mark); @@ -249,17 +249,17 @@ impl> Parser { } fn load_mapping(&mut self, recv: &mut R) -> Result<(), ScanError> { - let (mut key_ev, mut key_mark) = try!(self.next()); + let (mut key_ev, mut key_mark) = self.next()?; while key_ev != Event::MappingEnd { // key - try!(self.load_node(key_ev, key_mark, recv)); + self.load_node(key_ev, key_mark, recv)?; // value - let (ev, mark) = try!(self.next()); - try!(self.load_node(ev, mark, recv)); + let (ev, mark) = self.next()?; + self.load_node(ev, mark, recv)?; // next event - let (ev, mark) = try!(self.next()); + let (ev, mark) = self.next()?; key_ev = ev; key_mark = mark; } @@ -268,12 +268,12 @@ impl> Parser { } fn load_sequence(&mut self, recv: &mut R) -> Result<(), ScanError> { - let (mut ev, mut mark) = try!(self.next()); + let (mut ev, mut mark) = self.next()?; while ev != Event::SequenceEnd { - try!(self.load_node(ev, mark, recv)); + self.load_node(ev, mark, recv)?; // next event - let (next_ev, next_mark) = try!(self.next()); + let (next_ev, next_mark) = self.next()?; ev = next_ev; mark = next_mark; } @@ -282,7 +282,7 @@ impl> Parser { } fn state_machine(&mut self) -> ParseResult { - // let next_tok = try!(self.peek_token()); + // let next_tok = self.peek_token()?; // println!("cur_state {:?}, next tok: {:?}", self.state, next_tok); match self.state { State::StreamStart => self.stream_start(), @@ -322,7 +322,7 @@ impl> Parser { } fn stream_start(&mut self) -> ParseResult { - match *try!(self.peek_token()) { + match *self.peek_token()? { Token(mark, TokenType::StreamStart(_)) => { self.state = State::ImplicitDocumentStart; self.skip(); @@ -334,12 +334,12 @@ impl> Parser { fn document_start(&mut self, implicit: bool) -> ParseResult { if !implicit { - while let TokenType::DocumentEnd = try!(self.peek_token()).1 { + while let TokenType::DocumentEnd = self.peek_token()?.1 { self.skip(); } } - match *try!(self.peek_token()) { + match *self.peek_token()? { Token(mark, TokenType::StreamEnd) => { self.state = State::End; self.skip(); @@ -352,7 +352,7 @@ impl> Parser { self._explict_document_start() } Token(mark, _) if implicit => { - try!(self.parser_process_directives()); + self.parser_process_directives()?; self.push_state(State::DocumentEnd); self.state = State::BlockNode; Ok((Event::DocumentStart, mark)) @@ -366,7 +366,7 @@ impl> Parser { fn parser_process_directives(&mut self) -> Result<(), ScanError> { loop { - match try!(self.peek_token()).1 { + match self.peek_token()?.1 { TokenType::VersionDirective(_, _) => { // XXX parsing with warning according to spec //if major != 1 || minor > 2 { @@ -386,8 +386,8 @@ impl> Parser { } fn _explict_document_start(&mut self) -> ParseResult { - try!(self.parser_process_directives()); - match *try!(self.peek_token()) { + self.parser_process_directives()?; + match *self.peek_token()? { Token(mark, TokenType::DocumentStart) => { self.push_state(State::DocumentEnd); self.state = State::DocumentContent; @@ -402,7 +402,7 @@ impl> Parser { } fn document_content(&mut self) -> ParseResult { - match *try!(self.peek_token()) { + match *self.peek_token()? { Token(mark, TokenType::VersionDirective(..)) | Token(mark, TokenType::TagDirective(..)) | Token(mark, TokenType::DocumentStart) @@ -418,7 +418,7 @@ impl> Parser { fn document_end(&mut self) -> ParseResult { let mut _implicit = true; - let marker: Marker = match *try!(self.peek_token()) { + let marker: Marker = match *self.peek_token()? { Token(mark, TokenType::DocumentEnd) => { self.skip(); _implicit = false; @@ -447,7 +447,7 @@ impl> Parser { fn parse_node(&mut self, block: bool, indentless_sequence: bool) -> ParseResult { let mut anchor_id = 0; let mut tag = None; - match *try!(self.peek_token()) { + match *self.peek_token()? { Token(_, TokenType::Alias(_)) => { self.pop_state(); if let Token(mark, TokenType::Alias(name)) = self.fetch_token() { @@ -466,8 +466,8 @@ impl> Parser { } Token(_, TokenType::Anchor(_)) => { if let Token(mark, TokenType::Anchor(name)) = self.fetch_token() { - anchor_id = try!(self.register_anchor(name, &mark)); - if let TokenType::Tag(..) = try!(self.peek_token()).1 { + anchor_id = self.register_anchor(name, &mark)?; + if let TokenType::Tag(..) = self.peek_token()?.1 { if let tg @ TokenType::Tag(..) = self.fetch_token().1 { tag = Some(tg); } else { @@ -481,9 +481,9 @@ impl> Parser { Token(_, TokenType::Tag(..)) => { if let tg @ TokenType::Tag(..) = self.fetch_token().1 { tag = Some(tg); - if let TokenType::Anchor(_) = try!(self.peek_token()).1 { + if let TokenType::Anchor(_) = self.peek_token()?.1 { if let Token(mark, TokenType::Anchor(name)) = self.fetch_token() { - anchor_id = try!(self.register_anchor(name, &mark)); + anchor_id = self.register_anchor(name, &mark)?; } else { unreachable!() } @@ -494,7 +494,7 @@ impl> Parser { } _ => {} } - match *try!(self.peek_token()) { + match *self.peek_token()? { Token(mark, TokenType::BlockEntry) if indentless_sequence => { self.state = State::IndentlessSequenceEntry; Ok((Event::SequenceStart(anchor_id), mark)) @@ -538,14 +538,14 @@ impl> Parser { fn block_mapping_key(&mut self, first: bool) -> ParseResult { // skip BlockMappingStart if first { - let _ = try!(self.peek_token()); + let _ = self.peek_token()?; //self.marks.push(tok.0); self.skip(); } - match *try!(self.peek_token()) { + match *self.peek_token()? { Token(_, TokenType::Key) => { self.skip(); - match *try!(self.peek_token()) { + match *self.peek_token()? { Token(mark, TokenType::Key) | Token(mark, TokenType::Value) | Token(mark, TokenType::BlockEnd) => { @@ -577,10 +577,10 @@ impl> Parser { } fn block_mapping_value(&mut self) -> ParseResult { - match *try!(self.peek_token()) { + match *self.peek_token()? { Token(_, TokenType::Value) => { self.skip(); - match *try!(self.peek_token()) { + match *self.peek_token()? { Token(mark, TokenType::Key) | Token(mark, TokenType::Value) | Token(mark, TokenType::BlockEnd) => { @@ -604,26 +604,26 @@ impl> Parser { fn flow_mapping_key(&mut self, first: bool) -> ParseResult { if first { - let _ = try!(self.peek_token()); + let _ = self.peek_token()?; self.skip(); } let marker: Marker = { - match *try!(self.peek_token()) { + match *self.peek_token()? { Token(mark, TokenType::FlowMappingEnd) => mark, Token(mark, _) => { if !first { - match *try!(self.peek_token()) { + match *self.peek_token()? { Token(_, TokenType::FlowEntry) => self.skip(), Token(mark, _) => return Err(ScanError::new(mark, "while parsing a flow mapping, did not find expected ',' or '}'")) } } - match *try!(self.peek_token()) { + match *self.peek_token()? { Token(_, TokenType::Key) => { self.skip(); - match *try!(self.peek_token()) { + match *self.peek_token()? { Token(mark, TokenType::Value) | Token(mark, TokenType::FlowEntry) | Token(mark, TokenType::FlowMappingEnd) => { @@ -660,14 +660,14 @@ impl> Parser { fn flow_mapping_value(&mut self, empty: bool) -> ParseResult { let mark: Marker = { if empty { - let Token(mark, _) = *try!(self.peek_token()); + let Token(mark, _) = *self.peek_token()?; self.state = State::FlowMappingKey; return Ok((Event::empty_scalar(), mark)); } else { - match *try!(self.peek_token()) { + match *self.peek_token()? { Token(marker, TokenType::Value) => { self.skip(); - match try!(self.peek_token()).1 { + match self.peek_token()?.1 { TokenType::FlowEntry | TokenType::FlowMappingEnd => {} _ => { self.push_state(State::FlowMappingKey); @@ -688,11 +688,11 @@ impl> Parser { fn flow_sequence_entry(&mut self, first: bool) -> ParseResult { // skip FlowMappingStart if first { - let _ = try!(self.peek_token()); + let _ = self.peek_token()?; //self.marks.push(tok.0); self.skip(); } - match *try!(self.peek_token()) { + match *self.peek_token()? { Token(mark, TokenType::FlowSequenceEnd) => { self.pop_state(); self.skip(); @@ -709,7 +709,7 @@ impl> Parser { } _ => { /* next */ } } - match *try!(self.peek_token()) { + match *self.peek_token()? { Token(mark, TokenType::FlowSequenceEnd) => { self.pop_state(); self.skip(); @@ -728,7 +728,7 @@ impl> Parser { } fn indentless_sequence_entry(&mut self) -> ParseResult { - match *try!(self.peek_token()) { + match *self.peek_token()? { Token(_, TokenType::BlockEntry) => (), Token(mark, _) => { self.pop_state(); @@ -736,7 +736,7 @@ impl> Parser { } } self.skip(); - match *try!(self.peek_token()) { + match *self.peek_token()? { Token(mark, TokenType::BlockEntry) | Token(mark, TokenType::Key) | Token(mark, TokenType::Value) @@ -754,11 +754,11 @@ impl> Parser { fn block_sequence_entry(&mut self, first: bool) -> ParseResult { // BLOCK-SEQUENCE-START if first { - let _ = try!(self.peek_token()); + let _ = self.peek_token()?; //self.marks.push(tok.0); self.skip(); } - match *try!(self.peek_token()) { + match *self.peek_token()? { Token(mark, TokenType::BlockEnd) => { self.pop_state(); self.skip(); @@ -766,7 +766,7 @@ impl> Parser { } Token(_, TokenType::BlockEntry) => { self.skip(); - match *try!(self.peek_token()) { + match *self.peek_token()? { Token(mark, TokenType::BlockEntry) | Token(mark, TokenType::BlockEnd) => { self.state = State::BlockSequenceEntry; Ok((Event::empty_scalar(), mark)) @@ -785,7 +785,7 @@ impl> Parser { } fn flow_sequence_entry_mapping_key(&mut self) -> ParseResult { - match *try!(self.peek_token()) { + match *self.peek_token()? { Token(mark, TokenType::Value) | Token(mark, TokenType::FlowEntry) | Token(mark, TokenType::FlowSequenceEnd) => { @@ -801,11 +801,11 @@ impl> Parser { } fn flow_sequence_entry_mapping_value(&mut self) -> ParseResult { - match *try!(self.peek_token()) { + match *self.peek_token()? { Token(_, TokenType::Value) => { self.skip(); self.state = State::FlowSequenceEntryMappingValue; - match *try!(self.peek_token()) { + match *self.peek_token()? { Token(mark, TokenType::FlowEntry) | Token(mark, TokenType::FlowSequenceEnd) => { self.state = State::FlowSequenceEntryMappingEnd; Ok((Event::empty_scalar(), mark)) diff --git a/saphyr/src/scanner.rs b/saphyr/src/scanner.rs index d4b8774..b8ade61 100644 --- a/saphyr/src/scanner.rs +++ b/saphyr/src/scanner.rs @@ -341,7 +341,7 @@ impl> Scanner { } self.skip_to_next_token(); - try!(self.stale_simple_keys()); + self.stale_simple_keys()?; let mark = self.mark; self.unroll_indent(mark.col as isize); @@ -349,7 +349,7 @@ impl> Scanner { self.lookahead(4); if is_z(self.ch()) { - try!(self.fetch_stream_end()); + self.fetch_stream_end()?; return Ok(()); } @@ -364,7 +364,7 @@ impl> Scanner { && self.buffer[2] == '-' && is_blankz(self.buffer[3]) { - try!(self.fetch_document_indicator(TokenType::DocumentStart)); + self.fetch_document_indicator(TokenType::DocumentStart)?; return Ok(()); } @@ -374,7 +374,7 @@ impl> Scanner { && self.buffer[2] == '.' && is_blankz(self.buffer[3]) { - try!(self.fetch_document_indicator(TokenType::DocumentEnd)); + self.fetch_document_indicator(TokenType::DocumentEnd)?; return Ok(()); } @@ -417,7 +417,7 @@ impl> Scanner { } if !self.token_available { - try!(self.fetch_more_tokens()); + self.fetch_more_tokens()?; } let t = self.tokens.pop_front().unwrap(); self.token_available = false; @@ -436,7 +436,7 @@ impl> Scanner { if self.tokens.is_empty() { need_more = true; } else { - try!(self.stale_simple_keys()); + self.stale_simple_keys()?; for sk in &self.simple_keys { if sk.possible && sk.token_number == self.tokens_parsed { need_more = true; @@ -448,7 +448,7 @@ impl> Scanner { if !need_more { break; } - try!(self.fetch_next_token()); + self.fetch_next_token()?; } self.token_available = true; @@ -510,7 +510,7 @@ impl> Scanner { } self.unroll_indent(-1); - try!(self.remove_simple_key()); + self.remove_simple_key()?; self.disallow_simple_key(); self.tokens @@ -520,11 +520,11 @@ impl> Scanner { fn fetch_directive(&mut self) -> ScanResult { self.unroll_indent(-1); - try!(self.remove_simple_key()); + self.remove_simple_key()?; self.disallow_simple_key(); - let tok = try!(self.scan_directive()); + let tok = self.scan_directive()?; self.tokens.push_back(tok); @@ -535,10 +535,10 @@ impl> Scanner { let start_mark = self.mark; self.skip(); - let name = try!(self.scan_directive_name()); + let name = self.scan_directive_name()?; let tok = match name.as_ref() { - "YAML" => try!(self.scan_version_directive_value(&start_mark)), - "TAG" => try!(self.scan_tag_directive_value(&start_mark)), + "YAML" => self.scan_version_directive_value(&start_mark)?, + "TAG" => self.scan_tag_directive_value(&start_mark)?, // XXX This should be a warning instead of an error _ => { // skip current line @@ -594,7 +594,7 @@ impl> Scanner { self.lookahead(1); } - let major = try!(self.scan_version_directive_number(mark)); + let major = self.scan_version_directive_number(mark)?; if self.ch() != '.' { return Err(ScanError::new( @@ -605,7 +605,7 @@ impl> Scanner { self.skip(); - let minor = try!(self.scan_version_directive_number(mark)); + let minor = self.scan_version_directive_number(mark)?; Ok(Token(*mark, TokenType::VersionDirective(major, minor))) } @@ -671,7 +671,7 @@ impl> Scanner { self.skip(); self.lookahead(1); } - let handle = try!(self.scan_tag_handle(true, mark)); + let handle = self.scan_tag_handle(true, mark)?; self.lookahead(1); /* Eat whitespaces. */ @@ -681,7 +681,7 @@ impl> Scanner { } let is_secondary = handle == "!!"; - let prefix = try!(self.scan_tag_uri(true, is_secondary, &String::new(), mark)); + let prefix = self.scan_tag_uri(true, is_secondary, &String::new(), mark)?; self.lookahead(1); @@ -696,10 +696,10 @@ impl> Scanner { } fn fetch_tag(&mut self) -> ScanResult { - try!(self.save_simple_key()); + self.save_simple_key()?; self.disallow_simple_key(); - let tok = try!(self.scan_tag()); + let tok = self.scan_tag()?; self.tokens.push_back(tok); Ok(()) } @@ -717,7 +717,7 @@ impl> Scanner { // Eat '!<' self.skip(); self.skip(); - suffix = try!(self.scan_tag_uri(false, false, &String::new(), &start_mark)); + suffix = self.scan_tag_uri(false, false, &String::new(), &start_mark)?; if self.ch() != '>' { return Err(ScanError::new( @@ -729,15 +729,15 @@ impl> Scanner { self.skip(); } else { // The tag has either the '!suffix' or the '!handle!suffix' - handle = try!(self.scan_tag_handle(false, &start_mark)); + handle = self.scan_tag_handle(false, &start_mark)?; // Check if it is, indeed, handle. if handle.len() >= 2 && handle.starts_with('!') && handle.ends_with('!') { if handle == "!!" { secondary = true; } - suffix = try!(self.scan_tag_uri(false, secondary, &String::new(), &start_mark)); + suffix = self.scan_tag_uri(false, secondary, &String::new(), &start_mark)?; } else { - suffix = try!(self.scan_tag_uri(false, false, &handle, &start_mark)); + suffix = self.scan_tag_uri(false, false, &handle, &start_mark)?; handle = "!".to_owned(); // A special case: the '!' tag. Set the handle to '' and the // suffix to '!'. @@ -829,7 +829,7 @@ impl> Scanner { } { // Check if it is a URI-escape sequence. if self.ch() == '%' { - string.push(try!(self.scan_uri_escapes(directive, mark))); + string.push(self.scan_uri_escapes(directive, mark)?); } else { string.push(self.ch()); self.skip(); @@ -907,10 +907,10 @@ impl> Scanner { } fn fetch_anchor(&mut self, alias: bool) -> ScanResult { - try!(self.save_simple_key()); + self.save_simple_key()?; self.disallow_simple_key(); - let tok = try!(self.scan_anchor(alias)); + let tok = self.scan_anchor(alias)?; self.tokens.push_back(tok); @@ -946,7 +946,7 @@ impl> Scanner { fn fetch_flow_collection_start(&mut self, tok: TokenType) -> ScanResult { // The indicators '[' and '{' may start a simple key. - try!(self.save_simple_key()); + self.save_simple_key()?; self.increase_flow_level()?; @@ -960,7 +960,7 @@ impl> Scanner { } fn fetch_flow_collection_end(&mut self, tok: TokenType) -> ScanResult { - try!(self.remove_simple_key()); + self.remove_simple_key()?; self.decrease_flow_level(); self.disallow_simple_key(); @@ -973,7 +973,7 @@ impl> Scanner { } fn fetch_flow_entry(&mut self) -> ScanResult { - try!(self.remove_simple_key()); + self.remove_simple_key()?; self.allow_simple_key(); let start_mark = self.mark; @@ -1019,7 +1019,7 @@ impl> Scanner { r#""-" is only valid inside a block"#, )); } - try!(self.remove_simple_key()); + self.remove_simple_key()?; self.allow_simple_key(); let start_mark = self.mark; @@ -1032,7 +1032,7 @@ impl> Scanner { fn fetch_document_indicator(&mut self, t: TokenType) -> ScanResult { self.unroll_indent(-1); - try!(self.remove_simple_key()); + self.remove_simple_key()?; self.disallow_simple_key(); let mark = self.mark; @@ -1046,9 +1046,9 @@ impl> Scanner { } fn fetch_block_scalar(&mut self, literal: bool) -> ScanResult { - try!(self.save_simple_key()); + self.save_simple_key()?; self.allow_simple_key(); - let tok = try!(self.scan_block_scalar(literal)); + let tok = self.scan_block_scalar(literal)?; self.tokens.push_back(tok); Ok(()) @@ -1145,7 +1145,7 @@ impl> Scanner { } } // Scan the leading line breaks and determine the indentation level if needed. - try!(self.block_scalar_breaks(&mut indent, &mut trailing_breaks)); + self.block_scalar_breaks(&mut indent, &mut trailing_breaks)?; self.lookahead(1); @@ -1183,7 +1183,7 @@ impl> Scanner { self.read_break(&mut leading_break); // Eat the following intendation spaces and line breaks. - try!(self.block_scalar_breaks(&mut indent, &mut trailing_breaks)); + self.block_scalar_breaks(&mut indent, &mut trailing_breaks)?; } // Chomp the tail. @@ -1249,10 +1249,10 @@ impl> Scanner { } fn fetch_flow_scalar(&mut self, single: bool) -> ScanResult { - try!(self.save_simple_key()); + self.save_simple_key()?; self.disallow_simple_key(); - let tok = try!(self.scan_flow_scalar(single)); + let tok = self.scan_flow_scalar(single)?; self.tokens.push_back(tok); Ok(()) @@ -1457,10 +1457,10 @@ impl> Scanner { } fn fetch_plain_scalar(&mut self) -> ScanResult { - try!(self.save_simple_key()); + self.save_simple_key()?; self.disallow_simple_key(); - let tok = try!(self.scan_plain_scalar()); + let tok = self.scan_plain_scalar()?; self.tokens.push_back(tok); Ok(()) @@ -1603,7 +1603,7 @@ impl> Scanner { ); } - try!(self.remove_simple_key()); + self.remove_simple_key()?; if self.flow_level == 0 { self.allow_simple_key(); @@ -1699,7 +1699,7 @@ impl> Scanner { sk.required = required; sk.token_number = self.tokens_parsed + self.tokens.len(); - try!(self.remove_simple_key()); + self.remove_simple_key()?; self.simple_keys.pop(); self.simple_keys.push(sk); diff --git a/saphyr/src/yaml.rs b/saphyr/src/yaml.rs index 7e14435..e2acf39 100644 --- a/saphyr/src/yaml.rs +++ b/saphyr/src/yaml.rs @@ -194,7 +194,7 @@ impl YamlLoader { anchor_map: BTreeMap::new(), }; let mut parser = Parser::new(source.chars()); - try!(parser.load(&mut loader, true)); + parser.load(&mut loader, true)?; Ok(loader.docs) } } From 9fc1b3f22da77632518d94243354e24dcc5a7a1f Mon Sep 17 00:00:00 2001 From: David Tolnay Date: Sun, 16 Sep 2018 00:00:48 -0700 Subject: [PATCH 181/380] Address write_with_newline lint --- saphyr/src/emitter.rs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/saphyr/src/emitter.rs b/saphyr/src/emitter.rs index 7806922..8a0b60a 100644 --- a/saphyr/src/emitter.rs +++ b/saphyr/src/emitter.rs @@ -139,7 +139,7 @@ impl<'a> YamlEmitter<'a> { pub fn dump(&mut self, doc: &Yaml) -> EmitResult { // write DocumentStart - write!(self.writer, "---\n")?; + writeln!(self.writer, "---")?; self.level = -1; self.emit_node(doc) } @@ -200,7 +200,7 @@ impl<'a> YamlEmitter<'a> { self.level += 1; for (cnt, x) in v.iter().enumerate() { if cnt > 0 { - write!(self.writer, "\n")?; + writeln!(self.writer)?; self.write_indent()?; } write!(self.writer, "-")?; @@ -222,13 +222,13 @@ impl<'a> YamlEmitter<'a> { _ => false, }; if cnt > 0 { - write!(self.writer, "\n")?; + writeln!(self.writer)?; self.write_indent()?; } if complex_key { write!(self.writer, "?")?; self.emit_val(true, k)?; - write!(self.writer, "\n")?; + writeln!(self.writer)?; self.write_indent()?; write!(self.writer, ":")?; self.emit_val(true, v)?; @@ -253,7 +253,7 @@ impl<'a> YamlEmitter<'a> { if (inline && self.compact) || v.is_empty() { write!(self.writer, " ")?; } else { - write!(self.writer, "\n")?; + writeln!(self.writer)?; self.level += 1; self.write_indent()?; self.level -= 1; @@ -264,7 +264,7 @@ impl<'a> YamlEmitter<'a> { if (inline && self.compact) || h.is_empty() { write!(self.writer, " ")?; } else { - write!(self.writer, "\n")?; + writeln!(self.writer)?; self.level += 1; self.write_indent()?; self.level -= 1; From 3f8a60627fe15158e24d8c72c4127845a5476bf3 Mon Sep 17 00:00:00 2001 From: Tibo Delor Date: Thu, 13 Dec 2018 18:35:01 +1100 Subject: [PATCH 182/380] Format using rustfmt 1.0 --- saphyr/src/emitter.rs | 2 +- saphyr/src/scanner.rs | 22 +++++++++++++--------- saphyr/src/yaml.rs | 15 ++++++++++----- 3 files changed, 24 insertions(+), 15 deletions(-) diff --git a/saphyr/src/emitter.rs b/saphyr/src/emitter.rs index 8a0b60a..09e9f87 100644 --- a/saphyr/src/emitter.rs +++ b/saphyr/src/emitter.rs @@ -334,7 +334,7 @@ fn need_quotes(string: &str) -> bool { // http://yaml.org/type/null.html "null", "Null", "NULL", "~", ] - .contains(&string) + .contains(&string) || string.starts_with('.') || string.parse::().is_ok() || string.parse::().is_ok() diff --git a/saphyr/src/scanner.rs b/saphyr/src/scanner.rs index b8ade61..4eb7912 100644 --- a/saphyr/src/scanner.rs +++ b/saphyr/src/scanner.rs @@ -483,10 +483,12 @@ impl> Scanner { self.allow_simple_key(); } } - '#' => while !is_breakz(self.ch()) { - self.skip(); - self.lookahead(1); - }, + '#' => { + while !is_breakz(self.ch()) { + self.skip(); + self.lookahead(1); + } + } _ => break, } } @@ -929,11 +931,13 @@ impl> Scanner { self.lookahead(1); } - if string.is_empty() || match self.ch() { - c if is_blankz(c) => false, - '?' | ':' | ',' | ']' | '}' | '%' | '@' | '`' => false, - _ => true, - } { + if string.is_empty() + || match self.ch() { + c if is_blankz(c) => false, + '?' | ':' | ',' | ']' | '}' | '%' | '@' | '`' => false, + _ => true, + } + { return Err(ScanError::new(start_mark, "while scanning an anchor or alias, did not find expected alphabetic or numeric character")); } diff --git a/saphyr/src/yaml.rs b/saphyr/src/yaml.rs index e2acf39..6ccbec3 100644 --- a/saphyr/src/yaml.rs +++ b/saphyr/src/yaml.rs @@ -409,7 +409,8 @@ a4: a5: 'single_quoted' a6: \"double_quoted\" a7: 你好 -".to_owned(); +" + .to_owned(); let out = YamlLoader::load_from_str(&s).unwrap(); let doc = &out[0]; assert_eq!(doc["a7"].as_str().unwrap(), "你好"); @@ -652,7 +653,8 @@ hash: with: indentations "#, - ).unwrap() + ) + .unwrap() .into_iter() .next() .unwrap(); @@ -663,7 +665,8 @@ hash: with: indentations "#, - ).unwrap() + ) + .unwrap() .into_iter() .next() .unwrap(); @@ -674,7 +677,8 @@ hash: with: indentations "#, - ).unwrap() + ) + .unwrap() .into_iter() .next() .unwrap(); @@ -685,7 +689,8 @@ hash: with: indentations "#, - ).unwrap() + ) + .unwrap() .into_iter() .next() .unwrap(); From f6cbf1bbd3cdf604b022d810f0793c80ff814ae9 Mon Sep 17 00:00:00 2001 From: Tibo Delor Date: Thu, 13 Dec 2018 19:02:54 +1100 Subject: [PATCH 183/380] Bump minimum version for test to 1.24.1 quickcheckv0.7.2 comes with lazy_static v1.2.0 which only support Rust 1.24.1. See https://github.com/rust-lang-nursery/lazy-static.rs/releases/tag/1.2.0 --- saphyr/.travis.yml | 2 +- saphyr/appveyor.yml | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/saphyr/.travis.yml b/saphyr/.travis.yml index 873ded1..908b925 100644 --- a/saphyr/.travis.yml +++ b/saphyr/.travis.yml @@ -7,7 +7,7 @@ matrix: - rust: nightly - rust: 1.17.0 script: cargo build - - rust: 1.22.1 + - rust: 1.24.1 - rust: nightly env: CLIPPY script: | diff --git a/saphyr/appveyor.yml b/saphyr/appveyor.yml index 8f2e88c..d6707aa 100644 --- a/saphyr/appveyor.yml +++ b/saphyr/appveyor.yml @@ -1,6 +1,6 @@ install: - - ps: Start-FileDownload 'https://static.rust-lang.org/dist/rust-1.22.1-i686-pc-windows-gnu.exe' - - rust-1.22.1-i686-pc-windows-gnu.exe /VERYSILENT /NORESTART /DIR="C:\Program Files (x86)\Rust" + - ps: Start-FileDownload 'https://static.rust-lang.org/dist/rust-1.24.1-i686-pc-windows-gnu.exe' + - rust-1.24.1-i686-pc-windows-gnu.exe /VERYSILENT /NORESTART /DIR="C:\Program Files (x86)\Rust" - SET PATH=%PATH%;C:\Program Files (x86)\Rust\bin - SET PATH=%PATH%;C:\MinGW\bin - rustc -V From 3ae23d47def920810230ea559f48ad9c0a4a67b2 Mon Sep 17 00:00:00 2001 From: Robin Stocker Date: Wed, 20 Feb 2019 18:23:31 +1100 Subject: [PATCH 184/380] Fix handling of indicators in plain scalars to conform to YAML 1.2 YAML 1.2 has special handling of indicators to be compatible with JSON. The following is equivalent to `{"a": "b"}` (note, no space after `:`): {"a":b} But without the quoted key, a space is required. So the `:` here is part of the plain scalar: {a:b} # == {"a:b"} A plain scalar can also start with a `:` as long as it's followed by "safe" characters: {a: :b} # == {"a": ":b"} (Fixes #118) --- saphyr/src/scanner.rs | 103 +++++++++++++++++++++++++++++++++++++----- saphyr/src/yaml.rs | 2 +- 2 files changed, 93 insertions(+), 12 deletions(-) diff --git a/saphyr/src/scanner.rs b/saphyr/src/scanner.rs index 4eb7912..6f4fa58 100644 --- a/saphyr/src/scanner.rs +++ b/saphyr/src/scanner.rs @@ -146,6 +146,7 @@ pub struct Scanner { stream_start_produced: bool, stream_end_produced: bool, + adjacent_value_allowed_at: usize, simple_key_allowed: bool, simple_keys: Vec, indent: isize, @@ -216,6 +217,13 @@ fn as_hex(c: char) -> u32 { _ => unreachable!(), } } +#[inline] +fn is_flow(c: char) -> bool { + match c { + ',' | '[' | ']' | '{' | '}' => true, + _ => false, + } +} pub type ScanResult = Result<(), ScanError>; @@ -231,6 +239,7 @@ impl> Scanner { stream_start_produced: false, stream_end_produced: false, + adjacent_value_allowed_at: 0, simple_key_allowed: true, simple_keys: Vec::new(), indent: -1, @@ -387,8 +396,13 @@ impl> Scanner { '}' => self.fetch_flow_collection_end(TokenType::FlowMappingEnd), ',' => self.fetch_flow_entry(), '-' if is_blankz(nc) => self.fetch_block_entry(), - '?' if self.flow_level > 0 || is_blankz(nc) => self.fetch_key(), - ':' if self.flow_level > 0 || is_blankz(nc) => self.fetch_value(), + '?' if is_blankz(nc) => self.fetch_key(), + ':' if is_blankz(nc) + || (self.flow_level > 0 + && (is_flow(nc) || self.mark.index == self.adjacent_value_allowed_at)) => + { + self.fetch_value() + } // Is it an alias? '*' => self.fetch_anchor(true), // Is it an anchor? @@ -1258,6 +1272,10 @@ impl> Scanner { let tok = self.scan_flow_scalar(single)?; + // From spec: To ensure JSON compatibility, if a key inside a flow mapping is JSON-like, + // YAML allows the following value to be specified adjacent to the “:”. + self.adjacent_value_allowed_at = self.mark.index; + self.tokens.push_back(tok); Ok(()) } @@ -1498,16 +1516,14 @@ impl> Scanner { break; } while !is_blankz(self.ch()) { - if self.flow_level > 0 && self.ch() == ':' && is_blankz(self.ch()) { - return Err(ScanError::new( - start_mark, - "while scanning a plain scalar, found unexpected ':'", - )); - } - // indicators ends a plain scalar + // indicators can end a plain scalar, see 7.3.3. Plain Style match self.ch() { - ':' if is_blankz(self.buffer[1]) => break, - ',' | ':' | '?' | '[' | ']' | '{' | '}' if self.flow_level > 0 => break, + ':' if is_blankz(self.buffer[1]) + || (self.flow_level > 0 && is_flow(self.buffer[1])) => + { + break; + } + ',' | '[' | ']' | '{' | '}' if self.flow_level > 0 => break, _ => {} } @@ -2073,6 +2089,71 @@ key: end!(p); } + #[test] + fn test_plain_scalar_starting_with_indicators_in_flow() { + // "Plain scalars must not begin with most indicators, as this would cause ambiguity with + // other YAML constructs. However, the “:”, “?” and “-” indicators may be used as the first + // character if followed by a non-space “safe” character, as this causes no ambiguity." + + let s = "{a: :b}"; + let mut p = Scanner::new(s.chars()); + next!(p, StreamStart(..)); + next!(p, FlowMappingStart); + next!(p, Key); + next_scalar!(p, TScalarStyle::Plain, "a"); + next!(p, Value); + next_scalar!(p, TScalarStyle::Plain, ":b"); + next!(p, FlowMappingEnd); + next!(p, StreamEnd); + end!(p); + + let s = "{a: ?b}"; + let mut p = Scanner::new(s.chars()); + next!(p, StreamStart(..)); + next!(p, FlowMappingStart); + next!(p, Key); + next_scalar!(p, TScalarStyle::Plain, "a"); + next!(p, Value); + next_scalar!(p, TScalarStyle::Plain, "?b"); + next!(p, FlowMappingEnd); + next!(p, StreamEnd); + end!(p); + } + + #[test] + fn test_plain_scalar_starting_with_indicators_in_block() { + let s = ":a"; + let mut p = Scanner::new(s.chars()); + next!(p, StreamStart(..)); + next_scalar!(p, TScalarStyle::Plain, ":a"); + next!(p, StreamEnd); + end!(p); + + let s = "?a"; + let mut p = Scanner::new(s.chars()); + next!(p, StreamStart(..)); + next_scalar!(p, TScalarStyle::Plain, "?a"); + next!(p, StreamEnd); + end!(p); + } + + #[test] + fn test_plain_scalar_containing_indicators_in_block() { + let s = "a:,b"; + let mut p = Scanner::new(s.chars()); + next!(p, StreamStart(..)); + next_scalar!(p, TScalarStyle::Plain, "a:,b"); + next!(p, StreamEnd); + end!(p); + + let s = ":,b"; + let mut p = Scanner::new(s.chars()); + next!(p, StreamStart(..)); + next_scalar!(p, TScalarStyle::Plain, ":,b"); + next!(p, StreamEnd); + end!(p); + } + #[test] fn test_scanner_cr() { let s = "---\r\n- tok1\r\n- tok2"; diff --git a/saphyr/src/yaml.rs b/saphyr/src/yaml.rs index 6ccbec3..fe112cc 100644 --- a/saphyr/src/yaml.rs +++ b/saphyr/src/yaml.rs @@ -40,7 +40,7 @@ pub enum Yaml { Array(self::Array), /// YAML hash, can be accessed as a `LinkedHashMap`. /// - /// Itertion order will match the order of insertion into the map. + /// Insertion order will match the order of insertion into the map. Hash(self::Hash), /// Alias, not fully supported yet. Alias(usize), From 508ed37d05209f064a45345920cd3c2ddbbc45c6 Mon Sep 17 00:00:00 2001 From: Yuheng Chen Date: Thu, 7 Mar 2019 14:27:03 +0800 Subject: [PATCH 185/380] Bump to v0.4.3 --- saphyr/Cargo.toml | 2 +- saphyr/src/lib.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/saphyr/Cargo.toml b/saphyr/Cargo.toml index d251b41..7bc449a 100644 --- a/saphyr/Cargo.toml +++ b/saphyr/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "yaml-rust" -version = "0.4.2" # remember to update html_root_url +version = "0.4.3" # remember to update html_root_url authors = ["Yuheng Chen "] homepage = "http://chyh1990.github.io/yaml-rust/" documentation = "https://docs.rs/yaml-rust" diff --git a/saphyr/src/lib.rs b/saphyr/src/lib.rs index b65c973..40cff18 100644 --- a/saphyr/src/lib.rs +++ b/saphyr/src/lib.rs @@ -36,7 +36,7 @@ //! //! ``` -#![doc(html_root_url = "https://docs.rs/yaml-rust/0.4.2")] +#![doc(html_root_url = "https://docs.rs/yaml-rust/0.4.3")] #![cfg_attr(feature = "cargo-clippy", allow(renamed_and_removed_lints))] #![cfg_attr(feature = "cargo-clippy", warn(cyclomatic_complexity))] #![cfg_attr( From de742fbcd6ca9ab017fcf6c89a1c64da299cd7bf Mon Sep 17 00:00:00 2001 From: Hendrik Sollich Date: Wed, 24 Jul 2019 16:36:15 +0200 Subject: [PATCH 186/380] Fix emitting hexlike strings without quotes The emitter omitted quotes for strings that start with `0x` those would subsequently be parsed as strings again. This should fix #133. --- saphyr/src/emitter.rs | 1 + saphyr/tests/test_round_trip.rs | 54 +++++++++++++++++++++++++++++---- 2 files changed, 49 insertions(+), 6 deletions(-) diff --git a/saphyr/src/emitter.rs b/saphyr/src/emitter.rs index 09e9f87..872d1c8 100644 --- a/saphyr/src/emitter.rs +++ b/saphyr/src/emitter.rs @@ -336,6 +336,7 @@ fn need_quotes(string: &str) -> bool { ] .contains(&string) || string.starts_with('.') + || string.starts_with("0x") || string.parse::().is_ok() || string.parse::().is_ok() } diff --git a/saphyr/tests/test_round_trip.rs b/saphyr/tests/test_round_trip.rs index bfa9602..dc5e85e 100644 --- a/saphyr/tests/test_round_trip.rs +++ b/saphyr/tests/test_round_trip.rs @@ -2,22 +2,64 @@ extern crate yaml_rust; use yaml_rust::{Yaml, YamlEmitter, YamlLoader}; -fn test_round_trip(original: &Yaml) { - let mut out = String::new(); - YamlEmitter::new(&mut out).dump(original).unwrap(); - let documents = YamlLoader::load_from_str(&out).unwrap(); +fn roundtrip(original: &Yaml) { + let mut emitted = String::new(); + YamlEmitter::new(&mut emitted).dump(original).unwrap(); + + let documents = YamlLoader::load_from_str(&emitted).unwrap(); + println!("emitted {}", emitted); + assert_eq!(documents.len(), 1); assert_eq!(documents[0], *original); } +fn double_roundtrip(original: &str) { + let parsed = YamlLoader::load_from_str(&original).unwrap(); + + let mut serialized = String::new(); + YamlEmitter::new(&mut serialized).dump(&parsed[0]).unwrap(); + + let reparsed = YamlLoader::load_from_str(&serialized).unwrap(); + + assert_eq!(parsed, reparsed); +} + #[test] fn test_escape_character() { let y = Yaml::String("\x1b".to_owned()); - test_round_trip(&y); + roundtrip(&y); } #[test] fn test_colon_in_string() { let y = Yaml::String("x: %".to_owned()); - test_round_trip(&y); + roundtrip(&y); +} + +#[test] +fn test_numberlike_strings() { + let docs = [ + r#"x: "1234""#, r#"x: "01234""#, r#""1234""#, + r#""01234""#, r#"" 01234""#, r#""0x1234""#, + r#"" 0x1234""#, + ]; + + for doc in &docs { + roundtrip(&Yaml::String(doc.to_string())); + double_roundtrip(&doc); + } +} + +/// Example from https://github.com/chyh1990/yaml-rust/issues/133 +#[test] +fn test_issue133() { + + let doc = YamlLoader::load_from_str("\"0x123\"").unwrap().pop().unwrap(); + assert_eq!(doc, Yaml::String("0x123".to_string())); + + let mut out_str = String::new(); + YamlEmitter::new(&mut out_str).dump(&doc).unwrap(); + let doc2 = YamlLoader::load_from_str(&out_str).unwrap().pop().unwrap(); + assert_eq!(doc, doc2); // This failed because the type has changed to a number now + } From 32214e92bfd8ab3e3a5421877f102291fd9b1df3 Mon Sep 17 00:00:00 2001 From: Hendrik Sollich Date: Wed, 24 Jul 2019 18:25:10 +0200 Subject: [PATCH 187/380] Raise minimum rustc version to 1.28 (Fixes CI) Known minimum version to build [regex](https://crates.io/crates/regex). Old enough as it is, we're not c++ here! --- saphyr/.travis.yml | 4 +--- saphyr/README.md | 4 ++++ saphyr/appveyor.yml | 4 ++-- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/saphyr/.travis.yml b/saphyr/.travis.yml index 908b925..a640b6e 100644 --- a/saphyr/.travis.yml +++ b/saphyr/.travis.yml @@ -5,9 +5,7 @@ matrix: - rust: stable - rust: beta - rust: nightly - - rust: 1.17.0 - script: cargo build - - rust: 1.24.1 + - rust: 1.28.0 - rust: nightly env: CLIPPY script: | diff --git a/saphyr/README.md b/saphyr/README.md index 8517ad3..5abb430 100644 --- a/saphyr/README.md +++ b/saphyr/README.md @@ -103,6 +103,10 @@ so it may not be a huge problem for most users. * Tag directive * Alias while desearilization +## Minimum Rust version policy + +This crate's minimum supported `rustc` version is 1.28, as this is the currently known minimum version for [`regex`](https://crates.io/crates/regex#minimum-rust-version-policy) as well. + ## License Licensed under either of diff --git a/saphyr/appveyor.yml b/saphyr/appveyor.yml index d6707aa..9d0a4f4 100644 --- a/saphyr/appveyor.yml +++ b/saphyr/appveyor.yml @@ -1,6 +1,6 @@ install: - - ps: Start-FileDownload 'https://static.rust-lang.org/dist/rust-1.24.1-i686-pc-windows-gnu.exe' - - rust-1.24.1-i686-pc-windows-gnu.exe /VERYSILENT /NORESTART /DIR="C:\Program Files (x86)\Rust" + - ps: Start-FileDownload 'https://static.rust-lang.org/dist/rust-1.28.0-i686-pc-windows-gnu.exe' + - rust-1.28.0-i686-pc-windows-gnu.exe /VERYSILENT /NORESTART /DIR="C:\Program Files (x86)\Rust" - SET PATH=%PATH%;C:\Program Files (x86)\Rust\bin - SET PATH=%PATH%;C:\MinGW\bin - rustc -V From 5170d0374dc80f6dae0b60fc0db581e38efd42d3 Mon Sep 17 00:00:00 2001 From: Anton Kochkov Date: Wed, 27 May 2020 14:15:28 +0800 Subject: [PATCH 188/380] Rust 2018 transition --- saphyr/src/emitter.rs | 18 +++++++++--------- saphyr/src/lib.rs | 8 ++++---- saphyr/src/parser.rs | 2 +- saphyr/src/scanner.rs | 10 +++++----- saphyr/src/yaml.rs | 6 +++--- 5 files changed, 22 insertions(+), 22 deletions(-) diff --git a/saphyr/src/emitter.rs b/saphyr/src/emitter.rs index 872d1c8..d461001 100644 --- a/saphyr/src/emitter.rs +++ b/saphyr/src/emitter.rs @@ -1,7 +1,7 @@ use std::convert::From; use std::error::Error; use std::fmt::{self, Display}; -use yaml::{Hash, Yaml}; +use crate::yaml::{Hash, Yaml}; #[derive(Copy, Clone, Debug)] pub enum EmitError { @@ -17,7 +17,7 @@ impl Error for EmitError { } } - fn cause(&self) -> Option<&Error> { + fn cause(&self) -> Option<&dyn Error> { None } } @@ -38,7 +38,7 @@ impl From for EmitError { } pub struct YamlEmitter<'a> { - writer: &'a mut fmt::Write, + writer: &'a mut dyn fmt::Write, best_indent: usize, compact: bool, @@ -48,7 +48,7 @@ pub struct YamlEmitter<'a> { pub type EmitResult = Result<(), EmitError>; // from serialize::json -fn escape_str(wr: &mut fmt::Write, v: &str) -> Result<(), fmt::Error> { +fn escape_str(wr: &mut dyn fmt::Write, v: &str) -> Result<(), fmt::Error> { wr.write_str("\"")?; let mut start = 0; @@ -111,7 +111,7 @@ fn escape_str(wr: &mut fmt::Write, v: &str) -> Result<(), fmt::Error> { } impl<'a> YamlEmitter<'a> { - pub fn new(writer: &'a mut fmt::Write) -> YamlEmitter { + pub fn new(writer: &'a mut dyn fmt::Write) -> YamlEmitter { YamlEmitter { writer, best_indent: 2, @@ -316,12 +316,12 @@ fn need_quotes(string: &str) -> bool { | '\"' | '\'' | '\\' - | '\0'...'\x06' + | '\0'..='\x06' | '\t' | '\n' | '\r' - | '\x0e'...'\x1a' - | '\x1c'...'\x1f' => true, + | '\x0e'..='\x1a' + | '\x1c'..='\x1f' => true, _ => false, }) || [ @@ -344,7 +344,7 @@ fn need_quotes(string: &str) -> bool { #[cfg(test)] mod test { use super::*; - use YamlLoader; + use crate::YamlLoader; #[test] fn test_emit_simple() { diff --git a/saphyr/src/lib.rs b/saphyr/src/lib.rs index 40cff18..af1423c 100644 --- a/saphyr/src/lib.rs +++ b/saphyr/src/lib.rs @@ -52,10 +52,10 @@ pub mod scanner; pub mod yaml; // reexport key APIs -pub use emitter::{EmitError, YamlEmitter}; -pub use parser::Event; -pub use scanner::ScanError; -pub use yaml::{Yaml, YamlLoader}; +pub use crate::emitter::{EmitError, YamlEmitter}; +pub use crate::parser::Event; +pub use crate::scanner::ScanError; +pub use crate::yaml::{Yaml, YamlLoader}; #[cfg(test)] mod tests { diff --git a/saphyr/src/parser.rs b/saphyr/src/parser.rs index 22692ff..cf25fdd 100644 --- a/saphyr/src/parser.rs +++ b/saphyr/src/parser.rs @@ -1,4 +1,4 @@ -use scanner::*; +use crate::scanner::*; use std::collections::HashMap; #[derive(Clone, Copy, PartialEq, Debug, Eq)] diff --git a/saphyr/src/scanner.rs b/saphyr/src/scanner.rs index 6f4fa58..b2ce148 100644 --- a/saphyr/src/scanner.rs +++ b/saphyr/src/scanner.rs @@ -67,7 +67,7 @@ impl Error for ScanError { self.info.as_ref() } - fn cause(&self) -> Option<&Error> { + fn cause(&self) -> Option<&dyn Error> { None } } @@ -199,7 +199,7 @@ fn is_digit(c: char) -> bool { #[inline] fn is_alpha(c: char) -> bool { match c { - '0'...'9' | 'a'...'z' | 'A'...'Z' => true, + '0'..='9' | 'a'..='z' | 'A'..='Z' => true, '_' | '-' => true, _ => false, } @@ -211,9 +211,9 @@ fn is_hex(c: char) -> bool { #[inline] fn as_hex(c: char) -> u32 { match c { - '0'...'9' => (c as u32) - ('0' as u32), - 'a'...'f' => (c as u32) - ('a' as u32) + 10, - 'A'...'F' => (c as u32) - ('A' as u32) + 10, + '0'..='9' => (c as u32) - ('0' as u32), + 'a'..='f' => (c as u32) - ('a' as u32) + 10, + 'A'..='F' => (c as u32) - ('A' as u32) + 10, _ => unreachable!(), } } diff --git a/saphyr/src/yaml.rs b/saphyr/src/yaml.rs index fe112cc..f529f38 100644 --- a/saphyr/src/yaml.rs +++ b/saphyr/src/yaml.rs @@ -1,6 +1,6 @@ use linked_hash_map::LinkedHashMap; -use parser::*; -use scanner::{Marker, ScanError, TScalarStyle, TokenType}; +use crate::parser::*; +use crate::scanner::{Marker, ScanError, TScalarStyle, TokenType}; use std::collections::BTreeMap; use std::f64; use std::i64; @@ -368,7 +368,7 @@ impl Iterator for YamlIter { #[cfg(test)] mod test { use std::f64; - use yaml::*; + use crate::yaml::*; #[test] fn test_coerce() { let s = "--- From 92d775a448e23c03bd4891123fd17d99b4594032 Mon Sep 17 00:00:00 2001 From: Anton Kochkov Date: Wed, 27 May 2020 14:19:22 +0800 Subject: [PATCH 189/380] Remove deprecated API --- saphyr/Cargo.toml | 1 + saphyr/src/emitter.rs | 7 ------- 2 files changed, 1 insertion(+), 7 deletions(-) diff --git a/saphyr/Cargo.toml b/saphyr/Cargo.toml index 7bc449a..282de92 100644 --- a/saphyr/Cargo.toml +++ b/saphyr/Cargo.toml @@ -8,6 +8,7 @@ license = "MIT/Apache-2.0" description = "The missing YAML 1.2 parser for rust" repository = "https://github.com/chyh1990/yaml-rust" readme = "README.md" +edition = "2018" [dependencies] linked-hash-map = ">=0.0.9, <0.6" diff --git a/saphyr/src/emitter.rs b/saphyr/src/emitter.rs index d461001..ab589a3 100644 --- a/saphyr/src/emitter.rs +++ b/saphyr/src/emitter.rs @@ -10,13 +10,6 @@ pub enum EmitError { } impl Error for EmitError { - fn description(&self) -> &str { - match *self { - EmitError::FmtError(ref err) => err.description(), - EmitError::BadHashmapKey => "bad hashmap key", - } - } - fn cause(&self) -> Option<&dyn Error> { None } From 3cfe953325d2197561230f4d977614e7fd1c808f Mon Sep 17 00:00:00 2001 From: Anton Kochkov Date: Wed, 27 May 2020 14:25:59 +0800 Subject: [PATCH 190/380] Update quickcheck to 0.9 --- saphyr/Cargo.toml | 2 +- saphyr/tests/quickcheck.rs | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/saphyr/Cargo.toml b/saphyr/Cargo.toml index 282de92..aac5d4a 100644 --- a/saphyr/Cargo.toml +++ b/saphyr/Cargo.toml @@ -14,4 +14,4 @@ edition = "2018" linked-hash-map = ">=0.0.9, <0.6" [dev-dependencies] -quickcheck = "0.7" +quickcheck = "0.9" diff --git a/saphyr/tests/quickcheck.rs b/saphyr/tests/quickcheck.rs index c2c89bc..0efd679 100644 --- a/saphyr/tests/quickcheck.rs +++ b/saphyr/tests/quickcheck.rs @@ -3,7 +3,6 @@ extern crate yaml_rust; extern crate quickcheck; use quickcheck::TestResult; -use std::error::Error; use yaml_rust::{Yaml, YamlEmitter, YamlLoader}; quickcheck! { @@ -16,7 +15,7 @@ quickcheck! { } match YamlLoader::load_from_str(&out_str) { Ok(output) => TestResult::from_bool(output.len() == 1 && input == output[0]), - Err(err) => TestResult::error(err.description()), + Err(err) => TestResult::error(err.to_string()), } } } From 4c86941869f5505904df1b6483deb864d12b8291 Mon Sep 17 00:00:00 2001 From: Anton Kochkov Date: Wed, 27 May 2020 14:29:54 +0800 Subject: [PATCH 191/380] Update AppVeyor --- saphyr/appveyor.yml | 76 +++++++++++++++++++++++++++++++++++++++------ 1 file changed, 66 insertions(+), 10 deletions(-) diff --git a/saphyr/appveyor.yml b/saphyr/appveyor.yml index 9d0a4f4..b00f09f 100644 --- a/saphyr/appveyor.yml +++ b/saphyr/appveyor.yml @@ -1,13 +1,69 @@ +clone_depth: 1 + +branches: + only: + - master + +environment: + LLVM_VERSION: 9.0.1 + PLATFORM: x64 + matrix: + - channel: stable + target: i686-pc-windows-msvc + - channel: stable + target: x86_64-pc-windows-msvc + - channel: stable + target: i686-pc-windows-gnu + - channel: stable + target: x86_64-pc-windows-gnu + - channel: beta + target: i686-pc-windows-msvc + - channel: beta + target: x86_64-pc-windows-msvc + - channel: nightly + target: i686-pc-windows-msvc + type: msvc + - channel: nightly + target: x86_64-pc-windows-msvc + type: msvc + - channel: beta + target: i686-pc-windows-gnu + - channel: beta + target: x86_64-pc-windows-gnu + - channel: nightly + target: i686-pc-windows-gnu + type: gnu + - channel: nightly + target: x86_64-pc-windows-gnu + type: gnu + install: - - ps: Start-FileDownload 'https://static.rust-lang.org/dist/rust-1.28.0-i686-pc-windows-gnu.exe' - - rust-1.28.0-i686-pc-windows-gnu.exe /VERYSILENT /NORESTART /DIR="C:\Program Files (x86)\Rust" - - SET PATH=%PATH%;C:\Program Files (x86)\Rust\bin - - SET PATH=%PATH%;C:\MinGW\bin - - rustc -V - - cargo -V - - git submodule update --init --recursive - -build: false + - if %PLATFORM% == x86 (set RUST_PLATFORM=i686&set MINGW_BITS=32) else (set RUST_PLATFORM=x86_64&set MINGW_BITS=64) + - ps: >- + If ($env:target -eq 'x86_64-pc-windows-gnu') { + $env:PATH += ';C:\msys64\mingw64\bin' + } ElseIf ($env:target -eq 'i686-pc-windows-gnu') { + $env:PATH += ';C:\msys64\mingw32\bin' + } + - appveyor DownloadFile https://win.rustup.rs/ -FileName rustup-init.exe + - rustup-init -yv --default-toolchain %channel% --default-host %target% + - set PATH=%PATH%;%USERPROFILE%\.cargo\bin + - rustc -vV + - cargo -vV + # Install LLVM for GNU + - if %type%==gnu set PATH=C:\msys64\mingw%MINGW_BITS%\bin;C:\msys64\usr\bin;%PATH% + - if %type%==gnu set "MINGW_URL=http://repo.msys2.org/mingw/%RUST_PLATFORM%/mingw-w64-%RUST_PLATFORM%" + - if %type%==gnu set "URL_VER=%LLVM_VERSION%-1-any.pkg.tar.xz" + - if %type%==gnu bash -lc "pacman -U --noconfirm $MINGW_URL-clang-$URL_VER $MINGW_URL-llvm-$URL_VER" + - if %type%==gnu bash -lc "clang --version" + # Use preinstalled LLVM for MSVC + - if %type%==msvc set PATH=%PATH%;C:\Program Files\LLVM\bin + - if %type%==msvc where clang + - if %type%==msvc clang --version +build_script: + - cargo build -vv test_script: - - cargo test --verbose + - cargo test -vv +deploy: off + From 89d53b9498e72741748eeadef636ab1080e4a364 Mon Sep 17 00:00:00 2001 From: Anton Kochkov Date: Wed, 27 May 2020 14:36:22 +0800 Subject: [PATCH 192/380] Fix build --- saphyr/appveyor.yml | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/saphyr/appveyor.yml b/saphyr/appveyor.yml index b00f09f..5b3ee4c 100644 --- a/saphyr/appveyor.yml +++ b/saphyr/appveyor.yml @@ -10,26 +10,22 @@ environment: matrix: - channel: stable target: i686-pc-windows-msvc + type: msvc - channel: stable target: x86_64-pc-windows-msvc + type: msvc - channel: stable target: i686-pc-windows-gnu + type: gnu - channel: stable target: x86_64-pc-windows-gnu - - channel: beta - target: i686-pc-windows-msvc - - channel: beta - target: x86_64-pc-windows-msvc + type: gnu - channel: nightly target: i686-pc-windows-msvc type: msvc - channel: nightly target: x86_64-pc-windows-msvc type: msvc - - channel: beta - target: i686-pc-windows-gnu - - channel: beta - target: x86_64-pc-windows-gnu - channel: nightly target: i686-pc-windows-gnu type: gnu From fcde455b8d9698110a4bd03bff8a0f9f70dc6e2b Mon Sep 17 00:00:00 2001 From: Chen Yuheng Date: Mon, 1 Jun 2020 20:15:32 +0800 Subject: [PATCH 193/380] Update .travis.yml --- saphyr/.travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/saphyr/.travis.yml b/saphyr/.travis.yml index a640b6e..9e453f3 100644 --- a/saphyr/.travis.yml +++ b/saphyr/.travis.yml @@ -5,7 +5,7 @@ matrix: - rust: stable - rust: beta - rust: nightly - - rust: 1.28.0 + - rust: 1.31.0 - rust: nightly env: CLIPPY script: | From b28f7deeb74dd572c3ec880c74da435e77917a6f Mon Sep 17 00:00:00 2001 From: Chen Yuheng Date: Mon, 1 Jun 2020 20:18:27 +0800 Subject: [PATCH 194/380] Raise minimum rustc support to 1.31 --- saphyr/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/saphyr/README.md b/saphyr/README.md index 5abb430..840ff94 100644 --- a/saphyr/README.md +++ b/saphyr/README.md @@ -105,7 +105,7 @@ so it may not be a huge problem for most users. ## Minimum Rust version policy -This crate's minimum supported `rustc` version is 1.28, as this is the currently known minimum version for [`regex`](https://crates.io/crates/regex#minimum-rust-version-policy) as well. +This crate's minimum supported `rustc` version is 1.31 (released with Rust 2018, after v0.4.3), as this is the currently known minimum version for [`regex`](https://crates.io/crates/regex#minimum-rust-version-policy) as well. ## License From e35bbd7c5af47ae5efd1c6c76217a59f524e8aa3 Mon Sep 17 00:00:00 2001 From: Yuheng Chen Date: Mon, 1 Jun 2020 20:34:13 +0800 Subject: [PATCH 195/380] fix CI for clippy 0.9 --- saphyr/.travis.yml | 2 +- saphyr/src/yaml.rs | 16 ++++++++-------- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/saphyr/.travis.yml b/saphyr/.travis.yml index 9e453f3..46b9569 100644 --- a/saphyr/.travis.yml +++ b/saphyr/.travis.yml @@ -5,7 +5,7 @@ matrix: - rust: stable - rust: beta - rust: nightly - - rust: 1.31.0 + - rust: 1.33.0 - rust: nightly env: CLIPPY script: | diff --git a/saphyr/src/yaml.rs b/saphyr/src/yaml.rs index f529f38..d783282 100644 --- a/saphyr/src/yaml.rs +++ b/saphyr/src/yaml.rs @@ -288,19 +288,19 @@ impl Yaml { // This function falls back to Yaml::String if nothing else matches. pub fn from_str(v: &str) -> Yaml { if v.starts_with("0x") { - let n = i64::from_str_radix(&v[2..], 16); - if n.is_ok() { - return Yaml::Integer(n.unwrap()); + if let Ok(i) = i64::from_str_radix(&v[2..], 16) { + return Yaml::Integer(i); } } if v.starts_with("0o") { - let n = i64::from_str_radix(&v[2..], 8); - if n.is_ok() { - return Yaml::Integer(n.unwrap()); + if let Ok(i) = i64::from_str_radix(&v[2..], 8) { + return Yaml::Integer(i); } } - if v.starts_with('+') && v[1..].parse::().is_ok() { - return Yaml::Integer(v[1..].parse::().unwrap()); + if v.starts_with('+') { + if let Ok(i) = v[1..].parse::() { + return Yaml::Integer(i); + } } match v { "~" | "null" => Yaml::Null, From 35619eaa5c6eeee1e6dcf9ae1d22101e808cabf9 Mon Sep 17 00:00:00 2001 From: Evan Harvey <41582020+blinklad@users.noreply.github.com> Date: Mon, 1 Jun 2020 22:59:27 +1000 Subject: [PATCH 196/380] Spellcheck (#143) * Spellcheck src * Spellcheck README --- saphyr/README.md | 2 +- saphyr/src/emitter.rs | 4 ++-- saphyr/src/parser.rs | 10 +++++----- saphyr/src/scanner.rs | 16 ++++++++-------- saphyr/src/yaml.rs | 2 +- 5 files changed, 17 insertions(+), 17 deletions(-) diff --git a/saphyr/README.md b/saphyr/README.md index 840ff94..a31faf1 100644 --- a/saphyr/README.md +++ b/saphyr/README.md @@ -101,7 +101,7 @@ so it may not be a huge problem for most users. * Encoder * Tag directive -* Alias while desearilization +* Alias while deserialization ## Minimum Rust version policy diff --git a/saphyr/src/emitter.rs b/saphyr/src/emitter.rs index ab589a3..f20a3ed 100644 --- a/saphyr/src/emitter.rs +++ b/saphyr/src/emitter.rs @@ -238,7 +238,7 @@ impl<'a> YamlEmitter<'a> { /// Emit a yaml as a hash or array value: i.e., which should appear /// following a ":" or "-", either after a space, or on a new line. - /// If `inline` is true, then the preceeding characters are distinct + /// If `inline` is true, then the preceding characters are distinct /// and short enough to respect the compact flag. fn emit_val(&mut self, inline: bool, val: &Yaml) -> EmitResult { match *val { @@ -320,7 +320,7 @@ fn need_quotes(string: &str) -> bool { || [ // http://yaml.org/type/bool.html // Note: 'y', 'Y', 'n', 'N', is not quoted deliberately, as in libyaml. PyYAML also parse - // them as string, not booleans, although it is volating the YAML 1.1 specification. + // them as string, not booleans, although it is violating the YAML 1.1 specification. // See https://github.com/dtolnay/serde-yaml/pull/83#discussion_r152628088. "yes", "Yes", "YES", "no", "No", "NO", "True", "TRUE", "true", "False", "FALSE", "false", "on", "On", "ON", "off", "Off", "OFF", diff --git a/saphyr/src/parser.rs b/saphyr/src/parser.rs index cf25fdd..4a63146 100644 --- a/saphyr/src/parser.rs +++ b/saphyr/src/parser.rs @@ -349,7 +349,7 @@ impl> Parser { | Token(_, TokenType::TagDirective(..)) | Token(_, TokenType::DocumentStart) => { // explicit document - self._explict_document_start() + self._explicit_document_start() } Token(mark, _) if implicit => { self.parser_process_directives()?; @@ -359,7 +359,7 @@ impl> Parser { } _ => { // explicit document - self._explict_document_start() + self._explicit_document_start() } } } @@ -385,7 +385,7 @@ impl> Parser { Ok(()) } - fn _explict_document_start(&mut self) -> ParseResult { + fn _explicit_document_start(&mut self) -> ParseResult { self.parser_process_directives()?; match *self.peek_token()? { Token(mark, TokenType::DocumentStart) => { @@ -433,7 +433,7 @@ impl> Parser { } fn register_anchor(&mut self, name: String, _: &Marker) -> Result { - // anchors can be overrided/reused + // anchors can be overridden/reused // if self.anchors.contains_key(name) { // return Err(ScanError::new(*mark, // "while parsing anchor, found duplicated anchor")); @@ -704,7 +704,7 @@ impl> Parser { Token(mark, _) if !first => { return Err(ScanError::new( mark, - "while parsing a flow sequence, expectd ',' or ']'", + "while parsing a flow sequence, expected ',' or ']'", )); } _ => { /* next */ } diff --git a/saphyr/src/scanner.rs b/saphyr/src/scanner.rs index b2ce148..a8659a8 100644 --- a/saphyr/src/scanner.rs +++ b/saphyr/src/scanner.rs @@ -1100,7 +1100,7 @@ impl> Scanner { if self.ch() == '0' { return Err(ScanError::new( start_mark, - "while scanning a block scalar, found an intendation indicator equal to 0", + "while scanning a block scalar, found an indentation indicator equal to 0", )); } increment = (self.ch() as usize) - ('0' as usize); @@ -1110,7 +1110,7 @@ impl> Scanner { if self.ch() == '0' { return Err(ScanError::new( start_mark, - "while scanning a block scalar, found an intendation indicator equal to 0", + "while scanning a block scalar, found an indentation indicator equal to 0", )); } @@ -1200,7 +1200,7 @@ impl> Scanner { self.lookahead(2); self.read_break(&mut leading_break); - // Eat the following intendation spaces and line breaks. + // Eat the following indentation spaces and line breaks. self.block_scalar_breaks(&mut indent, &mut trailing_breaks)?; } @@ -1239,10 +1239,10 @@ impl> Scanner { max_indent = self.mark.col; } - // Check for a tab character messing the intendation. + // Check for a tab character messing the indentation. if (*indent == 0 || self.mark.col < *indent) && self.buffer[0] == '\t' { return Err(ScanError::new(self.mark, - "while scanning a block scalar, found a tab character where an intendation space is expected")); + "while scanning a block scalar, found a tab character where an indentation space is expected")); } if !is_break(self.ch()) { @@ -1384,7 +1384,7 @@ impl> Scanner { for i in 0..code_length { if !is_hex(self.buffer[i]) { return Err(ScanError::new(start_mark, - "while parsing a quoted scalar, did not find expected hexdecimal number")); + "while parsing a quoted scalar, did not find expected hexadecimal number")); } value = (value << 4) + as_hex(self.buffer[i]); } @@ -1589,7 +1589,7 @@ impl> Scanner { self.lookahead(1); } - // check intendation level + // check indentation level if self.flow_level == 0 && (self.mark.col as isize) < indent { break; } @@ -1608,7 +1608,7 @@ impl> Scanner { fn fetch_key(&mut self) -> ScanResult { let start_mark = self.mark; if self.flow_level == 0 { - // Check if we are allowed to start a new key (not nessesary simple). + // Check if we are allowed to start a new key (not necessarily simple). if !self.simple_key_allowed { return Err(ScanError::new( self.mark, diff --git a/saphyr/src/yaml.rs b/saphyr/src/yaml.rs index d783282..4bb70da 100644 --- a/saphyr/src/yaml.rs +++ b/saphyr/src/yaml.rs @@ -528,7 +528,7 @@ a1: &DEFAULT } #[test] - fn test_bad_hypen() { + fn test_bad_hyphen() { // See: https://github.com/chyh1990/yaml-rust/issues/23 let s = "{-"; assert!(YamlLoader::load_from_str(&s).is_err()); From a833b49831a3679ab0868cb2f83fad483cb9e7bb Mon Sep 17 00:00:00 2001 From: Yuheng Chen Date: Mon, 1 Jun 2020 21:02:16 +0800 Subject: [PATCH 197/380] Bump to v0.4.4 --- saphyr/Cargo.toml | 2 +- saphyr/src/lib.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/saphyr/Cargo.toml b/saphyr/Cargo.toml index aac5d4a..ec52263 100644 --- a/saphyr/Cargo.toml +++ b/saphyr/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "yaml-rust" -version = "0.4.3" # remember to update html_root_url +version = "0.4.4" # remember to update html_root_url authors = ["Yuheng Chen "] homepage = "http://chyh1990.github.io/yaml-rust/" documentation = "https://docs.rs/yaml-rust" diff --git a/saphyr/src/lib.rs b/saphyr/src/lib.rs index af1423c..e369c68 100644 --- a/saphyr/src/lib.rs +++ b/saphyr/src/lib.rs @@ -36,7 +36,7 @@ //! //! ``` -#![doc(html_root_url = "https://docs.rs/yaml-rust/0.4.3")] +#![doc(html_root_url = "https://docs.rs/yaml-rust/0.4.4")] #![cfg_attr(feature = "cargo-clippy", allow(renamed_and_removed_lints))] #![cfg_attr(feature = "cargo-clippy", warn(cyclomatic_complexity))] #![cfg_attr( From 2f8b14aaaa486429bf4a765a336afddc6a1af1c9 Mon Sep 17 00:00:00 2001 From: Dylan DPC Date: Fri, 10 Jul 2020 07:44:57 +0200 Subject: [PATCH 198/380] Update Cargo.toml (#162) --- saphyr/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/saphyr/Cargo.toml b/saphyr/Cargo.toml index ec52263..9444efa 100644 --- a/saphyr/Cargo.toml +++ b/saphyr/Cargo.toml @@ -11,7 +11,7 @@ readme = "README.md" edition = "2018" [dependencies] -linked-hash-map = ">=0.0.9, <0.6" +linked-hash-map = "0.5.3" [dev-dependencies] quickcheck = "0.9" From bb249d9b3643b93c01bf55b7c6f244cb0751316a Mon Sep 17 00:00:00 2001 From: Yuheng Chen Date: Mon, 1 Jun 2020 21:02:16 +0800 Subject: [PATCH 199/380] Bump to v0.4.5 --- saphyr/Cargo.toml | 2 +- saphyr/src/lib.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/saphyr/Cargo.toml b/saphyr/Cargo.toml index 9444efa..91d7da4 100644 --- a/saphyr/Cargo.toml +++ b/saphyr/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "yaml-rust" -version = "0.4.4" # remember to update html_root_url +version = "0.4.5" # remember to update html_root_url authors = ["Yuheng Chen "] homepage = "http://chyh1990.github.io/yaml-rust/" documentation = "https://docs.rs/yaml-rust" diff --git a/saphyr/src/lib.rs b/saphyr/src/lib.rs index e369c68..6cf87c7 100644 --- a/saphyr/src/lib.rs +++ b/saphyr/src/lib.rs @@ -36,7 +36,7 @@ //! //! ``` -#![doc(html_root_url = "https://docs.rs/yaml-rust/0.4.4")] +#![doc(html_root_url = "https://docs.rs/yaml-rust/0.4.5")] #![cfg_attr(feature = "cargo-clippy", allow(renamed_and_removed_lints))] #![cfg_attr(feature = "cargo-clippy", warn(cyclomatic_complexity))] #![cfg_attr( From b6e65fb3598513aabee544c9644f8e4cf2c3e7c7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alexander=20Kj=C3=A4ll?= Date: Mon, 12 Jul 2021 09:48:17 +0200 Subject: [PATCH 200/380] library is now in the crates repo (#164) --- saphyr/src/lib.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/saphyr/src/lib.rs b/saphyr/src/lib.rs index 6cf87c7..ae95e0f 100644 --- a/saphyr/src/lib.rs +++ b/saphyr/src/lib.rs @@ -9,8 +9,8 @@ //! used by adding `yaml-rust` to the dependencies in your project's `Cargo.toml`. //! //! ```toml -//! [dependencies.yaml-rust] -//! git = "https://github.com/chyh1990/yaml-rust.git" +//! [dependencies] +//! yaml-rust = "0.4" //! ``` //! //! And this in your crate root: From fd37ebd6e58694ca6ba59f2918afe301ed4cbb46 Mon Sep 17 00:00:00 2001 From: Ethiraric Date: Sat, 12 Aug 2023 01:54:46 +0200 Subject: [PATCH 201/380] Clippy set to pedantic. --- saphyr/examples/dump_yaml.rs | 4 +- saphyr/src/emitter.rs | 82 +++++------ saphyr/src/lib.rs | 12 +- saphyr/src/parser.rs | 248 ++++++++++++++++---------------- saphyr/src/scanner.rs | 48 +++---- saphyr/src/yaml.rs | 109 +++++++------- saphyr/tests/spec_test.rs | 14 +- saphyr/tests/test_round_trip.rs | 25 ++-- 8 files changed, 273 insertions(+), 269 deletions(-) diff --git a/saphyr/examples/dump_yaml.rs b/saphyr/examples/dump_yaml.rs index 8fce0f3..3455a9a 100644 --- a/saphyr/examples/dump_yaml.rs +++ b/saphyr/examples/dump_yaml.rs @@ -21,13 +21,13 @@ fn dump_node(doc: &yaml::Yaml, indent: usize) { yaml::Yaml::Hash(ref h) => { for (k, v) in h { print_indent(indent); - println!("{:?}:", k); + println!("{k:?}:"); dump_node(v, indent + 1); } } _ => { print_indent(indent); - println!("{:?}", doc); + println!("{doc:?}"); } } } diff --git a/saphyr/src/emitter.rs b/saphyr/src/emitter.rs index f20a3ed..bf1bbaa 100644 --- a/saphyr/src/emitter.rs +++ b/saphyr/src/emitter.rs @@ -1,7 +1,7 @@ +use crate::yaml::{Hash, Yaml}; use std::convert::From; use std::error::Error; use std::fmt::{self, Display}; -use crate::yaml::{Hash, Yaml}; #[derive(Copy, Clone, Debug)] pub enum EmitError { @@ -30,6 +30,7 @@ impl From for EmitError { } } +#[allow(clippy::module_name_repetitions)] pub struct YamlEmitter<'a> { writer: &'a mut dyn fmt::Write, best_indent: usize, @@ -126,6 +127,7 @@ impl<'a> YamlEmitter<'a> { } /// Determine if this emitter is using 'compact inline notation'. + #[must_use] pub fn is_compact(&self) -> bool { self.compact } @@ -157,7 +159,7 @@ impl<'a> YamlEmitter<'a> { if need_quotes(v) { escape_str(self.writer, v)?; } else { - write!(self.writer, "{}", v)?; + write!(self.writer, "{v}")?; } Ok(()) } @@ -170,11 +172,11 @@ impl<'a> YamlEmitter<'a> { Ok(()) } Yaml::Integer(v) => { - write!(self.writer, "{}", v)?; + write!(self.writer, "{v}")?; Ok(()) } Yaml::Real(ref v) => { - write!(self.writer, "{}", v)?; + write!(self.writer, "{v}")?; Ok(()) } Yaml::Null | Yaml::BadValue => { @@ -182,7 +184,7 @@ impl<'a> YamlEmitter<'a> { Ok(()) } // XXX(chenyh) Alias - _ => Ok(()), + Yaml::Alias(_) => Ok(()), } } @@ -210,10 +212,7 @@ impl<'a> YamlEmitter<'a> { } else { self.level += 1; for (cnt, (k, v)) in h.iter().enumerate() { - let complex_key = match *k { - Yaml::Hash(_) | Yaml::Array(_) => true, - _ => false, - }; + let complex_key = matches!(*k, Yaml::Hash(_) | Yaml::Array(_)); if cnt > 0 { writeln!(self.writer)?; self.write_indent()?; @@ -286,19 +285,22 @@ impl<'a> YamlEmitter<'a> { /// * When the string is null or ~ (otherwise, it would be considered as a null value); /// * When the string looks like a number, such as integers (e.g. 2, 14, etc.), floats (e.g. 2.6, 14.9) and exponential numbers (e.g. 12e7, etc.) (otherwise, it would be treated as a numeric value); /// * When the string looks like a date (e.g. 2014-12-31) (otherwise it would be automatically converted into a Unix timestamp). +#[allow(clippy::doc_markdown)] fn need_quotes(string: &str) -> bool { fn need_quotes_spaces(string: &str) -> bool { string.starts_with(' ') || string.ends_with(' ') } - string == "" + string.is_empty() || need_quotes_spaces(string) - || string.starts_with(|character: char| match character { - '&' | '*' | '?' | '|' | '-' | '<' | '>' | '=' | '!' | '%' | '@' => true, - _ => false, + || string.starts_with(|character: char| { + matches!( + character, + '&' | '*' | '?' | '|' | '-' | '<' | '>' | '=' | '!' | '%' | '@' + ) }) - || string.contains(|character: char| match character { - ':' + || string.contains(|character: char| { + matches!(character, ':' | '{' | '}' | '[' @@ -314,8 +316,7 @@ fn need_quotes(string: &str) -> bool { | '\n' | '\r' | '\x0e'..='\x1a' - | '\x1c'..='\x1f' => true, - _ => false, + | '\x1c'..='\x1f') }) || [ // http://yaml.org/type/bool.html @@ -335,6 +336,7 @@ fn need_quotes(string: &str) -> bool { } #[cfg(test)] +#[allow(clippy::similar_names)] mod test { use super::*; use crate::YamlLoader; @@ -354,18 +356,18 @@ a4: - 2 "; - let docs = YamlLoader::load_from_str(&s).unwrap(); + let docs = YamlLoader::load_from_str(s).unwrap(); let doc = &docs[0]; let mut writer = String::new(); { let mut emitter = YamlEmitter::new(&mut writer); emitter.dump(doc).unwrap(); } - println!("original:\n{}", s); - println!("emitted:\n{}", writer); + println!("original:\n{s}"); + println!("emitted:\n{writer}"); let docs_new = match YamlLoader::load_from_str(&writer) { Ok(y) => y, - Err(e) => panic!(format!("{}", e)), + Err(e) => panic!("{}", e), }; let doc_new = &docs_new[0]; @@ -393,7 +395,7 @@ products: {}: empty hash key "#; - let docs = YamlLoader::load_from_str(&s).unwrap(); + let docs = YamlLoader::load_from_str(s).unwrap(); let doc = &docs[0]; let mut writer = String::new(); { @@ -402,7 +404,7 @@ products: } let docs_new = match YamlLoader::load_from_str(&writer) { Ok(y) => y, - Err(e) => panic!(format!("{}", e)), + Err(e) => panic!("{}", e), }; let doc_new = &docs_new[0]; assert_eq!(doc, doc_new); @@ -444,7 +446,7 @@ x: test y: avoid quoting here z: string with spaces"#; - let docs = YamlLoader::load_from_str(&s).unwrap(); + let docs = YamlLoader::load_from_str(s).unwrap(); let doc = &docs[0]; let mut writer = String::new(); { @@ -452,7 +454,7 @@ z: string with spaces"#; emitter.dump(doc).unwrap(); } - assert_eq!(s, writer, "actual:\n\n{}\n", writer); + assert_eq!(s, writer, "actual:\n\n{writer}\n"); } #[test] @@ -502,7 +504,7 @@ null0: ~ bool0: true bool1: false"#; - let docs = YamlLoader::load_from_str(&input).unwrap(); + let docs = YamlLoader::load_from_str(input).unwrap(); let doc = &docs[0]; let mut writer = String::new(); { @@ -512,19 +514,18 @@ bool1: false"#; assert_eq!( expected, writer, - "expected:\n{}\nactual:\n{}\n", - expected, writer + "expected:\n{expected}\nactual:\n{writer}\n", ); } #[test] fn test_empty_and_nested() { - test_empty_and_nested_flag(false) + test_empty_and_nested_flag(false); } #[test] fn test_empty_and_nested_compact() { - test_empty_and_nested_flag(true) + test_empty_and_nested_flag(true); } fn test_empty_and_nested_flag(compact: bool) { @@ -551,7 +552,7 @@ e: h: []"# }; - let docs = YamlLoader::load_from_str(&s).unwrap(); + let docs = YamlLoader::load_from_str(s).unwrap(); let doc = &docs[0]; let mut writer = String::new(); { @@ -573,15 +574,15 @@ a: - - e - f"#; - let docs = YamlLoader::load_from_str(&s).unwrap(); + let docs = YamlLoader::load_from_str(s).unwrap(); let doc = &docs[0]; let mut writer = String::new(); { let mut emitter = YamlEmitter::new(&mut writer); emitter.dump(doc).unwrap(); } - println!("original:\n{}", s); - println!("emitted:\n{}", writer); + println!("original:\n{s}"); + println!("emitted:\n{writer}"); assert_eq!(s, writer); } @@ -597,15 +598,15 @@ a: - - f - - e"#; - let docs = YamlLoader::load_from_str(&s).unwrap(); + let docs = YamlLoader::load_from_str(s).unwrap(); let doc = &docs[0]; let mut writer = String::new(); { let mut emitter = YamlEmitter::new(&mut writer); emitter.dump(doc).unwrap(); } - println!("original:\n{}", s); - println!("emitted:\n{}", writer); + println!("original:\n{s}"); + println!("emitted:\n{writer}"); assert_eq!(s, writer); } @@ -619,17 +620,16 @@ a: d: e: f"#; - let docs = YamlLoader::load_from_str(&s).unwrap(); + let docs = YamlLoader::load_from_str(s).unwrap(); let doc = &docs[0]; let mut writer = String::new(); { let mut emitter = YamlEmitter::new(&mut writer); emitter.dump(doc).unwrap(); } - println!("original:\n{}", s); - println!("emitted:\n{}", writer); + println!("original:\n{s}"); + println!("emitted:\n{writer}"); assert_eq!(s, writer); } - } diff --git a/saphyr/src/lib.rs b/saphyr/src/lib.rs index ae95e0f..ee432ca 100644 --- a/saphyr/src/lib.rs +++ b/saphyr/src/lib.rs @@ -37,11 +37,16 @@ //! ``` #![doc(html_root_url = "https://docs.rs/yaml-rust/0.4.5")] -#![cfg_attr(feature = "cargo-clippy", allow(renamed_and_removed_lints))] -#![cfg_attr(feature = "cargo-clippy", warn(cyclomatic_complexity))] +#![cfg_attr(feature = "cargo-clippy", warn(clippy::pedantic))] #![cfg_attr( feature = "cargo-clippy", - allow(match_same_arms, should_implement_trait) + allow( + clippy::match_same_arms, + clippy::should_implement_trait, + clippy::missing_errors_doc, + clippy::missing_panics_doc, + clippy::redundant_else, + ) )] extern crate linked_hash_map; @@ -117,5 +122,4 @@ key1:a2 assert!(YamlLoader::load_from_str(s).is_err()); assert!(try_fail(s).is_err()); } - } diff --git a/saphyr/src/parser.rs b/saphyr/src/parser.rs index 4a63146..90539f1 100644 --- a/saphyr/src/parser.rs +++ b/saphyr/src/parser.rs @@ -1,4 +1,4 @@ -use crate::scanner::*; +use crate::scanner::{Marker, ScanError, Scanner, TScalarStyle, Token, TokenType}; use std::collections::HashMap; #[derive(Clone, Copy, PartialEq, Debug, Eq)] @@ -58,11 +58,12 @@ impl Event { } fn empty_scalar_with_anchor(anchor: usize, tag: Option) -> Event { - Event::Scalar("".to_owned(), TScalarStyle::Plain, anchor, tag) + Event::Scalar(String::new(), TScalarStyle::Plain, anchor, tag) } } #[derive(Debug)] +#[allow(dead_code)] pub struct Parser { scanner: Scanner, states: Vec, @@ -84,7 +85,7 @@ pub trait MarkedEventReceiver { impl MarkedEventReceiver for R { fn on_event(&mut self, ev: Event, _mark: Marker) { - self.on_event(ev) + self.on_event(ev); } } @@ -107,12 +108,11 @@ impl> Parser { } pub fn peek(&mut self) -> Result<&(Event, Marker), ScanError> { - match self.current { - Some(ref x) => Ok(x), - None => { - self.current = Some(self.next()?); - self.peek() - } + if let Some(ref x) = self.current { + Ok(x) + } else { + self.current = Some(self.next()?); + self.peek() } } @@ -155,7 +155,7 @@ impl> Parser { //self.peek_token(); } fn pop_state(&mut self) { - self.state = self.states.pop().unwrap() + self.state = self.states.pop().unwrap(); } fn push_state(&mut self, state: State) { self.states.push(state); @@ -242,7 +242,7 @@ impl> Parser { self.load_mapping(recv) } _ => { - println!("UNREACHABLE EVENT: {:?}", first_ev); + println!("UNREACHABLE EVENT: {first_ev:?}"); unreachable!(); } } @@ -345,9 +345,12 @@ impl> Parser { self.skip(); Ok((Event::StreamEnd, mark)) } - Token(_, TokenType::VersionDirective(..)) - | Token(_, TokenType::TagDirective(..)) - | Token(_, TokenType::DocumentStart) => { + Token( + _, + TokenType::VersionDirective(..) + | TokenType::TagDirective(..) + | TokenType::DocumentStart, + ) => { // explicit document self._explicit_document_start() } @@ -403,11 +406,14 @@ impl> Parser { fn document_content(&mut self) -> ParseResult { match *self.peek_token()? { - Token(mark, TokenType::VersionDirective(..)) - | Token(mark, TokenType::TagDirective(..)) - | Token(mark, TokenType::DocumentStart) - | Token(mark, TokenType::DocumentEnd) - | Token(mark, TokenType::StreamEnd) => { + Token( + mark, + TokenType::VersionDirective(..) + | TokenType::TagDirective(..) + | TokenType::DocumentStart + | TokenType::DocumentEnd + | TokenType::StreamEnd, + ) => { self.pop_state(); // empty scalar Ok((Event::empty_scalar(), mark)) @@ -417,11 +423,9 @@ impl> Parser { } fn document_end(&mut self) -> ParseResult { - let mut _implicit = true; let marker: Marker = match *self.peek_token()? { Token(mark, TokenType::DocumentEnd) => { self.skip(); - _implicit = false; mark } Token(mark, _) => mark, @@ -432,7 +436,7 @@ impl> Parser { Ok((Event::DocumentEnd, marker)) } - fn register_anchor(&mut self, name: String, _: &Marker) -> Result { + fn register_anchor(&mut self, name: String, _: &Marker) -> usize { // anchors can be overridden/reused // if self.anchors.contains_key(name) { // return Err(ScanError::new(*mark, @@ -441,7 +445,7 @@ impl> Parser { let new_id = self.anchor_id; self.anchor_id += 1; self.anchors.insert(name, new_id); - Ok(new_id) + new_id } fn parse_node(&mut self, block: bool, indentless_sequence: bool) -> ParseResult { @@ -466,7 +470,7 @@ impl> Parser { } Token(_, TokenType::Anchor(_)) => { if let Token(mark, TokenType::Anchor(name)) = self.fetch_token() { - anchor_id = self.register_anchor(name, &mark)?; + anchor_id = self.register_anchor(name, &mark); if let TokenType::Tag(..) = self.peek_token()?.1 { if let tg @ TokenType::Tag(..) = self.fetch_token().1 { tag = Some(tg); @@ -483,7 +487,7 @@ impl> Parser { tag = Some(tg); if let TokenType::Anchor(_) = self.peek_token()?.1 { if let Token(mark, TokenType::Anchor(name)) = self.fetch_token() { - anchor_id = self.register_anchor(name, &mark)?; + anchor_id = self.register_anchor(name, &mark); } else { unreachable!() } @@ -545,18 +549,15 @@ impl> Parser { match *self.peek_token()? { Token(_, TokenType::Key) => { self.skip(); - match *self.peek_token()? { - Token(mark, TokenType::Key) - | Token(mark, TokenType::Value) - | Token(mark, TokenType::BlockEnd) => { - self.state = State::BlockMappingValue; - // empty scalar - Ok((Event::empty_scalar(), mark)) - } - _ => { - self.push_state(State::BlockMappingValue); - self.parse_node(true, true) - } + if let Token(mark, TokenType::Key | TokenType::Value | TokenType::BlockEnd) = + *self.peek_token()? + { + self.state = State::BlockMappingValue; + // empty scalar + Ok((Event::empty_scalar(), mark)) + } else { + self.push_state(State::BlockMappingValue); + self.parse_node(true, true) } } // XXX(chenyh): libyaml failed to parse spec 1.2, ex8.18 @@ -580,18 +581,15 @@ impl> Parser { match *self.peek_token()? { Token(_, TokenType::Value) => { self.skip(); - match *self.peek_token()? { - Token(mark, TokenType::Key) - | Token(mark, TokenType::Value) - | Token(mark, TokenType::BlockEnd) => { - self.state = State::BlockMappingKey; - // empty scalar - Ok((Event::empty_scalar(), mark)) - } - _ => { - self.push_state(State::BlockMappingKey); - self.parse_node(true, true) - } + if let Token(mark, TokenType::Key | TokenType::Value | TokenType::BlockEnd) = + *self.peek_token()? + { + self.state = State::BlockMappingKey; + // empty scalar + Ok((Event::empty_scalar(), mark)) + } else { + self.push_state(State::BlockMappingKey); + self.parse_node(true, true) } } Token(mark, _) => { @@ -607,50 +605,50 @@ impl> Parser { let _ = self.peek_token()?; self.skip(); } - let marker: Marker = - { - match *self.peek_token()? { - Token(mark, TokenType::FlowMappingEnd) => mark, - Token(mark, _) => { - if !first { - match *self.peek_token()? { - Token(_, TokenType::FlowEntry) => self.skip(), - Token(mark, _) => return Err(ScanError::new(mark, - "while parsing a flow mapping, did not find expected ',' or '}'")) - } - } - + let marker: Marker = { + match *self.peek_token()? { + Token(mark, TokenType::FlowMappingEnd) => mark, + Token(mark, _) => { + if !first { match *self.peek_token()? { - Token(_, TokenType::Key) => { - self.skip(); - match *self.peek_token()? { - Token(mark, TokenType::Value) - | Token(mark, TokenType::FlowEntry) - | Token(mark, TokenType::FlowMappingEnd) => { - self.state = State::FlowMappingValue; - return Ok((Event::empty_scalar(), mark)); - } - _ => { - self.push_state(State::FlowMappingValue); - return self.parse_node(false, false); - } - } - } - Token(marker, TokenType::Value) => { + Token(_, TokenType::FlowEntry) => self.skip(), + Token(mark, _) => return Err(ScanError::new( + mark, + "while parsing a flow mapping, did not find expected ',' or '}'", + )), + } + } + + match *self.peek_token()? { + Token(_, TokenType::Key) => { + self.skip(); + if let Token( + mark, + TokenType::Value | TokenType::FlowEntry | TokenType::FlowMappingEnd, + ) = *self.peek_token()? + { self.state = State::FlowMappingValue; - return Ok((Event::empty_scalar(), marker)); - } - Token(_, TokenType::FlowMappingEnd) => (), - _ => { - self.push_state(State::FlowMappingEmptyValue); + return Ok((Event::empty_scalar(), mark)); + } else { + self.push_state(State::FlowMappingValue); return self.parse_node(false, false); } } - - mark + Token(marker, TokenType::Value) => { + self.state = State::FlowMappingValue; + return Ok((Event::empty_scalar(), marker)); + } + Token(_, TokenType::FlowMappingEnd) => (), + _ => { + self.push_state(State::FlowMappingEmptyValue); + return self.parse_node(false, false); + } } + + mark } - }; + } + }; self.pop_state(); self.skip(); @@ -736,18 +734,16 @@ impl> Parser { } } self.skip(); - match *self.peek_token()? { - Token(mark, TokenType::BlockEntry) - | Token(mark, TokenType::Key) - | Token(mark, TokenType::Value) - | Token(mark, TokenType::BlockEnd) => { - self.state = State::IndentlessSequenceEntry; - Ok((Event::empty_scalar(), mark)) - } - _ => { - self.push_state(State::IndentlessSequenceEntry); - self.parse_node(true, false) - } + if let Token( + mark, + TokenType::BlockEntry | TokenType::Key | TokenType::Value | TokenType::BlockEnd, + ) = *self.peek_token()? + { + self.state = State::IndentlessSequenceEntry; + Ok((Event::empty_scalar(), mark)) + } else { + self.push_state(State::IndentlessSequenceEntry); + self.parse_node(true, false) } } @@ -766,15 +762,14 @@ impl> Parser { } Token(_, TokenType::BlockEntry) => { self.skip(); - match *self.peek_token()? { - Token(mark, TokenType::BlockEntry) | Token(mark, TokenType::BlockEnd) => { - self.state = State::BlockSequenceEntry; - Ok((Event::empty_scalar(), mark)) - } - _ => { - self.push_state(State::BlockSequenceEntry); - self.parse_node(true, false) - } + if let Token(mark, TokenType::BlockEntry | TokenType::BlockEnd) = + *self.peek_token()? + { + self.state = State::BlockSequenceEntry; + Ok((Event::empty_scalar(), mark)) + } else { + self.push_state(State::BlockSequenceEntry); + self.parse_node(true, false) } } Token(mark, _) => Err(ScanError::new( @@ -785,18 +780,15 @@ impl> Parser { } fn flow_sequence_entry_mapping_key(&mut self) -> ParseResult { - match *self.peek_token()? { - Token(mark, TokenType::Value) - | Token(mark, TokenType::FlowEntry) - | Token(mark, TokenType::FlowSequenceEnd) => { - self.skip(); - self.state = State::FlowSequenceEntryMappingValue; - Ok((Event::empty_scalar(), mark)) - } - _ => { - self.push_state(State::FlowSequenceEntryMappingValue); - self.parse_node(false, false) - } + if let Token(mark, TokenType::Value | TokenType::FlowEntry | TokenType::FlowSequenceEnd) = + *self.peek_token()? + { + self.skip(); + self.state = State::FlowSequenceEntryMappingValue; + Ok((Event::empty_scalar(), mark)) + } else { + self.push_state(State::FlowSequenceEntryMappingValue); + self.parse_node(false, false) } } @@ -805,15 +797,14 @@ impl> Parser { Token(_, TokenType::Value) => { self.skip(); self.state = State::FlowSequenceEntryMappingValue; - match *self.peek_token()? { - Token(mark, TokenType::FlowEntry) | Token(mark, TokenType::FlowSequenceEnd) => { - self.state = State::FlowSequenceEntryMappingEnd; - Ok((Event::empty_scalar(), mark)) - } - _ => { - self.push_state(State::FlowSequenceEntryMappingEnd); - self.parse_node(false, false) - } + if let Token(mark, TokenType::FlowEntry | TokenType::FlowSequenceEnd) = + *self.peek_token()? + { + self.state = State::FlowSequenceEntryMappingEnd; + Ok((Event::empty_scalar(), mark)) + } else { + self.push_state(State::FlowSequenceEntryMappingEnd); + self.parse_node(false, false) } } Token(mark, _) => { @@ -823,6 +814,7 @@ impl> Parser { } } + #[allow(clippy::unnecessary_wraps)] fn flow_sequence_entry_mapping_end(&mut self) -> ParseResult { self.state = State::FlowSequenceEntry; Ok((Event::MappingEnd, self.scanner.mark())) diff --git a/saphyr/src/scanner.rs b/saphyr/src/scanner.rs index a8659a8..c352df8 100644 --- a/saphyr/src/scanner.rs +++ b/saphyr/src/scanner.rs @@ -1,3 +1,6 @@ +#![allow(clippy::cast_possible_wrap)] +#![allow(clippy::cast_sign_loss)] + use std::collections::VecDeque; use std::error::Error; use std::{char, fmt}; @@ -30,14 +33,17 @@ impl Marker { Marker { index, line, col } } + #[must_use] pub fn index(&self) -> usize { self.index } + #[must_use] pub fn line(&self) -> usize { self.line } + #[must_use] pub fn col(&self) -> usize { self.col } @@ -50,6 +56,7 @@ pub struct ScanError { } impl ScanError { + #[must_use] pub fn new(loc: Marker, info: &str) -> ScanError { ScanError { mark: loc, @@ -57,6 +64,7 @@ impl ScanError { } } + #[must_use] pub fn marker(&self) -> &Marker { &self.mark } @@ -137,6 +145,7 @@ impl SimpleKey { } #[derive(Debug)] +#[allow(clippy::struct_excessive_bools)] pub struct Scanner { rdr: T, mark: Marker, @@ -194,19 +203,15 @@ fn is_blankz(c: char) -> bool { } #[inline] fn is_digit(c: char) -> bool { - c >= '0' && c <= '9' + c.is_ascii_digit() } #[inline] fn is_alpha(c: char) -> bool { - match c { - '0'..='9' | 'a'..='z' | 'A'..='Z' => true, - '_' | '-' => true, - _ => false, - } + matches!(c, '0'..='9' | 'a'..='z' | 'A'..='Z' | '_' | '-') } #[inline] fn is_hex(c: char) -> bool { - (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F') + c.is_ascii_digit() || ('a'..='f').contains(&c) || ('A'..='F').contains(&c) } #[inline] fn as_hex(c: char) -> u32 { @@ -219,10 +224,7 @@ fn as_hex(c: char) -> u32 { } #[inline] fn is_flow(c: char) -> bool { - match c { - ',' | '[' | ']' | '{' | '}' => true, - _ => false, - } + matches!(c, ',' | '[' | ']' | '{' | '}') } pub type ScanResult = Result<(), ScanError>; @@ -251,10 +253,7 @@ impl> Scanner { } #[inline] pub fn get_error(&self) -> Option { - match self.error { - None => None, - Some(ref e) => Some(e.clone()), - } + self.error.as_ref().map(std::clone::Clone::clone) } #[inline] @@ -419,7 +418,7 @@ impl> Scanner { ':' | '?' if !is_blankz(nc) && self.flow_level == 0 => self.fetch_plain_scalar(), '%' | '@' | '`' => Err(ScanError::new( self.mark, - &format!("unexpected character: `{}'", c), + &format!("unexpected character: `{c}'"), )), _ => self.fetch_plain_scalar(), } @@ -697,7 +696,7 @@ impl> Scanner { } let is_secondary = handle == "!!"; - let prefix = self.scan_tag_uri(true, is_secondary, &String::new(), mark)?; + let prefix = self.scan_tag_uri(true, is_secondary, "", mark)?; self.lookahead(1); @@ -733,7 +732,7 @@ impl> Scanner { // Eat '!<' self.skip(); self.skip(); - suffix = self.scan_tag_uri(false, false, &String::new(), &start_mark)?; + suffix = self.scan_tag_uri(false, false, "", &start_mark)?; if self.ch() != '>' { return Err(ScanError::new( @@ -751,7 +750,7 @@ impl> Scanner { if handle == "!!" { secondary = true; } - suffix = self.scan_tag_uri(false, secondary, &String::new(), &start_mark)?; + suffix = self.scan_tag_uri(false, secondary, "", &start_mark)?; } else { suffix = self.scan_tag_uri(false, false, &handle, &start_mark)?; handle = "!".to_owned(); @@ -1072,6 +1071,7 @@ impl> Scanner { Ok(()) } + #[allow(clippy::too_many_lines)] fn scan_block_scalar(&mut self, literal: bool) -> Result { let start_mark = self.mark; let mut chomping: i32 = 0; @@ -1280,6 +1280,7 @@ impl> Scanner { Ok(()) } + #[allow(clippy::too_many_lines)] fn scan_flow_scalar(&mut self, single: bool) -> Result { let start_mark = self.mark; @@ -1389,12 +1390,8 @@ impl> Scanner { value = (value << 4) + as_hex(self.buffer[i]); } - let ch = match char::from_u32(value) { - Some(v) => v, - None => { - return Err(ScanError::new(start_mark, - "while parsing a quoted scalar, found invalid Unicode character escape code")); - } + let Some(ch) = char::from_u32(value) else { + return Err(ScanError::new(start_mark, "while parsing a quoted scalar, found invalid Unicode character escape code")); }; string.push(ch); @@ -1739,6 +1736,7 @@ impl> Scanner { } #[cfg(test)] +#[allow(clippy::enum_glob_use)] mod test { use super::TokenType::*; use super::*; diff --git a/saphyr/src/yaml.rs b/saphyr/src/yaml.rs index 4bb70da..939cb58 100644 --- a/saphyr/src/yaml.rs +++ b/saphyr/src/yaml.rs @@ -1,6 +1,8 @@ -use linked_hash_map::LinkedHashMap; -use crate::parser::*; +#![allow(clippy::module_name_repetitions)] + +use crate::parser::{Event, MarkedEventReceiver, Parser}; use crate::scanner::{Marker, ScanError, TScalarStyle, TokenType}; +use linked_hash_map::LinkedHashMap; use std::collections::BTreeMap; use std::f64; use std::i64; @@ -201,6 +203,7 @@ impl YamlLoader { macro_rules! define_as ( ($name:ident, $t:ident, $yt:ident) => ( +#[must_use] pub fn $name(&self) -> Option<$t> { match *self { Yaml::$yt(v) => Some(v), @@ -212,6 +215,7 @@ pub fn $name(&self) -> Option<$t> { macro_rules! define_as_ref ( ($name:ident, $t:ty, $yt:ident) => ( +#[must_use] pub fn $name(&self) -> Option<$t> { match *self { Yaml::$yt(ref v) => Some(v), @@ -223,6 +227,7 @@ pub fn $name(&self) -> Option<$t> { macro_rules! define_into ( ($name:ident, $t:ty, $yt:ident) => ( +#[must_use] pub fn $name(self) -> Option<$t> { match self { Yaml::$yt(v) => Some(v), @@ -246,59 +251,58 @@ impl Yaml { define_into!(into_hash, Hash, Hash); define_into!(into_vec, Array, Array); + /// Returns the is null of this [`Yaml`]. + #[must_use] pub fn is_null(&self) -> bool { - match *self { - Yaml::Null => true, - _ => false, - } + matches!(*self, Yaml::Null) } + /// Returns the is badvalue of this [`Yaml`]. + #[must_use] pub fn is_badvalue(&self) -> bool { - match *self { - Yaml::BadValue => true, - _ => false, - } + matches!(*self, Yaml::BadValue) } + #[must_use] pub fn is_array(&self) -> bool { - match *self { - Yaml::Array(_) => true, - _ => false, - } + matches!(*self, Yaml::Array(_)) } + #[must_use] pub fn as_f64(&self) -> Option { - match *self { - Yaml::Real(ref v) => parse_f64(v), - _ => None, + if let Yaml::Real(ref v) = self { + parse_f64(v) + } else { + None } } + #[must_use] pub fn into_f64(self) -> Option { - match self { - Yaml::Real(ref v) => parse_f64(v), - _ => None, + if let Yaml::Real(ref v) = self { + parse_f64(v) + } else { + None } } } -#[cfg_attr(feature = "cargo-clippy", allow(should_implement_trait))] +#[cfg_attr(feature = "cargo-clippy", allow(clippy::should_implement_trait))] impl Yaml { // Not implementing FromStr because there is no possibility of Error. // This function falls back to Yaml::String if nothing else matches. + #[must_use] pub fn from_str(v: &str) -> Yaml { - if v.starts_with("0x") { - if let Ok(i) = i64::from_str_radix(&v[2..], 16) { + if let Some(number) = v.strip_prefix("0x") { + if let Ok(i) = i64::from_str_radix(number, 16) { return Yaml::Integer(i); } - } - if v.starts_with("0o") { - if let Ok(i) = i64::from_str_radix(&v[2..], 8) { + } else if let Some(number) = v.strip_prefix("0o") { + if let Ok(i) = i64::from_str_radix(number, 8) { return Yaml::Integer(i); } - } - if v.starts_with('+') { - if let Ok(i) = v[1..].parse::() { + } else if let Some(number) = v.strip_prefix('+') { + if let Ok(i) = number.parse::() { return Yaml::Integer(i); } } @@ -306,10 +310,15 @@ impl Yaml { "~" | "null" => Yaml::Null, "true" => Yaml::Boolean(true), "false" => Yaml::Boolean(false), - _ if v.parse::().is_ok() => Yaml::Integer(v.parse::().unwrap()), - // try parsing as f64 - _ if parse_f64(v).is_some() => Yaml::Real(v.to_owned()), - _ => Yaml::String(v.to_owned()), + _ => { + if let Ok(integer) = v.parse::() { + Yaml::Integer(integer) + } else if parse_f64(v).is_some() { + Yaml::Real(v.to_owned()) + } else { + Yaml::String(v.to_owned()) + } + } } } } @@ -348,7 +357,7 @@ impl IntoIterator for Yaml { fn into_iter(self) -> Self::IntoIter { YamlIter { - yaml: self.into_vec().unwrap_or_else(Vec::new).into_iter(), + yaml: self.into_vec().unwrap_or_default().into_iter(), } } } @@ -366,9 +375,11 @@ impl Iterator for YamlIter { } #[cfg(test)] +#[allow(clippy::bool_assert_comparison)] +#[allow(clippy::float_cmp)] mod test { + use crate::yaml::{vec, Yaml, YamlLoader}; use std::f64; - use crate::yaml::*; #[test] fn test_coerce() { let s = "--- @@ -376,7 +387,7 @@ a: 1 b: 2.2 c: [1, 2] "; - let out = YamlLoader::load_from_str(&s).unwrap(); + let out = YamlLoader::load_from_str(s).unwrap(); let doc = &out[0]; assert_eq!(doc["a"].as_i64().unwrap(), 1i64); assert_eq!(doc["b"].as_f64().unwrap(), 2.2f64); @@ -386,7 +397,7 @@ c: [1, 2] #[test] fn test_empty_doc() { - let s: String = "".to_owned(); + let s: String = String::new(); YamlLoader::load_from_str(&s).unwrap(); let s: String = "---".to_owned(); assert_eq!(YamlLoader::load_from_str(&s).unwrap()[0], Yaml::Null); @@ -425,7 +436,7 @@ a7: 你好 --- 'a scalar' "; - let out = YamlLoader::load_from_str(&s).unwrap(); + let out = YamlLoader::load_from_str(s).unwrap(); assert_eq!(out.len(), 3); } @@ -437,7 +448,7 @@ a1: &DEFAULT b2: d a2: *DEFAULT "; - let out = YamlLoader::load_from_str(&s).unwrap(); + let out = YamlLoader::load_from_str(s).unwrap(); let doc = &out[0]; assert_eq!(doc["a2"]["b1"].as_i64().unwrap(), 4); } @@ -449,7 +460,7 @@ a1: &DEFAULT b1: 4 b2: *DEFAULT "; - let out = YamlLoader::load_from_str(&s).unwrap(); + let out = YamlLoader::load_from_str(s).unwrap(); let doc = &out[0]; assert_eq!(doc["a1"]["b2"], Yaml::BadValue); } @@ -458,7 +469,7 @@ a1: &DEFAULT fn test_github_27() { // https://github.com/chyh1990/yaml-rust/issues/27 let s = "&a"; - let out = YamlLoader::load_from_str(&s).unwrap(); + let out = YamlLoader::load_from_str(s).unwrap(); let doc = &out[0]; assert_eq!(doc.as_str().unwrap(), ""); } @@ -494,7 +505,7 @@ a1: &DEFAULT - +12345 - [ true, false ] "; - let out = YamlLoader::load_from_str(&s).unwrap(); + let out = YamlLoader::load_from_str(s).unwrap(); let doc = &out[0]; assert_eq!(doc[0].as_str().unwrap(), "string"); @@ -531,14 +542,14 @@ a1: &DEFAULT fn test_bad_hyphen() { // See: https://github.com/chyh1990/yaml-rust/issues/23 let s = "{-"; - assert!(YamlLoader::load_from_str(&s).is_err()); + assert!(YamlLoader::load_from_str(s).is_err()); } #[test] fn test_issue_65() { // See: https://github.com/chyh1990/yaml-rust/issues/65 let b = "\n\"ll\\\"ll\\\r\n\"ll\\\"ll\\\r\r\r\rU\r\r\rU"; - assert!(YamlLoader::load_from_str(&b).is_err()); + assert!(YamlLoader::load_from_str(b).is_err()); } #[test] @@ -582,7 +593,7 @@ a1: &DEFAULT - .NAN - !!float .INF "; - let mut out = YamlLoader::load_from_str(&s).unwrap().into_iter(); + let mut out = YamlLoader::load_from_str(s).unwrap().into_iter(); let mut doc = out.next().unwrap().into_iter(); assert_eq!(doc.next().unwrap().into_string().unwrap(), "string"); @@ -614,7 +625,7 @@ b: ~ a: ~ c: ~ "; - let out = YamlLoader::load_from_str(&s).unwrap(); + let out = YamlLoader::load_from_str(s).unwrap(); let first = out.into_iter().next().unwrap(); let mut iter = first.into_hash().unwrap().into_iter(); assert_eq!( @@ -640,7 +651,7 @@ c: ~ 1: important: false "; - let out = YamlLoader::load_from_str(&s).unwrap(); + let out = YamlLoader::load_from_str(s).unwrap(); let first = out.into_iter().next().unwrap(); assert_eq!(first[0]["important"].as_bool().unwrap(), true); } @@ -716,10 +727,10 @@ subcommands3: about: server related commands "#; - let out = YamlLoader::load_from_str(&s).unwrap(); + let out = YamlLoader::load_from_str(s).unwrap(); let doc = &out.into_iter().next().unwrap(); - println!("{:#?}", doc); + println!("{doc:#?}"); assert_eq!(doc["subcommands"][0]["server"], Yaml::Null); assert!(doc["subcommands2"][0]["server"].as_hash().is_some()); assert!(doc["subcommands3"][0]["server"].as_hash().is_some()); diff --git a/saphyr/tests/spec_test.rs b/saphyr/tests/spec_test.rs index 442728f..5e6dfa3 100644 --- a/saphyr/tests/spec_test.rs +++ b/saphyr/tests/spec_test.rs @@ -6,7 +6,7 @@ use yaml_rust::parser::{Event, EventReceiver, Parser}; use yaml_rust::scanner::TScalarStyle; // These names match the names used in the C++ test suite. -#[cfg_attr(feature = "cargo-clippy", allow(enum_variant_names))] +#[cfg_attr(feature = "cargo-clippy", allow(clippy::enum_variant_names))] #[derive(Clone, PartialEq, PartialOrd, Debug)] enum TestEvent { OnDocumentStart, @@ -76,24 +76,18 @@ include!("spec_test.rs.inc"); #[test] fn test_mapvec_legal() { - use yaml_rust::yaml::{Array, Hash, Yaml}; + use yaml_rust::yaml::{Hash, Yaml}; use yaml_rust::{YamlEmitter, YamlLoader}; // Emitting a `map>, _>` should result in legal yaml that // we can parse. - let mut key = Array::new(); - key.push(Yaml::Integer(1)); - key.push(Yaml::Integer(2)); - key.push(Yaml::Integer(3)); + let key = vec![Yaml::Integer(1), Yaml::Integer(2), Yaml::Integer(3)]; let mut keyhash = Hash::new(); keyhash.insert(Yaml::String("key".into()), Yaml::Array(key)); - let mut val = Array::new(); - val.push(Yaml::Integer(4)); - val.push(Yaml::Integer(5)); - val.push(Yaml::Integer(6)); + let val = vec![Yaml::Integer(4), Yaml::Integer(5), Yaml::Integer(6)]; let mut hash = Hash::new(); hash.insert(Yaml::Hash(keyhash), Yaml::Array(val)); diff --git a/saphyr/tests/test_round_trip.rs b/saphyr/tests/test_round_trip.rs index dc5e85e..4a383a8 100644 --- a/saphyr/tests/test_round_trip.rs +++ b/saphyr/tests/test_round_trip.rs @@ -7,14 +7,14 @@ fn roundtrip(original: &Yaml) { YamlEmitter::new(&mut emitted).dump(original).unwrap(); let documents = YamlLoader::load_from_str(&emitted).unwrap(); - println!("emitted {}", emitted); + println!("emitted {emitted}"); assert_eq!(documents.len(), 1); assert_eq!(documents[0], *original); } fn double_roundtrip(original: &str) { - let parsed = YamlLoader::load_from_str(&original).unwrap(); + let parsed = YamlLoader::load_from_str(original).unwrap(); let mut serialized = String::new(); YamlEmitter::new(&mut serialized).dump(&parsed[0]).unwrap(); @@ -39,27 +39,32 @@ fn test_colon_in_string() { #[test] fn test_numberlike_strings() { let docs = [ - r#"x: "1234""#, r#"x: "01234""#, r#""1234""#, - r#""01234""#, r#"" 01234""#, r#""0x1234""#, + r#"x: "1234""#, + r#"x: "01234""#, + r#""1234""#, + r#""01234""#, + r#"" 01234""#, + r#""0x1234""#, r#"" 0x1234""#, ]; for doc in &docs { - roundtrip(&Yaml::String(doc.to_string())); - double_roundtrip(&doc); + roundtrip(&Yaml::String((*doc).to_string())); + double_roundtrip(doc); } } -/// Example from https://github.com/chyh1990/yaml-rust/issues/133 +/// Example from #[test] fn test_issue133() { - - let doc = YamlLoader::load_from_str("\"0x123\"").unwrap().pop().unwrap(); + let doc = YamlLoader::load_from_str("\"0x123\"") + .unwrap() + .pop() + .unwrap(); assert_eq!(doc, Yaml::String("0x123".to_string())); let mut out_str = String::new(); YamlEmitter::new(&mut out_str).dump(&doc).unwrap(); let doc2 = YamlLoader::load_from_str(&out_str).unwrap().pop().unwrap(); assert_eq!(doc, doc2); // This failed because the type has changed to a number now - } From 0c4a3958291f34ed9bcc6a37d8e67d4327851e0e Mon Sep 17 00:00:00 2001 From: Ethiraric Date: Thu, 17 Aug 2023 02:17:40 +0200 Subject: [PATCH 202/380] Add documentation and move tests to their folder. --- saphyr/src/lib.rs | 73 +------ saphyr/src/parser.rs | 131 ++++++++++-- saphyr/src/yaml.rs | 449 +++++------------------------------------- saphyr/tests/basic.rs | 424 +++++++++++++++++++++++++++++++++++++++ 4 files changed, 597 insertions(+), 480 deletions(-) create mode 100644 saphyr/tests/basic.rs diff --git a/saphyr/src/lib.rs b/saphyr/src/lib.rs index ee432ca..7ccd4db 100644 --- a/saphyr/src/lib.rs +++ b/saphyr/src/lib.rs @@ -13,21 +13,14 @@ //! yaml-rust = "0.4" //! ``` //! -//! And this in your crate root: -//! -//! ```rust -//! extern crate yaml_rust; -//! ``` -//! -//! Parse a string into `Vec` and then serialize it as a YAML string. -//! //! # Examples +//! Parse a string into `Vec` and then serialize it as a YAML string. //! //! ``` //! use yaml_rust::{YamlLoader, YamlEmitter}; //! //! let docs = YamlLoader::load_from_str("[1, 2, 3]").unwrap(); -//! let doc = &docs[0]; // select the first document +//! let doc = &docs[0]; // select the first YAML document //! assert_eq!(doc[0].as_i64().unwrap(), 1); // access elements by index //! //! let mut out_str = String::new(); @@ -61,65 +54,3 @@ pub use crate::emitter::{EmitError, YamlEmitter}; pub use crate::parser::Event; pub use crate::scanner::ScanError; pub use crate::yaml::{Yaml, YamlLoader}; - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_api() { - let s = " -# from yaml-cpp example -- name: Ogre - position: [0, 5, 0] - powers: - - name: Club - damage: 10 - - name: Fist - damage: 8 -- name: Dragon - position: [1, 0, 10] - powers: - - name: Fire Breath - damage: 25 - - name: Claws - damage: 15 -- name: Wizard - position: [5, -3, 0] - powers: - - name: Acid Rain - damage: 50 - - name: Staff - damage: 3 -"; - let docs = YamlLoader::load_from_str(s).unwrap(); - let doc = &docs[0]; - - assert_eq!(doc[0]["name"].as_str().unwrap(), "Ogre"); - - let mut writer = String::new(); - { - let mut emitter = YamlEmitter::new(&mut writer); - emitter.dump(doc).unwrap(); - } - - assert!(!writer.is_empty()); - } - - fn try_fail(s: &str) -> Result, ScanError> { - let t = YamlLoader::load_from_str(s)?; - Ok(t) - } - - #[test] - fn test_fail() { - let s = " -# syntax error -scalar -key: [1, 2]] -key1:a2 -"; - assert!(YamlLoader::load_from_str(s).is_err()); - assert!(try_fail(s).is_err()); - } -} diff --git a/saphyr/src/parser.rs b/saphyr/src/parser.rs index 90539f1..c885856 100644 --- a/saphyr/src/parser.rs +++ b/saphyr/src/parser.rs @@ -29,57 +29,145 @@ enum State { End, } -/// `Event` is used with the low-level event base parsing API, -/// see `EventReceiver` trait. +/// An event generated by the YAML parser. +/// +/// Events are used in the low-level event-based API (push parser). The API entrypoint is the +/// [`EventReceiver`] trait. #[derive(Clone, PartialEq, Debug, Eq)] pub enum Event { - /// Reserved for internal use + /// Reserved for internal use. Nothing, + /// Event generated at the very beginning of parsing. StreamStart, + /// Last event that will be generated by the parser. Signals EOF. StreamEnd, + /// The YAML start document directive (`---`). DocumentStart, + /// The YAML end document directive (`...`). DocumentEnd, - /// Refer to an anchor ID - Alias(usize), + /// A YAML Alias. + Alias( + /// The anchor ID the alias refers to. + usize, + ), /// Value, style, anchor_id, tag Scalar(String, TScalarStyle, usize, Option), - /// Anchor ID - SequenceStart(usize), + SequenceStart( + /// The anchor ID of the start of the squence. + usize, + ), SequenceEnd, - /// Anchor ID - MappingStart(usize), + MappingStart( + /// The anchor ID of the start of the mapping. + usize, + ), MappingEnd, } impl Event { + /// Create an empty scalar. fn empty_scalar() -> Event { // a null scalar Event::Scalar("~".to_owned(), TScalarStyle::Plain, 0, None) } + /// Create an empty scalar with the given anchor. fn empty_scalar_with_anchor(anchor: usize, tag: Option) -> Event { Event::Scalar(String::new(), TScalarStyle::Plain, anchor, tag) } } +/// A YAML parser. #[derive(Debug)] -#[allow(dead_code)] pub struct Parser { scanner: Scanner, states: Vec, state: State, - marks: Vec, token: Option, current: Option<(Event, Marker)>, anchors: HashMap, anchor_id: usize, } +/// Trait to be implemented in order to use the low-level parsing API. +/// +/// The low-level parsing API is event-based (a push parser), calling [`EventReceiver::on_event`] +/// for each YAML [`Event`] that occurs. +/// The [`EventReceiver`] trait only receives events. In order to receive both events and their +/// location in the source, use [`MarkedEventReceiver`]. Note that [`EventReceiver`]s implement +/// [`MarkedEventReceiver`] automatically. +/// +/// # Event hierarchy +/// The event stream starts with an [`Event::StreamStart`] event followed by an +/// [`Event::DocumentStart`] event. If the YAML document starts with a mapping (an object), an +/// [`Event::MappingStart`] event is emitted. If it starts with a sequence (an array), an +/// [`Event::SequenceStart`] event is emitted. Otherwise, an [`Event::Scalar`] event is emitted. +/// +/// In a mapping, key-values are sent as consecutive events. The first event after an +/// [`Event::MappingStart`] will be the key, and following its value. If the mapping contains no +/// sub-mapping or sub-sequence, then even events (starting from 0) will always be keys and odd +/// ones will always be values. The mapping ends when an [`Event::MappingEnd`] event is received. +/// +/// In a sequence, values are sent consecutively until the [`Event::SequenceEnd`] event. +/// +/// If a value is a sub-mapping or a sub-sequence, an [`Event::MappingStart`] or +/// [`Event::SequenceStart`] event will be sent respectively. Following events until the associated +/// [`Event::MappingStart`] or [`Event::SequenceEnd`] (beware of nested mappings or sequences) will +/// be part of the value and not another key-value pair or element in the sequence. +/// +/// For instance, the following yaml: +/// ```yaml +/// a: b +/// c: +/// d: e +/// f: +/// - g +/// - h +/// ``` +/// will emit (indented and commented for lisibility): +/// ```text +/// StreamStart, DocumentStart, MappingStart, +/// Scalar("a", ..), Scalar("b", ..) +/// Scalar("c", ..), MappingStart, Scalar("d", ..), Scalar("e", ..), MappingEnd, +/// Scalar("f", ..), SequenceStart, Scalar("g", ..), Scalar("h", ..), SequenceEnd, +/// MappingEnd, DocumentEnd, StreamEnd +/// ``` +/// +/// # Example +/// ``` +/// # use yaml_rust::parser::{Event, EventReceiver, Parser}; +/// # +/// /// Sink of events. Collects them into an array. +/// struct EventSink { +/// events: Vec, +/// } +/// +/// /// Implement `on_event`, pushing into `self.events`. +/// impl EventReceiver for EventSink { +/// fn on_event(&mut self, ev: Event) { +/// self.events.push(ev); +/// } +/// } +/// +/// /// Load events from a yaml string. +/// fn str_to_events(yaml: &str) -> Vec { +/// let mut sink = EventSink { events: Vec::new() }; +/// let mut parser = Parser::new(yaml.chars()); +/// // Load events using our sink as the receiver. +/// parser.load(&mut sink, true).unwrap(); +/// sink.events +/// } +/// ``` pub trait EventReceiver { + /// Handler called for each YAML event that is emitted by the parser. fn on_event(&mut self, ev: Event); } +/// Trait to be implemented for using the low-level parsing API. +/// +/// Functionally similar to [`EventReceiver`], but receives a [`Marker`] as well as the event. pub trait MarkedEventReceiver { + /// Handler called for each event that occurs. fn on_event(&mut self, ev: Event, _mark: Marker); } @@ -92,12 +180,12 @@ impl MarkedEventReceiver for R { pub type ParseResult = Result<(Event, Marker), ScanError>; impl> Parser { + /// Crate a new instance of a parser from the given input of characters. pub fn new(src: T) -> Parser { Parser { scanner: Scanner::new(src), states: Vec::new(), state: State::StreamStart, - marks: Vec::new(), token: None, current: None, @@ -107,6 +195,10 @@ impl> Parser { } } + /// Try to load the next event and return it, but do not consuming it from `self`. + /// + /// Any subsequent call to [`Parser::peek`] will return the same value, until a call to + /// [`Parser::next`] or [`Parser::load`]. pub fn peek(&mut self) -> Result<&(Event, Marker), ScanError> { if let Some(ref x) = self.current { Ok(x) @@ -116,10 +208,11 @@ impl> Parser { } } + /// Try to load the next event and return it, consuming it from `self`. pub fn next(&mut self) -> ParseResult { - match self.current { + match self.current.take() { None => self.parse(), - Some(_) => Ok(self.current.take().unwrap()), + Some(v) => Ok(v), } } @@ -170,6 +263,16 @@ impl> Parser { Ok((ev, mark)) } + /// Load the YAML from the stream in `self`, pushing events into `recv`. + /// + /// The contents of the stream are parsed and the corresponding events are sent into the + /// recveiver. For detailed explanations about how events work, see [`EventReceiver`]. + /// + /// If `multi` is set to `true`, the parser will allow parsing of multiple YAML documents + /// inside the stream. + /// + /// Note that any [`EventReceiver`] is also a [`MarkedEventReceiver`], so implementing the + /// former is enough to call this function. pub fn load( &mut self, recv: &mut R, diff --git a/saphyr/src/yaml.rs b/saphyr/src/yaml.rs index 939cb58..67b8b76 100644 --- a/saphyr/src/yaml.rs +++ b/saphyr/src/yaml.rs @@ -4,12 +4,8 @@ use crate::parser::{Event, MarkedEventReceiver, Parser}; use crate::scanner::{Marker, ScanError, TScalarStyle, TokenType}; use linked_hash_map::LinkedHashMap; use std::collections::BTreeMap; -use std::f64; -use std::i64; use std::mem; use std::ops::Index; -use std::string; -use std::vec; /// A YAML node is stored as this `Yaml` enumeration, which provides an easy way to /// access your YAML document. @@ -30,20 +26,20 @@ use std::vec; #[derive(Clone, PartialEq, PartialOrd, Debug, Eq, Ord, Hash)] pub enum Yaml { /// Float types are stored as String and parsed on demand. - /// Note that f64 does NOT implement Eq trait and can NOT be stored in BTreeMap. - Real(string::String), + /// Note that `f64` does NOT implement Eq trait and can NOT be stored in `BTreeMap`. + Real(String), /// YAML int is stored as i64. Integer(i64), /// YAML scalar. - String(string::String), + String(String), /// YAML bool, e.g. `true` or `false`. Boolean(bool), /// YAML array, can be accessed as a `Vec`. - Array(self::Array), + Array(Array), /// YAML hash, can be accessed as a `LinkedHashMap`. /// /// Insertion order will match the order of insertion into the map. - Hash(self::Hash), + Hash(Hash), /// Alias, not fully supported yet. Alias(usize), /// YAML null, e.g. `null` or `~`. @@ -68,6 +64,9 @@ fn parse_f64(v: &str) -> Option { } } +/// Main structure for quickly parsing YAML. +/// +/// See [`YamlLoader::load_from_str`]. pub struct YamlLoader { docs: Vec, // states @@ -188,14 +187,28 @@ impl YamlLoader { } } + /// Load the given string as a set of YAML documents. + /// + /// The `source` is interpreted as YAML documents and is parsed. Parsing succeeds if and only + /// if all documents are parsed successfully. An error in a latter document prevents the former + /// from being returned. pub fn load_from_str(source: &str) -> Result, ScanError> { + Self::load_from_iter(source.chars()) + } + + /// Load the contents of the given iterator as a set of YAML documents. + /// + /// The `source` is interpreted as YAML documents and is parsed. Parsing succeeds if and only + /// if all documents are parsed successfully. An error in a latter document prevents the former + /// from being returned. + pub fn load_from_iter>(source: I) -> Result, ScanError> { let mut loader = YamlLoader { docs: Vec::new(), doc_stack: Vec::new(), key_stack: Vec::new(), anchor_map: BTreeMap::new(), }; - let mut parser = Parser::new(source.chars()); + let mut parser = Parser::new(source); parser.load(&mut loader, true)?; Ok(loader.docs) } @@ -251,23 +264,28 @@ impl Yaml { define_into!(into_hash, Hash, Hash); define_into!(into_vec, Array, Array); - /// Returns the is null of this [`Yaml`]. + /// Return whether `self` is a [`Yaml::Null`] node. #[must_use] pub fn is_null(&self) -> bool { matches!(*self, Yaml::Null) } - /// Returns the is badvalue of this [`Yaml`]. + /// Return whether `self` is a [`Yaml::BadValue`] node. #[must_use] pub fn is_badvalue(&self) -> bool { matches!(*self, Yaml::BadValue) } + /// Return whether `self` is a [`Yaml::Array`] node. #[must_use] pub fn is_array(&self) -> bool { matches!(*self, Yaml::Array(_)) } + /// Return the `f64` value contained in this YAML node. + /// + /// If the node is not a [`Yaml::Real`] YAML node or its contents is not a valid `f64` string, + /// `None` is returned. #[must_use] pub fn as_f64(&self) -> Option { if let Yaml::Real(ref v) = self { @@ -277,20 +295,35 @@ impl Yaml { } } + /// Return the `f64` value contained in this YAML node. + /// + /// If the node is not a [`Yaml::Real`] YAML node or its contents is not a valid `f64` string, + /// `None` is returned. #[must_use] pub fn into_f64(self) -> Option { - if let Yaml::Real(ref v) = self { - parse_f64(v) - } else { - None - } + self.as_f64() } } #[cfg_attr(feature = "cargo-clippy", allow(clippy::should_implement_trait))] impl Yaml { - // Not implementing FromStr because there is no possibility of Error. - // This function falls back to Yaml::String if nothing else matches. + /// Convert a string to a [`Yaml`] node. + /// + /// [`Yaml`] does not implement [`std::str::FromStr`] since conversion may not fail. This + /// function falls back to [`Yaml::String`] if nothing else matches. + /// + /// # Examples + /// ``` + /// # use yaml_rust::yaml::Yaml; + /// assert!(matches!(Yaml::from_str("42"), Yaml::Integer(42))); + /// assert!(matches!(Yaml::from_str("0x2A"), Yaml::Integer(42))); + /// assert!(matches!(Yaml::from_str("0o52"), Yaml::Integer(42))); + /// assert!(matches!(Yaml::from_str("~"), Yaml::Null)); + /// assert!(matches!(Yaml::from_str("null"), Yaml::Null)); + /// assert!(matches!(Yaml::from_str("true"), Yaml::Boolean(true))); + /// assert!(matches!(Yaml::from_str("3.14"), Yaml::Real(_))); + /// assert!(matches!(Yaml::from_str("foo"), Yaml::String(_))); + /// ``` #[must_use] pub fn from_str(v: &str) -> Yaml { if let Some(number) = v.strip_prefix("0x") { @@ -362,8 +395,9 @@ impl IntoIterator for Yaml { } } +/// An iterator over a [`Yaml`] node. pub struct YamlIter { - yaml: vec::IntoIter, + yaml: std::vec::IntoIter, } impl Iterator for YamlIter { @@ -373,378 +407,3 @@ impl Iterator for YamlIter { self.yaml.next() } } - -#[cfg(test)] -#[allow(clippy::bool_assert_comparison)] -#[allow(clippy::float_cmp)] -mod test { - use crate::yaml::{vec, Yaml, YamlLoader}; - use std::f64; - #[test] - fn test_coerce() { - let s = "--- -a: 1 -b: 2.2 -c: [1, 2] -"; - let out = YamlLoader::load_from_str(s).unwrap(); - let doc = &out[0]; - assert_eq!(doc["a"].as_i64().unwrap(), 1i64); - assert_eq!(doc["b"].as_f64().unwrap(), 2.2f64); - assert_eq!(doc["c"][1].as_i64().unwrap(), 2i64); - assert!(doc["d"][0].is_badvalue()); - } - - #[test] - fn test_empty_doc() { - let s: String = String::new(); - YamlLoader::load_from_str(&s).unwrap(); - let s: String = "---".to_owned(); - assert_eq!(YamlLoader::load_from_str(&s).unwrap()[0], Yaml::Null); - } - - #[test] - fn test_parser() { - let s: String = " -# comment -a0 bb: val -a1: - b1: 4 - b2: d -a2: 4 # i'm comment -a3: [1, 2, 3] -a4: - - - a1 - - a2 - - 2 -a5: 'single_quoted' -a6: \"double_quoted\" -a7: 你好 -" - .to_owned(); - let out = YamlLoader::load_from_str(&s).unwrap(); - let doc = &out[0]; - assert_eq!(doc["a7"].as_str().unwrap(), "你好"); - } - - #[test] - fn test_multi_doc() { - let s = " -'a scalar' ---- -'a scalar' ---- -'a scalar' -"; - let out = YamlLoader::load_from_str(s).unwrap(); - assert_eq!(out.len(), 3); - } - - #[test] - fn test_anchor() { - let s = " -a1: &DEFAULT - b1: 4 - b2: d -a2: *DEFAULT -"; - let out = YamlLoader::load_from_str(s).unwrap(); - let doc = &out[0]; - assert_eq!(doc["a2"]["b1"].as_i64().unwrap(), 4); - } - - #[test] - fn test_bad_anchor() { - let s = " -a1: &DEFAULT - b1: 4 - b2: *DEFAULT -"; - let out = YamlLoader::load_from_str(s).unwrap(); - let doc = &out[0]; - assert_eq!(doc["a1"]["b2"], Yaml::BadValue); - } - - #[test] - fn test_github_27() { - // https://github.com/chyh1990/yaml-rust/issues/27 - let s = "&a"; - let out = YamlLoader::load_from_str(s).unwrap(); - let doc = &out[0]; - assert_eq!(doc.as_str().unwrap(), ""); - } - - #[test] - fn test_plain_datatype() { - let s = " -- 'string' -- \"string\" -- string -- 123 -- -321 -- 1.23 -- -1e4 -- ~ -- null -- true -- false -- !!str 0 -- !!int 100 -- !!float 2 -- !!null ~ -- !!bool true -- !!bool false -- 0xFF -# bad values -- !!int string -- !!float string -- !!bool null -- !!null val -- 0o77 -- [ 0xF, 0xF ] -- +12345 -- [ true, false ] -"; - let out = YamlLoader::load_from_str(s).unwrap(); - let doc = &out[0]; - - assert_eq!(doc[0].as_str().unwrap(), "string"); - assert_eq!(doc[1].as_str().unwrap(), "string"); - assert_eq!(doc[2].as_str().unwrap(), "string"); - assert_eq!(doc[3].as_i64().unwrap(), 123); - assert_eq!(doc[4].as_i64().unwrap(), -321); - assert_eq!(doc[5].as_f64().unwrap(), 1.23); - assert_eq!(doc[6].as_f64().unwrap(), -1e4); - assert!(doc[7].is_null()); - assert!(doc[8].is_null()); - assert_eq!(doc[9].as_bool().unwrap(), true); - assert_eq!(doc[10].as_bool().unwrap(), false); - assert_eq!(doc[11].as_str().unwrap(), "0"); - assert_eq!(doc[12].as_i64().unwrap(), 100); - assert_eq!(doc[13].as_f64().unwrap(), 2.0); - assert!(doc[14].is_null()); - assert_eq!(doc[15].as_bool().unwrap(), true); - assert_eq!(doc[16].as_bool().unwrap(), false); - assert_eq!(doc[17].as_i64().unwrap(), 255); - assert!(doc[18].is_badvalue()); - assert!(doc[19].is_badvalue()); - assert!(doc[20].is_badvalue()); - assert!(doc[21].is_badvalue()); - assert_eq!(doc[22].as_i64().unwrap(), 63); - assert_eq!(doc[23][0].as_i64().unwrap(), 15); - assert_eq!(doc[23][1].as_i64().unwrap(), 15); - assert_eq!(doc[24].as_i64().unwrap(), 12345); - assert!(doc[25][0].as_bool().unwrap()); - assert!(!doc[25][1].as_bool().unwrap()); - } - - #[test] - fn test_bad_hyphen() { - // See: https://github.com/chyh1990/yaml-rust/issues/23 - let s = "{-"; - assert!(YamlLoader::load_from_str(s).is_err()); - } - - #[test] - fn test_issue_65() { - // See: https://github.com/chyh1990/yaml-rust/issues/65 - let b = "\n\"ll\\\"ll\\\r\n\"ll\\\"ll\\\r\r\r\rU\r\r\rU"; - assert!(YamlLoader::load_from_str(b).is_err()); - } - - #[test] - fn test_bad_docstart() { - assert!(YamlLoader::load_from_str("---This used to cause an infinite loop").is_ok()); - assert_eq!( - YamlLoader::load_from_str("----"), - Ok(vec![Yaml::String(String::from("----"))]) - ); - assert_eq!( - YamlLoader::load_from_str("--- #here goes a comment"), - Ok(vec![Yaml::Null]) - ); - assert_eq!( - YamlLoader::load_from_str("---- #here goes a comment"), - Ok(vec![Yaml::String(String::from("----"))]) - ); - } - - #[test] - fn test_plain_datatype_with_into_methods() { - let s = " -- 'string' -- \"string\" -- string -- 123 -- -321 -- 1.23 -- -1e4 -- true -- false -- !!str 0 -- !!int 100 -- !!float 2 -- !!bool true -- !!bool false -- 0xFF -- 0o77 -- +12345 -- -.INF -- .NAN -- !!float .INF -"; - let mut out = YamlLoader::load_from_str(s).unwrap().into_iter(); - let mut doc = out.next().unwrap().into_iter(); - - assert_eq!(doc.next().unwrap().into_string().unwrap(), "string"); - assert_eq!(doc.next().unwrap().into_string().unwrap(), "string"); - assert_eq!(doc.next().unwrap().into_string().unwrap(), "string"); - assert_eq!(doc.next().unwrap().into_i64().unwrap(), 123); - assert_eq!(doc.next().unwrap().into_i64().unwrap(), -321); - assert_eq!(doc.next().unwrap().into_f64().unwrap(), 1.23); - assert_eq!(doc.next().unwrap().into_f64().unwrap(), -1e4); - assert_eq!(doc.next().unwrap().into_bool().unwrap(), true); - assert_eq!(doc.next().unwrap().into_bool().unwrap(), false); - assert_eq!(doc.next().unwrap().into_string().unwrap(), "0"); - assert_eq!(doc.next().unwrap().into_i64().unwrap(), 100); - assert_eq!(doc.next().unwrap().into_f64().unwrap(), 2.0); - assert_eq!(doc.next().unwrap().into_bool().unwrap(), true); - assert_eq!(doc.next().unwrap().into_bool().unwrap(), false); - assert_eq!(doc.next().unwrap().into_i64().unwrap(), 255); - assert_eq!(doc.next().unwrap().into_i64().unwrap(), 63); - assert_eq!(doc.next().unwrap().into_i64().unwrap(), 12345); - assert_eq!(doc.next().unwrap().into_f64().unwrap(), f64::NEG_INFINITY); - assert!(doc.next().unwrap().into_f64().is_some()); - assert_eq!(doc.next().unwrap().into_f64().unwrap(), f64::INFINITY); - } - - #[test] - fn test_hash_order() { - let s = "--- -b: ~ -a: ~ -c: ~ -"; - let out = YamlLoader::load_from_str(s).unwrap(); - let first = out.into_iter().next().unwrap(); - let mut iter = first.into_hash().unwrap().into_iter(); - assert_eq!( - Some((Yaml::String("b".to_owned()), Yaml::Null)), - iter.next() - ); - assert_eq!( - Some((Yaml::String("a".to_owned()), Yaml::Null)), - iter.next() - ); - assert_eq!( - Some((Yaml::String("c".to_owned()), Yaml::Null)), - iter.next() - ); - assert_eq!(None, iter.next()); - } - - #[test] - fn test_integer_key() { - let s = " -0: - important: true -1: - important: false -"; - let out = YamlLoader::load_from_str(s).unwrap(); - let first = out.into_iter().next().unwrap(); - assert_eq!(first[0]["important"].as_bool().unwrap(), true); - } - - #[test] - fn test_indentation_equality() { - let four_spaces = YamlLoader::load_from_str( - r#" -hash: - with: - indentations -"#, - ) - .unwrap() - .into_iter() - .next() - .unwrap(); - - let two_spaces = YamlLoader::load_from_str( - r#" -hash: - with: - indentations -"#, - ) - .unwrap() - .into_iter() - .next() - .unwrap(); - - let one_space = YamlLoader::load_from_str( - r#" -hash: - with: - indentations -"#, - ) - .unwrap() - .into_iter() - .next() - .unwrap(); - - let mixed_spaces = YamlLoader::load_from_str( - r#" -hash: - with: - indentations -"#, - ) - .unwrap() - .into_iter() - .next() - .unwrap(); - - assert_eq!(four_spaces, two_spaces); - assert_eq!(two_spaces, one_space); - assert_eq!(four_spaces, mixed_spaces); - } - - #[test] - fn test_two_space_indentations() { - // https://github.com/kbknapp/clap-rs/issues/965 - - let s = r#" -subcommands: - - server: - about: server related commands -subcommands2: - - server: - about: server related commands -subcommands3: - - server: - about: server related commands - "#; - - let out = YamlLoader::load_from_str(s).unwrap(); - let doc = &out.into_iter().next().unwrap(); - - println!("{doc:#?}"); - assert_eq!(doc["subcommands"][0]["server"], Yaml::Null); - assert!(doc["subcommands2"][0]["server"].as_hash().is_some()); - assert!(doc["subcommands3"][0]["server"].as_hash().is_some()); - } - - #[test] - fn test_recursion_depth_check_objects() { - let s = "{a:".repeat(10_000) + &"}".repeat(10_000); - assert!(YamlLoader::load_from_str(&s).is_err()); - } - - #[test] - fn test_recursion_depth_check_arrays() { - let s = "[".repeat(10_000) + &"]".repeat(10_000); - assert!(YamlLoader::load_from_str(&s).is_err()); - } -} diff --git a/saphyr/tests/basic.rs b/saphyr/tests/basic.rs new file mode 100644 index 0000000..1ec8751 --- /dev/null +++ b/saphyr/tests/basic.rs @@ -0,0 +1,424 @@ +#![allow(clippy::bool_assert_comparison)] +#![allow(clippy::float_cmp)] + +use std::vec; +use yaml_rust::{Yaml, YamlEmitter, YamlLoader}; + +#[test] +fn test_api() { + let s = " +# from yaml-cpp example +- name: Ogre + position: [0, 5, 0] + powers: + - name: Club + damage: 10 + - name: Fist + damage: 8 +- name: Dragon + position: [1, 0, 10] + powers: + - name: Fire Breath + damage: 25 + - name: Claws + damage: 15 +- name: Wizard + position: [5, -3, 0] + powers: + - name: Acid Rain + damage: 50 + - name: Staff + damage: 3 +"; + let docs = YamlLoader::load_from_str(s).unwrap(); + let doc = &docs[0]; + + assert_eq!(doc[0]["name"].as_str().unwrap(), "Ogre"); + + let mut writer = String::new(); + { + let mut emitter = YamlEmitter::new(&mut writer); + emitter.dump(doc).unwrap(); + } + + assert!(!writer.is_empty()); +} + +#[test] +fn test_fail() { + let s = " +# syntax error +scalar +key: [1, 2]] +key1:a2 +"; + assert!(YamlLoader::load_from_str(s).is_err()); +} + +#[test] +fn test_coerce() { + let s = "--- +a: 1 +b: 2.2 +c: [1, 2] +"; + let out = YamlLoader::load_from_str(s).unwrap(); + let doc = &out[0]; + assert_eq!(doc["a"].as_i64().unwrap(), 1i64); + assert_eq!(doc["b"].as_f64().unwrap(), 2.2f64); + assert_eq!(doc["c"][1].as_i64().unwrap(), 2i64); + assert!(doc["d"][0].is_badvalue()); +} + +#[test] +fn test_empty_doc() { + let s: String = String::new(); + YamlLoader::load_from_str(&s).unwrap(); + let s: String = "---".to_owned(); + assert_eq!(YamlLoader::load_from_str(&s).unwrap()[0], Yaml::Null); +} + +#[test] +fn test_parser() { + let s: String = " +# comment +a0 bb: val +a1: + b1: 4 + b2: d +a2: 4 # i'm comment +a3: [1, 2, 3] +a4: + - - a1 + - a2 + - 2 +a5: 'single_quoted' +a6: \"double_quoted\" +a7: 你好 +" + .to_owned(); + let out = YamlLoader::load_from_str(&s).unwrap(); + let doc = &out[0]; + assert_eq!(doc["a7"].as_str().unwrap(), "你好"); +} + +#[test] +fn test_multi_doc() { + let s = " +'a scalar' +--- +'a scalar' +--- +'a scalar' +"; + let out = YamlLoader::load_from_str(s).unwrap(); + assert_eq!(out.len(), 3); +} + +#[test] +fn test_anchor() { + let s = " +a1: &DEFAULT + b1: 4 + b2: d +a2: *DEFAULT +"; + let out = YamlLoader::load_from_str(s).unwrap(); + let doc = &out[0]; + assert_eq!(doc["a2"]["b1"].as_i64().unwrap(), 4); +} + +#[test] +fn test_bad_anchor() { + let s = " +a1: &DEFAULT + b1: 4 + b2: *DEFAULT +"; + let out = YamlLoader::load_from_str(s).unwrap(); + let doc = &out[0]; + assert_eq!(doc["a1"]["b2"], Yaml::BadValue); +} + +#[test] +fn test_github_27() { + // https://github.com/chyh1990/yaml-rust/issues/27 + let s = "&a"; + let out = YamlLoader::load_from_str(s).unwrap(); + let doc = &out[0]; + assert_eq!(doc.as_str().unwrap(), ""); +} + +#[test] +fn test_plain_datatype() { + let s = " +- 'string' +- \"string\" +- string +- 123 +- -321 +- 1.23 +- -1e4 +- ~ +- null +- true +- false +- !!str 0 +- !!int 100 +- !!float 2 +- !!null ~ +- !!bool true +- !!bool false +- 0xFF +# bad values +- !!int string +- !!float string +- !!bool null +- !!null val +- 0o77 +- [ 0xF, 0xF ] +- +12345 +- [ true, false ] +"; + let out = YamlLoader::load_from_str(s).unwrap(); + let doc = &out[0]; + + assert_eq!(doc[0].as_str().unwrap(), "string"); + assert_eq!(doc[1].as_str().unwrap(), "string"); + assert_eq!(doc[2].as_str().unwrap(), "string"); + assert_eq!(doc[3].as_i64().unwrap(), 123); + assert_eq!(doc[4].as_i64().unwrap(), -321); + assert_eq!(doc[5].as_f64().unwrap(), 1.23); + assert_eq!(doc[6].as_f64().unwrap(), -1e4); + assert!(doc[7].is_null()); + assert!(doc[8].is_null()); + assert_eq!(doc[9].as_bool().unwrap(), true); + assert_eq!(doc[10].as_bool().unwrap(), false); + assert_eq!(doc[11].as_str().unwrap(), "0"); + assert_eq!(doc[12].as_i64().unwrap(), 100); + assert_eq!(doc[13].as_f64().unwrap(), 2.0); + assert!(doc[14].is_null()); + assert_eq!(doc[15].as_bool().unwrap(), true); + assert_eq!(doc[16].as_bool().unwrap(), false); + assert_eq!(doc[17].as_i64().unwrap(), 255); + assert!(doc[18].is_badvalue()); + assert!(doc[19].is_badvalue()); + assert!(doc[20].is_badvalue()); + assert!(doc[21].is_badvalue()); + assert_eq!(doc[22].as_i64().unwrap(), 63); + assert_eq!(doc[23][0].as_i64().unwrap(), 15); + assert_eq!(doc[23][1].as_i64().unwrap(), 15); + assert_eq!(doc[24].as_i64().unwrap(), 12345); + assert!(doc[25][0].as_bool().unwrap()); + assert!(!doc[25][1].as_bool().unwrap()); +} + +#[test] +fn test_bad_hyphen() { + // See: https://github.com/chyh1990/yaml-rust/issues/23 + let s = "{-"; + assert!(YamlLoader::load_from_str(s).is_err()); +} + +#[test] +fn test_issue_65() { + // See: https://github.com/chyh1990/yaml-rust/issues/65 + let b = "\n\"ll\\\"ll\\\r\n\"ll\\\"ll\\\r\r\r\rU\r\r\rU"; + assert!(YamlLoader::load_from_str(b).is_err()); +} + +#[test] +fn test_bad_docstart() { + assert!(YamlLoader::load_from_str("---This used to cause an infinite loop").is_ok()); + assert_eq!( + YamlLoader::load_from_str("----"), + Ok(vec![Yaml::String(String::from("----"))]) + ); + assert_eq!( + YamlLoader::load_from_str("--- #here goes a comment"), + Ok(vec![Yaml::Null]) + ); + assert_eq!( + YamlLoader::load_from_str("---- #here goes a comment"), + Ok(vec![Yaml::String(String::from("----"))]) + ); +} + +#[test] +fn test_plain_datatype_with_into_methods() { + let s = " +- 'string' +- \"string\" +- string +- 123 +- -321 +- 1.23 +- -1e4 +- true +- false +- !!str 0 +- !!int 100 +- !!float 2 +- !!bool true +- !!bool false +- 0xFF +- 0o77 +- +12345 +- -.INF +- .NAN +- !!float .INF +"; + let mut out = YamlLoader::load_from_str(s).unwrap().into_iter(); + let mut doc = out.next().unwrap().into_iter(); + + assert_eq!(doc.next().unwrap().into_string().unwrap(), "string"); + assert_eq!(doc.next().unwrap().into_string().unwrap(), "string"); + assert_eq!(doc.next().unwrap().into_string().unwrap(), "string"); + assert_eq!(doc.next().unwrap().into_i64().unwrap(), 123); + assert_eq!(doc.next().unwrap().into_i64().unwrap(), -321); + assert_eq!(doc.next().unwrap().into_f64().unwrap(), 1.23); + assert_eq!(doc.next().unwrap().into_f64().unwrap(), -1e4); + assert_eq!(doc.next().unwrap().into_bool().unwrap(), true); + assert_eq!(doc.next().unwrap().into_bool().unwrap(), false); + assert_eq!(doc.next().unwrap().into_string().unwrap(), "0"); + assert_eq!(doc.next().unwrap().into_i64().unwrap(), 100); + assert_eq!(doc.next().unwrap().into_f64().unwrap(), 2.0); + assert_eq!(doc.next().unwrap().into_bool().unwrap(), true); + assert_eq!(doc.next().unwrap().into_bool().unwrap(), false); + assert_eq!(doc.next().unwrap().into_i64().unwrap(), 255); + assert_eq!(doc.next().unwrap().into_i64().unwrap(), 63); + assert_eq!(doc.next().unwrap().into_i64().unwrap(), 12345); + assert_eq!(doc.next().unwrap().into_f64().unwrap(), f64::NEG_INFINITY); + assert!(doc.next().unwrap().into_f64().is_some()); + assert_eq!(doc.next().unwrap().into_f64().unwrap(), f64::INFINITY); +} + +#[test] +fn test_hash_order() { + let s = "--- +b: ~ +a: ~ +c: ~ +"; + let out = YamlLoader::load_from_str(s).unwrap(); + let first = out.into_iter().next().unwrap(); + let mut iter = first.into_hash().unwrap().into_iter(); + assert_eq!( + Some((Yaml::String("b".to_owned()), Yaml::Null)), + iter.next() + ); + assert_eq!( + Some((Yaml::String("a".to_owned()), Yaml::Null)), + iter.next() + ); + assert_eq!( + Some((Yaml::String("c".to_owned()), Yaml::Null)), + iter.next() + ); + assert_eq!(None, iter.next()); +} + +#[test] +fn test_integer_key() { + let s = " +0: + important: true +1: + important: false +"; + let out = YamlLoader::load_from_str(s).unwrap(); + let first = out.into_iter().next().unwrap(); + assert_eq!(first[0]["important"].as_bool().unwrap(), true); +} + +#[test] +fn test_indentation_equality() { + let four_spaces = YamlLoader::load_from_str( + r#" +hash: + with: + indentations +"#, + ) + .unwrap() + .into_iter() + .next() + .unwrap(); + + let two_spaces = YamlLoader::load_from_str( + r#" +hash: + with: + indentations +"#, + ) + .unwrap() + .into_iter() + .next() + .unwrap(); + + let one_space = YamlLoader::load_from_str( + r#" +hash: + with: + indentations +"#, + ) + .unwrap() + .into_iter() + .next() + .unwrap(); + + let mixed_spaces = YamlLoader::load_from_str( + r#" +hash: + with: + indentations +"#, + ) + .unwrap() + .into_iter() + .next() + .unwrap(); + + assert_eq!(four_spaces, two_spaces); + assert_eq!(two_spaces, one_space); + assert_eq!(four_spaces, mixed_spaces); +} + +#[test] +fn test_two_space_indentations() { + // https://github.com/kbknapp/clap-rs/issues/965 + + let s = r#" +subcommands: + - server: + about: server related commands +subcommands2: + - server: + about: server related commands +subcommands3: + - server: + about: server related commands + "#; + + let out = YamlLoader::load_from_str(s).unwrap(); + let doc = &out.into_iter().next().unwrap(); + + println!("{doc:#?}"); + assert_eq!(doc["subcommands"][0]["server"], Yaml::Null); + assert!(doc["subcommands2"][0]["server"].as_hash().is_some()); + assert!(doc["subcommands3"][0]["server"].as_hash().is_some()); +} + +#[test] +fn test_recursion_depth_check_objects() { + let s = "{a:".repeat(10_000) + &"}".repeat(10_000); + assert!(YamlLoader::load_from_str(&s).is_err()); +} + +#[test] +fn test_recursion_depth_check_arrays() { + let s = "[".repeat(10_000) + &"]".repeat(10_000); + assert!(YamlLoader::load_from_str(&s).is_err()); +} From d77db591bc5adfbb6def2d2352284c9839bdb8d1 Mon Sep 17 00:00:00 2001 From: Ethiraric Date: Thu, 17 Aug 2023 02:17:53 +0200 Subject: [PATCH 203/380] Add a release-lto cargo profile. --- saphyr/Cargo.toml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/saphyr/Cargo.toml b/saphyr/Cargo.toml index 91d7da4..b779cfc 100644 --- a/saphyr/Cargo.toml +++ b/saphyr/Cargo.toml @@ -15,3 +15,7 @@ linked-hash-map = "0.5.3" [dev-dependencies] quickcheck = "0.9" + +[profile.release-lto] +inherits = "release" +lto = true From df4d5208786e9ef95c7fad0590d3e94da1fbaa58 Mon Sep 17 00:00:00 2001 From: Ethiraric Date: Thu, 17 Aug 2023 02:18:07 +0200 Subject: [PATCH 204/380] Add a dump_events example to aid debugging. --- saphyr/examples/dump_events.rs | 38 ++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) create mode 100644 saphyr/examples/dump_events.rs diff --git a/saphyr/examples/dump_events.rs b/saphyr/examples/dump_events.rs new file mode 100644 index 0000000..876902c --- /dev/null +++ b/saphyr/examples/dump_events.rs @@ -0,0 +1,38 @@ +extern crate yaml_rust; + +use std::env; +use std::fs::File; +use std::io::prelude::*; +use yaml_rust::{ + parser::{MarkedEventReceiver, Parser}, + scanner::Marker, + Event, +}; + +#[derive(Debug)] +struct EventSink { + events: Vec<(Event, Marker)>, +} + +impl MarkedEventReceiver for EventSink { + fn on_event(&mut self, ev: Event, mark: Marker) { + self.events.push((ev, mark)); + } +} + +fn str_to_events(yaml: &str) -> Vec<(Event, Marker)> { + let mut sink = EventSink { events: Vec::new() }; + let mut parser = Parser::new(yaml.chars()); + // Load events using our sink as the receiver. + parser.load(&mut sink, true).unwrap(); + sink.events +} + +fn main() { + let args: Vec<_> = env::args().collect(); + let mut f = File::open(&args[1]).unwrap(); + let mut s = String::new(); + f.read_to_string(&mut s).unwrap(); + + dbg!(str_to_events(&s)); +} From 0a11923625f4fad2e332572d6db179d2f159e496 Mon Sep 17 00:00:00 2001 From: Ethiraric Date: Thu, 17 Aug 2023 23:31:32 +0200 Subject: [PATCH 205/380] Fix more clippy lints. --- saphyr/tests/specexamples.rs.inc | 224 +++++++++++++++---------------- 1 file changed, 112 insertions(+), 112 deletions(-) diff --git a/saphyr/tests/specexamples.rs.inc b/saphyr/tests/specexamples.rs.inc index a5398c3..f882efc 100644 --- a/saphyr/tests/specexamples.rs.inc +++ b/saphyr/tests/specexamples.rs.inc @@ -1,337 +1,337 @@ -const EX2_1 : &'static str = +const EX2_1 : &str = "- Mark McGwire\n- Sammy Sosa\n- Ken Griffey"; -const EX2_2 : &'static str = +const EX2_2 : &str = "hr: 65 # Home runs\navg: 0.278 # Batting average\nrbi: 147 # Runs Batted In"; -const EX2_3 : &'static str = +const EX2_3 : &str = "american:\n- Boston Red Sox\n- Detroit Tigers\n- New York Yankees\nnational:\n- New York Mets\n- Chicago Cubs\n- Atlanta Braves"; -const EX2_4 : &'static str = +const EX2_4 : &str = "-\n name: Mark McGwire\n hr: 65\n avg: 0.278\n-\n name: Sammy Sosa\n hr: 63\n avg: 0.288"; -const EX2_5 : &'static str = +const EX2_5 : &str = "- [name , hr, avg ]\n- [Mark McGwire, 65, 0.278]\n- [Sammy Sosa , 63, 0.288]"; -const EX2_6 : &'static str = +const EX2_6 : &str = "Mark McGwire: {hr: 65, avg: 0.278}\nSammy Sosa: {\n hr: 63,\n avg: 0.288\n }"; -const EX2_7 : &'static str = +const EX2_7 : &str = "# Ranking of 1998 home runs\n---\n- Mark McGwire\n- Sammy Sosa\n- Ken Griffey\n\n# Team ranking\n---\n- Chicago Cubs\n- St Louis Cardinals"; -const EX2_8 : &'static str = +const EX2_8 : &str = "---\ntime: 20:03:20\nplayer: Sammy Sosa\naction: strike (miss)\n...\n---\ntime: 20:03:47\nplayer: Sammy Sosa\naction: grand slam\n..."; -const EX2_9 : &'static str = +const EX2_9 : &str = "---\nhr: # 1998 hr ranking\n - Mark McGwire\n - Sammy Sosa\nrbi:\n # 1998 rbi ranking\n - Sammy Sosa\n - Ken Griffey"; -const EX2_10 : &'static str = +const EX2_10 : &str = "---\nhr:\n - Mark McGwire\n # Following node labeled SS\n - &SS Sammy Sosa\nrbi:\n - *SS # Subsequent occurrence\n - Ken Griffey"; -const EX2_11 : &'static str = +const EX2_11 : &str = "? - Detroit Tigers\n - Chicago cubs\n:\n - 2001-07-23\n\n? [ New York Yankees,\n Atlanta Braves ]\n: [ 2001-07-02, 2001-08-12,\n 2001-08-14 ]"; -const EX2_12 : &'static str = +const EX2_12 : &str = "---\n# Products purchased\n- item : Super Hoop\n quantity: 1\n- item : Basketball\n quantity: 4\n- item : Big Shoes\n quantity: 1"; -const EX2_13 : &'static str = +const EX2_13 : &str = "# ASCII Art\n--- |\n \\//||\\/||\n // || ||__"; -const EX2_14 : &'static str = +const EX2_14 : &str = "--- >\n Mark McGwire's\n year was crippled\n by a knee injury."; -const EX2_15 : &'static str = +const EX2_15 : &str = ">\n Sammy Sosa completed another\n fine season with great stats.\n \n 63 Home Runs\n 0.288 Batting Average\n \n What a year!"; -const EX2_16 : &'static str = +const EX2_16 : &str = "name: Mark McGwire\naccomplishment: >\n Mark set a major league\n home run record in 1998.\nstats: |\n 65 Home Runs\n 0.278 Batting Average\n"; -const EX2_17 : &'static str = +const EX2_17 : &str = "unicode: \"Sosa did fine.\\u263A\"\ncontrol: \"\\b1998\\t1999\\t2000\\n\"\nhex esc: \"\\x0d\\x0a is \\r\\n\"\n\nsingle: '\"Howdy!\" he cried.'\nquoted: ' # Not a ''comment''.'\ntie-fighter: '|\\-*-/|'"; -const EX2_18 : &'static str = +const EX2_18 : &str = "plain:\n This unquoted scalar\n spans many lines.\n\nquoted: \"So does this\n quoted scalar.\\n\""; // TODO: 2.19 - 2.22 schema tags -const EX2_23 : &'static str = +const EX2_23 : &str = "---\nnot-date: !!str 2002-04-28\n\npicture: !!binary |\n R0lGODlhDAAMAIQAAP//9/X\n 17unp5WZmZgAAAOfn515eXv\n Pz7Y6OjuDg4J+fn5OTk6enp\n 56enmleECcgggoBADs=\n\napplication specific tag: !something |\n The semantics of the tag\n above may be different for\n different documents."; -const EX2_24 : &'static str = +const EX2_24 : &str = "%TAG ! tag:clarkevans.com,2002:\n--- !shape\n # Use the ! handle for presenting\n # tag:clarkevans.com,2002:circle\n- !circle\n center: &ORIGIN {x: 73, y: 129}\n radius: 7\n- !line\n start: *ORIGIN\n finish: { x: 89, y: 102 }\n- !label\n start: *ORIGIN\n color: 0xFFEEBB\n text: Pretty vector drawing."; -const EX2_25 : &'static str = +const EX2_25 : &str = "# Sets are represented as a\n# Mapping where each key is\n# associated with a null value\n--- !!set\n? Mark McGwire\n? Sammy Sosa\n? Ken Griffey"; -const EX2_26 : &'static str = +const EX2_26 : &str = "# Ordered maps are represented as\n# A sequence of mappings, with\n# each mapping having one key\n--- !!omap\n- Mark McGwire: 65\n- Sammy Sosa: 63\n- Ken Griffey: 58"; -const EX2_27 : &'static str = +const EX2_27 : &str = "--- !\ninvoice: 34843\ndate : 2001-01-23\nbill-to: &id001\n given : Chris\n family : Dumars\n address:\n lines: |\n 458 Walkman Dr.\n Suite #292\n city : Royal Oak\n state : MI\n postal : 48046\nship-to: *id001\nproduct:\n - sku : BL394D\n quantity : 4\n description : Basketball\n price : 450.00\n - sku : BL4438H\n quantity : 1\n description : Super Hoop\n price : 2392.00\ntax : 251.42\ntotal: 4443.52\ncomments:\n Late afternoon is best.\n Backup contact is Nancy\n Billsmer @ 338-4338."; -const EX2_28 : &'static str = +const EX2_28 : &str = "---\nTime: 2001-11-23 15:01:42 -5\nUser: ed\nWarning:\n This is an error message\n for the log file\n---\nTime: 2001-11-23 15:02:31 -5\nUser: ed\nWarning:\n A slightly different error\n message.\n---\nDate: 2001-11-23 15:03:17 -5\nUser: ed\nFatal:\n Unknown variable \"bar\"\nStack:\n - file: TopClass.py\n line: 23\n code: |\n x = MoreObject(\"345\\n\")\n - file: MoreClass.py\n line: 58\n code: |-\n foo = bar"; // TODO: 5.1 - 5.2 BOM -const EX5_3 : &'static str = +const EX5_3 : &str = "sequence:\n- one\n- two\nmapping:\n ? sky\n : blue\n sea : green"; -const EX5_4 : &'static str = +const EX5_4 : &str = "sequence: [ one, two, ]\nmapping: { sky: blue, sea: green }"; -const EX5_5 : &'static str = "# Comment only."; +const EX5_5 : &str = "# Comment only."; -const EX5_6 : &'static str = +const EX5_6 : &str = "anchored: !local &anchor value\nalias: *anchor"; -const EX5_7 : &'static str = +const EX5_7 : &str = "literal: |\n some\n text\nfolded: >\n some\n text\n"; -const EX5_8 : &'static str = +const EX5_8 : &str = "single: 'text'\ndouble: \"text\""; // TODO: 5.9 directive // TODO: 5.10 reserved indicator -const EX5_11 : &'static str = +const EX5_11 : &str = "|\n Line break (no glyph)\n Line break (glyphed)\n"; -const EX5_12 : &'static str = +const EX5_12 : &str = "# Tabs and spaces\nquoted: \"Quoted\t\"\nblock: |\n void main() {\n \tprintf(\"Hello, world!\\n\");\n }"; -const EX5_13 : &'static str = +const EX5_13 : &str = "\"Fun with \\\\\n\\\" \\a \\b \\e \\f \\\n\\n \\r \\t \\v \\0 \\\n\\ \\_ \\N \\L \\P \\\n\\x41 \\u0041 \\U00000041\""; -const EX5_14 : &'static str = +const EX5_14 : &str = "Bad escapes:\n \"\\c\n \\xq-\""; -const EX6_1 : &'static str = +const EX6_1 : &str = " # Leading comment line spaces are\n # neither content nor indentation.\n \nNot indented:\n By one space: |\n By four\n spaces\n Flow style: [ # Leading spaces\n By two, # in flow style\n Also by two, # are neither\n \tStill by two # content nor\n ] # indentation."; -const EX6_2 : &'static str = +const EX6_2 : &str = "? a\n: -\tb\n - -\tc\n - d"; -const EX6_3 : &'static str = +const EX6_3 : &str = "- foo:\t bar\n- - baz\n -\tbaz"; -const EX6_4 : &'static str = +const EX6_4 : &str = "plain: text\n lines\nquoted: \"text\n \tlines\"\nblock: |\n text\n \tlines\n"; -const EX6_5 : &'static str = +const EX6_5 : &str = "Folding:\n \"Empty line\n \t\n as a line feed\"\nChomping: |\n Clipped empty lines\n "; -const EX6_6 : &'static str = +const EX6_6 : &str = ">-\n trimmed\n \n \n\n as\n space"; -const EX6_7 : &'static str = +const EX6_7 : &str = ">\n foo \n \n \t bar\n\n baz\n"; -const EX6_8 : &'static str = +const EX6_8 : &str = "\"\n foo \n \n \t bar\n\n baz\n\""; -const EX6_9 : &'static str = +const EX6_9 : &str = "key: # Comment\n value"; -const EX6_10 : &'static str = +const EX6_10 : &str = " # Comment\n \n\n"; -const EX6_11 : &'static str = +const EX6_11 : &str = "key: # Comment\n # lines\n value\n\n"; -const EX6_12 : &'static str = +const EX6_12 : &str = "{ first: Sammy, last: Sosa }:\n# Statistics:\n hr: # Home runs\n 65\n avg: # Average\n 0.278"; -const EX6_13 : &'static str = +const EX6_13 : &str = "%FOO bar baz # Should be ignored\n # with a warning.\n--- \"foo\""; -const EX6_14 : &'static str = +const EX6_14 : &str = "%YAML 1.3 # Attempt parsing\n # with a warning\n---\n\"foo\""; -const EX6_15 : &'static str = +const EX6_15 : &str = "%YAML 1.2\n%YAML 1.1\nfoo"; -const EX6_16 : &'static str = +const EX6_16 : &str = "%TAG !yaml! tag:yaml.org,2002:\n---\n!yaml!str \"foo\""; -const EX6_17 : &'static str = +const EX6_17 : &str = "%TAG ! !foo\n%TAG ! !foo\nbar"; -const EX6_18 : &'static str = +const EX6_18 : &str = "# Private\n!foo \"bar\"\n...\n# Global\n%TAG ! tag:example.com,2000:app/\n---\n!foo \"bar\""; -const EX6_19 : &'static str = +const EX6_19 : &str = "%TAG !! tag:example.com,2000:app/\n---\n!!int 1 - 3 # Interval, not integer"; -const EX6_20 : &'static str = +const EX6_20 : &str = "%TAG !e! tag:example.com,2000:app/\n---\n!e!foo \"bar\""; -const EX6_21 : &'static str = +const EX6_21 : &str = "%TAG !m! !my-\n--- # Bulb here\n!m!light fluorescent\n...\n%TAG !m! !my-\n--- # Color here\n!m!light green"; -const EX6_22 : &'static str = +const EX6_22 : &str = "%TAG !e! tag:example.com,2000:app/\n---\n- !e!foo \"bar\""; -const EX6_23 : &'static str = +const EX6_23 : &str = "!!str &a1 \"foo\":\n !!str bar\n&a2 baz : *a1"; -const EX6_24 : &'static str = +const EX6_24 : &str = "! foo :\n ! baz"; -const EX6_25 : &'static str = +const EX6_25 : &str = "- ! foo\n- !<$:?> bar\n"; -const EX6_26 : &'static str = +const EX6_26 : &str = "%TAG !e! tag:example.com,2000:app/\n---\n- !local foo\n- !!str bar\n- !e!tag%21 baz\n"; -const EX6_27a : &'static str = +const EX6_27a : &str = "%TAG !e! tag:example,2000:app/\n---\n- !e! foo"; -const EX6_27b : &'static str = +const EX6_27b : &str = "%TAG !e! tag:example,2000:app/\n---\n- !h!bar baz"; -const EX6_28 : &'static str = +const EX6_28 : &str = "# Assuming conventional resolution:\n- \"12\"\n- 12\n- ! 12"; -const EX6_29 : &'static str = +const EX6_29 : &str = "First occurrence: &anchor Value\nSecond occurrence: *anchor"; -const EX7_1 : &'static str = +const EX7_1 : &str = "First occurrence: &anchor Foo\nSecond occurrence: *anchor\nOverride anchor: &anchor Bar\nReuse anchor: *anchor"; -const EX7_2 : &'static str = +const EX7_2 : &str = "{\n foo : !!str,\n !!str : bar,\n}"; -const EX7_3 : &'static str = +const EX7_3 : &str = "{\n ? foo :,\n : bar,\n}\n"; -const EX7_4 : &'static str = +const EX7_4 : &str = "\"implicit block key\" : [\n \"implicit flow key\" : value,\n ]"; -const EX7_5 : &'static str = +const EX7_5 : &str = "\"folded \nto a space,\t\n \nto a line feed, or \t\\\n \\ \tnon-content\""; -const EX7_6 : &'static str = +const EX7_6 : &str = "\" 1st non-empty\n\n 2nd non-empty \n\t3rd non-empty \""; -const EX7_7 : &'static str = " 'here''s to \"quotes\"'"; +const EX7_7 : &str = " 'here''s to \"quotes\"'"; -const EX7_8 : &'static str = +const EX7_8 : &str = "'implicit block key' : [\n 'implicit flow key' : value,\n ]"; -const EX7_9 : &'static str = +const EX7_9 : &str = "' 1st non-empty\n\n 2nd non-empty \n\t3rd non-empty '"; -const EX7_10 : &'static str = +const EX7_10 : &str = "# Outside flow collection:\n- ::vector\n- \": - ()\"\n- Up, up, and away!\n- -123\n- http://example.com/foo#bar\n# Inside flow collection:\n- [ ::vector,\n \": - ()\",\n \"Up, up, and away!\",\n -123,\n http://example.com/foo#bar ]"; -const EX7_11 : &'static str = +const EX7_11 : &str = "implicit block key : [\n implicit flow key : value,\n ]"; -const EX7_12 : &'static str = +const EX7_12 : &str = "1st non-empty\n\n 2nd non-empty \n\t3rd non-empty"; -const EX7_13 : &'static str = +const EX7_13 : &str = "- [ one, two, ]\n- [three ,four]"; -const EX7_14 : &'static str = +const EX7_14 : &str = "[\n\"double\n quoted\", 'single\n quoted',\nplain\n text, [ nested ],\nsingle: pair,\n]"; -const EX7_15 : &'static str = +const EX7_15 : &str = "- { one : two , three: four , }\n- {five: six,seven : eight}"; -const EX7_16 : &'static str = +const EX7_16 : &str = "{\n? explicit: entry,\nimplicit: entry,\n?\n}"; -const EX7_17 : &'static str = +const EX7_17 : &str = "{\nunquoted : \"separate\",\nhttp://foo.com,\nomitted value:,\n: omitted key,\n}"; -const EX7_18 : &'static str = +const EX7_18 : &str = "{\n\"adjacent\":value,\n\"readable\":value,\n\"empty\":\n}"; -const EX7_19 : &'static str = +const EX7_19 : &str = "[\nfoo: bar\n]"; -const EX7_20 : &'static str = +const EX7_20 : &str = "[\n? foo\n bar : baz\n]"; -const EX7_21 : &'static str = +const EX7_21 : &str = "- [ YAML : separate ]\n- [ : empty key entry ]\n- [ {JSON: like}:adjacent ]"; -const EX7_22 : &'static str = +const EX7_22 : &str = "[ foo\n bar: invalid,"; // Note: we don't check (on purpose) the >1K chars for an // implicit key -const EX7_23 : &'static str = +const EX7_23 : &str = "- [ a, b ]\n- { a: b }\n- \"a\"\n- 'b'\n- c"; -const EX7_24 : &'static str = +const EX7_24 : &str = "- !!str \"a\"\n- 'b'\n- &anchor \"c\"\n- *anchor\n- !!str"; -const EX8_1 : &'static str = +const EX8_1 : &str = "- | # Empty header\n literal\n- >1 # Indentation indicator\n folded\n- |+ # Chomping indicator\n keep\n\n- >1- # Both indicators\n strip\n"; -const EX8_2 : &'static str = +const EX8_2 : &str = "- |\n detected\n- >\n \n \n # detected\n- |1\n explicit\n- >\n \t\n detected\n"; -const EX8_3a : &'static str = +const EX8_3a : &str = "- |\n \n text"; -const EX8_3b : &'static str = +const EX8_3b : &str = "- >\n text\n text"; -const EX8_3c : &'static str = +const EX8_3c : &str = "- |2\n text"; -const EX8_4 : &'static str = +const EX8_4 : &str = "strip: |-\n text\nclip: |\n text\nkeep: |+\n text\n"; -const EX8_5 : &'static str = +const EX8_5 : &str = " # Strip\n # Comments:\nstrip: |-\n # text\n \n # Clip\n # comments:\n\nclip: |\n # text\n \n # Keep\n # comments:\n\nkeep: |+\n # text\n\n # Trail\n # Comments\n"; -const EX8_6 : &'static str = +const EX8_6 : &str = "strip: >-\n\nclip: >\n\nkeep: |+\n\n"; -const EX8_7 : &'static str = +const EX8_7 : &str = "|\n literal\n \ttext\n\n"; -const EX8_8 : &'static str = +const EX8_8 : &str = "|\n \n \n literal\n \n \n text\n\n # Comment\n"; -const EX8_9 : &'static str = +const EX8_9 : &str = ">\n folded\n text\n\n"; -const EX8_10 : &'static str = +const EX8_10 : &str = ">\n\n folded\n line\n\n next\n line\n * bullet\n\n * list\n * lines\n\n last\n line\n\n# Comment\n"; -const EX8_11 : &'static str = EX8_10; -const EX8_12 : &'static str = EX8_10; -const EX8_13 : &'static str = EX8_10; +const EX8_11 : &str = EX8_10; +const EX8_12 : &str = EX8_10; +const EX8_13 : &str = EX8_10; -const EX8_14 : &'static str = +const EX8_14 : &str = "block sequence:\n - one\n - two : three\n"; -const EX8_15 : &'static str = +const EX8_15 : &str = "- # Empty\n- |\n block node\n- - one # Compact\n - two # sequence\n- one: two # Compact mapping\n"; -const EX8_16 : &'static str = +const EX8_16 : &str = "block mapping:\n key: value\n"; -const EX8_17 : &'static str = +const EX8_17 : &str = "? explicit key # Empty value\n? |\n block key\n: - one # Explicit compact\n - two # block value\n"; // XXX libyaml failed this test -const EX8_18 : &'static str = +const EX8_18 : &str = "plain key: in-line value\n: # Both empty\n\"quoted key\":\n- entry\n"; -const EX8_19 : &'static str = +const EX8_19 : &str = "- sun: yellow\n- ? earth: blue\n : moon: white\n"; -const EX8_20 : &'static str = +const EX8_20 : &str = "-\n \"flow in block\"\n- >\n Block scalar\n- !!map # Block collection\n foo : bar\n"; -const EX8_21 : &'static str = +const EX8_21 : &str = "literal: |2\n value\nfolded:\n !foo\n >1\n value\n"; -const EX8_22 : &'static str = +const EX8_22 : &str = "sequence: !!seq\n- entry\n- !!seq\n - nested\nmapping: !!map\n foo: bar\n"; From ff2d5fc5b64dff9f97d9349efca44d6e0486d5a3 Mon Sep 17 00:00:00 2001 From: Ethiraric Date: Thu, 17 Aug 2023 23:43:15 +0200 Subject: [PATCH 206/380] Expose `ScanError::info`. From https://github.com/chyh1990/yaml-rust/pull/190. --- saphyr/src/scanner.rs | 13 +++++++++++++ saphyr/tests/basic.rs | 10 +++++++++- 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/saphyr/src/scanner.rs b/saphyr/src/scanner.rs index c352df8..d5ce224 100644 --- a/saphyr/src/scanner.rs +++ b/saphyr/src/scanner.rs @@ -21,6 +21,7 @@ pub enum TScalarStyle { Foled, } +/// A location in a yaml document. #[derive(Clone, Copy, PartialEq, Debug, Eq)] pub struct Marker { index: usize, @@ -33,22 +34,26 @@ impl Marker { Marker { index, line, col } } + /// Return the index (in bytes) of the marker in the source. #[must_use] pub fn index(&self) -> usize { self.index } + /// Return the line of the marker in the source. #[must_use] pub fn line(&self) -> usize { self.line } + /// Return the column of the marker in the source. #[must_use] pub fn col(&self) -> usize { self.col } } +/// An error that occured while scanning. #[derive(Clone, PartialEq, Debug, Eq)] pub struct ScanError { mark: Marker, @@ -56,6 +61,7 @@ pub struct ScanError { } impl ScanError { + /// Create a new error from a location and an error string. #[must_use] pub fn new(loc: Marker, info: &str) -> ScanError { ScanError { @@ -64,10 +70,17 @@ impl ScanError { } } + /// Return the marker pointing to the error in the source. #[must_use] pub fn marker(&self) -> &Marker { &self.mark } + + /// Return the information string describing the error that happened. + #[must_use] + pub fn info(&self) -> &str { + self.info.as_ref() + } } impl Error for ScanError { diff --git a/saphyr/tests/basic.rs b/saphyr/tests/basic.rs index 1ec8751..e551776 100644 --- a/saphyr/tests/basic.rs +++ b/saphyr/tests/basic.rs @@ -52,7 +52,15 @@ scalar key: [1, 2]] key1:a2 "; - assert!(YamlLoader::load_from_str(s).is_err()); + let Err(error) = YamlLoader::load_from_str(s) else { panic!() }; + assert_eq!( + error.info(), + "mapping values are not allowed in this context" + ); + assert_eq!( + error.to_string(), + "mapping values are not allowed in this context at line 4 column 4" + ); } #[test] From 9001d564b690e58156870482aecff5f315a1f0cb Mon Sep 17 00:00:00 2001 From: Denis Lisov Date: Mon, 17 Jan 2022 01:46:49 +0300 Subject: [PATCH 207/380] testing: add an integration test for yaml-test-suite The official YAML test suite (https://github.com/yaml/yaml-test-suite). Requires the submodule to be checked out. --- saphyr/.gitmodules | 3 + saphyr/tests/yaml-test-suite | 1 + saphyr/tests/yaml-test-suite.rs | 277 ++++++++++++++++++++++++++++++++ 3 files changed, 281 insertions(+) create mode 100644 saphyr/.gitmodules create mode 160000 saphyr/tests/yaml-test-suite create mode 100644 saphyr/tests/yaml-test-suite.rs diff --git a/saphyr/.gitmodules b/saphyr/.gitmodules new file mode 100644 index 0000000..cbc1e88 --- /dev/null +++ b/saphyr/.gitmodules @@ -0,0 +1,3 @@ +[submodule "tests/yaml-test-suite"] + path = tests/yaml-test-suite + url = https://github.com/yaml/yaml-test-suite/ diff --git a/saphyr/tests/yaml-test-suite b/saphyr/tests/yaml-test-suite new file mode 160000 index 0000000..534eb75 --- /dev/null +++ b/saphyr/tests/yaml-test-suite @@ -0,0 +1 @@ +Subproject commit 534eb75451fada039442460a79b79989fc87f9c3 diff --git a/saphyr/tests/yaml-test-suite.rs b/saphyr/tests/yaml-test-suite.rs new file mode 100644 index 0000000..04b1fba --- /dev/null +++ b/saphyr/tests/yaml-test-suite.rs @@ -0,0 +1,277 @@ +use std::{ffi::OsStr, fs, path::Path}; + +use yaml_rust::{ + parser::{Event, EventReceiver, Parser}, + scanner::{TokenType, TScalarStyle}, + ScanError, + Yaml, + YamlLoader, + yaml, +}; + +#[test] +fn yaml_test_suite() -> Result<(), Box> { + let mut error_count = 0; + for entry in std::fs::read_dir("tests/yaml-test-suite/src")? { + let entry = entry?; + error_count += run_tests_from_file(&entry.path(), &entry.file_name())?; + } + println!("Expected errors: {}", EXPECTED_FAILURES.len()); + if error_count > 0 { + panic!("Unexpected errors in testsuite: {}", error_count); + } + Ok(()) +} + +fn run_tests_from_file(path: impl AsRef, file_name: &OsStr) -> Result> { + let test_name = path.as_ref() + .file_name().ok_or("")? + .to_string_lossy().strip_suffix(".yaml").ok_or("unexpected filename")?.to_owned(); + let data = fs::read_to_string(path.as_ref())?; + let tests = YamlLoader::load_from_str(&data)?; + let tests = tests[0].as_vec().unwrap(); + let mut error_count = 0; + + let mut test = yaml::Hash::new(); + for (idx, test_data) in tests.iter().enumerate() { + let desc = format!("{}-{}", test_name, idx); + let is_xfail = EXPECTED_FAILURES.contains(&desc.as_str()); + + // Test fields except `fail` are "inherited" + let test_data = test_data.as_hash().unwrap(); + test.remove(&Yaml::String("fail".into())); + for (key, value) in test_data.clone() { + test.insert(key, value); + } + + if let Some(error) = run_single_test(&test) { + if !is_xfail { + eprintln!("[{}] {}", desc, error); + error_count += 1; + } + } else { + if is_xfail { + eprintln!("[{}] UNEXPECTED PASS", desc); + error_count += 1; + } + } + } + Ok(error_count) +} + +fn run_single_test(test: &yaml::Hash) -> Option { + if test.get(&Yaml::String("skip".into())).is_some() { + return None; + } + let source = test[&Yaml::String("yaml".into())].as_str().unwrap(); + let should_fail = test.get(&Yaml::String("fail".into())) == Some(&Yaml::Boolean(true)); + let actual_events = parse_to_events(&yaml_to_raw(source)); + if should_fail { + if actual_events.is_ok() { + return Some(format!("no error while expected")); + } + } else { + let expected_events = yaml_to_raw(test[&Yaml::String("tree".into())].as_str().unwrap()); + match actual_events { + Ok(events) => { + if let Some(diff) = events_differ(events, &expected_events) { + //dbg!(source, yaml_to_raw(source)); + return Some(format!("events differ: {}", diff)); + } + } + Err(error) => { + //dbg!(source, yaml_to_raw(source)); + return Some(format!("unexpected error {:?}", error)); + } + } + } + None +} + +fn parse_to_events(source: &str) -> Result, ScanError> { + let mut reporter = EventReporter::new(); + Parser::new(source.chars()) + .load(&mut reporter, true)?; + Ok(reporter.events) +} + +struct EventReporter { + events: Vec, +} + +impl EventReporter { + fn new() -> Self { + Self { + events: vec![], + } + } +} + +impl EventReceiver for EventReporter { + fn on_event(&mut self, ev: Event) { + let line: String = match ev { + Event::StreamStart => "+STR".into(), + Event::StreamEnd => "-STR".into(), + + Event::DocumentStart => "+DOC".into(), + Event::DocumentEnd => "-DOC".into(), + + Event::SequenceStart(idx) => format!("+SEQ{}", format_index(idx)), + Event::SequenceEnd => "-SEQ".into(), + + Event::MappingStart(idx) => format!("+MAP{}", format_index(idx)), + Event::MappingEnd => "-MAP".into(), + + Event::Scalar(ref text, style, idx, ref tag) => { + let kind = match style { + TScalarStyle::Plain => ":", + TScalarStyle::SingleQuoted => "'", + TScalarStyle::DoubleQuoted => r#"""#, + TScalarStyle::Literal => "|", + TScalarStyle::Foled => ">", + TScalarStyle::Any => unreachable!(), + }; + format!("=VAL{}{} {}{}", + format_index(idx), format_tag(tag), kind, escape_text(text)) + } + Event::Alias(idx) => format!("=ALI *{}", idx), + Event::Nothing => return, + }; + self.events.push(line); + } +} + +fn format_index(idx: usize) -> String { + if idx > 0 { + format!(" &{}", idx) + } else { + "".into() + } +} + +fn escape_text(text: &str) -> String { + let mut text = text.to_owned(); + for (ch, replacement) in [ + ('\\', r#"\\"#), + ('\n', "\\n"), + ('\r', "\\r"), + ('\x08', "\\b"), + ('\t', "\\t"), + ] { + text = text.replace(ch, replacement); + } + text +} + +fn format_tag(tag: &Option) -> String { + if let Some(TokenType::Tag(ns, tag)) = tag { + let ns = match ns.as_str() { + "!!" => "tag:yaml.org,2002:", // Wrong if this ns is overridden + other => other, + }; + format!(" <{}{}>", ns, tag) + } else { + "".into() + } +} + +fn events_differ(actual: Vec, expected: &str) -> Option { + let actual = actual.iter().map(Some).chain(std::iter::repeat(None)); + let expected = expected_events(expected); + let expected = expected.iter().map(Some).chain(std::iter::repeat(None)); + for (idx, (act, exp)) in actual.zip(expected).enumerate() { + return match (act, exp) { + (Some(act), Some(exp)) => { + if act == exp { + continue; + } else { + Some(format!("line {} differs: expected `{}`, found `{}`", idx, exp, act)) + } + } + (Some(a), None) => Some(format!("extra actual line: {:?}", a)), + (None, Some(e)) => Some(format!("extra expected line: {:?}", e)), + (None, None) => None, + } + } + unreachable!() +} + +/// Replace the unprintable characters used in the YAML examples with normal +fn yaml_to_raw(yaml: &str) -> String { + let mut yaml = yaml.to_owned(); + for (pat, replacement) in [ + ("␣", " "), + ("»", "\t"), + ("—", ""), // Tab line continuation ——» + ("←", "\r"), + ("⇔", "\u{FEFF}"), + ("↵", ""), // Trailing newline marker + ("∎\n", ""), + ] { + yaml = yaml.replace(pat, replacement); + } + yaml +} + +/// Adapt the expectations to the yaml-rust reasonable limitations +/// +/// Drop information on node styles (flow/block) and anchor names. +/// Both are things that can be omitted according to spec. +fn expected_events(expected_tree: &str) -> Vec { + let mut anchors = vec![]; + expected_tree.split("\n") + .map(|s| s.trim_start().to_owned()) + .filter(|s| !s.is_empty()) + .map(|mut s| { + // Anchor name-to-number conversion + if let Some(start) = s.find("&") { + if s[..start].find(":").is_none() { + let len = s[start..].find(" ").unwrap_or(s[start..].len()); + anchors.push(s[start+1..start + len].to_owned()); + s = s.replace(&s[start..start + len], &format!("&{}", anchors.len())); + } + } + // Alias nodes name-to-number + if s.starts_with("=ALI") { + let start = s.find("*").unwrap(); + let name = &s[start + 1 ..]; + let idx = anchors.iter().enumerate().filter(|(_, v)| v == &name).last().unwrap().0; + s = s.replace(&s[start..], &format!("*{}", idx + 1)); + } + // Dropping style information + match &*s { + "+DOC ---" => "+DOC".into(), + "-DOC ..." => "-DOC".into(), + s if s.starts_with("+SEQ []") => s.replacen("+SEQ []", "+SEQ", 1), + s if s.starts_with("+MAP {}") => s.replacen("+MAP {}", "+MAP", 1), + "=VAL :" => "=VAL :~".into(), // FIXME: known bug + s => s.into(), + } + }) + .collect() +} + +static EXPECTED_FAILURES: &[&str] = &[ + // These seem to be API limited (not enough information on the event stream level) + // No tag available for SEQ and MAP + "2XXW-0", + "35KP-0", + "57H4-0", + "6JWB-0", + "735Y-0", + "9KAX-0", + "BU8L-0", + "C4HZ-0", + "EHF6-0", + "J7PZ-0", + "UGM3-0", + // Cannot resolve tag namespaces + "5TYM-0", + "6CK3-0", + "6WLZ-0", + "9WXW-0", + "CC74-0", + "U3C3-0", + "Z9M4-0", + "P76L-0", // overriding the `!!` namespace! +]; From a8cf1bb64220c38540290c5ecc63b66f873b9439 Mon Sep 17 00:00:00 2001 From: Denis Lisov Date: Tue, 18 Jan 2022 02:44:29 +0300 Subject: [PATCH 208/380] yaml-test-suite: move to libtest-mimic --- saphyr/Cargo.toml | 5 + saphyr/tests/yaml-test-suite.rs | 189 +++++++++++++++++--------------- 2 files changed, 108 insertions(+), 86 deletions(-) diff --git a/saphyr/Cargo.toml b/saphyr/Cargo.toml index b779cfc..bd8aa94 100644 --- a/saphyr/Cargo.toml +++ b/saphyr/Cargo.toml @@ -14,8 +14,13 @@ edition = "2018" linked-hash-map = "0.5.3" [dev-dependencies] +libtest-mimic = "0.3.0" quickcheck = "0.9" [profile.release-lto] inherits = "release" lto = true + +[[test]] +name = "yaml-test-suite" +harness = false diff --git a/saphyr/tests/yaml-test-suite.rs b/saphyr/tests/yaml-test-suite.rs index 04b1fba..06433bd 100644 --- a/saphyr/tests/yaml-test-suite.rs +++ b/saphyr/tests/yaml-test-suite.rs @@ -1,4 +1,6 @@ -use std::{ffi::OsStr, fs, path::Path}; +use std::fs::{self, DirEntry}; + +use libtest_mimic::{Arguments, Test, Outcome, run_tests}; use yaml_rust::{ parser::{Event, EventReceiver, Parser}, @@ -9,83 +11,98 @@ use yaml_rust::{ yaml, }; -#[test] -fn yaml_test_suite() -> Result<(), Box> { - let mut error_count = 0; - for entry in std::fs::read_dir("tests/yaml-test-suite/src")? { - let entry = entry?; - error_count += run_tests_from_file(&entry.path(), &entry.file_name())?; - } - println!("Expected errors: {}", EXPECTED_FAILURES.len()); - if error_count > 0 { - panic!("Unexpected errors in testsuite: {}", error_count); - } - Ok(()) +type Result> = std::result::Result; + +struct YamlTest { + yaml: String, + expected_events: String, + expected_error: bool, + is_xfail: bool, } -fn run_tests_from_file(path: impl AsRef, file_name: &OsStr) -> Result> { - let test_name = path.as_ref() - .file_name().ok_or("")? - .to_string_lossy().strip_suffix(".yaml").ok_or("unexpected filename")?.to_owned(); - let data = fs::read_to_string(path.as_ref())?; - let tests = YamlLoader::load_from_str(&data)?; - let tests = tests[0].as_vec().unwrap(); - let mut error_count = 0; +fn main() -> Result<()> { + let mut arguments = Arguments::from_args(); + if arguments.num_threads.is_none() { + arguments.num_threads = Some(1); + } + let tests: Vec> = std::fs::read_dir("tests/yaml-test-suite/src")? + .map(|entry| -> Result<_> { + let entry = entry?; + let tests = load_tests_from_file(&entry)?; + Ok(tests) + }) + .collect::>()?; + let mut tests: Vec<_> = tests.into_iter().flatten().collect(); + tests.sort_by_key(|t| t.name.clone()); + run_tests( + &arguments, + tests, + run_yaml_test, + ).exit(); +} - let mut test = yaml::Hash::new(); +fn run_yaml_test(test: &Test) -> Outcome { + let desc = &test.data; + let actual_events = parse_to_events(&desc.yaml); + let events_diff = actual_events.map(|events| events_differ(events, &desc.expected_events)); + let error_text = match (events_diff, desc.expected_error) { + (Ok(_), true) => Some("no error when expected".into()), + (Err(_), true) => None, + (Err(e), false) => Some(format!("unexpected error {:?}", e)), + (Ok(Some(diff)), false) => Some(format!("events differ: {}", diff)), + (Ok(None), false) => None, + }; + match (error_text, desc.is_xfail) { + (None, false) => Outcome::Passed, + (Some(text), false) => Outcome::Failed { msg: Some(text) }, + (Some(_), true) => Outcome::Ignored, + (None, true) => Outcome::Failed { msg: Some("expected to fail but passes".into()) }, + } +} + +fn load_tests_from_file(entry: &DirEntry) -> Result>> { + let file_name = entry.file_name().to_string_lossy().to_string(); + let test_name = file_name.strip_suffix(".yaml").ok_or("unexpected filename")?; + let tests = YamlLoader::load_from_str(&fs::read_to_string(&entry.path())?)?; + let tests = tests[0].as_vec().ok_or("no test list found in file")?; + + let mut result = vec![]; + let mut current_test = yaml::Hash::new(); for (idx, test_data) in tests.iter().enumerate() { - let desc = format!("{}-{}", test_name, idx); - let is_xfail = EXPECTED_FAILURES.contains(&desc.as_str()); + let name = if tests.len() > 1 { + format!("{}-{:02}", test_name, idx) + } else { + test_name.to_string() + }; + let is_xfail = EXPECTED_FAILURES.contains(&name.as_str()); // Test fields except `fail` are "inherited" let test_data = test_data.as_hash().unwrap(); - test.remove(&Yaml::String("fail".into())); + current_test.remove(&Yaml::String("fail".into())); for (key, value) in test_data.clone() { - test.insert(key, value); + current_test.insert(key, value); } - if let Some(error) = run_single_test(&test) { - if !is_xfail { - eprintln!("[{}] {}", desc, error); - error_count += 1; - } - } else { - if is_xfail { - eprintln!("[{}] UNEXPECTED PASS", desc); - error_count += 1; - } - } - } - Ok(error_count) -} + let current_test = Yaml::Hash(current_test.clone()); // Much better indexing -fn run_single_test(test: &yaml::Hash) -> Option { - if test.get(&Yaml::String("skip".into())).is_some() { - return None; - } - let source = test[&Yaml::String("yaml".into())].as_str().unwrap(); - let should_fail = test.get(&Yaml::String("fail".into())) == Some(&Yaml::Boolean(true)); - let actual_events = parse_to_events(&yaml_to_raw(source)); - if should_fail { - if actual_events.is_ok() { - return Some(format!("no error while expected")); - } - } else { - let expected_events = yaml_to_raw(test[&Yaml::String("tree".into())].as_str().unwrap()); - match actual_events { - Ok(events) => { - if let Some(diff) = events_differ(events, &expected_events) { - //dbg!(source, yaml_to_raw(source)); - return Some(format!("events differ: {}", diff)); - } - } - Err(error) => { - //dbg!(source, yaml_to_raw(source)); - return Some(format!("unexpected error {:?}", error)); - } + if current_test["skip"] != Yaml::BadValue { + continue; } + + result.push(Test { + name, + kind: String::new(), + is_ignored: false, + is_bench: false, + data: YamlTest { + yaml: visual_to_raw(current_test["yaml"].as_str().unwrap()), + expected_events: visual_to_raw(current_test["tree"].as_str().unwrap()), + expected_error: current_test["fail"].as_bool() == Some(true), + is_xfail, + }, + }); } - None + Ok(result) } fn parse_to_events(source: &str) -> Result, ScanError> { @@ -196,8 +213,8 @@ fn events_differ(actual: Vec, expected: &str) -> Option { unreachable!() } -/// Replace the unprintable characters used in the YAML examples with normal -fn yaml_to_raw(yaml: &str) -> String { +/// Convert the snippets from "visual" to "actual" representation +fn visual_to_raw(yaml: &str) -> String { let mut yaml = yaml.to_owned(); for (pat, replacement) in [ ("␣", " "), @@ -254,24 +271,24 @@ fn expected_events(expected_tree: &str) -> Vec { static EXPECTED_FAILURES: &[&str] = &[ // These seem to be API limited (not enough information on the event stream level) // No tag available for SEQ and MAP - "2XXW-0", - "35KP-0", - "57H4-0", - "6JWB-0", - "735Y-0", - "9KAX-0", - "BU8L-0", - "C4HZ-0", - "EHF6-0", - "J7PZ-0", - "UGM3-0", + "2XXW", + "35KP", + "57H4", + "6JWB", + "735Y", + "9KAX", + "BU8L", + "C4HZ", + "EHF6", + "J7PZ", + "UGM3", // Cannot resolve tag namespaces - "5TYM-0", - "6CK3-0", - "6WLZ-0", - "9WXW-0", - "CC74-0", - "U3C3-0", - "Z9M4-0", - "P76L-0", // overriding the `!!` namespace! + "5TYM", + "6CK3", + "6WLZ", + "9WXW", + "CC74", + "U3C3", + "Z9M4", + "P76L", // overriding the `!!` namespace! ]; From 4ba34544ad63e1a2114fc1ea0e5d9e37d8354fb8 Mon Sep 17 00:00:00 2001 From: Denis Lisov Date: Tue, 18 Jan 2022 03:57:40 +0300 Subject: [PATCH 209/380] yaml-test-suite: add ignores and classify failures --- saphyr/tests/yaml-test-suite.rs | 79 +++++++++++++++++++++++++++++++++ 1 file changed, 79 insertions(+) diff --git a/saphyr/tests/yaml-test-suite.rs b/saphyr/tests/yaml-test-suite.rs index 06433bd..8316e7a 100644 --- a/saphyr/tests/yaml-test-suite.rs +++ b/saphyr/tests/yaml-test-suite.rs @@ -291,4 +291,83 @@ static EXPECTED_FAILURES: &[&str] = &[ "U3C3", "Z9M4", "P76L", // overriding the `!!` namespace! + + // These seem to be plain bugs + // Leading TAB in literal scalars + "96NN-00", + "96NN-01", + "R4YG", + "Y79Y-01", + // TAB as start of plain scalar instead of whitespace + "6BCT", + "6CA3", + "A2M4", + "DK95-00", + "Q5MG", + "Y79Y-06", + "4EJS", // unexpected pass + "Y79Y-03", // unexpected pass + "Y79Y-04", // unexpected pass + "Y79Y-05", // unexpected pass + // TABs in whitespace-only lines + "DK95-03", + "DK95-04", + // Other TABs + "DK95-01", // in double-quoted scalar + // Empty key in flow mappings + "CFD4", + // Document with no nodes and document end + "HWV9", + "QT73", + // Unusual characters in anchors/aliases + "2SXE", // : + "8XYN", // emoji!! + "W5VH", // :@*!$": + "Y2GN", // : in the middle + // Flow mapping colon on next line / multiline key in flow mapping + "4MUZ-00", + "4MUZ-01", + "5MUD", + "9SA2", + "C4WK", + "K3WX", + "NJ66", + "UT92", + "VJP3-01", + // Bare document after end marker + "7Z25", + "M7A3", + // Scalar marker on document start line + "DK3J", + "FP8R", + // Comments on nonempty lines need leading space + "9JBA", + "CVW2", + "MUS6-00", + "SU5Z", + "X4QW", + // Directives (various) + "9HCY", // Directive after content + "EB22", // Directive after content + "MUS6-01", // no document end marker? + "QLJ7", // TAG directives should not be inherited between documents + "RHX7", // no document end marker + "SF5V", // duplicate directive + "W4TN", // scalar confused as directive + // Losing trailing newline + "JEF9-02", + "L24T-01", + // Dashes in flow sequence (should be forbidden) + "G5U8", + "YJV2", + // Misc + "9MMW", // Mapping key in implicit mapping in flow sequence(!) + "G9HC", // Anchor indent problem(?) + "H7J7", // Anchor indent / linebreak problem? + "3UYS", // Escaped / + "HRE5", // Escaped ' in double-quoted (should not work) + "QB6E", // Indent for multiline double-quoted scalar + "S98Z", // Block scalar and indent problems? + "U99R", // Comma is not allowed in tags + "WZ62", // Empty content ]; From 080d7d527532b37ca05d3c5048ee4784dc31bfdd Mon Sep 17 00:00:00 2001 From: Denis Lisov Date: Tue, 18 Jan 2022 03:59:58 +0300 Subject: [PATCH 210/380] yaml-test-suite: ensure all XFAILs do exist as tests --- saphyr/tests/yaml-test-suite.rs | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/saphyr/tests/yaml-test-suite.rs b/saphyr/tests/yaml-test-suite.rs index 8316e7a..0972e26 100644 --- a/saphyr/tests/yaml-test-suite.rs +++ b/saphyr/tests/yaml-test-suite.rs @@ -34,6 +34,11 @@ fn main() -> Result<()> { .collect::>()?; let mut tests: Vec<_> = tests.into_iter().flatten().collect(); tests.sort_by_key(|t| t.name.clone()); + + for &test in EXPECTED_FAILURES { + assert!(tests.iter().find(|t| t.name == test).is_some()); + } + run_tests( &arguments, tests, From 9fd57469a476e267c4fa54f3036fc71ebfbecfe2 Mon Sep 17 00:00:00 2001 From: Denis Lisov Date: Tue, 18 Jan 2022 13:47:16 +0300 Subject: [PATCH 211/380] yaml-test-suite: print the names of missing XFAILs --- saphyr/tests/yaml-test-suite.rs | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/saphyr/tests/yaml-test-suite.rs b/saphyr/tests/yaml-test-suite.rs index 0972e26..49f74d7 100644 --- a/saphyr/tests/yaml-test-suite.rs +++ b/saphyr/tests/yaml-test-suite.rs @@ -35,8 +35,11 @@ fn main() -> Result<()> { let mut tests: Vec<_> = tests.into_iter().flatten().collect(); tests.sort_by_key(|t| t.name.clone()); - for &test in EXPECTED_FAILURES { - assert!(tests.iter().find(|t| t.name == test).is_some()); + let missing_xfails: Vec<_> = EXPECTED_FAILURES.iter() + .filter(|&&test| !tests.iter().any(|t| t.name == test)) + .collect(); + if !missing_xfails.is_empty() { + panic!("The following EXPECTED_FAILURES not found during discovery: {:?}", missing_xfails); } run_tests( From c7e371a24cf18e0c31fd3336c3df747a1bcecd9f Mon Sep 17 00:00:00 2001 From: Denis Lisov Date: Tue, 18 Jan 2022 13:48:00 +0300 Subject: [PATCH 212/380] yaml-test-suite: update to YTS v2022-01-17 --- saphyr/tests/yaml-test-suite | 2 +- saphyr/tests/yaml-test-suite.rs | 7 ++++++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/saphyr/tests/yaml-test-suite b/saphyr/tests/yaml-test-suite index 534eb75..45db50a 160000 --- a/saphyr/tests/yaml-test-suite +++ b/saphyr/tests/yaml-test-suite @@ -1 +1 @@ -Subproject commit 534eb75451fada039442460a79b79989fc87f9c3 +Subproject commit 45db50aecf9b1520f8258938c88f396e96f30831 diff --git a/saphyr/tests/yaml-test-suite.rs b/saphyr/tests/yaml-test-suite.rs index 49f74d7..9048ebe 100644 --- a/saphyr/tests/yaml-test-suite.rs +++ b/saphyr/tests/yaml-test-suite.rs @@ -317,9 +317,14 @@ static EXPECTED_FAILURES: &[&str] = &[ "Y79Y-03", // unexpected pass "Y79Y-04", // unexpected pass "Y79Y-05", // unexpected pass + "Y79Y-10", // TABs in whitespace-only lines "DK95-03", "DK95-04", + // TABs after marker ? or : (space required?) + "Y79Y-07", + "Y79Y-08", + "Y79Y-09", // Other TABs "DK95-01", // in double-quoted scalar // Empty key in flow mappings @@ -335,9 +340,9 @@ static EXPECTED_FAILURES: &[&str] = &[ // Flow mapping colon on next line / multiline key in flow mapping "4MUZ-00", "4MUZ-01", + "4MUZ-02", "5MUD", "9SA2", - "C4WK", "K3WX", "NJ66", "UT92", From fb8ef31546a92ab4350a7802bf3ec568b78e9d1f Mon Sep 17 00:00:00 2001 From: Denis Lisov Date: Wed, 19 Jan 2022 03:03:53 +0300 Subject: [PATCH 213/380] yaml-test-suite: print the YAML text on failure --- saphyr/tests/yaml-test-suite.rs | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/saphyr/tests/yaml-test-suite.rs b/saphyr/tests/yaml-test-suite.rs index 9048ebe..61f54b6 100644 --- a/saphyr/tests/yaml-test-suite.rs +++ b/saphyr/tests/yaml-test-suite.rs @@ -14,6 +14,7 @@ use yaml_rust::{ type Result> = std::result::Result; struct YamlTest { + yaml_visual: String, yaml: String, expected_events: String, expected_error: bool, @@ -53,13 +54,17 @@ fn run_yaml_test(test: &Test) -> Outcome { let desc = &test.data; let actual_events = parse_to_events(&desc.yaml); let events_diff = actual_events.map(|events| events_differ(events, &desc.expected_events)); - let error_text = match (events_diff, desc.expected_error) { + let mut error_text = match (events_diff, desc.expected_error) { (Ok(_), true) => Some("no error when expected".into()), (Err(_), true) => None, (Err(e), false) => Some(format!("unexpected error {:?}", e)), (Ok(Some(diff)), false) => Some(format!("events differ: {}", diff)), (Ok(None), false) => None, }; + if let Some(text) = &mut error_text { + use std::fmt::Write; + let _ = write!(text, "\n### Input:\n{}\n### End", desc.yaml_visual); + } match (error_text, desc.is_xfail) { (None, false) => Outcome::Passed, (Some(text), false) => Outcome::Failed { msg: Some(text) }, @@ -103,6 +108,7 @@ fn load_tests_from_file(entry: &DirEntry) -> Result>> { is_ignored: false, is_bench: false, data: YamlTest { + yaml_visual: current_test["yaml"].as_str().unwrap().to_string(), yaml: visual_to_raw(current_test["yaml"].as_str().unwrap()), expected_events: visual_to_raw(current_test["tree"].as_str().unwrap()), expected_error: current_test["fail"].as_bool() == Some(true), From 8c4ea2c8382a6de06251efc7560fdb65a3c2ac7d Mon Sep 17 00:00:00 2001 From: Ethiraric Date: Fri, 18 Aug 2023 02:04:21 +0200 Subject: [PATCH 214/380] Move emitter/scanner tests to their folder. Change scanner's complex test: ```diff - *coffee: + *coffee : amount: 4 - *cookies: + *cookies : amount: 4 ``` According to https://play.yaml.io/main/parser, this example was invalid in the first place. Adding a space makes it so that the colon is not part of the alias name. Also fix colons not being able to be part of anchors. --- saphyr/src/emitter.rs | 299 --------------------- saphyr/src/scanner.rs | 448 +------------------------------- saphyr/tests/emitter.rs | 294 +++++++++++++++++++++ saphyr/tests/scanner.rs | 440 +++++++++++++++++++++++++++++++ saphyr/tests/yaml-test-suite.rs | 3 +- 5 files changed, 737 insertions(+), 747 deletions(-) create mode 100644 saphyr/tests/emitter.rs create mode 100644 saphyr/tests/scanner.rs diff --git a/saphyr/src/emitter.rs b/saphyr/src/emitter.rs index bf1bbaa..081654a 100644 --- a/saphyr/src/emitter.rs +++ b/saphyr/src/emitter.rs @@ -334,302 +334,3 @@ fn need_quotes(string: &str) -> bool { || string.parse::().is_ok() || string.parse::().is_ok() } - -#[cfg(test)] -#[allow(clippy::similar_names)] -mod test { - use super::*; - use crate::YamlLoader; - - #[test] - fn test_emit_simple() { - let s = " -# comment -a0 bb: val -a1: - b1: 4 - b2: d -a2: 4 # i'm comment -a3: [1, 2, 3] -a4: - - [a1, a2] - - 2 -"; - - let docs = YamlLoader::load_from_str(s).unwrap(); - let doc = &docs[0]; - let mut writer = String::new(); - { - let mut emitter = YamlEmitter::new(&mut writer); - emitter.dump(doc).unwrap(); - } - println!("original:\n{s}"); - println!("emitted:\n{writer}"); - let docs_new = match YamlLoader::load_from_str(&writer) { - Ok(y) => y, - Err(e) => panic!("{}", e), - }; - let doc_new = &docs_new[0]; - - assert_eq!(doc, doc_new); - } - - #[test] - fn test_emit_complex() { - let s = r#" -cataloge: - product: &coffee { name: Coffee, price: 2.5 , unit: 1l } - product: &cookies { name: Cookies!, price: 3.40 , unit: 400g} - -products: - *coffee: - amount: 4 - *cookies: - amount: 4 - [1,2,3,4]: - array key - 2.4: - real key - true: - bool key - {}: - empty hash key - "#; - let docs = YamlLoader::load_from_str(s).unwrap(); - let doc = &docs[0]; - let mut writer = String::new(); - { - let mut emitter = YamlEmitter::new(&mut writer); - emitter.dump(doc).unwrap(); - } - let docs_new = match YamlLoader::load_from_str(&writer) { - Ok(y) => y, - Err(e) => panic!("{}", e), - }; - let doc_new = &docs_new[0]; - assert_eq!(doc, doc_new); - } - - #[test] - fn test_emit_avoid_quotes() { - let s = r#"--- -a7: 你好 -boolean: "true" -boolean2: "false" -date: 2014-12-31 -empty_string: "" -empty_string1: " " -empty_string2: " a" -empty_string3: " a " -exp: "12e7" -field: ":" -field2: "{" -field3: "\\" -field4: "\n" -field5: "can't avoid quote" -float: "2.6" -int: "4" -nullable: "null" -nullable2: "~" -products: - "*coffee": - amount: 4 - "*cookies": - amount: 4 - ".milk": - amount: 1 - "2.4": real key - "[1,2,3,4]": array key - "true": bool key - "{}": empty hash key -x: test -y: avoid quoting here -z: string with spaces"#; - - let docs = YamlLoader::load_from_str(s).unwrap(); - let doc = &docs[0]; - let mut writer = String::new(); - { - let mut emitter = YamlEmitter::new(&mut writer); - emitter.dump(doc).unwrap(); - } - - assert_eq!(s, writer, "actual:\n\n{writer}\n"); - } - - #[test] - fn emit_quoted_bools() { - let input = r#"--- -string0: yes -string1: no -string2: "true" -string3: "false" -string4: "~" -null0: ~ -[true, false]: real_bools -[True, TRUE, False, FALSE, y,Y,yes,Yes,YES,n,N,no,No,NO,on,On,ON,off,Off,OFF]: false_bools -bool0: true -bool1: false"#; - let expected = r#"--- -string0: "yes" -string1: "no" -string2: "true" -string3: "false" -string4: "~" -null0: ~ -? - true - - false -: real_bools -? - "True" - - "TRUE" - - "False" - - "FALSE" - - y - - Y - - "yes" - - "Yes" - - "YES" - - n - - N - - "no" - - "No" - - "NO" - - "on" - - "On" - - "ON" - - "off" - - "Off" - - "OFF" -: false_bools -bool0: true -bool1: false"#; - - let docs = YamlLoader::load_from_str(input).unwrap(); - let doc = &docs[0]; - let mut writer = String::new(); - { - let mut emitter = YamlEmitter::new(&mut writer); - emitter.dump(doc).unwrap(); - } - - assert_eq!( - expected, writer, - "expected:\n{expected}\nactual:\n{writer}\n", - ); - } - - #[test] - fn test_empty_and_nested() { - test_empty_and_nested_flag(false); - } - - #[test] - fn test_empty_and_nested_compact() { - test_empty_and_nested_flag(true); - } - - fn test_empty_and_nested_flag(compact: bool) { - let s = if compact { - r#"--- -a: - b: - c: hello - d: {} -e: - - f - - g - - h: []"# - } else { - r#"--- -a: - b: - c: hello - d: {} -e: - - f - - g - - - h: []"# - }; - - let docs = YamlLoader::load_from_str(s).unwrap(); - let doc = &docs[0]; - let mut writer = String::new(); - { - let mut emitter = YamlEmitter::new(&mut writer); - emitter.compact(compact); - emitter.dump(doc).unwrap(); - } - - assert_eq!(s, writer); - } - - #[test] - fn test_nested_arrays() { - let s = r#"--- -a: - - b - - - c - - d - - - e - - f"#; - - let docs = YamlLoader::load_from_str(s).unwrap(); - let doc = &docs[0]; - let mut writer = String::new(); - { - let mut emitter = YamlEmitter::new(&mut writer); - emitter.dump(doc).unwrap(); - } - println!("original:\n{s}"); - println!("emitted:\n{writer}"); - - assert_eq!(s, writer); - } - - #[test] - fn test_deeply_nested_arrays() { - let s = r#"--- -a: - - b - - - c - - d - - - e - - - f - - - e"#; - - let docs = YamlLoader::load_from_str(s).unwrap(); - let doc = &docs[0]; - let mut writer = String::new(); - { - let mut emitter = YamlEmitter::new(&mut writer); - emitter.dump(doc).unwrap(); - } - println!("original:\n{s}"); - println!("emitted:\n{writer}"); - - assert_eq!(s, writer); - } - - #[test] - fn test_nested_hashes() { - let s = r#"--- -a: - b: - c: - d: - e: f"#; - - let docs = YamlLoader::load_from_str(s).unwrap(); - let doc = &docs[0]; - let mut writer = String::new(); - { - let mut emitter = YamlEmitter::new(&mut writer); - emitter.dump(doc).unwrap(); - } - println!("original:\n{s}"); - println!("emitted:\n{writer}"); - - assert_eq!(s, writer); - } -} diff --git a/saphyr/src/scanner.rs b/saphyr/src/scanner.rs index d5ce224..ed8c592 100644 --- a/saphyr/src/scanner.rs +++ b/saphyr/src/scanner.rs @@ -951,7 +951,7 @@ impl> Scanner { self.skip(); self.lookahead(1); - while is_alpha(self.ch()) { + while is_alpha(self.ch()) || self.ch_is(':') { string.push(self.ch()); self.skip(); self.lookahead(1); @@ -960,7 +960,7 @@ impl> Scanner { if string.is_empty() || match self.ch() { c if is_blankz(c) => false, - '?' | ':' | ',' | ']' | '}' | '%' | '@' | '`' => false, + '?' | ',' | ']' | '}' | '%' | '@' | '`' => false, _ => true, } { @@ -1747,447 +1747,3 @@ impl> Scanner { Ok(()) } } - -#[cfg(test)] -#[allow(clippy::enum_glob_use)] -mod test { - use super::TokenType::*; - use super::*; - - macro_rules! next { - ($p:ident, $tk:pat) => {{ - let tok = $p.next().unwrap(); - match tok.1 { - $tk => {} - _ => panic!("unexpected token: {:?}", tok), - } - }}; - } - - macro_rules! next_scalar { - ($p:ident, $tk:expr, $v:expr) => {{ - let tok = $p.next().unwrap(); - match tok.1 { - Scalar(style, ref v) => { - assert_eq!(style, $tk); - assert_eq!(v, $v); - } - _ => panic!("unexpected token: {:?}", tok), - } - }}; - } - - macro_rules! end { - ($p:ident) => {{ - assert_eq!($p.next(), None); - }}; - } - /// test cases in libyaml scanner.c - #[test] - fn test_empty() { - let s = ""; - let mut p = Scanner::new(s.chars()); - next!(p, StreamStart(..)); - next!(p, StreamEnd); - end!(p); - } - - #[test] - fn test_scalar() { - let s = "a scalar"; - let mut p = Scanner::new(s.chars()); - next!(p, StreamStart(..)); - next!(p, Scalar(TScalarStyle::Plain, _)); - next!(p, StreamEnd); - end!(p); - } - - #[test] - fn test_explicit_scalar() { - let s = "--- -'a scalar' -... -"; - let mut p = Scanner::new(s.chars()); - next!(p, StreamStart(..)); - next!(p, DocumentStart); - next!(p, Scalar(TScalarStyle::SingleQuoted, _)); - next!(p, DocumentEnd); - next!(p, StreamEnd); - end!(p); - } - - #[test] - fn test_multiple_documents() { - let s = " -'a scalar' ---- -'a scalar' ---- -'a scalar' -"; - let mut p = Scanner::new(s.chars()); - next!(p, StreamStart(..)); - next!(p, Scalar(TScalarStyle::SingleQuoted, _)); - next!(p, DocumentStart); - next!(p, Scalar(TScalarStyle::SingleQuoted, _)); - next!(p, DocumentStart); - next!(p, Scalar(TScalarStyle::SingleQuoted, _)); - next!(p, StreamEnd); - end!(p); - } - - #[test] - fn test_a_flow_sequence() { - let s = "[item 1, item 2, item 3]"; - let mut p = Scanner::new(s.chars()); - next!(p, StreamStart(..)); - next!(p, FlowSequenceStart); - next_scalar!(p, TScalarStyle::Plain, "item 1"); - next!(p, FlowEntry); - next!(p, Scalar(TScalarStyle::Plain, _)); - next!(p, FlowEntry); - next!(p, Scalar(TScalarStyle::Plain, _)); - next!(p, FlowSequenceEnd); - next!(p, StreamEnd); - end!(p); - } - - #[test] - fn test_a_flow_mapping() { - let s = " -{ - a simple key: a value, # Note that the KEY token is produced. - ? a complex key: another value, -} -"; - let mut p = Scanner::new(s.chars()); - next!(p, StreamStart(..)); - next!(p, FlowMappingStart); - next!(p, Key); - next!(p, Scalar(TScalarStyle::Plain, _)); - next!(p, Value); - next!(p, Scalar(TScalarStyle::Plain, _)); - next!(p, FlowEntry); - next!(p, Key); - next_scalar!(p, TScalarStyle::Plain, "a complex key"); - next!(p, Value); - next!(p, Scalar(TScalarStyle::Plain, _)); - next!(p, FlowEntry); - next!(p, FlowMappingEnd); - next!(p, StreamEnd); - end!(p); - } - - #[test] - fn test_block_sequences() { - let s = " -- item 1 -- item 2 -- - - item 3.1 - - item 3.2 -- - key 1: value 1 - key 2: value 2 -"; - let mut p = Scanner::new(s.chars()); - next!(p, StreamStart(..)); - next!(p, BlockSequenceStart); - next!(p, BlockEntry); - next_scalar!(p, TScalarStyle::Plain, "item 1"); - next!(p, BlockEntry); - next_scalar!(p, TScalarStyle::Plain, "item 2"); - next!(p, BlockEntry); - next!(p, BlockSequenceStart); - next!(p, BlockEntry); - next_scalar!(p, TScalarStyle::Plain, "item 3.1"); - next!(p, BlockEntry); - next_scalar!(p, TScalarStyle::Plain, "item 3.2"); - next!(p, BlockEnd); - next!(p, BlockEntry); - next!(p, BlockMappingStart); - next!(p, Key); - next_scalar!(p, TScalarStyle::Plain, "key 1"); - next!(p, Value); - next_scalar!(p, TScalarStyle::Plain, "value 1"); - next!(p, Key); - next_scalar!(p, TScalarStyle::Plain, "key 2"); - next!(p, Value); - next_scalar!(p, TScalarStyle::Plain, "value 2"); - next!(p, BlockEnd); - next!(p, BlockEnd); - next!(p, StreamEnd); - end!(p); - } - - #[test] - fn test_block_mappings() { - let s = " -a simple key: a value # The KEY token is produced here. -? a complex key -: another value -a mapping: - key 1: value 1 - key 2: value 2 -a sequence: - - item 1 - - item 2 -"; - let mut p = Scanner::new(s.chars()); - next!(p, StreamStart(..)); - next!(p, BlockMappingStart); - next!(p, Key); - next!(p, Scalar(_, _)); - next!(p, Value); - next!(p, Scalar(_, _)); - next!(p, Key); - next!(p, Scalar(_, _)); - next!(p, Value); - next!(p, Scalar(_, _)); - next!(p, Key); - next!(p, Scalar(_, _)); - next!(p, Value); // libyaml comment seems to be wrong - next!(p, BlockMappingStart); - next!(p, Key); - next!(p, Scalar(_, _)); - next!(p, Value); - next!(p, Scalar(_, _)); - next!(p, Key); - next!(p, Scalar(_, _)); - next!(p, Value); - next!(p, Scalar(_, _)); - next!(p, BlockEnd); - next!(p, Key); - next!(p, Scalar(_, _)); - next!(p, Value); - next!(p, BlockSequenceStart); - next!(p, BlockEntry); - next!(p, Scalar(_, _)); - next!(p, BlockEntry); - next!(p, Scalar(_, _)); - next!(p, BlockEnd); - next!(p, BlockEnd); - next!(p, StreamEnd); - end!(p); - } - - #[test] - fn test_no_block_sequence_start() { - let s = " -key: -- item 1 -- item 2 -"; - let mut p = Scanner::new(s.chars()); - next!(p, StreamStart(..)); - next!(p, BlockMappingStart); - next!(p, Key); - next_scalar!(p, TScalarStyle::Plain, "key"); - next!(p, Value); - next!(p, BlockEntry); - next_scalar!(p, TScalarStyle::Plain, "item 1"); - next!(p, BlockEntry); - next_scalar!(p, TScalarStyle::Plain, "item 2"); - next!(p, BlockEnd); - next!(p, StreamEnd); - end!(p); - } - - #[test] - fn test_collections_in_sequence() { - let s = " -- - item 1 - - item 2 -- key 1: value 1 - key 2: value 2 -- ? complex key - : complex value -"; - let mut p = Scanner::new(s.chars()); - next!(p, StreamStart(..)); - next!(p, BlockSequenceStart); - next!(p, BlockEntry); - next!(p, BlockSequenceStart); - next!(p, BlockEntry); - next_scalar!(p, TScalarStyle::Plain, "item 1"); - next!(p, BlockEntry); - next_scalar!(p, TScalarStyle::Plain, "item 2"); - next!(p, BlockEnd); - next!(p, BlockEntry); - next!(p, BlockMappingStart); - next!(p, Key); - next_scalar!(p, TScalarStyle::Plain, "key 1"); - next!(p, Value); - next_scalar!(p, TScalarStyle::Plain, "value 1"); - next!(p, Key); - next_scalar!(p, TScalarStyle::Plain, "key 2"); - next!(p, Value); - next_scalar!(p, TScalarStyle::Plain, "value 2"); - next!(p, BlockEnd); - next!(p, BlockEntry); - next!(p, BlockMappingStart); - next!(p, Key); - next_scalar!(p, TScalarStyle::Plain, "complex key"); - next!(p, Value); - next_scalar!(p, TScalarStyle::Plain, "complex value"); - next!(p, BlockEnd); - next!(p, BlockEnd); - next!(p, StreamEnd); - end!(p); - } - - #[test] - fn test_collections_in_mapping() { - let s = " -? a sequence -: - item 1 - - item 2 -? a mapping -: key 1: value 1 - key 2: value 2 -"; - let mut p = Scanner::new(s.chars()); - next!(p, StreamStart(..)); - next!(p, BlockMappingStart); - next!(p, Key); - next_scalar!(p, TScalarStyle::Plain, "a sequence"); - next!(p, Value); - next!(p, BlockSequenceStart); - next!(p, BlockEntry); - next_scalar!(p, TScalarStyle::Plain, "item 1"); - next!(p, BlockEntry); - next_scalar!(p, TScalarStyle::Plain, "item 2"); - next!(p, BlockEnd); - next!(p, Key); - next_scalar!(p, TScalarStyle::Plain, "a mapping"); - next!(p, Value); - next!(p, BlockMappingStart); - next!(p, Key); - next_scalar!(p, TScalarStyle::Plain, "key 1"); - next!(p, Value); - next_scalar!(p, TScalarStyle::Plain, "value 1"); - next!(p, Key); - next_scalar!(p, TScalarStyle::Plain, "key 2"); - next!(p, Value); - next_scalar!(p, TScalarStyle::Plain, "value 2"); - next!(p, BlockEnd); - next!(p, BlockEnd); - next!(p, StreamEnd); - end!(p); - } - - #[test] - fn test_spec_ex7_3() { - let s = " -{ - ? foo :, - : bar, -} -"; - let mut p = Scanner::new(s.chars()); - next!(p, StreamStart(..)); - next!(p, FlowMappingStart); - next!(p, Key); - next_scalar!(p, TScalarStyle::Plain, "foo"); - next!(p, Value); - next!(p, FlowEntry); - next!(p, Value); - next_scalar!(p, TScalarStyle::Plain, "bar"); - next!(p, FlowEntry); - next!(p, FlowMappingEnd); - next!(p, StreamEnd); - end!(p); - } - - #[test] - fn test_plain_scalar_starting_with_indicators_in_flow() { - // "Plain scalars must not begin with most indicators, as this would cause ambiguity with - // other YAML constructs. However, the “:”, “?” and “-” indicators may be used as the first - // character if followed by a non-space “safe” character, as this causes no ambiguity." - - let s = "{a: :b}"; - let mut p = Scanner::new(s.chars()); - next!(p, StreamStart(..)); - next!(p, FlowMappingStart); - next!(p, Key); - next_scalar!(p, TScalarStyle::Plain, "a"); - next!(p, Value); - next_scalar!(p, TScalarStyle::Plain, ":b"); - next!(p, FlowMappingEnd); - next!(p, StreamEnd); - end!(p); - - let s = "{a: ?b}"; - let mut p = Scanner::new(s.chars()); - next!(p, StreamStart(..)); - next!(p, FlowMappingStart); - next!(p, Key); - next_scalar!(p, TScalarStyle::Plain, "a"); - next!(p, Value); - next_scalar!(p, TScalarStyle::Plain, "?b"); - next!(p, FlowMappingEnd); - next!(p, StreamEnd); - end!(p); - } - - #[test] - fn test_plain_scalar_starting_with_indicators_in_block() { - let s = ":a"; - let mut p = Scanner::new(s.chars()); - next!(p, StreamStart(..)); - next_scalar!(p, TScalarStyle::Plain, ":a"); - next!(p, StreamEnd); - end!(p); - - let s = "?a"; - let mut p = Scanner::new(s.chars()); - next!(p, StreamStart(..)); - next_scalar!(p, TScalarStyle::Plain, "?a"); - next!(p, StreamEnd); - end!(p); - } - - #[test] - fn test_plain_scalar_containing_indicators_in_block() { - let s = "a:,b"; - let mut p = Scanner::new(s.chars()); - next!(p, StreamStart(..)); - next_scalar!(p, TScalarStyle::Plain, "a:,b"); - next!(p, StreamEnd); - end!(p); - - let s = ":,b"; - let mut p = Scanner::new(s.chars()); - next!(p, StreamStart(..)); - next_scalar!(p, TScalarStyle::Plain, ":,b"); - next!(p, StreamEnd); - end!(p); - } - - #[test] - fn test_scanner_cr() { - let s = "---\r\n- tok1\r\n- tok2"; - let mut p = Scanner::new(s.chars()); - next!(p, StreamStart(..)); - next!(p, DocumentStart); - next!(p, BlockSequenceStart); - next!(p, BlockEntry); - next_scalar!(p, TScalarStyle::Plain, "tok1"); - next!(p, BlockEntry); - next_scalar!(p, TScalarStyle::Plain, "tok2"); - next!(p, BlockEnd); - next!(p, StreamEnd); - end!(p); - } - - #[test] - fn test_uri() { - // TODO - } - - #[test] - fn test_uri_escapes() { - // TODO - } -} diff --git a/saphyr/tests/emitter.rs b/saphyr/tests/emitter.rs new file mode 100644 index 0000000..d60e3a1 --- /dev/null +++ b/saphyr/tests/emitter.rs @@ -0,0 +1,294 @@ +use yaml_rust::{YamlEmitter, YamlLoader}; + +#[allow(clippy::similar_names)] +#[test] +fn test_emit_simple() { + let s = " +# comment +a0 bb: val +a1: + b1: 4 + b2: d +a2: 4 # i'm comment +a3: [1, 2, 3] +a4: + - [a1, a2] + - 2 +"; + + let docs = YamlLoader::load_from_str(s).unwrap(); + let doc = &docs[0]; + let mut writer = String::new(); + { + let mut emitter = YamlEmitter::new(&mut writer); + emitter.dump(doc).unwrap(); + } + println!("original:\n{s}"); + println!("emitted:\n{writer}"); + let docs_new = match YamlLoader::load_from_str(&writer) { + Ok(y) => y, + Err(e) => panic!("{}", e), + }; + let doc_new = &docs_new[0]; + + assert_eq!(doc, doc_new); +} + +#[test] +fn test_emit_complex() { + let s = r#" +cataloge: + product: &coffee { name: Coffee, price: 2.5 , unit: 1l } + product: &cookies { name: Cookies!, price: 3.40 , unit: 400g} + +products: + *coffee : + amount: 4 + *cookies : + amount: 4 + [1,2,3,4]: + array key + 2.4: + real key + true: + bool key + {}: + empty hash key + "#; + let docs = YamlLoader::load_from_str(s).unwrap(); + let doc = &docs[0]; + let mut writer = String::new(); + { + let mut emitter = YamlEmitter::new(&mut writer); + emitter.dump(doc).unwrap(); + } + let docs_new = match YamlLoader::load_from_str(&writer) { + Ok(y) => y, + Err(e) => panic!("{}", e), + }; + let doc_new = &docs_new[0]; + assert_eq!(doc, doc_new); +} + +#[test] +fn test_emit_avoid_quotes() { + let s = r#"--- +a7: 你好 +boolean: "true" +boolean2: "false" +date: 2014-12-31 +empty_string: "" +empty_string1: " " +empty_string2: " a" +empty_string3: " a " +exp: "12e7" +field: ":" +field2: "{" +field3: "\\" +field4: "\n" +field5: "can't avoid quote" +float: "2.6" +int: "4" +nullable: "null" +nullable2: "~" +products: + "*coffee": + amount: 4 + "*cookies": + amount: 4 + ".milk": + amount: 1 + "2.4": real key + "[1,2,3,4]": array key + "true": bool key + "{}": empty hash key +x: test +y: avoid quoting here +z: string with spaces"#; + + let docs = YamlLoader::load_from_str(s).unwrap(); + let doc = &docs[0]; + let mut writer = String::new(); + { + let mut emitter = YamlEmitter::new(&mut writer); + emitter.dump(doc).unwrap(); + } + + assert_eq!(s, writer, "actual:\n\n{writer}\n"); +} + +#[test] +fn emit_quoted_bools() { + let input = r#"--- +string0: yes +string1: no +string2: "true" +string3: "false" +string4: "~" +null0: ~ +[true, false]: real_bools +[True, TRUE, False, FALSE, y,Y,yes,Yes,YES,n,N,no,No,NO,on,On,ON,off,Off,OFF]: false_bools +bool0: true +bool1: false"#; + let expected = r#"--- +string0: "yes" +string1: "no" +string2: "true" +string3: "false" +string4: "~" +null0: ~ +? - true + - false +: real_bools +? - "True" + - "TRUE" + - "False" + - "FALSE" + - y + - Y + - "yes" + - "Yes" + - "YES" + - n + - N + - "no" + - "No" + - "NO" + - "on" + - "On" + - "ON" + - "off" + - "Off" + - "OFF" +: false_bools +bool0: true +bool1: false"#; + + let docs = YamlLoader::load_from_str(input).unwrap(); + let doc = &docs[0]; + let mut writer = String::new(); + { + let mut emitter = YamlEmitter::new(&mut writer); + emitter.dump(doc).unwrap(); + } + + assert_eq!( + expected, writer, + "expected:\n{expected}\nactual:\n{writer}\n", + ); +} + +#[test] +fn test_empty_and_nested() { + test_empty_and_nested_flag(false); +} + +#[test] +fn test_empty_and_nested_compact() { + test_empty_and_nested_flag(true); +} + +fn test_empty_and_nested_flag(compact: bool) { + let s = if compact { + r#"--- +a: + b: + c: hello + d: {} +e: + - f + - g + - h: []"# + } else { + r#"--- +a: + b: + c: hello + d: {} +e: + - f + - g + - + h: []"# + }; + + let docs = YamlLoader::load_from_str(s).unwrap(); + let doc = &docs[0]; + let mut writer = String::new(); + { + let mut emitter = YamlEmitter::new(&mut writer); + emitter.compact(compact); + emitter.dump(doc).unwrap(); + } + + assert_eq!(s, writer); +} + +#[test] +fn test_nested_arrays() { + let s = r#"--- +a: + - b + - - c + - d + - - e + - f"#; + + let docs = YamlLoader::load_from_str(s).unwrap(); + let doc = &docs[0]; + let mut writer = String::new(); + { + let mut emitter = YamlEmitter::new(&mut writer); + emitter.dump(doc).unwrap(); + } + println!("original:\n{s}"); + println!("emitted:\n{writer}"); + + assert_eq!(s, writer); +} + +#[test] +fn test_deeply_nested_arrays() { + let s = r#"--- +a: + - b + - - c + - d + - - e + - - f + - - e"#; + + let docs = YamlLoader::load_from_str(s).unwrap(); + let doc = &docs[0]; + let mut writer = String::new(); + { + let mut emitter = YamlEmitter::new(&mut writer); + emitter.dump(doc).unwrap(); + } + println!("original:\n{s}"); + println!("emitted:\n{writer}"); + + assert_eq!(s, writer); +} + +#[test] +fn test_nested_hashes() { + let s = r#"--- +a: + b: + c: + d: + e: f"#; + + let docs = YamlLoader::load_from_str(s).unwrap(); + let doc = &docs[0]; + let mut writer = String::new(); + { + let mut emitter = YamlEmitter::new(&mut writer); + emitter.dump(doc).unwrap(); + } + println!("original:\n{s}"); + println!("emitted:\n{writer}"); + + assert_eq!(s, writer); +} diff --git a/saphyr/tests/scanner.rs b/saphyr/tests/scanner.rs new file mode 100644 index 0000000..27d3760 --- /dev/null +++ b/saphyr/tests/scanner.rs @@ -0,0 +1,440 @@ +#![allow(clippy::enum_glob_use)] + +use yaml_rust::{scanner::TokenType::*, scanner::*}; + +macro_rules! next { + ($p:ident, $tk:pat) => {{ + let tok = $p.next().unwrap(); + match tok.1 { + $tk => {} + _ => panic!("unexpected token: {:?}", tok), + } + }}; +} + +macro_rules! next_scalar { + ($p:ident, $tk:expr, $v:expr) => {{ + let tok = $p.next().unwrap(); + match tok.1 { + Scalar(style, ref v) => { + assert_eq!(style, $tk); + assert_eq!(v, $v); + } + _ => panic!("unexpected token: {:?}", tok), + } + }}; +} + +macro_rules! end { + ($p:ident) => {{ + assert_eq!($p.next(), None); + }}; +} +/// test cases in libyaml scanner.c +#[test] +fn test_empty() { + let s = ""; + let mut p = Scanner::new(s.chars()); + next!(p, StreamStart(..)); + next!(p, StreamEnd); + end!(p); +} + +#[test] +fn test_scalar() { + let s = "a scalar"; + let mut p = Scanner::new(s.chars()); + next!(p, StreamStart(..)); + next!(p, Scalar(TScalarStyle::Plain, _)); + next!(p, StreamEnd); + end!(p); +} + +#[test] +fn test_explicit_scalar() { + let s = "--- +'a scalar' +... +"; + let mut p = Scanner::new(s.chars()); + next!(p, StreamStart(..)); + next!(p, DocumentStart); + next!(p, Scalar(TScalarStyle::SingleQuoted, _)); + next!(p, DocumentEnd); + next!(p, StreamEnd); + end!(p); +} + +#[test] +fn test_multiple_documents() { + let s = " +'a scalar' +--- +'a scalar' +--- +'a scalar' +"; + let mut p = Scanner::new(s.chars()); + next!(p, StreamStart(..)); + next!(p, Scalar(TScalarStyle::SingleQuoted, _)); + next!(p, DocumentStart); + next!(p, Scalar(TScalarStyle::SingleQuoted, _)); + next!(p, DocumentStart); + next!(p, Scalar(TScalarStyle::SingleQuoted, _)); + next!(p, StreamEnd); + end!(p); +} + +#[test] +fn test_a_flow_sequence() { + let s = "[item 1, item 2, item 3]"; + let mut p = Scanner::new(s.chars()); + next!(p, StreamStart(..)); + next!(p, FlowSequenceStart); + next_scalar!(p, TScalarStyle::Plain, "item 1"); + next!(p, FlowEntry); + next!(p, Scalar(TScalarStyle::Plain, _)); + next!(p, FlowEntry); + next!(p, Scalar(TScalarStyle::Plain, _)); + next!(p, FlowSequenceEnd); + next!(p, StreamEnd); + end!(p); +} + +#[test] +fn test_a_flow_mapping() { + let s = " +{ + a simple key: a value, # Note that the KEY token is produced. + ? a complex key: another value, +} +"; + let mut p = Scanner::new(s.chars()); + next!(p, StreamStart(..)); + next!(p, FlowMappingStart); + next!(p, Key); + next!(p, Scalar(TScalarStyle::Plain, _)); + next!(p, Value); + next!(p, Scalar(TScalarStyle::Plain, _)); + next!(p, FlowEntry); + next!(p, Key); + next_scalar!(p, TScalarStyle::Plain, "a complex key"); + next!(p, Value); + next!(p, Scalar(TScalarStyle::Plain, _)); + next!(p, FlowEntry); + next!(p, FlowMappingEnd); + next!(p, StreamEnd); + end!(p); +} + +#[test] +fn test_block_sequences() { + let s = " +- item 1 +- item 2 +- + - item 3.1 + - item 3.2 +- + key 1: value 1 + key 2: value 2 +"; + let mut p = Scanner::new(s.chars()); + next!(p, StreamStart(..)); + next!(p, BlockSequenceStart); + next!(p, BlockEntry); + next_scalar!(p, TScalarStyle::Plain, "item 1"); + next!(p, BlockEntry); + next_scalar!(p, TScalarStyle::Plain, "item 2"); + next!(p, BlockEntry); + next!(p, BlockSequenceStart); + next!(p, BlockEntry); + next_scalar!(p, TScalarStyle::Plain, "item 3.1"); + next!(p, BlockEntry); + next_scalar!(p, TScalarStyle::Plain, "item 3.2"); + next!(p, BlockEnd); + next!(p, BlockEntry); + next!(p, BlockMappingStart); + next!(p, Key); + next_scalar!(p, TScalarStyle::Plain, "key 1"); + next!(p, Value); + next_scalar!(p, TScalarStyle::Plain, "value 1"); + next!(p, Key); + next_scalar!(p, TScalarStyle::Plain, "key 2"); + next!(p, Value); + next_scalar!(p, TScalarStyle::Plain, "value 2"); + next!(p, BlockEnd); + next!(p, BlockEnd); + next!(p, StreamEnd); + end!(p); +} + +#[test] +fn test_block_mappings() { + let s = " +a simple key: a value # The KEY token is produced here. +? a complex key +: another value +a mapping: + key 1: value 1 + key 2: value 2 +a sequence: + - item 1 + - item 2 +"; + let mut p = Scanner::new(s.chars()); + next!(p, StreamStart(..)); + next!(p, BlockMappingStart); + next!(p, Key); + next!(p, Scalar(_, _)); + next!(p, Value); + next!(p, Scalar(_, _)); + next!(p, Key); + next!(p, Scalar(_, _)); + next!(p, Value); + next!(p, Scalar(_, _)); + next!(p, Key); + next!(p, Scalar(_, _)); + next!(p, Value); // libyaml comment seems to be wrong + next!(p, BlockMappingStart); + next!(p, Key); + next!(p, Scalar(_, _)); + next!(p, Value); + next!(p, Scalar(_, _)); + next!(p, Key); + next!(p, Scalar(_, _)); + next!(p, Value); + next!(p, Scalar(_, _)); + next!(p, BlockEnd); + next!(p, Key); + next!(p, Scalar(_, _)); + next!(p, Value); + next!(p, BlockSequenceStart); + next!(p, BlockEntry); + next!(p, Scalar(_, _)); + next!(p, BlockEntry); + next!(p, Scalar(_, _)); + next!(p, BlockEnd); + next!(p, BlockEnd); + next!(p, StreamEnd); + end!(p); +} + +#[test] +fn test_no_block_sequence_start() { + let s = " +key: +- item 1 +- item 2 +"; + let mut p = Scanner::new(s.chars()); + next!(p, StreamStart(..)); + next!(p, BlockMappingStart); + next!(p, Key); + next_scalar!(p, TScalarStyle::Plain, "key"); + next!(p, Value); + next!(p, BlockEntry); + next_scalar!(p, TScalarStyle::Plain, "item 1"); + next!(p, BlockEntry); + next_scalar!(p, TScalarStyle::Plain, "item 2"); + next!(p, BlockEnd); + next!(p, StreamEnd); + end!(p); +} + +#[test] +fn test_collections_in_sequence() { + let s = " +- - item 1 + - item 2 +- key 1: value 1 + key 2: value 2 +- ? complex key + : complex value +"; + let mut p = Scanner::new(s.chars()); + next!(p, StreamStart(..)); + next!(p, BlockSequenceStart); + next!(p, BlockEntry); + next!(p, BlockSequenceStart); + next!(p, BlockEntry); + next_scalar!(p, TScalarStyle::Plain, "item 1"); + next!(p, BlockEntry); + next_scalar!(p, TScalarStyle::Plain, "item 2"); + next!(p, BlockEnd); + next!(p, BlockEntry); + next!(p, BlockMappingStart); + next!(p, Key); + next_scalar!(p, TScalarStyle::Plain, "key 1"); + next!(p, Value); + next_scalar!(p, TScalarStyle::Plain, "value 1"); + next!(p, Key); + next_scalar!(p, TScalarStyle::Plain, "key 2"); + next!(p, Value); + next_scalar!(p, TScalarStyle::Plain, "value 2"); + next!(p, BlockEnd); + next!(p, BlockEntry); + next!(p, BlockMappingStart); + next!(p, Key); + next_scalar!(p, TScalarStyle::Plain, "complex key"); + next!(p, Value); + next_scalar!(p, TScalarStyle::Plain, "complex value"); + next!(p, BlockEnd); + next!(p, BlockEnd); + next!(p, StreamEnd); + end!(p); +} + +#[test] +fn test_collections_in_mapping() { + let s = " +? a sequence +: - item 1 + - item 2 +? a mapping +: key 1: value 1 + key 2: value 2 +"; + let mut p = Scanner::new(s.chars()); + next!(p, StreamStart(..)); + next!(p, BlockMappingStart); + next!(p, Key); + next_scalar!(p, TScalarStyle::Plain, "a sequence"); + next!(p, Value); + next!(p, BlockSequenceStart); + next!(p, BlockEntry); + next_scalar!(p, TScalarStyle::Plain, "item 1"); + next!(p, BlockEntry); + next_scalar!(p, TScalarStyle::Plain, "item 2"); + next!(p, BlockEnd); + next!(p, Key); + next_scalar!(p, TScalarStyle::Plain, "a mapping"); + next!(p, Value); + next!(p, BlockMappingStart); + next!(p, Key); + next_scalar!(p, TScalarStyle::Plain, "key 1"); + next!(p, Value); + next_scalar!(p, TScalarStyle::Plain, "value 1"); + next!(p, Key); + next_scalar!(p, TScalarStyle::Plain, "key 2"); + next!(p, Value); + next_scalar!(p, TScalarStyle::Plain, "value 2"); + next!(p, BlockEnd); + next!(p, BlockEnd); + next!(p, StreamEnd); + end!(p); +} + +#[test] +fn test_spec_ex7_3() { + let s = " +{ + ? foo :, + : bar, +} +"; + let mut p = Scanner::new(s.chars()); + next!(p, StreamStart(..)); + next!(p, FlowMappingStart); + next!(p, Key); + next_scalar!(p, TScalarStyle::Plain, "foo"); + next!(p, Value); + next!(p, FlowEntry); + next!(p, Value); + next_scalar!(p, TScalarStyle::Plain, "bar"); + next!(p, FlowEntry); + next!(p, FlowMappingEnd); + next!(p, StreamEnd); + end!(p); +} + +#[test] +fn test_plain_scalar_starting_with_indicators_in_flow() { + // "Plain scalars must not begin with most indicators, as this would cause ambiguity with + // other YAML constructs. However, the “:”, “?” and “-” indicators may be used as the first + // character if followed by a non-space “safe” character, as this causes no ambiguity." + + let s = "{a: :b}"; + let mut p = Scanner::new(s.chars()); + next!(p, StreamStart(..)); + next!(p, FlowMappingStart); + next!(p, Key); + next_scalar!(p, TScalarStyle::Plain, "a"); + next!(p, Value); + next_scalar!(p, TScalarStyle::Plain, ":b"); + next!(p, FlowMappingEnd); + next!(p, StreamEnd); + end!(p); + + let s = "{a: ?b}"; + let mut p = Scanner::new(s.chars()); + next!(p, StreamStart(..)); + next!(p, FlowMappingStart); + next!(p, Key); + next_scalar!(p, TScalarStyle::Plain, "a"); + next!(p, Value); + next_scalar!(p, TScalarStyle::Plain, "?b"); + next!(p, FlowMappingEnd); + next!(p, StreamEnd); + end!(p); +} + +#[test] +fn test_plain_scalar_starting_with_indicators_in_block() { + let s = ":a"; + let mut p = Scanner::new(s.chars()); + next!(p, StreamStart(..)); + next_scalar!(p, TScalarStyle::Plain, ":a"); + next!(p, StreamEnd); + end!(p); + + let s = "?a"; + let mut p = Scanner::new(s.chars()); + next!(p, StreamStart(..)); + next_scalar!(p, TScalarStyle::Plain, "?a"); + next!(p, StreamEnd); + end!(p); +} + +#[test] +fn test_plain_scalar_containing_indicators_in_block() { + let s = "a:,b"; + let mut p = Scanner::new(s.chars()); + next!(p, StreamStart(..)); + next_scalar!(p, TScalarStyle::Plain, "a:,b"); + next!(p, StreamEnd); + end!(p); + + let s = ":,b"; + let mut p = Scanner::new(s.chars()); + next!(p, StreamStart(..)); + next_scalar!(p, TScalarStyle::Plain, ":,b"); + next!(p, StreamEnd); + end!(p); +} + +#[test] +fn test_scanner_cr() { + let s = "---\r\n- tok1\r\n- tok2"; + let mut p = Scanner::new(s.chars()); + next!(p, StreamStart(..)); + next!(p, DocumentStart); + next!(p, BlockSequenceStart); + next!(p, BlockEntry); + next_scalar!(p, TScalarStyle::Plain, "tok1"); + next!(p, BlockEntry); + next_scalar!(p, TScalarStyle::Plain, "tok2"); + next!(p, BlockEnd); + next!(p, StreamEnd); + end!(p); +} + +#[test] +fn test_uri() { + // TODO +} + +#[test] +fn test_uri_escapes() { + // TODO +} diff --git a/saphyr/tests/yaml-test-suite.rs b/saphyr/tests/yaml-test-suite.rs index 61f54b6..6b3bf29 100644 --- a/saphyr/tests/yaml-test-suite.rs +++ b/saphyr/tests/yaml-test-suite.rs @@ -282,6 +282,7 @@ fn expected_events(expected_tree: &str) -> Vec { .collect() } +#[rustfmt::skip] static EXPECTED_FAILURES: &[&str] = &[ // These seem to be API limited (not enough information on the event stream level) // No tag available for SEQ and MAP @@ -339,10 +340,8 @@ static EXPECTED_FAILURES: &[&str] = &[ "HWV9", "QT73", // Unusual characters in anchors/aliases - "2SXE", // : "8XYN", // emoji!! "W5VH", // :@*!$": - "Y2GN", // : in the middle // Flow mapping colon on next line / multiline key in flow mapping "4MUZ-00", "4MUZ-01", From 81f9a376fba736f2d686b6820c5e1644f20dfabb Mon Sep 17 00:00:00 2001 From: Ethiraric Date: Fri, 18 Aug 2023 02:09:56 +0200 Subject: [PATCH 215/380] Rustfmt + clippy. --- saphyr/tests/yaml-test-suite.rs | 83 +++++++++++++++++++-------------- 1 file changed, 48 insertions(+), 35 deletions(-) diff --git a/saphyr/tests/yaml-test-suite.rs b/saphyr/tests/yaml-test-suite.rs index 6b3bf29..f32e8a3 100644 --- a/saphyr/tests/yaml-test-suite.rs +++ b/saphyr/tests/yaml-test-suite.rs @@ -1,17 +1,14 @@ use std::fs::{self, DirEntry}; -use libtest_mimic::{Arguments, Test, Outcome, run_tests}; +use libtest_mimic::{run_tests, Arguments, Outcome, Test}; use yaml_rust::{ parser::{Event, EventReceiver, Parser}, - scanner::{TokenType, TScalarStyle}, - ScanError, - Yaml, - YamlLoader, - yaml, + scanner::{TScalarStyle, TokenType}, + yaml, ScanError, Yaml, YamlLoader, }; -type Result> = std::result::Result; +type Result> = std::result::Result; struct YamlTest { yaml_visual: String, @@ -27,7 +24,7 @@ fn main() -> Result<()> { arguments.num_threads = Some(1); } let tests: Vec> = std::fs::read_dir("tests/yaml-test-suite/src")? - .map(|entry| -> Result<_> { + .map(|entry| -> Result<_> { let entry = entry?; let tests = load_tests_from_file(&entry)?; Ok(tests) @@ -36,18 +33,18 @@ fn main() -> Result<()> { let mut tests: Vec<_> = tests.into_iter().flatten().collect(); tests.sort_by_key(|t| t.name.clone()); - let missing_xfails: Vec<_> = EXPECTED_FAILURES.iter() + let missing_xfails: Vec<_> = EXPECTED_FAILURES + .iter() .filter(|&&test| !tests.iter().any(|t| t.name == test)) .collect(); if !missing_xfails.is_empty() { - panic!("The following EXPECTED_FAILURES not found during discovery: {:?}", missing_xfails); + panic!( + "The following EXPECTED_FAILURES not found during discovery: {:?}", + missing_xfails + ); } - run_tests( - &arguments, - tests, - run_yaml_test, - ).exit(); + run_tests(&arguments, tests, run_yaml_test).exit(); } fn run_yaml_test(test: &Test) -> Outcome { @@ -68,14 +65,18 @@ fn run_yaml_test(test: &Test) -> Outcome { match (error_text, desc.is_xfail) { (None, false) => Outcome::Passed, (Some(text), false) => Outcome::Failed { msg: Some(text) }, - (Some(_), true) => Outcome::Ignored, - (None, true) => Outcome::Failed { msg: Some("expected to fail but passes".into()) }, + (Some(_), true) => Outcome::Ignored, + (None, true) => Outcome::Failed { + msg: Some("expected to fail but passes".into()), + }, } } fn load_tests_from_file(entry: &DirEntry) -> Result>> { let file_name = entry.file_name().to_string_lossy().to_string(); - let test_name = file_name.strip_suffix(".yaml").ok_or("unexpected filename")?; + let test_name = file_name + .strip_suffix(".yaml") + .ok_or("unexpected filename")?; let tests = YamlLoader::load_from_str(&fs::read_to_string(&entry.path())?)?; let tests = tests[0].as_vec().ok_or("no test list found in file")?; @@ -121,8 +122,7 @@ fn load_tests_from_file(entry: &DirEntry) -> Result>> { fn parse_to_events(source: &str) -> Result, ScanError> { let mut reporter = EventReporter::new(); - Parser::new(source.chars()) - .load(&mut reporter, true)?; + Parser::new(source.chars()).load(&mut reporter, true)?; Ok(reporter.events) } @@ -132,9 +132,7 @@ struct EventReporter { impl EventReporter { fn new() -> Self { - Self { - events: vec![], - } + Self { events: vec![] } } } @@ -162,8 +160,13 @@ impl EventReceiver for EventReporter { TScalarStyle::Foled => ">", TScalarStyle::Any => unreachable!(), }; - format!("=VAL{}{} {}{}", - format_index(idx), format_tag(tag), kind, escape_text(text)) + format!( + "=VAL{}{} {}{}", + format_index(idx), + format_tag(tag), + kind, + escape_text(text) + ) } Event::Alias(idx) => format!("=ALI *{}", idx), Event::Nothing => return, @@ -216,13 +219,16 @@ fn events_differ(actual: Vec, expected: &str) -> Option { if act == exp { continue; } else { - Some(format!("line {} differs: expected `{}`, found `{}`", idx, exp, act)) + Some(format!( + "line {} differs: expected `{}`, found `{}`", + idx, exp, act + )) } } (Some(a), None) => Some(format!("extra actual line: {:?}", a)), (None, Some(e)) => Some(format!("extra expected line: {:?}", e)), (None, None) => None, - } + }; } unreachable!() } @@ -250,23 +256,30 @@ fn visual_to_raw(yaml: &str) -> String { /// Both are things that can be omitted according to spec. fn expected_events(expected_tree: &str) -> Vec { let mut anchors = vec![]; - expected_tree.split("\n") + expected_tree + .split('\n') .map(|s| s.trim_start().to_owned()) .filter(|s| !s.is_empty()) .map(|mut s| { // Anchor name-to-number conversion - if let Some(start) = s.find("&") { - if s[..start].find(":").is_none() { - let len = s[start..].find(" ").unwrap_or(s[start..].len()); - anchors.push(s[start+1..start + len].to_owned()); + if let Some(start) = s.find('&') { + if s[..start].find(':').is_none() { + let len = s[start..].find(' ').unwrap_or(s[start..].len()); + anchors.push(s[start + 1..start + len].to_owned()); s = s.replace(&s[start..start + len], &format!("&{}", anchors.len())); } } // Alias nodes name-to-number if s.starts_with("=ALI") { - let start = s.find("*").unwrap(); - let name = &s[start + 1 ..]; - let idx = anchors.iter().enumerate().filter(|(_, v)| v == &name).last().unwrap().0; + let start = s.find('*').unwrap(); + let name = &s[start + 1..]; + let idx = anchors + .iter() + .enumerate() + .filter(|(_, v)| v == &name) + .last() + .unwrap() + .0; s = s.replace(&s[start..], &format!("*{}", idx + 1)); } // Dropping style information From 3ffb231e4091ee9e9a1d818e891d025636496a5f Mon Sep 17 00:00:00 2001 From: Ethiraric Date: Sat, 18 Nov 2023 20:29:40 +0100 Subject: [PATCH 216/380] Minor improvements. * Doc comments * Helper functions * Line breaks for readability --- saphyr/src/scanner.rs | 71 +++++++++++++++++++++++---------- saphyr/tests/yaml-test-suite.rs | 1 - 2 files changed, 50 insertions(+), 22 deletions(-) diff --git a/saphyr/src/scanner.rs b/saphyr/src/scanner.rs index ed8c592..9f42cc1 100644 --- a/saphyr/src/scanner.rs +++ b/saphyr/src/scanner.rs @@ -264,11 +264,15 @@ impl> Scanner { token_available: false, } } + #[inline] pub fn get_error(&self) -> Option { self.error.as_ref().map(std::clone::Clone::clone) } + /// Fill `self.buffer` with at least `count` characters. + /// + /// The characters that are extracted this way are not consumed but only placed in the buffer. #[inline] fn lookahead(&mut self, count: usize) { if self.buffer.len() >= count { @@ -278,6 +282,7 @@ impl> Scanner { self.buffer.push_back(self.rdr.next().unwrap_or('\0')); } } + #[inline] fn skip(&mut self) { let c = self.buffer.pop_front().unwrap(); @@ -290,6 +295,7 @@ impl> Scanner { self.mark.col += 1; } } + #[inline] fn skip_line(&mut self) { if self.buffer[0] == '\r' && self.buffer[1] == '\n' { @@ -299,31 +305,62 @@ impl> Scanner { self.skip(); } } + + /// Return the next character in the buffer. + /// + /// The character is not consumed. #[inline] fn ch(&self) -> char { self.buffer[0] } + + /// Look for the next character and return it. + /// + /// The character is not consumed. + /// Equivalent to calling [`Self::lookahead`] and [`Self::ch`]. + #[inline] + fn look_ch(&mut self) -> char { + self.lookahead(1); + self.ch() + } + + /// Consume and return the next character. + /// + /// Equivalent to calling [`Self::ch`] and [`Self::skip`]. + #[inline] + fn ch_skip(&mut self) -> char { + let ret = self.ch(); + self.skip(); + ret + } + + /// Return whether the next character is `c`. #[inline] fn ch_is(&self, c: char) -> bool { self.buffer[0] == c } + #[allow(dead_code)] #[inline] fn eof(&self) -> bool { self.ch_is('\0') } + #[inline] pub fn stream_started(&self) -> bool { self.stream_start_produced } + #[inline] pub fn stream_ended(&self) -> bool { self.stream_end_produced } + #[inline] pub fn mark(&self) -> Marker { self.mark } + #[inline] fn read_break(&mut self, s: &mut String) { if self.buffer[0] == '\r' && self.buffer[1] == '\n' { @@ -337,6 +374,7 @@ impl> Scanner { unreachable!(); } } + fn insert_token(&mut self, pos: usize, tok: Token) { let old_len = self.tokens.len(); assert!(pos <= old_len); @@ -345,9 +383,11 @@ impl> Scanner { self.tokens.swap(old_len - i, old_len - i - 1); } } + fn allow_simple_key(&mut self) { self.simple_key_allowed = true; } + fn disallow_simple_key(&mut self) { self.simple_key_allowed = false; } @@ -736,7 +776,6 @@ impl> Scanner { let start_mark = self.mark; let mut handle = String::new(); let mut suffix; - let mut secondary = false; // Check if the tag is in the canonical form (verbatim). self.lookahead(2); @@ -760,10 +799,9 @@ impl> Scanner { handle = self.scan_tag_handle(false, &start_mark)?; // Check if it is, indeed, handle. if handle.len() >= 2 && handle.starts_with('!') && handle.ends_with('!') { - if handle == "!!" { - secondary = true; - } - suffix = self.scan_tag_uri(false, secondary, "", &start_mark)?; + // A tag handle starting with "!!" is a secondary tag handle. + let is_secondary_handle = handle == "!!"; + suffix = self.scan_tag_uri(false, is_secondary_handle, "", &start_mark)?; } else { suffix = self.scan_tag_uri(false, false, &handle, &start_mark)?; handle = "!".to_owned(); @@ -776,8 +814,7 @@ impl> Scanner { } } - self.lookahead(1); - if is_blankz(self.ch()) { + if is_blankz(self.look_ch()) { // XXX: ex 7.2, an empty scalar can follow a secondary tag Ok(Token(start_mark, TokenType::Tag(handle, suffix))) } else { @@ -790,28 +827,22 @@ impl> Scanner { fn scan_tag_handle(&mut self, directive: bool, mark: &Marker) -> Result { let mut string = String::new(); - self.lookahead(1); - if self.ch() != '!' { + if self.look_ch() != '!' { return Err(ScanError::new( *mark, "while scanning a tag, did not find expected '!'", )); } - string.push(self.ch()); - self.skip(); + string.push(self.ch_skip()); - self.lookahead(1); - while is_alpha(self.ch()) { - string.push(self.ch()); - self.skip(); - self.lookahead(1); + while is_alpha(self.look_ch()) { + string.push(self.ch_skip()); } // Check if the trailing character is '!' and copy it. if self.ch() == '!' { - string.push(self.ch()); - self.skip(); + string.push(self.ch_skip()); } else if directive && string != "!" { // It's either the '!' tag or not really a tag handle. If it's a %TAG // directive, it's an error. If it's a tag token, it must be a part of @@ -840,7 +871,6 @@ impl> Scanner { string.extend(head.chars().skip(1)); } - self.lookahead(1); /* * The set of characters that may appear in URI is as follows: * @@ -848,7 +878,7 @@ impl> Scanner { * '=', '+', '$', ',', '.', '!', '~', '*', '\'', '(', ')', '[', ']', * '%'. */ - while match self.ch() { + while match self.look_ch() { ';' | '/' | '?' | ':' | '@' | '&' => true, '=' | '+' | '$' | ',' | '.' | '!' | '~' | '*' | '\'' | '(' | ')' | '[' | ']' => true, '%' => true, @@ -864,7 +894,6 @@ impl> Scanner { } length += 1; - self.lookahead(1); } if length == 0 { diff --git a/saphyr/tests/yaml-test-suite.rs b/saphyr/tests/yaml-test-suite.rs index f32e8a3..48f265e 100644 --- a/saphyr/tests/yaml-test-suite.rs +++ b/saphyr/tests/yaml-test-suite.rs @@ -299,7 +299,6 @@ fn expected_events(expected_tree: &str) -> Vec { static EXPECTED_FAILURES: &[&str] = &[ // These seem to be API limited (not enough information on the event stream level) // No tag available for SEQ and MAP - "2XXW", "35KP", "57H4", "6JWB", From e6fdcddcebe9badeb3c91907ee55c5930c3f4bb6 Mon Sep 17 00:00:00 2001 From: Ethiraric Date: Sun, 19 Nov 2023 01:09:41 +0100 Subject: [PATCH 217/380] Propagate tag to MappingStart event. --- saphyr/src/parser.rs | 33 ++++++++++++++++++++++----------- saphyr/src/scanner.rs | 25 +++++++++++++++++++++---- saphyr/src/yaml.rs | 6 +++--- saphyr/tests/yaml-test-suite.rs | 20 +++++++++----------- 4 files changed, 55 insertions(+), 29 deletions(-) diff --git a/saphyr/src/parser.rs b/saphyr/src/parser.rs index c885856..a83bdc4 100644 --- a/saphyr/src/parser.rs +++ b/saphyr/src/parser.rs @@ -51,7 +51,7 @@ pub enum Event { usize, ), /// Value, style, anchor_id, tag - Scalar(String, TScalarStyle, usize, Option), + Scalar(String, TScalarStyle, usize, Option), SequenceStart( /// The anchor ID of the start of the squence. usize, @@ -60,10 +60,21 @@ pub enum Event { MappingStart( /// The anchor ID of the start of the mapping. usize, + /// An optional tag + Option, ), MappingEnd, } +/// A YAML tag. +#[derive(Clone, PartialEq, Debug, Eq)] +pub struct Tag { + /// Handle of the tag (`!` included). + pub handle: String, + /// The suffix of the tag. + pub suffix: String, +} + impl Event { /// Create an empty scalar. fn empty_scalar() -> Event { @@ -72,7 +83,7 @@ impl Event { } /// Create an empty scalar with the given anchor. - fn empty_scalar_with_anchor(anchor: usize, tag: Option) -> Event { + fn empty_scalar_with_anchor(anchor: usize, tag: Option) -> Event { Event::Scalar(String::new(), TScalarStyle::Plain, anchor, tag) } } @@ -340,7 +351,7 @@ impl> Parser { recv.on_event(first_ev, mark); self.load_sequence(recv) } - Event::MappingStart(_) => { + Event::MappingStart(..) => { recv.on_event(first_ev, mark); self.load_mapping(recv) } @@ -575,8 +586,8 @@ impl> Parser { if let Token(mark, TokenType::Anchor(name)) = self.fetch_token() { anchor_id = self.register_anchor(name, &mark); if let TokenType::Tag(..) = self.peek_token()?.1 { - if let tg @ TokenType::Tag(..) = self.fetch_token().1 { - tag = Some(tg); + if let TokenType::Tag(handle, suffix) = self.fetch_token().1 { + tag = Some(Tag { handle, suffix }); } else { unreachable!() } @@ -586,9 +597,9 @@ impl> Parser { } } Token(_, TokenType::Tag(..)) => { - if let tg @ TokenType::Tag(..) = self.fetch_token().1 { - tag = Some(tg); - if let TokenType::Anchor(_) = self.peek_token()?.1 { + if let TokenType::Tag(handle, suffix) = self.fetch_token().1 { + tag = Some(Tag { handle, suffix }); + if let TokenType::Anchor(_) = &self.peek_token()?.1 { if let Token(mark, TokenType::Anchor(name)) = self.fetch_token() { anchor_id = self.register_anchor(name, &mark); } else { @@ -620,7 +631,7 @@ impl> Parser { } Token(mark, TokenType::FlowMappingStart) => { self.state = State::FlowMappingFirstKey; - Ok((Event::MappingStart(anchor_id), mark)) + Ok((Event::MappingStart(anchor_id, tag), mark)) } Token(mark, TokenType::BlockSequenceStart) if block => { self.state = State::BlockSequenceFirstEntry; @@ -628,7 +639,7 @@ impl> Parser { } Token(mark, TokenType::BlockMappingStart) if block => { self.state = State::BlockMappingFirstKey; - Ok((Event::MappingStart(anchor_id), mark)) + Ok((Event::MappingStart(anchor_id, tag), mark)) } // ex 7.2, an empty scalar can follow a secondary tag Token(mark, _) if tag.is_some() || anchor_id > 0 => { @@ -819,7 +830,7 @@ impl> Parser { Token(mark, TokenType::Key) => { self.state = State::FlowSequenceEntryMappingKey; self.skip(); - Ok((Event::MappingStart(0), mark)) + Ok((Event::MappingStart(0, None), mark)) } _ => { self.push_state(State::FlowSequenceEntry); diff --git a/saphyr/src/scanner.rs b/saphyr/src/scanner.rs index 9f42cc1..a271602 100644 --- a/saphyr/src/scanner.rs +++ b/saphyr/src/scanner.rs @@ -109,26 +109,43 @@ impl fmt::Display for ScanError { #[derive(Clone, PartialEq, Debug, Eq)] pub enum TokenType { NoToken, + /// The start of the stream. Sent first, before even [`DocumentStart`]. StreamStart(TEncoding), + /// The end of the stream, EOF. StreamEnd, - /// major, minor - VersionDirective(u32, u32), - /// handle, prefix - TagDirective(String, String), + VersionDirective( + /// Major + u32, + /// Minor + u32, + ), + TagDirective( + /// Handle + String, + /// Prefix + String, + ), + /// The start of a YAML document (`---`). DocumentStart, + /// The end of a YAML document (`...`). DocumentEnd, BlockSequenceStart, BlockMappingStart, BlockEnd, + /// Start of an inline array (`[ a, b ]`). FlowSequenceStart, + /// End of an inline array. FlowSequenceEnd, + /// Start of an inline mapping (`{ a: b, c: d }`). FlowMappingStart, + /// End of an inline mapping. FlowMappingEnd, BlockEntry, FlowEntry, Key, Value, Alias(String), + /// A YAML anchor (`&`/`*`). Anchor(String), /// handle, suffix Tag(String, String), diff --git a/saphyr/src/yaml.rs b/saphyr/src/yaml.rs index 67b8b76..07a8592 100644 --- a/saphyr/src/yaml.rs +++ b/saphyr/src/yaml.rs @@ -1,6 +1,6 @@ #![allow(clippy::module_name_repetitions)] -use crate::parser::{Event, MarkedEventReceiver, Parser}; +use crate::parser::{Event, MarkedEventReceiver, Parser, Tag}; use crate::scanner::{Marker, ScanError, TScalarStyle, TokenType}; use linked_hash_map::LinkedHashMap; use std::collections::BTreeMap; @@ -98,7 +98,7 @@ impl MarkedEventReceiver for YamlLoader { let node = self.doc_stack.pop().unwrap(); self.insert_new_node(node); } - Event::MappingStart(aid) => { + Event::MappingStart(aid, _) => { self.doc_stack.push((Yaml::Hash(Hash::new()), aid)); self.key_stack.push(Yaml::BadValue); } @@ -110,7 +110,7 @@ impl MarkedEventReceiver for YamlLoader { Event::Scalar(v, style, aid, tag) => { let node = if style != TScalarStyle::Plain { Yaml::String(v) - } else if let Some(TokenType::Tag(ref handle, ref suffix)) = tag { + } else if let Some(Tag { ref handle, ref suffix }) = tag { // XXX tag:yaml.org,2002: if handle == "!!" { match suffix.as_ref() { diff --git a/saphyr/tests/yaml-test-suite.rs b/saphyr/tests/yaml-test-suite.rs index 48f265e..788484f 100644 --- a/saphyr/tests/yaml-test-suite.rs +++ b/saphyr/tests/yaml-test-suite.rs @@ -3,8 +3,8 @@ use std::fs::{self, DirEntry}; use libtest_mimic::{run_tests, Arguments, Outcome, Test}; use yaml_rust::{ - parser::{Event, EventReceiver, Parser}, - scanner::{TScalarStyle, TokenType}, + parser::{Event, EventReceiver, Parser, Tag}, + scanner::{TScalarStyle}, yaml, ScanError, Yaml, YamlLoader, }; @@ -148,7 +148,9 @@ impl EventReceiver for EventReporter { Event::SequenceStart(idx) => format!("+SEQ{}", format_index(idx)), Event::SequenceEnd => "-SEQ".into(), - Event::MappingStart(idx) => format!("+MAP{}", format_index(idx)), + Event::MappingStart(idx, tag) => { + format!("+MAP{}{}", format_index(idx), format_tag(&tag)) + } Event::MappingEnd => "-MAP".into(), Event::Scalar(ref text, style, idx, ref tag) => { @@ -197,13 +199,13 @@ fn escape_text(text: &str) -> String { text } -fn format_tag(tag: &Option) -> String { - if let Some(TokenType::Tag(ns, tag)) = tag { - let ns = match ns.as_str() { +fn format_tag(tag: &Option) -> String { + if let Some(tag) = tag { + let ns = match tag.handle.as_str() { "!!" => "tag:yaml.org,2002:", // Wrong if this ns is overridden other => other, }; - format!(" <{}{}>", ns, tag) + format!(" <{}{}>", ns, tag.suffix) } else { "".into() } @@ -302,13 +304,9 @@ static EXPECTED_FAILURES: &[&str] = &[ "35KP", "57H4", "6JWB", - "735Y", - "9KAX", - "BU8L", "C4HZ", "EHF6", "J7PZ", - "UGM3", // Cannot resolve tag namespaces "5TYM", "6CK3", From 3f10cf9e5d340ed5a61c51b5bfc06eb5c38bf3d0 Mon Sep 17 00:00:00 2001 From: Ethiraric Date: Sun, 19 Nov 2023 14:40:01 +0100 Subject: [PATCH 218/380] Propagate tag to SequenceStart event. --- saphyr/src/parser.rs | 10 ++++++---- saphyr/src/yaml.rs | 8 ++++++-- saphyr/tests/yaml-test-suite.rs | 11 ++++------- 3 files changed, 16 insertions(+), 13 deletions(-) diff --git a/saphyr/src/parser.rs b/saphyr/src/parser.rs index a83bdc4..85bd252 100644 --- a/saphyr/src/parser.rs +++ b/saphyr/src/parser.rs @@ -55,6 +55,8 @@ pub enum Event { SequenceStart( /// The anchor ID of the start of the squence. usize, + /// An optional tag + Option, ), SequenceEnd, MappingStart( @@ -347,7 +349,7 @@ impl> Parser { recv.on_event(first_ev, mark); Ok(()) } - Event::SequenceStart(_) => { + Event::SequenceStart(..) => { recv.on_event(first_ev, mark); self.load_sequence(recv) } @@ -615,7 +617,7 @@ impl> Parser { match *self.peek_token()? { Token(mark, TokenType::BlockEntry) if indentless_sequence => { self.state = State::IndentlessSequenceEntry; - Ok((Event::SequenceStart(anchor_id), mark)) + Ok((Event::SequenceStart(anchor_id, tag), mark)) } Token(_, TokenType::Scalar(..)) => { self.pop_state(); @@ -627,7 +629,7 @@ impl> Parser { } Token(mark, TokenType::FlowSequenceStart) => { self.state = State::FlowSequenceFirstEntry; - Ok((Event::SequenceStart(anchor_id), mark)) + Ok((Event::SequenceStart(anchor_id, tag), mark)) } Token(mark, TokenType::FlowMappingStart) => { self.state = State::FlowMappingFirstKey; @@ -635,7 +637,7 @@ impl> Parser { } Token(mark, TokenType::BlockSequenceStart) if block => { self.state = State::BlockSequenceFirstEntry; - Ok((Event::SequenceStart(anchor_id), mark)) + Ok((Event::SequenceStart(anchor_id, tag), mark)) } Token(mark, TokenType::BlockMappingStart) if block => { self.state = State::BlockMappingFirstKey; diff --git a/saphyr/src/yaml.rs b/saphyr/src/yaml.rs index 07a8592..57cc771 100644 --- a/saphyr/src/yaml.rs +++ b/saphyr/src/yaml.rs @@ -91,7 +91,7 @@ impl MarkedEventReceiver for YamlLoader { _ => unreachable!(), } } - Event::SequenceStart(aid) => { + Event::SequenceStart(aid, _) => { self.doc_stack.push((Yaml::Array(Vec::new()), aid)); } Event::SequenceEnd => { @@ -110,7 +110,11 @@ impl MarkedEventReceiver for YamlLoader { Event::Scalar(v, style, aid, tag) => { let node = if style != TScalarStyle::Plain { Yaml::String(v) - } else if let Some(Tag { ref handle, ref suffix }) = tag { + } else if let Some(Tag { + ref handle, + ref suffix, + }) = tag + { // XXX tag:yaml.org,2002: if handle == "!!" { match suffix.as_ref() { diff --git a/saphyr/tests/yaml-test-suite.rs b/saphyr/tests/yaml-test-suite.rs index 788484f..62ff87c 100644 --- a/saphyr/tests/yaml-test-suite.rs +++ b/saphyr/tests/yaml-test-suite.rs @@ -4,7 +4,7 @@ use libtest_mimic::{run_tests, Arguments, Outcome, Test}; use yaml_rust::{ parser::{Event, EventReceiver, Parser, Tag}, - scanner::{TScalarStyle}, + scanner::TScalarStyle, yaml, ScanError, Yaml, YamlLoader, }; @@ -145,7 +145,9 @@ impl EventReceiver for EventReporter { Event::DocumentStart => "+DOC".into(), Event::DocumentEnd => "-DOC".into(), - Event::SequenceStart(idx) => format!("+SEQ{}", format_index(idx)), + Event::SequenceStart(idx, tag) => { + format!("+SEQ{}{}", format_index(idx), format_tag(&tag)) + } Event::SequenceEnd => "-SEQ".into(), Event::MappingStart(idx, tag) => { @@ -301,12 +303,7 @@ fn expected_events(expected_tree: &str) -> Vec { static EXPECTED_FAILURES: &[&str] = &[ // These seem to be API limited (not enough information on the event stream level) // No tag available for SEQ and MAP - "35KP", - "57H4", - "6JWB", "C4HZ", - "EHF6", - "J7PZ", // Cannot resolve tag namespaces "5TYM", "6CK3", From 8da6ddef105fb89e227c5ca747b69038ba68bc1c Mon Sep 17 00:00:00 2001 From: Ethiraric Date: Sun, 19 Nov 2023 14:52:21 +0100 Subject: [PATCH 219/380] Remove `_` prefix to used method. --- saphyr/src/parser.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/saphyr/src/parser.rs b/saphyr/src/parser.rs index 85bd252..aa5a35e 100644 --- a/saphyr/src/parser.rs +++ b/saphyr/src/parser.rs @@ -468,7 +468,7 @@ impl> Parser { | TokenType::DocumentStart, ) => { // explicit document - self._explicit_document_start() + self.explicit_document_start() } Token(mark, _) if implicit => { self.parser_process_directives()?; @@ -478,7 +478,7 @@ impl> Parser { } _ => { // explicit document - self._explicit_document_start() + self.explicit_document_start() } } } @@ -504,7 +504,7 @@ impl> Parser { Ok(()) } - fn _explicit_document_start(&mut self) -> ParseResult { + fn explicit_document_start(&mut self) -> ParseResult { self.parser_process_directives()?; match *self.peek_token()? { Token(mark, TokenType::DocumentStart) => { From 5f6dc2246fc43e480480b456ed9c03226ba48ab7 Mon Sep 17 00:00:00 2001 From: Ethiraric Date: Sun, 19 Nov 2023 16:00:19 +0100 Subject: [PATCH 220/380] Reslove tag directives. --- saphyr/src/parser.rs | 70 +++++++++++++++++++++++++++++---- saphyr/src/yaml.rs | 3 +- saphyr/tests/yaml-test-suite.rs | 9 ----- 3 files changed, 64 insertions(+), 18 deletions(-) diff --git a/saphyr/src/parser.rs b/saphyr/src/parser.rs index aa5a35e..ed07e64 100644 --- a/saphyr/src/parser.rs +++ b/saphyr/src/parser.rs @@ -100,6 +100,10 @@ pub struct Parser { current: Option<(Event, Marker)>, anchors: HashMap, anchor_id: usize, + /// The tag directives (`%TAG`) the parser has encountered. + /// + /// Key is the handle, and value is the prefix. + tags: HashMap, } /// Trait to be implemented in order to use the low-level parsing API. @@ -205,6 +209,7 @@ impl> Parser { anchors: HashMap::new(), // valid anchor_id starts from 1 anchor_id: 1, + tags: HashMap::new(), } } @@ -485,19 +490,24 @@ impl> Parser { fn parser_process_directives(&mut self) -> Result<(), ScanError> { loop { - match self.peek_token()?.1 { - TokenType::VersionDirective(_, _) => { + let mut tags = HashMap::new(); + match self.peek_token()? { + Token(_, TokenType::VersionDirective(_, _)) => { // XXX parsing with warning according to spec //if major != 1 || minor > 2 { // return Err(ScanError::new(tok.0, // "found incompatible YAML document")); //} } - TokenType::TagDirective(..) => { - // TODO add tag directive + Token(mark, TokenType::TagDirective(handle, prefix)) => { + if tags.contains_key(handle) { + return Err(ScanError::new(*mark, "the TAG directive must only be given at most once per handle in the same document")); + } + tags.insert(handle.to_string(), prefix.to_string()); } _ => break, } + self.tags = tags; self.skip(); } // TODO tag directive @@ -589,7 +599,7 @@ impl> Parser { anchor_id = self.register_anchor(name, &mark); if let TokenType::Tag(..) = self.peek_token()?.1 { if let TokenType::Tag(handle, suffix) = self.fetch_token().1 { - tag = Some(Tag { handle, suffix }); + tag = Some(self.resolve_tag(mark, &handle, suffix)?); } else { unreachable!() } @@ -598,9 +608,9 @@ impl> Parser { unreachable!() } } - Token(_, TokenType::Tag(..)) => { + Token(mark, TokenType::Tag(..)) => { if let TokenType::Tag(handle, suffix) = self.fetch_token().1 { - tag = Some(Tag { handle, suffix }); + tag = Some(self.resolve_tag(mark, &handle, suffix)?); if let TokenType::Anchor(_) = &self.peek_token()?.1 { if let Token(mark, TokenType::Anchor(name)) = self.fetch_token() { anchor_id = self.register_anchor(name, &mark); @@ -935,6 +945,52 @@ impl> Parser { self.state = State::FlowSequenceEntry; Ok((Event::MappingEnd, self.scanner.mark())) } + + /// Resolve a tag from the handle and the suffix. + fn resolve_tag(&self, mark: Marker, handle: &str, suffix: String) -> Result { + if handle == "!!" { + // "!!" is a shorthand for "tag:yaml.org,2002:". + Ok(Tag { + handle: "tag:yaml.org,2002:".to_string(), + suffix, + }) + } else if handle.is_empty() && suffix == "!" { + // "!" is a shorthand for "whatever would be the default". However, that + // default can be overridden. + match self.tags.get("") { + Some(prefix) => Ok(Tag { + handle: prefix.to_string(), + suffix, + }), + None => Ok(Tag { + handle: String::new(), + suffix, + }), + } + } else { + // Lookup handle in our tag directives. + let prefix = self.tags.get(handle); + if let Some(prefix) = prefix { + Ok(Tag { + handle: prefix.to_string(), + suffix, + }) + } else { + // Otherwise, it may be a local handle. With a local handle, the handle is set to + // "!" and the suffix to whatever follows it ("!foo" -> ("!", "foo")). + // If the handle is of the form "!foo!", this cannot be a local handle and we need + // to error. + if handle.len() >= 2 && handle.starts_with('!') && handle.ends_with('!') { + Err(ScanError::new(mark, "the handle wasn't declared")) + } else { + Ok(Tag { + handle: handle.to_string(), + suffix, + }) + } + } + } + } } #[cfg(test)] diff --git a/saphyr/src/yaml.rs b/saphyr/src/yaml.rs index 57cc771..4c04eb1 100644 --- a/saphyr/src/yaml.rs +++ b/saphyr/src/yaml.rs @@ -115,8 +115,7 @@ impl MarkedEventReceiver for YamlLoader { ref suffix, }) = tag { - // XXX tag:yaml.org,2002: - if handle == "!!" { + if handle == "tag:yaml.org,2002:" { match suffix.as_ref() { "bool" => { // "true" or "false" diff --git a/saphyr/tests/yaml-test-suite.rs b/saphyr/tests/yaml-test-suite.rs index 62ff87c..a23d69d 100644 --- a/saphyr/tests/yaml-test-suite.rs +++ b/saphyr/tests/yaml-test-suite.rs @@ -302,16 +302,7 @@ fn expected_events(expected_tree: &str) -> Vec { #[rustfmt::skip] static EXPECTED_FAILURES: &[&str] = &[ // These seem to be API limited (not enough information on the event stream level) - // No tag available for SEQ and MAP - "C4HZ", // Cannot resolve tag namespaces - "5TYM", - "6CK3", - "6WLZ", - "9WXW", - "CC74", - "U3C3", - "Z9M4", "P76L", // overriding the `!!` namespace! // These seem to be plain bugs From 92e20e6eb470e0d0ee4cec4608ff2a49ebf3eafa Mon Sep 17 00:00:00 2001 From: Ethiraric Date: Sun, 19 Nov 2023 16:22:04 +0100 Subject: [PATCH 221/380] Don't inherit tag directives between documents. --- saphyr/src/parser.rs | 3 +-- saphyr/tests/yaml-test-suite.rs | 1 - 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/saphyr/src/parser.rs b/saphyr/src/parser.rs index ed07e64..433de60 100644 --- a/saphyr/src/parser.rs +++ b/saphyr/src/parser.rs @@ -510,7 +510,6 @@ impl> Parser { self.tags = tags; self.skip(); } - // TODO tag directive Ok(()) } @@ -557,7 +556,7 @@ impl> Parser { Token(mark, _) => mark, }; - // TODO tag handling + self.tags.clear(); self.state = State::DocumentStart; Ok((Event::DocumentEnd, marker)) } diff --git a/saphyr/tests/yaml-test-suite.rs b/saphyr/tests/yaml-test-suite.rs index a23d69d..d1e32d3 100644 --- a/saphyr/tests/yaml-test-suite.rs +++ b/saphyr/tests/yaml-test-suite.rs @@ -366,7 +366,6 @@ static EXPECTED_FAILURES: &[&str] = &[ "9HCY", // Directive after content "EB22", // Directive after content "MUS6-01", // no document end marker? - "QLJ7", // TAG directives should not be inherited between documents "RHX7", // no document end marker "SF5V", // duplicate directive "W4TN", // scalar confused as directive From ffed282a9fe796cf8f27f39d38f95090cb583492 Mon Sep 17 00:00:00 2001 From: Ethiraric Date: Sun, 19 Nov 2023 16:28:05 +0100 Subject: [PATCH 222/380] Remove stale tag handling code. --- saphyr/tests/yaml-test-suite.rs | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/saphyr/tests/yaml-test-suite.rs b/saphyr/tests/yaml-test-suite.rs index d1e32d3..bd57d15 100644 --- a/saphyr/tests/yaml-test-suite.rs +++ b/saphyr/tests/yaml-test-suite.rs @@ -203,11 +203,7 @@ fn escape_text(text: &str) -> String { fn format_tag(tag: &Option) -> String { if let Some(tag) = tag { - let ns = match tag.handle.as_str() { - "!!" => "tag:yaml.org,2002:", // Wrong if this ns is overridden - other => other, - }; - format!(" <{}{}>", ns, tag.suffix) + format!(" <{}{}>", tag.handle, tag.suffix) } else { "".into() } From 06a6fb34c187dc02a7916290d074cd4bf11a412a Mon Sep 17 00:00:00 2001 From: Ethiraric Date: Sun, 19 Nov 2023 16:36:04 +0100 Subject: [PATCH 223/380] Handle "!!" tag overriding. --- saphyr/src/parser.rs | 20 +++++++++++++------- saphyr/tests/yaml-test-suite.rs | 4 ---- 2 files changed, 13 insertions(+), 11 deletions(-) diff --git a/saphyr/src/parser.rs b/saphyr/src/parser.rs index 433de60..62ffd6f 100644 --- a/saphyr/src/parser.rs +++ b/saphyr/src/parser.rs @@ -948,14 +948,20 @@ impl> Parser { /// Resolve a tag from the handle and the suffix. fn resolve_tag(&self, mark: Marker, handle: &str, suffix: String) -> Result { if handle == "!!" { - // "!!" is a shorthand for "tag:yaml.org,2002:". - Ok(Tag { - handle: "tag:yaml.org,2002:".to_string(), - suffix, - }) + // "!!" is a shorthand for "tag:yaml.org,2002:". However, that default can be + // overridden. + match self.tags.get("!!") { + Some(prefix) => Ok(Tag { + handle: prefix.to_string(), + suffix, + }), + None => Ok(Tag { + handle: "tag:yaml.org,2002:".to_string(), + suffix, + }), + } } else if handle.is_empty() && suffix == "!" { - // "!" is a shorthand for "whatever would be the default". However, that - // default can be overridden. + // "!" introduces a local tag. Local tags may have their prefix overridden. match self.tags.get("") { Some(prefix) => Ok(Tag { handle: prefix.to_string(), diff --git a/saphyr/tests/yaml-test-suite.rs b/saphyr/tests/yaml-test-suite.rs index bd57d15..204cdb6 100644 --- a/saphyr/tests/yaml-test-suite.rs +++ b/saphyr/tests/yaml-test-suite.rs @@ -297,10 +297,6 @@ fn expected_events(expected_tree: &str) -> Vec { #[rustfmt::skip] static EXPECTED_FAILURES: &[&str] = &[ - // These seem to be API limited (not enough information on the event stream level) - // Cannot resolve tag namespaces - "P76L", // overriding the `!!` namespace! - // These seem to be plain bugs // Leading TAB in literal scalars "96NN-00", From c670b32461755d4b9c4221e8bf68bac56db82506 Mon Sep 17 00:00:00 2001 From: Ethiraric Date: Sun, 19 Nov 2023 17:08:28 +0100 Subject: [PATCH 224/380] Fix tab used as indentation checks. --- saphyr/src/scanner.rs | 38 ++++++++++++++------------------- saphyr/tests/yaml-test-suite.rs | 5 ----- 2 files changed, 16 insertions(+), 27 deletions(-) diff --git a/saphyr/src/scanner.rs b/saphyr/src/scanner.rs index a271602..ae77813 100644 --- a/saphyr/src/scanner.rs +++ b/saphyr/src/scanner.rs @@ -378,6 +378,12 @@ impl> Scanner { self.mark } + // Read and consume a line break (either `\r`, `\n` or `\r\n`). + // + // A `\n` is pushed into `s`. + // + // # Panics + // If the next characters do not correspond to a line break. #[inline] fn read_break(&mut self, s: &mut String) { if self.buffer[0] == '\r' && self.buffer[1] == '\n' { @@ -1222,7 +1228,7 @@ impl> Scanner { } } // Scan the leading line breaks and determine the indentation level if needed. - self.block_scalar_breaks(&mut indent, &mut trailing_breaks)?; + self.block_scalar_breaks(&mut indent, &mut trailing_breaks); self.lookahead(1); @@ -1260,7 +1266,7 @@ impl> Scanner { self.read_break(&mut leading_break); // Eat the following indentation spaces and line breaks. - self.block_scalar_breaks(&mut indent, &mut trailing_breaks)?; + self.block_scalar_breaks(&mut indent, &mut trailing_breaks); } // Chomp the tail. @@ -1285,44 +1291,32 @@ impl> Scanner { } } - fn block_scalar_breaks(&mut self, indent: &mut usize, breaks: &mut String) -> ScanResult { + fn block_scalar_breaks(&mut self, indent: &mut usize, breaks: &mut String) { let mut max_indent = 0; + // Consume all empty lines. loop { - self.lookahead(1); - while (*indent == 0 || self.mark.col < *indent) && self.buffer[0] == ' ' { + // Consume all spaces. Tabs cannot be used as indentation. + while (*indent == 0 || self.mark.col < *indent) && self.look_ch() == ' ' { self.skip(); - self.lookahead(1); } if self.mark.col > max_indent { max_indent = self.mark.col; } - // Check for a tab character messing the indentation. - if (*indent == 0 || self.mark.col < *indent) && self.buffer[0] == '\t' { - return Err(ScanError::new(self.mark, - "while scanning a block scalar, found a tab character where an indentation space is expected")); - } - - if !is_break(self.ch()) { + // If our current line is not empty, break out of the loop. + if !is_break(self.look_ch()) { break; } - self.lookahead(2); // Consume the line break. + self.lookahead(2); self.read_break(breaks); } if *indent == 0 { - *indent = max_indent; - if *indent < (self.indent + 1) as usize { - *indent = (self.indent + 1) as usize; - } - if *indent < 1 { - *indent = 1; - } + *indent = max_indent.max((self.indent + 1) as usize).max(1); } - Ok(()) } fn fetch_flow_scalar(&mut self, single: bool) -> ScanResult { diff --git a/saphyr/tests/yaml-test-suite.rs b/saphyr/tests/yaml-test-suite.rs index 204cdb6..8679e42 100644 --- a/saphyr/tests/yaml-test-suite.rs +++ b/saphyr/tests/yaml-test-suite.rs @@ -298,11 +298,6 @@ fn expected_events(expected_tree: &str) -> Vec { #[rustfmt::skip] static EXPECTED_FAILURES: &[&str] = &[ // These seem to be plain bugs - // Leading TAB in literal scalars - "96NN-00", - "96NN-01", - "R4YG", - "Y79Y-01", // TAB as start of plain scalar instead of whitespace "6BCT", "6CA3", From 159001831282d8fb4b99d07fe95cfd7ff79d72cd Mon Sep 17 00:00:00 2001 From: Ethiraric Date: Sun, 19 Nov 2023 19:13:01 +0100 Subject: [PATCH 225/380] Doccomment `is_` series of functions. --- saphyr/src/scanner.rs | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/saphyr/src/scanner.rs b/saphyr/src/scanner.rs index ae77813..4da5515 100644 --- a/saphyr/src/scanner.rs +++ b/saphyr/src/scanner.rs @@ -211,38 +211,55 @@ impl> Iterator for Scanner { } } +/// Check whether the character is nil (`\0`). #[inline] fn is_z(c: char) -> bool { c == '\0' } + +/// Check whether the character is a line break (`\r` or `\n`). #[inline] fn is_break(c: char) -> bool { c == '\n' || c == '\r' } + +/// Check whether the character is nil or a line break (`\0`, `\r`, `\n`). #[inline] fn is_breakz(c: char) -> bool { is_break(c) || is_z(c) } + +/// Check whether the character is a whitespace (` ` or `\t`). #[inline] fn is_blank(c: char) -> bool { c == ' ' || c == '\t' } + +/// Check whether the character is nil or a whitespace (`\0`, ` `, `\t`). #[inline] fn is_blankz(c: char) -> bool { is_blank(c) || is_breakz(c) } + +/// Check whether the character is an ascii digit. #[inline] fn is_digit(c: char) -> bool { c.is_ascii_digit() } + +/// Check whether the character is a digit, letter, `_` or `-`. #[inline] fn is_alpha(c: char) -> bool { matches!(c, '0'..='9' | 'a'..='z' | 'A'..='Z' | '_' | '-') } + +/// Check whether the character is a hexadecimal character (case insensitive). #[inline] fn is_hex(c: char) -> bool { c.is_ascii_digit() || ('a'..='f').contains(&c) || ('A'..='F').contains(&c) } + +/// Convert the hexadecimal digit to an integer. #[inline] fn as_hex(c: char) -> u32 { match c { @@ -252,6 +269,8 @@ fn as_hex(c: char) -> u32 { _ => unreachable!(), } } + +/// Check whether the character is a YAML flow character (one of `,[]{}`). #[inline] fn is_flow(c: char) -> bool { matches!(c, ',' | '[' | ']' | '{' | '}') From d9287638b9aadd5a6abd0a7bee580f416c8fad46 Mon Sep 17 00:00:00 2001 From: Ethiraric Date: Sun, 19 Nov 2023 21:16:52 +0100 Subject: [PATCH 226/380] Use type aliases where appropriate. --- saphyr/src/scanner.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/saphyr/src/scanner.rs b/saphyr/src/scanner.rs index 4da5515..4643368 100644 --- a/saphyr/src/scanner.rs +++ b/saphyr/src/scanner.rs @@ -1780,7 +1780,7 @@ impl> Scanner { } } - fn save_simple_key(&mut self) -> Result<(), ScanError> { + fn save_simple_key(&mut self) -> ScanResult { let required = self.flow_level > 0 && self.indent == (self.mark.col as isize); if self.simple_key_allowed { let mut sk = SimpleKey::new(self.mark); From e1ae3bd5b2beafd5862032aced5c99cd65d7fff4 Mon Sep 17 00:00:00 2001 From: Ethiraric Date: Wed, 20 Dec 2023 23:14:22 +0100 Subject: [PATCH 227/380] Fix more inappropriate use of tabs. --- saphyr/src/scanner.rs | 39 ++++++++++++++++++++++++++------- saphyr/tests/yaml-test-suite.rs | 1 - 2 files changed, 31 insertions(+), 9 deletions(-) diff --git a/saphyr/src/scanner.rs b/saphyr/src/scanner.rs index 4643368..3fdd98d 100644 --- a/saphyr/src/scanner.rs +++ b/saphyr/src/scanner.rs @@ -193,6 +193,8 @@ pub struct Scanner { flow_level: u8, tokens_parsed: usize, token_available: bool, + /// Whether all characters encountered since the last newline were whitespace. + leading_whitespace: bool, } impl> Iterator for Scanner { @@ -298,6 +300,7 @@ impl> Scanner { flow_level: 0, tokens_parsed: 0, token_available: false, + leading_whitespace: true, } } @@ -319,19 +322,26 @@ impl> Scanner { } } + /// Consume the next character. Remove from buffer and update mark. #[inline] fn skip(&mut self) { let c = self.buffer.pop_front().unwrap(); self.mark.index += 1; if c == '\n' { + self.leading_whitespace = true; self.mark.line += 1; self.mark.col = 0; } else { + // TODO(ethiraric, 20/12/2023): change to `self.leading_whitespace &= is_blank(c)`? + if self.leading_whitespace && !is_blank(c) { + self.leading_whitespace = false; + } self.mark.col += 1; } } + /// Consume a linebreak (either CR, LF or CRLF), if any. Do nothing if there's none. #[inline] fn skip_line(&mut self) { if self.buffer[0] == '\r' && self.buffer[1] == '\n' { @@ -442,7 +452,7 @@ impl> Scanner { self.fetch_stream_start(); return Ok(()); } - self.skip_to_next_token(); + self.skip_to_next_token()?; self.stale_simple_keys()?; @@ -577,12 +587,21 @@ impl> Scanner { Ok(()) } - fn skip_to_next_token(&mut self) { + fn skip_to_next_token(&mut self) -> ScanResult { loop { - self.lookahead(1); // TODO(chenyh) BOM - match self.ch() { + match self.look_ch() { ' ' => self.skip(), + // Tabs may not be used as indentation. + // "Indentation" only exists as long as a block is started, but does not exist + // inside of flow-style constructs. Tabs are allowed as part of leaading + // whitespaces outside of indentation. + '\t' if self.is_within_block() && self.leading_whitespace => { + return Err(ScanError::new( + self.mark, + "tabs disallowed within this context (block indentation)", + )) + } '\t' if self.flow_level > 0 || !self.simple_key_allowed => self.skip(), '\n' | '\r' => { self.lookahead(2); @@ -600,6 +619,7 @@ impl> Scanner { _ => break, } } + Ok(()) } fn fetch_stream_start(&mut self) { @@ -1638,12 +1658,10 @@ impl> Scanner { )); } - if leading_blanks { - self.skip(); - } else { + if !leading_blanks { whitespaces.push(self.ch()); - self.skip(); } + self.skip(); } else { self.lookahead(2); // Check if it is a first line break @@ -1805,4 +1823,9 @@ impl> Scanner { last.possible = false; Ok(()) } + + /// Return whether the scanner is inside a block but outside of a flow sequence. + fn is_within_block(&self) -> bool { + !self.indents.is_empty() && self.flow_level == 0 + } } diff --git a/saphyr/tests/yaml-test-suite.rs b/saphyr/tests/yaml-test-suite.rs index 8679e42..a4e1419 100644 --- a/saphyr/tests/yaml-test-suite.rs +++ b/saphyr/tests/yaml-test-suite.rs @@ -305,7 +305,6 @@ static EXPECTED_FAILURES: &[&str] = &[ "DK95-00", "Q5MG", "Y79Y-06", - "4EJS", // unexpected pass "Y79Y-03", // unexpected pass "Y79Y-04", // unexpected pass "Y79Y-05", // unexpected pass From 29b513bea3f1ffa1857b409642d4bad55239dd3f Mon Sep 17 00:00:00 2001 From: Ethiraric Date: Thu, 21 Dec 2023 00:13:53 +0100 Subject: [PATCH 228/380] More fixes towards tabulations. --- saphyr/src/scanner.rs | 6 ++---- saphyr/tests/yaml-test-suite.rs | 3 --- 2 files changed, 2 insertions(+), 7 deletions(-) diff --git a/saphyr/src/scanner.rs b/saphyr/src/scanner.rs index 3fdd98d..2cda77c 100644 --- a/saphyr/src/scanner.rs +++ b/saphyr/src/scanner.rs @@ -1585,7 +1585,7 @@ impl> Scanner { let mut leading_break = String::new(); let mut trailing_breaks = String::new(); let mut whitespaces = String::new(); - let mut leading_blanks = false; + let mut leading_blanks = true; loop { /* Check for a document indicator. */ @@ -1647,9 +1647,8 @@ impl> Scanner { if !(is_blank(self.ch()) || is_break(self.ch())) { break; } - self.lookahead(1); - while is_blank(self.ch()) || is_break(self.ch()) { + while is_blank(self.look_ch()) || is_break(self.ch()) { if is_blank(self.ch()) { if leading_blanks && (self.mark.col as isize) < indent && self.ch() == '\t' { return Err(ScanError::new( @@ -1673,7 +1672,6 @@ impl> Scanner { leading_blanks = true; } } - self.lookahead(1); } // check indentation level diff --git a/saphyr/tests/yaml-test-suite.rs b/saphyr/tests/yaml-test-suite.rs index a4e1419..c9e9372 100644 --- a/saphyr/tests/yaml-test-suite.rs +++ b/saphyr/tests/yaml-test-suite.rs @@ -299,16 +299,13 @@ fn expected_events(expected_tree: &str) -> Vec { static EXPECTED_FAILURES: &[&str] = &[ // These seem to be plain bugs // TAB as start of plain scalar instead of whitespace - "6BCT", "6CA3", - "A2M4", "DK95-00", "Q5MG", "Y79Y-06", "Y79Y-03", // unexpected pass "Y79Y-04", // unexpected pass "Y79Y-05", // unexpected pass - "Y79Y-10", // TABs in whitespace-only lines "DK95-03", "DK95-04", From abac504295d5cca0bc323eaee772419782055bab Mon Sep 17 00:00:00 2001 From: Ethiraric Date: Thu, 21 Dec 2023 00:14:08 +0100 Subject: [PATCH 229/380] Minor improvements. --- saphyr/src/scanner.rs | 32 +++++++++++++++++++++++++------- saphyr/src/yaml.rs | 2 +- saphyr/tests/yaml-test-suite.rs | 5 +---- 3 files changed, 27 insertions(+), 12 deletions(-) diff --git a/saphyr/src/scanner.rs b/saphyr/src/scanner.rs index 2cda77c..3c2690c 100644 --- a/saphyr/src/scanner.rs +++ b/saphyr/src/scanner.rs @@ -129,8 +129,15 @@ pub enum TokenType { DocumentStart, /// The end of a YAML document (`...`). DocumentEnd, + /// The start of a sequence block. + /// + /// Sequence blocks are arrays starting with a `-`. BlockSequenceStart, + /// The start of a sequence mapping. + /// + /// Sequence mappings are "dictionaries" with "key: value" entries. BlockMappingStart, + /// End of the corresponding `BlockSequenceStart` or `BlockMappingStart`. BlockEnd, /// Start of an inline array (`[ a, b ]`). FlowSequenceStart, @@ -186,6 +193,9 @@ pub struct Scanner { stream_start_produced: bool, stream_end_produced: bool, adjacent_value_allowed_at: usize, + /// Whether a simple key could potentially start at the current position. + /// + /// Simple keys are the opposite of complex keys which are keys starting with `?`. simple_key_allowed: bool, simple_keys: Vec, indent: isize, @@ -427,13 +437,11 @@ impl> Scanner { } } + /// Insert a token at the given position. fn insert_token(&mut self, pos: usize, tok: Token) { let old_len = self.tokens.len(); assert!(pos <= old_len); - self.tokens.push_back(tok); - for i in 0..old_len - pos { - self.tokens.swap(old_len - i, old_len - i - 1); - } + self.tokens.insert(pos, tok); } fn allow_simple_key(&mut self) { @@ -550,10 +558,10 @@ impl> Scanner { pub fn fetch_more_tokens(&mut self) -> ScanResult { let mut need_more; loop { - need_more = false; if self.tokens.is_empty() { need_more = true; } else { + need_more = false; self.stale_simple_keys()?; for sk in &self.simple_keys { if sk.possible && sk.token_number == self.tokens_parsed { @@ -600,9 +608,9 @@ impl> Scanner { return Err(ScanError::new( self.mark, "tabs disallowed within this context (block indentation)", - )) + )); } - '\t' if self.flow_level > 0 || !self.simple_key_allowed => self.skip(), + '\t' => self.skip(), '\n' | '\r' => { self.lookahead(2); self.skip_line(); @@ -1770,6 +1778,11 @@ impl> Scanner { Ok(()) } + /// Add an indentation level to the stack with the given block token, if needed. + /// + /// An indentation level is added only if: + /// - We are not in a flow-style construct (which don't have indentation per-se). + /// - The current column is further indented than the last indent we have registered. fn roll_indent(&mut self, col: usize, number: Option, tok: TokenType, mark: Marker) { if self.flow_level > 0 { return; @@ -1786,6 +1799,11 @@ impl> Scanner { } } + /// Pop indentation levels from the stack as much as needed. + /// + /// Indentation levels are popped from the stack while they are further indented than `col`. + /// If we are in a flow-style construct (which don't have indentation per-se), this function + /// does nothing. fn unroll_indent(&mut self, col: isize) { if self.flow_level > 0 { return; diff --git a/saphyr/src/yaml.rs b/saphyr/src/yaml.rs index 4c04eb1..22f30eb 100644 --- a/saphyr/src/yaml.rs +++ b/saphyr/src/yaml.rs @@ -1,7 +1,7 @@ #![allow(clippy::module_name_repetitions)] use crate::parser::{Event, MarkedEventReceiver, Parser, Tag}; -use crate::scanner::{Marker, ScanError, TScalarStyle, TokenType}; +use crate::scanner::{Marker, ScanError, TScalarStyle}; use linked_hash_map::LinkedHashMap; use std::collections::BTreeMap; use std::mem; diff --git a/saphyr/tests/yaml-test-suite.rs b/saphyr/tests/yaml-test-suite.rs index c9e9372..42b1668 100644 --- a/saphyr/tests/yaml-test-suite.rs +++ b/saphyr/tests/yaml-test-suite.rs @@ -52,7 +52,7 @@ fn run_yaml_test(test: &Test) -> Outcome { let actual_events = parse_to_events(&desc.yaml); let events_diff = actual_events.map(|events| events_differ(events, &desc.expected_events)); let mut error_text = match (events_diff, desc.expected_error) { - (Ok(_), true) => Some("no error when expected".into()), + (Ok(x), true) => Some(format!("no error when expected: {x:#?}")), (Err(_), true) => None, (Err(e), false) => Some(format!("unexpected error {:?}", e)), (Ok(Some(diff)), false) => Some(format!("events differ: {}", diff)), @@ -299,15 +299,12 @@ fn expected_events(expected_tree: &str) -> Vec { static EXPECTED_FAILURES: &[&str] = &[ // These seem to be plain bugs // TAB as start of plain scalar instead of whitespace - "6CA3", "DK95-00", - "Q5MG", "Y79Y-06", "Y79Y-03", // unexpected pass "Y79Y-04", // unexpected pass "Y79Y-05", // unexpected pass // TABs in whitespace-only lines - "DK95-03", "DK95-04", // TABs after marker ? or : (space required?) "Y79Y-07", From e683932e7aba9f6134f4a3a1fc21a69b6f202d08 Mon Sep 17 00:00:00 2001 From: Ethiraric Date: Thu, 21 Dec 2023 20:02:56 +0100 Subject: [PATCH 230/380] Rework block scalar indent skipping. --- saphyr/src/scanner.rs | 55 +++++++++++++++++++++++++++++++------------ 1 file changed, 40 insertions(+), 15 deletions(-) diff --git a/saphyr/src/scanner.rs b/saphyr/src/scanner.rs index 3c2690c..de22531 100644 --- a/saphyr/src/scanner.rs +++ b/saphyr/src/scanner.rs @@ -1274,8 +1274,13 @@ impl> Scanner { increment } } + // Scan the leading line breaks and determine the indentation level if needed. - self.block_scalar_breaks(&mut indent, &mut trailing_breaks); + if indent == 0 { + self.skip_block_scalar_first_line_indent(&mut indent, &mut trailing_breaks); + } else { + self.skip_block_scalar_indent(indent, &mut trailing_breaks); + } self.lookahead(1); @@ -1313,7 +1318,7 @@ impl> Scanner { self.read_break(&mut leading_break); // Eat the following indentation spaces and line breaks. - self.block_scalar_breaks(&mut indent, &mut trailing_breaks); + self.skip_block_scalar_indent(indent, &mut trailing_breaks); } // Chomp the tail. @@ -1338,12 +1343,34 @@ impl> Scanner { } } - fn block_scalar_breaks(&mut self, indent: &mut usize, breaks: &mut String) { - let mut max_indent = 0; - // Consume all empty lines. + /// Skip the block scalar indentation and empty lines. + fn skip_block_scalar_indent(&mut self, indent: usize, breaks: &mut String) { loop { // Consume all spaces. Tabs cannot be used as indentation. - while (*indent == 0 || self.mark.col < *indent) && self.look_ch() == ' ' { + while self.mark.col < indent && self.look_ch() == ' ' { + self.skip(); + } + + // If our current line is empty, skip over the break and continue looping. + if is_break(self.look_ch()) { + self.lookahead(2); + self.read_break(breaks); + } else { + // Otherwise, we have a content line. Return control. + break; + } + } + } + + /// Determine the indentation level for a block scalar from the first line of its contents. + /// + /// The function skips over whitespace-only lines and sets `indent` to the the longest + /// whitespace line that was encountered. + fn skip_block_scalar_first_line_indent(&mut self, indent: &mut usize, breaks: &mut String) { + let mut max_indent = 0; + loop { + // Consume all spaces. Tabs cannot be used as indentation. + while self.look_ch() == ' ' { self.skip(); } @@ -1351,19 +1378,17 @@ impl> Scanner { max_indent = self.mark.col; } - // If our current line is not empty, break out of the loop. - if !is_break(self.look_ch()) { + if is_break(self.look_ch()) { + // If our current line is empty, skip over the break and continue looping. + self.lookahead(2); + self.read_break(breaks); + } else { + // Otherwise, we have a content line. Return control. break; } - - // Consume the line break. - self.lookahead(2); - self.read_break(breaks); } - if *indent == 0 { - *indent = max_indent.max((self.indent + 1) as usize).max(1); - } + *indent = max_indent.max((self.indent + 1) as usize).max(1); } fn fetch_flow_scalar(&mut self, single: bool) -> ScanResult { From 88833f8a06f662c40ee4bff907dcca52caa462b2 Mon Sep 17 00:00:00 2001 From: Ethiraric Date: Fri, 22 Dec 2023 15:43:28 +0100 Subject: [PATCH 231/380] Fix DK95-00, I guess. --- saphyr/src/scanner.rs | 26 ++++++++++++++++++-------- saphyr/tests/yaml-test-suite.rs | 1 - 2 files changed, 18 insertions(+), 9 deletions(-) diff --git a/saphyr/src/scanner.rs b/saphyr/src/scanner.rs index de22531..23c56ba 100644 --- a/saphyr/src/scanner.rs +++ b/saphyr/src/scanner.rs @@ -595,6 +595,11 @@ impl> Scanner { Ok(()) } + /// Skip over all whitespace and comments until the next token. + /// + /// # Errors + /// This function returns an error if a tabulation is encountered where there should not be + /// one. fn skip_to_next_token(&mut self) -> ScanResult { loop { // TODO(chenyh) BOM @@ -604,7 +609,10 @@ impl> Scanner { // "Indentation" only exists as long as a block is started, but does not exist // inside of flow-style constructs. Tabs are allowed as part of leaading // whitespaces outside of indentation. - '\t' if self.is_within_block() && self.leading_whitespace => { + '\t' if self.is_within_block() + && self.leading_whitespace + && (self.mark.col as isize) < self.indent => + { return Err(ScanError::new( self.mark, "tabs disallowed within this context (block indentation)", @@ -1239,18 +1247,13 @@ impl> Scanner { } } - // Eat whitespaces and comments to the end of the line. - self.lookahead(1); - - while is_blank(self.ch()) { + while is_blank(self.look_ch()) { self.skip(); - self.lookahead(1); } if self.ch() == '#' { - while !is_breakz(self.ch()) { + while !is_breakz(self.look_ch()) { self.skip(); - self.lookahead(1); } } @@ -1267,6 +1270,13 @@ impl> Scanner { self.skip_line(); } + if self.look_ch() == '\t' { + return Err(ScanError::new( + start_mark, + "a block scalar content cannot start with a tab", + )); + } + if increment > 0 { indent = if self.indent >= 0 { (self.indent + increment as isize) as usize diff --git a/saphyr/tests/yaml-test-suite.rs b/saphyr/tests/yaml-test-suite.rs index 42b1668..780e08d 100644 --- a/saphyr/tests/yaml-test-suite.rs +++ b/saphyr/tests/yaml-test-suite.rs @@ -299,7 +299,6 @@ fn expected_events(expected_tree: &str) -> Vec { static EXPECTED_FAILURES: &[&str] = &[ // These seem to be plain bugs // TAB as start of plain scalar instead of whitespace - "DK95-00", "Y79Y-06", "Y79Y-03", // unexpected pass "Y79Y-04", // unexpected pass From a80091795b438171f0b4431329da0369e55f594e Mon Sep 17 00:00:00 2001 From: Ethiraric Date: Fri, 22 Dec 2023 16:11:07 +0100 Subject: [PATCH 232/380] More fixes towards invalid tabs. --- saphyr/src/scanner.rs | 45 +++++++++++++++++++++++++++++++++ saphyr/tests/yaml-test-suite.rs | 2 -- 2 files changed, 45 insertions(+), 2 deletions(-) diff --git a/saphyr/src/scanner.rs b/saphyr/src/scanner.rs index 23c56ba..964a580 100644 --- a/saphyr/src/scanner.rs +++ b/saphyr/src/scanner.rs @@ -638,6 +638,50 @@ impl> Scanner { Ok(()) } + /// Skip over YAML whitespace (` `, `\n`, `\r`). + /// + /// # Errors + /// This function returns an error if the character after the whitespaces is a tab (`\t`) + /// character or if no whitespace was found. + fn skip_yaml_whitespace(&mut self) -> ScanResult { + let mut need_whitespace = true; + loop { + match self.look_ch() { + ' ' => { + self.skip(); + + need_whitespace = false; + } + '\n' | '\r' => { + self.lookahead(2); + self.skip_line(); + if self.flow_level == 0 { + self.allow_simple_key(); + } + need_whitespace = false; + } + '#' => { + while !is_breakz(self.ch()) { + self.skip(); + self.lookahead(1); + } + } + _ => break, + } + } + + if need_whitespace { + Err(ScanError::new(self.mark(), "expected whitespace")) + } else if self.ch() == '\t' { + Err(ScanError::new( + self.mark(), + "tabs disallowed in this context", + )) + } else { + Ok(()) + } + } + fn fetch_stream_start(&mut self) { let mark = self.mark; self.indent = -1; @@ -1760,6 +1804,7 @@ impl> Scanner { } self.skip(); + self.skip_yaml_whitespace()?; self.tokens.push_back(Token(start_mark, TokenType::Key)); Ok(()) } diff --git a/saphyr/tests/yaml-test-suite.rs b/saphyr/tests/yaml-test-suite.rs index 780e08d..cc998c0 100644 --- a/saphyr/tests/yaml-test-suite.rs +++ b/saphyr/tests/yaml-test-suite.rs @@ -299,7 +299,6 @@ fn expected_events(expected_tree: &str) -> Vec { static EXPECTED_FAILURES: &[&str] = &[ // These seem to be plain bugs // TAB as start of plain scalar instead of whitespace - "Y79Y-06", "Y79Y-03", // unexpected pass "Y79Y-04", // unexpected pass "Y79Y-05", // unexpected pass @@ -307,7 +306,6 @@ static EXPECTED_FAILURES: &[&str] = &[ "DK95-04", // TABs after marker ? or : (space required?) "Y79Y-07", - "Y79Y-08", "Y79Y-09", // Other TABs "DK95-01", // in double-quoted scalar From 458d22ef802cfc523815044827659fb0d799aad7 Mon Sep 17 00:00:00 2001 From: Ethiraric Date: Sat, 23 Dec 2023 23:01:06 +0100 Subject: [PATCH 233/380] Fix indent when `-` & entry have `\n` in-between. --- saphyr/src/scanner.rs | 153 +++++++++++++++++++++++++------- saphyr/tests/yaml-test-suite.rs | 1 - 2 files changed, 123 insertions(+), 31 deletions(-) diff --git a/saphyr/src/scanner.rs b/saphyr/src/scanner.rs index 964a580..be639a1 100644 --- a/saphyr/src/scanner.rs +++ b/saphyr/src/scanner.rs @@ -181,6 +181,31 @@ impl SimpleKey { } } +/// An indentation level on the stack of indentations. +#[derive(Clone, Debug, Default)] +struct Indent { + /// The former indentation level. + indent: isize, + /// Whether, upon closing, this indents generates a `BlockEnd` token. + /// + /// There are levels of indentation which do not start a block. Examples of this would be: + /// ```yaml + /// - + /// foo # ok + /// - + /// bar # ko, bar needs to be indented further than the `-`. + /// - [ + /// baz, # ok + /// quux # ko, quux needs to be indented further than the '-'. + /// ] # ko, the closing bracket needs to be indented further than the `-`. + /// ``` + /// + /// The indentation level created by the `-` is for a single entry in the sequence. Emitting a + /// `BlockEnd` when this indentation block ends would generate one `BlockEnd` per entry in the + /// sequence, although we must have exactly one to end the sequence. + needs_block_end: bool, +} + #[derive(Debug)] #[allow(clippy::struct_excessive_bools)] pub struct Scanner { @@ -190,7 +215,9 @@ pub struct Scanner { buffer: VecDeque, error: Option, + /// Whether we have already emitted the `StreamStart` token. stream_start_produced: bool, + /// Whether we have already emitted the `StreamEnd` token. stream_end_produced: bool, adjacent_value_allowed_at: usize, /// Whether a simple key could potentially start at the current position. @@ -198,8 +225,11 @@ pub struct Scanner { /// Simple keys are the opposite of complex keys which are keys starting with `?`. simple_key_allowed: bool, simple_keys: Vec, + /// The current indentation level. indent: isize, - indents: Vec, + /// List of all block indentation levels we are in (except the current one). + indents: Vec, + /// Level of nesting of flow sequences. flow_level: u8, tokens_parsed: usize, token_available: bool, @@ -247,7 +277,9 @@ fn is_blank(c: char) -> bool { c == ' ' || c == '\t' } -/// Check whether the character is nil or a whitespace (`\0`, ` `, `\t`). +/// Check whether the character is nil, a linebreak or a whitespace. +/// +/// `\0`, ` `, `\t`, `\n`, `\r` #[inline] fn is_blankz(c: char) -> bool { is_blank(c) || is_breakz(c) @@ -454,13 +486,14 @@ impl> Scanner { pub fn fetch_next_token(&mut self) -> ScanResult { self.lookahead(1); - // println!("--> fetch_next_token Cur {:?} {:?}", self.mark, self.ch()); + // eprintln!("--> fetch_next_token Cur {:?} {:?}", self.mark, self.ch()); if !self.stream_start_produced { self.fetch_stream_start(); return Ok(()); } self.skip_to_next_token()?; + // eprintln!("--> fetch_next_token wo ws {:?} {:?}", self.mark, self.ch()); self.stale_simple_keys()?; @@ -607,16 +640,22 @@ impl> Scanner { ' ' => self.skip(), // Tabs may not be used as indentation. // "Indentation" only exists as long as a block is started, but does not exist - // inside of flow-style constructs. Tabs are allowed as part of leaading + // inside of flow-style constructs. Tabs are allowed as part of leading // whitespaces outside of indentation. + // If a flow-style construct is in an indented block, its contents must still be + // indented. Also, tabs are allowed anywhere in it if it has no content. '\t' if self.is_within_block() && self.leading_whitespace && (self.mark.col as isize) < self.indent => { - return Err(ScanError::new( - self.mark, - "tabs disallowed within this context (block indentation)", - )); + self.skip_ws_to_eol(true); + // If we have content on that line with a tab, return an error. + if !is_breakz(self.ch()) { + return Err(ScanError::new( + self.mark, + "tabs disallowed within this context (block indentation)", + )); + } } '\t' => self.skip(), '\n' | '\r' => { @@ -682,6 +721,23 @@ impl> Scanner { } } + /// Skip yaml whitespace at most up to eol. Also skips comments. + fn skip_ws_to_eol(&mut self, skip_tab: bool) { + loop { + match self.look_ch() { + ' ' => self.skip(), + '\t' if skip_tab => self.skip(), + '#' => { + while !is_breakz(self.ch()) { + self.skip(); + self.lookahead(1); + } + } + _ => break, + } + } + } + fn fetch_stream_start(&mut self) { let mark = self.mark; self.indent = -1; @@ -1153,6 +1209,7 @@ impl> Scanner { Ok(()) } + /// Push the `FlowEntry` token and skip over the `,`. fn fetch_flow_entry(&mut self) -> ScanResult { self.remove_simple_key()?; self.allow_simple_key(); @@ -1173,6 +1230,7 @@ impl> Scanner { .ok_or_else(|| ScanError::new(self.mark, "recursion limit exceeded"))?; Ok(()) } + fn decrease_flow_level(&mut self) { if self.flow_level > 0 { self.flow_level -= 1; @@ -1180,34 +1238,48 @@ impl> Scanner { } } + /// Push the `Block*` token(s) and skip over the `-`. + /// + /// Add an indentation level and push a `BlockSequenceStart` token if needed, then push a + /// `BlockEntry` token. + /// This function only skips over the `-` and does not fetch the entry value. fn fetch_block_entry(&mut self) -> ScanResult { - if self.flow_level == 0 { - // Check if we are allowed to start a new entry. - if !self.simple_key_allowed { - return Err(ScanError::new( - self.mark, - "block sequence entries are not allowed in this context", - )); - } - - let mark = self.mark; - // generate BLOCK-SEQUENCE-START if indented - self.roll_indent(mark.col, None, TokenType::BlockSequenceStart, mark); - } else { + if self.flow_level > 0 { // - * only allowed in block return Err(ScanError::new( self.mark, r#""-" is only valid inside a block"#, )); } + // Check if we are allowed to start a new entry. + if !self.simple_key_allowed { + return Err(ScanError::new( + self.mark, + "block sequence entries are not allowed in this context", + )); + } + + // Skip over the `-`. + let mark = self.mark; + self.skip(); + + // generate BLOCK-SEQUENCE-START if indented + self.roll_indent(mark.col, None, TokenType::BlockSequenceStart, mark); + self.skip_ws_to_eol(false); + if is_break(self.look_ch()) || is_flow(self.ch()) { + self.indents.push(Indent { + indent: self.indent, + needs_block_end: false, + }); + self.indent += 1; + } + self.remove_simple_key()?; self.allow_simple_key(); - let start_mark = self.mark; - self.skip(); - self.tokens - .push_back(Token(start_mark, TokenType::BlockEntry)); + .push_back(Token(self.mark, TokenType::BlockEntry)); + Ok(()) } @@ -1809,6 +1881,7 @@ impl> Scanner { Ok(()) } + /// Fetch a value from a mapping (after a `:`). fn fetch_value(&mut self) -> ScanResult { let sk = self.simple_keys.last().unwrap().clone(); let start_mark = self.mark; @@ -1868,8 +1941,23 @@ impl> Scanner { return; } + // If the last indent was a non-block indent, remove it. + // This means that we prepared an indent that we thought we wouldn't use, but realized just + // now that it is a block indent. + if self.indent == col as isize { + if let Some(indent) = self.indents.last() { + if !indent.needs_block_end { + self.indent = indent.indent; + self.indents.pop(); + } + } + } + if self.indent < col as isize { - self.indents.push(self.indent); + self.indents.push(Indent { + indent: self.indent, + needs_block_end: true, + }); self.indent = col as isize; let tokens_parsed = self.tokens_parsed; match number { @@ -1889,14 +1977,19 @@ impl> Scanner { return; } while self.indent > col { - self.tokens.push_back(Token(self.mark, TokenType::BlockEnd)); - self.indent = self.indents.pop().unwrap(); + let indent = self.indents.pop().unwrap(); + self.indent = indent.indent; + if indent.needs_block_end { + self.tokens.push_back(Token(self.mark, TokenType::BlockEnd)); + } } } fn save_simple_key(&mut self) -> ScanResult { - let required = self.flow_level > 0 && self.indent == (self.mark.col as isize); if self.simple_key_allowed { + let required = self.flow_level > 0 + && self.indent == (self.mark.col as isize) + && self.indents.last().unwrap().needs_block_end; let mut sk = SimpleKey::new(self.mark); sk.possible = true; sk.required = required; @@ -1922,6 +2015,6 @@ impl> Scanner { /// Return whether the scanner is inside a block but outside of a flow sequence. fn is_within_block(&self) -> bool { - !self.indents.is_empty() && self.flow_level == 0 + !self.indents.is_empty() } } diff --git a/saphyr/tests/yaml-test-suite.rs b/saphyr/tests/yaml-test-suite.rs index cc998c0..d9d4f72 100644 --- a/saphyr/tests/yaml-test-suite.rs +++ b/saphyr/tests/yaml-test-suite.rs @@ -299,7 +299,6 @@ fn expected_events(expected_tree: &str) -> Vec { static EXPECTED_FAILURES: &[&str] = &[ // These seem to be plain bugs // TAB as start of plain scalar instead of whitespace - "Y79Y-03", // unexpected pass "Y79Y-04", // unexpected pass "Y79Y-05", // unexpected pass // TABs in whitespace-only lines From 125c0a411af9b701d67b7ee3337a842df2536ff0 Mon Sep 17 00:00:00 2001 From: Ethiraric Date: Sat, 23 Dec 2023 23:25:14 +0100 Subject: [PATCH 234/380] Fix towards invalid tabs. --- saphyr/src/scanner.rs | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/saphyr/src/scanner.rs b/saphyr/src/scanner.rs index be639a1..dc3a314 100644 --- a/saphyr/src/scanner.rs +++ b/saphyr/src/scanner.rs @@ -680,8 +680,7 @@ impl> Scanner { /// Skip over YAML whitespace (` `, `\n`, `\r`). /// /// # Errors - /// This function returns an error if the character after the whitespaces is a tab (`\t`) - /// character or if no whitespace was found. + /// This function returns an error if no whitespace was found. fn skip_yaml_whitespace(&mut self) -> ScanResult { let mut need_whitespace = true; loop { @@ -711,11 +710,6 @@ impl> Scanner { if need_whitespace { Err(ScanError::new(self.mark(), "expected whitespace")) - } else if self.ch() == '\t' { - Err(ScanError::new( - self.mark(), - "tabs disallowed in this context", - )) } else { Ok(()) } @@ -1265,6 +1259,17 @@ impl> Scanner { // generate BLOCK-SEQUENCE-START if indented self.roll_indent(mark.col, None, TokenType::BlockSequenceStart, mark); + if self.ch() == '\t' { + self.skip_to_next_token()?; + self.lookahead(2); + if self.buffer[0] == '-' && is_blankz(self.buffer[1]) { + return Err(ScanError::new( + self.mark, + "'-' must be followed by a valid YAML whitespace", + )); + } + } + self.skip_ws_to_eol(false); if is_break(self.look_ch()) || is_flow(self.ch()) { self.indents.push(Indent { @@ -1877,6 +1882,12 @@ impl> Scanner { self.skip(); self.skip_yaml_whitespace()?; + if self.ch() == '\t' { + return Err(ScanError::new( + self.mark(), + "tabs disallowed in this context", + )); + } self.tokens.push_back(Token(start_mark, TokenType::Key)); Ok(()) } From 49bfa590a606b8d46eff5d3c59211a5e77ce3b70 Mon Sep 17 00:00:00 2001 From: Ethiraric Date: Sun, 24 Dec 2023 00:02:42 +0100 Subject: [PATCH 235/380] More fixes towards invalid tabs. --- saphyr/src/scanner.rs | 53 ++++++++++++++++++++++++++++++++----------- 1 file changed, 40 insertions(+), 13 deletions(-) diff --git a/saphyr/src/scanner.rs b/saphyr/src/scanner.rs index dc3a314..4d31447 100644 --- a/saphyr/src/scanner.rs +++ b/saphyr/src/scanner.rs @@ -648,7 +648,7 @@ impl> Scanner { && self.leading_whitespace && (self.mark.col as isize) < self.indent => { - self.skip_ws_to_eol(true); + self.skip_ws_to_eol(SkipTabs::Yes); // If we have content on that line with a tab, return an error. if !is_breakz(self.ch()) { return Err(ScanError::new( @@ -716,11 +716,15 @@ impl> Scanner { } /// Skip yaml whitespace at most up to eol. Also skips comments. - fn skip_ws_to_eol(&mut self, skip_tab: bool) { + fn skip_ws_to_eol(&mut self, skip_tabs: SkipTabs) -> SkipTabs { + let mut encountered_tab = false; loop { match self.look_ch() { ' ' => self.skip(), - '\t' if skip_tab => self.skip(), + '\t' if skip_tabs != SkipTabs::No => { + encountered_tab = true; + self.skip(); + } '#' => { while !is_breakz(self.ch()) { self.skip(); @@ -730,6 +734,8 @@ impl> Scanner { _ => break, } } + + SkipTabs::Result(encountered_tab) } fn fetch_stream_start(&mut self) { @@ -1259,18 +1265,16 @@ impl> Scanner { // generate BLOCK-SEQUENCE-START if indented self.roll_indent(mark.col, None, TokenType::BlockSequenceStart, mark); - if self.ch() == '\t' { - self.skip_to_next_token()?; - self.lookahead(2); - if self.buffer[0] == '-' && is_blankz(self.buffer[1]) { - return Err(ScanError::new( - self.mark, - "'-' must be followed by a valid YAML whitespace", - )); - } + let found_tabs = self.skip_ws_to_eol(SkipTabs::Yes).found_tabs(); + self.lookahead(2); + if found_tabs && self.buffer[0] == '-' && is_blankz(self.buffer[1]) { + return Err(ScanError::new( + self.mark, + "'-' must be followed by a valid YAML whitespace", + )); } - self.skip_ws_to_eol(false); + self.skip_ws_to_eol(SkipTabs::No); if is_break(self.look_ch()) || is_flow(self.ch()) { self.indents.push(Indent { indent: self.indent, @@ -2029,3 +2033,26 @@ impl> Scanner { !self.indents.is_empty() } } + +/// Behavior to adopt regarding treating tabs as whitespace. +#[derive(Copy, Clone, Eq, PartialEq)] +enum SkipTabs { + /// Skip all tabs as whitespace. + Yes, + /// Don't skip any tab. Return from the function when encountering one. + No, + /// Return value from the function. + Result( + /// Whether tabs were encountered. + bool, + ), +} + +impl SkipTabs { + /// Whether tabs were found while skipping whitespace. + /// + /// This function must be called after a call to `skip_ws_to_eol`. + fn found_tabs(self) -> bool { + matches!(self, SkipTabs::Result(true)) + } +} From 5437f5d9cb9d8e229959327bfc0db2de0d098cde Mon Sep 17 00:00:00 2001 From: Ethiraric Date: Mon, 25 Dec 2023 23:48:32 +0100 Subject: [PATCH 236/380] More fixes towards invalid tabs. --- saphyr/src/scanner.rs | 6 ++++++ saphyr/tests/yaml-test-suite.rs | 3 --- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/saphyr/src/scanner.rs b/saphyr/src/scanner.rs index 4d31447..03e2a1a 100644 --- a/saphyr/src/scanner.rs +++ b/saphyr/src/scanner.rs @@ -1819,6 +1819,12 @@ impl> Scanner { while is_blank(self.look_ch()) || is_break(self.ch()) { if is_blank(self.ch()) { if leading_blanks && (self.mark.col as isize) < indent && self.ch() == '\t' { + // If our line contains only whitespace, this is not an error. + // Skip over it. + self.skip_ws_to_eol(SkipTabs::Yes); + if is_breakz(self.ch()) { + continue; + } return Err(ScanError::new( start_mark, "while scanning a plain scalar, found a tab", diff --git a/saphyr/tests/yaml-test-suite.rs b/saphyr/tests/yaml-test-suite.rs index d9d4f72..ba9abdb 100644 --- a/saphyr/tests/yaml-test-suite.rs +++ b/saphyr/tests/yaml-test-suite.rs @@ -299,10 +299,7 @@ fn expected_events(expected_tree: &str) -> Vec { static EXPECTED_FAILURES: &[&str] = &[ // These seem to be plain bugs // TAB as start of plain scalar instead of whitespace - "Y79Y-04", // unexpected pass - "Y79Y-05", // unexpected pass // TABs in whitespace-only lines - "DK95-04", // TABs after marker ? or : (space required?) "Y79Y-07", "Y79Y-09", From 06b03f5cf5ea5e964a5db7453ec233695d042906 Mon Sep 17 00:00:00 2001 From: Ethiraric Date: Tue, 26 Dec 2023 00:34:29 +0100 Subject: [PATCH 237/380] More fixes towards invalid tabs. --- saphyr/src/scanner.rs | 35 +++++++++++++++++++++++++++++---- saphyr/tests/yaml-test-suite.rs | 1 - 2 files changed, 31 insertions(+), 5 deletions(-) diff --git a/saphyr/src/scanner.rs b/saphyr/src/scanner.rs index 03e2a1a..423a930 100644 --- a/saphyr/src/scanner.rs +++ b/saphyr/src/scanner.rs @@ -718,9 +718,13 @@ impl> Scanner { /// Skip yaml whitespace at most up to eol. Also skips comments. fn skip_ws_to_eol(&mut self, skip_tabs: SkipTabs) -> SkipTabs { let mut encountered_tab = false; + let mut has_yaml_ws = false; loop { match self.look_ch() { - ' ' => self.skip(), + ' ' => { + has_yaml_ws = true; + self.skip(); + } '\t' if skip_tabs != SkipTabs::No => { encountered_tab = true; self.skip(); @@ -735,7 +739,7 @@ impl> Scanner { } } - SkipTabs::Result(encountered_tab) + SkipTabs::Result(encountered_tab, has_yaml_ws) } fn fetch_stream_start(&mut self) { @@ -1906,6 +1910,19 @@ impl> Scanner { fn fetch_value(&mut self) -> ScanResult { let sk = self.simple_keys.last().unwrap().clone(); let start_mark = self.mark; + + // Skip over ':'. + self.skip(); + if self.look_ch() == '\t' + && !self.skip_ws_to_eol(SkipTabs::Yes).has_valid_yaml_ws() + && self.ch() == '-' + { + return Err(ScanError::new( + self.mark, + "':' must be followed by a valid YAML whitespace", + )); + } + if sk.possible { // insert simple key let tok = Token(sk.mark, TokenType::Key); @@ -1946,7 +1963,6 @@ impl> Scanner { self.disallow_simple_key(); } } - self.skip(); self.tokens.push_back(Token(start_mark, TokenType::Value)); Ok(()) @@ -2041,6 +2057,8 @@ impl> Scanner { } /// Behavior to adopt regarding treating tabs as whitespace. +/// +/// Although tab is a valid yaml whitespace, it doesn't always behave the same as a space. #[derive(Copy, Clone, Eq, PartialEq)] enum SkipTabs { /// Skip all tabs as whitespace. @@ -2051,6 +2069,8 @@ enum SkipTabs { Result( /// Whether tabs were encountered. bool, + /// Whether at least 1 valid yaml whitespace has been encountered. + bool, ), } @@ -2059,6 +2079,13 @@ impl SkipTabs { /// /// This function must be called after a call to `skip_ws_to_eol`. fn found_tabs(self) -> bool { - matches!(self, SkipTabs::Result(true)) + matches!(self, SkipTabs::Result(true, _)) + } + + /// Whether a valid YAML whitespace has been found in skipped-over content. + /// + /// This function must be called after a call to `skip_ws_to_eol`. + fn has_valid_yaml_ws(self) -> bool { + matches!(self, SkipTabs::Result(_, true)) } } diff --git a/saphyr/tests/yaml-test-suite.rs b/saphyr/tests/yaml-test-suite.rs index ba9abdb..1ef3b48 100644 --- a/saphyr/tests/yaml-test-suite.rs +++ b/saphyr/tests/yaml-test-suite.rs @@ -301,7 +301,6 @@ static EXPECTED_FAILURES: &[&str] = &[ // TAB as start of plain scalar instead of whitespace // TABs in whitespace-only lines // TABs after marker ? or : (space required?) - "Y79Y-07", "Y79Y-09", // Other TABs "DK95-01", // in double-quoted scalar From 6e8af26435268f581197846b4be803ff48d1b1ea Mon Sep 17 00:00:00 2001 From: Ethiraric Date: Tue, 26 Dec 2023 00:48:36 +0100 Subject: [PATCH 238/380] More fixes towards invalid tabs? --- saphyr/src/scanner.rs | 2 +- saphyr/tests/yaml-test-suite.rs | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/saphyr/src/scanner.rs b/saphyr/src/scanner.rs index 423a930..6da43bd 100644 --- a/saphyr/src/scanner.rs +++ b/saphyr/src/scanner.rs @@ -1915,7 +1915,7 @@ impl> Scanner { self.skip(); if self.look_ch() == '\t' && !self.skip_ws_to_eol(SkipTabs::Yes).has_valid_yaml_ws() - && self.ch() == '-' + && (self.ch() == '-' || is_alpha(self.ch())) { return Err(ScanError::new( self.mark, diff --git a/saphyr/tests/yaml-test-suite.rs b/saphyr/tests/yaml-test-suite.rs index 1ef3b48..9fbef36 100644 --- a/saphyr/tests/yaml-test-suite.rs +++ b/saphyr/tests/yaml-test-suite.rs @@ -301,7 +301,6 @@ static EXPECTED_FAILURES: &[&str] = &[ // TAB as start of plain scalar instead of whitespace // TABs in whitespace-only lines // TABs after marker ? or : (space required?) - "Y79Y-09", // Other TABs "DK95-01", // in double-quoted scalar // Empty key in flow mappings From 2e5605ddc2497548bf76aca9e7b7a01cc6fb6695 Mon Sep 17 00:00:00 2001 From: Ethiraric Date: Tue, 26 Dec 2023 18:06:20 +0100 Subject: [PATCH 239/380] More fixes towards invalid tabs. --- saphyr/src/parser.rs | 4 +++ saphyr/src/scanner.rs | 64 +++++++++++++++++++++------------ saphyr/tests/yaml-test-suite.rs | 6 ---- 3 files changed, 46 insertions(+), 28 deletions(-) diff --git a/saphyr/src/parser.rs b/saphyr/src/parser.rs index 62ffd6f..f55e4e3 100644 --- a/saphyr/src/parser.rs +++ b/saphyr/src/parser.rs @@ -234,6 +234,7 @@ impl> Parser { } } + /// Peek at the next token from the scanner. fn peek_token(&mut self) -> Result<&Token, ScanError> { match self.token { None => { @@ -244,6 +245,9 @@ impl> Parser { } } + /// Extract and return the next token from the scanner. + /// + /// This function does _not_ make use of `self.token`. fn scan_next_token(&mut self) -> Result { let token = self.scanner.next(); match token { diff --git a/saphyr/src/scanner.rs b/saphyr/src/scanner.rs index 6da43bd..c6c1015 100644 --- a/saphyr/src/scanner.rs +++ b/saphyr/src/scanner.rs @@ -730,9 +730,8 @@ impl> Scanner { self.skip(); } '#' => { - while !is_breakz(self.ch()) { + while !is_breakz(self.look_ch()) { self.skip(); - self.lookahead(1); } } _ => break, @@ -1280,11 +1279,7 @@ impl> Scanner { self.skip_ws_to_eol(SkipTabs::No); if is_break(self.look_ch()) || is_flow(self.ch()) { - self.indents.push(Indent { - indent: self.indent, - needs_block_end: false, - }); - self.indent += 1; + self.roll_one_col_indent(); } self.remove_simple_key()?; @@ -1335,17 +1330,16 @@ impl> Scanner { // skip '|' or '>' self.skip(); - self.lookahead(1); + self.unroll_non_block_indents(); - if self.ch() == '+' || self.ch() == '-' { + if self.look_ch() == '+' || self.ch() == '-' { if self.ch() == '+' { chomping = 1; } else { chomping = -1; } self.skip(); - self.lookahead(1); - if is_digit(self.ch()) { + if is_digit(self.look_ch()) { if self.ch() == '0' { return Err(ScanError::new( start_mark, @@ -1376,15 +1370,7 @@ impl> Scanner { } } - while is_blank(self.look_ch()) { - self.skip(); - } - - if self.ch() == '#' { - while !is_breakz(self.look_ch()) { - self.skip(); - } - } + self.skip_ws_to_eol(SkipTabs::Yes); // Check if we are at the end of the line. if !is_breakz(self.ch()) { @@ -1584,8 +1570,8 @@ impl> Scanner { self.lookahead(2); leading_blanks = false; - // Consume non-blank characters. + // Consume non-blank characters. while !is_blankz(self.ch()) { match self.ch() { // Check for an escaped single quote. @@ -1683,6 +1669,12 @@ impl> Scanner { if is_blank(self.ch()) { // Consume a space or a tab character. if leading_blanks { + if self.ch() == '\t' && (self.mark.col as isize) < self.indent { + return Err(ScanError::new( + self.mark, + "tab cannot be used as indentation", + )); + } self.skip(); } else { whitespaces.push(self.ch()); @@ -1750,6 +1742,7 @@ impl> Scanner { } fn scan_plain_scalar(&mut self) -> Result { + self.unroll_non_block_indents(); let indent = self.indent + 1; let start_mark = self.mark; @@ -1936,6 +1929,7 @@ impl> Scanner { TokenType::BlockMappingStart, start_mark, ); + self.roll_one_col_indent(); self.simple_keys.last_mut().unwrap().possible = false; self.disallow_simple_key(); @@ -1956,6 +1950,7 @@ impl> Scanner { start_mark, ); } + self.roll_one_col_indent(); if self.flow_level == 0 { self.allow_simple_key(); @@ -1981,7 +1976,7 @@ impl> Scanner { // If the last indent was a non-block indent, remove it. // This means that we prepared an indent that we thought we wouldn't use, but realized just // now that it is a block indent. - if self.indent == col as isize { + if self.indent <= col as isize { if let Some(indent) = self.indents.last() { if !indent.needs_block_end { self.indent = indent.indent; @@ -2022,6 +2017,31 @@ impl> Scanner { } } + /// Add an indentation level of 1 column that does not start a block. + /// + /// See the documentation of [`Indent::needs_block_end`] for more details. + fn roll_one_col_indent(&mut self) { + if self.flow_level == 0 { + self.indents.push(Indent { + indent: self.indent, + needs_block_end: false, + }); + self.indent += 1; + } + } + + /// Unroll all last indents created with [`Self::roll_one_col_indent`]. + fn unroll_non_block_indents(&mut self) { + while let Some(indent) = self.indents.last() { + if indent.needs_block_end { + break; + } else { + self.indent = indent.indent; + self.indents.pop(); + } + } + } + fn save_simple_key(&mut self) -> ScanResult { if self.simple_key_allowed { let required = self.flow_level > 0 diff --git a/saphyr/tests/yaml-test-suite.rs b/saphyr/tests/yaml-test-suite.rs index 9fbef36..49a0445 100644 --- a/saphyr/tests/yaml-test-suite.rs +++ b/saphyr/tests/yaml-test-suite.rs @@ -297,12 +297,6 @@ fn expected_events(expected_tree: &str) -> Vec { #[rustfmt::skip] static EXPECTED_FAILURES: &[&str] = &[ - // These seem to be plain bugs - // TAB as start of plain scalar instead of whitespace - // TABs in whitespace-only lines - // TABs after marker ? or : (space required?) - // Other TABs - "DK95-01", // in double-quoted scalar // Empty key in flow mappings "CFD4", // Document with no nodes and document end From be6a05916f5e5e6ecf247bb049b7d1760bda931f Mon Sep 17 00:00:00 2001 From: Ethiraric Date: Tue, 26 Dec 2023 18:08:21 +0100 Subject: [PATCH 240/380] Add debugging helpers. --- saphyr/examples/dump_events.rs | 4 +++- saphyr/src/scanner.rs | 6 ++++++ saphyr/tests/spec_test.rs | 2 +- 3 files changed, 10 insertions(+), 2 deletions(-) diff --git a/saphyr/examples/dump_events.rs b/saphyr/examples/dump_events.rs index 876902c..49c564c 100644 --- a/saphyr/examples/dump_events.rs +++ b/saphyr/examples/dump_events.rs @@ -16,6 +16,7 @@ struct EventSink { impl MarkedEventReceiver for EventSink { fn on_event(&mut self, ev: Event, mark: Marker) { + eprintln!(" \x1B[;34m\u{21B3} {:?}\x1B[;m", &ev); self.events.push((ev, mark)); } } @@ -34,5 +35,6 @@ fn main() { let mut s = String::new(); f.read_to_string(&mut s).unwrap(); - dbg!(str_to_events(&s)); + // dbg!(str_to_events(&s)); + str_to_events(&s); } diff --git a/saphyr/src/scanner.rs b/saphyr/src/scanner.rs index c6c1015..9de8771 100644 --- a/saphyr/src/scanner.rs +++ b/saphyr/src/scanner.rs @@ -244,6 +244,12 @@ impl> Iterator for Scanner { return None; } match self.next_token() { + Ok(Some(tok)) => { + if std::env::var("YAMLRUST_DEBUG").is_ok() { + eprintln!("\x1B[;32m{:?} \x1B[;36m{:?}\x1B[;m", tok.1, tok.0); + } + Some(tok) + } Ok(tok) => tok, Err(e) => { self.error = Some(e); diff --git a/saphyr/tests/spec_test.rs b/saphyr/tests/spec_test.rs index 5e6dfa3..f78aa58 100644 --- a/saphyr/tests/spec_test.rs +++ b/saphyr/tests/spec_test.rs @@ -59,7 +59,7 @@ macro_rules! assert_next { match $v.next().unwrap() { $p => {} e => { - panic!("unexpected event: {:?}", e); + panic!("unexpected event: {:?} (expected {:?})", e, stringify!($p)); } } }; From 70066200275ff49e01983ce3c4a76a486d079517 Mon Sep 17 00:00:00 2001 From: Ethiraric Date: Tue, 26 Dec 2023 18:27:47 +0100 Subject: [PATCH 241/380] Add comments to `TokenType` and `Scanner`. --- saphyr/src/scanner.rs | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/saphyr/src/scanner.rs b/saphyr/src/scanner.rs index 9de8771..07c76b5 100644 --- a/saphyr/src/scanner.rs +++ b/saphyr/src/scanner.rs @@ -147,15 +147,26 @@ pub enum TokenType { FlowMappingStart, /// End of an inline mapping. FlowMappingEnd, + /// An entry in a block sequence (c.f.: [`TokenType::BlockSequenceStart`]). BlockEntry, + /// An entry in a flow sequence (c.f.: [`TokenType::FlowSequenceStart`]). FlowEntry, + /// A key in a mapping. Key, + /// A value in a mapping. Value, + /// A reference to an anchor. Alias(String), /// A YAML anchor (`&`/`*`). Anchor(String), - /// handle, suffix - Tag(String, String), + /// A YAML tag (starting with bangs `!`). + Tag( + /// The handle of the tag. + String, + /// The suffix of the tag. + String, + ), + /// A regular YAML scalar. Scalar(TScalarStyle, String), } @@ -209,10 +220,15 @@ struct Indent { #[derive(Debug)] #[allow(clippy::struct_excessive_bools)] pub struct Scanner { + /// The reader, providing with characters. rdr: T, + /// The position of the cursor within the reader. mark: Marker, + /// Buffer for tokens to be read. tokens: VecDeque, + /// Buffer for the next characters to consume. buffer: VecDeque, + /// The last error that happened. error: Option, /// Whether we have already emitted the `StreamStart` token. From e7f29450cae5dba14d2d577760382f6b0eab6db0 Mon Sep 17 00:00:00 2001 From: Ethiraric Date: Tue, 26 Dec 2023 19:11:17 +0100 Subject: [PATCH 242/380] Fix empty keys in implicit mappings. --- saphyr/src/scanner.rs | 43 +++++++++++++++++++++++++++++++++ saphyr/tests/yaml-test-suite.rs | 2 -- 2 files changed, 43 insertions(+), 2 deletions(-) diff --git a/saphyr/src/scanner.rs b/saphyr/src/scanner.rs index 07c76b5..75cc1ab 100644 --- a/saphyr/src/scanner.rs +++ b/saphyr/src/scanner.rs @@ -251,6 +251,15 @@ pub struct Scanner { token_available: bool, /// Whether all characters encountered since the last newline were whitespace. leading_whitespace: bool, + /// Whether we started a flow mapping. + /// + /// This is used to detect implicit flow mapping starts such as: + /// ```yaml + /// [ : foo ] # { null: "foo" } + /// ``` + flow_mapping_started: bool, + /// Whether we currently are in an implicit flow mapping. + implicit_flow_mapping: bool, } impl> Iterator for Scanner { @@ -365,6 +374,8 @@ impl> Scanner { tokens_parsed: 0, token_available: false, leading_whitespace: true, + flow_mapping_started: false, + implicit_flow_mapping: false, } } @@ -1217,6 +1228,10 @@ impl> Scanner { let start_mark = self.mark; self.skip(); + if tok == TokenType::FlowMappingStart { + self.flow_mapping_started = true; + } + self.tokens.push_back(Token(start_mark, tok)); Ok(()) } @@ -1227,6 +1242,8 @@ impl> Scanner { self.disallow_simple_key(); + self.end_implicit_mapping(self.mark); + let start_mark = self.mark; self.skip(); @@ -1239,6 +1256,8 @@ impl> Scanner { self.remove_simple_key()?; self.allow_simple_key(); + self.end_implicit_mapping(self.mark); + let start_mark = self.mark; self.skip(); @@ -1899,6 +1918,9 @@ impl> Scanner { TokenType::BlockMappingStart, start_mark, ); + } else { + // The parser, upon receiving a `Key`, will insert a `MappingStart` event. + self.flow_mapping_started = true; } self.remove_simple_key()?; @@ -1925,6 +1947,7 @@ impl> Scanner { fn fetch_value(&mut self) -> ScanResult { let sk = self.simple_keys.last().unwrap().clone(); let start_mark = self.mark; + self.implicit_flow_mapping = self.flow_level > 0 && !self.flow_mapping_started; // Skip over ':'. self.skip(); @@ -1943,6 +1966,12 @@ impl> Scanner { let tok = Token(sk.mark, TokenType::Key); let tokens_parsed = self.tokens_parsed; self.insert_token(sk.token_number - tokens_parsed, tok); + if self.implicit_flow_mapping { + self.insert_token( + sk.token_number - tokens_parsed, + Token(self.mark, TokenType::FlowMappingStart), + ); + } // Add the BLOCK-MAPPING-START token if needed. self.roll_indent( @@ -1956,6 +1985,10 @@ impl> Scanner { self.simple_keys.last_mut().unwrap().possible = false; self.disallow_simple_key(); } else { + if self.implicit_flow_mapping { + self.tokens + .push_back(Token(self.mark, TokenType::FlowMappingStart)); + } // The ':' indicator follows a complex key. if self.flow_level == 0 { if !self.simple_key_allowed { @@ -2096,6 +2129,16 @@ impl> Scanner { fn is_within_block(&self) -> bool { !self.indents.is_empty() } + + /// If an implicit mapping had started, end it. + fn end_implicit_mapping(&mut self, mark: Marker) { + if self.implicit_flow_mapping { + self.implicit_flow_mapping = false; + self.flow_mapping_started = false; + self.tokens + .push_back(Token(mark, TokenType::FlowMappingEnd)); + } + } } /// Behavior to adopt regarding treating tabs as whitespace. diff --git a/saphyr/tests/yaml-test-suite.rs b/saphyr/tests/yaml-test-suite.rs index 49a0445..0396fb4 100644 --- a/saphyr/tests/yaml-test-suite.rs +++ b/saphyr/tests/yaml-test-suite.rs @@ -297,8 +297,6 @@ fn expected_events(expected_tree: &str) -> Vec { #[rustfmt::skip] static EXPECTED_FAILURES: &[&str] = &[ - // Empty key in flow mappings - "CFD4", // Document with no nodes and document end "HWV9", "QT73", From 6308bbe98f37fecaf17e98374bd7ab4a8e706661 Mon Sep 17 00:00:00 2001 From: Ethiraric Date: Thu, 28 Dec 2023 01:12:38 +0100 Subject: [PATCH 243/380] Fix empty documents tests. --- saphyr/src/parser.rs | 10 +++++----- saphyr/tests/yaml-test-suite.rs | 3 --- 2 files changed, 5 insertions(+), 8 deletions(-) diff --git a/saphyr/src/parser.rs b/saphyr/src/parser.rs index f55e4e3..a6f80a4 100644 --- a/saphyr/src/parser.rs +++ b/saphyr/src/parser.rs @@ -3,6 +3,7 @@ use std::collections::HashMap; #[derive(Clone, Copy, PartialEq, Debug, Eq)] enum State { + /// We await the start of the stream. StreamStart, ImplicitDocumentStart, DocumentStart, @@ -265,6 +266,7 @@ impl> Parser { .expect("fetch_token needs to be preceded by peek_token") } + /// Skip the next token from the scanner. fn skip(&mut self) { self.token = None; //self.peek_token(); @@ -407,7 +409,7 @@ impl> Parser { } fn state_machine(&mut self) -> ParseResult { - // let next_tok = self.peek_token()?; + // let next_tok = self.peek_token().cloned()?; // println!("cur_state {:?}, next tok: {:?}", self.state, next_tok); match self.state { State::StreamStart => self.stream_start(), @@ -458,10 +460,8 @@ impl> Parser { } fn document_start(&mut self, implicit: bool) -> ParseResult { - if !implicit { - while let TokenType::DocumentEnd = self.peek_token()?.1 { - self.skip(); - } + while let TokenType::DocumentEnd = self.peek_token()?.1 { + self.skip(); } match *self.peek_token()? { diff --git a/saphyr/tests/yaml-test-suite.rs b/saphyr/tests/yaml-test-suite.rs index 0396fb4..1778e6f 100644 --- a/saphyr/tests/yaml-test-suite.rs +++ b/saphyr/tests/yaml-test-suite.rs @@ -297,9 +297,6 @@ fn expected_events(expected_tree: &str) -> Vec { #[rustfmt::skip] static EXPECTED_FAILURES: &[&str] = &[ - // Document with no nodes and document end - "HWV9", - "QT73", // Unusual characters in anchors/aliases "8XYN", // emoji!! "W5VH", // :@*!$": From e9bcc8a28b1e87c50362da64eeeda6c13c9d7e39 Mon Sep 17 00:00:00 2001 From: Ethiraric Date: Thu, 28 Dec 2023 01:48:19 +0100 Subject: [PATCH 244/380] Fix anchor names' character set. --- saphyr/src/scanner.rs | 46 ++++++++++++++++++++++++++------- saphyr/tests/yaml-test-suite.rs | 3 --- 2 files changed, 36 insertions(+), 13 deletions(-) diff --git a/saphyr/src/scanner.rs b/saphyr/src/scanner.rs index 75cc1ab..ce3ed86 100644 --- a/saphyr/src/scanner.rs +++ b/saphyr/src/scanner.rs @@ -351,6 +351,31 @@ fn is_flow(c: char) -> bool { matches!(c, ',' | '[' | ']' | '{' | '}') } +/// Check whether the character is the BOM character. +#[inline] +fn is_bom(c: char) -> bool { + c == '\u{FEFF}' +} + +/// Check whether the character is a YAML non-breaking character. +#[inline] +fn is_yaml_non_break(c: char) -> bool { + // TODO(ethiraric, 28/12/2023): is_printable + !is_break(c) && !is_bom(c) +} + +/// Check whether the character is NOT a YAML whitespace (` ` / `\t`). +#[inline] +fn is_yaml_non_space(c: char) -> bool { + is_yaml_non_break(c) && !is_blank(c) +} + +/// Check whether the character is a valid YAML anchor name character. +#[inline] +fn is_anchor_char(c: char) -> bool { + is_yaml_non_space(c) && !is_flow(c) && !is_z(c) +} + pub type ScanResult = Result<(), ScanError>; impl> Scanner { @@ -1193,20 +1218,12 @@ impl> Scanner { let start_mark = self.mark; self.skip(); - self.lookahead(1); - while is_alpha(self.ch()) || self.ch_is(':') { + while is_anchor_char(self.look_ch()) { string.push(self.ch()); self.skip(); - self.lookahead(1); } - if string.is_empty() - || match self.ch() { - c if is_blankz(c) => false, - '?' | ',' | ']' | '}' | '%' | '@' | '`' => false, - _ => true, - } - { + if string.is_empty() { return Err(ScanError::new(start_mark, "while scanning an anchor or alias, did not find expected alphabetic or numeric character")); } @@ -2174,3 +2191,12 @@ impl SkipTabs { matches!(self, SkipTabs::Result(_, true)) } } + +#[cfg(test)] +mod test { + #[test] + fn test_is_anchor_char() { + use super::is_anchor_char; + assert!(is_anchor_char('x')); + } +} diff --git a/saphyr/tests/yaml-test-suite.rs b/saphyr/tests/yaml-test-suite.rs index 1778e6f..2f11c2a 100644 --- a/saphyr/tests/yaml-test-suite.rs +++ b/saphyr/tests/yaml-test-suite.rs @@ -297,9 +297,6 @@ fn expected_events(expected_tree: &str) -> Vec { #[rustfmt::skip] static EXPECTED_FAILURES: &[&str] = &[ - // Unusual characters in anchors/aliases - "8XYN", // emoji!! - "W5VH", // :@*!$": // Flow mapping colon on next line / multiline key in flow mapping "4MUZ-00", "4MUZ-01", From 79046225e734d8af404ff730fee920d4dbeac0c3 Mon Sep 17 00:00:00 2001 From: Ethiraric Date: Sat, 30 Dec 2023 03:35:43 +0100 Subject: [PATCH 245/380] Minor improvement to debug prints. --- saphyr/examples/dump_events.rs | 2 +- saphyr/src/parser.rs | 3 +++ saphyr/src/scanner.rs | 14 ++++++++++++-- 3 files changed, 16 insertions(+), 3 deletions(-) diff --git a/saphyr/examples/dump_events.rs b/saphyr/examples/dump_events.rs index 49c564c..452eb69 100644 --- a/saphyr/examples/dump_events.rs +++ b/saphyr/examples/dump_events.rs @@ -16,7 +16,7 @@ struct EventSink { impl MarkedEventReceiver for EventSink { fn on_event(&mut self, ev: Event, mark: Marker) { - eprintln!(" \x1B[;34m\u{21B3} {:?}\x1B[;m", &ev); + eprintln!(" \x1B[;34m\u{21B3} {:?}\x1B[;m", &ev); self.events.push((ev, mark)); } } diff --git a/saphyr/src/parser.rs b/saphyr/src/parser.rs index a6f80a4..41dc8b0 100644 --- a/saphyr/src/parser.rs +++ b/saphyr/src/parser.rs @@ -411,6 +411,9 @@ impl> Parser { fn state_machine(&mut self) -> ParseResult { // let next_tok = self.peek_token().cloned()?; // println!("cur_state {:?}, next tok: {:?}", self.state, next_tok); + if std::env::var("YAMLRUST_DEBUG").is_ok() { + eprintln!("\n\x1B[;33mParser state: {:?} \x1B[;0m", self.state); + } match self.state { State::StreamStart => self.stream_start(), diff --git a/saphyr/src/scanner.rs b/saphyr/src/scanner.rs index ce3ed86..a155108 100644 --- a/saphyr/src/scanner.rs +++ b/saphyr/src/scanner.rs @@ -271,7 +271,10 @@ impl> Iterator for Scanner { match self.next_token() { Ok(Some(tok)) => { if std::env::var("YAMLRUST_DEBUG").is_ok() { - eprintln!("\x1B[;32m{:?} \x1B[;36m{:?}\x1B[;m", tok.1, tok.0); + eprintln!( + " \x1B[;32m\u{21B3} {:?} \x1B[;36m{:?}\x1B[;m", + tok.1, tok.0 + ); } Some(tok) } @@ -551,7 +554,14 @@ impl> Scanner { return Ok(()); } self.skip_to_next_token()?; - // eprintln!("--> fetch_next_token wo ws {:?} {:?}", self.mark, self.ch()); + + if std::env::var("YAMLRUST_DEBUG").is_ok() { + eprintln!( + " \x1B[38;5;244m\u{2192} fetch_next_token after whitespace {:?} {:?}\x1B[m", + self.mark, + self.ch() + ); + } self.stale_simple_keys()?; From 4c1376a2b08e5abefedb7c8203bd435e3dcccddf Mon Sep 17 00:00:00 2001 From: Ethiraric Date: Sat, 30 Dec 2023 03:37:17 +0100 Subject: [PATCH 246/380] Finally grasped how `SimpleKey` works. This is a huge commit that cannot easily be broken down as it contains fixes for the next ignored test in the suite which, one fixed, broke tests that used to pass and were only then fixed. There is also a substantial amount of comments that were added, especially around `SimpleKey`. Minor improvements around the code were added and I did not bother making a separate commit for them. Overall, that commit fixes 7 tests from the matrix that were related to the handling of simple keys. --- saphyr/src/scanner.rs | 164 ++++++++++++++++++++++++++------ saphyr/tests/yaml-test-suite.rs | 7 -- 2 files changed, 137 insertions(+), 34 deletions(-) diff --git a/saphyr/src/scanner.rs b/saphyr/src/scanner.rs index a155108..8f2be8c 100644 --- a/saphyr/src/scanner.rs +++ b/saphyr/src/scanner.rs @@ -106,6 +106,7 @@ impl fmt::Display for ScanError { } } +/// The contents of a scanner token. #[derive(Clone, PartialEq, Debug, Eq)] pub enum TokenType { NoToken, @@ -170,18 +171,79 @@ pub enum TokenType { Scalar(TScalarStyle, String), } +/// A scanner token. #[derive(Clone, PartialEq, Debug, Eq)] pub struct Token(pub Marker, pub TokenType); +/// A scalar that was parsed and may correspond to a simple key. +/// +/// Upon scanning the following yaml: +/// ```yaml +/// a: b +/// ``` +/// We do not know that `a` is a key for a map until we have reached the following `:`. For this +/// YAML, we would store `a` as a scalar token in the [`Scanner`], but not emit it yet. It would be +/// kept inside the scanner until more context is fetched and we are able to know whether it is a +/// plain scalar or a key. +/// +/// For example, see the following 2 yaml documents: +/// ```yaml +/// --- +/// a: b # Here, `a` is a key. +/// ... +/// --- +/// a # Here, `a` is a plain scalar. +/// ... +/// ``` +/// An instance of [`SimpleKey`] is created in the [`Scanner`] when such ambiguity occurs. +/// +/// In both documents, scanning `a` would lead to the creation of a [`SimpleKey`] with +/// [`Self::possible`] set to `true`. The token for `a` would be pushed in the [`Scanner`] but not +/// yet emitted. Instead, more context would be fetched (through [`Scanner::fetch_more_tokens`]). +/// +/// In the first document, upon reaching the `:`, the [`SimpleKey`] would be inspected and our +/// scalar `a` since it is a possible key, would be "turned" into a key. This is done by prepending +/// a [`TokenType::Key`] to our scalar token in the [`Scanner`]. This way, the +/// [`crate::parser::Parser`] would read the [`TokenType::Key`] token before the +/// [`TokenType::Scalar`] token. +/// +/// In the second document however, reaching the EOF would stale the [`SimpleKey`] and no +/// [`TokenType::Key`] would be emitted by the scanner. #[derive(Clone, PartialEq, Debug, Eq)] struct SimpleKey { + /// Whether the token this [`SimpleKey`] refers to may still be a key. + /// + /// Sometimes, when we have more context, we notice that what we thought could be a key no + /// longer can be. In that case, [`Self::possible`] is set to `false`. + /// + /// For instance, let us consider the following invalid YAML: + /// ```yaml + /// key + /// : value + /// ``` + /// Upon reading the `\n` after `key`, the [`SimpleKey`] that was created for `key` is staled + /// and [`Self::possible`] set to `false`. possible: bool, + /// Whether the token this [`SimpleKey`] refers to is required to be a key. + /// + /// With more context, we may know for sure that the token must be a key. If the YAML is + /// invalid, it may happen that the token be deemed not a key. In such event, an error has to + /// be raised. This boolean helps us know when to raise such error. + /// + /// TODO(ethiraric, 30/12/2023): Example of when this happens. required: bool, + /// The index of the token referred to by the [`SimpleKey`]. + /// + /// This is the index in the scanner, which takes into account both the tokens that have been + /// emitted and those about to be emitted. See [`Scanner::tokens_parsed`] and + /// [`Scanner::tokens`] for more details. token_number: usize, + /// The position at which the token the [`SimpleKey`] refers to is. mark: Marker, } impl SimpleKey { + /// Create a new [`SimpleKey`] at the given `Marker` and with the given flow level. fn new(mark: Marker) -> SimpleKey { SimpleKey { possible: false, @@ -217,6 +279,15 @@ struct Indent { needs_block_end: bool, } +/// The YAML scanner. +/// +/// This corresponds to the low-level interface when reading YAML. The scanner emits token as they +/// are read (akin to a lexer), but it also holds sufficient context to be able to disambiguate +/// some of the constructs. It has understanding of indentation and whitespace and is able to +/// generate error messages for some invalid YAML constructs. +/// +/// It is however not a full parser and needs [`parser::Parser`] to fully detect invalid YAML +/// documents. #[derive(Debug)] #[allow(clippy::struct_excessive_bools)] pub struct Scanner { @@ -224,7 +295,12 @@ pub struct Scanner { rdr: T, /// The position of the cursor within the reader. mark: Marker, - /// Buffer for tokens to be read. + /// Buffer for tokens to be returned. + /// + /// This buffer can hold some temporary tokens that are not yet ready to be returned. For + /// instance, if we just read a scalar, it can be a value or a key if an implicit mapping + /// follows. In this case, the token stays in the `VecDeque` but cannot be returned from + /// [`Self::next`] until we have more context. tokens: VecDeque, /// Buffer for the next characters to consume. buffer: VecDeque, @@ -240,6 +316,10 @@ pub struct Scanner { /// /// Simple keys are the opposite of complex keys which are keys starting with `?`. simple_key_allowed: bool, + /// A stack of potential simple keys. + /// + /// Refer to the documentation of [`SimpleKey`] for a more in-depth explanation of what they + /// are. simple_keys: Vec, /// The current indentation level. indent: isize, @@ -247,7 +327,11 @@ pub struct Scanner { indents: Vec, /// Level of nesting of flow sequences. flow_level: u8, + /// The number of tokens that have been returned from the scanner. + /// + /// This excludes the tokens from [`Self::tokens`]. tokens_parsed: usize, + /// Whether a token is ready to be taken from [`Self::tokens`]. token_available: bool, /// Whether all characters encountered since the last newline were whitespace. leading_whitespace: bool, @@ -407,6 +491,10 @@ impl> Scanner { } } + /// Get a copy of the last error that was encountered, if any. + /// + /// This does not clear the error state and further calls to [`Self::get_error`] will return (a + /// clone of) the same error. #[inline] pub fn get_error(&self) -> Option { self.error.as_ref().map(std::clone::Clone::clone) @@ -425,7 +513,7 @@ impl> Scanner { } } - /// Consume the next character. Remove from buffer and update mark. + /// Consume the next character, remove it from the buffer and update the mark. #[inline] fn skip(&mut self) { let c = self.buffer.pop_front().unwrap(); @@ -489,12 +577,6 @@ impl> Scanner { self.buffer[0] == c } - #[allow(dead_code)] - #[inline] - fn eof(&self) -> bool { - self.ch_is('\0') - } - #[inline] pub fn stream_started(&self) -> bool { self.stream_start_produced @@ -600,6 +682,10 @@ impl> Scanner { return Ok(()); } + if (self.mark.col as isize) < self.indent { + return Err(ScanError::new(self.mark, "invalid indentation")); + } + let c = self.buffer[0]; let nc = self.buffer[1]; match c { @@ -663,7 +749,9 @@ impl> Scanner { need_more = true; } else { need_more = false; + // Stale potential keys that we know won't be keys. self.stale_simple_keys()?; + // If our next token to be emitted may be a key, fetch more context. for sk in &self.simple_keys { if sk.possible && sk.token_number == self.tokens_parsed { need_more = true; @@ -682,10 +770,19 @@ impl> Scanner { Ok(()) } + /// Mark simple keys that can no longer be keys as such. + /// + /// This function sets `possible` to `false` to each key that, now we have more context, we + /// know will not be keys. + /// + /// # Errors + /// This function returns an error if one of the key we would stale was required to be a key. fn stale_simple_keys(&mut self) -> ScanResult { - for sk in &mut self.simple_keys { + for (_, sk) in self.simple_keys.iter_mut().enumerate() { if sk.possible - && (sk.mark.line < self.mark.line || sk.mark.index + 1024 < self.mark.index) + // If not in a flow construct, simple keys cannot span multiple lines. + && self.flow_level == 0 + && (sk.mark.line < self.mark.line || sk.mark.index + 1024 < self.mark.index) { if sk.required { return Err(ScanError::new(self.mark, "simple key expect ':'")); @@ -826,6 +923,15 @@ impl> Scanner { self.mark.line += 1; } + // If the stream ended, we won't have more context. We can stall all the simple keys we + // had. If one was required, however, that was an error and we must propagate it. + for sk in &mut self.simple_keys { + if sk.required && sk.possible { + return Err(ScanError::new(self.mark, "simple key expected")); + } + sk.possible = false; + } + self.unroll_indent(-1); self.remove_simple_key()?; self.disallow_simple_key(); @@ -990,11 +1096,9 @@ impl> Scanner { } let handle = self.scan_tag_handle(true, mark)?; - self.lookahead(1); /* Eat whitespaces. */ - while is_blank(self.ch()) { + while is_blank(self.look_ch()) { self.skip(); - self.lookahead(1); } let is_secondary = handle == "!!"; @@ -1013,7 +1117,7 @@ impl> Scanner { } fn fetch_tag(&mut self) -> ScanResult { - self.save_simple_key()?; + self.save_simple_key(); self.disallow_simple_key(); let tok = self.scan_tag()?; @@ -1213,7 +1317,7 @@ impl> Scanner { } fn fetch_anchor(&mut self, alias: bool) -> ScanResult { - self.save_simple_key()?; + self.save_simple_key(); self.disallow_simple_key(); let tok = self.scan_anchor(alias)?; @@ -1246,8 +1350,9 @@ impl> Scanner { fn fetch_flow_collection_start(&mut self, tok: TokenType) -> ScanResult { // The indicators '[' and '{' may start a simple key. - self.save_simple_key()?; + self.save_simple_key(); + self.roll_one_col_indent(); self.increase_flow_level()?; self.allow_simple_key(); @@ -1375,7 +1480,7 @@ impl> Scanner { } fn fetch_block_scalar(&mut self, literal: bool) -> ScanResult { - self.save_simple_key()?; + self.save_simple_key(); self.allow_simple_key(); let tok = self.scan_block_scalar(literal)?; @@ -1585,7 +1690,7 @@ impl> Scanner { } fn fetch_flow_scalar(&mut self, single: bool) -> ScanResult { - self.save_simple_key()?; + self.save_simple_key(); self.disallow_simple_key(); let tok = self.scan_flow_scalar(single)?; @@ -1800,7 +1905,7 @@ impl> Scanner { } fn fetch_plain_scalar(&mut self) -> ScanResult { - self.save_simple_key()?; + self.save_simple_key(); self.disallow_simple_key(); let tok = self.scan_plain_scalar()?; @@ -1991,11 +2096,16 @@ impl> Scanner { if sk.possible { // insert simple key let tok = Token(sk.mark, TokenType::Key); - let tokens_parsed = self.tokens_parsed; - self.insert_token(sk.token_number - tokens_parsed, tok); + self.insert_token(sk.token_number - self.tokens_parsed, tok); if self.implicit_flow_mapping { + if sk.mark.line < start_mark.line { + return Err(ScanError::new( + start_mark, + "illegal placement of ':' indicator", + )); + } self.insert_token( - sk.token_number - tokens_parsed, + sk.token_number - self.tokens_parsed, Token(self.mark, TokenType::FlowMappingStart), ); } @@ -2102,8 +2212,10 @@ impl> Scanner { /// Add an indentation level of 1 column that does not start a block. /// /// See the documentation of [`Indent::needs_block_end`] for more details. + /// An indentation is not added if we are inside a flow level or if the last indent is already + /// a non-block indent. fn roll_one_col_indent(&mut self) { - if self.flow_level == 0 { + if self.flow_level == 0 && self.indents.last().map_or(false, |x| x.needs_block_end) { self.indents.push(Indent { indent: self.indent, needs_block_end: false, @@ -2124,7 +2236,8 @@ impl> Scanner { } } - fn save_simple_key(&mut self) -> ScanResult { + /// Save the last token in [`Self::tokens`] as a simple key. + fn save_simple_key(&mut self) { if self.simple_key_allowed { let required = self.flow_level > 0 && self.indent == (self.mark.col as isize) @@ -2134,12 +2247,9 @@ impl> Scanner { sk.required = required; sk.token_number = self.tokens_parsed + self.tokens.len(); - self.remove_simple_key()?; - self.simple_keys.pop(); self.simple_keys.push(sk); } - Ok(()) } fn remove_simple_key(&mut self) -> ScanResult { diff --git a/saphyr/tests/yaml-test-suite.rs b/saphyr/tests/yaml-test-suite.rs index 2f11c2a..f13cf71 100644 --- a/saphyr/tests/yaml-test-suite.rs +++ b/saphyr/tests/yaml-test-suite.rs @@ -298,15 +298,8 @@ fn expected_events(expected_tree: &str) -> Vec { #[rustfmt::skip] static EXPECTED_FAILURES: &[&str] = &[ // Flow mapping colon on next line / multiline key in flow mapping - "4MUZ-00", - "4MUZ-01", - "4MUZ-02", "5MUD", - "9SA2", "K3WX", - "NJ66", - "UT92", - "VJP3-01", // Bare document after end marker "7Z25", "M7A3", From f0ae6473aab043ed11634cc9db650359561e85b0 Mon Sep 17 00:00:00 2001 From: Ethiraric Date: Thu, 18 Jan 2024 15:46:15 +0100 Subject: [PATCH 247/380] Fix towards flow mapping and colons. --- saphyr/src/scanner.rs | 1 + saphyr/tests/yaml-test-suite.rs | 3 --- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/saphyr/src/scanner.rs b/saphyr/src/scanner.rs index 8f2be8c..57ee8c8 100644 --- a/saphyr/src/scanner.rs +++ b/saphyr/src/scanner.rs @@ -1697,6 +1697,7 @@ impl> Scanner { // From spec: To ensure JSON compatibility, if a key inside a flow mapping is JSON-like, // YAML allows the following value to be specified adjacent to the “:”. + self.skip_to_next_token()?; self.adjacent_value_allowed_at = self.mark.index; self.tokens.push_back(tok); diff --git a/saphyr/tests/yaml-test-suite.rs b/saphyr/tests/yaml-test-suite.rs index f13cf71..570f696 100644 --- a/saphyr/tests/yaml-test-suite.rs +++ b/saphyr/tests/yaml-test-suite.rs @@ -297,9 +297,6 @@ fn expected_events(expected_tree: &str) -> Vec { #[rustfmt::skip] static EXPECTED_FAILURES: &[&str] = &[ - // Flow mapping colon on next line / multiline key in flow mapping - "5MUD", - "K3WX", // Bare document after end marker "7Z25", "M7A3", From 4118cfab7c5a7cc7491a402ca64ec7a71f2a3f72 Mon Sep 17 00:00:00 2001 From: Ethiraric Date: Thu, 18 Jan 2024 19:15:19 +0100 Subject: [PATCH 248/380] Split `fetch_flow_scalar`. --- saphyr/src/scanner.rs | 237 ++++++++++++++++++++++++------------------ 1 file changed, 138 insertions(+), 99 deletions(-) diff --git a/saphyr/src/scanner.rs b/saphyr/src/scanner.rs index 57ee8c8..c9da122 100644 --- a/saphyr/src/scanner.rs +++ b/saphyr/src/scanner.rs @@ -1741,98 +1741,15 @@ impl> Scanner { )); } - self.lookahead(2); - leading_blanks = false; + self.consume_flow_scalar_non_whitespace_chars( + single, + &mut string, + &mut leading_blanks, + &start_mark, + )?; - // Consume non-blank characters. - while !is_blankz(self.ch()) { - match self.ch() { - // Check for an escaped single quote. - '\'' if self.buffer[1] == '\'' && single => { - string.push('\''); - self.skip(); - self.skip(); - } - // Check for the right quote. - '\'' if single => break, - '"' if !single => break, - // Check for an escaped line break. - '\\' if !single && is_break(self.buffer[1]) => { - self.lookahead(3); - self.skip(); - self.skip_line(); - leading_blanks = true; - break; - } - // Check for an escape sequence. - '\\' if !single => { - let mut code_length = 0usize; - match self.buffer[1] { - '0' => string.push('\0'), - 'a' => string.push('\x07'), - 'b' => string.push('\x08'), - 't' | '\t' => string.push('\t'), - 'n' => string.push('\n'), - 'v' => string.push('\x0b'), - 'f' => string.push('\x0c'), - 'r' => string.push('\x0d'), - 'e' => string.push('\x1b'), - ' ' => string.push('\x20'), - '"' => string.push('"'), - '\'' => string.push('\''), - '\\' => string.push('\\'), - // NEL (#x85) - 'N' => string.push(char::from_u32(0x85).unwrap()), - // #xA0 - '_' => string.push(char::from_u32(0xA0).unwrap()), - // LS (#x2028) - 'L' => string.push(char::from_u32(0x2028).unwrap()), - // PS (#x2029) - 'P' => string.push(char::from_u32(0x2029).unwrap()), - 'x' => code_length = 2, - 'u' => code_length = 4, - 'U' => code_length = 8, - _ => { - return Err(ScanError::new( - start_mark, - "while parsing a quoted scalar, found unknown escape character", - )) - } - } - self.skip(); - self.skip(); - // Consume an arbitrary escape code. - if code_length > 0 { - self.lookahead(code_length); - let mut value = 0u32; - for i in 0..code_length { - if !is_hex(self.buffer[i]) { - return Err(ScanError::new(start_mark, - "while parsing a quoted scalar, did not find expected hexadecimal number")); - } - value = (value << 4) + as_hex(self.buffer[i]); - } - - let Some(ch) = char::from_u32(value) else { - return Err(ScanError::new(start_mark, "while parsing a quoted scalar, found invalid Unicode character escape code")); - }; - string.push(ch); - - for _ in 0..code_length { - self.skip(); - } - } - } - c => { - string.push(c); - self.skip(); - } - } - self.lookahead(2); - } - self.lookahead(1); - match self.ch() { + match self.look_ch() { '\'' if single => break, '"' if !single => break, _ => {} @@ -1867,6 +1784,7 @@ impl> Scanner { } self.lookahead(1); } + // Join the whitespaces or fold line breaks. if leading_blanks { if leading_break.is_empty() { @@ -1892,17 +1810,138 @@ impl> Scanner { // Eat the right quote. self.skip(); - if single { - Ok(Token( - start_mark, - TokenType::Scalar(TScalarStyle::SingleQuoted, string), - )) + let style = if single { + TScalarStyle::SingleQuoted } else { - Ok(Token( - start_mark, - TokenType::Scalar(TScalarStyle::DoubleQuoted, string), - )) + TScalarStyle::DoubleQuoted + }; + Ok(Token(start_mark, TokenType::Scalar(style, string))) + } + + /// Consume successive non-whitespace characters from a flow scalar. + /// + /// This function resolves escape sequences and stops upon encountering a whitespace, the end + /// of the stream or the closing character for the scalar (`'` for single quoted scalars, `"` + /// for double quoted scalars). + /// + /// # Errors + /// Return an error if an invalid escape sequence is found. + fn consume_flow_scalar_non_whitespace_chars( + &mut self, + single: bool, + string: &mut String, + leading_blanks: &mut bool, + start_mark: &Marker, + ) -> Result<(), ScanError> { + self.lookahead(2); + while !is_blankz(self.ch()) { + match self.ch() { + // Check for an escaped single quote. + '\'' if self.buffer[1] == '\'' && single => { + string.push('\''); + self.skip(); + self.skip(); + } + // Check for the right quote. + '\'' if single => break, + '"' if !single => break, + // Check for an escaped line break. + '\\' if !single && is_break(self.buffer[1]) => { + self.lookahead(3); + self.skip(); + self.skip_line(); + *leading_blanks = true; + break; + } + // Check for an escape sequence. + '\\' if !single => { + string.push(self.resolve_flow_scalar_escape_sequence(start_mark)?); + } + c => { + string.push(c); + self.skip(); + } + } + self.lookahead(2); } + Ok(()) + } + + /// Escape the sequence we encounter in a flow scalar. + /// + /// `self.ch()` must point to the `\` starting the escape sequence. + /// + /// # Errors + /// Return an error if an invalid escape sequence is found. + fn resolve_flow_scalar_escape_sequence( + &mut self, + start_mark: &Marker, + ) -> Result { + let mut code_length = 0usize; + let mut ret = '\0'; + + match self.buffer[1] { + '0' => ret = '\0', + 'a' => ret = '\x07', + 'b' => ret = '\x08', + 't' | '\t' => ret = '\t', + 'n' => ret = '\n', + 'v' => ret = '\x0b', + 'f' => ret = '\x0c', + 'r' => ret = '\x0d', + 'e' => ret = '\x1b', + ' ' => ret = '\x20', + '"' => ret = '"', + '\'' => ret = '\'', + '\\' => ret = '\\', + // Unicode next line (#x85) + 'N' => ret = char::from_u32(0x85).unwrap(), + // Unicode non-breaking space (#xA0) + '_' => ret = char::from_u32(0xA0).unwrap(), + // Unicode line separator (#x2028) + 'L' => ret = char::from_u32(0x2028).unwrap(), + // Unicode paragraph separator (#x2029) + 'P' => ret = char::from_u32(0x2029).unwrap(), + 'x' => code_length = 2, + 'u' => code_length = 4, + 'U' => code_length = 8, + _ => { + return Err(ScanError::new( + *start_mark, + "while parsing a quoted scalar, found unknown escape character", + )) + } + } + self.skip(); + self.skip(); + + // Consume an arbitrary escape code. + if code_length > 0 { + self.lookahead(code_length); + let mut value = 0u32; + for i in 0..code_length { + if !is_hex(self.buffer[i]) { + return Err(ScanError::new( + *start_mark, + "while parsing a quoted scalar, did not find expected hexadecimal number", + )); + } + value = (value << 4) + as_hex(self.buffer[i]); + } + + let Some(ch) = char::from_u32(value) else { + return Err(ScanError::new( + *start_mark, + "while parsing a quoted scalar, found invalid Unicode character escape code", + )); + }; + ret = ch; + + for _ in 0..code_length { + self.skip(); + } + } + Ok(ret) } fn fetch_plain_scalar(&mut self) -> ScanResult { From 1606523193cbde224d2aecc2cd8b12e762c5933b Mon Sep 17 00:00:00 2001 From: Ethiraric Date: Thu, 18 Jan 2024 19:16:02 +0100 Subject: [PATCH 249/380] Fix towards invalid trailing characters. --- saphyr/src/scanner.rs | 23 +++++++++++++++++++++++ saphyr/tests/basic.rs | 12 +++++++++++- 2 files changed, 34 insertions(+), 1 deletion(-) diff --git a/saphyr/src/scanner.rs b/saphyr/src/scanner.rs index c9da122..cd139f4 100644 --- a/saphyr/src/scanner.rs +++ b/saphyr/src/scanner.rs @@ -679,6 +679,10 @@ impl> Scanner { && is_blankz(self.buffer[3]) { self.fetch_document_indicator(TokenType::DocumentEnd)?; + self.skip_ws_to_eol(SkipTabs::Yes); + if !is_breakz(self.ch()) { + return Err(ScanError::new(self.mark, "invalid content after document end marker")); + } return Ok(()); } @@ -1809,6 +1813,25 @@ impl> Scanner { // Eat the right quote. self.skip(); + // Ensure there is no invalid trailing content. + self.skip_ws_to_eol(SkipTabs::Yes); + match self.ch() { + // These can be encountered in flow sequences or mappings. + ',' | '}' | ']' if self.flow_level > 0 => {} + // An end-of-line / end-of-stream is fine. No trailing content. + c if is_breakz(c) => {} + // ':' can be encountered if our scalar is a key. + // Outside of flow contexts, keys cannot span multiple lines + ':' if self.flow_level == 0 && start_mark.line == self.mark.line => {} + // Inside a flow context, this is allowed. + ':' if self.flow_level > 0 => {} + _ => { + return Err(ScanError::new( + self.mark, + "invalid trailing content after double-quoted scalar", + )); + } + } let style = if single { TScalarStyle::SingleQuoted diff --git a/saphyr/tests/basic.rs b/saphyr/tests/basic.rs index e551776..946843a 100644 --- a/saphyr/tests/basic.rs +++ b/saphyr/tests/basic.rs @@ -52,7 +52,9 @@ scalar key: [1, 2]] key1:a2 "; - let Err(error) = YamlLoader::load_from_str(s) else { panic!() }; + let Err(error) = YamlLoader::load_from_str(s) else { + panic!() + }; assert_eq!( error.info(), "mapping values are not allowed in this context" @@ -235,6 +237,14 @@ fn test_issue_65() { assert!(YamlLoader::load_from_str(b).is_err()); } +#[test] +fn test_issue_65_mwe() { + // A MWE for `test_issue_65`. The error over there is that there is invalid trailing content + // after a double quoted string. + let b = r#""foo" l"#; + assert!(YamlLoader::load_from_str(b).is_err()); +} + #[test] fn test_bad_docstart() { assert!(YamlLoader::load_from_str("---This used to cause an infinite loop").is_ok()); From 0a1aebaf3c4473b2ee12dd7a1bef4f547677bf9d Mon Sep 17 00:00:00 2001 From: Ethiraric Date: Thu, 18 Jan 2024 19:16:17 +0100 Subject: [PATCH 250/380] Fix towards multiple documents in a single stream. --- saphyr/src/parser.rs | 8 +++++++- saphyr/tests/yaml-test-suite.rs | 1 - 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/saphyr/src/parser.rs b/saphyr/src/parser.rs index 41dc8b0..658ffd5 100644 --- a/saphyr/src/parser.rs +++ b/saphyr/src/parser.rs @@ -555,8 +555,10 @@ impl> Parser { } fn document_end(&mut self) -> ParseResult { + let mut explicit_end = false; let marker: Marker = match *self.peek_token()? { Token(mark, TokenType::DocumentEnd) => { + explicit_end = true; self.skip(); mark } @@ -564,7 +566,11 @@ impl> Parser { }; self.tags.clear(); - self.state = State::DocumentStart; + if explicit_end { + self.state = State::ImplicitDocumentStart; + } else { + self.state = State::DocumentStart; + } Ok((Event::DocumentEnd, marker)) } diff --git a/saphyr/tests/yaml-test-suite.rs b/saphyr/tests/yaml-test-suite.rs index 570f696..2fb6284 100644 --- a/saphyr/tests/yaml-test-suite.rs +++ b/saphyr/tests/yaml-test-suite.rs @@ -298,7 +298,6 @@ fn expected_events(expected_tree: &str) -> Vec { #[rustfmt::skip] static EXPECTED_FAILURES: &[&str] = &[ // Bare document after end marker - "7Z25", "M7A3", // Scalar marker on document start line "DK3J", From 0b58be145882238c966bc762fc96d755e2b2b3b6 Mon Sep 17 00:00:00 2001 From: Ethiraric Date: Fri, 19 Jan 2024 14:55:06 +0100 Subject: [PATCH 251/380] Fix possible misindent in block scalar. --- saphyr/src/scanner.rs | 18 ++++++++++++++++-- saphyr/tests/yaml-test-suite.rs | 5 ----- 2 files changed, 16 insertions(+), 7 deletions(-) diff --git a/saphyr/src/scanner.rs b/saphyr/src/scanner.rs index cd139f4..60a3d5b 100644 --- a/saphyr/src/scanner.rs +++ b/saphyr/src/scanner.rs @@ -681,7 +681,10 @@ impl> Scanner { self.fetch_document_indicator(TokenType::DocumentEnd)?; self.skip_ws_to_eol(SkipTabs::Yes); if !is_breakz(self.ch()) { - return Err(ScanError::new(self.mark, "invalid content after document end marker")); + return Err(ScanError::new( + self.mark, + "invalid content after document end marker", + )); } return Ok(()); } @@ -1690,7 +1693,18 @@ impl> Scanner { } } - *indent = max_indent.max((self.indent + 1) as usize).max(1); + // In case a yaml looks like: + // ```yaml + // | + // foo + // bar + // ``` + // We need to set the indent to 0 and not 1. In all other cases, the indent must be at + // least 1. When in the above example, `self.indent` will be set to -1. + *indent = max_indent.max((self.indent + 1) as usize); + if self.indent > 0 { + *indent = (*indent).max(1); + } } fn fetch_flow_scalar(&mut self, single: bool) -> ScanResult { diff --git a/saphyr/tests/yaml-test-suite.rs b/saphyr/tests/yaml-test-suite.rs index 2fb6284..63ad2e4 100644 --- a/saphyr/tests/yaml-test-suite.rs +++ b/saphyr/tests/yaml-test-suite.rs @@ -297,11 +297,6 @@ fn expected_events(expected_tree: &str) -> Vec { #[rustfmt::skip] static EXPECTED_FAILURES: &[&str] = &[ - // Bare document after end marker - "M7A3", - // Scalar marker on document start line - "DK3J", - "FP8R", // Comments on nonempty lines need leading space "9JBA", "CVW2", From f27a7c25a898dc4afc1c4c9a89c053b7cc381305 Mon Sep 17 00:00:00 2001 From: Ethiraric Date: Fri, 19 Jan 2024 16:21:56 +0100 Subject: [PATCH 252/380] Fixes towards spaces before comments. --- saphyr/src/scanner.rs | 50 ++++++++++++++++----------------- saphyr/tests/yaml-test-suite.rs | 6 ---- 2 files changed, 25 insertions(+), 31 deletions(-) diff --git a/saphyr/src/scanner.rs b/saphyr/src/scanner.rs index 60a3d5b..c0a7cef 100644 --- a/saphyr/src/scanner.rs +++ b/saphyr/src/scanner.rs @@ -679,7 +679,7 @@ impl> Scanner { && is_blankz(self.buffer[3]) { self.fetch_document_indicator(TokenType::DocumentEnd)?; - self.skip_ws_to_eol(SkipTabs::Yes); + self.skip_ws_to_eol(SkipTabs::Yes)?; if !is_breakz(self.ch()) { return Err(ScanError::new( self.mark, @@ -820,7 +820,7 @@ impl> Scanner { && self.leading_whitespace && (self.mark.col as isize) < self.indent => { - self.skip_ws_to_eol(SkipTabs::Yes); + self.skip_ws_to_eol(SkipTabs::Yes)?; // If we have content on that line with a tab, return an error. if !is_breakz(self.ch()) { return Err(ScanError::new( @@ -888,7 +888,7 @@ impl> Scanner { } /// Skip yaml whitespace at most up to eol. Also skips comments. - fn skip_ws_to_eol(&mut self, skip_tabs: SkipTabs) -> SkipTabs { + fn skip_ws_to_eol(&mut self, skip_tabs: SkipTabs) -> Result { let mut encountered_tab = false; let mut has_yaml_ws = false; loop { @@ -901,6 +901,13 @@ impl> Scanner { encountered_tab = true; self.skip(); } + // YAML comments must be preceded by whitespace. + '#' if !encountered_tab && !has_yaml_ws => { + return Err(ScanError::new( + self.mark, + "comments must be separated from other tokens by whitespace", + )); + } '#' => { while !is_breakz(self.look_ch()) { self.skip(); @@ -910,7 +917,7 @@ impl> Scanner { } } - SkipTabs::Result(encountered_tab, has_yaml_ws) + Ok(SkipTabs::Result(encountered_tab, has_yaml_ws)) } fn fetch_stream_start(&mut self) { @@ -955,6 +962,7 @@ impl> Scanner { self.disallow_simple_key(); let tok = self.scan_directive()?; + self.skip_ws_to_eol(SkipTabs::Yes)?; self.tokens.push_back(tok); @@ -988,17 +996,7 @@ impl> Scanner { }; self.lookahead(1); - while is_blank(self.ch()) { - self.skip(); - self.lookahead(1); - } - - if self.ch() == '#' { - while !is_breakz(self.ch()) { - self.skip(); - self.lookahead(1); - } - } + self.skip_ws_to_eol(SkipTabs::Yes)?; if !is_breakz(self.ch()) { return Err(ScanError::new( @@ -1070,8 +1068,7 @@ impl> Scanner { fn scan_version_directive_number(&mut self, mark: &Marker) -> Result { let mut val = 0u32; let mut length = 0usize; - self.lookahead(1); - while is_digit(self.ch()) { + while let Some(digit) = self.look_ch().to_digit(10) { if length + 1 > 9 { return Err(ScanError::new( *mark, @@ -1079,9 +1076,8 @@ impl> Scanner { )); } length += 1; - val = val * 10 + ((self.ch() as u32) - ('0' as u32)); + val = val * 10 + digit; self.skip(); - self.lookahead(1); } if length == 0 { @@ -1371,6 +1367,8 @@ impl> Scanner { self.flow_mapping_started = true; } + self.skip_ws_to_eol(SkipTabs::Yes)?; + self.tokens.push_back(Token(start_mark, tok)); Ok(()) } @@ -1385,6 +1383,7 @@ impl> Scanner { let start_mark = self.mark; self.skip(); + self.skip_ws_to_eol(SkipTabs::Yes)?; self.tokens.push_back(Token(start_mark, tok)); Ok(()) @@ -1399,6 +1398,7 @@ impl> Scanner { let start_mark = self.mark; self.skip(); + self.skip_ws_to_eol(SkipTabs::Yes)?; self.tokens .push_back(Token(start_mark, TokenType::FlowEntry)); @@ -1448,7 +1448,7 @@ impl> Scanner { // generate BLOCK-SEQUENCE-START if indented self.roll_indent(mark.col, None, TokenType::BlockSequenceStart, mark); - let found_tabs = self.skip_ws_to_eol(SkipTabs::Yes).found_tabs(); + let found_tabs = self.skip_ws_to_eol(SkipTabs::Yes)?.found_tabs(); self.lookahead(2); if found_tabs && self.buffer[0] == '-' && is_blankz(self.buffer[1]) { return Err(ScanError::new( @@ -1457,7 +1457,7 @@ impl> Scanner { )); } - self.skip_ws_to_eol(SkipTabs::No); + self.skip_ws_to_eol(SkipTabs::No)?; if is_break(self.look_ch()) || is_flow(self.ch()) { self.roll_one_col_indent(); } @@ -1550,7 +1550,7 @@ impl> Scanner { } } - self.skip_ws_to_eol(SkipTabs::Yes); + self.skip_ws_to_eol(SkipTabs::Yes)?; // Check if we are at the end of the line. if !is_breakz(self.ch()) { @@ -1828,7 +1828,7 @@ impl> Scanner { // Eat the right quote. self.skip(); // Ensure there is no invalid trailing content. - self.skip_ws_to_eol(SkipTabs::Yes); + self.skip_ws_to_eol(SkipTabs::Yes)?; match self.ch() { // These can be encountered in flow sequences or mappings. ',' | '}' | ']' if self.flow_level > 0 => {} @@ -2068,7 +2068,7 @@ impl> Scanner { if leading_blanks && (self.mark.col as isize) < indent && self.ch() == '\t' { // If our line contains only whitespace, this is not an error. // Skip over it. - self.skip_ws_to_eol(SkipTabs::Yes); + self.skip_ws_to_eol(SkipTabs::Yes)?; if is_breakz(self.ch()) { continue; } @@ -2161,7 +2161,7 @@ impl> Scanner { // Skip over ':'. self.skip(); if self.look_ch() == '\t' - && !self.skip_ws_to_eol(SkipTabs::Yes).has_valid_yaml_ws() + && !self.skip_ws_to_eol(SkipTabs::Yes)?.has_valid_yaml_ws() && (self.ch() == '-' || is_alpha(self.ch())) { return Err(ScanError::new( diff --git a/saphyr/tests/yaml-test-suite.rs b/saphyr/tests/yaml-test-suite.rs index 63ad2e4..a64a90c 100644 --- a/saphyr/tests/yaml-test-suite.rs +++ b/saphyr/tests/yaml-test-suite.rs @@ -297,12 +297,6 @@ fn expected_events(expected_tree: &str) -> Vec { #[rustfmt::skip] static EXPECTED_FAILURES: &[&str] = &[ - // Comments on nonempty lines need leading space - "9JBA", - "CVW2", - "MUS6-00", - "SU5Z", - "X4QW", // Directives (various) "9HCY", // Directive after content "EB22", // Directive after content From 7b744d092e874cd908af9bc14216c639efdb7526 Mon Sep 17 00:00:00 2001 From: Ethiraric Date: Fri, 19 Jan 2024 18:33:09 +0100 Subject: [PATCH 253/380] Fixes towards implicit document end. --- saphyr/src/parser.rs | 11 +++++++++++ saphyr/tests/yaml-test-suite.rs | 4 ---- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/saphyr/src/parser.rs b/saphyr/src/parser.rs index 658ffd5..b0b3188 100644 --- a/saphyr/src/parser.rs +++ b/saphyr/src/parser.rs @@ -271,9 +271,11 @@ impl> Parser { self.token = None; //self.peek_token(); } + /// Pops the top-most state and make it the current state. fn pop_state(&mut self) { self.state = self.states.pop().unwrap(); } + /// Push a new state atop the state stack. fn push_state(&mut self, state: State) { self.states.push(state); } @@ -569,8 +571,17 @@ impl> Parser { if explicit_end { self.state = State::ImplicitDocumentStart; } else { + if let Token(mark, TokenType::VersionDirective(..) | TokenType::TagDirective(..)) = + *self.peek_token()? + { + return Err(ScanError::new( + mark, + "missing explicit document end marker before directive", + )); + } self.state = State::DocumentStart; } + Ok((Event::DocumentEnd, marker)) } diff --git a/saphyr/tests/yaml-test-suite.rs b/saphyr/tests/yaml-test-suite.rs index a64a90c..7c5ace6 100644 --- a/saphyr/tests/yaml-test-suite.rs +++ b/saphyr/tests/yaml-test-suite.rs @@ -298,10 +298,6 @@ fn expected_events(expected_tree: &str) -> Vec { #[rustfmt::skip] static EXPECTED_FAILURES: &[&str] = &[ // Directives (various) - "9HCY", // Directive after content - "EB22", // Directive after content - "MUS6-01", // no document end marker? - "RHX7", // no document end marker "SF5V", // duplicate directive "W4TN", // scalar confused as directive // Losing trailing newline From ff28b55d223ba4cd574caa95c895a57e1ef9e232 Mon Sep 17 00:00:00 2001 From: Ethiraric Date: Fri, 19 Jan 2024 18:37:38 +0100 Subject: [PATCH 254/380] Fix duplicate version directive. --- saphyr/src/parser.rs | 7 ++++++- saphyr/tests/yaml-test-suite.rs | 1 - 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/saphyr/src/parser.rs b/saphyr/src/parser.rs index b0b3188..d8c1009 100644 --- a/saphyr/src/parser.rs +++ b/saphyr/src/parser.rs @@ -498,15 +498,20 @@ impl> Parser { } fn parser_process_directives(&mut self) -> Result<(), ScanError> { + let mut version_directive_received = false; loop { let mut tags = HashMap::new(); match self.peek_token()? { - Token(_, TokenType::VersionDirective(_, _)) => { + Token(mark, TokenType::VersionDirective(_, _)) => { // XXX parsing with warning according to spec //if major != 1 || minor > 2 { // return Err(ScanError::new(tok.0, // "found incompatible YAML document")); //} + if version_directive_received == true { + return Err(ScanError::new(*mark, "duplicate version directive")); + } + version_directive_received = true; } Token(mark, TokenType::TagDirective(handle, prefix)) => { if tags.contains_key(handle) { diff --git a/saphyr/tests/yaml-test-suite.rs b/saphyr/tests/yaml-test-suite.rs index 7c5ace6..1e88b65 100644 --- a/saphyr/tests/yaml-test-suite.rs +++ b/saphyr/tests/yaml-test-suite.rs @@ -298,7 +298,6 @@ fn expected_events(expected_tree: &str) -> Vec { #[rustfmt::skip] static EXPECTED_FAILURES: &[&str] = &[ // Directives (various) - "SF5V", // duplicate directive "W4TN", // scalar confused as directive // Losing trailing newline "JEF9-02", From c0caeb84e93b8e69f90a7603dc8800ed58f6a889 Mon Sep 17 00:00:00 2001 From: Ethiraric Date: Fri, 19 Jan 2024 19:33:26 +0100 Subject: [PATCH 255/380] Fix block scalars and document end interaction. --- saphyr/src/scanner.rs | 16 ++++++++++++++++ saphyr/tests/yaml-test-suite.rs | 4 +--- 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/saphyr/src/scanner.rs b/saphyr/src/scanner.rs index c0a7cef..18a681f 100644 --- a/saphyr/src/scanner.rs +++ b/saphyr/src/scanner.rs @@ -612,6 +612,16 @@ impl> Scanner { } } + /// Check whether the next characters correspond to an end of document. + /// + /// [`Self::lookahead`] must have been called before calling this function. + fn next_is_document_end(&self) -> bool { + self.buffer[0] == '.' + && self.buffer[1] == '.' + && self.buffer[2] == '.' + && is_blankz(self.buffer[3]) + } + /// Insert a token at the given position. fn insert_token(&mut self, pos: usize, tok: Token) { let old_len = self.tokens.len(); @@ -1592,6 +1602,12 @@ impl> Scanner { let start_mark = self.mark; while self.mark.col == indent && !is_z(self.ch()) { + if indent == 0 { + self.lookahead(4); + if self.next_is_document_end() { + break; + } + } // We are at the beginning of a non-empty line. trailing_blank = is_blank(self.ch()); if !literal && !leading_break.is_empty() && !leading_blank && !trailing_blank { diff --git a/saphyr/tests/yaml-test-suite.rs b/saphyr/tests/yaml-test-suite.rs index 1e88b65..45cd15d 100644 --- a/saphyr/tests/yaml-test-suite.rs +++ b/saphyr/tests/yaml-test-suite.rs @@ -220,7 +220,7 @@ fn events_differ(actual: Vec, expected: &str) -> Option { continue; } else { Some(format!( - "line {} differs: expected `{}`, found `{}`", + "line {} differs: \n=> expected `{}`\n=> found `{}`", idx, exp, act )) } @@ -297,8 +297,6 @@ fn expected_events(expected_tree: &str) -> Vec { #[rustfmt::skip] static EXPECTED_FAILURES: &[&str] = &[ - // Directives (various) - "W4TN", // scalar confused as directive // Losing trailing newline "JEF9-02", "L24T-01", From 628549085cca81e0e9e934e57f2af318219a5666 Mon Sep 17 00:00:00 2001 From: Ethiraric Date: Fri, 19 Jan 2024 20:21:36 +0100 Subject: [PATCH 256/380] Fix block scalar / eof interactions. --- saphyr/src/scanner.rs | 80 ++++++++++++++++++++++++--------- saphyr/tests/yaml-test-suite.rs | 5 +-- 2 files changed, 59 insertions(+), 26 deletions(-) diff --git a/saphyr/src/scanner.rs b/saphyr/src/scanner.rs index 18a681f..6b3572f 100644 --- a/saphyr/src/scanner.rs +++ b/saphyr/src/scanner.rs @@ -18,7 +18,7 @@ pub enum TScalarStyle { DoubleQuoted, Literal, - Foled, + Folded, } /// A location in a yaml document. @@ -1508,15 +1508,21 @@ impl> Scanner { #[allow(clippy::too_many_lines)] fn scan_block_scalar(&mut self, literal: bool) -> Result { let start_mark = self.mark; - let mut chomping: i32 = 0; + let mut chomping = Chomping::Clip; let mut increment: usize = 0; let mut indent: usize = 0; let mut trailing_blank: bool; let mut leading_blank: bool = false; + let style = if literal { + TScalarStyle::Literal + } else { + TScalarStyle::Folded + }; let mut string = String::new(); let mut leading_break = String::new(); let mut trailing_breaks = String::new(); + let mut chomping_break = String::new(); // skip '|' or '>' self.skip(); @@ -1524,9 +1530,9 @@ impl> Scanner { if self.look_ch() == '+' || self.ch() == '-' { if self.ch() == '+' { - chomping = 1; + chomping = Chomping::Keep; } else { - chomping = -1; + chomping = Chomping::Strip; } self.skip(); if is_digit(self.look_ch()) { @@ -1552,9 +1558,9 @@ impl> Scanner { self.lookahead(1); if self.ch() == '+' || self.ch() == '-' { if self.ch() == '+' { - chomping = 1; + chomping = Chomping::Keep; } else { - chomping = -1; + chomping = Chomping::Strip; } self.skip(); } @@ -1563,7 +1569,7 @@ impl> Scanner { self.skip_ws_to_eol(SkipTabs::Yes)?; // Check if we are at the end of the line. - if !is_breakz(self.ch()) { + if !is_breakz(self.look_ch()) { return Err(ScanError::new( start_mark, "while scanning a block scalar, did not find expected comment or line break", @@ -1572,7 +1578,7 @@ impl> Scanner { if is_break(self.ch()) { self.lookahead(2); - self.skip_line(); + self.read_break(&mut chomping_break); } if self.look_ch() == '\t' { @@ -1597,7 +1603,27 @@ impl> Scanner { self.skip_block_scalar_indent(indent, &mut trailing_breaks); } - self.lookahead(1); + // We have an end-of-stream with no content, e.g.: + // ```yaml + // - |+ + // ``` + if is_z(self.ch()) { + let contents = match chomping { + // We strip trailing linebreaks. Nothing remain. + Chomping::Strip => String::new(), + // There was no newline after the chomping indicator. + _ if self.mark.line == start_mark.line() => String::new(), + // We clip lines, and there was a newline after the chomping indicator. + // All other breaks are ignored. + Chomping::Clip => chomping_break, + // We keep lines. There was a newline after the chomping indicator but nothing + // else. + Chomping::Keep if trailing_breaks.is_empty() => chomping_break, + // Otherwise, the newline after chomping is ignored. + Chomping::Keep => trailing_breaks, + }; + return Ok(Token(start_mark, TokenType::Scalar(style, contents))); + } let start_mark = self.mark; @@ -1608,6 +1634,7 @@ impl> Scanner { break; } } + // We are at the beginning of a non-empty line. trailing_blank = is_blank(self.ch()); if !literal && !leading_break.is_empty() && !leading_blank && !trailing_blank { @@ -1643,25 +1670,21 @@ impl> Scanner { } // Chomp the tail. - if chomping != -1 { + if chomping != Chomping::Strip { string.push_str(&leading_break); + // If we had reached an eof but the last character wasn't an end-of-line, check if the + // last line was indented at least as the rest of the scalar, then we need to consider + // there is a newline. + if is_z(self.ch()) && self.mark.col >= indent.max(1) { + string.push('\n'); + } } - if chomping == 1 { + if chomping == Chomping::Keep { string.push_str(&trailing_breaks); } - if literal { - Ok(Token( - start_mark, - TokenType::Scalar(TScalarStyle::Literal, string), - )) - } else { - Ok(Token( - start_mark, - TokenType::Scalar(TScalarStyle::Foled, string), - )) - } + Ok(Token(start_mark, TokenType::Scalar(style, string))) } /// Skip the block scalar indentation and empty lines. @@ -2405,6 +2428,19 @@ impl SkipTabs { } } +/// Chomping, how final line breaks and trailing empty lines are interpreted. +/// +/// See YAML spec 8.1.1.2. +#[derive(PartialEq, Eq)] +pub enum Chomping { + /// The final line break and any trailing empty lines are excluded. + Strip, + /// The final line break is preserved, but trailing empty lines are excluded. + Clip, + /// The final line break and trailing empty lines are included. + Keep, +} + #[cfg(test)] mod test { #[test] diff --git a/saphyr/tests/yaml-test-suite.rs b/saphyr/tests/yaml-test-suite.rs index 45cd15d..f807ed7 100644 --- a/saphyr/tests/yaml-test-suite.rs +++ b/saphyr/tests/yaml-test-suite.rs @@ -161,7 +161,7 @@ impl EventReceiver for EventReporter { TScalarStyle::SingleQuoted => "'", TScalarStyle::DoubleQuoted => r#"""#, TScalarStyle::Literal => "|", - TScalarStyle::Foled => ">", + TScalarStyle::Folded => ">", TScalarStyle::Any => unreachable!(), }; format!( @@ -297,9 +297,6 @@ fn expected_events(expected_tree: &str) -> Vec { #[rustfmt::skip] static EXPECTED_FAILURES: &[&str] = &[ - // Losing trailing newline - "JEF9-02", - "L24T-01", // Dashes in flow sequence (should be forbidden) "G5U8", "YJV2", From 5dbd6f9a5589c47f83a8ec3a125ef9f2930806c4 Mon Sep 17 00:00:00 2001 From: Ethiraric Date: Fri, 19 Jan 2024 21:57:39 +0100 Subject: [PATCH 257/380] Fix use of dashes in flow contexts. --- saphyr/src/scanner.rs | 12 ++++++++++-- saphyr/tests/yaml-test-suite.rs | 3 --- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/saphyr/src/scanner.rs b/saphyr/src/scanner.rs index 6b3572f..528fd8e 100644 --- a/saphyr/src/scanner.rs +++ b/saphyr/src/scanner.rs @@ -2030,6 +2030,7 @@ impl> Scanner { Ok(()) } + #[allow(clippy::too_many_lines)] fn scan_plain_scalar(&mut self) -> Result { self.unroll_non_block_indents(); let indent = self.indent + 1; @@ -2044,7 +2045,6 @@ impl> Scanner { loop { /* Check for a document indicator. */ self.lookahead(4); - if self.mark.col == 0 && (((self.buffer[0] == '-') && (self.buffer[1] == '-') && (self.buffer[2] == '-')) || ((self.buffer[0] == '.') @@ -2058,6 +2058,14 @@ impl> Scanner { if self.ch() == '#' { break; } + + if self.flow_level > 0 && self.ch() == '-' && is_flow(self.buffer[1]) { + return Err(ScanError::new( + self.mark, + "plain scalar cannot start with '-' followed by ,[]{}", + )); + } + while !is_blankz(self.ch()) { // indicators can end a plain scalar, see 7.3.3. Plain Style match self.ch() { @@ -2066,7 +2074,7 @@ impl> Scanner { { break; } - ',' | '[' | ']' | '{' | '}' if self.flow_level > 0 => break, + c if is_flow(c) && self.flow_level > 0 => break, _ => {} } diff --git a/saphyr/tests/yaml-test-suite.rs b/saphyr/tests/yaml-test-suite.rs index f807ed7..3637689 100644 --- a/saphyr/tests/yaml-test-suite.rs +++ b/saphyr/tests/yaml-test-suite.rs @@ -297,9 +297,6 @@ fn expected_events(expected_tree: &str) -> Vec { #[rustfmt::skip] static EXPECTED_FAILURES: &[&str] = &[ - // Dashes in flow sequence (should be forbidden) - "G5U8", - "YJV2", // Misc "9MMW", // Mapping key in implicit mapping in flow sequence(!) "G9HC", // Anchor indent problem(?) From 7c3e8ed7c4ac34c9f7f28165cf2df09041f49880 Mon Sep 17 00:00:00 2001 From: Ethiraric Date: Fri, 19 Jan 2024 22:15:41 +0100 Subject: [PATCH 258/380] Lint and improve formatting. --- saphyr/tests/yaml-test-suite.rs | 67 +++++++++++++++++++++------------ 1 file changed, 42 insertions(+), 25 deletions(-) diff --git a/saphyr/tests/yaml-test-suite.rs b/saphyr/tests/yaml-test-suite.rs index 3637689..0933a59 100644 --- a/saphyr/tests/yaml-test-suite.rs +++ b/saphyr/tests/yaml-test-suite.rs @@ -37,12 +37,11 @@ fn main() -> Result<()> { .iter() .filter(|&&test| !tests.iter().any(|t| t.name == test)) .collect(); - if !missing_xfails.is_empty() { - panic!( - "The following EXPECTED_FAILURES not found during discovery: {:?}", - missing_xfails - ); - } + assert!( + missing_xfails.is_empty(), + "The following EXPECTED_FAILURES not found during discovery: {:?}", + missing_xfails + ); run_tests(&arguments, tests, run_yaml_test).exit(); } @@ -50,21 +49,40 @@ fn main() -> Result<()> { fn run_yaml_test(test: &Test) -> Outcome { let desc = &test.data; let actual_events = parse_to_events(&desc.yaml); - let events_diff = actual_events.map(|events| events_differ(events, &desc.expected_events)); - let mut error_text = match (events_diff, desc.expected_error) { + let events_diff = actual_events.map(|events| events_differ(&events, &desc.expected_events)); + let mut error_text = match (&events_diff, desc.expected_error) { (Ok(x), true) => Some(format!("no error when expected: {x:#?}")), - (Err(_), true) => None, - (Err(e), false) => Some(format!("unexpected error {:?}", e)), - (Ok(Some(diff)), false) => Some(format!("events differ: {}", diff)), - (Ok(None), false) => None, + (Err(_), true) | (Ok(None), false) => None, + (Err(e), false) => Some(format!("unexpected error {e:?}")), + (Ok(Some(diff)), false) => Some(format!("events differ: {diff}")), }; + + // Show a caret on error. if let Some(text) = &mut error_text { use std::fmt::Write; - let _ = write!(text, "\n### Input:\n{}\n### End", desc.yaml_visual); + let _ = writeln!(text, "\n### Input:\n{}\n### End", desc.yaml_visual); + if let Err(err) = &events_diff { + writeln!(text, "### Error position").unwrap(); + let mut lines = desc.yaml.lines(); + for _ in 0..(err.marker().line() - 1) { + let l = lines.next().unwrap(); + writeln!(text, "{l}").unwrap(); + } + writeln!(text, "\x1B[91;1m{}", lines.next().unwrap()).unwrap(); + for _ in 0..err.marker().col() { + write!(text, " ").unwrap(); + } + writeln!(text, "^\x1b[m").unwrap(); + for l in lines { + writeln!(text, "{l}").unwrap(); + } + writeln!(text, "### End error position").unwrap(); + } } + match (error_text, desc.is_xfail) { (None, false) => Outcome::Passed, - (Some(text), false) => Outcome::Failed { msg: Some(text) }, + (Some(txt), false) => Outcome::Failed { msg: Some(txt) }, (Some(_), true) => Outcome::Ignored, (None, true) => Outcome::Failed { msg: Some("expected to fail but passes".into()), @@ -84,7 +102,7 @@ fn load_tests_from_file(entry: &DirEntry) -> Result>> { let mut current_test = yaml::Hash::new(); for (idx, test_data) in tests.iter().enumerate() { let name = if tests.len() > 1 { - format!("{}-{:02}", test_name, idx) + format!("{test_name}-{idx:02}") } else { test_name.to_string() }; @@ -172,7 +190,7 @@ impl EventReceiver for EventReporter { escape_text(text) ) } - Event::Alias(idx) => format!("=ALI *{}", idx), + Event::Alias(idx) => format!("=ALI *{idx}"), Event::Nothing => return, }; self.events.push(line); @@ -181,16 +199,16 @@ impl EventReceiver for EventReporter { fn format_index(idx: usize) -> String { if idx > 0 { - format!(" &{}", idx) + format!(" &{idx}") } else { - "".into() + String::new() } } fn escape_text(text: &str) -> String { let mut text = text.to_owned(); for (ch, replacement) in [ - ('\\', r#"\\"#), + ('\\', r"\\"), ('\n', "\\n"), ('\r', "\\r"), ('\x08', "\\b"), @@ -205,11 +223,11 @@ fn format_tag(tag: &Option) -> String { if let Some(tag) = tag { format!(" <{}{}>", tag.handle, tag.suffix) } else { - "".into() + String::new() } } -fn events_differ(actual: Vec, expected: &str) -> Option { +fn events_differ(actual: &[String], expected: &str) -> Option { let actual = actual.iter().map(Some).chain(std::iter::repeat(None)); let expected = expected_events(expected); let expected = expected.iter().map(Some).chain(std::iter::repeat(None)); @@ -220,13 +238,12 @@ fn events_differ(actual: Vec, expected: &str) -> Option { continue; } else { Some(format!( - "line {} differs: \n=> expected `{}`\n=> found `{}`", - idx, exp, act + "line {idx} differs: \n=> expected `{exp}`\n=> found `{act}`", )) } } - (Some(a), None) => Some(format!("extra actual line: {:?}", a)), - (None, Some(e)) => Some(format!("extra expected line: {:?}", e)), + (Some(a), None) => Some(format!("extra actual line: {a:?}")), + (None, Some(e)) => Some(format!("extra expected line: {e:?}")), (None, None) => None, }; } From 9281e35bbbc89d0d6126d17cba4b91a2095eb59d Mon Sep 17 00:00:00 2001 From: Ethiraric Date: Fri, 19 Jan 2024 22:38:06 +0100 Subject: [PATCH 259/380] Fix flow adjacent value with complex key. --- saphyr/src/scanner.rs | 11 +++++++++++ saphyr/tests/yaml-test-suite.rs | 1 - 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/saphyr/src/scanner.rs b/saphyr/src/scanner.rs index 528fd8e..4a49666 100644 --- a/saphyr/src/scanner.rs +++ b/saphyr/src/scanner.rs @@ -311,6 +311,8 @@ pub struct Scanner { stream_start_produced: bool, /// Whether we have already emitted the `StreamEnd` token. stream_end_produced: bool, + /// In some flow contexts, the value of a mapping is allowed to be adjacent to the `:`. When it + /// is, the index at which the `:` may be must be stored in `adjacent_value_allowed_at`. adjacent_value_allowed_at: usize, /// Whether a simple key could potentially start at the current position. /// @@ -1395,6 +1397,15 @@ impl> Scanner { self.skip(); self.skip_ws_to_eol(SkipTabs::Yes)?; + // A flow collection within a flow mapping can be a key. In that case, the value may be + // adjacent to the `:`. + // ```yaml + // - [ {a: b}:value ] + // ``` + if self.flow_level > 0 { + self.adjacent_value_allowed_at = self.mark.index; + } + self.tokens.push_back(Token(start_mark, tok)); Ok(()) } diff --git a/saphyr/tests/yaml-test-suite.rs b/saphyr/tests/yaml-test-suite.rs index 0933a59..dbd00db 100644 --- a/saphyr/tests/yaml-test-suite.rs +++ b/saphyr/tests/yaml-test-suite.rs @@ -315,7 +315,6 @@ fn expected_events(expected_tree: &str) -> Vec { #[rustfmt::skip] static EXPECTED_FAILURES: &[&str] = &[ // Misc - "9MMW", // Mapping key in implicit mapping in flow sequence(!) "G9HC", // Anchor indent problem(?) "H7J7", // Anchor indent / linebreak problem? "3UYS", // Escaped / From 7145ee828234b3d46e11c97b031a0d808d95add3 Mon Sep 17 00:00:00 2001 From: Ethiraric Date: Mon, 22 Jan 2024 21:33:18 +0100 Subject: [PATCH 260/380] Fix tests related to anchor/alias indentation. I have no idea what I'm doing. --- saphyr/src/scanner.rs | 15 +++++++++++++-- saphyr/tests/yaml-test-suite.rs | 2 -- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/saphyr/src/scanner.rs b/saphyr/src/scanner.rs index 4a49666..b9d0f16 100644 --- a/saphyr/src/scanner.rs +++ b/saphyr/src/scanner.rs @@ -1463,6 +1463,13 @@ impl> Scanner { )); } + // ???, fixes test G9HC. + if let Some(Token(mark, TokenType::Anchor(..) | TokenType::Tag(..))) = self.tokens.back() { + if self.mark.col == 0 && mark.col == 0 && self.indent > -1 { + return Err(ScanError::new(*mark, "invalid indentation for anchor")); + } + } + // Skip over the `-`. let mark = self.mark; self.skip(); @@ -2047,6 +2054,10 @@ impl> Scanner { let indent = self.indent + 1; let start_mark = self.mark; + if self.flow_level > 0 && (start_mark.col as isize) < indent { + return Err(ScanError::new(start_mark, "foo")); + } + let mut string = String::new(); let mut leading_break = String::new(); let mut trailing_breaks = String::new(); @@ -2371,10 +2382,10 @@ impl> Scanner { } } - /// Save the last token in [`Self::tokens`] as a simple key. + /// Mark the next token to be inserted as a potential simple key. fn save_simple_key(&mut self) { if self.simple_key_allowed { - let required = self.flow_level > 0 + let required = self.flow_level == 0 && self.indent == (self.mark.col as isize) && self.indents.last().unwrap().needs_block_end; let mut sk = SimpleKey::new(self.mark); diff --git a/saphyr/tests/yaml-test-suite.rs b/saphyr/tests/yaml-test-suite.rs index dbd00db..a1e421d 100644 --- a/saphyr/tests/yaml-test-suite.rs +++ b/saphyr/tests/yaml-test-suite.rs @@ -315,8 +315,6 @@ fn expected_events(expected_tree: &str) -> Vec { #[rustfmt::skip] static EXPECTED_FAILURES: &[&str] = &[ // Misc - "G9HC", // Anchor indent problem(?) - "H7J7", // Anchor indent / linebreak problem? "3UYS", // Escaped / "HRE5", // Escaped ' in double-quoted (should not work) "QB6E", // Indent for multiline double-quoted scalar From 2e94a4217ba17e95a5547ec2157a65e89af83ce7 Mon Sep 17 00:00:00 2001 From: Ethiraric Date: Mon, 22 Jan 2024 22:56:18 +0100 Subject: [PATCH 261/380] Fix dquote string escape sequences. --- saphyr/src/scanner.rs | 2 +- saphyr/tests/yaml-test-suite.rs | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/saphyr/src/scanner.rs b/saphyr/src/scanner.rs index b9d0f16..57a4295 100644 --- a/saphyr/src/scanner.rs +++ b/saphyr/src/scanner.rs @@ -1986,7 +1986,7 @@ impl> Scanner { 'e' => ret = '\x1b', ' ' => ret = '\x20', '"' => ret = '"', - '\'' => ret = '\'', + '/' => ret = '/', '\\' => ret = '\\', // Unicode next line (#x85) 'N' => ret = char::from_u32(0x85).unwrap(), diff --git a/saphyr/tests/yaml-test-suite.rs b/saphyr/tests/yaml-test-suite.rs index a1e421d..e4470ac 100644 --- a/saphyr/tests/yaml-test-suite.rs +++ b/saphyr/tests/yaml-test-suite.rs @@ -315,8 +315,6 @@ fn expected_events(expected_tree: &str) -> Vec { #[rustfmt::skip] static EXPECTED_FAILURES: &[&str] = &[ // Misc - "3UYS", // Escaped / - "HRE5", // Escaped ' in double-quoted (should not work) "QB6E", // Indent for multiline double-quoted scalar "S98Z", // Block scalar and indent problems? "U99R", // Comma is not allowed in tags From 9f4b1480dfbff7fa0d9858d1bbe704aec2284347 Mon Sep 17 00:00:00 2001 From: Ethiraric Date: Mon, 22 Jan 2024 23:03:02 +0100 Subject: [PATCH 262/380] Fix dquote indentation. --- saphyr/src/scanner.rs | 9 ++++++++- saphyr/tests/yaml-test-suite.rs | 1 - 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/saphyr/src/scanner.rs b/saphyr/src/scanner.rs index 57a4295..ef4986a 100644 --- a/saphyr/src/scanner.rs +++ b/saphyr/src/scanner.rs @@ -1816,6 +1816,10 @@ impl> Scanner { )); } + if (self.mark.col as isize) < self.indent { + return Err(ScanError::new(start_mark, "invalid identation in quoted scalar")); + } + leading_blanks = false; self.consume_flow_scalar_non_whitespace_chars( single, @@ -2055,7 +2059,10 @@ impl> Scanner { let start_mark = self.mark; if self.flow_level > 0 && (start_mark.col as isize) < indent { - return Err(ScanError::new(start_mark, "foo")); + return Err(ScanError::new( + start_mark, + "invalid indentation in flow construct", + )); } let mut string = String::new(); diff --git a/saphyr/tests/yaml-test-suite.rs b/saphyr/tests/yaml-test-suite.rs index e4470ac..fee25c3 100644 --- a/saphyr/tests/yaml-test-suite.rs +++ b/saphyr/tests/yaml-test-suite.rs @@ -315,7 +315,6 @@ fn expected_events(expected_tree: &str) -> Vec { #[rustfmt::skip] static EXPECTED_FAILURES: &[&str] = &[ // Misc - "QB6E", // Indent for multiline double-quoted scalar "S98Z", // Block scalar and indent problems? "U99R", // Comma is not allowed in tags "WZ62", // Empty content From 76b3773ffdf2be00570913c4b025b5d328f52951 Mon Sep 17 00:00:00 2001 From: Ethiraric Date: Mon, 22 Jan 2024 23:09:20 +0100 Subject: [PATCH 263/380] Fix indent in block scalars. --- saphyr/src/scanner.rs | 13 +++++++++++-- saphyr/tests/yaml-test-suite.rs | 1 - 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/saphyr/src/scanner.rs b/saphyr/src/scanner.rs index ef4986a..3f1b199 100644 --- a/saphyr/src/scanner.rs +++ b/saphyr/src/scanner.rs @@ -1643,8 +1643,14 @@ impl> Scanner { return Ok(Token(start_mark, TokenType::Scalar(style, contents))); } - let start_mark = self.mark; + if self.mark.col < indent && (self.mark.col as isize) > self.indent { + return Err(ScanError::new( + self.mark, + "wrongly indented line in block scalar", + )); + } + let start_mark = self.mark; while self.mark.col == indent && !is_z(self.ch()) { if indent == 0 { self.lookahead(4); @@ -1817,7 +1823,10 @@ impl> Scanner { } if (self.mark.col as isize) < self.indent { - return Err(ScanError::new(start_mark, "invalid identation in quoted scalar")); + return Err(ScanError::new( + start_mark, + "invalid identation in quoted scalar", + )); } leading_blanks = false; diff --git a/saphyr/tests/yaml-test-suite.rs b/saphyr/tests/yaml-test-suite.rs index fee25c3..0d35790 100644 --- a/saphyr/tests/yaml-test-suite.rs +++ b/saphyr/tests/yaml-test-suite.rs @@ -315,7 +315,6 @@ fn expected_events(expected_tree: &str) -> Vec { #[rustfmt::skip] static EXPECTED_FAILURES: &[&str] = &[ // Misc - "S98Z", // Block scalar and indent problems? "U99R", // Comma is not allowed in tags "WZ62", // Empty content ]; From 7f7919748aaab07eb76e9619f86342bad4b2e5f2 Mon Sep 17 00:00:00 2001 From: Ethiraric Date: Tue, 23 Jan 2024 00:04:46 +0100 Subject: [PATCH 264/380] Fix tag scanning. --- saphyr/src/scanner.rs | 124 +++++++++++++++++++++++--------- saphyr/tests/yaml-test-suite.rs | 1 - 2 files changed, 89 insertions(+), 36 deletions(-) diff --git a/saphyr/src/scanner.rs b/saphyr/src/scanner.rs index 3f1b199..6379234 100644 --- a/saphyr/src/scanner.rs +++ b/saphyr/src/scanner.rs @@ -465,6 +465,24 @@ fn is_anchor_char(c: char) -> bool { is_yaml_non_space(c) && !is_flow(c) && !is_z(c) } +/// Check whether the character is a valid word character. +#[inline] +fn is_word_char(c: char) -> bool { + is_alpha(c) && c != '_' +} + +/// Check whether the character is a valid URI character. +#[inline] +fn is_uri_char(c: char) -> bool { + is_word_char(c) || "#;/?:@&=+$,_.!~*\'()[]%".contains(c) +} + +/// Check whether the character is a valid tag character. +#[inline] +fn is_tag_char(c: char) -> bool { + is_uri_char(c) && !is_flow(c) && c != '!' +} + pub type ScanResult = Result<(), ScanError>; impl> Scanner { @@ -1116,8 +1134,7 @@ impl> Scanner { self.skip(); } - let is_secondary = handle == "!!"; - let prefix = self.scan_tag_uri(true, is_secondary, "", mark)?; + let prefix = self.scan_tag_prefix(mark)?; self.lookahead(1); @@ -1149,19 +1166,7 @@ impl> Scanner { self.lookahead(2); if self.buffer[1] == '<' { - // Eat '!<' - self.skip(); - self.skip(); - suffix = self.scan_tag_uri(false, false, "", &start_mark)?; - - if self.ch() != '>' { - return Err(ScanError::new( - start_mark, - "while scanning a tag, did not find the expected '>'", - )); - } - - self.skip(); + suffix = self.scan_verbatim_tag(&start_mark)?; } else { // The tag has either the '!suffix' or the '!handle!suffix' handle = self.scan_tag_handle(false, &start_mark)?; @@ -1169,9 +1174,10 @@ impl> Scanner { if handle.len() >= 2 && handle.starts_with('!') && handle.ends_with('!') { // A tag handle starting with "!!" is a secondary tag handle. let is_secondary_handle = handle == "!!"; - suffix = self.scan_tag_uri(false, is_secondary_handle, "", &start_mark)?; + suffix = + self.scan_tag_shorthand_suffix(false, is_secondary_handle, "", &start_mark)?; } else { - suffix = self.scan_tag_uri(false, false, &handle, &start_mark)?; + suffix = self.scan_tag_shorthand_suffix(false, false, &handle, &start_mark)?; handle = "!".to_owned(); // A special case: the '!' tag. Set the handle to '' and the // suffix to '!'. @@ -1223,9 +1229,70 @@ impl> Scanner { Ok(string) } - fn scan_tag_uri( + /// Scan for a tag prefix (6.8.2.2). + /// + /// There are 2 kinds of tag prefixes: + /// - Local: Starts with a `!`, contains only URI chars (`!foo`) + /// - Global: Starts with a tag char, contains then URI chars (`!foo,2000:app/`) + fn scan_tag_prefix(&mut self, start_mark: &Marker) -> Result { + let mut string = String::new(); + + if self.look_ch() == '!' { + // If we have a local tag, insert and skip `!`. + string.push(self.ch_skip()); + } else if !is_tag_char(self.ch()) { + // Otherwise, check if the first global tag character is valid. + return Err(ScanError::new(*start_mark, "invalid global tag character")); + } else if self.ch() == '%' { + // If it is valid and an escape sequence, escape it. + string.push(self.scan_uri_escapes(start_mark)?); + } else { + // Otherwise, push the first character. + string.push(self.ch_skip()); + } + + while is_uri_char(self.look_ch()) { + if self.ch() == '%' { + string.push(self.scan_uri_escapes(start_mark)?); + } else { + string.push(self.ch_skip()); + } + } + + Ok(string) + } + + /// Scan for a verbatim tag. + /// + /// The prefixing `!<` must _not_ have been skipped. + fn scan_verbatim_tag(&mut self, start_mark: &Marker) -> Result { + // Eat `!<` + self.skip(); + self.skip(); + + let mut string = String::new(); + while is_uri_char(self.look_ch()) { + if self.ch() == '%' { + string.push(self.scan_uri_escapes(start_mark)?); + } else { + string.push(self.ch_skip()); + } + } + + if self.ch() != '>' { + return Err(ScanError::new( + *start_mark, + "while scanning a verbatim tag, did not find the expected '>'", + )); + } + self.skip(); + + Ok(string) + } + + fn scan_tag_shorthand_suffix( &mut self, - directive: bool, + _directive: bool, _is_secondary: bool, head: &str, mark: &Marker, @@ -1239,23 +1306,10 @@ impl> Scanner { string.extend(head.chars().skip(1)); } - /* - * The set of characters that may appear in URI is as follows: - * - * '0'-'9', 'A'-'Z', 'a'-'z', '_', '-', ';', '/', '?', ':', '@', '&', - * '=', '+', '$', ',', '.', '!', '~', '*', '\'', '(', ')', '[', ']', - * '%'. - */ - while match self.look_ch() { - ';' | '/' | '?' | ':' | '@' | '&' => true, - '=' | '+' | '$' | ',' | '.' | '!' | '~' | '*' | '\'' | '(' | ')' | '[' | ']' => true, - '%' => true, - c if is_alpha(c) => true, - _ => false, - } { + while is_tag_char(self.look_ch()) { // Check if it is a URI-escape sequence. if self.ch() == '%' { - string.push(self.scan_uri_escapes(directive, mark)?); + string.push(self.scan_uri_escapes(mark)?); } else { string.push(self.ch()); self.skip(); @@ -1274,7 +1328,7 @@ impl> Scanner { Ok(string) } - fn scan_uri_escapes(&mut self, _directive: bool, mark: &Marker) -> Result { + fn scan_uri_escapes(&mut self, mark: &Marker) -> Result { let mut width = 0usize; let mut code = 0u32; loop { diff --git a/saphyr/tests/yaml-test-suite.rs b/saphyr/tests/yaml-test-suite.rs index 0d35790..d6b3f3f 100644 --- a/saphyr/tests/yaml-test-suite.rs +++ b/saphyr/tests/yaml-test-suite.rs @@ -315,6 +315,5 @@ fn expected_events(expected_tree: &str) -> Vec { #[rustfmt::skip] static EXPECTED_FAILURES: &[&str] = &[ // Misc - "U99R", // Comma is not allowed in tags "WZ62", // Empty content ]; From 68c7afcb4c946100d4db82c864202cf191f1998a Mon Sep 17 00:00:00 2001 From: Ethiraric Date: Tue, 23 Jan 2024 00:08:05 +0100 Subject: [PATCH 265/380] Fix null nodes with tags in flow constructs. --- saphyr/src/scanner.rs | 2 +- saphyr/tests/yaml-test-suite.rs | 29 +++-------------------------- 2 files changed, 4 insertions(+), 27 deletions(-) diff --git a/saphyr/src/scanner.rs b/saphyr/src/scanner.rs index 6379234..ebbe35a 100644 --- a/saphyr/src/scanner.rs +++ b/saphyr/src/scanner.rs @@ -1188,7 +1188,7 @@ impl> Scanner { } } - if is_blankz(self.look_ch()) { + if is_blankz(self.look_ch()) || (self.flow_level > 0 && is_flow(self.ch())) { // XXX: ex 7.2, an empty scalar can follow a secondary tag Ok(Token(start_mark, TokenType::Tag(handle, suffix))) } else { diff --git a/saphyr/tests/yaml-test-suite.rs b/saphyr/tests/yaml-test-suite.rs index d6b3f3f..a39957a 100644 --- a/saphyr/tests/yaml-test-suite.rs +++ b/saphyr/tests/yaml-test-suite.rs @@ -15,7 +15,6 @@ struct YamlTest { yaml: String, expected_events: String, expected_error: bool, - is_xfail: bool, } fn main() -> Result<()> { @@ -33,16 +32,6 @@ fn main() -> Result<()> { let mut tests: Vec<_> = tests.into_iter().flatten().collect(); tests.sort_by_key(|t| t.name.clone()); - let missing_xfails: Vec<_> = EXPECTED_FAILURES - .iter() - .filter(|&&test| !tests.iter().any(|t| t.name == test)) - .collect(); - assert!( - missing_xfails.is_empty(), - "The following EXPECTED_FAILURES not found during discovery: {:?}", - missing_xfails - ); - run_tests(&arguments, tests, run_yaml_test).exit(); } @@ -80,13 +69,9 @@ fn run_yaml_test(test: &Test) -> Outcome { } } - match (error_text, desc.is_xfail) { - (None, false) => Outcome::Passed, - (Some(txt), false) => Outcome::Failed { msg: Some(txt) }, - (Some(_), true) => Outcome::Ignored, - (None, true) => Outcome::Failed { - msg: Some("expected to fail but passes".into()), - }, + match error_text { + None => Outcome::Passed, + Some(txt) => Outcome::Failed { msg: Some(txt) }, } } @@ -106,7 +91,6 @@ fn load_tests_from_file(entry: &DirEntry) -> Result>> { } else { test_name.to_string() }; - let is_xfail = EXPECTED_FAILURES.contains(&name.as_str()); // Test fields except `fail` are "inherited" let test_data = test_data.as_hash().unwrap(); @@ -131,7 +115,6 @@ fn load_tests_from_file(entry: &DirEntry) -> Result>> { yaml: visual_to_raw(current_test["yaml"].as_str().unwrap()), expected_events: visual_to_raw(current_test["tree"].as_str().unwrap()), expected_error: current_test["fail"].as_bool() == Some(true), - is_xfail, }, }); } @@ -311,9 +294,3 @@ fn expected_events(expected_tree: &str) -> Vec { }) .collect() } - -#[rustfmt::skip] -static EXPECTED_FAILURES: &[&str] = &[ - // Misc - "WZ62", // Empty content -]; From eed3433841d7a7dfe0bb99bea238a9ea5800832b Mon Sep 17 00:00:00 2001 From: Ethiraric Date: Thu, 8 Feb 2024 07:12:14 +0100 Subject: [PATCH 266/380] Update licence, readme, doc. --- .../ChenYuheng-Apache} | 40 ++-- .../{LICENSE-MIT => .licenses/ChenYuheng-MIT} | 0 saphyr/.licenses/Ethiraric-Apache | 191 ++++++++++++++++++ saphyr/.licenses/Ethiraric-MIT | 21 ++ saphyr/Cargo.toml | 16 +- saphyr/LICENSE | 9 + saphyr/README.md | 87 ++++---- saphyr/examples/dump_events.rs | 4 +- saphyr/examples/dump_yaml.rs | 4 +- saphyr/src/lib.rs | 15 +- saphyr/src/parser.rs | 4 +- saphyr/src/yaml.rs | 14 +- saphyr/tests/basic.rs | 2 +- saphyr/tests/emitter.rs | 2 +- saphyr/tests/quickcheck.rs | 4 +- saphyr/tests/scanner.rs | 2 +- saphyr/tests/spec_test.rs | 10 +- saphyr/tests/test_round_trip.rs | 4 +- saphyr/tests/yaml-test-suite.rs | 2 +- 19 files changed, 311 insertions(+), 120 deletions(-) rename saphyr/{LICENSE-APACHE => .licenses/ChenYuheng-Apache} (94%) rename saphyr/{LICENSE-MIT => .licenses/ChenYuheng-MIT} (100%) create mode 100644 saphyr/.licenses/Ethiraric-Apache create mode 100644 saphyr/.licenses/Ethiraric-MIT create mode 100644 saphyr/LICENSE diff --git a/saphyr/LICENSE-APACHE b/saphyr/.licenses/ChenYuheng-Apache similarity index 94% rename from saphyr/LICENSE-APACHE rename to saphyr/.licenses/ChenYuheng-Apache index 16fe87b..7a77d29 100644 --- a/saphyr/LICENSE-APACHE +++ b/saphyr/.licenses/ChenYuheng-Apache @@ -1,3 +1,18 @@ +Copyright (c) 2015 Chen Yuheng + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + + Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ @@ -174,28 +189,3 @@ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS - -APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - -Copyright [yyyy] [name of copyright owner] - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. diff --git a/saphyr/LICENSE-MIT b/saphyr/.licenses/ChenYuheng-MIT similarity index 100% rename from saphyr/LICENSE-MIT rename to saphyr/.licenses/ChenYuheng-MIT diff --git a/saphyr/.licenses/Ethiraric-Apache b/saphyr/.licenses/Ethiraric-Apache new file mode 100644 index 0000000..fd9f614 --- /dev/null +++ b/saphyr/.licenses/Ethiraric-Apache @@ -0,0 +1,191 @@ +Copyright (c) 2023 Ethiraric + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + +TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + +1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + +2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + +3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + +4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + +5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + +6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + +7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + +8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + +9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + +END OF TERMS AND CONDITIONS diff --git a/saphyr/.licenses/Ethiraric-MIT b/saphyr/.licenses/Ethiraric-MIT new file mode 100644 index 0000000..8f63ff0 --- /dev/null +++ b/saphyr/.licenses/Ethiraric-MIT @@ -0,0 +1,21 @@ +The MIT License (MIT) + +Copyright (c) 2023 Ethiraric + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/saphyr/Cargo.toml b/saphyr/Cargo.toml index bd8aa94..845c80c 100644 --- a/saphyr/Cargo.toml +++ b/saphyr/Cargo.toml @@ -1,12 +1,14 @@ [package] -name = "yaml-rust" -version = "0.4.5" # remember to update html_root_url -authors = ["Yuheng Chen "] -homepage = "http://chyh1990.github.io/yaml-rust/" -documentation = "https://docs.rs/yaml-rust" +name = "yaml-rust2" +version = "0.5.0" +authors = [ + "Yuheng Chen ", + "Ethiraric " +] +documentation = "https://docs.rs/yaml-rust2" license = "MIT/Apache-2.0" -description = "The missing YAML 1.2 parser for rust" -repository = "https://github.com/chyh1990/yaml-rust" +description = "A fully YAML 1.2 compliant YAML parser" +repository = "https://github.com/Ethiraric/yaml-rust2" readme = "README.md" edition = "2018" diff --git a/saphyr/LICENSE b/saphyr/LICENSE new file mode 100644 index 0000000..98646c3 --- /dev/null +++ b/saphyr/LICENSE @@ -0,0 +1,9 @@ +Code up to and including commit `da52a68615f2ecdd6b7e4567019f280c433c1521` is licensed by Chen Yuheng under either of: + - [Apache License, Version 2.0](.licenses/ChenYuheng-Apache) (http://www.apache.org/licenses/LICENSE-2.0) + - [MIT License](./licenses/ChenYuheng-MIT) (http://opensource.org/licenses/MIT) + +Code modifications starting with commit `1d71a23b151dcc12b289d0f06d8207dd9c764216` (included) are licenced by Ethiraric under either of: + - [Apache License, Version 2.0](.licenses/Ethiraric-Apache) (http://www.apache.org/licenses/LICENSE-2.0) + - [MIT License](./licenses/Ethiraric-MIT) (http://opensource.org/licenses/MIT) + +Redistributions of this Work must include licenses of both Chen Yuheng and Ethiraric. diff --git a/saphyr/README.md b/saphyr/README.md index a31faf1..cb6d4c7 100644 --- a/saphyr/README.md +++ b/saphyr/README.md @@ -1,16 +1,13 @@ -# yaml-rust +# yaml-rust2 -The missing YAML 1.2 implementation for Rust. +A fully compliant YAML 1.2 implementation written in pure Rust. +This work is based on [`yaml-rust`](https://github.com/chyh1990/yaml-rust) with +fixes towards being compliant to the [YAML test +suite](https://github.com/yaml/yaml-test-suite/). `yaml-rust`'s parser is +heavily influenced by `libyaml` and `yaml-cpp`. -[![Travis](https://travis-ci.org/chyh1990/yaml-rust.svg?branch=master)](https://travis-ci.org/chyh1990/yaml-rust) -[![AppVeyor](https://ci.appveyor.com/api/projects/status/scf47535ckp4ylg4?svg=true)](https://ci.appveyor.com/project/chyh1990/yaml-rust) -[![crates.io](https://img.shields.io/crates/v/yaml-rust.svg)](https://crates.io/crates/yaml-rust) -[![docs.rs](https://img.shields.io/badge/api-rustdoc-blue.svg)](https://docs.rs/yaml-rust) - -`yaml-rust` is a pure Rust YAML 1.2 implementation, -which enjoys the memory safety -property and other benefits from the Rust language. -The parser is heavily influenced by `libyaml` and `yaml-cpp`. +`yaml-rust2` is a pure Rust YAML 1.2 implementation, which enjoys the memory +safety property and other benefits from the Rust language. ## Quick Start @@ -18,21 +15,13 @@ Add the following to the Cargo.toml of your project: ```toml [dependencies] -yaml-rust = "0.4" +yaml-rust2 = "0.5" ``` -and import: +Use `yaml_rust2::YamlLoader` to load YAML documents and access them as `Yaml` objects: ```rust -extern crate yaml_rust; -``` - -Use `yaml::YamlLoader` to load the YAML documents and access it -as Vec/HashMap: - -```rust -extern crate yaml_rust; -use yaml_rust::{YamlLoader, YamlEmitter}; +use yaml_rust2::{YamlLoader, YamlEmitter}; fn main() { let s = @@ -56,8 +45,8 @@ bar: assert_eq!(doc["foo"][0].as_str().unwrap(), "list1"); assert_eq!(doc["bar"][1].as_f64().unwrap(), 2.0); - // Chained key/array access is checked and won't panic, - // return BadValue if they are not exist. + // Array/map-like accesses are checked and won't panic. + // They will return `BadValue` if the access is invalid. assert!(doc["INVALID_KEY"][100].is_badvalue()); // Dump the YAML object @@ -70,56 +59,48 @@ bar: } ``` -Note that `yaml_rust::Yaml` implements `Index<&'a str>` & `Index`: +Note that `yaml_rust2::Yaml` implements `Index<&'a str>` and `Index`: -* `Index` assumes the container is an Array -* `Index<&'a str>` assumes the container is a string to value Map +* `Index` assumes the container is an array +* `Index<&'a str>` assumes the container is a string to value map * otherwise, `Yaml::BadValue` is returned -If your document does not conform to this convention (e.g. map with -complex type key), you can use the `Yaml::as_XXX` family API to access your -documents. +If your document does not conform to this convention (e.g. map with complex +type key), you can use the `Yaml::as_XXX` family API of functions to access +your objects. ## Features * Pure Rust -* Ruby-like Array/Hash access API +* `Vec`/`HashMap` access API * Low-level YAML events emission ## Specification Compliance -This implementation aims to provide YAML parser fully compatible with -the YAML 1.2 specification. The parser can correctly parse almost all -examples in the specification, except for the following known bugs: - -* Empty plain scalar in certain contexts - -However, the widely used library `libyaml` also fails to parse these examples, -so it may not be a huge problem for most users. - -## Goals - -* Encoder -* Tag directive -* Alias while deserialization - -## Minimum Rust version policy - -This crate's minimum supported `rustc` version is 1.31 (released with Rust 2018, after v0.4.3), as this is the currently known minimum version for [`regex`](https://crates.io/crates/regex#minimum-rust-version-policy) as well. +This implementation is fully compatible with the YAML 1.2 specification. In +order to help with compliance, `yaml-rust2` tests against (and passes) the [YAML +test suite](https://github.com/yaml/yaml-test-suite/). ## License Licensed under either of - * Apache License, Version 2.0 ([LICENSE-APACHE](LICENSE-APACHE) or http://www.apache.org/licenses/LICENSE-2.0) - * MIT license ([LICENSE-MIT](LICENSE-MIT) or http://opensource.org/licenses/MIT) + * Apache License, Version 2.0 (http://www.apache.org/licenses/LICENSE-2.0) + * MIT license (http://opensource.org/licenses/MIT) at your option. +Since this repository was originally maintained by +[chyh1990](https://github.com/chyh1990), there are 2 sets of licenses. +A license of each set must be included in redistributions. See the +[LICENSE](LICENSE) file for more details. + +You can find licences in the [`.licenses`](.licenses) subfolder. + ## Contribution Fork & PR on Github. Unless you explicitly state otherwise, any contribution intentionally submitted -for inclusion in the work by you, as defined in the Apache-2.0 license, shall be dual licensed as above, without any -additional terms or conditions. +for inclusion in the work by you, as defined in the Apache-2.0 license, shall +be dual licensed as above, without any additional terms or conditions. diff --git a/saphyr/examples/dump_events.rs b/saphyr/examples/dump_events.rs index 452eb69..8bf9e01 100644 --- a/saphyr/examples/dump_events.rs +++ b/saphyr/examples/dump_events.rs @@ -1,9 +1,7 @@ -extern crate yaml_rust; - use std::env; use std::fs::File; use std::io::prelude::*; -use yaml_rust::{ +use yaml_rust2::{ parser::{MarkedEventReceiver, Parser}, scanner::Marker, Event, diff --git a/saphyr/examples/dump_yaml.rs b/saphyr/examples/dump_yaml.rs index 3455a9a..1c3c452 100644 --- a/saphyr/examples/dump_yaml.rs +++ b/saphyr/examples/dump_yaml.rs @@ -1,9 +1,7 @@ -extern crate yaml_rust; - use std::env; use std::fs::File; use std::io::prelude::*; -use yaml_rust::yaml; +use yaml_rust2::yaml; fn print_indent(indent: usize) { for _ in 0..indent { diff --git a/saphyr/src/lib.rs b/saphyr/src/lib.rs index 7ccd4db..d4d9def 100644 --- a/saphyr/src/lib.rs +++ b/saphyr/src/lib.rs @@ -1,23 +1,24 @@ -// Copyright 2015, Yuheng Chen. See the LICENSE file at the top-level -// directory of this distribution. +// Copyright 2015, Yuheng Chen. +// Copyright 2023, Ethiraric. +// See the LICENSE file at the top-level directory of this distribution. //! YAML 1.2 implementation in pure Rust. //! //! # Usage //! -//! This crate is [on github](https://github.com/chyh1990/yaml-rust) and can be -//! used by adding `yaml-rust` to the dependencies in your project's `Cargo.toml`. +//! This crate is [on github](https://github.com/Ethiraric/yaml-rust2) and can be used by adding +//! `yaml-rust2` to the dependencies in your project's `Cargo.toml`. //! //! ```toml //! [dependencies] -//! yaml-rust = "0.4" +//! yaml-rust2 = "0.5.0" //! ``` //! //! # Examples //! Parse a string into `Vec` and then serialize it as a YAML string. //! //! ``` -//! use yaml_rust::{YamlLoader, YamlEmitter}; +//! use yaml_rust2::{YamlLoader, YamlEmitter}; //! //! let docs = YamlLoader::load_from_str("[1, 2, 3]").unwrap(); //! let doc = &docs[0]; // select the first YAML document @@ -29,7 +30,7 @@ //! //! ``` -#![doc(html_root_url = "https://docs.rs/yaml-rust/0.4.5")] +#![doc(html_root_url = "https://docs.rs/yaml-rust2/0.5.0")] #![cfg_attr(feature = "cargo-clippy", warn(clippy::pedantic))] #![cfg_attr( feature = "cargo-clippy", diff --git a/saphyr/src/parser.rs b/saphyr/src/parser.rs index d8c1009..56e8a6f 100644 --- a/saphyr/src/parser.rs +++ b/saphyr/src/parser.rs @@ -153,7 +153,7 @@ pub struct Parser { /// /// # Example /// ``` -/// # use yaml_rust::parser::{Event, EventReceiver, Parser}; +/// # use yaml_rust2::parser::{Event, EventReceiver, Parser}; /// # /// /// Sink of events. Collects them into an array. /// struct EventSink { @@ -508,7 +508,7 @@ impl> Parser { // return Err(ScanError::new(tok.0, // "found incompatible YAML document")); //} - if version_directive_received == true { + if version_directive_received { return Err(ScanError::new(*mark, "duplicate version directive")); } version_directive_received = true; diff --git a/saphyr/src/yaml.rs b/saphyr/src/yaml.rs index 22f30eb..f5e6738 100644 --- a/saphyr/src/yaml.rs +++ b/saphyr/src/yaml.rs @@ -1,11 +1,11 @@ #![allow(clippy::module_name_repetitions)] +use std::{collections::BTreeMap, convert::TryFrom, mem, ops::Index}; + +use linked_hash_map::LinkedHashMap; + use crate::parser::{Event, MarkedEventReceiver, Parser, Tag}; use crate::scanner::{Marker, ScanError, TScalarStyle}; -use linked_hash_map::LinkedHashMap; -use std::collections::BTreeMap; -use std::mem; -use std::ops::Index; /// A YAML node is stored as this `Yaml` enumeration, which provides an easy way to /// access your YAML document. @@ -13,7 +13,7 @@ use std::ops::Index; /// # Examples /// /// ``` -/// use yaml_rust::Yaml; +/// use yaml_rust2::Yaml; /// let foo = Yaml::from_str("-123"); // convert the string to the appropriate YAML type /// assert_eq!(foo.as_i64().unwrap(), -123); /// @@ -317,7 +317,7 @@ impl Yaml { /// /// # Examples /// ``` - /// # use yaml_rust::yaml::Yaml; + /// # use yaml_rust2::yaml::Yaml; /// assert!(matches!(Yaml::from_str("42"), Yaml::Integer(42))); /// assert!(matches!(Yaml::from_str("0x2A"), Yaml::Integer(42))); /// assert!(matches!(Yaml::from_str("0o52"), Yaml::Integer(42))); @@ -379,7 +379,7 @@ impl Index for Yaml { if let Some(v) = self.as_vec() { v.get(idx).unwrap_or(&BAD_VALUE) } else if let Some(v) = self.as_hash() { - let key = Yaml::Integer(idx as i64); + let key = Yaml::Integer(i64::try_from(idx).unwrap()); v.get(&key).unwrap_or(&BAD_VALUE) } else { &BAD_VALUE diff --git a/saphyr/tests/basic.rs b/saphyr/tests/basic.rs index 946843a..b82ac2d 100644 --- a/saphyr/tests/basic.rs +++ b/saphyr/tests/basic.rs @@ -2,7 +2,7 @@ #![allow(clippy::float_cmp)] use std::vec; -use yaml_rust::{Yaml, YamlEmitter, YamlLoader}; +use yaml_rust2::{Yaml, YamlEmitter, YamlLoader}; #[test] fn test_api() { diff --git a/saphyr/tests/emitter.rs b/saphyr/tests/emitter.rs index d60e3a1..59028df 100644 --- a/saphyr/tests/emitter.rs +++ b/saphyr/tests/emitter.rs @@ -1,4 +1,4 @@ -use yaml_rust::{YamlEmitter, YamlLoader}; +use yaml_rust2::{YamlEmitter, YamlLoader}; #[allow(clippy::similar_names)] #[test] diff --git a/saphyr/tests/quickcheck.rs b/saphyr/tests/quickcheck.rs index 0efd679..fdf2549 100644 --- a/saphyr/tests/quickcheck.rs +++ b/saphyr/tests/quickcheck.rs @@ -1,9 +1,9 @@ -extern crate yaml_rust; +extern crate yaml_rust2; #[macro_use] extern crate quickcheck; use quickcheck::TestResult; -use yaml_rust::{Yaml, YamlEmitter, YamlLoader}; +use yaml_rust2::{Yaml, YamlEmitter, YamlLoader}; quickcheck! { fn test_check_weird_keys(xs: Vec) -> TestResult { diff --git a/saphyr/tests/scanner.rs b/saphyr/tests/scanner.rs index 27d3760..0a09517 100644 --- a/saphyr/tests/scanner.rs +++ b/saphyr/tests/scanner.rs @@ -1,6 +1,6 @@ #![allow(clippy::enum_glob_use)] -use yaml_rust::{scanner::TokenType::*, scanner::*}; +use yaml_rust2::{scanner::TokenType::*, scanner::*}; macro_rules! next { ($p:ident, $tk:pat) => {{ diff --git a/saphyr/tests/spec_test.rs b/saphyr/tests/spec_test.rs index f78aa58..c54a2df 100644 --- a/saphyr/tests/spec_test.rs +++ b/saphyr/tests/spec_test.rs @@ -1,9 +1,9 @@ #![allow(dead_code)] #![allow(non_upper_case_globals)] -extern crate yaml_rust; +extern crate yaml_rust2; -use yaml_rust::parser::{Event, EventReceiver, Parser}; -use yaml_rust::scanner::TScalarStyle; +use yaml_rust2::parser::{Event, EventReceiver, Parser}; +use yaml_rust2::scanner::TScalarStyle; // These names match the names used in the C++ test suite. #[cfg_attr(feature = "cargo-clippy", allow(clippy::enum_variant_names))] @@ -76,8 +76,8 @@ include!("spec_test.rs.inc"); #[test] fn test_mapvec_legal() { - use yaml_rust::yaml::{Hash, Yaml}; - use yaml_rust::{YamlEmitter, YamlLoader}; + use yaml_rust2::yaml::{Hash, Yaml}; + use yaml_rust2::{YamlEmitter, YamlLoader}; // Emitting a `map>, _>` should result in legal yaml that // we can parse. diff --git a/saphyr/tests/test_round_trip.rs b/saphyr/tests/test_round_trip.rs index 4a383a8..d051281 100644 --- a/saphyr/tests/test_round_trip.rs +++ b/saphyr/tests/test_round_trip.rs @@ -1,6 +1,6 @@ -extern crate yaml_rust; +extern crate yaml_rust2; -use yaml_rust::{Yaml, YamlEmitter, YamlLoader}; +use yaml_rust2::{Yaml, YamlEmitter, YamlLoader}; fn roundtrip(original: &Yaml) { let mut emitted = String::new(); diff --git a/saphyr/tests/yaml-test-suite.rs b/saphyr/tests/yaml-test-suite.rs index a39957a..8e67dd4 100644 --- a/saphyr/tests/yaml-test-suite.rs +++ b/saphyr/tests/yaml-test-suite.rs @@ -2,7 +2,7 @@ use std::fs::{self, DirEntry}; use libtest_mimic::{run_tests, Arguments, Outcome, Test}; -use yaml_rust::{ +use yaml_rust2::{ parser::{Event, EventReceiver, Parser, Tag}, scanner::TScalarStyle, yaml, ScanError, Yaml, YamlLoader, From 6e5286c2e17344d889a202d5fd7d20199b322de3 Mon Sep 17 00:00:00 2001 From: Ethiraric Date: Tue, 23 Jan 2024 15:20:12 +0100 Subject: [PATCH 267/380] Use OR for dual licensing. https://doc.rust-lang.org/book/ch14-02-publishing-to-crates-io.html#adding-metadata-to-a-new-crate --- saphyr/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/saphyr/Cargo.toml b/saphyr/Cargo.toml index 845c80c..209b969 100644 --- a/saphyr/Cargo.toml +++ b/saphyr/Cargo.toml @@ -6,7 +6,7 @@ authors = [ "Ethiraric " ] documentation = "https://docs.rs/yaml-rust2" -license = "MIT/Apache-2.0" +license = "MIT OR Apache-2.0" description = "A fully YAML 1.2 compliant YAML parser" repository = "https://github.com/Ethiraric/yaml-rust2" readme = "README.md" From 36e8b06e8288149af0759561382756ff492ef7f0 Mon Sep 17 00:00:00 2001 From: Ethiraric Date: Tue, 23 Jan 2024 22:48:20 +0100 Subject: [PATCH 268/380] Add benchmarking tools. --- saphyr/Cargo.toml | 2 + saphyr/examples/gen_large_yaml_array/gen.rs | 150 +++++++++++++ saphyr/examples/gen_large_yaml_array/main.rs | 214 +++++++++++++++++++ saphyr/examples/time_parse.rs | 31 +++ 4 files changed, 397 insertions(+) create mode 100644 saphyr/examples/gen_large_yaml_array/gen.rs create mode 100644 saphyr/examples/gen_large_yaml_array/main.rs create mode 100644 saphyr/examples/time_parse.rs diff --git a/saphyr/Cargo.toml b/saphyr/Cargo.toml index 209b969..bf409e1 100644 --- a/saphyr/Cargo.toml +++ b/saphyr/Cargo.toml @@ -18,6 +18,8 @@ linked-hash-map = "0.5.3" [dev-dependencies] libtest-mimic = "0.3.0" quickcheck = "0.9" +rand = "0.8.5" +lipsum = "0.9.0" [profile.release-lto] inherits = "release" diff --git a/saphyr/examples/gen_large_yaml_array/gen.rs b/saphyr/examples/gen_large_yaml_array/gen.rs new file mode 100644 index 0000000..b687371 --- /dev/null +++ b/saphyr/examples/gen_large_yaml_array/gen.rs @@ -0,0 +1,150 @@ +#![allow(clippy::too_many_arguments)] + +use rand::{distributions::Alphanumeric, rngs::ThreadRng, Rng}; + +/// Generate a string with hexadecimal digits of the specified length. +pub fn hex_string(rng: &mut ThreadRng, len: usize) -> String { + const DIGITS: &[u8] = b"0123456789abcdef"; + string_from_set(rng, len, len + 1, DIGITS) +} + +/// Generate an e-mail address. +pub fn email(rng: &mut ThreadRng, len_lo: usize, len_hi: usize) -> String { + const CHARSET: &[u8] = b"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ-_.0123456789"; + format!( + "{}@example.com", + string_from_set(rng, len_lo, len_hi, CHARSET) + ) +} + +/// Generate a random URL. +pub fn url( + rng: &mut ThreadRng, + scheme: &str, + n_paths_lo: usize, + n_paths_hi: usize, + path_len_lo: usize, + path_len_hi: usize, + extension: Option<&str>, +) -> String { + let mut string = format!("{scheme}://example.com"); + for _ in 0..rng.gen_range(n_paths_lo..n_paths_hi) { + string.push('/'); + string.push_str(&alnum_string(rng, path_len_lo, path_len_hi)); + } + if let Some(extension) = extension { + string.push('.'); + string.push_str(extension); + } + string +} + +/// Generate a random integer. +pub fn integer(rng: &mut ThreadRng, lo: i64, hi: i64) -> i64 { + rng.gen_range(lo..hi) +} + +/// Generate an alphanumeric string with a length between `lo_len` and `hi_len`. +pub fn alnum_string(rng: &mut ThreadRng, lo_len: usize, hi_len: usize) -> String { + let len = rng.gen_range(lo_len..hi_len); + rng.sample_iter(&Alphanumeric) + .take(len) + .map(char::from) + .collect() +} + +/// Generate a string with hexadecimal digits of the specified length. +pub fn string_from_set(rng: &mut ThreadRng, len_lo: usize, len_hi: usize, set: &[u8]) -> String { + (0..rng.gen_range(len_lo..len_hi)) + .map(|_| set[rng.gen_range(0..set.len())] as char) + .collect() +} + +/// Generate a lipsum paragraph. +pub fn paragraph( + rng: &mut ThreadRng, + lines_lo: usize, + lines_hi: usize, + wps_lo: usize, + wps_hi: usize, + line_maxcol: usize, +) -> Vec { + let mut ret = Vec::new(); + let nlines = rng.gen_range(lines_lo..lines_hi); + + while ret.len() < nlines { + let words_in_sentence = rng.gen_range(wps_lo..wps_hi); + let mut sentence = lipsum::lipsum_words_with_rng(rng.clone(), words_in_sentence); + + if let Some(last_line) = ret.pop() { + sentence = format!("{last_line} {sentence}"); + } + + while sentence.len() > line_maxcol { + let last_space_idx = line_maxcol + - sentence[0..line_maxcol] + .chars() + .rev() + .position(char::is_whitespace) + .unwrap(); + ret.push(sentence[0..last_space_idx].to_string()); + sentence = sentence[last_space_idx + 1..].to_string(); + } + if !sentence.is_empty() { + ret.push(sentence); + } + } + + ret +} + +/// Generate a full name. +pub fn full_name(rng: &mut ThreadRng, len_lo: usize, len_hi: usize) -> String { + format!( + "{} {}", + name(rng, len_lo, len_hi), + name(rng, len_lo, len_hi) + ) +} + +/// Generate a name. +pub fn name(rng: &mut ThreadRng, len_lo: usize, len_hi: usize) -> String { + const UPPER: &[u8] = b"ABCDEFGHIJKLMNOPQRSTUVWXYZ"; + const LOWER: &[u8] = b"abcdefghijklmnopqrstuvwxyz"; + + let len = rng.gen_range(len_lo..len_hi); + let mut ret = String::new(); + ret.push(UPPER[rng.gen_range(0..UPPER.len())] as char); + ret.push_str(string_from_set(rng, len, len + 1, LOWER).as_str()); + + ret +} + +/// Generate a lipsum text. +/// +/// Texts are composed of some paragraphs and empty lines between them. +pub fn text( + rng: &mut ThreadRng, + paragraphs_lo: usize, + paragraphs_hi: usize, + lines_lo: usize, + lines_hi: usize, + wps_lo: usize, + wps_hi: usize, + line_maxcol: usize, +) -> Vec { + let mut ret = Vec::new(); + let mut first = true; + + for _ in 0..rng.gen_range(paragraphs_lo..paragraphs_hi) { + if first { + first = false; + } else { + ret.push(String::new()); + } + + ret.extend(paragraph(rng, lines_lo, lines_hi, wps_lo, wps_hi, line_maxcol).into_iter()); + } + + ret +} diff --git a/saphyr/examples/gen_large_yaml_array/main.rs b/saphyr/examples/gen_large_yaml_array/main.rs new file mode 100644 index 0000000..3d21c48 --- /dev/null +++ b/saphyr/examples/gen_large_yaml_array/main.rs @@ -0,0 +1,214 @@ +#![allow(dead_code)] + +mod gen; + +use std::collections::HashMap; + +use rand::{rngs::ThreadRng, Rng}; + +fn main() -> std::fmt::Result { + let mut s = String::new(); + let mut g = Generator::new(); + g.gen_record_array(&mut s, 100_000, 100_001)?; + println!("{s}"); + Ok(()) +} + +/// YAML Generator. +struct Generator { + /// The RNG state. + rng: ThreadRng, + /// The stack of indentations. + indents: Vec, +} + +type GenFn = dyn FnOnce(&mut Generator, &mut W) -> std::fmt::Result; + +impl Generator { + /// Create a new generator. + fn new() -> Self { + Generator { + rng: rand::thread_rng(), + indents: vec![0], + } + } + + /// Generate an array of records as per [`Self::gen_record_object`]. + fn gen_record_array( + &mut self, + writer: &mut W, + items_lo: usize, + items_hi: usize, + ) -> std::fmt::Result { + self.gen_array(writer, items_lo, items_hi, Generator::gen_record_object) + } + + /// Generate a YAML object/mapping containing a record. + /// + /// Fields are description, hash, version, home, repository and pdf. + fn gen_record_object(&mut self, writer: &mut W) -> std::fmt::Result { + let mut fields = HashMap::>>::new(); + fields.insert( + "description".to_string(), + Box::new(|gen, w| { + write!(w, "|")?; + gen.push_indent(2); + gen.nl(w)?; + let indent = gen.indent(); + let text = gen::text(&mut gen.rng, 1, 9, 3, 8, 10, 20, 80 - indent); + gen.write_lines(w, &text)?; + gen.pop_indent(); + Ok(()) + }), + ); + + fields.insert( + "authors".to_string(), + Box::new(|gen, w| { + gen.push_indent(2); + gen.nl(w)?; + gen.gen_authors_array(w, 1, 10)?; + gen.pop_indent(); + Ok(()) + }), + ); + + fields.insert( + "hash".to_string(), + Box::new(|gen, w| write!(w, "{}", gen::hex_string(&mut gen.rng, 64))), + ); + fields.insert( + "version".to_string(), + Box::new(|gen, w| write!(w, "{}", gen::integer(&mut gen.rng, 1, 9))), + ); + fields.insert( + "home".to_string(), + Box::new(|gen, w| write!(w, "{}", gen::url(&mut gen.rng, "https", 0, 1, 0, 0, None))), + ); + fields.insert( + "repository".to_string(), + Box::new(|gen, w| write!(w, "{}", gen::url(&mut gen.rng, "git", 1, 4, 10, 20, None))), + ); + fields.insert( + "pdf".to_string(), + Box::new(|gen, w| { + write!( + w, + "{}", + gen::url(&mut gen.rng, "https", 1, 4, 10, 30, Some("pdf")) + ) + }), + ); + self.gen_object(writer, fields) + } + + /// Generate an array of authors as per [`Self::gen_author_object`]. + fn gen_authors_array( + &mut self, + writer: &mut W, + items_lo: usize, + items_hi: usize, + ) -> std::fmt::Result { + self.gen_array(writer, items_lo, items_hi, Generator::gen_author_object) + } + + fn gen_author_object(&mut self, writer: &mut W) -> std::fmt::Result { + let mut fields = HashMap::>>::new(); + fields.insert( + "name".to_string(), + Box::new(|gen, w| write!(w, "{}", gen::full_name(&mut gen.rng, 10, 15))), + ); + fields.insert( + "email".to_string(), + Box::new(|gen, w| write!(w, "{}", gen::email(&mut gen.rng, 1, 9))), + ); + self.gen_object(writer, fields) + } + + /// Generate a YAML array/sequence containing nodes generated by the given function. + fn gen_array std::fmt::Result>( + &mut self, + writer: &mut W, + len_lo: usize, + len_hi: usize, + mut obj_creator: F, + ) -> std::fmt::Result { + let mut first = true; + for _ in 0..self.rng.gen_range(len_lo..len_hi) { + if first { + first = false; + } else { + self.nl(writer)?; + } + write!(writer, "- ")?; + self.push_indent(2); + (obj_creator)(self, writer)?; + self.pop_indent(); + } + Ok(()) + } + + /// Create a Yaml object with some fields in it. + fn gen_object( + &mut self, + writer: &mut W, + fields: HashMap>>, + ) -> std::fmt::Result { + let mut first = true; + for (key, f) in fields { + if first { + first = false; + } else { + self.nl(writer)?; + } + write!(writer, "{key}: ")?; + f(self, writer)?; + } + Ok(()) + } + + /// Write the given lines at the right indentation. + fn write_lines( + &mut self, + writer: &mut W, + lines: &[String], + ) -> std::fmt::Result { + let mut first = true; + + for line in lines { + if first { + first = false; + } else { + self.nl(writer)?; + } + write!(writer, "{line}")?; + } + + Ok(()) + } + + /// Write a new line to the writer and indent. + fn nl(&mut self, writer: &mut W) -> std::fmt::Result { + writeln!(writer)?; + for _ in 0..self.indent() { + write!(writer, " ")?; + } + Ok(()) + } + + /// Return the given indent. + fn indent(&self) -> usize { + *self.indents.last().unwrap() + } + + /// Push a new indent with the given relative offset. + fn push_indent(&mut self, offset: usize) { + self.indents.push(self.indent() + offset); + } + + /// Pops the last indent. + fn pop_indent(&mut self) { + self.indents.pop(); + assert!(!self.indents.is_empty()); + } +} diff --git a/saphyr/examples/time_parse.rs b/saphyr/examples/time_parse.rs new file mode 100644 index 0000000..9b551e1 --- /dev/null +++ b/saphyr/examples/time_parse.rs @@ -0,0 +1,31 @@ +use std::env; +use std::fs::File; +use std::io::prelude::*; +use yaml_rust2::{ + parser::{MarkedEventReceiver, Parser}, + scanner::Marker, + Event, +}; + +/// A sink which discards any event sent. +struct NullSink {} + +impl MarkedEventReceiver for NullSink { + fn on_event(&mut self, _: Event, _: Marker) {} +} + +fn main() { + let args: Vec<_> = env::args().collect(); + let mut f = File::open(&args[1]).unwrap(); + let mut s = String::new(); + f.read_to_string(&mut s).unwrap(); + + let mut sink = NullSink {}; + let mut parser = Parser::new(s.chars()); + + // Load events using our sink as the receiver. + let begin = std::time::Instant::now(); + parser.load(&mut sink, true).unwrap(); + let end = std::time::Instant::now(); + println!("Loaded {}MiB in {:?}", s.len() / 1024 / 1024, end - begin); +} From b4f66c457a24c110ab60e0eb96ad5ae58b127cf1 Mon Sep 17 00:00:00 2001 From: Ethiraric Date: Wed, 24 Jan 2024 01:02:20 +0100 Subject: [PATCH 269/380] Move char `is_xxx` fn to their own file. --- saphyr/src/char_traits.rs | 111 ++++++++++++++++++++++++++++++++++ saphyr/src/lib.rs | 1 + saphyr/src/scanner.rs | 124 ++++---------------------------------- 3 files changed, 123 insertions(+), 113 deletions(-) create mode 100644 saphyr/src/char_traits.rs diff --git a/saphyr/src/char_traits.rs b/saphyr/src/char_traits.rs new file mode 100644 index 0000000..b95318c --- /dev/null +++ b/saphyr/src/char_traits.rs @@ -0,0 +1,111 @@ +//! Holds functions to determine if a character belongs to a specific character set. + +/// Check whether the character is nil (`\0`). +#[inline] +pub(crate) fn is_z(c: char) -> bool { + c == '\0' +} + +/// Check whether the character is a line break (`\r` or `\n`). +#[inline] +pub(crate) fn is_break(c: char) -> bool { + c == '\n' || c == '\r' +} + +/// Check whether the character is nil or a line break (`\0`, `\r`, `\n`). +#[inline] +pub(crate) fn is_breakz(c: char) -> bool { + is_break(c) || is_z(c) +} + +/// Check whether the character is a whitespace (` ` or `\t`). +#[inline] +pub(crate) fn is_blank(c: char) -> bool { + c == ' ' || c == '\t' +} + +/// Check whether the character is nil, a linebreak or a whitespace. +/// +/// `\0`, ` `, `\t`, `\n`, `\r` +#[inline] +pub(crate) fn is_blankz(c: char) -> bool { + is_blank(c) || is_breakz(c) +} + +/// Check whether the character is an ascii digit. +#[inline] +pub(crate) fn is_digit(c: char) -> bool { + c.is_ascii_digit() +} + +/// Check whether the character is a digit, letter, `_` or `-`. +#[inline] +pub(crate) fn is_alpha(c: char) -> bool { + matches!(c, '0'..='9' | 'a'..='z' | 'A'..='Z' | '_' | '-') +} + +/// Check whether the character is a hexadecimal character (case insensitive). +#[inline] +pub(crate) fn is_hex(c: char) -> bool { + c.is_ascii_digit() || ('a'..='f').contains(&c) || ('A'..='F').contains(&c) +} + +/// Convert the hexadecimal digit to an integer. +#[inline] +pub(crate) fn as_hex(c: char) -> u32 { + match c { + '0'..='9' => (c as u32) - ('0' as u32), + 'a'..='f' => (c as u32) - ('a' as u32) + 10, + 'A'..='F' => (c as u32) - ('A' as u32) + 10, + _ => unreachable!(), + } +} + +/// Check whether the character is a YAML flow character (one of `,[]{}`). +#[inline] +pub(crate) fn is_flow(c: char) -> bool { + matches!(c, ',' | '[' | ']' | '{' | '}') +} + +/// Check whether the character is the BOM character. +#[inline] +pub(crate) fn is_bom(c: char) -> bool { + c == '\u{FEFF}' +} + +/// Check whether the character is a YAML non-breaking character. +#[inline] +pub(crate) fn is_yaml_non_break(c: char) -> bool { + // TODO(ethiraric, 28/12/2023): is_printable + !is_break(c) && !is_bom(c) +} + +/// Check whether the character is NOT a YAML whitespace (` ` / `\t`). +#[inline] +pub(crate) fn is_yaml_non_space(c: char) -> bool { + is_yaml_non_break(c) && !is_blank(c) +} + +/// Check whether the character is a valid YAML anchor name character. +#[inline] +pub(crate) fn is_anchor_char(c: char) -> bool { + is_yaml_non_space(c) && !is_flow(c) && !is_z(c) +} + +/// Check whether the character is a valid word character. +#[inline] +pub(crate) fn is_word_char(c: char) -> bool { + is_alpha(c) && c != '_' +} + +/// Check whether the character is a valid URI character. +#[inline] +pub(crate) fn is_uri_char(c: char) -> bool { + is_word_char(c) || "#;/?:@&=+$,_.!~*\'()[]%".contains(c) +} + +/// Check whether the character is a valid tag character. +#[inline] +pub(crate) fn is_tag_char(c: char) -> bool { + is_uri_char(c) && !is_flow(c) && c != '!' +} diff --git a/saphyr/src/lib.rs b/saphyr/src/lib.rs index d4d9def..5107745 100644 --- a/saphyr/src/lib.rs +++ b/saphyr/src/lib.rs @@ -45,6 +45,7 @@ extern crate linked_hash_map; +pub(crate) mod char_traits; pub mod emitter; pub mod parser; pub mod scanner; diff --git a/saphyr/src/scanner.rs b/saphyr/src/scanner.rs index ebbe35a..391dc28 100644 --- a/saphyr/src/scanner.rs +++ b/saphyr/src/scanner.rs @@ -1,9 +1,12 @@ #![allow(clippy::cast_possible_wrap)] #![allow(clippy::cast_sign_loss)] -use std::collections::VecDeque; -use std::error::Error; -use std::{char, fmt}; +use std::{char, collections::VecDeque, error::Error, fmt}; + +use crate::char_traits::{ + as_hex, is_alpha, is_anchor_char, is_blank, is_blankz, is_break, is_breakz, is_digit, is_flow, + is_hex, is_tag_char, is_uri_char, is_z, +}; #[derive(Clone, Copy, PartialEq, Debug, Eq)] pub enum TEncoding { @@ -24,8 +27,11 @@ pub enum TScalarStyle { /// A location in a yaml document. #[derive(Clone, Copy, PartialEq, Debug, Eq)] pub struct Marker { + /// The index (in chars) in the input string. index: usize, + /// The line (1-indexed). line: usize, + /// The column (1-indexed). col: usize, } @@ -56,7 +62,9 @@ impl Marker { /// An error that occured while scanning. #[derive(Clone, PartialEq, Debug, Eq)] pub struct ScanError { + /// The position at which the error happened in the source. mark: Marker, + /// Human-readable details about the error. info: String, } @@ -373,116 +381,6 @@ impl> Iterator for Scanner { } } -/// Check whether the character is nil (`\0`). -#[inline] -fn is_z(c: char) -> bool { - c == '\0' -} - -/// Check whether the character is a line break (`\r` or `\n`). -#[inline] -fn is_break(c: char) -> bool { - c == '\n' || c == '\r' -} - -/// Check whether the character is nil or a line break (`\0`, `\r`, `\n`). -#[inline] -fn is_breakz(c: char) -> bool { - is_break(c) || is_z(c) -} - -/// Check whether the character is a whitespace (` ` or `\t`). -#[inline] -fn is_blank(c: char) -> bool { - c == ' ' || c == '\t' -} - -/// Check whether the character is nil, a linebreak or a whitespace. -/// -/// `\0`, ` `, `\t`, `\n`, `\r` -#[inline] -fn is_blankz(c: char) -> bool { - is_blank(c) || is_breakz(c) -} - -/// Check whether the character is an ascii digit. -#[inline] -fn is_digit(c: char) -> bool { - c.is_ascii_digit() -} - -/// Check whether the character is a digit, letter, `_` or `-`. -#[inline] -fn is_alpha(c: char) -> bool { - matches!(c, '0'..='9' | 'a'..='z' | 'A'..='Z' | '_' | '-') -} - -/// Check whether the character is a hexadecimal character (case insensitive). -#[inline] -fn is_hex(c: char) -> bool { - c.is_ascii_digit() || ('a'..='f').contains(&c) || ('A'..='F').contains(&c) -} - -/// Convert the hexadecimal digit to an integer. -#[inline] -fn as_hex(c: char) -> u32 { - match c { - '0'..='9' => (c as u32) - ('0' as u32), - 'a'..='f' => (c as u32) - ('a' as u32) + 10, - 'A'..='F' => (c as u32) - ('A' as u32) + 10, - _ => unreachable!(), - } -} - -/// Check whether the character is a YAML flow character (one of `,[]{}`). -#[inline] -fn is_flow(c: char) -> bool { - matches!(c, ',' | '[' | ']' | '{' | '}') -} - -/// Check whether the character is the BOM character. -#[inline] -fn is_bom(c: char) -> bool { - c == '\u{FEFF}' -} - -/// Check whether the character is a YAML non-breaking character. -#[inline] -fn is_yaml_non_break(c: char) -> bool { - // TODO(ethiraric, 28/12/2023): is_printable - !is_break(c) && !is_bom(c) -} - -/// Check whether the character is NOT a YAML whitespace (` ` / `\t`). -#[inline] -fn is_yaml_non_space(c: char) -> bool { - is_yaml_non_break(c) && !is_blank(c) -} - -/// Check whether the character is a valid YAML anchor name character. -#[inline] -fn is_anchor_char(c: char) -> bool { - is_yaml_non_space(c) && !is_flow(c) && !is_z(c) -} - -/// Check whether the character is a valid word character. -#[inline] -fn is_word_char(c: char) -> bool { - is_alpha(c) && c != '_' -} - -/// Check whether the character is a valid URI character. -#[inline] -fn is_uri_char(c: char) -> bool { - is_word_char(c) || "#;/?:@&=+$,_.!~*\'()[]%".contains(c) -} - -/// Check whether the character is a valid tag character. -#[inline] -fn is_tag_char(c: char) -> bool { - is_uri_char(c) && !is_flow(c) && c != '!' -} - pub type ScanResult = Result<(), ScanError>; impl> Scanner { From e6fae1c67990fecb99e37cc94ffa25fccdd1314a Mon Sep 17 00:00:00 2001 From: Ethiraric Date: Wed, 24 Jan 2024 01:11:53 +0100 Subject: [PATCH 270/380] Add `justfile` for my pre-commit todo-list. --- saphyr/justfile | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 saphyr/justfile diff --git a/saphyr/justfile b/saphyr/justfile new file mode 100644 index 0000000..a9d1a56 --- /dev/null +++ b/saphyr/justfile @@ -0,0 +1,6 @@ +before_commit: + cargo clippy --release --all-targets -- -D warnings + cargo clippy --all-targets -- -D warnings + cargo build --release --all-targets + cargo build --all-targets + cargo test --release From 3e1ffedb8addb130fe1658d288f5881735c7ea05 Mon Sep 17 00:00:00 2001 From: Ethiraric Date: Wed, 3 Apr 2024 20:06:11 +0200 Subject: [PATCH 271/380] Add license. --- bench/LICENSE | 191 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 191 insertions(+) create mode 100644 bench/LICENSE diff --git a/bench/LICENSE b/bench/LICENSE new file mode 100644 index 0000000..0734aeb --- /dev/null +++ b/bench/LICENSE @@ -0,0 +1,191 @@ +Copyright (c) 2024 Ethiraric + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + +TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + +1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + +2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + +3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + +4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + +5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + +6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + +7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + +8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + +9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + +END OF TERMS AND CONDITIONS From 1c36ffbc4d1aea8c4fbf9749665abb7b95ec7d87 Mon Sep 17 00:00:00 2001 From: Ethiraric Date: Wed, 24 Jan 2024 01:11:53 +0100 Subject: [PATCH 272/380] Add `justfile` for my pre-commit todo-list. --- bench/justfile | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 bench/justfile diff --git a/bench/justfile b/bench/justfile new file mode 100644 index 0000000..a9d1a56 --- /dev/null +++ b/bench/justfile @@ -0,0 +1,6 @@ +before_commit: + cargo clippy --release --all-targets -- -D warnings + cargo clippy --all-targets -- -D warnings + cargo build --release --all-targets + cargo build --all-targets + cargo test --release From 5789169ceb402be42764359aca31cc5c0d9ae28e Mon Sep 17 00:00:00 2001 From: Ethiraric Date: Wed, 24 Jan 2024 02:22:02 +0100 Subject: [PATCH 273/380] Remove debug prints code from release builds. If building release mode, remove debug code. Now, the `debug_print!` macro resolves to nothing in release build. In debug build, don't check the environment for each print. This has a huge overhead. The environment is only checked once and next checks are made against a simple boolean value. --- saphyr/src/debug.rs | 41 +++++++++++++++++++++++++++++++++++++++++ saphyr/src/lib.rs | 2 ++ saphyr/src/parser.rs | 5 ++--- saphyr/src/scanner.rs | 23 ++++++++++------------- 4 files changed, 55 insertions(+), 16 deletions(-) create mode 100644 saphyr/src/debug.rs diff --git a/saphyr/src/debug.rs b/saphyr/src/debug.rs new file mode 100644 index 0000000..b43ea2e --- /dev/null +++ b/saphyr/src/debug.rs @@ -0,0 +1,41 @@ +//! Debugging helpers. +//! +//! Debugging is governed by two conditions: +//! 1. The build mode. Debugging code is not emitted in release builds and thus not available. +//! 2. The `YAMLALL_DEBUG` environment variable. If built in debug mode, the program must be fed +//! the `YAMLALL_DEBUG` variable in its environment. While debugging code is present in debug +//! build, debug helpers will only trigger if that variable is set when running the program. + +// If a debug build, use stuff in the debug submodule. +#[cfg(debug_assertions)] +pub use debug::enabled; + +// Otherwise, just export dummies for publicly visible functions. +/// Evaluates to nothing. +#[cfg(not(debug_assertions))] +macro_rules! debug_print { + ($($arg:tt)*) => {{}}; +} + +#[cfg(debug_assertions)] +#[macro_use] +#[allow(clippy::module_inception)] +mod debug { + use std::sync::OnceLock; + + /// If debugging is [`enabled`], print the format string on the error output. + macro_rules! debug_print { + ($($arg:tt)*) => {{ + if $crate::debug::enabled() { + eprintln!($($arg)*) + } + }}; + } + + /// Return whether debugging features are enabled in this execution. + #[cfg(debug_assertions)] + pub fn enabled() -> bool { + static ENABLED: OnceLock = OnceLock::new(); + *ENABLED.get_or_init(|| std::env::var("YAMLRUST2_DEBUG").is_ok()) + } +} diff --git a/saphyr/src/lib.rs b/saphyr/src/lib.rs index 5107745..9c13e9a 100644 --- a/saphyr/src/lib.rs +++ b/saphyr/src/lib.rs @@ -46,6 +46,8 @@ extern crate linked_hash_map; pub(crate) mod char_traits; +#[macro_use] +pub(crate) mod debug; pub mod emitter; pub mod parser; pub mod scanner; diff --git a/saphyr/src/parser.rs b/saphyr/src/parser.rs index 56e8a6f..a6e40b8 100644 --- a/saphyr/src/parser.rs +++ b/saphyr/src/parser.rs @@ -413,9 +413,8 @@ impl> Parser { fn state_machine(&mut self) -> ParseResult { // let next_tok = self.peek_token().cloned()?; // println!("cur_state {:?}, next tok: {:?}", self.state, next_tok); - if std::env::var("YAMLRUST_DEBUG").is_ok() { - eprintln!("\n\x1B[;33mParser state: {:?} \x1B[;0m", self.state); - } + debug_print!("\n\x1B[;33mParser state: {:?} \x1B[;0m", self.state); + match self.state { State::StreamStart => self.stream_start(), diff --git a/saphyr/src/scanner.rs b/saphyr/src/scanner.rs index 391dc28..a666269 100644 --- a/saphyr/src/scanner.rs +++ b/saphyr/src/scanner.rs @@ -364,12 +364,11 @@ impl> Iterator for Scanner { } match self.next_token() { Ok(Some(tok)) => { - if std::env::var("YAMLRUST_DEBUG").is_ok() { - eprintln!( - " \x1B[;32m\u{21B3} {:?} \x1B[;36m{:?}\x1B[;m", - tok.1, tok.0 - ); - } + debug_print!( + " \x1B[;32m\u{21B3} {:?} \x1B[;36m{:?}\x1B[;m", + tok.1, + tok.0 + ); Some(tok) } Ok(tok) => tok, @@ -565,13 +564,11 @@ impl> Scanner { } self.skip_to_next_token()?; - if std::env::var("YAMLRUST_DEBUG").is_ok() { - eprintln!( - " \x1B[38;5;244m\u{2192} fetch_next_token after whitespace {:?} {:?}\x1B[m", - self.mark, - self.ch() - ); - } + debug_print!( + " \x1B[38;5;244m\u{2192} fetch_next_token after whitespace {:?} {:?}\x1B[m", + self.mark, + self.ch() + ); self.stale_simple_keys()?; From 2cebf0af1d19c0bd4a9ac382c8fc44683dbd0bbb Mon Sep 17 00:00:00 2001 From: Ethiraric Date: Wed, 24 Jan 2024 17:14:52 +0100 Subject: [PATCH 274/380] Help the compiler inline `read_break`. --- saphyr/src/scanner.rs | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/saphyr/src/scanner.rs b/saphyr/src/scanner.rs index a666269..01fcfd4 100644 --- a/saphyr/src/scanner.rs +++ b/saphyr/src/scanner.rs @@ -513,20 +513,18 @@ impl> Scanner { // // A `\n` is pushed into `s`. // - // # Panics + // # Panics (in debug) // If the next characters do not correspond to a line break. #[inline] fn read_break(&mut self, s: &mut String) { - if self.buffer[0] == '\r' && self.buffer[1] == '\n' { - s.push('\n'); + let c = self.buffer[0]; + debug_assert!(is_break(c)); + self.skip(); + if c == '\r' && self.buffer[0] == '\n' { self.skip(); - self.skip(); - } else if self.buffer[0] == '\r' || self.buffer[0] == '\n' { - s.push('\n'); - self.skip(); - } else { - unreachable!(); } + + s.push('\n'); } /// Check whether the next characters correspond to an end of document. From aaa9288842b9e96713d6a9ae07558111e8def93a Mon Sep 17 00:00:00 2001 From: Ethiraric Date: Wed, 24 Jan 2024 19:31:45 +0100 Subject: [PATCH 275/380] Pre-load chars in `skip_block_scalar_indent`. --- saphyr/src/scanner.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/saphyr/src/scanner.rs b/saphyr/src/scanner.rs index 01fcfd4..1dcc583 100644 --- a/saphyr/src/scanner.rs +++ b/saphyr/src/scanner.rs @@ -1661,14 +1661,14 @@ impl> Scanner { /// Skip the block scalar indentation and empty lines. fn skip_block_scalar_indent(&mut self, indent: usize, breaks: &mut String) { loop { + self.lookahead(indent + 2); // Consume all spaces. Tabs cannot be used as indentation. - while self.mark.col < indent && self.look_ch() == ' ' { + while self.mark.col < indent && self.ch() == ' ' { self.skip(); } // If our current line is empty, skip over the break and continue looping. - if is_break(self.look_ch()) { - self.lookahead(2); + if is_break(self.ch()) { self.read_break(breaks); } else { // Otherwise, we have a content line. Return control. From 6babe8de60907d23374d196bf990dec371f70c82 Mon Sep 17 00:00:00 2001 From: Ethiraric Date: Wed, 24 Jan 2024 19:42:18 +0100 Subject: [PATCH 276/380] Doing this leads to worse performance. --- saphyr/src/scanner.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/saphyr/src/scanner.rs b/saphyr/src/scanner.rs index 1dcc583..f932ca2 100644 --- a/saphyr/src/scanner.rs +++ b/saphyr/src/scanner.rs @@ -441,7 +441,6 @@ impl> Scanner { self.mark.line += 1; self.mark.col = 0; } else { - // TODO(ethiraric, 20/12/2023): change to `self.leading_whitespace &= is_blank(c)`? if self.leading_whitespace && !is_blank(c) { self.leading_whitespace = false; } From 26ef839cd3fbccc66b4f7ce0a81cfa053925a9d3 Mon Sep 17 00:00:00 2001 From: Ethiraric Date: Wed, 24 Jan 2024 20:20:52 +0100 Subject: [PATCH 277/380] Split `skip` into more specific variants. This function is a hotpath and sometimes removing the conditional jump to detect line breaks saves a bunch of instructions. --- saphyr/src/scanner.rs | 235 ++++++++++++++++++++++-------------------- 1 file changed, 121 insertions(+), 114 deletions(-) diff --git a/saphyr/src/scanner.rs b/saphyr/src/scanner.rs index f932ca2..c79ee82 100644 --- a/saphyr/src/scanner.rs +++ b/saphyr/src/scanner.rs @@ -430,32 +430,58 @@ impl> Scanner { } } - /// Consume the next character, remove it from the buffer and update the mark. + /// Consume the next character. It is assumed the next character is a blank. #[inline] - fn skip(&mut self) { - let c = self.buffer.pop_front().unwrap(); + fn skip_blank(&mut self) { + self.buffer.pop_front(); self.mark.index += 1; - if c == '\n' { - self.leading_whitespace = true; - self.mark.line += 1; - self.mark.col = 0; - } else { - if self.leading_whitespace && !is_blank(c) { - self.leading_whitespace = false; - } - self.mark.col += 1; + self.mark.col += 1; + } + + /// Consume the next character. It is assumed the next character is not a blank. + #[inline] + fn skip_non_blank(&mut self) { + self.buffer.pop_front(); + + self.mark.index += 1; + self.mark.col += 1; + self.leading_whitespace = false; + } + + /// Consume the next characters. It is assumed none of the next characters are blanks. + #[inline] + fn skip_n_non_blank(&mut self, n: usize) { + for _ in 0..n { + self.buffer.pop_front(); } + + self.mark.index += n; + self.mark.col += n; + self.leading_whitespace = false; + } + + /// Consume the next character. It is assumed the next character is a newline. + #[inline] + fn skip_nl(&mut self) { + self.buffer.pop_front(); + + self.mark.index += 1; + self.mark.col = 0; + self.mark.line += 1; + self.leading_whitespace = true; } /// Consume a linebreak (either CR, LF or CRLF), if any. Do nothing if there's none. #[inline] fn skip_line(&mut self) { if self.buffer[0] == '\r' && self.buffer[1] == '\n' { - self.skip(); - self.skip(); + // While technically not a blank, this does not matter as `self.leading_whitespace` + // will be reset by `skip_nl`. + self.skip_blank(); + self.skip_nl(); } else if is_break(self.buffer[0]) { - self.skip(); + self.skip_nl(); } } @@ -477,16 +503,6 @@ impl> Scanner { self.ch() } - /// Consume and return the next character. - /// - /// Equivalent to calling [`Self::ch`] and [`Self::skip`]. - #[inline] - fn ch_skip(&mut self) -> char { - let ret = self.ch(); - self.skip(); - ret - } - /// Return whether the next character is `c`. #[inline] fn ch_is(&self, c: char) -> bool { @@ -517,11 +533,12 @@ impl> Scanner { #[inline] fn read_break(&mut self, s: &mut String) { let c = self.buffer[0]; + let nc = self.buffer[1]; debug_assert!(is_break(c)); - self.skip(); - if c == '\r' && self.buffer[0] == '\n' { - self.skip(); + if c == '\r' && nc == '\n' { + self.skip_blank(); } + self.skip_nl(); s.push('\n'); } @@ -731,7 +748,7 @@ impl> Scanner { loop { // TODO(chenyh) BOM match self.look_ch() { - ' ' => self.skip(), + ' ' => self.skip_blank(), // Tabs may not be used as indentation. // "Indentation" only exists as long as a block is started, but does not exist // inside of flow-style constructs. Tabs are allowed as part of leading @@ -751,7 +768,7 @@ impl> Scanner { )); } } - '\t' => self.skip(), + '\t' => self.skip_blank(), '\n' | '\r' => { self.lookahead(2); self.skip_line(); @@ -760,9 +777,8 @@ impl> Scanner { } } '#' => { - while !is_breakz(self.ch()) { - self.skip(); - self.lookahead(1); + while !is_breakz(self.look_ch()) { + self.skip_non_blank(); } } _ => break, @@ -780,7 +796,7 @@ impl> Scanner { loop { match self.look_ch() { ' ' => { - self.skip(); + self.skip_blank(); need_whitespace = false; } @@ -793,9 +809,8 @@ impl> Scanner { need_whitespace = false; } '#' => { - while !is_breakz(self.ch()) { - self.skip(); - self.lookahead(1); + while !is_breakz(self.look_ch()) { + self.skip_non_blank(); } } _ => break, @@ -817,11 +832,11 @@ impl> Scanner { match self.look_ch() { ' ' => { has_yaml_ws = true; - self.skip(); + self.skip_blank(); } '\t' if skip_tabs != SkipTabs::No => { encountered_tab = true; - self.skip(); + self.skip_blank(); } // YAML comments must be preceded by whitespace. '#' if !encountered_tab && !has_yaml_ws => { @@ -832,7 +847,7 @@ impl> Scanner { } '#' => { while !is_breakz(self.look_ch()) { - self.skip(); + self.skip_non_blank(); } } _ => break, @@ -893,7 +908,7 @@ impl> Scanner { fn scan_directive(&mut self) -> Result { let start_mark = self.mark; - self.skip(); + self.skip_non_blank(); let name = self.scan_directive_name()?; let tok = match name.as_ref() { @@ -902,10 +917,8 @@ impl> Scanner { // XXX This should be a warning instead of an error _ => { // skip current line - self.lookahead(1); - while !is_breakz(self.ch()) { - self.skip(); - self.lookahead(1); + while !is_breakz(self.look_ch()) { + self.skip_non_blank(); } // XXX return an empty TagDirective token Token( @@ -916,7 +929,6 @@ impl> Scanner { // "while scanning a directive, found unknown directive name")) } }; - self.lookahead(1); self.skip_ws_to_eol(SkipTabs::Yes)?; @@ -937,11 +949,8 @@ impl> Scanner { } fn scan_version_directive_value(&mut self, mark: &Marker) -> Result { - self.lookahead(1); - - while is_blank(self.ch()) { - self.skip(); - self.lookahead(1); + while is_blank(self.look_ch()) { + self.skip_blank(); } let major = self.scan_version_directive_number(mark)?; @@ -952,8 +961,7 @@ impl> Scanner { "while scanning a YAML directive, did not find expected digit or '.' character", )); } - - self.skip(); + self.skip_non_blank(); let minor = self.scan_version_directive_number(mark)?; @@ -963,11 +971,9 @@ impl> Scanner { fn scan_directive_name(&mut self) -> Result { let start_mark = self.mark; let mut string = String::new(); - self.lookahead(1); - while is_alpha(self.ch()) { + while is_alpha(self.look_ch()) { string.push(self.ch()); - self.skip(); - self.lookahead(1); + self.skip_non_blank(); } if string.is_empty() { @@ -999,7 +1005,7 @@ impl> Scanner { } length += 1; val = val * 10 + digit; - self.skip(); + self.skip_non_blank(); } if length == 0 { @@ -1013,17 +1019,15 @@ impl> Scanner { } fn scan_tag_directive_value(&mut self, mark: &Marker) -> Result { - self.lookahead(1); /* Eat whitespaces. */ - while is_blank(self.ch()) { - self.skip(); - self.lookahead(1); + while is_blank(self.look_ch()) { + self.skip_blank(); } let handle = self.scan_tag_handle(true, mark)?; /* Eat whitespaces. */ while is_blank(self.look_ch()) { - self.skip(); + self.skip_blank(); } let prefix = self.scan_tag_prefix(mark)?; @@ -1100,15 +1104,18 @@ impl> Scanner { )); } - string.push(self.ch_skip()); + string.push(self.ch()); + self.skip_non_blank(); while is_alpha(self.look_ch()) { - string.push(self.ch_skip()); + string.push(self.ch()); + self.skip_non_blank(); } // Check if the trailing character is '!' and copy it. if self.ch() == '!' { - string.push(self.ch_skip()); + string.push(self.ch()); + self.skip_non_blank(); } else if directive && string != "!" { // It's either the '!' tag or not really a tag handle. If it's a %TAG // directive, it's an error. If it's a tag token, it must be a part of @@ -1131,7 +1138,8 @@ impl> Scanner { if self.look_ch() == '!' { // If we have a local tag, insert and skip `!`. - string.push(self.ch_skip()); + string.push(self.ch()); + self.skip_non_blank(); } else if !is_tag_char(self.ch()) { // Otherwise, check if the first global tag character is valid. return Err(ScanError::new(*start_mark, "invalid global tag character")); @@ -1140,14 +1148,16 @@ impl> Scanner { string.push(self.scan_uri_escapes(start_mark)?); } else { // Otherwise, push the first character. - string.push(self.ch_skip()); + string.push(self.ch()); + self.skip_non_blank(); } while is_uri_char(self.look_ch()) { if self.ch() == '%' { string.push(self.scan_uri_escapes(start_mark)?); } else { - string.push(self.ch_skip()); + string.push(self.ch()); + self.skip_non_blank(); } } @@ -1159,15 +1169,16 @@ impl> Scanner { /// The prefixing `!<` must _not_ have been skipped. fn scan_verbatim_tag(&mut self, start_mark: &Marker) -> Result { // Eat `!<` - self.skip(); - self.skip(); + self.skip_non_blank(); + self.skip_non_blank(); let mut string = String::new(); while is_uri_char(self.look_ch()) { if self.ch() == '%' { string.push(self.scan_uri_escapes(start_mark)?); } else { - string.push(self.ch_skip()); + string.push(self.ch()); + self.skip_non_blank(); } } @@ -1177,7 +1188,7 @@ impl> Scanner { "while scanning a verbatim tag, did not find the expected '>'", )); } - self.skip(); + self.skip_non_blank(); Ok(string) } @@ -1204,7 +1215,7 @@ impl> Scanner { string.push(self.scan_uri_escapes(mark)?); } else { string.push(self.ch()); - self.skip(); + self.skip_non_blank(); } length += 1; @@ -1258,9 +1269,7 @@ impl> Scanner { code = (code << 8) + octet; } - self.skip(); - self.skip(); - self.skip(); + self.skip_n_non_blank(3); width -= 1; if width == 0 { @@ -1292,10 +1301,10 @@ impl> Scanner { let mut string = String::new(); let start_mark = self.mark; - self.skip(); + self.skip_non_blank(); while is_anchor_char(self.look_ch()) { string.push(self.ch()); - self.skip(); + self.skip_non_blank(); } if string.is_empty() { @@ -1319,7 +1328,7 @@ impl> Scanner { self.allow_simple_key(); let start_mark = self.mark; - self.skip(); + self.skip_non_blank(); if tok == TokenType::FlowMappingStart { self.flow_mapping_started = true; @@ -1340,7 +1349,7 @@ impl> Scanner { self.end_implicit_mapping(self.mark); let start_mark = self.mark; - self.skip(); + self.skip_non_blank(); self.skip_ws_to_eol(SkipTabs::Yes)?; // A flow collection within a flow mapping can be a key. In that case, the value may be @@ -1364,7 +1373,7 @@ impl> Scanner { self.end_implicit_mapping(self.mark); let start_mark = self.mark; - self.skip(); + self.skip_non_blank(); self.skip_ws_to_eol(SkipTabs::Yes)?; self.tokens @@ -1418,7 +1427,7 @@ impl> Scanner { // Skip over the `-`. let mark = self.mark; - self.skip(); + self.skip_non_blank(); // generate BLOCK-SEQUENCE-START if indented self.roll_indent(mark.col, None, TokenType::BlockSequenceStart, mark); @@ -1452,9 +1461,7 @@ impl> Scanner { let mark = self.mark; - self.skip(); - self.skip(); - self.skip(); + self.skip_n_non_blank(3); self.tokens.push_back(Token(mark, t)); Ok(()) @@ -1489,7 +1496,7 @@ impl> Scanner { let mut chomping_break = String::new(); // skip '|' or '>' - self.skip(); + self.skip_non_blank(); self.unroll_non_block_indents(); if self.look_ch() == '+' || self.ch() == '-' { @@ -1498,7 +1505,7 @@ impl> Scanner { } else { chomping = Chomping::Strip; } - self.skip(); + self.skip_non_blank(); if is_digit(self.look_ch()) { if self.ch() == '0' { return Err(ScanError::new( @@ -1507,7 +1514,7 @@ impl> Scanner { )); } increment = (self.ch() as usize) - ('0' as usize); - self.skip(); + self.skip_non_blank(); } } else if is_digit(self.ch()) { if self.ch() == '0' { @@ -1518,7 +1525,7 @@ impl> Scanner { } increment = (self.ch() as usize) - ('0' as usize); - self.skip(); + self.skip_non_blank(); self.lookahead(1); if self.ch() == '+' || self.ch() == '-' { if self.ch() == '+' { @@ -1526,7 +1533,7 @@ impl> Scanner { } else { chomping = Chomping::Strip; } - self.skip(); + self.skip_non_blank(); } } @@ -1622,10 +1629,14 @@ impl> Scanner { leading_blank = is_blank(self.ch()); - while !is_breakz(self.ch()) { + while !is_breakz(self.look_ch()) { string.push(self.ch()); - self.skip(); - self.lookahead(1); + // We may technically skip non-blank characters. However, the only distinction is + // to determine what is leading whitespace and what is not. Here, we read the + // contents of the line until either eof or a linebreak. We know we will not read + // `self.leading_whitespace` until the end of the line, where it will be reset. + // This allows us to call a slightly less expensive function. + self.skip_blank(); } // break on EOF if is_z(self.ch()) { @@ -1663,7 +1674,7 @@ impl> Scanner { self.lookahead(indent + 2); // Consume all spaces. Tabs cannot be used as indentation. while self.mark.col < indent && self.ch() == ' ' { - self.skip(); + self.skip_blank(); } // If our current line is empty, skip over the break and continue looping. @@ -1685,14 +1696,14 @@ impl> Scanner { loop { // Consume all spaces. Tabs cannot be used as indentation. while self.look_ch() == ' ' { - self.skip(); + self.skip_blank(); } if self.mark.col > max_indent { max_indent = self.mark.col; } - if is_break(self.look_ch()) { + if is_break(self.ch()) { // If our current line is empty, skip over the break and continue looping. self.lookahead(2); self.read_break(breaks); @@ -1742,7 +1753,7 @@ impl> Scanner { let mut leading_blanks; /* Eat the left quote. */ - self.skip(); + self.skip_non_blank(); loop { /* Check for a document indicator. */ @@ -1800,10 +1811,10 @@ impl> Scanner { "tab cannot be used as indentation", )); } - self.skip(); + self.skip_blank(); } else { whitespaces.push(self.ch()); - self.skip(); + self.skip_blank(); } } else { self.lookahead(2); @@ -1842,7 +1853,7 @@ impl> Scanner { } // loop // Eat the right quote. - self.skip(); + self.skip_non_blank(); // Ensure there is no invalid trailing content. self.skip_ws_to_eol(SkipTabs::Yes)?; match self.ch() { @@ -1892,8 +1903,7 @@ impl> Scanner { // Check for an escaped single quote. '\'' if self.buffer[1] == '\'' && single => { string.push('\''); - self.skip(); - self.skip(); + self.skip_n_non_blank(2); } // Check for the right quote. '\'' if single => break, @@ -1901,7 +1911,7 @@ impl> Scanner { // Check for an escaped line break. '\\' if !single && is_break(self.buffer[1]) => { self.lookahead(3); - self.skip(); + self.skip_non_blank(); self.skip_line(); *leading_blanks = true; break; @@ -1912,7 +1922,7 @@ impl> Scanner { } c => { string.push(c); - self.skip(); + self.skip_non_blank(); } } self.lookahead(2); @@ -1965,8 +1975,7 @@ impl> Scanner { )) } } - self.skip(); - self.skip(); + self.skip_n_non_blank(2); // Consume an arbitrary escape code. if code_length > 0 { @@ -1990,9 +1999,7 @@ impl> Scanner { }; ret = ch; - for _ in 0..code_length { - self.skip(); - } + self.skip_n_non_blank(code_length); } Ok(ret) } @@ -2086,7 +2093,7 @@ impl> Scanner { } string.push(self.ch()); - self.skip(); + self.skip_non_blank(); self.lookahead(2); } // is the end? @@ -2112,7 +2119,7 @@ impl> Scanner { if !leading_blanks { whitespaces.push(self.ch()); } - self.skip(); + self.skip_blank(); } else { self.lookahead(2); // Check if it is a first line break @@ -2171,7 +2178,7 @@ impl> Scanner { self.disallow_simple_key(); } - self.skip(); + self.skip_non_blank(); self.skip_yaml_whitespace()?; if self.ch() == '\t' { return Err(ScanError::new( @@ -2190,7 +2197,7 @@ impl> Scanner { self.implicit_flow_mapping = self.flow_level > 0 && !self.flow_mapping_started; // Skip over ':'. - self.skip(); + self.skip_non_blank(); if self.look_ch() == '\t' && !self.skip_ws_to_eol(SkipTabs::Yes)?.has_valid_yaml_ws() && (self.ch() == '-' || is_alpha(self.ch())) From cfbf287b3df89693a860ea7e2e7e472ecf1c4be3 Mon Sep 17 00:00:00 2001 From: Ethiraric Date: Wed, 24 Jan 2024 21:45:18 +0100 Subject: [PATCH 278/380] Buffer block scalar lines. Instead of doing a loop that goes: * fetch from input stream * push char into string Make a loop that fetches characters while they're not a breakz and _then_ extend the string. This avoids a bunch of reallocations. --- saphyr/src/scanner.rs | 41 ++++++++++++++++++++++++++++++++++++----- 1 file changed, 36 insertions(+), 5 deletions(-) diff --git a/saphyr/src/scanner.rs b/saphyr/src/scanner.rs index c79ee82..900b243 100644 --- a/saphyr/src/scanner.rs +++ b/saphyr/src/scanner.rs @@ -503,6 +503,17 @@ impl> Scanner { self.ch() } + /// Read a character from the input stream, place it in the buffer and return it. + /// + /// No character is consumed. The character returned is the one at the back of the buffer (the + /// one we just read from the input stream). + #[inline] + fn read_ch(&mut self) -> char { + let c = self.rdr.next().unwrap_or('\0'); + self.buffer.push_back(c); + c + } + /// Return whether the next character is `c`. #[inline] fn ch_is(&self, c: char) -> bool { @@ -1612,24 +1623,25 @@ impl> Scanner { } } - // We are at the beginning of a non-empty line. + // We are at the first content character of a content line. trailing_blank = is_blank(self.ch()); if !literal && !leading_break.is_empty() && !leading_blank && !trailing_blank { + string.push_str(&trailing_breaks); if trailing_breaks.is_empty() { string.push(' '); } - leading_break.clear(); } else { string.push_str(&leading_break); - leading_break.clear(); + string.push_str(&trailing_breaks); } - string.push_str(&trailing_breaks); + leading_break.clear(); trailing_breaks.clear(); leading_blank = is_blank(self.ch()); - while !is_breakz(self.look_ch()) { + // Start by evaluating characters in the buffer. + while !self.buffer.is_empty() && !is_breakz(self.ch()) { string.push(self.ch()); // We may technically skip non-blank characters. However, the only distinction is // to determine what is leading whitespace and what is not. Here, we read the @@ -1638,6 +1650,25 @@ impl> Scanner { // This allows us to call a slightly less expensive function. self.skip_blank(); } + + // All characters that were in the buffer were consumed. We need to check if more + // follow. + if self.buffer.is_empty() { + // We will read all consecutive non-breakz characters into `self.buffer` before + // pushing them all in `string` instead of moving them one by one. + while !is_breakz(self.read_ch()) {} + // The last character from the buffer is a breakz. We must not insert it. + let last_char = self.buffer.pop_back().unwrap(); + // We need to manually update our position; we won't call a `skip` function. + self.mark.col += self.buffer.len(); + self.mark.index += self.buffer.len(); + string.reserve(self.buffer.len()); + string.extend(self.buffer.iter()); + // Put back our breakz character, we didn't consume this one. + self.buffer.clear(); + self.buffer.push_back(last_char); + } + // break on EOF if is_z(self.ch()) { break; From f535e505a71ab66c0688ab73407e5be5d3d7ab6a Mon Sep 17 00:00:00 2001 From: Ethiraric Date: Wed, 24 Jan 2024 23:02:02 +0100 Subject: [PATCH 279/380] Avoid a trip to `self.buffer`. `self.buffer` is a `VecDeque`, meaning that characters are stored on 4B. When reading as we used to do, this means that every 1 byte character we read was turned into 4 bytes, which was turned into 1 byte in `String::extend`. Instead of going through `self.buffer`, use a local `String` to store the characters before pushing them to `string`. --- saphyr/src/scanner.rs | 94 +++++++++++++++++++++++++++---------------- 1 file changed, 59 insertions(+), 35 deletions(-) diff --git a/saphyr/src/scanner.rs b/saphyr/src/scanner.rs index 900b243..4b46cb6 100644 --- a/saphyr/src/scanner.rs +++ b/saphyr/src/scanner.rs @@ -503,15 +503,13 @@ impl> Scanner { self.ch() } - /// Read a character from the input stream, place it in the buffer and return it. + /// Read a character from the input stream, returning it directly. /// - /// No character is consumed. The character returned is the one at the back of the buffer (the - /// one we just read from the input stream). + /// The buffer is bypassed and `self.mark` would need to be updated manually. #[inline] - fn read_ch(&mut self) -> char { - let c = self.rdr.next().unwrap_or('\0'); - self.buffer.push_back(c); - c + #[must_use] + fn raw_read_ch(&mut self) -> char { + self.rdr.next().unwrap_or('\0') } /// Return whether the next character is `c`. @@ -1614,6 +1612,7 @@ impl> Scanner { )); } + let mut line_buffer = String::with_capacity(100); let start_mark = self.mark; while self.mark.col == indent && !is_z(self.ch()) { if indent == 0 { @@ -1640,34 +1639,7 @@ impl> Scanner { leading_blank = is_blank(self.ch()); - // Start by evaluating characters in the buffer. - while !self.buffer.is_empty() && !is_breakz(self.ch()) { - string.push(self.ch()); - // We may technically skip non-blank characters. However, the only distinction is - // to determine what is leading whitespace and what is not. Here, we read the - // contents of the line until either eof or a linebreak. We know we will not read - // `self.leading_whitespace` until the end of the line, where it will be reset. - // This allows us to call a slightly less expensive function. - self.skip_blank(); - } - - // All characters that were in the buffer were consumed. We need to check if more - // follow. - if self.buffer.is_empty() { - // We will read all consecutive non-breakz characters into `self.buffer` before - // pushing them all in `string` instead of moving them one by one. - while !is_breakz(self.read_ch()) {} - // The last character from the buffer is a breakz. We must not insert it. - let last_char = self.buffer.pop_back().unwrap(); - // We need to manually update our position; we won't call a `skip` function. - self.mark.col += self.buffer.len(); - self.mark.index += self.buffer.len(); - string.reserve(self.buffer.len()); - string.extend(self.buffer.iter()); - // Put back our breakz character, we didn't consume this one. - self.buffer.clear(); - self.buffer.push_back(last_char); - } + self.scan_block_scalar_content_line(&mut string, &mut line_buffer); // break on EOF if is_z(self.ch()) { @@ -1699,6 +1671,58 @@ impl> Scanner { Ok(Token(start_mark, TokenType::Scalar(style, string))) } + /// Retrieve the contents of the line, parsing it as a block scalar. + /// + /// The contents will be appended to `string`. `line_buffer` is used as a temporary buffer to + /// store bytes before pushing them to `string` and thus avoiding reallocating more than + /// necessary. `line_buffer` is assumed to be empty upon calling this function. It will be + /// `clear`ed before the end of the function. + /// + /// This function assumed the first character to read is the first content character in the + /// line. This function does not consume the line break character(s) after the line. + fn scan_block_scalar_content_line(&mut self, string: &mut String, line_buffer: &mut String) { + // Start by evaluating characters in the buffer. + while !self.buffer.is_empty() && !is_breakz(self.ch()) { + string.push(self.ch()); + // We may technically skip non-blank characters. However, the only distinction is + // to determine what is leading whitespace and what is not. Here, we read the + // contents of the line until either eof or a linebreak. We know we will not read + // `self.leading_whitespace` until the end of the line, where it will be reset. + // This allows us to call a slightly less expensive function. + self.skip_blank(); + } + + // All characters that were in the buffer were consumed. We need to check if more + // follow. + if self.buffer.is_empty() { + // We will read all consecutive non-breakz characters. We push them into a + // temporary buffer. The main difference with going through `self.buffer` is that + // characters are appended here as their real size (1B for ascii, or up to 4 bytes for + // UTF-8). We can then use the internal `line_buffer` `Vec` to push data into `string` + // (using `String::push_str`). + let mut c = self.raw_read_ch(); + while !is_breakz(c) { + line_buffer.push(c); + c = self.raw_read_ch(); + } + + // Our last character read is stored in `c`. It is either an EOF or a break. In any + // case, we need to push it back into `self.buffer` so it may be properly read + // after. We must not insert it in `string`. + self.buffer.push_back(c); + + // We need to manually update our position; we haven't called a `skip` function. + self.mark.col += line_buffer.len(); + self.mark.index += line_buffer.len(); + + // We can now append our bytes to our `string`. + string.reserve(line_buffer.as_bytes().len()); + string.push_str(line_buffer); + // This clears the _contents_ without touching the _capacity_. + line_buffer.clear(); + } + } + /// Skip the block scalar indentation and empty lines. fn skip_block_scalar_indent(&mut self, indent: usize, breaks: &mut String) { loop { From 99fb05c9375e8383083b4a8c59d3420cdbabf4ac Mon Sep 17 00:00:00 2001 From: Ethiraric Date: Thu, 25 Jan 2024 03:06:18 +0100 Subject: [PATCH 280/380] Improve `scan_plain_scalar` readability. Take whitespace checking out of the innermost loop for performance. --- saphyr/src/char_traits.rs | 2 +- saphyr/src/scanner.rs | 151 ++++++++++++++++++++++---------------- 2 files changed, 89 insertions(+), 64 deletions(-) diff --git a/saphyr/src/char_traits.rs b/saphyr/src/char_traits.rs index b95318c..4a08da1 100644 --- a/saphyr/src/char_traits.rs +++ b/saphyr/src/char_traits.rs @@ -28,7 +28,7 @@ pub(crate) fn is_blank(c: char) -> bool { /// /// `\0`, ` `, `\t`, `\n`, `\r` #[inline] -pub(crate) fn is_blankz(c: char) -> bool { +pub(crate) fn is_blank_or_breakz(c: char) -> bool { is_blank(c) || is_breakz(c) } diff --git a/saphyr/src/scanner.rs b/saphyr/src/scanner.rs index 4b46cb6..b30cc9d 100644 --- a/saphyr/src/scanner.rs +++ b/saphyr/src/scanner.rs @@ -4,8 +4,8 @@ use std::{char, collections::VecDeque, error::Error, fmt}; use crate::char_traits::{ - as_hex, is_alpha, is_anchor_char, is_blank, is_blankz, is_break, is_breakz, is_digit, is_flow, - is_hex, is_tag_char, is_uri_char, is_z, + as_hex, is_alpha, is_anchor_char, is_blank, is_blank_or_breakz, is_break, is_breakz, is_digit, + is_flow, is_hex, is_tag_char, is_uri_char, is_z, }; #[derive(Clone, Copy, PartialEq, Debug, Eq)] @@ -505,7 +505,7 @@ impl> Scanner { /// Read a character from the input stream, returning it directly. /// - /// The buffer is bypassed and `self.mark` would need to be updated manually. + /// The buffer is bypassed and `self.mark` needs to be updated manually. #[inline] #[must_use] fn raw_read_ch(&mut self) -> char { @@ -559,7 +559,7 @@ impl> Scanner { self.buffer[0] == '.' && self.buffer[1] == '.' && self.buffer[2] == '.' - && is_blankz(self.buffer[3]) + && is_blank_or_breakz(self.buffer[3]) } /// Insert a token at the given position. @@ -614,7 +614,7 @@ impl> Scanner { && self.buffer[0] == '-' && self.buffer[1] == '-' && self.buffer[2] == '-' - && is_blankz(self.buffer[3]) + && is_blank_or_breakz(self.buffer[3]) { self.fetch_document_indicator(TokenType::DocumentStart)?; return Ok(()); @@ -624,7 +624,7 @@ impl> Scanner { && self.buffer[0] == '.' && self.buffer[1] == '.' && self.buffer[2] == '.' - && is_blankz(self.buffer[3]) + && is_blank_or_breakz(self.buffer[3]) { self.fetch_document_indicator(TokenType::DocumentEnd)?; self.skip_ws_to_eol(SkipTabs::Yes)?; @@ -649,9 +649,9 @@ impl> Scanner { ']' => self.fetch_flow_collection_end(TokenType::FlowSequenceEnd), '}' => self.fetch_flow_collection_end(TokenType::FlowMappingEnd), ',' => self.fetch_flow_entry(), - '-' if is_blankz(nc) => self.fetch_block_entry(), - '?' if is_blankz(nc) => self.fetch_key(), - ':' if is_blankz(nc) + '-' if is_blank_or_breakz(nc) => self.fetch_block_entry(), + '?' if is_blank_or_breakz(nc) => self.fetch_key(), + ':' if is_blank_or_breakz(nc) || (self.flow_level > 0 && (is_flow(nc) || self.mark.index == self.adjacent_value_allowed_at)) => { @@ -669,8 +669,10 @@ impl> Scanner { '\'' => self.fetch_flow_scalar(true), '"' => self.fetch_flow_scalar(false), // plain scalar - '-' if !is_blankz(nc) => self.fetch_plain_scalar(), - ':' | '?' if !is_blankz(nc) && self.flow_level == 0 => self.fetch_plain_scalar(), + '-' if !is_blank_or_breakz(nc) => self.fetch_plain_scalar(), + ':' | '?' if !is_blank_or_breakz(nc) && self.flow_level == 0 => { + self.fetch_plain_scalar() + } '%' | '@' | '`' => Err(ScanError::new( self.mark, &format!("unexpected character: `{c}'"), @@ -992,7 +994,7 @@ impl> Scanner { )); } - if !is_blankz(self.ch()) { + if !is_blank_or_breakz(self.ch()) { return Err(ScanError::new( start_mark, "while scanning a directive, found unexpected non-alphabetical character", @@ -1043,7 +1045,7 @@ impl> Scanner { self.lookahead(1); - if is_blankz(self.ch()) { + if is_blank_or_breakz(self.ch()) { Ok(Token(*mark, TokenType::TagDirective(handle, prefix))) } else { Err(ScanError::new( @@ -1093,7 +1095,7 @@ impl> Scanner { } } - if is_blankz(self.look_ch()) || (self.flow_level > 0 && is_flow(self.ch())) { + if is_blank_or_breakz(self.look_ch()) || (self.flow_level > 0 && is_flow(self.ch())) { // XXX: ex 7.2, an empty scalar can follow a secondary tag Ok(Token(start_mark, TokenType::Tag(handle, suffix))) } else { @@ -1442,7 +1444,7 @@ impl> Scanner { self.roll_indent(mark.col, None, TokenType::BlockSequenceStart, mark); let found_tabs = self.skip_ws_to_eol(SkipTabs::Yes)?.found_tabs(); self.lookahead(2); - if found_tabs && self.buffer[0] == '-' && is_blankz(self.buffer[1]) { + if found_tabs && self.buffer[0] == '-' && is_blank_or_breakz(self.buffer[1]) { return Err(ScanError::new( self.mark, "'-' must be followed by a valid YAML whitespace", @@ -1819,7 +1821,7 @@ impl> Scanner { || ((self.buffer[0] == '.') && (self.buffer[1] == '.') && (self.buffer[2] == '.'))) - && is_blankz(self.buffer[3]) + && is_blank_or_breakz(self.buffer[3]) { return Err(ScanError::new( start_mark, @@ -1953,7 +1955,7 @@ impl> Scanner { start_mark: &Marker, ) -> Result<(), ScanError> { self.lookahead(2); - while !is_blankz(self.ch()) { + while !is_blank_or_breakz(self.ch()) { match self.ch() { // Check for an escaped single quote. '\'' if self.buffer[1] == '\'' && single => { @@ -2069,6 +2071,10 @@ impl> Scanner { Ok(()) } + /// Scan for a plain scalar. + /// + /// Plain scalars are the most readable but restricted style. They may span multiple lines in + /// some contexts. #[allow(clippy::too_many_lines)] fn scan_plain_scalar(&mut self) -> Result { self.unroll_non_block_indents(); @@ -2086,7 +2092,6 @@ impl> Scanner { let mut leading_break = String::new(); let mut trailing_breaks = String::new(); let mut whitespaces = String::new(); - let mut leading_blanks = true; loop { /* Check for a document indicator. */ @@ -2096,7 +2101,7 @@ impl> Scanner { || ((self.buffer[0] == '.') && (self.buffer[1] == '.') && (self.buffer[2] == '.'))) - && is_blankz(self.buffer[3]) + && is_blank_or_breakz(self.buffer[3]) { break; } @@ -2112,78 +2117,79 @@ impl> Scanner { )); } - while !is_blankz(self.ch()) { - // indicators can end a plain scalar, see 7.3.3. Plain Style - match self.ch() { - ':' if is_blankz(self.buffer[1]) - || (self.flow_level > 0 && is_flow(self.buffer[1])) => - { - break; - } - c if is_flow(c) && self.flow_level > 0 => break, - _ => {} - } - - if leading_blanks || !whitespaces.is_empty() { - if leading_blanks { - if leading_break.is_empty() { - string.push_str(&leading_break); + if !is_blank_or_breakz(self.ch()) + && self.next_can_be_plain_scalar() + && (self.leading_whitespace || !whitespaces.is_empty()) + { + if self.leading_whitespace { + if leading_break.is_empty() { + string.push_str(&leading_break); + string.push_str(&trailing_breaks); + trailing_breaks.clear(); + leading_break.clear(); + } else { + if trailing_breaks.is_empty() { + string.push(' '); + } else { string.push_str(&trailing_breaks); trailing_breaks.clear(); - leading_break.clear(); - } else { - if trailing_breaks.is_empty() { - string.push(' '); - } else { - string.push_str(&trailing_breaks); - trailing_breaks.clear(); - } - leading_break.clear(); } - leading_blanks = false; - } else { - string.push_str(&whitespaces); - whitespaces.clear(); + leading_break.clear(); } + self.leading_whitespace = false; + } else { + string.push_str(&whitespaces); + whitespaces.clear(); + } + } + + // Add content non-blank characters to the scalar. + while !is_blank_or_breakz(self.ch()) { + if !self.next_can_be_plain_scalar() { + break; } string.push(self.ch()); self.skip_non_blank(); self.lookahead(2); } - // is the end? + + // We may reach the end of a plain scalar if: + // - We reach eof + // - We reach ": " + // - We find a flow character in a flow context if !(is_blank(self.ch()) || is_break(self.ch())) { break; } + // Process blank characters. while is_blank(self.look_ch()) || is_break(self.ch()) { if is_blank(self.ch()) { - if leading_blanks && (self.mark.col as isize) < indent && self.ch() == '\t' { - // If our line contains only whitespace, this is not an error. - // Skip over it. + if !self.leading_whitespace { + whitespaces.push(self.ch()); + self.skip_blank(); + } else if (self.mark.col as isize) < indent && self.ch() == '\t' { + // Tabs in an indentation columns are allowed if and only if the line is + // empty. Skip to the end of the line. self.skip_ws_to_eol(SkipTabs::Yes)?; - if is_breakz(self.ch()) { - continue; - } + if !is_breakz(self.ch()) { return Err(ScanError::new( start_mark, "while scanning a plain scalar, found a tab", )); + } + } else { + self.skip_blank(); } - - if !leading_blanks { - whitespaces.push(self.ch()); - } - self.skip_blank(); } else { self.lookahead(2); // Check if it is a first line break - if leading_blanks { + if self.leading_whitespace { self.read_break(&mut trailing_breaks); } else { whitespaces.clear(); self.read_break(&mut leading_break); - leading_blanks = true; + self.leading_whitespace = true; } } } @@ -2194,7 +2200,7 @@ impl> Scanner { } } - if leading_blanks { + if self.leading_whitespace { self.allow_simple_key(); } @@ -2432,6 +2438,25 @@ impl> Scanner { Ok(()) } + /// Check whether the next characters may be part of a plain scalar. + /// + /// This function assumes we are not given a blankz character. + // For some reason, `#[inline]` is not enough. + #[allow(clippy::inline_always)] + #[inline(always)] + fn next_can_be_plain_scalar(&self) -> bool { + match self.ch() { + // indicators can end a plain scalar, see 7.3.3. Plain Style + ':' if is_blank_or_breakz(self.buffer[1]) + || (self.flow_level > 0 && is_flow(self.buffer[1])) => + { + false + } + c if self.flow_level > 0 && is_flow(c) => false, + _ => true, + } + } + /// Return whether the scanner is inside a block but outside of a flow sequence. fn is_within_block(&self) -> bool { !self.indents.is_empty() From 1dcb1eee34fa23e430f93058ba1916c1b16341b3 Mon Sep 17 00:00:00 2001 From: Ethiraric Date: Tue, 30 Jan 2024 22:37:32 +0100 Subject: [PATCH 281/380] Rename generator tool. --- saphyr/examples/{gen_large_yaml_array => gen_large_yaml}/gen.rs | 0 saphyr/examples/{gen_large_yaml_array => gen_large_yaml}/main.rs | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename saphyr/examples/{gen_large_yaml_array => gen_large_yaml}/gen.rs (100%) rename saphyr/examples/{gen_large_yaml_array => gen_large_yaml}/main.rs (100%) diff --git a/saphyr/examples/gen_large_yaml_array/gen.rs b/saphyr/examples/gen_large_yaml/gen.rs similarity index 100% rename from saphyr/examples/gen_large_yaml_array/gen.rs rename to saphyr/examples/gen_large_yaml/gen.rs diff --git a/saphyr/examples/gen_large_yaml_array/main.rs b/saphyr/examples/gen_large_yaml/main.rs similarity index 100% rename from saphyr/examples/gen_large_yaml_array/main.rs rename to saphyr/examples/gen_large_yaml/main.rs From 5d94b8174d46024bcafed9d1b6ea27d541949ea9 Mon Sep 17 00:00:00 2001 From: Ethiraric Date: Tue, 30 Jan 2024 23:11:00 +0100 Subject: [PATCH 282/380] Add big string array bench generation. --- saphyr/examples/gen_large_yaml/gen.rs | 6 ++++++ saphyr/examples/gen_large_yaml/main.rs | 18 +++++++++++++++++- 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/saphyr/examples/gen_large_yaml/gen.rs b/saphyr/examples/gen_large_yaml/gen.rs index b687371..2a7dffe 100644 --- a/saphyr/examples/gen_large_yaml/gen.rs +++ b/saphyr/examples/gen_large_yaml/gen.rs @@ -120,6 +120,12 @@ pub fn name(rng: &mut ThreadRng, len_lo: usize, len_hi: usize) -> String { ret } +/// Generate a set of words. +pub fn words(rng: &mut ThreadRng, words_lo: usize, words_hi: usize) -> String { + let nwords = rng.gen_range(words_lo..words_hi); + lipsum::lipsum_words_with_rng(rng.clone(), nwords).replace(|c| "-\'\",*:".contains(c), "") +} + /// Generate a lipsum text. /// /// Texts are composed of some paragraphs and empty lines between them. diff --git a/saphyr/examples/gen_large_yaml/main.rs b/saphyr/examples/gen_large_yaml/main.rs index 3d21c48..15d0c31 100644 --- a/saphyr/examples/gen_large_yaml/main.rs +++ b/saphyr/examples/gen_large_yaml/main.rs @@ -9,7 +9,7 @@ use rand::{rngs::ThreadRng, Rng}; fn main() -> std::fmt::Result { let mut s = String::new(); let mut g = Generator::new(); - g.gen_record_array(&mut s, 100_000, 100_001)?; + g.gen_strings_array(&mut s, 1_300_000, 1_300_001, 10, 40)?; println!("{s}"); Ok(()) } @@ -43,9 +43,25 @@ impl Generator { self.gen_array(writer, items_lo, items_hi, Generator::gen_record_object) } + /// Generate an array of strings. + fn gen_strings_array( + &mut self, + writer: &mut W, + items_lo: usize, + items_hi: usize, + words_lo: usize, + words_hi: usize, + ) -> std::fmt::Result { + self.gen_array(writer, items_lo, items_hi, |gen, writer| { + write!(writer, "{}", gen::words(&mut gen.rng, words_lo, words_hi)) + }) + } + /// Generate a YAML object/mapping containing a record. /// /// Fields are description, hash, version, home, repository and pdf. + /// The `description` field is a long string and puts a lot of weight in plain scalar / block + /// scalar parsing. fn gen_record_object(&mut self, writer: &mut W) -> std::fmt::Result { let mut fields = HashMap::>>::new(); fields.insert( From 671a498bf49b2d65ba3c1848cadfc450112c6073 Mon Sep 17 00:00:00 2001 From: Ethiraric Date: Wed, 31 Jan 2024 22:02:53 +0100 Subject: [PATCH 283/380] Minor improvements. --- saphyr/src/scanner.rs | 72 ++++++++++++++++++++++--------------------- 1 file changed, 37 insertions(+), 35 deletions(-) diff --git a/saphyr/src/scanner.rs b/saphyr/src/scanner.rs index b30cc9d..15cda9c 100644 --- a/saphyr/src/scanner.rs +++ b/saphyr/src/scanner.rs @@ -452,9 +452,7 @@ impl> Scanner { /// Consume the next characters. It is assumed none of the next characters are blanks. #[inline] fn skip_n_non_blank(&mut self, n: usize) { - for _ in 0..n { - self.buffer.pop_front(); - } + self.buffer.drain(0..n); self.mark.index += n; self.mark.col += n; @@ -556,12 +554,25 @@ impl> Scanner { /// /// [`Self::lookahead`] must have been called before calling this function. fn next_is_document_end(&self) -> bool { + assert!(self.buffer.len() >= 4); self.buffer[0] == '.' && self.buffer[1] == '.' && self.buffer[2] == '.' && is_blank_or_breakz(self.buffer[3]) } + /// Check whether the next characters correspond to a document indicator. + /// + /// [`Self::lookahead`] must have been called before calling this function. + #[inline] + fn next_is_document_indicator(&self) -> bool { + assert!(self.buffer.len() >= 4); + self.mark.col == 0 + && (((self.buffer[0] == '-') && (self.buffer[1] == '-') && (self.buffer[2] == '-')) + || ((self.buffer[0] == '.') && (self.buffer[1] == '.') && (self.buffer[2] == '.'))) + && is_blank_or_breakz(self.buffer[3]) + } + /// Insert a token at the given position. fn insert_token(&mut self, pos: usize, tok: Token) { let old_len = self.tokens.len(); @@ -2088,25 +2099,14 @@ impl> Scanner { )); } - let mut string = String::new(); - let mut leading_break = String::new(); - let mut trailing_breaks = String::new(); - let mut whitespaces = String::new(); + let mut string = String::with_capacity(32); + let mut leading_break = String::with_capacity(32); + let mut trailing_breaks = String::with_capacity(32); + let mut whitespaces = String::with_capacity(32); loop { - /* Check for a document indicator. */ self.lookahead(4); - if self.mark.col == 0 - && (((self.buffer[0] == '-') && (self.buffer[1] == '-') && (self.buffer[2] == '-')) - || ((self.buffer[0] == '.') - && (self.buffer[1] == '.') - && (self.buffer[2] == '.'))) - && is_blank_or_breakz(self.buffer[3]) - { - break; - } - - if self.ch() == '#' { + if self.next_is_document_indicator() || self.ch() == '#' { break; } @@ -2117,10 +2117,7 @@ impl> Scanner { )); } - if !is_blank_or_breakz(self.ch()) - && self.next_can_be_plain_scalar() - && (self.leading_whitespace || !whitespaces.is_empty()) - { + if !is_blank_or_breakz(self.ch()) && self.next_can_be_plain_scalar() { if self.leading_whitespace { if leading_break.is_empty() { string.push_str(&leading_break); @@ -2137,21 +2134,26 @@ impl> Scanner { leading_break.clear(); } self.leading_whitespace = false; - } else { + } else if !whitespaces.is_empty() { string.push_str(&whitespaces); whitespaces.clear(); } - } - - // Add content non-blank characters to the scalar. - while !is_blank_or_breakz(self.ch()) { - if !self.next_can_be_plain_scalar() { - break; - } + // We can unroll the first iteration of the loop. string.push(self.ch()); self.skip_non_blank(); self.lookahead(2); + + // Add content non-blank characters to the scalar. + while !is_blank_or_breakz(self.ch()) { + if !self.next_can_be_plain_scalar() { + break; + } + + string.push(self.ch()); + self.skip_non_blank(); + self.lookahead(2); + } } // We may reach the end of a plain scalar if: @@ -2173,10 +2175,10 @@ impl> Scanner { // empty. Skip to the end of the line. self.skip_ws_to_eol(SkipTabs::Yes)?; if !is_breakz(self.ch()) { - return Err(ScanError::new( - start_mark, - "while scanning a plain scalar, found a tab", - )); + return Err(ScanError::new( + start_mark, + "while scanning a plain scalar, found a tab", + )); } } else { self.skip_blank(); From 7b431c77d47a0fbaaf2f8706050537fb3a078a9c Mon Sep 17 00:00:00 2001 From: Ethiraric Date: Thu, 8 Feb 2024 07:04:38 +0100 Subject: [PATCH 284/380] Improve comments. --- saphyr/examples/gen_large_yaml/main.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/saphyr/examples/gen_large_yaml/main.rs b/saphyr/examples/gen_large_yaml/main.rs index 15d0c31..4ba7ad9 100644 --- a/saphyr/examples/gen_large_yaml/main.rs +++ b/saphyr/examples/gen_large_yaml/main.rs @@ -43,7 +43,7 @@ impl Generator { self.gen_array(writer, items_lo, items_hi, Generator::gen_record_object) } - /// Generate an array of strings. + /// Generate an array of lipsum one-liners. fn gen_strings_array( &mut self, writer: &mut W, @@ -128,6 +128,7 @@ impl Generator { self.gen_array(writer, items_lo, items_hi, Generator::gen_author_object) } + /// Generate a small object with 2 string fields. fn gen_author_object(&mut self, writer: &mut W) -> std::fmt::Result { let mut fields = HashMap::>>::new(); fields.insert( From 8a80e547c65520d03842bcfd0ae76d6386b5ec9d Mon Sep 17 00:00:00 2001 From: Ethiraric Date: Tue, 13 Feb 2024 23:10:17 +0100 Subject: [PATCH 285/380] Fix some clippy lints. --- saphyr/src/scanner.rs | 4 ++-- saphyr/tests/basic.rs | 20 ++++++++++---------- saphyr/tests/emitter.rs | 28 ++++++++++++++-------------- saphyr/tests/yaml-test-suite.rs | 2 +- 4 files changed, 27 insertions(+), 27 deletions(-) diff --git a/saphyr/src/scanner.rs b/saphyr/src/scanner.rs index 15cda9c..eb16a44 100644 --- a/saphyr/src/scanner.rs +++ b/saphyr/src/scanner.rs @@ -414,7 +414,7 @@ impl> Scanner { /// clone of) the same error. #[inline] pub fn get_error(&self) -> Option { - self.error.as_ref().map(std::clone::Clone::clone) + self.error.clone() } /// Fill `self.buffer` with at least `count` characters. @@ -746,7 +746,7 @@ impl> Scanner { /// # Errors /// This function returns an error if one of the key we would stale was required to be a key. fn stale_simple_keys(&mut self) -> ScanResult { - for (_, sk) in self.simple_keys.iter_mut().enumerate() { + for sk in &mut self.simple_keys { if sk.possible // If not in a flow construct, simple keys cannot span multiple lines. && self.flow_level == 0 diff --git a/saphyr/tests/basic.rs b/saphyr/tests/basic.rs index b82ac2d..e516cd6 100644 --- a/saphyr/tests/basic.rs +++ b/saphyr/tests/basic.rs @@ -352,11 +352,11 @@ fn test_integer_key() { #[test] fn test_indentation_equality() { let four_spaces = YamlLoader::load_from_str( - r#" + r" hash: with: indentations -"#, +", ) .unwrap() .into_iter() @@ -364,11 +364,11 @@ hash: .unwrap(); let two_spaces = YamlLoader::load_from_str( - r#" + r" hash: with: indentations -"#, +", ) .unwrap() .into_iter() @@ -376,11 +376,11 @@ hash: .unwrap(); let one_space = YamlLoader::load_from_str( - r#" + r" hash: with: indentations -"#, +", ) .unwrap() .into_iter() @@ -388,11 +388,11 @@ hash: .unwrap(); let mixed_spaces = YamlLoader::load_from_str( - r#" + r" hash: with: indentations -"#, +", ) .unwrap() .into_iter() @@ -408,7 +408,7 @@ hash: fn test_two_space_indentations() { // https://github.com/kbknapp/clap-rs/issues/965 - let s = r#" + let s = r" subcommands: - server: about: server related commands @@ -418,7 +418,7 @@ subcommands2: subcommands3: - server: about: server related commands - "#; + "; let out = YamlLoader::load_from_str(s).unwrap(); let doc = &out.into_iter().next().unwrap(); diff --git a/saphyr/tests/emitter.rs b/saphyr/tests/emitter.rs index 59028df..6460468 100644 --- a/saphyr/tests/emitter.rs +++ b/saphyr/tests/emitter.rs @@ -36,7 +36,7 @@ a4: #[test] fn test_emit_complex() { - let s = r#" + let s = r" cataloge: product: &coffee { name: Coffee, price: 2.5 , unit: 1l } product: &cookies { name: Cookies!, price: 3.40 , unit: 400g} @@ -54,7 +54,7 @@ products: bool key {}: empty hash key - "#; + "; let docs = YamlLoader::load_from_str(s).unwrap(); let doc = &docs[0]; let mut writer = String::new(); @@ -66,8 +66,8 @@ products: Ok(y) => y, Err(e) => panic!("{}", e), }; - let doc_new = &docs_new[0]; - assert_eq!(doc, doc_new); + let new_doc = &docs_new[0]; + assert_eq!(doc, new_doc); } #[test] @@ -190,7 +190,7 @@ fn test_empty_and_nested_compact() { fn test_empty_and_nested_flag(compact: bool) { let s = if compact { - r#"--- + r"--- a: b: c: hello @@ -198,9 +198,9 @@ a: e: - f - g - - h: []"# + - h: []" } else { - r#"--- + r"--- a: b: c: hello @@ -209,7 +209,7 @@ e: - f - g - - h: []"# + h: []" }; let docs = YamlLoader::load_from_str(s).unwrap(); @@ -226,13 +226,13 @@ e: #[test] fn test_nested_arrays() { - let s = r#"--- + let s = r"--- a: - b - - c - d - - e - - f"#; + - f"; let docs = YamlLoader::load_from_str(s).unwrap(); let doc = &docs[0]; @@ -249,14 +249,14 @@ a: #[test] fn test_deeply_nested_arrays() { - let s = r#"--- + let s = r"--- a: - b - - c - d - - e - - f - - - e"#; + - - e"; let docs = YamlLoader::load_from_str(s).unwrap(); let doc = &docs[0]; @@ -273,12 +273,12 @@ a: #[test] fn test_nested_hashes() { - let s = r#"--- + let s = r"--- a: b: c: d: - e: f"#; + e: f"; let docs = YamlLoader::load_from_str(s).unwrap(); let doc = &docs[0]; diff --git a/saphyr/tests/yaml-test-suite.rs b/saphyr/tests/yaml-test-suite.rs index 8e67dd4..befebc1 100644 --- a/saphyr/tests/yaml-test-suite.rs +++ b/saphyr/tests/yaml-test-suite.rs @@ -80,7 +80,7 @@ fn load_tests_from_file(entry: &DirEntry) -> Result>> { let test_name = file_name .strip_suffix(".yaml") .ok_or("unexpected filename")?; - let tests = YamlLoader::load_from_str(&fs::read_to_string(&entry.path())?)?; + let tests = YamlLoader::load_from_str(&fs::read_to_string(entry.path())?)?; let tests = tests[0].as_vec().ok_or("no test list found in file")?; let mut result = vec![]; From 765f2bb672656966d1cc79ea98cd72c1f7292264 Mon Sep 17 00:00:00 2001 From: Ethiraric Date: Tue, 13 Feb 2024 23:10:32 +0100 Subject: [PATCH 286/380] Replace `VecDeque` with `ArrayDeque`. This removes all allocations in the `Scanner` code. The downside is that the buffer is now stored in the `Scanner` structure, making it 48 bytes larger. This however makes the code much more performant. --- saphyr/Cargo.toml | 1 + saphyr/src/scanner.rs | 42 +++++++++++++++++++++++++++++++++++------- 2 files changed, 36 insertions(+), 7 deletions(-) diff --git a/saphyr/Cargo.toml b/saphyr/Cargo.toml index bf409e1..a24c258 100644 --- a/saphyr/Cargo.toml +++ b/saphyr/Cargo.toml @@ -13,6 +13,7 @@ readme = "README.md" edition = "2018" [dependencies] +arraydeque = "0.5.1" linked-hash-map = "0.5.3" [dev-dependencies] diff --git a/saphyr/src/scanner.rs b/saphyr/src/scanner.rs index eb16a44..24a33dd 100644 --- a/saphyr/src/scanner.rs +++ b/saphyr/src/scanner.rs @@ -3,6 +3,8 @@ use std::{char, collections::VecDeque, error::Error, fmt}; +use arraydeque::ArrayDeque; + use crate::char_traits::{ as_hex, is_alpha, is_anchor_char, is_blank, is_blank_or_breakz, is_break, is_breakz, is_digit, is_flow, is_hex, is_tag_char, is_uri_char, is_z, @@ -287,6 +289,17 @@ struct Indent { needs_block_end: bool, } +/// The size of the [`Scanner`] buffer. +/// +/// The buffer is statically allocated to avoid conditions for reallocations each time we +/// consume/push a character. As of now, almost all lookaheads are 4 characters maximum, except: +/// - Escape sequences parsing: some escape codes are 8 characters +/// - Scanning indent in scalars: this looks ahead `indent + 2` characters +/// This constant must be set to at least 8. When scanning indent in scalars, the lookahead is done +/// in a single call if and only if the indent is `BUFFER_LEN - 2` or less. If the indent is higher +/// than that, the code will fall back to a loop of lookaheads. +const BUFFER_LEN: usize = 16; + /// The YAML scanner. /// /// This corresponds to the low-level interface when reading YAML. The scanner emits token as they @@ -311,7 +324,7 @@ pub struct Scanner { /// [`Self::next`] until we have more context. tokens: VecDeque, /// Buffer for the next characters to consume. - buffer: VecDeque, + buffer: ArrayDeque, /// The last error that happened. error: Option, @@ -387,7 +400,7 @@ impl> Scanner { pub fn new(rdr: T) -> Scanner { Scanner { rdr, - buffer: VecDeque::new(), + buffer: ArrayDeque::new(), mark: Marker::new(0, 1, 0), tokens: VecDeque::new(), error: None, @@ -426,7 +439,9 @@ impl> Scanner { return; } for _ in 0..(count - self.buffer.len()) { - self.buffer.push_back(self.rdr.next().unwrap_or('\0')); + self.buffer + .push_back(self.rdr.next().unwrap_or('\0')) + .unwrap(); } } @@ -1722,7 +1737,7 @@ impl> Scanner { // Our last character read is stored in `c`. It is either an EOF or a break. In any // case, we need to push it back into `self.buffer` so it may be properly read // after. We must not insert it in `string`. - self.buffer.push_back(c); + self.buffer.push_back(c).unwrap(); // We need to manually update our position; we haven't called a `skip` function. self.mark.col += line_buffer.len(); @@ -1739,10 +1754,23 @@ impl> Scanner { /// Skip the block scalar indentation and empty lines. fn skip_block_scalar_indent(&mut self, indent: usize, breaks: &mut String) { loop { - self.lookahead(indent + 2); // Consume all spaces. Tabs cannot be used as indentation. - while self.mark.col < indent && self.ch() == ' ' { - self.skip_blank(); + if indent < BUFFER_LEN - 2 { + self.lookahead(BUFFER_LEN); + while self.mark.col < indent && self.ch() == ' ' { + self.skip_blank(); + } + } else { + loop { + self.lookahead(BUFFER_LEN); + while !self.buffer.is_empty() && self.mark.col < indent && self.ch() == ' ' { + self.skip_blank(); + } + if !(!self.buffer.is_empty() && self.mark.col < indent && self.ch() == ' ') { + break; + } + } + self.lookahead(2); } // If our current line is empty, skip over the break and continue looping. From 3d8a54d3841a69f16dec9e061c3a8b1b19a9e0c1 Mon Sep 17 00:00:00 2001 From: Ethiraric Date: Wed, 14 Feb 2024 00:35:41 +0100 Subject: [PATCH 287/380] Add a generator for nested objects. --- saphyr/examples/gen_large_yaml/main.rs | 6 +- saphyr/examples/gen_large_yaml/nested.rs | 108 +++++++++++++++++++++++ 2 files changed, 112 insertions(+), 2 deletions(-) create mode 100644 saphyr/examples/gen_large_yaml/nested.rs diff --git a/saphyr/examples/gen_large_yaml/main.rs b/saphyr/examples/gen_large_yaml/main.rs index 4ba7ad9..cfc8d70 100644 --- a/saphyr/examples/gen_large_yaml/main.rs +++ b/saphyr/examples/gen_large_yaml/main.rs @@ -1,6 +1,7 @@ #![allow(dead_code)] mod gen; +mod nested; use std::collections::HashMap; @@ -8,8 +9,9 @@ use rand::{rngs::ThreadRng, Rng}; fn main() -> std::fmt::Result { let mut s = String::new(); - let mut g = Generator::new(); - g.gen_strings_array(&mut s, 1_300_000, 1_300_001, 10, 40)?; + // let mut g = Generator::new(); + // g.gen_strings_array(&mut s, 1_300_000, 1_300_001, 10, 40)?; + nested::create_deep_object(&mut s, 5_000_000)?; println!("{s}"); Ok(()) } diff --git a/saphyr/examples/gen_large_yaml/nested.rs b/saphyr/examples/gen_large_yaml/nested.rs new file mode 100644 index 0000000..3977901 --- /dev/null +++ b/saphyr/examples/gen_large_yaml/nested.rs @@ -0,0 +1,108 @@ +use std::{cell::RefCell, rc::Rc}; + +use rand::{rngs::ThreadRng, Rng}; + +/// Create a deep object with the given amount of nodes. +pub fn create_deep_object(writer: &mut W, n_nodes: usize) -> std::fmt::Result { + let mut tree = Tree::new(); + for _ in 0..n_nodes { + tree.push_node(); + } + tree.write_to(writer) +} + +/// An n-tree. +/// +/// The algorithm used to generate a potentially deep object is to create a tree, one node at a +/// time, where each node is put as a child of a random existing node in the tree. +struct Tree { + /// The tree-view of the tree. + root: Rc>, + /// Array of all the nodes in the tree, including the root node. + nodes: Vec>>, + /// The RNG state. + rng: ThreadRng, +} + +/// A node in a tree. +struct Node { + /// All the children of the node. + children: Vec>>, +} + +impl Tree { + /// Create a new tree. + fn new() -> Self { + let root = Node::new_rc_refcell(); + Tree { + root: root.clone(), + nodes: vec![root], + rng: rand::thread_rng(), + } + } + + /// Add a new node as a child of a random node in the tree. + fn push_node(&mut self) { + let new_node = Node::new_rc_refcell(); + let n_nodes = self.nodes.len(); + let parent = &mut self.nodes[self.rng.gen_range(0..n_nodes)]; + (**parent).borrow_mut().push_child(new_node.clone()); + self.nodes.push(new_node); + } + + /// Write the YAML representation of the tree to `writer`. + fn write_to(&self, writer: &mut W) -> std::fmt::Result { + (*self.root).borrow().write_to(writer, 0) + } +} + +impl Node { + /// Create a new node. + fn new() -> Self { + Node { children: vec![] } + } + + fn new_rc_refcell() -> Rc> { + Rc::new(RefCell::new(Self::new())) + } + + /// Append a child to the node. + fn push_child(&mut self, child: Rc>) { + self.children.push(child); + } + + /// Write the YAML representation of the node to `writer`. + fn write_to(&self, writer: &mut W, indent: usize) -> std::fmt::Result { + if self.children.is_empty() { + write_n(writer, ' ', indent)?; + writer.write_str("a: 1\n")?; + } else { + for (n, child) in self.children.iter().enumerate() { + write_n(writer, ' ', indent)?; + write_id_for_number(writer, n)?; + writer.write_str(":\n")?; + (**child).borrow().write_to(writer, indent + 2)?; + } + } + Ok(()) + } +} + +/// Write `n` times `c` to `out`. +fn write_n(out: &mut W, c: char, n: usize) -> std::fmt::Result { + for _ in 0..n { + out.write_char(c)?; + } + Ok(()) +} + +/// Create a valid identifier for the given number. +fn write_id_for_number(out: &mut W, mut n: usize) -> std::fmt::Result { + const DIGITS: &[u8] = b"_abcdefghijklmnopqrstuvwxyz"; + n += 1; + while n > 0 { + out.write_char(DIGITS[n % DIGITS.len()] as char)?; + n /= DIGITS.len(); + } + Ok(()) +} From 861dfb64979e86b1c4135307ffacf43d710ddd64 Mon Sep 17 00:00:00 2001 From: Ethiraric Date: Thu, 14 Mar 2024 19:20:56 +0100 Subject: [PATCH 288/380] Remove tools from examples. Add documentation for those tools, and make it so that `gen_large_yaml` generates a predetermined set of files instead of outputting to its standard output. --- bench/tools/README.md | 188 +++++++++++++++++ bench/tools/dump_events.rs | 38 ++++ bench/tools/gen_large_yaml/Cargo.toml | 20 ++ bench/tools/gen_large_yaml/src/gen.rs | 156 ++++++++++++++ bench/tools/gen_large_yaml/src/main.rs | 255 +++++++++++++++++++++++ bench/tools/gen_large_yaml/src/nested.rs | 111 ++++++++++ bench/tools/time_parse.rs | 31 +++ 7 files changed, 799 insertions(+) create mode 100644 bench/tools/README.md create mode 100644 bench/tools/dump_events.rs create mode 100644 bench/tools/gen_large_yaml/Cargo.toml create mode 100644 bench/tools/gen_large_yaml/src/gen.rs create mode 100644 bench/tools/gen_large_yaml/src/main.rs create mode 100644 bench/tools/gen_large_yaml/src/nested.rs create mode 100644 bench/tools/time_parse.rs diff --git a/bench/tools/README.md b/bench/tools/README.md new file mode 100644 index 0000000..de71873 --- /dev/null +++ b/bench/tools/README.md @@ -0,0 +1,188 @@ +# `yaml-rust2` tools +This directory contains tools that are used to develop the crate. +Due to dependency management, only some of them are available as binaries from the `yaml-rust2` crate. + +| Tool | Invocation | +|------|------------| +| `dump_events` | `cargo run --bin dump_events -- [...]` | +| `gen_large_yaml` | `cargo gen_large_yaml` | +| `time_parse` | `cargo run --bin time_parse -- [...]` | + +## `dump_events` +This is a debugging helper for the parser. It outputs events emitted by the parser for a given file. This can be paired with the `YAMLRUST2_DEBUG` environment variable to have an in-depth overview of which steps the scanner and the parser are taking. + +### Example +Consider the following `input.yaml` YAML file: +```yaml +- foo: bar +- baz: + c: [3, 4, 5] +``` + +Running `cargo run --bin dump_events -- input.yaml` outputs: +``` + ↳ StreamStart + ↳ DocumentStart + ↳ SequenceStart(0, None) + ↳ MappingStart(0, None) + ↳ Scalar("foo", Plain, 0, None) + ↳ Scalar("bar", Plain, 0, None) + ↳ MappingEnd + ↳ MappingStart(0, None) + ↳ Scalar("baz", Plain, 0, None) + ↳ Scalar("~", Plain, 0, None) + ↳ Scalar("c", Plain, 0, None) + ↳ SequenceStart(0, None) + ↳ Scalar("3", Plain, 0, None) + ↳ Scalar("4", Plain, 0, None) + ↳ Scalar("5", Plain, 0, None) + ↳ SequenceEnd + ↳ MappingEnd + ↳ SequenceEnd + ↳ DocumentEnd + ↳ StreamEnd +``` + +Running `YAMLRUST2_DEBUG=1 cargo run --bin dump_events -- input.yaml` outputs much more details: +
+ Full output + +``` +Parser state: StreamStart + ↳ StreamStart(Utf8) Marker { index: 0, line: 1, col: 0 } + ↳ StreamStart + +Parser state: ImplicitDocumentStart + → fetch_next_token after whitespace Marker { index: 0, line: 1, col: 0 } '-' + ↳ BlockSequenceStart Marker { index: 0, line: 1, col: 0 } + ↳ DocumentStart + +Parser state: BlockNode + ↳ SequenceStart(0, None) + +Parser state: BlockSequenceFirstEntry + ↳ BlockEntry Marker { index: 2, line: 1, col: 2 } + → fetch_next_token after whitespace Marker { index: 2, line: 1, col: 2 } 'f' + → fetch_next_token after whitespace Marker { index: 5, line: 1, col: 5 } ':' + ↳ BlockMappingStart Marker { index: 5, line: 1, col: 5 } + ↳ MappingStart(0, None) + +Parser state: BlockMappingFirstKey + ↳ Key Marker { index: 2, line: 1, col: 2 } + ↳ Scalar(Plain, "foo") Marker { index: 2, line: 1, col: 2 } + ↳ Scalar("foo", Plain, 0, None) + +Parser state: BlockMappingValue + ↳ Value Marker { index: 5, line: 1, col: 5 } + → fetch_next_token after whitespace Marker { index: 7, line: 1, col: 7 } 'b' + ↳ Scalar(Plain, "bar") Marker { index: 7, line: 1, col: 7 } + ↳ Scalar("bar", Plain, 0, None) + +Parser state: BlockMappingKey + → fetch_next_token after whitespace Marker { index: 11, line: 2, col: 0 } '-' + ↳ BlockEnd Marker { index: 11, line: 2, col: 0 } + ↳ MappingEnd + +Parser state: BlockSequenceEntry + ↳ BlockEntry Marker { index: 13, line: 2, col: 2 } + → fetch_next_token after whitespace Marker { index: 13, line: 2, col: 2 } 'b' + → fetch_next_token after whitespace Marker { index: 16, line: 2, col: 5 } ':' + ↳ BlockMappingStart Marker { index: 16, line: 2, col: 5 } + ↳ MappingStart(0, None) + +Parser state: BlockMappingFirstKey + ↳ Key Marker { index: 13, line: 2, col: 2 } + ↳ Scalar(Plain, "baz") Marker { index: 13, line: 2, col: 2 } + ↳ Scalar("baz", Plain, 0, None) + +Parser state: BlockMappingValue + ↳ Value Marker { index: 16, line: 2, col: 5 } + → fetch_next_token after whitespace Marker { index: 20, line: 3, col: 2 } 'c' + → fetch_next_token after whitespace Marker { index: 21, line: 3, col: 3 } ':' + ↳ Key Marker { index: 20, line: 3, col: 2 } + ↳ Scalar("~", Plain, 0, None) + +Parser state: BlockMappingKey + ↳ Scalar(Plain, "c") Marker { index: 20, line: 3, col: 2 } + ↳ Scalar("c", Plain, 0, None) + +Parser state: BlockMappingValue + ↳ Value Marker { index: 21, line: 3, col: 3 } + → fetch_next_token after whitespace Marker { index: 23, line: 3, col: 5 } '[' + ↳ FlowSequenceStart Marker { index: 23, line: 3, col: 5 } + ↳ SequenceStart(0, None) + +Parser state: FlowSequenceFirstEntry + → fetch_next_token after whitespace Marker { index: 24, line: 3, col: 6 } '3' + → fetch_next_token after whitespace Marker { index: 25, line: 3, col: 7 } ',' + ↳ Scalar(Plain, "3") Marker { index: 24, line: 3, col: 6 } + ↳ Scalar("3", Plain, 0, None) + +Parser state: FlowSequenceEntry + ↳ FlowEntry Marker { index: 25, line: 3, col: 7 } + → fetch_next_token after whitespace Marker { index: 27, line: 3, col: 9 } '4' + → fetch_next_token after whitespace Marker { index: 28, line: 3, col: 10 } ',' + ↳ Scalar(Plain, "4") Marker { index: 27, line: 3, col: 9 } + ↳ Scalar("4", Plain, 0, None) + +Parser state: FlowSequenceEntry + ↳ FlowEntry Marker { index: 28, line: 3, col: 10 } + → fetch_next_token after whitespace Marker { index: 30, line: 3, col: 12 } '5' + → fetch_next_token after whitespace Marker { index: 31, line: 3, col: 13 } ']' + ↳ Scalar(Plain, "5") Marker { index: 30, line: 3, col: 12 } + ↳ Scalar("5", Plain, 0, None) + +Parser state: FlowSequenceEntry + ↳ FlowSequenceEnd Marker { index: 31, line: 3, col: 13 } + ↳ SequenceEnd + +Parser state: BlockMappingKey + → fetch_next_token after whitespace Marker { index: 33, line: 4, col: 0 } '\0' + ↳ BlockEnd Marker { index: 33, line: 4, col: 0 } + ↳ MappingEnd + +Parser state: BlockSequenceEntry + ↳ BlockEnd Marker { index: 33, line: 4, col: 0 } + ↳ SequenceEnd + +Parser state: DocumentEnd + ↳ StreamEnd Marker { index: 33, line: 4, col: 0 } + ↳ DocumentEnd + +Parser state: DocumentStart + ↳ StreamEnd +``` + +
+ +While this cannot be shown in Markdown, the output is colored so that it is a bit easier to read. + +## `gen_large_yaml` +It is hard to find large (100+MiB) real-world YAML files that could be used to benchmark a parser. This utility generates multiple large files that are meant to stress the parser with different layouts of YAML files. The resulting files do not look like anything that would be encountered in production, but can serve as a base to test several features of a YAML parser. + +The generated files are the following: + + - `big.yaml`: A large array of records with few fields. One of the fields is a description, a large text block scalar spanning multiple lines. Most of the scanning happens in block scalars. + - `nested.yaml`: Very short key-value pairs that nest deeply. + - `small_objects.yaml`: A large array of 2 key-value mappings. + - `strings_array.yaml`: A large array of lipsum one-liners (~150-175 characters in length). + +All generated files are meant to be between 200 and 250 MiB in size. + +This tool depends on external dependencies that are not part of `yaml-rust2`'s dependencies or `dev-dependencies` and as such can't be called through `cargo run` directly. A dedicated `cargo gen_large_yaml` alias can be used to generate the benchmark files. + +## `time_parse` +This is a benchmarking helper that times how long it takes for the parser to emit all events. It calls the parser on the given input file, receives parsing events and then immediately discards them. It is advised to run this tool with `--release`. + +### Examples +Loading a small file could output the following: +```sh +$> cargo run --release --bin time_parse -- input.yaml +Loaded 0MiB in 14.189µs +``` + +While loading a larger file could output the following: +```sh +$> cargo run --release --bin time_parse -- bench_yaml/big.yaml +Loaded 220MiB in 1.612677853s +``` diff --git a/bench/tools/dump_events.rs b/bench/tools/dump_events.rs new file mode 100644 index 0000000..8bf9e01 --- /dev/null +++ b/bench/tools/dump_events.rs @@ -0,0 +1,38 @@ +use std::env; +use std::fs::File; +use std::io::prelude::*; +use yaml_rust2::{ + parser::{MarkedEventReceiver, Parser}, + scanner::Marker, + Event, +}; + +#[derive(Debug)] +struct EventSink { + events: Vec<(Event, Marker)>, +} + +impl MarkedEventReceiver for EventSink { + fn on_event(&mut self, ev: Event, mark: Marker) { + eprintln!(" \x1B[;34m\u{21B3} {:?}\x1B[;m", &ev); + self.events.push((ev, mark)); + } +} + +fn str_to_events(yaml: &str) -> Vec<(Event, Marker)> { + let mut sink = EventSink { events: Vec::new() }; + let mut parser = Parser::new(yaml.chars()); + // Load events using our sink as the receiver. + parser.load(&mut sink, true).unwrap(); + sink.events +} + +fn main() { + let args: Vec<_> = env::args().collect(); + let mut f = File::open(&args[1]).unwrap(); + let mut s = String::new(); + f.read_to_string(&mut s).unwrap(); + + // dbg!(str_to_events(&s)); + str_to_events(&s); +} diff --git a/bench/tools/gen_large_yaml/Cargo.toml b/bench/tools/gen_large_yaml/Cargo.toml new file mode 100644 index 0000000..54b6b3c --- /dev/null +++ b/bench/tools/gen_large_yaml/Cargo.toml @@ -0,0 +1,20 @@ +[package] +name = "gen_large_yaml" +version = "0.5.0" +authors = [ + "Ethiraric " +] +license = "MIT OR Apache-2.0" +description = "A helper to generate large YAML files" +repository = "https://github.com/Ethiraric/yaml-rust2" +readme = "README.md" +edition = "2018" + +[dependencies] +yaml-rust2 = { version = "0.5.0", path = "../../" } +rand = "0.8.5" +lipsum = "0.9.0" + +[profile.release-lto] +inherits = "release" +lto = true diff --git a/bench/tools/gen_large_yaml/src/gen.rs b/bench/tools/gen_large_yaml/src/gen.rs new file mode 100644 index 0000000..2a7dffe --- /dev/null +++ b/bench/tools/gen_large_yaml/src/gen.rs @@ -0,0 +1,156 @@ +#![allow(clippy::too_many_arguments)] + +use rand::{distributions::Alphanumeric, rngs::ThreadRng, Rng}; + +/// Generate a string with hexadecimal digits of the specified length. +pub fn hex_string(rng: &mut ThreadRng, len: usize) -> String { + const DIGITS: &[u8] = b"0123456789abcdef"; + string_from_set(rng, len, len + 1, DIGITS) +} + +/// Generate an e-mail address. +pub fn email(rng: &mut ThreadRng, len_lo: usize, len_hi: usize) -> String { + const CHARSET: &[u8] = b"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ-_.0123456789"; + format!( + "{}@example.com", + string_from_set(rng, len_lo, len_hi, CHARSET) + ) +} + +/// Generate a random URL. +pub fn url( + rng: &mut ThreadRng, + scheme: &str, + n_paths_lo: usize, + n_paths_hi: usize, + path_len_lo: usize, + path_len_hi: usize, + extension: Option<&str>, +) -> String { + let mut string = format!("{scheme}://example.com"); + for _ in 0..rng.gen_range(n_paths_lo..n_paths_hi) { + string.push('/'); + string.push_str(&alnum_string(rng, path_len_lo, path_len_hi)); + } + if let Some(extension) = extension { + string.push('.'); + string.push_str(extension); + } + string +} + +/// Generate a random integer. +pub fn integer(rng: &mut ThreadRng, lo: i64, hi: i64) -> i64 { + rng.gen_range(lo..hi) +} + +/// Generate an alphanumeric string with a length between `lo_len` and `hi_len`. +pub fn alnum_string(rng: &mut ThreadRng, lo_len: usize, hi_len: usize) -> String { + let len = rng.gen_range(lo_len..hi_len); + rng.sample_iter(&Alphanumeric) + .take(len) + .map(char::from) + .collect() +} + +/// Generate a string with hexadecimal digits of the specified length. +pub fn string_from_set(rng: &mut ThreadRng, len_lo: usize, len_hi: usize, set: &[u8]) -> String { + (0..rng.gen_range(len_lo..len_hi)) + .map(|_| set[rng.gen_range(0..set.len())] as char) + .collect() +} + +/// Generate a lipsum paragraph. +pub fn paragraph( + rng: &mut ThreadRng, + lines_lo: usize, + lines_hi: usize, + wps_lo: usize, + wps_hi: usize, + line_maxcol: usize, +) -> Vec { + let mut ret = Vec::new(); + let nlines = rng.gen_range(lines_lo..lines_hi); + + while ret.len() < nlines { + let words_in_sentence = rng.gen_range(wps_lo..wps_hi); + let mut sentence = lipsum::lipsum_words_with_rng(rng.clone(), words_in_sentence); + + if let Some(last_line) = ret.pop() { + sentence = format!("{last_line} {sentence}"); + } + + while sentence.len() > line_maxcol { + let last_space_idx = line_maxcol + - sentence[0..line_maxcol] + .chars() + .rev() + .position(char::is_whitespace) + .unwrap(); + ret.push(sentence[0..last_space_idx].to_string()); + sentence = sentence[last_space_idx + 1..].to_string(); + } + if !sentence.is_empty() { + ret.push(sentence); + } + } + + ret +} + +/// Generate a full name. +pub fn full_name(rng: &mut ThreadRng, len_lo: usize, len_hi: usize) -> String { + format!( + "{} {}", + name(rng, len_lo, len_hi), + name(rng, len_lo, len_hi) + ) +} + +/// Generate a name. +pub fn name(rng: &mut ThreadRng, len_lo: usize, len_hi: usize) -> String { + const UPPER: &[u8] = b"ABCDEFGHIJKLMNOPQRSTUVWXYZ"; + const LOWER: &[u8] = b"abcdefghijklmnopqrstuvwxyz"; + + let len = rng.gen_range(len_lo..len_hi); + let mut ret = String::new(); + ret.push(UPPER[rng.gen_range(0..UPPER.len())] as char); + ret.push_str(string_from_set(rng, len, len + 1, LOWER).as_str()); + + ret +} + +/// Generate a set of words. +pub fn words(rng: &mut ThreadRng, words_lo: usize, words_hi: usize) -> String { + let nwords = rng.gen_range(words_lo..words_hi); + lipsum::lipsum_words_with_rng(rng.clone(), nwords).replace(|c| "-\'\",*:".contains(c), "") +} + +/// Generate a lipsum text. +/// +/// Texts are composed of some paragraphs and empty lines between them. +pub fn text( + rng: &mut ThreadRng, + paragraphs_lo: usize, + paragraphs_hi: usize, + lines_lo: usize, + lines_hi: usize, + wps_lo: usize, + wps_hi: usize, + line_maxcol: usize, +) -> Vec { + let mut ret = Vec::new(); + let mut first = true; + + for _ in 0..rng.gen_range(paragraphs_lo..paragraphs_hi) { + if first { + first = false; + } else { + ret.push(String::new()); + } + + ret.extend(paragraph(rng, lines_lo, lines_hi, wps_lo, wps_hi, line_maxcol).into_iter()); + } + + ret +} diff --git a/bench/tools/gen_large_yaml/src/main.rs b/bench/tools/gen_large_yaml/src/main.rs new file mode 100644 index 0000000..d478e8b --- /dev/null +++ b/bench/tools/gen_large_yaml/src/main.rs @@ -0,0 +1,255 @@ +#![allow(dead_code)] + +mod gen; +mod nested; + +use std::collections::HashMap; +use std::fs::File; +use std::io::BufWriter; +use std::path::Path; + +use rand::{rngs::ThreadRng, Rng}; + +/// The path into which the generated YAML files will be written. +const OUTPUT_DIR: &str = "bench_yaml"; + +fn main() -> std::io::Result<()> { + let mut generator = Generator::new(); + let output_path = Path::new(OUTPUT_DIR); + if !output_path.is_dir() { + std::fs::create_dir(output_path).unwrap(); + } + + println!("Generating big.yaml"); + let mut out = BufWriter::new(File::create(output_path.join("big.yaml")).unwrap()); + generator.gen_record_array(&mut out, 100_000, 100_001)?; + + println!("Generating nested.yaml"); + let mut out = BufWriter::new(File::create(output_path.join("nested.yaml")).unwrap()); + nested::create_deep_object(&mut out, 5_000_000)?; + + println!("Generating small_objects.yaml"); + let mut out = BufWriter::new(File::create(output_path.join("small_objects.yaml")).unwrap()); + generator.gen_authors_array(&mut out, 4_000_000, 4_000_001)?; + + println!("Generating strings_array.yaml"); + let mut out = BufWriter::new(File::create(output_path.join("strings_array.yaml")).unwrap()); + generator.gen_strings_array(&mut out, 1_300_000, 1_300_001, 10, 40)?; + Ok(()) +} + +/// YAML Generator. +struct Generator { + /// The RNG state. + rng: ThreadRng, + /// The stack of indentations. + indents: Vec, +} + +type GenFn = dyn FnOnce(&mut Generator, &mut W) -> std::io::Result<()>; + +impl Generator { + /// Create a new generator. + fn new() -> Self { + Generator { + rng: rand::thread_rng(), + indents: vec![0], + } + } + + /// Generate an array of records as per [`Self::gen_record_object`]. + fn gen_record_array( + &mut self, + writer: &mut W, + items_lo: usize, + items_hi: usize, + ) -> std::io::Result<()> { + self.gen_array(writer, items_lo, items_hi, Generator::gen_record_object) + } + + /// Generate an array of lipsum one-liners. + fn gen_strings_array( + &mut self, + writer: &mut W, + items_lo: usize, + items_hi: usize, + words_lo: usize, + words_hi: usize, + ) -> std::io::Result<()> { + self.gen_array(writer, items_lo, items_hi, |gen, writer| { + write!(writer, "{}", gen::words(&mut gen.rng, words_lo, words_hi)) + }) + } + + /// Generate a YAML object/mapping containing a record. + /// + /// Fields are description, hash, version, home, repository and pdf. + /// The `description` field is a long string and puts a lot of weight in plain scalar / block + /// scalar parsing. + fn gen_record_object(&mut self, writer: &mut W) -> std::io::Result<()> { + let mut fields = HashMap::>>::new(); + fields.insert( + "description".to_string(), + Box::new(|gen, w| { + write!(w, "|")?; + gen.push_indent(2); + gen.nl(w)?; + let indent = gen.indent(); + let text = gen::text(&mut gen.rng, 1, 9, 3, 8, 10, 20, 80 - indent); + gen.write_lines(w, &text)?; + gen.pop_indent(); + Ok(()) + }), + ); + + fields.insert( + "authors".to_string(), + Box::new(|gen, w| { + gen.push_indent(2); + gen.nl(w)?; + gen.gen_authors_array(w, 1, 10)?; + gen.pop_indent(); + Ok(()) + }), + ); + + fields.insert( + "hash".to_string(), + Box::new(|gen, w| write!(w, "{}", gen::hex_string(&mut gen.rng, 64))), + ); + fields.insert( + "version".to_string(), + Box::new(|gen, w| write!(w, "{}", gen::integer(&mut gen.rng, 1, 9))), + ); + fields.insert( + "home".to_string(), + Box::new(|gen, w| write!(w, "{}", gen::url(&mut gen.rng, "https", 0, 1, 0, 0, None))), + ); + fields.insert( + "repository".to_string(), + Box::new(|gen, w| write!(w, "{}", gen::url(&mut gen.rng, "git", 1, 4, 10, 20, None))), + ); + fields.insert( + "pdf".to_string(), + Box::new(|gen, w| { + write!( + w, + "{}", + gen::url(&mut gen.rng, "https", 1, 4, 10, 30, Some("pdf")) + ) + }), + ); + self.gen_object(writer, fields) + } + + /// Generate an array of authors as per [`Self::gen_author_object`]. + fn gen_authors_array( + &mut self, + writer: &mut W, + items_lo: usize, + items_hi: usize, + ) -> std::io::Result<()> { + self.gen_array(writer, items_lo, items_hi, Generator::gen_author_object) + } + + /// Generate a small object with 2 string fields. + fn gen_author_object(&mut self, writer: &mut W) -> std::io::Result<()> { + let mut fields = HashMap::>>::new(); + fields.insert( + "name".to_string(), + Box::new(|gen, w| write!(w, "{}", gen::full_name(&mut gen.rng, 10, 15))), + ); + fields.insert( + "email".to_string(), + Box::new(|gen, w| write!(w, "{}", gen::email(&mut gen.rng, 1, 9))), + ); + self.gen_object(writer, fields) + } + + /// Generate a YAML array/sequence containing nodes generated by the given function. + fn gen_array std::io::Result<()>>( + &mut self, + writer: &mut W, + len_lo: usize, + len_hi: usize, + mut obj_creator: F, + ) -> std::io::Result<()> { + let mut first = true; + for _ in 0..self.rng.gen_range(len_lo..len_hi) { + if first { + first = false; + } else { + self.nl(writer)?; + } + write!(writer, "- ")?; + self.push_indent(2); + (obj_creator)(self, writer)?; + self.pop_indent(); + } + Ok(()) + } + + /// Create a Yaml object with some fields in it. + fn gen_object( + &mut self, + writer: &mut W, + fields: HashMap>>, + ) -> std::io::Result<()> { + let mut first = true; + for (key, f) in fields { + if first { + first = false; + } else { + self.nl(writer)?; + } + write!(writer, "{key}: ")?; + f(self, writer)?; + } + Ok(()) + } + + /// Write the given lines at the right indentation. + fn write_lines( + &mut self, + writer: &mut W, + lines: &[String], + ) -> std::io::Result<()> { + let mut first = true; + + for line in lines { + if first { + first = false; + } else { + self.nl(writer)?; + } + write!(writer, "{line}")?; + } + + Ok(()) + } + + /// Write a new line to the writer and indent. + fn nl(&mut self, writer: &mut W) -> std::io::Result<()> { + writeln!(writer)?; + for _ in 0..self.indent() { + write!(writer, " ")?; + } + Ok(()) + } + + /// Return the given indent. + fn indent(&self) -> usize { + *self.indents.last().unwrap() + } + + /// Push a new indent with the given relative offset. + fn push_indent(&mut self, offset: usize) { + self.indents.push(self.indent() + offset); + } + + /// Pops the last indent. + fn pop_indent(&mut self) { + self.indents.pop(); + assert!(!self.indents.is_empty()); + } +} diff --git a/bench/tools/gen_large_yaml/src/nested.rs b/bench/tools/gen_large_yaml/src/nested.rs new file mode 100644 index 0000000..92dc21a --- /dev/null +++ b/bench/tools/gen_large_yaml/src/nested.rs @@ -0,0 +1,111 @@ +use std::{cell::RefCell, rc::Rc}; + +use rand::{rngs::ThreadRng, Rng}; + +/// Create a deep object with the given amount of nodes. +pub fn create_deep_object( + writer: &mut W, + n_nodes: usize, +) -> std::io::Result<()> { + let mut tree = Tree::new(); + for _ in 0..n_nodes { + tree.push_node(); + } + tree.write_to(writer) +} + +/// An n-tree. +/// +/// The algorithm used to generate a potentially deep object is to create a tree, one node at a +/// time, where each node is put as a child of a random existing node in the tree. +struct Tree { + /// The tree-view of the tree. + root: Rc>, + /// Array of all the nodes in the tree, including the root node. + nodes: Vec>>, + /// The RNG state. + rng: ThreadRng, +} + +/// A node in a tree. +struct Node { + /// All the children of the node. + children: Vec>>, +} + +impl Tree { + /// Create a new tree. + fn new() -> Self { + let root = Node::new_rc_refcell(); + Tree { + root: root.clone(), + nodes: vec![root], + rng: rand::thread_rng(), + } + } + + /// Add a new node as a child of a random node in the tree. + fn push_node(&mut self) { + let new_node = Node::new_rc_refcell(); + let n_nodes = self.nodes.len(); + let parent = &mut self.nodes[self.rng.gen_range(0..n_nodes)]; + (**parent).borrow_mut().push_child(new_node.clone()); + self.nodes.push(new_node); + } + + /// Write the YAML representation of the tree to `writer`. + fn write_to(&self, writer: &mut W) -> std::io::Result<()> { + (*self.root).borrow().write_to(writer, 0) + } +} + +impl Node { + /// Create a new node. + fn new() -> Self { + Node { children: vec![] } + } + + fn new_rc_refcell() -> Rc> { + Rc::new(RefCell::new(Self::new())) + } + + /// Append a child to the node. + fn push_child(&mut self, child: Rc>) { + self.children.push(child); + } + + /// Write the YAML representation of the node to `writer`. + fn write_to(&self, writer: &mut W, indent: usize) -> std::io::Result<()> { + if self.children.is_empty() { + write_n(writer, ' ', indent)?; + writer.write_all(b"a: 1\n")?; + } else { + for (n, child) in self.children.iter().enumerate() { + write_n(writer, ' ', indent)?; + write_id_for_number(writer, n)?; + writer.write_all(b":\n")?; + (**child).borrow().write_to(writer, indent + 2)?; + } + } + Ok(()) + } +} + +/// Write `n` times `c` to `out`. +fn write_n(out: &mut W, c: char, n: usize) -> std::io::Result<()> { + for _ in 0..n { + write!(out, "{c}")?; + } + Ok(()) +} + +/// Create a valid identifier for the given number. +fn write_id_for_number(out: &mut W, mut n: usize) -> std::io::Result<()> { + const DIGITS: &[u8] = b"_abcdefghijklmnopqrstuvwxyz"; + n += 1; + while n > 0 { + write!(out, "{}", DIGITS[n % DIGITS.len()] as char)?; + n /= DIGITS.len(); + } + Ok(()) +} diff --git a/bench/tools/time_parse.rs b/bench/tools/time_parse.rs new file mode 100644 index 0000000..9b551e1 --- /dev/null +++ b/bench/tools/time_parse.rs @@ -0,0 +1,31 @@ +use std::env; +use std::fs::File; +use std::io::prelude::*; +use yaml_rust2::{ + parser::{MarkedEventReceiver, Parser}, + scanner::Marker, + Event, +}; + +/// A sink which discards any event sent. +struct NullSink {} + +impl MarkedEventReceiver for NullSink { + fn on_event(&mut self, _: Event, _: Marker) {} +} + +fn main() { + let args: Vec<_> = env::args().collect(); + let mut f = File::open(&args[1]).unwrap(); + let mut s = String::new(); + f.read_to_string(&mut s).unwrap(); + + let mut sink = NullSink {}; + let mut parser = Parser::new(s.chars()); + + // Load events using our sink as the receiver. + let begin = std::time::Instant::now(); + parser.load(&mut sink, true).unwrap(); + let end = std::time::Instant::now(); + println!("Loaded {}MiB in {:?}", s.len() / 1024 / 1024, end - begin); +} From b7755e119c37b77b04147ad623c9cbc7265b9167 Mon Sep 17 00:00:00 2001 From: Ethiraric Date: Thu, 14 Mar 2024 19:20:56 +0100 Subject: [PATCH 289/380] Remove tools from examples. Add documentation for those tools, and make it so that `gen_large_yaml` generates a predetermined set of files instead of outputting to its standard output. --- saphyr/.cargo/config.toml | 2 + saphyr/Cargo.toml | 10 +- saphyr/tools/README.md | 188 ++++++++++++++++++ saphyr/{examples => tools}/dump_events.rs | 0 saphyr/tools/gen_large_yaml/Cargo.toml | 20 ++ .../gen_large_yaml/src}/gen.rs | 0 .../gen_large_yaml/src}/main.rs | 66 ++++-- .../gen_large_yaml/src}/nested.rs | 21 +- saphyr/{examples => tools}/time_parse.rs | 0 9 files changed, 274 insertions(+), 33 deletions(-) create mode 100644 saphyr/.cargo/config.toml create mode 100644 saphyr/tools/README.md rename saphyr/{examples => tools}/dump_events.rs (100%) create mode 100644 saphyr/tools/gen_large_yaml/Cargo.toml rename saphyr/{examples/gen_large_yaml => tools/gen_large_yaml/src}/gen.rs (100%) rename saphyr/{examples/gen_large_yaml => tools/gen_large_yaml/src}/main.rs (74%) rename saphyr/{examples/gen_large_yaml => tools/gen_large_yaml/src}/nested.rs (79%) rename saphyr/{examples => tools}/time_parse.rs (100%) diff --git a/saphyr/.cargo/config.toml b/saphyr/.cargo/config.toml new file mode 100644 index 0000000..7bc65f1 --- /dev/null +++ b/saphyr/.cargo/config.toml @@ -0,0 +1,2 @@ +[alias] +gen_large_yaml = "run --profile=release-lto --package gen_large_yaml --bin gen_large_yaml --manifest-path tools/gen_large_yaml/Cargo.toml --" diff --git a/saphyr/Cargo.toml b/saphyr/Cargo.toml index a24c258..15425df 100644 --- a/saphyr/Cargo.toml +++ b/saphyr/Cargo.toml @@ -19,8 +19,6 @@ linked-hash-map = "0.5.3" [dev-dependencies] libtest-mimic = "0.3.0" quickcheck = "0.9" -rand = "0.8.5" -lipsum = "0.9.0" [profile.release-lto] inherits = "release" @@ -29,3 +27,11 @@ lto = true [[test]] name = "yaml-test-suite" harness = false + +[[bin]] +name = "dump_events" +path = "tools/dump_events.rs" + +[[bin]] +name = "time_parse" +path = "tools/time_parse.rs" diff --git a/saphyr/tools/README.md b/saphyr/tools/README.md new file mode 100644 index 0000000..de71873 --- /dev/null +++ b/saphyr/tools/README.md @@ -0,0 +1,188 @@ +# `yaml-rust2` tools +This directory contains tools that are used to develop the crate. +Due to dependency management, only some of them are available as binaries from the `yaml-rust2` crate. + +| Tool | Invocation | +|------|------------| +| `dump_events` | `cargo run --bin dump_events -- [...]` | +| `gen_large_yaml` | `cargo gen_large_yaml` | +| `time_parse` | `cargo run --bin time_parse -- [...]` | + +## `dump_events` +This is a debugging helper for the parser. It outputs events emitted by the parser for a given file. This can be paired with the `YAMLRUST2_DEBUG` environment variable to have an in-depth overview of which steps the scanner and the parser are taking. + +### Example +Consider the following `input.yaml` YAML file: +```yaml +- foo: bar +- baz: + c: [3, 4, 5] +``` + +Running `cargo run --bin dump_events -- input.yaml` outputs: +``` + ↳ StreamStart + ↳ DocumentStart + ↳ SequenceStart(0, None) + ↳ MappingStart(0, None) + ↳ Scalar("foo", Plain, 0, None) + ↳ Scalar("bar", Plain, 0, None) + ↳ MappingEnd + ↳ MappingStart(0, None) + ↳ Scalar("baz", Plain, 0, None) + ↳ Scalar("~", Plain, 0, None) + ↳ Scalar("c", Plain, 0, None) + ↳ SequenceStart(0, None) + ↳ Scalar("3", Plain, 0, None) + ↳ Scalar("4", Plain, 0, None) + ↳ Scalar("5", Plain, 0, None) + ↳ SequenceEnd + ↳ MappingEnd + ↳ SequenceEnd + ↳ DocumentEnd + ↳ StreamEnd +``` + +Running `YAMLRUST2_DEBUG=1 cargo run --bin dump_events -- input.yaml` outputs much more details: +
+ Full output + +``` +Parser state: StreamStart + ↳ StreamStart(Utf8) Marker { index: 0, line: 1, col: 0 } + ↳ StreamStart + +Parser state: ImplicitDocumentStart + → fetch_next_token after whitespace Marker { index: 0, line: 1, col: 0 } '-' + ↳ BlockSequenceStart Marker { index: 0, line: 1, col: 0 } + ↳ DocumentStart + +Parser state: BlockNode + ↳ SequenceStart(0, None) + +Parser state: BlockSequenceFirstEntry + ↳ BlockEntry Marker { index: 2, line: 1, col: 2 } + → fetch_next_token after whitespace Marker { index: 2, line: 1, col: 2 } 'f' + → fetch_next_token after whitespace Marker { index: 5, line: 1, col: 5 } ':' + ↳ BlockMappingStart Marker { index: 5, line: 1, col: 5 } + ↳ MappingStart(0, None) + +Parser state: BlockMappingFirstKey + ↳ Key Marker { index: 2, line: 1, col: 2 } + ↳ Scalar(Plain, "foo") Marker { index: 2, line: 1, col: 2 } + ↳ Scalar("foo", Plain, 0, None) + +Parser state: BlockMappingValue + ↳ Value Marker { index: 5, line: 1, col: 5 } + → fetch_next_token after whitespace Marker { index: 7, line: 1, col: 7 } 'b' + ↳ Scalar(Plain, "bar") Marker { index: 7, line: 1, col: 7 } + ↳ Scalar("bar", Plain, 0, None) + +Parser state: BlockMappingKey + → fetch_next_token after whitespace Marker { index: 11, line: 2, col: 0 } '-' + ↳ BlockEnd Marker { index: 11, line: 2, col: 0 } + ↳ MappingEnd + +Parser state: BlockSequenceEntry + ↳ BlockEntry Marker { index: 13, line: 2, col: 2 } + → fetch_next_token after whitespace Marker { index: 13, line: 2, col: 2 } 'b' + → fetch_next_token after whitespace Marker { index: 16, line: 2, col: 5 } ':' + ↳ BlockMappingStart Marker { index: 16, line: 2, col: 5 } + ↳ MappingStart(0, None) + +Parser state: BlockMappingFirstKey + ↳ Key Marker { index: 13, line: 2, col: 2 } + ↳ Scalar(Plain, "baz") Marker { index: 13, line: 2, col: 2 } + ↳ Scalar("baz", Plain, 0, None) + +Parser state: BlockMappingValue + ↳ Value Marker { index: 16, line: 2, col: 5 } + → fetch_next_token after whitespace Marker { index: 20, line: 3, col: 2 } 'c' + → fetch_next_token after whitespace Marker { index: 21, line: 3, col: 3 } ':' + ↳ Key Marker { index: 20, line: 3, col: 2 } + ↳ Scalar("~", Plain, 0, None) + +Parser state: BlockMappingKey + ↳ Scalar(Plain, "c") Marker { index: 20, line: 3, col: 2 } + ↳ Scalar("c", Plain, 0, None) + +Parser state: BlockMappingValue + ↳ Value Marker { index: 21, line: 3, col: 3 } + → fetch_next_token after whitespace Marker { index: 23, line: 3, col: 5 } '[' + ↳ FlowSequenceStart Marker { index: 23, line: 3, col: 5 } + ↳ SequenceStart(0, None) + +Parser state: FlowSequenceFirstEntry + → fetch_next_token after whitespace Marker { index: 24, line: 3, col: 6 } '3' + → fetch_next_token after whitespace Marker { index: 25, line: 3, col: 7 } ',' + ↳ Scalar(Plain, "3") Marker { index: 24, line: 3, col: 6 } + ↳ Scalar("3", Plain, 0, None) + +Parser state: FlowSequenceEntry + ↳ FlowEntry Marker { index: 25, line: 3, col: 7 } + → fetch_next_token after whitespace Marker { index: 27, line: 3, col: 9 } '4' + → fetch_next_token after whitespace Marker { index: 28, line: 3, col: 10 } ',' + ↳ Scalar(Plain, "4") Marker { index: 27, line: 3, col: 9 } + ↳ Scalar("4", Plain, 0, None) + +Parser state: FlowSequenceEntry + ↳ FlowEntry Marker { index: 28, line: 3, col: 10 } + → fetch_next_token after whitespace Marker { index: 30, line: 3, col: 12 } '5' + → fetch_next_token after whitespace Marker { index: 31, line: 3, col: 13 } ']' + ↳ Scalar(Plain, "5") Marker { index: 30, line: 3, col: 12 } + ↳ Scalar("5", Plain, 0, None) + +Parser state: FlowSequenceEntry + ↳ FlowSequenceEnd Marker { index: 31, line: 3, col: 13 } + ↳ SequenceEnd + +Parser state: BlockMappingKey + → fetch_next_token after whitespace Marker { index: 33, line: 4, col: 0 } '\0' + ↳ BlockEnd Marker { index: 33, line: 4, col: 0 } + ↳ MappingEnd + +Parser state: BlockSequenceEntry + ↳ BlockEnd Marker { index: 33, line: 4, col: 0 } + ↳ SequenceEnd + +Parser state: DocumentEnd + ↳ StreamEnd Marker { index: 33, line: 4, col: 0 } + ↳ DocumentEnd + +Parser state: DocumentStart + ↳ StreamEnd +``` + +
+ +While this cannot be shown in Markdown, the output is colored so that it is a bit easier to read. + +## `gen_large_yaml` +It is hard to find large (100+MiB) real-world YAML files that could be used to benchmark a parser. This utility generates multiple large files that are meant to stress the parser with different layouts of YAML files. The resulting files do not look like anything that would be encountered in production, but can serve as a base to test several features of a YAML parser. + +The generated files are the following: + + - `big.yaml`: A large array of records with few fields. One of the fields is a description, a large text block scalar spanning multiple lines. Most of the scanning happens in block scalars. + - `nested.yaml`: Very short key-value pairs that nest deeply. + - `small_objects.yaml`: A large array of 2 key-value mappings. + - `strings_array.yaml`: A large array of lipsum one-liners (~150-175 characters in length). + +All generated files are meant to be between 200 and 250 MiB in size. + +This tool depends on external dependencies that are not part of `yaml-rust2`'s dependencies or `dev-dependencies` and as such can't be called through `cargo run` directly. A dedicated `cargo gen_large_yaml` alias can be used to generate the benchmark files. + +## `time_parse` +This is a benchmarking helper that times how long it takes for the parser to emit all events. It calls the parser on the given input file, receives parsing events and then immediately discards them. It is advised to run this tool with `--release`. + +### Examples +Loading a small file could output the following: +```sh +$> cargo run --release --bin time_parse -- input.yaml +Loaded 0MiB in 14.189µs +``` + +While loading a larger file could output the following: +```sh +$> cargo run --release --bin time_parse -- bench_yaml/big.yaml +Loaded 220MiB in 1.612677853s +``` diff --git a/saphyr/examples/dump_events.rs b/saphyr/tools/dump_events.rs similarity index 100% rename from saphyr/examples/dump_events.rs rename to saphyr/tools/dump_events.rs diff --git a/saphyr/tools/gen_large_yaml/Cargo.toml b/saphyr/tools/gen_large_yaml/Cargo.toml new file mode 100644 index 0000000..54b6b3c --- /dev/null +++ b/saphyr/tools/gen_large_yaml/Cargo.toml @@ -0,0 +1,20 @@ +[package] +name = "gen_large_yaml" +version = "0.5.0" +authors = [ + "Ethiraric " +] +license = "MIT OR Apache-2.0" +description = "A helper to generate large YAML files" +repository = "https://github.com/Ethiraric/yaml-rust2" +readme = "README.md" +edition = "2018" + +[dependencies] +yaml-rust2 = { version = "0.5.0", path = "../../" } +rand = "0.8.5" +lipsum = "0.9.0" + +[profile.release-lto] +inherits = "release" +lto = true diff --git a/saphyr/examples/gen_large_yaml/gen.rs b/saphyr/tools/gen_large_yaml/src/gen.rs similarity index 100% rename from saphyr/examples/gen_large_yaml/gen.rs rename to saphyr/tools/gen_large_yaml/src/gen.rs diff --git a/saphyr/examples/gen_large_yaml/main.rs b/saphyr/tools/gen_large_yaml/src/main.rs similarity index 74% rename from saphyr/examples/gen_large_yaml/main.rs rename to saphyr/tools/gen_large_yaml/src/main.rs index cfc8d70..d478e8b 100644 --- a/saphyr/examples/gen_large_yaml/main.rs +++ b/saphyr/tools/gen_large_yaml/src/main.rs @@ -4,15 +4,37 @@ mod gen; mod nested; use std::collections::HashMap; +use std::fs::File; +use std::io::BufWriter; +use std::path::Path; use rand::{rngs::ThreadRng, Rng}; -fn main() -> std::fmt::Result { - let mut s = String::new(); - // let mut g = Generator::new(); - // g.gen_strings_array(&mut s, 1_300_000, 1_300_001, 10, 40)?; - nested::create_deep_object(&mut s, 5_000_000)?; - println!("{s}"); +/// The path into which the generated YAML files will be written. +const OUTPUT_DIR: &str = "bench_yaml"; + +fn main() -> std::io::Result<()> { + let mut generator = Generator::new(); + let output_path = Path::new(OUTPUT_DIR); + if !output_path.is_dir() { + std::fs::create_dir(output_path).unwrap(); + } + + println!("Generating big.yaml"); + let mut out = BufWriter::new(File::create(output_path.join("big.yaml")).unwrap()); + generator.gen_record_array(&mut out, 100_000, 100_001)?; + + println!("Generating nested.yaml"); + let mut out = BufWriter::new(File::create(output_path.join("nested.yaml")).unwrap()); + nested::create_deep_object(&mut out, 5_000_000)?; + + println!("Generating small_objects.yaml"); + let mut out = BufWriter::new(File::create(output_path.join("small_objects.yaml")).unwrap()); + generator.gen_authors_array(&mut out, 4_000_000, 4_000_001)?; + + println!("Generating strings_array.yaml"); + let mut out = BufWriter::new(File::create(output_path.join("strings_array.yaml")).unwrap()); + generator.gen_strings_array(&mut out, 1_300_000, 1_300_001, 10, 40)?; Ok(()) } @@ -24,7 +46,7 @@ struct Generator { indents: Vec, } -type GenFn = dyn FnOnce(&mut Generator, &mut W) -> std::fmt::Result; +type GenFn = dyn FnOnce(&mut Generator, &mut W) -> std::io::Result<()>; impl Generator { /// Create a new generator. @@ -36,24 +58,24 @@ impl Generator { } /// Generate an array of records as per [`Self::gen_record_object`]. - fn gen_record_array( + fn gen_record_array( &mut self, writer: &mut W, items_lo: usize, items_hi: usize, - ) -> std::fmt::Result { + ) -> std::io::Result<()> { self.gen_array(writer, items_lo, items_hi, Generator::gen_record_object) } /// Generate an array of lipsum one-liners. - fn gen_strings_array( + fn gen_strings_array( &mut self, writer: &mut W, items_lo: usize, items_hi: usize, words_lo: usize, words_hi: usize, - ) -> std::fmt::Result { + ) -> std::io::Result<()> { self.gen_array(writer, items_lo, items_hi, |gen, writer| { write!(writer, "{}", gen::words(&mut gen.rng, words_lo, words_hi)) }) @@ -64,7 +86,7 @@ impl Generator { /// Fields are description, hash, version, home, repository and pdf. /// The `description` field is a long string and puts a lot of weight in plain scalar / block /// scalar parsing. - fn gen_record_object(&mut self, writer: &mut W) -> std::fmt::Result { + fn gen_record_object(&mut self, writer: &mut W) -> std::io::Result<()> { let mut fields = HashMap::>>::new(); fields.insert( "description".to_string(), @@ -121,17 +143,17 @@ impl Generator { } /// Generate an array of authors as per [`Self::gen_author_object`]. - fn gen_authors_array( + fn gen_authors_array( &mut self, writer: &mut W, items_lo: usize, items_hi: usize, - ) -> std::fmt::Result { + ) -> std::io::Result<()> { self.gen_array(writer, items_lo, items_hi, Generator::gen_author_object) } /// Generate a small object with 2 string fields. - fn gen_author_object(&mut self, writer: &mut W) -> std::fmt::Result { + fn gen_author_object(&mut self, writer: &mut W) -> std::io::Result<()> { let mut fields = HashMap::>>::new(); fields.insert( "name".to_string(), @@ -145,13 +167,13 @@ impl Generator { } /// Generate a YAML array/sequence containing nodes generated by the given function. - fn gen_array std::fmt::Result>( + fn gen_array std::io::Result<()>>( &mut self, writer: &mut W, len_lo: usize, len_hi: usize, mut obj_creator: F, - ) -> std::fmt::Result { + ) -> std::io::Result<()> { let mut first = true; for _ in 0..self.rng.gen_range(len_lo..len_hi) { if first { @@ -168,11 +190,11 @@ impl Generator { } /// Create a Yaml object with some fields in it. - fn gen_object( + fn gen_object( &mut self, writer: &mut W, fields: HashMap>>, - ) -> std::fmt::Result { + ) -> std::io::Result<()> { let mut first = true; for (key, f) in fields { if first { @@ -187,11 +209,11 @@ impl Generator { } /// Write the given lines at the right indentation. - fn write_lines( + fn write_lines( &mut self, writer: &mut W, lines: &[String], - ) -> std::fmt::Result { + ) -> std::io::Result<()> { let mut first = true; for line in lines { @@ -207,7 +229,7 @@ impl Generator { } /// Write a new line to the writer and indent. - fn nl(&mut self, writer: &mut W) -> std::fmt::Result { + fn nl(&mut self, writer: &mut W) -> std::io::Result<()> { writeln!(writer)?; for _ in 0..self.indent() { write!(writer, " ")?; diff --git a/saphyr/examples/gen_large_yaml/nested.rs b/saphyr/tools/gen_large_yaml/src/nested.rs similarity index 79% rename from saphyr/examples/gen_large_yaml/nested.rs rename to saphyr/tools/gen_large_yaml/src/nested.rs index 3977901..92dc21a 100644 --- a/saphyr/examples/gen_large_yaml/nested.rs +++ b/saphyr/tools/gen_large_yaml/src/nested.rs @@ -3,7 +3,10 @@ use std::{cell::RefCell, rc::Rc}; use rand::{rngs::ThreadRng, Rng}; /// Create a deep object with the given amount of nodes. -pub fn create_deep_object(writer: &mut W, n_nodes: usize) -> std::fmt::Result { +pub fn create_deep_object( + writer: &mut W, + n_nodes: usize, +) -> std::io::Result<()> { let mut tree = Tree::new(); for _ in 0..n_nodes { tree.push_node(); @@ -51,7 +54,7 @@ impl Tree { } /// Write the YAML representation of the tree to `writer`. - fn write_to(&self, writer: &mut W) -> std::fmt::Result { + fn write_to(&self, writer: &mut W) -> std::io::Result<()> { (*self.root).borrow().write_to(writer, 0) } } @@ -72,15 +75,15 @@ impl Node { } /// Write the YAML representation of the node to `writer`. - fn write_to(&self, writer: &mut W, indent: usize) -> std::fmt::Result { + fn write_to(&self, writer: &mut W, indent: usize) -> std::io::Result<()> { if self.children.is_empty() { write_n(writer, ' ', indent)?; - writer.write_str("a: 1\n")?; + writer.write_all(b"a: 1\n")?; } else { for (n, child) in self.children.iter().enumerate() { write_n(writer, ' ', indent)?; write_id_for_number(writer, n)?; - writer.write_str(":\n")?; + writer.write_all(b":\n")?; (**child).borrow().write_to(writer, indent + 2)?; } } @@ -89,19 +92,19 @@ impl Node { } /// Write `n` times `c` to `out`. -fn write_n(out: &mut W, c: char, n: usize) -> std::fmt::Result { +fn write_n(out: &mut W, c: char, n: usize) -> std::io::Result<()> { for _ in 0..n { - out.write_char(c)?; + write!(out, "{c}")?; } Ok(()) } /// Create a valid identifier for the given number. -fn write_id_for_number(out: &mut W, mut n: usize) -> std::fmt::Result { +fn write_id_for_number(out: &mut W, mut n: usize) -> std::io::Result<()> { const DIGITS: &[u8] = b"_abcdefghijklmnopqrstuvwxyz"; n += 1; while n > 0 { - out.write_char(DIGITS[n % DIGITS.len()] as char)?; + write!(out, "{}", DIGITS[n % DIGITS.len()] as char)?; n /= DIGITS.len(); } Ok(()) diff --git a/saphyr/examples/time_parse.rs b/saphyr/tools/time_parse.rs similarity index 100% rename from saphyr/examples/time_parse.rs rename to saphyr/tools/time_parse.rs From 13923cd27041a5e81ca6768d6a20f304e5aaeb4d Mon Sep 17 00:00:00 2001 From: Ethiraric Date: Fri, 15 Mar 2024 12:42:10 +0100 Subject: [PATCH 290/380] Add gen_large_yaml to justfile's before_commit. --- bench/justfile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/bench/justfile b/bench/justfile index a9d1a56..c941027 100644 --- a/bench/justfile +++ b/bench/justfile @@ -3,4 +3,6 @@ before_commit: cargo clippy --all-targets -- -D warnings cargo build --release --all-targets cargo build --all-targets + cargo test cargo test --release + cargo build --profile=release-lto --package gen_large_yaml --bin gen_large_yaml --manifest-path tools/gen_large_yaml/Cargo.toml From 0a05d3c0ed7936f78070444e4dfe57cb0d49dcbd Mon Sep 17 00:00:00 2001 From: Ethiraric Date: Fri, 15 Mar 2024 12:42:10 +0100 Subject: [PATCH 291/380] Add gen_large_yaml to justfile's before_commit. --- saphyr/justfile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/saphyr/justfile b/saphyr/justfile index a9d1a56..c941027 100644 --- a/saphyr/justfile +++ b/saphyr/justfile @@ -3,4 +3,6 @@ before_commit: cargo clippy --all-targets -- -D warnings cargo build --release --all-targets cargo build --all-targets + cargo test cargo test --release + cargo build --profile=release-lto --package gen_large_yaml --bin gen_large_yaml --manifest-path tools/gen_large_yaml/Cargo.toml From 3db16ce517c1527a901b3c909fe7a5e9157b1307 Mon Sep 17 00:00:00 2001 From: Ethiraric Date: Fri, 15 Mar 2024 12:47:40 +0100 Subject: [PATCH 292/380] Create a deeper YAML in `nested.yaml`. This requires heavily reducing the number of nodes since they are on average more indented. Leaving 5M nodes results in files larger than 1GB. --- bench/tools/gen_large_yaml/src/main.rs | 2 +- bench/tools/gen_large_yaml/src/nested.rs | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/bench/tools/gen_large_yaml/src/main.rs b/bench/tools/gen_large_yaml/src/main.rs index d478e8b..1c7ee6e 100644 --- a/bench/tools/gen_large_yaml/src/main.rs +++ b/bench/tools/gen_large_yaml/src/main.rs @@ -26,7 +26,7 @@ fn main() -> std::io::Result<()> { println!("Generating nested.yaml"); let mut out = BufWriter::new(File::create(output_path.join("nested.yaml")).unwrap()); - nested::create_deep_object(&mut out, 5_000_000)?; + nested::create_deep_object(&mut out, 1_100_000)?; println!("Generating small_objects.yaml"); let mut out = BufWriter::new(File::create(output_path.join("small_objects.yaml")).unwrap()); diff --git a/bench/tools/gen_large_yaml/src/nested.rs b/bench/tools/gen_large_yaml/src/nested.rs index 92dc21a..f54b55c 100644 --- a/bench/tools/gen_large_yaml/src/nested.rs +++ b/bench/tools/gen_large_yaml/src/nested.rs @@ -48,7 +48,8 @@ impl Tree { fn push_node(&mut self) { let new_node = Node::new_rc_refcell(); let n_nodes = self.nodes.len(); - let parent = &mut self.nodes[self.rng.gen_range(0..n_nodes)]; + // Bias the nodes towards the end so that there is more nesting. + let parent = &mut self.nodes[self.rng.gen_range((3 * n_nodes / 4)..n_nodes)]; (**parent).borrow_mut().push_child(new_node.clone()); self.nodes.push(new_node); } From f44cb1dd22ecc4bd4356a8f2d04a3d5bd0206ae5 Mon Sep 17 00:00:00 2001 From: Ethiraric Date: Fri, 15 Mar 2024 12:47:40 +0100 Subject: [PATCH 293/380] Create a deeper YAML in `nested.yaml`. This requires heavily reducing the number of nodes since they are on average more indented. Leaving 5M nodes results in files larger than 1GB. --- saphyr/tools/gen_large_yaml/src/main.rs | 2 +- saphyr/tools/gen_large_yaml/src/nested.rs | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/saphyr/tools/gen_large_yaml/src/main.rs b/saphyr/tools/gen_large_yaml/src/main.rs index d478e8b..1c7ee6e 100644 --- a/saphyr/tools/gen_large_yaml/src/main.rs +++ b/saphyr/tools/gen_large_yaml/src/main.rs @@ -26,7 +26,7 @@ fn main() -> std::io::Result<()> { println!("Generating nested.yaml"); let mut out = BufWriter::new(File::create(output_path.join("nested.yaml")).unwrap()); - nested::create_deep_object(&mut out, 5_000_000)?; + nested::create_deep_object(&mut out, 1_100_000)?; println!("Generating small_objects.yaml"); let mut out = BufWriter::new(File::create(output_path.join("small_objects.yaml")).unwrap()); diff --git a/saphyr/tools/gen_large_yaml/src/nested.rs b/saphyr/tools/gen_large_yaml/src/nested.rs index 92dc21a..f54b55c 100644 --- a/saphyr/tools/gen_large_yaml/src/nested.rs +++ b/saphyr/tools/gen_large_yaml/src/nested.rs @@ -48,7 +48,8 @@ impl Tree { fn push_node(&mut self) { let new_node = Node::new_rc_refcell(); let n_nodes = self.nodes.len(); - let parent = &mut self.nodes[self.rng.gen_range(0..n_nodes)]; + // Bias the nodes towards the end so that there is more nesting. + let parent = &mut self.nodes[self.rng.gen_range((3 * n_nodes / 4)..n_nodes)]; (**parent).borrow_mut().push_child(new_node.clone()); self.nodes.push(new_node); } From 6b273165862a2ac227646108d0cfe1b51deab65d Mon Sep 17 00:00:00 2001 From: Ethiraric Date: Fri, 15 Mar 2024 17:34:39 +0100 Subject: [PATCH 294/380] Prepare for benchmarks. --- saphyr/Cargo.toml | 4 +++ saphyr/tools/run_bench.rs | 71 ++++++++++++++++++++++++++++++++++++++ saphyr/tools/time_parse.rs | 7 +++- 3 files changed, 81 insertions(+), 1 deletion(-) create mode 100644 saphyr/tools/run_bench.rs diff --git a/saphyr/Cargo.toml b/saphyr/Cargo.toml index 15425df..a87b239 100644 --- a/saphyr/Cargo.toml +++ b/saphyr/Cargo.toml @@ -35,3 +35,7 @@ path = "tools/dump_events.rs" [[bin]] name = "time_parse" path = "tools/time_parse.rs" + +[[bin]] +name = "run_bench" +path = "tools/run_bench.rs" diff --git a/saphyr/tools/run_bench.rs b/saphyr/tools/run_bench.rs new file mode 100644 index 0000000..cda9db2 --- /dev/null +++ b/saphyr/tools/run_bench.rs @@ -0,0 +1,71 @@ +#![allow(clippy::cast_possible_truncation, clippy::cast_precision_loss)] + +use std::{env, fs::File, io::prelude::*}; +use yaml_rust2::{ + parser::{MarkedEventReceiver, Parser}, + scanner::Marker, + Event, +}; + +/// A sink which discards any event sent. +struct NullSink {} + +impl MarkedEventReceiver for NullSink { + fn on_event(&mut self, _: Event, _: Marker) {} +} + +/// Parse the given input, returning elapsed time in nanoseconds. +fn do_parse(input: &str) -> u64 { + let mut sink = NullSink {}; + let mut parser = Parser::new(input.chars()); + let begin = std::time::Instant::now(); + parser.load(&mut sink, true).unwrap(); + let end = std::time::Instant::now(); + (end - begin).as_nanos() as u64 +} + +fn main() { + let args: Vec<_> = env::args().collect(); + let iterations: u64 = args[2].parse().unwrap(); + let output_yaml = args.len() == 4 && args[3] == "--output-yaml"; + let mut f = File::open(&args[1]).unwrap(); + let mut s = String::new(); + f.read_to_string(&mut s).unwrap(); + + // Warmup + do_parse(&s); + do_parse(&s); + do_parse(&s); + + // Bench + let times: Vec<_> = (0..iterations).map(|_| do_parse(&s)).collect(); + + let mut sorted_times = times.clone(); + sorted_times.sort_unstable(); + + // Compute relevant metrics. + let sum: u64 = times.iter().sum(); + let avg = sum / iterations; + let min = sorted_times[0]; + let max = sorted_times[(iterations - 1) as usize]; + let percentile95 = sorted_times[((95 * iterations) / 100) as usize]; + + if output_yaml { + println!("parser: yaml-rust2"); + println!("input: {}", args[1]); + println!("average: {avg}"); + println!("min: {min}"); + println!("max: {max}"); + println!("percentile95: {percentile95}"); + println!("iterations: {iterations}"); + println!("times:"); + for time in × { + println!(" - {time}"); + } + } else { + println!("Average: {}s", (avg as f64) / 1_000_000_000.0); + println!("Min: {}s", (min as f64) / 1_000_000_000.0); + println!("Max: {}s", (max as f64) / 1_000_000_000.0); + println!("95%: {}s", (percentile95 as f64) / 1_000_000_000.0); + } +} diff --git a/saphyr/tools/time_parse.rs b/saphyr/tools/time_parse.rs index 9b551e1..014fcfc 100644 --- a/saphyr/tools/time_parse.rs +++ b/saphyr/tools/time_parse.rs @@ -27,5 +27,10 @@ fn main() { let begin = std::time::Instant::now(); parser.load(&mut sink, true).unwrap(); let end = std::time::Instant::now(); - println!("Loaded {}MiB in {:?}", s.len() / 1024 / 1024, end - begin); + + if args.len() == 3 && args[2] == "--short" { + println!("{}", (end - begin).as_nanos()); + } else { + println!("Loaded {}MiB in {:?}", s.len() / 1024 / 1024, end - begin); + } } From 7d824ef88f7142c5fab276f058b6b85814515234 Mon Sep 17 00:00:00 2001 From: Ethiraric Date: Fri, 15 Mar 2024 17:34:39 +0100 Subject: [PATCH 295/380] Prepare for benchmarks. --- bench/tools/run_bench.rs | 71 +++++++++++++++++++++++++++++++++++++++ bench/tools/time_parse.rs | 7 +++- 2 files changed, 77 insertions(+), 1 deletion(-) create mode 100644 bench/tools/run_bench.rs diff --git a/bench/tools/run_bench.rs b/bench/tools/run_bench.rs new file mode 100644 index 0000000..cda9db2 --- /dev/null +++ b/bench/tools/run_bench.rs @@ -0,0 +1,71 @@ +#![allow(clippy::cast_possible_truncation, clippy::cast_precision_loss)] + +use std::{env, fs::File, io::prelude::*}; +use yaml_rust2::{ + parser::{MarkedEventReceiver, Parser}, + scanner::Marker, + Event, +}; + +/// A sink which discards any event sent. +struct NullSink {} + +impl MarkedEventReceiver for NullSink { + fn on_event(&mut self, _: Event, _: Marker) {} +} + +/// Parse the given input, returning elapsed time in nanoseconds. +fn do_parse(input: &str) -> u64 { + let mut sink = NullSink {}; + let mut parser = Parser::new(input.chars()); + let begin = std::time::Instant::now(); + parser.load(&mut sink, true).unwrap(); + let end = std::time::Instant::now(); + (end - begin).as_nanos() as u64 +} + +fn main() { + let args: Vec<_> = env::args().collect(); + let iterations: u64 = args[2].parse().unwrap(); + let output_yaml = args.len() == 4 && args[3] == "--output-yaml"; + let mut f = File::open(&args[1]).unwrap(); + let mut s = String::new(); + f.read_to_string(&mut s).unwrap(); + + // Warmup + do_parse(&s); + do_parse(&s); + do_parse(&s); + + // Bench + let times: Vec<_> = (0..iterations).map(|_| do_parse(&s)).collect(); + + let mut sorted_times = times.clone(); + sorted_times.sort_unstable(); + + // Compute relevant metrics. + let sum: u64 = times.iter().sum(); + let avg = sum / iterations; + let min = sorted_times[0]; + let max = sorted_times[(iterations - 1) as usize]; + let percentile95 = sorted_times[((95 * iterations) / 100) as usize]; + + if output_yaml { + println!("parser: yaml-rust2"); + println!("input: {}", args[1]); + println!("average: {avg}"); + println!("min: {min}"); + println!("max: {max}"); + println!("percentile95: {percentile95}"); + println!("iterations: {iterations}"); + println!("times:"); + for time in × { + println!(" - {time}"); + } + } else { + println!("Average: {}s", (avg as f64) / 1_000_000_000.0); + println!("Min: {}s", (min as f64) / 1_000_000_000.0); + println!("Max: {}s", (max as f64) / 1_000_000_000.0); + println!("95%: {}s", (percentile95 as f64) / 1_000_000_000.0); + } +} diff --git a/bench/tools/time_parse.rs b/bench/tools/time_parse.rs index 9b551e1..014fcfc 100644 --- a/bench/tools/time_parse.rs +++ b/bench/tools/time_parse.rs @@ -27,5 +27,10 @@ fn main() { let begin = std::time::Instant::now(); parser.load(&mut sink, true).unwrap(); let end = std::time::Instant::now(); - println!("Loaded {}MiB in {:?}", s.len() / 1024 / 1024, end - begin); + + if args.len() == 3 && args[2] == "--short" { + println!("{}", (end - begin).as_nanos()); + } else { + println!("Loaded {}MiB in {:?}", s.len() / 1024 / 1024, end - begin); + } } From dc88910c23cecc80601ee2b6044935f09f63c4d0 Mon Sep 17 00:00:00 2001 From: Ethiraric Date: Fri, 15 Mar 2024 18:29:54 +0100 Subject: [PATCH 296/380] Add `bench_compare` tool. --- saphyr/.cargo/config.toml | 1 + saphyr/justfile | 6 + saphyr/tools/README.md | 41 ++++++ saphyr/tools/bench_compare/Cargo.toml | 21 +++ saphyr/tools/bench_compare/README.md | 120 +++++++++++++++++ saphyr/tools/bench_compare/src/main.rs | 174 +++++++++++++++++++++++++ 6 files changed, 363 insertions(+) create mode 100644 saphyr/tools/bench_compare/Cargo.toml create mode 100644 saphyr/tools/bench_compare/README.md create mode 100644 saphyr/tools/bench_compare/src/main.rs diff --git a/saphyr/.cargo/config.toml b/saphyr/.cargo/config.toml index 7bc65f1..497dd42 100644 --- a/saphyr/.cargo/config.toml +++ b/saphyr/.cargo/config.toml @@ -1,2 +1,3 @@ [alias] gen_large_yaml = "run --profile=release-lto --package gen_large_yaml --bin gen_large_yaml --manifest-path tools/gen_large_yaml/Cargo.toml --" +bench_compare = "run --package bench_compare --bin bench_compare --manifest-path tools/bench_compare/Cargo.toml --" diff --git a/saphyr/justfile b/saphyr/justfile index c941027..4d0c444 100644 --- a/saphyr/justfile +++ b/saphyr/justfile @@ -6,3 +6,9 @@ before_commit: cargo test cargo test --release cargo build --profile=release-lto --package gen_large_yaml --bin gen_large_yaml --manifest-path tools/gen_large_yaml/Cargo.toml + +ethi_bench: + cargo build --release --all-targets + cd ../Yaml-rust && cargo build --release --all-targets + cd ../libfyaml/build && ninja + cargo bench_compare run_bench diff --git a/saphyr/tools/README.md b/saphyr/tools/README.md index de71873..7728a0f 100644 --- a/saphyr/tools/README.md +++ b/saphyr/tools/README.md @@ -4,10 +4,15 @@ Due to dependency management, only some of them are available as binaries from t | Tool | Invocation | |------|------------| +| `bench_compare` | `cargo bench_compare` | | `dump_events` | `cargo run --bin dump_events -- [...]` | | `gen_large_yaml` | `cargo gen_large_yaml` | +| `run_bench` | `cargo run --bin run_bench -- [...]` | | `time_parse` | `cargo run --bin time_parse -- [...]` | +## `bench_compare` +See the [dedicated README file](./bench_compare/README.md). + ## `dump_events` This is a debugging helper for the parser. It outputs events emitted by the parser for a given file. This can be paired with the `YAMLRUST2_DEBUG` environment variable to have an in-depth overview of which steps the scanner and the parser are taking. @@ -171,6 +176,42 @@ All generated files are meant to be between 200 and 250 MiB in size. This tool depends on external dependencies that are not part of `yaml-rust2`'s dependencies or `dev-dependencies` and as such can't be called through `cargo run` directly. A dedicated `cargo gen_large_yaml` alias can be used to generate the benchmark files. +## `run_bench` +This is a benchmarking helper that runs the parser on the given file a given number of times and is able to extract simple metrics out of the results. The `--output-yaml` flag can be specified to make the output a YAML file that can be fed into other tools. + +This binary is made to be used by `bench_compare`. + +Synopsis: `run_bench input.yaml [--output-yaml]` + +### Examples +```sh +$> cargo run --release --bin run_bench -- bench_yaml/big.yaml 10 +Average: 1.631936191s +Min: 1.629654651s +Max: 1.633045284s +95%: 1.633045284s + +$> cargo run --release --bin run_bench -- bench_yaml/big.yaml 10 --output-yaml +parser: yaml-rust2 +input: bench_yaml/big.yaml +average: 1649847674 +min: 1648277149 +max: 1651936305 +percentile95: 1651936305 +iterations: 10 +times: + - 1650216129 + - 1649349978 + - 1649507018 + - 1648277149 + - 1649036548 + - 1650323982 + - 1650917692 + - 1648702081 + - 1650209860 + - 1651936305 +``` + ## `time_parse` This is a benchmarking helper that times how long it takes for the parser to emit all events. It calls the parser on the given input file, receives parsing events and then immediately discards them. It is advised to run this tool with `--release`. diff --git a/saphyr/tools/bench_compare/Cargo.toml b/saphyr/tools/bench_compare/Cargo.toml new file mode 100644 index 0000000..7c7f97c --- /dev/null +++ b/saphyr/tools/bench_compare/Cargo.toml @@ -0,0 +1,21 @@ +[package] +name = "bench_compare" +version = "0.5.0" +authors = [ + "Ethiraric " +] +license = "MIT OR Apache-2.0" +description = "Run multiple YAML parsers and compare their times" +repository = "https://github.com/Ethiraric/yaml-rust2" +readme = "README.md" +edition = "2018" + +[dependencies] +anyhow = { version = "1.0.81", features = ["backtrace"] } +serde = { version = "1.0.197", features = ["derive"] } +serde_yaml = "0.9.32" +toml = "0.8.11" + +[profile.release-lto] +inherits = "release" +lto = true diff --git a/saphyr/tools/bench_compare/README.md b/saphyr/tools/bench_compare/README.md new file mode 100644 index 0000000..b9e990b --- /dev/null +++ b/saphyr/tools/bench_compare/README.md @@ -0,0 +1,120 @@ +# `bench_compare` +This tool helps with comparing times different YAML parsers take to parse the same input. + +## Synopsis +``` +bench_compare time_parse +bench_compare run_bench +``` + +This will run either `time_parse` or `run_bench` (described below) with the given set of parsers from the configuration file. + +## Parsers requirements +Parsers are expected to be event-based. In order to be fair to this crate's benchmark implementation, parsers should: + +* Load the file into memory (a string, `mmap`, ...) **prior** to starting the clock +* Initialize the parser, if needed +* **Start the clock** +* Read events from the parser while the parser has not finished parsing +* Discard events as they are received (dropping them, `free`ing them or anything similar) so as to not grow their memory consumption too high, and allowing the parser to reuse event structures +* **Stop the clock** +* Destroy the resources, if needed/wanted (parser, file buffer, ...). The kernel will reap after the process exits. + + +## Parsers required binaries +This tool recognizes 2 binaries: `time_parse` and `run_bench`. + +### `time_parse` +Synopsis: +``` +time_parse file.yaml [--short] +``` + +The binary must run the aforementioned steps and display on its output the time the parser took to parse the given file. +With the `--short` option, the binary must only output the benchmark time in nanoseconds. + +```sh +# This is meant to be human-readable. +# The example below is what this crate implements. +$> time_parse file.yaml +Loaded 200MiB in 1.74389s. + +# This will be read by this tool. +# This must output ONLY the time, in nanoseconds. +$> time_parse file.yaml --short +1743892394 +``` + +This tool will always provide the `--short` option. + +### `run_bench` +Synopsis: +``` +run_bench file.yaml [--output-yaml] +``` + +The binary is expected to run `` runs of the aforementioned steps and display on its output relevant information. +The `--output-yaml` instructs the binary to output details about its runs in YAML on its standard output. +The binary may optionally perform some warmup runs prior to running the benchmark. The time it took the binary to run will not be evaluated. + +```sh +# This is meant to be human-readable. +# The example below is what this crate implements. +$> run_bench file.yaml 100 +Average: 1.589485s +Min : 1.583078s +Max : 1.597028s +95% : 1.593219s + +# This will be read by this tool. +# This must output a YAML as described below. +$> run_bench ../file.yaml 10 --output-yaml +parser: yaml-rust2 +input: ../file.yaml +average: 1620303590 +min: 1611632108 +max: 1636401896 +percentile95: 1636401896 +iterations: 10 +times: + - 1636401896 + - 1623914538 + - 1611632108 + - 1612973608 + - 1617748930 + - 1615419514 + - 1612172250 + - 1620791346 + - 1629339306 + - 1622642412 +``` + +The expected fields are (all times in nanoseconds): + +* `parser`: The name of the parser (in case of a mistake renaming files) +* `input`: The path to the input file as given to the binary arguments +* `average`: The average time it took to run the parser +* `min`: The shortest time it took to run the parser +* `max`: The longest time it took to run the parser +* `percentile95`: The 95th percentile time of the runs +* `iterations`: The number of times the parser was run (``) +* `times`: An array of `iterations` times, one for each run, in the order they were run (first run first) + +## Configuration +`bench_compare` is configured through a `bench_compare.toml` file. This file must be located in the current directory. +As of now, default values are unsupported and all fields must be set. The following fields are required: +```toml +yaml_input_dir = "bench_yaml" # The path to the directory containing the input yaml files +iterations = 10 # The number of iterations, if using `run_bench` +yaml_output_dir = "yaml_output" # The directory in which `run_bench`'s yamls are saved +csv_output = "benchmark.csv" # The CSV output aggregating times for each parser and file + +[[parsers]] # A parser, can be repeated as many times as there are parsers +name = "yaml-rust2" # The name of the parser (used for logging) +path = "target/release/" # The path in which the parsers' `run_bench` and `time_parse` are + +# If there is another parser, another block can be added +# [[parsers]] +# name = "libfyaml" +# path = "../libfyaml/build" +``` diff --git a/saphyr/tools/bench_compare/src/main.rs b/saphyr/tools/bench_compare/src/main.rs new file mode 100644 index 0000000..ac33f9c --- /dev/null +++ b/saphyr/tools/bench_compare/src/main.rs @@ -0,0 +1,174 @@ +use std::{fs::File, io::BufWriter, io::Write, path::Path}; + +use anyhow::Error; +use serde::{Deserialize, Serialize}; + +fn main() { + if let Err(e) = entrypoint() { + eprintln!("{e:?}"); + std::process::exit(1); + } +} + +fn entrypoint() -> Result<(), Error> { + let config: Config = + toml::from_str(&std::fs::read_to_string("bench_compare.toml").unwrap()).unwrap(); + if config.parsers.is_empty() { + println!("Please add at least one parser. Refer to the README for instructions."); + return Ok(()); + } + let args: Vec<_> = std::env::args().collect(); + if args.len() != 2 + || (args.len() == 2 && !["time_parse", "run_bench"].contains(&args[1].as_str())) + { + println!("Usage: bench_compare "); + return Ok(()); + } + match args[1].as_str() { + "run_bench" => run_bench(&config)?, + "time_parse" => unimplemented!(), + _ => unreachable!(), + } + Ok(()) +} + +/// Run the `run_bench` binary on the given parsers. +fn run_bench(config: &Config) -> Result<(), Error> { + // Create output directory + std::fs::create_dir_all(&config.yaml_output_dir)?; + + let inputs = list_input_files(config)?; + let iterations = format!("{}", config.iterations); + let mut averages = vec![]; + + // Inputs are ordered, so are parsers. + for input in &inputs { + let input_basename = Path::new(&input).file_name().unwrap().to_string_lossy(); + let mut input_times = vec![]; + + // Run each input for each parser. + for parser in &config.parsers { + println!("Running {input_basename} against {}", parser.name); + // Run benchmark + let path = Path::new(&parser.path).join("run_bench"); + let output = std::process::Command::new(path) + .arg(input) + .arg(&iterations) + .arg("--output-yaml") + .output()?; + // Check exit status. + if output.status.code().unwrap_or(1) == 0 { + let s = String::from_utf8_lossy(&output.stdout); + // Get output as yaml. + match serde_yaml::from_str::(&s) { + Ok(output) => { + // Push average into our CSV-to-be. + input_times.push(output.average); + // Save the YAML for later. + serde_yaml::to_writer( + BufWriter::new(File::create(format!( + "{}/{}-{}", + config.yaml_output_dir, parser.name, input_basename + ))?), + &output, + )?; + } + Err(e) => { + // Yaml is invalid, use 0 as "didn't run properly". + println!("Errored: Invalid YAML output: {e}"); + input_times.push(0); + } + } + } else { + // An error happened, use 0 as "didn't run properly". + println!("Errored: process did exit non-zero"); + input_times.push(0); + } + } + averages.push(input_times); + } + + // Finally, save a CSV. + save_run_bench_csv(config, &inputs, &averages) +} + +/// General configuration structure. +#[derive(Serialize, Deserialize)] +struct Config { + /// The path to the directory containing the input yaml files. + yaml_input_dir: String, + /// Number of iterations to run, if using `run_bench`. + iterations: u32, + /// The parsers to run. + parsers: Vec, + /// The path to the directory in which `run_bench`'s yamls are saved. + yaml_output_dir: String, + /// The path to the CSV output aggregating times for each parser and file. + csv_output: String, +} + +/// A parser configuration. +#[derive(Serialize, Deserialize)] +struct Parser { + /// The name of the parser. + name: String, + /// The path in which the parser's `run_bench` and `time_parse` are located. + path: String, +} + +/// Ourput of running `run_bench` on a given parser. +#[derive(Serialize, Deserialize)] +struct BenchYamlOutput { + /// The name of the parser. + parser: String, + /// The file taken as input. + input: String, + /// Average parsing time (ns). + average: u64, + /// Shortest parsing time (ns). + min: u64, + /// Longest parsing time (ns). + max: u64, + /// 95th percentile of parsing times (ns). + percentile95: u64, + /// Number of iterations. + iterations: u64, + /// Parsing times for each run. + times: Vec, +} + +/// Save a CSV file with all averages from `run_bench`. +fn save_run_bench_csv( + config: &Config, + inputs: &[String], + averages: &[Vec], +) -> Result<(), Error> { + let mut csv = BufWriter::new(File::create(&config.csv_output)?); + for parser in &config.parsers { + write!(csv, ",{}", parser.name,)?; + } + writeln!(csv)?; + for (path, averages) in inputs.iter().zip(averages.iter()) { + let filename = Path::new(path).file_name().unwrap().to_string_lossy(); + write!(csv, "{}", filename)?; + for avg in averages { + write!(csv, ",{avg}")?; + } + writeln!(csv)?; + } + + Ok(()) +} + +/// Returns the paths to the input yaml files. +fn list_input_files(config: &Config) -> Result, Error> { + Ok(std::fs::read_dir(&config.yaml_input_dir)? + .filter_map(Result::ok) + .map(|entry| entry.path().to_string_lossy().to_string()) + .filter(|path| { + Path::new(path) + .extension() + .map_or(false, |ext| ext.eq_ignore_ascii_case("yaml")) + }) + .collect()) +} From 006cc66b59c05af83b3fb2753f85876e29f225ba Mon Sep 17 00:00:00 2001 From: Ethiraric Date: Fri, 15 Mar 2024 18:29:54 +0100 Subject: [PATCH 297/380] Add `bench_compare` tool. --- bench/justfile | 6 + bench/tools/README.md | 41 ++++++ bench/tools/bench_compare/Cargo.toml | 21 ++++ bench/tools/bench_compare/README.md | 120 ++++++++++++++++++ bench/tools/bench_compare/src/main.rs | 174 ++++++++++++++++++++++++++ 5 files changed, 362 insertions(+) create mode 100644 bench/tools/bench_compare/Cargo.toml create mode 100644 bench/tools/bench_compare/README.md create mode 100644 bench/tools/bench_compare/src/main.rs diff --git a/bench/justfile b/bench/justfile index c941027..4d0c444 100644 --- a/bench/justfile +++ b/bench/justfile @@ -6,3 +6,9 @@ before_commit: cargo test cargo test --release cargo build --profile=release-lto --package gen_large_yaml --bin gen_large_yaml --manifest-path tools/gen_large_yaml/Cargo.toml + +ethi_bench: + cargo build --release --all-targets + cd ../Yaml-rust && cargo build --release --all-targets + cd ../libfyaml/build && ninja + cargo bench_compare run_bench diff --git a/bench/tools/README.md b/bench/tools/README.md index de71873..7728a0f 100644 --- a/bench/tools/README.md +++ b/bench/tools/README.md @@ -4,10 +4,15 @@ Due to dependency management, only some of them are available as binaries from t | Tool | Invocation | |------|------------| +| `bench_compare` | `cargo bench_compare` | | `dump_events` | `cargo run --bin dump_events -- [...]` | | `gen_large_yaml` | `cargo gen_large_yaml` | +| `run_bench` | `cargo run --bin run_bench -- [...]` | | `time_parse` | `cargo run --bin time_parse -- [...]` | +## `bench_compare` +See the [dedicated README file](./bench_compare/README.md). + ## `dump_events` This is a debugging helper for the parser. It outputs events emitted by the parser for a given file. This can be paired with the `YAMLRUST2_DEBUG` environment variable to have an in-depth overview of which steps the scanner and the parser are taking. @@ -171,6 +176,42 @@ All generated files are meant to be between 200 and 250 MiB in size. This tool depends on external dependencies that are not part of `yaml-rust2`'s dependencies or `dev-dependencies` and as such can't be called through `cargo run` directly. A dedicated `cargo gen_large_yaml` alias can be used to generate the benchmark files. +## `run_bench` +This is a benchmarking helper that runs the parser on the given file a given number of times and is able to extract simple metrics out of the results. The `--output-yaml` flag can be specified to make the output a YAML file that can be fed into other tools. + +This binary is made to be used by `bench_compare`. + +Synopsis: `run_bench input.yaml [--output-yaml]` + +### Examples +```sh +$> cargo run --release --bin run_bench -- bench_yaml/big.yaml 10 +Average: 1.631936191s +Min: 1.629654651s +Max: 1.633045284s +95%: 1.633045284s + +$> cargo run --release --bin run_bench -- bench_yaml/big.yaml 10 --output-yaml +parser: yaml-rust2 +input: bench_yaml/big.yaml +average: 1649847674 +min: 1648277149 +max: 1651936305 +percentile95: 1651936305 +iterations: 10 +times: + - 1650216129 + - 1649349978 + - 1649507018 + - 1648277149 + - 1649036548 + - 1650323982 + - 1650917692 + - 1648702081 + - 1650209860 + - 1651936305 +``` + ## `time_parse` This is a benchmarking helper that times how long it takes for the parser to emit all events. It calls the parser on the given input file, receives parsing events and then immediately discards them. It is advised to run this tool with `--release`. diff --git a/bench/tools/bench_compare/Cargo.toml b/bench/tools/bench_compare/Cargo.toml new file mode 100644 index 0000000..7c7f97c --- /dev/null +++ b/bench/tools/bench_compare/Cargo.toml @@ -0,0 +1,21 @@ +[package] +name = "bench_compare" +version = "0.5.0" +authors = [ + "Ethiraric " +] +license = "MIT OR Apache-2.0" +description = "Run multiple YAML parsers and compare their times" +repository = "https://github.com/Ethiraric/yaml-rust2" +readme = "README.md" +edition = "2018" + +[dependencies] +anyhow = { version = "1.0.81", features = ["backtrace"] } +serde = { version = "1.0.197", features = ["derive"] } +serde_yaml = "0.9.32" +toml = "0.8.11" + +[profile.release-lto] +inherits = "release" +lto = true diff --git a/bench/tools/bench_compare/README.md b/bench/tools/bench_compare/README.md new file mode 100644 index 0000000..b9e990b --- /dev/null +++ b/bench/tools/bench_compare/README.md @@ -0,0 +1,120 @@ +# `bench_compare` +This tool helps with comparing times different YAML parsers take to parse the same input. + +## Synopsis +``` +bench_compare time_parse +bench_compare run_bench +``` + +This will run either `time_parse` or `run_bench` (described below) with the given set of parsers from the configuration file. + +## Parsers requirements +Parsers are expected to be event-based. In order to be fair to this crate's benchmark implementation, parsers should: + +* Load the file into memory (a string, `mmap`, ...) **prior** to starting the clock +* Initialize the parser, if needed +* **Start the clock** +* Read events from the parser while the parser has not finished parsing +* Discard events as they are received (dropping them, `free`ing them or anything similar) so as to not grow their memory consumption too high, and allowing the parser to reuse event structures +* **Stop the clock** +* Destroy the resources, if needed/wanted (parser, file buffer, ...). The kernel will reap after the process exits. + + +## Parsers required binaries +This tool recognizes 2 binaries: `time_parse` and `run_bench`. + +### `time_parse` +Synopsis: +``` +time_parse file.yaml [--short] +``` + +The binary must run the aforementioned steps and display on its output the time the parser took to parse the given file. +With the `--short` option, the binary must only output the benchmark time in nanoseconds. + +```sh +# This is meant to be human-readable. +# The example below is what this crate implements. +$> time_parse file.yaml +Loaded 200MiB in 1.74389s. + +# This will be read by this tool. +# This must output ONLY the time, in nanoseconds. +$> time_parse file.yaml --short +1743892394 +``` + +This tool will always provide the `--short` option. + +### `run_bench` +Synopsis: +``` +run_bench file.yaml [--output-yaml] +``` + +The binary is expected to run `` runs of the aforementioned steps and display on its output relevant information. +The `--output-yaml` instructs the binary to output details about its runs in YAML on its standard output. +The binary may optionally perform some warmup runs prior to running the benchmark. The time it took the binary to run will not be evaluated. + +```sh +# This is meant to be human-readable. +# The example below is what this crate implements. +$> run_bench file.yaml 100 +Average: 1.589485s +Min : 1.583078s +Max : 1.597028s +95% : 1.593219s + +# This will be read by this tool. +# This must output a YAML as described below. +$> run_bench ../file.yaml 10 --output-yaml +parser: yaml-rust2 +input: ../file.yaml +average: 1620303590 +min: 1611632108 +max: 1636401896 +percentile95: 1636401896 +iterations: 10 +times: + - 1636401896 + - 1623914538 + - 1611632108 + - 1612973608 + - 1617748930 + - 1615419514 + - 1612172250 + - 1620791346 + - 1629339306 + - 1622642412 +``` + +The expected fields are (all times in nanoseconds): + +* `parser`: The name of the parser (in case of a mistake renaming files) +* `input`: The path to the input file as given to the binary arguments +* `average`: The average time it took to run the parser +* `min`: The shortest time it took to run the parser +* `max`: The longest time it took to run the parser +* `percentile95`: The 95th percentile time of the runs +* `iterations`: The number of times the parser was run (``) +* `times`: An array of `iterations` times, one for each run, in the order they were run (first run first) + +## Configuration +`bench_compare` is configured through a `bench_compare.toml` file. This file must be located in the current directory. +As of now, default values are unsupported and all fields must be set. The following fields are required: +```toml +yaml_input_dir = "bench_yaml" # The path to the directory containing the input yaml files +iterations = 10 # The number of iterations, if using `run_bench` +yaml_output_dir = "yaml_output" # The directory in which `run_bench`'s yamls are saved +csv_output = "benchmark.csv" # The CSV output aggregating times for each parser and file + +[[parsers]] # A parser, can be repeated as many times as there are parsers +name = "yaml-rust2" # The name of the parser (used for logging) +path = "target/release/" # The path in which the parsers' `run_bench` and `time_parse` are + +# If there is another parser, another block can be added +# [[parsers]] +# name = "libfyaml" +# path = "../libfyaml/build" +``` diff --git a/bench/tools/bench_compare/src/main.rs b/bench/tools/bench_compare/src/main.rs new file mode 100644 index 0000000..ac33f9c --- /dev/null +++ b/bench/tools/bench_compare/src/main.rs @@ -0,0 +1,174 @@ +use std::{fs::File, io::BufWriter, io::Write, path::Path}; + +use anyhow::Error; +use serde::{Deserialize, Serialize}; + +fn main() { + if let Err(e) = entrypoint() { + eprintln!("{e:?}"); + std::process::exit(1); + } +} + +fn entrypoint() -> Result<(), Error> { + let config: Config = + toml::from_str(&std::fs::read_to_string("bench_compare.toml").unwrap()).unwrap(); + if config.parsers.is_empty() { + println!("Please add at least one parser. Refer to the README for instructions."); + return Ok(()); + } + let args: Vec<_> = std::env::args().collect(); + if args.len() != 2 + || (args.len() == 2 && !["time_parse", "run_bench"].contains(&args[1].as_str())) + { + println!("Usage: bench_compare "); + return Ok(()); + } + match args[1].as_str() { + "run_bench" => run_bench(&config)?, + "time_parse" => unimplemented!(), + _ => unreachable!(), + } + Ok(()) +} + +/// Run the `run_bench` binary on the given parsers. +fn run_bench(config: &Config) -> Result<(), Error> { + // Create output directory + std::fs::create_dir_all(&config.yaml_output_dir)?; + + let inputs = list_input_files(config)?; + let iterations = format!("{}", config.iterations); + let mut averages = vec![]; + + // Inputs are ordered, so are parsers. + for input in &inputs { + let input_basename = Path::new(&input).file_name().unwrap().to_string_lossy(); + let mut input_times = vec![]; + + // Run each input for each parser. + for parser in &config.parsers { + println!("Running {input_basename} against {}", parser.name); + // Run benchmark + let path = Path::new(&parser.path).join("run_bench"); + let output = std::process::Command::new(path) + .arg(input) + .arg(&iterations) + .arg("--output-yaml") + .output()?; + // Check exit status. + if output.status.code().unwrap_or(1) == 0 { + let s = String::from_utf8_lossy(&output.stdout); + // Get output as yaml. + match serde_yaml::from_str::(&s) { + Ok(output) => { + // Push average into our CSV-to-be. + input_times.push(output.average); + // Save the YAML for later. + serde_yaml::to_writer( + BufWriter::new(File::create(format!( + "{}/{}-{}", + config.yaml_output_dir, parser.name, input_basename + ))?), + &output, + )?; + } + Err(e) => { + // Yaml is invalid, use 0 as "didn't run properly". + println!("Errored: Invalid YAML output: {e}"); + input_times.push(0); + } + } + } else { + // An error happened, use 0 as "didn't run properly". + println!("Errored: process did exit non-zero"); + input_times.push(0); + } + } + averages.push(input_times); + } + + // Finally, save a CSV. + save_run_bench_csv(config, &inputs, &averages) +} + +/// General configuration structure. +#[derive(Serialize, Deserialize)] +struct Config { + /// The path to the directory containing the input yaml files. + yaml_input_dir: String, + /// Number of iterations to run, if using `run_bench`. + iterations: u32, + /// The parsers to run. + parsers: Vec, + /// The path to the directory in which `run_bench`'s yamls are saved. + yaml_output_dir: String, + /// The path to the CSV output aggregating times for each parser and file. + csv_output: String, +} + +/// A parser configuration. +#[derive(Serialize, Deserialize)] +struct Parser { + /// The name of the parser. + name: String, + /// The path in which the parser's `run_bench` and `time_parse` are located. + path: String, +} + +/// Ourput of running `run_bench` on a given parser. +#[derive(Serialize, Deserialize)] +struct BenchYamlOutput { + /// The name of the parser. + parser: String, + /// The file taken as input. + input: String, + /// Average parsing time (ns). + average: u64, + /// Shortest parsing time (ns). + min: u64, + /// Longest parsing time (ns). + max: u64, + /// 95th percentile of parsing times (ns). + percentile95: u64, + /// Number of iterations. + iterations: u64, + /// Parsing times for each run. + times: Vec, +} + +/// Save a CSV file with all averages from `run_bench`. +fn save_run_bench_csv( + config: &Config, + inputs: &[String], + averages: &[Vec], +) -> Result<(), Error> { + let mut csv = BufWriter::new(File::create(&config.csv_output)?); + for parser in &config.parsers { + write!(csv, ",{}", parser.name,)?; + } + writeln!(csv)?; + for (path, averages) in inputs.iter().zip(averages.iter()) { + let filename = Path::new(path).file_name().unwrap().to_string_lossy(); + write!(csv, "{}", filename)?; + for avg in averages { + write!(csv, ",{avg}")?; + } + writeln!(csv)?; + } + + Ok(()) +} + +/// Returns the paths to the input yaml files. +fn list_input_files(config: &Config) -> Result, Error> { + Ok(std::fs::read_dir(&config.yaml_input_dir)? + .filter_map(Result::ok) + .map(|entry| entry.path().to_string_lossy().to_string()) + .filter(|path| { + Path::new(path) + .extension() + .map_or(false, |ext| ext.eq_ignore_ascii_case("yaml")) + }) + .collect()) +} From e4ae1d054654bbc507a798eba64d0063507a5280 Mon Sep 17 00:00:00 2001 From: Ethiraric Date: Fri, 15 Mar 2024 20:14:26 +0100 Subject: [PATCH 298/380] Update to v0.6. --- saphyr/Cargo.toml | 2 +- saphyr/README.md | 2 +- saphyr/documents/2024-03-15-FirstRelease.md | 153 ++++++++++++++++++ .../documents/img/2024-03-15-benchmarks.csv | 5 + saphyr/documents/img/benchmarks-v0.6.svg | 69 ++++++++ saphyr/src/lib.rs | 2 +- saphyr/tools/bench_compare/Cargo.toml | 2 +- saphyr/tools/gen_large_yaml/Cargo.toml | 4 +- 8 files changed, 233 insertions(+), 6 deletions(-) create mode 100644 saphyr/documents/2024-03-15-FirstRelease.md create mode 100644 saphyr/documents/img/2024-03-15-benchmarks.csv create mode 100644 saphyr/documents/img/benchmarks-v0.6.svg diff --git a/saphyr/Cargo.toml b/saphyr/Cargo.toml index a87b239..89b7583 100644 --- a/saphyr/Cargo.toml +++ b/saphyr/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "yaml-rust2" -version = "0.5.0" +version = "0.6.0" authors = [ "Yuheng Chen ", "Ethiraric " diff --git a/saphyr/README.md b/saphyr/README.md index cb6d4c7..4b5df49 100644 --- a/saphyr/README.md +++ b/saphyr/README.md @@ -15,7 +15,7 @@ Add the following to the Cargo.toml of your project: ```toml [dependencies] -yaml-rust2 = "0.5" +yaml-rust2 = "0.6" ``` Use `yaml_rust2::YamlLoader` to load YAML documents and access them as `Yaml` objects: diff --git a/saphyr/documents/2024-03-15-FirstRelease.md b/saphyr/documents/2024-03-15-FirstRelease.md new file mode 100644 index 0000000..2c193f0 --- /dev/null +++ b/saphyr/documents/2024-03-15-FirstRelease.md @@ -0,0 +1,153 @@ +# `yaml-rust2`'s first real release +If you are not interested in how this crate was born and just want to know what differs from `yaml-rust`, scroll down to +["This release" or click here](#this-release). + +## The why +Sometime in August 2023, an ordinary developer (that's me) felt the urge to start scribbling about an OpenAPI linter. I +had worked with the OpenAPI format and tried different linters, but none of them felt right. And me needing 3 different +linters to lint my OpenAPI was a pain to me. Like any sane person would do, I would write my own (author's note: you are +not not sane if you wouldn't). In order to get things started, I needed a YAML parser. + +On August 14th 2023, I forked `yaml-rust` and started working on it. The crate stated that some YAML features were not +yet available and I felt that was an issue I could tackle. I started by getting to know the code, understanding it, +adding warnings, refactoring, tinkering, documenting, ... . Anything I could do that made me feel that codebase was +better, I would do it. I wanted this crate to be as clean as it could be. + +## Fixing YAML compliance +In my quest to understand YAML better, I found [the YAML test suite](https://github.com/yaml/yaml-test-suite/): a +compilation of corner cases and intricate YAML examples with their expected output / behavior. Interestingly enough, +there was an [open pull request on yaml-rust](https://github.com/chyh1990/yaml-rust/pull/187) by +[tanriol](https://github.com/tanriol) which integrated the YAML test suite as part of the crate tests. Comments mention +that the maintainer wasn't around anymore and that new contributions would probably never be accepted. + +That, however, was a problem for future-past-me, as I was determined (somehow) to have `yaml-rust` pass every single +test of the YAML test suite. Slowly, over the course of multiple months (from August 2023 to January 2024), I would +sometimes pick a test from the test suite, fix it, commit and start again. On the 23rd of January, the last commit +fixing a test was created. + +According to the [YAML test matrix](https://matrix.yaml.info/), there is to this day only 1 library that is fully +compliant (aside from the Perl parser generated by the reference). This would make `yaml-rust2` the second library to be +fully YAML-compliant. You really wouldn't believe how much you have to stretch YAML so that it's not valid YAML anymore. + +## Performance +With so many improvements, the crate was now perfect!.. Except for performance. Adding conditions for every little bit +of compliance has lead the code to be much more complex and branch-y, which CPUs hate. I was around 20% slower than the +code was when I started. + +For a bit over 3 weeks, I stared at flamegraphs and made my CPU repeat the same instructions until it could do it +faster. There have been a bunch of improvements for performance since `yaml-rust`'s last commit. Here are a few of them: + +* Avoid putting characters in a `VecDeque` buffer when we can push them directly into a `String`. +* Be a bit smarter about reallocating temporaries: it's best if we know the size in advance, but when we don't we can + sometimes avoid pushing characters 1 at a time. +* The scanner skips over characters one at a time. When skipping them, it needs to check whether they're a linebreak to + update the location. Sometimes, we know we skip over a letter (which is not a linebreak). Several "skip" functions + have been added for specific uses. + +And the big winner, for an around 15% decrease in runtime was: use a statically-sized buffer instead of a dynamically +allocated one. (Almost) Every character goes from the input stream into the buffer and then gets read from the buffer. +This means that `VecDeque::push` and `VecDeque::pop` were called very frequently. The former always has to check for +capacity. Using an `ArrayDeque` removed the need for constant capacity checks, at the cost of a minor decrease in +performance if a line is deeply indented. Hopefully, nobody has 42 nested YAML objects. + +Here is in the end the performance breakdown: + +![Comparison of the performance between `yaml-rust`, `yaml-rust2` and the C `libfyaml`. `yaml-rust2` is faster in every +test than `yaml-rust`, but `libfyaml` remains faster overall.](./img/benchmarks-v0.6.svg) + +Here is a shot description of what the files contain: + + * `big`: A large array of records with few fields. One of the fields is a description, a large text block scalar + spanning multiple lines. Most of the scanning happens in block scalars. + * `nested`: Very short key-value pairs that nest deeply. + * `small_objects`: A large array of 2 key-value mappings. + * `strings_array`: A large array of lipsum one-liners (~150-175 characters in length). + +As you can see, `yaml-rust2` performs better than `yaml-rust` on every benchmark. However, when compared against the C +[`libfyaml`](https://github.com/pantoniou/libfyaml), we can see that there is still much room for improvement. + +I'd like to end this section with a small disclaimer: I am not a benchmark expert. I tried to have an heterogenous set +of files that would highlight how the parser performs when stressed different ways. I invite you to take a look at [the +code generating the YAML files](https://github.com/Ethiraric/yaml-rust2/tree/master/tools/gen_large_yaml) and, if you +are more knowledgeable than I am, improve upon them. `yaml-rust2` performs better with these files because those are the +ones I could work with. If you find a fil with which `yaml-rust2` is slower than `yaml-rust`, do file an issue! + +## This release +### Improvements from `yaml-rust` +This release should improve over `yaml-rust` over 3 major points: + + * Performance: We all love fast software. I want to help you achieve it. I haven't managed to make this crate twice as + fast, but you should notice a 15-20% improvement in performance. + * Compliance: You may not notice it, since I didn't know most of the bugs I fixed were bugs to begin with, but this + crate should now be fully YAML-comliant. + * Documentation: The documentation of `yaml-rust` is unfortunately incomplete. Documentation here is not exhaustive, + but most items are documented. Notably, private items are documented, making it much easier to understand where + something happens. There are also in-code comments that help figure out what is going on under the hood. + +Also, last but not least, I do plan on keeping this crate alive as long as I can. Nobody can make promises on that +regard, of course, but I have poured hours of work into this, and I would hate to see this go to waste. + +### Switching to `yaml-rust2` +This release is `v0.6.0`, chosen to explicitly differ in minor from `yaml-rust`. `v0.4.x` does not exist in this crate +to avoid any confusion between the 2 crates. + +Switching to `yaml-rust2` should be a very simple process. Change your `Cargo.toml` to use `yaml-rust2` instead of +`yaml-rust`: + +```diff +-yaml-rust = "0.4.4" ++yaml-rust2 = "0.6.0" +``` + +As for your code, you have one of two solutions: + + * Changing your imports from `use yaml_rust::Yaml` to `use yaml_rust2::Yaml` if you import items directly, or change + occurences of `yaml_rust` to `yaml_rust2` if you use fully qualified paths. + * Alternatively, you can alias `yaml_rust2` with `use yaml_rust2 as yaml_rust`. This would keep your code working if + you use fully qualified paths. + +Whichever you decide is up to you. + +#### What about API breakage? +Most of what I have changed is in the implementation details. You might notice more documentation appearing on your LSP, +but documentation isn't bound by the API. There is only one change I made that could lead to compile errors. It is +unlikely you used that feature, but I'd hate to leave this undocumented. + +If you use the low-level event parsing API (`Parser`, +`EventReceiver` / `MarkedEventReceiver`) and namely the `yaml_rust::Event` enumeration, there is one change that might +break your code. This was needed for tests in the YAML test suite. In `yaml-rust`, YAML tags are not forwarded from the +lower-level `Scanner` API to the low-level `Parser` API. + +Here is the change that was made in the library: + +```diff + pub enum Event { + // ... +-SequenceStart(usize), +-MappingStart(usize), ++SequenceStart(usize, Option), ++MappingStart(usize, Option), + // ... + } +``` + +This means that you may now see YAML tags appearing in your code. + +## Closing words +YAML is hard. Much more than I had anticipated. If you are exploring dark corners of YAML that `yaml-rust2` supports but +`yaml-rust` doesn't, I'm curious to know what it is. + +Work on this crate is far from over. I will try and match `libfyaml`'s performance. Today is the first time I benched +against it, and I wouldn't have guessed it to outperform `yaml-rust2` that much. + +If you're interested in upgrading your `yaml-rust` crate, please do take a look at [davvid](https://github.com/davvid)'s +[fork of `yaml-rust`](https://github.com/davvid/yaml-rust). Very recent developements on this crate sparked from an +[issue on advisory-db](https://github.com/rustsec/advisory-db/issues/1921) about the unmaintained state of `yaml-rust`. +I hope it will be that YAML in Rust will improve following this issue. + +Thank you for reading through this. If you happen to have issues with `yaml-rust2` or suggestions, do [drop an +issue](https://github.com/Ethiraric/yaml-rust2/issues)! + +If however you wanted an OpenAPI linter, I'm afraid you're out of luck. Just as much as I'm out of time ;) + +-Ethiraric diff --git a/saphyr/documents/img/2024-03-15-benchmarks.csv b/saphyr/documents/img/2024-03-15-benchmarks.csv new file mode 100644 index 0000000..685b6cc --- /dev/null +++ b/saphyr/documents/img/2024-03-15-benchmarks.csv @@ -0,0 +1,5 @@ +,yaml-rust2,yaml-rust,libfyaml +big.yaml,1644933464,2097747837,1642761913 +nested.yaml,1186706803,1461738560,1104480120 +small_objects.yaml,5459915062,5686715239,4402878726 +strings_array.yaml,1698194153,2044921291,924246153 diff --git a/saphyr/documents/img/benchmarks-v0.6.svg b/saphyr/documents/img/benchmarks-v0.6.svg new file mode 100644 index 0000000..2e9ddd7 --- /dev/null +++ b/saphyr/documents/img/benchmarks-v0.6.svg @@ -0,0 +1,69 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + big + nested + small_objects + strings_array + 0 + 1000000 + 2000000 + 3000000 + 4000000 + 5000000 + 6000000 + + + + yaml-rust + yaml-rust2 + libfyaml + Time in ms (less is better) + \ No newline at end of file diff --git a/saphyr/src/lib.rs b/saphyr/src/lib.rs index 9c13e9a..7c01b66 100644 --- a/saphyr/src/lib.rs +++ b/saphyr/src/lib.rs @@ -11,7 +11,7 @@ //! //! ```toml //! [dependencies] -//! yaml-rust2 = "0.5.0" +//! yaml-rust2 = "0.6.0" //! ``` //! //! # Examples diff --git a/saphyr/tools/bench_compare/Cargo.toml b/saphyr/tools/bench_compare/Cargo.toml index 7c7f97c..4ca9b33 100644 --- a/saphyr/tools/bench_compare/Cargo.toml +++ b/saphyr/tools/bench_compare/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "bench_compare" -version = "0.5.0" +version = "0.6.0" authors = [ "Ethiraric " ] diff --git a/saphyr/tools/gen_large_yaml/Cargo.toml b/saphyr/tools/gen_large_yaml/Cargo.toml index 54b6b3c..0fe3eac 100644 --- a/saphyr/tools/gen_large_yaml/Cargo.toml +++ b/saphyr/tools/gen_large_yaml/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "gen_large_yaml" -version = "0.5.0" +version = "0.6.0" authors = [ "Ethiraric " ] @@ -11,7 +11,7 @@ readme = "README.md" edition = "2018" [dependencies] -yaml-rust2 = { version = "0.5.0", path = "../../" } +yaml-rust2 = { version = "0.6.0", path = "../../" } rand = "0.8.5" lipsum = "0.9.0" From 04e407df91b15a5fa1224d227121cc416720b0fd Mon Sep 17 00:00:00 2001 From: Ethiraric Date: Fri, 15 Mar 2024 20:14:26 +0100 Subject: [PATCH 299/380] Update to v0.6. --- bench/tools/bench_compare/Cargo.toml | 2 +- bench/tools/gen_large_yaml/Cargo.toml | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/bench/tools/bench_compare/Cargo.toml b/bench/tools/bench_compare/Cargo.toml index 7c7f97c..4ca9b33 100644 --- a/bench/tools/bench_compare/Cargo.toml +++ b/bench/tools/bench_compare/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "bench_compare" -version = "0.5.0" +version = "0.6.0" authors = [ "Ethiraric " ] diff --git a/bench/tools/gen_large_yaml/Cargo.toml b/bench/tools/gen_large_yaml/Cargo.toml index 54b6b3c..0fe3eac 100644 --- a/bench/tools/gen_large_yaml/Cargo.toml +++ b/bench/tools/gen_large_yaml/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "gen_large_yaml" -version = "0.5.0" +version = "0.6.0" authors = [ "Ethiraric " ] @@ -11,7 +11,7 @@ readme = "README.md" edition = "2018" [dependencies] -yaml-rust2 = { version = "0.5.0", path = "../../" } +yaml-rust2 = { version = "0.6.0", path = "../../" } rand = "0.8.5" lipsum = "0.9.0" From a6c8dfe5b8e336ab622ffea9e0715a9836c99e02 Mon Sep 17 00:00:00 2001 From: David Aguilar Date: Sun, 17 Mar 2024 00:47:11 -0700 Subject: [PATCH 300/380] Emit multi-line string values as block scalars --- saphyr/src/char_traits.rs | 13 +++++++ saphyr/src/emitter.rs | 69 ++++++++++++++++++++++++++++++++- saphyr/tests/test_round_trip.rs | 12 ++++++ 3 files changed, 92 insertions(+), 2 deletions(-) diff --git a/saphyr/src/char_traits.rs b/saphyr/src/char_traits.rs index 4a08da1..5965cc4 100644 --- a/saphyr/src/char_traits.rs +++ b/saphyr/src/char_traits.rs @@ -109,3 +109,16 @@ pub(crate) fn is_uri_char(c: char) -> bool { pub(crate) fn is_tag_char(c: char) -> bool { is_uri_char(c) && !is_flow(c) && c != '!' } + +/// Check if the string can be expressed a valid literal block scalar. +/// The YAML spec supports all of the following in block literals except #xFEFF: +/// ```ignore +/// #x9 | #xA | [#x20-#x7E] /* 8 bit */ +/// | #x85 | [#xA0-#xD7FF] | [#xE000-#xFFFD] /* 16 bit */ +/// | [#x10000-#x10FFFF] /* 32 bit */ +/// ``` +#[inline] +pub(crate) fn is_valid_literal_block_scalar(string: &str) -> bool { + string.chars().all(|character: char| + matches!(character, '\t' | '\n' | '\x20'..='\x7e' | '\u{0085}' | '\u{00a0}'..='\u{d7fff}')) +} diff --git a/saphyr/src/emitter.rs b/saphyr/src/emitter.rs index 081654a..213da01 100644 --- a/saphyr/src/emitter.rs +++ b/saphyr/src/emitter.rs @@ -1,3 +1,4 @@ +use crate::char_traits; use crate::yaml::{Hash, Yaml}; use std::convert::From; use std::error::Error; @@ -35,8 +36,8 @@ pub struct YamlEmitter<'a> { writer: &'a mut dyn fmt::Write, best_indent: usize, compact: bool, - level: isize, + multiline_strings: bool, } pub type EmitResult = Result<(), EmitError>; @@ -111,6 +112,7 @@ impl<'a> YamlEmitter<'a> { best_indent: 2, compact: true, level: -1, + multiline_strings: false, } } @@ -132,6 +134,40 @@ impl<'a> YamlEmitter<'a> { self.compact } + /// Render strings containing multiple lines in [literal style]. + /// + /// # Examples + /// + /// ```rust + /// use yaml_rust2::{Yaml, YamlEmitter, YamlLoader}; + /// + /// let input = r#"{foo: "bar!\nbar!", baz: 42}"#; + /// let parsed = YamlLoader::load_from_str(input).unwrap(); + /// eprintln!("{:?}", parsed); + /// + /// let mut output = String::new(); + /// let mut emitter = YamlEmitter::new(&mut output); + /// emitter.multiline_strings(true); + /// emitter.dump(&parsed[0]).unwrap(); + /// assert_eq!(output.as_str(), "\ + /// --- + /// foo: | + /// bar! + /// bar! + /// baz: 42"); + /// ``` + /// + /// [literal style]: https://yaml.org/spec/1.2/spec.html#id2795688 + pub fn multiline_strings(&mut self, multiline_strings: bool) { + self.multiline_strings = multiline_strings; + } + + /// Determine if this emitter will emit multiline strings when appropriate. + #[must_use] + pub fn is_multiline_strings(&self) -> bool { + self.multiline_strings + } + pub fn dump(&mut self, doc: &Yaml) -> EmitResult { // write DocumentStart writeln!(self.writer, "---")?; @@ -156,7 +192,20 @@ impl<'a> YamlEmitter<'a> { Yaml::Array(ref v) => self.emit_array(v), Yaml::Hash(ref h) => self.emit_hash(h), Yaml::String(ref v) => { - if need_quotes(v) { + if self.multiline_strings + && v.contains('\n') + && char_traits::is_valid_literal_block_scalar(v) + { + write!(self.writer, "|")?; + self.level += 1; + for line in v.lines() { + writeln!(self.writer)?; + self.write_indent()?; + // It's literal text, so don't escape special chars. + write!(self.writer, "{line}")?; + } + self.level -= 1; + } else if need_quotes(v) { escape_str(self.writer, v)?; } else { write!(self.writer, "{v}")?; @@ -334,3 +383,19 @@ fn need_quotes(string: &str) -> bool { || string.parse::().is_ok() || string.parse::().is_ok() } + +#[cfg(test)] +mod test { + use super::YamlEmitter; + use crate::YamlLoader; + + #[test] + fn test_multiline_string() { + let input = r#"{foo: "bar!\nbar!", baz: 42}"#; + let parsed = YamlLoader::load_from_str(input).unwrap(); + let mut output = String::new(); + let mut emitter = YamlEmitter::new(&mut output); + emitter.multiline_strings(true); + emitter.dump(&parsed[0]).unwrap(); + } +} diff --git a/saphyr/tests/test_round_trip.rs b/saphyr/tests/test_round_trip.rs index d051281..5f0a7a1 100644 --- a/saphyr/tests/test_round_trip.rs +++ b/saphyr/tests/test_round_trip.rs @@ -68,3 +68,15 @@ fn test_issue133() { let doc2 = YamlLoader::load_from_str(&out_str).unwrap().pop().unwrap(); assert_eq!(doc, doc2); // This failed because the type has changed to a number now } + +#[test] +fn test_newline() { + let y = Yaml::Array(vec![Yaml::String("\n".to_owned())]); + roundtrip(&y); +} + +#[test] +fn test_crlf() { + let y = Yaml::Array(vec![Yaml::String("\r\n".to_owned())]); + roundtrip(&y); +} From 99fb843550e5dadb6ff313848dc7c76a3b67a5c2 Mon Sep 17 00:00:00 2001 From: David Aguilar Date: Sun, 17 Mar 2024 00:48:26 -0700 Subject: [PATCH 301/380] Cargo.toml: add myself to the authors list --- saphyr/Cargo.toml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/saphyr/Cargo.toml b/saphyr/Cargo.toml index 89b7583..c69712e 100644 --- a/saphyr/Cargo.toml +++ b/saphyr/Cargo.toml @@ -3,7 +3,8 @@ name = "yaml-rust2" version = "0.6.0" authors = [ "Yuheng Chen ", - "Ethiraric " + "Ethiraric ", + "David Aguilar " ] documentation = "https://docs.rs/yaml-rust2" license = "MIT OR Apache-2.0" From 07113c6e1c477f4c7f0b7147e71680c9d12e507d Mon Sep 17 00:00:00 2001 From: David Aguilar Date: Sun, 17 Mar 2024 00:49:13 -0700 Subject: [PATCH 302/380] Cargo.toml: enable the Rust 2021 Edition --- saphyr/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/saphyr/Cargo.toml b/saphyr/Cargo.toml index c69712e..a62945d 100644 --- a/saphyr/Cargo.toml +++ b/saphyr/Cargo.toml @@ -11,7 +11,7 @@ license = "MIT OR Apache-2.0" description = "A fully YAML 1.2 compliant YAML parser" repository = "https://github.com/Ethiraric/yaml-rust2" readme = "README.md" -edition = "2018" +edition = "2021" [dependencies] arraydeque = "0.5.1" From ef1a2d1d5ccb3af3bb66d0707bcdfbf536b72bb2 Mon Sep 17 00:00:00 2001 From: David Aguilar Date: Sat, 22 Jul 2023 21:36:37 -0700 Subject: [PATCH 303/380] ci: replace travis with github actions --- saphyr/.github/workflows/ci.yml | 36 +++++++++++++++++++++++++++++++++ saphyr/.travis.yml | 18 ----------------- 2 files changed, 36 insertions(+), 18 deletions(-) create mode 100644 saphyr/.github/workflows/ci.yml delete mode 100644 saphyr/.travis.yml diff --git a/saphyr/.github/workflows/ci.yml b/saphyr/.github/workflows/ci.yml new file mode 100644 index 0000000..6a3354c --- /dev/null +++ b/saphyr/.github/workflows/ci.yml @@ -0,0 +1,36 @@ +name: CI + +on: [push, pull_request] + +jobs: + check: + name: Lints and checks + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v3 + - run: rustup toolchain install stable --profile minimal --component rustfmt --component clippy --no-self-update + - uses: Swatinem/rust-cache@v2 + - name: Run clippy checks + run: cargo clippy --all-targets -- -D warnings + - name: Run format checks + run: cargo fmt --check + + test: + name: Test using Rust ${{ matrix.rust }} on ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + os: [ubuntu-latest, macos-latest] + rust: [stable] + runs-on: ${{ matrix.os }} + steps: + - name: Checkout + uses: actions/checkout@v3 + - run: git submodule update --init + - run: rustup toolchain install ${{ matrix.rust }} --profile minimal --no-self-update + - uses: Swatinem/rust-cache@v2 + - name: Run build + run: cargo build + - name: Run tests + run: cargo test -v diff --git a/saphyr/.travis.yml b/saphyr/.travis.yml deleted file mode 100644 index 46b9569..0000000 --- a/saphyr/.travis.yml +++ /dev/null @@ -1,18 +0,0 @@ -language: rust - -matrix: - include: - - rust: stable - - rust: beta - - rust: nightly - - rust: 1.33.0 - - rust: nightly - env: CLIPPY - script: | - if rustup component add clippy-preview; then - cargo clippy -- -Dclippy - fi - -script: - - cargo build - - cargo test From e1119bb3bfb8cfd03183dacc7dd08c1b21d0e484 Mon Sep 17 00:00:00 2001 From: larkbyte Date: Tue, 26 May 2020 18:35:06 +0800 Subject: [PATCH 304/380] Replace linked_hash_map with hashlink --- saphyr/Cargo.toml | 2 +- saphyr/src/lib.rs | 2 +- saphyr/src/yaml.rs | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/saphyr/Cargo.toml b/saphyr/Cargo.toml index a62945d..b4a2f62 100644 --- a/saphyr/Cargo.toml +++ b/saphyr/Cargo.toml @@ -15,7 +15,7 @@ edition = "2021" [dependencies] arraydeque = "0.5.1" -linked-hash-map = "0.5.3" +hashlink = "0.8" [dev-dependencies] libtest-mimic = "0.3.0" diff --git a/saphyr/src/lib.rs b/saphyr/src/lib.rs index 7c01b66..abb41c8 100644 --- a/saphyr/src/lib.rs +++ b/saphyr/src/lib.rs @@ -43,7 +43,7 @@ ) )] -extern crate linked_hash_map; +extern crate hashlink; pub(crate) mod char_traits; #[macro_use] diff --git a/saphyr/src/yaml.rs b/saphyr/src/yaml.rs index f5e6738..0848b22 100644 --- a/saphyr/src/yaml.rs +++ b/saphyr/src/yaml.rs @@ -2,7 +2,7 @@ use std::{collections::BTreeMap, convert::TryFrom, mem, ops::Index}; -use linked_hash_map::LinkedHashMap; +use hashlink::LinkedHashMap; use crate::parser::{Event, MarkedEventReceiver, Parser, Tag}; use crate::scanner::{Marker, ScanError, TScalarStyle}; From c9627417191f2ef1eea50ed56c26f12d7a9081be Mon Sep 17 00:00:00 2001 From: Jim Turner Date: Tue, 17 Aug 2021 22:07:07 -0400 Subject: [PATCH 305/380] Add byte a offset to the error message --- saphyr/src/scanner.rs | 5 +++-- saphyr/tests/basic.rs | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/saphyr/src/scanner.rs b/saphyr/src/scanner.rs index 24a33dd..dff285b 100644 --- a/saphyr/src/scanner.rs +++ b/saphyr/src/scanner.rs @@ -108,10 +108,11 @@ impl fmt::Display for ScanError { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { write!( formatter, - "{} at line {} column {}", + "{} at byte {} line {} column {}", self.info, + self.mark.index, self.mark.line, - self.mark.col + 1 + self.mark.col + 1, ) } } diff --git a/saphyr/tests/basic.rs b/saphyr/tests/basic.rs index e516cd6..b769c2b 100644 --- a/saphyr/tests/basic.rs +++ b/saphyr/tests/basic.rs @@ -61,7 +61,7 @@ key1:a2 ); assert_eq!( error.to_string(), - "mapping values are not allowed in this context at line 4 column 4" + "mapping values are not allowed in this context at byte 26 line 4 column 4" ); } From 7705e87ff6849bb987ce4ae9017a17e42e9fff42 Mon Sep 17 00:00:00 2001 From: Marko Mikulicic Date: Tue, 5 May 2020 14:53:06 +0200 Subject: [PATCH 306/380] Implement load_from_bytes Also helps in some cases with #142, when the BOM is at the beginning of the file (common), but not in corner case where the BOM is at the start of a document which is not the first one. Closes: #155 --- saphyr/Cargo.toml | 1 + saphyr/src/yaml.rs | 125 ++++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 120 insertions(+), 6 deletions(-) diff --git a/saphyr/Cargo.toml b/saphyr/Cargo.toml index b4a2f62..bcf98be 100644 --- a/saphyr/Cargo.toml +++ b/saphyr/Cargo.toml @@ -15,6 +15,7 @@ edition = "2021" [dependencies] arraydeque = "0.5.1" +encoding = "0.2" hashlink = "0.8" [dev-dependencies] diff --git a/saphyr/src/yaml.rs b/saphyr/src/yaml.rs index 0848b22..5dc8c87 100644 --- a/saphyr/src/yaml.rs +++ b/saphyr/src/yaml.rs @@ -67,6 +67,7 @@ fn parse_f64(v: &str) -> Option { /// Main structure for quickly parsing YAML. /// /// See [`YamlLoader::load_from_str`]. +#[derive(Default)] pub struct YamlLoader { docs: Vec, // states @@ -161,6 +162,19 @@ impl MarkedEventReceiver for YamlLoader { } } +#[derive(Debug)] +pub enum LoadError { + IO(std::io::Error), + Scan(ScanError), + Decode(std::borrow::Cow<'static, str>), +} + +impl From for LoadError { + fn from(error: std::io::Error) -> Self { + LoadError::IO(error) + } +} + impl YamlLoader { fn insert_new_node(&mut self, node: (Yaml, usize)) { // valid anchor id starts from 1 @@ -205,16 +219,47 @@ impl YamlLoader { /// if all documents are parsed successfully. An error in a latter document prevents the former /// from being returned. pub fn load_from_iter>(source: I) -> Result, ScanError> { - let mut loader = YamlLoader { - docs: Vec::new(), - doc_stack: Vec::new(), - key_stack: Vec::new(), - anchor_map: BTreeMap::new(), - }; + let mut loader = YamlLoader::default(); let mut parser = Parser::new(source); parser.load(&mut loader, true)?; Ok(loader.docs) } + + pub fn load_from_bytes(mut source: impl std::io::Read) -> Result, LoadError> { + let mut buffer = Vec::new(); + source.read_to_end(&mut buffer)?; + + // Decodes the input buffer using either UTF-8, UTF-16LE or UTF-16BE depending on the BOM codepoint. + // If the buffer doesn't start with a BOM codepoint, it will use a fallback encoding obtained by + // detect_utf16_endianness. + let (res, _) = encoding::types::decode( + &buffer, + encoding::DecoderTrap::Strict, + detect_utf16_endianness(&buffer), + ); + let s = res.map_err(LoadError::Decode)?; + YamlLoader::load_from_str(&s).map_err(LoadError::Scan) + } +} + +/// The encoding crate knows how to tell apart UTF-8 from UTF-16LE and utf-16BE, when the +/// bytestream starts with BOM codepoint. +/// However, it doesn't even attempt to guess the UTF-16 endianness of the input bytestream since +/// in the general case the bytestream could start with a codepoint that uses both bytes. +/// +/// The YAML-1.2 spec mandates that the first character of a YAML document is an ASCII character. +/// This allows the encoding to be deduced by the pattern of null (#x00) characters. +// +/// See spec at +fn detect_utf16_endianness(b: &[u8]) -> encoding::types::EncodingRef { + if b.len() > 1 && (b[0] != b[1]) { + if b[0] == 0 { + return encoding::all::UTF_16BE; + } else if b[1] == 0 { + return encoding::all::UTF_16LE; + } + } + encoding::all::UTF_8 } macro_rules! define_as ( @@ -410,3 +455,71 @@ impl Iterator for YamlIter { self.yaml.next() } } + +#[cfg(test)] +mod test { + use crate::YamlLoader; + + #[test] + fn test_read_bom() { + let s = b"\xef\xbb\xbf--- +a: 1 +b: 2.2 +c: [1, 2] +"; + let out = YamlLoader::load_from_bytes(s as &[u8]).unwrap(); + let doc = &out[0]; + assert_eq!(doc["a"].as_i64().unwrap(), 1i64); + assert!((doc["b"].as_f64().unwrap() - 2.2f64).abs() <= f64::EPSILON); + assert_eq!(doc["c"][1].as_i64().unwrap(), 2i64); + assert!(doc["d"][0].is_badvalue()); + } + + #[test] + fn test_read_utf16le() { + let s = b"\xff\xfe-\x00-\x00-\x00 +\x00a\x00:\x00 \x001\x00 +\x00b\x00:\x00 \x002\x00.\x002\x00 +\x00c\x00:\x00 \x00[\x001\x00,\x00 \x002\x00]\x00 +\x00"; + let out = YamlLoader::load_from_bytes(s as &[u8]).unwrap(); + let doc = &out[0]; + println!("GOT: {doc:?}"); + assert_eq!(doc["a"].as_i64().unwrap(), 1i64); + assert!((doc["b"].as_f64().unwrap() - 2.2f64) <= f64::EPSILON); + assert_eq!(doc["c"][1].as_i64().unwrap(), 2i64); + assert!(doc["d"][0].is_badvalue()); + } + + #[test] + fn test_read_utf16be() { + let s = b"\xfe\xff\x00-\x00-\x00-\x00 +\x00a\x00:\x00 \x001\x00 +\x00b\x00:\x00 \x002\x00.\x002\x00 +\x00c\x00:\x00 \x00[\x001\x00,\x00 \x002\x00]\x00 +"; + let out = YamlLoader::load_from_bytes(s as &[u8]).unwrap(); + let doc = &out[0]; + println!("GOT: {doc:?}"); + assert_eq!(doc["a"].as_i64().unwrap(), 1i64); + assert!((doc["b"].as_f64().unwrap() - 2.2f64).abs() <= f64::EPSILON); + assert_eq!(doc["c"][1].as_i64().unwrap(), 2i64); + assert!(doc["d"][0].is_badvalue()); + } + + #[test] + fn test_read_utf16le_nobom() { + let s = b"-\x00-\x00-\x00 +\x00a\x00:\x00 \x001\x00 +\x00b\x00:\x00 \x002\x00.\x002\x00 +\x00c\x00:\x00 \x00[\x001\x00,\x00 \x002\x00]\x00 +\x00"; + let out = YamlLoader::load_from_bytes(s as &[u8]).unwrap(); + let doc = &out[0]; + println!("GOT: {doc:?}"); + assert_eq!(doc["a"].as_i64().unwrap(), 1i64); + assert!((doc["b"].as_f64().unwrap() - 2.2f64).abs() <= f64::EPSILON); + assert_eq!(doc["c"][1].as_i64().unwrap(), 2i64); + assert!(doc["d"][0].is_badvalue()); + } +} From 068525a6d76a37c947f780ed4ebaffd6c28221f7 Mon Sep 17 00:00:00 2001 From: Marko Mikulicic Date: Thu, 30 Jul 2020 03:23:53 +0200 Subject: [PATCH 307/380] Use a YamlDecoder builder to implement optional encoding_trap parameter. --- saphyr/src/yaml.rs | 75 ++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 63 insertions(+), 12 deletions(-) diff --git a/saphyr/src/yaml.rs b/saphyr/src/yaml.rs index 5dc8c87..0d02f34 100644 --- a/saphyr/src/yaml.rs +++ b/saphyr/src/yaml.rs @@ -224,19 +224,51 @@ impl YamlLoader { parser.load(&mut loader, true)?; Ok(loader.docs) } +} - pub fn load_from_bytes(mut source: impl std::io::Read) -> Result, LoadError> { +/// YamlDecoder is a YamlLoader builder that allows you to supply your own encoding error trap. +/// For example, to read a YAML file while ignoring Unicode decoding errors you can set the +/// `encoding_trap` to `encoding::DecoderTrap::Ignore`. +/// ```rust +/// use yaml_rust2::yaml::YamlDecoder; +/// +/// let string = b"--- +/// a\xa9: 1 +/// b: 2.2 +/// c: [1, 2] +/// "; +/// let out = YamlDecoder::read(string as &[u8]) +/// .encoding_trap(encoding::DecoderTrap::Ignore) +/// .decode() +/// .unwrap(); +/// ``` +pub struct YamlDecoder { + source: T, + trap: encoding::types::DecoderTrap, +} + +impl YamlDecoder { + pub fn read(source: T) -> YamlDecoder { + YamlDecoder { + source, + trap: encoding::DecoderTrap::Strict, + } + } + + pub fn encoding_trap(&mut self, trap: encoding::types::DecoderTrap) -> &mut Self { + self.trap = trap; + self + } + + pub fn decode(&mut self) -> Result, LoadError> { let mut buffer = Vec::new(); - source.read_to_end(&mut buffer)?; + self.source.read_to_end(&mut buffer)?; // Decodes the input buffer using either UTF-8, UTF-16LE or UTF-16BE depending on the BOM codepoint. // If the buffer doesn't start with a BOM codepoint, it will use a fallback encoding obtained by // detect_utf16_endianness. - let (res, _) = encoding::types::decode( - &buffer, - encoding::DecoderTrap::Strict, - detect_utf16_endianness(&buffer), - ); + let (res, _) = + encoding::types::decode(&buffer, self.trap, detect_utf16_endianness(&buffer)); let s = res.map_err(LoadError::Decode)?; YamlLoader::load_from_str(&s).map_err(LoadError::Scan) } @@ -458,7 +490,7 @@ impl Iterator for YamlIter { #[cfg(test)] mod test { - use crate::YamlLoader; + use super::YamlDecoder; #[test] fn test_read_bom() { @@ -467,7 +499,7 @@ a: 1 b: 2.2 c: [1, 2] "; - let out = YamlLoader::load_from_bytes(s as &[u8]).unwrap(); + let out = YamlDecoder::read(s as &[u8]).decode().unwrap(); let doc = &out[0]; assert_eq!(doc["a"].as_i64().unwrap(), 1i64); assert!((doc["b"].as_f64().unwrap() - 2.2f64).abs() <= f64::EPSILON); @@ -482,7 +514,7 @@ c: [1, 2] \x00b\x00:\x00 \x002\x00.\x002\x00 \x00c\x00:\x00 \x00[\x001\x00,\x00 \x002\x00]\x00 \x00"; - let out = YamlLoader::load_from_bytes(s as &[u8]).unwrap(); + let out = YamlDecoder::read(s as &[u8]).decode().unwrap(); let doc = &out[0]; println!("GOT: {doc:?}"); assert_eq!(doc["a"].as_i64().unwrap(), 1i64); @@ -498,7 +530,7 @@ c: [1, 2] \x00b\x00:\x00 \x002\x00.\x002\x00 \x00c\x00:\x00 \x00[\x001\x00,\x00 \x002\x00]\x00 "; - let out = YamlLoader::load_from_bytes(s as &[u8]).unwrap(); + let out = YamlDecoder::read(s as &[u8]).decode().unwrap(); let doc = &out[0]; println!("GOT: {doc:?}"); assert_eq!(doc["a"].as_i64().unwrap(), 1i64); @@ -514,7 +546,26 @@ c: [1, 2] \x00b\x00:\x00 \x002\x00.\x002\x00 \x00c\x00:\x00 \x00[\x001\x00,\x00 \x002\x00]\x00 \x00"; - let out = YamlLoader::load_from_bytes(s as &[u8]).unwrap(); + let out = YamlDecoder::read(s as &[u8]).decode().unwrap(); + let doc = &out[0]; + println!("GOT: {doc:?}"); + assert_eq!(doc["a"].as_i64().unwrap(), 1i64); + assert!((doc["b"].as_f64().unwrap() - 2.2f64).abs() <= f64::EPSILON); + assert_eq!(doc["c"][1].as_i64().unwrap(), 2i64); + assert!(doc["d"][0].is_badvalue()); + } + + #[test] + fn test_read_trap() { + let s = b"--- +a\xa9: 1 +b: 2.2 +c: [1, 2] +"; + let out = YamlDecoder::read(s as &[u8]) + .encoding_trap(encoding::DecoderTrap::Ignore) + .decode() + .unwrap(); let doc = &out[0]; println!("GOT: {doc:?}"); assert_eq!(doc["a"].as_i64().unwrap(), 1i64); From 38aaea20db36768978b422aa2a2f7dc3f65aef51 Mon Sep 17 00:00:00 2001 From: David Aguilar Date: Sun, 17 Mar 2024 01:40:46 -0700 Subject: [PATCH 308/380] Update to quickcheck 1.0 --- saphyr/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/saphyr/Cargo.toml b/saphyr/Cargo.toml index bcf98be..07da692 100644 --- a/saphyr/Cargo.toml +++ b/saphyr/Cargo.toml @@ -20,7 +20,7 @@ hashlink = "0.8" [dev-dependencies] libtest-mimic = "0.3.0" -quickcheck = "0.9" +quickcheck = "1.0" [profile.release-lto] inherits = "release" From 5ba5dfa6e64e3a8dbc2ea55e6b86b787ee395fa1 Mon Sep 17 00:00:00 2001 From: "lincoln auster [they/them]" Date: Tue, 28 Sep 2021 23:06:37 -0600 Subject: [PATCH 309/380] introduce `or` function Similarly to `or` for Rust's options, this patch provides a way to 'override' the value of a Yaml node if it's some form of error. --- saphyr/src/yaml.rs | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/saphyr/src/yaml.rs b/saphyr/src/yaml.rs index 0d02f34..a6dc1f0 100644 --- a/saphyr/src/yaml.rs +++ b/saphyr/src/yaml.rs @@ -383,6 +383,23 @@ impl Yaml { pub fn into_f64(self) -> Option { self.as_f64() } + + /// If a value is null or otherwise bad (see variants), consume it and + /// replace it with a given value `other`. Otherwise, return self unchanged. + /// + /// ``` + /// use yaml_rust2::yaml::Yaml; + /// + /// assert_eq!(Yaml::BadValue.or(Yaml::Integer(3)), Yaml::Integer(3)); + /// assert_eq!(Yaml::Integer(3).or(Yaml::BadValue), Yaml::Integer(3)); + /// ``` + #[must_use] + pub fn or(self, other: Self) -> Self { + match self { + Yaml::BadValue | Yaml::Null => other, + this => this, + } + } } #[cfg_attr(feature = "cargo-clippy", allow(clippy::should_implement_trait))] @@ -490,7 +507,7 @@ impl Iterator for YamlIter { #[cfg(test)] mod test { - use super::YamlDecoder; + use super::{Yaml, YamlDecoder}; #[test] fn test_read_bom() { @@ -573,4 +590,10 @@ c: [1, 2] assert_eq!(doc["c"][1].as_i64().unwrap(), 2i64); assert!(doc["d"][0].is_badvalue()); } + + #[test] + fn test_or() { + assert_eq!(Yaml::Null.or(Yaml::Integer(3)), Yaml::Integer(3)); + assert_eq!(Yaml::Integer(3).or(Yaml::Integer(7)), Yaml::Integer(3)); + } } From 2cf6436fb1e3e130fa82cf89fabc91b81836d4c0 Mon Sep 17 00:00:00 2001 From: "lincoln auster [they/them]" Date: Fri, 1 Oct 2021 19:11:09 -0600 Subject: [PATCH 310/380] implement `borrowed_or` Allow the usage of `or` without consuming self. This can be useful for pipelines that maintain some sort of owned state. --- saphyr/src/yaml.rs | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/saphyr/src/yaml.rs b/saphyr/src/yaml.rs index a6dc1f0..328775d 100644 --- a/saphyr/src/yaml.rs +++ b/saphyr/src/yaml.rs @@ -400,6 +400,16 @@ impl Yaml { this => this, } } + + /// See `or` for behavior. This performs the same operations, but with + /// borrowed values for less linear pipelines. + #[must_use] + pub fn borrowed_or<'a>(&'a self, other: &'a Self) -> &'a Self { + match self { + Yaml::BadValue | Yaml::Null => other, + this => this, + } + } } #[cfg_attr(feature = "cargo-clippy", allow(clippy::should_implement_trait))] From f4c4e2ee4a2727e45bb362a3a44009249908b068 Mon Sep 17 00:00:00 2001 From: David Aguilar Date: Sun, 17 Mar 2024 02:18:39 -0700 Subject: [PATCH 311/380] Enable the case-arms clippy checks --- saphyr/src/lib.rs | 2 -- saphyr/src/scanner.rs | 3 +-- saphyr/src/yaml.rs | 3 +-- 3 files changed, 2 insertions(+), 6 deletions(-) diff --git a/saphyr/src/lib.rs b/saphyr/src/lib.rs index abb41c8..5c5ca2b 100644 --- a/saphyr/src/lib.rs +++ b/saphyr/src/lib.rs @@ -30,12 +30,10 @@ //! //! ``` -#![doc(html_root_url = "https://docs.rs/yaml-rust2/0.5.0")] #![cfg_attr(feature = "cargo-clippy", warn(clippy::pedantic))] #![cfg_attr( feature = "cargo-clippy", allow( - clippy::match_same_arms, clippy::should_implement_trait, clippy::missing_errors_doc, clippy::missing_panics_doc, diff --git a/saphyr/src/scanner.rs b/saphyr/src/scanner.rs index dff285b..226f801 100644 --- a/saphyr/src/scanner.rs +++ b/saphyr/src/scanner.rs @@ -786,7 +786,6 @@ impl> Scanner { loop { // TODO(chenyh) BOM match self.look_ch() { - ' ' => self.skip_blank(), // Tabs may not be used as indentation. // "Indentation" only exists as long as a block is started, but does not exist // inside of flow-style constructs. Tabs are allowed as part of leading @@ -806,7 +805,7 @@ impl> Scanner { )); } } - '\t' => self.skip_blank(), + '\t' | ' ' => self.skip_blank(), '\n' | '\r' => { self.lookahead(2); self.skip_line(); diff --git a/saphyr/src/yaml.rs b/saphyr/src/yaml.rs index 328775d..50d0b85 100644 --- a/saphyr/src/yaml.rs +++ b/saphyr/src/yaml.rs @@ -81,7 +81,7 @@ impl MarkedEventReceiver for YamlLoader { fn on_event(&mut self, ev: Event, _: Marker) { // println!("EV {:?}", ev); match ev { - Event::DocumentStart => { + Event::DocumentStart | Event::Nothing | Event::StreamStart | Event::StreamEnd => { // do nothing } Event::DocumentEnd => { @@ -156,7 +156,6 @@ impl MarkedEventReceiver for YamlLoader { }; self.insert_new_node((n, 0)); } - _ => { /* ignore */ } } // println!("DOC {:?}", self.doc_stack); } From 0546a314edf66269f920c661cc8306b28cdc5b43 Mon Sep 17 00:00:00 2001 From: Ben Boeckel Date: Sat, 19 Aug 2023 10:41:51 -0400 Subject: [PATCH 312/380] doc: mention that types are not interpreted Closes: #87 --- saphyr/README.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/saphyr/README.md b/saphyr/README.md index 4b5df49..2014be3 100644 --- a/saphyr/README.md +++ b/saphyr/README.md @@ -75,6 +75,12 @@ your objects. * `Vec`/`HashMap` access API * Low-level YAML events emission +## Security + +This library does not try to interpret any type specifiers in a YAML document, +so there is no risk of, say, instantiating a socket with fields and +communicating with the outside world just by parsing a YAML document. + ## Specification Compliance This implementation is fully compatible with the YAML 1.2 specification. In From b4cb0ea9e6f60c4071810ef58a51777ff9d177df Mon Sep 17 00:00:00 2001 From: David Aguilar Date: Sun, 17 Mar 2024 02:05:21 -0700 Subject: [PATCH 313/380] Add a garden file for dev tasks Garden is a simple command runner that uses YAML (via yaml-rust) as its file format. Garden will be switching over to yaml-rust2 shortly. --- saphyr/garden.yaml | 85 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 85 insertions(+) create mode 100644 saphyr/garden.yaml diff --git a/saphyr/garden.yaml b/saphyr/garden.yaml new file mode 100644 index 0000000..78b309c --- /dev/null +++ b/saphyr/garden.yaml @@ -0,0 +1,85 @@ +# Use "cargo install garden-tools" to install garden https://gitlab.com/garden-rs/garden +# +# usage: +# garden build +# garden test +# garden check +# garden fmt +# garden fix + +commands: + bench: cargo bench "$@" + build: cargo build "$@" + check>: + - check/clippy + - check/fmt + - build + - test + - check/profile + check/clippy: cargo clippy --all-targets "$@" -- -D warnings + check/fmt: cargo fmt --check + check/profile: | + cargo build \ + --profile=release-lto \ + --package gen_large_yaml \ + --bin gen_large_yaml \ + --manifest-path tools/gen_large_yaml/Cargo.toml + clean: cargo clean "$@" + coverage: cargo kcov "$@" + doc: cargo doc --no-deps --package yaml-rust2 "$@" + ethi/bench: | + cargo build --release --all-targets + cd ../Yaml-rust && cargo build --release --all-targets + cd ../libfyaml/build && ninja + cargo bench_compare run_bench + fix: cargo clippy --all-targets --fix "$@" -- -D warnings + fmt: cargo fmt "$@" + test: cargo test "$@" + update: cargo update "$@" + watch: cargo watch --shell "garden check" + +trees: + yaml-rust2: + description: A pure Rust YAML implementation + path: ${GARDEN_CONFIG_DIR} + url: "git@github.com:Ethiraric/yaml-rust2.git" + remotes: + byte1234: "git@github.com:byte1234/yaml-rust.git" + davvid: "git@github.com:davvid/yaml-rust.git" + gyscos: "git@github.com:gyscos/yaml-rust.git" + jturner314: "git@github.com:jturner314/yaml-rust.git" + mathstuf: "git@github.com:mathstuf/yaml-rust.git" + mkmik: "git@github.com:mkmik/yaml-rust.git" + olalonde: "git@github.com:olalonde/yaml-rust.git" + oldaccountdeadname: "git@github.com:oldaccountdeadname/yaml-rust.git" + ramosbugs: "git@github.com:ramosbugs/yaml-rust.git" + rocallahan: "git@github.com:rocallahan/yaml-rust.git" + smoelius: "git@github.com:smoelius/yaml-rust.git" + yaml-rust: "git@github.com:chyh1990/yaml-rust.git" + gitconfig: + remote.yaml-rust.fetch: + - "+refs/heads/*:refs/remotes/yaml-rust/*" + # Access yaml-rust pull requests as yaml-rust/pull/* + - "+refs/pull/*/head:refs/remotes/yaml-rust/pull/*" + links: + - "byte1234: replace linked_hash_map with hashlink" + - https://github.com/chyh1990/yaml-rust/pull/157 + - "ramosbugs: emit multi-line string values as block scalars" + - https://github.com/chyh1990/yaml-rust/pull/136 + - "oldaccountdeadname: introduce or function" + - https://github.com/chyh1990/yaml-rust/pull/179 + - "rocallahan: Update to quickcheck 1.0" + - https://github.com/chyh1990/yaml-rust/pull/188 + - "jturner314: Add byte index to error message" + - https://github.com/chyh1990/yaml-rust/pull/176 + - "mathstuf: mention that types are not interpreted" + - https://github.com/chyh1990/yaml-rust/pull/195 + - "olalonde: Expose info() api" + - https://github.com/chyh1990/yaml-rust/pull/190 + - "mkmik: Implement load_from_bytes" + - https://github.com/chyh1990/yaml-rust/pull/156 + + yaml-test-suite: + description: Comprehensive, language independent Test Suite for YAML + path: tests/yaml-test-suite + url: https://github.com/yaml/yaml-test-suite From b1b1af24dca8f9e724ad41630923c440985df7a0 Mon Sep 17 00:00:00 2001 From: David Aguilar Date: Sun, 17 Mar 2024 02:24:09 -0700 Subject: [PATCH 314/380] README: trivial grammar tweak --- saphyr/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/saphyr/README.md b/saphyr/README.md index 2014be3..04afc52 100644 --- a/saphyr/README.md +++ b/saphyr/README.md @@ -6,8 +6,8 @@ fixes towards being compliant to the [YAML test suite](https://github.com/yaml/yaml-test-suite/). `yaml-rust`'s parser is heavily influenced by `libyaml` and `yaml-cpp`. -`yaml-rust2` is a pure Rust YAML 1.2 implementation, which enjoys the memory -safety property and other benefits from the Rust language. +`yaml-rust2` is a pure Rust YAML 1.2 implementation that benefits from the +memory safety and other benefits from the Rust language. ## Quick Start From 432041aafb7e41bc1897d85aa4885a0552720a10 Mon Sep 17 00:00:00 2001 From: David Aguilar Date: Sun, 17 Mar 2024 01:39:38 -0700 Subject: [PATCH 315/380] Add a CHANGELOG to keep track of yaml-rust2 development --- saphyr/CHANGELOG.md | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) create mode 100644 saphyr/CHANGELOG.md diff --git a/saphyr/CHANGELOG.md b/saphyr/CHANGELOG.md new file mode 100644 index 0000000..0c0e4f7 --- /dev/null +++ b/saphyr/CHANGELOG.md @@ -0,0 +1,37 @@ +# Changelog + +## v0.6.0 + +**Features**: + +- Multi-line strings are now + [emitted using block scalars](https://github.com/chyh1990/yaml-rust/pull/136). + +- Error messages now contain a byte offset to aid debugging. + ([#176](https://github.com/chyh1990/yaml-rust/pull/176)) + +- Yaml now has `or` and `borrowed_or` methods. + ([#179](https://github.com/chyh1990/yaml-rust/pull/179)) + +- `Yaml::load_from_bytes()` is now available. + ([#156](https://github.com/chyh1990/yaml-rust/pull/156)) + +**Development**: + +- The documentation was updated to include a security note mentioning that + yaml-rust is safe because it does not interpret types. + ([#195](https://github.com/chyh1990/yaml-rust/pull/195)) + +- Updated to quickcheck 1.0. + ([#188](https://github.com/chyh1990/yaml-rust/pull/188)) + +- `hashlink` is [now used](https://github.com/chyh1990/yaml-rust/pull/157) + instead of `linked_hash_map`. + +## v0.5.0 + +- The parser now supports tag directives. + ([#35](https://github.com/chyh1990/yaml-rust/issues/35) + +- The `info` field has been exposed via a new `Yaml::info()` API method. + ([#190](https://github.com/chyh1990/yaml-rust/pull/190)) From 04f8b99f49cde08d53ac1d024b12cac3ded35798 Mon Sep 17 00:00:00 2001 From: David Aguilar Date: Sun, 17 Mar 2024 02:47:41 -0700 Subject: [PATCH 316/380] Enable the redundant-else clippy checks --- saphyr/src/lib.rs | 1 - saphyr/src/parser.rs | 31 ++++++++++++++----------------- saphyr/src/scanner.rs | 5 ++--- 3 files changed, 16 insertions(+), 21 deletions(-) diff --git a/saphyr/src/lib.rs b/saphyr/src/lib.rs index 5c5ca2b..c0ef93b 100644 --- a/saphyr/src/lib.rs +++ b/saphyr/src/lib.rs @@ -37,7 +37,6 @@ clippy::should_implement_trait, clippy::missing_errors_doc, clippy::missing_panics_doc, - clippy::redundant_else, ) )] diff --git a/saphyr/src/parser.rs b/saphyr/src/parser.rs index a6e40b8..5f65785 100644 --- a/saphyr/src/parser.rs +++ b/saphyr/src/parser.rs @@ -617,9 +617,8 @@ impl> Parser { } Some(id) => return Ok((Event::Alias(*id), mark)), } - } else { - unreachable!() } + unreachable!() } Token(_, TokenType::Anchor(_)) => { if let Token(mark, TokenType::Anchor(name)) = self.fetch_token() { @@ -782,10 +781,9 @@ impl> Parser { { self.state = State::FlowMappingValue; return Ok((Event::empty_scalar(), mark)); - } else { - self.push_state(State::FlowMappingValue); - return self.parse_node(false, false); } + self.push_state(State::FlowMappingValue); + return self.parse_node(false, false); } Token(marker, TokenType::Value) => { self.state = State::FlowMappingValue; @@ -814,21 +812,20 @@ impl> Parser { let Token(mark, _) = *self.peek_token()?; self.state = State::FlowMappingKey; return Ok((Event::empty_scalar(), mark)); - } else { - match *self.peek_token()? { - Token(marker, TokenType::Value) => { - self.skip(); - match self.peek_token()?.1 { - TokenType::FlowEntry | TokenType::FlowMappingEnd => {} - _ => { - self.push_state(State::FlowMappingKey); - return self.parse_node(false, false); - } + } + match *self.peek_token()? { + Token(marker, TokenType::Value) => { + self.skip(); + match self.peek_token()?.1 { + TokenType::FlowEntry | TokenType::FlowMappingEnd => {} + _ => { + self.push_state(State::FlowMappingKey); + return self.parse_node(false, false); } - marker } - Token(marker, _) => marker, + marker } + Token(marker, _) => marker, } }; diff --git a/saphyr/src/scanner.rs b/saphyr/src/scanner.rs index 226f801..cd96ad4 100644 --- a/saphyr/src/scanner.rs +++ b/saphyr/src/scanner.rs @@ -2435,10 +2435,9 @@ impl> Scanner { while let Some(indent) = self.indents.last() { if indent.needs_block_end { break; - } else { - self.indent = indent.indent; - self.indents.pop(); } + self.indent = indent.indent; + self.indents.pop(); } } From 4c64feb5ad35a29dbe5df309bd38b3b650259611 Mon Sep 17 00:00:00 2001 From: David Aguilar Date: Sun, 17 Mar 2024 02:56:21 -0700 Subject: [PATCH 317/380] Eliminate panics and enable the missing panics docs check --- saphyr/CHANGELOG.md | 2 ++ saphyr/src/lib.rs | 6 +----- saphyr/src/parser.rs | 11 +++++++++-- saphyr/src/scanner.rs | 7 ++++++- 4 files changed, 18 insertions(+), 8 deletions(-) diff --git a/saphyr/CHANGELOG.md b/saphyr/CHANGELOG.md index 0c0e4f7..f45c78d 100644 --- a/saphyr/CHANGELOG.md +++ b/saphyr/CHANGELOG.md @@ -16,6 +16,8 @@ - `Yaml::load_from_bytes()` is now available. ([#156](https://github.com/chyh1990/yaml-rust/pull/156)) +- The parser and scanner now return Err() instead of calling panic. + **Development**: - The documentation was updated to include a security note mentioning that diff --git a/saphyr/src/lib.rs b/saphyr/src/lib.rs index c0ef93b..4ba632e 100644 --- a/saphyr/src/lib.rs +++ b/saphyr/src/lib.rs @@ -33,11 +33,7 @@ #![cfg_attr(feature = "cargo-clippy", warn(clippy::pedantic))] #![cfg_attr( feature = "cargo-clippy", - allow( - clippy::should_implement_trait, - clippy::missing_errors_doc, - clippy::missing_panics_doc, - ) + allow(clippy::should_implement_trait, clippy::missing_errors_doc,) )] extern crate hashlink; diff --git a/saphyr/src/parser.rs b/saphyr/src/parser.rs index 5f65785..f6c2eb5 100644 --- a/saphyr/src/parser.rs +++ b/saphyr/src/parser.rs @@ -306,7 +306,9 @@ impl> Parser { ) -> Result<(), ScanError> { if !self.scanner.stream_started() { let (ev, mark) = self.next()?; - assert_eq!(ev, Event::StreamStart); + if ev != Event::StreamStart { + return Err(ScanError::new(mark, "did not find expected ")); + } recv.on_event(ev, mark); } @@ -337,7 +339,12 @@ impl> Parser { mark: Marker, recv: &mut R, ) -> Result<(), ScanError> { - assert_eq!(first_ev, Event::DocumentStart); + if first_ev != Event::DocumentStart { + return Err(ScanError::new( + mark, + "did not find expected ", + )); + } recv.on_event(first_ev, mark); let (ev, mark) = self.next()?; diff --git a/saphyr/src/scanner.rs b/saphyr/src/scanner.rs index cd96ad4..611d9c0 100644 --- a/saphyr/src/scanner.rs +++ b/saphyr/src/scanner.rs @@ -716,7 +716,12 @@ impl> Scanner { if !self.token_available { self.fetch_more_tokens()?; } - let t = self.tokens.pop_front().unwrap(); + let Some(t) = self.tokens.pop_front() else { + return Err(ScanError::new( + self.mark, + "did not find expected next token", + )); + }; self.token_available = false; self.tokens_parsed += 1; From 429813a0cd0e3e982820a4da319433fa4793f19d Mon Sep 17 00:00:00 2001 From: David Aguilar Date: Sun, 17 Mar 2024 03:15:56 -0700 Subject: [PATCH 318/380] Enable the missing-errors-doc clippy checks --- saphyr/src/emitter.rs | 3 +++ saphyr/src/lib.rs | 5 +---- saphyr/src/parser.rs | 6 ++++++ saphyr/src/scanner.rs | 9 +++++++++ saphyr/src/yaml.rs | 6 ++++++ 5 files changed, 25 insertions(+), 4 deletions(-) diff --git a/saphyr/src/emitter.rs b/saphyr/src/emitter.rs index 213da01..0c365a8 100644 --- a/saphyr/src/emitter.rs +++ b/saphyr/src/emitter.rs @@ -168,6 +168,9 @@ impl<'a> YamlEmitter<'a> { self.multiline_strings } + /// Dump Yaml to an output stream. + /// # Errors + /// Returns `EmitError` when an error occurs. pub fn dump(&mut self, doc: &Yaml) -> EmitResult { // write DocumentStart writeln!(self.writer, "---")?; diff --git a/saphyr/src/lib.rs b/saphyr/src/lib.rs index 4ba632e..0651aea 100644 --- a/saphyr/src/lib.rs +++ b/saphyr/src/lib.rs @@ -31,10 +31,7 @@ //! ``` #![cfg_attr(feature = "cargo-clippy", warn(clippy::pedantic))] -#![cfg_attr( - feature = "cargo-clippy", - allow(clippy::should_implement_trait, clippy::missing_errors_doc,) -)] +#![cfg_attr(feature = "cargo-clippy", allow(clippy::should_implement_trait))] extern crate hashlink; diff --git a/saphyr/src/parser.rs b/saphyr/src/parser.rs index f6c2eb5..8c21edb 100644 --- a/saphyr/src/parser.rs +++ b/saphyr/src/parser.rs @@ -218,6 +218,8 @@ impl> Parser { /// /// Any subsequent call to [`Parser::peek`] will return the same value, until a call to /// [`Parser::next`] or [`Parser::load`]. + /// # Errors + /// Returns `ScanError` when loading the next event fails. pub fn peek(&mut self) -> Result<&(Event, Marker), ScanError> { if let Some(ref x) = self.current { Ok(x) @@ -228,6 +230,8 @@ impl> Parser { } /// Try to load the next event and return it, consuming it from `self`. + /// # Errors + /// Returns `ScanError` when loading the next event fails. pub fn next(&mut self) -> ParseResult { match self.current.take() { None => self.parse(), @@ -299,6 +303,8 @@ impl> Parser { /// /// Note that any [`EventReceiver`] is also a [`MarkedEventReceiver`], so implementing the /// former is enough to call this function. + /// # Errors + /// Returns `ScanError` when loading fails. pub fn load( &mut self, recv: &mut R, diff --git a/saphyr/src/scanner.rs b/saphyr/src/scanner.rs index 611d9c0..d6e9788 100644 --- a/saphyr/src/scanner.rs +++ b/saphyr/src/scanner.rs @@ -604,6 +604,9 @@ impl> Scanner { self.simple_key_allowed = false; } + /// Fetch the next token in the stream. + /// # Errors + /// Returns `ScanError` when the scanner does not find the next expected token. pub fn fetch_next_token(&mut self) -> ScanResult { self.lookahead(1); // eprintln!("--> fetch_next_token Cur {:?} {:?}", self.mark, self.ch()); @@ -708,6 +711,9 @@ impl> Scanner { } } + /// Return the next token in the stream. + /// # Errors + /// Returns `ScanError` when scanning fails to find an expected next token. pub fn next_token(&mut self) -> Result, ScanError> { if self.stream_end_produced { return Ok(None); @@ -731,6 +737,9 @@ impl> Scanner { Ok(Some(t)) } + /// Fetch tokens from the token stream. + /// # Errors + /// Returns `ScanError` when loading fails. pub fn fetch_more_tokens(&mut self) -> ScanResult { let mut need_more; loop { diff --git a/saphyr/src/yaml.rs b/saphyr/src/yaml.rs index 50d0b85..3e2298a 100644 --- a/saphyr/src/yaml.rs +++ b/saphyr/src/yaml.rs @@ -208,6 +208,8 @@ impl YamlLoader { /// The `source` is interpreted as YAML documents and is parsed. Parsing succeeds if and only /// if all documents are parsed successfully. An error in a latter document prevents the former /// from being returned. + /// # Errors + /// Returns `ScanError` when loading fails. pub fn load_from_str(source: &str) -> Result, ScanError> { Self::load_from_iter(source.chars()) } @@ -217,6 +219,8 @@ impl YamlLoader { /// The `source` is interpreted as YAML documents and is parsed. Parsing succeeds if and only /// if all documents are parsed successfully. An error in a latter document prevents the former /// from being returned. + /// # Errors + /// Returns `ScanError` when loading fails. pub fn load_from_iter>(source: I) -> Result, ScanError> { let mut loader = YamlLoader::default(); let mut parser = Parser::new(source); @@ -259,6 +263,8 @@ impl YamlDecoder { self } + /// # Errors + /// Returns `LoadError` when decoding fails. pub fn decode(&mut self) -> Result, LoadError> { let mut buffer = Vec::new(); self.source.read_to_end(&mut buffer)?; From c1145fc814f00bbc6bdebc2c1188328401d88457 Mon Sep 17 00:00:00 2001 From: David Aguilar Date: Sun, 17 Mar 2024 03:22:15 -0700 Subject: [PATCH 319/380] Enable all clippy warnings Rename Scanner::next() to Scanner::next_token() to avoid confusiong with the Iterator trait. --- saphyr/src/lib.rs | 3 --- saphyr/src/parser.rs | 24 ++++++++++++------------ 2 files changed, 12 insertions(+), 15 deletions(-) diff --git a/saphyr/src/lib.rs b/saphyr/src/lib.rs index 0651aea..e95e452 100644 --- a/saphyr/src/lib.rs +++ b/saphyr/src/lib.rs @@ -30,9 +30,6 @@ //! //! ``` -#![cfg_attr(feature = "cargo-clippy", warn(clippy::pedantic))] -#![cfg_attr(feature = "cargo-clippy", allow(clippy::should_implement_trait))] - extern crate hashlink; pub(crate) mod char_traits; diff --git a/saphyr/src/parser.rs b/saphyr/src/parser.rs index 8c21edb..7543e97 100644 --- a/saphyr/src/parser.rs +++ b/saphyr/src/parser.rs @@ -224,7 +224,7 @@ impl> Parser { if let Some(ref x) = self.current { Ok(x) } else { - self.current = Some(self.next()?); + self.current = Some(self.next_token()?); self.peek() } } @@ -232,7 +232,7 @@ impl> Parser { /// Try to load the next event and return it, consuming it from `self`. /// # Errors /// Returns `ScanError` when loading the next event fails. - pub fn next(&mut self) -> ParseResult { + pub fn next_token(&mut self) -> ParseResult { match self.current.take() { None => self.parse(), Some(v) => Ok(v), @@ -311,7 +311,7 @@ impl> Parser { multi: bool, ) -> Result<(), ScanError> { if !self.scanner.stream_started() { - let (ev, mark) = self.next()?; + let (ev, mark) = self.next_token()?; if ev != Event::StreamStart { return Err(ScanError::new(mark, "did not find expected ")); } @@ -324,7 +324,7 @@ impl> Parser { return Ok(()); } loop { - let (ev, mark) = self.next()?; + let (ev, mark) = self.next_token()?; if ev == Event::StreamEnd { recv.on_event(ev, mark); return Ok(()); @@ -353,11 +353,11 @@ impl> Parser { } recv.on_event(first_ev, mark); - let (ev, mark) = self.next()?; + let (ev, mark) = self.next_token()?; self.load_node(ev, mark, recv)?; // DOCUMENT-END is expected. - let (ev, mark) = self.next()?; + let (ev, mark) = self.next_token()?; assert_eq!(ev, Event::DocumentEnd); recv.on_event(ev, mark); @@ -391,17 +391,17 @@ impl> Parser { } fn load_mapping(&mut self, recv: &mut R) -> Result<(), ScanError> { - let (mut key_ev, mut key_mark) = self.next()?; + let (mut key_ev, mut key_mark) = self.next_token()?; while key_ev != Event::MappingEnd { // key self.load_node(key_ev, key_mark, recv)?; // value - let (ev, mark) = self.next()?; + let (ev, mark) = self.next_token()?; self.load_node(ev, mark, recv)?; // next event - let (ev, mark) = self.next()?; + let (ev, mark) = self.next_token()?; key_ev = ev; key_mark = mark; } @@ -410,12 +410,12 @@ impl> Parser { } fn load_sequence(&mut self, recv: &mut R) -> Result<(), ScanError> { - let (mut ev, mut mark) = self.next()?; + let (mut ev, mut mark) = self.next_token()?; while ev != Event::SequenceEnd { self.load_node(ev, mark, recv)?; // next event - let (next_ev, next_mark) = self.next()?; + let (next_ev, next_mark) = self.next_token()?; ev = next_ev; mark = next_mark; } @@ -1057,7 +1057,7 @@ a5: *x let mut p = Parser::new(s.chars()); while { let event_peek = p.peek().unwrap().clone(); - let event = p.next().unwrap(); + let event = p.next_token().unwrap(); assert_eq!(event, event_peek); event.0 != Event::StreamEnd } {} From 9133bb0a34b07f2e35d0ba470973ffd44a4384f5 Mon Sep 17 00:00:00 2001 From: Ethiraric Date: Tue, 19 Mar 2024 18:18:59 +0100 Subject: [PATCH 320/380] Re-enable clippy warnings. --- saphyr/src/char_traits.rs | 4 ++-- saphyr/src/lib.rs | 9 +++++++++ saphyr/src/yaml.rs | 2 +- 3 files changed, 12 insertions(+), 3 deletions(-) diff --git a/saphyr/src/char_traits.rs b/saphyr/src/char_traits.rs index 5965cc4..a48d4db 100644 --- a/saphyr/src/char_traits.rs +++ b/saphyr/src/char_traits.rs @@ -111,9 +111,9 @@ pub(crate) fn is_tag_char(c: char) -> bool { } /// Check if the string can be expressed a valid literal block scalar. -/// The YAML spec supports all of the following in block literals except #xFEFF: +/// The YAML spec supports all of the following in block literals except `#xFEFF`: /// ```ignore -/// #x9 | #xA | [#x20-#x7E] /* 8 bit */ +/// #x9 | #xA | [#x20-#x7E] /* 8 bit */ /// | #x85 | [#xA0-#xD7FF] | [#xE000-#xFFFD] /* 16 bit */ /// | [#x10000-#x10FFFF] /* 32 bit */ /// ``` diff --git a/saphyr/src/lib.rs b/saphyr/src/lib.rs index e95e452..4428f5a 100644 --- a/saphyr/src/lib.rs +++ b/saphyr/src/lib.rs @@ -30,6 +30,15 @@ //! //! ``` +#![warn(clippy::pedantic)] +#![allow( + clippy::match_same_arms, + clippy::should_implement_trait, + clippy::missing_errors_doc, + clippy::missing_panics_doc, + clippy::redundant_else +)] + extern crate hashlink; pub(crate) mod char_traits; diff --git a/saphyr/src/yaml.rs b/saphyr/src/yaml.rs index 3e2298a..c1cecd7 100644 --- a/saphyr/src/yaml.rs +++ b/saphyr/src/yaml.rs @@ -229,7 +229,7 @@ impl YamlLoader { } } -/// YamlDecoder is a YamlLoader builder that allows you to supply your own encoding error trap. +/// `YamlDecoder` is a `YamlLoader` builder that allows you to supply your own encoding error trap. /// For example, to read a YAML file while ignoring Unicode decoding errors you can set the /// `encoding_trap` to `encoding::DecoderTrap::Ignore`. /// ```rust From 6dc586e892feedce5fec03825d921134ed37df9d Mon Sep 17 00:00:00 2001 From: Ethiraric Date: Tue, 19 Mar 2024 19:29:25 +0100 Subject: [PATCH 321/380] Add `serde-yaml` to benchmark. This internally uses `libfyaml`, so this is more of a benchmark of libfyaml in the end. --- saphyr/justfile | 1 + 1 file changed, 1 insertion(+) diff --git a/saphyr/justfile b/saphyr/justfile index 4d0c444..238ded7 100644 --- a/saphyr/justfile +++ b/saphyr/justfile @@ -10,5 +10,6 @@ before_commit: ethi_bench: cargo build --release --all-targets cd ../Yaml-rust && cargo build --release --all-targets + cd ../serde-yaml/ && cargo build --release --all-targets cd ../libfyaml/build && ninja cargo bench_compare run_bench From a994c2ddf4f824ff9ef3794307394688eeaa0cfd Mon Sep 17 00:00:00 2001 From: Ethiraric Date: Tue, 19 Mar 2024 19:29:25 +0100 Subject: [PATCH 322/380] Add `serde-yaml` to benchmark. This internally uses `libfyaml`, so this is more of a benchmark of libfyaml in the end. --- bench/justfile | 1 + 1 file changed, 1 insertion(+) diff --git a/bench/justfile b/bench/justfile index 4d0c444..238ded7 100644 --- a/bench/justfile +++ b/bench/justfile @@ -10,5 +10,6 @@ before_commit: ethi_bench: cargo build --release --all-targets cd ../Yaml-rust && cargo build --release --all-targets + cd ../serde-yaml/ && cargo build --release --all-targets cd ../libfyaml/build && ninja cargo bench_compare run_bench From 1e43066d15d6ea6faa0ace8fa69a583367785ec7 Mon Sep 17 00:00:00 2001 From: David Aguilar Date: Tue, 19 Mar 2024 23:34:17 -0700 Subject: [PATCH 323/380] README: add an "Upgrading from yaml-rust" section --- saphyr/README.md | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/saphyr/README.md b/saphyr/README.md index 04afc52..18db862 100644 --- a/saphyr/README.md +++ b/saphyr/README.md @@ -87,6 +87,21 @@ This implementation is fully compatible with the YAML 1.2 specification. In order to help with compliance, `yaml-rust2` tests against (and passes) the [YAML test suite](https://github.com/yaml/yaml-test-suite/). +## Upgrading from yaml-rust + +You can use `yaml-rust2` as a drop-in replacement for the original `yaml-rust` crate. + +```toml +[dependencies] +yaml-rust = { version = "#.#", package = "yaml-rust2" } +``` + +This `Cargo.toml` declaration allows you to refer to this crate as `yaml_rust` in your code. + +```rust +use yaml_rust::{YamlLoader, YamlEmitter}; +``` + ## License Licensed under either of From e00d7f251ea26388a4a8419461df986bd0694a6a Mon Sep 17 00:00:00 2001 From: David Aguilar Date: Tue, 19 Mar 2024 23:43:57 -0700 Subject: [PATCH 324/380] CHANGELOG: move recent updates to v0.7.0 and add a v0.6.0 section --- saphyr/CHANGELOG.md | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/saphyr/CHANGELOG.md b/saphyr/CHANGELOG.md index f45c78d..c348e51 100644 --- a/saphyr/CHANGELOG.md +++ b/saphyr/CHANGELOG.md @@ -1,6 +1,6 @@ # Changelog -## v0.6.0 +## v0.7.0 **Features**: @@ -30,6 +30,16 @@ - `hashlink` is [now used](https://github.com/chyh1990/yaml-rust/pull/157) instead of `linked_hash_map`. +## v0.6.0 + +**Development**: + +- `is_xxx` functions were moved into the private `char_traits` module. + +- Benchmarking tools were added. + +- Performance was improved. + ## v0.5.0 - The parser now supports tag directives. From ee8a999075a67f33b85400876461e94947e36c35 Mon Sep 17 00:00:00 2001 From: Ethiraric Date: Wed, 20 Mar 2024 14:42:31 +0100 Subject: [PATCH 325/380] Update cargo version to 0.7. --- saphyr/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/saphyr/Cargo.toml b/saphyr/Cargo.toml index 07da692..bf96f58 100644 --- a/saphyr/Cargo.toml +++ b/saphyr/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "yaml-rust2" -version = "0.6.0" +version = "0.7.0" authors = [ "Yuheng Chen ", "Ethiraric ", From a94a434b12a2ed615b508b2f7b9a5343ac7c8502 Mon Sep 17 00:00:00 2001 From: Ethiraric Date: Wed, 20 Mar 2024 15:45:24 +0100 Subject: [PATCH 326/380] Forgot to bump some v0.6.0 to v0.7.0. --- bench/tools/gen_large_yaml/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bench/tools/gen_large_yaml/Cargo.toml b/bench/tools/gen_large_yaml/Cargo.toml index 0fe3eac..750a4c4 100644 --- a/bench/tools/gen_large_yaml/Cargo.toml +++ b/bench/tools/gen_large_yaml/Cargo.toml @@ -11,7 +11,7 @@ readme = "README.md" edition = "2018" [dependencies] -yaml-rust2 = { version = "0.6.0", path = "../../" } +yaml-rust2 = { version = "0.7.0", path = "../../" } rand = "0.8.5" lipsum = "0.9.0" From 075d43a9c4882c439b2db4f5045164c4e1b0f2d8 Mon Sep 17 00:00:00 2001 From: Ethiraric Date: Wed, 20 Mar 2024 15:50:48 +0100 Subject: [PATCH 327/380] Add `missing_docs` warning. --- bench/justfile | 1 + 1 file changed, 1 insertion(+) diff --git a/bench/justfile b/bench/justfile index 238ded7..58c8c4d 100644 --- a/bench/justfile +++ b/bench/justfile @@ -5,6 +5,7 @@ before_commit: cargo build --all-targets cargo test cargo test --release + cargo test --doc cargo build --profile=release-lto --package gen_large_yaml --bin gen_large_yaml --manifest-path tools/gen_large_yaml/Cargo.toml ethi_bench: From a06ba5205ac19b40d44a40fe2c576815cdc08522 Mon Sep 17 00:00:00 2001 From: Ethiraric Date: Wed, 20 Mar 2024 15:45:24 +0100 Subject: [PATCH 328/380] Forgot to bump some v0.6.0 to v0.7.0. --- saphyr/src/lib.rs | 2 +- saphyr/tools/gen_large_yaml/Cargo.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/saphyr/src/lib.rs b/saphyr/src/lib.rs index 4428f5a..a6e49b5 100644 --- a/saphyr/src/lib.rs +++ b/saphyr/src/lib.rs @@ -11,7 +11,7 @@ //! //! ```toml //! [dependencies] -//! yaml-rust2 = "0.6.0" +//! yaml-rust2 = "0.7.0" //! ``` //! //! # Examples diff --git a/saphyr/tools/gen_large_yaml/Cargo.toml b/saphyr/tools/gen_large_yaml/Cargo.toml index 0fe3eac..750a4c4 100644 --- a/saphyr/tools/gen_large_yaml/Cargo.toml +++ b/saphyr/tools/gen_large_yaml/Cargo.toml @@ -11,7 +11,7 @@ readme = "README.md" edition = "2018" [dependencies] -yaml-rust2 = { version = "0.6.0", path = "../../" } +yaml-rust2 = { version = "0.7.0", path = "../../" } rand = "0.8.5" lipsum = "0.9.0" From c3ba2070e50675737740669428c345dd42537061 Mon Sep 17 00:00:00 2001 From: Ethiraric Date: Wed, 20 Mar 2024 15:50:48 +0100 Subject: [PATCH 329/380] Add `missing_docs` warning. --- saphyr/justfile | 1 + saphyr/src/emitter.rs | 22 ++++++++++++++++++++-- saphyr/src/lib.rs | 9 +-------- saphyr/src/parser.rs | 11 +++++++++++ saphyr/src/scanner.rs | 24 ++++++++++++++++++++++-- saphyr/src/yaml.rs | 28 ++++++++++++++++++++++++++++ saphyr/tests/yaml-test-suite.rs | 1 - 7 files changed, 83 insertions(+), 13 deletions(-) diff --git a/saphyr/justfile b/saphyr/justfile index 238ded7..58c8c4d 100644 --- a/saphyr/justfile +++ b/saphyr/justfile @@ -5,6 +5,7 @@ before_commit: cargo build --all-targets cargo test cargo test --release + cargo test --doc cargo build --profile=release-lto --package gen_large_yaml --bin gen_large_yaml --manifest-path tools/gen_large_yaml/Cargo.toml ethi_bench: diff --git a/saphyr/src/emitter.rs b/saphyr/src/emitter.rs index 0c365a8..15f8cab 100644 --- a/saphyr/src/emitter.rs +++ b/saphyr/src/emitter.rs @@ -1,13 +1,16 @@ +//! YAML serialization helpers. + use crate::char_traits; use crate::yaml::{Hash, Yaml}; use std::convert::From; use std::error::Error; use std::fmt::{self, Display}; +/// An error when emitting YAML. #[derive(Copy, Clone, Debug)] pub enum EmitError { + /// A formatting error. FmtError(fmt::Error), - BadHashmapKey, } impl Error for EmitError { @@ -20,7 +23,6 @@ impl Display for EmitError { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { match *self { EmitError::FmtError(ref err) => Display::fmt(err, formatter), - EmitError::BadHashmapKey => formatter.write_str("bad hashmap key"), } } } @@ -31,6 +33,20 @@ impl From for EmitError { } } +/// The YAML serializer. +/// +/// ``` +/// # use yaml_rust2::{YamlLoader, YamlEmitter}; +/// let input_string = "a: b\nc: d"; +/// let yaml = YamlLoader::load_from_str(input_string).unwrap(); +/// +/// let mut output = String::new(); +/// YamlEmitter::new(&mut output).dump(&yaml[0]).unwrap(); +/// +/// assert_eq!(output, r#"--- +/// a: b +/// c: d"#); +/// ``` #[allow(clippy::module_name_repetitions)] pub struct YamlEmitter<'a> { writer: &'a mut dyn fmt::Write, @@ -40,6 +56,7 @@ pub struct YamlEmitter<'a> { multiline_strings: bool, } +/// A convenience alias for emitter functions that may fail without returning a value. pub type EmitResult = Result<(), EmitError>; // from serialize::json @@ -106,6 +123,7 @@ fn escape_str(wr: &mut dyn fmt::Write, v: &str) -> Result<(), fmt::Error> { } impl<'a> YamlEmitter<'a> { + /// Create a nwe emitter serializing into `writer`. pub fn new(writer: &'a mut dyn fmt::Write) -> YamlEmitter { YamlEmitter { writer, diff --git a/saphyr/src/lib.rs b/saphyr/src/lib.rs index a6e49b5..9ca9a45 100644 --- a/saphyr/src/lib.rs +++ b/saphyr/src/lib.rs @@ -30,14 +30,7 @@ //! //! ``` -#![warn(clippy::pedantic)] -#![allow( - clippy::match_same_arms, - clippy::should_implement_trait, - clippy::missing_errors_doc, - clippy::missing_panics_doc, - clippy::redundant_else -)] +#![warn(missing_docs, clippy::pedantic)] extern crate hashlink; diff --git a/saphyr/src/parser.rs b/saphyr/src/parser.rs index 7543e97..b834c38 100644 --- a/saphyr/src/parser.rs +++ b/saphyr/src/parser.rs @@ -1,3 +1,9 @@ +//! Home to the YAML Parser. +//! +//! The parser takes input from the [`crate::scanner::Scanner`], performs final checks for YAML +//! compliance, and emits a stream of tokens that can be used by the [`crate::YamlLoader`] to +//! construct the [`crate::Yaml`] object. + use crate::scanner::{Marker, ScanError, Scanner, TScalarStyle, Token, TokenType}; use std::collections::HashMap; @@ -53,19 +59,23 @@ pub enum Event { ), /// Value, style, anchor_id, tag Scalar(String, TScalarStyle, usize, Option), + /// The start of a YAML sequence (array). SequenceStart( /// The anchor ID of the start of the squence. usize, /// An optional tag Option, ), + /// The end of a YAML sequence (array). SequenceEnd, + /// The start of a YAML mapping (object, hash). MappingStart( /// The anchor ID of the start of the mapping. usize, /// An optional tag Option, ), + /// The end of a YAML mapping (object, hash). MappingEnd, } @@ -195,6 +205,7 @@ impl MarkedEventReceiver for R { } } +/// A convenience alias for a `Result` of a parser event. pub type ParseResult = Result<(Event, Marker), ScanError>; impl> Parser { diff --git a/saphyr/src/scanner.rs b/saphyr/src/scanner.rs index d6e9788..556ca9b 100644 --- a/saphyr/src/scanner.rs +++ b/saphyr/src/scanner.rs @@ -1,3 +1,11 @@ +//! Home to the YAML Scanner. +//! +//! The scanner is the lowest-level parsing utility. It is the lexer / tokenizer, reading input a +//! character at a time and emitting tokens that can later be interpreted by the [`crate::parser`] +//! to check for more context and validity. +//! +//! Due to the grammar of YAML, the scanner has to have some context and is not error-free. + #![allow(clippy::cast_possible_wrap)] #![allow(clippy::cast_sign_loss)] @@ -10,19 +18,26 @@ use crate::char_traits::{ is_flow, is_hex, is_tag_char, is_uri_char, is_z, }; +/// The encoding of the input. Currently, only UTF-8 is supported. #[derive(Clone, Copy, PartialEq, Debug, Eq)] pub enum TEncoding { + /// UTF-8 encoding. Utf8, } +/// The style as which the scalar was written in the YAML document. #[derive(Clone, Copy, PartialEq, Debug, Eq)] pub enum TScalarStyle { - Any, + /// A YAML plain scalar. Plain, + /// A YAML single quoted scalar. SingleQuoted, + /// A YAML double quoted scalar. DoubleQuoted, + /// A YAML literal block (`|` block). Literal, + /// A YAML folded block (`>` block). Folded, } @@ -120,17 +135,18 @@ impl fmt::Display for ScanError { /// The contents of a scanner token. #[derive(Clone, PartialEq, Debug, Eq)] pub enum TokenType { - NoToken, /// The start of the stream. Sent first, before even [`DocumentStart`]. StreamStart(TEncoding), /// The end of the stream, EOF. StreamEnd, + /// A YAML version directive. VersionDirective( /// Major u32, /// Minor u32, ), + /// A YAML tag directive (e.g.: `!!str`, `!foo!bar`, ...). TagDirective( /// Handle String, @@ -394,6 +410,7 @@ impl> Iterator for Scanner { } } +/// A convenience alias for scanner functions that may fail without returning a value. pub type ScanResult = Result<(), ScanError>; impl> Scanner { @@ -532,16 +549,19 @@ impl> Scanner { self.buffer[0] == c } + /// Return whether the [`TokenType::StreamStart`] event has been emitted. #[inline] pub fn stream_started(&self) -> bool { self.stream_start_produced } + /// Return whether the [`TokenType::StreamEnd`] event has been emitted. #[inline] pub fn stream_ended(&self) -> bool { self.stream_end_produced } + /// Get the current position in the input stream. #[inline] pub fn mark(&self) -> Marker { self.mark diff --git a/saphyr/src/yaml.rs b/saphyr/src/yaml.rs index c1cecd7..e310795 100644 --- a/saphyr/src/yaml.rs +++ b/saphyr/src/yaml.rs @@ -1,3 +1,5 @@ +//! YAML objects manipulation utilities. + #![allow(clippy::module_name_repetitions)] use std::{collections::BTreeMap, convert::TryFrom, mem, ops::Index}; @@ -50,7 +52,9 @@ pub enum Yaml { BadValue, } +/// The type contained in the `Yaml::Array` variant. This corresponds to YAML sequences. pub type Array = Vec; +/// The type contained in the `Yaml::Hash` variant. This corresponds to YAML mappings. pub type Hash = LinkedHashMap; // parse f64 as Core schema @@ -69,6 +73,7 @@ fn parse_f64(v: &str) -> Option { /// See [`YamlLoader::load_from_str`]. #[derive(Default)] pub struct YamlLoader { + /// The different YAML documents that are loaded. docs: Vec, // states // (current node, anchor_id) tuple @@ -161,10 +166,14 @@ impl MarkedEventReceiver for YamlLoader { } } +/// An error that happened when loading a YAML document. #[derive(Debug)] pub enum LoadError { + /// An I/O error. IO(std::io::Error), + /// An error within the scanner. This indicates a malformed YAML input. Scan(ScanError), + /// A decoding error (e.g.: Invalid UTF_8). Decode(std::borrow::Cow<'static, str>), } @@ -251,6 +260,7 @@ pub struct YamlDecoder { } impl YamlDecoder { + /// Create a `YamlDecoder` decoding the given source. pub fn read(source: T) -> YamlDecoder { YamlDecoder { source, @@ -258,11 +268,14 @@ impl YamlDecoder { } } + /// Set the behavior of the decoder when the encoding is invalid. pub fn encoding_trap(&mut self, trap: encoding::types::DecoderTrap) -> &mut Self { self.trap = trap; self } + /// Run the decode operation with the source and trap the `YamlDecoder` was built with. + /// /// # Errors /// Returns `LoadError` when decoding fails. pub fn decode(&mut self) -> Result, LoadError> { @@ -301,6 +314,11 @@ fn detect_utf16_endianness(b: &[u8]) -> encoding::types::EncodingRef { macro_rules! define_as ( ($name:ident, $t:ident, $yt:ident) => ( +/// Get a copy of the inner object in the YAML enum if it is a `$t`. +/// +/// # Return +/// If the variant of `self` is `Yaml::$yt`, return `Some($t)` with a copy of the `$t` contained. +/// Otherwise, return `None`. #[must_use] pub fn $name(&self) -> Option<$t> { match *self { @@ -313,6 +331,11 @@ pub fn $name(&self) -> Option<$t> { macro_rules! define_as_ref ( ($name:ident, $t:ty, $yt:ident) => ( +/// Get a reference to the inner object in the YAML enum if it is a `$t`. +/// +/// # Return +/// If the variant of `self` is `Yaml::$yt`, return `Some(&$t)` with the `$t` contained. Otherwise, +/// return `None`. #[must_use] pub fn $name(&self) -> Option<$t> { match *self { @@ -325,6 +348,11 @@ pub fn $name(&self) -> Option<$t> { macro_rules! define_into ( ($name:ident, $t:ty, $yt:ident) => ( +/// Get the inner object in the YAML enum if it is a `$t`. +/// +/// # Return +/// If the variant of `self` is `Yaml::$yt`, return `Some($t)` with the `$t` contained. Otherwise, +/// return `None`. #[must_use] pub fn $name(self) -> Option<$t> { match self { diff --git a/saphyr/tests/yaml-test-suite.rs b/saphyr/tests/yaml-test-suite.rs index befebc1..e7ad10e 100644 --- a/saphyr/tests/yaml-test-suite.rs +++ b/saphyr/tests/yaml-test-suite.rs @@ -163,7 +163,6 @@ impl EventReceiver for EventReporter { TScalarStyle::DoubleQuoted => r#"""#, TScalarStyle::Literal => "|", TScalarStyle::Folded => ">", - TScalarStyle::Any => unreachable!(), }; format!( "=VAL{}{} {}{}", From 898dab9851e78a34d7d03fab951fbc647074b14a Mon Sep 17 00:00:00 2001 From: Ethiraric Date: Wed, 20 Mar 2024 15:54:09 +0100 Subject: [PATCH 330/380] Fix rustdoc ignore directive. --- saphyr/src/char_traits.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/saphyr/src/char_traits.rs b/saphyr/src/char_traits.rs index a48d4db..82f81bd 100644 --- a/saphyr/src/char_traits.rs +++ b/saphyr/src/char_traits.rs @@ -112,7 +112,7 @@ pub(crate) fn is_tag_char(c: char) -> bool { /// Check if the string can be expressed a valid literal block scalar. /// The YAML spec supports all of the following in block literals except `#xFEFF`: -/// ```ignore +/// ```no_compile /// #x9 | #xA | [#x20-#x7E] /* 8 bit */ /// | #x85 | [#xA0-#xD7FF] | [#xE000-#xFFFD] /* 16 bit */ /// | [#x10000-#x10FFFF] /* 32 bit */ From e8415713ab3c6cd0d30b8080d5c55330b8813ac2 Mon Sep 17 00:00:00 2001 From: Ethiraric Date: Wed, 20 Mar 2024 21:39:38 +0100 Subject: [PATCH 331/380] Edit v0.6 post with cargo alias. --- saphyr/documents/2024-03-15-FirstRelease.md | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/saphyr/documents/2024-03-15-FirstRelease.md b/saphyr/documents/2024-03-15-FirstRelease.md index 2c193f0..519a50d 100644 --- a/saphyr/documents/2024-03-15-FirstRelease.md +++ b/saphyr/documents/2024-03-15-FirstRelease.md @@ -108,6 +108,18 @@ As for your code, you have one of two solutions: Whichever you decide is up to you. +[Courtesy of davvid](https://github.com/chyh1990/yaml-rust/issues/160#issuecomment-2008931473), there is another +solution. You can combine both approaches and tell `Cargo.toml` to add `yaml-rust2` and to create a `yaml_rust` alias +for your code with the following: + +```diff +-yaml-rust = "0.4.4" ++yaml-rust = { version = "0.6", package = "yaml-rust2" } +``` + +This allows you to switch to `yaml-rust2` while continuing to refer to `yaml_rust` in your code (e.g. use +`yaml_rust::YamlLoader;` will continue to work so that no Rust code changes are required). + #### What about API breakage? Most of what I have changed is in the implementation details. You might notice more documentation appearing on your LSP, but documentation isn't bound by the API. There is only one change I made that could lead to compile errors. It is @@ -151,3 +163,5 @@ issue](https://github.com/Ethiraric/yaml-rust2/issues)! If however you wanted an OpenAPI linter, I'm afraid you're out of luck. Just as much as I'm out of time ;) -Ethiraric + +EDIT(20-03-2024): Add davvid's method of switching to `yaml-rust2` by creating a Cargo alias. From 869a2d1a157e0db99bbae5a986f3726f165dede3 Mon Sep 17 00:00:00 2001 From: Ethiraric Date: Wed, 20 Mar 2024 23:07:08 +0100 Subject: [PATCH 332/380] Make `gen_large_yaml` reproductible. * Use a seedable RNG so that we can have the same number sequence. * Replace `HashMap`s with `Vec`s to avoid undeterministic iteration. --- saphyr/tools/gen_large_yaml/Cargo.toml | 2 +- saphyr/tools/gen_large_yaml/src/gen.rs | 24 ++-- saphyr/tools/gen_large_yaml/src/main.rs | 138 +++++++++++----------- saphyr/tools/gen_large_yaml/src/nested.rs | 9 +- 4 files changed, 91 insertions(+), 82 deletions(-) diff --git a/saphyr/tools/gen_large_yaml/Cargo.toml b/saphyr/tools/gen_large_yaml/Cargo.toml index 750a4c4..a70e779 100644 --- a/saphyr/tools/gen_large_yaml/Cargo.toml +++ b/saphyr/tools/gen_large_yaml/Cargo.toml @@ -12,7 +12,7 @@ edition = "2018" [dependencies] yaml-rust2 = { version = "0.7.0", path = "../../" } -rand = "0.8.5" +rand = { version = "0.8.5", features = [ "small_rng" ] } lipsum = "0.9.0" [profile.release-lto] diff --git a/saphyr/tools/gen_large_yaml/src/gen.rs b/saphyr/tools/gen_large_yaml/src/gen.rs index 2a7dffe..78d16ba 100644 --- a/saphyr/tools/gen_large_yaml/src/gen.rs +++ b/saphyr/tools/gen_large_yaml/src/gen.rs @@ -1,15 +1,15 @@ #![allow(clippy::too_many_arguments)] -use rand::{distributions::Alphanumeric, rngs::ThreadRng, Rng}; +use rand::{distributions::Alphanumeric, rngs::SmallRng, Rng}; /// Generate a string with hexadecimal digits of the specified length. -pub fn hex_string(rng: &mut ThreadRng, len: usize) -> String { +pub fn hex_string(rng: &mut SmallRng, len: usize) -> String { const DIGITS: &[u8] = b"0123456789abcdef"; string_from_set(rng, len, len + 1, DIGITS) } /// Generate an e-mail address. -pub fn email(rng: &mut ThreadRng, len_lo: usize, len_hi: usize) -> String { +pub fn email(rng: &mut SmallRng, len_lo: usize, len_hi: usize) -> String { const CHARSET: &[u8] = b"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ-_.0123456789"; format!( "{}@example.com", @@ -19,7 +19,7 @@ pub fn email(rng: &mut ThreadRng, len_lo: usize, len_hi: usize) -> String { /// Generate a random URL. pub fn url( - rng: &mut ThreadRng, + rng: &mut SmallRng, scheme: &str, n_paths_lo: usize, n_paths_hi: usize, @@ -40,12 +40,12 @@ pub fn url( } /// Generate a random integer. -pub fn integer(rng: &mut ThreadRng, lo: i64, hi: i64) -> i64 { +pub fn integer(rng: &mut SmallRng, lo: i64, hi: i64) -> i64 { rng.gen_range(lo..hi) } /// Generate an alphanumeric string with a length between `lo_len` and `hi_len`. -pub fn alnum_string(rng: &mut ThreadRng, lo_len: usize, hi_len: usize) -> String { +pub fn alnum_string(rng: &mut SmallRng, lo_len: usize, hi_len: usize) -> String { let len = rng.gen_range(lo_len..hi_len); rng.sample_iter(&Alphanumeric) .take(len) @@ -54,7 +54,7 @@ pub fn alnum_string(rng: &mut ThreadRng, lo_len: usize, hi_len: usize) -> String } /// Generate a string with hexadecimal digits of the specified length. -pub fn string_from_set(rng: &mut ThreadRng, len_lo: usize, len_hi: usize, set: &[u8]) -> String { +pub fn string_from_set(rng: &mut SmallRng, len_lo: usize, len_hi: usize, set: &[u8]) -> String { (0..rng.gen_range(len_lo..len_hi)) .map(|_| set[rng.gen_range(0..set.len())] as char) .collect() @@ -62,7 +62,7 @@ pub fn string_from_set(rng: &mut ThreadRng, len_lo: usize, len_hi: usize, set: & /// Generate a lipsum paragraph. pub fn paragraph( - rng: &mut ThreadRng, + rng: &mut SmallRng, lines_lo: usize, lines_hi: usize, wps_lo: usize, @@ -99,7 +99,7 @@ pub fn paragraph( } /// Generate a full name. -pub fn full_name(rng: &mut ThreadRng, len_lo: usize, len_hi: usize) -> String { +pub fn full_name(rng: &mut SmallRng, len_lo: usize, len_hi: usize) -> String { format!( "{} {}", name(rng, len_lo, len_hi), @@ -108,7 +108,7 @@ pub fn full_name(rng: &mut ThreadRng, len_lo: usize, len_hi: usize) -> String { } /// Generate a name. -pub fn name(rng: &mut ThreadRng, len_lo: usize, len_hi: usize) -> String { +pub fn name(rng: &mut SmallRng, len_lo: usize, len_hi: usize) -> String { const UPPER: &[u8] = b"ABCDEFGHIJKLMNOPQRSTUVWXYZ"; const LOWER: &[u8] = b"abcdefghijklmnopqrstuvwxyz"; @@ -121,7 +121,7 @@ pub fn name(rng: &mut ThreadRng, len_lo: usize, len_hi: usize) -> String { } /// Generate a set of words. -pub fn words(rng: &mut ThreadRng, words_lo: usize, words_hi: usize) -> String { +pub fn words(rng: &mut SmallRng, words_lo: usize, words_hi: usize) -> String { let nwords = rng.gen_range(words_lo..words_hi); lipsum::lipsum_words_with_rng(rng.clone(), nwords).replace(|c| "-\'\",*:".contains(c), "") } @@ -130,7 +130,7 @@ pub fn words(rng: &mut ThreadRng, words_lo: usize, words_hi: usize) -> String { /// /// Texts are composed of some paragraphs and empty lines between them. pub fn text( - rng: &mut ThreadRng, + rng: &mut SmallRng, paragraphs_lo: usize, paragraphs_hi: usize, lines_lo: usize, diff --git a/saphyr/tools/gen_large_yaml/src/main.rs b/saphyr/tools/gen_large_yaml/src/main.rs index 1c7ee6e..86423bf 100644 --- a/saphyr/tools/gen_large_yaml/src/main.rs +++ b/saphyr/tools/gen_large_yaml/src/main.rs @@ -3,12 +3,11 @@ mod gen; mod nested; -use std::collections::HashMap; use std::fs::File; use std::io::BufWriter; use std::path::Path; -use rand::{rngs::ThreadRng, Rng}; +use rand::{rngs::SmallRng, Rng, SeedableRng}; /// The path into which the generated YAML files will be written. const OUTPUT_DIR: &str = "bench_yaml"; @@ -41,7 +40,10 @@ fn main() -> std::io::Result<()> { /// YAML Generator. struct Generator { /// The RNG state. - rng: ThreadRng, + /// + /// We don't need to be cryptographically secure. [`SmallRng`] also implements the + /// [`SeedableRng`] trait, allowing runs to be predictible. + rng: SmallRng, /// The stack of indentations. indents: Vec, } @@ -52,7 +54,7 @@ impl Generator { /// Create a new generator. fn new() -> Self { Generator { - rng: rand::thread_rng(), + rng: SmallRng::seed_from_u64(42), indents: vec![0], } } @@ -87,58 +89,61 @@ impl Generator { /// The `description` field is a long string and puts a lot of weight in plain scalar / block /// scalar parsing. fn gen_record_object(&mut self, writer: &mut W) -> std::io::Result<()> { - let mut fields = HashMap::>>::new(); - fields.insert( - "description".to_string(), - Box::new(|gen, w| { - write!(w, "|")?; - gen.push_indent(2); - gen.nl(w)?; - let indent = gen.indent(); - let text = gen::text(&mut gen.rng, 1, 9, 3, 8, 10, 20, 80 - indent); - gen.write_lines(w, &text)?; - gen.pop_indent(); - Ok(()) - }), - ); - - fields.insert( - "authors".to_string(), - Box::new(|gen, w| { - gen.push_indent(2); - gen.nl(w)?; - gen.gen_authors_array(w, 1, 10)?; - gen.pop_indent(); - Ok(()) - }), - ); - - fields.insert( - "hash".to_string(), - Box::new(|gen, w| write!(w, "{}", gen::hex_string(&mut gen.rng, 64))), - ); - fields.insert( - "version".to_string(), - Box::new(|gen, w| write!(w, "{}", gen::integer(&mut gen.rng, 1, 9))), - ); - fields.insert( - "home".to_string(), - Box::new(|gen, w| write!(w, "{}", gen::url(&mut gen.rng, "https", 0, 1, 0, 0, None))), - ); - fields.insert( - "repository".to_string(), - Box::new(|gen, w| write!(w, "{}", gen::url(&mut gen.rng, "git", 1, 4, 10, 20, None))), - ); - fields.insert( - "pdf".to_string(), - Box::new(|gen, w| { - write!( - w, - "{}", - gen::url(&mut gen.rng, "https", 1, 4, 10, 30, Some("pdf")) - ) - }), - ); + let fields: Vec<(String, Box>)> = vec![ + ( + "description".to_string(), + Box::new(|gen, w| { + write!(w, "|")?; + gen.push_indent(2); + gen.nl(w)?; + let indent = gen.indent(); + let text = gen::text(&mut gen.rng, 1, 9, 3, 8, 10, 20, 80 - indent); + gen.write_lines(w, &text)?; + gen.pop_indent(); + Ok(()) + }), + ), + ( + "authors".to_string(), + Box::new(|gen, w| { + gen.push_indent(2); + gen.nl(w)?; + gen.gen_authors_array(w, 1, 10)?; + gen.pop_indent(); + Ok(()) + }), + ), + ( + "hash".to_string(), + Box::new(|gen, w| write!(w, "{}", gen::hex_string(&mut gen.rng, 64))), + ), + ( + "version".to_string(), + Box::new(|gen, w| write!(w, "{}", gen::integer(&mut gen.rng, 1, 9))), + ), + ( + "home".to_string(), + Box::new(|gen, w| { + write!(w, "{}", gen::url(&mut gen.rng, "https", 0, 1, 0, 0, None)) + }), + ), + ( + "repository".to_string(), + Box::new(|gen, w| { + write!(w, "{}", gen::url(&mut gen.rng, "git", 1, 4, 10, 20, None)) + }), + ), + ( + "pdf".to_string(), + Box::new(|gen, w| { + write!( + w, + "{}", + gen::url(&mut gen.rng, "https", 1, 4, 10, 30, Some("pdf")) + ) + }), + ), + ]; self.gen_object(writer, fields) } @@ -154,15 +159,16 @@ impl Generator { /// Generate a small object with 2 string fields. fn gen_author_object(&mut self, writer: &mut W) -> std::io::Result<()> { - let mut fields = HashMap::>>::new(); - fields.insert( - "name".to_string(), - Box::new(|gen, w| write!(w, "{}", gen::full_name(&mut gen.rng, 10, 15))), - ); - fields.insert( - "email".to_string(), - Box::new(|gen, w| write!(w, "{}", gen::email(&mut gen.rng, 1, 9))), - ); + let fields: Vec<(String, Box>)> = vec![ + ( + "name".to_string(), + Box::new(|gen, w| write!(w, "{}", gen::full_name(&mut gen.rng, 10, 15))), + ), + ( + "email".to_string(), + Box::new(|gen, w| write!(w, "{}", gen::email(&mut gen.rng, 1, 9))), + ), + ]; self.gen_object(writer, fields) } @@ -193,7 +199,7 @@ impl Generator { fn gen_object( &mut self, writer: &mut W, - fields: HashMap>>, + fields: Vec<(String, Box>)>, ) -> std::io::Result<()> { let mut first = true; for (key, f) in fields { diff --git a/saphyr/tools/gen_large_yaml/src/nested.rs b/saphyr/tools/gen_large_yaml/src/nested.rs index f54b55c..db93ff9 100644 --- a/saphyr/tools/gen_large_yaml/src/nested.rs +++ b/saphyr/tools/gen_large_yaml/src/nested.rs @@ -1,6 +1,6 @@ use std::{cell::RefCell, rc::Rc}; -use rand::{rngs::ThreadRng, Rng}; +use rand::{rngs::SmallRng, Rng, SeedableRng}; /// Create a deep object with the given amount of nodes. pub fn create_deep_object( @@ -24,7 +24,10 @@ struct Tree { /// Array of all the nodes in the tree, including the root node. nodes: Vec>>, /// The RNG state. - rng: ThreadRng, + /// + /// We don't need to be cryptographically secure. [`SmallRng`] also implements the + /// [`SeedableRng`] trait, allowing runs to be predictible. + rng: SmallRng, } /// A node in a tree. @@ -40,7 +43,7 @@ impl Tree { Tree { root: root.clone(), nodes: vec![root], - rng: rand::thread_rng(), + rng: SmallRng::seed_from_u64(42), } } From 0819c0cd465f1b8164cdd98d128be5a0246dd7b0 Mon Sep 17 00:00:00 2001 From: Ethiraric Date: Wed, 20 Mar 2024 23:07:08 +0100 Subject: [PATCH 333/380] Make `gen_large_yaml` reproductible. * Use a seedable RNG so that we can have the same number sequence. * Replace `HashMap`s with `Vec`s to avoid undeterministic iteration. --- bench/tools/gen_large_yaml/Cargo.toml | 2 +- bench/tools/gen_large_yaml/src/gen.rs | 24 ++-- bench/tools/gen_large_yaml/src/main.rs | 138 ++++++++++++----------- bench/tools/gen_large_yaml/src/nested.rs | 9 +- 4 files changed, 91 insertions(+), 82 deletions(-) diff --git a/bench/tools/gen_large_yaml/Cargo.toml b/bench/tools/gen_large_yaml/Cargo.toml index 750a4c4..a70e779 100644 --- a/bench/tools/gen_large_yaml/Cargo.toml +++ b/bench/tools/gen_large_yaml/Cargo.toml @@ -12,7 +12,7 @@ edition = "2018" [dependencies] yaml-rust2 = { version = "0.7.0", path = "../../" } -rand = "0.8.5" +rand = { version = "0.8.5", features = [ "small_rng" ] } lipsum = "0.9.0" [profile.release-lto] diff --git a/bench/tools/gen_large_yaml/src/gen.rs b/bench/tools/gen_large_yaml/src/gen.rs index 2a7dffe..78d16ba 100644 --- a/bench/tools/gen_large_yaml/src/gen.rs +++ b/bench/tools/gen_large_yaml/src/gen.rs @@ -1,15 +1,15 @@ #![allow(clippy::too_many_arguments)] -use rand::{distributions::Alphanumeric, rngs::ThreadRng, Rng}; +use rand::{distributions::Alphanumeric, rngs::SmallRng, Rng}; /// Generate a string with hexadecimal digits of the specified length. -pub fn hex_string(rng: &mut ThreadRng, len: usize) -> String { +pub fn hex_string(rng: &mut SmallRng, len: usize) -> String { const DIGITS: &[u8] = b"0123456789abcdef"; string_from_set(rng, len, len + 1, DIGITS) } /// Generate an e-mail address. -pub fn email(rng: &mut ThreadRng, len_lo: usize, len_hi: usize) -> String { +pub fn email(rng: &mut SmallRng, len_lo: usize, len_hi: usize) -> String { const CHARSET: &[u8] = b"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ-_.0123456789"; format!( "{}@example.com", @@ -19,7 +19,7 @@ pub fn email(rng: &mut ThreadRng, len_lo: usize, len_hi: usize) -> String { /// Generate a random URL. pub fn url( - rng: &mut ThreadRng, + rng: &mut SmallRng, scheme: &str, n_paths_lo: usize, n_paths_hi: usize, @@ -40,12 +40,12 @@ pub fn url( } /// Generate a random integer. -pub fn integer(rng: &mut ThreadRng, lo: i64, hi: i64) -> i64 { +pub fn integer(rng: &mut SmallRng, lo: i64, hi: i64) -> i64 { rng.gen_range(lo..hi) } /// Generate an alphanumeric string with a length between `lo_len` and `hi_len`. -pub fn alnum_string(rng: &mut ThreadRng, lo_len: usize, hi_len: usize) -> String { +pub fn alnum_string(rng: &mut SmallRng, lo_len: usize, hi_len: usize) -> String { let len = rng.gen_range(lo_len..hi_len); rng.sample_iter(&Alphanumeric) .take(len) @@ -54,7 +54,7 @@ pub fn alnum_string(rng: &mut ThreadRng, lo_len: usize, hi_len: usize) -> String } /// Generate a string with hexadecimal digits of the specified length. -pub fn string_from_set(rng: &mut ThreadRng, len_lo: usize, len_hi: usize, set: &[u8]) -> String { +pub fn string_from_set(rng: &mut SmallRng, len_lo: usize, len_hi: usize, set: &[u8]) -> String { (0..rng.gen_range(len_lo..len_hi)) .map(|_| set[rng.gen_range(0..set.len())] as char) .collect() @@ -62,7 +62,7 @@ pub fn string_from_set(rng: &mut ThreadRng, len_lo: usize, len_hi: usize, set: & /// Generate a lipsum paragraph. pub fn paragraph( - rng: &mut ThreadRng, + rng: &mut SmallRng, lines_lo: usize, lines_hi: usize, wps_lo: usize, @@ -99,7 +99,7 @@ pub fn paragraph( } /// Generate a full name. -pub fn full_name(rng: &mut ThreadRng, len_lo: usize, len_hi: usize) -> String { +pub fn full_name(rng: &mut SmallRng, len_lo: usize, len_hi: usize) -> String { format!( "{} {}", name(rng, len_lo, len_hi), @@ -108,7 +108,7 @@ pub fn full_name(rng: &mut ThreadRng, len_lo: usize, len_hi: usize) -> String { } /// Generate a name. -pub fn name(rng: &mut ThreadRng, len_lo: usize, len_hi: usize) -> String { +pub fn name(rng: &mut SmallRng, len_lo: usize, len_hi: usize) -> String { const UPPER: &[u8] = b"ABCDEFGHIJKLMNOPQRSTUVWXYZ"; const LOWER: &[u8] = b"abcdefghijklmnopqrstuvwxyz"; @@ -121,7 +121,7 @@ pub fn name(rng: &mut ThreadRng, len_lo: usize, len_hi: usize) -> String { } /// Generate a set of words. -pub fn words(rng: &mut ThreadRng, words_lo: usize, words_hi: usize) -> String { +pub fn words(rng: &mut SmallRng, words_lo: usize, words_hi: usize) -> String { let nwords = rng.gen_range(words_lo..words_hi); lipsum::lipsum_words_with_rng(rng.clone(), nwords).replace(|c| "-\'\",*:".contains(c), "") } @@ -130,7 +130,7 @@ pub fn words(rng: &mut ThreadRng, words_lo: usize, words_hi: usize) -> String { /// /// Texts are composed of some paragraphs and empty lines between them. pub fn text( - rng: &mut ThreadRng, + rng: &mut SmallRng, paragraphs_lo: usize, paragraphs_hi: usize, lines_lo: usize, diff --git a/bench/tools/gen_large_yaml/src/main.rs b/bench/tools/gen_large_yaml/src/main.rs index 1c7ee6e..86423bf 100644 --- a/bench/tools/gen_large_yaml/src/main.rs +++ b/bench/tools/gen_large_yaml/src/main.rs @@ -3,12 +3,11 @@ mod gen; mod nested; -use std::collections::HashMap; use std::fs::File; use std::io::BufWriter; use std::path::Path; -use rand::{rngs::ThreadRng, Rng}; +use rand::{rngs::SmallRng, Rng, SeedableRng}; /// The path into which the generated YAML files will be written. const OUTPUT_DIR: &str = "bench_yaml"; @@ -41,7 +40,10 @@ fn main() -> std::io::Result<()> { /// YAML Generator. struct Generator { /// The RNG state. - rng: ThreadRng, + /// + /// We don't need to be cryptographically secure. [`SmallRng`] also implements the + /// [`SeedableRng`] trait, allowing runs to be predictible. + rng: SmallRng, /// The stack of indentations. indents: Vec, } @@ -52,7 +54,7 @@ impl Generator { /// Create a new generator. fn new() -> Self { Generator { - rng: rand::thread_rng(), + rng: SmallRng::seed_from_u64(42), indents: vec![0], } } @@ -87,58 +89,61 @@ impl Generator { /// The `description` field is a long string and puts a lot of weight in plain scalar / block /// scalar parsing. fn gen_record_object(&mut self, writer: &mut W) -> std::io::Result<()> { - let mut fields = HashMap::>>::new(); - fields.insert( - "description".to_string(), - Box::new(|gen, w| { - write!(w, "|")?; - gen.push_indent(2); - gen.nl(w)?; - let indent = gen.indent(); - let text = gen::text(&mut gen.rng, 1, 9, 3, 8, 10, 20, 80 - indent); - gen.write_lines(w, &text)?; - gen.pop_indent(); - Ok(()) - }), - ); - - fields.insert( - "authors".to_string(), - Box::new(|gen, w| { - gen.push_indent(2); - gen.nl(w)?; - gen.gen_authors_array(w, 1, 10)?; - gen.pop_indent(); - Ok(()) - }), - ); - - fields.insert( - "hash".to_string(), - Box::new(|gen, w| write!(w, "{}", gen::hex_string(&mut gen.rng, 64))), - ); - fields.insert( - "version".to_string(), - Box::new(|gen, w| write!(w, "{}", gen::integer(&mut gen.rng, 1, 9))), - ); - fields.insert( - "home".to_string(), - Box::new(|gen, w| write!(w, "{}", gen::url(&mut gen.rng, "https", 0, 1, 0, 0, None))), - ); - fields.insert( - "repository".to_string(), - Box::new(|gen, w| write!(w, "{}", gen::url(&mut gen.rng, "git", 1, 4, 10, 20, None))), - ); - fields.insert( - "pdf".to_string(), - Box::new(|gen, w| { - write!( - w, - "{}", - gen::url(&mut gen.rng, "https", 1, 4, 10, 30, Some("pdf")) - ) - }), - ); + let fields: Vec<(String, Box>)> = vec![ + ( + "description".to_string(), + Box::new(|gen, w| { + write!(w, "|")?; + gen.push_indent(2); + gen.nl(w)?; + let indent = gen.indent(); + let text = gen::text(&mut gen.rng, 1, 9, 3, 8, 10, 20, 80 - indent); + gen.write_lines(w, &text)?; + gen.pop_indent(); + Ok(()) + }), + ), + ( + "authors".to_string(), + Box::new(|gen, w| { + gen.push_indent(2); + gen.nl(w)?; + gen.gen_authors_array(w, 1, 10)?; + gen.pop_indent(); + Ok(()) + }), + ), + ( + "hash".to_string(), + Box::new(|gen, w| write!(w, "{}", gen::hex_string(&mut gen.rng, 64))), + ), + ( + "version".to_string(), + Box::new(|gen, w| write!(w, "{}", gen::integer(&mut gen.rng, 1, 9))), + ), + ( + "home".to_string(), + Box::new(|gen, w| { + write!(w, "{}", gen::url(&mut gen.rng, "https", 0, 1, 0, 0, None)) + }), + ), + ( + "repository".to_string(), + Box::new(|gen, w| { + write!(w, "{}", gen::url(&mut gen.rng, "git", 1, 4, 10, 20, None)) + }), + ), + ( + "pdf".to_string(), + Box::new(|gen, w| { + write!( + w, + "{}", + gen::url(&mut gen.rng, "https", 1, 4, 10, 30, Some("pdf")) + ) + }), + ), + ]; self.gen_object(writer, fields) } @@ -154,15 +159,16 @@ impl Generator { /// Generate a small object with 2 string fields. fn gen_author_object(&mut self, writer: &mut W) -> std::io::Result<()> { - let mut fields = HashMap::>>::new(); - fields.insert( - "name".to_string(), - Box::new(|gen, w| write!(w, "{}", gen::full_name(&mut gen.rng, 10, 15))), - ); - fields.insert( - "email".to_string(), - Box::new(|gen, w| write!(w, "{}", gen::email(&mut gen.rng, 1, 9))), - ); + let fields: Vec<(String, Box>)> = vec![ + ( + "name".to_string(), + Box::new(|gen, w| write!(w, "{}", gen::full_name(&mut gen.rng, 10, 15))), + ), + ( + "email".to_string(), + Box::new(|gen, w| write!(w, "{}", gen::email(&mut gen.rng, 1, 9))), + ), + ]; self.gen_object(writer, fields) } @@ -193,7 +199,7 @@ impl Generator { fn gen_object( &mut self, writer: &mut W, - fields: HashMap>>, + fields: Vec<(String, Box>)>, ) -> std::io::Result<()> { let mut first = true; for (key, f) in fields { diff --git a/bench/tools/gen_large_yaml/src/nested.rs b/bench/tools/gen_large_yaml/src/nested.rs index f54b55c..db93ff9 100644 --- a/bench/tools/gen_large_yaml/src/nested.rs +++ b/bench/tools/gen_large_yaml/src/nested.rs @@ -1,6 +1,6 @@ use std::{cell::RefCell, rc::Rc}; -use rand::{rngs::ThreadRng, Rng}; +use rand::{rngs::SmallRng, Rng, SeedableRng}; /// Create a deep object with the given amount of nodes. pub fn create_deep_object( @@ -24,7 +24,10 @@ struct Tree { /// Array of all the nodes in the tree, including the root node. nodes: Vec>>, /// The RNG state. - rng: ThreadRng, + /// + /// We don't need to be cryptographically secure. [`SmallRng`] also implements the + /// [`SeedableRng`] trait, allowing runs to be predictible. + rng: SmallRng, } /// A node in a tree. @@ -40,7 +43,7 @@ impl Tree { Tree { root: root.clone(), nodes: vec![root], - rng: rand::thread_rng(), + rng: SmallRng::seed_from_u64(42), } } From 61e26deb9923907abc764b429c2c62a28a2c4b1e Mon Sep 17 00:00:00 2001 From: Ethiraric Date: Thu, 21 Mar 2024 12:33:00 +0100 Subject: [PATCH 334/380] Run CI only on master pushes. --- saphyr/.github/workflows/ci.yml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/saphyr/.github/workflows/ci.yml b/saphyr/.github/workflows/ci.yml index 6a3354c..aa3a0fc 100644 --- a/saphyr/.github/workflows/ci.yml +++ b/saphyr/.github/workflows/ci.yml @@ -1,6 +1,10 @@ name: CI -on: [push, pull_request] +on: + pull_request: + push: + branches: + - master jobs: check: From 7d3825c049d55c4c3383ef4bd4080fe27e19297a Mon Sep 17 00:00:00 2001 From: Ethiraric Date: Thu, 21 Mar 2024 12:37:10 +0100 Subject: [PATCH 335/380] Remove no longer needed test files. These tests were sucecssfully converted to Rust files to include and are no longer necessary in this repository. Should they become relevant again in the future, they can always be checked out from a previous commit. --- saphyr/tests/specs/cpp2rust.rb | 78 -- saphyr/tests/specs/handler_spec_test.cpp | 1532 ---------------------- saphyr/tests/specs/libyaml_fail-01.yaml | 6 - saphyr/tests/specs/libyaml_fail-02.yaml | 7 - saphyr/tests/specs/libyaml_fail-03.yaml | 5 - 5 files changed, 1628 deletions(-) delete mode 100755 saphyr/tests/specs/cpp2rust.rb delete mode 100644 saphyr/tests/specs/handler_spec_test.cpp delete mode 100644 saphyr/tests/specs/libyaml_fail-01.yaml delete mode 100644 saphyr/tests/specs/libyaml_fail-02.yaml delete mode 100644 saphyr/tests/specs/libyaml_fail-03.yaml diff --git a/saphyr/tests/specs/cpp2rust.rb b/saphyr/tests/specs/cpp2rust.rb deleted file mode 100755 index 25813c8..0000000 --- a/saphyr/tests/specs/cpp2rust.rb +++ /dev/null @@ -1,78 +0,0 @@ -#!/usr/bin/env ruby - -TEST_REGEX = /TEST_F\([a-zA-Z0-9_]+,\s+([a-zA-Z0-9_]+)\)/ - -DISABLED_TESTS = %w( - test_ex7_10_plain_characters - test_ex7_17_flow_mapping_separate_values - test_ex7_21_single_pair_implicit_entries - test_ex7_2_empty_nodes - test_ex8_2_block_indentation_header -) - -class Context - attr_accessor :name, :ev, :src - def initialize - @name = "" - @src = "" - @ev = [] - end -end - -class String - def snakecase - self - .gsub(/([A-Z]+)([A-Z][a-z])/, '\1_\2') - .gsub(/([a-z\d])([A-Z])/, '\1_\2') - .tr('-', '_') - .gsub(/\s/, '_') - .gsub(/__+/, '_') - .downcase - end -end - -ctx = nil - -tests = [] -IO.foreach(ARGV[0]) do |line| - line.strip! - if ctx - fail "unexpected TEST_F" if line =~ TEST_REGEX - if line =~ /^}/ - tests << ctx - ctx = nil - end - if line =~ /^EXPECT_CALL/ - fail 'not end with ;' unless line[-1] == ';' - v = line.gsub('(', ' ').gsub(')', ' ').split - ctx.ev << v[2] - end - else - next unless line =~ TEST_REGEX - name = $1 - next unless name =~ /^(Ex\d+_\d+)/ - str = $1.upcase - $stderr.puts "found #{name}" - ctx = Context.new - ctx.name = "test_#{name.snakecase}" - ctx.src = str - end -end - -# code gen -tests.each do |t| - next if t.ev.size == 0 - if DISABLED_TESTS.include? t.name - puts "#[allow(dead_code)]" - else - puts "#[test]" - end - puts "fn #{t.name}() {" - puts " let mut v = str_to_test_events(#{t.src}).into_iter();" - t.ev.each do |e| - puts " assert_next!(v, TestEvent::#{e});" - end - puts "}" - puts -end - diff --git a/saphyr/tests/specs/handler_spec_test.cpp b/saphyr/tests/specs/handler_spec_test.cpp deleted file mode 100644 index aa4f7ca..0000000 --- a/saphyr/tests/specs/handler_spec_test.cpp +++ /dev/null @@ -1,1532 +0,0 @@ -#include "handler_test.h" -#include "specexamples.h" // IWYU pragma: keep -#include "yaml-cpp/yaml.h" // IWYU pragma: keep - -#include "gmock/gmock.h" -#include "gtest/gtest.h" - -using ::testing::_; - -#define EXPECT_THROW_PARSER_EXCEPTION(statement, message) \ - ASSERT_THROW(statement, ParserException); \ - try { \ - statement; \ - } catch (const ParserException& e) { \ - EXPECT_EQ(e.msg, message); \ - } - -namespace YAML { -namespace { - -typedef HandlerTest HandlerSpecTest; - -TEST_F(HandlerSpecTest, Ex2_1_SeqScalars) { - EXPECT_CALL(handler, OnDocumentStart(_)); - EXPECT_CALL(handler, OnSequenceStart(_, "?", 0, EmitterStyle::Block)); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "Mark McGwire")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "Sammy Sosa")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "Ken Griffey")); - EXPECT_CALL(handler, OnSequenceEnd()); - EXPECT_CALL(handler, OnDocumentEnd()); - Parse(ex2_1); -} - -TEST_F(HandlerSpecTest, Ex2_2_MappingScalarsToScalars) { - EXPECT_CALL(handler, OnDocumentStart(_)); - EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Block)); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "hr")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "65")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "avg")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "0.278")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "rbi")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "147")); - EXPECT_CALL(handler, OnMapEnd()); - EXPECT_CALL(handler, OnDocumentEnd()); - Parse(ex2_2); -} - -TEST_F(HandlerSpecTest, Ex2_3_MappingScalarsToSequences) { - EXPECT_CALL(handler, OnDocumentStart(_)); - EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Block)); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "american")); - EXPECT_CALL(handler, OnSequenceStart(_, "?", 0, EmitterStyle::Block)); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "Boston Red Sox")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "Detroit Tigers")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "New York Yankees")); - EXPECT_CALL(handler, OnSequenceEnd()); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "national")); - EXPECT_CALL(handler, OnSequenceStart(_, "?", 0, EmitterStyle::Block)); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "New York Mets")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "Chicago Cubs")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "Atlanta Braves")); - EXPECT_CALL(handler, OnSequenceEnd()); - EXPECT_CALL(handler, OnMapEnd()); - EXPECT_CALL(handler, OnDocumentEnd()); - Parse(ex2_3); -} - -TEST_F(HandlerSpecTest, Ex2_4_SequenceOfMappings) { - EXPECT_CALL(handler, OnDocumentStart(_)); - EXPECT_CALL(handler, OnSequenceStart(_, "?", 0, EmitterStyle::Block)); - EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Block)); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "name")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "Mark McGwire")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "hr")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "65")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "avg")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "0.278")); - EXPECT_CALL(handler, OnMapEnd()); - EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Block)); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "name")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "Sammy Sosa")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "hr")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "63")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "avg")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "0.288")); - EXPECT_CALL(handler, OnMapEnd()); - EXPECT_CALL(handler, OnSequenceEnd()); - EXPECT_CALL(handler, OnDocumentEnd()); - Parse(ex2_4); -} - -TEST_F(HandlerSpecTest, Ex2_5_SequenceOfSequences) { - EXPECT_CALL(handler, OnDocumentStart(_)); - EXPECT_CALL(handler, OnSequenceStart(_, "?", 0, EmitterStyle::Block)); - EXPECT_CALL(handler, OnSequenceStart(_, "?", 0, EmitterStyle::Flow)); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "name")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "hr")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "avg")); - EXPECT_CALL(handler, OnSequenceEnd()); - EXPECT_CALL(handler, OnSequenceStart(_, "?", 0, EmitterStyle::Flow)); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "Mark McGwire")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "65")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "0.278")); - EXPECT_CALL(handler, OnSequenceEnd()); - EXPECT_CALL(handler, OnSequenceStart(_, "?", 0, EmitterStyle::Flow)); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "Sammy Sosa")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "63")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "0.288")); - EXPECT_CALL(handler, OnSequenceEnd()); - EXPECT_CALL(handler, OnSequenceEnd()); - EXPECT_CALL(handler, OnDocumentEnd()); - Parse(ex2_5); -} - -TEST_F(HandlerSpecTest, Ex2_6_MappingOfMappings) { - EXPECT_CALL(handler, OnDocumentStart(_)); - EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Block)); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "Mark McGwire")); - EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Flow)); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "hr")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "65")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "avg")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "0.278")); - EXPECT_CALL(handler, OnMapEnd()); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "Sammy Sosa")); - EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Flow)); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "hr")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "63")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "avg")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "0.288")); - EXPECT_CALL(handler, OnMapEnd()); - EXPECT_CALL(handler, OnMapEnd()); - EXPECT_CALL(handler, OnDocumentEnd()); - Parse(ex2_6); -} - -TEST_F(HandlerSpecTest, Ex2_7_TwoDocumentsInAStream) { - EXPECT_CALL(handler, OnDocumentStart(_)); - EXPECT_CALL(handler, OnSequenceStart(_, "?", 0, EmitterStyle::Block)); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "Mark McGwire")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "Sammy Sosa")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "Ken Griffey")); - EXPECT_CALL(handler, OnSequenceEnd()); - EXPECT_CALL(handler, OnDocumentEnd()); - EXPECT_CALL(handler, OnDocumentStart(_)); - EXPECT_CALL(handler, OnSequenceStart(_, "?", 0, EmitterStyle::Block)); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "Chicago Cubs")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "St Louis Cardinals")); - EXPECT_CALL(handler, OnSequenceEnd()); - EXPECT_CALL(handler, OnDocumentEnd()); - Parse(ex2_7); -} - -TEST_F(HandlerSpecTest, Ex2_8_PlayByPlayFeed) { - EXPECT_CALL(handler, OnDocumentStart(_)); - EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Block)); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "time")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "20:03:20")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "player")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "Sammy Sosa")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "action")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "strike (miss)")); - EXPECT_CALL(handler, OnMapEnd()); - EXPECT_CALL(handler, OnDocumentEnd()); - EXPECT_CALL(handler, OnDocumentStart(_)); - EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Block)); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "time")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "20:03:47")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "player")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "Sammy Sosa")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "action")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "grand slam")); - EXPECT_CALL(handler, OnMapEnd()); - EXPECT_CALL(handler, OnDocumentEnd()); - Parse(ex2_8); -} - -TEST_F(HandlerSpecTest, Ex2_9_SingleDocumentWithTwoComments) { - EXPECT_CALL(handler, OnDocumentStart(_)); - EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Block)); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "hr")); - EXPECT_CALL(handler, OnSequenceStart(_, "?", 0, EmitterStyle::Block)); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "Mark McGwire")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "Sammy Sosa")); - EXPECT_CALL(handler, OnSequenceEnd()); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "rbi")); - EXPECT_CALL(handler, OnSequenceStart(_, "?", 0, EmitterStyle::Block)); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "Sammy Sosa")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "Ken Griffey")); - EXPECT_CALL(handler, OnSequenceEnd()); - EXPECT_CALL(handler, OnMapEnd()); - EXPECT_CALL(handler, OnDocumentEnd()); - Parse(ex2_9); -} - -TEST_F(HandlerSpecTest, Ex2_10_SimpleAnchor) { - EXPECT_CALL(handler, OnDocumentStart(_)); - EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Block)); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "hr")); - EXPECT_CALL(handler, OnSequenceStart(_, "?", 0, EmitterStyle::Block)); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "Mark McGwire")); - EXPECT_CALL(handler, OnScalar(_, "?", 1, "Sammy Sosa")); - EXPECT_CALL(handler, OnSequenceEnd()); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "rbi")); - EXPECT_CALL(handler, OnSequenceStart(_, "?", 0, EmitterStyle::Block)); - EXPECT_CALL(handler, OnAlias(_, 1)); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "Ken Griffey")); - EXPECT_CALL(handler, OnSequenceEnd()); - EXPECT_CALL(handler, OnMapEnd()); - EXPECT_CALL(handler, OnDocumentEnd()); - Parse(ex2_10); -} - -TEST_F(HandlerSpecTest, Ex2_11_MappingBetweenSequences) { - EXPECT_CALL(handler, OnDocumentStart(_)); - EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Block)); - EXPECT_CALL(handler, OnSequenceStart(_, "?", 0, EmitterStyle::Block)); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "Detroit Tigers")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "Chicago cubs")); - EXPECT_CALL(handler, OnSequenceEnd()); - EXPECT_CALL(handler, OnSequenceStart(_, "?", 0, EmitterStyle::Block)); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "2001-07-23")); - EXPECT_CALL(handler, OnSequenceEnd()); - EXPECT_CALL(handler, OnSequenceStart(_, "?", 0, EmitterStyle::Flow)); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "New York Yankees")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "Atlanta Braves")); - EXPECT_CALL(handler, OnSequenceEnd()); - EXPECT_CALL(handler, OnSequenceStart(_, "?", 0, EmitterStyle::Flow)); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "2001-07-02")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "2001-08-12")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "2001-08-14")); - EXPECT_CALL(handler, OnSequenceEnd()); - EXPECT_CALL(handler, OnMapEnd()); - EXPECT_CALL(handler, OnDocumentEnd()); - Parse(ex2_11); -} - -TEST_F(HandlerSpecTest, Ex2_12_CompactNestedMapping) { - EXPECT_CALL(handler, OnDocumentStart(_)); - EXPECT_CALL(handler, OnSequenceStart(_, "?", 0, EmitterStyle::Block)); - EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Block)); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "item")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "Super Hoop")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "quantity")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "1")); - EXPECT_CALL(handler, OnMapEnd()); - EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Block)); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "item")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "Basketball")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "quantity")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "4")); - EXPECT_CALL(handler, OnMapEnd()); - EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Block)); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "item")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "Big Shoes")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "quantity")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "1")); - EXPECT_CALL(handler, OnMapEnd()); - EXPECT_CALL(handler, OnSequenceEnd()); - EXPECT_CALL(handler, OnDocumentEnd()); - Parse(ex2_12); -} - -TEST_F(HandlerSpecTest, Ex2_13_InLiteralsNewlinesArePreserved) { - EXPECT_CALL(handler, OnDocumentStart(_)); - EXPECT_CALL(handler, OnScalar(_, "!", 0, "\\//||\\/||\n// || ||__")); - EXPECT_CALL(handler, OnDocumentEnd()); - Parse(ex2_13); -} - -TEST_F(HandlerSpecTest, Ex2_14_InFoldedScalarsNewlinesBecomeSpaces) { - EXPECT_CALL(handler, OnDocumentStart(_)); - EXPECT_CALL(handler, OnScalar(_, "!", 0, "Mark McGwire's year was crippled by a knee injury.")); - EXPECT_CALL(handler, OnDocumentEnd()); - Parse(ex2_14); -} - -TEST_F(HandlerSpecTest, Ex2_15_FoldedNewlinesArePreservedForMoreIndentedAndBlankLines) { - EXPECT_CALL(handler, OnDocumentStart(_)); - EXPECT_CALL(handler, OnScalar(_, "!", 0, "Sammy Sosa completed another fine season with great stats.\n\n 63 Home Runs\n 0.288 Batting Average\n\nWhat a year!")); - EXPECT_CALL(handler, OnDocumentEnd()); - Parse(ex2_15); -} - -TEST_F(HandlerSpecTest, Ex2_16_IndentationDeterminesScope) { - EXPECT_CALL(handler, OnDocumentStart(_)); - EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Block)); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "name")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "Mark McGwire")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "accomplishment")); - EXPECT_CALL(handler, OnScalar(_, "!", 0, "Mark set a major league home run record in 1998.\n")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "stats")); - EXPECT_CALL(handler, OnScalar(_, "!", 0, "65 Home Runs\n0.278 Batting Average\n")); - EXPECT_CALL(handler, OnMapEnd()); - EXPECT_CALL(handler, OnDocumentEnd()); - Parse(ex2_16); -} - -TEST_F(HandlerSpecTest, Ex2_17_QuotedScalars) { - EXPECT_CALL(handler, OnDocumentStart(_)); - EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Block)); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "unicode")); - EXPECT_CALL(handler, OnScalar(_, "!", 0, "Sosa did fine.\xE2\x98\xBA")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "control")); - EXPECT_CALL(handler, OnScalar(_, "!", 0, "\b1998\t1999\t2000\n")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "hex esc")); - EXPECT_CALL(handler, OnScalar(_, "!", 0, "\x0d\x0a is \r\n")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "single")); - EXPECT_CALL(handler, OnScalar(_, "!", 0, "\"Howdy!\" he cried.")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "quoted")); - EXPECT_CALL(handler, OnScalar(_, "!", 0, " # Not a 'comment'.")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "tie-fighter")); - EXPECT_CALL(handler, OnScalar(_, "!", 0, "|\\-*-/|")); - EXPECT_CALL(handler, OnMapEnd()); - EXPECT_CALL(handler, OnDocumentEnd()); - Parse(ex2_17); -} - -TEST_F(HandlerSpecTest, Ex2_18_MultiLineFlowScalars) { - EXPECT_CALL(handler, OnDocumentStart(_)); - EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Block)); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "plain")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "This unquoted scalar spans many lines.")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "quoted")); - EXPECT_CALL(handler, OnScalar(_, "!", 0, "So does this quoted scalar.\n")); - EXPECT_CALL(handler, OnMapEnd()); - EXPECT_CALL(handler, OnDocumentEnd()); - Parse(ex2_18); -} - -// TODO: 2.19 - 2.22 schema tags - -TEST_F(HandlerSpecTest, Ex2_23_VariousExplicitTags) { - EXPECT_CALL(handler, OnDocumentStart(_)); - EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Block)); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "not-date")); - EXPECT_CALL(handler, OnScalar(_, "tag:yaml.org,2002:str", 0, "2002-04-28")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "picture")); - EXPECT_CALL(handler, OnScalar(_, "tag:yaml.org,2002:binary", 0, "R0lGODlhDAAMAIQAAP//9/X\n17unp5WZmZgAAAOfn515eXv\nPz7Y6OjuDg4J+fn5OTk6enp\n56enmleECcgggoBADs=\n")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "application specific tag")); - EXPECT_CALL(handler, OnScalar(_, "!something", 0, "The semantics of the tag\nabove may be different for\ndifferent documents.")); - EXPECT_CALL(handler, OnMapEnd()); - EXPECT_CALL(handler, OnDocumentEnd()); - Parse(ex2_23); -} - -TEST_F(HandlerSpecTest, Ex2_24_GlobalTags) { - EXPECT_CALL(handler, OnDocumentStart(_)); - EXPECT_CALL(handler, OnSequenceStart(_, "tag:clarkevans.com,2002:shape", 0, EmitterStyle::Block)); - EXPECT_CALL(handler, OnMapStart(_, "tag:clarkevans.com,2002:circle", 0, EmitterStyle::Block)); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "center")); - EXPECT_CALL(handler, OnMapStart(_, "?", 1, EmitterStyle::Flow)); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "x")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "73")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "y")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "129")); - EXPECT_CALL(handler, OnMapEnd()); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "radius")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "7")); - EXPECT_CALL(handler, OnMapEnd()); - EXPECT_CALL(handler, OnMapStart(_, "tag:clarkevans.com,2002:line", 0, EmitterStyle::Block)); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "start")); - EXPECT_CALL(handler, OnAlias(_, 1)); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "finish")); - EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Flow)); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "x")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "89")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "y")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "102")); - EXPECT_CALL(handler, OnMapEnd()); - EXPECT_CALL(handler, OnMapEnd()); - EXPECT_CALL(handler, OnMapStart(_, "tag:clarkevans.com,2002:label", 0, EmitterStyle::Block)); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "start")); - EXPECT_CALL(handler, OnAlias(_, 1)); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "color")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "0xFFEEBB")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "text")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "Pretty vector drawing.")); - EXPECT_CALL(handler, OnMapEnd()); - EXPECT_CALL(handler, OnSequenceEnd()); - EXPECT_CALL(handler, OnDocumentEnd()); - Parse(ex2_24); -} - -TEST_F(HandlerSpecTest, Ex2_25_UnorderedSets) { - EXPECT_CALL(handler, OnDocumentStart(_)); - EXPECT_CALL(handler, OnMapStart(_, "tag:yaml.org,2002:set", 0, EmitterStyle::Block)); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "Mark McGwire")); - EXPECT_CALL(handler, OnNull(_, 0)); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "Sammy Sosa")); - EXPECT_CALL(handler, OnNull(_, 0)); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "Ken Griffey")); - EXPECT_CALL(handler, OnNull(_, 0)); - EXPECT_CALL(handler, OnMapEnd()); - EXPECT_CALL(handler, OnDocumentEnd()); - Parse(ex2_25); -} - -TEST_F(HandlerSpecTest, Ex2_26_OrderedMappings) { - EXPECT_CALL(handler, OnDocumentStart(_)); - EXPECT_CALL(handler, OnSequenceStart(_, "tag:yaml.org,2002:omap", 0, EmitterStyle::Block)); - EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Block)); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "Mark McGwire")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "65")); - EXPECT_CALL(handler, OnMapEnd()); - EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Block)); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "Sammy Sosa")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "63")); - EXPECT_CALL(handler, OnMapEnd()); - EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Block)); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "Ken Griffey")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "58")); - EXPECT_CALL(handler, OnMapEnd()); - EXPECT_CALL(handler, OnSequenceEnd()); - EXPECT_CALL(handler, OnDocumentEnd()); - Parse(ex2_26); -} - -TEST_F(HandlerSpecTest, Ex2_27_Invoice) { - EXPECT_CALL(handler, OnDocumentStart(_)); - EXPECT_CALL(handler, OnMapStart(_, "tag:clarkevans.com,2002:invoice", 0, EmitterStyle::Block)); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "invoice")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "34843")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "date")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "2001-01-23")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "bill-to")); - EXPECT_CALL(handler, OnMapStart(_, "?", 1, EmitterStyle::Block)); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "given")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "Chris")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "family")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "Dumars")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "address")); - EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Block)); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "lines")); - EXPECT_CALL(handler, OnScalar(_, "!", 0, "458 Walkman Dr.\nSuite #292\n")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "city")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "Royal Oak")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "state")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "MI")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "postal")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "48046")); - EXPECT_CALL(handler, OnMapEnd()); - EXPECT_CALL(handler, OnMapEnd()); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "ship-to")); - EXPECT_CALL(handler, OnAlias(_, 1)); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "product")); - EXPECT_CALL(handler, OnSequenceStart(_, "?", 0, EmitterStyle::Block)); - EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Block)); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "sku")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "BL394D")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "quantity")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "4")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "description")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "Basketball")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "price")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "450.00")); - EXPECT_CALL(handler, OnMapEnd()); - EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Block)); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "sku")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "BL4438H")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "quantity")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "1")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "description")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "Super Hoop")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "price")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "2392.00")); - EXPECT_CALL(handler, OnMapEnd()); - EXPECT_CALL(handler, OnSequenceEnd()); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "tax")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "251.42")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "total")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "4443.52")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "comments")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "Late afternoon is best. Backup contact is Nancy Billsmer @ 338-4338.")); - EXPECT_CALL(handler, OnMapEnd()); - EXPECT_CALL(handler, OnDocumentEnd()); - Parse(ex2_27); -} - -TEST_F(HandlerSpecTest, Ex2_28_LogFile) { - EXPECT_CALL(handler, OnDocumentStart(_)); - EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Block)); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "Time")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "2001-11-23 15:01:42 -5")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "User")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "ed")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "Warning")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "This is an error message for the log file")); - EXPECT_CALL(handler, OnMapEnd()); - EXPECT_CALL(handler, OnDocumentEnd()); - EXPECT_CALL(handler, OnDocumentStart(_)); - EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Block)); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "Time")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "2001-11-23 15:02:31 -5")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "User")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "ed")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "Warning")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "A slightly different error message.")); - EXPECT_CALL(handler, OnMapEnd()); - EXPECT_CALL(handler, OnDocumentEnd()); - EXPECT_CALL(handler, OnDocumentStart(_)); - EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Block)); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "Date")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "2001-11-23 15:03:17 -5")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "User")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "ed")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "Fatal")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "Unknown variable \"bar\"")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "Stack")); - EXPECT_CALL(handler, OnSequenceStart(_, "?", 0, EmitterStyle::Block)); - EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Block)); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "file")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "TopClass.py")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "line")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "23")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "code")); - EXPECT_CALL(handler, OnScalar(_, "!", 0, "x = MoreObject(\"345\\n\")\n")); - EXPECT_CALL(handler, OnMapEnd()); - EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Block)); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "file")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "MoreClass.py")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "line")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "58")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "code")); - EXPECT_CALL(handler, OnScalar(_, "!", 0, "foo = bar")); - EXPECT_CALL(handler, OnMapEnd()); - EXPECT_CALL(handler, OnSequenceEnd()); - EXPECT_CALL(handler, OnMapEnd()); - EXPECT_CALL(handler, OnDocumentEnd()); - Parse(ex2_28); -} - -// TODO: 5.1 - 5.2 BOM - -TEST_F(HandlerSpecTest, Ex5_3_BlockStructureIndicators) { - EXPECT_CALL(handler, OnDocumentStart(_)); - EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Block)); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "sequence")); - EXPECT_CALL(handler, OnSequenceStart(_, "?", 0, EmitterStyle::Block)); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "one")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "two")); - EXPECT_CALL(handler, OnSequenceEnd()); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "mapping")); - EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Block)); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "sky")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "blue")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "sea")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "green")); - EXPECT_CALL(handler, OnMapEnd()); - EXPECT_CALL(handler, OnMapEnd()); - EXPECT_CALL(handler, OnDocumentEnd()); - Parse(ex5_3); -} - -TEST_F(HandlerSpecTest, Ex5_4_FlowStructureIndicators) { - EXPECT_CALL(handler, OnDocumentStart(_)); - EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Block)); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "sequence")); - EXPECT_CALL(handler, OnSequenceStart(_, "?", 0, EmitterStyle::Flow)); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "one")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "two")); - EXPECT_CALL(handler, OnSequenceEnd()); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "mapping")); - EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Flow)); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "sky")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "blue")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "sea")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "green")); - EXPECT_CALL(handler, OnMapEnd()); - EXPECT_CALL(handler, OnMapEnd()); - EXPECT_CALL(handler, OnDocumentEnd()); - Parse(ex5_4); -} - - -TEST_F(HandlerSpecTest, Ex5_6_NodePropertyIndicators) { - EXPECT_CALL(handler, OnDocumentStart(_)); - EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Block)); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "anchored")); - EXPECT_CALL(handler, OnScalar(_, "!local", 1, "value")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "alias")); - EXPECT_CALL(handler, OnAlias(_, 1)); - EXPECT_CALL(handler, OnMapEnd()); - EXPECT_CALL(handler, OnDocumentEnd()); - Parse(ex5_6); -} - -TEST_F(HandlerSpecTest, Ex5_7_BlockScalarIndicators) { - EXPECT_CALL(handler, OnDocumentStart(_)); - EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Block)); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "literal")); - EXPECT_CALL(handler, OnScalar(_, "!", 0, "some\ntext\n")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "folded")); - EXPECT_CALL(handler, OnScalar(_, "!", 0, "some text\n")); - EXPECT_CALL(handler, OnMapEnd()); - EXPECT_CALL(handler, OnDocumentEnd()); - Parse(ex5_7); -} - -TEST_F(HandlerSpecTest, Ex5_8_QuotedScalarIndicators) { - EXPECT_CALL(handler, OnDocumentStart(_)); - EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Block)); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "single")); - EXPECT_CALL(handler, OnScalar(_, "!", 0, "text")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "double")); - EXPECT_CALL(handler, OnScalar(_, "!", 0, "text")); - EXPECT_CALL(handler, OnMapEnd()); - EXPECT_CALL(handler, OnDocumentEnd()); - Parse(ex5_8); -} - -// TODO: 5.9 directive -// TODO: 5.10 reserved indicator - -TEST_F(HandlerSpecTest, Ex5_11_LineBreakCharacters) { - EXPECT_CALL(handler, OnDocumentStart(_)); - EXPECT_CALL(handler, OnScalar(_, "!", 0, "Line break (no glyph)\nLine break (glyphed)\n")); - EXPECT_CALL(handler, OnDocumentEnd()); - Parse(ex5_11); -} - -TEST_F(HandlerSpecTest, Ex5_12_TabsAndSpaces) { - EXPECT_CALL(handler, OnDocumentStart(_)); - EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Block)); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "quoted")); - EXPECT_CALL(handler, OnScalar(_, "!", 0, "Quoted\t")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "block")); - EXPECT_CALL(handler, OnScalar(_, "!", 0, "void main() {\n\tprintf(\"Hello, world!\\n\");\n}")); - EXPECT_CALL(handler, OnMapEnd()); - EXPECT_CALL(handler, OnDocumentEnd()); - Parse(ex5_12); -} - -TEST_F(HandlerSpecTest, Ex5_13_EscapedCharacters) { - EXPECT_CALL(handler, OnDocumentStart(_)); - EXPECT_CALL(handler, OnScalar(_, "!", 0, "Fun with \x5C \x22 \x07 \x08 \x1B \x0C \x0A \x0D \x09 \x0B \x00 \x20 \xA0 \x85 \xe2\x80\xa8 \xe2\x80\xa9 A A A")); - EXPECT_CALL(handler, OnDocumentEnd()); - Parse(ex5_13); -} - -TEST_F(HandlerSpecTest, Ex5_14_InvalidEscapedCharacters) { - EXPECT_THROW_PARSER_EXCEPTION(IgnoreParse(ex5_14), std::string(ErrorMsg::INVALID_ESCAPE) + "c"); -} - -TEST_F(HandlerSpecTest, Ex6_1_IndentationSpaces) { - EXPECT_CALL(handler, OnDocumentStart(_)); - EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Block)); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "Not indented")); - EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Block)); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "By one space")); - EXPECT_CALL(handler, OnScalar(_, "!", 0, "By four\n spaces\n")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "Flow style")); - EXPECT_CALL(handler, OnSequenceStart(_, "?", 0, EmitterStyle::Flow)); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "By two")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "Also by two")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "Still by two")); - EXPECT_CALL(handler, OnSequenceEnd()); - EXPECT_CALL(handler, OnMapEnd()); - EXPECT_CALL(handler, OnMapEnd()); - EXPECT_CALL(handler, OnDocumentEnd()); - Parse(ex6_1); -} - -TEST_F(HandlerSpecTest, Ex6_2_IndentationIndicators) { - EXPECT_CALL(handler, OnDocumentStart(_)); - EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Block)); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "a")); - EXPECT_CALL(handler, OnSequenceStart(_, "?", 0, EmitterStyle::Block)); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "b")); - EXPECT_CALL(handler, OnSequenceStart(_, "?", 0, EmitterStyle::Block)); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "c")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "d")); - EXPECT_CALL(handler, OnSequenceEnd()); - EXPECT_CALL(handler, OnSequenceEnd()); - EXPECT_CALL(handler, OnMapEnd()); - EXPECT_CALL(handler, OnDocumentEnd()); - Parse(ex6_2); -} - -TEST_F(HandlerSpecTest, Ex6_3_SeparationSpaces) { - EXPECT_CALL(handler, OnDocumentStart(_)); - EXPECT_CALL(handler, OnSequenceStart(_, "?", 0, EmitterStyle::Block)); - EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Block)); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "foo")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "bar")); - EXPECT_CALL(handler, OnMapEnd()); - EXPECT_CALL(handler, OnSequenceStart(_, "?", 0, EmitterStyle::Block)); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "baz")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "baz")); - EXPECT_CALL(handler, OnSequenceEnd()); - EXPECT_CALL(handler, OnSequenceEnd()); - EXPECT_CALL(handler, OnDocumentEnd()); - Parse(ex6_3); -} - -TEST_F(HandlerSpecTest, Ex6_4_LinePrefixes) { - EXPECT_CALL(handler, OnDocumentStart(_)); - EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Block)); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "plain")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "text lines")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "quoted")); - EXPECT_CALL(handler, OnScalar(_, "!", 0, "text lines")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "block")); - EXPECT_CALL(handler, OnScalar(_, "!", 0, "text\n \tlines\n")); - EXPECT_CALL(handler, OnMapEnd()); - EXPECT_CALL(handler, OnDocumentEnd()); - Parse(ex6_4); -} - -TEST_F(HandlerSpecTest, Ex6_5_EmptyLines) { - EXPECT_CALL(handler, OnDocumentStart(_)); - EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Block)); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "Folding")); - EXPECT_CALL(handler, OnScalar(_, "!", 0, "Empty line\nas a line feed")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "Chomping")); - EXPECT_CALL(handler, OnScalar(_, "!", 0, "Clipped empty lines\n")); - EXPECT_CALL(handler, OnMapEnd()); - EXPECT_CALL(handler, OnDocumentEnd()); - Parse(ex6_5); -} - -TEST_F(HandlerSpecTest, Ex6_6_LineFolding) { - EXPECT_CALL(handler, OnDocumentStart(_)); - EXPECT_CALL(handler, OnScalar(_, "!", 0, "trimmed\n\n\nas space")); - EXPECT_CALL(handler, OnDocumentEnd()); - Parse(ex6_6); -} - -TEST_F(HandlerSpecTest, Ex6_7_BlockFolding) { - EXPECT_CALL(handler, OnDocumentStart(_)); - EXPECT_CALL(handler, OnScalar(_, "!", 0, "foo \n\n\t bar\n\nbaz\n")); - EXPECT_CALL(handler, OnDocumentEnd()); - Parse(ex6_7); -} - -TEST_F(HandlerSpecTest, Ex6_8_FlowFolding) { - EXPECT_CALL(handler, OnDocumentStart(_)); - EXPECT_CALL(handler, OnScalar(_, "!", 0, " foo\nbar\nbaz ")); - EXPECT_CALL(handler, OnDocumentEnd()); - Parse(ex6_8); -} - -TEST_F(HandlerSpecTest, Ex6_9_SeparatedComment) { - EXPECT_CALL(handler, OnDocumentStart(_)); - EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Block)); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "key")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "value")); - EXPECT_CALL(handler, OnMapEnd()); - EXPECT_CALL(handler, OnDocumentEnd()); - Parse(ex6_9); -} - - -TEST_F(HandlerSpecTest, _MultiLineComments) { - EXPECT_CALL(handler, OnDocumentStart(_)); - EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Block)); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "key")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "value")); - EXPECT_CALL(handler, OnMapEnd()); - EXPECT_CALL(handler, OnDocumentEnd()); - Parse(ex6_11); -} - -TEST_F(HandlerSpecTest, Ex6_12_SeparationSpacesII) { - EXPECT_CALL(handler, OnDocumentStart(_)); - EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Block)); - EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Flow)); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "first")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "Sammy")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "last")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "Sosa")); - EXPECT_CALL(handler, OnMapEnd()); - EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Block)); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "hr")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "65")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "avg")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "0.278")); - EXPECT_CALL(handler, OnMapEnd()); - EXPECT_CALL(handler, OnMapEnd()); - EXPECT_CALL(handler, OnDocumentEnd()); - Parse(ex6_12); -} - -TEST_F(HandlerSpecTest, Ex6_13_ReservedDirectives) { - EXPECT_CALL(handler, OnDocumentStart(_)); - EXPECT_CALL(handler, OnScalar(_, "!", 0, "foo")); - EXPECT_CALL(handler, OnDocumentEnd()); - Parse(ex6_13); -} - -TEST_F(HandlerSpecTest, Ex6_14_YAMLDirective) { - EXPECT_CALL(handler, OnDocumentStart(_)); - EXPECT_CALL(handler, OnScalar(_, "!", 0, "foo")); - EXPECT_CALL(handler, OnDocumentEnd()); - Parse(ex6_14); -} - -TEST_F(HandlerSpecTest, Ex6_15_InvalidRepeatedYAMLDirective) { - EXPECT_THROW_PARSER_EXCEPTION(IgnoreParse(ex6_15), ErrorMsg::REPEATED_YAML_DIRECTIVE); -} - -TEST_F(HandlerSpecTest, Ex6_16_TagDirective) { - EXPECT_CALL(handler, OnDocumentStart(_)); - EXPECT_CALL(handler, OnScalar(_, "tag:yaml.org,2002:str", 0, "foo")); - EXPECT_CALL(handler, OnDocumentEnd()); - Parse(ex6_16); -} - -TEST_F(HandlerSpecTest, Ex6_17_InvalidRepeatedTagDirective) { - EXPECT_THROW_PARSER_EXCEPTION(IgnoreParse(ex6_17), ErrorMsg::REPEATED_TAG_DIRECTIVE); -} - -TEST_F(HandlerSpecTest, Ex6_18_PrimaryTagHandle) { - EXPECT_CALL(handler, OnDocumentStart(_)); - EXPECT_CALL(handler, OnScalar(_, "!foo", 0, "bar")); - EXPECT_CALL(handler, OnDocumentEnd()); - EXPECT_CALL(handler, OnDocumentStart(_)); - EXPECT_CALL(handler, OnScalar(_, "tag:example.com,2000:app/foo", 0, "bar")); - EXPECT_CALL(handler, OnDocumentEnd()); - Parse(ex6_18); -} - -TEST_F(HandlerSpecTest, Ex6_19_SecondaryTagHandle) { - EXPECT_CALL(handler, OnDocumentStart(_)); - EXPECT_CALL(handler, OnScalar(_, "tag:example.com,2000:app/int", 0, "1 - 3")); - EXPECT_CALL(handler, OnDocumentEnd()); - Parse(ex6_19); -} - -TEST_F(HandlerSpecTest, Ex6_20_TagHandles) { - EXPECT_CALL(handler, OnDocumentStart(_)); - EXPECT_CALL(handler, OnScalar(_, "tag:example.com,2000:app/foo", 0, "bar")); - EXPECT_CALL(handler, OnDocumentEnd()); - Parse(ex6_20); -} - -TEST_F(HandlerSpecTest, Ex6_21_LocalTagPrefix) { - EXPECT_CALL(handler, OnDocumentStart(_)); - EXPECT_CALL(handler, OnScalar(_, "!my-light", 0, "fluorescent")); - EXPECT_CALL(handler, OnDocumentEnd()); - EXPECT_CALL(handler, OnDocumentStart(_)); - EXPECT_CALL(handler, OnScalar(_, "!my-light", 0, "green")); - EXPECT_CALL(handler, OnDocumentEnd()); - Parse(ex6_21); -} - -TEST_F(HandlerSpecTest, Ex6_22_GlobalTagPrefix) { - EXPECT_CALL(handler, OnDocumentStart(_)); - EXPECT_CALL(handler, OnSequenceStart(_, "?", 0, EmitterStyle::Block)); - EXPECT_CALL(handler, OnScalar(_, "tag:example.com,2000:app/foo", 0, "bar")); - EXPECT_CALL(handler, OnSequenceEnd()); - EXPECT_CALL(handler, OnDocumentEnd()); - Parse(ex6_22); -} - -TEST_F(HandlerSpecTest, Ex6_23_NodeProperties) { - EXPECT_CALL(handler, OnDocumentStart(_)); - EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Block)); - EXPECT_CALL(handler, OnScalar(_, "tag:yaml.org,2002:str", 1, "foo")); - EXPECT_CALL(handler, OnScalar(_, "tag:yaml.org,2002:str", 0, "bar")); - EXPECT_CALL(handler, OnScalar(_, "?", 2, "baz")); - EXPECT_CALL(handler, OnAlias(_, 1)); - EXPECT_CALL(handler, OnMapEnd()); - EXPECT_CALL(handler, OnDocumentEnd()); - Parse(ex6_23); -} - -TEST_F(HandlerSpecTest, Ex6_24_VerbatimTags) { - EXPECT_CALL(handler, OnDocumentStart(_)); - EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Block)); - EXPECT_CALL(handler, OnScalar(_, "tag:yaml.org,2002:str", 0, "foo")); - EXPECT_CALL(handler, OnScalar(_, "!bar", 0, "baz")); - EXPECT_CALL(handler, OnMapEnd()); - EXPECT_CALL(handler, OnDocumentEnd()); - Parse(ex6_24); -} - -// TODO: Implement -TEST_F(HandlerSpecTest, DISABLED_Ex6_25_InvalidVerbatimTags) { - Parse(ex6_25); - FAIL() << "not implemented yet"; -} - -TEST_F(HandlerSpecTest, Ex6_26_TagShorthands) { - EXPECT_CALL(handler, OnDocumentStart(_)); - EXPECT_CALL(handler, OnSequenceStart(_, "?", 0, EmitterStyle::Block)); - EXPECT_CALL(handler, OnScalar(_, "!local", 0, "foo")); - EXPECT_CALL(handler, OnScalar(_, "tag:yaml.org,2002:str", 0, "bar")); - EXPECT_CALL(handler, OnScalar(_, "tag:example.com,2000:app/tag%21", 0, "baz")); - EXPECT_CALL(handler, OnSequenceEnd()); - EXPECT_CALL(handler, OnDocumentEnd()); - Parse(ex6_26); -} - -TEST_F(HandlerSpecTest, Ex6_27a_InvalidTagShorthands) { - EXPECT_THROW_PARSER_EXCEPTION(IgnoreParse(ex6_27a), ErrorMsg::TAG_WITH_NO_SUFFIX); -} - -// TODO: should we reject this one (since !h! is not declared)? -TEST_F(HandlerSpecTest, DISABLED_Ex6_27b_InvalidTagShorthands) { - Parse(ex6_27b); - FAIL() << "not implemented yet"; -} - -TEST_F(HandlerSpecTest, Ex6_28_NonSpecificTags) { - EXPECT_CALL(handler, OnDocumentStart(_)); - EXPECT_CALL(handler, OnSequenceStart(_, "?", 0, EmitterStyle::Block)); - EXPECT_CALL(handler, OnScalar(_, "!", 0, "12")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "12")); - EXPECT_CALL(handler, OnScalar(_, "!", 0, "12")); - EXPECT_CALL(handler, OnSequenceEnd()); - EXPECT_CALL(handler, OnDocumentEnd()); - Parse(ex6_28); -} - -TEST_F(HandlerSpecTest, Ex6_29_NodeAnchors) { - EXPECT_CALL(handler, OnDocumentStart(_)); - EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Block)); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "First occurrence")); - EXPECT_CALL(handler, OnScalar(_, "?", 1, "Value")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "Second occurrence")); - EXPECT_CALL(handler, OnAlias(_, 1)); - EXPECT_CALL(handler, OnMapEnd()); - EXPECT_CALL(handler, OnDocumentEnd()); - Parse(ex6_29); -} - -TEST_F(HandlerSpecTest, Ex7_1_AliasNodes) { - EXPECT_CALL(handler, OnDocumentStart(_)); - EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Block)); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "First occurrence")); - EXPECT_CALL(handler, OnScalar(_, "?", 1, "Foo")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "Second occurrence")); - EXPECT_CALL(handler, OnAlias(_, 1)); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "Override anchor")); - EXPECT_CALL(handler, OnScalar(_, "?", 2, "Bar")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "Reuse anchor")); - EXPECT_CALL(handler, OnAlias(_, 2)); - EXPECT_CALL(handler, OnMapEnd()); - EXPECT_CALL(handler, OnDocumentEnd()); - Parse(ex7_1); -} - -TEST_F(HandlerSpecTest, Ex7_2_EmptyNodes) { - EXPECT_CALL(handler, OnDocumentStart(_)); - EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Flow)); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "foo")); - EXPECT_CALL(handler, OnScalar(_, "tag:yaml.org,2002:str", 0, "")); - EXPECT_CALL(handler, OnScalar(_, "tag:yaml.org,2002:str", 0, "")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "bar")); - EXPECT_CALL(handler, OnMapEnd()); - EXPECT_CALL(handler, OnDocumentEnd()); - Parse(ex7_2); -} - -TEST_F(HandlerSpecTest, Ex7_3_CompletelyEmptyNodes) { - EXPECT_CALL(handler, OnDocumentStart(_)); - EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Flow)); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "foo")); - EXPECT_CALL(handler, OnNull(_, 0)); - EXPECT_CALL(handler, OnNull(_, 0)); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "bar")); - EXPECT_CALL(handler, OnMapEnd()); - EXPECT_CALL(handler, OnDocumentEnd()); - Parse(ex7_3); -} - -TEST_F(HandlerSpecTest, Ex7_4_DoubleQuotedImplicitKeys) { - EXPECT_CALL(handler, OnDocumentStart(_)); - EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Block)); - EXPECT_CALL(handler, OnScalar(_, "!", 0, "implicit block key")); - EXPECT_CALL(handler, OnSequenceStart(_, "?", 0, EmitterStyle::Flow)); - EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Flow)); - EXPECT_CALL(handler, OnScalar(_, "!", 0, "implicit flow key")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "value")); - EXPECT_CALL(handler, OnMapEnd()); - EXPECT_CALL(handler, OnSequenceEnd()); - EXPECT_CALL(handler, OnMapEnd()); - EXPECT_CALL(handler, OnDocumentEnd()); - Parse(ex7_4); -} - -TEST_F(HandlerSpecTest, Ex7_5_DoubleQuotedLineBreaks) { - EXPECT_CALL(handler, OnDocumentStart(_)); - EXPECT_CALL(handler, OnScalar(_, "!", 0, "folded to a space,\nto a line feed, or \t \tnon-content")); - EXPECT_CALL(handler, OnDocumentEnd()); - Parse(ex7_5); -} - -TEST_F(HandlerSpecTest, Ex7_6_DoubleQuotedLines) { - EXPECT_CALL(handler, OnDocumentStart(_)); - EXPECT_CALL(handler, OnScalar(_, "!", 0, " 1st non-empty\n2nd non-empty 3rd non-empty ")); - EXPECT_CALL(handler, OnDocumentEnd()); - Parse(ex7_6); -} - -TEST_F(HandlerSpecTest, Ex7_7_SingleQuotedCharacters) { - EXPECT_CALL(handler, OnDocumentStart(_)); - EXPECT_CALL(handler, OnScalar(_, "!", 0, "here's to \"quotes\"")); - EXPECT_CALL(handler, OnDocumentEnd()); - Parse(ex7_7); -} - -TEST_F(HandlerSpecTest, Ex7_8_SingleQuotedImplicitKeys) { - EXPECT_CALL(handler, OnDocumentStart(_)); - EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Block)); - EXPECT_CALL(handler, OnScalar(_, "!", 0, "implicit block key")); - EXPECT_CALL(handler, OnSequenceStart(_, "?", 0, EmitterStyle::Flow)); - EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Flow)); - EXPECT_CALL(handler, OnScalar(_, "!", 0, "implicit flow key")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "value")); - EXPECT_CALL(handler, OnMapEnd()); - EXPECT_CALL(handler, OnSequenceEnd()); - EXPECT_CALL(handler, OnMapEnd()); - EXPECT_CALL(handler, OnDocumentEnd()); - Parse(ex7_8); -} - -TEST_F(HandlerSpecTest, Ex7_9_SingleQuotedLines) { - EXPECT_CALL(handler, OnDocumentStart(_)); - EXPECT_CALL(handler, OnScalar(_, "!", 0, " 1st non-empty\n2nd non-empty 3rd non-empty ")); - EXPECT_CALL(handler, OnDocumentEnd()); - Parse(ex7_9); -} - -TEST_F(HandlerSpecTest, Ex7_10_PlainCharacters) { - EXPECT_CALL(handler, OnDocumentStart(_)); - EXPECT_CALL(handler, OnSequenceStart(_, "?", 0, EmitterStyle::Block)); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "::vector")); - EXPECT_CALL(handler, OnScalar(_, "!", 0, ": - ()")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "Up, up, and away!")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "-123")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "http://example.com/foo#bar")); - EXPECT_CALL(handler, OnSequenceStart(_, "?", 0, EmitterStyle::Flow)); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "::vector")); - EXPECT_CALL(handler, OnScalar(_, "!", 0, ": - ()")); - EXPECT_CALL(handler, OnScalar(_, "!", 0, "Up, up, and away!")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "-123")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "http://example.com/foo#bar")); - EXPECT_CALL(handler, OnSequenceEnd()); - EXPECT_CALL(handler, OnSequenceEnd()); - EXPECT_CALL(handler, OnDocumentEnd()); - Parse(ex7_10); -} - -TEST_F(HandlerSpecTest, Ex7_11_PlainImplicitKeys) { - EXPECT_CALL(handler, OnDocumentStart(_)); - EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Block)); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "implicit block key")); - EXPECT_CALL(handler, OnSequenceStart(_, "?", 0, EmitterStyle::Flow)); - EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Flow)); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "implicit flow key")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "value")); - EXPECT_CALL(handler, OnMapEnd()); - EXPECT_CALL(handler, OnSequenceEnd()); - EXPECT_CALL(handler, OnMapEnd()); - EXPECT_CALL(handler, OnDocumentEnd()); - Parse(ex7_11); -} - -TEST_F(HandlerSpecTest, Ex7_12_PlainLines) { - EXPECT_CALL(handler, OnDocumentStart(_)); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "1st non-empty\n2nd non-empty 3rd non-empty")); - EXPECT_CALL(handler, OnDocumentEnd()); - Parse(ex7_12); -} - -TEST_F(HandlerSpecTest, Ex7_13_FlowSequence) { - EXPECT_CALL(handler, OnDocumentStart(_)); - EXPECT_CALL(handler, OnSequenceStart(_, "?", 0, EmitterStyle::Block)); - EXPECT_CALL(handler, OnSequenceStart(_, "?", 0, EmitterStyle::Flow)); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "one")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "two")); - EXPECT_CALL(handler, OnSequenceEnd()); - EXPECT_CALL(handler, OnSequenceStart(_, "?", 0, EmitterStyle::Flow)); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "three")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "four")); - EXPECT_CALL(handler, OnSequenceEnd()); - EXPECT_CALL(handler, OnSequenceEnd()); - EXPECT_CALL(handler, OnDocumentEnd()); - Parse(ex7_13); -} - -TEST_F(HandlerSpecTest, Ex7_14_FlowSequenceEntries) { - EXPECT_CALL(handler, OnDocumentStart(_)); - EXPECT_CALL(handler, OnSequenceStart(_, "?", 0, EmitterStyle::Flow)); - EXPECT_CALL(handler, OnScalar(_, "!", 0, "double quoted")); - EXPECT_CALL(handler, OnScalar(_, "!", 0, "single quoted")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "plain text")); - EXPECT_CALL(handler, OnSequenceStart(_, "?", 0, EmitterStyle::Flow)); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "nested")); - EXPECT_CALL(handler, OnSequenceEnd()); - EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Flow)); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "single")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "pair")); - EXPECT_CALL(handler, OnMapEnd()); - EXPECT_CALL(handler, OnSequenceEnd()); - EXPECT_CALL(handler, OnDocumentEnd()); - Parse(ex7_14); -} - -TEST_F(HandlerSpecTest, Ex7_15_FlowMappings) { - EXPECT_CALL(handler, OnDocumentStart(_)); - EXPECT_CALL(handler, OnSequenceStart(_, "?", 0, EmitterStyle::Block)); - EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Flow)); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "one")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "two")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "three")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "four")); - EXPECT_CALL(handler, OnMapEnd()); - EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Flow)); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "five")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "six")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "seven")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "eight")); - EXPECT_CALL(handler, OnMapEnd()); - EXPECT_CALL(handler, OnSequenceEnd()); - EXPECT_CALL(handler, OnDocumentEnd()); - Parse(ex7_15); -} - -TEST_F(HandlerSpecTest, Ex7_16_FlowMappingEntries) { - EXPECT_CALL(handler, OnDocumentStart(_)); - EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Flow)); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "explicit")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "entry")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "implicit")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "entry")); - EXPECT_CALL(handler, OnNull(_, 0)); - EXPECT_CALL(handler, OnNull(_, 0)); - EXPECT_CALL(handler, OnMapEnd()); - EXPECT_CALL(handler, OnDocumentEnd()); - Parse(ex7_16); -} - -TEST_F(HandlerSpecTest, Ex7_17_FlowMappingSeparateValues) { - EXPECT_CALL(handler, OnDocumentStart(_)); - EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Flow)); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "unquoted")); - EXPECT_CALL(handler, OnScalar(_, "!", 0, "separate")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "http://foo.com")); - EXPECT_CALL(handler, OnNull(_, 0)); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "omitted value")); - EXPECT_CALL(handler, OnNull(_, 0)); - EXPECT_CALL(handler, OnNull(_, 0)); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "omitted key")); - EXPECT_CALL(handler, OnMapEnd()); - EXPECT_CALL(handler, OnDocumentEnd()); - Parse(ex7_17); -} - -TEST_F(HandlerSpecTest, Ex7_18_FlowMappingAdjacentValues) { - EXPECT_CALL(handler, OnDocumentStart(_)); - EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Flow)); - EXPECT_CALL(handler, OnScalar(_, "!", 0, "adjacent")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "value")); - EXPECT_CALL(handler, OnScalar(_, "!", 0, "readable")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "value")); - EXPECT_CALL(handler, OnScalar(_, "!", 0, "empty")); - EXPECT_CALL(handler, OnNull(_, 0)); - EXPECT_CALL(handler, OnMapEnd()); - EXPECT_CALL(handler, OnDocumentEnd()); - Parse(ex7_18); -} - -TEST_F(HandlerSpecTest, Ex7_19_SinglePairFlowMappings) { - EXPECT_CALL(handler, OnDocumentStart(_)); - EXPECT_CALL(handler, OnSequenceStart(_, "?", 0, EmitterStyle::Flow)); - EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Flow)); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "foo")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "bar")); - EXPECT_CALL(handler, OnMapEnd()); - EXPECT_CALL(handler, OnSequenceEnd()); - EXPECT_CALL(handler, OnDocumentEnd()); - Parse(ex7_19); -} - -TEST_F(HandlerSpecTest, Ex7_20_SinglePairExplicitEntry) { - EXPECT_CALL(handler, OnDocumentStart(_)); - EXPECT_CALL(handler, OnSequenceStart(_, "?", 0, EmitterStyle::Flow)); - EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Flow)); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "foo bar")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "baz")); - EXPECT_CALL(handler, OnMapEnd()); - EXPECT_CALL(handler, OnSequenceEnd()); - EXPECT_CALL(handler, OnDocumentEnd()); - Parse(ex7_20); -} - -TEST_F(HandlerSpecTest, Ex7_21_SinglePairImplicitEntries) { - EXPECT_CALL(handler, OnDocumentStart(_)); - EXPECT_CALL(handler, OnSequenceStart(_, "?", 0, EmitterStyle::Block)); - EXPECT_CALL(handler, OnSequenceStart(_, "?", 0, EmitterStyle::Flow)); - EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Flow)); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "YAML")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "separate")); - EXPECT_CALL(handler, OnMapEnd()); - EXPECT_CALL(handler, OnSequenceEnd()); - EXPECT_CALL(handler, OnSequenceStart(_, "?", 0, EmitterStyle::Flow)); - EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Default)); - EXPECT_CALL(handler, OnNull(_, 0)); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "empty key entry")); - EXPECT_CALL(handler, OnMapEnd()); - EXPECT_CALL(handler, OnSequenceEnd()); - EXPECT_CALL(handler, OnSequenceStart(_, "?", 0, EmitterStyle::Flow)); - EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Flow)); - EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Flow)); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "JSON")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "like")); - EXPECT_CALL(handler, OnMapEnd()); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "adjacent")); - EXPECT_CALL(handler, OnMapEnd()); - EXPECT_CALL(handler, OnSequenceEnd()); - EXPECT_CALL(handler, OnSequenceEnd()); - EXPECT_CALL(handler, OnDocumentEnd()); - Parse(ex7_21); -} - -TEST_F(HandlerSpecTest, Ex7_22_InvalidImplicitKeys) { - EXPECT_THROW_PARSER_EXCEPTION(IgnoreParse(ex7_22), ErrorMsg::END_OF_SEQ_FLOW); -} - -TEST_F(HandlerSpecTest, Ex7_23_FlowContent) { - EXPECT_CALL(handler, OnDocumentStart(_)); - EXPECT_CALL(handler, OnSequenceStart(_, "?", 0, EmitterStyle::Block)); - EXPECT_CALL(handler, OnSequenceStart(_, "?", 0, EmitterStyle::Flow)); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "a")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "b")); - EXPECT_CALL(handler, OnSequenceEnd()); - EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Flow)); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "a")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "b")); - EXPECT_CALL(handler, OnMapEnd()); - EXPECT_CALL(handler, OnScalar(_, "!", 0, "a")); - EXPECT_CALL(handler, OnScalar(_, "!", 0, "b")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "c")); - EXPECT_CALL(handler, OnSequenceEnd()); - EXPECT_CALL(handler, OnDocumentEnd()); - Parse(ex7_23); -} - -TEST_F(HandlerSpecTest, Ex7_24_FlowNodes) { - EXPECT_CALL(handler, OnDocumentStart(_)); - EXPECT_CALL(handler, OnSequenceStart(_, "?", 0, EmitterStyle::Block)); - EXPECT_CALL(handler, OnScalar(_, "tag:yaml.org,2002:str", 0, "a")); - EXPECT_CALL(handler, OnScalar(_, "!", 0, "b")); - EXPECT_CALL(handler, OnScalar(_, "!", 1, "c")); - EXPECT_CALL(handler, OnAlias(_, 1)); - EXPECT_CALL(handler, OnScalar(_, "tag:yaml.org,2002:str", 0, "")); - EXPECT_CALL(handler, OnSequenceEnd()); - EXPECT_CALL(handler, OnDocumentEnd()); - Parse(ex7_24); -} - -TEST_F(HandlerSpecTest, Ex8_1_BlockScalarHeader) { - EXPECT_CALL(handler, OnDocumentStart(_)); - EXPECT_CALL(handler, OnSequenceStart(_, "?", 0, EmitterStyle::Block)); - EXPECT_CALL(handler, OnScalar(_, "!", 0, "literal\n")); - EXPECT_CALL(handler, OnScalar(_, "!", 0, " folded\n")); - EXPECT_CALL(handler, OnScalar(_, "!", 0, "keep\n\n")); - EXPECT_CALL(handler, OnScalar(_, "!", 0, " strip")); - EXPECT_CALL(handler, OnSequenceEnd()); - EXPECT_CALL(handler, OnDocumentEnd()); - Parse(ex8_1); -} - -TEST_F(HandlerSpecTest, Ex8_2_BlockIndentationHeader) { - EXPECT_CALL(handler, OnDocumentStart(_)); - EXPECT_CALL(handler, OnSequenceStart(_, "?", 0, EmitterStyle::Block)); - EXPECT_CALL(handler, OnScalar(_, "!", 0, "detected\n")); - EXPECT_CALL(handler, OnScalar(_, "!", 0, "\n\n# detected\n")); - EXPECT_CALL(handler, OnScalar(_, "!", 0, " explicit\n")); - EXPECT_CALL(handler, OnScalar(_, "!", 0, "\t\ndetected\n")); - EXPECT_CALL(handler, OnSequenceEnd()); - EXPECT_CALL(handler, OnDocumentEnd()); - Parse(ex8_2); -} - -TEST_F(HandlerSpecTest, Ex8_3a_InvalidBlockScalarIndentationIndicators) { - EXPECT_THROW_PARSER_EXCEPTION(IgnoreParse(ex8_3a), ErrorMsg::END_OF_SEQ); -} - -TEST_F(HandlerSpecTest, Ex8_3b_InvalidBlockScalarIndentationIndicators) { - EXPECT_THROW_PARSER_EXCEPTION(IgnoreParse(ex8_3b), ErrorMsg::END_OF_SEQ); -} - -TEST_F(HandlerSpecTest, Ex8_3c_InvalidBlockScalarIndentationIndicators) { - EXPECT_THROW_PARSER_EXCEPTION(IgnoreParse(ex8_3c), ErrorMsg::END_OF_SEQ); -} - -TEST_F(HandlerSpecTest, Ex8_4_ChompingFinalLineBreak) { - EXPECT_CALL(handler, OnDocumentStart(_)); - EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Block)); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "strip")); - EXPECT_CALL(handler, OnScalar(_, "!", 0, "text")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "clip")); - EXPECT_CALL(handler, OnScalar(_, "!", 0, "text\n")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "keep")); - EXPECT_CALL(handler, OnScalar(_, "!", 0, "text\n")); - EXPECT_CALL(handler, OnMapEnd()); - EXPECT_CALL(handler, OnDocumentEnd()); - Parse(ex8_4); -} - -TEST_F(HandlerSpecTest, DISABLED_Ex8_5_ChompingTrailingLines) { - EXPECT_CALL(handler, OnDocumentStart(_)); - EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Block)); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "strip")); - EXPECT_CALL(handler, OnScalar(_, "!", 0, "# text")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "clip")); - EXPECT_CALL(handler, OnScalar(_, "!", 0, "# text\n")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "keep")); - // NOTE: I believe this is a bug in the YAML spec - - // it should be "# text\n\n" - EXPECT_CALL(handler, OnScalar(_, "!", 0, "# text\n")); - EXPECT_CALL(handler, OnMapEnd()); - EXPECT_CALL(handler, OnDocumentEnd()); - Parse(ex8_5); -} - -TEST_F(HandlerSpecTest, Ex8_6_EmptyScalarChomping) { - EXPECT_CALL(handler, OnDocumentStart(_)); - EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Block)); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "strip")); - EXPECT_CALL(handler, OnScalar(_, "!", 0, "")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "clip")); - EXPECT_CALL(handler, OnScalar(_, "!", 0, "")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "keep")); - EXPECT_CALL(handler, OnScalar(_, "!", 0, "\n")); - EXPECT_CALL(handler, OnMapEnd()); - EXPECT_CALL(handler, OnDocumentEnd()); - Parse(ex8_6); -} - -TEST_F(HandlerSpecTest, Ex8_7_LiteralScalar) { - EXPECT_CALL(handler, OnDocumentStart(_)); - EXPECT_CALL(handler, OnScalar(_, "!", 0, "literal\n\ttext\n")); - EXPECT_CALL(handler, OnDocumentEnd()); - Parse(ex8_7); -} - -TEST_F(HandlerSpecTest, Ex8_8_LiteralContent) { - EXPECT_CALL(handler, OnDocumentStart(_)); - EXPECT_CALL(handler, OnScalar(_, "!", 0, "\n\nliteral\n \n\ntext\n")); - EXPECT_CALL(handler, OnDocumentEnd()); - Parse(ex8_8); -} - -TEST_F(HandlerSpecTest, Ex8_9_FoldedScalar) { - EXPECT_CALL(handler, OnDocumentStart(_)); - EXPECT_CALL(handler, OnScalar(_, "!", 0, "folded text\n")); - EXPECT_CALL(handler, OnDocumentEnd()); - Parse(ex8_9); -} - -TEST_F(HandlerSpecTest, Ex8_10_FoldedLines) { - EXPECT_CALL(handler, OnDocumentStart(_)); - EXPECT_CALL(handler, OnScalar(_, "!", 0, "\nfolded line\nnext line\n * bullet\n\n * list\n * lines\n\nlast line\n")); - EXPECT_CALL(handler, OnDocumentEnd()); - Parse(ex8_10); -} - -TEST_F(HandlerSpecTest, Ex8_11_MoreIndentedLines) { - EXPECT_CALL(handler, OnDocumentStart(_)); - EXPECT_CALL(handler, OnScalar(_, "!", 0, "\nfolded line\nnext line\n * bullet\n\n * list\n * lines\n\nlast line\n")); - EXPECT_CALL(handler, OnDocumentEnd()); - Parse(ex8_11); -} - -TEST_F(HandlerSpecTest, Ex8_12_EmptySeparationLines) { - EXPECT_CALL(handler, OnDocumentStart(_)); - EXPECT_CALL(handler, OnScalar(_, "!", 0, "\nfolded line\nnext line\n * bullet\n\n * list\n * lines\n\nlast line\n")); - EXPECT_CALL(handler, OnDocumentEnd()); - Parse(ex8_12); -} - -TEST_F(HandlerSpecTest, Ex8_13_FinalEmptyLines) { - EXPECT_CALL(handler, OnDocumentStart(_)); - EXPECT_CALL(handler, OnScalar(_, "!", 0, "\nfolded line\nnext line\n * bullet\n\n * list\n * lines\n\nlast line\n")); - EXPECT_CALL(handler, OnDocumentEnd()); - Parse(ex8_13); -} - -TEST_F(HandlerSpecTest, Ex8_14_BlockSequence) { - EXPECT_CALL(handler, OnDocumentStart(_)); - EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Block)); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "block sequence")); - EXPECT_CALL(handler, OnSequenceStart(_, "?", 0, EmitterStyle::Block)); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "one")); - EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Block)); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "two")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "three")); - EXPECT_CALL(handler, OnMapEnd()); - EXPECT_CALL(handler, OnSequenceEnd()); - EXPECT_CALL(handler, OnMapEnd()); - EXPECT_CALL(handler, OnDocumentEnd()); - Parse(ex8_14); -} - -TEST_F(HandlerSpecTest, Ex8_15_BlockSequenceEntryTypes) { - EXPECT_CALL(handler, OnDocumentStart(_)); - EXPECT_CALL(handler, OnSequenceStart(_, "?", 0, EmitterStyle::Block)); - EXPECT_CALL(handler, OnNull(_, 0)); - EXPECT_CALL(handler, OnScalar(_, "!", 0, "block node\n")); - EXPECT_CALL(handler, OnSequenceStart(_, "?", 0, EmitterStyle::Block)); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "one")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "two")); - EXPECT_CALL(handler, OnSequenceEnd()); - EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Block)); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "one")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "two")); - EXPECT_CALL(handler, OnMapEnd()); - EXPECT_CALL(handler, OnSequenceEnd()); - EXPECT_CALL(handler, OnDocumentEnd()); - Parse(ex8_15); -} - -TEST_F(HandlerSpecTest, Ex8_16_BlockMappings) { - EXPECT_CALL(handler, OnDocumentStart(_)); - EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Block)); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "block mapping")); - EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Block)); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "key")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "value")); - EXPECT_CALL(handler, OnMapEnd()); - EXPECT_CALL(handler, OnMapEnd()); - EXPECT_CALL(handler, OnDocumentEnd()); - Parse(ex8_16); -} - -TEST_F(HandlerSpecTest, Ex8_17_ExplicitBlockMappingEntries) { - EXPECT_CALL(handler, OnDocumentStart(_)); - EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Block)); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "explicit key")); - EXPECT_CALL(handler, OnNull(_, 0)); - EXPECT_CALL(handler, OnScalar(_, "!", 0, "block key\n")); - EXPECT_CALL(handler, OnSequenceStart(_, "?", 0, EmitterStyle::Block)); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "one")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "two")); - EXPECT_CALL(handler, OnSequenceEnd()); - EXPECT_CALL(handler, OnMapEnd()); - EXPECT_CALL(handler, OnDocumentEnd()); - Parse(ex8_17); -} - -TEST_F(HandlerSpecTest, Ex8_18_ImplicitBlockMappingEntries) { - EXPECT_CALL(handler, OnDocumentStart(_)); - EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Block)); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "plain key")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "in-line value")); - EXPECT_CALL(handler, OnNull(_, 0)); - EXPECT_CALL(handler, OnNull(_, 0)); - EXPECT_CALL(handler, OnScalar(_, "!", 0, "quoted key")); - EXPECT_CALL(handler, OnSequenceStart(_, "?", 0, EmitterStyle::Block)); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "entry")); - EXPECT_CALL(handler, OnSequenceEnd()); - EXPECT_CALL(handler, OnMapEnd()); - EXPECT_CALL(handler, OnDocumentEnd()); - Parse(ex8_18); -} - -TEST_F(HandlerSpecTest, Ex8_19_CompactBlockMappings) { - EXPECT_CALL(handler, OnDocumentStart(_)); - EXPECT_CALL(handler, OnSequenceStart(_, "?", 0, EmitterStyle::Block)); - EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Block)); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "sun")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "yellow")); - EXPECT_CALL(handler, OnMapEnd()); - EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Block)); - EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Block)); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "earth")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "blue")); - EXPECT_CALL(handler, OnMapEnd()); - EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Block)); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "moon")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "white")); - EXPECT_CALL(handler, OnMapEnd()); - EXPECT_CALL(handler, OnMapEnd()); - EXPECT_CALL(handler, OnSequenceEnd()); - EXPECT_CALL(handler, OnDocumentEnd()); - Parse(ex8_19); -} - -TEST_F(HandlerSpecTest, Ex8_20_BlockNodeTypes) { - EXPECT_CALL(handler, OnDocumentStart(_)); - EXPECT_CALL(handler, OnSequenceStart(_, "?", 0, EmitterStyle::Block)); - EXPECT_CALL(handler, OnScalar(_, "!", 0, "flow in block")); - EXPECT_CALL(handler, OnScalar(_, "!", 0, "Block scalar\n")); - EXPECT_CALL(handler, OnMapStart(_, "tag:yaml.org,2002:map", 0, EmitterStyle::Block)); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "foo")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "bar")); - EXPECT_CALL(handler, OnMapEnd()); - EXPECT_CALL(handler, OnSequenceEnd()); - EXPECT_CALL(handler, OnDocumentEnd()); - Parse(ex8_20); -} - -TEST_F(HandlerSpecTest, DISABLED_Ex8_21_BlockScalarNodes) { - EXPECT_CALL(handler, OnDocumentStart(_)); - EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Block)); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "literal")); - // NOTE: I believe this is a bug in the YAML spec - // - it should be "value\n" - EXPECT_CALL(handler, OnScalar(_, "!", 0, "value")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "folded")); - EXPECT_CALL(handler, OnScalar(_, "!foo", 0, "value")); - EXPECT_CALL(handler, OnMapEnd()); - EXPECT_CALL(handler, OnDocumentEnd()); - Parse(ex8_21); -} - -TEST_F(HandlerSpecTest, Ex8_22_BlockCollectionNodes) { - EXPECT_CALL(handler, OnDocumentStart(_)); - EXPECT_CALL(handler, OnMapStart(_, "?", 0, EmitterStyle::Block)); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "sequence")); - EXPECT_CALL(handler, OnSequenceStart(_, "tag:yaml.org,2002:seq", 0, EmitterStyle::Block)); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "entry")); - EXPECT_CALL(handler, OnSequenceStart(_, "tag:yaml.org,2002:seq", 0, EmitterStyle::Block)); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "nested")); - EXPECT_CALL(handler, OnSequenceEnd()); - EXPECT_CALL(handler, OnSequenceEnd()); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "mapping")); - EXPECT_CALL(handler, OnMapStart(_, "tag:yaml.org,2002:map", 0, EmitterStyle::Block)); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "foo")); - EXPECT_CALL(handler, OnScalar(_, "?", 0, "bar")); - EXPECT_CALL(handler, OnMapEnd()); - EXPECT_CALL(handler, OnMapEnd()); - EXPECT_CALL(handler, OnDocumentEnd()); - Parse(ex8_22); -} -} -} diff --git a/saphyr/tests/specs/libyaml_fail-01.yaml b/saphyr/tests/specs/libyaml_fail-01.yaml deleted file mode 100644 index 5e6c0dc..0000000 --- a/saphyr/tests/specs/libyaml_fail-01.yaml +++ /dev/null @@ -1,6 +0,0 @@ -# Ex 8.18 -plain key: in-line value -: # Both empty -"quoted key": -- entry - diff --git a/saphyr/tests/specs/libyaml_fail-02.yaml b/saphyr/tests/specs/libyaml_fail-02.yaml deleted file mode 100644 index 60074de..0000000 --- a/saphyr/tests/specs/libyaml_fail-02.yaml +++ /dev/null @@ -1,7 +0,0 @@ -# Ex 7.17 -{ -unqoted : "separate", -http://foo.com, -omitted value:, -: omitted key, -} diff --git a/saphyr/tests/specs/libyaml_fail-03.yaml b/saphyr/tests/specs/libyaml_fail-03.yaml deleted file mode 100644 index fc821dc..0000000 --- a/saphyr/tests/specs/libyaml_fail-03.yaml +++ /dev/null @@ -1,5 +0,0 @@ -# ex 7.2 -{ - foo : !!str, - !!str : bar, -} From 15049196e03fa2f4157302254d9b04a82d215619 Mon Sep 17 00:00:00 2001 From: Gregor Purdy Date: Thu, 21 Mar 2024 05:48:47 -0700 Subject: [PATCH 336/380] Typo fix --- saphyr/documents/2024-03-15-FirstRelease.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/saphyr/documents/2024-03-15-FirstRelease.md b/saphyr/documents/2024-03-15-FirstRelease.md index 519a50d..18bd942 100644 --- a/saphyr/documents/2024-03-15-FirstRelease.md +++ b/saphyr/documents/2024-03-15-FirstRelease.md @@ -55,7 +55,7 @@ Here is in the end the performance breakdown: ![Comparison of the performance between `yaml-rust`, `yaml-rust2` and the C `libfyaml`. `yaml-rust2` is faster in every test than `yaml-rust`, but `libfyaml` remains faster overall.](./img/benchmarks-v0.6.svg) -Here is a shot description of what the files contain: +Here is a short description of what the files contain: * `big`: A large array of records with few fields. One of the fields is a description, a large text block scalar spanning multiple lines. Most of the scanning happens in block scalars. From 759db28bfbecde8ebc20347d8637c61f945e30d8 Mon Sep 17 00:00:00 2001 From: Gregor Purdy Date: Thu, 21 Mar 2024 05:51:01 -0700 Subject: [PATCH 337/380] Typo fix 2 in 2024-03-15-FirstRelease.md --- saphyr/documents/2024-03-15-FirstRelease.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/saphyr/documents/2024-03-15-FirstRelease.md b/saphyr/documents/2024-03-15-FirstRelease.md index 18bd942..696712f 100644 --- a/saphyr/documents/2024-03-15-FirstRelease.md +++ b/saphyr/documents/2024-03-15-FirstRelease.md @@ -70,7 +70,7 @@ I'd like to end this section with a small disclaimer: I am not a benchmark exper of files that would highlight how the parser performs when stressed different ways. I invite you to take a look at [the code generating the YAML files](https://github.com/Ethiraric/yaml-rust2/tree/master/tools/gen_large_yaml) and, if you are more knowledgeable than I am, improve upon them. `yaml-rust2` performs better with these files because those are the -ones I could work with. If you find a fil with which `yaml-rust2` is slower than `yaml-rust`, do file an issue! +ones I could work with. If you find a file with which `yaml-rust2` is slower than `yaml-rust`, do file an issue! ## This release ### Improvements from `yaml-rust` From 5b1af84dbec4be96bee1da9c80c71791351566c8 Mon Sep 17 00:00:00 2001 From: Gregor Purdy Date: Thu, 21 Mar 2024 05:52:07 -0700 Subject: [PATCH 338/380] Typo fix 3 in 2024-03-15-FirstRelease.md --- saphyr/documents/2024-03-15-FirstRelease.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/saphyr/documents/2024-03-15-FirstRelease.md b/saphyr/documents/2024-03-15-FirstRelease.md index 696712f..55d1d73 100644 --- a/saphyr/documents/2024-03-15-FirstRelease.md +++ b/saphyr/documents/2024-03-15-FirstRelease.md @@ -79,7 +79,7 @@ This release should improve over `yaml-rust` over 3 major points: * Performance: We all love fast software. I want to help you achieve it. I haven't managed to make this crate twice as fast, but you should notice a 15-20% improvement in performance. * Compliance: You may not notice it, since I didn't know most of the bugs I fixed were bugs to begin with, but this - crate should now be fully YAML-comliant. + crate should now be fully YAML-compliant. * Documentation: The documentation of `yaml-rust` is unfortunately incomplete. Documentation here is not exhaustive, but most items are documented. Notably, private items are documented, making it much easier to understand where something happens. There are also in-code comments that help figure out what is going on under the hood. From 6052436852e4ade7fe5b0d8f7397f9e1f9a1e0d7 Mon Sep 17 00:00:00 2001 From: Gregor Purdy Date: Thu, 21 Mar 2024 05:56:05 -0700 Subject: [PATCH 339/380] Typo fix 4 in 2024-03-15-FirstRelease.md --- saphyr/documents/2024-03-15-FirstRelease.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/saphyr/documents/2024-03-15-FirstRelease.md b/saphyr/documents/2024-03-15-FirstRelease.md index 55d1d73..0dc1af1 100644 --- a/saphyr/documents/2024-03-15-FirstRelease.md +++ b/saphyr/documents/2024-03-15-FirstRelease.md @@ -153,7 +153,7 @@ Work on this crate is far from over. I will try and match `libfyaml`'s performan against it, and I wouldn't have guessed it to outperform `yaml-rust2` that much. If you're interested in upgrading your `yaml-rust` crate, please do take a look at [davvid](https://github.com/davvid)'s -[fork of `yaml-rust`](https://github.com/davvid/yaml-rust). Very recent developements on this crate sparked from an +[fork of `yaml-rust`](https://github.com/davvid/yaml-rust). Very recent developments on this crate sparked from an [issue on advisory-db](https://github.com/rustsec/advisory-db/issues/1921) about the unmaintained state of `yaml-rust`. I hope it will be that YAML in Rust will improve following this issue. From eddea11e014e0ec58816558dab0dc4c6190771c9 Mon Sep 17 00:00:00 2001 From: Ethiraric Date: Sat, 23 Mar 2024 16:34:35 +0100 Subject: [PATCH 340/380] Switch from `encoding` to `encoding_rs`. See https://github.com/rustsec/advisory-db/issues/1605. --- saphyr/CHANGELOG.md | 22 +++++++ saphyr/Cargo.toml | 2 +- saphyr/src/yaml.rs | 150 ++++++++++++++++++++++++++++++++++++++------ 3 files changed, 155 insertions(+), 19 deletions(-) diff --git a/saphyr/CHANGELOG.md b/saphyr/CHANGELOG.md index c348e51..20c4678 100644 --- a/saphyr/CHANGELOG.md +++ b/saphyr/CHANGELOG.md @@ -1,5 +1,27 @@ # Changelog +## Upcoming +### Breaking changes + - The `encoding` library has been replaced with `encoding_rs`. If you use the + `trap` of `YamlDecoder`, this change will make your code not compile. + An additional enum `YamlDecoderTrap` has been added to abstract the + underlying library and avoid breaking changes in the future. This + additionally lifts the `encoding` dependency on _your_ project if you were + using that feature. + - The `encoding::types::DecoderTrap` has been replaced with `YamlDecoderTrap`. + - The signature of the function for `YamlDecoderTrap::Call` has changed: + ```rs + // Before, with `encoding::types::DecoderTrap::Call` + fn(_: &mut encoding::RawDecoder, _: &[u8], _: &mut encoding::StringWriter) -> bool; + // Now, with `YamlDecoderTrap::Call` + fn(_: u8, _: u8, _: &[u8], _: &mut String) -> ControlFlow>; + ``` + Please refer to the `YamlDecoderTrapFn` documentation for more details. + +**Features**: + +**Development**: + ## v0.7.0 **Features**: diff --git a/saphyr/Cargo.toml b/saphyr/Cargo.toml index bf96f58..c0bc62e 100644 --- a/saphyr/Cargo.toml +++ b/saphyr/Cargo.toml @@ -15,7 +15,7 @@ edition = "2021" [dependencies] arraydeque = "0.5.1" -encoding = "0.2" +encoding_rs = "0.8.33" hashlink = "0.8" [dev-dependencies] diff --git a/saphyr/src/yaml.rs b/saphyr/src/yaml.rs index e310795..2ef4376 100644 --- a/saphyr/src/yaml.rs +++ b/saphyr/src/yaml.rs @@ -2,8 +2,11 @@ #![allow(clippy::module_name_repetitions)] +use std::borrow::Cow; +use std::ops::ControlFlow; use std::{collections::BTreeMap, convert::TryFrom, mem, ops::Index}; +use encoding_rs::{Decoder, DecoderResult, Encoding}; use hashlink::LinkedHashMap; use crate::parser::{Event, MarkedEventReceiver, Parser, Tag}; @@ -238,11 +241,51 @@ impl YamlLoader { } } +/// The signature of the function to call when using [`YAMLDecodingTrap::Call`]. +/// +/// The arguments are as follows: +/// * `malformation_length`: The length of the sequence the decoder failed to decode. +/// * `bytes_read_after_malformation`: The number of lookahead bytes the decoder consumed after +/// the malformation. +/// * `input_at_malformation`: What the input buffer is at the malformation. +/// This is the buffer starting at the malformation. The first `malformation_length` bytes are +/// the problematic sequence. The following `bytes_read_after_malformation` are already stored +/// in the decoder and will not be re-fed. +/// * `output`: The output string. +/// +/// The function must modify `output` as it feels is best. For instance, one could recreate the +/// behavior of [`YAMLDecodingTrap::Ignore`] with an empty function, [`YAMLDecodingTrap::Replace`] +/// by pushing a `\u{FFFD}` into `output` and [`YAMLDecodingTrap::Strict`] by returning +/// [`ControlFlow::Break`]. +/// +/// # Returns +/// The function must return [`ControlFlow::Continue`] if decoding may continue or +/// [`ControlFlow::Break`] if decoding must be aborted. An optional error string may be supplied. +pub type YAMLDecodingTrapFn = fn( + malformation_length: u8, + bytes_read_after_malformation: u8, + input_at_malformation: &[u8], + output: &mut String, +) -> ControlFlow>; + +/// The behavior [`YamlDecoder`] must have when an decoding error occurs. +#[derive(Copy, Clone, PartialEq, Eq)] +pub enum YAMLDecodingTrap { + /// Ignore the offending bytes, remove them from the output. + Ignore, + /// Error out. + Strict, + /// Replace them with the Unicode REPLACEMENT CHARACTER. + Replace, + /// Call the user-supplied function upon decoding malformation. + Call(YAMLDecodingTrapFn), +} + /// `YamlDecoder` is a `YamlLoader` builder that allows you to supply your own encoding error trap. /// For example, to read a YAML file while ignoring Unicode decoding errors you can set the /// `encoding_trap` to `encoding::DecoderTrap::Ignore`. /// ```rust -/// use yaml_rust2::yaml::YamlDecoder; +/// use yaml_rust2::yaml::{YamlDecoder, YAMLDecodingTrap}; /// /// let string = b"--- /// a\xa9: 1 @@ -250,13 +293,13 @@ impl YamlLoader { /// c: [1, 2] /// "; /// let out = YamlDecoder::read(string as &[u8]) -/// .encoding_trap(encoding::DecoderTrap::Ignore) +/// .encoding_trap(YAMLDecodingTrap::Ignore) /// .decode() /// .unwrap(); /// ``` pub struct YamlDecoder { source: T, - trap: encoding::types::DecoderTrap, + trap: YAMLDecodingTrap, } impl YamlDecoder { @@ -264,12 +307,12 @@ impl YamlDecoder { pub fn read(source: T) -> YamlDecoder { YamlDecoder { source, - trap: encoding::DecoderTrap::Strict, + trap: YAMLDecodingTrap::Strict, } } /// Set the behavior of the decoder when the encoding is invalid. - pub fn encoding_trap(&mut self, trap: encoding::types::DecoderTrap) -> &mut Self { + pub fn encoding_trap(&mut self, trap: YAMLDecodingTrap) -> &mut Self { self.trap = trap; self } @@ -282,13 +325,84 @@ impl YamlDecoder { let mut buffer = Vec::new(); self.source.read_to_end(&mut buffer)?; - // Decodes the input buffer using either UTF-8, UTF-16LE or UTF-16BE depending on the BOM codepoint. - // If the buffer doesn't start with a BOM codepoint, it will use a fallback encoding obtained by - // detect_utf16_endianness. - let (res, _) = - encoding::types::decode(&buffer, self.trap, detect_utf16_endianness(&buffer)); - let s = res.map_err(LoadError::Decode)?; - YamlLoader::load_from_str(&s).map_err(LoadError::Scan) + // Check if the `encoding` library can detect encoding from the BOM, otherwise use + // `detect_utf16_endianness`. + let (encoding, _) = + Encoding::for_bom(&buffer).unwrap_or_else(|| (detect_utf16_endianness(&buffer), 2)); + let mut decoder = encoding.new_decoder(); + let mut output = String::new(); + + // Decode the input buffer. + decode_loop(&buffer, &mut output, &mut decoder, self.trap)?; + + YamlLoader::load_from_str(&output).map_err(LoadError::Scan) + } +} + +/// Perform a loop of [`Decoder::decode_to_string`], reallocating `output` if needed. +fn decode_loop( + input: &[u8], + output: &mut String, + decoder: &mut Decoder, + trap: YAMLDecodingTrap, +) -> Result<(), LoadError> { + output.reserve(input.len()); + let mut total_bytes_read = 0; + + loop { + match decoder.decode_to_string_without_replacement(&input[total_bytes_read..], output, true) + { + // If the input is empty, we processed the whole input. + (DecoderResult::InputEmpty, _) => break Ok(()), + // If the output is full, we must reallocate. + (DecoderResult::OutputFull, bytes_read) => { + total_bytes_read += bytes_read; + // The output is already reserved to the size of the input. We slowly resize. Here, + // we're expecting that 10% of bytes will double in size when converting to UTF-8. + output.reserve(input.len() / 10); + } + (DecoderResult::Malformed(malformed_len, bytes_after_malformed), bytes_read) => { + total_bytes_read += bytes_read; + match trap { + // Ignore (skip over) malformed character. + YAMLDecodingTrap::Ignore => {} + // Replace them with the Unicode REPLACEMENT CHARACTER. + YAMLDecodingTrap::Replace => { + output.push('\u{FFFD}'); + } + // Otherwise error, getting as much context as possible. + YAMLDecodingTrap::Strict => { + let malformed_len = malformed_len as usize; + let bytes_after_malformed = bytes_after_malformed as usize; + let byte_idx = total_bytes_read - (malformed_len + bytes_after_malformed); + let malformed_sequence = &input[byte_idx..byte_idx + malformed_len]; + + break Err(LoadError::Decode(Cow::Owned(format!( + "Invalid character sequence at {byte_idx}: {malformed_sequence:?}", + )))); + } + YAMLDecodingTrap::Call(callback) => { + let byte_idx = + total_bytes_read - ((malformed_len + bytes_after_malformed) as usize); + let malformed_sequence = + &input[byte_idx..byte_idx + malformed_len as usize]; + if let ControlFlow::Break(error) = callback( + malformed_len, + bytes_after_malformed, + &input[byte_idx..], + output, + ) { + if error.is_empty() { + break Err(LoadError::Decode(Cow::Owned(format!( + "Invalid character sequence at {byte_idx}: {malformed_sequence:?}", + )))); + } + break Err(LoadError::Decode(error)); + } + } + } + } + } } } @@ -301,15 +415,15 @@ impl YamlDecoder { /// This allows the encoding to be deduced by the pattern of null (#x00) characters. // /// See spec at -fn detect_utf16_endianness(b: &[u8]) -> encoding::types::EncodingRef { +fn detect_utf16_endianness(b: &[u8]) -> &'static Encoding { if b.len() > 1 && (b[0] != b[1]) { if b[0] == 0 { - return encoding::all::UTF_16BE; + return encoding_rs::UTF_16BE; } else if b[1] == 0 { - return encoding::all::UTF_16LE; + return encoding_rs::UTF_16LE; } } - encoding::all::UTF_8 + encoding_rs::UTF_8 } macro_rules! define_as ( @@ -550,7 +664,7 @@ impl Iterator for YamlIter { #[cfg(test)] mod test { - use super::{Yaml, YamlDecoder}; + use super::{YAMLDecodingTrap, Yaml, YamlDecoder}; #[test] fn test_read_bom() { @@ -623,7 +737,7 @@ b: 2.2 c: [1, 2] "; let out = YamlDecoder::read(s as &[u8]) - .encoding_trap(encoding::DecoderTrap::Ignore) + .encoding_trap(YAMLDecodingTrap::Ignore) .decode() .unwrap(); let doc = &out[0]; From a5550d42031976823ea9c38653173365f5091302 Mon Sep 17 00:00:00 2001 From: David Aguilar Date: Sat, 23 Mar 2024 22:42:58 -0700 Subject: [PATCH 341/380] yaml: add YamlLoader::documents() to get a read-only view of the parsed documents --- saphyr/src/yaml.rs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/saphyr/src/yaml.rs b/saphyr/src/yaml.rs index 2ef4376..b869fcc 100644 --- a/saphyr/src/yaml.rs +++ b/saphyr/src/yaml.rs @@ -239,6 +239,12 @@ impl YamlLoader { parser.load(&mut loader, true)?; Ok(loader.docs) } + + /// Return a reference to the parsed Yaml documents. + #[must_use] + pub fn documents(&self) -> &[Yaml] { + &self.docs + } } /// The signature of the function to call when using [`YAMLDecodingTrap::Call`]. From e0560cb232fbf85163f8a2b89fc2704d8cdb592f Mon Sep 17 00:00:00 2001 From: David Aguilar Date: Sat, 23 Mar 2024 22:31:57 -0700 Subject: [PATCH 342/380] parser: add an option to keep tags across multiple documents Documents are self-contained and tags defined in the first document are not visible to subsequent documents. Add support for having tags that span across all documents by making the clearing of tags in the parser opt-out. Closes: #10 --- saphyr/src/parser.rs | 35 ++++++++++++++++++++++++++++++++++- 1 file changed, 34 insertions(+), 1 deletion(-) diff --git a/saphyr/src/parser.rs b/saphyr/src/parser.rs index b834c38..1aacfdb 100644 --- a/saphyr/src/parser.rs +++ b/saphyr/src/parser.rs @@ -115,6 +115,8 @@ pub struct Parser { /// /// Key is the handle, and value is the prefix. tags: HashMap, + /// Make tags global across all documents. + keep_tags: bool, } /// Trait to be implemented in order to use the low-level parsing API. @@ -222,9 +224,17 @@ impl> Parser { // valid anchor_id starts from 1 anchor_id: 1, tags: HashMap::new(), + keep_tags: false, } } + /// Make tags persistent when parsing multiple documents. + #[must_use] + pub fn keep_tags(mut self, value: bool) -> Self { + self.keep_tags = value; + self + } + /// Try to load the next event and return it, but do not consuming it from `self`. /// /// Any subsequent call to [`Parser::peek`] will return the same value, until a call to @@ -595,7 +605,9 @@ impl> Parser { Token(mark, _) => mark, }; - self.tags.clear(); + if !self.keep_tags { + self.tags.clear(); + } if explicit_end { self.state = State::ImplicitDocumentStart; } else { @@ -1050,6 +1062,7 @@ impl> Parser { #[cfg(test)] mod test { use super::{Event, Parser}; + use crate::YamlLoader; #[test] fn test_peek_eq_parse() { @@ -1073,4 +1086,24 @@ a5: *x event.0 != Event::StreamEnd } {} } + + #[test] + fn test_keep_tags_across_multiple_documents() { + let text = r#" +%YAML 1.1 +%TAG !t! tag:test,2024: +--- !t!1 &1 +foo: "bar" +--- !t!2 &2 +baz: "qux" +"#; + let mut loader = YamlLoader::default(); + let mut parser = Parser::new(text.chars()).keep_tags(true); + assert!(parser.load(&mut loader, true).is_ok()); + assert_eq!(loader.documents().len(), 2); + let yaml = &loader.documents()[0]; + assert_eq!(yaml["foo"].as_str(), Some("bar")); + let yaml = &loader.documents()[1]; + assert_eq!(yaml["baz"].as_str(), Some("qux")); + } } From dfaea08aa14a24138a6c52a4c3d8d5bda053dbc2 Mon Sep 17 00:00:00 2001 From: David Aguilar Date: Sun, 24 Mar 2024 14:15:09 -0700 Subject: [PATCH 343/380] parser: better document `keep_tags` Co-authored-by: Ethiraric --- saphyr/src/parser.rs | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/saphyr/src/parser.rs b/saphyr/src/parser.rs index 1aacfdb..ba595d0 100644 --- a/saphyr/src/parser.rs +++ b/saphyr/src/parser.rs @@ -228,7 +228,28 @@ impl> Parser { } } - /// Make tags persistent when parsing multiple documents. + /// Whether to keep tags across multiple documents when parsing. + /// + /// This behavior is non-standard as per the YAML specification but can be encountered in the + /// wild. This boolean allows enabling this non-standard extension. This would result in the + /// parser accepting input from [test + /// QLJ7](https://github.com/yaml/yaml-test-suite/blob/ccfa74e56afb53da960847ff6e6976c0a0825709/src/QLJ7.yaml) + /// of the yaml-test-suite: + /// + /// ```yaml + /// %TAG !prefix! tag:example.com,2011: + /// --- !prefix!A + /// a: b + /// --- !prefix!B + /// c: d + /// --- !prefix!C + /// e: f + /// ``` + /// + /// With `keep_tags` set to `false`, the above YAML is rejected. As per the specification, tags + /// only apply to the document immediately following them. This would error on `!prefix!B`. + /// + /// With `keep_tags` set to `true`, the above YAML is accepted by the parser. #[must_use] pub fn keep_tags(mut self, value: bool) -> Self { self.keep_tags = value; From 6ea41cc9a85054ffa84070e2b72b998297912f33 Mon Sep 17 00:00:00 2001 From: David Aguilar Date: Sun, 24 Mar 2024 14:15:37 -0700 Subject: [PATCH 344/380] parser: add a test for `keep_tags(false)` Co-authored-by: Ethiraric --- saphyr/src/parser.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/saphyr/src/parser.rs b/saphyr/src/parser.rs index ba595d0..52aca1b 100644 --- a/saphyr/src/parser.rs +++ b/saphyr/src/parser.rs @@ -1126,5 +1126,9 @@ baz: "qux" assert_eq!(yaml["foo"].as_str(), Some("bar")); let yaml = &loader.documents()[1]; assert_eq!(yaml["baz"].as_str(), Some("qux")); + + let mut loader = YamlLoader::default() + let mut parser = Parser::new(text.chars()).keep_tags(false); + assert!(parser.load(&mut loader, true).is_err()); } } From 2c7ecbf054de32dd8d61015b0081976efcea8d46 Mon Sep 17 00:00:00 2001 From: David Aguilar Date: Sun, 24 Mar 2024 14:27:04 -0700 Subject: [PATCH 345/380] CHANGELOG: document the keep_tags API addition --- saphyr/CHANGELOG.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/saphyr/CHANGELOG.md b/saphyr/CHANGELOG.md index 20c4678..cdbc6a5 100644 --- a/saphyr/CHANGELOG.md +++ b/saphyr/CHANGELOG.md @@ -20,6 +20,11 @@ **Features**: +- Tags can now be retained across documents by calling `keep_tags(true)` on a + `yaml_rust2::Parser` before loading documents. + ([#10](https://github.com/Ethiraric/yaml-rust2/issues/10) + ([#12](https://github.com/Ethiraric/yaml-rust2/pull/12)) + **Development**: ## v0.7.0 From e9e428eeaf3ebc4c56d18ccf5bf2e6ea1f929eeb Mon Sep 17 00:00:00 2001 From: David Aguilar Date: Sun, 24 Mar 2024 14:44:09 -0700 Subject: [PATCH 346/380] CHANGELOG: simplify the formatting and document YamlLoader::documents() --- saphyr/CHANGELOG.md | 63 ++++++++++++++++++++++++--------------------- 1 file changed, 34 insertions(+), 29 deletions(-) diff --git a/saphyr/CHANGELOG.md b/saphyr/CHANGELOG.md index cdbc6a5..da322fb 100644 --- a/saphyr/CHANGELOG.md +++ b/saphyr/CHANGELOG.md @@ -1,29 +1,34 @@ # Changelog ## Upcoming -### Breaking changes - - The `encoding` library has been replaced with `encoding_rs`. If you use the - `trap` of `YamlDecoder`, this change will make your code not compile. - An additional enum `YamlDecoderTrap` has been added to abstract the - underlying library and avoid breaking changes in the future. This - additionally lifts the `encoding` dependency on _your_ project if you were - using that feature. - - The `encoding::types::DecoderTrap` has been replaced with `YamlDecoderTrap`. - - The signature of the function for `YamlDecoderTrap::Call` has changed: - ```rs - // Before, with `encoding::types::DecoderTrap::Call` - fn(_: &mut encoding::RawDecoder, _: &[u8], _: &mut encoding::StringWriter) -> bool; - // Now, with `YamlDecoderTrap::Call` - fn(_: u8, _: u8, _: &[u8], _: &mut String) -> ControlFlow>; - ``` - Please refer to the `YamlDecoderTrapFn` documentation for more details. + +**Breaking Changes**: + +- The `encoding` library has been replaced with `encoding_rs`. If you use the +`trap` of `YamlDecoder`, this change will make your code not compile. +An additional enum `YamlDecoderTrap` has been added to abstract the +underlying library and avoid breaking changes in the future. This +additionally lifts the `encoding` dependency on _your_ project if you were +using that feature. + - The signature of the function for `YamlDecoderTrap::Call` has changed: + - The `encoding::types::DecoderTrap` has been replaced with `YamlDecoderTrap`. + ```rust + // Before, with `encoding::types::DecoderTrap::Call` + fn(_: &mut encoding::RawDecoder, _: &[u8], _: &mut encoding::StringWriter) -> bool; + // Now, with `YamlDecoderTrap::Call` + fn(_: u8, _: u8, _: &[u8], _: &mut String) -> ControlFlow>; + ``` + Please refer to the `YamlDecoderTrapFn` documentation for more details. **Features**: - Tags can now be retained across documents by calling `keep_tags(true)` on a - `yaml_rust2::Parser` before loading documents. - ([#10](https://github.com/Ethiraric/yaml-rust2/issues/10) - ([#12](https://github.com/Ethiraric/yaml-rust2/pull/12)) +`Parser` before loading documents. +([#10](https://github.com/Ethiraric/yaml-rust2/issues/10) +([#12](https://github.com/Ethiraric/yaml-rust2/pull/12)) + +- `YamlLoader` structs now have a `documents()` method that returns the parsed +documents associated with a loader. **Development**: @@ -32,30 +37,30 @@ **Features**: - Multi-line strings are now - [emitted using block scalars](https://github.com/chyh1990/yaml-rust/pull/136). +[emitted using block scalars](https://github.com/chyh1990/yaml-rust/pull/136). - Error messages now contain a byte offset to aid debugging. - ([#176](https://github.com/chyh1990/yaml-rust/pull/176)) +([#176](https://github.com/chyh1990/yaml-rust/pull/176)) - Yaml now has `or` and `borrowed_or` methods. - ([#179](https://github.com/chyh1990/yaml-rust/pull/179)) +([#179](https://github.com/chyh1990/yaml-rust/pull/179)) - `Yaml::load_from_bytes()` is now available. - ([#156](https://github.com/chyh1990/yaml-rust/pull/156)) +([#156](https://github.com/chyh1990/yaml-rust/pull/156)) - The parser and scanner now return Err() instead of calling panic. **Development**: - The documentation was updated to include a security note mentioning that - yaml-rust is safe because it does not interpret types. - ([#195](https://github.com/chyh1990/yaml-rust/pull/195)) +yaml-rust is safe because it does not interpret types. +([#195](https://github.com/chyh1990/yaml-rust/pull/195)) - Updated to quickcheck 1.0. - ([#188](https://github.com/chyh1990/yaml-rust/pull/188)) +([#188](https://github.com/chyh1990/yaml-rust/pull/188)) - `hashlink` is [now used](https://github.com/chyh1990/yaml-rust/pull/157) - instead of `linked_hash_map`. +instead of `linked_hash_map`. ## v0.6.0 @@ -70,7 +75,7 @@ ## v0.5.0 - The parser now supports tag directives. - ([#35](https://github.com/chyh1990/yaml-rust/issues/35) +([#35](https://github.com/chyh1990/yaml-rust/issues/35) - The `info` field has been exposed via a new `Yaml::info()` API method. - ([#190](https://github.com/chyh1990/yaml-rust/pull/190)) +([#190](https://github.com/chyh1990/yaml-rust/pull/190)) From 782d590e5cc1335f44230fc445151d754e04c45a Mon Sep 17 00:00:00 2001 From: David Aguilar Date: Sun, 24 Mar 2024 14:51:15 -0700 Subject: [PATCH 347/380] parser: add Parser::new_from_str(&str) --- bench/tools/dump_events.rs | 2 +- bench/tools/run_bench.rs | 2 +- bench/tools/time_parse.rs | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/bench/tools/dump_events.rs b/bench/tools/dump_events.rs index 8bf9e01..747e9b9 100644 --- a/bench/tools/dump_events.rs +++ b/bench/tools/dump_events.rs @@ -21,7 +21,7 @@ impl MarkedEventReceiver for EventSink { fn str_to_events(yaml: &str) -> Vec<(Event, Marker)> { let mut sink = EventSink { events: Vec::new() }; - let mut parser = Parser::new(yaml.chars()); + let mut parser = Parser::new_from_str(yaml); // Load events using our sink as the receiver. parser.load(&mut sink, true).unwrap(); sink.events diff --git a/bench/tools/run_bench.rs b/bench/tools/run_bench.rs index cda9db2..795f7bc 100644 --- a/bench/tools/run_bench.rs +++ b/bench/tools/run_bench.rs @@ -17,7 +17,7 @@ impl MarkedEventReceiver for NullSink { /// Parse the given input, returning elapsed time in nanoseconds. fn do_parse(input: &str) -> u64 { let mut sink = NullSink {}; - let mut parser = Parser::new(input.chars()); + let mut parser = Parser::new_from_str(input); let begin = std::time::Instant::now(); parser.load(&mut sink, true).unwrap(); let end = std::time::Instant::now(); diff --git a/bench/tools/time_parse.rs b/bench/tools/time_parse.rs index 014fcfc..1555dde 100644 --- a/bench/tools/time_parse.rs +++ b/bench/tools/time_parse.rs @@ -21,7 +21,7 @@ fn main() { f.read_to_string(&mut s).unwrap(); let mut sink = NullSink {}; - let mut parser = Parser::new(s.chars()); + let mut parser = Parser::new_from_str(&s); // Load events using our sink as the receiver. let begin = std::time::Instant::now(); From 21738031e35608406428b91e0ffb6118d2972590 Mon Sep 17 00:00:00 2001 From: David Aguilar Date: Sun, 24 Mar 2024 15:01:38 -0700 Subject: [PATCH 348/380] parser: fixup typos from github --- saphyr/src/parser.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/saphyr/src/parser.rs b/saphyr/src/parser.rs index 52aca1b..0b33e58 100644 --- a/saphyr/src/parser.rs +++ b/saphyr/src/parser.rs @@ -1127,7 +1127,7 @@ baz: "qux" let yaml = &loader.documents()[1]; assert_eq!(yaml["baz"].as_str(), Some("qux")); - let mut loader = YamlLoader::default() + let mut loader = YamlLoader::default(); let mut parser = Parser::new(text.chars()).keep_tags(false); assert!(parser.load(&mut loader, true).is_err()); } From cd2028aca70f73f6886028855f9ebfd9b4aa95e0 Mon Sep 17 00:00:00 2001 From: David Aguilar Date: Sun, 24 Mar 2024 14:51:15 -0700 Subject: [PATCH 349/380] parser: add Parser::new_from_str(&str) --- saphyr/src/parser.rs | 14 +++++++++++--- saphyr/tests/spec_test.rs | 2 +- saphyr/tests/yaml-test-suite.rs | 2 +- saphyr/tools/dump_events.rs | 2 +- saphyr/tools/run_bench.rs | 2 +- saphyr/tools/time_parse.rs | 2 +- 6 files changed, 16 insertions(+), 8 deletions(-) diff --git a/saphyr/src/parser.rs b/saphyr/src/parser.rs index 0b33e58..c5855a8 100644 --- a/saphyr/src/parser.rs +++ b/saphyr/src/parser.rs @@ -182,7 +182,7 @@ pub struct Parser { /// /// Load events from a yaml string. /// fn str_to_events(yaml: &str) -> Vec { /// let mut sink = EventSink { events: Vec::new() }; -/// let mut parser = Parser::new(yaml.chars()); +/// let mut parser = Parser::new_from_str(yaml); /// // Load events using our sink as the receiver. /// parser.load(&mut sink, true).unwrap(); /// sink.events @@ -210,8 +210,16 @@ impl MarkedEventReceiver for R { /// A convenience alias for a `Result` of a parser event. pub type ParseResult = Result<(Event, Marker), ScanError>; +impl<'a> Parser> { + /// Create a new instance of a parser from a &str. + #[must_use] + pub fn new_from_str(value: &'a str) -> Self { + Parser::new(value.chars()) + } +} + impl> Parser { - /// Crate a new instance of a parser from the given input of characters. + /// Create a new instance of a parser from the given input of characters. pub fn new(src: T) -> Parser { Parser { scanner: Scanner::new(src), @@ -1099,7 +1107,7 @@ a4: - 2 a5: *x "; - let mut p = Parser::new(s.chars()); + let mut p = Parser::new_from_str(s); while { let event_peek = p.peek().unwrap().clone(); let event = p.next_token().unwrap(); diff --git a/saphyr/tests/spec_test.rs b/saphyr/tests/spec_test.rs index c54a2df..ecf1327 100644 --- a/saphyr/tests/spec_test.rs +++ b/saphyr/tests/spec_test.rs @@ -49,7 +49,7 @@ impl EventReceiver for YamlChecker { fn str_to_test_events(docs: &str) -> Vec { let mut p = YamlChecker { evs: Vec::new() }; - let mut parser = Parser::new(docs.chars()); + let mut parser = Parser::new_from_str(docs); parser.load(&mut p, true).unwrap(); p.evs } diff --git a/saphyr/tests/yaml-test-suite.rs b/saphyr/tests/yaml-test-suite.rs index e7ad10e..818083f 100644 --- a/saphyr/tests/yaml-test-suite.rs +++ b/saphyr/tests/yaml-test-suite.rs @@ -123,7 +123,7 @@ fn load_tests_from_file(entry: &DirEntry) -> Result>> { fn parse_to_events(source: &str) -> Result, ScanError> { let mut reporter = EventReporter::new(); - Parser::new(source.chars()).load(&mut reporter, true)?; + Parser::new_from_str(source).load(&mut reporter, true)?; Ok(reporter.events) } diff --git a/saphyr/tools/dump_events.rs b/saphyr/tools/dump_events.rs index 8bf9e01..747e9b9 100644 --- a/saphyr/tools/dump_events.rs +++ b/saphyr/tools/dump_events.rs @@ -21,7 +21,7 @@ impl MarkedEventReceiver for EventSink { fn str_to_events(yaml: &str) -> Vec<(Event, Marker)> { let mut sink = EventSink { events: Vec::new() }; - let mut parser = Parser::new(yaml.chars()); + let mut parser = Parser::new_from_str(yaml); // Load events using our sink as the receiver. parser.load(&mut sink, true).unwrap(); sink.events diff --git a/saphyr/tools/run_bench.rs b/saphyr/tools/run_bench.rs index cda9db2..795f7bc 100644 --- a/saphyr/tools/run_bench.rs +++ b/saphyr/tools/run_bench.rs @@ -17,7 +17,7 @@ impl MarkedEventReceiver for NullSink { /// Parse the given input, returning elapsed time in nanoseconds. fn do_parse(input: &str) -> u64 { let mut sink = NullSink {}; - let mut parser = Parser::new(input.chars()); + let mut parser = Parser::new_from_str(input); let begin = std::time::Instant::now(); parser.load(&mut sink, true).unwrap(); let end = std::time::Instant::now(); diff --git a/saphyr/tools/time_parse.rs b/saphyr/tools/time_parse.rs index 014fcfc..1555dde 100644 --- a/saphyr/tools/time_parse.rs +++ b/saphyr/tools/time_parse.rs @@ -21,7 +21,7 @@ fn main() { f.read_to_string(&mut s).unwrap(); let mut sink = NullSink {}; - let mut parser = Parser::new(s.chars()); + let mut parser = Parser::new_from_str(&s); // Load events using our sink as the receiver. let begin = std::time::Instant::now(); From ddb6885a17a0f3cad5ae270e2ea3ffffda4fb8d9 Mon Sep 17 00:00:00 2001 From: David Aguilar Date: Sun, 24 Mar 2024 03:07:15 -0700 Subject: [PATCH 350/380] yaml: add YamlLoader::load_from_parser(&Parser) Make it easier to load documents from a prebuilt Parser. --- saphyr/CHANGELOG.md | 2 ++ saphyr/src/parser.rs | 19 ++++++++++--------- saphyr/src/yaml.rs | 14 +++++++++++++- 3 files changed, 25 insertions(+), 10 deletions(-) diff --git a/saphyr/CHANGELOG.md b/saphyr/CHANGELOG.md index da322fb..2a933e1 100644 --- a/saphyr/CHANGELOG.md +++ b/saphyr/CHANGELOG.md @@ -30,6 +30,8 @@ using that feature. - `YamlLoader` structs now have a `documents()` method that returns the parsed documents associated with a loader. +- `Parser::new_from_str(&str)` and `YamlLoader::load_from_parser(&Parser)` were added. + **Development**: ## v0.7.0 diff --git a/saphyr/src/parser.rs b/saphyr/src/parser.rs index c5855a8..288f232 100644 --- a/saphyr/src/parser.rs +++ b/saphyr/src/parser.rs @@ -1126,17 +1126,18 @@ foo: "bar" --- !t!2 &2 baz: "qux" "#; - let mut loader = YamlLoader::default(); - let mut parser = Parser::new(text.chars()).keep_tags(true); - assert!(parser.load(&mut loader, true).is_ok()); - assert_eq!(loader.documents().len(), 2); - let yaml = &loader.documents()[0]; + let mut parser = Parser::new_from_str(text).keep_tags(true); + let result = YamlLoader::load_from_parser(&mut parser); + assert!(result.is_ok()); + let docs = result.unwrap(); + assert_eq!(docs.len(), 2); + let yaml = &docs[0]; assert_eq!(yaml["foo"].as_str(), Some("bar")); - let yaml = &loader.documents()[1]; + let yaml = &docs[1]; assert_eq!(yaml["baz"].as_str(), Some("qux")); - let mut loader = YamlLoader::default(); - let mut parser = Parser::new(text.chars()).keep_tags(false); - assert!(parser.load(&mut loader, true).is_err()); + let mut parser = Parser::new_from_str(text).keep_tags(false); + let result = YamlLoader::load_from_parser(&mut parser); + assert!(result.is_err()); } } diff --git a/saphyr/src/yaml.rs b/saphyr/src/yaml.rs index b869fcc..6d58653 100644 --- a/saphyr/src/yaml.rs +++ b/saphyr/src/yaml.rs @@ -234,8 +234,20 @@ impl YamlLoader { /// # Errors /// Returns `ScanError` when loading fails. pub fn load_from_iter>(source: I) -> Result, ScanError> { - let mut loader = YamlLoader::default(); let mut parser = Parser::new(source); + Self::load_from_parser(&mut parser) + } + + /// Load the contents from the specified Parser as a set of YAML documents. + /// + /// Parsing succeeds if and only if all documents are parsed successfully. + /// An error in a latter document prevents the former from being returned. + /// # Errors + /// Returns `ScanError` when loading fails. + pub fn load_from_parser>( + parser: &mut Parser, + ) -> Result, ScanError> { + let mut loader = YamlLoader::default(); parser.load(&mut loader, true)?; Ok(loader.docs) } From 53b70cd93b4ea62765c3001f7308259e9436f304 Mon Sep 17 00:00:00 2001 From: David Aguilar Date: Sun, 24 Mar 2024 17:00:19 -0700 Subject: [PATCH 351/380] README: add self-links to make things more discoverable If someone lands on one of our forks on github there are still UI elements that point them back to the original chyh1990 yaml-rust project. Add links to make it easier to navigate back to the main fork when viewing a yaml-rust2 fork. --- saphyr/README.md | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/saphyr/README.md b/saphyr/README.md index 18db862..3e892a0 100644 --- a/saphyr/README.md +++ b/saphyr/README.md @@ -1,6 +1,8 @@ # yaml-rust2 -A fully compliant YAML 1.2 implementation written in pure Rust. +[yaml-rust2](https://github.com/Ethiraric/yaml-rust2) is a fully compliant YAML 1.2 +implementation written in pure Rust. + This work is based on [`yaml-rust`](https://github.com/chyh1990/yaml-rust) with fixes towards being compliant to the [YAML test suite](https://github.com/yaml/yaml-test-suite/). `yaml-rust`'s parser is @@ -120,8 +122,21 @@ You can find licences in the [`.licenses`](.licenses) subfolder. ## Contribution -Fork & PR on Github. +[Fork this repository](https://github.com/Ethiraric/yaml-rust2/fork) and +[Create a Pull Request on Github](https://github.com/Ethiraric/yaml-rust2/compare/master...Ethiraric:yaml-rust2:master). +You may need to click on "compare across forks" and select your fork's branch. +Make sure that `Ethiraric` is selected as the base repository, not `chyh1990`. Unless you explicitly state otherwise, any contribution intentionally submitted for inclusion in the work by you, as defined in the Apache-2.0 license, shall be dual licensed as above, without any additional terms or conditions. + +## Links + +* [yaml-rust2 source code repository](https://github.com/Ethiraric/yaml-rust2) + +* [yaml-rust2 releases on crates.io](https://crates.io/crates/yaml-rust2) + +* [yaml-rust2 documentation on docs.rs](https://docs.rs/yaml-rust2/latest/yaml_rust2/) + +* [yaml-test-suite](https://github.com/yaml/yaml-test-suite) From 37f52b7722756bc5845dba82a39930d8a571d1b8 Mon Sep 17 00:00:00 2001 From: David Aguilar Date: Sun, 24 Mar 2024 17:00:57 -0700 Subject: [PATCH 352/380] README: update the installation snippet to use yaml-rust2 0.7 --- saphyr/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/saphyr/README.md b/saphyr/README.md index 3e892a0..a471b28 100644 --- a/saphyr/README.md +++ b/saphyr/README.md @@ -17,7 +17,7 @@ Add the following to the Cargo.toml of your project: ```toml [dependencies] -yaml-rust2 = "0.6" +yaml-rust2 = "0.7" ``` Use `yaml_rust2::YamlLoader` to load YAML documents and access them as `Yaml` objects: From 95ac76e16319658723a55b763a0ac656c6973d41 Mon Sep 17 00:00:00 2001 From: John Vandenberg Date: Mon, 25 Mar 2024 19:01:58 +0800 Subject: [PATCH 353/380] fix typos --- bench/tools/gen_large_yaml/src/main.rs | 2 +- bench/tools/gen_large_yaml/src/nested.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/bench/tools/gen_large_yaml/src/main.rs b/bench/tools/gen_large_yaml/src/main.rs index 86423bf..b585c59 100644 --- a/bench/tools/gen_large_yaml/src/main.rs +++ b/bench/tools/gen_large_yaml/src/main.rs @@ -42,7 +42,7 @@ struct Generator { /// The RNG state. /// /// We don't need to be cryptographically secure. [`SmallRng`] also implements the - /// [`SeedableRng`] trait, allowing runs to be predictible. + /// [`SeedableRng`] trait, allowing runs to be predictable. rng: SmallRng, /// The stack of indentations. indents: Vec, diff --git a/bench/tools/gen_large_yaml/src/nested.rs b/bench/tools/gen_large_yaml/src/nested.rs index db93ff9..0f182a9 100644 --- a/bench/tools/gen_large_yaml/src/nested.rs +++ b/bench/tools/gen_large_yaml/src/nested.rs @@ -26,7 +26,7 @@ struct Tree { /// The RNG state. /// /// We don't need to be cryptographically secure. [`SmallRng`] also implements the - /// [`SeedableRng`] trait, allowing runs to be predictible. + /// [`SeedableRng`] trait, allowing runs to be predictable. rng: SmallRng, } From 992c9c29fbaa0db007e075a43670ee5878c28953 Mon Sep 17 00:00:00 2001 From: John Vandenberg Date: Mon, 25 Mar 2024 19:01:58 +0800 Subject: [PATCH 354/380] fix typos --- saphyr/documents/2024-03-15-FirstRelease.md | 2 +- saphyr/src/emitter.rs | 2 +- saphyr/src/parser.rs | 2 +- saphyr/src/scanner.rs | 4 ++-- saphyr/tests/emitter.rs | 2 +- saphyr/tools/gen_large_yaml/src/main.rs | 2 +- saphyr/tools/gen_large_yaml/src/nested.rs | 2 +- 7 files changed, 8 insertions(+), 8 deletions(-) diff --git a/saphyr/documents/2024-03-15-FirstRelease.md b/saphyr/documents/2024-03-15-FirstRelease.md index 0dc1af1..7205fce 100644 --- a/saphyr/documents/2024-03-15-FirstRelease.md +++ b/saphyr/documents/2024-03-15-FirstRelease.md @@ -102,7 +102,7 @@ Switching to `yaml-rust2` should be a very simple process. Change your `Cargo.to As for your code, you have one of two solutions: * Changing your imports from `use yaml_rust::Yaml` to `use yaml_rust2::Yaml` if you import items directly, or change - occurences of `yaml_rust` to `yaml_rust2` if you use fully qualified paths. + occurrences of `yaml_rust` to `yaml_rust2` if you use fully qualified paths. * Alternatively, you can alias `yaml_rust2` with `use yaml_rust2 as yaml_rust`. This would keep your code working if you use fully qualified paths. diff --git a/saphyr/src/emitter.rs b/saphyr/src/emitter.rs index 15f8cab..60eda8a 100644 --- a/saphyr/src/emitter.rs +++ b/saphyr/src/emitter.rs @@ -123,7 +123,7 @@ fn escape_str(wr: &mut dyn fmt::Write, v: &str) -> Result<(), fmt::Error> { } impl<'a> YamlEmitter<'a> { - /// Create a nwe emitter serializing into `writer`. + /// Create a new emitter serializing into `writer`. pub fn new(writer: &'a mut dyn fmt::Write) -> YamlEmitter { YamlEmitter { writer, diff --git a/saphyr/src/parser.rs b/saphyr/src/parser.rs index 288f232..43bf1aa 100644 --- a/saphyr/src/parser.rs +++ b/saphyr/src/parser.rs @@ -61,7 +61,7 @@ pub enum Event { Scalar(String, TScalarStyle, usize, Option), /// The start of a YAML sequence (array). SequenceStart( - /// The anchor ID of the start of the squence. + /// The anchor ID of the start of the sequence. usize, /// An optional tag Option, diff --git a/saphyr/src/scanner.rs b/saphyr/src/scanner.rs index 556ca9b..69f56ff 100644 --- a/saphyr/src/scanner.rs +++ b/saphyr/src/scanner.rs @@ -76,7 +76,7 @@ impl Marker { } } -/// An error that occured while scanning. +/// An error that occurred while scanning. #[derive(Clone, PartialEq, Debug, Eq)] pub struct ScanError { /// The position at which the error happened in the source. @@ -1912,7 +1912,7 @@ impl> Scanner { if (self.mark.col as isize) < self.indent { return Err(ScanError::new( start_mark, - "invalid identation in quoted scalar", + "invalid indentation in quoted scalar", )); } diff --git a/saphyr/tests/emitter.rs b/saphyr/tests/emitter.rs index 6460468..c085a56 100644 --- a/saphyr/tests/emitter.rs +++ b/saphyr/tests/emitter.rs @@ -37,7 +37,7 @@ a4: #[test] fn test_emit_complex() { let s = r" -cataloge: +catalogue: product: &coffee { name: Coffee, price: 2.5 , unit: 1l } product: &cookies { name: Cookies!, price: 3.40 , unit: 400g} diff --git a/saphyr/tools/gen_large_yaml/src/main.rs b/saphyr/tools/gen_large_yaml/src/main.rs index 86423bf..b585c59 100644 --- a/saphyr/tools/gen_large_yaml/src/main.rs +++ b/saphyr/tools/gen_large_yaml/src/main.rs @@ -42,7 +42,7 @@ struct Generator { /// The RNG state. /// /// We don't need to be cryptographically secure. [`SmallRng`] also implements the - /// [`SeedableRng`] trait, allowing runs to be predictible. + /// [`SeedableRng`] trait, allowing runs to be predictable. rng: SmallRng, /// The stack of indentations. indents: Vec, diff --git a/saphyr/tools/gen_large_yaml/src/nested.rs b/saphyr/tools/gen_large_yaml/src/nested.rs index db93ff9..0f182a9 100644 --- a/saphyr/tools/gen_large_yaml/src/nested.rs +++ b/saphyr/tools/gen_large_yaml/src/nested.rs @@ -26,7 +26,7 @@ struct Tree { /// The RNG state. /// /// We don't need to be cryptographically secure. [`SmallRng`] also implements the - /// [`SeedableRng`] trait, allowing runs to be predictible. + /// [`SeedableRng`] trait, allowing runs to be predictable. rng: SmallRng, } From 536e5f7ec4ef2c3a0f19587825716f7d3e17a5c4 Mon Sep 17 00:00:00 2001 From: Spenser Black Date: Mon, 25 Mar 2024 08:34:54 -0400 Subject: [PATCH 355/380] Add linguist attributes for `tests/*.rs.inc` files This informs Linguist (used by GitHub) that these files are Rust (they are currently detected as C++). This also marks them as generated. --- saphyr/.gitattributes | 1 + 1 file changed, 1 insertion(+) create mode 100644 saphyr/.gitattributes diff --git a/saphyr/.gitattributes b/saphyr/.gitattributes new file mode 100644 index 0000000..90e68e3 --- /dev/null +++ b/saphyr/.gitattributes @@ -0,0 +1 @@ +tests/*.rs.inc linguist-language=Rust linguist-generated From 134a1672ccd5602d8a71c4588ce86e96c6ff19d9 Mon Sep 17 00:00:00 2001 From: David Aguilar Date: Mon, 25 Mar 2024 22:15:24 -0700 Subject: [PATCH 356/380] garden: update urls and remove old forks and links We have the commits in git now. --- saphyr/garden.yaml | 29 +---------------------------- 1 file changed, 1 insertion(+), 28 deletions(-) diff --git a/saphyr/garden.yaml b/saphyr/garden.yaml index 78b309c..19f574c 100644 --- a/saphyr/garden.yaml +++ b/saphyr/garden.yaml @@ -44,40 +44,13 @@ trees: path: ${GARDEN_CONFIG_DIR} url: "git@github.com:Ethiraric/yaml-rust2.git" remotes: - byte1234: "git@github.com:byte1234/yaml-rust.git" - davvid: "git@github.com:davvid/yaml-rust.git" - gyscos: "git@github.com:gyscos/yaml-rust.git" - jturner314: "git@github.com:jturner314/yaml-rust.git" - mathstuf: "git@github.com:mathstuf/yaml-rust.git" - mkmik: "git@github.com:mkmik/yaml-rust.git" - olalonde: "git@github.com:olalonde/yaml-rust.git" - oldaccountdeadname: "git@github.com:oldaccountdeadname/yaml-rust.git" - ramosbugs: "git@github.com:ramosbugs/yaml-rust.git" - rocallahan: "git@github.com:rocallahan/yaml-rust.git" - smoelius: "git@github.com:smoelius/yaml-rust.git" + davvid: "git@github.com:davvid/yaml-rust2.git" yaml-rust: "git@github.com:chyh1990/yaml-rust.git" gitconfig: remote.yaml-rust.fetch: - "+refs/heads/*:refs/remotes/yaml-rust/*" # Access yaml-rust pull requests as yaml-rust/pull/* - "+refs/pull/*/head:refs/remotes/yaml-rust/pull/*" - links: - - "byte1234: replace linked_hash_map with hashlink" - - https://github.com/chyh1990/yaml-rust/pull/157 - - "ramosbugs: emit multi-line string values as block scalars" - - https://github.com/chyh1990/yaml-rust/pull/136 - - "oldaccountdeadname: introduce or function" - - https://github.com/chyh1990/yaml-rust/pull/179 - - "rocallahan: Update to quickcheck 1.0" - - https://github.com/chyh1990/yaml-rust/pull/188 - - "jturner314: Add byte index to error message" - - https://github.com/chyh1990/yaml-rust/pull/176 - - "mathstuf: mention that types are not interpreted" - - https://github.com/chyh1990/yaml-rust/pull/195 - - "olalonde: Expose info() api" - - https://github.com/chyh1990/yaml-rust/pull/190 - - "mkmik: Implement load_from_bytes" - - https://github.com/chyh1990/yaml-rust/pull/156 yaml-test-suite: description: Comprehensive, language independent Test Suite for YAML From 4a5ba3b49949ac735d4dcd43b3a292f84319e24f Mon Sep 17 00:00:00 2001 From: David Aguilar Date: Mon, 25 Mar 2024 22:16:57 -0700 Subject: [PATCH 357/380] garden: access yaml-rust2 pull requests as yaml-rust2/pull/* --- saphyr/garden.yaml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/saphyr/garden.yaml b/saphyr/garden.yaml index 19f574c..5db5126 100644 --- a/saphyr/garden.yaml +++ b/saphyr/garden.yaml @@ -47,9 +47,13 @@ trees: davvid: "git@github.com:davvid/yaml-rust2.git" yaml-rust: "git@github.com:chyh1990/yaml-rust.git" gitconfig: + # Access yaml-rust2 pull requests as yaml-rust2/pull/* + remote.yaml-rust2.url: "git@github.com:Ethiraric/yaml-rust2.git" + remote.yaml-rust2.fetch: + - "+refs/pull/*/head:refs/remotes/yaml-rust2/pull/*" + # Access yaml-rust pull requests as yaml-rust/pull/* remote.yaml-rust.fetch: - "+refs/heads/*:refs/remotes/yaml-rust/*" - # Access yaml-rust pull requests as yaml-rust/pull/* - "+refs/pull/*/head:refs/remotes/yaml-rust/pull/*" yaml-test-suite: From ae65040d3b3d2f80a792e05a14ad8cb3f17ef2fe Mon Sep 17 00:00:00 2001 From: David Aguilar Date: Mon, 25 Mar 2024 22:07:31 -0700 Subject: [PATCH 358/380] CHANGELOG: mention the linguist development-related updates --- saphyr/CHANGELOG.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/saphyr/CHANGELOG.md b/saphyr/CHANGELOG.md index 2a933e1..cac12e0 100644 --- a/saphyr/CHANGELOG.md +++ b/saphyr/CHANGELOG.md @@ -34,6 +34,9 @@ documents associated with a loader. **Development**: +- Linguist attributes were added for the `tests/*.rs.inc` files to prevent github from +classifying them as C++ files. + ## v0.7.0 **Features**: From b5c9eec454f179c974990b25098120f5bd8e30ad Mon Sep 17 00:00:00 2001 From: David Aguilar Date: Mon, 25 Mar 2024 22:18:38 -0700 Subject: [PATCH 359/380] yaml-rust2 v0.8.0 --- saphyr/CHANGELOG.md | 2 +- saphyr/Cargo.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/saphyr/CHANGELOG.md b/saphyr/CHANGELOG.md index cac12e0..1b55c7f 100644 --- a/saphyr/CHANGELOG.md +++ b/saphyr/CHANGELOG.md @@ -1,6 +1,6 @@ # Changelog -## Upcoming +## v0.8.0 **Breaking Changes**: diff --git a/saphyr/Cargo.toml b/saphyr/Cargo.toml index c0bc62e..b75c474 100644 --- a/saphyr/Cargo.toml +++ b/saphyr/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "yaml-rust2" -version = "0.7.0" +version = "0.8.0" authors = [ "Yuheng Chen ", "Ethiraric ", From 4aad863f87d779271a3ee2efd2259412c5f6788f Mon Sep 17 00:00:00 2001 From: David Aguilar Date: Mon, 25 Mar 2024 22:25:46 -0700 Subject: [PATCH 360/380] maint: version-up and minimize where we mention the version Remove the version number from tools/gen_large_yaml so that we can avoid needing to update the version in two places. --- bench/tools/gen_large_yaml/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bench/tools/gen_large_yaml/Cargo.toml b/bench/tools/gen_large_yaml/Cargo.toml index a70e779..d57bdea 100644 --- a/bench/tools/gen_large_yaml/Cargo.toml +++ b/bench/tools/gen_large_yaml/Cargo.toml @@ -11,7 +11,7 @@ readme = "README.md" edition = "2018" [dependencies] -yaml-rust2 = { version = "0.7.0", path = "../../" } +yaml-rust2 = { path = "../.." } rand = { version = "0.8.5", features = [ "small_rng" ] } lipsum = "0.9.0" From 2dc9e2c13f5d12d0a6443c51c192485f49357875 Mon Sep 17 00:00:00 2001 From: David Aguilar Date: Mon, 25 Mar 2024 22:25:46 -0700 Subject: [PATCH 361/380] maint: version-up and minimize where we mention the version Remove the version number from tools/gen_large_yaml so that we can avoid needing to update the version in two places. --- saphyr/README.md | 2 +- saphyr/documents/2024-03-15-FirstRelease.md | 2 +- saphyr/src/lib.rs | 2 +- saphyr/tools/gen_large_yaml/Cargo.toml | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/saphyr/README.md b/saphyr/README.md index a471b28..d9f12ad 100644 --- a/saphyr/README.md +++ b/saphyr/README.md @@ -17,7 +17,7 @@ Add the following to the Cargo.toml of your project: ```toml [dependencies] -yaml-rust2 = "0.7" +yaml-rust2 = "0.8" ``` Use `yaml_rust2::YamlLoader` to load YAML documents and access them as `Yaml` objects: diff --git a/saphyr/documents/2024-03-15-FirstRelease.md b/saphyr/documents/2024-03-15-FirstRelease.md index 7205fce..41aa642 100644 --- a/saphyr/documents/2024-03-15-FirstRelease.md +++ b/saphyr/documents/2024-03-15-FirstRelease.md @@ -96,7 +96,7 @@ Switching to `yaml-rust2` should be a very simple process. Change your `Cargo.to ```diff -yaml-rust = "0.4.4" -+yaml-rust2 = "0.6.0" ++yaml-rust2 = "0.8.0" ``` As for your code, you have one of two solutions: diff --git a/saphyr/src/lib.rs b/saphyr/src/lib.rs index 9ca9a45..5fc106b 100644 --- a/saphyr/src/lib.rs +++ b/saphyr/src/lib.rs @@ -11,7 +11,7 @@ //! //! ```toml //! [dependencies] -//! yaml-rust2 = "0.7.0" +//! yaml-rust2 = "0.8.0" //! ``` //! //! # Examples diff --git a/saphyr/tools/gen_large_yaml/Cargo.toml b/saphyr/tools/gen_large_yaml/Cargo.toml index a70e779..d57bdea 100644 --- a/saphyr/tools/gen_large_yaml/Cargo.toml +++ b/saphyr/tools/gen_large_yaml/Cargo.toml @@ -11,7 +11,7 @@ readme = "README.md" edition = "2018" [dependencies] -yaml-rust2 = { version = "0.7.0", path = "../../" } +yaml-rust2 = { path = "../.." } rand = { version = "0.8.5", features = [ "small_rng" ] } lipsum = "0.9.0" From bb3cc83120c53acff61be131cfdaa3ee909a1d81 Mon Sep 17 00:00:00 2001 From: Ethiraric Date: Thu, 28 Mar 2024 21:03:14 +0100 Subject: [PATCH 362/380] Add doc checks to `before_commit`. --- bench/justfile | 1 + 1 file changed, 1 insertion(+) diff --git a/bench/justfile b/bench/justfile index 58c8c4d..f33ee69 100644 --- a/bench/justfile +++ b/bench/justfile @@ -7,6 +7,7 @@ before_commit: cargo test --release cargo test --doc cargo build --profile=release-lto --package gen_large_yaml --bin gen_large_yaml --manifest-path tools/gen_large_yaml/Cargo.toml + RUSTDOCFLAGS="-D warnings" cargo doc --all-features ethi_bench: cargo build --release --all-targets From 2642252f7f0ba2690245b32d397d079b104be138 Mon Sep 17 00:00:00 2001 From: Ethiraric Date: Thu, 28 Mar 2024 21:03:14 +0100 Subject: [PATCH 363/380] Add doc checks to `before_commit`. --- saphyr/justfile | 1 + saphyr/src/parser.rs | 2 +- saphyr/src/scanner.rs | 6 +++--- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/saphyr/justfile b/saphyr/justfile index 58c8c4d..f33ee69 100644 --- a/saphyr/justfile +++ b/saphyr/justfile @@ -7,6 +7,7 @@ before_commit: cargo test --release cargo test --doc cargo build --profile=release-lto --package gen_large_yaml --bin gen_large_yaml --manifest-path tools/gen_large_yaml/Cargo.toml + RUSTDOCFLAGS="-D warnings" cargo doc --all-features ethi_bench: cargo build --release --all-targets diff --git a/saphyr/src/parser.rs b/saphyr/src/parser.rs index 43bf1aa..59869a2 100644 --- a/saphyr/src/parser.rs +++ b/saphyr/src/parser.rs @@ -267,7 +267,7 @@ impl> Parser { /// Try to load the next event and return it, but do not consuming it from `self`. /// /// Any subsequent call to [`Parser::peek`] will return the same value, until a call to - /// [`Parser::next`] or [`Parser::load`]. + /// [`Iterator::next`] or [`Parser::load`]. /// # Errors /// Returns `ScanError` when loading the next event fails. pub fn peek(&mut self) -> Result<&(Event, Marker), ScanError> { diff --git a/saphyr/src/scanner.rs b/saphyr/src/scanner.rs index 69f56ff..dece35b 100644 --- a/saphyr/src/scanner.rs +++ b/saphyr/src/scanner.rs @@ -135,7 +135,7 @@ impl fmt::Display for ScanError { /// The contents of a scanner token. #[derive(Clone, PartialEq, Debug, Eq)] pub enum TokenType { - /// The start of the stream. Sent first, before even [`DocumentStart`]. + /// The start of the stream. Sent first, before even [`TokenType::DocumentStart`]. StreamStart(TEncoding), /// The end of the stream, EOF. StreamEnd, @@ -324,8 +324,8 @@ const BUFFER_LEN: usize = 16; /// some of the constructs. It has understanding of indentation and whitespace and is able to /// generate error messages for some invalid YAML constructs. /// -/// It is however not a full parser and needs [`parser::Parser`] to fully detect invalid YAML -/// documents. +/// It is however not a full parser and needs [`crate::parser::Parser`] to fully detect invalid +/// YAML documents. #[derive(Debug)] #[allow(clippy::struct_excessive_bools)] pub struct Scanner { From 0f1dedc58488c278b48ae6e1134265f08725c444 Mon Sep 17 00:00:00 2001 From: Alistair Francis Date: Sat, 30 Mar 2024 18:51:16 +0100 Subject: [PATCH 364/380] yaml: Implement IndexMut This implements the IndexMut trait for Yaml. This allows indexing the Yaml type while having a mutable reference. Unlike the Index, this will panic on a failure. That is allowed as per the Rust documentation [1]. We don't have the option of returning a mutable reference to BAD_VALUE as that is unsafe. So instead we just panic. 1: https://doc.rust-lang.org/std/ops/trait.IndexMut.html#tymethod.index_mut Resolves: https://github.com/chyh1990/yaml-rust/issues/123 Signed-off-by: Alistair Francis Co-authored-by: Ethiraric --- saphyr/CHANGELOG.md | 12 ++++++++++ saphyr/src/yaml.rs | 54 ++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 65 insertions(+), 1 deletion(-) diff --git a/saphyr/CHANGELOG.md b/saphyr/CHANGELOG.md index 1b55c7f..603b5d3 100644 --- a/saphyr/CHANGELOG.md +++ b/saphyr/CHANGELOG.md @@ -1,5 +1,17 @@ # Changelog +## Upcoming + +**Features** + +- ([#19](https://github.com/Ethiraric/yaml-rust2/pull/19)) `Yaml` now + implements `IndexMut` and `IndexMut<&'a str>`. These functions may not + return a mutable reference to a `BAD_VALUE`. Instead, `index_mut()` will + panic if either: + * The index is out of range, as per `IndexMut`'s requirements + * The inner `Yaml` variant doesn't match `Yaml::Array` for `usize` or + `Yaml::Hash` for `&'a str` + ## v0.8.0 **Breaking Changes**: diff --git a/saphyr/src/yaml.rs b/saphyr/src/yaml.rs index 6d58653..9486d1c 100644 --- a/saphyr/src/yaml.rs +++ b/saphyr/src/yaml.rs @@ -4,7 +4,7 @@ use std::borrow::Cow; use std::ops::ControlFlow; -use std::{collections::BTreeMap, convert::TryFrom, mem, ops::Index}; +use std::{collections::BTreeMap, convert::TryFrom, mem, ops::Index, ops::IndexMut}; use encoding_rs::{Decoder, DecoderResult, Encoding}; use hashlink::LinkedHashMap; @@ -478,6 +478,23 @@ pub fn $name(&self) -> Option<$t> { ); ); +macro_rules! define_as_mut_ref ( + ($name:ident, $t:ty, $yt:ident) => ( +/// Get a mutable reference to the inner object in the YAML enum if it is a `$t`. +/// +/// # Return +/// If the variant of `self` is `Yaml::$yt`, return `Some(&mut $t)` with the `$t` contained. +/// Otherwise, return `None`. +#[must_use] +pub fn $name(&mut self) -> Option<$t> { + match *self { + Yaml::$yt(ref mut v) => Some(v), + _ => None + } +} + ); +); + macro_rules! define_into ( ($name:ident, $t:ty, $yt:ident) => ( /// Get the inner object in the YAML enum if it is a `$t`. @@ -503,6 +520,9 @@ impl Yaml { define_as_ref!(as_hash, &Hash, Hash); define_as_ref!(as_vec, &Array, Array); + define_as_mut_ref!(as_mut_hash, &mut Hash, Hash); + define_as_mut_ref!(as_mut_vec, &mut Array, Array); + define_into!(into_bool, bool, Boolean); define_into!(into_i64, i64, Integer); define_into!(into_string, String, String); @@ -641,6 +661,16 @@ impl<'a> Index<&'a str> for Yaml { } } +impl<'a> IndexMut<&'a str> for Yaml { + fn index_mut(&mut self, idx: &'a str) -> &mut Yaml { + let key = Yaml::String(idx.to_owned()); + match self.as_mut_hash() { + Some(h) => h.get_mut(&key).unwrap(), + None => panic!("Not a hash type"), + } + } +} + impl Index for Yaml { type Output = Yaml; @@ -656,6 +686,28 @@ impl Index for Yaml { } } +impl IndexMut for Yaml { + /// Perform indexing if `self` is a sequence or a mapping. + /// + /// # Panics + /// This function panics if the index given is out of range (as per [`IndexMut`]). If `self` i + /// a [`Yaml::Array`], this is when the index is bigger or equal to the length of the + /// underlying `Vec`. If `self` is a [`Yaml::Hash`], this is when the mapping sequence does no + /// contain [`Yaml::Integer`]`(idx)` as a key. + /// + /// This function also panics if `self` is not a [`Yaml::Array`] nor a [`Yaml::Hash`]. + fn index_mut(&mut self, idx: usize) -> &mut Yaml { + match self { + Yaml::Array(sequence) => sequence.index_mut(idx), + Yaml::Hash(mapping) => { + let key = Yaml::Integer(i64::try_from(idx).unwrap()); + mapping.get_mut(&key).unwrap() + } + _ => panic!("Attempting to index but `self` is not a sequence nor a mapping"), + } + } +} + impl IntoIterator for Yaml { type Item = Yaml; type IntoIter = YamlIter; From f166970a3eec18c0551c04e0dcc817ae1ab16077 Mon Sep 17 00:00:00 2001 From: Ethiraric Date: Sat, 30 Mar 2024 19:24:54 +0100 Subject: [PATCH 365/380] Use cargo features. --- saphyr/CHANGELOG.md | 10 ++++++++++ saphyr/Cargo.toml | 8 +++++++- saphyr/src/debug.rs | 6 +++--- saphyr/src/emitter.rs | 2 +- saphyr/src/lib.rs | 13 +++++++++++++ saphyr/src/yaml.rs | 7 +++++++ 6 files changed, 41 insertions(+), 5 deletions(-) diff --git a/saphyr/CHANGELOG.md b/saphyr/CHANGELOG.md index 603b5d3..4e3ebd2 100644 --- a/saphyr/CHANGELOG.md +++ b/saphyr/CHANGELOG.md @@ -12,6 +12,16 @@ * The inner `Yaml` variant doesn't match `Yaml::Array` for `usize` or `Yaml::Hash` for `&'a str` +- Use cargo features + + This allows for more fine-grained control over MSRV and to completely remove + debug code from the library when it is consumed. + + The `encoding` feature, governing the `YamlDecoder`, has been enabled by + default. Users of `@davvid`'s fork of `yaml-rust` or of `yaml-rust2` might + already use this. Users of the original `yaml-rust` crate may freely disable + this feature (`cargo <...> --no-default-features`) and lower MSRV to 1.65.0. + ## v0.8.0 **Breaking Changes**: diff --git a/saphyr/Cargo.toml b/saphyr/Cargo.toml index b75c474..f4756c1 100644 --- a/saphyr/Cargo.toml +++ b/saphyr/Cargo.toml @@ -12,10 +12,16 @@ description = "A fully YAML 1.2 compliant YAML parser" repository = "https://github.com/Ethiraric/yaml-rust2" readme = "README.md" edition = "2021" +rust-version = "1.65.0" + +[features] +default = [ "encoding" ] +debug_prints = [] +encoding = [ "dep:encoding_rs" ] [dependencies] arraydeque = "0.5.1" -encoding_rs = "0.8.33" +encoding_rs = { version = "0.8.33", optional = true } hashlink = "0.8" [dev-dependencies] diff --git a/saphyr/src/debug.rs b/saphyr/src/debug.rs index b43ea2e..c1411cb 100644 --- a/saphyr/src/debug.rs +++ b/saphyr/src/debug.rs @@ -7,17 +7,17 @@ //! build, debug helpers will only trigger if that variable is set when running the program. // If a debug build, use stuff in the debug submodule. -#[cfg(debug_assertions)] +#[cfg(feature = "debug_prints")] pub use debug::enabled; // Otherwise, just export dummies for publicly visible functions. /// Evaluates to nothing. -#[cfg(not(debug_assertions))] +#[cfg(not(feature = "debug_prints"))] macro_rules! debug_print { ($($arg:tt)*) => {{}}; } -#[cfg(debug_assertions)] +#[cfg(feature = "debug_prints")] #[macro_use] #[allow(clippy::module_inception)] mod debug { diff --git a/saphyr/src/emitter.rs b/saphyr/src/emitter.rs index 60eda8a..48c8b5c 100644 --- a/saphyr/src/emitter.rs +++ b/saphyr/src/emitter.rs @@ -345,7 +345,7 @@ impl<'a> YamlEmitter<'a> { /// Strings starting with any of the following characters must be quoted. /// :, &, *, ?, |, -, <, >, =, !, %, @ /// Strings containing any of the following characters must be quoted. -/// {, }, [, ], ,, #, ` +/// {, }, \[, t \], ,, #, ` /// /// If the string contains any of the following control characters, it must be escaped with double quotes: /// \0, \x01, \x02, \x03, \x04, \x05, \x06, \a, \b, \t, \n, \v, \f, \r, \x0e, \x0f, \x10, \x11, \x12, \x13, \x14, \x15, \x16, \x17, \x18, \x19, \x1a, \e, \x1c, \x1d, \x1e, \x1f, \N, \_, \L, \P diff --git a/saphyr/src/lib.rs b/saphyr/src/lib.rs index 5fc106b..a171691 100644 --- a/saphyr/src/lib.rs +++ b/saphyr/src/lib.rs @@ -29,6 +29,19 @@ //! emitter.dump(doc).unwrap(); // dump the YAML object to a String //! //! ``` +//! +//! # Features +//! #### `encoding` (_enabled by default_) +//! Enables encoding-aware decoding of Yaml documents. +//! +//! This bumps MSRV up to `1.70.0`. +//! +//! #### `debug_prints` +//! Enables the `debug` module and usage of debug prints in the scanner and the parser. Do not +//! enable if you are consuming the crate rather than working on it as this can significantly +//! decrease performance. +//! +//! This bumps MSRV up to 1.70.0. #![warn(missing_docs, clippy::pedantic)] diff --git a/saphyr/src/yaml.rs b/saphyr/src/yaml.rs index 9486d1c..3c429d5 100644 --- a/saphyr/src/yaml.rs +++ b/saphyr/src/yaml.rs @@ -6,6 +6,7 @@ use std::borrow::Cow; use std::ops::ControlFlow; use std::{collections::BTreeMap, convert::TryFrom, mem, ops::Index, ops::IndexMut}; +#[cfg(feature = "encoding")] use encoding_rs::{Decoder, DecoderResult, Encoding}; use hashlink::LinkedHashMap; @@ -279,6 +280,7 @@ impl YamlLoader { /// # Returns /// The function must return [`ControlFlow::Continue`] if decoding may continue or /// [`ControlFlow::Break`] if decoding must be aborted. An optional error string may be supplied. +#[cfg(feature = "encoding")] pub type YAMLDecodingTrapFn = fn( malformation_length: u8, bytes_read_after_malformation: u8, @@ -287,6 +289,7 @@ pub type YAMLDecodingTrapFn = fn( ) -> ControlFlow>; /// The behavior [`YamlDecoder`] must have when an decoding error occurs. +#[cfg(feature = "encoding")] #[derive(Copy, Clone, PartialEq, Eq)] pub enum YAMLDecodingTrap { /// Ignore the offending bytes, remove them from the output. @@ -315,11 +318,13 @@ pub enum YAMLDecodingTrap { /// .decode() /// .unwrap(); /// ``` +#[cfg(feature = "encoding")] pub struct YamlDecoder { source: T, trap: YAMLDecodingTrap, } +#[cfg(feature = "encoding")] impl YamlDecoder { /// Create a `YamlDecoder` decoding the given source. pub fn read(source: T) -> YamlDecoder { @@ -358,6 +363,7 @@ impl YamlDecoder { } /// Perform a loop of [`Decoder::decode_to_string`], reallocating `output` if needed. +#[cfg(feature = "encoding")] fn decode_loop( input: &[u8], output: &mut String, @@ -433,6 +439,7 @@ fn decode_loop( /// This allows the encoding to be deduced by the pattern of null (#x00) characters. // /// See spec at +#[cfg(feature = "encoding")] fn detect_utf16_endianness(b: &[u8]) -> &'static Encoding { if b.len() > 1 && (b[0] != b[1]) { if b[0] == 0 { From d618d06061d59647bd97b7dd76d84cfc12af4a0c Mon Sep 17 00:00:00 2001 From: Ethiraric Date: Sat, 30 Mar 2024 19:39:53 +0100 Subject: [PATCH 366/380] Messed up the MSRV. --- saphyr/Cargo.toml | 2 +- saphyr/src/lib.rs | 6 ++++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/saphyr/Cargo.toml b/saphyr/Cargo.toml index f4756c1..6eb3390 100644 --- a/saphyr/Cargo.toml +++ b/saphyr/Cargo.toml @@ -12,7 +12,7 @@ description = "A fully YAML 1.2 compliant YAML parser" repository = "https://github.com/Ethiraric/yaml-rust2" readme = "README.md" edition = "2021" -rust-version = "1.65.0" +rust-version = "1.70.0" [features] default = [ "encoding" ] diff --git a/saphyr/src/lib.rs b/saphyr/src/lib.rs index a171691..d430177 100644 --- a/saphyr/src/lib.rs +++ b/saphyr/src/lib.rs @@ -31,17 +31,19 @@ //! ``` //! //! # Features +//! **Note:** With all features disabled, this crate's MSRV is `1.65.0`. +//! //! #### `encoding` (_enabled by default_) //! Enables encoding-aware decoding of Yaml documents. //! -//! This bumps MSRV up to `1.70.0`. +//! The MSRV for this feature is `1.70.0`. //! //! #### `debug_prints` //! Enables the `debug` module and usage of debug prints in the scanner and the parser. Do not //! enable if you are consuming the crate rather than working on it as this can significantly //! decrease performance. //! -//! This bumps MSRV up to 1.70.0. +//! The MSRV for this feature is `1.70.0`. #![warn(missing_docs, clippy::pedantic)] From 30b713d7a7d9a6e2bcc1ec760d2869c074e4d4f9 Mon Sep 17 00:00:00 2001 From: Ethiraric Date: Tue, 2 Apr 2024 18:49:52 +0200 Subject: [PATCH 367/380] `yaml-rust2` -> `saphyr` --- saphyr/.github/workflows/ci.yml | 1 - saphyr/.gitmodules | 3 - saphyr/Cargo.toml | 31 +- saphyr/README.md | 61 +- saphyr/examples/dump_yaml.rs | 2 +- saphyr/justfile | 9 +- saphyr/src/char_traits.rs | 110 - saphyr/src/debug.rs | 41 - saphyr/src/emitter.rs | 6 +- saphyr/src/lib.rs | 41 +- saphyr/src/parser.rs | 1143 --------- saphyr/src/scanner.rs | 2593 --------------------- saphyr/src/yaml.rs | 11 +- saphyr/tests/basic.rs | 209 +- saphyr/tests/emitter.rs | 2 +- saphyr/tests/quickcheck.rs | 4 +- saphyr/tests/scanner.rs | 440 ---- saphyr/tests/spec_test.rs | 79 +- saphyr/tests/test_round_trip.rs | 4 +- saphyr/tests/yaml-test-suite | 1 - saphyr/tests/yaml-test-suite.rs | 295 --- saphyr/tools/README.md | 229 -- saphyr/tools/bench_compare/Cargo.toml | 21 - saphyr/tools/bench_compare/README.md | 120 - saphyr/tools/bench_compare/src/main.rs | 174 -- saphyr/tools/dump_events.rs | 38 - saphyr/tools/gen_large_yaml/Cargo.toml | 20 - saphyr/tools/gen_large_yaml/src/gen.rs | 156 -- saphyr/tools/gen_large_yaml/src/main.rs | 261 --- saphyr/tools/gen_large_yaml/src/nested.rs | 115 - saphyr/tools/run_bench.rs | 71 - saphyr/tools/time_parse.rs | 36 - 32 files changed, 69 insertions(+), 6258 deletions(-) delete mode 100644 saphyr/.gitmodules delete mode 100644 saphyr/src/debug.rs delete mode 100644 saphyr/src/parser.rs delete mode 100644 saphyr/src/scanner.rs delete mode 100644 saphyr/tests/scanner.rs delete mode 160000 saphyr/tests/yaml-test-suite delete mode 100644 saphyr/tests/yaml-test-suite.rs delete mode 100644 saphyr/tools/README.md delete mode 100644 saphyr/tools/bench_compare/Cargo.toml delete mode 100644 saphyr/tools/bench_compare/README.md delete mode 100644 saphyr/tools/bench_compare/src/main.rs delete mode 100644 saphyr/tools/dump_events.rs delete mode 100644 saphyr/tools/gen_large_yaml/Cargo.toml delete mode 100644 saphyr/tools/gen_large_yaml/src/gen.rs delete mode 100644 saphyr/tools/gen_large_yaml/src/main.rs delete mode 100644 saphyr/tools/gen_large_yaml/src/nested.rs delete mode 100644 saphyr/tools/run_bench.rs delete mode 100644 saphyr/tools/time_parse.rs diff --git a/saphyr/.github/workflows/ci.yml b/saphyr/.github/workflows/ci.yml index aa3a0fc..57aa15e 100644 --- a/saphyr/.github/workflows/ci.yml +++ b/saphyr/.github/workflows/ci.yml @@ -31,7 +31,6 @@ jobs: steps: - name: Checkout uses: actions/checkout@v3 - - run: git submodule update --init - run: rustup toolchain install ${{ matrix.rust }} --profile minimal --no-self-update - uses: Swatinem/rust-cache@v2 - name: Run build diff --git a/saphyr/.gitmodules b/saphyr/.gitmodules deleted file mode 100644 index cbc1e88..0000000 --- a/saphyr/.gitmodules +++ /dev/null @@ -1,3 +0,0 @@ -[submodule "tests/yaml-test-suite"] - path = tests/yaml-test-suite - url = https://github.com/yaml/yaml-test-suite/ diff --git a/saphyr/Cargo.toml b/saphyr/Cargo.toml index 6eb3390..4b62419 100644 --- a/saphyr/Cargo.toml +++ b/saphyr/Cargo.toml @@ -1,49 +1,34 @@ [package] -name = "yaml-rust2" -version = "0.8.0" +name = "saphyr" +version = "0.0.1" authors = [ "Yuheng Chen ", "Ethiraric ", "David Aguilar " ] -documentation = "https://docs.rs/yaml-rust2" +documentation = "https://docs.rs/saphyr" +keywords = [ "yaml", "parser" ] +categories = [ "encoding", "parser-implementations" ] license = "MIT OR Apache-2.0" -description = "A fully YAML 1.2 compliant YAML parser" -repository = "https://github.com/Ethiraric/yaml-rust2" +description = "A fully YAML 1.2 compliant YAML library" +repository = "https://github.com/saphyr-rs/saphyr" readme = "README.md" edition = "2021" rust-version = "1.70.0" [features] default = [ "encoding" ] -debug_prints = [] encoding = [ "dep:encoding_rs" ] [dependencies] arraydeque = "0.5.1" +saphyr-parser = "0.0.1" encoding_rs = { version = "0.8.33", optional = true } hashlink = "0.8" [dev-dependencies] -libtest-mimic = "0.3.0" quickcheck = "1.0" [profile.release-lto] inherits = "release" lto = true - -[[test]] -name = "yaml-test-suite" -harness = false - -[[bin]] -name = "dump_events" -path = "tools/dump_events.rs" - -[[bin]] -name = "time_parse" -path = "tools/time_parse.rs" - -[[bin]] -name = "run_bench" -path = "tools/run_bench.rs" diff --git a/saphyr/README.md b/saphyr/README.md index d9f12ad..1b417b9 100644 --- a/saphyr/README.md +++ b/saphyr/README.md @@ -1,29 +1,34 @@ -# yaml-rust2 +# saphyr -[yaml-rust2](https://github.com/Ethiraric/yaml-rust2) is a fully compliant YAML 1.2 -implementation written in pure Rust. +[saphyr](https://github.com/saphyr-rs/saphyr) is a fully compliant YAML 1.2 +library written in pure Rust. This work is based on [`yaml-rust`](https://github.com/chyh1990/yaml-rust) with fixes towards being compliant to the [YAML test suite](https://github.com/yaml/yaml-test-suite/). `yaml-rust`'s parser is heavily influenced by `libyaml` and `yaml-cpp`. -`yaml-rust2` is a pure Rust YAML 1.2 implementation that benefits from the +`saphyr` is a pure Rust YAML 1.2 implementation that benefits from the memory safety and other benefits from the Rust language. ## Quick Start - -Add the following to the Cargo.toml of your project: +### Installing +Add the following to your Cargo.toml: ```toml [dependencies] -yaml-rust2 = "0.8" +saphyr = "0.0.1" +``` +or use `cargo add` to get the latest version automatically: +```sh +cargo add saphyr ``` -Use `yaml_rust2::YamlLoader` to load YAML documents and access them as `Yaml` objects: +### Example +Use `saphyr::YamlLoader` to load YAML documents and access them as `Yaml` objects: ```rust -use yaml_rust2::{YamlLoader, YamlEmitter}; +use saphyr::{YamlLoader, YamlEmitter}; fn main() { let s = @@ -61,7 +66,7 @@ bar: } ``` -Note that `yaml_rust2::Yaml` implements `Index<&'a str>` and `Index`: +Note that `saphyr::Yaml` implements `Index<&'a str>` and `Index`: * `Index` assumes the container is an array * `Index<&'a str>` assumes the container is a string to value map @@ -75,7 +80,6 @@ your objects. * Pure Rust * `Vec`/`HashMap` access API -* Low-level YAML events emission ## Security @@ -85,24 +89,10 @@ communicating with the outside world just by parsing a YAML document. ## Specification Compliance -This implementation is fully compatible with the YAML 1.2 specification. In -order to help with compliance, `yaml-rust2` tests against (and passes) the [YAML -test suite](https://github.com/yaml/yaml-test-suite/). - -## Upgrading from yaml-rust - -You can use `yaml-rust2` as a drop-in replacement for the original `yaml-rust` crate. - -```toml -[dependencies] -yaml-rust = { version = "#.#", package = "yaml-rust2" } -``` - -This `Cargo.toml` declaration allows you to refer to this crate as `yaml_rust` in your code. - -```rust -use yaml_rust::{YamlLoader, YamlEmitter}; -``` +This implementation is fully compatible with the YAML 1.2 specification. The +parser behind this library +([`saphyr-parser`](https://github.com/saphyr-rs/saphyr-parser)) tests against +(and passes) the [YAML test suite](https://github.com/yaml/yaml-test-suite/). ## License @@ -122,10 +112,9 @@ You can find licences in the [`.licenses`](.licenses) subfolder. ## Contribution -[Fork this repository](https://github.com/Ethiraric/yaml-rust2/fork) and -[Create a Pull Request on Github](https://github.com/Ethiraric/yaml-rust2/compare/master...Ethiraric:yaml-rust2:master). +[Fork this repository](https://github.com/saphyr-rs/saphyr/fork) and +[Create a Pull Request on Github](https://github.com/saphyr-rs/saphyr/compare/master...saphyr-rs:saphyr:master). You may need to click on "compare across forks" and select your fork's branch. -Make sure that `Ethiraric` is selected as the base repository, not `chyh1990`. Unless you explicitly state otherwise, any contribution intentionally submitted for inclusion in the work by you, as defined in the Apache-2.0 license, shall @@ -133,10 +122,12 @@ be dual licensed as above, without any additional terms or conditions. ## Links -* [yaml-rust2 source code repository](https://github.com/Ethiraric/yaml-rust2) +* [saphyr source code repository](https://github.com/saphyr-rs/saphyr) -* [yaml-rust2 releases on crates.io](https://crates.io/crates/yaml-rust2) +* [saphyr releases on crates.io](https://crates.io/crates/saphyr) -* [yaml-rust2 documentation on docs.rs](https://docs.rs/yaml-rust2/latest/yaml_rust2/) +* [saphyr documentation on docs.rs](https://docs.rs/saphyr/latest/saphyr/) + +* [saphyr-parser releases on crates.io](https://crates.io/crates/saphyr-parser) * [yaml-test-suite](https://github.com/yaml/yaml-test-suite) diff --git a/saphyr/examples/dump_yaml.rs b/saphyr/examples/dump_yaml.rs index 1c3c452..1a9f0f5 100644 --- a/saphyr/examples/dump_yaml.rs +++ b/saphyr/examples/dump_yaml.rs @@ -1,7 +1,7 @@ +use saphyr::yaml; use std::env; use std::fs::File; use std::io::prelude::*; -use yaml_rust2::yaml; fn print_indent(indent: usize) { for _ in 0..indent { diff --git a/saphyr/justfile b/saphyr/justfile index f33ee69..47d601c 100644 --- a/saphyr/justfile +++ b/saphyr/justfile @@ -1,4 +1,5 @@ before_commit: + cargo fmt --check cargo clippy --release --all-targets -- -D warnings cargo clippy --all-targets -- -D warnings cargo build --release --all-targets @@ -6,12 +7,4 @@ before_commit: cargo test cargo test --release cargo test --doc - cargo build --profile=release-lto --package gen_large_yaml --bin gen_large_yaml --manifest-path tools/gen_large_yaml/Cargo.toml RUSTDOCFLAGS="-D warnings" cargo doc --all-features - -ethi_bench: - cargo build --release --all-targets - cd ../Yaml-rust && cargo build --release --all-targets - cd ../serde-yaml/ && cargo build --release --all-targets - cd ../libfyaml/build && ninja - cargo bench_compare run_bench diff --git a/saphyr/src/char_traits.rs b/saphyr/src/char_traits.rs index 82f81bd..c54aff0 100644 --- a/saphyr/src/char_traits.rs +++ b/saphyr/src/char_traits.rs @@ -1,115 +1,5 @@ //! Holds functions to determine if a character belongs to a specific character set. -/// Check whether the character is nil (`\0`). -#[inline] -pub(crate) fn is_z(c: char) -> bool { - c == '\0' -} - -/// Check whether the character is a line break (`\r` or `\n`). -#[inline] -pub(crate) fn is_break(c: char) -> bool { - c == '\n' || c == '\r' -} - -/// Check whether the character is nil or a line break (`\0`, `\r`, `\n`). -#[inline] -pub(crate) fn is_breakz(c: char) -> bool { - is_break(c) || is_z(c) -} - -/// Check whether the character is a whitespace (` ` or `\t`). -#[inline] -pub(crate) fn is_blank(c: char) -> bool { - c == ' ' || c == '\t' -} - -/// Check whether the character is nil, a linebreak or a whitespace. -/// -/// `\0`, ` `, `\t`, `\n`, `\r` -#[inline] -pub(crate) fn is_blank_or_breakz(c: char) -> bool { - is_blank(c) || is_breakz(c) -} - -/// Check whether the character is an ascii digit. -#[inline] -pub(crate) fn is_digit(c: char) -> bool { - c.is_ascii_digit() -} - -/// Check whether the character is a digit, letter, `_` or `-`. -#[inline] -pub(crate) fn is_alpha(c: char) -> bool { - matches!(c, '0'..='9' | 'a'..='z' | 'A'..='Z' | '_' | '-') -} - -/// Check whether the character is a hexadecimal character (case insensitive). -#[inline] -pub(crate) fn is_hex(c: char) -> bool { - c.is_ascii_digit() || ('a'..='f').contains(&c) || ('A'..='F').contains(&c) -} - -/// Convert the hexadecimal digit to an integer. -#[inline] -pub(crate) fn as_hex(c: char) -> u32 { - match c { - '0'..='9' => (c as u32) - ('0' as u32), - 'a'..='f' => (c as u32) - ('a' as u32) + 10, - 'A'..='F' => (c as u32) - ('A' as u32) + 10, - _ => unreachable!(), - } -} - -/// Check whether the character is a YAML flow character (one of `,[]{}`). -#[inline] -pub(crate) fn is_flow(c: char) -> bool { - matches!(c, ',' | '[' | ']' | '{' | '}') -} - -/// Check whether the character is the BOM character. -#[inline] -pub(crate) fn is_bom(c: char) -> bool { - c == '\u{FEFF}' -} - -/// Check whether the character is a YAML non-breaking character. -#[inline] -pub(crate) fn is_yaml_non_break(c: char) -> bool { - // TODO(ethiraric, 28/12/2023): is_printable - !is_break(c) && !is_bom(c) -} - -/// Check whether the character is NOT a YAML whitespace (` ` / `\t`). -#[inline] -pub(crate) fn is_yaml_non_space(c: char) -> bool { - is_yaml_non_break(c) && !is_blank(c) -} - -/// Check whether the character is a valid YAML anchor name character. -#[inline] -pub(crate) fn is_anchor_char(c: char) -> bool { - is_yaml_non_space(c) && !is_flow(c) && !is_z(c) -} - -/// Check whether the character is a valid word character. -#[inline] -pub(crate) fn is_word_char(c: char) -> bool { - is_alpha(c) && c != '_' -} - -/// Check whether the character is a valid URI character. -#[inline] -pub(crate) fn is_uri_char(c: char) -> bool { - is_word_char(c) || "#;/?:@&=+$,_.!~*\'()[]%".contains(c) -} - -/// Check whether the character is a valid tag character. -#[inline] -pub(crate) fn is_tag_char(c: char) -> bool { - is_uri_char(c) && !is_flow(c) && c != '!' -} - /// Check if the string can be expressed a valid literal block scalar. /// The YAML spec supports all of the following in block literals except `#xFEFF`: /// ```no_compile diff --git a/saphyr/src/debug.rs b/saphyr/src/debug.rs deleted file mode 100644 index c1411cb..0000000 --- a/saphyr/src/debug.rs +++ /dev/null @@ -1,41 +0,0 @@ -//! Debugging helpers. -//! -//! Debugging is governed by two conditions: -//! 1. The build mode. Debugging code is not emitted in release builds and thus not available. -//! 2. The `YAMLALL_DEBUG` environment variable. If built in debug mode, the program must be fed -//! the `YAMLALL_DEBUG` variable in its environment. While debugging code is present in debug -//! build, debug helpers will only trigger if that variable is set when running the program. - -// If a debug build, use stuff in the debug submodule. -#[cfg(feature = "debug_prints")] -pub use debug::enabled; - -// Otherwise, just export dummies for publicly visible functions. -/// Evaluates to nothing. -#[cfg(not(feature = "debug_prints"))] -macro_rules! debug_print { - ($($arg:tt)*) => {{}}; -} - -#[cfg(feature = "debug_prints")] -#[macro_use] -#[allow(clippy::module_inception)] -mod debug { - use std::sync::OnceLock; - - /// If debugging is [`enabled`], print the format string on the error output. - macro_rules! debug_print { - ($($arg:tt)*) => {{ - if $crate::debug::enabled() { - eprintln!($($arg)*) - } - }}; - } - - /// Return whether debugging features are enabled in this execution. - #[cfg(debug_assertions)] - pub fn enabled() -> bool { - static ENABLED: OnceLock = OnceLock::new(); - *ENABLED.get_or_init(|| std::env::var("YAMLRUST2_DEBUG").is_ok()) - } -} diff --git a/saphyr/src/emitter.rs b/saphyr/src/emitter.rs index 48c8b5c..19d8d4a 100644 --- a/saphyr/src/emitter.rs +++ b/saphyr/src/emitter.rs @@ -36,7 +36,7 @@ impl From for EmitError { /// The YAML serializer. /// /// ``` -/// # use yaml_rust2::{YamlLoader, YamlEmitter}; +/// # use saphyr::{YamlLoader, YamlEmitter}; /// let input_string = "a: b\nc: d"; /// let yaml = YamlLoader::load_from_str(input_string).unwrap(); /// @@ -142,6 +142,8 @@ impl<'a> YamlEmitter<'a> { /// In this form, blocks cannot have any properties (such as anchors /// or tags), which should be OK, because this emitter doesn't /// (currently) emit those anyways. + /// + /// TODO(ethiraric, 2024/04/02): We can support those now. pub fn compact(&mut self, compact: bool) { self.compact = compact; } @@ -157,7 +159,7 @@ impl<'a> YamlEmitter<'a> { /// # Examples /// /// ```rust - /// use yaml_rust2::{Yaml, YamlEmitter, YamlLoader}; + /// use saphyr::{Yaml, YamlEmitter, YamlLoader}; /// /// let input = r#"{foo: "bar!\nbar!", baz: 42}"#; /// let parsed = YamlLoader::load_from_str(input).unwrap(); diff --git a/saphyr/src/lib.rs b/saphyr/src/lib.rs index d430177..aaed759 100644 --- a/saphyr/src/lib.rs +++ b/saphyr/src/lib.rs @@ -6,19 +6,22 @@ //! //! # Usage //! -//! This crate is [on github](https://github.com/Ethiraric/yaml-rust2) and can be used by adding -//! `yaml-rust2` to the dependencies in your project's `Cargo.toml`. -//! +//! This crate is [on github](https://github.com/saphyr-rs/saphyr) and can be used by adding +//! `saphyr` to the dependencies in your project's `Cargo.toml`. //! ```toml //! [dependencies] -//! yaml-rust2 = "0.8.0" +//! saphyr = "0.0.1" +//! ``` +//! or by using `cargo add` to get the latest version: +//! ```sh +//! cargo add saphyr //! ``` //! //! # Examples //! Parse a string into `Vec` and then serialize it as a YAML string. //! //! ``` -//! use yaml_rust2::{YamlLoader, YamlEmitter}; +//! use saphyr::{YamlLoader, YamlEmitter}; //! //! let docs = YamlLoader::load_from_str("[1, 2, 3]").unwrap(); //! let doc = &docs[0]; // select the first YAML document @@ -37,28 +40,20 @@ //! Enables encoding-aware decoding of Yaml documents. //! //! The MSRV for this feature is `1.70.0`. -//! -//! #### `debug_prints` -//! Enables the `debug` module and usage of debug prints in the scanner and the parser. Do not -//! enable if you are consuming the crate rather than working on it as this can significantly -//! decrease performance. -//! -//! The MSRV for this feature is `1.70.0`. #![warn(missing_docs, clippy::pedantic)] -extern crate hashlink; - pub(crate) mod char_traits; -#[macro_use] -pub(crate) mod debug; pub mod emitter; -pub mod parser; -pub mod scanner; pub mod yaml; -// reexport key APIs -pub use crate::emitter::{EmitError, YamlEmitter}; -pub use crate::parser::Event; -pub use crate::scanner::ScanError; -pub use crate::yaml::{Yaml, YamlLoader}; +// Re-export main components. +pub use crate::emitter::YamlEmitter; +pub use crate::yaml::{Array, Hash, Yaml, YamlLoader}; + +#[cfg(feature = "encoding")] +pub use crate::yaml::{YAMLDecodingTrap, YAMLDecodingTrapFn, YamlDecoder}; + +// Re-export `ScanError` as it is used as part of our public API and we want consumers to be able +// to inspect it (e.g. perform a `match`). They wouldn't be able without it. +pub use saphyr_parser::ScanError; diff --git a/saphyr/src/parser.rs b/saphyr/src/parser.rs deleted file mode 100644 index 59869a2..0000000 --- a/saphyr/src/parser.rs +++ /dev/null @@ -1,1143 +0,0 @@ -//! Home to the YAML Parser. -//! -//! The parser takes input from the [`crate::scanner::Scanner`], performs final checks for YAML -//! compliance, and emits a stream of tokens that can be used by the [`crate::YamlLoader`] to -//! construct the [`crate::Yaml`] object. - -use crate::scanner::{Marker, ScanError, Scanner, TScalarStyle, Token, TokenType}; -use std::collections::HashMap; - -#[derive(Clone, Copy, PartialEq, Debug, Eq)] -enum State { - /// We await the start of the stream. - StreamStart, - ImplicitDocumentStart, - DocumentStart, - DocumentContent, - DocumentEnd, - BlockNode, - // BlockNodeOrIndentlessSequence, - // FlowNode, - BlockSequenceFirstEntry, - BlockSequenceEntry, - IndentlessSequenceEntry, - BlockMappingFirstKey, - BlockMappingKey, - BlockMappingValue, - FlowSequenceFirstEntry, - FlowSequenceEntry, - FlowSequenceEntryMappingKey, - FlowSequenceEntryMappingValue, - FlowSequenceEntryMappingEnd, - FlowMappingFirstKey, - FlowMappingKey, - FlowMappingValue, - FlowMappingEmptyValue, - End, -} - -/// An event generated by the YAML parser. -/// -/// Events are used in the low-level event-based API (push parser). The API entrypoint is the -/// [`EventReceiver`] trait. -#[derive(Clone, PartialEq, Debug, Eq)] -pub enum Event { - /// Reserved for internal use. - Nothing, - /// Event generated at the very beginning of parsing. - StreamStart, - /// Last event that will be generated by the parser. Signals EOF. - StreamEnd, - /// The YAML start document directive (`---`). - DocumentStart, - /// The YAML end document directive (`...`). - DocumentEnd, - /// A YAML Alias. - Alias( - /// The anchor ID the alias refers to. - usize, - ), - /// Value, style, anchor_id, tag - Scalar(String, TScalarStyle, usize, Option), - /// The start of a YAML sequence (array). - SequenceStart( - /// The anchor ID of the start of the sequence. - usize, - /// An optional tag - Option, - ), - /// The end of a YAML sequence (array). - SequenceEnd, - /// The start of a YAML mapping (object, hash). - MappingStart( - /// The anchor ID of the start of the mapping. - usize, - /// An optional tag - Option, - ), - /// The end of a YAML mapping (object, hash). - MappingEnd, -} - -/// A YAML tag. -#[derive(Clone, PartialEq, Debug, Eq)] -pub struct Tag { - /// Handle of the tag (`!` included). - pub handle: String, - /// The suffix of the tag. - pub suffix: String, -} - -impl Event { - /// Create an empty scalar. - fn empty_scalar() -> Event { - // a null scalar - Event::Scalar("~".to_owned(), TScalarStyle::Plain, 0, None) - } - - /// Create an empty scalar with the given anchor. - fn empty_scalar_with_anchor(anchor: usize, tag: Option) -> Event { - Event::Scalar(String::new(), TScalarStyle::Plain, anchor, tag) - } -} - -/// A YAML parser. -#[derive(Debug)] -pub struct Parser { - scanner: Scanner, - states: Vec, - state: State, - token: Option, - current: Option<(Event, Marker)>, - anchors: HashMap, - anchor_id: usize, - /// The tag directives (`%TAG`) the parser has encountered. - /// - /// Key is the handle, and value is the prefix. - tags: HashMap, - /// Make tags global across all documents. - keep_tags: bool, -} - -/// Trait to be implemented in order to use the low-level parsing API. -/// -/// The low-level parsing API is event-based (a push parser), calling [`EventReceiver::on_event`] -/// for each YAML [`Event`] that occurs. -/// The [`EventReceiver`] trait only receives events. In order to receive both events and their -/// location in the source, use [`MarkedEventReceiver`]. Note that [`EventReceiver`]s implement -/// [`MarkedEventReceiver`] automatically. -/// -/// # Event hierarchy -/// The event stream starts with an [`Event::StreamStart`] event followed by an -/// [`Event::DocumentStart`] event. If the YAML document starts with a mapping (an object), an -/// [`Event::MappingStart`] event is emitted. If it starts with a sequence (an array), an -/// [`Event::SequenceStart`] event is emitted. Otherwise, an [`Event::Scalar`] event is emitted. -/// -/// In a mapping, key-values are sent as consecutive events. The first event after an -/// [`Event::MappingStart`] will be the key, and following its value. If the mapping contains no -/// sub-mapping or sub-sequence, then even events (starting from 0) will always be keys and odd -/// ones will always be values. The mapping ends when an [`Event::MappingEnd`] event is received. -/// -/// In a sequence, values are sent consecutively until the [`Event::SequenceEnd`] event. -/// -/// If a value is a sub-mapping or a sub-sequence, an [`Event::MappingStart`] or -/// [`Event::SequenceStart`] event will be sent respectively. Following events until the associated -/// [`Event::MappingStart`] or [`Event::SequenceEnd`] (beware of nested mappings or sequences) will -/// be part of the value and not another key-value pair or element in the sequence. -/// -/// For instance, the following yaml: -/// ```yaml -/// a: b -/// c: -/// d: e -/// f: -/// - g -/// - h -/// ``` -/// will emit (indented and commented for lisibility): -/// ```text -/// StreamStart, DocumentStart, MappingStart, -/// Scalar("a", ..), Scalar("b", ..) -/// Scalar("c", ..), MappingStart, Scalar("d", ..), Scalar("e", ..), MappingEnd, -/// Scalar("f", ..), SequenceStart, Scalar("g", ..), Scalar("h", ..), SequenceEnd, -/// MappingEnd, DocumentEnd, StreamEnd -/// ``` -/// -/// # Example -/// ``` -/// # use yaml_rust2::parser::{Event, EventReceiver, Parser}; -/// # -/// /// Sink of events. Collects them into an array. -/// struct EventSink { -/// events: Vec, -/// } -/// -/// /// Implement `on_event`, pushing into `self.events`. -/// impl EventReceiver for EventSink { -/// fn on_event(&mut self, ev: Event) { -/// self.events.push(ev); -/// } -/// } -/// -/// /// Load events from a yaml string. -/// fn str_to_events(yaml: &str) -> Vec { -/// let mut sink = EventSink { events: Vec::new() }; -/// let mut parser = Parser::new_from_str(yaml); -/// // Load events using our sink as the receiver. -/// parser.load(&mut sink, true).unwrap(); -/// sink.events -/// } -/// ``` -pub trait EventReceiver { - /// Handler called for each YAML event that is emitted by the parser. - fn on_event(&mut self, ev: Event); -} - -/// Trait to be implemented for using the low-level parsing API. -/// -/// Functionally similar to [`EventReceiver`], but receives a [`Marker`] as well as the event. -pub trait MarkedEventReceiver { - /// Handler called for each event that occurs. - fn on_event(&mut self, ev: Event, _mark: Marker); -} - -impl MarkedEventReceiver for R { - fn on_event(&mut self, ev: Event, _mark: Marker) { - self.on_event(ev); - } -} - -/// A convenience alias for a `Result` of a parser event. -pub type ParseResult = Result<(Event, Marker), ScanError>; - -impl<'a> Parser> { - /// Create a new instance of a parser from a &str. - #[must_use] - pub fn new_from_str(value: &'a str) -> Self { - Parser::new(value.chars()) - } -} - -impl> Parser { - /// Create a new instance of a parser from the given input of characters. - pub fn new(src: T) -> Parser { - Parser { - scanner: Scanner::new(src), - states: Vec::new(), - state: State::StreamStart, - token: None, - current: None, - - anchors: HashMap::new(), - // valid anchor_id starts from 1 - anchor_id: 1, - tags: HashMap::new(), - keep_tags: false, - } - } - - /// Whether to keep tags across multiple documents when parsing. - /// - /// This behavior is non-standard as per the YAML specification but can be encountered in the - /// wild. This boolean allows enabling this non-standard extension. This would result in the - /// parser accepting input from [test - /// QLJ7](https://github.com/yaml/yaml-test-suite/blob/ccfa74e56afb53da960847ff6e6976c0a0825709/src/QLJ7.yaml) - /// of the yaml-test-suite: - /// - /// ```yaml - /// %TAG !prefix! tag:example.com,2011: - /// --- !prefix!A - /// a: b - /// --- !prefix!B - /// c: d - /// --- !prefix!C - /// e: f - /// ``` - /// - /// With `keep_tags` set to `false`, the above YAML is rejected. As per the specification, tags - /// only apply to the document immediately following them. This would error on `!prefix!B`. - /// - /// With `keep_tags` set to `true`, the above YAML is accepted by the parser. - #[must_use] - pub fn keep_tags(mut self, value: bool) -> Self { - self.keep_tags = value; - self - } - - /// Try to load the next event and return it, but do not consuming it from `self`. - /// - /// Any subsequent call to [`Parser::peek`] will return the same value, until a call to - /// [`Iterator::next`] or [`Parser::load`]. - /// # Errors - /// Returns `ScanError` when loading the next event fails. - pub fn peek(&mut self) -> Result<&(Event, Marker), ScanError> { - if let Some(ref x) = self.current { - Ok(x) - } else { - self.current = Some(self.next_token()?); - self.peek() - } - } - - /// Try to load the next event and return it, consuming it from `self`. - /// # Errors - /// Returns `ScanError` when loading the next event fails. - pub fn next_token(&mut self) -> ParseResult { - match self.current.take() { - None => self.parse(), - Some(v) => Ok(v), - } - } - - /// Peek at the next token from the scanner. - fn peek_token(&mut self) -> Result<&Token, ScanError> { - match self.token { - None => { - self.token = Some(self.scan_next_token()?); - Ok(self.token.as_ref().unwrap()) - } - Some(ref tok) => Ok(tok), - } - } - - /// Extract and return the next token from the scanner. - /// - /// This function does _not_ make use of `self.token`. - fn scan_next_token(&mut self) -> Result { - let token = self.scanner.next(); - match token { - None => match self.scanner.get_error() { - None => Err(ScanError::new(self.scanner.mark(), "unexpected eof")), - Some(e) => Err(e), - }, - Some(tok) => Ok(tok), - } - } - - fn fetch_token(&mut self) -> Token { - self.token - .take() - .expect("fetch_token needs to be preceded by peek_token") - } - - /// Skip the next token from the scanner. - fn skip(&mut self) { - self.token = None; - //self.peek_token(); - } - /// Pops the top-most state and make it the current state. - fn pop_state(&mut self) { - self.state = self.states.pop().unwrap(); - } - /// Push a new state atop the state stack. - fn push_state(&mut self, state: State) { - self.states.push(state); - } - - fn parse(&mut self) -> ParseResult { - if self.state == State::End { - return Ok((Event::StreamEnd, self.scanner.mark())); - } - let (ev, mark) = self.state_machine()?; - // println!("EV {:?}", ev); - Ok((ev, mark)) - } - - /// Load the YAML from the stream in `self`, pushing events into `recv`. - /// - /// The contents of the stream are parsed and the corresponding events are sent into the - /// recveiver. For detailed explanations about how events work, see [`EventReceiver`]. - /// - /// If `multi` is set to `true`, the parser will allow parsing of multiple YAML documents - /// inside the stream. - /// - /// Note that any [`EventReceiver`] is also a [`MarkedEventReceiver`], so implementing the - /// former is enough to call this function. - /// # Errors - /// Returns `ScanError` when loading fails. - pub fn load( - &mut self, - recv: &mut R, - multi: bool, - ) -> Result<(), ScanError> { - if !self.scanner.stream_started() { - let (ev, mark) = self.next_token()?; - if ev != Event::StreamStart { - return Err(ScanError::new(mark, "did not find expected ")); - } - recv.on_event(ev, mark); - } - - if self.scanner.stream_ended() { - // XXX has parsed? - recv.on_event(Event::StreamEnd, self.scanner.mark()); - return Ok(()); - } - loop { - let (ev, mark) = self.next_token()?; - if ev == Event::StreamEnd { - recv.on_event(ev, mark); - return Ok(()); - } - // clear anchors before a new document - self.anchors.clear(); - self.load_document(ev, mark, recv)?; - if !multi { - break; - } - } - Ok(()) - } - - fn load_document( - &mut self, - first_ev: Event, - mark: Marker, - recv: &mut R, - ) -> Result<(), ScanError> { - if first_ev != Event::DocumentStart { - return Err(ScanError::new( - mark, - "did not find expected ", - )); - } - recv.on_event(first_ev, mark); - - let (ev, mark) = self.next_token()?; - self.load_node(ev, mark, recv)?; - - // DOCUMENT-END is expected. - let (ev, mark) = self.next_token()?; - assert_eq!(ev, Event::DocumentEnd); - recv.on_event(ev, mark); - - Ok(()) - } - - fn load_node( - &mut self, - first_ev: Event, - mark: Marker, - recv: &mut R, - ) -> Result<(), ScanError> { - match first_ev { - Event::Alias(..) | Event::Scalar(..) => { - recv.on_event(first_ev, mark); - Ok(()) - } - Event::SequenceStart(..) => { - recv.on_event(first_ev, mark); - self.load_sequence(recv) - } - Event::MappingStart(..) => { - recv.on_event(first_ev, mark); - self.load_mapping(recv) - } - _ => { - println!("UNREACHABLE EVENT: {first_ev:?}"); - unreachable!(); - } - } - } - - fn load_mapping(&mut self, recv: &mut R) -> Result<(), ScanError> { - let (mut key_ev, mut key_mark) = self.next_token()?; - while key_ev != Event::MappingEnd { - // key - self.load_node(key_ev, key_mark, recv)?; - - // value - let (ev, mark) = self.next_token()?; - self.load_node(ev, mark, recv)?; - - // next event - let (ev, mark) = self.next_token()?; - key_ev = ev; - key_mark = mark; - } - recv.on_event(key_ev, key_mark); - Ok(()) - } - - fn load_sequence(&mut self, recv: &mut R) -> Result<(), ScanError> { - let (mut ev, mut mark) = self.next_token()?; - while ev != Event::SequenceEnd { - self.load_node(ev, mark, recv)?; - - // next event - let (next_ev, next_mark) = self.next_token()?; - ev = next_ev; - mark = next_mark; - } - recv.on_event(ev, mark); - Ok(()) - } - - fn state_machine(&mut self) -> ParseResult { - // let next_tok = self.peek_token().cloned()?; - // println!("cur_state {:?}, next tok: {:?}", self.state, next_tok); - debug_print!("\n\x1B[;33mParser state: {:?} \x1B[;0m", self.state); - - match self.state { - State::StreamStart => self.stream_start(), - - State::ImplicitDocumentStart => self.document_start(true), - State::DocumentStart => self.document_start(false), - State::DocumentContent => self.document_content(), - State::DocumentEnd => self.document_end(), - - State::BlockNode => self.parse_node(true, false), - // State::BlockNodeOrIndentlessSequence => self.parse_node(true, true), - // State::FlowNode => self.parse_node(false, false), - State::BlockMappingFirstKey => self.block_mapping_key(true), - State::BlockMappingKey => self.block_mapping_key(false), - State::BlockMappingValue => self.block_mapping_value(), - - State::BlockSequenceFirstEntry => self.block_sequence_entry(true), - State::BlockSequenceEntry => self.block_sequence_entry(false), - - State::FlowSequenceFirstEntry => self.flow_sequence_entry(true), - State::FlowSequenceEntry => self.flow_sequence_entry(false), - - State::FlowMappingFirstKey => self.flow_mapping_key(true), - State::FlowMappingKey => self.flow_mapping_key(false), - State::FlowMappingValue => self.flow_mapping_value(false), - - State::IndentlessSequenceEntry => self.indentless_sequence_entry(), - - State::FlowSequenceEntryMappingKey => self.flow_sequence_entry_mapping_key(), - State::FlowSequenceEntryMappingValue => self.flow_sequence_entry_mapping_value(), - State::FlowSequenceEntryMappingEnd => self.flow_sequence_entry_mapping_end(), - State::FlowMappingEmptyValue => self.flow_mapping_value(true), - - /* impossible */ - State::End => unreachable!(), - } - } - - fn stream_start(&mut self) -> ParseResult { - match *self.peek_token()? { - Token(mark, TokenType::StreamStart(_)) => { - self.state = State::ImplicitDocumentStart; - self.skip(); - Ok((Event::StreamStart, mark)) - } - Token(mark, _) => Err(ScanError::new(mark, "did not find expected ")), - } - } - - fn document_start(&mut self, implicit: bool) -> ParseResult { - while let TokenType::DocumentEnd = self.peek_token()?.1 { - self.skip(); - } - - match *self.peek_token()? { - Token(mark, TokenType::StreamEnd) => { - self.state = State::End; - self.skip(); - Ok((Event::StreamEnd, mark)) - } - Token( - _, - TokenType::VersionDirective(..) - | TokenType::TagDirective(..) - | TokenType::DocumentStart, - ) => { - // explicit document - self.explicit_document_start() - } - Token(mark, _) if implicit => { - self.parser_process_directives()?; - self.push_state(State::DocumentEnd); - self.state = State::BlockNode; - Ok((Event::DocumentStart, mark)) - } - _ => { - // explicit document - self.explicit_document_start() - } - } - } - - fn parser_process_directives(&mut self) -> Result<(), ScanError> { - let mut version_directive_received = false; - loop { - let mut tags = HashMap::new(); - match self.peek_token()? { - Token(mark, TokenType::VersionDirective(_, _)) => { - // XXX parsing with warning according to spec - //if major != 1 || minor > 2 { - // return Err(ScanError::new(tok.0, - // "found incompatible YAML document")); - //} - if version_directive_received { - return Err(ScanError::new(*mark, "duplicate version directive")); - } - version_directive_received = true; - } - Token(mark, TokenType::TagDirective(handle, prefix)) => { - if tags.contains_key(handle) { - return Err(ScanError::new(*mark, "the TAG directive must only be given at most once per handle in the same document")); - } - tags.insert(handle.to_string(), prefix.to_string()); - } - _ => break, - } - self.tags = tags; - self.skip(); - } - Ok(()) - } - - fn explicit_document_start(&mut self) -> ParseResult { - self.parser_process_directives()?; - match *self.peek_token()? { - Token(mark, TokenType::DocumentStart) => { - self.push_state(State::DocumentEnd); - self.state = State::DocumentContent; - self.skip(); - Ok((Event::DocumentStart, mark)) - } - Token(mark, _) => Err(ScanError::new( - mark, - "did not find expected ", - )), - } - } - - fn document_content(&mut self) -> ParseResult { - match *self.peek_token()? { - Token( - mark, - TokenType::VersionDirective(..) - | TokenType::TagDirective(..) - | TokenType::DocumentStart - | TokenType::DocumentEnd - | TokenType::StreamEnd, - ) => { - self.pop_state(); - // empty scalar - Ok((Event::empty_scalar(), mark)) - } - _ => self.parse_node(true, false), - } - } - - fn document_end(&mut self) -> ParseResult { - let mut explicit_end = false; - let marker: Marker = match *self.peek_token()? { - Token(mark, TokenType::DocumentEnd) => { - explicit_end = true; - self.skip(); - mark - } - Token(mark, _) => mark, - }; - - if !self.keep_tags { - self.tags.clear(); - } - if explicit_end { - self.state = State::ImplicitDocumentStart; - } else { - if let Token(mark, TokenType::VersionDirective(..) | TokenType::TagDirective(..)) = - *self.peek_token()? - { - return Err(ScanError::new( - mark, - "missing explicit document end marker before directive", - )); - } - self.state = State::DocumentStart; - } - - Ok((Event::DocumentEnd, marker)) - } - - fn register_anchor(&mut self, name: String, _: &Marker) -> usize { - // anchors can be overridden/reused - // if self.anchors.contains_key(name) { - // return Err(ScanError::new(*mark, - // "while parsing anchor, found duplicated anchor")); - // } - let new_id = self.anchor_id; - self.anchor_id += 1; - self.anchors.insert(name, new_id); - new_id - } - - fn parse_node(&mut self, block: bool, indentless_sequence: bool) -> ParseResult { - let mut anchor_id = 0; - let mut tag = None; - match *self.peek_token()? { - Token(_, TokenType::Alias(_)) => { - self.pop_state(); - if let Token(mark, TokenType::Alias(name)) = self.fetch_token() { - match self.anchors.get(&name) { - None => { - return Err(ScanError::new( - mark, - "while parsing node, found unknown anchor", - )) - } - Some(id) => return Ok((Event::Alias(*id), mark)), - } - } - unreachable!() - } - Token(_, TokenType::Anchor(_)) => { - if let Token(mark, TokenType::Anchor(name)) = self.fetch_token() { - anchor_id = self.register_anchor(name, &mark); - if let TokenType::Tag(..) = self.peek_token()?.1 { - if let TokenType::Tag(handle, suffix) = self.fetch_token().1 { - tag = Some(self.resolve_tag(mark, &handle, suffix)?); - } else { - unreachable!() - } - } - } else { - unreachable!() - } - } - Token(mark, TokenType::Tag(..)) => { - if let TokenType::Tag(handle, suffix) = self.fetch_token().1 { - tag = Some(self.resolve_tag(mark, &handle, suffix)?); - if let TokenType::Anchor(_) = &self.peek_token()?.1 { - if let Token(mark, TokenType::Anchor(name)) = self.fetch_token() { - anchor_id = self.register_anchor(name, &mark); - } else { - unreachable!() - } - } - } else { - unreachable!() - } - } - _ => {} - } - match *self.peek_token()? { - Token(mark, TokenType::BlockEntry) if indentless_sequence => { - self.state = State::IndentlessSequenceEntry; - Ok((Event::SequenceStart(anchor_id, tag), mark)) - } - Token(_, TokenType::Scalar(..)) => { - self.pop_state(); - if let Token(mark, TokenType::Scalar(style, v)) = self.fetch_token() { - Ok((Event::Scalar(v, style, anchor_id, tag), mark)) - } else { - unreachable!() - } - } - Token(mark, TokenType::FlowSequenceStart) => { - self.state = State::FlowSequenceFirstEntry; - Ok((Event::SequenceStart(anchor_id, tag), mark)) - } - Token(mark, TokenType::FlowMappingStart) => { - self.state = State::FlowMappingFirstKey; - Ok((Event::MappingStart(anchor_id, tag), mark)) - } - Token(mark, TokenType::BlockSequenceStart) if block => { - self.state = State::BlockSequenceFirstEntry; - Ok((Event::SequenceStart(anchor_id, tag), mark)) - } - Token(mark, TokenType::BlockMappingStart) if block => { - self.state = State::BlockMappingFirstKey; - Ok((Event::MappingStart(anchor_id, tag), mark)) - } - // ex 7.2, an empty scalar can follow a secondary tag - Token(mark, _) if tag.is_some() || anchor_id > 0 => { - self.pop_state(); - Ok((Event::empty_scalar_with_anchor(anchor_id, tag), mark)) - } - Token(mark, _) => Err(ScanError::new( - mark, - "while parsing a node, did not find expected node content", - )), - } - } - - fn block_mapping_key(&mut self, first: bool) -> ParseResult { - // skip BlockMappingStart - if first { - let _ = self.peek_token()?; - //self.marks.push(tok.0); - self.skip(); - } - match *self.peek_token()? { - Token(_, TokenType::Key) => { - self.skip(); - if let Token(mark, TokenType::Key | TokenType::Value | TokenType::BlockEnd) = - *self.peek_token()? - { - self.state = State::BlockMappingValue; - // empty scalar - Ok((Event::empty_scalar(), mark)) - } else { - self.push_state(State::BlockMappingValue); - self.parse_node(true, true) - } - } - // XXX(chenyh): libyaml failed to parse spec 1.2, ex8.18 - Token(mark, TokenType::Value) => { - self.state = State::BlockMappingValue; - Ok((Event::empty_scalar(), mark)) - } - Token(mark, TokenType::BlockEnd) => { - self.pop_state(); - self.skip(); - Ok((Event::MappingEnd, mark)) - } - Token(mark, _) => Err(ScanError::new( - mark, - "while parsing a block mapping, did not find expected key", - )), - } - } - - fn block_mapping_value(&mut self) -> ParseResult { - match *self.peek_token()? { - Token(_, TokenType::Value) => { - self.skip(); - if let Token(mark, TokenType::Key | TokenType::Value | TokenType::BlockEnd) = - *self.peek_token()? - { - self.state = State::BlockMappingKey; - // empty scalar - Ok((Event::empty_scalar(), mark)) - } else { - self.push_state(State::BlockMappingKey); - self.parse_node(true, true) - } - } - Token(mark, _) => { - self.state = State::BlockMappingKey; - // empty scalar - Ok((Event::empty_scalar(), mark)) - } - } - } - - fn flow_mapping_key(&mut self, first: bool) -> ParseResult { - if first { - let _ = self.peek_token()?; - self.skip(); - } - let marker: Marker = { - match *self.peek_token()? { - Token(mark, TokenType::FlowMappingEnd) => mark, - Token(mark, _) => { - if !first { - match *self.peek_token()? { - Token(_, TokenType::FlowEntry) => self.skip(), - Token(mark, _) => return Err(ScanError::new( - mark, - "while parsing a flow mapping, did not find expected ',' or '}'", - )), - } - } - - match *self.peek_token()? { - Token(_, TokenType::Key) => { - self.skip(); - if let Token( - mark, - TokenType::Value | TokenType::FlowEntry | TokenType::FlowMappingEnd, - ) = *self.peek_token()? - { - self.state = State::FlowMappingValue; - return Ok((Event::empty_scalar(), mark)); - } - self.push_state(State::FlowMappingValue); - return self.parse_node(false, false); - } - Token(marker, TokenType::Value) => { - self.state = State::FlowMappingValue; - return Ok((Event::empty_scalar(), marker)); - } - Token(_, TokenType::FlowMappingEnd) => (), - _ => { - self.push_state(State::FlowMappingEmptyValue); - return self.parse_node(false, false); - } - } - - mark - } - } - }; - - self.pop_state(); - self.skip(); - Ok((Event::MappingEnd, marker)) - } - - fn flow_mapping_value(&mut self, empty: bool) -> ParseResult { - let mark: Marker = { - if empty { - let Token(mark, _) = *self.peek_token()?; - self.state = State::FlowMappingKey; - return Ok((Event::empty_scalar(), mark)); - } - match *self.peek_token()? { - Token(marker, TokenType::Value) => { - self.skip(); - match self.peek_token()?.1 { - TokenType::FlowEntry | TokenType::FlowMappingEnd => {} - _ => { - self.push_state(State::FlowMappingKey); - return self.parse_node(false, false); - } - } - marker - } - Token(marker, _) => marker, - } - }; - - self.state = State::FlowMappingKey; - Ok((Event::empty_scalar(), mark)) - } - - fn flow_sequence_entry(&mut self, first: bool) -> ParseResult { - // skip FlowMappingStart - if first { - let _ = self.peek_token()?; - //self.marks.push(tok.0); - self.skip(); - } - match *self.peek_token()? { - Token(mark, TokenType::FlowSequenceEnd) => { - self.pop_state(); - self.skip(); - return Ok((Event::SequenceEnd, mark)); - } - Token(_, TokenType::FlowEntry) if !first => { - self.skip(); - } - Token(mark, _) if !first => { - return Err(ScanError::new( - mark, - "while parsing a flow sequence, expected ',' or ']'", - )); - } - _ => { /* next */ } - } - match *self.peek_token()? { - Token(mark, TokenType::FlowSequenceEnd) => { - self.pop_state(); - self.skip(); - Ok((Event::SequenceEnd, mark)) - } - Token(mark, TokenType::Key) => { - self.state = State::FlowSequenceEntryMappingKey; - self.skip(); - Ok((Event::MappingStart(0, None), mark)) - } - _ => { - self.push_state(State::FlowSequenceEntry); - self.parse_node(false, false) - } - } - } - - fn indentless_sequence_entry(&mut self) -> ParseResult { - match *self.peek_token()? { - Token(_, TokenType::BlockEntry) => (), - Token(mark, _) => { - self.pop_state(); - return Ok((Event::SequenceEnd, mark)); - } - } - self.skip(); - if let Token( - mark, - TokenType::BlockEntry | TokenType::Key | TokenType::Value | TokenType::BlockEnd, - ) = *self.peek_token()? - { - self.state = State::IndentlessSequenceEntry; - Ok((Event::empty_scalar(), mark)) - } else { - self.push_state(State::IndentlessSequenceEntry); - self.parse_node(true, false) - } - } - - fn block_sequence_entry(&mut self, first: bool) -> ParseResult { - // BLOCK-SEQUENCE-START - if first { - let _ = self.peek_token()?; - //self.marks.push(tok.0); - self.skip(); - } - match *self.peek_token()? { - Token(mark, TokenType::BlockEnd) => { - self.pop_state(); - self.skip(); - Ok((Event::SequenceEnd, mark)) - } - Token(_, TokenType::BlockEntry) => { - self.skip(); - if let Token(mark, TokenType::BlockEntry | TokenType::BlockEnd) = - *self.peek_token()? - { - self.state = State::BlockSequenceEntry; - Ok((Event::empty_scalar(), mark)) - } else { - self.push_state(State::BlockSequenceEntry); - self.parse_node(true, false) - } - } - Token(mark, _) => Err(ScanError::new( - mark, - "while parsing a block collection, did not find expected '-' indicator", - )), - } - } - - fn flow_sequence_entry_mapping_key(&mut self) -> ParseResult { - if let Token(mark, TokenType::Value | TokenType::FlowEntry | TokenType::FlowSequenceEnd) = - *self.peek_token()? - { - self.skip(); - self.state = State::FlowSequenceEntryMappingValue; - Ok((Event::empty_scalar(), mark)) - } else { - self.push_state(State::FlowSequenceEntryMappingValue); - self.parse_node(false, false) - } - } - - fn flow_sequence_entry_mapping_value(&mut self) -> ParseResult { - match *self.peek_token()? { - Token(_, TokenType::Value) => { - self.skip(); - self.state = State::FlowSequenceEntryMappingValue; - if let Token(mark, TokenType::FlowEntry | TokenType::FlowSequenceEnd) = - *self.peek_token()? - { - self.state = State::FlowSequenceEntryMappingEnd; - Ok((Event::empty_scalar(), mark)) - } else { - self.push_state(State::FlowSequenceEntryMappingEnd); - self.parse_node(false, false) - } - } - Token(mark, _) => { - self.state = State::FlowSequenceEntryMappingEnd; - Ok((Event::empty_scalar(), mark)) - } - } - } - - #[allow(clippy::unnecessary_wraps)] - fn flow_sequence_entry_mapping_end(&mut self) -> ParseResult { - self.state = State::FlowSequenceEntry; - Ok((Event::MappingEnd, self.scanner.mark())) - } - - /// Resolve a tag from the handle and the suffix. - fn resolve_tag(&self, mark: Marker, handle: &str, suffix: String) -> Result { - if handle == "!!" { - // "!!" is a shorthand for "tag:yaml.org,2002:". However, that default can be - // overridden. - match self.tags.get("!!") { - Some(prefix) => Ok(Tag { - handle: prefix.to_string(), - suffix, - }), - None => Ok(Tag { - handle: "tag:yaml.org,2002:".to_string(), - suffix, - }), - } - } else if handle.is_empty() && suffix == "!" { - // "!" introduces a local tag. Local tags may have their prefix overridden. - match self.tags.get("") { - Some(prefix) => Ok(Tag { - handle: prefix.to_string(), - suffix, - }), - None => Ok(Tag { - handle: String::new(), - suffix, - }), - } - } else { - // Lookup handle in our tag directives. - let prefix = self.tags.get(handle); - if let Some(prefix) = prefix { - Ok(Tag { - handle: prefix.to_string(), - suffix, - }) - } else { - // Otherwise, it may be a local handle. With a local handle, the handle is set to - // "!" and the suffix to whatever follows it ("!foo" -> ("!", "foo")). - // If the handle is of the form "!foo!", this cannot be a local handle and we need - // to error. - if handle.len() >= 2 && handle.starts_with('!') && handle.ends_with('!') { - Err(ScanError::new(mark, "the handle wasn't declared")) - } else { - Ok(Tag { - handle: handle.to_string(), - suffix, - }) - } - } - } - } -} - -#[cfg(test)] -mod test { - use super::{Event, Parser}; - use crate::YamlLoader; - - #[test] - fn test_peek_eq_parse() { - let s = " -a0 bb: val -a1: &x - b1: 4 - b2: d -a2: 4 -a3: [1, 2, 3] -a4: - - [a1, a2] - - 2 -a5: *x -"; - let mut p = Parser::new_from_str(s); - while { - let event_peek = p.peek().unwrap().clone(); - let event = p.next_token().unwrap(); - assert_eq!(event, event_peek); - event.0 != Event::StreamEnd - } {} - } - - #[test] - fn test_keep_tags_across_multiple_documents() { - let text = r#" -%YAML 1.1 -%TAG !t! tag:test,2024: ---- !t!1 &1 -foo: "bar" ---- !t!2 &2 -baz: "qux" -"#; - let mut parser = Parser::new_from_str(text).keep_tags(true); - let result = YamlLoader::load_from_parser(&mut parser); - assert!(result.is_ok()); - let docs = result.unwrap(); - assert_eq!(docs.len(), 2); - let yaml = &docs[0]; - assert_eq!(yaml["foo"].as_str(), Some("bar")); - let yaml = &docs[1]; - assert_eq!(yaml["baz"].as_str(), Some("qux")); - - let mut parser = Parser::new_from_str(text).keep_tags(false); - let result = YamlLoader::load_from_parser(&mut parser); - assert!(result.is_err()); - } -} diff --git a/saphyr/src/scanner.rs b/saphyr/src/scanner.rs deleted file mode 100644 index dece35b..0000000 --- a/saphyr/src/scanner.rs +++ /dev/null @@ -1,2593 +0,0 @@ -//! Home to the YAML Scanner. -//! -//! The scanner is the lowest-level parsing utility. It is the lexer / tokenizer, reading input a -//! character at a time and emitting tokens that can later be interpreted by the [`crate::parser`] -//! to check for more context and validity. -//! -//! Due to the grammar of YAML, the scanner has to have some context and is not error-free. - -#![allow(clippy::cast_possible_wrap)] -#![allow(clippy::cast_sign_loss)] - -use std::{char, collections::VecDeque, error::Error, fmt}; - -use arraydeque::ArrayDeque; - -use crate::char_traits::{ - as_hex, is_alpha, is_anchor_char, is_blank, is_blank_or_breakz, is_break, is_breakz, is_digit, - is_flow, is_hex, is_tag_char, is_uri_char, is_z, -}; - -/// The encoding of the input. Currently, only UTF-8 is supported. -#[derive(Clone, Copy, PartialEq, Debug, Eq)] -pub enum TEncoding { - /// UTF-8 encoding. - Utf8, -} - -/// The style as which the scalar was written in the YAML document. -#[derive(Clone, Copy, PartialEq, Debug, Eq)] -pub enum TScalarStyle { - /// A YAML plain scalar. - Plain, - /// A YAML single quoted scalar. - SingleQuoted, - /// A YAML double quoted scalar. - DoubleQuoted, - - /// A YAML literal block (`|` block). - Literal, - /// A YAML folded block (`>` block). - Folded, -} - -/// A location in a yaml document. -#[derive(Clone, Copy, PartialEq, Debug, Eq)] -pub struct Marker { - /// The index (in chars) in the input string. - index: usize, - /// The line (1-indexed). - line: usize, - /// The column (1-indexed). - col: usize, -} - -impl Marker { - fn new(index: usize, line: usize, col: usize) -> Marker { - Marker { index, line, col } - } - - /// Return the index (in bytes) of the marker in the source. - #[must_use] - pub fn index(&self) -> usize { - self.index - } - - /// Return the line of the marker in the source. - #[must_use] - pub fn line(&self) -> usize { - self.line - } - - /// Return the column of the marker in the source. - #[must_use] - pub fn col(&self) -> usize { - self.col - } -} - -/// An error that occurred while scanning. -#[derive(Clone, PartialEq, Debug, Eq)] -pub struct ScanError { - /// The position at which the error happened in the source. - mark: Marker, - /// Human-readable details about the error. - info: String, -} - -impl ScanError { - /// Create a new error from a location and an error string. - #[must_use] - pub fn new(loc: Marker, info: &str) -> ScanError { - ScanError { - mark: loc, - info: info.to_owned(), - } - } - - /// Return the marker pointing to the error in the source. - #[must_use] - pub fn marker(&self) -> &Marker { - &self.mark - } - - /// Return the information string describing the error that happened. - #[must_use] - pub fn info(&self) -> &str { - self.info.as_ref() - } -} - -impl Error for ScanError { - fn description(&self) -> &str { - self.info.as_ref() - } - - fn cause(&self) -> Option<&dyn Error> { - None - } -} - -impl fmt::Display for ScanError { - // col starts from 0 - fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { - write!( - formatter, - "{} at byte {} line {} column {}", - self.info, - self.mark.index, - self.mark.line, - self.mark.col + 1, - ) - } -} - -/// The contents of a scanner token. -#[derive(Clone, PartialEq, Debug, Eq)] -pub enum TokenType { - /// The start of the stream. Sent first, before even [`TokenType::DocumentStart`]. - StreamStart(TEncoding), - /// The end of the stream, EOF. - StreamEnd, - /// A YAML version directive. - VersionDirective( - /// Major - u32, - /// Minor - u32, - ), - /// A YAML tag directive (e.g.: `!!str`, `!foo!bar`, ...). - TagDirective( - /// Handle - String, - /// Prefix - String, - ), - /// The start of a YAML document (`---`). - DocumentStart, - /// The end of a YAML document (`...`). - DocumentEnd, - /// The start of a sequence block. - /// - /// Sequence blocks are arrays starting with a `-`. - BlockSequenceStart, - /// The start of a sequence mapping. - /// - /// Sequence mappings are "dictionaries" with "key: value" entries. - BlockMappingStart, - /// End of the corresponding `BlockSequenceStart` or `BlockMappingStart`. - BlockEnd, - /// Start of an inline array (`[ a, b ]`). - FlowSequenceStart, - /// End of an inline array. - FlowSequenceEnd, - /// Start of an inline mapping (`{ a: b, c: d }`). - FlowMappingStart, - /// End of an inline mapping. - FlowMappingEnd, - /// An entry in a block sequence (c.f.: [`TokenType::BlockSequenceStart`]). - BlockEntry, - /// An entry in a flow sequence (c.f.: [`TokenType::FlowSequenceStart`]). - FlowEntry, - /// A key in a mapping. - Key, - /// A value in a mapping. - Value, - /// A reference to an anchor. - Alias(String), - /// A YAML anchor (`&`/`*`). - Anchor(String), - /// A YAML tag (starting with bangs `!`). - Tag( - /// The handle of the tag. - String, - /// The suffix of the tag. - String, - ), - /// A regular YAML scalar. - Scalar(TScalarStyle, String), -} - -/// A scanner token. -#[derive(Clone, PartialEq, Debug, Eq)] -pub struct Token(pub Marker, pub TokenType); - -/// A scalar that was parsed and may correspond to a simple key. -/// -/// Upon scanning the following yaml: -/// ```yaml -/// a: b -/// ``` -/// We do not know that `a` is a key for a map until we have reached the following `:`. For this -/// YAML, we would store `a` as a scalar token in the [`Scanner`], but not emit it yet. It would be -/// kept inside the scanner until more context is fetched and we are able to know whether it is a -/// plain scalar or a key. -/// -/// For example, see the following 2 yaml documents: -/// ```yaml -/// --- -/// a: b # Here, `a` is a key. -/// ... -/// --- -/// a # Here, `a` is a plain scalar. -/// ... -/// ``` -/// An instance of [`SimpleKey`] is created in the [`Scanner`] when such ambiguity occurs. -/// -/// In both documents, scanning `a` would lead to the creation of a [`SimpleKey`] with -/// [`Self::possible`] set to `true`. The token for `a` would be pushed in the [`Scanner`] but not -/// yet emitted. Instead, more context would be fetched (through [`Scanner::fetch_more_tokens`]). -/// -/// In the first document, upon reaching the `:`, the [`SimpleKey`] would be inspected and our -/// scalar `a` since it is a possible key, would be "turned" into a key. This is done by prepending -/// a [`TokenType::Key`] to our scalar token in the [`Scanner`]. This way, the -/// [`crate::parser::Parser`] would read the [`TokenType::Key`] token before the -/// [`TokenType::Scalar`] token. -/// -/// In the second document however, reaching the EOF would stale the [`SimpleKey`] and no -/// [`TokenType::Key`] would be emitted by the scanner. -#[derive(Clone, PartialEq, Debug, Eq)] -struct SimpleKey { - /// Whether the token this [`SimpleKey`] refers to may still be a key. - /// - /// Sometimes, when we have more context, we notice that what we thought could be a key no - /// longer can be. In that case, [`Self::possible`] is set to `false`. - /// - /// For instance, let us consider the following invalid YAML: - /// ```yaml - /// key - /// : value - /// ``` - /// Upon reading the `\n` after `key`, the [`SimpleKey`] that was created for `key` is staled - /// and [`Self::possible`] set to `false`. - possible: bool, - /// Whether the token this [`SimpleKey`] refers to is required to be a key. - /// - /// With more context, we may know for sure that the token must be a key. If the YAML is - /// invalid, it may happen that the token be deemed not a key. In such event, an error has to - /// be raised. This boolean helps us know when to raise such error. - /// - /// TODO(ethiraric, 30/12/2023): Example of when this happens. - required: bool, - /// The index of the token referred to by the [`SimpleKey`]. - /// - /// This is the index in the scanner, which takes into account both the tokens that have been - /// emitted and those about to be emitted. See [`Scanner::tokens_parsed`] and - /// [`Scanner::tokens`] for more details. - token_number: usize, - /// The position at which the token the [`SimpleKey`] refers to is. - mark: Marker, -} - -impl SimpleKey { - /// Create a new [`SimpleKey`] at the given `Marker` and with the given flow level. - fn new(mark: Marker) -> SimpleKey { - SimpleKey { - possible: false, - required: false, - token_number: 0, - mark, - } - } -} - -/// An indentation level on the stack of indentations. -#[derive(Clone, Debug, Default)] -struct Indent { - /// The former indentation level. - indent: isize, - /// Whether, upon closing, this indents generates a `BlockEnd` token. - /// - /// There are levels of indentation which do not start a block. Examples of this would be: - /// ```yaml - /// - - /// foo # ok - /// - - /// bar # ko, bar needs to be indented further than the `-`. - /// - [ - /// baz, # ok - /// quux # ko, quux needs to be indented further than the '-'. - /// ] # ko, the closing bracket needs to be indented further than the `-`. - /// ``` - /// - /// The indentation level created by the `-` is for a single entry in the sequence. Emitting a - /// `BlockEnd` when this indentation block ends would generate one `BlockEnd` per entry in the - /// sequence, although we must have exactly one to end the sequence. - needs_block_end: bool, -} - -/// The size of the [`Scanner`] buffer. -/// -/// The buffer is statically allocated to avoid conditions for reallocations each time we -/// consume/push a character. As of now, almost all lookaheads are 4 characters maximum, except: -/// - Escape sequences parsing: some escape codes are 8 characters -/// - Scanning indent in scalars: this looks ahead `indent + 2` characters -/// This constant must be set to at least 8. When scanning indent in scalars, the lookahead is done -/// in a single call if and only if the indent is `BUFFER_LEN - 2` or less. If the indent is higher -/// than that, the code will fall back to a loop of lookaheads. -const BUFFER_LEN: usize = 16; - -/// The YAML scanner. -/// -/// This corresponds to the low-level interface when reading YAML. The scanner emits token as they -/// are read (akin to a lexer), but it also holds sufficient context to be able to disambiguate -/// some of the constructs. It has understanding of indentation and whitespace and is able to -/// generate error messages for some invalid YAML constructs. -/// -/// It is however not a full parser and needs [`crate::parser::Parser`] to fully detect invalid -/// YAML documents. -#[derive(Debug)] -#[allow(clippy::struct_excessive_bools)] -pub struct Scanner { - /// The reader, providing with characters. - rdr: T, - /// The position of the cursor within the reader. - mark: Marker, - /// Buffer for tokens to be returned. - /// - /// This buffer can hold some temporary tokens that are not yet ready to be returned. For - /// instance, if we just read a scalar, it can be a value or a key if an implicit mapping - /// follows. In this case, the token stays in the `VecDeque` but cannot be returned from - /// [`Self::next`] until we have more context. - tokens: VecDeque, - /// Buffer for the next characters to consume. - buffer: ArrayDeque, - /// The last error that happened. - error: Option, - - /// Whether we have already emitted the `StreamStart` token. - stream_start_produced: bool, - /// Whether we have already emitted the `StreamEnd` token. - stream_end_produced: bool, - /// In some flow contexts, the value of a mapping is allowed to be adjacent to the `:`. When it - /// is, the index at which the `:` may be must be stored in `adjacent_value_allowed_at`. - adjacent_value_allowed_at: usize, - /// Whether a simple key could potentially start at the current position. - /// - /// Simple keys are the opposite of complex keys which are keys starting with `?`. - simple_key_allowed: bool, - /// A stack of potential simple keys. - /// - /// Refer to the documentation of [`SimpleKey`] for a more in-depth explanation of what they - /// are. - simple_keys: Vec, - /// The current indentation level. - indent: isize, - /// List of all block indentation levels we are in (except the current one). - indents: Vec, - /// Level of nesting of flow sequences. - flow_level: u8, - /// The number of tokens that have been returned from the scanner. - /// - /// This excludes the tokens from [`Self::tokens`]. - tokens_parsed: usize, - /// Whether a token is ready to be taken from [`Self::tokens`]. - token_available: bool, - /// Whether all characters encountered since the last newline were whitespace. - leading_whitespace: bool, - /// Whether we started a flow mapping. - /// - /// This is used to detect implicit flow mapping starts such as: - /// ```yaml - /// [ : foo ] # { null: "foo" } - /// ``` - flow_mapping_started: bool, - /// Whether we currently are in an implicit flow mapping. - implicit_flow_mapping: bool, -} - -impl> Iterator for Scanner { - type Item = Token; - fn next(&mut self) -> Option { - if self.error.is_some() { - return None; - } - match self.next_token() { - Ok(Some(tok)) => { - debug_print!( - " \x1B[;32m\u{21B3} {:?} \x1B[;36m{:?}\x1B[;m", - tok.1, - tok.0 - ); - Some(tok) - } - Ok(tok) => tok, - Err(e) => { - self.error = Some(e); - None - } - } - } -} - -/// A convenience alias for scanner functions that may fail without returning a value. -pub type ScanResult = Result<(), ScanError>; - -impl> Scanner { - /// Creates the YAML tokenizer. - pub fn new(rdr: T) -> Scanner { - Scanner { - rdr, - buffer: ArrayDeque::new(), - mark: Marker::new(0, 1, 0), - tokens: VecDeque::new(), - error: None, - - stream_start_produced: false, - stream_end_produced: false, - adjacent_value_allowed_at: 0, - simple_key_allowed: true, - simple_keys: Vec::new(), - indent: -1, - indents: Vec::new(), - flow_level: 0, - tokens_parsed: 0, - token_available: false, - leading_whitespace: true, - flow_mapping_started: false, - implicit_flow_mapping: false, - } - } - - /// Get a copy of the last error that was encountered, if any. - /// - /// This does not clear the error state and further calls to [`Self::get_error`] will return (a - /// clone of) the same error. - #[inline] - pub fn get_error(&self) -> Option { - self.error.clone() - } - - /// Fill `self.buffer` with at least `count` characters. - /// - /// The characters that are extracted this way are not consumed but only placed in the buffer. - #[inline] - fn lookahead(&mut self, count: usize) { - if self.buffer.len() >= count { - return; - } - for _ in 0..(count - self.buffer.len()) { - self.buffer - .push_back(self.rdr.next().unwrap_or('\0')) - .unwrap(); - } - } - - /// Consume the next character. It is assumed the next character is a blank. - #[inline] - fn skip_blank(&mut self) { - self.buffer.pop_front(); - - self.mark.index += 1; - self.mark.col += 1; - } - - /// Consume the next character. It is assumed the next character is not a blank. - #[inline] - fn skip_non_blank(&mut self) { - self.buffer.pop_front(); - - self.mark.index += 1; - self.mark.col += 1; - self.leading_whitespace = false; - } - - /// Consume the next characters. It is assumed none of the next characters are blanks. - #[inline] - fn skip_n_non_blank(&mut self, n: usize) { - self.buffer.drain(0..n); - - self.mark.index += n; - self.mark.col += n; - self.leading_whitespace = false; - } - - /// Consume the next character. It is assumed the next character is a newline. - #[inline] - fn skip_nl(&mut self) { - self.buffer.pop_front(); - - self.mark.index += 1; - self.mark.col = 0; - self.mark.line += 1; - self.leading_whitespace = true; - } - - /// Consume a linebreak (either CR, LF or CRLF), if any. Do nothing if there's none. - #[inline] - fn skip_line(&mut self) { - if self.buffer[0] == '\r' && self.buffer[1] == '\n' { - // While technically not a blank, this does not matter as `self.leading_whitespace` - // will be reset by `skip_nl`. - self.skip_blank(); - self.skip_nl(); - } else if is_break(self.buffer[0]) { - self.skip_nl(); - } - } - - /// Return the next character in the buffer. - /// - /// The character is not consumed. - #[inline] - fn ch(&self) -> char { - self.buffer[0] - } - - /// Look for the next character and return it. - /// - /// The character is not consumed. - /// Equivalent to calling [`Self::lookahead`] and [`Self::ch`]. - #[inline] - fn look_ch(&mut self) -> char { - self.lookahead(1); - self.ch() - } - - /// Read a character from the input stream, returning it directly. - /// - /// The buffer is bypassed and `self.mark` needs to be updated manually. - #[inline] - #[must_use] - fn raw_read_ch(&mut self) -> char { - self.rdr.next().unwrap_or('\0') - } - - /// Return whether the next character is `c`. - #[inline] - fn ch_is(&self, c: char) -> bool { - self.buffer[0] == c - } - - /// Return whether the [`TokenType::StreamStart`] event has been emitted. - #[inline] - pub fn stream_started(&self) -> bool { - self.stream_start_produced - } - - /// Return whether the [`TokenType::StreamEnd`] event has been emitted. - #[inline] - pub fn stream_ended(&self) -> bool { - self.stream_end_produced - } - - /// Get the current position in the input stream. - #[inline] - pub fn mark(&self) -> Marker { - self.mark - } - - // Read and consume a line break (either `\r`, `\n` or `\r\n`). - // - // A `\n` is pushed into `s`. - // - // # Panics (in debug) - // If the next characters do not correspond to a line break. - #[inline] - fn read_break(&mut self, s: &mut String) { - let c = self.buffer[0]; - let nc = self.buffer[1]; - debug_assert!(is_break(c)); - if c == '\r' && nc == '\n' { - self.skip_blank(); - } - self.skip_nl(); - - s.push('\n'); - } - - /// Check whether the next characters correspond to an end of document. - /// - /// [`Self::lookahead`] must have been called before calling this function. - fn next_is_document_end(&self) -> bool { - assert!(self.buffer.len() >= 4); - self.buffer[0] == '.' - && self.buffer[1] == '.' - && self.buffer[2] == '.' - && is_blank_or_breakz(self.buffer[3]) - } - - /// Check whether the next characters correspond to a document indicator. - /// - /// [`Self::lookahead`] must have been called before calling this function. - #[inline] - fn next_is_document_indicator(&self) -> bool { - assert!(self.buffer.len() >= 4); - self.mark.col == 0 - && (((self.buffer[0] == '-') && (self.buffer[1] == '-') && (self.buffer[2] == '-')) - || ((self.buffer[0] == '.') && (self.buffer[1] == '.') && (self.buffer[2] == '.'))) - && is_blank_or_breakz(self.buffer[3]) - } - - /// Insert a token at the given position. - fn insert_token(&mut self, pos: usize, tok: Token) { - let old_len = self.tokens.len(); - assert!(pos <= old_len); - self.tokens.insert(pos, tok); - } - - fn allow_simple_key(&mut self) { - self.simple_key_allowed = true; - } - - fn disallow_simple_key(&mut self) { - self.simple_key_allowed = false; - } - - /// Fetch the next token in the stream. - /// # Errors - /// Returns `ScanError` when the scanner does not find the next expected token. - pub fn fetch_next_token(&mut self) -> ScanResult { - self.lookahead(1); - // eprintln!("--> fetch_next_token Cur {:?} {:?}", self.mark, self.ch()); - - if !self.stream_start_produced { - self.fetch_stream_start(); - return Ok(()); - } - self.skip_to_next_token()?; - - debug_print!( - " \x1B[38;5;244m\u{2192} fetch_next_token after whitespace {:?} {:?}\x1B[m", - self.mark, - self.ch() - ); - - self.stale_simple_keys()?; - - let mark = self.mark; - self.unroll_indent(mark.col as isize); - - self.lookahead(4); - - if is_z(self.ch()) { - self.fetch_stream_end()?; - return Ok(()); - } - - // Is it a directive? - if self.mark.col == 0 && self.ch_is('%') { - return self.fetch_directive(); - } - - if self.mark.col == 0 - && self.buffer[0] == '-' - && self.buffer[1] == '-' - && self.buffer[2] == '-' - && is_blank_or_breakz(self.buffer[3]) - { - self.fetch_document_indicator(TokenType::DocumentStart)?; - return Ok(()); - } - - if self.mark.col == 0 - && self.buffer[0] == '.' - && self.buffer[1] == '.' - && self.buffer[2] == '.' - && is_blank_or_breakz(self.buffer[3]) - { - self.fetch_document_indicator(TokenType::DocumentEnd)?; - self.skip_ws_to_eol(SkipTabs::Yes)?; - if !is_breakz(self.ch()) { - return Err(ScanError::new( - self.mark, - "invalid content after document end marker", - )); - } - return Ok(()); - } - - if (self.mark.col as isize) < self.indent { - return Err(ScanError::new(self.mark, "invalid indentation")); - } - - let c = self.buffer[0]; - let nc = self.buffer[1]; - match c { - '[' => self.fetch_flow_collection_start(TokenType::FlowSequenceStart), - '{' => self.fetch_flow_collection_start(TokenType::FlowMappingStart), - ']' => self.fetch_flow_collection_end(TokenType::FlowSequenceEnd), - '}' => self.fetch_flow_collection_end(TokenType::FlowMappingEnd), - ',' => self.fetch_flow_entry(), - '-' if is_blank_or_breakz(nc) => self.fetch_block_entry(), - '?' if is_blank_or_breakz(nc) => self.fetch_key(), - ':' if is_blank_or_breakz(nc) - || (self.flow_level > 0 - && (is_flow(nc) || self.mark.index == self.adjacent_value_allowed_at)) => - { - self.fetch_value() - } - // Is it an alias? - '*' => self.fetch_anchor(true), - // Is it an anchor? - '&' => self.fetch_anchor(false), - '!' => self.fetch_tag(), - // Is it a literal scalar? - '|' if self.flow_level == 0 => self.fetch_block_scalar(true), - // Is it a folded scalar? - '>' if self.flow_level == 0 => self.fetch_block_scalar(false), - '\'' => self.fetch_flow_scalar(true), - '"' => self.fetch_flow_scalar(false), - // plain scalar - '-' if !is_blank_or_breakz(nc) => self.fetch_plain_scalar(), - ':' | '?' if !is_blank_or_breakz(nc) && self.flow_level == 0 => { - self.fetch_plain_scalar() - } - '%' | '@' | '`' => Err(ScanError::new( - self.mark, - &format!("unexpected character: `{c}'"), - )), - _ => self.fetch_plain_scalar(), - } - } - - /// Return the next token in the stream. - /// # Errors - /// Returns `ScanError` when scanning fails to find an expected next token. - pub fn next_token(&mut self) -> Result, ScanError> { - if self.stream_end_produced { - return Ok(None); - } - - if !self.token_available { - self.fetch_more_tokens()?; - } - let Some(t) = self.tokens.pop_front() else { - return Err(ScanError::new( - self.mark, - "did not find expected next token", - )); - }; - self.token_available = false; - self.tokens_parsed += 1; - - if let TokenType::StreamEnd = t.1 { - self.stream_end_produced = true; - } - Ok(Some(t)) - } - - /// Fetch tokens from the token stream. - /// # Errors - /// Returns `ScanError` when loading fails. - pub fn fetch_more_tokens(&mut self) -> ScanResult { - let mut need_more; - loop { - if self.tokens.is_empty() { - need_more = true; - } else { - need_more = false; - // Stale potential keys that we know won't be keys. - self.stale_simple_keys()?; - // If our next token to be emitted may be a key, fetch more context. - for sk in &self.simple_keys { - if sk.possible && sk.token_number == self.tokens_parsed { - need_more = true; - break; - } - } - } - - if !need_more { - break; - } - self.fetch_next_token()?; - } - self.token_available = true; - - Ok(()) - } - - /// Mark simple keys that can no longer be keys as such. - /// - /// This function sets `possible` to `false` to each key that, now we have more context, we - /// know will not be keys. - /// - /// # Errors - /// This function returns an error if one of the key we would stale was required to be a key. - fn stale_simple_keys(&mut self) -> ScanResult { - for sk in &mut self.simple_keys { - if sk.possible - // If not in a flow construct, simple keys cannot span multiple lines. - && self.flow_level == 0 - && (sk.mark.line < self.mark.line || sk.mark.index + 1024 < self.mark.index) - { - if sk.required { - return Err(ScanError::new(self.mark, "simple key expect ':'")); - } - sk.possible = false; - } - } - Ok(()) - } - - /// Skip over all whitespace and comments until the next token. - /// - /// # Errors - /// This function returns an error if a tabulation is encountered where there should not be - /// one. - fn skip_to_next_token(&mut self) -> ScanResult { - loop { - // TODO(chenyh) BOM - match self.look_ch() { - // Tabs may not be used as indentation. - // "Indentation" only exists as long as a block is started, but does not exist - // inside of flow-style constructs. Tabs are allowed as part of leading - // whitespaces outside of indentation. - // If a flow-style construct is in an indented block, its contents must still be - // indented. Also, tabs are allowed anywhere in it if it has no content. - '\t' if self.is_within_block() - && self.leading_whitespace - && (self.mark.col as isize) < self.indent => - { - self.skip_ws_to_eol(SkipTabs::Yes)?; - // If we have content on that line with a tab, return an error. - if !is_breakz(self.ch()) { - return Err(ScanError::new( - self.mark, - "tabs disallowed within this context (block indentation)", - )); - } - } - '\t' | ' ' => self.skip_blank(), - '\n' | '\r' => { - self.lookahead(2); - self.skip_line(); - if self.flow_level == 0 { - self.allow_simple_key(); - } - } - '#' => { - while !is_breakz(self.look_ch()) { - self.skip_non_blank(); - } - } - _ => break, - } - } - Ok(()) - } - - /// Skip over YAML whitespace (` `, `\n`, `\r`). - /// - /// # Errors - /// This function returns an error if no whitespace was found. - fn skip_yaml_whitespace(&mut self) -> ScanResult { - let mut need_whitespace = true; - loop { - match self.look_ch() { - ' ' => { - self.skip_blank(); - - need_whitespace = false; - } - '\n' | '\r' => { - self.lookahead(2); - self.skip_line(); - if self.flow_level == 0 { - self.allow_simple_key(); - } - need_whitespace = false; - } - '#' => { - while !is_breakz(self.look_ch()) { - self.skip_non_blank(); - } - } - _ => break, - } - } - - if need_whitespace { - Err(ScanError::new(self.mark(), "expected whitespace")) - } else { - Ok(()) - } - } - - /// Skip yaml whitespace at most up to eol. Also skips comments. - fn skip_ws_to_eol(&mut self, skip_tabs: SkipTabs) -> Result { - let mut encountered_tab = false; - let mut has_yaml_ws = false; - loop { - match self.look_ch() { - ' ' => { - has_yaml_ws = true; - self.skip_blank(); - } - '\t' if skip_tabs != SkipTabs::No => { - encountered_tab = true; - self.skip_blank(); - } - // YAML comments must be preceded by whitespace. - '#' if !encountered_tab && !has_yaml_ws => { - return Err(ScanError::new( - self.mark, - "comments must be separated from other tokens by whitespace", - )); - } - '#' => { - while !is_breakz(self.look_ch()) { - self.skip_non_blank(); - } - } - _ => break, - } - } - - Ok(SkipTabs::Result(encountered_tab, has_yaml_ws)) - } - - fn fetch_stream_start(&mut self) { - let mark = self.mark; - self.indent = -1; - self.stream_start_produced = true; - self.allow_simple_key(); - self.tokens - .push_back(Token(mark, TokenType::StreamStart(TEncoding::Utf8))); - self.simple_keys.push(SimpleKey::new(Marker::new(0, 0, 0))); - } - - fn fetch_stream_end(&mut self) -> ScanResult { - // force new line - if self.mark.col != 0 { - self.mark.col = 0; - self.mark.line += 1; - } - - // If the stream ended, we won't have more context. We can stall all the simple keys we - // had. If one was required, however, that was an error and we must propagate it. - for sk in &mut self.simple_keys { - if sk.required && sk.possible { - return Err(ScanError::new(self.mark, "simple key expected")); - } - sk.possible = false; - } - - self.unroll_indent(-1); - self.remove_simple_key()?; - self.disallow_simple_key(); - - self.tokens - .push_back(Token(self.mark, TokenType::StreamEnd)); - Ok(()) - } - - fn fetch_directive(&mut self) -> ScanResult { - self.unroll_indent(-1); - self.remove_simple_key()?; - - self.disallow_simple_key(); - - let tok = self.scan_directive()?; - self.skip_ws_to_eol(SkipTabs::Yes)?; - - self.tokens.push_back(tok); - - Ok(()) - } - - fn scan_directive(&mut self) -> Result { - let start_mark = self.mark; - self.skip_non_blank(); - - let name = self.scan_directive_name()?; - let tok = match name.as_ref() { - "YAML" => self.scan_version_directive_value(&start_mark)?, - "TAG" => self.scan_tag_directive_value(&start_mark)?, - // XXX This should be a warning instead of an error - _ => { - // skip current line - while !is_breakz(self.look_ch()) { - self.skip_non_blank(); - } - // XXX return an empty TagDirective token - Token( - start_mark, - TokenType::TagDirective(String::new(), String::new()), - ) - // return Err(ScanError::new(start_mark, - // "while scanning a directive, found unknown directive name")) - } - }; - - self.skip_ws_to_eol(SkipTabs::Yes)?; - - if !is_breakz(self.ch()) { - return Err(ScanError::new( - start_mark, - "while scanning a directive, did not find expected comment or line break", - )); - } - - // Eat a line break - if is_break(self.ch()) { - self.lookahead(2); - self.skip_line(); - } - - Ok(tok) - } - - fn scan_version_directive_value(&mut self, mark: &Marker) -> Result { - while is_blank(self.look_ch()) { - self.skip_blank(); - } - - let major = self.scan_version_directive_number(mark)?; - - if self.ch() != '.' { - return Err(ScanError::new( - *mark, - "while scanning a YAML directive, did not find expected digit or '.' character", - )); - } - self.skip_non_blank(); - - let minor = self.scan_version_directive_number(mark)?; - - Ok(Token(*mark, TokenType::VersionDirective(major, minor))) - } - - fn scan_directive_name(&mut self) -> Result { - let start_mark = self.mark; - let mut string = String::new(); - while is_alpha(self.look_ch()) { - string.push(self.ch()); - self.skip_non_blank(); - } - - if string.is_empty() { - return Err(ScanError::new( - start_mark, - "while scanning a directive, could not find expected directive name", - )); - } - - if !is_blank_or_breakz(self.ch()) { - return Err(ScanError::new( - start_mark, - "while scanning a directive, found unexpected non-alphabetical character", - )); - } - - Ok(string) - } - - fn scan_version_directive_number(&mut self, mark: &Marker) -> Result { - let mut val = 0u32; - let mut length = 0usize; - while let Some(digit) = self.look_ch().to_digit(10) { - if length + 1 > 9 { - return Err(ScanError::new( - *mark, - "while scanning a YAML directive, found extremely long version number", - )); - } - length += 1; - val = val * 10 + digit; - self.skip_non_blank(); - } - - if length == 0 { - return Err(ScanError::new( - *mark, - "while scanning a YAML directive, did not find expected version number", - )); - } - - Ok(val) - } - - fn scan_tag_directive_value(&mut self, mark: &Marker) -> Result { - /* Eat whitespaces. */ - while is_blank(self.look_ch()) { - self.skip_blank(); - } - let handle = self.scan_tag_handle(true, mark)?; - - /* Eat whitespaces. */ - while is_blank(self.look_ch()) { - self.skip_blank(); - } - - let prefix = self.scan_tag_prefix(mark)?; - - self.lookahead(1); - - if is_blank_or_breakz(self.ch()) { - Ok(Token(*mark, TokenType::TagDirective(handle, prefix))) - } else { - Err(ScanError::new( - *mark, - "while scanning TAG, did not find expected whitespace or line break", - )) - } - } - - fn fetch_tag(&mut self) -> ScanResult { - self.save_simple_key(); - self.disallow_simple_key(); - - let tok = self.scan_tag()?; - self.tokens.push_back(tok); - Ok(()) - } - - fn scan_tag(&mut self) -> Result { - let start_mark = self.mark; - let mut handle = String::new(); - let mut suffix; - - // Check if the tag is in the canonical form (verbatim). - self.lookahead(2); - - if self.buffer[1] == '<' { - suffix = self.scan_verbatim_tag(&start_mark)?; - } else { - // The tag has either the '!suffix' or the '!handle!suffix' - handle = self.scan_tag_handle(false, &start_mark)?; - // Check if it is, indeed, handle. - if handle.len() >= 2 && handle.starts_with('!') && handle.ends_with('!') { - // A tag handle starting with "!!" is a secondary tag handle. - let is_secondary_handle = handle == "!!"; - suffix = - self.scan_tag_shorthand_suffix(false, is_secondary_handle, "", &start_mark)?; - } else { - suffix = self.scan_tag_shorthand_suffix(false, false, &handle, &start_mark)?; - handle = "!".to_owned(); - // A special case: the '!' tag. Set the handle to '' and the - // suffix to '!'. - if suffix.is_empty() { - handle.clear(); - suffix = "!".to_owned(); - } - } - } - - if is_blank_or_breakz(self.look_ch()) || (self.flow_level > 0 && is_flow(self.ch())) { - // XXX: ex 7.2, an empty scalar can follow a secondary tag - Ok(Token(start_mark, TokenType::Tag(handle, suffix))) - } else { - Err(ScanError::new( - start_mark, - "while scanning a tag, did not find expected whitespace or line break", - )) - } - } - - fn scan_tag_handle(&mut self, directive: bool, mark: &Marker) -> Result { - let mut string = String::new(); - if self.look_ch() != '!' { - return Err(ScanError::new( - *mark, - "while scanning a tag, did not find expected '!'", - )); - } - - string.push(self.ch()); - self.skip_non_blank(); - - while is_alpha(self.look_ch()) { - string.push(self.ch()); - self.skip_non_blank(); - } - - // Check if the trailing character is '!' and copy it. - if self.ch() == '!' { - string.push(self.ch()); - self.skip_non_blank(); - } else if directive && string != "!" { - // It's either the '!' tag or not really a tag handle. If it's a %TAG - // directive, it's an error. If it's a tag token, it must be a part of - // URI. - return Err(ScanError::new( - *mark, - "while parsing a tag directive, did not find expected '!'", - )); - } - Ok(string) - } - - /// Scan for a tag prefix (6.8.2.2). - /// - /// There are 2 kinds of tag prefixes: - /// - Local: Starts with a `!`, contains only URI chars (`!foo`) - /// - Global: Starts with a tag char, contains then URI chars (`!foo,2000:app/`) - fn scan_tag_prefix(&mut self, start_mark: &Marker) -> Result { - let mut string = String::new(); - - if self.look_ch() == '!' { - // If we have a local tag, insert and skip `!`. - string.push(self.ch()); - self.skip_non_blank(); - } else if !is_tag_char(self.ch()) { - // Otherwise, check if the first global tag character is valid. - return Err(ScanError::new(*start_mark, "invalid global tag character")); - } else if self.ch() == '%' { - // If it is valid and an escape sequence, escape it. - string.push(self.scan_uri_escapes(start_mark)?); - } else { - // Otherwise, push the first character. - string.push(self.ch()); - self.skip_non_blank(); - } - - while is_uri_char(self.look_ch()) { - if self.ch() == '%' { - string.push(self.scan_uri_escapes(start_mark)?); - } else { - string.push(self.ch()); - self.skip_non_blank(); - } - } - - Ok(string) - } - - /// Scan for a verbatim tag. - /// - /// The prefixing `!<` must _not_ have been skipped. - fn scan_verbatim_tag(&mut self, start_mark: &Marker) -> Result { - // Eat `!<` - self.skip_non_blank(); - self.skip_non_blank(); - - let mut string = String::new(); - while is_uri_char(self.look_ch()) { - if self.ch() == '%' { - string.push(self.scan_uri_escapes(start_mark)?); - } else { - string.push(self.ch()); - self.skip_non_blank(); - } - } - - if self.ch() != '>' { - return Err(ScanError::new( - *start_mark, - "while scanning a verbatim tag, did not find the expected '>'", - )); - } - self.skip_non_blank(); - - Ok(string) - } - - fn scan_tag_shorthand_suffix( - &mut self, - _directive: bool, - _is_secondary: bool, - head: &str, - mark: &Marker, - ) -> Result { - let mut length = head.len(); - let mut string = String::new(); - - // Copy the head if needed. - // Note that we don't copy the leading '!' character. - if length > 1 { - string.extend(head.chars().skip(1)); - } - - while is_tag_char(self.look_ch()) { - // Check if it is a URI-escape sequence. - if self.ch() == '%' { - string.push(self.scan_uri_escapes(mark)?); - } else { - string.push(self.ch()); - self.skip_non_blank(); - } - - length += 1; - } - - if length == 0 { - return Err(ScanError::new( - *mark, - "while parsing a tag, did not find expected tag URI", - )); - } - - Ok(string) - } - - fn scan_uri_escapes(&mut self, mark: &Marker) -> Result { - let mut width = 0usize; - let mut code = 0u32; - loop { - self.lookahead(3); - - if !(self.ch() == '%' && is_hex(self.buffer[1]) && is_hex(self.buffer[2])) { - return Err(ScanError::new( - *mark, - "while parsing a tag, did not find URI escaped octet", - )); - } - - let octet = (as_hex(self.buffer[1]) << 4) + as_hex(self.buffer[2]); - if width == 0 { - width = match octet { - _ if octet & 0x80 == 0x00 => 1, - _ if octet & 0xE0 == 0xC0 => 2, - _ if octet & 0xF0 == 0xE0 => 3, - _ if octet & 0xF8 == 0xF0 => 4, - _ => { - return Err(ScanError::new( - *mark, - "while parsing a tag, found an incorrect leading UTF-8 octet", - )); - } - }; - code = octet; - } else { - if octet & 0xc0 != 0x80 { - return Err(ScanError::new( - *mark, - "while parsing a tag, found an incorrect trailing UTF-8 octet", - )); - } - code = (code << 8) + octet; - } - - self.skip_n_non_blank(3); - - width -= 1; - if width == 0 { - break; - } - } - - match char::from_u32(code) { - Some(ch) => Ok(ch), - None => Err(ScanError::new( - *mark, - "while parsing a tag, found an invalid UTF-8 codepoint", - )), - } - } - - fn fetch_anchor(&mut self, alias: bool) -> ScanResult { - self.save_simple_key(); - self.disallow_simple_key(); - - let tok = self.scan_anchor(alias)?; - - self.tokens.push_back(tok); - - Ok(()) - } - - fn scan_anchor(&mut self, alias: bool) -> Result { - let mut string = String::new(); - let start_mark = self.mark; - - self.skip_non_blank(); - while is_anchor_char(self.look_ch()) { - string.push(self.ch()); - self.skip_non_blank(); - } - - if string.is_empty() { - return Err(ScanError::new(start_mark, "while scanning an anchor or alias, did not find expected alphabetic or numeric character")); - } - - if alias { - Ok(Token(start_mark, TokenType::Alias(string))) - } else { - Ok(Token(start_mark, TokenType::Anchor(string))) - } - } - - fn fetch_flow_collection_start(&mut self, tok: TokenType) -> ScanResult { - // The indicators '[' and '{' may start a simple key. - self.save_simple_key(); - - self.roll_one_col_indent(); - self.increase_flow_level()?; - - self.allow_simple_key(); - - let start_mark = self.mark; - self.skip_non_blank(); - - if tok == TokenType::FlowMappingStart { - self.flow_mapping_started = true; - } - - self.skip_ws_to_eol(SkipTabs::Yes)?; - - self.tokens.push_back(Token(start_mark, tok)); - Ok(()) - } - - fn fetch_flow_collection_end(&mut self, tok: TokenType) -> ScanResult { - self.remove_simple_key()?; - self.decrease_flow_level(); - - self.disallow_simple_key(); - - self.end_implicit_mapping(self.mark); - - let start_mark = self.mark; - self.skip_non_blank(); - self.skip_ws_to_eol(SkipTabs::Yes)?; - - // A flow collection within a flow mapping can be a key. In that case, the value may be - // adjacent to the `:`. - // ```yaml - // - [ {a: b}:value ] - // ``` - if self.flow_level > 0 { - self.adjacent_value_allowed_at = self.mark.index; - } - - self.tokens.push_back(Token(start_mark, tok)); - Ok(()) - } - - /// Push the `FlowEntry` token and skip over the `,`. - fn fetch_flow_entry(&mut self) -> ScanResult { - self.remove_simple_key()?; - self.allow_simple_key(); - - self.end_implicit_mapping(self.mark); - - let start_mark = self.mark; - self.skip_non_blank(); - self.skip_ws_to_eol(SkipTabs::Yes)?; - - self.tokens - .push_back(Token(start_mark, TokenType::FlowEntry)); - Ok(()) - } - - fn increase_flow_level(&mut self) -> ScanResult { - self.simple_keys.push(SimpleKey::new(Marker::new(0, 0, 0))); - self.flow_level = self - .flow_level - .checked_add(1) - .ok_or_else(|| ScanError::new(self.mark, "recursion limit exceeded"))?; - Ok(()) - } - - fn decrease_flow_level(&mut self) { - if self.flow_level > 0 { - self.flow_level -= 1; - self.simple_keys.pop().unwrap(); - } - } - - /// Push the `Block*` token(s) and skip over the `-`. - /// - /// Add an indentation level and push a `BlockSequenceStart` token if needed, then push a - /// `BlockEntry` token. - /// This function only skips over the `-` and does not fetch the entry value. - fn fetch_block_entry(&mut self) -> ScanResult { - if self.flow_level > 0 { - // - * only allowed in block - return Err(ScanError::new( - self.mark, - r#""-" is only valid inside a block"#, - )); - } - // Check if we are allowed to start a new entry. - if !self.simple_key_allowed { - return Err(ScanError::new( - self.mark, - "block sequence entries are not allowed in this context", - )); - } - - // ???, fixes test G9HC. - if let Some(Token(mark, TokenType::Anchor(..) | TokenType::Tag(..))) = self.tokens.back() { - if self.mark.col == 0 && mark.col == 0 && self.indent > -1 { - return Err(ScanError::new(*mark, "invalid indentation for anchor")); - } - } - - // Skip over the `-`. - let mark = self.mark; - self.skip_non_blank(); - - // generate BLOCK-SEQUENCE-START if indented - self.roll_indent(mark.col, None, TokenType::BlockSequenceStart, mark); - let found_tabs = self.skip_ws_to_eol(SkipTabs::Yes)?.found_tabs(); - self.lookahead(2); - if found_tabs && self.buffer[0] == '-' && is_blank_or_breakz(self.buffer[1]) { - return Err(ScanError::new( - self.mark, - "'-' must be followed by a valid YAML whitespace", - )); - } - - self.skip_ws_to_eol(SkipTabs::No)?; - if is_break(self.look_ch()) || is_flow(self.ch()) { - self.roll_one_col_indent(); - } - - self.remove_simple_key()?; - self.allow_simple_key(); - - self.tokens - .push_back(Token(self.mark, TokenType::BlockEntry)); - - Ok(()) - } - - fn fetch_document_indicator(&mut self, t: TokenType) -> ScanResult { - self.unroll_indent(-1); - self.remove_simple_key()?; - self.disallow_simple_key(); - - let mark = self.mark; - - self.skip_n_non_blank(3); - - self.tokens.push_back(Token(mark, t)); - Ok(()) - } - - fn fetch_block_scalar(&mut self, literal: bool) -> ScanResult { - self.save_simple_key(); - self.allow_simple_key(); - let tok = self.scan_block_scalar(literal)?; - - self.tokens.push_back(tok); - Ok(()) - } - - #[allow(clippy::too_many_lines)] - fn scan_block_scalar(&mut self, literal: bool) -> Result { - let start_mark = self.mark; - let mut chomping = Chomping::Clip; - let mut increment: usize = 0; - let mut indent: usize = 0; - let mut trailing_blank: bool; - let mut leading_blank: bool = false; - let style = if literal { - TScalarStyle::Literal - } else { - TScalarStyle::Folded - }; - - let mut string = String::new(); - let mut leading_break = String::new(); - let mut trailing_breaks = String::new(); - let mut chomping_break = String::new(); - - // skip '|' or '>' - self.skip_non_blank(); - self.unroll_non_block_indents(); - - if self.look_ch() == '+' || self.ch() == '-' { - if self.ch() == '+' { - chomping = Chomping::Keep; - } else { - chomping = Chomping::Strip; - } - self.skip_non_blank(); - if is_digit(self.look_ch()) { - if self.ch() == '0' { - return Err(ScanError::new( - start_mark, - "while scanning a block scalar, found an indentation indicator equal to 0", - )); - } - increment = (self.ch() as usize) - ('0' as usize); - self.skip_non_blank(); - } - } else if is_digit(self.ch()) { - if self.ch() == '0' { - return Err(ScanError::new( - start_mark, - "while scanning a block scalar, found an indentation indicator equal to 0", - )); - } - - increment = (self.ch() as usize) - ('0' as usize); - self.skip_non_blank(); - self.lookahead(1); - if self.ch() == '+' || self.ch() == '-' { - if self.ch() == '+' { - chomping = Chomping::Keep; - } else { - chomping = Chomping::Strip; - } - self.skip_non_blank(); - } - } - - self.skip_ws_to_eol(SkipTabs::Yes)?; - - // Check if we are at the end of the line. - if !is_breakz(self.look_ch()) { - return Err(ScanError::new( - start_mark, - "while scanning a block scalar, did not find expected comment or line break", - )); - } - - if is_break(self.ch()) { - self.lookahead(2); - self.read_break(&mut chomping_break); - } - - if self.look_ch() == '\t' { - return Err(ScanError::new( - start_mark, - "a block scalar content cannot start with a tab", - )); - } - - if increment > 0 { - indent = if self.indent >= 0 { - (self.indent + increment as isize) as usize - } else { - increment - } - } - - // Scan the leading line breaks and determine the indentation level if needed. - if indent == 0 { - self.skip_block_scalar_first_line_indent(&mut indent, &mut trailing_breaks); - } else { - self.skip_block_scalar_indent(indent, &mut trailing_breaks); - } - - // We have an end-of-stream with no content, e.g.: - // ```yaml - // - |+ - // ``` - if is_z(self.ch()) { - let contents = match chomping { - // We strip trailing linebreaks. Nothing remain. - Chomping::Strip => String::new(), - // There was no newline after the chomping indicator. - _ if self.mark.line == start_mark.line() => String::new(), - // We clip lines, and there was a newline after the chomping indicator. - // All other breaks are ignored. - Chomping::Clip => chomping_break, - // We keep lines. There was a newline after the chomping indicator but nothing - // else. - Chomping::Keep if trailing_breaks.is_empty() => chomping_break, - // Otherwise, the newline after chomping is ignored. - Chomping::Keep => trailing_breaks, - }; - return Ok(Token(start_mark, TokenType::Scalar(style, contents))); - } - - if self.mark.col < indent && (self.mark.col as isize) > self.indent { - return Err(ScanError::new( - self.mark, - "wrongly indented line in block scalar", - )); - } - - let mut line_buffer = String::with_capacity(100); - let start_mark = self.mark; - while self.mark.col == indent && !is_z(self.ch()) { - if indent == 0 { - self.lookahead(4); - if self.next_is_document_end() { - break; - } - } - - // We are at the first content character of a content line. - trailing_blank = is_blank(self.ch()); - if !literal && !leading_break.is_empty() && !leading_blank && !trailing_blank { - string.push_str(&trailing_breaks); - if trailing_breaks.is_empty() { - string.push(' '); - } - } else { - string.push_str(&leading_break); - string.push_str(&trailing_breaks); - } - - leading_break.clear(); - trailing_breaks.clear(); - - leading_blank = is_blank(self.ch()); - - self.scan_block_scalar_content_line(&mut string, &mut line_buffer); - - // break on EOF - if is_z(self.ch()) { - break; - } - - self.lookahead(2); - self.read_break(&mut leading_break); - - // Eat the following indentation spaces and line breaks. - self.skip_block_scalar_indent(indent, &mut trailing_breaks); - } - - // Chomp the tail. - if chomping != Chomping::Strip { - string.push_str(&leading_break); - // If we had reached an eof but the last character wasn't an end-of-line, check if the - // last line was indented at least as the rest of the scalar, then we need to consider - // there is a newline. - if is_z(self.ch()) && self.mark.col >= indent.max(1) { - string.push('\n'); - } - } - - if chomping == Chomping::Keep { - string.push_str(&trailing_breaks); - } - - Ok(Token(start_mark, TokenType::Scalar(style, string))) - } - - /// Retrieve the contents of the line, parsing it as a block scalar. - /// - /// The contents will be appended to `string`. `line_buffer` is used as a temporary buffer to - /// store bytes before pushing them to `string` and thus avoiding reallocating more than - /// necessary. `line_buffer` is assumed to be empty upon calling this function. It will be - /// `clear`ed before the end of the function. - /// - /// This function assumed the first character to read is the first content character in the - /// line. This function does not consume the line break character(s) after the line. - fn scan_block_scalar_content_line(&mut self, string: &mut String, line_buffer: &mut String) { - // Start by evaluating characters in the buffer. - while !self.buffer.is_empty() && !is_breakz(self.ch()) { - string.push(self.ch()); - // We may technically skip non-blank characters. However, the only distinction is - // to determine what is leading whitespace and what is not. Here, we read the - // contents of the line until either eof or a linebreak. We know we will not read - // `self.leading_whitespace` until the end of the line, where it will be reset. - // This allows us to call a slightly less expensive function. - self.skip_blank(); - } - - // All characters that were in the buffer were consumed. We need to check if more - // follow. - if self.buffer.is_empty() { - // We will read all consecutive non-breakz characters. We push them into a - // temporary buffer. The main difference with going through `self.buffer` is that - // characters are appended here as their real size (1B for ascii, or up to 4 bytes for - // UTF-8). We can then use the internal `line_buffer` `Vec` to push data into `string` - // (using `String::push_str`). - let mut c = self.raw_read_ch(); - while !is_breakz(c) { - line_buffer.push(c); - c = self.raw_read_ch(); - } - - // Our last character read is stored in `c`. It is either an EOF or a break. In any - // case, we need to push it back into `self.buffer` so it may be properly read - // after. We must not insert it in `string`. - self.buffer.push_back(c).unwrap(); - - // We need to manually update our position; we haven't called a `skip` function. - self.mark.col += line_buffer.len(); - self.mark.index += line_buffer.len(); - - // We can now append our bytes to our `string`. - string.reserve(line_buffer.as_bytes().len()); - string.push_str(line_buffer); - // This clears the _contents_ without touching the _capacity_. - line_buffer.clear(); - } - } - - /// Skip the block scalar indentation and empty lines. - fn skip_block_scalar_indent(&mut self, indent: usize, breaks: &mut String) { - loop { - // Consume all spaces. Tabs cannot be used as indentation. - if indent < BUFFER_LEN - 2 { - self.lookahead(BUFFER_LEN); - while self.mark.col < indent && self.ch() == ' ' { - self.skip_blank(); - } - } else { - loop { - self.lookahead(BUFFER_LEN); - while !self.buffer.is_empty() && self.mark.col < indent && self.ch() == ' ' { - self.skip_blank(); - } - if !(!self.buffer.is_empty() && self.mark.col < indent && self.ch() == ' ') { - break; - } - } - self.lookahead(2); - } - - // If our current line is empty, skip over the break and continue looping. - if is_break(self.ch()) { - self.read_break(breaks); - } else { - // Otherwise, we have a content line. Return control. - break; - } - } - } - - /// Determine the indentation level for a block scalar from the first line of its contents. - /// - /// The function skips over whitespace-only lines and sets `indent` to the the longest - /// whitespace line that was encountered. - fn skip_block_scalar_first_line_indent(&mut self, indent: &mut usize, breaks: &mut String) { - let mut max_indent = 0; - loop { - // Consume all spaces. Tabs cannot be used as indentation. - while self.look_ch() == ' ' { - self.skip_blank(); - } - - if self.mark.col > max_indent { - max_indent = self.mark.col; - } - - if is_break(self.ch()) { - // If our current line is empty, skip over the break and continue looping. - self.lookahead(2); - self.read_break(breaks); - } else { - // Otherwise, we have a content line. Return control. - break; - } - } - - // In case a yaml looks like: - // ```yaml - // | - // foo - // bar - // ``` - // We need to set the indent to 0 and not 1. In all other cases, the indent must be at - // least 1. When in the above example, `self.indent` will be set to -1. - *indent = max_indent.max((self.indent + 1) as usize); - if self.indent > 0 { - *indent = (*indent).max(1); - } - } - - fn fetch_flow_scalar(&mut self, single: bool) -> ScanResult { - self.save_simple_key(); - self.disallow_simple_key(); - - let tok = self.scan_flow_scalar(single)?; - - // From spec: To ensure JSON compatibility, if a key inside a flow mapping is JSON-like, - // YAML allows the following value to be specified adjacent to the “:”. - self.skip_to_next_token()?; - self.adjacent_value_allowed_at = self.mark.index; - - self.tokens.push_back(tok); - Ok(()) - } - - #[allow(clippy::too_many_lines)] - fn scan_flow_scalar(&mut self, single: bool) -> Result { - let start_mark = self.mark; - - let mut string = String::new(); - let mut leading_break = String::new(); - let mut trailing_breaks = String::new(); - let mut whitespaces = String::new(); - let mut leading_blanks; - - /* Eat the left quote. */ - self.skip_non_blank(); - - loop { - /* Check for a document indicator. */ - self.lookahead(4); - - if self.mark.col == 0 - && (((self.buffer[0] == '-') && (self.buffer[1] == '-') && (self.buffer[2] == '-')) - || ((self.buffer[0] == '.') - && (self.buffer[1] == '.') - && (self.buffer[2] == '.'))) - && is_blank_or_breakz(self.buffer[3]) - { - return Err(ScanError::new( - start_mark, - "while scanning a quoted scalar, found unexpected document indicator", - )); - } - - if is_z(self.ch()) { - return Err(ScanError::new( - start_mark, - "while scanning a quoted scalar, found unexpected end of stream", - )); - } - - if (self.mark.col as isize) < self.indent { - return Err(ScanError::new( - start_mark, - "invalid indentation in quoted scalar", - )); - } - - leading_blanks = false; - self.consume_flow_scalar_non_whitespace_chars( - single, - &mut string, - &mut leading_blanks, - &start_mark, - )?; - - match self.look_ch() { - '\'' if single => break, - '"' if !single => break, - _ => {} - } - - // Consume blank characters. - while is_blank(self.ch()) || is_break(self.ch()) { - if is_blank(self.ch()) { - // Consume a space or a tab character. - if leading_blanks { - if self.ch() == '\t' && (self.mark.col as isize) < self.indent { - return Err(ScanError::new( - self.mark, - "tab cannot be used as indentation", - )); - } - self.skip_blank(); - } else { - whitespaces.push(self.ch()); - self.skip_blank(); - } - } else { - self.lookahead(2); - // Check if it is a first line break. - if leading_blanks { - self.read_break(&mut trailing_breaks); - } else { - whitespaces.clear(); - self.read_break(&mut leading_break); - leading_blanks = true; - } - } - self.lookahead(1); - } - - // Join the whitespaces or fold line breaks. - if leading_blanks { - if leading_break.is_empty() { - string.push_str(&leading_break); - string.push_str(&trailing_breaks); - trailing_breaks.clear(); - leading_break.clear(); - } else { - if trailing_breaks.is_empty() { - string.push(' '); - } else { - string.push_str(&trailing_breaks); - trailing_breaks.clear(); - } - leading_break.clear(); - } - } else { - string.push_str(&whitespaces); - whitespaces.clear(); - } - } // loop - - // Eat the right quote. - self.skip_non_blank(); - // Ensure there is no invalid trailing content. - self.skip_ws_to_eol(SkipTabs::Yes)?; - match self.ch() { - // These can be encountered in flow sequences or mappings. - ',' | '}' | ']' if self.flow_level > 0 => {} - // An end-of-line / end-of-stream is fine. No trailing content. - c if is_breakz(c) => {} - // ':' can be encountered if our scalar is a key. - // Outside of flow contexts, keys cannot span multiple lines - ':' if self.flow_level == 0 && start_mark.line == self.mark.line => {} - // Inside a flow context, this is allowed. - ':' if self.flow_level > 0 => {} - _ => { - return Err(ScanError::new( - self.mark, - "invalid trailing content after double-quoted scalar", - )); - } - } - - let style = if single { - TScalarStyle::SingleQuoted - } else { - TScalarStyle::DoubleQuoted - }; - Ok(Token(start_mark, TokenType::Scalar(style, string))) - } - - /// Consume successive non-whitespace characters from a flow scalar. - /// - /// This function resolves escape sequences and stops upon encountering a whitespace, the end - /// of the stream or the closing character for the scalar (`'` for single quoted scalars, `"` - /// for double quoted scalars). - /// - /// # Errors - /// Return an error if an invalid escape sequence is found. - fn consume_flow_scalar_non_whitespace_chars( - &mut self, - single: bool, - string: &mut String, - leading_blanks: &mut bool, - start_mark: &Marker, - ) -> Result<(), ScanError> { - self.lookahead(2); - while !is_blank_or_breakz(self.ch()) { - match self.ch() { - // Check for an escaped single quote. - '\'' if self.buffer[1] == '\'' && single => { - string.push('\''); - self.skip_n_non_blank(2); - } - // Check for the right quote. - '\'' if single => break, - '"' if !single => break, - // Check for an escaped line break. - '\\' if !single && is_break(self.buffer[1]) => { - self.lookahead(3); - self.skip_non_blank(); - self.skip_line(); - *leading_blanks = true; - break; - } - // Check for an escape sequence. - '\\' if !single => { - string.push(self.resolve_flow_scalar_escape_sequence(start_mark)?); - } - c => { - string.push(c); - self.skip_non_blank(); - } - } - self.lookahead(2); - } - Ok(()) - } - - /// Escape the sequence we encounter in a flow scalar. - /// - /// `self.ch()` must point to the `\` starting the escape sequence. - /// - /// # Errors - /// Return an error if an invalid escape sequence is found. - fn resolve_flow_scalar_escape_sequence( - &mut self, - start_mark: &Marker, - ) -> Result { - let mut code_length = 0usize; - let mut ret = '\0'; - - match self.buffer[1] { - '0' => ret = '\0', - 'a' => ret = '\x07', - 'b' => ret = '\x08', - 't' | '\t' => ret = '\t', - 'n' => ret = '\n', - 'v' => ret = '\x0b', - 'f' => ret = '\x0c', - 'r' => ret = '\x0d', - 'e' => ret = '\x1b', - ' ' => ret = '\x20', - '"' => ret = '"', - '/' => ret = '/', - '\\' => ret = '\\', - // Unicode next line (#x85) - 'N' => ret = char::from_u32(0x85).unwrap(), - // Unicode non-breaking space (#xA0) - '_' => ret = char::from_u32(0xA0).unwrap(), - // Unicode line separator (#x2028) - 'L' => ret = char::from_u32(0x2028).unwrap(), - // Unicode paragraph separator (#x2029) - 'P' => ret = char::from_u32(0x2029).unwrap(), - 'x' => code_length = 2, - 'u' => code_length = 4, - 'U' => code_length = 8, - _ => { - return Err(ScanError::new( - *start_mark, - "while parsing a quoted scalar, found unknown escape character", - )) - } - } - self.skip_n_non_blank(2); - - // Consume an arbitrary escape code. - if code_length > 0 { - self.lookahead(code_length); - let mut value = 0u32; - for i in 0..code_length { - if !is_hex(self.buffer[i]) { - return Err(ScanError::new( - *start_mark, - "while parsing a quoted scalar, did not find expected hexadecimal number", - )); - } - value = (value << 4) + as_hex(self.buffer[i]); - } - - let Some(ch) = char::from_u32(value) else { - return Err(ScanError::new( - *start_mark, - "while parsing a quoted scalar, found invalid Unicode character escape code", - )); - }; - ret = ch; - - self.skip_n_non_blank(code_length); - } - Ok(ret) - } - - fn fetch_plain_scalar(&mut self) -> ScanResult { - self.save_simple_key(); - self.disallow_simple_key(); - - let tok = self.scan_plain_scalar()?; - - self.tokens.push_back(tok); - Ok(()) - } - - /// Scan for a plain scalar. - /// - /// Plain scalars are the most readable but restricted style. They may span multiple lines in - /// some contexts. - #[allow(clippy::too_many_lines)] - fn scan_plain_scalar(&mut self) -> Result { - self.unroll_non_block_indents(); - let indent = self.indent + 1; - let start_mark = self.mark; - - if self.flow_level > 0 && (start_mark.col as isize) < indent { - return Err(ScanError::new( - start_mark, - "invalid indentation in flow construct", - )); - } - - let mut string = String::with_capacity(32); - let mut leading_break = String::with_capacity(32); - let mut trailing_breaks = String::with_capacity(32); - let mut whitespaces = String::with_capacity(32); - - loop { - self.lookahead(4); - if self.next_is_document_indicator() || self.ch() == '#' { - break; - } - - if self.flow_level > 0 && self.ch() == '-' && is_flow(self.buffer[1]) { - return Err(ScanError::new( - self.mark, - "plain scalar cannot start with '-' followed by ,[]{}", - )); - } - - if !is_blank_or_breakz(self.ch()) && self.next_can_be_plain_scalar() { - if self.leading_whitespace { - if leading_break.is_empty() { - string.push_str(&leading_break); - string.push_str(&trailing_breaks); - trailing_breaks.clear(); - leading_break.clear(); - } else { - if trailing_breaks.is_empty() { - string.push(' '); - } else { - string.push_str(&trailing_breaks); - trailing_breaks.clear(); - } - leading_break.clear(); - } - self.leading_whitespace = false; - } else if !whitespaces.is_empty() { - string.push_str(&whitespaces); - whitespaces.clear(); - } - - // We can unroll the first iteration of the loop. - string.push(self.ch()); - self.skip_non_blank(); - self.lookahead(2); - - // Add content non-blank characters to the scalar. - while !is_blank_or_breakz(self.ch()) { - if !self.next_can_be_plain_scalar() { - break; - } - - string.push(self.ch()); - self.skip_non_blank(); - self.lookahead(2); - } - } - - // We may reach the end of a plain scalar if: - // - We reach eof - // - We reach ": " - // - We find a flow character in a flow context - if !(is_blank(self.ch()) || is_break(self.ch())) { - break; - } - - // Process blank characters. - while is_blank(self.look_ch()) || is_break(self.ch()) { - if is_blank(self.ch()) { - if !self.leading_whitespace { - whitespaces.push(self.ch()); - self.skip_blank(); - } else if (self.mark.col as isize) < indent && self.ch() == '\t' { - // Tabs in an indentation columns are allowed if and only if the line is - // empty. Skip to the end of the line. - self.skip_ws_to_eol(SkipTabs::Yes)?; - if !is_breakz(self.ch()) { - return Err(ScanError::new( - start_mark, - "while scanning a plain scalar, found a tab", - )); - } - } else { - self.skip_blank(); - } - } else { - self.lookahead(2); - // Check if it is a first line break - if self.leading_whitespace { - self.read_break(&mut trailing_breaks); - } else { - whitespaces.clear(); - self.read_break(&mut leading_break); - self.leading_whitespace = true; - } - } - } - - // check indentation level - if self.flow_level == 0 && (self.mark.col as isize) < indent { - break; - } - } - - if self.leading_whitespace { - self.allow_simple_key(); - } - - Ok(Token( - start_mark, - TokenType::Scalar(TScalarStyle::Plain, string), - )) - } - - fn fetch_key(&mut self) -> ScanResult { - let start_mark = self.mark; - if self.flow_level == 0 { - // Check if we are allowed to start a new key (not necessarily simple). - if !self.simple_key_allowed { - return Err(ScanError::new( - self.mark, - "mapping keys are not allowed in this context", - )); - } - self.roll_indent( - start_mark.col, - None, - TokenType::BlockMappingStart, - start_mark, - ); - } else { - // The parser, upon receiving a `Key`, will insert a `MappingStart` event. - self.flow_mapping_started = true; - } - - self.remove_simple_key()?; - - if self.flow_level == 0 { - self.allow_simple_key(); - } else { - self.disallow_simple_key(); - } - - self.skip_non_blank(); - self.skip_yaml_whitespace()?; - if self.ch() == '\t' { - return Err(ScanError::new( - self.mark(), - "tabs disallowed in this context", - )); - } - self.tokens.push_back(Token(start_mark, TokenType::Key)); - Ok(()) - } - - /// Fetch a value from a mapping (after a `:`). - fn fetch_value(&mut self) -> ScanResult { - let sk = self.simple_keys.last().unwrap().clone(); - let start_mark = self.mark; - self.implicit_flow_mapping = self.flow_level > 0 && !self.flow_mapping_started; - - // Skip over ':'. - self.skip_non_blank(); - if self.look_ch() == '\t' - && !self.skip_ws_to_eol(SkipTabs::Yes)?.has_valid_yaml_ws() - && (self.ch() == '-' || is_alpha(self.ch())) - { - return Err(ScanError::new( - self.mark, - "':' must be followed by a valid YAML whitespace", - )); - } - - if sk.possible { - // insert simple key - let tok = Token(sk.mark, TokenType::Key); - self.insert_token(sk.token_number - self.tokens_parsed, tok); - if self.implicit_flow_mapping { - if sk.mark.line < start_mark.line { - return Err(ScanError::new( - start_mark, - "illegal placement of ':' indicator", - )); - } - self.insert_token( - sk.token_number - self.tokens_parsed, - Token(self.mark, TokenType::FlowMappingStart), - ); - } - - // Add the BLOCK-MAPPING-START token if needed. - self.roll_indent( - sk.mark.col, - Some(sk.token_number), - TokenType::BlockMappingStart, - start_mark, - ); - self.roll_one_col_indent(); - - self.simple_keys.last_mut().unwrap().possible = false; - self.disallow_simple_key(); - } else { - if self.implicit_flow_mapping { - self.tokens - .push_back(Token(self.mark, TokenType::FlowMappingStart)); - } - // The ':' indicator follows a complex key. - if self.flow_level == 0 { - if !self.simple_key_allowed { - return Err(ScanError::new( - start_mark, - "mapping values are not allowed in this context", - )); - } - - self.roll_indent( - start_mark.col, - None, - TokenType::BlockMappingStart, - start_mark, - ); - } - self.roll_one_col_indent(); - - if self.flow_level == 0 { - self.allow_simple_key(); - } else { - self.disallow_simple_key(); - } - } - self.tokens.push_back(Token(start_mark, TokenType::Value)); - - Ok(()) - } - - /// Add an indentation level to the stack with the given block token, if needed. - /// - /// An indentation level is added only if: - /// - We are not in a flow-style construct (which don't have indentation per-se). - /// - The current column is further indented than the last indent we have registered. - fn roll_indent(&mut self, col: usize, number: Option, tok: TokenType, mark: Marker) { - if self.flow_level > 0 { - return; - } - - // If the last indent was a non-block indent, remove it. - // This means that we prepared an indent that we thought we wouldn't use, but realized just - // now that it is a block indent. - if self.indent <= col as isize { - if let Some(indent) = self.indents.last() { - if !indent.needs_block_end { - self.indent = indent.indent; - self.indents.pop(); - } - } - } - - if self.indent < col as isize { - self.indents.push(Indent { - indent: self.indent, - needs_block_end: true, - }); - self.indent = col as isize; - let tokens_parsed = self.tokens_parsed; - match number { - Some(n) => self.insert_token(n - tokens_parsed, Token(mark, tok)), - None => self.tokens.push_back(Token(mark, tok)), - } - } - } - - /// Pop indentation levels from the stack as much as needed. - /// - /// Indentation levels are popped from the stack while they are further indented than `col`. - /// If we are in a flow-style construct (which don't have indentation per-se), this function - /// does nothing. - fn unroll_indent(&mut self, col: isize) { - if self.flow_level > 0 { - return; - } - while self.indent > col { - let indent = self.indents.pop().unwrap(); - self.indent = indent.indent; - if indent.needs_block_end { - self.tokens.push_back(Token(self.mark, TokenType::BlockEnd)); - } - } - } - - /// Add an indentation level of 1 column that does not start a block. - /// - /// See the documentation of [`Indent::needs_block_end`] for more details. - /// An indentation is not added if we are inside a flow level or if the last indent is already - /// a non-block indent. - fn roll_one_col_indent(&mut self) { - if self.flow_level == 0 && self.indents.last().map_or(false, |x| x.needs_block_end) { - self.indents.push(Indent { - indent: self.indent, - needs_block_end: false, - }); - self.indent += 1; - } - } - - /// Unroll all last indents created with [`Self::roll_one_col_indent`]. - fn unroll_non_block_indents(&mut self) { - while let Some(indent) = self.indents.last() { - if indent.needs_block_end { - break; - } - self.indent = indent.indent; - self.indents.pop(); - } - } - - /// Mark the next token to be inserted as a potential simple key. - fn save_simple_key(&mut self) { - if self.simple_key_allowed { - let required = self.flow_level == 0 - && self.indent == (self.mark.col as isize) - && self.indents.last().unwrap().needs_block_end; - let mut sk = SimpleKey::new(self.mark); - sk.possible = true; - sk.required = required; - sk.token_number = self.tokens_parsed + self.tokens.len(); - - self.simple_keys.pop(); - self.simple_keys.push(sk); - } - } - - fn remove_simple_key(&mut self) -> ScanResult { - let last = self.simple_keys.last_mut().unwrap(); - if last.possible && last.required { - return Err(ScanError::new(self.mark, "simple key expected")); - } - - last.possible = false; - Ok(()) - } - - /// Check whether the next characters may be part of a plain scalar. - /// - /// This function assumes we are not given a blankz character. - // For some reason, `#[inline]` is not enough. - #[allow(clippy::inline_always)] - #[inline(always)] - fn next_can_be_plain_scalar(&self) -> bool { - match self.ch() { - // indicators can end a plain scalar, see 7.3.3. Plain Style - ':' if is_blank_or_breakz(self.buffer[1]) - || (self.flow_level > 0 && is_flow(self.buffer[1])) => - { - false - } - c if self.flow_level > 0 && is_flow(c) => false, - _ => true, - } - } - - /// Return whether the scanner is inside a block but outside of a flow sequence. - fn is_within_block(&self) -> bool { - !self.indents.is_empty() - } - - /// If an implicit mapping had started, end it. - fn end_implicit_mapping(&mut self, mark: Marker) { - if self.implicit_flow_mapping { - self.implicit_flow_mapping = false; - self.flow_mapping_started = false; - self.tokens - .push_back(Token(mark, TokenType::FlowMappingEnd)); - } - } -} - -/// Behavior to adopt regarding treating tabs as whitespace. -/// -/// Although tab is a valid yaml whitespace, it doesn't always behave the same as a space. -#[derive(Copy, Clone, Eq, PartialEq)] -enum SkipTabs { - /// Skip all tabs as whitespace. - Yes, - /// Don't skip any tab. Return from the function when encountering one. - No, - /// Return value from the function. - Result( - /// Whether tabs were encountered. - bool, - /// Whether at least 1 valid yaml whitespace has been encountered. - bool, - ), -} - -impl SkipTabs { - /// Whether tabs were found while skipping whitespace. - /// - /// This function must be called after a call to `skip_ws_to_eol`. - fn found_tabs(self) -> bool { - matches!(self, SkipTabs::Result(true, _)) - } - - /// Whether a valid YAML whitespace has been found in skipped-over content. - /// - /// This function must be called after a call to `skip_ws_to_eol`. - fn has_valid_yaml_ws(self) -> bool { - matches!(self, SkipTabs::Result(_, true)) - } -} - -/// Chomping, how final line breaks and trailing empty lines are interpreted. -/// -/// See YAML spec 8.1.1.2. -#[derive(PartialEq, Eq)] -pub enum Chomping { - /// The final line break and any trailing empty lines are excluded. - Strip, - /// The final line break is preserved, but trailing empty lines are excluded. - Clip, - /// The final line break and trailing empty lines are included. - Keep, -} - -#[cfg(test)] -mod test { - #[test] - fn test_is_anchor_char() { - use super::is_anchor_char; - assert!(is_anchor_char('x')); - } -} diff --git a/saphyr/src/yaml.rs b/saphyr/src/yaml.rs index 3c429d5..3af4bb3 100644 --- a/saphyr/src/yaml.rs +++ b/saphyr/src/yaml.rs @@ -10,8 +10,7 @@ use std::{collections::BTreeMap, convert::TryFrom, mem, ops::Index, ops::IndexMu use encoding_rs::{Decoder, DecoderResult, Encoding}; use hashlink::LinkedHashMap; -use crate::parser::{Event, MarkedEventReceiver, Parser, Tag}; -use crate::scanner::{Marker, ScanError, TScalarStyle}; +use saphyr_parser::{Event, MarkedEventReceiver, Marker, Parser, ScanError, TScalarStyle, Tag}; /// A YAML node is stored as this `Yaml` enumeration, which provides an easy way to /// access your YAML document. @@ -19,7 +18,7 @@ use crate::scanner::{Marker, ScanError, TScalarStyle}; /// # Examples /// /// ``` -/// use yaml_rust2::Yaml; +/// use saphyr::Yaml; /// let foo = Yaml::from_str("-123"); // convert the string to the appropriate YAML type /// assert_eq!(foo.as_i64().unwrap(), -123); /// @@ -306,7 +305,7 @@ pub enum YAMLDecodingTrap { /// For example, to read a YAML file while ignoring Unicode decoding errors you can set the /// `encoding_trap` to `encoding::DecoderTrap::Ignore`. /// ```rust -/// use yaml_rust2::yaml::{YamlDecoder, YAMLDecodingTrap}; +/// use saphyr::{YamlDecoder, YAMLDecodingTrap}; /// /// let string = b"--- /// a\xa9: 1 @@ -580,7 +579,7 @@ impl Yaml { /// replace it with a given value `other`. Otherwise, return self unchanged. /// /// ``` - /// use yaml_rust2::yaml::Yaml; + /// use saphyr::Yaml; /// /// assert_eq!(Yaml::BadValue.or(Yaml::Integer(3)), Yaml::Integer(3)); /// assert_eq!(Yaml::Integer(3).or(Yaml::BadValue), Yaml::Integer(3)); @@ -613,7 +612,7 @@ impl Yaml { /// /// # Examples /// ``` - /// # use yaml_rust2::yaml::Yaml; + /// # use saphyr::Yaml; /// assert!(matches!(Yaml::from_str("42"), Yaml::Integer(42))); /// assert!(matches!(Yaml::from_str("0x2A"), Yaml::Integer(42))); /// assert!(matches!(Yaml::from_str("0o52"), Yaml::Integer(42))); diff --git a/saphyr/tests/basic.rs b/saphyr/tests/basic.rs index b769c2b..cc00cb0 100644 --- a/saphyr/tests/basic.rs +++ b/saphyr/tests/basic.rs @@ -1,8 +1,7 @@ #![allow(clippy::bool_assert_comparison)] #![allow(clippy::float_cmp)] -use std::vec; -use yaml_rust2::{Yaml, YamlEmitter, YamlLoader}; +use saphyr::{Yaml, YamlEmitter, YamlLoader}; #[test] fn test_api() { @@ -44,27 +43,6 @@ fn test_api() { assert!(!writer.is_empty()); } -#[test] -fn test_fail() { - let s = " -# syntax error -scalar -key: [1, 2]] -key1:a2 -"; - let Err(error) = YamlLoader::load_from_str(s) else { - panic!() - }; - assert_eq!( - error.info(), - "mapping values are not allowed in this context" - ); - assert_eq!( - error.to_string(), - "mapping values are not allowed in this context at byte 26 line 4 column 4" - ); -} - #[test] fn test_coerce() { let s = "--- @@ -80,51 +58,6 @@ c: [1, 2] assert!(doc["d"][0].is_badvalue()); } -#[test] -fn test_empty_doc() { - let s: String = String::new(); - YamlLoader::load_from_str(&s).unwrap(); - let s: String = "---".to_owned(); - assert_eq!(YamlLoader::load_from_str(&s).unwrap()[0], Yaml::Null); -} - -#[test] -fn test_parser() { - let s: String = " -# comment -a0 bb: val -a1: - b1: 4 - b2: d -a2: 4 # i'm comment -a3: [1, 2, 3] -a4: - - - a1 - - a2 - - 2 -a5: 'single_quoted' -a6: \"double_quoted\" -a7: 你好 -" - .to_owned(); - let out = YamlLoader::load_from_str(&s).unwrap(); - let doc = &out[0]; - assert_eq!(doc["a7"].as_str().unwrap(), "你好"); -} - -#[test] -fn test_multi_doc() { - let s = " -'a scalar' ---- -'a scalar' ---- -'a scalar' -"; - let out = YamlLoader::load_from_str(s).unwrap(); - assert_eq!(out.len(), 3); -} - #[test] fn test_anchor() { let s = " @@ -150,15 +83,6 @@ a1: &DEFAULT assert_eq!(doc["a1"]["b2"], Yaml::BadValue); } -#[test] -fn test_github_27() { - // https://github.com/chyh1990/yaml-rust/issues/27 - let s = "&a"; - let out = YamlLoader::load_from_str(s).unwrap(); - let doc = &out[0]; - assert_eq!(doc.as_str().unwrap(), ""); -} - #[test] fn test_plain_datatype() { let s = " @@ -223,45 +147,6 @@ fn test_plain_datatype() { assert!(!doc[25][1].as_bool().unwrap()); } -#[test] -fn test_bad_hyphen() { - // See: https://github.com/chyh1990/yaml-rust/issues/23 - let s = "{-"; - assert!(YamlLoader::load_from_str(s).is_err()); -} - -#[test] -fn test_issue_65() { - // See: https://github.com/chyh1990/yaml-rust/issues/65 - let b = "\n\"ll\\\"ll\\\r\n\"ll\\\"ll\\\r\r\r\rU\r\r\rU"; - assert!(YamlLoader::load_from_str(b).is_err()); -} - -#[test] -fn test_issue_65_mwe() { - // A MWE for `test_issue_65`. The error over there is that there is invalid trailing content - // after a double quoted string. - let b = r#""foo" l"#; - assert!(YamlLoader::load_from_str(b).is_err()); -} - -#[test] -fn test_bad_docstart() { - assert!(YamlLoader::load_from_str("---This used to cause an infinite loop").is_ok()); - assert_eq!( - YamlLoader::load_from_str("----"), - Ok(vec![Yaml::String(String::from("----"))]) - ); - assert_eq!( - YamlLoader::load_from_str("--- #here goes a comment"), - Ok(vec![Yaml::Null]) - ); - assert_eq!( - YamlLoader::load_from_str("---- #here goes a comment"), - Ok(vec![Yaml::String(String::from("----"))]) - ); -} - #[test] fn test_plain_datatype_with_into_methods() { let s = " @@ -348,95 +233,3 @@ fn test_integer_key() { let first = out.into_iter().next().unwrap(); assert_eq!(first[0]["important"].as_bool().unwrap(), true); } - -#[test] -fn test_indentation_equality() { - let four_spaces = YamlLoader::load_from_str( - r" -hash: - with: - indentations -", - ) - .unwrap() - .into_iter() - .next() - .unwrap(); - - let two_spaces = YamlLoader::load_from_str( - r" -hash: - with: - indentations -", - ) - .unwrap() - .into_iter() - .next() - .unwrap(); - - let one_space = YamlLoader::load_from_str( - r" -hash: - with: - indentations -", - ) - .unwrap() - .into_iter() - .next() - .unwrap(); - - let mixed_spaces = YamlLoader::load_from_str( - r" -hash: - with: - indentations -", - ) - .unwrap() - .into_iter() - .next() - .unwrap(); - - assert_eq!(four_spaces, two_spaces); - assert_eq!(two_spaces, one_space); - assert_eq!(four_spaces, mixed_spaces); -} - -#[test] -fn test_two_space_indentations() { - // https://github.com/kbknapp/clap-rs/issues/965 - - let s = r" -subcommands: - - server: - about: server related commands -subcommands2: - - server: - about: server related commands -subcommands3: - - server: - about: server related commands - "; - - let out = YamlLoader::load_from_str(s).unwrap(); - let doc = &out.into_iter().next().unwrap(); - - println!("{doc:#?}"); - assert_eq!(doc["subcommands"][0]["server"], Yaml::Null); - assert!(doc["subcommands2"][0]["server"].as_hash().is_some()); - assert!(doc["subcommands3"][0]["server"].as_hash().is_some()); -} - -#[test] -fn test_recursion_depth_check_objects() { - let s = "{a:".repeat(10_000) + &"}".repeat(10_000); - assert!(YamlLoader::load_from_str(&s).is_err()); -} - -#[test] -fn test_recursion_depth_check_arrays() { - let s = "[".repeat(10_000) + &"]".repeat(10_000); - assert!(YamlLoader::load_from_str(&s).is_err()); -} diff --git a/saphyr/tests/emitter.rs b/saphyr/tests/emitter.rs index c085a56..53e558f 100644 --- a/saphyr/tests/emitter.rs +++ b/saphyr/tests/emitter.rs @@ -1,4 +1,4 @@ -use yaml_rust2::{YamlEmitter, YamlLoader}; +use saphyr::{YamlEmitter, YamlLoader}; #[allow(clippy::similar_names)] #[test] diff --git a/saphyr/tests/quickcheck.rs b/saphyr/tests/quickcheck.rs index fdf2549..819d064 100644 --- a/saphyr/tests/quickcheck.rs +++ b/saphyr/tests/quickcheck.rs @@ -1,9 +1,9 @@ -extern crate yaml_rust2; #[macro_use] extern crate quickcheck; use quickcheck::TestResult; -use yaml_rust2::{Yaml, YamlEmitter, YamlLoader}; + +use saphyr::{Yaml, YamlEmitter, YamlLoader}; quickcheck! { fn test_check_weird_keys(xs: Vec) -> TestResult { diff --git a/saphyr/tests/scanner.rs b/saphyr/tests/scanner.rs deleted file mode 100644 index 0a09517..0000000 --- a/saphyr/tests/scanner.rs +++ /dev/null @@ -1,440 +0,0 @@ -#![allow(clippy::enum_glob_use)] - -use yaml_rust2::{scanner::TokenType::*, scanner::*}; - -macro_rules! next { - ($p:ident, $tk:pat) => {{ - let tok = $p.next().unwrap(); - match tok.1 { - $tk => {} - _ => panic!("unexpected token: {:?}", tok), - } - }}; -} - -macro_rules! next_scalar { - ($p:ident, $tk:expr, $v:expr) => {{ - let tok = $p.next().unwrap(); - match tok.1 { - Scalar(style, ref v) => { - assert_eq!(style, $tk); - assert_eq!(v, $v); - } - _ => panic!("unexpected token: {:?}", tok), - } - }}; -} - -macro_rules! end { - ($p:ident) => {{ - assert_eq!($p.next(), None); - }}; -} -/// test cases in libyaml scanner.c -#[test] -fn test_empty() { - let s = ""; - let mut p = Scanner::new(s.chars()); - next!(p, StreamStart(..)); - next!(p, StreamEnd); - end!(p); -} - -#[test] -fn test_scalar() { - let s = "a scalar"; - let mut p = Scanner::new(s.chars()); - next!(p, StreamStart(..)); - next!(p, Scalar(TScalarStyle::Plain, _)); - next!(p, StreamEnd); - end!(p); -} - -#[test] -fn test_explicit_scalar() { - let s = "--- -'a scalar' -... -"; - let mut p = Scanner::new(s.chars()); - next!(p, StreamStart(..)); - next!(p, DocumentStart); - next!(p, Scalar(TScalarStyle::SingleQuoted, _)); - next!(p, DocumentEnd); - next!(p, StreamEnd); - end!(p); -} - -#[test] -fn test_multiple_documents() { - let s = " -'a scalar' ---- -'a scalar' ---- -'a scalar' -"; - let mut p = Scanner::new(s.chars()); - next!(p, StreamStart(..)); - next!(p, Scalar(TScalarStyle::SingleQuoted, _)); - next!(p, DocumentStart); - next!(p, Scalar(TScalarStyle::SingleQuoted, _)); - next!(p, DocumentStart); - next!(p, Scalar(TScalarStyle::SingleQuoted, _)); - next!(p, StreamEnd); - end!(p); -} - -#[test] -fn test_a_flow_sequence() { - let s = "[item 1, item 2, item 3]"; - let mut p = Scanner::new(s.chars()); - next!(p, StreamStart(..)); - next!(p, FlowSequenceStart); - next_scalar!(p, TScalarStyle::Plain, "item 1"); - next!(p, FlowEntry); - next!(p, Scalar(TScalarStyle::Plain, _)); - next!(p, FlowEntry); - next!(p, Scalar(TScalarStyle::Plain, _)); - next!(p, FlowSequenceEnd); - next!(p, StreamEnd); - end!(p); -} - -#[test] -fn test_a_flow_mapping() { - let s = " -{ - a simple key: a value, # Note that the KEY token is produced. - ? a complex key: another value, -} -"; - let mut p = Scanner::new(s.chars()); - next!(p, StreamStart(..)); - next!(p, FlowMappingStart); - next!(p, Key); - next!(p, Scalar(TScalarStyle::Plain, _)); - next!(p, Value); - next!(p, Scalar(TScalarStyle::Plain, _)); - next!(p, FlowEntry); - next!(p, Key); - next_scalar!(p, TScalarStyle::Plain, "a complex key"); - next!(p, Value); - next!(p, Scalar(TScalarStyle::Plain, _)); - next!(p, FlowEntry); - next!(p, FlowMappingEnd); - next!(p, StreamEnd); - end!(p); -} - -#[test] -fn test_block_sequences() { - let s = " -- item 1 -- item 2 -- - - item 3.1 - - item 3.2 -- - key 1: value 1 - key 2: value 2 -"; - let mut p = Scanner::new(s.chars()); - next!(p, StreamStart(..)); - next!(p, BlockSequenceStart); - next!(p, BlockEntry); - next_scalar!(p, TScalarStyle::Plain, "item 1"); - next!(p, BlockEntry); - next_scalar!(p, TScalarStyle::Plain, "item 2"); - next!(p, BlockEntry); - next!(p, BlockSequenceStart); - next!(p, BlockEntry); - next_scalar!(p, TScalarStyle::Plain, "item 3.1"); - next!(p, BlockEntry); - next_scalar!(p, TScalarStyle::Plain, "item 3.2"); - next!(p, BlockEnd); - next!(p, BlockEntry); - next!(p, BlockMappingStart); - next!(p, Key); - next_scalar!(p, TScalarStyle::Plain, "key 1"); - next!(p, Value); - next_scalar!(p, TScalarStyle::Plain, "value 1"); - next!(p, Key); - next_scalar!(p, TScalarStyle::Plain, "key 2"); - next!(p, Value); - next_scalar!(p, TScalarStyle::Plain, "value 2"); - next!(p, BlockEnd); - next!(p, BlockEnd); - next!(p, StreamEnd); - end!(p); -} - -#[test] -fn test_block_mappings() { - let s = " -a simple key: a value # The KEY token is produced here. -? a complex key -: another value -a mapping: - key 1: value 1 - key 2: value 2 -a sequence: - - item 1 - - item 2 -"; - let mut p = Scanner::new(s.chars()); - next!(p, StreamStart(..)); - next!(p, BlockMappingStart); - next!(p, Key); - next!(p, Scalar(_, _)); - next!(p, Value); - next!(p, Scalar(_, _)); - next!(p, Key); - next!(p, Scalar(_, _)); - next!(p, Value); - next!(p, Scalar(_, _)); - next!(p, Key); - next!(p, Scalar(_, _)); - next!(p, Value); // libyaml comment seems to be wrong - next!(p, BlockMappingStart); - next!(p, Key); - next!(p, Scalar(_, _)); - next!(p, Value); - next!(p, Scalar(_, _)); - next!(p, Key); - next!(p, Scalar(_, _)); - next!(p, Value); - next!(p, Scalar(_, _)); - next!(p, BlockEnd); - next!(p, Key); - next!(p, Scalar(_, _)); - next!(p, Value); - next!(p, BlockSequenceStart); - next!(p, BlockEntry); - next!(p, Scalar(_, _)); - next!(p, BlockEntry); - next!(p, Scalar(_, _)); - next!(p, BlockEnd); - next!(p, BlockEnd); - next!(p, StreamEnd); - end!(p); -} - -#[test] -fn test_no_block_sequence_start() { - let s = " -key: -- item 1 -- item 2 -"; - let mut p = Scanner::new(s.chars()); - next!(p, StreamStart(..)); - next!(p, BlockMappingStart); - next!(p, Key); - next_scalar!(p, TScalarStyle::Plain, "key"); - next!(p, Value); - next!(p, BlockEntry); - next_scalar!(p, TScalarStyle::Plain, "item 1"); - next!(p, BlockEntry); - next_scalar!(p, TScalarStyle::Plain, "item 2"); - next!(p, BlockEnd); - next!(p, StreamEnd); - end!(p); -} - -#[test] -fn test_collections_in_sequence() { - let s = " -- - item 1 - - item 2 -- key 1: value 1 - key 2: value 2 -- ? complex key - : complex value -"; - let mut p = Scanner::new(s.chars()); - next!(p, StreamStart(..)); - next!(p, BlockSequenceStart); - next!(p, BlockEntry); - next!(p, BlockSequenceStart); - next!(p, BlockEntry); - next_scalar!(p, TScalarStyle::Plain, "item 1"); - next!(p, BlockEntry); - next_scalar!(p, TScalarStyle::Plain, "item 2"); - next!(p, BlockEnd); - next!(p, BlockEntry); - next!(p, BlockMappingStart); - next!(p, Key); - next_scalar!(p, TScalarStyle::Plain, "key 1"); - next!(p, Value); - next_scalar!(p, TScalarStyle::Plain, "value 1"); - next!(p, Key); - next_scalar!(p, TScalarStyle::Plain, "key 2"); - next!(p, Value); - next_scalar!(p, TScalarStyle::Plain, "value 2"); - next!(p, BlockEnd); - next!(p, BlockEntry); - next!(p, BlockMappingStart); - next!(p, Key); - next_scalar!(p, TScalarStyle::Plain, "complex key"); - next!(p, Value); - next_scalar!(p, TScalarStyle::Plain, "complex value"); - next!(p, BlockEnd); - next!(p, BlockEnd); - next!(p, StreamEnd); - end!(p); -} - -#[test] -fn test_collections_in_mapping() { - let s = " -? a sequence -: - item 1 - - item 2 -? a mapping -: key 1: value 1 - key 2: value 2 -"; - let mut p = Scanner::new(s.chars()); - next!(p, StreamStart(..)); - next!(p, BlockMappingStart); - next!(p, Key); - next_scalar!(p, TScalarStyle::Plain, "a sequence"); - next!(p, Value); - next!(p, BlockSequenceStart); - next!(p, BlockEntry); - next_scalar!(p, TScalarStyle::Plain, "item 1"); - next!(p, BlockEntry); - next_scalar!(p, TScalarStyle::Plain, "item 2"); - next!(p, BlockEnd); - next!(p, Key); - next_scalar!(p, TScalarStyle::Plain, "a mapping"); - next!(p, Value); - next!(p, BlockMappingStart); - next!(p, Key); - next_scalar!(p, TScalarStyle::Plain, "key 1"); - next!(p, Value); - next_scalar!(p, TScalarStyle::Plain, "value 1"); - next!(p, Key); - next_scalar!(p, TScalarStyle::Plain, "key 2"); - next!(p, Value); - next_scalar!(p, TScalarStyle::Plain, "value 2"); - next!(p, BlockEnd); - next!(p, BlockEnd); - next!(p, StreamEnd); - end!(p); -} - -#[test] -fn test_spec_ex7_3() { - let s = " -{ - ? foo :, - : bar, -} -"; - let mut p = Scanner::new(s.chars()); - next!(p, StreamStart(..)); - next!(p, FlowMappingStart); - next!(p, Key); - next_scalar!(p, TScalarStyle::Plain, "foo"); - next!(p, Value); - next!(p, FlowEntry); - next!(p, Value); - next_scalar!(p, TScalarStyle::Plain, "bar"); - next!(p, FlowEntry); - next!(p, FlowMappingEnd); - next!(p, StreamEnd); - end!(p); -} - -#[test] -fn test_plain_scalar_starting_with_indicators_in_flow() { - // "Plain scalars must not begin with most indicators, as this would cause ambiguity with - // other YAML constructs. However, the “:”, “?” and “-” indicators may be used as the first - // character if followed by a non-space “safe” character, as this causes no ambiguity." - - let s = "{a: :b}"; - let mut p = Scanner::new(s.chars()); - next!(p, StreamStart(..)); - next!(p, FlowMappingStart); - next!(p, Key); - next_scalar!(p, TScalarStyle::Plain, "a"); - next!(p, Value); - next_scalar!(p, TScalarStyle::Plain, ":b"); - next!(p, FlowMappingEnd); - next!(p, StreamEnd); - end!(p); - - let s = "{a: ?b}"; - let mut p = Scanner::new(s.chars()); - next!(p, StreamStart(..)); - next!(p, FlowMappingStart); - next!(p, Key); - next_scalar!(p, TScalarStyle::Plain, "a"); - next!(p, Value); - next_scalar!(p, TScalarStyle::Plain, "?b"); - next!(p, FlowMappingEnd); - next!(p, StreamEnd); - end!(p); -} - -#[test] -fn test_plain_scalar_starting_with_indicators_in_block() { - let s = ":a"; - let mut p = Scanner::new(s.chars()); - next!(p, StreamStart(..)); - next_scalar!(p, TScalarStyle::Plain, ":a"); - next!(p, StreamEnd); - end!(p); - - let s = "?a"; - let mut p = Scanner::new(s.chars()); - next!(p, StreamStart(..)); - next_scalar!(p, TScalarStyle::Plain, "?a"); - next!(p, StreamEnd); - end!(p); -} - -#[test] -fn test_plain_scalar_containing_indicators_in_block() { - let s = "a:,b"; - let mut p = Scanner::new(s.chars()); - next!(p, StreamStart(..)); - next_scalar!(p, TScalarStyle::Plain, "a:,b"); - next!(p, StreamEnd); - end!(p); - - let s = ":,b"; - let mut p = Scanner::new(s.chars()); - next!(p, StreamStart(..)); - next_scalar!(p, TScalarStyle::Plain, ":,b"); - next!(p, StreamEnd); - end!(p); -} - -#[test] -fn test_scanner_cr() { - let s = "---\r\n- tok1\r\n- tok2"; - let mut p = Scanner::new(s.chars()); - next!(p, StreamStart(..)); - next!(p, DocumentStart); - next!(p, BlockSequenceStart); - next!(p, BlockEntry); - next_scalar!(p, TScalarStyle::Plain, "tok1"); - next!(p, BlockEntry); - next_scalar!(p, TScalarStyle::Plain, "tok2"); - next!(p, BlockEnd); - next!(p, StreamEnd); - end!(p); -} - -#[test] -fn test_uri() { - // TODO -} - -#[test] -fn test_uri_escapes() { - // TODO -} diff --git a/saphyr/tests/spec_test.rs b/saphyr/tests/spec_test.rs index ecf1327..80b6bfd 100644 --- a/saphyr/tests/spec_test.rs +++ b/saphyr/tests/spec_test.rs @@ -1,84 +1,7 @@ -#![allow(dead_code)] -#![allow(non_upper_case_globals)] -extern crate yaml_rust2; - -use yaml_rust2::parser::{Event, EventReceiver, Parser}; -use yaml_rust2::scanner::TScalarStyle; - -// These names match the names used in the C++ test suite. -#[cfg_attr(feature = "cargo-clippy", allow(clippy::enum_variant_names))] -#[derive(Clone, PartialEq, PartialOrd, Debug)] -enum TestEvent { - OnDocumentStart, - OnDocumentEnd, - OnSequenceStart, - OnSequenceEnd, - OnMapStart, - OnMapEnd, - OnScalar, - OnAlias, - OnNull, -} - -struct YamlChecker { - pub evs: Vec, -} - -impl EventReceiver for YamlChecker { - fn on_event(&mut self, ev: Event) { - let tev = match ev { - Event::DocumentStart => TestEvent::OnDocumentStart, - Event::DocumentEnd => TestEvent::OnDocumentEnd, - Event::SequenceStart(..) => TestEvent::OnSequenceStart, - Event::SequenceEnd => TestEvent::OnSequenceEnd, - Event::MappingStart(..) => TestEvent::OnMapStart, - Event::MappingEnd => TestEvent::OnMapEnd, - Event::Scalar(ref v, style, _, _) => { - if v == "~" && style == TScalarStyle::Plain { - TestEvent::OnNull - } else { - TestEvent::OnScalar - } - } - Event::Alias(_) => TestEvent::OnAlias, - _ => return, // ignore other events - }; - self.evs.push(tev); - } -} - -fn str_to_test_events(docs: &str) -> Vec { - let mut p = YamlChecker { evs: Vec::new() }; - let mut parser = Parser::new_from_str(docs); - parser.load(&mut p, true).unwrap(); - p.evs -} - -macro_rules! assert_next { - ($v:expr, $p:pat) => { - match $v.next().unwrap() { - $p => {} - e => { - panic!("unexpected event: {:?} (expected {:?})", e, stringify!($p)); - } - } - }; -} - -// auto generated from handler_spec_test.cpp -include!("specexamples.rs.inc"); -include!("spec_test.rs.inc"); - -// hand-crafted tests -//#[test] -//fn test_hc_alias() { -//} +use saphyr::{Hash, Yaml, YamlEmitter, YamlLoader}; #[test] fn test_mapvec_legal() { - use yaml_rust2::yaml::{Hash, Yaml}; - use yaml_rust2::{YamlEmitter, YamlLoader}; - // Emitting a `map>, _>` should result in legal yaml that // we can parse. diff --git a/saphyr/tests/test_round_trip.rs b/saphyr/tests/test_round_trip.rs index 5f0a7a1..0d03d3e 100644 --- a/saphyr/tests/test_round_trip.rs +++ b/saphyr/tests/test_round_trip.rs @@ -1,6 +1,4 @@ -extern crate yaml_rust2; - -use yaml_rust2::{Yaml, YamlEmitter, YamlLoader}; +use saphyr::{Yaml, YamlEmitter, YamlLoader}; fn roundtrip(original: &Yaml) { let mut emitted = String::new(); diff --git a/saphyr/tests/yaml-test-suite b/saphyr/tests/yaml-test-suite deleted file mode 160000 index 45db50a..0000000 --- a/saphyr/tests/yaml-test-suite +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 45db50aecf9b1520f8258938c88f396e96f30831 diff --git a/saphyr/tests/yaml-test-suite.rs b/saphyr/tests/yaml-test-suite.rs deleted file mode 100644 index 818083f..0000000 --- a/saphyr/tests/yaml-test-suite.rs +++ /dev/null @@ -1,295 +0,0 @@ -use std::fs::{self, DirEntry}; - -use libtest_mimic::{run_tests, Arguments, Outcome, Test}; - -use yaml_rust2::{ - parser::{Event, EventReceiver, Parser, Tag}, - scanner::TScalarStyle, - yaml, ScanError, Yaml, YamlLoader, -}; - -type Result> = std::result::Result; - -struct YamlTest { - yaml_visual: String, - yaml: String, - expected_events: String, - expected_error: bool, -} - -fn main() -> Result<()> { - let mut arguments = Arguments::from_args(); - if arguments.num_threads.is_none() { - arguments.num_threads = Some(1); - } - let tests: Vec> = std::fs::read_dir("tests/yaml-test-suite/src")? - .map(|entry| -> Result<_> { - let entry = entry?; - let tests = load_tests_from_file(&entry)?; - Ok(tests) - }) - .collect::>()?; - let mut tests: Vec<_> = tests.into_iter().flatten().collect(); - tests.sort_by_key(|t| t.name.clone()); - - run_tests(&arguments, tests, run_yaml_test).exit(); -} - -fn run_yaml_test(test: &Test) -> Outcome { - let desc = &test.data; - let actual_events = parse_to_events(&desc.yaml); - let events_diff = actual_events.map(|events| events_differ(&events, &desc.expected_events)); - let mut error_text = match (&events_diff, desc.expected_error) { - (Ok(x), true) => Some(format!("no error when expected: {x:#?}")), - (Err(_), true) | (Ok(None), false) => None, - (Err(e), false) => Some(format!("unexpected error {e:?}")), - (Ok(Some(diff)), false) => Some(format!("events differ: {diff}")), - }; - - // Show a caret on error. - if let Some(text) = &mut error_text { - use std::fmt::Write; - let _ = writeln!(text, "\n### Input:\n{}\n### End", desc.yaml_visual); - if let Err(err) = &events_diff { - writeln!(text, "### Error position").unwrap(); - let mut lines = desc.yaml.lines(); - for _ in 0..(err.marker().line() - 1) { - let l = lines.next().unwrap(); - writeln!(text, "{l}").unwrap(); - } - writeln!(text, "\x1B[91;1m{}", lines.next().unwrap()).unwrap(); - for _ in 0..err.marker().col() { - write!(text, " ").unwrap(); - } - writeln!(text, "^\x1b[m").unwrap(); - for l in lines { - writeln!(text, "{l}").unwrap(); - } - writeln!(text, "### End error position").unwrap(); - } - } - - match error_text { - None => Outcome::Passed, - Some(txt) => Outcome::Failed { msg: Some(txt) }, - } -} - -fn load_tests_from_file(entry: &DirEntry) -> Result>> { - let file_name = entry.file_name().to_string_lossy().to_string(); - let test_name = file_name - .strip_suffix(".yaml") - .ok_or("unexpected filename")?; - let tests = YamlLoader::load_from_str(&fs::read_to_string(entry.path())?)?; - let tests = tests[0].as_vec().ok_or("no test list found in file")?; - - let mut result = vec![]; - let mut current_test = yaml::Hash::new(); - for (idx, test_data) in tests.iter().enumerate() { - let name = if tests.len() > 1 { - format!("{test_name}-{idx:02}") - } else { - test_name.to_string() - }; - - // Test fields except `fail` are "inherited" - let test_data = test_data.as_hash().unwrap(); - current_test.remove(&Yaml::String("fail".into())); - for (key, value) in test_data.clone() { - current_test.insert(key, value); - } - - let current_test = Yaml::Hash(current_test.clone()); // Much better indexing - - if current_test["skip"] != Yaml::BadValue { - continue; - } - - result.push(Test { - name, - kind: String::new(), - is_ignored: false, - is_bench: false, - data: YamlTest { - yaml_visual: current_test["yaml"].as_str().unwrap().to_string(), - yaml: visual_to_raw(current_test["yaml"].as_str().unwrap()), - expected_events: visual_to_raw(current_test["tree"].as_str().unwrap()), - expected_error: current_test["fail"].as_bool() == Some(true), - }, - }); - } - Ok(result) -} - -fn parse_to_events(source: &str) -> Result, ScanError> { - let mut reporter = EventReporter::new(); - Parser::new_from_str(source).load(&mut reporter, true)?; - Ok(reporter.events) -} - -struct EventReporter { - events: Vec, -} - -impl EventReporter { - fn new() -> Self { - Self { events: vec![] } - } -} - -impl EventReceiver for EventReporter { - fn on_event(&mut self, ev: Event) { - let line: String = match ev { - Event::StreamStart => "+STR".into(), - Event::StreamEnd => "-STR".into(), - - Event::DocumentStart => "+DOC".into(), - Event::DocumentEnd => "-DOC".into(), - - Event::SequenceStart(idx, tag) => { - format!("+SEQ{}{}", format_index(idx), format_tag(&tag)) - } - Event::SequenceEnd => "-SEQ".into(), - - Event::MappingStart(idx, tag) => { - format!("+MAP{}{}", format_index(idx), format_tag(&tag)) - } - Event::MappingEnd => "-MAP".into(), - - Event::Scalar(ref text, style, idx, ref tag) => { - let kind = match style { - TScalarStyle::Plain => ":", - TScalarStyle::SingleQuoted => "'", - TScalarStyle::DoubleQuoted => r#"""#, - TScalarStyle::Literal => "|", - TScalarStyle::Folded => ">", - }; - format!( - "=VAL{}{} {}{}", - format_index(idx), - format_tag(tag), - kind, - escape_text(text) - ) - } - Event::Alias(idx) => format!("=ALI *{idx}"), - Event::Nothing => return, - }; - self.events.push(line); - } -} - -fn format_index(idx: usize) -> String { - if idx > 0 { - format!(" &{idx}") - } else { - String::new() - } -} - -fn escape_text(text: &str) -> String { - let mut text = text.to_owned(); - for (ch, replacement) in [ - ('\\', r"\\"), - ('\n', "\\n"), - ('\r', "\\r"), - ('\x08', "\\b"), - ('\t', "\\t"), - ] { - text = text.replace(ch, replacement); - } - text -} - -fn format_tag(tag: &Option) -> String { - if let Some(tag) = tag { - format!(" <{}{}>", tag.handle, tag.suffix) - } else { - String::new() - } -} - -fn events_differ(actual: &[String], expected: &str) -> Option { - let actual = actual.iter().map(Some).chain(std::iter::repeat(None)); - let expected = expected_events(expected); - let expected = expected.iter().map(Some).chain(std::iter::repeat(None)); - for (idx, (act, exp)) in actual.zip(expected).enumerate() { - return match (act, exp) { - (Some(act), Some(exp)) => { - if act == exp { - continue; - } else { - Some(format!( - "line {idx} differs: \n=> expected `{exp}`\n=> found `{act}`", - )) - } - } - (Some(a), None) => Some(format!("extra actual line: {a:?}")), - (None, Some(e)) => Some(format!("extra expected line: {e:?}")), - (None, None) => None, - }; - } - unreachable!() -} - -/// Convert the snippets from "visual" to "actual" representation -fn visual_to_raw(yaml: &str) -> String { - let mut yaml = yaml.to_owned(); - for (pat, replacement) in [ - ("␣", " "), - ("»", "\t"), - ("—", ""), // Tab line continuation ——» - ("←", "\r"), - ("⇔", "\u{FEFF}"), - ("↵", ""), // Trailing newline marker - ("∎\n", ""), - ] { - yaml = yaml.replace(pat, replacement); - } - yaml -} - -/// Adapt the expectations to the yaml-rust reasonable limitations -/// -/// Drop information on node styles (flow/block) and anchor names. -/// Both are things that can be omitted according to spec. -fn expected_events(expected_tree: &str) -> Vec { - let mut anchors = vec![]; - expected_tree - .split('\n') - .map(|s| s.trim_start().to_owned()) - .filter(|s| !s.is_empty()) - .map(|mut s| { - // Anchor name-to-number conversion - if let Some(start) = s.find('&') { - if s[..start].find(':').is_none() { - let len = s[start..].find(' ').unwrap_or(s[start..].len()); - anchors.push(s[start + 1..start + len].to_owned()); - s = s.replace(&s[start..start + len], &format!("&{}", anchors.len())); - } - } - // Alias nodes name-to-number - if s.starts_with("=ALI") { - let start = s.find('*').unwrap(); - let name = &s[start + 1..]; - let idx = anchors - .iter() - .enumerate() - .filter(|(_, v)| v == &name) - .last() - .unwrap() - .0; - s = s.replace(&s[start..], &format!("*{}", idx + 1)); - } - // Dropping style information - match &*s { - "+DOC ---" => "+DOC".into(), - "-DOC ..." => "-DOC".into(), - s if s.starts_with("+SEQ []") => s.replacen("+SEQ []", "+SEQ", 1), - s if s.starts_with("+MAP {}") => s.replacen("+MAP {}", "+MAP", 1), - "=VAL :" => "=VAL :~".into(), // FIXME: known bug - s => s.into(), - } - }) - .collect() -} diff --git a/saphyr/tools/README.md b/saphyr/tools/README.md deleted file mode 100644 index 7728a0f..0000000 --- a/saphyr/tools/README.md +++ /dev/null @@ -1,229 +0,0 @@ -# `yaml-rust2` tools -This directory contains tools that are used to develop the crate. -Due to dependency management, only some of them are available as binaries from the `yaml-rust2` crate. - -| Tool | Invocation | -|------|------------| -| `bench_compare` | `cargo bench_compare` | -| `dump_events` | `cargo run --bin dump_events -- [...]` | -| `gen_large_yaml` | `cargo gen_large_yaml` | -| `run_bench` | `cargo run --bin run_bench -- [...]` | -| `time_parse` | `cargo run --bin time_parse -- [...]` | - -## `bench_compare` -See the [dedicated README file](./bench_compare/README.md). - -## `dump_events` -This is a debugging helper for the parser. It outputs events emitted by the parser for a given file. This can be paired with the `YAMLRUST2_DEBUG` environment variable to have an in-depth overview of which steps the scanner and the parser are taking. - -### Example -Consider the following `input.yaml` YAML file: -```yaml -- foo: bar -- baz: - c: [3, 4, 5] -``` - -Running `cargo run --bin dump_events -- input.yaml` outputs: -``` - ↳ StreamStart - ↳ DocumentStart - ↳ SequenceStart(0, None) - ↳ MappingStart(0, None) - ↳ Scalar("foo", Plain, 0, None) - ↳ Scalar("bar", Plain, 0, None) - ↳ MappingEnd - ↳ MappingStart(0, None) - ↳ Scalar("baz", Plain, 0, None) - ↳ Scalar("~", Plain, 0, None) - ↳ Scalar("c", Plain, 0, None) - ↳ SequenceStart(0, None) - ↳ Scalar("3", Plain, 0, None) - ↳ Scalar("4", Plain, 0, None) - ↳ Scalar("5", Plain, 0, None) - ↳ SequenceEnd - ↳ MappingEnd - ↳ SequenceEnd - ↳ DocumentEnd - ↳ StreamEnd -``` - -Running `YAMLRUST2_DEBUG=1 cargo run --bin dump_events -- input.yaml` outputs much more details: -
- Full output - -``` -Parser state: StreamStart - ↳ StreamStart(Utf8) Marker { index: 0, line: 1, col: 0 } - ↳ StreamStart - -Parser state: ImplicitDocumentStart - → fetch_next_token after whitespace Marker { index: 0, line: 1, col: 0 } '-' - ↳ BlockSequenceStart Marker { index: 0, line: 1, col: 0 } - ↳ DocumentStart - -Parser state: BlockNode - ↳ SequenceStart(0, None) - -Parser state: BlockSequenceFirstEntry - ↳ BlockEntry Marker { index: 2, line: 1, col: 2 } - → fetch_next_token after whitespace Marker { index: 2, line: 1, col: 2 } 'f' - → fetch_next_token after whitespace Marker { index: 5, line: 1, col: 5 } ':' - ↳ BlockMappingStart Marker { index: 5, line: 1, col: 5 } - ↳ MappingStart(0, None) - -Parser state: BlockMappingFirstKey - ↳ Key Marker { index: 2, line: 1, col: 2 } - ↳ Scalar(Plain, "foo") Marker { index: 2, line: 1, col: 2 } - ↳ Scalar("foo", Plain, 0, None) - -Parser state: BlockMappingValue - ↳ Value Marker { index: 5, line: 1, col: 5 } - → fetch_next_token after whitespace Marker { index: 7, line: 1, col: 7 } 'b' - ↳ Scalar(Plain, "bar") Marker { index: 7, line: 1, col: 7 } - ↳ Scalar("bar", Plain, 0, None) - -Parser state: BlockMappingKey - → fetch_next_token after whitespace Marker { index: 11, line: 2, col: 0 } '-' - ↳ BlockEnd Marker { index: 11, line: 2, col: 0 } - ↳ MappingEnd - -Parser state: BlockSequenceEntry - ↳ BlockEntry Marker { index: 13, line: 2, col: 2 } - → fetch_next_token after whitespace Marker { index: 13, line: 2, col: 2 } 'b' - → fetch_next_token after whitespace Marker { index: 16, line: 2, col: 5 } ':' - ↳ BlockMappingStart Marker { index: 16, line: 2, col: 5 } - ↳ MappingStart(0, None) - -Parser state: BlockMappingFirstKey - ↳ Key Marker { index: 13, line: 2, col: 2 } - ↳ Scalar(Plain, "baz") Marker { index: 13, line: 2, col: 2 } - ↳ Scalar("baz", Plain, 0, None) - -Parser state: BlockMappingValue - ↳ Value Marker { index: 16, line: 2, col: 5 } - → fetch_next_token after whitespace Marker { index: 20, line: 3, col: 2 } 'c' - → fetch_next_token after whitespace Marker { index: 21, line: 3, col: 3 } ':' - ↳ Key Marker { index: 20, line: 3, col: 2 } - ↳ Scalar("~", Plain, 0, None) - -Parser state: BlockMappingKey - ↳ Scalar(Plain, "c") Marker { index: 20, line: 3, col: 2 } - ↳ Scalar("c", Plain, 0, None) - -Parser state: BlockMappingValue - ↳ Value Marker { index: 21, line: 3, col: 3 } - → fetch_next_token after whitespace Marker { index: 23, line: 3, col: 5 } '[' - ↳ FlowSequenceStart Marker { index: 23, line: 3, col: 5 } - ↳ SequenceStart(0, None) - -Parser state: FlowSequenceFirstEntry - → fetch_next_token after whitespace Marker { index: 24, line: 3, col: 6 } '3' - → fetch_next_token after whitespace Marker { index: 25, line: 3, col: 7 } ',' - ↳ Scalar(Plain, "3") Marker { index: 24, line: 3, col: 6 } - ↳ Scalar("3", Plain, 0, None) - -Parser state: FlowSequenceEntry - ↳ FlowEntry Marker { index: 25, line: 3, col: 7 } - → fetch_next_token after whitespace Marker { index: 27, line: 3, col: 9 } '4' - → fetch_next_token after whitespace Marker { index: 28, line: 3, col: 10 } ',' - ↳ Scalar(Plain, "4") Marker { index: 27, line: 3, col: 9 } - ↳ Scalar("4", Plain, 0, None) - -Parser state: FlowSequenceEntry - ↳ FlowEntry Marker { index: 28, line: 3, col: 10 } - → fetch_next_token after whitespace Marker { index: 30, line: 3, col: 12 } '5' - → fetch_next_token after whitespace Marker { index: 31, line: 3, col: 13 } ']' - ↳ Scalar(Plain, "5") Marker { index: 30, line: 3, col: 12 } - ↳ Scalar("5", Plain, 0, None) - -Parser state: FlowSequenceEntry - ↳ FlowSequenceEnd Marker { index: 31, line: 3, col: 13 } - ↳ SequenceEnd - -Parser state: BlockMappingKey - → fetch_next_token after whitespace Marker { index: 33, line: 4, col: 0 } '\0' - ↳ BlockEnd Marker { index: 33, line: 4, col: 0 } - ↳ MappingEnd - -Parser state: BlockSequenceEntry - ↳ BlockEnd Marker { index: 33, line: 4, col: 0 } - ↳ SequenceEnd - -Parser state: DocumentEnd - ↳ StreamEnd Marker { index: 33, line: 4, col: 0 } - ↳ DocumentEnd - -Parser state: DocumentStart - ↳ StreamEnd -``` - -
- -While this cannot be shown in Markdown, the output is colored so that it is a bit easier to read. - -## `gen_large_yaml` -It is hard to find large (100+MiB) real-world YAML files that could be used to benchmark a parser. This utility generates multiple large files that are meant to stress the parser with different layouts of YAML files. The resulting files do not look like anything that would be encountered in production, but can serve as a base to test several features of a YAML parser. - -The generated files are the following: - - - `big.yaml`: A large array of records with few fields. One of the fields is a description, a large text block scalar spanning multiple lines. Most of the scanning happens in block scalars. - - `nested.yaml`: Very short key-value pairs that nest deeply. - - `small_objects.yaml`: A large array of 2 key-value mappings. - - `strings_array.yaml`: A large array of lipsum one-liners (~150-175 characters in length). - -All generated files are meant to be between 200 and 250 MiB in size. - -This tool depends on external dependencies that are not part of `yaml-rust2`'s dependencies or `dev-dependencies` and as such can't be called through `cargo run` directly. A dedicated `cargo gen_large_yaml` alias can be used to generate the benchmark files. - -## `run_bench` -This is a benchmarking helper that runs the parser on the given file a given number of times and is able to extract simple metrics out of the results. The `--output-yaml` flag can be specified to make the output a YAML file that can be fed into other tools. - -This binary is made to be used by `bench_compare`. - -Synopsis: `run_bench input.yaml [--output-yaml]` - -### Examples -```sh -$> cargo run --release --bin run_bench -- bench_yaml/big.yaml 10 -Average: 1.631936191s -Min: 1.629654651s -Max: 1.633045284s -95%: 1.633045284s - -$> cargo run --release --bin run_bench -- bench_yaml/big.yaml 10 --output-yaml -parser: yaml-rust2 -input: bench_yaml/big.yaml -average: 1649847674 -min: 1648277149 -max: 1651936305 -percentile95: 1651936305 -iterations: 10 -times: - - 1650216129 - - 1649349978 - - 1649507018 - - 1648277149 - - 1649036548 - - 1650323982 - - 1650917692 - - 1648702081 - - 1650209860 - - 1651936305 -``` - -## `time_parse` -This is a benchmarking helper that times how long it takes for the parser to emit all events. It calls the parser on the given input file, receives parsing events and then immediately discards them. It is advised to run this tool with `--release`. - -### Examples -Loading a small file could output the following: -```sh -$> cargo run --release --bin time_parse -- input.yaml -Loaded 0MiB in 14.189µs -``` - -While loading a larger file could output the following: -```sh -$> cargo run --release --bin time_parse -- bench_yaml/big.yaml -Loaded 220MiB in 1.612677853s -``` diff --git a/saphyr/tools/bench_compare/Cargo.toml b/saphyr/tools/bench_compare/Cargo.toml deleted file mode 100644 index 4ca9b33..0000000 --- a/saphyr/tools/bench_compare/Cargo.toml +++ /dev/null @@ -1,21 +0,0 @@ -[package] -name = "bench_compare" -version = "0.6.0" -authors = [ - "Ethiraric " -] -license = "MIT OR Apache-2.0" -description = "Run multiple YAML parsers and compare their times" -repository = "https://github.com/Ethiraric/yaml-rust2" -readme = "README.md" -edition = "2018" - -[dependencies] -anyhow = { version = "1.0.81", features = ["backtrace"] } -serde = { version = "1.0.197", features = ["derive"] } -serde_yaml = "0.9.32" -toml = "0.8.11" - -[profile.release-lto] -inherits = "release" -lto = true diff --git a/saphyr/tools/bench_compare/README.md b/saphyr/tools/bench_compare/README.md deleted file mode 100644 index b9e990b..0000000 --- a/saphyr/tools/bench_compare/README.md +++ /dev/null @@ -1,120 +0,0 @@ -# `bench_compare` -This tool helps with comparing times different YAML parsers take to parse the same input. - -## Synopsis -``` -bench_compare time_parse -bench_compare run_bench -``` - -This will run either `time_parse` or `run_bench` (described below) with the given set of parsers from the configuration file. - -## Parsers requirements -Parsers are expected to be event-based. In order to be fair to this crate's benchmark implementation, parsers should: - -* Load the file into memory (a string, `mmap`, ...) **prior** to starting the clock -* Initialize the parser, if needed -* **Start the clock** -* Read events from the parser while the parser has not finished parsing -* Discard events as they are received (dropping them, `free`ing them or anything similar) so as to not grow their memory consumption too high, and allowing the parser to reuse event structures -* **Stop the clock** -* Destroy the resources, if needed/wanted (parser, file buffer, ...). The kernel will reap after the process exits. - - -## Parsers required binaries -This tool recognizes 2 binaries: `time_parse` and `run_bench`. - -### `time_parse` -Synopsis: -``` -time_parse file.yaml [--short] -``` - -The binary must run the aforementioned steps and display on its output the time the parser took to parse the given file. -With the `--short` option, the binary must only output the benchmark time in nanoseconds. - -```sh -# This is meant to be human-readable. -# The example below is what this crate implements. -$> time_parse file.yaml -Loaded 200MiB in 1.74389s. - -# This will be read by this tool. -# This must output ONLY the time, in nanoseconds. -$> time_parse file.yaml --short -1743892394 -``` - -This tool will always provide the `--short` option. - -### `run_bench` -Synopsis: -``` -run_bench file.yaml [--output-yaml] -``` - -The binary is expected to run `` runs of the aforementioned steps and display on its output relevant information. -The `--output-yaml` instructs the binary to output details about its runs in YAML on its standard output. -The binary may optionally perform some warmup runs prior to running the benchmark. The time it took the binary to run will not be evaluated. - -```sh -# This is meant to be human-readable. -# The example below is what this crate implements. -$> run_bench file.yaml 100 -Average: 1.589485s -Min : 1.583078s -Max : 1.597028s -95% : 1.593219s - -# This will be read by this tool. -# This must output a YAML as described below. -$> run_bench ../file.yaml 10 --output-yaml -parser: yaml-rust2 -input: ../file.yaml -average: 1620303590 -min: 1611632108 -max: 1636401896 -percentile95: 1636401896 -iterations: 10 -times: - - 1636401896 - - 1623914538 - - 1611632108 - - 1612973608 - - 1617748930 - - 1615419514 - - 1612172250 - - 1620791346 - - 1629339306 - - 1622642412 -``` - -The expected fields are (all times in nanoseconds): - -* `parser`: The name of the parser (in case of a mistake renaming files) -* `input`: The path to the input file as given to the binary arguments -* `average`: The average time it took to run the parser -* `min`: The shortest time it took to run the parser -* `max`: The longest time it took to run the parser -* `percentile95`: The 95th percentile time of the runs -* `iterations`: The number of times the parser was run (``) -* `times`: An array of `iterations` times, one for each run, in the order they were run (first run first) - -## Configuration -`bench_compare` is configured through a `bench_compare.toml` file. This file must be located in the current directory. -As of now, default values are unsupported and all fields must be set. The following fields are required: -```toml -yaml_input_dir = "bench_yaml" # The path to the directory containing the input yaml files -iterations = 10 # The number of iterations, if using `run_bench` -yaml_output_dir = "yaml_output" # The directory in which `run_bench`'s yamls are saved -csv_output = "benchmark.csv" # The CSV output aggregating times for each parser and file - -[[parsers]] # A parser, can be repeated as many times as there are parsers -name = "yaml-rust2" # The name of the parser (used for logging) -path = "target/release/" # The path in which the parsers' `run_bench` and `time_parse` are - -# If there is another parser, another block can be added -# [[parsers]] -# name = "libfyaml" -# path = "../libfyaml/build" -``` diff --git a/saphyr/tools/bench_compare/src/main.rs b/saphyr/tools/bench_compare/src/main.rs deleted file mode 100644 index ac33f9c..0000000 --- a/saphyr/tools/bench_compare/src/main.rs +++ /dev/null @@ -1,174 +0,0 @@ -use std::{fs::File, io::BufWriter, io::Write, path::Path}; - -use anyhow::Error; -use serde::{Deserialize, Serialize}; - -fn main() { - if let Err(e) = entrypoint() { - eprintln!("{e:?}"); - std::process::exit(1); - } -} - -fn entrypoint() -> Result<(), Error> { - let config: Config = - toml::from_str(&std::fs::read_to_string("bench_compare.toml").unwrap()).unwrap(); - if config.parsers.is_empty() { - println!("Please add at least one parser. Refer to the README for instructions."); - return Ok(()); - } - let args: Vec<_> = std::env::args().collect(); - if args.len() != 2 - || (args.len() == 2 && !["time_parse", "run_bench"].contains(&args[1].as_str())) - { - println!("Usage: bench_compare "); - return Ok(()); - } - match args[1].as_str() { - "run_bench" => run_bench(&config)?, - "time_parse" => unimplemented!(), - _ => unreachable!(), - } - Ok(()) -} - -/// Run the `run_bench` binary on the given parsers. -fn run_bench(config: &Config) -> Result<(), Error> { - // Create output directory - std::fs::create_dir_all(&config.yaml_output_dir)?; - - let inputs = list_input_files(config)?; - let iterations = format!("{}", config.iterations); - let mut averages = vec![]; - - // Inputs are ordered, so are parsers. - for input in &inputs { - let input_basename = Path::new(&input).file_name().unwrap().to_string_lossy(); - let mut input_times = vec![]; - - // Run each input for each parser. - for parser in &config.parsers { - println!("Running {input_basename} against {}", parser.name); - // Run benchmark - let path = Path::new(&parser.path).join("run_bench"); - let output = std::process::Command::new(path) - .arg(input) - .arg(&iterations) - .arg("--output-yaml") - .output()?; - // Check exit status. - if output.status.code().unwrap_or(1) == 0 { - let s = String::from_utf8_lossy(&output.stdout); - // Get output as yaml. - match serde_yaml::from_str::(&s) { - Ok(output) => { - // Push average into our CSV-to-be. - input_times.push(output.average); - // Save the YAML for later. - serde_yaml::to_writer( - BufWriter::new(File::create(format!( - "{}/{}-{}", - config.yaml_output_dir, parser.name, input_basename - ))?), - &output, - )?; - } - Err(e) => { - // Yaml is invalid, use 0 as "didn't run properly". - println!("Errored: Invalid YAML output: {e}"); - input_times.push(0); - } - } - } else { - // An error happened, use 0 as "didn't run properly". - println!("Errored: process did exit non-zero"); - input_times.push(0); - } - } - averages.push(input_times); - } - - // Finally, save a CSV. - save_run_bench_csv(config, &inputs, &averages) -} - -/// General configuration structure. -#[derive(Serialize, Deserialize)] -struct Config { - /// The path to the directory containing the input yaml files. - yaml_input_dir: String, - /// Number of iterations to run, if using `run_bench`. - iterations: u32, - /// The parsers to run. - parsers: Vec, - /// The path to the directory in which `run_bench`'s yamls are saved. - yaml_output_dir: String, - /// The path to the CSV output aggregating times for each parser and file. - csv_output: String, -} - -/// A parser configuration. -#[derive(Serialize, Deserialize)] -struct Parser { - /// The name of the parser. - name: String, - /// The path in which the parser's `run_bench` and `time_parse` are located. - path: String, -} - -/// Ourput of running `run_bench` on a given parser. -#[derive(Serialize, Deserialize)] -struct BenchYamlOutput { - /// The name of the parser. - parser: String, - /// The file taken as input. - input: String, - /// Average parsing time (ns). - average: u64, - /// Shortest parsing time (ns). - min: u64, - /// Longest parsing time (ns). - max: u64, - /// 95th percentile of parsing times (ns). - percentile95: u64, - /// Number of iterations. - iterations: u64, - /// Parsing times for each run. - times: Vec, -} - -/// Save a CSV file with all averages from `run_bench`. -fn save_run_bench_csv( - config: &Config, - inputs: &[String], - averages: &[Vec], -) -> Result<(), Error> { - let mut csv = BufWriter::new(File::create(&config.csv_output)?); - for parser in &config.parsers { - write!(csv, ",{}", parser.name,)?; - } - writeln!(csv)?; - for (path, averages) in inputs.iter().zip(averages.iter()) { - let filename = Path::new(path).file_name().unwrap().to_string_lossy(); - write!(csv, "{}", filename)?; - for avg in averages { - write!(csv, ",{avg}")?; - } - writeln!(csv)?; - } - - Ok(()) -} - -/// Returns the paths to the input yaml files. -fn list_input_files(config: &Config) -> Result, Error> { - Ok(std::fs::read_dir(&config.yaml_input_dir)? - .filter_map(Result::ok) - .map(|entry| entry.path().to_string_lossy().to_string()) - .filter(|path| { - Path::new(path) - .extension() - .map_or(false, |ext| ext.eq_ignore_ascii_case("yaml")) - }) - .collect()) -} diff --git a/saphyr/tools/dump_events.rs b/saphyr/tools/dump_events.rs deleted file mode 100644 index 747e9b9..0000000 --- a/saphyr/tools/dump_events.rs +++ /dev/null @@ -1,38 +0,0 @@ -use std::env; -use std::fs::File; -use std::io::prelude::*; -use yaml_rust2::{ - parser::{MarkedEventReceiver, Parser}, - scanner::Marker, - Event, -}; - -#[derive(Debug)] -struct EventSink { - events: Vec<(Event, Marker)>, -} - -impl MarkedEventReceiver for EventSink { - fn on_event(&mut self, ev: Event, mark: Marker) { - eprintln!(" \x1B[;34m\u{21B3} {:?}\x1B[;m", &ev); - self.events.push((ev, mark)); - } -} - -fn str_to_events(yaml: &str) -> Vec<(Event, Marker)> { - let mut sink = EventSink { events: Vec::new() }; - let mut parser = Parser::new_from_str(yaml); - // Load events using our sink as the receiver. - parser.load(&mut sink, true).unwrap(); - sink.events -} - -fn main() { - let args: Vec<_> = env::args().collect(); - let mut f = File::open(&args[1]).unwrap(); - let mut s = String::new(); - f.read_to_string(&mut s).unwrap(); - - // dbg!(str_to_events(&s)); - str_to_events(&s); -} diff --git a/saphyr/tools/gen_large_yaml/Cargo.toml b/saphyr/tools/gen_large_yaml/Cargo.toml deleted file mode 100644 index d57bdea..0000000 --- a/saphyr/tools/gen_large_yaml/Cargo.toml +++ /dev/null @@ -1,20 +0,0 @@ -[package] -name = "gen_large_yaml" -version = "0.6.0" -authors = [ - "Ethiraric " -] -license = "MIT OR Apache-2.0" -description = "A helper to generate large YAML files" -repository = "https://github.com/Ethiraric/yaml-rust2" -readme = "README.md" -edition = "2018" - -[dependencies] -yaml-rust2 = { path = "../.." } -rand = { version = "0.8.5", features = [ "small_rng" ] } -lipsum = "0.9.0" - -[profile.release-lto] -inherits = "release" -lto = true diff --git a/saphyr/tools/gen_large_yaml/src/gen.rs b/saphyr/tools/gen_large_yaml/src/gen.rs deleted file mode 100644 index 78d16ba..0000000 --- a/saphyr/tools/gen_large_yaml/src/gen.rs +++ /dev/null @@ -1,156 +0,0 @@ -#![allow(clippy::too_many_arguments)] - -use rand::{distributions::Alphanumeric, rngs::SmallRng, Rng}; - -/// Generate a string with hexadecimal digits of the specified length. -pub fn hex_string(rng: &mut SmallRng, len: usize) -> String { - const DIGITS: &[u8] = b"0123456789abcdef"; - string_from_set(rng, len, len + 1, DIGITS) -} - -/// Generate an e-mail address. -pub fn email(rng: &mut SmallRng, len_lo: usize, len_hi: usize) -> String { - const CHARSET: &[u8] = b"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ-_.0123456789"; - format!( - "{}@example.com", - string_from_set(rng, len_lo, len_hi, CHARSET) - ) -} - -/// Generate a random URL. -pub fn url( - rng: &mut SmallRng, - scheme: &str, - n_paths_lo: usize, - n_paths_hi: usize, - path_len_lo: usize, - path_len_hi: usize, - extension: Option<&str>, -) -> String { - let mut string = format!("{scheme}://example.com"); - for _ in 0..rng.gen_range(n_paths_lo..n_paths_hi) { - string.push('/'); - string.push_str(&alnum_string(rng, path_len_lo, path_len_hi)); - } - if let Some(extension) = extension { - string.push('.'); - string.push_str(extension); - } - string -} - -/// Generate a random integer. -pub fn integer(rng: &mut SmallRng, lo: i64, hi: i64) -> i64 { - rng.gen_range(lo..hi) -} - -/// Generate an alphanumeric string with a length between `lo_len` and `hi_len`. -pub fn alnum_string(rng: &mut SmallRng, lo_len: usize, hi_len: usize) -> String { - let len = rng.gen_range(lo_len..hi_len); - rng.sample_iter(&Alphanumeric) - .take(len) - .map(char::from) - .collect() -} - -/// Generate a string with hexadecimal digits of the specified length. -pub fn string_from_set(rng: &mut SmallRng, len_lo: usize, len_hi: usize, set: &[u8]) -> String { - (0..rng.gen_range(len_lo..len_hi)) - .map(|_| set[rng.gen_range(0..set.len())] as char) - .collect() -} - -/// Generate a lipsum paragraph. -pub fn paragraph( - rng: &mut SmallRng, - lines_lo: usize, - lines_hi: usize, - wps_lo: usize, - wps_hi: usize, - line_maxcol: usize, -) -> Vec { - let mut ret = Vec::new(); - let nlines = rng.gen_range(lines_lo..lines_hi); - - while ret.len() < nlines { - let words_in_sentence = rng.gen_range(wps_lo..wps_hi); - let mut sentence = lipsum::lipsum_words_with_rng(rng.clone(), words_in_sentence); - - if let Some(last_line) = ret.pop() { - sentence = format!("{last_line} {sentence}"); - } - - while sentence.len() > line_maxcol { - let last_space_idx = line_maxcol - - sentence[0..line_maxcol] - .chars() - .rev() - .position(char::is_whitespace) - .unwrap(); - ret.push(sentence[0..last_space_idx].to_string()); - sentence = sentence[last_space_idx + 1..].to_string(); - } - if !sentence.is_empty() { - ret.push(sentence); - } - } - - ret -} - -/// Generate a full name. -pub fn full_name(rng: &mut SmallRng, len_lo: usize, len_hi: usize) -> String { - format!( - "{} {}", - name(rng, len_lo, len_hi), - name(rng, len_lo, len_hi) - ) -} - -/// Generate a name. -pub fn name(rng: &mut SmallRng, len_lo: usize, len_hi: usize) -> String { - const UPPER: &[u8] = b"ABCDEFGHIJKLMNOPQRSTUVWXYZ"; - const LOWER: &[u8] = b"abcdefghijklmnopqrstuvwxyz"; - - let len = rng.gen_range(len_lo..len_hi); - let mut ret = String::new(); - ret.push(UPPER[rng.gen_range(0..UPPER.len())] as char); - ret.push_str(string_from_set(rng, len, len + 1, LOWER).as_str()); - - ret -} - -/// Generate a set of words. -pub fn words(rng: &mut SmallRng, words_lo: usize, words_hi: usize) -> String { - let nwords = rng.gen_range(words_lo..words_hi); - lipsum::lipsum_words_with_rng(rng.clone(), nwords).replace(|c| "-\'\",*:".contains(c), "") -} - -/// Generate a lipsum text. -/// -/// Texts are composed of some paragraphs and empty lines between them. -pub fn text( - rng: &mut SmallRng, - paragraphs_lo: usize, - paragraphs_hi: usize, - lines_lo: usize, - lines_hi: usize, - wps_lo: usize, - wps_hi: usize, - line_maxcol: usize, -) -> Vec { - let mut ret = Vec::new(); - let mut first = true; - - for _ in 0..rng.gen_range(paragraphs_lo..paragraphs_hi) { - if first { - first = false; - } else { - ret.push(String::new()); - } - - ret.extend(paragraph(rng, lines_lo, lines_hi, wps_lo, wps_hi, line_maxcol).into_iter()); - } - - ret -} diff --git a/saphyr/tools/gen_large_yaml/src/main.rs b/saphyr/tools/gen_large_yaml/src/main.rs deleted file mode 100644 index b585c59..0000000 --- a/saphyr/tools/gen_large_yaml/src/main.rs +++ /dev/null @@ -1,261 +0,0 @@ -#![allow(dead_code)] - -mod gen; -mod nested; - -use std::fs::File; -use std::io::BufWriter; -use std::path::Path; - -use rand::{rngs::SmallRng, Rng, SeedableRng}; - -/// The path into which the generated YAML files will be written. -const OUTPUT_DIR: &str = "bench_yaml"; - -fn main() -> std::io::Result<()> { - let mut generator = Generator::new(); - let output_path = Path::new(OUTPUT_DIR); - if !output_path.is_dir() { - std::fs::create_dir(output_path).unwrap(); - } - - println!("Generating big.yaml"); - let mut out = BufWriter::new(File::create(output_path.join("big.yaml")).unwrap()); - generator.gen_record_array(&mut out, 100_000, 100_001)?; - - println!("Generating nested.yaml"); - let mut out = BufWriter::new(File::create(output_path.join("nested.yaml")).unwrap()); - nested::create_deep_object(&mut out, 1_100_000)?; - - println!("Generating small_objects.yaml"); - let mut out = BufWriter::new(File::create(output_path.join("small_objects.yaml")).unwrap()); - generator.gen_authors_array(&mut out, 4_000_000, 4_000_001)?; - - println!("Generating strings_array.yaml"); - let mut out = BufWriter::new(File::create(output_path.join("strings_array.yaml")).unwrap()); - generator.gen_strings_array(&mut out, 1_300_000, 1_300_001, 10, 40)?; - Ok(()) -} - -/// YAML Generator. -struct Generator { - /// The RNG state. - /// - /// We don't need to be cryptographically secure. [`SmallRng`] also implements the - /// [`SeedableRng`] trait, allowing runs to be predictable. - rng: SmallRng, - /// The stack of indentations. - indents: Vec, -} - -type GenFn = dyn FnOnce(&mut Generator, &mut W) -> std::io::Result<()>; - -impl Generator { - /// Create a new generator. - fn new() -> Self { - Generator { - rng: SmallRng::seed_from_u64(42), - indents: vec![0], - } - } - - /// Generate an array of records as per [`Self::gen_record_object`]. - fn gen_record_array( - &mut self, - writer: &mut W, - items_lo: usize, - items_hi: usize, - ) -> std::io::Result<()> { - self.gen_array(writer, items_lo, items_hi, Generator::gen_record_object) - } - - /// Generate an array of lipsum one-liners. - fn gen_strings_array( - &mut self, - writer: &mut W, - items_lo: usize, - items_hi: usize, - words_lo: usize, - words_hi: usize, - ) -> std::io::Result<()> { - self.gen_array(writer, items_lo, items_hi, |gen, writer| { - write!(writer, "{}", gen::words(&mut gen.rng, words_lo, words_hi)) - }) - } - - /// Generate a YAML object/mapping containing a record. - /// - /// Fields are description, hash, version, home, repository and pdf. - /// The `description` field is a long string and puts a lot of weight in plain scalar / block - /// scalar parsing. - fn gen_record_object(&mut self, writer: &mut W) -> std::io::Result<()> { - let fields: Vec<(String, Box>)> = vec![ - ( - "description".to_string(), - Box::new(|gen, w| { - write!(w, "|")?; - gen.push_indent(2); - gen.nl(w)?; - let indent = gen.indent(); - let text = gen::text(&mut gen.rng, 1, 9, 3, 8, 10, 20, 80 - indent); - gen.write_lines(w, &text)?; - gen.pop_indent(); - Ok(()) - }), - ), - ( - "authors".to_string(), - Box::new(|gen, w| { - gen.push_indent(2); - gen.nl(w)?; - gen.gen_authors_array(w, 1, 10)?; - gen.pop_indent(); - Ok(()) - }), - ), - ( - "hash".to_string(), - Box::new(|gen, w| write!(w, "{}", gen::hex_string(&mut gen.rng, 64))), - ), - ( - "version".to_string(), - Box::new(|gen, w| write!(w, "{}", gen::integer(&mut gen.rng, 1, 9))), - ), - ( - "home".to_string(), - Box::new(|gen, w| { - write!(w, "{}", gen::url(&mut gen.rng, "https", 0, 1, 0, 0, None)) - }), - ), - ( - "repository".to_string(), - Box::new(|gen, w| { - write!(w, "{}", gen::url(&mut gen.rng, "git", 1, 4, 10, 20, None)) - }), - ), - ( - "pdf".to_string(), - Box::new(|gen, w| { - write!( - w, - "{}", - gen::url(&mut gen.rng, "https", 1, 4, 10, 30, Some("pdf")) - ) - }), - ), - ]; - self.gen_object(writer, fields) - } - - /// Generate an array of authors as per [`Self::gen_author_object`]. - fn gen_authors_array( - &mut self, - writer: &mut W, - items_lo: usize, - items_hi: usize, - ) -> std::io::Result<()> { - self.gen_array(writer, items_lo, items_hi, Generator::gen_author_object) - } - - /// Generate a small object with 2 string fields. - fn gen_author_object(&mut self, writer: &mut W) -> std::io::Result<()> { - let fields: Vec<(String, Box>)> = vec![ - ( - "name".to_string(), - Box::new(|gen, w| write!(w, "{}", gen::full_name(&mut gen.rng, 10, 15))), - ), - ( - "email".to_string(), - Box::new(|gen, w| write!(w, "{}", gen::email(&mut gen.rng, 1, 9))), - ), - ]; - self.gen_object(writer, fields) - } - - /// Generate a YAML array/sequence containing nodes generated by the given function. - fn gen_array std::io::Result<()>>( - &mut self, - writer: &mut W, - len_lo: usize, - len_hi: usize, - mut obj_creator: F, - ) -> std::io::Result<()> { - let mut first = true; - for _ in 0..self.rng.gen_range(len_lo..len_hi) { - if first { - first = false; - } else { - self.nl(writer)?; - } - write!(writer, "- ")?; - self.push_indent(2); - (obj_creator)(self, writer)?; - self.pop_indent(); - } - Ok(()) - } - - /// Create a Yaml object with some fields in it. - fn gen_object( - &mut self, - writer: &mut W, - fields: Vec<(String, Box>)>, - ) -> std::io::Result<()> { - let mut first = true; - for (key, f) in fields { - if first { - first = false; - } else { - self.nl(writer)?; - } - write!(writer, "{key}: ")?; - f(self, writer)?; - } - Ok(()) - } - - /// Write the given lines at the right indentation. - fn write_lines( - &mut self, - writer: &mut W, - lines: &[String], - ) -> std::io::Result<()> { - let mut first = true; - - for line in lines { - if first { - first = false; - } else { - self.nl(writer)?; - } - write!(writer, "{line}")?; - } - - Ok(()) - } - - /// Write a new line to the writer and indent. - fn nl(&mut self, writer: &mut W) -> std::io::Result<()> { - writeln!(writer)?; - for _ in 0..self.indent() { - write!(writer, " ")?; - } - Ok(()) - } - - /// Return the given indent. - fn indent(&self) -> usize { - *self.indents.last().unwrap() - } - - /// Push a new indent with the given relative offset. - fn push_indent(&mut self, offset: usize) { - self.indents.push(self.indent() + offset); - } - - /// Pops the last indent. - fn pop_indent(&mut self) { - self.indents.pop(); - assert!(!self.indents.is_empty()); - } -} diff --git a/saphyr/tools/gen_large_yaml/src/nested.rs b/saphyr/tools/gen_large_yaml/src/nested.rs deleted file mode 100644 index 0f182a9..0000000 --- a/saphyr/tools/gen_large_yaml/src/nested.rs +++ /dev/null @@ -1,115 +0,0 @@ -use std::{cell::RefCell, rc::Rc}; - -use rand::{rngs::SmallRng, Rng, SeedableRng}; - -/// Create a deep object with the given amount of nodes. -pub fn create_deep_object( - writer: &mut W, - n_nodes: usize, -) -> std::io::Result<()> { - let mut tree = Tree::new(); - for _ in 0..n_nodes { - tree.push_node(); - } - tree.write_to(writer) -} - -/// An n-tree. -/// -/// The algorithm used to generate a potentially deep object is to create a tree, one node at a -/// time, where each node is put as a child of a random existing node in the tree. -struct Tree { - /// The tree-view of the tree. - root: Rc>, - /// Array of all the nodes in the tree, including the root node. - nodes: Vec>>, - /// The RNG state. - /// - /// We don't need to be cryptographically secure. [`SmallRng`] also implements the - /// [`SeedableRng`] trait, allowing runs to be predictable. - rng: SmallRng, -} - -/// A node in a tree. -struct Node { - /// All the children of the node. - children: Vec>>, -} - -impl Tree { - /// Create a new tree. - fn new() -> Self { - let root = Node::new_rc_refcell(); - Tree { - root: root.clone(), - nodes: vec![root], - rng: SmallRng::seed_from_u64(42), - } - } - - /// Add a new node as a child of a random node in the tree. - fn push_node(&mut self) { - let new_node = Node::new_rc_refcell(); - let n_nodes = self.nodes.len(); - // Bias the nodes towards the end so that there is more nesting. - let parent = &mut self.nodes[self.rng.gen_range((3 * n_nodes / 4)..n_nodes)]; - (**parent).borrow_mut().push_child(new_node.clone()); - self.nodes.push(new_node); - } - - /// Write the YAML representation of the tree to `writer`. - fn write_to(&self, writer: &mut W) -> std::io::Result<()> { - (*self.root).borrow().write_to(writer, 0) - } -} - -impl Node { - /// Create a new node. - fn new() -> Self { - Node { children: vec![] } - } - - fn new_rc_refcell() -> Rc> { - Rc::new(RefCell::new(Self::new())) - } - - /// Append a child to the node. - fn push_child(&mut self, child: Rc>) { - self.children.push(child); - } - - /// Write the YAML representation of the node to `writer`. - fn write_to(&self, writer: &mut W, indent: usize) -> std::io::Result<()> { - if self.children.is_empty() { - write_n(writer, ' ', indent)?; - writer.write_all(b"a: 1\n")?; - } else { - for (n, child) in self.children.iter().enumerate() { - write_n(writer, ' ', indent)?; - write_id_for_number(writer, n)?; - writer.write_all(b":\n")?; - (**child).borrow().write_to(writer, indent + 2)?; - } - } - Ok(()) - } -} - -/// Write `n` times `c` to `out`. -fn write_n(out: &mut W, c: char, n: usize) -> std::io::Result<()> { - for _ in 0..n { - write!(out, "{c}")?; - } - Ok(()) -} - -/// Create a valid identifier for the given number. -fn write_id_for_number(out: &mut W, mut n: usize) -> std::io::Result<()> { - const DIGITS: &[u8] = b"_abcdefghijklmnopqrstuvwxyz"; - n += 1; - while n > 0 { - write!(out, "{}", DIGITS[n % DIGITS.len()] as char)?; - n /= DIGITS.len(); - } - Ok(()) -} diff --git a/saphyr/tools/run_bench.rs b/saphyr/tools/run_bench.rs deleted file mode 100644 index 795f7bc..0000000 --- a/saphyr/tools/run_bench.rs +++ /dev/null @@ -1,71 +0,0 @@ -#![allow(clippy::cast_possible_truncation, clippy::cast_precision_loss)] - -use std::{env, fs::File, io::prelude::*}; -use yaml_rust2::{ - parser::{MarkedEventReceiver, Parser}, - scanner::Marker, - Event, -}; - -/// A sink which discards any event sent. -struct NullSink {} - -impl MarkedEventReceiver for NullSink { - fn on_event(&mut self, _: Event, _: Marker) {} -} - -/// Parse the given input, returning elapsed time in nanoseconds. -fn do_parse(input: &str) -> u64 { - let mut sink = NullSink {}; - let mut parser = Parser::new_from_str(input); - let begin = std::time::Instant::now(); - parser.load(&mut sink, true).unwrap(); - let end = std::time::Instant::now(); - (end - begin).as_nanos() as u64 -} - -fn main() { - let args: Vec<_> = env::args().collect(); - let iterations: u64 = args[2].parse().unwrap(); - let output_yaml = args.len() == 4 && args[3] == "--output-yaml"; - let mut f = File::open(&args[1]).unwrap(); - let mut s = String::new(); - f.read_to_string(&mut s).unwrap(); - - // Warmup - do_parse(&s); - do_parse(&s); - do_parse(&s); - - // Bench - let times: Vec<_> = (0..iterations).map(|_| do_parse(&s)).collect(); - - let mut sorted_times = times.clone(); - sorted_times.sort_unstable(); - - // Compute relevant metrics. - let sum: u64 = times.iter().sum(); - let avg = sum / iterations; - let min = sorted_times[0]; - let max = sorted_times[(iterations - 1) as usize]; - let percentile95 = sorted_times[((95 * iterations) / 100) as usize]; - - if output_yaml { - println!("parser: yaml-rust2"); - println!("input: {}", args[1]); - println!("average: {avg}"); - println!("min: {min}"); - println!("max: {max}"); - println!("percentile95: {percentile95}"); - println!("iterations: {iterations}"); - println!("times:"); - for time in × { - println!(" - {time}"); - } - } else { - println!("Average: {}s", (avg as f64) / 1_000_000_000.0); - println!("Min: {}s", (min as f64) / 1_000_000_000.0); - println!("Max: {}s", (max as f64) / 1_000_000_000.0); - println!("95%: {}s", (percentile95 as f64) / 1_000_000_000.0); - } -} diff --git a/saphyr/tools/time_parse.rs b/saphyr/tools/time_parse.rs deleted file mode 100644 index 1555dde..0000000 --- a/saphyr/tools/time_parse.rs +++ /dev/null @@ -1,36 +0,0 @@ -use std::env; -use std::fs::File; -use std::io::prelude::*; -use yaml_rust2::{ - parser::{MarkedEventReceiver, Parser}, - scanner::Marker, - Event, -}; - -/// A sink which discards any event sent. -struct NullSink {} - -impl MarkedEventReceiver for NullSink { - fn on_event(&mut self, _: Event, _: Marker) {} -} - -fn main() { - let args: Vec<_> = env::args().collect(); - let mut f = File::open(&args[1]).unwrap(); - let mut s = String::new(); - f.read_to_string(&mut s).unwrap(); - - let mut sink = NullSink {}; - let mut parser = Parser::new_from_str(&s); - - // Load events using our sink as the receiver. - let begin = std::time::Instant::now(); - parser.load(&mut sink, true).unwrap(); - let end = std::time::Instant::now(); - - if args.len() == 3 && args[2] == "--short" { - println!("{}", (end - begin).as_nanos()); - } else { - println!("Loaded {}MiB in {:?}", s.len() / 1024 / 1024, end - begin); - } -} From 8967f050f7c9290d774d2967fd72328776734f0f Mon Sep 17 00:00:00 2001 From: Ethiraric Date: Wed, 3 Apr 2024 20:23:30 +0200 Subject: [PATCH 368/380] Convert to standalone repository. --- bench/.cargo/config.toml | 3 + bench/.gitignore | 1 + bench/Cargo.lock | 136 +++++++++++++ bench/Cargo.toml | 19 ++ bench/README.md | 3 + bench/justfile | 5 +- bench/src/lib.rs | 2 + bench/tools/bench_compare/.gitignore | 1 + bench/tools/bench_compare/Cargo.lock | 266 ++++++++++++++++++++++++++ bench/tools/gen_large_yaml/.gitignore | 1 + bench/tools/gen_large_yaml/Cargo.lock | 86 +++++++++ bench/tools/gen_large_yaml/Cargo.toml | 1 - bench/tools/run_bench.rs | 7 +- bench/tools/time_parse.rs | 7 +- 14 files changed, 523 insertions(+), 15 deletions(-) create mode 100644 bench/.cargo/config.toml create mode 100644 bench/.gitignore create mode 100644 bench/Cargo.lock create mode 100644 bench/Cargo.toml create mode 100644 bench/README.md create mode 100644 bench/src/lib.rs create mode 100644 bench/tools/bench_compare/.gitignore create mode 100644 bench/tools/bench_compare/Cargo.lock create mode 100644 bench/tools/gen_large_yaml/.gitignore create mode 100644 bench/tools/gen_large_yaml/Cargo.lock diff --git a/bench/.cargo/config.toml b/bench/.cargo/config.toml new file mode 100644 index 0000000..497dd42 --- /dev/null +++ b/bench/.cargo/config.toml @@ -0,0 +1,3 @@ +[alias] +gen_large_yaml = "run --profile=release-lto --package gen_large_yaml --bin gen_large_yaml --manifest-path tools/gen_large_yaml/Cargo.toml --" +bench_compare = "run --package bench_compare --bin bench_compare --manifest-path tools/bench_compare/Cargo.toml --" diff --git a/bench/.gitignore b/bench/.gitignore new file mode 100644 index 0000000..ea8c4bf --- /dev/null +++ b/bench/.gitignore @@ -0,0 +1 @@ +/target diff --git a/bench/Cargo.lock b/bench/Cargo.lock new file mode 100644 index 0000000..3b945ce --- /dev/null +++ b/bench/Cargo.lock @@ -0,0 +1,136 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 3 + +[[package]] +name = "ahash" +version = "0.8.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e89da841a80418a9b391ebaea17f5c112ffaaa96f621d2c285b5174da76b9011" +dependencies = [ + "cfg-if", + "once_cell", + "version_check", + "zerocopy", +] + +[[package]] +name = "allocator-api2" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0942ffc6dcaadf03badf6e6a2d0228460359d5e34b57ccdc720b7382dfbd5ec5" + +[[package]] +name = "arraydeque" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7d902e3d592a523def97af8f317b08ce16b7ab854c1985a0c671e6f15cebc236" + +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "hashbrown" +version = "0.14.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "290f1a1d9242c78d09ce40a5e87e7554ee637af1351968159f4952f028f75604" +dependencies = [ + "ahash", + "allocator-api2", +] + +[[package]] +name = "hashlink" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e8094feaf31ff591f651a2664fb9cfd92bba7a60ce3197265e9482ebe753c8f7" +dependencies = [ + "hashbrown", +] + +[[package]] +name = "once_cell" +version = "1.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" + +[[package]] +name = "proc-macro2" +version = "1.0.79" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e835ff2298f5721608eb1a980ecaee1aef2c132bf95ecc026a11b7bf3c01c02e" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.35" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "291ec9ab5efd934aaf503a6466c5d5251535d108ee747472c3977cc5acc868ef" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "saphyr-bench" +version = "0.0.1" +dependencies = [ + "saphyr-parser", +] + +[[package]] +name = "saphyr-parser" +version = "0.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "886b4bb040ecd2944f54c3543e612c336396e3eba700c5063d8bad5f40bac3d7" +dependencies = [ + "arraydeque", + "hashlink", +] + +[[package]] +name = "syn" +version = "2.0.58" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "44cfb93f38070beee36b3fef7d4f5a16f27751d94b187b666a5cc5e9b0d30687" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "unicode-ident" +version = "1.0.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" + +[[package]] +name = "version_check" +version = "0.9.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" + +[[package]] +name = "zerocopy" +version = "0.7.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "74d4d3961e53fa4c9a25a8637fc2bfaf2595b3d3ae34875568a5cf64787716be" +dependencies = [ + "zerocopy-derive", +] + +[[package]] +name = "zerocopy-derive" +version = "0.7.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ce1b18ccd8e73a9321186f97e46f9f04b778851177567b1975109d26a08d2a6" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] diff --git a/bench/Cargo.toml b/bench/Cargo.toml new file mode 100644 index 0000000..985a6fa --- /dev/null +++ b/bench/Cargo.toml @@ -0,0 +1,19 @@ +[package] +name = "saphyr-bench" +version = "0.0.1" +authors = [ "Ethiraric " ] +license = "MIT" +description = "Utilities to benchmark saphyr" +readme = "README.md" +edition = "2021" + +[dependencies] +saphyr-parser = "0.0.1" + +[[bin]] +name = "time_parse" +path = "tools/time_parse.rs" + +[[bin]] +name = "run_bench" +path = "tools/run_bench.rs" diff --git a/bench/README.md b/bench/README.md new file mode 100644 index 0000000..3988618 --- /dev/null +++ b/bench/README.md @@ -0,0 +1,3 @@ +# `saphyr-bench` + +Set of utilities to benchmark the `saphyr` library. diff --git a/bench/justfile b/bench/justfile index f33ee69..6bd7211 100644 --- a/bench/justfile +++ b/bench/justfile @@ -3,11 +3,8 @@ before_commit: cargo clippy --all-targets -- -D warnings cargo build --release --all-targets cargo build --all-targets - cargo test - cargo test --release - cargo test --doc cargo build --profile=release-lto --package gen_large_yaml --bin gen_large_yaml --manifest-path tools/gen_large_yaml/Cargo.toml - RUSTDOCFLAGS="-D warnings" cargo doc --all-features + cargo build --profile=release-lto --package bench_compare --bin bench_compare --manifest-path tools/bench_compare/Cargo.toml ethi_bench: cargo build --release --all-targets diff --git a/bench/src/lib.rs b/bench/src/lib.rs new file mode 100644 index 0000000..139597f --- /dev/null +++ b/bench/src/lib.rs @@ -0,0 +1,2 @@ + + diff --git a/bench/tools/bench_compare/.gitignore b/bench/tools/bench_compare/.gitignore new file mode 100644 index 0000000..ea8c4bf --- /dev/null +++ b/bench/tools/bench_compare/.gitignore @@ -0,0 +1 @@ +/target diff --git a/bench/tools/bench_compare/Cargo.lock b/bench/tools/bench_compare/Cargo.lock new file mode 100644 index 0000000..e4e996a --- /dev/null +++ b/bench/tools/bench_compare/Cargo.lock @@ -0,0 +1,266 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 3 + +[[package]] +name = "addr2line" +version = "0.21.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a30b2e23b9e17a9f90641c7ab1549cd9b44f296d3ccbf309d2863cfe398a0cb" +dependencies = [ + "gimli", +] + +[[package]] +name = "adler" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" + +[[package]] +name = "anyhow" +version = "1.0.81" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0952808a6c2afd1aa8947271f3a60f1a6763c7b912d210184c5149b5cf147247" +dependencies = [ + "backtrace", +] + +[[package]] +name = "backtrace" +version = "0.3.71" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "26b05800d2e817c8b3b4b54abd461726265fa9789ae34330622f2db9ee696f9d" +dependencies = [ + "addr2line", + "cc", + "cfg-if", + "libc", + "miniz_oxide", + "object", + "rustc-demangle", +] + +[[package]] +name = "bench_compare" +version = "0.6.0" +dependencies = [ + "anyhow", + "serde", + "serde_yaml", + "toml", +] + +[[package]] +name = "cc" +version = "1.0.90" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8cd6604a82acf3039f1144f54b8eb34e91ffba622051189e71b781822d5ee1f5" + +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "equivalent" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" + +[[package]] +name = "gimli" +version = "0.28.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4271d37baee1b8c7e4b708028c57d816cf9d2434acb33a549475f78c181f6253" + +[[package]] +name = "hashbrown" +version = "0.14.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "290f1a1d9242c78d09ce40a5e87e7554ee637af1351968159f4952f028f75604" + +[[package]] +name = "indexmap" +version = "2.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "168fb715dda47215e360912c096649d23d58bf392ac62f73919e831745e40f26" +dependencies = [ + "equivalent", + "hashbrown", +] + +[[package]] +name = "itoa" +version = "1.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b" + +[[package]] +name = "libc" +version = "0.2.153" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c198f91728a82281a64e1f4f9eeb25d82cb32a5de251c6bd1b5154d63a8e7bd" + +[[package]] +name = "memchr" +version = "2.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c8640c5d730cb13ebd907d8d04b52f55ac9a2eec55b440c8892f40d56c76c1d" + +[[package]] +name = "miniz_oxide" +version = "0.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d811f3e15f28568be3407c8e7fdb6514c1cda3cb30683f15b6a1a1dc4ea14a7" +dependencies = [ + "adler", +] + +[[package]] +name = "object" +version = "0.32.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a6a622008b6e321afc04970976f62ee297fdbaa6f95318ca343e3eebb9648441" +dependencies = [ + "memchr", +] + +[[package]] +name = "proc-macro2" +version = "1.0.79" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e835ff2298f5721608eb1a980ecaee1aef2c132bf95ecc026a11b7bf3c01c02e" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.35" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "291ec9ab5efd934aaf503a6466c5d5251535d108ee747472c3977cc5acc868ef" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "rustc-demangle" +version = "0.1.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d626bb9dae77e28219937af045c257c28bfd3f69333c512553507f5f9798cb76" + +[[package]] +name = "ryu" +version = "1.0.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e86697c916019a8588c99b5fac3cead74ec0b4b819707a682fd4d23fa0ce1ba1" + +[[package]] +name = "serde" +version = "1.0.197" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3fb1c873e1b9b056a4dc4c0c198b24c3ffa059243875552b2bd0933b1aee4ce2" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.197" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7eb0b34b42edc17f6b7cac84a52a1c5f0e1bb2227e997ca9011ea3dd34e8610b" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "serde_spanned" +version = "0.6.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eb3622f419d1296904700073ea6cc23ad690adbd66f13ea683df73298736f0c1" +dependencies = [ + "serde", +] + +[[package]] +name = "serde_yaml" +version = "0.9.34+deprecated" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a8b1a1a2ebf674015cc02edccce75287f1a0130d394307b36743c2f5d504b47" +dependencies = [ + "indexmap", + "itoa", + "ryu", + "serde", + "unsafe-libyaml", +] + +[[package]] +name = "syn" +version = "2.0.58" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "44cfb93f38070beee36b3fef7d4f5a16f27751d94b187b666a5cc5e9b0d30687" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "toml" +version = "0.8.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e9dd1545e8208b4a5af1aa9bbd0b4cf7e9ea08fabc5d0a5c67fcaafa17433aa3" +dependencies = [ + "serde", + "serde_spanned", + "toml_datetime", + "toml_edit", +] + +[[package]] +name = "toml_datetime" +version = "0.6.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3550f4e9685620ac18a50ed434eb3aec30db8ba93b0287467bca5826ea25baf1" +dependencies = [ + "serde", +] + +[[package]] +name = "toml_edit" +version = "0.22.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e40bb779c5187258fd7aad0eb68cb8706a0a81fa712fbea808ab43c4b8374c4" +dependencies = [ + "indexmap", + "serde", + "serde_spanned", + "toml_datetime", + "winnow", +] + +[[package]] +name = "unicode-ident" +version = "1.0.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" + +[[package]] +name = "unsafe-libyaml" +version = "0.2.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "673aac59facbab8a9007c7f6108d11f63b603f7cabff99fabf650fea5c32b861" + +[[package]] +name = "winnow" +version = "0.6.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dffa400e67ed5a4dd237983829e66475f0a4a26938c4b04c21baede6262215b8" +dependencies = [ + "memchr", +] diff --git a/bench/tools/gen_large_yaml/.gitignore b/bench/tools/gen_large_yaml/.gitignore new file mode 100644 index 0000000..ea8c4bf --- /dev/null +++ b/bench/tools/gen_large_yaml/.gitignore @@ -0,0 +1 @@ +/target diff --git a/bench/tools/gen_large_yaml/Cargo.lock b/bench/tools/gen_large_yaml/Cargo.lock new file mode 100644 index 0000000..63d892b --- /dev/null +++ b/bench/tools/gen_large_yaml/Cargo.lock @@ -0,0 +1,86 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 3 + +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "gen_large_yaml" +version = "0.6.0" +dependencies = [ + "lipsum", + "rand", +] + +[[package]] +name = "getrandom" +version = "0.2.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "190092ea657667030ac6a35e305e62fc4dd69fd98ac98631e5d3a2b1575a12b5" +dependencies = [ + "cfg-if", + "libc", + "wasi", +] + +[[package]] +name = "libc" +version = "0.2.153" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c198f91728a82281a64e1f4f9eeb25d82cb32a5de251c6bd1b5154d63a8e7bd" + +[[package]] +name = "lipsum" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "636860251af8963cc40f6b4baadee105f02e21b28131d76eba8e40ce84ab8064" +dependencies = [ + "rand", + "rand_chacha", +] + +[[package]] +name = "ppv-lite86" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de" + +[[package]] +name = "rand" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" +dependencies = [ + "libc", + "rand_chacha", + "rand_core", +] + +[[package]] +name = "rand_chacha" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" +dependencies = [ + "ppv-lite86", + "rand_core", +] + +[[package]] +name = "rand_core" +version = "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" +dependencies = [ + "getrandom", +] + +[[package]] +name = "wasi" +version = "0.11.0+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" diff --git a/bench/tools/gen_large_yaml/Cargo.toml b/bench/tools/gen_large_yaml/Cargo.toml index d57bdea..d8526fb 100644 --- a/bench/tools/gen_large_yaml/Cargo.toml +++ b/bench/tools/gen_large_yaml/Cargo.toml @@ -11,7 +11,6 @@ readme = "README.md" edition = "2018" [dependencies] -yaml-rust2 = { path = "../.." } rand = { version = "0.8.5", features = [ "small_rng" ] } lipsum = "0.9.0" diff --git a/bench/tools/run_bench.rs b/bench/tools/run_bench.rs index 795f7bc..3e987a5 100644 --- a/bench/tools/run_bench.rs +++ b/bench/tools/run_bench.rs @@ -1,11 +1,8 @@ #![allow(clippy::cast_possible_truncation, clippy::cast_precision_loss)] use std::{env, fs::File, io::prelude::*}; -use yaml_rust2::{ - parser::{MarkedEventReceiver, Parser}, - scanner::Marker, - Event, -}; + +use saphyr_parser::{Event, MarkedEventReceiver, Marker, Parser}; /// A sink which discards any event sent. struct NullSink {} diff --git a/bench/tools/time_parse.rs b/bench/tools/time_parse.rs index 1555dde..154d698 100644 --- a/bench/tools/time_parse.rs +++ b/bench/tools/time_parse.rs @@ -1,11 +1,8 @@ use std::env; use std::fs::File; use std::io::prelude::*; -use yaml_rust2::{ - parser::{MarkedEventReceiver, Parser}, - scanner::Marker, - Event, -}; + +use saphyr_parser::{Event, MarkedEventReceiver, Marker, Parser}; /// A sink which discards any event sent. struct NullSink {} From 5b3fa958b8f62e82b39d56ed80bb30886c17a424 Mon Sep 17 00:00:00 2001 From: maddymakesgames Date: Wed, 3 Apr 2024 17:33:59 -0400 Subject: [PATCH 369/380] add Error and Display impls for LoadError --- saphyr/src/yaml.rs | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/saphyr/src/yaml.rs b/saphyr/src/yaml.rs index 3af4bb3..abe2174 100644 --- a/saphyr/src/yaml.rs +++ b/saphyr/src/yaml.rs @@ -186,6 +186,26 @@ impl From for LoadError { } } +impl std::error::Error for LoadError { + fn source(&self) -> Option<&(dyn std::error::Error + 'static)> { + Some(match &self { + LoadError::IO(e) => e, + LoadError::Scan(e) => e, + LoadError::Decode(_) => return None, + }) + } +} + +impl std::fmt::Display for LoadError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + LoadError::IO(e) => e.fmt(f), + LoadError::Scan(e) => e.fmt(f), + LoadError::Decode(e) => e.fmt(f), + } + } +} + impl YamlLoader { fn insert_new_node(&mut self, node: (Yaml, usize)) { // valid anchor id starts from 1 From 976007017d2bc9c5509b8b9e8c5d8e6e29793804 Mon Sep 17 00:00:00 2001 From: David Aguilar Date: Mon, 8 Apr 2024 23:37:22 -0700 Subject: [PATCH 370/380] garden: yaml-rust2 -> saphyr --- saphyr/garden.yaml | 64 +++++++++++++++------------------------------- 1 file changed, 21 insertions(+), 43 deletions(-) diff --git a/saphyr/garden.yaml b/saphyr/garden.yaml index 5db5126..3a91c69 100644 --- a/saphyr/garden.yaml +++ b/saphyr/garden.yaml @@ -8,55 +8,33 @@ # garden fix commands: - bench: cargo bench "$@" - build: cargo build "$@" + build: | + cargo build --all-targets --release + cargo build --all-targets check>: - check/clippy - check/fmt - build - test - - check/profile - check/clippy: cargo clippy --all-targets "$@" -- -D warnings + - doc + check/clippy: | + cargo clippy --all-targets --release -- -D warnings + cargo clippy --all-targets -- -D warnings check/fmt: cargo fmt --check - check/profile: | - cargo build \ - --profile=release-lto \ - --package gen_large_yaml \ - --bin gen_large_yaml \ - --manifest-path tools/gen_large_yaml/Cargo.toml - clean: cargo clean "$@" - coverage: cargo kcov "$@" - doc: cargo doc --no-deps --package yaml-rust2 "$@" - ethi/bench: | - cargo build --release --all-targets - cd ../Yaml-rust && cargo build --release --all-targets - cd ../libfyaml/build && ninja - cargo bench_compare run_bench - fix: cargo clippy --all-targets --fix "$@" -- -D warnings - fmt: cargo fmt "$@" - test: cargo test "$@" - update: cargo update "$@" + doc: cargo doc --all-features + fix: cargo clippy --all-targets --fix -- -D warnings + fmt: cargo fmt + test: | + cargo test + cargo test --release + cargo test --doc watch: cargo watch --shell "garden check" -trees: - yaml-rust2: - description: A pure Rust YAML implementation - path: ${GARDEN_CONFIG_DIR} - url: "git@github.com:Ethiraric/yaml-rust2.git" - remotes: - davvid: "git@github.com:davvid/yaml-rust2.git" - yaml-rust: "git@github.com:chyh1990/yaml-rust.git" - gitconfig: - # Access yaml-rust2 pull requests as yaml-rust2/pull/* - remote.yaml-rust2.url: "git@github.com:Ethiraric/yaml-rust2.git" - remote.yaml-rust2.fetch: - - "+refs/pull/*/head:refs/remotes/yaml-rust2/pull/*" - # Access yaml-rust pull requests as yaml-rust/pull/* - remote.yaml-rust.fetch: - - "+refs/heads/*:refs/remotes/yaml-rust/*" - - "+refs/pull/*/head:refs/remotes/yaml-rust/pull/*" +environment: + RUSTDOCFLAGS: "-D warnings" - yaml-test-suite: - description: Comprehensive, language independent Test Suite for YAML - path: tests/yaml-test-suite - url: https://github.com/yaml/yaml-test-suite +trees: + saphyr: + description: A pure Rust YAML implementation + path: ${GARDEN_CONFIG_DIR} + url: "git@github.com:saphyr-rs/saphyr.git" From 2b8eb3f62b3e267262a66827ae8d01be1c1df754 Mon Sep 17 00:00:00 2001 From: Ethiraric Date: Mon, 10 Jun 2024 18:05:25 +0200 Subject: [PATCH 371/380] Split `yaml.rs` into sizeable files. --- saphyr/examples/dump_yaml.rs | 10 +- saphyr/src/encoding.rs | 289 ++++++++++++++++++++ saphyr/src/lib.rs | 14 +- saphyr/src/loader.rs | 227 ++++++++++++++++ saphyr/src/yaml.rs | 513 +---------------------------------- 5 files changed, 533 insertions(+), 520 deletions(-) create mode 100644 saphyr/src/encoding.rs create mode 100644 saphyr/src/loader.rs diff --git a/saphyr/examples/dump_yaml.rs b/saphyr/examples/dump_yaml.rs index 1a9f0f5..8d85d7e 100644 --- a/saphyr/examples/dump_yaml.rs +++ b/saphyr/examples/dump_yaml.rs @@ -1,4 +1,4 @@ -use saphyr::yaml; +use saphyr::{Yaml, YamlLoader}; use std::env; use std::fs::File; use std::io::prelude::*; @@ -9,14 +9,14 @@ fn print_indent(indent: usize) { } } -fn dump_node(doc: &yaml::Yaml, indent: usize) { +fn dump_node(doc: &Yaml, indent: usize) { match *doc { - yaml::Yaml::Array(ref v) => { + Yaml::Array(ref v) => { for x in v { dump_node(x, indent + 1); } } - yaml::Yaml::Hash(ref h) => { + Yaml::Hash(ref h) => { for (k, v) in h { print_indent(indent); println!("{k:?}:"); @@ -36,7 +36,7 @@ fn main() { let mut s = String::new(); f.read_to_string(&mut s).unwrap(); - let docs = yaml::YamlLoader::load_from_str(&s).unwrap(); + let docs = YamlLoader::load_from_str(&s).unwrap(); for doc in &docs { println!("---"); dump_node(doc, 0); diff --git a/saphyr/src/encoding.rs b/saphyr/src/encoding.rs new file mode 100644 index 0000000..6d46dd3 --- /dev/null +++ b/saphyr/src/encoding.rs @@ -0,0 +1,289 @@ +//! Encoding utilities. Available only with the `encoding` feature. + +use std::{borrow::Cow, ops::ControlFlow}; + +use encoding_rs::{Decoder, DecoderResult, Encoding}; + +use crate::{loader::LoadError, Yaml, YamlLoader}; + +/// The signature of the function to call when using [`YAMLDecodingTrap::Call`]. +/// +/// The arguments are as follows: +/// * `malformation_length`: The length of the sequence the decoder failed to decode. +/// * `bytes_read_after_malformation`: The number of lookahead bytes the decoder consumed after +/// the malformation. +/// * `input_at_malformation`: What the input buffer is at the malformation. +/// This is the buffer starting at the malformation. The first `malformation_length` bytes are +/// the problematic sequence. The following `bytes_read_after_malformation` are already stored +/// in the decoder and will not be re-fed. +/// * `output`: The output string. +/// +/// The function must modify `output` as it feels is best. For instance, one could recreate the +/// behavior of [`YAMLDecodingTrap::Ignore`] with an empty function, [`YAMLDecodingTrap::Replace`] +/// by pushing a `\u{FFFD}` into `output` and [`YAMLDecodingTrap::Strict`] by returning +/// [`ControlFlow::Break`]. +/// +/// # Returns +/// The function must return [`ControlFlow::Continue`] if decoding may continue or +/// [`ControlFlow::Break`] if decoding must be aborted. An optional error string may be supplied. +pub type YAMLDecodingTrapFn = fn( + malformation_length: u8, + bytes_read_after_malformation: u8, + input_at_malformation: &[u8], + output: &mut String, +) -> ControlFlow>; + +/// The behavior [`YamlDecoder`] must have when an decoding error occurs. +#[derive(Copy, Clone, PartialEq, Eq)] +pub enum YAMLDecodingTrap { + /// Ignore the offending bytes, remove them from the output. + Ignore, + /// Error out. + Strict, + /// Replace them with the Unicode REPLACEMENT CHARACTER. + Replace, + /// Call the user-supplied function upon decoding malformation. + Call(YAMLDecodingTrapFn), +} + +/// `YamlDecoder` is a `YamlLoader` builder that allows you to supply your own encoding error trap. +/// For example, to read a YAML file while ignoring Unicode decoding errors you can set the +/// `encoding_trap` to `encoding::DecoderTrap::Ignore`. +/// ```rust +/// use saphyr::{YamlDecoder, YAMLDecodingTrap}; +/// +/// let string = b"--- +/// a\xa9: 1 +/// b: 2.2 +/// c: [1, 2] +/// "; +/// let out = YamlDecoder::read(string as &[u8]) +/// .encoding_trap(YAMLDecodingTrap::Ignore) +/// .decode() +/// .unwrap(); +/// ``` +pub struct YamlDecoder { + /// The input stream. + source: T, + /// The behavior to adopt when encountering a malformed encoding. + trap: YAMLDecodingTrap, +} + +impl YamlDecoder { + /// Create a `YamlDecoder` decoding the given source. + pub fn read(source: T) -> YamlDecoder { + YamlDecoder { + source, + trap: YAMLDecodingTrap::Strict, + } + } + + /// Set the behavior of the decoder when the encoding is invalid. + pub fn encoding_trap(&mut self, trap: YAMLDecodingTrap) -> &mut Self { + self.trap = trap; + self + } + + /// Run the decode operation with the source and trap the `YamlDecoder` was built with. + /// + /// # Errors + /// Returns `LoadError` when decoding fails. + pub fn decode(&mut self) -> Result, LoadError> { + let mut buffer = Vec::new(); + self.source.read_to_end(&mut buffer)?; + + // Check if the `encoding` library can detect encoding from the BOM, otherwise use + // `detect_utf16_endianness`. + let (encoding, _) = + Encoding::for_bom(&buffer).unwrap_or_else(|| (detect_utf16_endianness(&buffer), 2)); + let mut decoder = encoding.new_decoder(); + let mut output = String::new(); + + // Decode the input buffer. + decode_loop(&buffer, &mut output, &mut decoder, self.trap)?; + + YamlLoader::load_from_str(&output).map_err(LoadError::Scan) + } +} + +/// Perform a loop of [`Decoder::decode_to_string`], reallocating `output` if needed. +fn decode_loop( + input: &[u8], + output: &mut String, + decoder: &mut Decoder, + trap: YAMLDecodingTrap, +) -> Result<(), LoadError> { + use crate::loader::LoadError; + + output.reserve(input.len()); + let mut total_bytes_read = 0; + + loop { + match decoder.decode_to_string_without_replacement(&input[total_bytes_read..], output, true) + { + // If the input is empty, we processed the whole input. + (DecoderResult::InputEmpty, _) => break Ok(()), + // If the output is full, we must reallocate. + (DecoderResult::OutputFull, bytes_read) => { + total_bytes_read += bytes_read; + // The output is already reserved to the size of the input. We slowly resize. Here, + // we're expecting that 10% of bytes will double in size when converting to UTF-8. + output.reserve(input.len() / 10); + } + (DecoderResult::Malformed(malformed_len, bytes_after_malformed), bytes_read) => { + total_bytes_read += bytes_read; + match trap { + // Ignore (skip over) malformed character. + YAMLDecodingTrap::Ignore => {} + // Replace them with the Unicode REPLACEMENT CHARACTER. + YAMLDecodingTrap::Replace => { + output.push('\u{FFFD}'); + } + // Otherwise error, getting as much context as possible. + YAMLDecodingTrap::Strict => { + let malformed_len = malformed_len as usize; + let bytes_after_malformed = bytes_after_malformed as usize; + let byte_idx = total_bytes_read - (malformed_len + bytes_after_malformed); + let malformed_sequence = &input[byte_idx..byte_idx + malformed_len]; + + break Err(LoadError::Decode(Cow::Owned(format!( + "Invalid character sequence at {byte_idx}: {malformed_sequence:?}", + )))); + } + YAMLDecodingTrap::Call(callback) => { + let byte_idx = + total_bytes_read - ((malformed_len + bytes_after_malformed) as usize); + let malformed_sequence = + &input[byte_idx..byte_idx + malformed_len as usize]; + if let ControlFlow::Break(error) = callback( + malformed_len, + bytes_after_malformed, + &input[byte_idx..], + output, + ) { + if error.is_empty() { + break Err(LoadError::Decode(Cow::Owned(format!( + "Invalid character sequence at {byte_idx}: {malformed_sequence:?}", + )))); + } + break Err(LoadError::Decode(error)); + } + } + } + } + } + } +} + +/// The encoding crate knows how to tell apart UTF-8 from UTF-16LE and utf-16BE, when the +/// bytestream starts with BOM codepoint. +/// However, it doesn't even attempt to guess the UTF-16 endianness of the input bytestream since +/// in the general case the bytestream could start with a codepoint that uses both bytes. +/// +/// The YAML-1.2 spec mandates that the first character of a YAML document is an ASCII character. +/// This allows the encoding to be deduced by the pattern of null (#x00) characters. +// +/// See spec at +fn detect_utf16_endianness(b: &[u8]) -> &'static Encoding { + if b.len() > 1 && (b[0] != b[1]) { + if b[0] == 0 { + return encoding_rs::UTF_16BE; + } else if b[1] == 0 { + return encoding_rs::UTF_16LE; + } + } + encoding_rs::UTF_8 +} + +#[cfg(test)] +mod test { + use super::{YAMLDecodingTrap, Yaml, YamlDecoder}; + + #[test] + fn test_read_bom() { + let s = b"\xef\xbb\xbf--- +a: 1 +b: 2.2 +c: [1, 2] +"; + let out = YamlDecoder::read(s as &[u8]).decode().unwrap(); + let doc = &out[0]; + assert_eq!(doc["a"].as_i64().unwrap(), 1i64); + assert!((doc["b"].as_f64().unwrap() - 2.2f64).abs() <= f64::EPSILON); + assert_eq!(doc["c"][1].as_i64().unwrap(), 2i64); + assert!(doc["d"][0].is_badvalue()); + } + + #[test] + fn test_read_utf16le() { + let s = b"\xff\xfe-\x00-\x00-\x00 +\x00a\x00:\x00 \x001\x00 +\x00b\x00:\x00 \x002\x00.\x002\x00 +\x00c\x00:\x00 \x00[\x001\x00,\x00 \x002\x00]\x00 +\x00"; + let out = YamlDecoder::read(s as &[u8]).decode().unwrap(); + let doc = &out[0]; + println!("GOT: {doc:?}"); + assert_eq!(doc["a"].as_i64().unwrap(), 1i64); + assert!((doc["b"].as_f64().unwrap() - 2.2f64) <= f64::EPSILON); + assert_eq!(doc["c"][1].as_i64().unwrap(), 2i64); + assert!(doc["d"][0].is_badvalue()); + } + + #[test] + fn test_read_utf16be() { + let s = b"\xfe\xff\x00-\x00-\x00-\x00 +\x00a\x00:\x00 \x001\x00 +\x00b\x00:\x00 \x002\x00.\x002\x00 +\x00c\x00:\x00 \x00[\x001\x00,\x00 \x002\x00]\x00 +"; + let out = YamlDecoder::read(s as &[u8]).decode().unwrap(); + let doc = &out[0]; + println!("GOT: {doc:?}"); + assert_eq!(doc["a"].as_i64().unwrap(), 1i64); + assert!((doc["b"].as_f64().unwrap() - 2.2f64).abs() <= f64::EPSILON); + assert_eq!(doc["c"][1].as_i64().unwrap(), 2i64); + assert!(doc["d"][0].is_badvalue()); + } + + #[test] + fn test_read_utf16le_nobom() { + let s = b"-\x00-\x00-\x00 +\x00a\x00:\x00 \x001\x00 +\x00b\x00:\x00 \x002\x00.\x002\x00 +\x00c\x00:\x00 \x00[\x001\x00,\x00 \x002\x00]\x00 +\x00"; + let out = YamlDecoder::read(s as &[u8]).decode().unwrap(); + let doc = &out[0]; + println!("GOT: {doc:?}"); + assert_eq!(doc["a"].as_i64().unwrap(), 1i64); + assert!((doc["b"].as_f64().unwrap() - 2.2f64).abs() <= f64::EPSILON); + assert_eq!(doc["c"][1].as_i64().unwrap(), 2i64); + assert!(doc["d"][0].is_badvalue()); + } + + #[test] + fn test_read_trap() { + let s = b"--- +a\xa9: 1 +b: 2.2 +c: [1, 2] +"; + let out = YamlDecoder::read(s as &[u8]) + .encoding_trap(YAMLDecodingTrap::Ignore) + .decode() + .unwrap(); + let doc = &out[0]; + println!("GOT: {doc:?}"); + assert_eq!(doc["a"].as_i64().unwrap(), 1i64); + assert!((doc["b"].as_f64().unwrap() - 2.2f64).abs() <= f64::EPSILON); + assert_eq!(doc["c"][1].as_i64().unwrap(), 2i64); + assert!(doc["d"][0].is_badvalue()); + } + + #[test] + fn test_or() { + assert_eq!(Yaml::Null.or(Yaml::Integer(3)), Yaml::Integer(3)); + assert_eq!(Yaml::Integer(3).or(Yaml::Integer(7)), Yaml::Integer(3)); + } +} diff --git a/saphyr/src/lib.rs b/saphyr/src/lib.rs index aaed759..ede027b 100644 --- a/saphyr/src/lib.rs +++ b/saphyr/src/lib.rs @@ -43,16 +43,20 @@ #![warn(missing_docs, clippy::pedantic)] -pub(crate) mod char_traits; -pub mod emitter; -pub mod yaml; +mod char_traits; +mod emitter; +mod loader; +mod yaml; // Re-export main components. pub use crate::emitter::YamlEmitter; -pub use crate::yaml::{Array, Hash, Yaml, YamlLoader}; +pub use crate::loader::YamlLoader; +pub use crate::yaml::{Array, Hash, Yaml}; #[cfg(feature = "encoding")] -pub use crate::yaml::{YAMLDecodingTrap, YAMLDecodingTrapFn, YamlDecoder}; +mod encoding; +#[cfg(feature = "encoding")] +pub use crate::encoding::{YAMLDecodingTrap, YAMLDecodingTrapFn, YamlDecoder}; // Re-export `ScanError` as it is used as part of our public API and we want consumers to be able // to inspect it (e.g. perform a `match`). They wouldn't be able without it. diff --git a/saphyr/src/loader.rs b/saphyr/src/loader.rs new file mode 100644 index 0000000..f2706bb --- /dev/null +++ b/saphyr/src/loader.rs @@ -0,0 +1,227 @@ +//! The default loader. + +use std::collections::BTreeMap; + +use saphyr_parser::{Event, MarkedEventReceiver, Marker, Parser, ScanError, TScalarStyle, Tag}; + +use crate::{Hash, Yaml}; + +/// Main structure for quickly parsing YAML. +/// +/// See [`YamlLoader::load_from_str`]. +#[derive(Default)] +#[allow(clippy::module_name_repetitions)] +pub struct YamlLoader { + /// The different YAML documents that are loaded. + docs: Vec, + // states + // (current node, anchor_id) tuple + doc_stack: Vec<(Yaml, usize)>, + key_stack: Vec, + anchor_map: BTreeMap, +} + +impl MarkedEventReceiver for YamlLoader { + fn on_event(&mut self, ev: Event, _: Marker) { + // println!("EV {:?}", ev); + match ev { + Event::DocumentStart | Event::Nothing | Event::StreamStart | Event::StreamEnd => { + // do nothing + } + Event::DocumentEnd => { + match self.doc_stack.len() { + // empty document + 0 => self.docs.push(Yaml::BadValue), + 1 => self.docs.push(self.doc_stack.pop().unwrap().0), + _ => unreachable!(), + } + } + Event::SequenceStart(aid, _) => { + self.doc_stack.push((Yaml::Array(Vec::new()), aid)); + } + Event::SequenceEnd => { + let node = self.doc_stack.pop().unwrap(); + self.insert_new_node(node); + } + Event::MappingStart(aid, _) => { + self.doc_stack.push((Yaml::Hash(Hash::new()), aid)); + self.key_stack.push(Yaml::BadValue); + } + Event::MappingEnd => { + self.key_stack.pop().unwrap(); + let node = self.doc_stack.pop().unwrap(); + self.insert_new_node(node); + } + Event::Scalar(v, style, aid, tag) => { + let node = if style != TScalarStyle::Plain { + Yaml::String(v) + } else if let Some(Tag { + ref handle, + ref suffix, + }) = tag + { + if handle == "tag:yaml.org,2002:" { + match suffix.as_ref() { + "bool" => { + // "true" or "false" + match v.parse::() { + Err(_) => Yaml::BadValue, + Ok(v) => Yaml::Boolean(v), + } + } + "int" => match v.parse::() { + Err(_) => Yaml::BadValue, + Ok(v) => Yaml::Integer(v), + }, + "float" => match parse_f64(&v) { + Some(_) => Yaml::Real(v), + None => Yaml::BadValue, + }, + "null" => match v.as_ref() { + "~" | "null" => Yaml::Null, + _ => Yaml::BadValue, + }, + _ => Yaml::String(v), + } + } else { + Yaml::String(v) + } + } else { + // Datatype is not specified, or unrecognized + Yaml::from_str(&v) + }; + + self.insert_new_node((node, aid)); + } + Event::Alias(id) => { + let n = match self.anchor_map.get(&id) { + Some(v) => v.clone(), + None => Yaml::BadValue, + }; + self.insert_new_node((n, 0)); + } + } + // println!("DOC {:?}", self.doc_stack); + } +} + +/// An error that happened when loading a YAML document. +#[derive(Debug)] +pub enum LoadError { + /// An I/O error. + IO(std::io::Error), + /// An error within the scanner. This indicates a malformed YAML input. + Scan(ScanError), + /// A decoding error (e.g.: Invalid UTF-8). + Decode(std::borrow::Cow<'static, str>), +} + +impl From for LoadError { + fn from(error: std::io::Error) -> Self { + LoadError::IO(error) + } +} + +impl std::error::Error for LoadError { + fn source(&self) -> Option<&(dyn std::error::Error + 'static)> { + Some(match &self { + LoadError::IO(e) => e, + LoadError::Scan(e) => e, + LoadError::Decode(_) => return None, + }) + } +} + +impl std::fmt::Display for LoadError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + LoadError::IO(e) => e.fmt(f), + LoadError::Scan(e) => e.fmt(f), + LoadError::Decode(e) => e.fmt(f), + } + } +} + +impl YamlLoader { + fn insert_new_node(&mut self, node: (Yaml, usize)) { + // valid anchor id starts from 1 + if node.1 > 0 { + self.anchor_map.insert(node.1, node.0.clone()); + } + if self.doc_stack.is_empty() { + self.doc_stack.push(node); + } else { + let parent = self.doc_stack.last_mut().unwrap(); + match *parent { + (Yaml::Array(ref mut v), _) => v.push(node.0), + (Yaml::Hash(ref mut h), _) => { + let cur_key = self.key_stack.last_mut().unwrap(); + // current node is a key + if cur_key.is_badvalue() { + *cur_key = node.0; + // current node is a value + } else { + let mut newkey = Yaml::BadValue; + std::mem::swap(&mut newkey, cur_key); + h.insert(newkey, node.0); + } + } + _ => unreachable!(), + } + } + } + + /// Load the given string as a set of YAML documents. + /// + /// The `source` is interpreted as YAML documents and is parsed. Parsing succeeds if and only + /// if all documents are parsed successfully. An error in a latter document prevents the former + /// from being returned. + /// # Errors + /// Returns `ScanError` when loading fails. + pub fn load_from_str(source: &str) -> Result, ScanError> { + Self::load_from_iter(source.chars()) + } + + /// Load the contents of the given iterator as a set of YAML documents. + /// + /// The `source` is interpreted as YAML documents and is parsed. Parsing succeeds if and only + /// if all documents are parsed successfully. An error in a latter document prevents the former + /// from being returned. + /// # Errors + /// Returns `ScanError` when loading fails. + pub fn load_from_iter>(source: I) -> Result, ScanError> { + let mut parser = Parser::new(source); + Self::load_from_parser(&mut parser) + } + + /// Load the contents from the specified Parser as a set of YAML documents. + /// + /// Parsing succeeds if and only if all documents are parsed successfully. + /// An error in a latter document prevents the former from being returned. + /// # Errors + /// Returns `ScanError` when loading fails. + pub fn load_from_parser>( + parser: &mut Parser, + ) -> Result, ScanError> { + let mut loader = YamlLoader::default(); + parser.load(&mut loader, true)?; + Ok(loader.docs) + } + + /// Return a reference to the parsed Yaml documents. + #[must_use] + pub fn documents(&self) -> &[Yaml] { + &self.docs + } +} + +// parse f64 as Core schema +// See: https://github.com/chyh1990/yaml-rust/issues/51 +pub(crate) fn parse_f64(v: &str) -> Option { + match v { + ".inf" | ".Inf" | ".INF" | "+.inf" | "+.Inf" | "+.INF" => Some(f64::INFINITY), + "-.inf" | "-.Inf" | "-.INF" => Some(f64::NEG_INFINITY), + ".nan" | "NaN" | ".NAN" => Some(f64::NAN), + _ => v.parse::().ok(), + } +} diff --git a/saphyr/src/yaml.rs b/saphyr/src/yaml.rs index abe2174..acd8f68 100644 --- a/saphyr/src/yaml.rs +++ b/saphyr/src/yaml.rs @@ -2,15 +2,11 @@ #![allow(clippy::module_name_repetitions)] -use std::borrow::Cow; -use std::ops::ControlFlow; -use std::{collections::BTreeMap, convert::TryFrom, mem, ops::Index, ops::IndexMut}; +use std::{convert::TryFrom, ops::Index, ops::IndexMut}; -#[cfg(feature = "encoding")] -use encoding_rs::{Decoder, DecoderResult, Encoding}; use hashlink::LinkedHashMap; -use saphyr_parser::{Event, MarkedEventReceiver, Marker, Parser, ScanError, TScalarStyle, Tag}; +use crate::loader::parse_f64; /// A YAML node is stored as this `Yaml` enumeration, which provides an easy way to /// access your YAML document. @@ -60,416 +56,6 @@ pub type Array = Vec; /// The type contained in the `Yaml::Hash` variant. This corresponds to YAML mappings. pub type Hash = LinkedHashMap; -// parse f64 as Core schema -// See: https://github.com/chyh1990/yaml-rust/issues/51 -fn parse_f64(v: &str) -> Option { - match v { - ".inf" | ".Inf" | ".INF" | "+.inf" | "+.Inf" | "+.INF" => Some(f64::INFINITY), - "-.inf" | "-.Inf" | "-.INF" => Some(f64::NEG_INFINITY), - ".nan" | "NaN" | ".NAN" => Some(f64::NAN), - _ => v.parse::().ok(), - } -} - -/// Main structure for quickly parsing YAML. -/// -/// See [`YamlLoader::load_from_str`]. -#[derive(Default)] -pub struct YamlLoader { - /// The different YAML documents that are loaded. - docs: Vec, - // states - // (current node, anchor_id) tuple - doc_stack: Vec<(Yaml, usize)>, - key_stack: Vec, - anchor_map: BTreeMap, -} - -impl MarkedEventReceiver for YamlLoader { - fn on_event(&mut self, ev: Event, _: Marker) { - // println!("EV {:?}", ev); - match ev { - Event::DocumentStart | Event::Nothing | Event::StreamStart | Event::StreamEnd => { - // do nothing - } - Event::DocumentEnd => { - match self.doc_stack.len() { - // empty document - 0 => self.docs.push(Yaml::BadValue), - 1 => self.docs.push(self.doc_stack.pop().unwrap().0), - _ => unreachable!(), - } - } - Event::SequenceStart(aid, _) => { - self.doc_stack.push((Yaml::Array(Vec::new()), aid)); - } - Event::SequenceEnd => { - let node = self.doc_stack.pop().unwrap(); - self.insert_new_node(node); - } - Event::MappingStart(aid, _) => { - self.doc_stack.push((Yaml::Hash(Hash::new()), aid)); - self.key_stack.push(Yaml::BadValue); - } - Event::MappingEnd => { - self.key_stack.pop().unwrap(); - let node = self.doc_stack.pop().unwrap(); - self.insert_new_node(node); - } - Event::Scalar(v, style, aid, tag) => { - let node = if style != TScalarStyle::Plain { - Yaml::String(v) - } else if let Some(Tag { - ref handle, - ref suffix, - }) = tag - { - if handle == "tag:yaml.org,2002:" { - match suffix.as_ref() { - "bool" => { - // "true" or "false" - match v.parse::() { - Err(_) => Yaml::BadValue, - Ok(v) => Yaml::Boolean(v), - } - } - "int" => match v.parse::() { - Err(_) => Yaml::BadValue, - Ok(v) => Yaml::Integer(v), - }, - "float" => match parse_f64(&v) { - Some(_) => Yaml::Real(v), - None => Yaml::BadValue, - }, - "null" => match v.as_ref() { - "~" | "null" => Yaml::Null, - _ => Yaml::BadValue, - }, - _ => Yaml::String(v), - } - } else { - Yaml::String(v) - } - } else { - // Datatype is not specified, or unrecognized - Yaml::from_str(&v) - }; - - self.insert_new_node((node, aid)); - } - Event::Alias(id) => { - let n = match self.anchor_map.get(&id) { - Some(v) => v.clone(), - None => Yaml::BadValue, - }; - self.insert_new_node((n, 0)); - } - } - // println!("DOC {:?}", self.doc_stack); - } -} - -/// An error that happened when loading a YAML document. -#[derive(Debug)] -pub enum LoadError { - /// An I/O error. - IO(std::io::Error), - /// An error within the scanner. This indicates a malformed YAML input. - Scan(ScanError), - /// A decoding error (e.g.: Invalid UTF_8). - Decode(std::borrow::Cow<'static, str>), -} - -impl From for LoadError { - fn from(error: std::io::Error) -> Self { - LoadError::IO(error) - } -} - -impl std::error::Error for LoadError { - fn source(&self) -> Option<&(dyn std::error::Error + 'static)> { - Some(match &self { - LoadError::IO(e) => e, - LoadError::Scan(e) => e, - LoadError::Decode(_) => return None, - }) - } -} - -impl std::fmt::Display for LoadError { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - LoadError::IO(e) => e.fmt(f), - LoadError::Scan(e) => e.fmt(f), - LoadError::Decode(e) => e.fmt(f), - } - } -} - -impl YamlLoader { - fn insert_new_node(&mut self, node: (Yaml, usize)) { - // valid anchor id starts from 1 - if node.1 > 0 { - self.anchor_map.insert(node.1, node.0.clone()); - } - if self.doc_stack.is_empty() { - self.doc_stack.push(node); - } else { - let parent = self.doc_stack.last_mut().unwrap(); - match *parent { - (Yaml::Array(ref mut v), _) => v.push(node.0), - (Yaml::Hash(ref mut h), _) => { - let cur_key = self.key_stack.last_mut().unwrap(); - // current node is a key - if cur_key.is_badvalue() { - *cur_key = node.0; - // current node is a value - } else { - let mut newkey = Yaml::BadValue; - mem::swap(&mut newkey, cur_key); - h.insert(newkey, node.0); - } - } - _ => unreachable!(), - } - } - } - - /// Load the given string as a set of YAML documents. - /// - /// The `source` is interpreted as YAML documents and is parsed. Parsing succeeds if and only - /// if all documents are parsed successfully. An error in a latter document prevents the former - /// from being returned. - /// # Errors - /// Returns `ScanError` when loading fails. - pub fn load_from_str(source: &str) -> Result, ScanError> { - Self::load_from_iter(source.chars()) - } - - /// Load the contents of the given iterator as a set of YAML documents. - /// - /// The `source` is interpreted as YAML documents and is parsed. Parsing succeeds if and only - /// if all documents are parsed successfully. An error in a latter document prevents the former - /// from being returned. - /// # Errors - /// Returns `ScanError` when loading fails. - pub fn load_from_iter>(source: I) -> Result, ScanError> { - let mut parser = Parser::new(source); - Self::load_from_parser(&mut parser) - } - - /// Load the contents from the specified Parser as a set of YAML documents. - /// - /// Parsing succeeds if and only if all documents are parsed successfully. - /// An error in a latter document prevents the former from being returned. - /// # Errors - /// Returns `ScanError` when loading fails. - pub fn load_from_parser>( - parser: &mut Parser, - ) -> Result, ScanError> { - let mut loader = YamlLoader::default(); - parser.load(&mut loader, true)?; - Ok(loader.docs) - } - - /// Return a reference to the parsed Yaml documents. - #[must_use] - pub fn documents(&self) -> &[Yaml] { - &self.docs - } -} - -/// The signature of the function to call when using [`YAMLDecodingTrap::Call`]. -/// -/// The arguments are as follows: -/// * `malformation_length`: The length of the sequence the decoder failed to decode. -/// * `bytes_read_after_malformation`: The number of lookahead bytes the decoder consumed after -/// the malformation. -/// * `input_at_malformation`: What the input buffer is at the malformation. -/// This is the buffer starting at the malformation. The first `malformation_length` bytes are -/// the problematic sequence. The following `bytes_read_after_malformation` are already stored -/// in the decoder and will not be re-fed. -/// * `output`: The output string. -/// -/// The function must modify `output` as it feels is best. For instance, one could recreate the -/// behavior of [`YAMLDecodingTrap::Ignore`] with an empty function, [`YAMLDecodingTrap::Replace`] -/// by pushing a `\u{FFFD}` into `output` and [`YAMLDecodingTrap::Strict`] by returning -/// [`ControlFlow::Break`]. -/// -/// # Returns -/// The function must return [`ControlFlow::Continue`] if decoding may continue or -/// [`ControlFlow::Break`] if decoding must be aborted. An optional error string may be supplied. -#[cfg(feature = "encoding")] -pub type YAMLDecodingTrapFn = fn( - malformation_length: u8, - bytes_read_after_malformation: u8, - input_at_malformation: &[u8], - output: &mut String, -) -> ControlFlow>; - -/// The behavior [`YamlDecoder`] must have when an decoding error occurs. -#[cfg(feature = "encoding")] -#[derive(Copy, Clone, PartialEq, Eq)] -pub enum YAMLDecodingTrap { - /// Ignore the offending bytes, remove them from the output. - Ignore, - /// Error out. - Strict, - /// Replace them with the Unicode REPLACEMENT CHARACTER. - Replace, - /// Call the user-supplied function upon decoding malformation. - Call(YAMLDecodingTrapFn), -} - -/// `YamlDecoder` is a `YamlLoader` builder that allows you to supply your own encoding error trap. -/// For example, to read a YAML file while ignoring Unicode decoding errors you can set the -/// `encoding_trap` to `encoding::DecoderTrap::Ignore`. -/// ```rust -/// use saphyr::{YamlDecoder, YAMLDecodingTrap}; -/// -/// let string = b"--- -/// a\xa9: 1 -/// b: 2.2 -/// c: [1, 2] -/// "; -/// let out = YamlDecoder::read(string as &[u8]) -/// .encoding_trap(YAMLDecodingTrap::Ignore) -/// .decode() -/// .unwrap(); -/// ``` -#[cfg(feature = "encoding")] -pub struct YamlDecoder { - source: T, - trap: YAMLDecodingTrap, -} - -#[cfg(feature = "encoding")] -impl YamlDecoder { - /// Create a `YamlDecoder` decoding the given source. - pub fn read(source: T) -> YamlDecoder { - YamlDecoder { - source, - trap: YAMLDecodingTrap::Strict, - } - } - - /// Set the behavior of the decoder when the encoding is invalid. - pub fn encoding_trap(&mut self, trap: YAMLDecodingTrap) -> &mut Self { - self.trap = trap; - self - } - - /// Run the decode operation with the source and trap the `YamlDecoder` was built with. - /// - /// # Errors - /// Returns `LoadError` when decoding fails. - pub fn decode(&mut self) -> Result, LoadError> { - let mut buffer = Vec::new(); - self.source.read_to_end(&mut buffer)?; - - // Check if the `encoding` library can detect encoding from the BOM, otherwise use - // `detect_utf16_endianness`. - let (encoding, _) = - Encoding::for_bom(&buffer).unwrap_or_else(|| (detect_utf16_endianness(&buffer), 2)); - let mut decoder = encoding.new_decoder(); - let mut output = String::new(); - - // Decode the input buffer. - decode_loop(&buffer, &mut output, &mut decoder, self.trap)?; - - YamlLoader::load_from_str(&output).map_err(LoadError::Scan) - } -} - -/// Perform a loop of [`Decoder::decode_to_string`], reallocating `output` if needed. -#[cfg(feature = "encoding")] -fn decode_loop( - input: &[u8], - output: &mut String, - decoder: &mut Decoder, - trap: YAMLDecodingTrap, -) -> Result<(), LoadError> { - output.reserve(input.len()); - let mut total_bytes_read = 0; - - loop { - match decoder.decode_to_string_without_replacement(&input[total_bytes_read..], output, true) - { - // If the input is empty, we processed the whole input. - (DecoderResult::InputEmpty, _) => break Ok(()), - // If the output is full, we must reallocate. - (DecoderResult::OutputFull, bytes_read) => { - total_bytes_read += bytes_read; - // The output is already reserved to the size of the input. We slowly resize. Here, - // we're expecting that 10% of bytes will double in size when converting to UTF-8. - output.reserve(input.len() / 10); - } - (DecoderResult::Malformed(malformed_len, bytes_after_malformed), bytes_read) => { - total_bytes_read += bytes_read; - match trap { - // Ignore (skip over) malformed character. - YAMLDecodingTrap::Ignore => {} - // Replace them with the Unicode REPLACEMENT CHARACTER. - YAMLDecodingTrap::Replace => { - output.push('\u{FFFD}'); - } - // Otherwise error, getting as much context as possible. - YAMLDecodingTrap::Strict => { - let malformed_len = malformed_len as usize; - let bytes_after_malformed = bytes_after_malformed as usize; - let byte_idx = total_bytes_read - (malformed_len + bytes_after_malformed); - let malformed_sequence = &input[byte_idx..byte_idx + malformed_len]; - - break Err(LoadError::Decode(Cow::Owned(format!( - "Invalid character sequence at {byte_idx}: {malformed_sequence:?}", - )))); - } - YAMLDecodingTrap::Call(callback) => { - let byte_idx = - total_bytes_read - ((malformed_len + bytes_after_malformed) as usize); - let malformed_sequence = - &input[byte_idx..byte_idx + malformed_len as usize]; - if let ControlFlow::Break(error) = callback( - malformed_len, - bytes_after_malformed, - &input[byte_idx..], - output, - ) { - if error.is_empty() { - break Err(LoadError::Decode(Cow::Owned(format!( - "Invalid character sequence at {byte_idx}: {malformed_sequence:?}", - )))); - } - break Err(LoadError::Decode(error)); - } - } - } - } - } - } -} - -/// The encoding crate knows how to tell apart UTF-8 from UTF-16LE and utf-16BE, when the -/// bytestream starts with BOM codepoint. -/// However, it doesn't even attempt to guess the UTF-16 endianness of the input bytestream since -/// in the general case the bytestream could start with a codepoint that uses both bytes. -/// -/// The YAML-1.2 spec mandates that the first character of a YAML document is an ASCII character. -/// This allows the encoding to be deduced by the pattern of null (#x00) characters. -// -/// See spec at -#[cfg(feature = "encoding")] -fn detect_utf16_endianness(b: &[u8]) -> &'static Encoding { - if b.len() > 1 && (b[0] != b[1]) { - if b[0] == 0 { - return encoding_rs::UTF_16BE; - } else if b[1] == 0 { - return encoding_rs::UTF_16LE; - } - } - encoding_rs::UTF_8 -} - macro_rules! define_as ( ($name:ident, $t:ident, $yt:ident) => ( /// Get a copy of the inner object in the YAML enum if it is a `$t`. @@ -623,7 +209,7 @@ impl Yaml { } } -#[cfg_attr(feature = "cargo-clippy", allow(clippy::should_implement_trait))] +#[allow(clippy::should_implement_trait)] impl Yaml { /// Convert a string to a [`Yaml`] node. /// @@ -757,96 +343,3 @@ impl Iterator for YamlIter { self.yaml.next() } } - -#[cfg(test)] -mod test { - use super::{YAMLDecodingTrap, Yaml, YamlDecoder}; - - #[test] - fn test_read_bom() { - let s = b"\xef\xbb\xbf--- -a: 1 -b: 2.2 -c: [1, 2] -"; - let out = YamlDecoder::read(s as &[u8]).decode().unwrap(); - let doc = &out[0]; - assert_eq!(doc["a"].as_i64().unwrap(), 1i64); - assert!((doc["b"].as_f64().unwrap() - 2.2f64).abs() <= f64::EPSILON); - assert_eq!(doc["c"][1].as_i64().unwrap(), 2i64); - assert!(doc["d"][0].is_badvalue()); - } - - #[test] - fn test_read_utf16le() { - let s = b"\xff\xfe-\x00-\x00-\x00 -\x00a\x00:\x00 \x001\x00 -\x00b\x00:\x00 \x002\x00.\x002\x00 -\x00c\x00:\x00 \x00[\x001\x00,\x00 \x002\x00]\x00 -\x00"; - let out = YamlDecoder::read(s as &[u8]).decode().unwrap(); - let doc = &out[0]; - println!("GOT: {doc:?}"); - assert_eq!(doc["a"].as_i64().unwrap(), 1i64); - assert!((doc["b"].as_f64().unwrap() - 2.2f64) <= f64::EPSILON); - assert_eq!(doc["c"][1].as_i64().unwrap(), 2i64); - assert!(doc["d"][0].is_badvalue()); - } - - #[test] - fn test_read_utf16be() { - let s = b"\xfe\xff\x00-\x00-\x00-\x00 -\x00a\x00:\x00 \x001\x00 -\x00b\x00:\x00 \x002\x00.\x002\x00 -\x00c\x00:\x00 \x00[\x001\x00,\x00 \x002\x00]\x00 -"; - let out = YamlDecoder::read(s as &[u8]).decode().unwrap(); - let doc = &out[0]; - println!("GOT: {doc:?}"); - assert_eq!(doc["a"].as_i64().unwrap(), 1i64); - assert!((doc["b"].as_f64().unwrap() - 2.2f64).abs() <= f64::EPSILON); - assert_eq!(doc["c"][1].as_i64().unwrap(), 2i64); - assert!(doc["d"][0].is_badvalue()); - } - - #[test] - fn test_read_utf16le_nobom() { - let s = b"-\x00-\x00-\x00 -\x00a\x00:\x00 \x001\x00 -\x00b\x00:\x00 \x002\x00.\x002\x00 -\x00c\x00:\x00 \x00[\x001\x00,\x00 \x002\x00]\x00 -\x00"; - let out = YamlDecoder::read(s as &[u8]).decode().unwrap(); - let doc = &out[0]; - println!("GOT: {doc:?}"); - assert_eq!(doc["a"].as_i64().unwrap(), 1i64); - assert!((doc["b"].as_f64().unwrap() - 2.2f64).abs() <= f64::EPSILON); - assert_eq!(doc["c"][1].as_i64().unwrap(), 2i64); - assert!(doc["d"][0].is_badvalue()); - } - - #[test] - fn test_read_trap() { - let s = b"--- -a\xa9: 1 -b: 2.2 -c: [1, 2] -"; - let out = YamlDecoder::read(s as &[u8]) - .encoding_trap(YAMLDecodingTrap::Ignore) - .decode() - .unwrap(); - let doc = &out[0]; - println!("GOT: {doc:?}"); - assert_eq!(doc["a"].as_i64().unwrap(), 1i64); - assert!((doc["b"].as_f64().unwrap() - 2.2f64).abs() <= f64::EPSILON); - assert_eq!(doc["c"][1].as_i64().unwrap(), 2i64); - assert!(doc["d"][0].is_badvalue()); - } - - #[test] - fn test_or() { - assert_eq!(Yaml::Null.or(Yaml::Integer(3)), Yaml::Integer(3)); - assert_eq!(Yaml::Integer(3).or(Yaml::Integer(7)), Yaml::Integer(3)); - } -} From 425f00ceb84eb7a38eef3c841fab32b2635398cf Mon Sep 17 00:00:00 2001 From: Ethiraric Date: Mon, 10 Jun 2024 22:39:13 +0200 Subject: [PATCH 372/380] Add base support for annotated YAML objects. --- saphyr/README.md | 2 + saphyr/src/annotated.rs | 276 ++++++++++++++++++++++++++++++++++++++++ saphyr/src/lib.rs | 7 +- saphyr/src/macros.rs | 85 +++++++++++++ saphyr/src/yaml.rs | 112 +++------------- 5 files changed, 390 insertions(+), 92 deletions(-) create mode 100644 saphyr/src/annotated.rs create mode 100644 saphyr/src/macros.rs diff --git a/saphyr/README.md b/saphyr/README.md index 1b417b9..c1df782 100644 --- a/saphyr/README.md +++ b/saphyr/README.md @@ -72,6 +72,8 @@ Note that `saphyr::Yaml` implements `Index<&'a str>` and `Index`: * `Index<&'a str>` assumes the container is a string to value map * otherwise, `Yaml::BadValue` is returned +Note that `annotated::YamlData` cannot return `BadValue` and will panic. + If your document does not conform to this convention (e.g. map with complex type key), you can use the `Yaml::as_XXX` family API of functions to access your objects. diff --git a/saphyr/src/annotated.rs b/saphyr/src/annotated.rs new file mode 100644 index 0000000..930e37e --- /dev/null +++ b/saphyr/src/annotated.rs @@ -0,0 +1,276 @@ +//! Utilities for extracting YAML with certain metadata. + +use std::ops::{Index, IndexMut}; + +use hashlink::LinkedHashMap; + +use crate::loader::parse_f64; + +/// A YAML node without annotation. See [`Yaml`], you probably want that. +/// +/// Unlike [`Yaml`] which only supports storing data, [`YamlData`] allows storing metadata +/// alongside the YAML data. It is unlikely one would build it directly; it is mostly intended to +/// be used, for instance, when parsing a YAML where retrieving markers / comments is relevant. +/// +/// This definition is recursive. Each annotated node will be a structure storing the annotations +/// and the YAML data. We need to have a distinct enumeration from [`Yaml`] because the type for +/// the `Array` and `Hash` variants is dependant on that structure. +/// +/// If we had written [`YamlData`] as: +/// ```ignore +/// pub enum YamlData { +/// // ... +/// Array(Vec), +/// Hash(LinkedHashMap), +/// // ... +/// } +/// ``` +/// we would have stored metadata for the root node only. All subsequent nodes would be [`Yaml`], +/// which does not contain any annotation. +/// +/// Notable differences with [`Yaml`]: +/// * Indexing cannot return `BadValue` and will panic instead. +/// +/// [`Yaml`]: crate::Yaml +#[derive(Clone, PartialEq, PartialOrd, Debug, Eq, Ord, Hash)] +pub enum YamlData +where + Node: std::hash::Hash + std::cmp::Eq + From, +{ + /// Float types are stored as String and parsed on demand. + /// Note that `f64` does NOT implement Eq trait and can NOT be stored in `BTreeMap`. + Real(String), + /// YAML int is stored as i64. + Integer(i64), + /// YAML scalar. + String(String), + /// YAML bool, e.g. `true` or `false`. + Boolean(bool), + /// YAML array, can be accessed as a `Vec`. + Array(AnnotatedArray), + /// YAML hash, can be accessed as a `LinkedHashMap`. + /// + /// Insertion order will match the order of insertion into the map. + Hash(AnnotatedHash), + /// Alias, not fully supported yet. + Alias(usize), + /// YAML null, e.g. `null` or `~`. + Null, + /// Accessing a nonexistent node via the Index trait returns `BadValue`. This + /// simplifies error handling in the calling code. Invalid type conversion also + /// returns `BadValue`. + BadValue, +} + +/// The type contained in the [`YamlData::Array`] variant. This corresponds to YAML sequences. +#[allow(clippy::module_name_repetitions)] +pub type AnnotatedArray = Vec; +/// The type contained in the [`YamlData::Hash`] variant. This corresponds to YAML mappings. +#[allow(clippy::module_name_repetitions)] +pub type AnnotatedHash = LinkedHashMap; + +impl YamlData +where + Node: std::hash::Hash + std::cmp::Eq + From, +{ + define_as!(as_bool, bool, Boolean); + define_as!(as_i64, i64, Integer); + + define_as_ref!(as_hash, &AnnotatedHash, Hash); + define_as_ref!(as_str, &str, String); + define_as_ref!(as_vec, &AnnotatedArray, Array); + + define_as_mut_ref!(as_mut_hash, &mut AnnotatedHash, Hash); + define_as_mut_ref!(as_mut_vec, &mut AnnotatedArray, Array); + + define_into!(into_bool, bool, Boolean); + define_into!(into_hash, AnnotatedHash, Hash); + define_into!(into_i64, i64, Integer); + define_into!(into_string, String, String); + define_into!(into_vec, AnnotatedArray, Array); + + define_is!(is_alias, Self::Alias(_)); + define_is!(is_array, Self::Array(_)); + define_is!(is_badvalue, Self::BadValue); + define_is!(is_boolean, Self::Boolean(_)); + define_is!(is_integer, Self::Integer(_)); + define_is!(is_null, Self::Null); + define_is!(is_real, Self::Real(_)); + define_is!(is_string, Self::String(_)); + + /// Return the `f64` value contained in this YAML node. + /// + /// If the node is not a [`YamlData::Real`] YAML node or its contents is not a valid `f64` + /// string, `None` is returned. + #[must_use] + pub fn as_f64(&self) -> Option { + if let Self::Real(ref v) = self { + parse_f64(v) + } else { + None + } + } + + /// Return the `f64` value contained in this YAML node. + /// + /// If the node is not a [`YamlData::Real`] YAML node or its contents is not a valid `f64` + /// string, `None` is returned. + #[must_use] + pub fn into_f64(self) -> Option { + self.as_f64() + } + + /// If a value is null or otherwise bad (see variants), consume it and + /// replace it with a given value `other`. Otherwise, return self unchanged. + /// + /// See [`Yaml::or`] for examples. + /// + /// [`Yaml::or`]: crate::Yaml::or + #[must_use] + pub fn or(self, other: Self) -> Self { + match self { + Self::BadValue | Self::Null => other, + this => this, + } + } + + /// See [`Self::or`] for behavior. + /// + /// This performs the same operations, but with borrowed values for less linear pipelines. + #[must_use] + pub fn borrowed_or<'a>(&'a self, other: &'a Self) -> &'a Self { + match self { + Self::BadValue | Self::Null => other, + this => this, + } + } +} + +// NOTE(ethiraric, 10/06/2024): We cannot create a "generic static" variable which would act as a +// `BAD_VALUE`. This means that, unlike for `Yaml`, we have to make the indexing method panic. + +impl<'a, Node> Index<&'a str> for YamlData +where + Node: std::hash::Hash + std::cmp::Eq + From, +{ + type Output = Node; + + /// Perform indexing if `self` is a mapping. + /// + /// # Panics + /// This function panics if the key given does not exist within `self` (as per [`Index`]). + /// + /// This function also panics if `self` is not a [`YamlData::Hash`]. + fn index(&self, idx: &'a str) -> &Node { + let key = Self::String(idx.to_owned()); + match self.as_hash() { + Some(h) => h.get(&key.into()).unwrap(), + None => panic!("{idx}: key does not exist"), + } + } +} + +impl<'a, Node> IndexMut<&'a str> for YamlData +where + Node: std::hash::Hash + std::cmp::Eq + From, +{ + /// Perform indexing if `self` is a mapping. + /// + /// # Panics + /// This function panics if the key given does not exist within `self` (as per [`Index`]). + /// + /// This function also panics if `self` is not a [`YamlData::Hash`]. + fn index_mut(&mut self, idx: &'a str) -> &mut Node { + let key = Self::String(idx.to_owned()); + match self.as_mut_hash() { + Some(h) => h.get_mut(&key.into()).unwrap(), + None => panic!("Not a hash type"), + } + } +} + +impl Index for YamlData +where + Node: std::hash::Hash + std::cmp::Eq + From, +{ + type Output = Node; + + /// Perform indexing if `self` is a sequence or a mapping. + /// + /// # Panics + /// This function panics if the index given is out of range (as per [`Index`]). If `self` is a + /// [`YamlData::Array`], this is when the index is bigger or equal to the length of the + /// underlying `Vec`. If `self` is a [`YamlData::Hash`], this is when the mapping sequence does + /// not contain [`YamlData::Integer`]`(idx)` as a key. + /// + /// This function also panics if `self` is not a [`YamlData::Array`] nor a [`YamlData::Hash`]. + fn index(&self, idx: usize) -> &Node { + if let Some(v) = self.as_vec() { + v.get(idx).unwrap() + } else if let Some(v) = self.as_hash() { + let key = Self::Integer(i64::try_from(idx).unwrap()); + v.get(&key.into()).unwrap() + } else { + panic!("{idx}: Index out of bounds"); + } + } +} + +impl IndexMut for YamlData +where + Node: std::hash::Hash + std::cmp::Eq + From, +{ + /// Perform indexing if `self` is a sequence or a mapping. + /// + /// # Panics + /// This function panics if the index given is out of range (as per [`IndexMut`]). If `self` is + /// a [`YamlData::Array`], this is when the index is bigger or equal to the length of the + /// underlying `Vec`. If `self` is a [`YamlData::Hash`], this is when the mapping sequence does + /// not contain [`YamlData::Integer`]`(idx)` as a key. + /// + /// This function also panics if `self` is not a [`YamlData::Array`] nor a [`YamlData::Hash`]. + fn index_mut(&mut self, idx: usize) -> &mut Node { + match self { + Self::Array(sequence) => sequence.index_mut(idx), + Self::Hash(mapping) => { + let key = Self::Integer(i64::try_from(idx).unwrap()); + mapping.get_mut(&key.into()).unwrap() + } + _ => panic!("Attempting to index but `self` is not a sequence nor a mapping"), + } + } +} + +impl IntoIterator for YamlData +where + Node: std::hash::Hash + std::cmp::Eq + From, +{ + type Item = Node; + type IntoIter = AnnotatedYamlIter; + + fn into_iter(self) -> Self::IntoIter { + Self::IntoIter { + yaml: self.into_vec().unwrap_or_default().into_iter(), + } + } +} + +/// An iterator over a [`YamlData`] node. +#[allow(clippy::module_name_repetitions)] +pub struct AnnotatedYamlIter +where + Node: std::hash::Hash + std::cmp::Eq + From>, +{ + yaml: std::vec::IntoIter, +} + +impl Iterator for AnnotatedYamlIter +where + Node: std::hash::Hash + std::cmp::Eq + From>, +{ + type Item = Node; + + fn next(&mut self) -> Option { + self.yaml.next() + } +} diff --git a/saphyr/src/lib.rs b/saphyr/src/lib.rs index ede027b..43cd0b8 100644 --- a/saphyr/src/lib.rs +++ b/saphyr/src/lib.rs @@ -43,15 +43,20 @@ #![warn(missing_docs, clippy::pedantic)] +#[macro_use] +mod macros; + +mod annotated; mod char_traits; mod emitter; mod loader; mod yaml; // Re-export main components. +pub use crate::annotated::{AnnotatedArray, AnnotatedHash, YamlData}; pub use crate::emitter::YamlEmitter; pub use crate::loader::YamlLoader; -pub use crate::yaml::{Array, Hash, Yaml}; +pub use crate::yaml::{Array, Hash, Yaml, YamlIter}; #[cfg(feature = "encoding")] mod encoding; diff --git a/saphyr/src/macros.rs b/saphyr/src/macros.rs new file mode 100644 index 0000000..a455736 --- /dev/null +++ b/saphyr/src/macros.rs @@ -0,0 +1,85 @@ +/// Generate `as_TYPE` methods for the [`crate::Yaml`] enum. +macro_rules! define_as ( + ($fn_name:ident, $t:ident, $variant:ident) => ( +/// Get a copy of the inner object in the YAML enum if it is a `$t`. +/// +/// # Return +/// If the variant of `self` is `Self::$variant`, return `Some($t)` with a copy of the `$t` contained. +/// Otherwise, return `None`. +#[must_use] +pub fn $fn_name(&self) -> Option<$t> { + match *self { + Self::$variant(v) => Some(v), + _ => None + } +} + ); +); + +/// Generate `as_TYPE` methods for the [`crate::Yaml`] enum, returning references. +macro_rules! define_as_ref ( + ($fn_name:ident, $t:ty, $variant:ident) => ( +/// Get a reference to the inner object in the YAML enum if it is a `$t`. +/// +/// # Return +/// If the variant of `self` is `Self::$variant`, return `Some(&$t)` with the `$t` contained. Otherwise, +/// return `None`. +#[must_use] +pub fn $fn_name(&self) -> Option<$t> { + match *self { + Self::$variant(ref v) => Some(v), + _ => None + } +} + ); +); + +/// Generate `as_TYPE` methods for the [`crate::Yaml`] enum, returning mutable references. +macro_rules! define_as_mut_ref ( + ($fn_name:ident, $t:ty, $variant:ident) => ( +/// Get a mutable reference to the inner object in the YAML enum if it is a `$t`. +/// +/// # Return +/// If the variant of `self` is `Self::$variant`, return `Some(&mut $t)` with the `$t` contained. +/// Otherwise, return `None`. +#[must_use] +pub fn $fn_name(&mut self) -> Option<$t> { + match *self { + Self::$variant(ref mut v) => Some(v), + _ => None + } +} + ); +); + +/// Generate `into_TYPE` methods for the [`crate::Yaml`] enum. +macro_rules! define_into ( + ($fn_name:ident, $t:ty, $variant:ident) => ( +/// Get the inner object in the YAML enum if it is a `$t`. +/// +/// # Return +/// If the variant of `self` is `Self::$variant`, return `Some($t)` with the `$t` contained. Otherwise, +/// return `None`. +#[must_use] +pub fn $fn_name(self) -> Option<$t> { + match self { + Self::$variant(v) => Some(v), + _ => None + } +} + ); +); + +/// Generate `is_TYPE` methods for the [`crate::Yaml`] enum. +macro_rules! define_is ( + ($fn_name:ident, $variant:pat) => ( +/// Check whether the YAML enum contains the given variant. +/// +/// # Return +/// If the variant of `self` is `Self::$variant`, return `true`. Otherwise, return `False`. +#[must_use] +pub fn $fn_name(&self) -> bool { + matches!(self, $variant) +} + ); +); diff --git a/saphyr/src/yaml.rs b/saphyr/src/yaml.rs index acd8f68..5ac883f 100644 --- a/saphyr/src/yaml.rs +++ b/saphyr/src/yaml.rs @@ -56,108 +56,31 @@ pub type Array = Vec; /// The type contained in the `Yaml::Hash` variant. This corresponds to YAML mappings. pub type Hash = LinkedHashMap; -macro_rules! define_as ( - ($name:ident, $t:ident, $yt:ident) => ( -/// Get a copy of the inner object in the YAML enum if it is a `$t`. -/// -/// # Return -/// If the variant of `self` is `Yaml::$yt`, return `Some($t)` with a copy of the `$t` contained. -/// Otherwise, return `None`. -#[must_use] -pub fn $name(&self) -> Option<$t> { - match *self { - Yaml::$yt(v) => Some(v), - _ => None - } -} - ); -); - -macro_rules! define_as_ref ( - ($name:ident, $t:ty, $yt:ident) => ( -/// Get a reference to the inner object in the YAML enum if it is a `$t`. -/// -/// # Return -/// If the variant of `self` is `Yaml::$yt`, return `Some(&$t)` with the `$t` contained. Otherwise, -/// return `None`. -#[must_use] -pub fn $name(&self) -> Option<$t> { - match *self { - Yaml::$yt(ref v) => Some(v), - _ => None - } -} - ); -); - -macro_rules! define_as_mut_ref ( - ($name:ident, $t:ty, $yt:ident) => ( -/// Get a mutable reference to the inner object in the YAML enum if it is a `$t`. -/// -/// # Return -/// If the variant of `self` is `Yaml::$yt`, return `Some(&mut $t)` with the `$t` contained. -/// Otherwise, return `None`. -#[must_use] -pub fn $name(&mut self) -> Option<$t> { - match *self { - Yaml::$yt(ref mut v) => Some(v), - _ => None - } -} - ); -); - -macro_rules! define_into ( - ($name:ident, $t:ty, $yt:ident) => ( -/// Get the inner object in the YAML enum if it is a `$t`. -/// -/// # Return -/// If the variant of `self` is `Yaml::$yt`, return `Some($t)` with the `$t` contained. Otherwise, -/// return `None`. -#[must_use] -pub fn $name(self) -> Option<$t> { - match self { - Yaml::$yt(v) => Some(v), - _ => None - } -} - ); -); - impl Yaml { define_as!(as_bool, bool, Boolean); define_as!(as_i64, i64, Integer); - define_as_ref!(as_str, &str, String); define_as_ref!(as_hash, &Hash, Hash); + define_as_ref!(as_str, &str, String); define_as_ref!(as_vec, &Array, Array); define_as_mut_ref!(as_mut_hash, &mut Hash, Hash); define_as_mut_ref!(as_mut_vec, &mut Array, Array); define_into!(into_bool, bool, Boolean); + define_into!(into_hash, Hash, Hash); define_into!(into_i64, i64, Integer); define_into!(into_string, String, String); - define_into!(into_hash, Hash, Hash); define_into!(into_vec, Array, Array); - /// Return whether `self` is a [`Yaml::Null`] node. - #[must_use] - pub fn is_null(&self) -> bool { - matches!(*self, Yaml::Null) - } - - /// Return whether `self` is a [`Yaml::BadValue`] node. - #[must_use] - pub fn is_badvalue(&self) -> bool { - matches!(*self, Yaml::BadValue) - } - - /// Return whether `self` is a [`Yaml::Array`] node. - #[must_use] - pub fn is_array(&self) -> bool { - matches!(*self, Yaml::Array(_)) - } + define_is!(is_alias, Self::Alias(_)); + define_is!(is_array, Self::Array(_)); + define_is!(is_badvalue, Self::BadValue); + define_is!(is_boolean, Self::Boolean(_)); + define_is!(is_integer, Self::Integer(_)); + define_is!(is_null, Self::Null); + define_is!(is_real, Self::Real(_)); + define_is!(is_string, Self::String(_)); /// Return the `f64` value contained in this YAML node. /// @@ -198,8 +121,9 @@ impl Yaml { } } - /// See `or` for behavior. This performs the same operations, but with - /// borrowed values for less linear pipelines. + /// See [`Self::or`] for behavior. + /// + /// This performs the same operations, but with borrowed values for less linear pipelines. #[must_use] pub fn borrowed_or<'a>(&'a self, other: &'a Self) -> &'a Self { match self { @@ -274,6 +198,12 @@ impl<'a> Index<&'a str> for Yaml { } impl<'a> IndexMut<&'a str> for Yaml { + /// Perform indexing if `self` is a mapping. + /// + /// # Panics + /// This function panics if the key given does not exist within `self` (as per [`Index`]). + /// + /// This function also panics if `self` is not a [`Yaml::Hash`]. fn index_mut(&mut self, idx: &'a str) -> &mut Yaml { let key = Yaml::String(idx.to_owned()); match self.as_mut_hash() { @@ -302,9 +232,9 @@ impl IndexMut for Yaml { /// Perform indexing if `self` is a sequence or a mapping. /// /// # Panics - /// This function panics if the index given is out of range (as per [`IndexMut`]). If `self` i + /// This function panics if the index given is out of range (as per [`IndexMut`]). If `self` is /// a [`Yaml::Array`], this is when the index is bigger or equal to the length of the - /// underlying `Vec`. If `self` is a [`Yaml::Hash`], this is when the mapping sequence does no + /// underlying `Vec`. If `self` is a [`Yaml::Hash`], this is when the mapping sequence does not /// contain [`Yaml::Integer`]`(idx)` as a key. /// /// This function also panics if `self` is not a [`Yaml::Array`] nor a [`Yaml::Hash`]. From d2caaf2ab310f4466488ea6f1a6eb1f08abb8b50 Mon Sep 17 00:00:00 2001 From: Ethiraric Date: Thu, 13 Jun 2024 18:30:03 +0200 Subject: [PATCH 373/380] Prepare the ground for annotated parsing. * Make `YamlLoader` generic on the type of the `Node`. This is required because deeper node need to have annotations too. * Add a `LoadableYamlNode` trait, required for YAML node types to be loaded by `YamlLoader`. It contains methods required by `YamlLoader` during loading. * Implement `LoadableYamlNode` for `Yaml`. * Take `load_from_str` out of `YamlLoader` for parsing non-annotated nodes. This avoids every user to specify the generics in `YamlLoader::::load_from_str`. --- saphyr/CHANGELOG.md | 9 +- saphyr/examples/dump_yaml.rs | 4 +- saphyr/src/emitter.rs | 11 +- saphyr/src/encoding.rs | 4 +- saphyr/src/lib.rs | 8 +- saphyr/src/loader.rs | 250 +++++++++++++++++++++----------- saphyr/tests/basic.rs | 18 +-- saphyr/tests/emitter.rs | 22 +-- saphyr/tests/quickcheck.rs | 4 +- saphyr/tests/spec_test.rs | 4 +- saphyr/tests/test_round_trip.rs | 15 +- 11 files changed, 218 insertions(+), 131 deletions(-) diff --git a/saphyr/CHANGELOG.md b/saphyr/CHANGELOG.md index 4e3ebd2..b3ac065 100644 --- a/saphyr/CHANGELOG.md +++ b/saphyr/CHANGELOG.md @@ -2,7 +2,14 @@ ## Upcoming -**Features** +**Breaking Changes**: +- Move `load_from_*` methods out of the `YamlLoader`. Now, `YamlLoader` gained + a generic parameter. Moving those functions out of it spares having to + manually specify the generic in `YamlLoader::::load_from_str`. + Manipulating the `YamlLoader` directly was not common. + + +**Features**: - ([#19](https://github.com/Ethiraric/yaml-rust2/pull/19)) `Yaml` now implements `IndexMut` and `IndexMut<&'a str>`. These functions may not diff --git a/saphyr/examples/dump_yaml.rs b/saphyr/examples/dump_yaml.rs index 8d85d7e..34e41ee 100644 --- a/saphyr/examples/dump_yaml.rs +++ b/saphyr/examples/dump_yaml.rs @@ -1,4 +1,4 @@ -use saphyr::{Yaml, YamlLoader}; +use saphyr::{load_from_str, Yaml}; use std::env; use std::fs::File; use std::io::prelude::*; @@ -36,7 +36,7 @@ fn main() { let mut s = String::new(); f.read_to_string(&mut s).unwrap(); - let docs = YamlLoader::load_from_str(&s).unwrap(); + let docs = load_from_str(&s).unwrap(); for doc in &docs { println!("---"); dump_node(doc, 0); diff --git a/saphyr/src/emitter.rs b/saphyr/src/emitter.rs index 19d8d4a..8a7be40 100644 --- a/saphyr/src/emitter.rs +++ b/saphyr/src/emitter.rs @@ -36,9 +36,9 @@ impl From for EmitError { /// The YAML serializer. /// /// ``` -/// # use saphyr::{YamlLoader, YamlEmitter}; +/// # use saphyr::{load_from_str, YamlEmitter}; /// let input_string = "a: b\nc: d"; -/// let yaml = YamlLoader::load_from_str(input_string).unwrap(); +/// let yaml = load_from_str(input_string).unwrap(); /// /// let mut output = String::new(); /// YamlEmitter::new(&mut output).dump(&yaml[0]).unwrap(); @@ -159,10 +159,10 @@ impl<'a> YamlEmitter<'a> { /// # Examples /// /// ```rust - /// use saphyr::{Yaml, YamlEmitter, YamlLoader}; + /// use saphyr::{Yaml, YamlEmitter, load_from_str}; /// /// let input = r#"{foo: "bar!\nbar!", baz: 42}"#; - /// let parsed = YamlLoader::load_from_str(input).unwrap(); + /// let parsed = load_from_str(input).unwrap(); /// eprintln!("{:?}", parsed); /// /// let mut output = String::new(); @@ -410,12 +410,11 @@ fn need_quotes(string: &str) -> bool { #[cfg(test)] mod test { use super::YamlEmitter; - use crate::YamlLoader; #[test] fn test_multiline_string() { let input = r#"{foo: "bar!\nbar!", baz: 42}"#; - let parsed = YamlLoader::load_from_str(input).unwrap(); + let parsed = crate::load_from_str(input).unwrap(); let mut output = String::new(); let mut emitter = YamlEmitter::new(&mut output); emitter.multiline_strings(true); diff --git a/saphyr/src/encoding.rs b/saphyr/src/encoding.rs index 6d46dd3..17dcb69 100644 --- a/saphyr/src/encoding.rs +++ b/saphyr/src/encoding.rs @@ -4,7 +4,7 @@ use std::{borrow::Cow, ops::ControlFlow}; use encoding_rs::{Decoder, DecoderResult, Encoding}; -use crate::{loader::LoadError, Yaml, YamlLoader}; +use crate::{loader::LoadError, Yaml}; /// The signature of the function to call when using [`YAMLDecodingTrap::Call`]. /// @@ -102,7 +102,7 @@ impl YamlDecoder { // Decode the input buffer. decode_loop(&buffer, &mut output, &mut decoder, self.trap)?; - YamlLoader::load_from_str(&output).map_err(LoadError::Scan) + crate::load_from_str(&output).map_err(LoadError::Scan) } } diff --git a/saphyr/src/lib.rs b/saphyr/src/lib.rs index 43cd0b8..140f50f 100644 --- a/saphyr/src/lib.rs +++ b/saphyr/src/lib.rs @@ -21,9 +21,9 @@ //! Parse a string into `Vec` and then serialize it as a YAML string. //! //! ``` -//! use saphyr::{YamlLoader, YamlEmitter}; +//! use saphyr::{load_from_str, YamlEmitter}; //! -//! let docs = YamlLoader::load_from_str("[1, 2, 3]").unwrap(); +//! let docs = load_from_str("[1, 2, 3]").unwrap(); //! let doc = &docs[0]; // select the first YAML document //! assert_eq!(doc[0].as_i64().unwrap(), 1); // access elements by index //! @@ -55,7 +55,9 @@ mod yaml; // Re-export main components. pub use crate::annotated::{AnnotatedArray, AnnotatedHash, YamlData}; pub use crate::emitter::YamlEmitter; -pub use crate::loader::YamlLoader; +pub use crate::loader::{ + load_from_iter, load_from_parser, load_from_str, LoadableYamlNode, YamlLoader, +}; pub use crate::yaml::{Array, Hash, Yaml, YamlIter}; #[cfg(feature = "encoding")] diff --git a/saphyr/src/loader.rs b/saphyr/src/loader.rs index f2706bb..365cb21 100644 --- a/saphyr/src/loader.rs +++ b/saphyr/src/loader.rs @@ -2,26 +2,84 @@ use std::collections::BTreeMap; +use hashlink::LinkedHashMap; use saphyr_parser::{Event, MarkedEventReceiver, Marker, Parser, ScanError, TScalarStyle, Tag}; use crate::{Hash, Yaml}; -/// Main structure for quickly parsing YAML. +/// Load the given string as a set of YAML documents. /// -/// See [`YamlLoader::load_from_str`]. -#[derive(Default)] -#[allow(clippy::module_name_repetitions)] -pub struct YamlLoader { - /// The different YAML documents that are loaded. - docs: Vec, - // states - // (current node, anchor_id) tuple - doc_stack: Vec<(Yaml, usize)>, - key_stack: Vec, - anchor_map: BTreeMap, +/// The `source` is interpreted as YAML documents and is parsed. Parsing succeeds if and only +/// if all documents are parsed successfully. An error in a latter document prevents the former +/// from being returned. +/// # Errors +/// Returns `ScanError` when loading fails. +pub fn load_from_str(source: &str) -> Result, ScanError> { + load_from_iter(source.chars()) } -impl MarkedEventReceiver for YamlLoader { +/// Load the contents of the given iterator as a set of YAML documents. +/// +/// The `source` is interpreted as YAML documents and is parsed. Parsing succeeds if and only +/// if all documents are parsed successfully. An error in a latter document prevents the former +/// from being returned. +/// # Errors +/// Returns `ScanError` when loading fails. +pub fn load_from_iter>(source: I) -> Result, ScanError> { + let mut parser = Parser::new(source); + load_from_parser(&mut parser) +} + +/// Load the contents from the specified Parser as a set of YAML documents. +/// +/// Parsing succeeds if and only if all documents are parsed successfully. +/// An error in a latter document prevents the former from being returned. +/// # Errors +/// Returns `ScanError` when loading fails. +pub fn load_from_parser>( + parser: &mut Parser, +) -> Result, ScanError> { + let mut loader = YamlLoader::default(); + parser.load(&mut loader, true)?; + Ok(loader.docs) +} + +/// Main structure for quickly parsing YAML. +/// +/// See [`load_from_str`]. +#[allow(clippy::module_name_repetitions)] +pub struct YamlLoader +where + Node: LoadableYamlNode, +{ + /// The different YAML documents that are loaded. + docs: Vec, + // states + // (current node, anchor_id) tuple + doc_stack: Vec<(Node, usize)>, + key_stack: Vec, + anchor_map: BTreeMap, +} + +// For some reason, rustc wants `Node: Default` if I `#[derive(Default)]`. +impl Default for YamlLoader +where + Node: LoadableYamlNode, +{ + fn default() -> Self { + Self { + docs: vec![], + doc_stack: vec![], + key_stack: vec![], + anchor_map: BTreeMap::new(), + } + } +} + +impl MarkedEventReceiver for YamlLoader +where + Node: LoadableYamlNode, +{ fn on_event(&mut self, ev: Event, _: Marker) { // println!("EV {:?}", ev); match ev { @@ -31,21 +89,21 @@ impl MarkedEventReceiver for YamlLoader { Event::DocumentEnd => { match self.doc_stack.len() { // empty document - 0 => self.docs.push(Yaml::BadValue), + 0 => self.docs.push(Yaml::BadValue.into()), 1 => self.docs.push(self.doc_stack.pop().unwrap().0), _ => unreachable!(), } } Event::SequenceStart(aid, _) => { - self.doc_stack.push((Yaml::Array(Vec::new()), aid)); + self.doc_stack.push((Yaml::Array(Vec::new()).into(), aid)); } Event::SequenceEnd => { let node = self.doc_stack.pop().unwrap(); self.insert_new_node(node); } Event::MappingStart(aid, _) => { - self.doc_stack.push((Yaml::Hash(Hash::new()), aid)); - self.key_stack.push(Yaml::BadValue); + self.doc_stack.push((Yaml::Hash(Hash::new()).into(), aid)); + self.key_stack.push(Yaml::BadValue.into()); } Event::MappingEnd => { self.key_stack.pop().unwrap(); @@ -91,17 +149,47 @@ impl MarkedEventReceiver for YamlLoader { Yaml::from_str(&v) }; - self.insert_new_node((node, aid)); + self.insert_new_node((node.into(), aid)); } Event::Alias(id) => { let n = match self.anchor_map.get(&id) { Some(v) => v.clone(), - None => Yaml::BadValue, + None => Yaml::BadValue.into(), }; self.insert_new_node((n, 0)); } } - // println!("DOC {:?}", self.doc_stack); + } +} + +impl YamlLoader +where + Node: LoadableYamlNode, +{ + fn insert_new_node(&mut self, node: (Node, usize)) { + // valid anchor id starts from 1 + if node.1 > 0 { + self.anchor_map.insert(node.1, node.0.clone()); + } + if self.doc_stack.is_empty() { + self.doc_stack.push(node); + } else { + let parent = self.doc_stack.last_mut().unwrap(); + let parent_node = &mut parent.0; + if parent_node.is_array() { + parent_node.array_mut().push(node.0); + } else if parent_node.is_hash() { + let cur_key = self.key_stack.last_mut().unwrap(); + // current node is a key + if cur_key.is_badvalue() { + *cur_key = node.0; + // current node is a value + } else { + let hash = parent_node.hash_mut(); + hash.insert(cur_key.take(), node.0); + } + } + } } } @@ -142,76 +230,70 @@ impl std::fmt::Display for LoadError { } } -impl YamlLoader { - fn insert_new_node(&mut self, node: (Yaml, usize)) { - // valid anchor id starts from 1 - if node.1 > 0 { - self.anchor_map.insert(node.1, node.0.clone()); - } - if self.doc_stack.is_empty() { - self.doc_stack.push(node); - } else { - let parent = self.doc_stack.last_mut().unwrap(); - match *parent { - (Yaml::Array(ref mut v), _) => v.push(node.0), - (Yaml::Hash(ref mut h), _) => { - let cur_key = self.key_stack.last_mut().unwrap(); - // current node is a key - if cur_key.is_badvalue() { - *cur_key = node.0; - // current node is a value - } else { - let mut newkey = Yaml::BadValue; - std::mem::swap(&mut newkey, cur_key); - h.insert(newkey, node.0); - } - } - _ => unreachable!(), - } - } - } +/// A trait providing methods used by the [`YamlLoader`]. +/// +/// This trait must be implemented on YAML node types (i.e.: [`Yaml`] and annotated YAML nodes). It +/// provides the necessary methods for [`YamlLoader`] to load data into the node. +pub trait LoadableYamlNode: From + Clone + std::hash::Hash + Eq { + /// Return whether the YAML node is an array. + fn is_array(&self) -> bool; - /// Load the given string as a set of YAML documents. + /// Return whether the YAML node is a hash. + fn is_hash(&self) -> bool; + + /// Return whether the YAML node is `BadValue`. + fn is_badvalue(&self) -> bool; + + /// Retrieve the array variant of the YAML node. /// - /// The `source` is interpreted as YAML documents and is parsed. Parsing succeeds if and only - /// if all documents are parsed successfully. An error in a latter document prevents the former - /// from being returned. - /// # Errors - /// Returns `ScanError` when loading fails. - pub fn load_from_str(source: &str) -> Result, ScanError> { - Self::load_from_iter(source.chars()) - } + /// # Panics + /// This function panics if `self` is not an array. + fn array_mut(&mut self) -> &mut Vec; - /// Load the contents of the given iterator as a set of YAML documents. + /// Retrieve the hash variant of the YAML node. /// - /// The `source` is interpreted as YAML documents and is parsed. Parsing succeeds if and only - /// if all documents are parsed successfully. An error in a latter document prevents the former - /// from being returned. - /// # Errors - /// Returns `ScanError` when loading fails. - pub fn load_from_iter>(source: I) -> Result, ScanError> { - let mut parser = Parser::new(source); - Self::load_from_parser(&mut parser) - } + /// # Panics + /// This function panics if `self` is not a hash. + fn hash_mut(&mut self) -> &mut LinkedHashMap; - /// Load the contents from the specified Parser as a set of YAML documents. - /// - /// Parsing succeeds if and only if all documents are parsed successfully. - /// An error in a latter document prevents the former from being returned. - /// # Errors - /// Returns `ScanError` when loading fails. - pub fn load_from_parser>( - parser: &mut Parser, - ) -> Result, ScanError> { - let mut loader = YamlLoader::default(); - parser.load(&mut loader, true)?; - Ok(loader.docs) - } - - /// Return a reference to the parsed Yaml documents. + /// Take the contained node out of `Self`, leaving a `BadValue` in its place. #[must_use] - pub fn documents(&self) -> &[Yaml] { - &self.docs + fn take(&mut self) -> Self; +} + +impl LoadableYamlNode for Yaml { + fn is_array(&self) -> bool { + matches!(self, Yaml::Array(_)) + } + + fn is_hash(&self) -> bool { + matches!(self, Yaml::Hash(_)) + } + + fn is_badvalue(&self) -> bool { + matches!(self, Yaml::BadValue) + } + + fn array_mut(&mut self) -> &mut Vec { + if let Yaml::Array(x) = self { + x + } else { + panic!("Called array_mut on a non-array"); + } + } + + fn hash_mut(&mut self) -> &mut LinkedHashMap { + if let Yaml::Hash(x) = self { + x + } else { + panic!("Called hash_mut on a non-hash"); + } + } + + fn take(&mut self) -> Self { + let mut taken_out = Yaml::BadValue; + std::mem::swap(&mut taken_out, self); + taken_out } } diff --git a/saphyr/tests/basic.rs b/saphyr/tests/basic.rs index cc00cb0..6a20c4d 100644 --- a/saphyr/tests/basic.rs +++ b/saphyr/tests/basic.rs @@ -1,7 +1,7 @@ #![allow(clippy::bool_assert_comparison)] #![allow(clippy::float_cmp)] -use saphyr::{Yaml, YamlEmitter, YamlLoader}; +use saphyr::{load_from_str, Yaml, YamlEmitter}; #[test] fn test_api() { @@ -29,7 +29,7 @@ fn test_api() { - name: Staff damage: 3 "; - let docs = YamlLoader::load_from_str(s).unwrap(); + let docs = load_from_str(s).unwrap(); let doc = &docs[0]; assert_eq!(doc[0]["name"].as_str().unwrap(), "Ogre"); @@ -50,7 +50,7 @@ a: 1 b: 2.2 c: [1, 2] "; - let out = YamlLoader::load_from_str(s).unwrap(); + let out = load_from_str(s).unwrap(); let doc = &out[0]; assert_eq!(doc["a"].as_i64().unwrap(), 1i64); assert_eq!(doc["b"].as_f64().unwrap(), 2.2f64); @@ -66,7 +66,7 @@ a1: &DEFAULT b2: d a2: *DEFAULT "; - let out = YamlLoader::load_from_str(s).unwrap(); + let out = load_from_str(s).unwrap(); let doc = &out[0]; assert_eq!(doc["a2"]["b1"].as_i64().unwrap(), 4); } @@ -78,7 +78,7 @@ a1: &DEFAULT b1: 4 b2: *DEFAULT "; - let out = YamlLoader::load_from_str(s).unwrap(); + let out = load_from_str(s).unwrap(); let doc = &out[0]; assert_eq!(doc["a1"]["b2"], Yaml::BadValue); } @@ -114,7 +114,7 @@ fn test_plain_datatype() { - +12345 - [ true, false ] "; - let out = YamlLoader::load_from_str(s).unwrap(); + let out = load_from_str(s).unwrap(); let doc = &out[0]; assert_eq!(doc[0].as_str().unwrap(), "string"); @@ -171,7 +171,7 @@ fn test_plain_datatype_with_into_methods() { - .NAN - !!float .INF "; - let mut out = YamlLoader::load_from_str(s).unwrap().into_iter(); + let mut out = load_from_str(s).unwrap().into_iter(); let mut doc = out.next().unwrap().into_iter(); assert_eq!(doc.next().unwrap().into_string().unwrap(), "string"); @@ -203,7 +203,7 @@ b: ~ a: ~ c: ~ "; - let out = YamlLoader::load_from_str(s).unwrap(); + let out = load_from_str(s).unwrap(); let first = out.into_iter().next().unwrap(); let mut iter = first.into_hash().unwrap().into_iter(); assert_eq!( @@ -229,7 +229,7 @@ fn test_integer_key() { 1: important: false "; - let out = YamlLoader::load_from_str(s).unwrap(); + let out = load_from_str(s).unwrap(); let first = out.into_iter().next().unwrap(); assert_eq!(first[0]["important"].as_bool().unwrap(), true); } diff --git a/saphyr/tests/emitter.rs b/saphyr/tests/emitter.rs index 53e558f..142713e 100644 --- a/saphyr/tests/emitter.rs +++ b/saphyr/tests/emitter.rs @@ -1,4 +1,4 @@ -use saphyr::{YamlEmitter, YamlLoader}; +use saphyr::{load_from_str, YamlEmitter}; #[allow(clippy::similar_names)] #[test] @@ -16,7 +16,7 @@ a4: - 2 "; - let docs = YamlLoader::load_from_str(s).unwrap(); + let docs = load_from_str(s).unwrap(); let doc = &docs[0]; let mut writer = String::new(); { @@ -25,7 +25,7 @@ a4: } println!("original:\n{s}"); println!("emitted:\n{writer}"); - let docs_new = match YamlLoader::load_from_str(&writer) { + let docs_new = match load_from_str(&writer) { Ok(y) => y, Err(e) => panic!("{}", e), }; @@ -55,14 +55,14 @@ products: {}: empty hash key "; - let docs = YamlLoader::load_from_str(s).unwrap(); + let docs = load_from_str(s).unwrap(); let doc = &docs[0]; let mut writer = String::new(); { let mut emitter = YamlEmitter::new(&mut writer); emitter.dump(doc).unwrap(); } - let docs_new = match YamlLoader::load_from_str(&writer) { + let docs_new = match load_from_str(&writer) { Ok(y) => y, Err(e) => panic!("{}", e), }; @@ -106,7 +106,7 @@ x: test y: avoid quoting here z: string with spaces"#; - let docs = YamlLoader::load_from_str(s).unwrap(); + let docs = load_from_str(s).unwrap(); let doc = &docs[0]; let mut writer = String::new(); { @@ -164,7 +164,7 @@ null0: ~ bool0: true bool1: false"#; - let docs = YamlLoader::load_from_str(input).unwrap(); + let docs = load_from_str(input).unwrap(); let doc = &docs[0]; let mut writer = String::new(); { @@ -212,7 +212,7 @@ e: h: []" }; - let docs = YamlLoader::load_from_str(s).unwrap(); + let docs = load_from_str(s).unwrap(); let doc = &docs[0]; let mut writer = String::new(); { @@ -234,7 +234,7 @@ a: - - e - f"; - let docs = YamlLoader::load_from_str(s).unwrap(); + let docs = load_from_str(s).unwrap(); let doc = &docs[0]; let mut writer = String::new(); { @@ -258,7 +258,7 @@ a: - - f - - e"; - let docs = YamlLoader::load_from_str(s).unwrap(); + let docs = load_from_str(s).unwrap(); let doc = &docs[0]; let mut writer = String::new(); { @@ -280,7 +280,7 @@ a: d: e: f"; - let docs = YamlLoader::load_from_str(s).unwrap(); + let docs = load_from_str(s).unwrap(); let doc = &docs[0]; let mut writer = String::new(); { diff --git a/saphyr/tests/quickcheck.rs b/saphyr/tests/quickcheck.rs index 819d064..7c91601 100644 --- a/saphyr/tests/quickcheck.rs +++ b/saphyr/tests/quickcheck.rs @@ -3,7 +3,7 @@ extern crate quickcheck; use quickcheck::TestResult; -use saphyr::{Yaml, YamlEmitter, YamlLoader}; +use saphyr::{load_from_str, Yaml, YamlEmitter}; quickcheck! { fn test_check_weird_keys(xs: Vec) -> TestResult { @@ -13,7 +13,7 @@ quickcheck! { let mut emitter = YamlEmitter::new(&mut out_str); emitter.dump(&input).unwrap(); } - match YamlLoader::load_from_str(&out_str) { + match load_from_str(&out_str) { Ok(output) => TestResult::from_bool(output.len() == 1 && input == output[0]), Err(err) => TestResult::error(err.to_string()), } diff --git a/saphyr/tests/spec_test.rs b/saphyr/tests/spec_test.rs index 80b6bfd..52a0551 100644 --- a/saphyr/tests/spec_test.rs +++ b/saphyr/tests/spec_test.rs @@ -1,4 +1,4 @@ -use saphyr::{Hash, Yaml, YamlEmitter, YamlLoader}; +use saphyr::{load_from_str, Hash, Yaml, YamlEmitter}; #[test] fn test_mapvec_legal() { @@ -53,5 +53,5 @@ fn test_mapvec_legal() { // - 6 // ``` - YamlLoader::load_from_str(&out_str).unwrap(); + load_from_str(&out_str).unwrap(); } diff --git a/saphyr/tests/test_round_trip.rs b/saphyr/tests/test_round_trip.rs index 0d03d3e..f1b2838 100644 --- a/saphyr/tests/test_round_trip.rs +++ b/saphyr/tests/test_round_trip.rs @@ -1,10 +1,10 @@ -use saphyr::{Yaml, YamlEmitter, YamlLoader}; +use saphyr::{load_from_str, Yaml, YamlEmitter}; fn roundtrip(original: &Yaml) { let mut emitted = String::new(); YamlEmitter::new(&mut emitted).dump(original).unwrap(); - let documents = YamlLoader::load_from_str(&emitted).unwrap(); + let documents = load_from_str(&emitted).unwrap(); println!("emitted {emitted}"); assert_eq!(documents.len(), 1); @@ -12,12 +12,12 @@ fn roundtrip(original: &Yaml) { } fn double_roundtrip(original: &str) { - let parsed = YamlLoader::load_from_str(original).unwrap(); + let parsed = load_from_str(original).unwrap(); let mut serialized = String::new(); YamlEmitter::new(&mut serialized).dump(&parsed[0]).unwrap(); - let reparsed = YamlLoader::load_from_str(&serialized).unwrap(); + let reparsed = load_from_str(&serialized).unwrap(); assert_eq!(parsed, reparsed); } @@ -55,15 +55,12 @@ fn test_numberlike_strings() { /// Example from #[test] fn test_issue133() { - let doc = YamlLoader::load_from_str("\"0x123\"") - .unwrap() - .pop() - .unwrap(); + let doc = load_from_str("\"0x123\"").unwrap().pop().unwrap(); assert_eq!(doc, Yaml::String("0x123".to_string())); let mut out_str = String::new(); YamlEmitter::new(&mut out_str).dump(&doc).unwrap(); - let doc2 = YamlLoader::load_from_str(&out_str).unwrap().pop().unwrap(); + let doc2 = load_from_str(&out_str).unwrap().pop().unwrap(); assert_eq!(doc, doc2); // This failed because the type has changed to a number now } From 9ab8dd7c070cdca1c43085357e027f31942b5e85 Mon Sep 17 00:00:00 2001 From: Ethiraric Date: Thu, 13 Jun 2024 19:14:05 +0200 Subject: [PATCH 374/380] Update doccomments. --- saphyr/src/lib.rs | 1 - saphyr/src/loader.rs | 44 ++++++++++++++++++++++++++++++++++---------- 2 files changed, 34 insertions(+), 11 deletions(-) diff --git a/saphyr/src/lib.rs b/saphyr/src/lib.rs index 140f50f..f41cdd4 100644 --- a/saphyr/src/lib.rs +++ b/saphyr/src/lib.rs @@ -30,7 +30,6 @@ //! let mut out_str = String::new(); //! let mut emitter = YamlEmitter::new(&mut out_str); //! emitter.dump(doc).unwrap(); // dump the YAML object to a String -//! //! ``` //! //! # Features diff --git a/saphyr/src/loader.rs b/saphyr/src/loader.rs index 365cb21..0ea0b34 100644 --- a/saphyr/src/loader.rs +++ b/saphyr/src/loader.rs @@ -7,22 +7,42 @@ use saphyr_parser::{Event, MarkedEventReceiver, Marker, Parser, ScanError, TScal use crate::{Hash, Yaml}; -/// Load the given string as a set of YAML documents. +/// Load the given string as an array of YAML documents. /// /// The `source` is interpreted as YAML documents and is parsed. Parsing succeeds if and only /// if all documents are parsed successfully. An error in a latter document prevents the former /// from being returned. +/// +/// Most often, only one document is loaded in a YAML string. In this case, only the first element +/// of the returned `Vec` will be used. Otherwise, each element in the `Vec` is a document: +/// +/// ``` +/// use saphyr::{load_from_str, Yaml}; +/// +/// let docs = load_from_str(r#" +/// First document +/// --- +/// - Second document +/// "#).unwrap(); +/// let first_document = &docs[0]; // Select the first YAML document +/// // The document is a string containing "First document". +/// assert_eq!(*first_document, Yaml::String("First document".to_owned())); +/// +/// let second_document = &docs[1]; // Select the second YAML document +/// // The document is an array containing a single string, "Second document". +/// assert_eq!(second_document[0], Yaml::String("Second document".to_owned())); +/// ``` +/// /// # Errors /// Returns `ScanError` when loading fails. pub fn load_from_str(source: &str) -> Result, ScanError> { load_from_iter(source.chars()) } -/// Load the contents of the given iterator as a set of YAML documents. +/// Load the contents of the given iterator as an array of YAML documents. +/// +/// See [`load_from_str`] for details. /// -/// The `source` is interpreted as YAML documents and is parsed. Parsing succeeds if and only -/// if all documents are parsed successfully. An error in a latter document prevents the former -/// from being returned. /// # Errors /// Returns `ScanError` when loading fails. pub fn load_from_iter>(source: I) -> Result, ScanError> { @@ -30,10 +50,10 @@ pub fn load_from_iter>(source: I) -> Result, load_from_parser(&mut parser) } -/// Load the contents from the specified Parser as a set of YAML documents. +/// Load the contents from the specified Parser as an array of YAML documents. +/// +/// See [`load_from_str`] for details. /// -/// Parsing succeeds if and only if all documents are parsed successfully. -/// An error in a latter document prevents the former from being returned. /// # Errors /// Returns `ScanError` when loading fails. pub fn load_from_parser>( @@ -44,9 +64,13 @@ pub fn load_from_parser>( Ok(loader.docs) } -/// Main structure for quickly parsing YAML. +/// Main structure for parsing YAML. /// -/// See [`load_from_str`]. +/// The `YamlLoader` may load raw YAML documents or add metadata if needed. The type of the `Node` +/// dictates what data and metadata the loader will add to the `Node`. +/// +/// Each node must implement [`LoadableYamlNode`]. The methods are required for the loader to +/// manipulate and populate the `Node`. #[allow(clippy::module_name_repetitions)] pub struct YamlLoader where From 842d536cb0fe6c6d399d90dde0014d0d34c8f2d1 Mon Sep 17 00:00:00 2001 From: Ethiraric Date: Thu, 13 Jun 2024 22:23:05 +0200 Subject: [PATCH 375/380] Implement `LoadableYamlNode` for `MarkedYaml`. A few changes have had to be made to `LoadableYamlNode`: * The `From` requirement has been removed as it can be error-prone. It was not a direct conversion as it is unable to handle `Yaml::Hash` or `Yaml::Array` with a non-empty array/map. * Instead, `from_bare_yaml` was added, which does essentially the same as `From` but does not leak for users of the library. * `with_marker` has been added to populate the marker for the `Node`. The function is empty for `Yaml`. `load_from_*` methods have been added to `MarkedYaml` for convenience. They load YAML using the markers. The markers returned from `saphyr-parser` are not all correct, meaning that tests are kind of useless for now as they will fail due to bugs outside of the scope of this library. --- saphyr/CHANGELOG.md | 8 ++ saphyr/Cargo.toml | 2 +- saphyr/src/annotated.rs | 9 +- saphyr/src/annotated/marked_yaml.rs | 152 ++++++++++++++++++++++++++++ saphyr/src/lib.rs | 6 +- saphyr/src/loader.rs | 59 ++++++++--- saphyr/src/yaml.rs | 1 + 7 files changed, 222 insertions(+), 15 deletions(-) create mode 100644 saphyr/src/annotated/marked_yaml.rs diff --git a/saphyr/CHANGELOG.md b/saphyr/CHANGELOG.md index b3ac065..e87aa0a 100644 --- a/saphyr/CHANGELOG.md +++ b/saphyr/CHANGELOG.md @@ -29,6 +29,14 @@ already use this. Users of the original `yaml-rust` crate may freely disable this feature (`cargo <...> --no-default-features`) and lower MSRV to 1.65.0. +- Load with metadata + + The `YamlLoader` now supports adding metadata alongside the nodes. For now, + the only one supported is the `Marker`, pointing to the position in the input + stream of the start of the node. + + This feature is extensible and should allow (later) to add comments. + ## v0.8.0 **Breaking Changes**: diff --git a/saphyr/Cargo.toml b/saphyr/Cargo.toml index 4b62419..dd5f7a2 100644 --- a/saphyr/Cargo.toml +++ b/saphyr/Cargo.toml @@ -22,7 +22,7 @@ encoding = [ "dep:encoding_rs" ] [dependencies] arraydeque = "0.5.1" -saphyr-parser = "0.0.1" +saphyr-parser = "0.0.2" encoding_rs = { version = "0.8.33", optional = true } hashlink = "0.8" diff --git a/saphyr/src/annotated.rs b/saphyr/src/annotated.rs index 930e37e..21ba8f3 100644 --- a/saphyr/src/annotated.rs +++ b/saphyr/src/annotated.rs @@ -1,12 +1,17 @@ //! Utilities for extracting YAML with certain metadata. +pub mod marked_yaml; + use std::ops::{Index, IndexMut}; use hashlink::LinkedHashMap; use crate::loader::parse_f64; -/// A YAML node without annotation. See [`Yaml`], you probably want that. +/// YAML data for nodes that will contain annotations. +/// +/// If you want a YAML node without annotations, see [`Yaml`]. +/// If you want a YAML node with annotations, see types using [`YamlData`] such as [`MarkedYaml`] /// /// Unlike [`Yaml`] which only supports storing data, [`YamlData`] allows storing metadata /// alongside the YAML data. It is unlikely one would build it directly; it is mostly intended to @@ -32,6 +37,7 @@ use crate::loader::parse_f64; /// * Indexing cannot return `BadValue` and will panic instead. /// /// [`Yaml`]: crate::Yaml +/// [`MarkedYaml`]: marked_yaml::MarkedYaml #[derive(Clone, PartialEq, PartialOrd, Debug, Eq, Ord, Hash)] pub enum YamlData where @@ -93,6 +99,7 @@ where define_is!(is_array, Self::Array(_)); define_is!(is_badvalue, Self::BadValue); define_is!(is_boolean, Self::Boolean(_)); + define_is!(is_hash, Self::Hash(_)); define_is!(is_integer, Self::Integer(_)); define_is!(is_null, Self::Null); define_is!(is_real, Self::Real(_)); diff --git a/saphyr/src/annotated/marked_yaml.rs b/saphyr/src/annotated/marked_yaml.rs new file mode 100644 index 0000000..1c86072 --- /dev/null +++ b/saphyr/src/annotated/marked_yaml.rs @@ -0,0 +1,152 @@ +//! A YAML node with position in the source document. +//! +//! This is set aside so as to not clutter `annotated.rs`. + +use hashlink::LinkedHashMap; +use saphyr_parser::{Marker, Parser, ScanError}; + +use crate::{LoadableYamlNode, Yaml, YamlData, YamlLoader}; + +/// A YAML node with [`Marker`]s pointing to the start of the node. +/// +/// This structure does not implement functions to operate on the YAML object. To access those, +/// refer to the [`Self::data`] field. +#[derive(Clone, Debug)] +pub struct MarkedYaml { + /// The marker pointing to the start of the node. + /// + /// The marker is relative to the start of the input stream that was given to the parser, not + /// to the start of the document within the input stream. + pub marker: Marker, + /// The YAML contents of the node. + pub data: YamlData, +} + +impl MarkedYaml { + /// Load the given string as an array of YAML documents. + /// + /// See the function [`load_from_str`] for more details. + /// + /// # Errors + /// Returns `ScanError` when loading fails. + /// + /// [`load_from_str`]: `crate::load_from_str` + pub fn load_from_str(source: &str) -> Result, ScanError> { + Self::load_from_iter(source.chars()) + } + + /// Load the contents of the given iterator as an array of YAML documents. + /// + /// See the function [`load_from_iter`] for more details. + /// + /// # Errors + /// Returns `ScanError` when loading fails. + /// + /// [`load_from_iter`]: `crate::load_from_iter` + pub fn load_from_iter>(source: I) -> Result, ScanError> { + let mut parser = Parser::new(source); + Self::load_from_parser(&mut parser) + } + + /// Load the contents from the specified [`Parser`] as an array of YAML documents. + /// + /// See the function [`load_from_parser`] for more details. + /// + /// # Errors + /// Returns `ScanError` when loading fails. + /// + /// [`load_from_parser`]: `crate::load_from_parser` + pub fn load_from_parser>( + parser: &mut Parser, + ) -> Result, ScanError> { + let mut loader = YamlLoader::::default(); + parser.load(&mut loader, true)?; + Ok(loader.into_documents()) + } +} + +impl PartialEq for MarkedYaml { + fn eq(&self, other: &Self) -> bool { + self.data.eq(&other.data) + } +} + +// I don't know if it's okay to implement that, but we need it for the hashmap. +impl Eq for MarkedYaml {} + +impl std::hash::Hash for MarkedYaml { + fn hash(&self, state: &mut H) { + self.data.hash(state); + } +} + +impl From> for MarkedYaml { + fn from(value: YamlData) -> Self { + Self { + marker: Marker::default(), + data: value, + } + } +} + +impl LoadableYamlNode for MarkedYaml { + fn from_bare_yaml(yaml: Yaml) -> Self { + Self { + marker: Marker::default(), + data: match yaml { + Yaml::Real(x) => YamlData::Real(x), + Yaml::Integer(x) => YamlData::Integer(x), + Yaml::String(x) => YamlData::String(x), + Yaml::Boolean(x) => YamlData::Boolean(x), + // Array and Hash will always have their container empty. + Yaml::Array(_) => YamlData::Array(vec![]), + Yaml::Hash(_) => YamlData::Hash(LinkedHashMap::new()), + Yaml::Alias(x) => YamlData::Alias(x), + Yaml::Null => YamlData::Null, + Yaml::BadValue => YamlData::BadValue, + }, + } + } + + fn is_array(&self) -> bool { + self.data.is_array() + } + + fn is_hash(&self) -> bool { + self.data.is_hash() + } + + fn is_badvalue(&self) -> bool { + self.data.is_badvalue() + } + + fn array_mut(&mut self) -> &mut Vec { + if let YamlData::Array(x) = &mut self.data { + x + } else { + panic!("Called array_mut on a non-array"); + } + } + + fn hash_mut(&mut self) -> &mut LinkedHashMap { + if let YamlData::Hash(x) = &mut self.data { + x + } else { + panic!("Called array_mut on a non-array"); + } + } + + fn take(&mut self) -> Self { + let mut taken_out = MarkedYaml { + marker: Marker::default(), + data: YamlData::BadValue, + }; + std::mem::swap(&mut taken_out, self); + taken_out + } + + fn with_marker(mut self, marker: Marker) -> Self { + self.marker = marker; + self + } +} diff --git a/saphyr/src/lib.rs b/saphyr/src/lib.rs index f41cdd4..1f431f3 100644 --- a/saphyr/src/lib.rs +++ b/saphyr/src/lib.rs @@ -52,7 +52,9 @@ mod loader; mod yaml; // Re-export main components. -pub use crate::annotated::{AnnotatedArray, AnnotatedHash, YamlData}; +pub use crate::annotated::{ + marked_yaml::MarkedYaml, AnnotatedArray, AnnotatedHash, AnnotatedYamlIter, YamlData, +}; pub use crate::emitter::YamlEmitter; pub use crate::loader::{ load_from_iter, load_from_parser, load_from_str, LoadableYamlNode, YamlLoader, @@ -67,3 +69,5 @@ pub use crate::encoding::{YAMLDecodingTrap, YAMLDecodingTrapFn, YamlDecoder}; // Re-export `ScanError` as it is used as part of our public API and we want consumers to be able // to inspect it (e.g. perform a `match`). They wouldn't be able without it. pub use saphyr_parser::ScanError; +// Re-export [`Marker`] which is used for annotated YAMLs. +pub use saphyr_parser::Marker; diff --git a/saphyr/src/loader.rs b/saphyr/src/loader.rs index 0ea0b34..d6ececd 100644 --- a/saphyr/src/loader.rs +++ b/saphyr/src/loader.rs @@ -50,7 +50,7 @@ pub fn load_from_iter>(source: I) -> Result, load_from_parser(&mut parser) } -/// Load the contents from the specified Parser as an array of YAML documents. +/// Load the contents from the specified [`Parser`] as an array of YAML documents. /// /// See [`load_from_str`] for details. /// @@ -104,8 +104,7 @@ impl MarkedEventReceiver for YamlLoader where Node: LoadableYamlNode, { - fn on_event(&mut self, ev: Event, _: Marker) { - // println!("EV {:?}", ev); + fn on_event(&mut self, ev: Event, marker: Marker) { match ev { Event::DocumentStart | Event::Nothing | Event::StreamStart | Event::StreamEnd => { // do nothing @@ -113,21 +112,29 @@ where Event::DocumentEnd => { match self.doc_stack.len() { // empty document - 0 => self.docs.push(Yaml::BadValue.into()), + 0 => self + .docs + .push(Node::from_bare_yaml(Yaml::BadValue).with_marker(marker)), 1 => self.docs.push(self.doc_stack.pop().unwrap().0), _ => unreachable!(), } } Event::SequenceStart(aid, _) => { - self.doc_stack.push((Yaml::Array(Vec::new()).into(), aid)); + self.doc_stack.push(( + Node::from_bare_yaml(Yaml::Array(Vec::new())).with_marker(marker), + aid, + )); } Event::SequenceEnd => { let node = self.doc_stack.pop().unwrap(); self.insert_new_node(node); } Event::MappingStart(aid, _) => { - self.doc_stack.push((Yaml::Hash(Hash::new()).into(), aid)); - self.key_stack.push(Yaml::BadValue.into()); + self.doc_stack.push(( + Node::from_bare_yaml(Yaml::Hash(Hash::new())).with_marker(marker), + aid, + )); + self.key_stack.push(Node::from_bare_yaml(Yaml::BadValue)); } Event::MappingEnd => { self.key_stack.pop().unwrap(); @@ -172,15 +179,14 @@ where // Datatype is not specified, or unrecognized Yaml::from_str(&v) }; - - self.insert_new_node((node.into(), aid)); + self.insert_new_node((Node::from_bare_yaml(node).with_marker(marker), aid)); } Event::Alias(id) => { let n = match self.anchor_map.get(&id) { Some(v) => v.clone(), - None => Yaml::BadValue.into(), + None => Node::from_bare_yaml(Yaml::BadValue), }; - self.insert_new_node((n, 0)); + self.insert_new_node((n.with_marker(marker), 0)); } } } @@ -215,6 +221,12 @@ where } } } + + /// Return the document nodes from `self`, consuming it in the process. + #[must_use] + pub fn into_documents(self) -> Vec { + self.docs + } } /// An error that happened when loading a YAML document. @@ -258,7 +270,19 @@ impl std::fmt::Display for LoadError { /// /// This trait must be implemented on YAML node types (i.e.: [`Yaml`] and annotated YAML nodes). It /// provides the necessary methods for [`YamlLoader`] to load data into the node. -pub trait LoadableYamlNode: From + Clone + std::hash::Hash + Eq { +pub trait LoadableYamlNode: Clone + std::hash::Hash + Eq { + /// Create an instance of `Self` from a [`Yaml`]. + /// + /// Nodes must implement this to be built. The optional metadata that they contain will be + /// later provided by the loader and can be default initialized. The [`Yaml`] object passed as + /// parameter may be of the [`Array`] or [`Hash`] variants. In this event, the inner container + /// will always be empty. There is no need to traverse all elements to convert them from + /// [`Yaml`] to `Self`. + /// + /// [`Array`]: `Yaml::Array` + /// [`Hash`]: `Yaml::Hash` + fn from_bare_yaml(yaml: Yaml) -> Self; + /// Return whether the YAML node is an array. fn is_array(&self) -> bool; @@ -283,9 +307,20 @@ pub trait LoadableYamlNode: From + Clone + std::hash::Hash + Eq { /// Take the contained node out of `Self`, leaving a `BadValue` in its place. #[must_use] fn take(&mut self) -> Self; + + /// Provide the marker for the node (builder-style). + #[inline] + #[must_use] + fn with_marker(self, _: Marker) -> Self { + self + } } impl LoadableYamlNode for Yaml { + fn from_bare_yaml(yaml: Yaml) -> Self { + yaml + } + fn is_array(&self) -> bool { matches!(self, Yaml::Array(_)) } diff --git a/saphyr/src/yaml.rs b/saphyr/src/yaml.rs index 5ac883f..f15ba00 100644 --- a/saphyr/src/yaml.rs +++ b/saphyr/src/yaml.rs @@ -77,6 +77,7 @@ impl Yaml { define_is!(is_array, Self::Array(_)); define_is!(is_badvalue, Self::BadValue); define_is!(is_boolean, Self::Boolean(_)); + define_is!(is_hash, Self::Hash(_)); define_is!(is_integer, Self::Integer(_)); define_is!(is_null, Self::Null); define_is!(is_real, Self::Real(_)); From 23c0b3c547a32897c9a46ea4f868a3210d32d861 Mon Sep 17 00:00:00 2001 From: Ethiraric Date: Thu, 13 Jun 2024 22:37:56 +0200 Subject: [PATCH 376/380] Move `load_from_*` functions in `Yaml`. This would make more sense in user code: ```rs Yaml::load_from_str("foo"); // Explicit that we're parsing YAML load_from_str("foo"); // Too implicit, too generic, may be from another lib ``` Plus, this mirrors `MarkedYaml`'s behavior. --- saphyr/README.md | 4 +- saphyr/examples/dump_yaml.rs | 4 +- saphyr/src/annotated/marked_yaml.rs | 10 ++--- saphyr/src/emitter.rs | 12 +++--- saphyr/src/encoding.rs | 2 +- saphyr/src/lib.rs | 8 ++-- saphyr/src/loader.rs | 59 +--------------------------- saphyr/src/yaml.rs | 60 ++++++++++++++++++++++++++++- saphyr/tests/basic.rs | 18 ++++----- saphyr/tests/emitter.rs | 22 +++++------ saphyr/tests/quickcheck.rs | 4 +- saphyr/tests/spec_test.rs | 4 +- saphyr/tests/test_round_trip.rs | 12 +++--- 13 files changed, 110 insertions(+), 109 deletions(-) diff --git a/saphyr/README.md b/saphyr/README.md index c1df782..498edc9 100644 --- a/saphyr/README.md +++ b/saphyr/README.md @@ -28,7 +28,7 @@ cargo add saphyr Use `saphyr::YamlLoader` to load YAML documents and access them as `Yaml` objects: ```rust -use saphyr::{YamlLoader, YamlEmitter}; +use saphyr::{Yaml, YamlEmitter}; fn main() { let s = @@ -40,7 +40,7 @@ bar: - 1 - 2.0 "; - let docs = YamlLoader::load_from_str(s).unwrap(); + let docs = Yaml::load_from_str(s).unwrap(); // Multi document support, doc is a yaml::Yaml let doc = &docs[0]; diff --git a/saphyr/examples/dump_yaml.rs b/saphyr/examples/dump_yaml.rs index 34e41ee..8641732 100644 --- a/saphyr/examples/dump_yaml.rs +++ b/saphyr/examples/dump_yaml.rs @@ -1,4 +1,4 @@ -use saphyr::{load_from_str, Yaml}; +use saphyr::Yaml; use std::env; use std::fs::File; use std::io::prelude::*; @@ -36,7 +36,7 @@ fn main() { let mut s = String::new(); f.read_to_string(&mut s).unwrap(); - let docs = load_from_str(&s).unwrap(); + let docs = Yaml::load_from_str(&s).unwrap(); for doc in &docs { println!("---"); dump_node(doc, 0); diff --git a/saphyr/src/annotated/marked_yaml.rs b/saphyr/src/annotated/marked_yaml.rs index 1c86072..dcc73dc 100644 --- a/saphyr/src/annotated/marked_yaml.rs +++ b/saphyr/src/annotated/marked_yaml.rs @@ -30,19 +30,19 @@ impl MarkedYaml { /// # Errors /// Returns `ScanError` when loading fails. /// - /// [`load_from_str`]: `crate::load_from_str` + /// [`load_from_str`]: `Yaml::load_from_str` pub fn load_from_str(source: &str) -> Result, ScanError> { Self::load_from_iter(source.chars()) } /// Load the contents of the given iterator as an array of YAML documents. /// - /// See the function [`load_from_iter`] for more details. + /// See the function [`load_from_str`] for more details. /// /// # Errors /// Returns `ScanError` when loading fails. /// - /// [`load_from_iter`]: `crate::load_from_iter` + /// [`load_from_str`]: `Yaml::load_from_str` pub fn load_from_iter>(source: I) -> Result, ScanError> { let mut parser = Parser::new(source); Self::load_from_parser(&mut parser) @@ -50,12 +50,12 @@ impl MarkedYaml { /// Load the contents from the specified [`Parser`] as an array of YAML documents. /// - /// See the function [`load_from_parser`] for more details. + /// See the function [`load_from_str`] for more details. /// /// # Errors /// Returns `ScanError` when loading fails. /// - /// [`load_from_parser`]: `crate::load_from_parser` + /// [`load_from_str`]: `Yaml::load_from_str` pub fn load_from_parser>( parser: &mut Parser, ) -> Result, ScanError> { diff --git a/saphyr/src/emitter.rs b/saphyr/src/emitter.rs index 8a7be40..3653f56 100644 --- a/saphyr/src/emitter.rs +++ b/saphyr/src/emitter.rs @@ -36,9 +36,9 @@ impl From for EmitError { /// The YAML serializer. /// /// ``` -/// # use saphyr::{load_from_str, YamlEmitter}; +/// # use saphyr::{Yaml, YamlEmitter}; /// let input_string = "a: b\nc: d"; -/// let yaml = load_from_str(input_string).unwrap(); +/// let yaml = Yaml::load_from_str(input_string).unwrap(); /// /// let mut output = String::new(); /// YamlEmitter::new(&mut output).dump(&yaml[0]).unwrap(); @@ -159,10 +159,10 @@ impl<'a> YamlEmitter<'a> { /// # Examples /// /// ```rust - /// use saphyr::{Yaml, YamlEmitter, load_from_str}; + /// use saphyr::{Yaml, YamlEmitter}; /// /// let input = r#"{foo: "bar!\nbar!", baz: 42}"#; - /// let parsed = load_from_str(input).unwrap(); + /// let parsed = Yaml::load_from_str(input).unwrap(); /// eprintln!("{:?}", parsed); /// /// let mut output = String::new(); @@ -409,12 +409,14 @@ fn need_quotes(string: &str) -> bool { #[cfg(test)] mod test { + use crate::Yaml; + use super::YamlEmitter; #[test] fn test_multiline_string() { let input = r#"{foo: "bar!\nbar!", baz: 42}"#; - let parsed = crate::load_from_str(input).unwrap(); + let parsed = Yaml::load_from_str(input).unwrap(); let mut output = String::new(); let mut emitter = YamlEmitter::new(&mut output); emitter.multiline_strings(true); diff --git a/saphyr/src/encoding.rs b/saphyr/src/encoding.rs index 17dcb69..b5e3cd3 100644 --- a/saphyr/src/encoding.rs +++ b/saphyr/src/encoding.rs @@ -102,7 +102,7 @@ impl YamlDecoder { // Decode the input buffer. decode_loop(&buffer, &mut output, &mut decoder, self.trap)?; - crate::load_from_str(&output).map_err(LoadError::Scan) + Yaml::load_from_str(&output).map_err(LoadError::Scan) } } diff --git a/saphyr/src/lib.rs b/saphyr/src/lib.rs index 1f431f3..5309312 100644 --- a/saphyr/src/lib.rs +++ b/saphyr/src/lib.rs @@ -21,9 +21,9 @@ //! Parse a string into `Vec` and then serialize it as a YAML string. //! //! ``` -//! use saphyr::{load_from_str, YamlEmitter}; +//! use saphyr::{Yaml, YamlEmitter}; //! -//! let docs = load_from_str("[1, 2, 3]").unwrap(); +//! let docs = Yaml::load_from_str("[1, 2, 3]").unwrap(); //! let doc = &docs[0]; // select the first YAML document //! assert_eq!(doc[0].as_i64().unwrap(), 1); // access elements by index //! @@ -56,9 +56,7 @@ pub use crate::annotated::{ marked_yaml::MarkedYaml, AnnotatedArray, AnnotatedHash, AnnotatedYamlIter, YamlData, }; pub use crate::emitter::YamlEmitter; -pub use crate::loader::{ - load_from_iter, load_from_parser, load_from_str, LoadableYamlNode, YamlLoader, -}; +pub use crate::loader::{LoadableYamlNode, YamlLoader}; pub use crate::yaml::{Array, Hash, Yaml, YamlIter}; #[cfg(feature = "encoding")] diff --git a/saphyr/src/loader.rs b/saphyr/src/loader.rs index d6ececd..188512a 100644 --- a/saphyr/src/loader.rs +++ b/saphyr/src/loader.rs @@ -3,67 +3,10 @@ use std::collections::BTreeMap; use hashlink::LinkedHashMap; -use saphyr_parser::{Event, MarkedEventReceiver, Marker, Parser, ScanError, TScalarStyle, Tag}; +use saphyr_parser::{Event, MarkedEventReceiver, Marker, ScanError, TScalarStyle, Tag}; use crate::{Hash, Yaml}; -/// Load the given string as an array of YAML documents. -/// -/// The `source` is interpreted as YAML documents and is parsed. Parsing succeeds if and only -/// if all documents are parsed successfully. An error in a latter document prevents the former -/// from being returned. -/// -/// Most often, only one document is loaded in a YAML string. In this case, only the first element -/// of the returned `Vec` will be used. Otherwise, each element in the `Vec` is a document: -/// -/// ``` -/// use saphyr::{load_from_str, Yaml}; -/// -/// let docs = load_from_str(r#" -/// First document -/// --- -/// - Second document -/// "#).unwrap(); -/// let first_document = &docs[0]; // Select the first YAML document -/// // The document is a string containing "First document". -/// assert_eq!(*first_document, Yaml::String("First document".to_owned())); -/// -/// let second_document = &docs[1]; // Select the second YAML document -/// // The document is an array containing a single string, "Second document". -/// assert_eq!(second_document[0], Yaml::String("Second document".to_owned())); -/// ``` -/// -/// # Errors -/// Returns `ScanError` when loading fails. -pub fn load_from_str(source: &str) -> Result, ScanError> { - load_from_iter(source.chars()) -} - -/// Load the contents of the given iterator as an array of YAML documents. -/// -/// See [`load_from_str`] for details. -/// -/// # Errors -/// Returns `ScanError` when loading fails. -pub fn load_from_iter>(source: I) -> Result, ScanError> { - let mut parser = Parser::new(source); - load_from_parser(&mut parser) -} - -/// Load the contents from the specified [`Parser`] as an array of YAML documents. -/// -/// See [`load_from_str`] for details. -/// -/// # Errors -/// Returns `ScanError` when loading fails. -pub fn load_from_parser>( - parser: &mut Parser, -) -> Result, ScanError> { - let mut loader = YamlLoader::default(); - parser.load(&mut loader, true)?; - Ok(loader.docs) -} - /// Main structure for parsing YAML. /// /// The `YamlLoader` may load raw YAML documents or add metadata if needed. The type of the `Node` diff --git a/saphyr/src/yaml.rs b/saphyr/src/yaml.rs index f15ba00..581d5d7 100644 --- a/saphyr/src/yaml.rs +++ b/saphyr/src/yaml.rs @@ -5,8 +5,9 @@ use std::{convert::TryFrom, ops::Index, ops::IndexMut}; use hashlink::LinkedHashMap; +use saphyr_parser::{Parser, ScanError}; -use crate::loader::parse_f64; +use crate::{loader::parse_f64, YamlLoader}; /// A YAML node is stored as this `Yaml` enumeration, which provides an easy way to /// access your YAML document. @@ -57,6 +58,63 @@ pub type Array = Vec; pub type Hash = LinkedHashMap; impl Yaml { + /// Load the given string as an array of YAML documents. + /// + /// The `source` is interpreted as YAML documents and is parsed. Parsing succeeds if and only + /// if all documents are parsed successfully. An error in a latter document prevents the former + /// from being returned. + /// + /// Most often, only one document is loaded in a YAML string. In this case, only the first element + /// of the returned `Vec` will be used. Otherwise, each element in the `Vec` is a document: + /// + /// ``` + /// use saphyr::Yaml; + /// + /// let docs = Yaml::load_from_str(r#" + /// First document + /// --- + /// - Second document + /// "#).unwrap(); + /// let first_document = &docs[0]; // Select the first YAML document + /// // The document is a string containing "First document". + /// assert_eq!(*first_document, Yaml::String("First document".to_owned())); + /// + /// let second_document = &docs[1]; // Select the second YAML document + /// // The document is an array containing a single string, "Second document". + /// assert_eq!(second_document[0], Yaml::String("Second document".to_owned())); + /// ``` + /// + /// # Errors + /// Returns `ScanError` when loading fails. + pub fn load_from_str(source: &str) -> Result, ScanError> { + Self::load_from_iter(source.chars()) + } + + /// Load the contents of the given iterator as an array of YAML documents. + /// + /// See [`Self::load_from_str`] for details. + /// + /// # Errors + /// Returns `ScanError` when loading fails. + pub fn load_from_iter>(source: I) -> Result, ScanError> { + let mut parser = Parser::new(source); + Self::load_from_parser(&mut parser) + } + + /// Load the contents from the specified [`Parser`] as an array of YAML documents. + /// + /// See [`Self::load_from_str`] for details. + /// + /// # Errors + /// Returns `ScanError` when loading fails. + pub fn load_from_parser>( + parser: &mut Parser, + ) -> Result, ScanError> { + let mut loader = YamlLoader::default(); + parser.load(&mut loader, true)?; + Ok(loader.into_documents()) + } + define_as!(as_bool, bool, Boolean); define_as!(as_i64, i64, Integer); diff --git a/saphyr/tests/basic.rs b/saphyr/tests/basic.rs index 6a20c4d..7a97c77 100644 --- a/saphyr/tests/basic.rs +++ b/saphyr/tests/basic.rs @@ -1,7 +1,7 @@ #![allow(clippy::bool_assert_comparison)] #![allow(clippy::float_cmp)] -use saphyr::{load_from_str, Yaml, YamlEmitter}; +use saphyr::{Yaml, YamlEmitter}; #[test] fn test_api() { @@ -29,7 +29,7 @@ fn test_api() { - name: Staff damage: 3 "; - let docs = load_from_str(s).unwrap(); + let docs = Yaml::load_from_str(s).unwrap(); let doc = &docs[0]; assert_eq!(doc[0]["name"].as_str().unwrap(), "Ogre"); @@ -50,7 +50,7 @@ a: 1 b: 2.2 c: [1, 2] "; - let out = load_from_str(s).unwrap(); + let out = Yaml::load_from_str(s).unwrap(); let doc = &out[0]; assert_eq!(doc["a"].as_i64().unwrap(), 1i64); assert_eq!(doc["b"].as_f64().unwrap(), 2.2f64); @@ -66,7 +66,7 @@ a1: &DEFAULT b2: d a2: *DEFAULT "; - let out = load_from_str(s).unwrap(); + let out = Yaml::load_from_str(s).unwrap(); let doc = &out[0]; assert_eq!(doc["a2"]["b1"].as_i64().unwrap(), 4); } @@ -78,7 +78,7 @@ a1: &DEFAULT b1: 4 b2: *DEFAULT "; - let out = load_from_str(s).unwrap(); + let out = Yaml::load_from_str(s).unwrap(); let doc = &out[0]; assert_eq!(doc["a1"]["b2"], Yaml::BadValue); } @@ -114,7 +114,7 @@ fn test_plain_datatype() { - +12345 - [ true, false ] "; - let out = load_from_str(s).unwrap(); + let out = Yaml::load_from_str(s).unwrap(); let doc = &out[0]; assert_eq!(doc[0].as_str().unwrap(), "string"); @@ -171,7 +171,7 @@ fn test_plain_datatype_with_into_methods() { - .NAN - !!float .INF "; - let mut out = load_from_str(s).unwrap().into_iter(); + let mut out = Yaml::load_from_str(s).unwrap().into_iter(); let mut doc = out.next().unwrap().into_iter(); assert_eq!(doc.next().unwrap().into_string().unwrap(), "string"); @@ -203,7 +203,7 @@ b: ~ a: ~ c: ~ "; - let out = load_from_str(s).unwrap(); + let out = Yaml::load_from_str(s).unwrap(); let first = out.into_iter().next().unwrap(); let mut iter = first.into_hash().unwrap().into_iter(); assert_eq!( @@ -229,7 +229,7 @@ fn test_integer_key() { 1: important: false "; - let out = load_from_str(s).unwrap(); + let out = Yaml::load_from_str(s).unwrap(); let first = out.into_iter().next().unwrap(); assert_eq!(first[0]["important"].as_bool().unwrap(), true); } diff --git a/saphyr/tests/emitter.rs b/saphyr/tests/emitter.rs index 142713e..fdf1acb 100644 --- a/saphyr/tests/emitter.rs +++ b/saphyr/tests/emitter.rs @@ -1,4 +1,4 @@ -use saphyr::{load_from_str, YamlEmitter}; +use saphyr::{Yaml, YamlEmitter}; #[allow(clippy::similar_names)] #[test] @@ -16,7 +16,7 @@ a4: - 2 "; - let docs = load_from_str(s).unwrap(); + let docs = Yaml::load_from_str(s).unwrap(); let doc = &docs[0]; let mut writer = String::new(); { @@ -25,7 +25,7 @@ a4: } println!("original:\n{s}"); println!("emitted:\n{writer}"); - let docs_new = match load_from_str(&writer) { + let docs_new = match Yaml::load_from_str(&writer) { Ok(y) => y, Err(e) => panic!("{}", e), }; @@ -55,14 +55,14 @@ products: {}: empty hash key "; - let docs = load_from_str(s).unwrap(); + let docs = Yaml::load_from_str(s).unwrap(); let doc = &docs[0]; let mut writer = String::new(); { let mut emitter = YamlEmitter::new(&mut writer); emitter.dump(doc).unwrap(); } - let docs_new = match load_from_str(&writer) { + let docs_new = match Yaml::load_from_str(&writer) { Ok(y) => y, Err(e) => panic!("{}", e), }; @@ -106,7 +106,7 @@ x: test y: avoid quoting here z: string with spaces"#; - let docs = load_from_str(s).unwrap(); + let docs = Yaml::load_from_str(s).unwrap(); let doc = &docs[0]; let mut writer = String::new(); { @@ -164,7 +164,7 @@ null0: ~ bool0: true bool1: false"#; - let docs = load_from_str(input).unwrap(); + let docs = Yaml::load_from_str(input).unwrap(); let doc = &docs[0]; let mut writer = String::new(); { @@ -212,7 +212,7 @@ e: h: []" }; - let docs = load_from_str(s).unwrap(); + let docs = Yaml::load_from_str(s).unwrap(); let doc = &docs[0]; let mut writer = String::new(); { @@ -234,7 +234,7 @@ a: - - e - f"; - let docs = load_from_str(s).unwrap(); + let docs = Yaml::load_from_str(s).unwrap(); let doc = &docs[0]; let mut writer = String::new(); { @@ -258,7 +258,7 @@ a: - - f - - e"; - let docs = load_from_str(s).unwrap(); + let docs = Yaml::load_from_str(s).unwrap(); let doc = &docs[0]; let mut writer = String::new(); { @@ -280,7 +280,7 @@ a: d: e: f"; - let docs = load_from_str(s).unwrap(); + let docs = Yaml::load_from_str(s).unwrap(); let doc = &docs[0]; let mut writer = String::new(); { diff --git a/saphyr/tests/quickcheck.rs b/saphyr/tests/quickcheck.rs index 7c91601..666739c 100644 --- a/saphyr/tests/quickcheck.rs +++ b/saphyr/tests/quickcheck.rs @@ -3,7 +3,7 @@ extern crate quickcheck; use quickcheck::TestResult; -use saphyr::{load_from_str, Yaml, YamlEmitter}; +use saphyr::{Yaml, YamlEmitter}; quickcheck! { fn test_check_weird_keys(xs: Vec) -> TestResult { @@ -13,7 +13,7 @@ quickcheck! { let mut emitter = YamlEmitter::new(&mut out_str); emitter.dump(&input).unwrap(); } - match load_from_str(&out_str) { + match Yaml::load_from_str(&out_str) { Ok(output) => TestResult::from_bool(output.len() == 1 && input == output[0]), Err(err) => TestResult::error(err.to_string()), } diff --git a/saphyr/tests/spec_test.rs b/saphyr/tests/spec_test.rs index 52a0551..1cf98e5 100644 --- a/saphyr/tests/spec_test.rs +++ b/saphyr/tests/spec_test.rs @@ -1,4 +1,4 @@ -use saphyr::{load_from_str, Hash, Yaml, YamlEmitter}; +use saphyr::{Hash, Yaml, YamlEmitter}; #[test] fn test_mapvec_legal() { @@ -53,5 +53,5 @@ fn test_mapvec_legal() { // - 6 // ``` - load_from_str(&out_str).unwrap(); + Yaml::load_from_str(&out_str).unwrap(); } diff --git a/saphyr/tests/test_round_trip.rs b/saphyr/tests/test_round_trip.rs index f1b2838..e4ada73 100644 --- a/saphyr/tests/test_round_trip.rs +++ b/saphyr/tests/test_round_trip.rs @@ -1,10 +1,10 @@ -use saphyr::{load_from_str, Yaml, YamlEmitter}; +use saphyr::{Yaml, YamlEmitter}; fn roundtrip(original: &Yaml) { let mut emitted = String::new(); YamlEmitter::new(&mut emitted).dump(original).unwrap(); - let documents = load_from_str(&emitted).unwrap(); + let documents = Yaml::load_from_str(&emitted).unwrap(); println!("emitted {emitted}"); assert_eq!(documents.len(), 1); @@ -12,12 +12,12 @@ fn roundtrip(original: &Yaml) { } fn double_roundtrip(original: &str) { - let parsed = load_from_str(original).unwrap(); + let parsed = Yaml::load_from_str(original).unwrap(); let mut serialized = String::new(); YamlEmitter::new(&mut serialized).dump(&parsed[0]).unwrap(); - let reparsed = load_from_str(&serialized).unwrap(); + let reparsed = Yaml::load_from_str(&serialized).unwrap(); assert_eq!(parsed, reparsed); } @@ -55,12 +55,12 @@ fn test_numberlike_strings() { /// Example from #[test] fn test_issue133() { - let doc = load_from_str("\"0x123\"").unwrap().pop().unwrap(); + let doc = Yaml::load_from_str("\"0x123\"").unwrap().pop().unwrap(); assert_eq!(doc, Yaml::String("0x123".to_string())); let mut out_str = String::new(); YamlEmitter::new(&mut out_str).dump(&doc).unwrap(); - let doc2 = load_from_str(&out_str).unwrap().pop().unwrap(); + let doc2 = Yaml::load_from_str(&out_str).unwrap().pop().unwrap(); assert_eq!(doc, doc2); // This failed because the type has changed to a number now } From d582b0fec986d597047db7d9a7904287a1e8731c Mon Sep 17 00:00:00 2001 From: Ethiraric Date: Wed, 3 Jul 2024 00:51:37 +0200 Subject: [PATCH 377/380] Refactor to remove unnecessary unwrap. --- saphyr/src/loader.rs | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/saphyr/src/loader.rs b/saphyr/src/loader.rs index 188512a..5f53492 100644 --- a/saphyr/src/loader.rs +++ b/saphyr/src/loader.rs @@ -144,10 +144,7 @@ where if node.1 > 0 { self.anchor_map.insert(node.1, node.0.clone()); } - if self.doc_stack.is_empty() { - self.doc_stack.push(node); - } else { - let parent = self.doc_stack.last_mut().unwrap(); + if let Some(parent) = self.doc_stack.last_mut() { let parent_node = &mut parent.0; if parent_node.is_array() { parent_node.array_mut().push(node.0); @@ -162,6 +159,8 @@ where hash.insert(cur_key.take(), node.0); } } + } else { + self.doc_stack.push(node); } } From 1fc46923ef51cafc1e88258341ae0f603aa2661f Mon Sep 17 00:00:00 2001 From: Chris Gunn Date: Tue, 9 Jul 2024 07:27:58 -0700 Subject: [PATCH 378/380] Fix multiline string emit. Use `|-` instead of `|` when there is not a trailing newline in the string value. --- saphyr/src/emitter.rs | 32 +++++++++++++++++++--------- saphyr/tests/test_round_trip.rs | 37 +++++++++++++++++++++++++++++++++ 2 files changed, 59 insertions(+), 10 deletions(-) diff --git a/saphyr/src/emitter.rs b/saphyr/src/emitter.rs index 3653f56..6e98ba8 100644 --- a/saphyr/src/emitter.rs +++ b/saphyr/src/emitter.rs @@ -171,7 +171,7 @@ impl<'a> YamlEmitter<'a> { /// emitter.dump(&parsed[0]).unwrap(); /// assert_eq!(output.as_str(), "\ /// --- - /// foo: | + /// foo: |- /// bar! /// bar! /// baz: 42"); @@ -219,15 +219,7 @@ impl<'a> YamlEmitter<'a> { && v.contains('\n') && char_traits::is_valid_literal_block_scalar(v) { - write!(self.writer, "|")?; - self.level += 1; - for line in v.lines() { - writeln!(self.writer)?; - self.write_indent()?; - // It's literal text, so don't escape special chars. - write!(self.writer, "{line}")?; - } - self.level -= 1; + self.emit_literal_block(v)?; } else if need_quotes(v) { escape_str(self.writer, v)?; } else { @@ -260,6 +252,26 @@ impl<'a> YamlEmitter<'a> { } } + fn emit_literal_block(&mut self, v: &str) -> EmitResult { + let ends_with_newline = v.ends_with('\n'); + if ends_with_newline { + self.writer.write_str("|")?; + } else { + self.writer.write_str("|-")?; + } + + self.level += 1; + // lines() will omit the last line if it is empty. + for line in v.lines() { + writeln!(self.writer)?; + self.write_indent()?; + // It's literal text, so don't escape special chars. + self.writer.write_str(line)?; + } + self.level -= 1; + Ok(()) + } + fn emit_array(&mut self, v: &[Yaml]) -> EmitResult { if v.is_empty() { write!(self.writer, "[]")?; diff --git a/saphyr/tests/test_round_trip.rs b/saphyr/tests/test_round_trip.rs index e4ada73..8280f22 100644 --- a/saphyr/tests/test_round_trip.rs +++ b/saphyr/tests/test_round_trip.rs @@ -11,6 +11,19 @@ fn roundtrip(original: &Yaml) { assert_eq!(documents[0], *original); } +fn roundtrip_multiline(original: &Yaml) { + let mut emitted = String::new(); + let mut emitter = YamlEmitter::new(&mut emitted); + emitter.multiline_strings(true); + emitter.dump(original).unwrap(); + + let documents = Yaml::load_from_str(&emitted).unwrap(); + println!("emitted {emitted}"); + + assert_eq!(documents.len(), 1); + assert_eq!(documents[0], *original); +} + fn double_roundtrip(original: &str) { let parsed = Yaml::load_from_str(original).unwrap(); @@ -75,3 +88,27 @@ fn test_crlf() { let y = Yaml::Array(vec![Yaml::String("\r\n".to_owned())]); roundtrip(&y); } + +#[test] +fn test_multiline_noline() { + let y = Yaml::Array(vec![Yaml::String("a".to_owned())]); + roundtrip_multiline(&y); +} + +#[test] +fn test_multiline_inner_newline() { + let y = Yaml::Array(vec![Yaml::String("a\nb".to_owned())]); + roundtrip_multiline(&y); +} + +#[test] +fn test_multiline_trailing_newline() { + let y = Yaml::Array(vec![Yaml::String("a\n".to_owned())]); + roundtrip_multiline(&y); +} + +#[test] +fn test_multiline_leading_newline() { + let y = Yaml::Array(vec![Yaml::String("\na".to_owned())]); + roundtrip_multiline(&y); +} From fd5a606b1961c2ee36696204573950d8c6743b7f Mon Sep 17 00:00:00 2001 From: Ethiraric Date: Wed, 25 Sep 2024 16:31:18 +0200 Subject: [PATCH 379/380] Make `LoadError` `Clone`. Fixes #11 --- saphyr/src/loader.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/saphyr/src/loader.rs b/saphyr/src/loader.rs index 5f53492..ab1a94c 100644 --- a/saphyr/src/loader.rs +++ b/saphyr/src/loader.rs @@ -1,6 +1,6 @@ //! The default loader. -use std::collections::BTreeMap; +use std::{collections::BTreeMap, sync::Arc}; use hashlink::LinkedHashMap; use saphyr_parser::{Event, MarkedEventReceiver, Marker, ScanError, TScalarStyle, Tag}; @@ -172,10 +172,10 @@ where } /// An error that happened when loading a YAML document. -#[derive(Debug)] +#[derive(Debug, Clone)] pub enum LoadError { /// An I/O error. - IO(std::io::Error), + IO(Arc), /// An error within the scanner. This indicates a malformed YAML input. Scan(ScanError), /// A decoding error (e.g.: Invalid UTF-8). @@ -184,7 +184,7 @@ pub enum LoadError { impl From for LoadError { fn from(error: std::io::Error) -> Self { - LoadError::IO(error) + LoadError::IO(Arc::new(error)) } } From 5be327f85507a32b2e454a0c85e7176795424585 Mon Sep 17 00:00:00 2001 From: Ethiraric Date: Wed, 25 Sep 2024 16:32:56 +0200 Subject: [PATCH 380/380] Add changelog entry for last commit. --- saphyr/CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/saphyr/CHANGELOG.md b/saphyr/CHANGELOG.md index e87aa0a..a3ee6bc 100644 --- a/saphyr/CHANGELOG.md +++ b/saphyr/CHANGELOG.md @@ -7,6 +7,8 @@ a generic parameter. Moving those functions out of it spares having to manually specify the generic in `YamlLoader::::load_from_str`. Manipulating the `YamlLoader` directly was not common. +- Make `LoadError` `Clone` by storing an `Arc` instead of the + error directly. **Features**: