diff --git a/saphyr/Readme.md b/saphyr/Readme.md new file mode 100644 index 0000000..fb9de9f --- /dev/null +++ b/saphyr/Readme.md @@ -0,0 +1,11 @@ +# yaml-rust + +The missing Rust implementation for YAML 1.2. + +## Specification Compliance + +### Missing Feature + +* Tag directive +* Tag data type are ignored +* Alias & Anchor diff --git a/saphyr/src/parser.rs b/saphyr/src/parser.rs index 1108e2d..5608d63 100644 --- a/saphyr/src/parser.rs +++ b/saphyr/src/parser.rs @@ -36,18 +36,21 @@ pub enum Event { StreamEnd, DocumentStart, DocumentEnd, - Alias, - Scalar(String, TScalarStyle), - SequenceStart, + // anchor_id + Alias(usize), + Scalar(String, TScalarStyle, usize), + // anchor_id + SequenceStart(usize), SequenceEnd, - MappingStart, + // anchor_id + MappingStart(usize), MappingEnd } impl Event { fn empty_scalar() -> Event { // a null scalar - Event::Scalar("~".to_string(), TScalarStyle::Plain) + Event::Scalar("~".to_string(), TScalarStyle::Plain, 0) } } @@ -111,7 +114,7 @@ impl> Parser { return Ok(Event::StreamEnd); } let ev = try!(self.state_machine()); - // println!("EV {:?}", ev); + println!("EV {:?}", ev); recv.on_event(&ev); Ok(ev) } @@ -159,14 +162,16 @@ impl> Parser { fn load_node(&mut self, first_ev: &Event, recv: &mut R) -> Result<(), ScanError> { match *first_ev { - Event::Alias => { unimplemented!() }, - Event::Scalar(_, _) => { + Event::Alias(..) => { Ok(()) }, - Event::SequenceStart => { + Event::Scalar(_, _, _) => { + Ok(()) + }, + Event::SequenceStart(_) => { self.load_sequence(first_ev, recv) }, - Event::MappingStart => { + Event::MappingStart(_) => { self.load_mapping(first_ev, recv) }, _ => { println!("UNREACHABLE EVENT: {:?}", first_ev); @@ -366,34 +371,60 @@ impl> Parser { } fn parse_node(&mut self, block: bool, indentless_sequence: bool) -> ParseResult { - let tok = try!(self.peek()); + let mut tok = try!(self.peek()); + let anchor_id = 0; + match tok.1 { + TokenType::AliasToken(v) => { + self.pop_state(); + self.skip(); + // TODO(chenyh): find anchor id + return Ok(Event::Alias(0)); + }, + TokenType::AnchorToken(..) => { + self.skip(); + tok = try!(self.peek()); + if let TokenType::TagToken(_, _) = tok.1 { + self.skip(); + tok = try!(self.peek()); + } + }, + TokenType::TagToken(..) => { + // XXX: ex 7.2, an empty scalar can follow a secondary tag + // but we haven't implemented it + self.skip(); + tok = try!(self.peek()); + if let TokenType::AnchorToken(_) = tok.1 { + self.skip(); + tok = try!(self.peek()); + } + }, + _ => {} + } match tok.1 { - TokenType::AliasToken => unimplemented!(), - TokenType::AnchorToken => unimplemented!(), TokenType::BlockEntryToken if indentless_sequence => { self.state = State::IndentlessSequenceEntry; - Ok(Event::SequenceStart) + Ok(Event::SequenceStart(anchor_id)) }, TokenType::ScalarToken(style, v) => { self.pop_state(); self.skip(); - Ok(Event::Scalar(v, style)) + Ok(Event::Scalar(v, style, anchor_id)) }, TokenType::FlowSequenceStartToken => { self.state = State::FlowSequenceFirstEntry; - Ok(Event::SequenceStart) + Ok(Event::SequenceStart(anchor_id)) }, TokenType::FlowMappingStartToken => { self.state = State::FlowMappingFirstKey; - Ok(Event::MappingStart) + Ok(Event::MappingStart(anchor_id)) }, TokenType::BlockSequenceStartToken if block => { self.state = State::BlockSequenceFirstEntry; - Ok(Event::SequenceStart) + Ok(Event::SequenceStart(anchor_id)) }, TokenType::BlockMappingStartToken if block => { self.state = State::BlockMappingFirstKey; - Ok(Event::MappingStart) + Ok(Event::MappingStart(anchor_id)) }, _ => { Err(ScanError::new(tok.0, "while parsing a node, did not find expected node content")) } } @@ -574,7 +605,7 @@ impl> Parser { TokenType::KeyToken => { self.state = State::FlowSequenceEntryMappingKey; self.skip(); - Ok(Event::MappingStart) + Ok(Event::MappingStart(0)) } _ => { self.push_state(State::FlowSequenceEntry); diff --git a/saphyr/src/scanner.rs b/saphyr/src/scanner.rs index 734c7f8..ddec711 100644 --- a/saphyr/src/scanner.rs +++ b/saphyr/src/scanner.rs @@ -70,9 +70,10 @@ pub enum TokenType { FlowEntryToken, KeyToken, ValueToken, - AliasToken, - AnchorToken, - TagToken, + AliasToken(String), + AnchorToken(String), + // handle, suffix + TagToken(String, String), ScalarToken(TScalarStyle, String) } @@ -158,6 +159,14 @@ fn is_digit(c: char) -> bool { c >= '0' && c <= '9' } #[inline] +fn is_alpha(c: char) -> bool { + match c { + '0'...'9' | 'a'...'z' | 'A'...'Z' => true, + '_' | '-' => true, + _ => false + } +} +#[inline] fn is_hex(c: char) -> bool { (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') @@ -335,32 +344,32 @@ impl> Scanner { let c = self.buffer[0]; let nc = self.buffer[1]; match c { - '[' => try!(self.fetch_flow_collection_start(TokenType::FlowSequenceStartToken)), - '{' => try!(self.fetch_flow_collection_start(TokenType::FlowMappingStartToken)), - ']' => try!(self.fetch_flow_collection_end(TokenType::FlowSequenceEndToken)), - '}' => try!(self.fetch_flow_collection_end(TokenType::FlowMappingEndToken)), - ',' => try!(self.fetch_flow_entry()), - '-' if is_blankz(nc) => try!(self.fetch_block_entry()), - '?' if self.flow_level > 0 || is_blankz(nc) => try!(self.fetch_key()), - ':' if self.flow_level > 0 || is_blankz(nc) => try!(self.fetch_value()), - '*' => unimplemented!(), - '&' => unimplemented!(), - '!' => unimplemented!(), + '[' => self.fetch_flow_collection_start(TokenType::FlowSequenceStartToken), + '{' => self.fetch_flow_collection_start(TokenType::FlowMappingStartToken), + ']' => self.fetch_flow_collection_end(TokenType::FlowSequenceEndToken), + '}' => self.fetch_flow_collection_end(TokenType::FlowMappingEndToken), + ',' => self.fetch_flow_entry(), + '-' if is_blankz(nc) => self.fetch_block_entry(), + '?' if self.flow_level > 0 || is_blankz(nc) => self.fetch_key(), + ':' if self.flow_level > 0 || is_blankz(nc) => self.fetch_value(), + // Is it an alias? + '*' => self.fetch_anchor(true), + // Is it an anchor? + '&' => self.fetch_anchor(false), + '!' => self.fetch_tag(), // Is it a literal scalar? - '|' if self.flow_level == 0 => try!(self.fetch_block_scalar(true)), + '|' if self.flow_level == 0 => self.fetch_block_scalar(true), // Is it a folded scalar? - '>' if self.flow_level == 0 => try!(self.fetch_block_scalar(false)), - '\'' => try!(self.fetch_flow_scalar(true)), - '"' => try!(self.fetch_flow_scalar(false)), + '>' if self.flow_level == 0 => self.fetch_block_scalar(false), + '\'' => self.fetch_flow_scalar(true), + '"' => self.fetch_flow_scalar(false), // plain scalar - '-' if !is_blankz(nc) => try!(self.fetch_plain_scalar()), - ':' | '?' if !is_blankz(nc) && self.flow_level == 0 => try!(self.fetch_plain_scalar()), + '-' if !is_blankz(nc) => self.fetch_plain_scalar(), + ':' | '?' if !is_blankz(nc) && self.flow_level == 0 => self.fetch_plain_scalar(), '%' | '@' | '`' => return Err(ScanError::new(self.mark, &format!("unexpected character: `{}'", c))), - _ => try!(self.fetch_plain_scalar()), + _ => self.fetch_plain_scalar(), } - - Ok(()) } pub fn next_token(&mut self) -> Result, ScanError> { @@ -545,7 +554,7 @@ impl> Scanner { let start_mark = self.mark; let mut string = String::new(); self.lookahead(1); - while self.ch().is_alphabetic() { + while is_alpha(self.ch()) { string.push(self.ch()); self.skip(); self.lookahead(1); @@ -591,6 +600,187 @@ impl> Scanner { unimplemented!(); } + fn fetch_tag(&mut self) -> ScanResult { + try!(self.save_simple_key()); + self.disallow_simple_key(); + + let tok = try!(self.scan_tag()); + self.tokens.push_back(tok); + Ok(()) + } + + fn scan_tag(&mut self) -> Result { + let start_mark = self.mark; + let mut handle = String::new(); + let mut suffix = String::new(); + let mut secondary = false; + + // Check if the tag is in the canonical form (verbatim). + self.lookahead(2); + + if self.buffer[1] == '<' { + // Eat '!<' + self.skip(); + self.skip(); + suffix = try!(self.scan_tag_uri(false, false, &String::new(), &start_mark)); + + if self.ch() != '>' { + return Err(ScanError::new(start_mark, + "while scanning a tag, did not find the expected '>'")); + } + + self.skip(); + } else { + // The tag has either the '!suffix' or the '!handle!suffix' + handle = try!(self.scan_tag_handle(false, &start_mark)); + // Check if it is, indeed, handle. + if handle.len() >= 2 && handle.starts_with('!') && handle.ends_with('!') { + if handle == "!!" { + secondary = true; + } + suffix = try!(self.scan_tag_uri(false, secondary, &String::new(), &start_mark)); + } else { + suffix = try!(self.scan_tag_uri(false, false, &handle, &start_mark)); + handle = "!".to_string(); + // A special case: the '!' tag. Set the handle to '' and the + // suffix to '!'. + if suffix.len() == 0 { + handle.clear(); + suffix = "!".to_string(); + } + } + } + + self.lookahead(1); + if is_blankz(self.ch()) { + // XXX: ex 7.2, an empty scalar can follow a secondary tag + Ok(Token(start_mark, TokenType::TagToken(handle, suffix))) + } else { + Err(ScanError::new(start_mark, + "while scanning a tag, did not find expected whitespace or line break")) + } + } + + fn scan_tag_handle(&mut self, directive: bool, mark: &Marker) -> Result { + let mut string = String::new(); + self.lookahead(1); + if self.ch() != '!' { + return Err(ScanError::new(*mark, + "while scanning a tag, did not find expected '!'")); + } + + string.push(self.ch()); + self.skip(); + + self.lookahead(1); + while is_alpha(self.ch()) { + string.push(self.ch()); + self.skip(); + self.lookahead(1); + } + + // Check if the trailing character is '!' and copy it. + if self.ch() == '!' { + string.push(self.ch()); + self.skip(); + } else { + // It's either the '!' tag or not really a tag handle. If it's a %TAG + // directive, it's an error. If it's a tag token, it must be a part of + // URI. + if directive && string != "!" { + return Err(ScanError::new(*mark, + "while parsing a tag directive, did not find expected '!'")); + } + } + Ok(string) + } + + fn scan_tag_uri(&mut self, directive: bool, is_secondary: bool, + head: &String, mark: &Marker) -> Result { + let mut length = head.len(); + let mut string = String::new(); + + // Copy the head if needed. + // Note that we don't copy the leading '!' character. + if length > 1 { + string.extend(head.chars().skip(1)); + } + + self.lookahead(1); + /* + * The set of characters that may appear in URI is as follows: + * + * '0'-'9', 'A'-'Z', 'a'-'z', '_', '-', ';', '/', '?', ':', '@', '&', + * '=', '+', '$', ',', '.', '!', '~', '*', '\'', '(', ')', '[', ']', + * '%'. + */ + while match self.ch() { + ';' | '/' | '?' | ':' | '@' | '&' if !is_secondary => true, + '=' | '+' | '$' | ',' | '.' | '!' | '~' | '*' | '\'' | '(' | ')' | '[' | ']' if !is_secondary => true, + '%' => true, + c if is_alpha(c) => true, + _ => false + } { + // Check if it is a URI-escape sequence. + if self.ch() == '%' { + unimplemented!(); + } else { + string.push(self.ch()); + self.skip(); + } + + length += 1; + self.lookahead(1); + } + + if length == 0 { + return Err(ScanError::new(*mark, + "while parsing a tag, did not find expected tag URI")); + } + + Ok(string) + } + + fn fetch_anchor(&mut self, alias: bool) -> ScanResult { + try!(self.save_simple_key()); + self.disallow_simple_key(); + + let tok = try!(self.scan_anchor(alias)); + + self.tokens.push_back(tok); + + Ok(()) + } + + fn scan_anchor(&mut self, alias: bool) + -> Result { + let mut string = String::new(); + let start_mark = self.mark; + + self.skip(); + self.lookahead(1); + while is_alpha(self.ch()) { + string.push(self.ch()); + self.skip(); + self.lookahead(1); + } + + if string.is_empty() + || match self.ch() { + c if is_blankz(c) => false, + '?' | ':' | ',' | ']' | '}' | '%' | '@' | '`' => false, + _ => true + } { + return Err(ScanError::new(start_mark, "while scanning an anchor or alias, did not find expected alphabetic or numeric character")); + } + + if alias { + Ok(Token(start_mark, TokenType::AliasToken(string))) + } else { + Ok(Token(start_mark, TokenType::AnchorToken(string))) + } + } + fn fetch_flow_collection_start(&mut self, tok :TokenType) -> ScanResult { // The indicators '[' and '{' may start a simple key. try!(self.save_simple_key()); diff --git a/saphyr/src/yaml.rs b/saphyr/src/yaml.rs index 33522b2..585feb9 100644 --- a/saphyr/src/yaml.rs +++ b/saphyr/src/yaml.rs @@ -14,6 +14,7 @@ pub enum Yaml { Boolean(bool), Array(self::Array), Hash(self::Hash), + Alias(usize), Null, /// Access non-exist node by Index trait will return BadValue. /// This simplifies error handling of user. @@ -45,14 +46,14 @@ impl EventReceiver for YamlLoader { _ => unreachable!() } }, - Event::SequenceStart => { + Event::SequenceStart(_) => { self.doc_stack.push(Yaml::Array(Vec::new())); }, Event::SequenceEnd => { let node = self.doc_stack.pop().unwrap(); self.insert_new_node(node); }, - Event::MappingStart => { + Event::MappingStart(_) => { self.doc_stack.push(Yaml::Hash(Hash::new())); self.key_stack.push(Yaml::BadValue); }, @@ -61,7 +62,7 @@ impl EventReceiver for YamlLoader { let node = self.doc_stack.pop().unwrap(); self.insert_new_node(node); }, - Event::Scalar(ref v, style) => { + Event::Scalar(ref v, style, _) => { let node = if style != TScalarStyle::Plain { Yaml::String(v.clone()) } else { @@ -77,6 +78,10 @@ impl EventReceiver for YamlLoader { self.insert_new_node(node); }, + Event::Alias(id) => { + // XXX(chenyh): how to handle alias? + self.insert_new_node(Yaml::Alias(id)); + } _ => { /* ignore */ } } // println!("DOC {:?}", self.doc_stack); diff --git a/saphyr/tests/spec_test.rs b/saphyr/tests/spec_test.rs index a796403..f6342a3 100644 --- a/saphyr/tests/spec_test.rs +++ b/saphyr/tests/spec_test.rs @@ -27,17 +27,18 @@ impl EventReceiver for YamlChecker { let tev = match *ev { Event::DocumentStart => TestEvent::OnDocumentStart, Event::DocumentEnd => TestEvent::OnDocumentEnd, - Event::SequenceStart => TestEvent::OnSequenceStart, + Event::SequenceStart(..) => TestEvent::OnSequenceStart, Event::SequenceEnd => TestEvent::OnSequenceEnd, - Event::MappingStart => TestEvent::OnMapStart, + Event::MappingStart(..) => TestEvent::OnMapStart, Event::MappingEnd => TestEvent::OnMapEnd, - Event::Scalar(ref v, style) => { + Event::Scalar(ref v, style, _) => { if v == "~" && style == TScalarStyle::Plain { TestEvent::OnNull } else { TestEvent::OnScalar } }, + Event::Alias(_) => TestEvent::OnAlias, _ => { return } // ignore other events }; self.evs.push(tev); diff --git a/saphyr/tests/specs/libyaml_fail-03.yaml b/saphyr/tests/specs/libyaml_fail-03.yaml new file mode 100644 index 0000000..fc821dc --- /dev/null +++ b/saphyr/tests/specs/libyaml_fail-03.yaml @@ -0,0 +1,5 @@ +# ex 7.2 +{ + foo : !!str, + !!str : bar, +}