Add partial anchor support

This commit is contained in:
Yuheng Chen 2015-05-29 01:56:03 +08:00
parent 169ec43039
commit 3d7a97ad32
6 changed files with 293 additions and 50 deletions

11
saphyr/Readme.md Normal file
View file

@ -0,0 +1,11 @@
# yaml-rust
The missing Rust implementation for YAML 1.2.
## Specification Compliance
### Missing Features
* Tag directive
* Tag data types are ignored
* Alias & Anchor

View file

@ -36,18 +36,21 @@ pub enum Event {
StreamEnd,
DocumentStart,
DocumentEnd,
Alias,
Scalar(String, TScalarStyle),
SequenceStart,
// anchor_id
Alias(usize),
Scalar(String, TScalarStyle, usize),
// anchor_id
SequenceStart(usize),
SequenceEnd,
MappingStart,
// anchor_id
MappingStart(usize),
MappingEnd
}
impl Event {
fn empty_scalar() -> Event {
// a null scalar
Event::Scalar("~".to_string(), TScalarStyle::Plain)
Event::Scalar("~".to_string(), TScalarStyle::Plain, 0)
}
}
@ -111,7 +114,7 @@ impl<T: Iterator<Item=char>> Parser<T> {
return Ok(Event::StreamEnd);
}
let ev = try!(self.state_machine());
// println!("EV {:?}", ev);
println!("EV {:?}", ev);
recv.on_event(&ev);
Ok(ev)
}
@ -159,14 +162,16 @@ impl<T: Iterator<Item=char>> Parser<T> {
fn load_node<R: EventReceiver>(&mut self, first_ev: &Event, recv: &mut R)
-> Result<(), ScanError> {
match *first_ev {
Event::Alias => { unimplemented!() },
Event::Scalar(_, _) => {
Event::Alias(..) => {
Ok(())
},
Event::SequenceStart => {
Event::Scalar(_, _, _) => {
Ok(())
},
Event::SequenceStart(_) => {
self.load_sequence(first_ev, recv)
},
Event::MappingStart => {
Event::MappingStart(_) => {
self.load_mapping(first_ev, recv)
},
_ => { println!("UNREACHABLE EVENT: {:?}", first_ev);
@ -366,34 +371,60 @@ impl<T: Iterator<Item=char>> Parser<T> {
}
fn parse_node(&mut self, block: bool, indentless_sequence: bool) -> ParseResult {
let tok = try!(self.peek());
let mut tok = try!(self.peek());
let anchor_id = 0;
match tok.1 {
TokenType::AliasToken(v) => {
self.pop_state();
self.skip();
// TODO(chenyh): find anchor id
return Ok(Event::Alias(0));
},
TokenType::AnchorToken(..) => {
self.skip();
tok = try!(self.peek());
if let TokenType::TagToken(_, _) = tok.1 {
self.skip();
tok = try!(self.peek());
}
},
TokenType::TagToken(..) => {
// XXX: ex 7.2, an empty scalar can follow a secondary tag
// but we haven't implemented it
self.skip();
tok = try!(self.peek());
if let TokenType::AnchorToken(_) = tok.1 {
self.skip();
tok = try!(self.peek());
}
},
_ => {}
}
match tok.1 {
TokenType::AliasToken => unimplemented!(),
TokenType::AnchorToken => unimplemented!(),
TokenType::BlockEntryToken if indentless_sequence => {
self.state = State::IndentlessSequenceEntry;
Ok(Event::SequenceStart)
Ok(Event::SequenceStart(anchor_id))
},
TokenType::ScalarToken(style, v) => {
self.pop_state();
self.skip();
Ok(Event::Scalar(v, style))
Ok(Event::Scalar(v, style, anchor_id))
},
TokenType::FlowSequenceStartToken => {
self.state = State::FlowSequenceFirstEntry;
Ok(Event::SequenceStart)
Ok(Event::SequenceStart(anchor_id))
},
TokenType::FlowMappingStartToken => {
self.state = State::FlowMappingFirstKey;
Ok(Event::MappingStart)
Ok(Event::MappingStart(anchor_id))
},
TokenType::BlockSequenceStartToken if block => {
self.state = State::BlockSequenceFirstEntry;
Ok(Event::SequenceStart)
Ok(Event::SequenceStart(anchor_id))
},
TokenType::BlockMappingStartToken if block => {
self.state = State::BlockMappingFirstKey;
Ok(Event::MappingStart)
Ok(Event::MappingStart(anchor_id))
},
_ => { Err(ScanError::new(tok.0, "while parsing a node, did not find expected node content")) }
}
@ -574,7 +605,7 @@ impl<T: Iterator<Item=char>> Parser<T> {
TokenType::KeyToken => {
self.state = State::FlowSequenceEntryMappingKey;
self.skip();
Ok(Event::MappingStart)
Ok(Event::MappingStart(0))
}
_ => {
self.push_state(State::FlowSequenceEntry);

View file

@ -70,9 +70,10 @@ pub enum TokenType {
FlowEntryToken,
KeyToken,
ValueToken,
AliasToken,
AnchorToken,
TagToken,
AliasToken(String),
AnchorToken(String),
// handle, suffix
TagToken(String, String),
ScalarToken(TScalarStyle, String)
}
@ -158,6 +159,14 @@ fn is_digit(c: char) -> bool {
c >= '0' && c <= '9'
}
#[inline]
fn is_alpha(c: char) -> bool {
    // Despite the name, this accepts ASCII digits and letters as well as
    // '_' and '-'. Non-ASCII characters are rejected.
    (c >= '0' && c <= '9')
        || (c >= 'a' && c <= 'z')
        || (c >= 'A' && c <= 'Z')
        || c == '_'
        || c == '-'
}
#[inline]
fn is_hex(c: char) -> bool {
(c >= '0' && c <= '9')
|| (c >= 'a' && c <= 'f')
@ -335,32 +344,32 @@ impl<T: Iterator<Item=char>> Scanner<T> {
let c = self.buffer[0];
let nc = self.buffer[1];
match c {
'[' => try!(self.fetch_flow_collection_start(TokenType::FlowSequenceStartToken)),
'{' => try!(self.fetch_flow_collection_start(TokenType::FlowMappingStartToken)),
']' => try!(self.fetch_flow_collection_end(TokenType::FlowSequenceEndToken)),
'}' => try!(self.fetch_flow_collection_end(TokenType::FlowMappingEndToken)),
',' => try!(self.fetch_flow_entry()),
'-' if is_blankz(nc) => try!(self.fetch_block_entry()),
'?' if self.flow_level > 0 || is_blankz(nc) => try!(self.fetch_key()),
':' if self.flow_level > 0 || is_blankz(nc) => try!(self.fetch_value()),
'*' => unimplemented!(),
'&' => unimplemented!(),
'!' => unimplemented!(),
'[' => self.fetch_flow_collection_start(TokenType::FlowSequenceStartToken),
'{' => self.fetch_flow_collection_start(TokenType::FlowMappingStartToken),
']' => self.fetch_flow_collection_end(TokenType::FlowSequenceEndToken),
'}' => self.fetch_flow_collection_end(TokenType::FlowMappingEndToken),
',' => self.fetch_flow_entry(),
'-' if is_blankz(nc) => self.fetch_block_entry(),
'?' if self.flow_level > 0 || is_blankz(nc) => self.fetch_key(),
':' if self.flow_level > 0 || is_blankz(nc) => self.fetch_value(),
// Is it an alias?
'*' => self.fetch_anchor(true),
// Is it an anchor?
'&' => self.fetch_anchor(false),
'!' => self.fetch_tag(),
// Is it a literal scalar?
'|' if self.flow_level == 0 => try!(self.fetch_block_scalar(true)),
'|' if self.flow_level == 0 => self.fetch_block_scalar(true),
// Is it a folded scalar?
'>' if self.flow_level == 0 => try!(self.fetch_block_scalar(false)),
'\'' => try!(self.fetch_flow_scalar(true)),
'"' => try!(self.fetch_flow_scalar(false)),
'>' if self.flow_level == 0 => self.fetch_block_scalar(false),
'\'' => self.fetch_flow_scalar(true),
'"' => self.fetch_flow_scalar(false),
// plain scalar
'-' if !is_blankz(nc) => try!(self.fetch_plain_scalar()),
':' | '?' if !is_blankz(nc) && self.flow_level == 0 => try!(self.fetch_plain_scalar()),
'-' if !is_blankz(nc) => self.fetch_plain_scalar(),
':' | '?' if !is_blankz(nc) && self.flow_level == 0 => self.fetch_plain_scalar(),
'%' | '@' | '`' => return Err(ScanError::new(self.mark,
&format!("unexpected character: `{}'", c))),
_ => try!(self.fetch_plain_scalar()),
_ => self.fetch_plain_scalar(),
}
Ok(())
}
pub fn next_token(&mut self) -> Result<Option<Token>, ScanError> {
@ -545,7 +554,7 @@ impl<T: Iterator<Item=char>> Scanner<T> {
let start_mark = self.mark;
let mut string = String::new();
self.lookahead(1);
while self.ch().is_alphabetic() {
while is_alpha(self.ch()) {
string.push(self.ch());
self.skip();
self.lookahead(1);
@ -591,6 +600,187 @@ impl<T: Iterator<Item=char>> Scanner<T> {
unimplemented!();
}
/// Fetches a tag token (`!...`) and appends it to the token queue.
///
/// Any error returned by `save_simple_key` or `scan_tag` is propagated
/// to the caller and no token is queued in that case.
fn fetch_tag(&mut self) -> ScanResult {
// Register a possible simple key at this position, then forbid another one
// from starting right after the tag.
// NOTE(review): presumably mirrors libyaml's tag handling - confirm.
try!(self.save_simple_key());
self.disallow_simple_key();
// Scan first, push second: the token is only queued on success.
let tok = try!(self.scan_tag());
self.tokens.push_back(tok);
Ok(())
}
/// Scans a single tag and returns it as `TokenType::TagToken(handle, suffix)`,
/// marked with the position where the tag started.
///
/// Three spellings are recognised:
/// * verbatim `!<uri>`: `handle` stays empty, `suffix` is the URI;
/// * `!handle!suffix`: both parts are returned as written;
/// * `!suffix`: `handle` becomes `"!"`; a lone `!` yields an empty handle
///   and the suffix `"!"`.
///
/// Returns an error if a verbatim tag is not closed by `>`, if the character
/// after the tag is rejected by `is_blankz`, or if `scan_tag_handle` /
/// `scan_tag_uri` fail.
fn scan_tag(&mut self) -> Result<Token, ScanError> {
let start_mark = self.mark;
let mut handle = String::new();
let mut suffix = String::new();
let mut secondary = false;
// Check if the tag is in the canonical form (verbatim).
self.lookahead(2);
if self.buffer[1] == '<' {
// Eat '!<'
self.skip();
self.skip();
suffix = try!(self.scan_tag_uri(false, false, &String::new(), &start_mark));
if self.ch() != '>' {
return Err(ScanError::new(start_mark,
"while scanning a tag, did not find the expected '>'"));
}
// Eat the closing '>'
self.skip();
} else {
// The tag has either the '!suffix' or the '!handle!suffix'
handle = try!(self.scan_tag_handle(false, &start_mark));
// Check if it is, indeed, handle.
if handle.len() >= 2 && handle.starts_with('!') && handle.ends_with('!') {
// For the secondary handle '!!' the suffix scan is told to use its
// restricted character set (see the `is_secondary` parameter).
if handle == "!!" {
secondary = true;
}
suffix = try!(self.scan_tag_uri(false, secondary, &String::new(), &start_mark));
} else {
// Not a real handle: what was scanned so far is the beginning of
// the suffix, so it is passed on as the `head` of the URI.
suffix = try!(self.scan_tag_uri(false, false, &handle, &start_mark));
handle = "!".to_string();
// A special case: the '!' tag. Set the handle to '' and the
// suffix to '!'.
if suffix.len() == 0 {
handle.clear();
suffix = "!".to_string();
}
}
}
// The tag must be followed by a character accepted by `is_blankz`.
self.lookahead(1);
if is_blankz(self.ch()) {
// XXX: ex 7.2, an empty scalar can follow a secondary tag
Ok(Token(start_mark, TokenType::TagToken(handle, suffix)))
} else {
Err(ScanError::new(start_mark,
"while scanning a tag, did not find expected whitespace or line break"))
}
}
/// Scans a tag handle: a leading `!`, a run of characters accepted by
/// `is_alpha`, and an optional closing `!`.
///
/// The returned string contains every consumed character, including the
/// `!` delimiters (e.g. `"!"`, `"!!"`, `"!e!"`, or `"!foo"` when no closing
/// `!` follows).
///
/// * `directive` - when true, a handle other than `"!"` that lacks the
///   closing `!` is reported as an error.
/// * `mark` - position attached to any error raised here.
fn scan_tag_handle(&mut self, directive: bool, mark: &Marker) -> Result<String, ScanError> {
let mut string = String::new();
self.lookahead(1);
if self.ch() != '!' {
return Err(ScanError::new(*mark,
"while scanning a tag, did not find expected '!'"));
}
// Copy the leading '!'.
string.push(self.ch());
self.skip();
// Copy the handle name, one character at a time.
self.lookahead(1);
while is_alpha(self.ch()) {
string.push(self.ch());
self.skip();
self.lookahead(1);
}
// Check if the trailing character is '!' and copy it.
if self.ch() == '!' {
string.push(self.ch());
self.skip();
} else {
// It's either the '!' tag or not really a tag handle. If it's a %TAG
// directive, it's an error. If it's a tag token, it must be a part of
// URI.
if directive && string != "!" {
return Err(ScanError::new(*mark,
"while parsing a tag directive, did not find expected '!'"));
}
}
Ok(string)
}
/// Scans the URI (suffix) part of a tag and returns it as a `String`.
///
/// * `directive` - accepted for parity with `scan_tag_handle`; it is not
///   consulted by this function.
/// * `is_secondary` - when true, only characters accepted by `is_alpha`
///   (and `%`) are treated as part of the URI, so the scan stops at
///   punctuation such as `,` or `]`.
/// * `head` - text already consumed by the caller that belongs to the URI.
///   Its first character (the leading `!`) is dropped; the rest is used as
///   the start of the result.
/// * `mark` - position attached to any error raised here.
///
/// # Errors
///
/// Returns a `ScanError` when neither `head` nor the input contributes any
/// character, or when a `%` URI escape is encountered. Escapes are not
/// decoded yet, and reporting an error keeps malformed or unsupported input
/// from aborting the whole process.
fn scan_tag_uri(&mut self, directive: bool, is_secondary: bool,
                head: &String, mark: &Marker) -> Result<String, ScanError> {
    // `length` counts the characters of `head` (including its leading '!')
    // plus every character consumed below; it only serves the emptiness
    // check at the end.
    let mut length = head.len();
    let mut string = String::new();

    // Copy the head if needed.
    // Note that we don't copy the leading '!' character.
    if length > 1 {
        string.extend(head.chars().skip(1));
    }

    self.lookahead(1);
    /*
     * The set of characters that may appear in URI is as follows:
     *
     *      '0'-'9', 'A'-'Z', 'a'-'z', '_', '-', ';', '/', '?', ':', '@', '&',
     *      '=', '+', '$', ',', '.', '!', '~', '*', '\'', '(', ')', '[', ']',
     *      '%'.
     */
    while match self.ch() {
        ';' | '/' | '?' | ':' | '@' | '&' if !is_secondary => true,
        '=' | '+' | '$' | ',' | '.' | '!' | '~' | '*' | '\'' | '(' | ')' | '[' | ']' if !is_secondary => true,
        '%' => true,
        c if is_alpha(c) => true,
        _ => false
    } {
        // URI escape sequences ("%XX") are not decoded yet. Fail with a
        // regular scan error instead of panicking on user-supplied input.
        if self.ch() == '%' {
            return Err(ScanError::new(*mark,
                "while parsing a tag, URI escape sequences are not supported"));
        }
        string.push(self.ch());
        self.skip();
        length += 1;
        self.lookahead(1);
    }

    if length == 0 {
        return Err(ScanError::new(*mark,
            "while parsing a tag, did not find expected tag URI"));
    }
    Ok(string)
}
/// Fetches an anchor (`&name`) or alias (`*name`) token and appends it to
/// the token queue.
///
/// * `alias` - true when the indicator was `*` (alias), false for `&`
///   (anchor); it is forwarded unchanged to `scan_anchor`.
///
/// Any error returned by `save_simple_key` or `scan_anchor` is propagated
/// and no token is queued in that case.
fn fetch_anchor(&mut self, alias: bool) -> ScanResult {
// Register a possible simple key at this position, then forbid another one
// from starting right after the anchor/alias.
// NOTE(review): presumably mirrors libyaml's anchor handling - confirm.
try!(self.save_simple_key());
self.disallow_simple_key();
let tok = try!(self.scan_anchor(alias));
self.tokens.push_back(tok);
Ok(())
}
/// Scans the name that follows an `&` or `*` indicator and wraps it in an
/// `AnchorToken` (when `alias` is false) or an `AliasToken` (when `alias`
/// is true), marked with the position of the indicator.
///
/// The name is the longest run of characters accepted by `is_alpha`. An
/// error is returned when the name is empty or when the character right
/// after it is neither accepted by `is_blankz` nor one of
/// `? : , ] } % @` or a backtick.
fn scan_anchor(&mut self, alias: bool)
    -> Result<Token, ScanError> {
    let start_mark = self.mark;
    let mut name = String::new();

    // Step over the indicator character itself.
    self.skip();
    self.lookahead(1);
    loop {
        let c = self.ch();
        if !is_alpha(c) {
            break;
        }
        name.push(c);
        self.skip();
        self.lookahead(1);
    }

    // Characters that may legally follow an anchor or alias name.
    let ends_name = |c: char| match c {
        '?' | ':' | ',' | ']' | '}' | '%' | '@' | '`' => true,
        other => is_blankz(other),
    };

    if name.is_empty() || !ends_name(self.ch()) {
        return Err(ScanError::new(start_mark, "while scanning an anchor or alias, did not find expected alphabetic or numeric character"));
    }

    let token_type = if alias {
        TokenType::AliasToken(name)
    } else {
        TokenType::AnchorToken(name)
    };
    Ok(Token(start_mark, token_type))
}
fn fetch_flow_collection_start(&mut self, tok :TokenType) -> ScanResult {
// The indicators '[' and '{' may start a simple key.
try!(self.save_simple_key());

View file

@ -14,6 +14,7 @@ pub enum Yaml {
Boolean(bool),
Array(self::Array),
Hash(self::Hash),
Alias(usize),
Null,
/// Access non-exist node by Index trait will return BadValue.
/// This simplifies error handling of user.
@ -45,14 +46,14 @@ impl EventReceiver for YamlLoader {
_ => unreachable!()
}
},
Event::SequenceStart => {
Event::SequenceStart(_) => {
self.doc_stack.push(Yaml::Array(Vec::new()));
},
Event::SequenceEnd => {
let node = self.doc_stack.pop().unwrap();
self.insert_new_node(node);
},
Event::MappingStart => {
Event::MappingStart(_) => {
self.doc_stack.push(Yaml::Hash(Hash::new()));
self.key_stack.push(Yaml::BadValue);
},
@ -61,7 +62,7 @@ impl EventReceiver for YamlLoader {
let node = self.doc_stack.pop().unwrap();
self.insert_new_node(node);
},
Event::Scalar(ref v, style) => {
Event::Scalar(ref v, style, _) => {
let node = if style != TScalarStyle::Plain {
Yaml::String(v.clone())
} else {
@ -77,6 +78,10 @@ impl EventReceiver for YamlLoader {
self.insert_new_node(node);
},
Event::Alias(id) => {
// XXX(chenyh): how to handle alias?
self.insert_new_node(Yaml::Alias(id));
}
_ => { /* ignore */ }
}
// println!("DOC {:?}", self.doc_stack);

View file

@ -27,17 +27,18 @@ impl EventReceiver for YamlChecker {
let tev = match *ev {
Event::DocumentStart => TestEvent::OnDocumentStart,
Event::DocumentEnd => TestEvent::OnDocumentEnd,
Event::SequenceStart => TestEvent::OnSequenceStart,
Event::SequenceStart(..) => TestEvent::OnSequenceStart,
Event::SequenceEnd => TestEvent::OnSequenceEnd,
Event::MappingStart => TestEvent::OnMapStart,
Event::MappingStart(..) => TestEvent::OnMapStart,
Event::MappingEnd => TestEvent::OnMapEnd,
Event::Scalar(ref v, style) => {
Event::Scalar(ref v, style, _) => {
if v == "~" && style == TScalarStyle::Plain {
TestEvent::OnNull
} else {
TestEvent::OnScalar
}
},
Event::Alias(_) => TestEvent::OnAlias,
_ => { return } // ignore other events
};
self.evs.push(tev);

View file

@ -0,0 +1,5 @@
# ex 7.2
{
foo : !!str,
!!str : bar,
}