Add partial anchor support
This commit is contained in:
parent
5237e538eb
commit
5d0683c396
6 changed files with 293 additions and 50 deletions
11
parser/Readme.md
Normal file
11
parser/Readme.md
Normal file
|
@ -0,0 +1,11 @@
|
|||
# yaml-rust
|
||||
|
||||
The missing Rust implementation for YAML 1.2.
|
||||
|
||||
## Specification Compliance
|
||||
|
||||
### Missing Features
|
||||
|
||||
* Tag directive
|
||||
* Tag data types are ignored
|
||||
* Alias & Anchor
|
|
@ -36,18 +36,21 @@ pub enum Event {
|
|||
StreamEnd,
|
||||
DocumentStart,
|
||||
DocumentEnd,
|
||||
Alias,
|
||||
Scalar(String, TScalarStyle),
|
||||
SequenceStart,
|
||||
// anchor_id
|
||||
Alias(usize),
|
||||
Scalar(String, TScalarStyle, usize),
|
||||
// anchor_id
|
||||
SequenceStart(usize),
|
||||
SequenceEnd,
|
||||
MappingStart,
|
||||
// anchor_id
|
||||
MappingStart(usize),
|
||||
MappingEnd
|
||||
}
|
||||
|
||||
impl Event {
|
||||
fn empty_scalar() -> Event {
|
||||
// a null scalar
|
||||
Event::Scalar("~".to_string(), TScalarStyle::Plain)
|
||||
Event::Scalar("~".to_string(), TScalarStyle::Plain, 0)
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -111,7 +114,7 @@ impl<T: Iterator<Item=char>> Parser<T> {
|
|||
return Ok(Event::StreamEnd);
|
||||
}
|
||||
let ev = try!(self.state_machine());
|
||||
// println!("EV {:?}", ev);
|
||||
println!("EV {:?}", ev);
|
||||
recv.on_event(&ev);
|
||||
Ok(ev)
|
||||
}
|
||||
|
@ -159,14 +162,16 @@ impl<T: Iterator<Item=char>> Parser<T> {
|
|||
fn load_node<R: EventReceiver>(&mut self, first_ev: &Event, recv: &mut R)
|
||||
-> Result<(), ScanError> {
|
||||
match *first_ev {
|
||||
Event::Alias => { unimplemented!() },
|
||||
Event::Scalar(_, _) => {
|
||||
Event::Alias(..) => {
|
||||
Ok(())
|
||||
},
|
||||
Event::SequenceStart => {
|
||||
Event::Scalar(_, _, _) => {
|
||||
Ok(())
|
||||
},
|
||||
Event::SequenceStart(_) => {
|
||||
self.load_sequence(first_ev, recv)
|
||||
},
|
||||
Event::MappingStart => {
|
||||
Event::MappingStart(_) => {
|
||||
self.load_mapping(first_ev, recv)
|
||||
},
|
||||
_ => { println!("UNREACHABLE EVENT: {:?}", first_ev);
|
||||
|
@ -366,34 +371,60 @@ impl<T: Iterator<Item=char>> Parser<T> {
|
|||
}
|
||||
|
||||
fn parse_node(&mut self, block: bool, indentless_sequence: bool) -> ParseResult {
|
||||
let tok = try!(self.peek());
|
||||
let mut tok = try!(self.peek());
|
||||
let anchor_id = 0;
|
||||
match tok.1 {
|
||||
TokenType::AliasToken(v) => {
|
||||
self.pop_state();
|
||||
self.skip();
|
||||
// TODO(chenyh): find anchor id
|
||||
return Ok(Event::Alias(0));
|
||||
},
|
||||
TokenType::AnchorToken(..) => {
|
||||
self.skip();
|
||||
tok = try!(self.peek());
|
||||
if let TokenType::TagToken(_, _) = tok.1 {
|
||||
self.skip();
|
||||
tok = try!(self.peek());
|
||||
}
|
||||
},
|
||||
TokenType::TagToken(..) => {
|
||||
// XXX: ex 7.2, an empty scalar can follow a secondary tag
|
||||
// but we haven't implemented it
|
||||
self.skip();
|
||||
tok = try!(self.peek());
|
||||
if let TokenType::AnchorToken(_) = tok.1 {
|
||||
self.skip();
|
||||
tok = try!(self.peek());
|
||||
}
|
||||
},
|
||||
_ => {}
|
||||
}
|
||||
match tok.1 {
|
||||
TokenType::AliasToken => unimplemented!(),
|
||||
TokenType::AnchorToken => unimplemented!(),
|
||||
TokenType::BlockEntryToken if indentless_sequence => {
|
||||
self.state = State::IndentlessSequenceEntry;
|
||||
Ok(Event::SequenceStart)
|
||||
Ok(Event::SequenceStart(anchor_id))
|
||||
},
|
||||
TokenType::ScalarToken(style, v) => {
|
||||
self.pop_state();
|
||||
self.skip();
|
||||
Ok(Event::Scalar(v, style))
|
||||
Ok(Event::Scalar(v, style, anchor_id))
|
||||
},
|
||||
TokenType::FlowSequenceStartToken => {
|
||||
self.state = State::FlowSequenceFirstEntry;
|
||||
Ok(Event::SequenceStart)
|
||||
Ok(Event::SequenceStart(anchor_id))
|
||||
},
|
||||
TokenType::FlowMappingStartToken => {
|
||||
self.state = State::FlowMappingFirstKey;
|
||||
Ok(Event::MappingStart)
|
||||
Ok(Event::MappingStart(anchor_id))
|
||||
},
|
||||
TokenType::BlockSequenceStartToken if block => {
|
||||
self.state = State::BlockSequenceFirstEntry;
|
||||
Ok(Event::SequenceStart)
|
||||
Ok(Event::SequenceStart(anchor_id))
|
||||
},
|
||||
TokenType::BlockMappingStartToken if block => {
|
||||
self.state = State::BlockMappingFirstKey;
|
||||
Ok(Event::MappingStart)
|
||||
Ok(Event::MappingStart(anchor_id))
|
||||
},
|
||||
_ => { Err(ScanError::new(tok.0, "while parsing a node, did not find expected node content")) }
|
||||
}
|
||||
|
@ -574,7 +605,7 @@ impl<T: Iterator<Item=char>> Parser<T> {
|
|||
TokenType::KeyToken => {
|
||||
self.state = State::FlowSequenceEntryMappingKey;
|
||||
self.skip();
|
||||
Ok(Event::MappingStart)
|
||||
Ok(Event::MappingStart(0))
|
||||
}
|
||||
_ => {
|
||||
self.push_state(State::FlowSequenceEntry);
|
||||
|
|
|
@ -70,9 +70,10 @@ pub enum TokenType {
|
|||
FlowEntryToken,
|
||||
KeyToken,
|
||||
ValueToken,
|
||||
AliasToken,
|
||||
AnchorToken,
|
||||
TagToken,
|
||||
AliasToken(String),
|
||||
AnchorToken(String),
|
||||
// handle, suffix
|
||||
TagToken(String, String),
|
||||
ScalarToken(TScalarStyle, String)
|
||||
}
|
||||
|
||||
|
@ -158,6 +159,14 @@ fn is_digit(c: char) -> bool {
|
|||
c >= '0' && c <= '9'
|
||||
}
|
||||
/// Returns `true` when `c` is an ASCII letter, an ASCII digit, `_` or `-`.
///
/// Despite the name this is wider than "alphabetic": it is the character
/// class the scanner accepts inside anchor/alias names, tag handles and the
/// unreserved part of tag URIs.
#[inline]
fn is_alpha(c: char) -> bool {
    match c {
        // `..=` replaces the deprecated `...` range-pattern syntax, which is
        // rejected outright from the 2021 edition onwards.
        '0'..='9' | 'a'..='z' | 'A'..='Z' => true,
        '_' | '-' => true,
        _ => false,
    }
}
|
||||
#[inline]
|
||||
fn is_hex(c: char) -> bool {
|
||||
(c >= '0' && c <= '9')
|
||||
|| (c >= 'a' && c <= 'f')
|
||||
|
@ -335,32 +344,32 @@ impl<T: Iterator<Item=char>> Scanner<T> {
|
|||
let c = self.buffer[0];
|
||||
let nc = self.buffer[1];
|
||||
match c {
|
||||
'[' => try!(self.fetch_flow_collection_start(TokenType::FlowSequenceStartToken)),
|
||||
'{' => try!(self.fetch_flow_collection_start(TokenType::FlowMappingStartToken)),
|
||||
']' => try!(self.fetch_flow_collection_end(TokenType::FlowSequenceEndToken)),
|
||||
'}' => try!(self.fetch_flow_collection_end(TokenType::FlowMappingEndToken)),
|
||||
',' => try!(self.fetch_flow_entry()),
|
||||
'-' if is_blankz(nc) => try!(self.fetch_block_entry()),
|
||||
'?' if self.flow_level > 0 || is_blankz(nc) => try!(self.fetch_key()),
|
||||
':' if self.flow_level > 0 || is_blankz(nc) => try!(self.fetch_value()),
|
||||
'*' => unimplemented!(),
|
||||
'&' => unimplemented!(),
|
||||
'!' => unimplemented!(),
|
||||
'[' => self.fetch_flow_collection_start(TokenType::FlowSequenceStartToken),
|
||||
'{' => self.fetch_flow_collection_start(TokenType::FlowMappingStartToken),
|
||||
']' => self.fetch_flow_collection_end(TokenType::FlowSequenceEndToken),
|
||||
'}' => self.fetch_flow_collection_end(TokenType::FlowMappingEndToken),
|
||||
',' => self.fetch_flow_entry(),
|
||||
'-' if is_blankz(nc) => self.fetch_block_entry(),
|
||||
'?' if self.flow_level > 0 || is_blankz(nc) => self.fetch_key(),
|
||||
':' if self.flow_level > 0 || is_blankz(nc) => self.fetch_value(),
|
||||
// Is it an alias?
|
||||
'*' => self.fetch_anchor(true),
|
||||
// Is it an anchor?
|
||||
'&' => self.fetch_anchor(false),
|
||||
'!' => self.fetch_tag(),
|
||||
// Is it a literal scalar?
|
||||
'|' if self.flow_level == 0 => try!(self.fetch_block_scalar(true)),
|
||||
'|' if self.flow_level == 0 => self.fetch_block_scalar(true),
|
||||
// Is it a folded scalar?
|
||||
'>' if self.flow_level == 0 => try!(self.fetch_block_scalar(false)),
|
||||
'\'' => try!(self.fetch_flow_scalar(true)),
|
||||
'"' => try!(self.fetch_flow_scalar(false)),
|
||||
'>' if self.flow_level == 0 => self.fetch_block_scalar(false),
|
||||
'\'' => self.fetch_flow_scalar(true),
|
||||
'"' => self.fetch_flow_scalar(false),
|
||||
// plain scalar
|
||||
'-' if !is_blankz(nc) => try!(self.fetch_plain_scalar()),
|
||||
':' | '?' if !is_blankz(nc) && self.flow_level == 0 => try!(self.fetch_plain_scalar()),
|
||||
'-' if !is_blankz(nc) => self.fetch_plain_scalar(),
|
||||
':' | '?' if !is_blankz(nc) && self.flow_level == 0 => self.fetch_plain_scalar(),
|
||||
'%' | '@' | '`' => return Err(ScanError::new(self.mark,
|
||||
&format!("unexpected character: `{}'", c))),
|
||||
_ => try!(self.fetch_plain_scalar()),
|
||||
_ => self.fetch_plain_scalar(),
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn next_token(&mut self) -> Result<Option<Token>, ScanError> {
|
||||
|
@ -545,7 +554,7 @@ impl<T: Iterator<Item=char>> Scanner<T> {
|
|||
let start_mark = self.mark;
|
||||
let mut string = String::new();
|
||||
self.lookahead(1);
|
||||
while self.ch().is_alphabetic() {
|
||||
while is_alpha(self.ch()) {
|
||||
string.push(self.ch());
|
||||
self.skip();
|
||||
self.lookahead(1);
|
||||
|
@ -591,6 +600,187 @@ impl<T: Iterator<Item=char>> Scanner<T> {
|
|||
unimplemented!();
|
||||
}
|
||||
|
||||
fn fetch_tag(&mut self) -> ScanResult {
|
||||
try!(self.save_simple_key());
|
||||
self.disallow_simple_key();
|
||||
|
||||
let tok = try!(self.scan_tag());
|
||||
self.tokens.push_back(tok);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn scan_tag(&mut self) -> Result<Token, ScanError> {
|
||||
let start_mark = self.mark;
|
||||
let mut handle = String::new();
|
||||
let mut suffix = String::new();
|
||||
let mut secondary = false;
|
||||
|
||||
// Check if the tag is in the canonical form (verbatim).
|
||||
self.lookahead(2);
|
||||
|
||||
if self.buffer[1] == '<' {
|
||||
// Eat '!<'
|
||||
self.skip();
|
||||
self.skip();
|
||||
suffix = try!(self.scan_tag_uri(false, false, &String::new(), &start_mark));
|
||||
|
||||
if self.ch() != '>' {
|
||||
return Err(ScanError::new(start_mark,
|
||||
"while scanning a tag, did not find the expected '>'"));
|
||||
}
|
||||
|
||||
self.skip();
|
||||
} else {
|
||||
// The tag has either the '!suffix' or the '!handle!suffix'
|
||||
handle = try!(self.scan_tag_handle(false, &start_mark));
|
||||
// Check if it is, indeed, handle.
|
||||
if handle.len() >= 2 && handle.starts_with('!') && handle.ends_with('!') {
|
||||
if handle == "!!" {
|
||||
secondary = true;
|
||||
}
|
||||
suffix = try!(self.scan_tag_uri(false, secondary, &String::new(), &start_mark));
|
||||
} else {
|
||||
suffix = try!(self.scan_tag_uri(false, false, &handle, &start_mark));
|
||||
handle = "!".to_string();
|
||||
// A special case: the '!' tag. Set the handle to '' and the
|
||||
// suffix to '!'.
|
||||
if suffix.len() == 0 {
|
||||
handle.clear();
|
||||
suffix = "!".to_string();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
self.lookahead(1);
|
||||
if is_blankz(self.ch()) {
|
||||
// XXX: ex 7.2, an empty scalar can follow a secondary tag
|
||||
Ok(Token(start_mark, TokenType::TagToken(handle, suffix)))
|
||||
} else {
|
||||
Err(ScanError::new(start_mark,
|
||||
"while scanning a tag, did not find expected whitespace or line break"))
|
||||
}
|
||||
}
|
||||
|
||||
fn scan_tag_handle(&mut self, directive: bool, mark: &Marker) -> Result<String, ScanError> {
|
||||
let mut string = String::new();
|
||||
self.lookahead(1);
|
||||
if self.ch() != '!' {
|
||||
return Err(ScanError::new(*mark,
|
||||
"while scanning a tag, did not find expected '!'"));
|
||||
}
|
||||
|
||||
string.push(self.ch());
|
||||
self.skip();
|
||||
|
||||
self.lookahead(1);
|
||||
while is_alpha(self.ch()) {
|
||||
string.push(self.ch());
|
||||
self.skip();
|
||||
self.lookahead(1);
|
||||
}
|
||||
|
||||
// Check if the trailing character is '!' and copy it.
|
||||
if self.ch() == '!' {
|
||||
string.push(self.ch());
|
||||
self.skip();
|
||||
} else {
|
||||
// It's either the '!' tag or not really a tag handle. If it's a %TAG
|
||||
// directive, it's an error. If it's a tag token, it must be a part of
|
||||
// URI.
|
||||
if directive && string != "!" {
|
||||
return Err(ScanError::new(*mark,
|
||||
"while parsing a tag directive, did not find expected '!'"));
|
||||
}
|
||||
}
|
||||
Ok(string)
|
||||
}
|
||||
|
||||
fn scan_tag_uri(&mut self, directive: bool, is_secondary: bool,
|
||||
head: &String, mark: &Marker) -> Result<String, ScanError> {
|
||||
let mut length = head.len();
|
||||
let mut string = String::new();
|
||||
|
||||
// Copy the head if needed.
|
||||
// Note that we don't copy the leading '!' character.
|
||||
if length > 1 {
|
||||
string.extend(head.chars().skip(1));
|
||||
}
|
||||
|
||||
self.lookahead(1);
|
||||
/*
|
||||
* The set of characters that may appear in URI is as follows:
|
||||
*
|
||||
* '0'-'9', 'A'-'Z', 'a'-'z', '_', '-', ';', '/', '?', ':', '@', '&',
|
||||
* '=', '+', '$', ',', '.', '!', '~', '*', '\'', '(', ')', '[', ']',
|
||||
* '%'.
|
||||
*/
|
||||
while match self.ch() {
|
||||
';' | '/' | '?' | ':' | '@' | '&' if !is_secondary => true,
|
||||
'=' | '+' | '$' | ',' | '.' | '!' | '~' | '*' | '\'' | '(' | ')' | '[' | ']' if !is_secondary => true,
|
||||
'%' => true,
|
||||
c if is_alpha(c) => true,
|
||||
_ => false
|
||||
} {
|
||||
// Check if it is a URI-escape sequence.
|
||||
if self.ch() == '%' {
|
||||
unimplemented!();
|
||||
} else {
|
||||
string.push(self.ch());
|
||||
self.skip();
|
||||
}
|
||||
|
||||
length += 1;
|
||||
self.lookahead(1);
|
||||
}
|
||||
|
||||
if length == 0 {
|
||||
return Err(ScanError::new(*mark,
|
||||
"while parsing a tag, did not find expected tag URI"));
|
||||
}
|
||||
|
||||
Ok(string)
|
||||
}
|
||||
|
||||
fn fetch_anchor(&mut self, alias: bool) -> ScanResult {
|
||||
try!(self.save_simple_key());
|
||||
self.disallow_simple_key();
|
||||
|
||||
let tok = try!(self.scan_anchor(alias));
|
||||
|
||||
self.tokens.push_back(tok);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn scan_anchor(&mut self, alias: bool)
|
||||
-> Result<Token, ScanError> {
|
||||
let mut string = String::new();
|
||||
let start_mark = self.mark;
|
||||
|
||||
self.skip();
|
||||
self.lookahead(1);
|
||||
while is_alpha(self.ch()) {
|
||||
string.push(self.ch());
|
||||
self.skip();
|
||||
self.lookahead(1);
|
||||
}
|
||||
|
||||
if string.is_empty()
|
||||
|| match self.ch() {
|
||||
c if is_blankz(c) => false,
|
||||
'?' | ':' | ',' | ']' | '}' | '%' | '@' | '`' => false,
|
||||
_ => true
|
||||
} {
|
||||
return Err(ScanError::new(start_mark, "while scanning an anchor or alias, did not find expected alphabetic or numeric character"));
|
||||
}
|
||||
|
||||
if alias {
|
||||
Ok(Token(start_mark, TokenType::AliasToken(string)))
|
||||
} else {
|
||||
Ok(Token(start_mark, TokenType::AnchorToken(string)))
|
||||
}
|
||||
}
|
||||
|
||||
fn fetch_flow_collection_start(&mut self, tok :TokenType) -> ScanResult {
|
||||
// The indicators '[' and '{' may start a simple key.
|
||||
try!(self.save_simple_key());
|
||||
|
|
|
@ -14,6 +14,7 @@ pub enum Yaml {
|
|||
Boolean(bool),
|
||||
Array(self::Array),
|
||||
Hash(self::Hash),
|
||||
Alias(usize),
|
||||
Null,
|
||||
/// Access non-exist node by Index trait will return BadValue.
|
||||
/// This simplifies error handling of user.
|
||||
|
@ -45,14 +46,14 @@ impl EventReceiver for YamlLoader {
|
|||
_ => unreachable!()
|
||||
}
|
||||
},
|
||||
Event::SequenceStart => {
|
||||
Event::SequenceStart(_) => {
|
||||
self.doc_stack.push(Yaml::Array(Vec::new()));
|
||||
},
|
||||
Event::SequenceEnd => {
|
||||
let node = self.doc_stack.pop().unwrap();
|
||||
self.insert_new_node(node);
|
||||
},
|
||||
Event::MappingStart => {
|
||||
Event::MappingStart(_) => {
|
||||
self.doc_stack.push(Yaml::Hash(Hash::new()));
|
||||
self.key_stack.push(Yaml::BadValue);
|
||||
},
|
||||
|
@ -61,7 +62,7 @@ impl EventReceiver for YamlLoader {
|
|||
let node = self.doc_stack.pop().unwrap();
|
||||
self.insert_new_node(node);
|
||||
},
|
||||
Event::Scalar(ref v, style) => {
|
||||
Event::Scalar(ref v, style, _) => {
|
||||
let node = if style != TScalarStyle::Plain {
|
||||
Yaml::String(v.clone())
|
||||
} else {
|
||||
|
@ -77,6 +78,10 @@ impl EventReceiver for YamlLoader {
|
|||
|
||||
self.insert_new_node(node);
|
||||
},
|
||||
Event::Alias(id) => {
|
||||
// XXX(chenyh): how to handle alias?
|
||||
self.insert_new_node(Yaml::Alias(id));
|
||||
}
|
||||
_ => { /* ignore */ }
|
||||
}
|
||||
// println!("DOC {:?}", self.doc_stack);
|
||||
|
|
|
@ -27,17 +27,18 @@ impl EventReceiver for YamlChecker {
|
|||
let tev = match *ev {
|
||||
Event::DocumentStart => TestEvent::OnDocumentStart,
|
||||
Event::DocumentEnd => TestEvent::OnDocumentEnd,
|
||||
Event::SequenceStart => TestEvent::OnSequenceStart,
|
||||
Event::SequenceStart(..) => TestEvent::OnSequenceStart,
|
||||
Event::SequenceEnd => TestEvent::OnSequenceEnd,
|
||||
Event::MappingStart => TestEvent::OnMapStart,
|
||||
Event::MappingStart(..) => TestEvent::OnMapStart,
|
||||
Event::MappingEnd => TestEvent::OnMapEnd,
|
||||
Event::Scalar(ref v, style) => {
|
||||
Event::Scalar(ref v, style, _) => {
|
||||
if v == "~" && style == TScalarStyle::Plain {
|
||||
TestEvent::OnNull
|
||||
} else {
|
||||
TestEvent::OnScalar
|
||||
}
|
||||
},
|
||||
Event::Alias(_) => TestEvent::OnAlias,
|
||||
_ => { return } // ignore other events
|
||||
};
|
||||
self.evs.push(tev);
|
||||
|
|
5
parser/tests/specs/libyaml_fail-03.yaml
Normal file
5
parser/tests/specs/libyaml_fail-03.yaml
Normal file
|
@ -0,0 +1,5 @@
|
|||
# ex 7.2
|
||||
{
|
||||
foo : !!str,
|
||||
!!str : bar,
|
||||
}
|
Loading…
Reference in a new issue