Add tag:yaml.org,2002 parsing

This commit is contained in:
Yuheng Chen 2015-05-30 22:39:50 +08:00
parent 064f10beee
commit e4862a7c8f
4 changed files with 137 additions and 34 deletions

View file

@ -39,7 +39,8 @@ pub enum Event {
DocumentEnd, DocumentEnd,
// anchor_id // anchor_id
Alias(usize), Alias(usize),
Scalar(String, TScalarStyle, usize), // value, style, anchor_id, tag
Scalar(String, TScalarStyle, usize, Option<TokenType>),
// anchor_id // anchor_id
SequenceStart(usize), SequenceStart(usize),
SequenceEnd, SequenceEnd,
@ -51,7 +52,11 @@ pub enum Event {
impl Event { impl Event {
fn empty_scalar() -> Event { fn empty_scalar() -> Event {
// a null scalar // a null scalar
Event::Scalar("~".to_string(), TScalarStyle::Plain, 0) Event::Scalar("~".to_string(), TScalarStyle::Plain, 0, None)
}
fn empty_scalar_with_anchor(anchor: usize, tag: TokenType) -> Event {
Event::Scalar("".to_string(), TScalarStyle::Plain, anchor, Some(tag))
} }
} }
@ -121,7 +126,7 @@ impl<T: Iterator<Item=char>> Parser<T> {
return Ok(Event::StreamEnd); return Ok(Event::StreamEnd);
} }
let ev = try!(self.state_machine()); let ev = try!(self.state_machine());
println!("EV {:?}", ev); // println!("EV {:?}", ev);
recv.on_event(&ev); recv.on_event(&ev);
Ok(ev) Ok(ev)
} }
@ -174,7 +179,7 @@ impl<T: Iterator<Item=char>> Parser<T> {
Event::Alias(..) => { Event::Alias(..) => {
Ok(()) Ok(())
}, },
Event::Scalar(_, _, _) => { Event::Scalar(..) => {
Ok(()) Ok(())
}, },
Event::SequenceStart(_) => { Event::SequenceStart(_) => {
@ -218,8 +223,8 @@ impl<T: Iterator<Item=char>> Parser<T> {
} }
fn state_machine(&mut self) -> ParseResult { fn state_machine(&mut self) -> ParseResult {
let next_tok = try!(self.peek()); // let next_tok = try!(self.peek());
println!("cur_state {:?}, next tok: {:?}", self.state, next_tok); // println!("cur_state {:?}, next tok: {:?}", self.state, next_tok);
match self.state { match self.state {
State::StreamStart => self.stream_start(), State::StreamStart => self.stream_start(),
@ -395,11 +400,11 @@ impl<T: Iterator<Item=char>> Parser<T> {
fn parse_node(&mut self, block: bool, indentless_sequence: bool) -> ParseResult { fn parse_node(&mut self, block: bool, indentless_sequence: bool) -> ParseResult {
let mut tok = try!(self.peek()); let mut tok = try!(self.peek());
let mut anchor_id = 0; let mut anchor_id = 0;
let mut tag = None;
match tok.1 { match tok.1 {
TokenType::AliasToken(name) => { TokenType::AliasToken(name) => {
self.pop_state(); self.pop_state();
self.skip(); self.skip();
// TODO(chenyh): find anchor id
match self.anchors.get(&name) { match self.anchors.get(&name) {
None => return Err(ScanError::new(tok.0, "while parsing node, found unknown anchor")), None => return Err(ScanError::new(tok.0, "while parsing node, found unknown anchor")),
Some(id) => return Ok(Event::Alias(*id)) Some(id) => return Ok(Event::Alias(*id))
@ -410,13 +415,13 @@ impl<T: Iterator<Item=char>> Parser<T> {
self.skip(); self.skip();
tok = try!(self.peek()); tok = try!(self.peek());
if let TokenType::TagToken(_, _) = tok.1 { if let TokenType::TagToken(_, _) = tok.1 {
tag = Some(tok.1);
self.skip(); self.skip();
tok = try!(self.peek()); tok = try!(self.peek());
} }
}, },
TokenType::TagToken(..) => { TokenType::TagToken(..) => {
// XXX: ex 7.2, an empty scalar can follow a secondary tag tag = Some(tok.1);
// but we haven't implemented it
self.skip(); self.skip();
tok = try!(self.peek()); tok = try!(self.peek());
if let TokenType::AnchorToken(name) = tok.1 { if let TokenType::AnchorToken(name) = tok.1 {
@ -435,7 +440,7 @@ impl<T: Iterator<Item=char>> Parser<T> {
TokenType::ScalarToken(style, v) => { TokenType::ScalarToken(style, v) => {
self.pop_state(); self.pop_state();
self.skip(); self.skip();
Ok(Event::Scalar(v, style, anchor_id)) Ok(Event::Scalar(v, style, anchor_id, tag))
}, },
TokenType::FlowSequenceStartToken => { TokenType::FlowSequenceStartToken => {
self.state = State::FlowSequenceFirstEntry; self.state = State::FlowSequenceFirstEntry;
@ -453,6 +458,11 @@ impl<T: Iterator<Item=char>> Parser<T> {
self.state = State::BlockMappingFirstKey; self.state = State::BlockMappingFirstKey;
Ok(Event::MappingStart(anchor_id)) Ok(Event::MappingStart(anchor_id))
}, },
// ex 7.2, an empty scalar can follow a secondary tag
_ if tag.is_some() || anchor_id > 0 => {
    self.pop_state();
    // The guard also admits an anchored node that has no tag, so `tag` may be
    // `None` here; unwrapping it would panic. Emit the empty scalar without a
    // tag in that case instead.
    Ok(match tag {
        Some(tag) => Event::empty_scalar_with_anchor(anchor_id, tag),
        None => Event::Scalar("".to_string(), TScalarStyle::Plain, anchor_id, None),
    })
},
_ => { Err(ScanError::new(tok.0, "while parsing a node, did not find expected node content")) } _ => { Err(ScanError::new(tok.0, "while parsing a node, did not find expected node content")) }
} }
} }
@ -683,7 +693,8 @@ impl<T: Iterator<Item=char>> Parser<T> {
self.skip(); self.skip();
tok = try!(self.peek()); tok = try!(self.peek());
match tok.1 { match tok.1 {
TokenType::BlockEntryToken | TokenType::BlockEndToken => { TokenType::BlockEntryToken
| TokenType::BlockEndToken => {
self.state = State::BlockSequenceEntry; self.state = State::BlockSequenceEntry;
Ok(Event::empty_scalar()) Ok(Event::empty_scalar())
}, },

View file

@ -1322,7 +1322,6 @@ impl<T: Iterator<Item=char>> Scanner<T> {
trailing_breaks.clear(); trailing_breaks.clear();
leading_break.clear(); leading_break.clear();
} }
leading_blanks = false;
} else { } else {
string.extend(whitespaces.chars()); string.extend(whitespaces.chars());
whitespaces.clear(); whitespaces.clear();

View file

@ -4,12 +4,14 @@ use std::string;
use std::str::FromStr; use std::str::FromStr;
use std::mem; use std::mem;
use parser::*; use parser::*;
use scanner::{TScalarStyle, ScanError}; use scanner::{TScalarStyle, ScanError, TokenType};
#[derive(Clone, PartialEq, PartialOrd, Debug, Eq, Ord)] #[derive(Clone, PartialEq, PartialOrd, Debug, Eq, Ord)]
pub enum Yaml { pub enum Yaml {
/// number types are stored as String, and parsed on demand. /// float types are stored as String, and parsed on demand.
Number(string::String), /// Note that f64 does NOT implement Eq trait and can NOT be stored in BTreeMap
Real(string::String),
Integer(i64),
String(string::String), String(string::String),
Boolean(bool), Boolean(bool),
Array(self::Array), Array(self::Array),
@ -33,7 +35,7 @@ pub struct YamlLoader {
impl EventReceiver for YamlLoader { impl EventReceiver for YamlLoader {
fn on_event(&mut self, ev: &Event) { fn on_event(&mut self, ev: &Event) {
println!("EV {:?}", ev); // println!("EV {:?}", ev);
match *ev { match *ev {
Event::DocumentStart => { Event::DocumentStart => {
// do nothing // do nothing
@ -62,17 +64,48 @@ impl EventReceiver for YamlLoader {
let node = self.doc_stack.pop().unwrap(); let node = self.doc_stack.pop().unwrap();
self.insert_new_node(node); self.insert_new_node(node);
}, },
Event::Scalar(ref v, style, _) => { Event::Scalar(ref v, style, _, ref tag) => {
let node = if style != TScalarStyle::Plain { let node = if style != TScalarStyle::Plain {
Yaml::String(v.clone()) Yaml::String(v.clone())
} else { } else {
match v.as_ref() { match tag {
"~" => Yaml::Null, &Some(TokenType::TagToken(ref handle, ref suffix)) => {
"true" => Yaml::Boolean(true), // XXX tag:yaml.org,2002:
"false" => Yaml::Boolean(false), if handle == "!!" {
// try parsing as f64 match suffix.as_ref() {
_ if v.parse::<f64>().is_ok() => Yaml::Number(v.clone()), "bool" => {
_ => Yaml::String(v.clone()) // "true" or "false"
match v.parse::<bool>() {
Err(_) => Yaml::BadValue,
Ok(v) => Yaml::Boolean(v)
}
},
"int" => {
match v.parse::<i64>() {
Err(_) => Yaml::BadValue,
Ok(v) => Yaml::Integer(v)
}
},
"float" => {
match v.parse::<f64>() {
Err(_) => Yaml::BadValue,
Ok(_) => Yaml::Real(v.clone())
}
},
"null" => {
match v.as_ref() {
"~" | "null" => Yaml::Null,
_ => Yaml::BadValue,
}
}
_ => Yaml::String(v.clone()),
}
} else {
Yaml::String(v.clone())
}
},
// Datatype is not specified, or unrecognized
_ => { Yaml::from_str(v.as_ref()) }
} }
}; };
@ -149,6 +182,7 @@ pub fn $name(&self) -> Option<$t> {
impl Yaml { impl Yaml {
define_as!(as_bool, bool, Boolean); define_as!(as_bool, bool, Boolean);
define_as!(as_i64, i64, Integer);
define_as_ref!(as_str, &str, String); define_as_ref!(as_str, &str, String);
define_as_ref!(as_hash, &Hash, Hash); define_as_ref!(as_hash, &Hash, Hash);
@ -168,17 +202,25 @@ impl Yaml {
} }
} }
pub fn as_number<T: FromStr>(&self) -> Option<T> { pub fn as_f64(&self) -> Option<f64> {
match *self { match *self {
Yaml::Number(ref v) => { Yaml::Real(ref v) => {
v.parse::<T>().ok() v.parse::<f64>().ok()
}, },
_ => None _ => None
} }
} }
pub fn from_str(s: &str) -> Yaml { pub fn from_str(v: &str) -> Yaml {
Yaml::String(s.to_string()) match v {
"~" | "null" => Yaml::Null,
"true" => Yaml::Boolean(true),
"false" => Yaml::Boolean(false),
_ if v.parse::<i64>().is_ok() => Yaml::Integer(v.parse::<i64>().unwrap()),
// try parsing as f64
_ if v.parse::<f64>().is_ok() => Yaml::Real(v.to_string()),
_ => Yaml::String(v.to_string())
}
} }
} }
@ -220,9 +262,9 @@ c: [1, 2]
"; ";
let out = YamlLoader::load_from_str(&s).unwrap(); let out = YamlLoader::load_from_str(&s).unwrap();
let doc = &out[0]; let doc = &out[0];
assert_eq!(doc["a"].as_number::<i32>().unwrap(), 1); assert_eq!(doc["a"].as_i64().unwrap(), 1i64);
assert_eq!(doc["b"].as_number::<f32>().unwrap(), 2.2f32); assert_eq!(doc["b"].as_f64().unwrap(), 2.2f64);
assert_eq!(doc["c"][1].as_number::<i32>().unwrap(), 2); assert_eq!(doc["c"][1].as_i64().unwrap(), 2i64);
assert!(doc["d"][0].is_badvalue()); assert!(doc["d"][0].is_badvalue());
} }
@ -246,7 +288,6 @@ a7: 你好
".to_string(); ".to_string();
let out = YamlLoader::load_from_str(&s).unwrap(); let out = YamlLoader::load_from_str(&s).unwrap();
let doc = &out[0]; let doc = &out[0];
println!("DOC {:?}", doc);
assert_eq!(doc["a7"].as_str().unwrap(), "你好"); assert_eq!(doc["a7"].as_str().unwrap(), "你好");
} }
@ -264,5 +305,57 @@ a7: 你好
assert_eq!(out.len(), 3); assert_eq!(out.len(), 3);
} }
#[test]
fn test_plain_datatype() {
    // One sequence document: untagged scalars (entries 0-10), scalars carrying
    // an explicit `!!` tag (11-16), and tagged scalars whose text does not fit
    // the requested type (17-20).
    let input =
"
- 'string'
- \"string\"
- string
- 123
- -321
- 1.23
- -1e4
- ~
- null
- true
- false
- !!str 0
- !!int 100
- !!float 2
- !!null ~
- !!bool true
- !!bool false
# bad values
- !!int string
- !!float string
- !!bool null
- !!null val
";
    let docs = YamlLoader::load_from_str(&input).unwrap();
    let seq = &docs[0];

    // Single-quoted, double-quoted and plain text all load as strings.
    for idx in 0usize..3 {
        assert_eq!(seq[idx].as_str().unwrap(), "string");
    }

    // Untagged plain scalars are typed from their text.
    assert_eq!(seq[3].as_i64().unwrap(), 123);
    assert_eq!(seq[4].as_i64().unwrap(), -321);
    assert_eq!(seq[5].as_f64().unwrap(), 1.23);
    assert_eq!(seq[6].as_f64().unwrap(), -1e4);
    for idx in 7usize..9 {
        assert!(seq[idx].is_null());
    }
    assert_eq!(seq[9].as_bool().unwrap(), true);
    assert_eq!(seq[10].as_bool().unwrap(), false);

    // An explicit tag decides the type, overriding what the text looks like.
    assert_eq!(seq[11].as_str().unwrap(), "0");
    assert_eq!(seq[12].as_i64().unwrap(), 100);
    assert_eq!(seq[13].as_f64().unwrap(), 2.0);
    assert!(seq[14].is_null());
    assert_eq!(seq[15].as_bool().unwrap(), true);
    assert_eq!(seq[16].as_bool().unwrap(), false);

    // Text that cannot be read as the tagged type loads as a bad value.
    for idx in 17usize..21 {
        assert!(seq[idx].is_badvalue());
    }
}
} }

View file

@ -31,7 +31,7 @@ impl EventReceiver for YamlChecker {
Event::SequenceEnd => TestEvent::OnSequenceEnd, Event::SequenceEnd => TestEvent::OnSequenceEnd,
Event::MappingStart(..) => TestEvent::OnMapStart, Event::MappingStart(..) => TestEvent::OnMapStart,
Event::MappingEnd => TestEvent::OnMapEnd, Event::MappingEnd => TestEvent::OnMapEnd,
Event::Scalar(ref v, style, _) => { Event::Scalar(ref v, style, _, _)=> {
if v == "~" && style == TScalarStyle::Plain { if v == "~" && style == TScalarStyle::Plain {
TestEvent::OnNull TestEvent::OnNull
} else { } else {