diff --git a/saphyr/src/parser.rs b/saphyr/src/parser.rs index b2a35cb..b70f88e 100644 --- a/saphyr/src/parser.rs +++ b/saphyr/src/parser.rs @@ -39,7 +39,8 @@ pub enum Event { DocumentEnd, // anchor_id Alias(usize), - Scalar(String, TScalarStyle, usize), + // value, style, anchor_id, tag + Scalar(String, TScalarStyle, usize, Option<TokenType>), // anchor_id SequenceStart(usize), SequenceEnd, @@ -51,7 +52,11 @@ pub enum Event { impl Event { fn empty_scalar() -> Event { // a null scalar - Event::Scalar("~".to_string(), TScalarStyle::Plain, 0) + Event::Scalar("~".to_string(), TScalarStyle::Plain, 0, None) + } + + fn empty_scalar_with_anchor(anchor: usize, tag: Option<TokenType>) -> Event { + Event::Scalar("".to_string(), TScalarStyle::Plain, anchor, tag) } } @@ -121,7 +126,7 @@ impl> Parser { return Ok(Event::StreamEnd); } let ev = try!(self.state_machine()); - println!("EV {:?}", ev); + // println!("EV {:?}", ev); recv.on_event(&ev); Ok(ev) } @@ -174,7 +179,7 @@ impl> Parser { Event::Alias(..) => { Ok(()) }, - Event::Scalar(_, _, _) => { + Event::Scalar(..) 
=> { Ok(()) }, Event::SequenceStart(_) => { @@ -218,8 +223,8 @@ impl> Parser { } fn state_machine(&mut self) -> ParseResult { - let next_tok = try!(self.peek()); - println!("cur_state {:?}, next tok: {:?}", self.state, next_tok); + // let next_tok = try!(self.peek()); + // println!("cur_state {:?}, next tok: {:?}", self.state, next_tok); match self.state { State::StreamStart => self.stream_start(), @@ -395,11 +400,11 @@ impl> Parser { fn parse_node(&mut self, block: bool, indentless_sequence: bool) -> ParseResult { let mut tok = try!(self.peek()); let mut anchor_id = 0; + let mut tag = None; match tok.1 { TokenType::AliasToken(name) => { self.pop_state(); self.skip(); - // TODO(chenyh): find anchor id match self.anchors.get(&name) { None => return Err(ScanError::new(tok.0, "while parsing node, found unknown anchor")), Some(id) => return Ok(Event::Alias(*id)) @@ -410,13 +415,13 @@ impl> Parser { self.skip(); tok = try!(self.peek()); if let TokenType::TagToken(_, _) = tok.1 { + tag = Some(tok.1); self.skip(); tok = try!(self.peek()); } }, TokenType::TagToken(..) 
=> { - // XXX: ex 7.2, an empty scalar can follow a secondary tag - // but we haven't implemented it + tag = Some(tok.1); self.skip(); tok = try!(self.peek()); if let TokenType::AnchorToken(name) = tok.1 { @@ -435,7 +440,7 @@ impl> Parser { TokenType::ScalarToken(style, v) => { self.pop_state(); self.skip(); - Ok(Event::Scalar(v, style, anchor_id)) + Ok(Event::Scalar(v, style, anchor_id, tag)) }, TokenType::FlowSequenceStartToken => { self.state = State::FlowSequenceFirstEntry; @@ -453,6 +458,11 @@ impl> Parser { self.state = State::BlockMappingFirstKey; Ok(Event::MappingStart(anchor_id)) }, + // ex 7.2: an empty scalar may follow a tag and/or an anchor, so `tag` can be None here + _ if tag.is_some() || anchor_id > 0 => { + self.pop_state(); + Ok(Event::empty_scalar_with_anchor(anchor_id, tag)) + }, _ => { Err(ScanError::new(tok.0, "while parsing a node, did not find expected node content")) } } } @@ -683,7 +693,8 @@ impl> Parser { self.skip(); tok = try!(self.peek()); match tok.1 { - TokenType::BlockEntryToken | TokenType::BlockEndToken => { + TokenType::BlockEntryToken + | TokenType::BlockEndToken => { self.state = State::BlockSequenceEntry; Ok(Event::empty_scalar()) }, diff --git a/saphyr/src/scanner.rs b/saphyr/src/scanner.rs index 2887356..0d2c86e 100644 --- a/saphyr/src/scanner.rs +++ b/saphyr/src/scanner.rs @@ -1322,7 +1322,6 @@ impl> Scanner { trailing_breaks.clear(); leading_break.clear(); } - leading_blanks = false; } else { string.extend(whitespaces.chars()); whitespaces.clear(); diff --git a/saphyr/src/yaml.rs b/saphyr/src/yaml.rs index 585feb9..ec14088 100644 --- a/saphyr/src/yaml.rs +++ b/saphyr/src/yaml.rs @@ -4,12 +4,14 @@ use std::string; use std::str::FromStr; use std::mem; use parser::*; -use scanner::{TScalarStyle, ScanError}; +use scanner::{TScalarStyle, ScanError, TokenType}; #[derive(Clone, PartialEq, PartialOrd, Debug, Eq, Ord)] pub enum Yaml { - /// number types are stored as String, and parsed on demand. 
- Number(string::String), + /// float types are stored as String, and parsed on demand. + /// Note that f64 does NOT implement Eq trait and can NOT be stored in BTreeMap + Real(string::String), + Integer(i64), String(string::String), Boolean(bool), Array(self::Array), @@ -33,7 +35,7 @@ pub struct YamlLoader { impl EventReceiver for YamlLoader { fn on_event(&mut self, ev: &Event) { - println!("EV {:?}", ev); + // println!("EV {:?}", ev); match *ev { Event::DocumentStart => { // do nothing @@ -62,17 +64,48 @@ impl EventReceiver for YamlLoader { let node = self.doc_stack.pop().unwrap(); self.insert_new_node(node); }, - Event::Scalar(ref v, style, _) => { + Event::Scalar(ref v, style, _, ref tag) => { let node = if style != TScalarStyle::Plain { Yaml::String(v.clone()) } else { - match v.as_ref() { - "~" => Yaml::Null, - "true" => Yaml::Boolean(true), - "false" => Yaml::Boolean(false), - // try parsing as f64 - _ if v.parse::().is_ok() => Yaml::Number(v.clone()), - _ => Yaml::String(v.clone()) + match tag { + &Some(TokenType::TagToken(ref handle, ref suffix)) => { + // XXX tag:yaml.org,2002: + if handle == "!!" 
{ + match suffix.as_ref() { + "bool" => { + // "true" or "false" + match v.parse::<bool>() { + Err(_) => Yaml::BadValue, + Ok(v) => Yaml::Boolean(v) + } + }, + "int" => { + match v.parse::<i64>() { + Err(_) => Yaml::BadValue, + Ok(v) => Yaml::Integer(v) + } + }, + "float" => { + match v.parse::<f64>() { + Err(_) => Yaml::BadValue, + Ok(_) => Yaml::Real(v.clone()) + } + }, + "null" => { + match v.as_ref() { + "~" | "null" => Yaml::Null, + _ => Yaml::BadValue, + } + } + _ => Yaml::String(v.clone()), + } + } else { + Yaml::String(v.clone()) + } + }, + // Datatype is not specified, or unrecognized + _ => { Yaml::from_str(v.as_ref()) } } }; @@ -149,6 +182,7 @@ pub fn $name(&self) -> Option<$t> { impl Yaml { define_as!(as_bool, bool, Boolean); + define_as!(as_i64, i64, Integer); define_as_ref!(as_str, &str, String); define_as_ref!(as_hash, &Hash, Hash); @@ -168,17 +202,25 @@ impl Yaml { } } - pub fn as_number(&self) -> Option { + pub fn as_f64(&self) -> Option<f64> { match *self { - Yaml::Number(ref v) => { - v.parse::().ok() + Yaml::Real(ref v) => { + v.parse::<f64>().ok() }, _ => None } } - pub fn from_str(s: &str) -> Yaml { - Yaml::String(s.to_string()) + pub fn from_str(v: &str) -> Yaml { + match v { + "~" | "null" => Yaml::Null, + "true" => Yaml::Boolean(true), + "false" => Yaml::Boolean(false), + _ if v.parse::<i64>().is_ok() => Yaml::Integer(v.parse::<i64>().unwrap()), + // try parsing as f64 + _ if v.parse::<f64>().is_ok() => Yaml::Real(v.to_string()), + _ => Yaml::String(v.to_string()) + } } } @@ -220,9 +262,9 @@ c: [1, 2] "; let out = YamlLoader::load_from_str(&s).unwrap(); let doc = &out[0]; - assert_eq!(doc["a"].as_number::().unwrap(), 1); - assert_eq!(doc["b"].as_number::().unwrap(), 2.2f32); - assert_eq!(doc["c"][1].as_number::().unwrap(), 2); + assert_eq!(doc["a"].as_i64().unwrap(), 1i64); + assert_eq!(doc["b"].as_f64().unwrap(), 2.2f64); + assert_eq!(doc["c"][1].as_i64().unwrap(), 2i64); assert!(doc["d"][0].is_badvalue()); } @@ -246,7 +288,6 @@ a7: 你好 ".to_string(); let out = 
YamlLoader::load_from_str(&s).unwrap(); let doc = &out[0]; - println!("DOC {:?}", doc); assert_eq!(doc["a7"].as_str().unwrap(), "你好"); } @@ -264,5 +305,57 @@ a7: 你好 assert_eq!(out.len(), 3); } + #[test] + fn test_plain_datatype() { + let s = +" +- 'string' +- \"string\" +- string +- 123 +- -321 +- 1.23 +- -1e4 +- ~ +- null +- true +- false +- !!str 0 +- !!int 100 +- !!float 2 +- !!null ~ +- !!bool true +- !!bool false +# bad values +- !!int string +- !!float string +- !!bool null +- !!null val +"; + let out = YamlLoader::load_from_str(&s).unwrap(); + let doc = &out[0]; + + assert_eq!(doc[0].as_str().unwrap(), "string"); + assert_eq!(doc[1].as_str().unwrap(), "string"); + assert_eq!(doc[2].as_str().unwrap(), "string"); + assert_eq!(doc[3].as_i64().unwrap(), 123); + assert_eq!(doc[4].as_i64().unwrap(), -321); + assert_eq!(doc[5].as_f64().unwrap(), 1.23); + assert_eq!(doc[6].as_f64().unwrap(), -1e4); + assert!(doc[7].is_null()); + assert!(doc[8].is_null()); + assert_eq!(doc[9].as_bool().unwrap(), true); + assert_eq!(doc[10].as_bool().unwrap(), false); + assert_eq!(doc[11].as_str().unwrap(), "0"); + assert_eq!(doc[12].as_i64().unwrap(), 100); + assert_eq!(doc[13].as_f64().unwrap(), 2.0); + assert!(doc[14].is_null()); + assert_eq!(doc[15].as_bool().unwrap(), true); + assert_eq!(doc[16].as_bool().unwrap(), false); + assert!(doc[17].is_badvalue()); + assert!(doc[18].is_badvalue()); + assert!(doc[19].is_badvalue()); + assert!(doc[20].is_badvalue()); + } } diff --git a/saphyr/tests/spec_test.rs b/saphyr/tests/spec_test.rs index daa38d2..fa1d065 100644 --- a/saphyr/tests/spec_test.rs +++ b/saphyr/tests/spec_test.rs @@ -31,7 +31,7 @@ impl EventReceiver for YamlChecker { Event::SequenceEnd => TestEvent::OnSequenceEnd, Event::MappingStart(..) => TestEvent::OnMapStart, Event::MappingEnd => TestEvent::OnMapEnd, - Event::Scalar(ref v, style, _) => { + Event::Scalar(ref v, style, _, _)=> { if v == "~" && style == TScalarStyle::Plain { TestEvent::OnNull } else {