Add scan_flow_scalar
This commit is contained in:
parent
a8ab8ebc27
commit
78134e144f
3 changed files with 213 additions and 22 deletions
|
@ -37,7 +37,7 @@ pub enum Event {
|
||||||
DocumentStart,
|
DocumentStart,
|
||||||
DocumentEnd,
|
DocumentEnd,
|
||||||
Alias,
|
Alias,
|
||||||
Scalar(String),
|
Scalar(String, TScalarStyle),
|
||||||
SequenceStart,
|
SequenceStart,
|
||||||
SequenceEnd,
|
SequenceEnd,
|
||||||
MappingStart,
|
MappingStart,
|
||||||
|
@ -46,7 +46,7 @@ pub enum Event {
|
||||||
|
|
||||||
impl Event {
|
impl Event {
|
||||||
fn empty_scalar() -> Event {
|
fn empty_scalar() -> Event {
|
||||||
Event::Scalar(String::new())
|
Event::Scalar(String::new(), TScalarStyle::Plain)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -130,9 +130,8 @@ impl<T: Iterator<Item=char>> Parser<T> {
|
||||||
|
|
||||||
fn load_node(&mut self, first_ev: &Event) -> Result<Yaml, ScanError> {
|
fn load_node(&mut self, first_ev: &Event) -> Result<Yaml, ScanError> {
|
||||||
match *first_ev {
|
match *first_ev {
|
||||||
Event::Scalar(ref v) => {
|
Event::Scalar(ref v, _) => {
|
||||||
// TODO scalar
|
// TODO scalar
|
||||||
println!("Scalar: {:?}", first_ev);
|
|
||||||
Ok(Yaml::String(v.clone()))
|
Ok(Yaml::String(v.clone()))
|
||||||
},
|
},
|
||||||
Event::SequenceStart => {
|
Event::SequenceStart => {
|
||||||
|
@ -294,10 +293,10 @@ impl<T: Iterator<Item=char>> Parser<T> {
|
||||||
self.state = State::IndentlessSequenceEntry;
|
self.state = State::IndentlessSequenceEntry;
|
||||||
Ok(Event::SequenceStart)
|
Ok(Event::SequenceStart)
|
||||||
},
|
},
|
||||||
TokenType::ScalarToken(_, v) => {
|
TokenType::ScalarToken(style, v) => {
|
||||||
self.pop_state();
|
self.pop_state();
|
||||||
self.skip();
|
self.skip();
|
||||||
Ok(Event::Scalar(v))
|
Ok(Event::Scalar(v, style))
|
||||||
},
|
},
|
||||||
TokenType::FlowSequenceStartToken => {
|
TokenType::FlowSequenceStartToken => {
|
||||||
self.state = State::FlowSequenceFirstEntry;
|
self.state = State::FlowSequenceFirstEntry;
|
||||||
|
@ -472,12 +471,14 @@ a0 bb: val
|
||||||
a1:
|
a1:
|
||||||
b1: 4
|
b1: 4
|
||||||
b2: d
|
b2: d
|
||||||
a2: 4
|
a2: 4 # i'm comment
|
||||||
a3: [1, 2, 3]
|
a3: [1, 2, 3]
|
||||||
a4:
|
a4:
|
||||||
- - a1
|
- - a1
|
||||||
- a2
|
- a2
|
||||||
- 2
|
- 2
|
||||||
|
a5: 'single_quoted'
|
||||||
|
a5: \"double_quoted\"
|
||||||
".to_string();
|
".to_string();
|
||||||
let mut parser = Parser::new(s.chars());
|
let mut parser = Parser::new(s.chars());
|
||||||
let out = parser.load().unwrap();
|
let out = parser.load().unwrap();
|
||||||
|
|
|
@ -127,18 +127,23 @@ impl<T: Iterator<Item=char>> Iterator for Scanner<T> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
fn is_z(c: char) -> bool {
|
fn is_z(c: char) -> bool {
|
||||||
c == '\0'
|
c == '\0'
|
||||||
}
|
}
|
||||||
|
#[inline]
|
||||||
fn is_break(c: char) -> bool {
|
fn is_break(c: char) -> bool {
|
||||||
c == '\n' || c == '\r'
|
c == '\n' || c == '\r'
|
||||||
}
|
}
|
||||||
|
#[inline]
|
||||||
fn is_breakz(c: char) -> bool {
|
fn is_breakz(c: char) -> bool {
|
||||||
is_break(c) || is_z(c)
|
is_break(c) || is_z(c)
|
||||||
}
|
}
|
||||||
|
#[inline]
|
||||||
fn is_blank(c: char) -> bool {
|
fn is_blank(c: char) -> bool {
|
||||||
c == ' ' || c == '\t'
|
c == ' ' || c == '\t'
|
||||||
}
|
}
|
||||||
|
#[inline]
|
||||||
fn is_blankz(c: char) -> bool {
|
fn is_blankz(c: char) -> bool {
|
||||||
is_blank(c) || is_breakz(c)
|
is_blank(c) || is_breakz(c)
|
||||||
}
|
}
|
||||||
|
@ -166,6 +171,7 @@ impl<T: Iterator<Item=char>> Scanner<T> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
fn lookahead(&mut self, count: usize) {
|
fn lookahead(&mut self, count: usize) {
|
||||||
if self.buffer.len() >= count {
|
if self.buffer.len() >= count {
|
||||||
return;
|
return;
|
||||||
|
@ -174,6 +180,7 @@ impl<T: Iterator<Item=char>> Scanner<T> {
|
||||||
self.buffer.push_back(self.rdr.next().unwrap_or('\0'));
|
self.buffer.push_back(self.rdr.next().unwrap_or('\0'));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
#[inline]
|
||||||
fn skip(&mut self) {
|
fn skip(&mut self) {
|
||||||
let c = self.buffer.pop_front().unwrap();
|
let c = self.buffer.pop_front().unwrap();
|
||||||
|
|
||||||
|
@ -185,25 +192,41 @@ impl<T: Iterator<Item=char>> Scanner<T> {
|
||||||
self.mark.col += 1;
|
self.mark.col += 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
#[inline]
|
||||||
|
fn skip_line(&mut self) {
|
||||||
|
if self.buffer[0] == '\r' && self.buffer[1] == '\n' {
|
||||||
|
self.skip();
|
||||||
|
self.skip();
|
||||||
|
} else if is_break(self.buffer[0]) {
|
||||||
|
self.skip();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#[inline]
|
||||||
fn ch(&self) -> char {
|
fn ch(&self) -> char {
|
||||||
self.buffer[0]
|
self.buffer[0]
|
||||||
}
|
}
|
||||||
|
#[inline]
|
||||||
fn ch_is(&self, c: char) -> bool {
|
fn ch_is(&self, c: char) -> bool {
|
||||||
self.buffer[0] == c
|
self.buffer[0] == c
|
||||||
}
|
}
|
||||||
#[allow(dead_code)]
|
#[allow(dead_code)]
|
||||||
|
#[inline]
|
||||||
fn eof(&self) -> bool {
|
fn eof(&self) -> bool {
|
||||||
self.ch_is('\0')
|
self.ch_is('\0')
|
||||||
}
|
}
|
||||||
|
#[inline]
|
||||||
pub fn stream_started(&self) -> bool {
|
pub fn stream_started(&self) -> bool {
|
||||||
self.stream_start_produced
|
self.stream_start_produced
|
||||||
}
|
}
|
||||||
|
#[inline]
|
||||||
pub fn stream_ended(&self) -> bool {
|
pub fn stream_ended(&self) -> bool {
|
||||||
self.stream_end_produced
|
self.stream_end_produced
|
||||||
}
|
}
|
||||||
|
#[inline]
|
||||||
pub fn mark(&self) -> Marker {
|
pub fn mark(&self) -> Marker {
|
||||||
self.mark
|
self.mark
|
||||||
}
|
}
|
||||||
|
#[inline]
|
||||||
fn read_break(&mut self, s: &mut String) {
|
fn read_break(&mut self, s: &mut String) {
|
||||||
if self.buffer[0] == '\r' && self.buffer[1] == '\n' {
|
if self.buffer[0] == '\r' && self.buffer[1] == '\n' {
|
||||||
s.push('\n');
|
s.push('\n');
|
||||||
|
@ -291,8 +314,8 @@ impl<T: Iterator<Item=char>> Scanner<T> {
|
||||||
'!' => unimplemented!(),
|
'!' => unimplemented!(),
|
||||||
'|' if self.flow_level == 0 => unimplemented!(),
|
'|' if self.flow_level == 0 => unimplemented!(),
|
||||||
'>' if self.flow_level == 0 => unimplemented!(),
|
'>' if self.flow_level == 0 => unimplemented!(),
|
||||||
'\'' => unimplemented!(),
|
'\'' => try!(self.fetch_flow_scalar(true)),
|
||||||
'"' => unimplemented!(),
|
'"' => try!(self.fetch_flow_scalar(false)),
|
||||||
// plain scalar
|
// plain scalar
|
||||||
'-' if !is_blankz(nc) => try!(self.fetch_plain_scalar()),
|
'-' if !is_blankz(nc) => try!(self.fetch_plain_scalar()),
|
||||||
':' | '?' if !is_blankz(nc) && self.flow_level == 0 => try!(self.fetch_plain_scalar()),
|
':' | '?' if !is_blankz(nc) && self.flow_level == 0 => try!(self.fetch_plain_scalar()),
|
||||||
|
@ -368,7 +391,8 @@ impl<T: Iterator<Item=char>> Scanner<T> {
|
||||||
' ' => self.skip(),
|
' ' => self.skip(),
|
||||||
'\t' if self.flow_level > 0 || !self.simple_key_allowed => self.skip(),
|
'\t' if self.flow_level > 0 || !self.simple_key_allowed => self.skip(),
|
||||||
'\n' | '\r' => {
|
'\n' | '\r' => {
|
||||||
self.skip();
|
self.lookahead(2);
|
||||||
|
self.skip_line();
|
||||||
if self.flow_level == 0 {
|
if self.flow_level == 0 {
|
||||||
self.allow_simple_key();
|
self.allow_simple_key();
|
||||||
}
|
}
|
||||||
|
@ -493,15 +517,180 @@ impl<T: Iterator<Item=char>> Scanner<T> {
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
fn fetch_plain_scalar(&mut self) -> Result<(), ScanError> {
|
fn fetch_flow_scalar(&mut self, single: bool) -> ScanResult {
|
||||||
try!(self.save_simple_key());
|
try!(self.save_simple_key());
|
||||||
|
self.disallow_simple_key();
|
||||||
|
|
||||||
|
let tok = try!(self.scan_flow_scalar(single));
|
||||||
|
|
||||||
|
self.tokens.push_back(tok);
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
fn scan_flow_scalar(&mut self, single: bool) -> Result<Token, ScanError> {
|
||||||
|
let start_mark = self.mark;
|
||||||
|
|
||||||
|
let mut string = String::new();
|
||||||
|
let mut leading_break = String::new();
|
||||||
|
let mut trailing_breaks = String::new();
|
||||||
|
let mut whitespaces = String::new();
|
||||||
|
let mut leading_blanks = false;
|
||||||
|
|
||||||
|
/* Eat the left quote. */
|
||||||
|
self.skip();
|
||||||
|
|
||||||
|
loop {
|
||||||
|
/* Check for a document indicator. */
|
||||||
|
self.lookahead(4);
|
||||||
|
|
||||||
|
if self.mark.col == 0 &&
|
||||||
|
((self.buffer[0] == '-') &&
|
||||||
|
(self.buffer[1] == '-') &&
|
||||||
|
(self.buffer[2] == '-')) ||
|
||||||
|
((self.buffer[0] == '.') &&
|
||||||
|
(self.buffer[1] == '.') &&
|
||||||
|
(self.buffer[2] == '.')) &&
|
||||||
|
is_blankz(self.buffer[3]) {
|
||||||
|
return Err(ScanError::new(start_mark,
|
||||||
|
"while scanning a quoted scalar, found unexpected document indicator"));
|
||||||
|
}
|
||||||
|
|
||||||
|
if is_z(self.ch()) {
|
||||||
|
return Err(ScanError::new(start_mark,
|
||||||
|
"while scanning a quoted scalar, found unexpected end of stream"));
|
||||||
|
}
|
||||||
|
|
||||||
|
self.lookahead(2);
|
||||||
|
|
||||||
|
leading_blanks = false;
|
||||||
|
// Consume non-blank characters.
|
||||||
|
|
||||||
|
while !is_blankz(self.ch()) {
|
||||||
|
match self.ch() {
|
||||||
|
// Check for an escaped single quote.
|
||||||
|
'\'' if self.buffer[1] == '\'' && single => {
|
||||||
|
string.push('\'');
|
||||||
|
self.skip();
|
||||||
|
self.skip();
|
||||||
|
},
|
||||||
|
// Check for the right quote.
|
||||||
|
'\'' if single => { break; },
|
||||||
|
'"' if !single => { break; },
|
||||||
|
// Check for an escaped line break.
|
||||||
|
'\\' if !single && is_break(self.buffer[1]) => {
|
||||||
|
self.lookahead(3);
|
||||||
|
self.skip();
|
||||||
|
self.skip_line();
|
||||||
|
leading_blanks = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
// Check for an escape sequence.
|
||||||
|
'\\' if !single => {
|
||||||
|
let mut code_length = 0usize;
|
||||||
|
match self.buffer[1] {
|
||||||
|
'0' => string.push('\0'),
|
||||||
|
'a' => string.push('\x07'),
|
||||||
|
'b' => string.push('\x08'),
|
||||||
|
't' | '\t' => string.push('\t'),
|
||||||
|
'n' => string.push('\n'),
|
||||||
|
'v' => string.push('\x0b'),
|
||||||
|
'f' => string.push('\x0c'),
|
||||||
|
'r' => string.push('\x0d'),
|
||||||
|
'e' => string.push('\x1b'),
|
||||||
|
' ' => string.push('\x20'),
|
||||||
|
'"' => string.push('"'),
|
||||||
|
'\'' => string.push('\''),
|
||||||
|
'\\' => string.push('\\'),
|
||||||
|
//'N' => { string.push('\xc2'); string.push('\x85') },
|
||||||
|
'x' => code_length = 2,
|
||||||
|
'u' => code_length = 4,
|
||||||
|
'U' => code_length = 8,
|
||||||
|
_ => return Err(ScanError::new(start_mark,
|
||||||
|
"while parsing a quoted scalar, found unknown escape character"))
|
||||||
|
}
|
||||||
|
self.skip();
|
||||||
|
self.skip();
|
||||||
|
// Consume an arbitrary escape code.
|
||||||
|
if code_length > 0 {
|
||||||
|
let val = 0;
|
||||||
|
self.lookahead(code_length);
|
||||||
|
unimplemented!();
|
||||||
|
}
|
||||||
|
},
|
||||||
|
c => { string.push(c); self.skip(); }
|
||||||
|
}
|
||||||
|
self.lookahead(2);
|
||||||
|
}
|
||||||
|
match self.ch() {
|
||||||
|
'\'' if single => { break; },
|
||||||
|
'"' if !single => { break; },
|
||||||
|
_ => {}
|
||||||
|
}
|
||||||
|
self.lookahead(1);
|
||||||
|
|
||||||
|
// Consume blank characters.
|
||||||
|
while is_blank(self.ch()) || is_break(self.ch()) {
|
||||||
|
if is_blank(self.ch()) {
|
||||||
|
// Consume a space or a tab character.
|
||||||
|
if !leading_blanks {
|
||||||
|
whitespaces.push(self.ch());
|
||||||
|
self.skip();
|
||||||
|
} else {
|
||||||
|
self.skip();
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
self.lookahead(2);
|
||||||
|
// Check if it is a first line break.
|
||||||
|
if !leading_blanks {
|
||||||
|
whitespaces.clear();
|
||||||
|
self.read_break(&mut leading_break);
|
||||||
|
leading_blanks = true;
|
||||||
|
} else {
|
||||||
|
self.read_break(&mut trailing_breaks);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
self.lookahead(1);
|
||||||
|
}
|
||||||
|
// Join the whitespaces or fold line breaks.
|
||||||
|
if leading_blanks {
|
||||||
|
if !leading_break.is_empty() {
|
||||||
|
if trailing_breaks.is_empty() {
|
||||||
|
string.push(' ');
|
||||||
|
} else {
|
||||||
|
string.extend(trailing_breaks.chars());
|
||||||
|
trailing_breaks.clear();
|
||||||
|
}
|
||||||
|
leading_break.clear();
|
||||||
|
} else {
|
||||||
|
string.extend(leading_break.chars());
|
||||||
|
string.extend(trailing_breaks.chars());
|
||||||
|
trailing_breaks.clear();
|
||||||
|
leading_break.clear();
|
||||||
|
}
|
||||||
|
leading_blanks = false;
|
||||||
|
} else {
|
||||||
|
string.extend(whitespaces.chars());
|
||||||
|
whitespaces.clear();
|
||||||
|
}
|
||||||
|
} // loop
|
||||||
|
|
||||||
|
// Eat the right quote.
|
||||||
|
self.skip();
|
||||||
|
|
||||||
|
if single {
|
||||||
|
Ok(Token(start_mark, TokenType::ScalarToken(TScalarStyle::SingleQuoted, string)))
|
||||||
|
} else {
|
||||||
|
Ok(Token(start_mark, TokenType::ScalarToken(TScalarStyle::DoubleQuoted, string)))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn fetch_plain_scalar(&mut self) -> ScanResult {
|
||||||
|
try!(self.save_simple_key());
|
||||||
self.disallow_simple_key();
|
self.disallow_simple_key();
|
||||||
|
|
||||||
let tok = try!(self.scan_plain_scalar());
|
let tok = try!(self.scan_plain_scalar());
|
||||||
|
|
||||||
self.tokens.push_back(tok);
|
self.tokens.push_back(tok);
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -13,6 +13,8 @@ pub enum Yaml {
|
||||||
Array(self::Array),
|
Array(self::Array),
|
||||||
Hash(self::Hash),
|
Hash(self::Hash),
|
||||||
Null,
|
Null,
|
||||||
|
/// Access non-exist node by Index trait will return BadValue.
|
||||||
|
/// This simplifies error handling of user.
|
||||||
BadValue,
|
BadValue,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -89,7 +91,6 @@ impl Yaml {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
pub fn as_f64(&self) -> Option<f64> {
|
pub fn as_f64(&self) -> Option<f64> {
|
||||||
// XXX(chenyh) precompile me
|
// XXX(chenyh) precompile me
|
||||||
let float_pattern = regex!(r"^([-+]?)(\.[0-9]+|[0-9]+(\.[0-9]*)?([eE][-+]?[0-9]+)?)$");
|
let float_pattern = regex!(r"^([-+]?)(\.[0-9]+|[0-9]+(\.[0-9]*)?([eE][-+]?[0-9]+)?)$");
|
||||||
|
|
Loading…
Reference in a new issue