// saphyr-serde/saphyr/src/scanner.rs — YAML scanner (tokenizer).
use std::collections::VecDeque;
/// Character encoding of the input stream.
///
/// Only UTF-8 input is handled by this scanner.
#[derive(Clone, Copy, PartialEq, Debug, Eq)]
pub enum TEncoding {
    Utf8,
}
/// Presentation style of a YAML scalar.
#[derive(Clone, Copy, PartialEq, Debug, Eq)]
pub enum TScalarStyle {
    Any,
    Plain,
    SingleQuoted,
    DoubleQuoted,
    Literal,
    // NOTE(review): `Foled` looks like a typo for `Folded`. Renaming would
    // break downstream matches on this public variant, so it is kept as-is.
    Foled,
}
/// A position in the input stream: absolute character index plus a
/// 1-based line number and a 0-based column (see `Scanner::new`/`skip`).
#[derive(Clone, Copy, PartialEq, Debug, Eq)]
pub struct Marker {
    index: usize,
    line: usize,
    col: usize,
}

impl Marker {
    /// Builds a marker from its three coordinates.
    fn new(index: usize, line: usize, col: usize) -> Marker {
        // Field-init shorthand replaces the redundant `index: index` form.
        Marker { index, line, col }
    }
}
/// An error produced while scanning, carrying the position at which it
/// occurred and a human-readable description.
#[derive(Clone, PartialEq, Debug, Eq)]
pub struct ScanError {
    mark: Marker,
    info: String,
}

impl ScanError {
    /// Creates an error located at `loc` and described by `info`.
    pub fn new(loc: Marker, info: &str) -> ScanError {
        ScanError {
            mark: loc,
            info: info.to_string(),
        }
    }
}

// A public error type should be printable and interoperate with the
// standard error trait; both additions are backward-compatible.
impl ::std::fmt::Display for ScanError {
    fn fmt(&self, f: &mut ::std::fmt::Formatter) -> ::std::fmt::Result {
        write!(f, "{} at line {} column {}",
               self.info, self.mark.line, self.mark.col + 1)
    }
}

impl ::std::error::Error for ScanError {}
/// The kind of a token emitted by the scanner.
#[derive(Clone, PartialEq, Debug, Eq)]
pub enum TokenType {
    NoToken,
    /// Start of the stream, carrying the detected encoding.
    StreamStartToken(TEncoding),
    StreamEndToken,
    VersionDirectiveToken,
    TagDirectiveToken,
    DocumentStartToken,
    DocumentEndToken,
    BlockSequenceStartToken,
    BlockMappingStartToken,
    BlockEndToken,
    FlowSequenceStartToken,
    FlowSequenceEndToken,
    FlowMappingStartToken,
    FlowMappingEndToken,
    BlockEntryToken,
    FlowEntryToken,
    KeyToken,
    ValueToken,
    AliasToken,
    AnchorToken,
    TagToken,
    /// A scalar value together with its presentation style.
    ScalarToken(TScalarStyle, String),
}
/// A scanned token: its starting position in the input and its kind.
#[derive(Clone, PartialEq, Debug, Eq)]
pub struct Token(pub Marker, pub TokenType);
/// Bookkeeping for a potential implicit mapping key ("simple key").
///
/// A scalar such as `a` in `a: 1` is only known to be a key once the
/// following `:` is seen; this records where the candidate started so a
/// KEY token can be inserted retroactively (see `fetch_value`).
#[derive(Clone, PartialEq, Debug, Eq)]
struct SimpleKey {
    // Whether a simple key could still start at `mark`.
    possible: bool,
    // Whether a key is syntactically required here (checked on removal).
    required: bool,
    // Number (in tokens-parsed order) of the token this key would become.
    token_number: usize,
    mark: Marker,
}

impl SimpleKey {
    /// A blank, not-yet-possible candidate anchored at `mark`.
    fn new(mark: Marker) -> SimpleKey {
        SimpleKey {
            possible: false,
            required: false,
            token_number: 0,
            // Field-init shorthand replaces the redundant `mark: mark`.
            mark,
        }
    }
}
/// A YAML tokenizer over a `char` iterator.
#[derive(Debug)]
pub struct Scanner<T> {
// Source of input characters.
rdr: T,
// Current position in the input.
mark: Marker,
// Tokens scanned but not yet handed to the caller.
tokens: VecDeque<Token>,
// Lookahead characters pulled from `rdr` but not yet consumed.
buffer: VecDeque<char>,
// Whether the STREAM-START / STREAM-END pseudo tokens were emitted.
stream_start_produced: bool,
stream_end_produced: bool,
// Whether a simple (implicit) mapping key may start at the current position.
simple_key_allowed: bool,
// One simple-key candidate per flow level (plus one for block context).
simple_keys: Vec<SimpleKey>,
// Current block indentation column; -1 before any block context is opened.
indent: isize,
// Stack of enclosing indentation levels.
indents: Vec<isize>,
// Nesting depth of flow collections (`[...]` / `{...}`).
flow_level: usize,
// Count of tokens already returned by `next_token`.
tokens_parsed: usize,
// True when `tokens` is known to hold at least one ready token.
token_available: bool,
}
impl<T: Iterator<Item=char>> Iterator for Scanner<T> {
type Item = Token;
/// Returns the next token, or `None` at end of stream.
///
/// NOTE(review): scan errors are printed to stdout and then swallowed
/// (mapped to `None`); callers that need the error should use
/// `next_token` directly.
fn next(&mut self) -> Option<Token> {
match self.next_token() {
Ok(tok) => tok,
Err(e) => {
println!("Error: {:?}", e);
None
}
}
}
}
/// Is `c` the NUL sentinel used to mark end of input?
#[inline]
fn is_z(c: char) -> bool {
    c == '\0'
}

/// Is `c` a line break (LF or CR)?
#[inline]
fn is_break(c: char) -> bool {
    match c {
        '\n' | '\r' => true,
        _ => false,
    }
}

/// Line break or end of input.
#[inline]
fn is_breakz(c: char) -> bool {
    is_break(c) || is_z(c)
}

/// Space or tab.
#[inline]
fn is_blank(c: char) -> bool {
    match c {
        ' ' | '\t' => true,
        _ => false,
    }
}

/// Blank, line break, or end of input.
#[inline]
fn is_blankz(c: char) -> bool {
    is_blank(c) || is_breakz(c)
}
/// Result of a scanning step: `Ok(())` or a positioned `ScanError`.
pub type ScanResult = Result<(), ScanError>;
impl<T: Iterator<Item=char>> Scanner<T> {
/// Creates the YAML tokenizer.
pub fn new(rdr: T) -> Scanner<T> {
2015-05-24 06:37:36 +00:00
Scanner {
2015-05-24 06:27:42 +00:00
rdr: rdr,
buffer: VecDeque::new(),
mark: Marker::new(0, 1, 0),
tokens: VecDeque::new(),
stream_start_produced: false,
stream_end_produced: false,
simple_key_allowed: true,
simple_keys: Vec::new(),
indent: -1,
indents: Vec::new(),
flow_level: 0,
tokens_parsed: 0,
token_available: false,
2015-05-24 06:37:36 +00:00
}
2015-05-24 06:27:42 +00:00
}
2015-05-24 19:21:53 +00:00
#[inline]
2015-05-24 06:38:54 +00:00
fn lookahead(&mut self, count: usize) {
2015-05-24 06:27:42 +00:00
if self.buffer.len() >= count {
return;
}
2015-05-24 06:37:36 +00:00
for _ in 0..(count - self.buffer.len()) {
2015-05-24 06:27:42 +00:00
self.buffer.push_back(self.rdr.next().unwrap_or('\0'));
}
}
2015-05-24 19:21:53 +00:00
#[inline]
2015-05-24 06:27:42 +00:00
fn skip(&mut self) {
let c = self.buffer.pop_front().unwrap();
self.mark.index += 1;
if c == '\n' {
self.mark.line += 1;
self.mark.col = 0;
} else {
self.mark.col += 1;
}
}
2015-05-24 19:21:53 +00:00
#[inline]
fn skip_line(&mut self) {
if self.buffer[0] == '\r' && self.buffer[1] == '\n' {
self.skip();
self.skip();
} else if is_break(self.buffer[0]) {
self.skip();
}
}
#[inline]
2015-05-24 06:27:42 +00:00
fn ch(&self) -> char {
self.buffer[0]
}
2015-05-24 19:21:53 +00:00
#[inline]
2015-05-24 06:27:42 +00:00
fn ch_is(&self, c: char) -> bool {
self.buffer[0] == c
}
2015-05-24 06:37:36 +00:00
#[allow(dead_code)]
2015-05-24 19:21:53 +00:00
#[inline]
2015-05-24 06:27:42 +00:00
fn eof(&self) -> bool {
self.ch_is('\0')
}
2015-05-24 19:21:53 +00:00
#[inline]
2015-05-24 06:27:42 +00:00
pub fn stream_started(&self) -> bool {
self.stream_start_produced
}
2015-05-24 19:21:53 +00:00
#[inline]
2015-05-24 06:27:42 +00:00
pub fn stream_ended(&self) -> bool {
self.stream_end_produced
}
2015-05-24 19:21:53 +00:00
#[inline]
2015-05-24 06:27:42 +00:00
pub fn mark(&self) -> Marker {
self.mark
}
2015-05-24 19:21:53 +00:00
#[inline]
2015-05-24 06:27:42 +00:00
fn read_break(&mut self, s: &mut String) {
if self.buffer[0] == '\r' && self.buffer[1] == '\n' {
s.push('\n');
self.skip();
self.skip();
} else if self.buffer[0] == '\r' || self.buffer[0] == '\n' {
s.push('\n');
self.skip();
} else {
unreachable!();
}
}
/// Inserts `tok` at offset `pos` from the front of the pending-token
/// queue, shifting later tokens back by one.
///
/// The original emulated insertion with a `push_back` followed by a
/// chain of swaps; `VecDeque::insert` does the same in one call.
fn insert_token(&mut self, pos: usize, tok: Token) {
    // `insert` panics when `pos` is out of bounds, but keep the explicit
    // assert so the failure mode matches the original.
    assert!(pos <= self.tokens.len());
    self.tokens.insert(pos, tok);
}
/// Marks that a simple key may start at the current position.
fn allow_simple_key(&mut self) {
self.simple_key_allowed = true;
}
/// Marks that a simple key may not start at the current position.
fn disallow_simple_key(&mut self) {
self.simple_key_allowed = false;
}
/// Scans one more token (or a stream/document pseudo token) and appends
/// it to `self.tokens`.
pub fn fetch_next_token(&mut self) -> ScanResult {
self.lookahead(1);
// println!("--> fetch_next_token Cur {:?} {:?}", self.mark, self.ch());
// The very first token is always STREAM-START.
if !self.stream_start_produced {
self.fetch_stream_start();
return Ok(());
}
self.skip_to_next_token();
try!(self.stale_simple_keys());
let mark = self.mark;
// Close any block collections whose indentation ends at this column.
self.unroll_indent(mark.col as isize);
// Four characters cover the longest indicators tested below
// ("---" / "..." plus the following blank).
self.lookahead(4);
if is_z(self.ch()) {
try!(self.fetch_stream_end());
return Ok(());
}
// Directives ('%' in column 0) are not implemented yet.
if self.mark.col == 0 && self.ch_is('%') {
unimplemented!();
}
// "---" at column 0 followed by a blank: document start.
if self.mark.col == 0
&& self.buffer[0] == '-'
&& self.buffer[1] == '-'
&& self.buffer[2] == '-'
&& is_blankz(self.buffer[3]) {
try!(self.fetch_document_indicator(TokenType::DocumentStartToken));
return Ok(());
}
// "..." at column 0 followed by a blank: document end.
if self.mark.col == 0
&& self.buffer[0] == '.'
&& self.buffer[1] == '.'
&& self.buffer[2] == '.'
&& is_blankz(self.buffer[3]) {
try!(self.fetch_document_indicator(TokenType::DocumentEndToken));
return Ok(());
}
// Dispatch on the current character (and one character of lookahead).
let c = self.buffer[0];
let nc = self.buffer[1];
match c {
'[' => try!(self.fetch_flow_collection_start(TokenType::FlowSequenceStartToken)),
'{' => try!(self.fetch_flow_collection_start(TokenType::FlowMappingStartToken)),
']' => try!(self.fetch_flow_collection_end(TokenType::FlowSequenceEndToken)),
'}' => try!(self.fetch_flow_collection_end(TokenType::FlowMappingEndToken)),
',' => try!(self.fetch_flow_entry()),
'-' if is_blankz(nc) => try!(self.fetch_block_entry()),
'?' if self.flow_level > 0 || is_blankz(nc) => try!(self.fetch_key()),
':' if self.flow_level > 0 || is_blankz(nc) => try!(self.fetch_value()),
// Aliases, anchors, tags and block scalars are not implemented yet.
'*' => unimplemented!(),
'&' => unimplemented!(),
'!' => unimplemented!(),
'|' if self.flow_level == 0 => unimplemented!(),
'>' if self.flow_level == 0 => unimplemented!(),
'\'' => try!(self.fetch_flow_scalar(true)),
'"' => try!(self.fetch_flow_scalar(false)),
// plain scalar
'-' if !is_blankz(nc) => try!(self.fetch_plain_scalar()),
':' | '?' if !is_blankz(nc) && self.flow_level == 0 => try!(self.fetch_plain_scalar()),
'%' | '@' | '`' => return Err(ScanError::new(self.mark,
&format!("unexpected character: `{}'", c))),
_ => try!(self.fetch_plain_scalar()),
}
Ok(())
}
/// Pops the next token, fetching more from the input if necessary.
/// Returns `Ok(None)` once STREAM-END has been handed out.
pub fn next_token(&mut self) -> Result<Option<Token>, ScanError> {
    if self.stream_end_produced {
        return Ok(None);
    }
    if !self.token_available {
        try!(self.fetch_more_tokens());
    }
    let tok = self.tokens.pop_front().unwrap();
    self.token_available = false;
    self.tokens_parsed += 1;
    if let TokenType::StreamEndToken = tok.1 {
        self.stream_end_produced = true;
    }
    Ok(Some(tok))
}
/// Fetches tokens until at least one is ready to be returned.
///
/// More than one fetch may be needed: while any simple-key candidate is
/// still "possible" for the token at the front of the queue, the scanner
/// must keep reading to learn whether a ':' follows (which would turn
/// that candidate into a KEY token).
pub fn fetch_more_tokens(&mut self) -> ScanResult {
let mut need_more;
loop {
need_more = false;
if self.tokens.is_empty() {
need_more = true;
} else {
try!(self.stale_simple_keys());
for sk in &self.simple_keys {
// The front token could still become a simple key.
if sk.possible && sk.token_number == self.tokens_parsed {
need_more = true;
break;
}
}
}
if !need_more { break; }
try!(self.fetch_next_token());
}
self.token_available = true;
Ok(())
}
/// Invalidates simple-key candidates that can no longer be keys:
/// a simple key must stay on one line and within 1024 characters.
fn stale_simple_keys(&mut self) -> ScanResult {
    for sk in &mut self.simple_keys {
        let expired = sk.mark.line < self.mark.line
            || sk.mark.index + 1024 < self.mark.index;
        if sk.possible && expired {
            if sk.required {
                return Err(ScanError::new(self.mark, "simple key expect ':'"));
            }
            sk.possible = false;
        }
    }
    Ok(())
}
/// Skips whitespace, line breaks and comments that may precede a token.
fn skip_to_next_token(&mut self) {
loop {
self.lookahead(1);
// TODO(chenyh) BOM
match self.ch() {
' ' => self.skip(),
// Tabs are only skipped where a simple key cannot start.
'\t' if self.flow_level > 0 || !self.simple_key_allowed => self.skip(),
'\n' | '\r' => {
self.lookahead(2);
self.skip_line();
// A line break in block context allows a new simple key.
if self.flow_level == 0 {
self.allow_simple_key();
}
},
// Comments run to the end of the line.
'#' => while !is_breakz(self.ch()) { self.skip(); self.lookahead(1); },
_ => break
}
}
}
/// Emits the STREAM-START token and initializes scanner state.
fn fetch_stream_start(&mut self) {
    let mark = self.mark;
    self.indent = -1;
    self.stream_start_produced = true;
    self.allow_simple_key();
    self.tokens
        .push_back(Token(mark, TokenType::StreamStartToken(TEncoding::Utf8)));
    // Seed the simple-key stack with a slot for the block context.
    self.simple_keys.push(SimpleKey::new(Marker::new(0, 0, 0)));
}
/// Emits the STREAM-END token, closing all open indentation levels.
fn fetch_stream_end(&mut self) -> ScanResult {
    // Normalize the position onto a fresh line for the final token.
    if self.mark.col != 0 {
        self.mark.col = 0;
        self.mark.line += 1;
    }
    self.unroll_indent(-1);
    try!(self.remove_simple_key());
    self.disallow_simple_key();
    self.tokens.push_back(Token(self.mark, TokenType::StreamEndToken));
    Ok(())
}
/// Handles '[' or '{': may start a simple key and opens a flow level.
fn fetch_flow_collection_start(&mut self, tok: TokenType) -> ScanResult {
    // The indicators '[' and '{' may start a simple key.
    try!(self.save_simple_key());
    self.increase_flow_level();
    self.allow_simple_key();
    let start_mark = self.mark;
    self.skip();
    self.tokens.push_back(Token(start_mark, tok));
    Ok(())
}

/// Handles ']' or '}': closes the current flow level.
fn fetch_flow_collection_end(&mut self, tok: TokenType) -> ScanResult {
    try!(self.remove_simple_key());
    self.decrease_flow_level();
    self.disallow_simple_key();
    let start_mark = self.mark;
    self.skip();
    self.tokens.push_back(Token(start_mark, tok));
    Ok(())
}

/// Handles ',' inside a flow collection.
fn fetch_flow_entry(&mut self) -> ScanResult {
    try!(self.remove_simple_key());
    self.allow_simple_key();
    let start_mark = self.mark;
    self.skip();
    self.tokens.push_back(Token(start_mark, TokenType::FlowEntryToken));
    Ok(())
}

/// Enters a nested flow collection, giving it its own simple-key slot.
fn increase_flow_level(&mut self) {
    self.simple_keys.push(SimpleKey::new(Marker::new(0, 0, 0)));
    self.flow_level += 1;
}

/// Leaves a flow collection, dropping its simple-key slot.
fn decrease_flow_level(&mut self) {
    if self.flow_level > 0 {
        self.flow_level -= 1;
        self.simple_keys.pop().unwrap();
    }
}
/// Handles '-' followed by a blank: a block sequence entry.
fn fetch_block_entry(&mut self) -> ScanResult {
if self.flow_level == 0 {
// Check if we are allowed to start a new entry.
if !self.simple_key_allowed {
return Err(ScanError::new(self.mark,
"block sequence entries are not allowed in this context"));
}
let mark = self.mark;
// generate BLOCK-SEQUENCE-START if indented
self.roll_indent(mark.col, None, TokenType::BlockSequenceStartToken, mark);
} else {
// - * only allowed in block
unreachable!();
}
try!(self.remove_simple_key());
self.allow_simple_key();
let start_mark = self.mark;
self.skip();
self.tokens.push_back(Token(start_mark, TokenType::BlockEntryToken));
Ok(())
}
/// Handles "---" / "..." at column 0, emitting the given document token.
fn fetch_document_indicator(&mut self, t: TokenType) -> ScanResult {
    self.unroll_indent(-1);
    try!(self.remove_simple_key());
    self.disallow_simple_key();
    let mark = self.mark;
    // Consume the three indicator characters.
    for _ in 0..3 {
        self.skip();
    }
    self.tokens.push_back(Token(mark, t));
    Ok(())
}
2015-05-24 19:21:53 +00:00
fn fetch_flow_scalar(&mut self, single: bool) -> ScanResult {
2015-05-24 06:27:42 +00:00
try!(self.save_simple_key());
self.disallow_simple_key();
2015-05-24 19:21:53 +00:00
let tok = try!(self.scan_flow_scalar(single));
2015-05-24 06:27:42 +00:00
self.tokens.push_back(tok);
Ok(())
}
/// Scans a single- or double-quoted scalar, folding line breaks and
/// processing double-quote escape sequences.
fn scan_flow_scalar(&mut self, single: bool) -> Result<Token, ScanError> {
let start_mark = self.mark;
let mut string = String::new();
let mut leading_break = String::new();
let mut trailing_breaks = String::new();
let mut whitespaces = String::new();
let mut leading_blanks = false;
/* Eat the left quote. */
self.skip();
loop {
/* Check for a document indicator. */
self.lookahead(4);
// NOTE(review): `&&` binds tighter than `||`, so this condition parses as
// `(col==0 && "---") || ("..." && blank)` rather than the presumably
// intended `col==0 && ("---" || "...") && blank` — verify against libyaml.
if self.mark.col == 0 &&
((self.buffer[0] == '-') &&
(self.buffer[1] == '-') &&
(self.buffer[2] == '-')) ||
((self.buffer[0] == '.') &&
(self.buffer[1] == '.') &&
(self.buffer[2] == '.')) &&
is_blankz(self.buffer[3]) {
return Err(ScanError::new(start_mark,
"while scanning a quoted scalar, found unexpected document indicator"));
}
if is_z(self.ch()) {
return Err(ScanError::new(start_mark,
"while scanning a quoted scalar, found unexpected end of stream"));
}
self.lookahead(2);
leading_blanks = false;
// Consume non-blank characters.
while !is_blankz(self.ch()) {
match self.ch() {
// Check for an escaped single quote.
'\'' if self.buffer[1] == '\'' && single => {
string.push('\'');
self.skip();
self.skip();
},
// Check for the right quote.
'\'' if single => { break; },
'"' if !single => { break; },
// Check for an escaped line break.
'\\' if !single && is_break(self.buffer[1]) => {
self.lookahead(3);
self.skip();
self.skip_line();
leading_blanks = true;
break;
}
// Check for an escape sequence.
'\\' if !single => {
let mut code_length = 0usize;
match self.buffer[1] {
'0' => string.push('\0'),
'a' => string.push('\x07'),
'b' => string.push('\x08'),
't' | '\t' => string.push('\t'),
'n' => string.push('\n'),
'v' => string.push('\x0b'),
'f' => string.push('\x0c'),
'r' => string.push('\x0d'),
'e' => string.push('\x1b'),
' ' => string.push('\x20'),
'"' => string.push('"'),
'\'' => string.push('\''),
'\\' => string.push('\\'),
//'N' => { string.push('\xc2'); string.push('\x85') },
'x' => code_length = 2,
'u' => code_length = 4,
'U' => code_length = 8,
_ => return Err(ScanError::new(start_mark,
"while parsing a quoted scalar, found unknown escape character"))
}
self.skip();
self.skip();
// Consume an arbitrary escape code.
// NOTE(review): numeric escapes (\x, \u, \U) are not implemented
// yet; `val` is a placeholder and this path panics.
if code_length > 0 {
let val = 0;
self.lookahead(code_length);
unimplemented!();
}
},
c => { string.push(c); self.skip(); }
}
self.lookahead(2);
}
// Stop once the closing quote is the current character.
match self.ch() {
'\'' if single => { break; },
'"' if !single => { break; },
_ => {}
}
self.lookahead(1);
// Consume blank characters.
while is_blank(self.ch()) || is_break(self.ch()) {
if is_blank(self.ch()) {
// Consume a space or a tab character.
if !leading_blanks {
whitespaces.push(self.ch());
self.skip();
} else {
self.skip();
}
} else {
self.lookahead(2);
// Check if it is a first line break.
if !leading_blanks {
whitespaces.clear();
self.read_break(&mut leading_break);
leading_blanks = true;
} else {
self.read_break(&mut trailing_breaks);
}
}
self.lookahead(1);
}
// Join the whitespaces or fold line breaks.
if leading_blanks {
// A single break folds to a space; additional breaks are kept.
if !leading_break.is_empty() {
if trailing_breaks.is_empty() {
string.push(' ');
} else {
string.extend(trailing_breaks.chars());
trailing_breaks.clear();
}
leading_break.clear();
} else {
string.extend(leading_break.chars());
string.extend(trailing_breaks.chars());
trailing_breaks.clear();
leading_break.clear();
}
leading_blanks = false;
} else {
string.extend(whitespaces.chars());
whitespaces.clear();
}
} // loop
// Eat the right quote.
self.skip();
if single {
Ok(Token(start_mark, TokenType::ScalarToken(TScalarStyle::SingleQuoted, string)))
} else {
Ok(Token(start_mark, TokenType::ScalarToken(TScalarStyle::DoubleQuoted, string)))
}
}
/// Handles the start of a plain (unquoted) scalar.
fn fetch_plain_scalar(&mut self) -> ScanResult {
    // A plain scalar may be a simple key.
    try!(self.save_simple_key());
    self.disallow_simple_key();
    let tok = try!(self.scan_plain_scalar());
    self.tokens.push_back(tok);
    Ok(())
}
fn scan_plain_scalar(&mut self) -> Result<Token, ScanError> {
let indent = self.indent + 1;
let start_mark = self.mark;
let mut string = String::new();
let mut leading_break = String::new();
let mut trailing_breaks = String::new();
let mut whitespaces = String::new();
let mut leading_blanks = false;
loop {
/* Check for a document indicator. */
self.lookahead(4);
if self.mark.col == 0 &&
((self.buffer[0] == '-') &&
(self.buffer[1] == '-') &&
(self.buffer[2] == '-')) ||
((self.buffer[0] == '.') &&
(self.buffer[1] == '.') &&
(self.buffer[2] == '.')) &&
is_blankz(self.buffer[3]) {
break;
}
2015-05-24 06:27:42 +00:00
if self.ch() == '#' { break; }
while !is_blankz(self.ch()) {
if self.flow_level > 0 && self.ch() == ':'
&& is_blankz(self.ch()) {
return Err(ScanError::new(start_mark,
2015-05-24 19:21:53 +00:00
"while scanning a plain scalar, found unexpected ':'"));
2015-05-24 06:27:42 +00:00
}
// indicators ends a plain scalar
match self.ch() {
':' if is_blankz(self.buffer[1]) => break,
2015-05-24 17:34:18 +00:00
',' | ':' | '?' | '[' | ']' |'{' |'}' if self.flow_level > 0 => break,
2015-05-24 06:27:42 +00:00
_ => {}
}
if leading_blanks || !whitespaces.is_empty() {
if leading_blanks {
if !leading_break.is_empty() {
if trailing_breaks.is_empty() {
string.push(' ');
} else {
string.extend(trailing_breaks.chars());
trailing_breaks.clear();
}
leading_break.clear();
} else {
string.extend(leading_break.chars());
string.extend(trailing_breaks.chars());
trailing_breaks.clear();
leading_break.clear();
}
leading_blanks = false;
} else {
string.extend(whitespaces.chars());
whitespaces.clear();
}
}
string.push(self.ch());
self.skip();
2015-05-24 06:38:54 +00:00
self.lookahead(2);
2015-05-24 06:27:42 +00:00
}
// is the end?
if !(is_blank(self.ch()) || is_break(self.ch())) { break; }
2015-05-24 06:38:54 +00:00
self.lookahead(1);
2015-05-24 06:27:42 +00:00
while is_blank(self.ch()) || is_break(self.ch()) {
if is_blank(self.ch()) {
if leading_blanks && (self.mark.col as isize) < indent
&& self.ch() == '\t' {
return Err(ScanError::new(start_mark,
"while scanning a plain scalar, found a tab"));
}
if !leading_blanks {
whitespaces.push(self.ch());
self.skip();
} else {
self.skip();
}
} else {
2015-05-24 06:38:54 +00:00
self.lookahead(2);
2015-05-24 06:27:42 +00:00
// Check if it is a first line break
if !leading_blanks {
whitespaces.clear();
self.read_break(&mut leading_break);
leading_blanks = true;
} else {
self.read_break(&mut trailing_breaks);
}
}
2015-05-24 06:38:54 +00:00
self.lookahead(1);
2015-05-24 06:27:42 +00:00
}
// check intendation level
if self.flow_level == 0 && (self.mark.col as isize) < indent {
break;
}
}
if leading_blanks {
self.allow_simple_key();
}
Ok(Token(start_mark, TokenType::ScalarToken(TScalarStyle::Plain, string)))
}
2015-05-25 11:31:33 +00:00
fn fetch_key(&mut self) -> ScanResult {
let start_mark = self.mark;
if self.flow_level == 0 {
// Check if we are allowed to start a new key (not nessesary simple).
if !self.simple_key_allowed {
return Err(ScanError::new(self.mark, "mapping keys are not allowed in this context"));
}
self.roll_indent(start_mark.col, None,
TokenType::BlockMappingStartToken, start_mark);
}
try!(self.remove_simple_key());
if self.flow_level == 0 {
self.allow_simple_key();
} else {
self.disallow_simple_key();
}
self.skip();
self.tokens.push_back(Token(start_mark, TokenType::KeyToken));
Ok(())
}
/// Handles the value indicator ':'. If a simple key is pending, a KEY
/// token is inserted retroactively at the position where that key began.
fn fetch_value(&mut self) -> ScanResult {
let sk = self.simple_keys.last().unwrap().clone();
let start_mark = self.mark;
if sk.possible {
// insert simple key
let tok = Token(sk.mark, TokenType::KeyToken);
let tokens_parsed = self.tokens_parsed;
self.insert_token(sk.token_number - tokens_parsed, tok);
// Add the BLOCK-MAPPING-START token if needed.
self.roll_indent(sk.mark.col, Some(sk.token_number),
TokenType::BlockMappingStartToken, start_mark);
// The candidate has been consumed; it can no longer become a key.
self.simple_keys.last_mut().unwrap().possible = false;
self.disallow_simple_key();
} else {
// The ':' indicator follows a complex key.
if self.flow_level == 0 {
if !self.simple_key_allowed {
return Err(ScanError::new(start_mark,
"mapping values are not allowed in this context"));
}
self.roll_indent(start_mark.col, None,
TokenType::BlockMappingStartToken, start_mark);
}
if self.flow_level == 0 {
self.allow_simple_key();
} else {
self.disallow_simple_key();
}
}
self.skip();
self.tokens.push_back(Token(start_mark, TokenType::ValueToken));
Ok(())
}
/// Opens a new indentation level at `col` (block context only),
/// emitting `tok` either at queue offset `number` or at the back.
fn roll_indent(&mut self, col: usize, number: Option<usize>,
        tok: TokenType, mark: Marker) {
    // Indentation is meaningless inside flow collections.
    if self.flow_level > 0 {
        return;
    }
    if self.indent < col as isize {
        self.indents.push(self.indent);
        self.indent = col as isize;
        let tokens_parsed = self.tokens_parsed;
        if let Some(n) = number {
            self.insert_token(n - tokens_parsed, Token(mark, tok));
        } else {
            self.tokens.push_back(Token(mark, tok));
        }
    }
}
/// Closes indentation levels deeper than `col`, emitting a BLOCK-END
/// token for each one (block context only).
fn unroll_indent(&mut self, col: isize) {
    if self.flow_level > 0 {
        return;
    }
    while self.indent > col {
        self.tokens.push_back(Token(self.mark, TokenType::BlockEndToken));
        self.indent = self.indents.pop().unwrap();
    }
}
/// Records the current position as a potential simple key, replacing the
/// candidate for the current flow/block level.
fn save_simple_key(&mut self) -> Result<(), ScanError> {
// NOTE(review): libyaml computes `required` for the *block* context
// (`!flow_level && indent == column`), whereas this tests `flow_level > 0`.
// Verify against libyaml's yaml_parser_save_simple_key before relying on it.
let required = self.flow_level > 0 && self.indent == (self.mark.col as isize);
if self.simple_key_allowed {
let mut sk = SimpleKey::new(self.mark);
sk.possible = true;
sk.required = required;
sk.token_number = self.tokens_parsed + self.tokens.len();
try!(self.remove_simple_key());
// Replace the candidate for the current level with the new one.
self.simple_keys.pop();
self.simple_keys.push(sk);
}
Ok(())
}
/// Drops the current simple-key candidate; it is an error if that key
/// was syntactically required.
fn remove_simple_key(&mut self) -> ScanResult {
    let last = self.simple_keys.last_mut().unwrap();
    if last.possible && last.required {
        return Err(ScanError::new(self.mark, "simple key expected"));
    }
    last.possible = false;
    Ok(())
}
}
#[cfg(test)]
mod test {
use super::*;
use super::TokenType::*;
// Asserts that the next token matches the given pattern.
macro_rules! next {
($p:ident, $tk:pat) => {{
let tok = $p.next().unwrap();
match tok.1 {
$tk => {},
_ => { panic!("unexpected token: {:?}",
tok) }
}
}}
}
// Asserts that the next token is a scalar with the given style and value.
macro_rules! next_scalar {
($p:ident, $tk:expr, $v:expr) => {{
let tok = $p.next().unwrap();
match tok.1 {
ScalarToken(style, ref v) => {
assert_eq!(style, $tk);
assert_eq!(v, $v);
},
_ => { panic!("unexpected token: {:?}",
tok) }
}
}}
}
// Asserts that the token stream is exhausted.
macro_rules! end {
($p:ident) => {{
assert_eq!($p.next(), None);
}}
}
/// test cases in libyaml scanner.c
#[test]
fn test_empty() {
let s = "";
let mut p = Scanner::new(s.chars());
next!(p, StreamStartToken(..));
next!(p, StreamEndToken);
end!(p);
}
#[test]
fn test_scalar() {
let s = "a scalar";
let mut p = Scanner::new(s.chars());
next!(p, StreamStartToken(..));
next!(p, ScalarToken(TScalarStyle::Plain, _));
next!(p, StreamEndToken);
end!(p);
}
#[test]
fn test_explicit_scalar() {
let s =
"---
'a scalar'
...
";
let mut p = Scanner::new(s.chars());
next!(p, StreamStartToken(..));
next!(p, DocumentStartToken);
next!(p, ScalarToken(TScalarStyle::SingleQuoted, _));
next!(p, DocumentEndToken);
next!(p, StreamEndToken);
end!(p);
}
#[test]
fn test_multiple_documents() {
let s =
"
'a scalar'
---
'a scalar'
---
'a scalar'
";
let mut p = Scanner::new(s.chars());
next!(p, StreamStartToken(..));
next!(p, ScalarToken(TScalarStyle::SingleQuoted, _));
next!(p, DocumentStartToken);
next!(p, ScalarToken(TScalarStyle::SingleQuoted, _));
next!(p, DocumentStartToken);
next!(p, ScalarToken(TScalarStyle::SingleQuoted, _));
next!(p, StreamEndToken);
end!(p);
}
#[test]
fn test_a_flow_sequence() {
let s = "[item 1, item 2, item 3]";
let mut p = Scanner::new(s.chars());
next!(p, StreamStartToken(..));
next!(p, FlowSequenceStartToken);
next_scalar!(p, TScalarStyle::Plain, "item 1");
next!(p, FlowEntryToken);
next!(p, ScalarToken(TScalarStyle::Plain, _));
next!(p, FlowEntryToken);
next!(p, ScalarToken(TScalarStyle::Plain, _));
next!(p, FlowSequenceEndToken);
next!(p, StreamEndToken);
end!(p);
}
#[test]
fn test_a_flow_mapping() {
let s =
"
{
a simple key: a value, # Note that the KEY token is produced.
? a complex key: another value,
}
";
let mut p = Scanner::new(s.chars());
next!(p, StreamStartToken(..));
next!(p, FlowMappingStartToken);
next!(p, KeyToken);
next!(p, ScalarToken(TScalarStyle::Plain, _));
next!(p, ValueToken);
next!(p, ScalarToken(TScalarStyle::Plain, _));
next!(p, FlowEntryToken);
next!(p, KeyToken);
next_scalar!(p, TScalarStyle::Plain, "a complex key");
next!(p, ValueToken);
next!(p, ScalarToken(TScalarStyle::Plain, _));
next!(p, FlowEntryToken);
next!(p, FlowMappingEndToken);
next!(p, StreamEndToken);
end!(p);
}
#[test]
fn test_block_sequences() {
let s =
"
- item 1
- item 2
-
- item 3.1
- item 3.2
-
key 1: value 1
key 2: value 2
";
let mut p = Scanner::new(s.chars());
next!(p, StreamStartToken(..));
next!(p, BlockSequenceStartToken);
next!(p, BlockEntryToken);
next_scalar!(p, TScalarStyle::Plain, "item 1");
next!(p, BlockEntryToken);
next_scalar!(p, TScalarStyle::Plain, "item 2");
next!(p, BlockEntryToken);
next!(p, BlockSequenceStartToken);
next!(p, BlockEntryToken);
next_scalar!(p, TScalarStyle::Plain, "item 3.1");
next!(p, BlockEntryToken);
next_scalar!(p, TScalarStyle::Plain, "item 3.2");
next!(p, BlockEndToken);
next!(p, BlockEntryToken);
next!(p, BlockMappingStartToken);
next!(p, KeyToken);
next_scalar!(p, TScalarStyle::Plain, "key 1");
next!(p, ValueToken);
next_scalar!(p, TScalarStyle::Plain, "value 1");
next!(p, KeyToken);
next_scalar!(p, TScalarStyle::Plain, "key 2");
next!(p, ValueToken);
next_scalar!(p, TScalarStyle::Plain, "value 2");
next!(p, BlockEndToken);
next!(p, BlockEndToken);
next!(p, StreamEndToken);
end!(p);
}
#[test]
fn test_block_mappings() {
let s =
"
a simple key: a value # The KEY token is produced here.
? a complex key
: another value
a mapping:
key 1: value 1
key 2: value 2
a sequence:
- item 1
- item 2
";
let mut p = Scanner::new(s.chars());
next!(p, StreamStartToken(..));
next!(p, BlockMappingStartToken);
next!(p, KeyToken);
next!(p, ScalarToken(_, _));
next!(p, ValueToken);
next!(p, ScalarToken(_, _));
next!(p, KeyToken);
next!(p, ScalarToken(_, _));
next!(p, ValueToken);
next!(p, ScalarToken(_, _));
next!(p, KeyToken);
next!(p, ScalarToken(_, _));
next!(p, ValueToken); // libyaml comment seems to be wrong
next!(p, BlockMappingStartToken);
next!(p, KeyToken);
next!(p, ScalarToken(_, _));
next!(p, ValueToken);
next!(p, ScalarToken(_, _));
next!(p, KeyToken);
next!(p, ScalarToken(_, _));
next!(p, ValueToken);
next!(p, ScalarToken(_, _));
next!(p, BlockEndToken);
next!(p, KeyToken);
next!(p, ScalarToken(_, _));
next!(p, ValueToken);
next!(p, BlockSequenceStartToken);
next!(p, BlockEntryToken);
next!(p, ScalarToken(_, _));
next!(p, BlockEntryToken);
next!(p, ScalarToken(_, _));
next!(p, BlockEndToken);
next!(p, BlockEndToken);
next!(p, StreamEndToken);
end!(p);
}
#[test]
fn test_no_block_sequence_start() {
let s =
"
key:
- item 1
- item 2
";
let mut p = Scanner::new(s.chars());
next!(p, StreamStartToken(..));
next!(p, BlockMappingStartToken);
next!(p, KeyToken);
next_scalar!(p, TScalarStyle::Plain, "key");
next!(p, ValueToken);
next!(p, BlockEntryToken);
next_scalar!(p, TScalarStyle::Plain, "item 1");
next!(p, BlockEntryToken);
next_scalar!(p, TScalarStyle::Plain, "item 2");
next!(p, BlockEndToken);
next!(p, StreamEndToken);
end!(p);
}
#[test]
fn test_collections_in_sequence() {
let s =
"
- - item 1
- item 2
- key 1: value 1
key 2: value 2
- ? complex key
: complex value
";
let mut p = Scanner::new(s.chars());
next!(p, StreamStartToken(..));
next!(p, BlockSequenceStartToken);
next!(p, BlockEntryToken);
next!(p, BlockSequenceStartToken);
next!(p, BlockEntryToken);
next_scalar!(p, TScalarStyle::Plain, "item 1");
next!(p, BlockEntryToken);
next_scalar!(p, TScalarStyle::Plain, "item 2");
next!(p, BlockEndToken);
next!(p, BlockEntryToken);
next!(p, BlockMappingStartToken);
next!(p, KeyToken);
next_scalar!(p, TScalarStyle::Plain, "key 1");
next!(p, ValueToken);
next_scalar!(p, TScalarStyle::Plain, "value 1");
next!(p, KeyToken);
next_scalar!(p, TScalarStyle::Plain, "key 2");
next!(p, ValueToken);
next_scalar!(p, TScalarStyle::Plain, "value 2");
next!(p, BlockEndToken);
next!(p, BlockEntryToken);
next!(p, BlockMappingStartToken);
next!(p, KeyToken);
next_scalar!(p, TScalarStyle::Plain, "complex key");
next!(p, ValueToken);
next_scalar!(p, TScalarStyle::Plain, "complex value");
next!(p, BlockEndToken);
next!(p, BlockEndToken);
next!(p, StreamEndToken);
end!(p);
}
#[test]
fn test_collections_in_mapping() {
let s =
"
? a sequence
: - item 1
- item 2
? a mapping
: key 1: value 1
key 2: value 2
";
let mut p = Scanner::new(s.chars());
next!(p, StreamStartToken(..));
next!(p, BlockMappingStartToken);
next!(p, KeyToken);
next_scalar!(p, TScalarStyle::Plain, "a sequence");
next!(p, ValueToken);
next!(p, BlockSequenceStartToken);
next!(p, BlockEntryToken);
next_scalar!(p, TScalarStyle::Plain, "item 1");
next!(p, BlockEntryToken);
next_scalar!(p, TScalarStyle::Plain, "item 2");
next!(p, BlockEndToken);
next!(p, KeyToken);
next_scalar!(p, TScalarStyle::Plain, "a mapping");
next!(p, ValueToken);
next!(p, BlockMappingStartToken);
next!(p, KeyToken);
next_scalar!(p, TScalarStyle::Plain, "key 1");
next!(p, ValueToken);
next_scalar!(p, TScalarStyle::Plain, "value 1");
next!(p, KeyToken);
next_scalar!(p, TScalarStyle::Plain, "key 2");
next!(p, ValueToken);
next_scalar!(p, TScalarStyle::Plain, "value 2");
next!(p, BlockEndToken);
next!(p, BlockEndToken);
next!(p, StreamEndToken);
end!(p);
}
}