From 008da5005c6c07174796b8c9eeac07a5a35e4f86 Mon Sep 17 00:00:00 2001 From: Yuheng Chen Date: Thu, 28 May 2015 22:07:59 +0800 Subject: [PATCH] Add VersionDirectiveToken --- parser/src/parser.rs | 38 +++++++++-- parser/src/scanner.rs | 148 +++++++++++++++++++++++++++++++++++++++++- 2 files changed, 178 insertions(+), 8 deletions(-) diff --git a/parser/src/parser.rs b/parser/src/parser.rs index 8bc2918..1108e2d 100644 --- a/parser/src/parser.rs +++ b/parser/src/parser.rs @@ -82,8 +82,12 @@ impl> Parser { self.token = self.scanner.next(); } if self.token.is_none() { - return Err(ScanError::new(self.scanner.mark(), - "unexpected eof")); + match self.scanner.get_error() { + None => + return Err(ScanError::new(self.scanner.mark(), + "unexpected eof")), + Some(e) => return Err(e), + } } // XXX better? Ok(self.token.clone().unwrap()) @@ -200,7 +204,7 @@ impl> Parser { } fn state_machine(&mut self) -> ParseResult { - let next_tok = self.peek(); + let next_tok = try!(self.peek()); println!("cur_state {:?}, next tok: {:?}", self.state, next_tok); match self.state { State::StreamStart => self.stream_start(), @@ -272,13 +276,14 @@ impl> Parser { self.skip(); return Ok(Event::StreamEnd); }, - TokenType::VersionDirectiveToken + TokenType::VersionDirectiveToken(..) | TokenType::TagDirectiveToken | TokenType::DocumentStartToken => { // explicit document self._explict_document_start() }, _ if implicit => { + try!(self.parser_process_directives()); self.push_state(State::DocumentEnd); self.state = State::BlockNode; Ok(Event::DocumentStart) @@ -290,7 +295,30 @@ impl> Parser { } } + fn parser_process_directives(&mut self) -> Result<(), ScanError> { + loop { + let tok = try!(self.peek()); + match tok.1 { + TokenType::VersionDirectiveToken(_, _) => { + // XXX parsing with warning according to spec + //if major != 1 || minor > 2 { + // return Err(ScanError::new(tok.0, + // "found incompatible YAML document")); + //} + }, + TokenType::TagDirectiveToken => { + unimplemented!(); + }, + _ => break + } + self.skip(); + } + // TODO tag directive + Ok(()) + } + fn _explict_document_start(&mut self) -> ParseResult { + try!(self.parser_process_directives()); let tok = try!(self.peek()); if tok.1 != TokenType::DocumentStartToken { return Err(ScanError::new(tok.0, "did not find expected ")); @@ -304,7 +332,7 @@ impl> Parser { fn document_content(&mut self) -> ParseResult { let tok = try!(self.peek()); match tok.1 { - TokenType::VersionDirectiveToken + TokenType::VersionDirectiveToken(..) |TokenType::TagDirectiveToken |TokenType::DocumentStartToken |TokenType::DocumentEndToken diff --git a/parser/src/scanner.rs b/parser/src/scanner.rs index 4b0cdfa..00ba30d 100644 --- a/parser/src/scanner.rs +++ b/parser/src/scanner.rs @@ -54,7 +54,8 @@ pub enum TokenType { NoToken, StreamStartToken(TEncoding), StreamEndToken, - VersionDirectiveToken, + /// major, minor + VersionDirectiveToken(u32, u32), TagDirectiveToken, DocumentStartToken, DocumentEndToken, @@ -103,6 +104,7 @@ pub struct Scanner { mark: Marker, tokens: VecDeque, buffer: VecDeque, + error: Option, stream_start_produced: bool, stream_end_produced: bool, @@ -118,10 +120,13 @@ pub struct Scanner { impl> Iterator for Scanner { type Item = Token; fn next(&mut self) -> Option { + if self.error.is_some() { + return None; + } match self.next_token() { Ok(tok) => tok, Err(e) => { - println!("Error: {:?}", e); + self.error = Some(e); None } } @@ -178,6 +183,7 @@ impl> Scanner { buffer: VecDeque::new(), mark: Marker::new(0, 1, 0), tokens: VecDeque::new(), + error: None, stream_start_produced: false, stream_end_produced: false, @@ -190,6 +196,13 @@ impl> Scanner { token_available: false, } } + #[inline] + pub fn get_error(&self) -> Option { + match self.error { + None => None, + Some(ref e) => Some(e.clone()), + } + } #[inline] fn lookahead(&mut self, count: usize) { @@ -296,8 +309,9 @@ impl> Scanner { return Ok(()); } + // Is it a directive? if self.mark.col == 0 && self.ch_is('%') { - unimplemented!(); + return self.fetch_directive(); } if self.mark.col == 0 @@ -449,6 +463,134 @@ impl> Scanner { Ok(()) } + fn fetch_directive(&mut self) -> ScanResult { + self.unroll_indent(-1); + try!(self.remove_simple_key()); + + self.disallow_simple_key(); + + let tok = try!(self.scan_directive()); + + self.tokens.push_back(tok); + + Ok(()) + } + + fn scan_directive(&mut self) -> Result { + let start_mark = self.mark; + self.skip(); + + let name = try!(self.scan_directive_name()); + let tok = match name.as_ref() { + "YAML" => { + try!(self.scan_version_directive_value(&start_mark)) + }, + "TAG" => { + try!(self.scan_tag_directive_value(&start_mark)) + }, + _ => return Err(ScanError::new(start_mark, + "while scanning a directive, found uknown directive name")) + }; + self.lookahead(1); + + while is_blank(self.ch()) { + self.skip(); + self.lookahead(1); + } + + if self.ch() == '#' { + while !is_breakz(self.ch()) { + self.skip(); + self.lookahead(1); + } + } + + if !is_breakz(self.ch()) { + return Err(ScanError::new(start_mark, + "while scanning a directive, did not find expected comment or line break")); + } + + // Eat a line break + if is_break(self.ch()) { + self.lookahead(2); + self.skip_line(); + } + + Ok(tok) + } + + fn scan_version_directive_value(&mut self, mark: &Marker) -> Result { + self.lookahead(1); + + while is_blank(self.ch()) { + self.skip(); + self.lookahead(1); + } + + let major = try!(self.scan_version_directive_number(mark)); + + if self.ch() != '.' { + return Err(ScanError::new(*mark, + "while scanning a YAML directive, did not find expected digit or '.' character")); + } + + self.skip(); + + let minor = try!(self.scan_version_directive_number(mark)); + + Ok(Token(*mark, TokenType::VersionDirectiveToken(major, minor))) + } + + fn scan_directive_name(&mut self) -> Result { + let start_mark = self.mark; + let mut string = String::new(); + self.lookahead(1); + while self.ch().is_alphabetic() { + string.push(self.ch()); + self.skip(); + self.lookahead(1); + } + + if string.is_empty() { + return Err(ScanError::new(start_mark, + "while scanning a directive, could not find expected directive name")); + } + + if !is_blankz(self.ch()) { + return Err(ScanError::new(start_mark, + "while scanning a directive, found unexpected non-alphabetical character")); + } + + Ok(string) + } + + fn scan_version_directive_number(&mut self, mark: &Marker) -> Result { + let mut val = 0u32; + let mut length = 0usize; + self.lookahead(1); + while is_digit(self.ch()) { + if length + 1 > 9 { + return Err(ScanError::new(*mark, + "while scanning a YAML directive, found extremely long version number")); + } + length += 1; + val = val * 10 + ((self.ch() as u32) - ('0' as u32)); + self.skip(); + self.lookahead(1); + } + + if length == 0 { + return Err(ScanError::new(*mark, + "while scanning a YAML directive, did not find expected version number")); + } + + Ok(val) + } + + fn scan_tag_directive_value(&mut self, mark: &Marker) -> Result { + unimplemented!(); + } + fn fetch_flow_collection_start(&mut self, tok :TokenType) -> ScanResult { // The indicators '[' and '{' may start a simple key. try!(self.save_simple_key());