Add scan_uri_escapes

This commit is contained in:
Yuheng Chen 2015-05-30 18:49:54 +08:00
parent 50eac23091
commit 6b61f4357d
2 changed files with 67 additions and 5 deletions

View file

@ -499,8 +499,18 @@ impl<T: Iterator<Item=char>> Scanner<T> {
try!(self.scan_tag_directive_value(&start_mark)) try!(self.scan_tag_directive_value(&start_mark))
}, },
// XXX This should be a warning instead of an error // XXX This should be a warning instead of an error
_ => return Err(ScanError::new(start_mark, _ => {
"while scanning a directive, found uknown directive name")) // skip current line
self.lookahead(1);
while !is_breakz(self.ch()) {
self.skip();
self.lookahead(1);
}
// XXX return an empty TagDirective token
Token(start_mark, TokenType::TagDirectiveToken(String::new(), String::new()))
// return Err(ScanError::new(start_mark,
// "while scanning a directive, found unknown directive name"))
}
}; };
self.lookahead(1); self.lookahead(1);
@ -722,7 +732,7 @@ impl<T: Iterator<Item=char>> Scanner<T> {
Ok(string) Ok(string)
} }
fn scan_tag_uri(&mut self, directive: bool, is_secondary: bool, fn scan_tag_uri(&mut self, directive: bool, _is_secondary: bool,
head: &String, mark: &Marker) -> Result<String, ScanError> { head: &String, mark: &Marker) -> Result<String, ScanError> {
let mut length = head.len(); let mut length = head.len();
let mut string = String::new(); let mut string = String::new();
@ -750,7 +760,7 @@ impl<T: Iterator<Item=char>> Scanner<T> {
} { } {
// Check if it is a URI-escape sequence. // Check if it is a URI-escape sequence.
if self.ch() == '%' { if self.ch() == '%' {
unimplemented!(); string.push(try!(self.scan_uri_escapes(directive, mark)));
} else { } else {
string.push(self.ch()); string.push(self.ch());
self.skip(); self.skip();
@ -768,6 +778,58 @@ impl<T: Iterator<Item=char>> Scanner<T> {
Ok(string) Ok(string)
} }
/// Decodes one character written as a run of URI-escaped (`%XX`) UTF-8 octets.
///
/// The leading octet determines how many octets make up the character
/// (1 to 4); each following octet must be a UTF-8 continuation octet
/// (`10xxxxxx`). The payload bits of all octets are combined into a single
/// Unicode scalar value. Every consumed octet advances the scanner by the
/// three characters of its `%XX` escape.
///
/// `_directive` is currently unused. `mark` is the position reported in any
/// error returned from here.
///
/// # Errors
///
/// Returns a `ScanError` at `mark` when:
/// * the input at the current position is not `%` followed by two hex digits,
/// * the leading octet is not a valid UTF-8 leading octet,
/// * a following octet is not a UTF-8 continuation octet,
/// * the decoded value is an overlong encoding or is not a valid `char`
///   (e.g. a surrogate or a value above U+10FFFF).
fn scan_uri_escapes(&mut self, _directive: bool, mark: &Marker)
    -> Result<char, ScanError> {
    // Octets still expected for the current character; 0 means the leading
    // octet has not been read yet.
    let mut width = 0usize;
    // Smallest code point that legitimately needs the detected sequence
    // length; anything below it is an overlong encoding.
    let mut min_code = 0u32;
    let mut code = 0u32;
    loop {
        self.lookahead(3);

        if !(self.ch() == '%'
             && is_hex(self.buffer[1])
             && is_hex(self.buffer[2])) {
            return Err(ScanError::new(*mark,
                "while parsing a tag, did not find URI escaped octet"));
        }

        let octet = (as_hex(self.buffer[1]) << 4) + as_hex(self.buffer[2]);
        if width == 0 {
            // Leading octet: derive the sequence length and keep only the
            // payload bits (the high bits merely encode the length).
            let (expected, payload, minimum) = match octet {
                _ if octet & 0x80 == 0x00 => (1, octet & 0x7F, 0x0),
                _ if octet & 0xE0 == 0xC0 => (2, octet & 0x1F, 0x80),
                _ if octet & 0xF0 == 0xE0 => (3, octet & 0x0F, 0x800),
                _ if octet & 0xF8 == 0xF0 => (4, octet & 0x07, 0x10000),
                _ => {
                    return Err(ScanError::new(*mark,
                        "while parsing a tag, found an incorrect leading UTF-8 octet"));
                }
            };
            width = expected;
            code = payload;
            min_code = minimum;
        } else {
            if octet & 0xc0 != 0x80 {
                return Err(ScanError::new(*mark,
                    "while parsing a tag, found an incorrect trailing UTF-8 octet"));
            }
            // Continuation octet: contributes its low six bits.
            code = (code << 6) | (octet & 0x3F);
        }

        // Consume the three characters of the `%XX` escape.
        self.skip();
        self.skip();
        self.skip();

        width -= 1;
        if width == 0 {
            break;
        }
    }

    // Reject overlong encodings (a code point spelled with more octets than
    // necessary), which are not valid UTF-8.
    if code < min_code {
        return Err(ScanError::new(*mark,
            "while parsing a tag, found an invalid UTF-8 codepoint"));
    }

    match char::from_u32(code) {
        Some(ch) => Ok(ch),
        None => Err(ScanError::new(*mark,
            "while parsing a tag, found an invalid UTF-8 codepoint"))
    }
}
fn fetch_anchor(&mut self, alias: bool) -> ScanResult { fn fetch_anchor(&mut self, alias: bool) -> ScanResult {
try!(self.save_simple_key()); try!(self.save_simple_key());
self.disallow_simple_key(); self.disallow_simple_key();

View file

@ -1,9 +1,9 @@
#![allow(dead_code)] #![allow(dead_code)]
#![allow(non_upper_case_globals)]
extern crate yaml_rust; extern crate yaml_rust;
use yaml_rust::parser::{Parser, EventReceiver, Event}; use yaml_rust::parser::{Parser, EventReceiver, Event};
use yaml_rust::scanner::TScalarStyle; use yaml_rust::scanner::TScalarStyle;
use yaml_rust::yaml::Yaml;
#[derive(Clone, PartialEq, PartialOrd, Debug)] #[derive(Clone, PartialEq, PartialOrd, Debug)]
enum TestEvent { enum TestEvent {