f9eb8c6d60
Change scanner's complex test: ```diff - *coffee: + *coffee : amount: 4 - *cookies: + *cookies : amount: 4 ``` According to https://play.yaml.io/main/parser, this example was invalid in the first place. Adding a space makes it so that the colon is not part of the alias name. Also fix colons not being able to be part of anchors.
1749 lines
52 KiB
Rust
1749 lines
52 KiB
Rust
#![allow(clippy::cast_possible_wrap)]
|
|
#![allow(clippy::cast_sign_loss)]
|
|
|
|
use std::collections::VecDeque;
|
|
use std::error::Error;
|
|
use std::{char, fmt};
|
|
|
|
#[derive(Clone, Copy, PartialEq, Debug, Eq)]
|
|
pub enum TEncoding {
|
|
Utf8,
|
|
}
|
|
|
|
#[derive(Clone, Copy, PartialEq, Debug, Eq)]
|
|
pub enum TScalarStyle {
|
|
Any,
|
|
Plain,
|
|
SingleQuoted,
|
|
DoubleQuoted,
|
|
|
|
Literal,
|
|
Foled,
|
|
}
|
|
|
|
/// A location in a yaml document.
|
|
#[derive(Clone, Copy, PartialEq, Debug, Eq)]
|
|
pub struct Marker {
|
|
index: usize,
|
|
line: usize,
|
|
col: usize,
|
|
}
|
|
|
|
impl Marker {
|
|
fn new(index: usize, line: usize, col: usize) -> Marker {
|
|
Marker { index, line, col }
|
|
}
|
|
|
|
/// Return the index (in bytes) of the marker in the source.
|
|
#[must_use]
|
|
pub fn index(&self) -> usize {
|
|
self.index
|
|
}
|
|
|
|
/// Return the line of the marker in the source.
|
|
#[must_use]
|
|
pub fn line(&self) -> usize {
|
|
self.line
|
|
}
|
|
|
|
/// Return the column of the marker in the source.
|
|
#[must_use]
|
|
pub fn col(&self) -> usize {
|
|
self.col
|
|
}
|
|
}
|
|
|
|
/// An error that occured while scanning.
|
|
#[derive(Clone, PartialEq, Debug, Eq)]
|
|
pub struct ScanError {
|
|
mark: Marker,
|
|
info: String,
|
|
}
|
|
|
|
impl ScanError {
|
|
/// Create a new error from a location and an error string.
|
|
#[must_use]
|
|
pub fn new(loc: Marker, info: &str) -> ScanError {
|
|
ScanError {
|
|
mark: loc,
|
|
info: info.to_owned(),
|
|
}
|
|
}
|
|
|
|
/// Return the marker pointing to the error in the source.
|
|
#[must_use]
|
|
pub fn marker(&self) -> &Marker {
|
|
&self.mark
|
|
}
|
|
|
|
/// Return the information string describing the error that happened.
|
|
#[must_use]
|
|
pub fn info(&self) -> &str {
|
|
self.info.as_ref()
|
|
}
|
|
}
|
|
|
|
impl Error for ScanError {
|
|
fn description(&self) -> &str {
|
|
self.info.as_ref()
|
|
}
|
|
|
|
fn cause(&self) -> Option<&dyn Error> {
|
|
None
|
|
}
|
|
}
|
|
|
|
impl fmt::Display for ScanError {
|
|
// col starts from 0
|
|
fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
|
|
write!(
|
|
formatter,
|
|
"{} at line {} column {}",
|
|
self.info,
|
|
self.mark.line,
|
|
self.mark.col + 1
|
|
)
|
|
}
|
|
}
|
|
|
|
#[derive(Clone, PartialEq, Debug, Eq)]
|
|
pub enum TokenType {
|
|
NoToken,
|
|
StreamStart(TEncoding),
|
|
StreamEnd,
|
|
/// major, minor
|
|
VersionDirective(u32, u32),
|
|
/// handle, prefix
|
|
TagDirective(String, String),
|
|
DocumentStart,
|
|
DocumentEnd,
|
|
BlockSequenceStart,
|
|
BlockMappingStart,
|
|
BlockEnd,
|
|
FlowSequenceStart,
|
|
FlowSequenceEnd,
|
|
FlowMappingStart,
|
|
FlowMappingEnd,
|
|
BlockEntry,
|
|
FlowEntry,
|
|
Key,
|
|
Value,
|
|
Alias(String),
|
|
Anchor(String),
|
|
/// handle, suffix
|
|
Tag(String, String),
|
|
Scalar(TScalarStyle, String),
|
|
}
|
|
|
|
#[derive(Clone, PartialEq, Debug, Eq)]
|
|
pub struct Token(pub Marker, pub TokenType);
|
|
|
|
#[derive(Clone, PartialEq, Debug, Eq)]
|
|
struct SimpleKey {
|
|
possible: bool,
|
|
required: bool,
|
|
token_number: usize,
|
|
mark: Marker,
|
|
}
|
|
|
|
impl SimpleKey {
|
|
fn new(mark: Marker) -> SimpleKey {
|
|
SimpleKey {
|
|
possible: false,
|
|
required: false,
|
|
token_number: 0,
|
|
mark,
|
|
}
|
|
}
|
|
}
|
|
|
|
#[derive(Debug)]
|
|
#[allow(clippy::struct_excessive_bools)]
|
|
pub struct Scanner<T> {
|
|
rdr: T,
|
|
mark: Marker,
|
|
tokens: VecDeque<Token>,
|
|
buffer: VecDeque<char>,
|
|
error: Option<ScanError>,
|
|
|
|
stream_start_produced: bool,
|
|
stream_end_produced: bool,
|
|
adjacent_value_allowed_at: usize,
|
|
simple_key_allowed: bool,
|
|
simple_keys: Vec<SimpleKey>,
|
|
indent: isize,
|
|
indents: Vec<isize>,
|
|
flow_level: u8,
|
|
tokens_parsed: usize,
|
|
token_available: bool,
|
|
}
|
|
|
|
impl<T: Iterator<Item = char>> Iterator for Scanner<T> {
|
|
type Item = Token;
|
|
fn next(&mut self) -> Option<Token> {
|
|
if self.error.is_some() {
|
|
return None;
|
|
}
|
|
match self.next_token() {
|
|
Ok(tok) => tok,
|
|
Err(e) => {
|
|
self.error = Some(e);
|
|
None
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
#[inline]
|
|
fn is_z(c: char) -> bool {
|
|
c == '\0'
|
|
}
|
|
#[inline]
|
|
fn is_break(c: char) -> bool {
|
|
c == '\n' || c == '\r'
|
|
}
|
|
#[inline]
|
|
fn is_breakz(c: char) -> bool {
|
|
is_break(c) || is_z(c)
|
|
}
|
|
#[inline]
|
|
fn is_blank(c: char) -> bool {
|
|
c == ' ' || c == '\t'
|
|
}
|
|
#[inline]
|
|
fn is_blankz(c: char) -> bool {
|
|
is_blank(c) || is_breakz(c)
|
|
}
|
|
#[inline]
|
|
fn is_digit(c: char) -> bool {
|
|
c.is_ascii_digit()
|
|
}
|
|
#[inline]
|
|
fn is_alpha(c: char) -> bool {
|
|
matches!(c, '0'..='9' | 'a'..='z' | 'A'..='Z' | '_' | '-')
|
|
}
|
|
#[inline]
|
|
fn is_hex(c: char) -> bool {
|
|
c.is_ascii_digit() || ('a'..='f').contains(&c) || ('A'..='F').contains(&c)
|
|
}
|
|
#[inline]
|
|
fn as_hex(c: char) -> u32 {
|
|
match c {
|
|
'0'..='9' => (c as u32) - ('0' as u32),
|
|
'a'..='f' => (c as u32) - ('a' as u32) + 10,
|
|
'A'..='F' => (c as u32) - ('A' as u32) + 10,
|
|
_ => unreachable!(),
|
|
}
|
|
}
|
|
#[inline]
|
|
fn is_flow(c: char) -> bool {
|
|
matches!(c, ',' | '[' | ']' | '{' | '}')
|
|
}
|
|
|
|
pub type ScanResult = Result<(), ScanError>;
|
|
|
|
impl<T: Iterator<Item = char>> Scanner<T> {
|
|
/// Creates the YAML tokenizer.
|
|
pub fn new(rdr: T) -> Scanner<T> {
|
|
Scanner {
|
|
rdr,
|
|
buffer: VecDeque::new(),
|
|
mark: Marker::new(0, 1, 0),
|
|
tokens: VecDeque::new(),
|
|
error: None,
|
|
|
|
stream_start_produced: false,
|
|
stream_end_produced: false,
|
|
adjacent_value_allowed_at: 0,
|
|
simple_key_allowed: true,
|
|
simple_keys: Vec::new(),
|
|
indent: -1,
|
|
indents: Vec::new(),
|
|
flow_level: 0,
|
|
tokens_parsed: 0,
|
|
token_available: false,
|
|
}
|
|
}
|
|
#[inline]
|
|
pub fn get_error(&self) -> Option<ScanError> {
|
|
self.error.as_ref().map(std::clone::Clone::clone)
|
|
}
|
|
|
|
#[inline]
|
|
fn lookahead(&mut self, count: usize) {
|
|
if self.buffer.len() >= count {
|
|
return;
|
|
}
|
|
for _ in 0..(count - self.buffer.len()) {
|
|
self.buffer.push_back(self.rdr.next().unwrap_or('\0'));
|
|
}
|
|
}
|
|
#[inline]
|
|
fn skip(&mut self) {
|
|
let c = self.buffer.pop_front().unwrap();
|
|
|
|
self.mark.index += 1;
|
|
if c == '\n' {
|
|
self.mark.line += 1;
|
|
self.mark.col = 0;
|
|
} else {
|
|
self.mark.col += 1;
|
|
}
|
|
}
|
|
#[inline]
|
|
fn skip_line(&mut self) {
|
|
if self.buffer[0] == '\r' && self.buffer[1] == '\n' {
|
|
self.skip();
|
|
self.skip();
|
|
} else if is_break(self.buffer[0]) {
|
|
self.skip();
|
|
}
|
|
}
|
|
#[inline]
|
|
fn ch(&self) -> char {
|
|
self.buffer[0]
|
|
}
|
|
#[inline]
|
|
fn ch_is(&self, c: char) -> bool {
|
|
self.buffer[0] == c
|
|
}
|
|
#[allow(dead_code)]
|
|
#[inline]
|
|
fn eof(&self) -> bool {
|
|
self.ch_is('\0')
|
|
}
|
|
#[inline]
|
|
pub fn stream_started(&self) -> bool {
|
|
self.stream_start_produced
|
|
}
|
|
#[inline]
|
|
pub fn stream_ended(&self) -> bool {
|
|
self.stream_end_produced
|
|
}
|
|
#[inline]
|
|
pub fn mark(&self) -> Marker {
|
|
self.mark
|
|
}
|
|
#[inline]
|
|
fn read_break(&mut self, s: &mut String) {
|
|
if self.buffer[0] == '\r' && self.buffer[1] == '\n' {
|
|
s.push('\n');
|
|
self.skip();
|
|
self.skip();
|
|
} else if self.buffer[0] == '\r' || self.buffer[0] == '\n' {
|
|
s.push('\n');
|
|
self.skip();
|
|
} else {
|
|
unreachable!();
|
|
}
|
|
}
|
|
fn insert_token(&mut self, pos: usize, tok: Token) {
|
|
let old_len = self.tokens.len();
|
|
assert!(pos <= old_len);
|
|
self.tokens.push_back(tok);
|
|
for i in 0..old_len - pos {
|
|
self.tokens.swap(old_len - i, old_len - i - 1);
|
|
}
|
|
}
|
|
fn allow_simple_key(&mut self) {
|
|
self.simple_key_allowed = true;
|
|
}
|
|
fn disallow_simple_key(&mut self) {
|
|
self.simple_key_allowed = false;
|
|
}
|
|
|
|
pub fn fetch_next_token(&mut self) -> ScanResult {
|
|
self.lookahead(1);
|
|
// println!("--> fetch_next_token Cur {:?} {:?}", self.mark, self.ch());
|
|
|
|
if !self.stream_start_produced {
|
|
self.fetch_stream_start();
|
|
return Ok(());
|
|
}
|
|
self.skip_to_next_token();
|
|
|
|
self.stale_simple_keys()?;
|
|
|
|
let mark = self.mark;
|
|
self.unroll_indent(mark.col as isize);
|
|
|
|
self.lookahead(4);
|
|
|
|
if is_z(self.ch()) {
|
|
self.fetch_stream_end()?;
|
|
return Ok(());
|
|
}
|
|
|
|
// Is it a directive?
|
|
if self.mark.col == 0 && self.ch_is('%') {
|
|
return self.fetch_directive();
|
|
}
|
|
|
|
if self.mark.col == 0
|
|
&& self.buffer[0] == '-'
|
|
&& self.buffer[1] == '-'
|
|
&& self.buffer[2] == '-'
|
|
&& is_blankz(self.buffer[3])
|
|
{
|
|
self.fetch_document_indicator(TokenType::DocumentStart)?;
|
|
return Ok(());
|
|
}
|
|
|
|
if self.mark.col == 0
|
|
&& self.buffer[0] == '.'
|
|
&& self.buffer[1] == '.'
|
|
&& self.buffer[2] == '.'
|
|
&& is_blankz(self.buffer[3])
|
|
{
|
|
self.fetch_document_indicator(TokenType::DocumentEnd)?;
|
|
return Ok(());
|
|
}
|
|
|
|
let c = self.buffer[0];
|
|
let nc = self.buffer[1];
|
|
match c {
|
|
'[' => self.fetch_flow_collection_start(TokenType::FlowSequenceStart),
|
|
'{' => self.fetch_flow_collection_start(TokenType::FlowMappingStart),
|
|
']' => self.fetch_flow_collection_end(TokenType::FlowSequenceEnd),
|
|
'}' => self.fetch_flow_collection_end(TokenType::FlowMappingEnd),
|
|
',' => self.fetch_flow_entry(),
|
|
'-' if is_blankz(nc) => self.fetch_block_entry(),
|
|
'?' if is_blankz(nc) => self.fetch_key(),
|
|
':' if is_blankz(nc)
|
|
|| (self.flow_level > 0
|
|
&& (is_flow(nc) || self.mark.index == self.adjacent_value_allowed_at)) =>
|
|
{
|
|
self.fetch_value()
|
|
}
|
|
// Is it an alias?
|
|
'*' => self.fetch_anchor(true),
|
|
// Is it an anchor?
|
|
'&' => self.fetch_anchor(false),
|
|
'!' => self.fetch_tag(),
|
|
// Is it a literal scalar?
|
|
'|' if self.flow_level == 0 => self.fetch_block_scalar(true),
|
|
// Is it a folded scalar?
|
|
'>' if self.flow_level == 0 => self.fetch_block_scalar(false),
|
|
'\'' => self.fetch_flow_scalar(true),
|
|
'"' => self.fetch_flow_scalar(false),
|
|
// plain scalar
|
|
'-' if !is_blankz(nc) => self.fetch_plain_scalar(),
|
|
':' | '?' if !is_blankz(nc) && self.flow_level == 0 => self.fetch_plain_scalar(),
|
|
'%' | '@' | '`' => Err(ScanError::new(
|
|
self.mark,
|
|
&format!("unexpected character: `{c}'"),
|
|
)),
|
|
_ => self.fetch_plain_scalar(),
|
|
}
|
|
}
|
|
|
|
pub fn next_token(&mut self) -> Result<Option<Token>, ScanError> {
|
|
if self.stream_end_produced {
|
|
return Ok(None);
|
|
}
|
|
|
|
if !self.token_available {
|
|
self.fetch_more_tokens()?;
|
|
}
|
|
let t = self.tokens.pop_front().unwrap();
|
|
self.token_available = false;
|
|
self.tokens_parsed += 1;
|
|
|
|
if let TokenType::StreamEnd = t.1 {
|
|
self.stream_end_produced = true;
|
|
}
|
|
Ok(Some(t))
|
|
}
|
|
|
|
pub fn fetch_more_tokens(&mut self) -> ScanResult {
|
|
let mut need_more;
|
|
loop {
|
|
need_more = false;
|
|
if self.tokens.is_empty() {
|
|
need_more = true;
|
|
} else {
|
|
self.stale_simple_keys()?;
|
|
for sk in &self.simple_keys {
|
|
if sk.possible && sk.token_number == self.tokens_parsed {
|
|
need_more = true;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
if !need_more {
|
|
break;
|
|
}
|
|
self.fetch_next_token()?;
|
|
}
|
|
self.token_available = true;
|
|
|
|
Ok(())
|
|
}
|
|
|
|
fn stale_simple_keys(&mut self) -> ScanResult {
|
|
for sk in &mut self.simple_keys {
|
|
if sk.possible
|
|
&& (sk.mark.line < self.mark.line || sk.mark.index + 1024 < self.mark.index)
|
|
{
|
|
if sk.required {
|
|
return Err(ScanError::new(self.mark, "simple key expect ':'"));
|
|
}
|
|
sk.possible = false;
|
|
}
|
|
}
|
|
Ok(())
|
|
}
|
|
|
|
fn skip_to_next_token(&mut self) {
|
|
loop {
|
|
self.lookahead(1);
|
|
// TODO(chenyh) BOM
|
|
match self.ch() {
|
|
' ' => self.skip(),
|
|
'\t' if self.flow_level > 0 || !self.simple_key_allowed => self.skip(),
|
|
'\n' | '\r' => {
|
|
self.lookahead(2);
|
|
self.skip_line();
|
|
if self.flow_level == 0 {
|
|
self.allow_simple_key();
|
|
}
|
|
}
|
|
'#' => {
|
|
while !is_breakz(self.ch()) {
|
|
self.skip();
|
|
self.lookahead(1);
|
|
}
|
|
}
|
|
_ => break,
|
|
}
|
|
}
|
|
}
|
|
|
|
fn fetch_stream_start(&mut self) {
|
|
let mark = self.mark;
|
|
self.indent = -1;
|
|
self.stream_start_produced = true;
|
|
self.allow_simple_key();
|
|
self.tokens
|
|
.push_back(Token(mark, TokenType::StreamStart(TEncoding::Utf8)));
|
|
self.simple_keys.push(SimpleKey::new(Marker::new(0, 0, 0)));
|
|
}
|
|
|
|
fn fetch_stream_end(&mut self) -> ScanResult {
|
|
// force new line
|
|
if self.mark.col != 0 {
|
|
self.mark.col = 0;
|
|
self.mark.line += 1;
|
|
}
|
|
|
|
self.unroll_indent(-1);
|
|
self.remove_simple_key()?;
|
|
self.disallow_simple_key();
|
|
|
|
self.tokens
|
|
.push_back(Token(self.mark, TokenType::StreamEnd));
|
|
Ok(())
|
|
}
|
|
|
|
fn fetch_directive(&mut self) -> ScanResult {
|
|
self.unroll_indent(-1);
|
|
self.remove_simple_key()?;
|
|
|
|
self.disallow_simple_key();
|
|
|
|
let tok = self.scan_directive()?;
|
|
|
|
self.tokens.push_back(tok);
|
|
|
|
Ok(())
|
|
}
|
|
|
|
fn scan_directive(&mut self) -> Result<Token, ScanError> {
|
|
let start_mark = self.mark;
|
|
self.skip();
|
|
|
|
let name = self.scan_directive_name()?;
|
|
let tok = match name.as_ref() {
|
|
"YAML" => self.scan_version_directive_value(&start_mark)?,
|
|
"TAG" => self.scan_tag_directive_value(&start_mark)?,
|
|
// XXX This should be a warning instead of an error
|
|
_ => {
|
|
// skip current line
|
|
self.lookahead(1);
|
|
while !is_breakz(self.ch()) {
|
|
self.skip();
|
|
self.lookahead(1);
|
|
}
|
|
// XXX return an empty TagDirective token
|
|
Token(
|
|
start_mark,
|
|
TokenType::TagDirective(String::new(), String::new()),
|
|
)
|
|
// return Err(ScanError::new(start_mark,
|
|
// "while scanning a directive, found unknown directive name"))
|
|
}
|
|
};
|
|
self.lookahead(1);
|
|
|
|
while is_blank(self.ch()) {
|
|
self.skip();
|
|
self.lookahead(1);
|
|
}
|
|
|
|
if self.ch() == '#' {
|
|
while !is_breakz(self.ch()) {
|
|
self.skip();
|
|
self.lookahead(1);
|
|
}
|
|
}
|
|
|
|
if !is_breakz(self.ch()) {
|
|
return Err(ScanError::new(
|
|
start_mark,
|
|
"while scanning a directive, did not find expected comment or line break",
|
|
));
|
|
}
|
|
|
|
// Eat a line break
|
|
if is_break(self.ch()) {
|
|
self.lookahead(2);
|
|
self.skip_line();
|
|
}
|
|
|
|
Ok(tok)
|
|
}
|
|
|
|
fn scan_version_directive_value(&mut self, mark: &Marker) -> Result<Token, ScanError> {
|
|
self.lookahead(1);
|
|
|
|
while is_blank(self.ch()) {
|
|
self.skip();
|
|
self.lookahead(1);
|
|
}
|
|
|
|
let major = self.scan_version_directive_number(mark)?;
|
|
|
|
if self.ch() != '.' {
|
|
return Err(ScanError::new(
|
|
*mark,
|
|
"while scanning a YAML directive, did not find expected digit or '.' character",
|
|
));
|
|
}
|
|
|
|
self.skip();
|
|
|
|
let minor = self.scan_version_directive_number(mark)?;
|
|
|
|
Ok(Token(*mark, TokenType::VersionDirective(major, minor)))
|
|
}
|
|
|
|
fn scan_directive_name(&mut self) -> Result<String, ScanError> {
|
|
let start_mark = self.mark;
|
|
let mut string = String::new();
|
|
self.lookahead(1);
|
|
while is_alpha(self.ch()) {
|
|
string.push(self.ch());
|
|
self.skip();
|
|
self.lookahead(1);
|
|
}
|
|
|
|
if string.is_empty() {
|
|
return Err(ScanError::new(
|
|
start_mark,
|
|
"while scanning a directive, could not find expected directive name",
|
|
));
|
|
}
|
|
|
|
if !is_blankz(self.ch()) {
|
|
return Err(ScanError::new(
|
|
start_mark,
|
|
"while scanning a directive, found unexpected non-alphabetical character",
|
|
));
|
|
}
|
|
|
|
Ok(string)
|
|
}
|
|
|
|
fn scan_version_directive_number(&mut self, mark: &Marker) -> Result<u32, ScanError> {
|
|
let mut val = 0u32;
|
|
let mut length = 0usize;
|
|
self.lookahead(1);
|
|
while is_digit(self.ch()) {
|
|
if length + 1 > 9 {
|
|
return Err(ScanError::new(
|
|
*mark,
|
|
"while scanning a YAML directive, found extremely long version number",
|
|
));
|
|
}
|
|
length += 1;
|
|
val = val * 10 + ((self.ch() as u32) - ('0' as u32));
|
|
self.skip();
|
|
self.lookahead(1);
|
|
}
|
|
|
|
if length == 0 {
|
|
return Err(ScanError::new(
|
|
*mark,
|
|
"while scanning a YAML directive, did not find expected version number",
|
|
));
|
|
}
|
|
|
|
Ok(val)
|
|
}
|
|
|
|
fn scan_tag_directive_value(&mut self, mark: &Marker) -> Result<Token, ScanError> {
|
|
self.lookahead(1);
|
|
/* Eat whitespaces. */
|
|
while is_blank(self.ch()) {
|
|
self.skip();
|
|
self.lookahead(1);
|
|
}
|
|
let handle = self.scan_tag_handle(true, mark)?;
|
|
|
|
self.lookahead(1);
|
|
/* Eat whitespaces. */
|
|
while is_blank(self.ch()) {
|
|
self.skip();
|
|
self.lookahead(1);
|
|
}
|
|
|
|
let is_secondary = handle == "!!";
|
|
let prefix = self.scan_tag_uri(true, is_secondary, "", mark)?;
|
|
|
|
self.lookahead(1);
|
|
|
|
if is_blankz(self.ch()) {
|
|
Ok(Token(*mark, TokenType::TagDirective(handle, prefix)))
|
|
} else {
|
|
Err(ScanError::new(
|
|
*mark,
|
|
"while scanning TAG, did not find expected whitespace or line break",
|
|
))
|
|
}
|
|
}
|
|
|
|
fn fetch_tag(&mut self) -> ScanResult {
|
|
self.save_simple_key()?;
|
|
self.disallow_simple_key();
|
|
|
|
let tok = self.scan_tag()?;
|
|
self.tokens.push_back(tok);
|
|
Ok(())
|
|
}
|
|
|
|
fn scan_tag(&mut self) -> Result<Token, ScanError> {
|
|
let start_mark = self.mark;
|
|
let mut handle = String::new();
|
|
let mut suffix;
|
|
let mut secondary = false;
|
|
|
|
// Check if the tag is in the canonical form (verbatim).
|
|
self.lookahead(2);
|
|
|
|
if self.buffer[1] == '<' {
|
|
// Eat '!<'
|
|
self.skip();
|
|
self.skip();
|
|
suffix = self.scan_tag_uri(false, false, "", &start_mark)?;
|
|
|
|
if self.ch() != '>' {
|
|
return Err(ScanError::new(
|
|
start_mark,
|
|
"while scanning a tag, did not find the expected '>'",
|
|
));
|
|
}
|
|
|
|
self.skip();
|
|
} else {
|
|
// The tag has either the '!suffix' or the '!handle!suffix'
|
|
handle = self.scan_tag_handle(false, &start_mark)?;
|
|
// Check if it is, indeed, handle.
|
|
if handle.len() >= 2 && handle.starts_with('!') && handle.ends_with('!') {
|
|
if handle == "!!" {
|
|
secondary = true;
|
|
}
|
|
suffix = self.scan_tag_uri(false, secondary, "", &start_mark)?;
|
|
} else {
|
|
suffix = self.scan_tag_uri(false, false, &handle, &start_mark)?;
|
|
handle = "!".to_owned();
|
|
// A special case: the '!' tag. Set the handle to '' and the
|
|
// suffix to '!'.
|
|
if suffix.is_empty() {
|
|
handle.clear();
|
|
suffix = "!".to_owned();
|
|
}
|
|
}
|
|
}
|
|
|
|
self.lookahead(1);
|
|
if is_blankz(self.ch()) {
|
|
// XXX: ex 7.2, an empty scalar can follow a secondary tag
|
|
Ok(Token(start_mark, TokenType::Tag(handle, suffix)))
|
|
} else {
|
|
Err(ScanError::new(
|
|
start_mark,
|
|
"while scanning a tag, did not find expected whitespace or line break",
|
|
))
|
|
}
|
|
}
|
|
|
|
fn scan_tag_handle(&mut self, directive: bool, mark: &Marker) -> Result<String, ScanError> {
|
|
let mut string = String::new();
|
|
self.lookahead(1);
|
|
if self.ch() != '!' {
|
|
return Err(ScanError::new(
|
|
*mark,
|
|
"while scanning a tag, did not find expected '!'",
|
|
));
|
|
}
|
|
|
|
string.push(self.ch());
|
|
self.skip();
|
|
|
|
self.lookahead(1);
|
|
while is_alpha(self.ch()) {
|
|
string.push(self.ch());
|
|
self.skip();
|
|
self.lookahead(1);
|
|
}
|
|
|
|
// Check if the trailing character is '!' and copy it.
|
|
if self.ch() == '!' {
|
|
string.push(self.ch());
|
|
self.skip();
|
|
} else if directive && string != "!" {
|
|
// It's either the '!' tag or not really a tag handle. If it's a %TAG
|
|
// directive, it's an error. If it's a tag token, it must be a part of
|
|
// URI.
|
|
return Err(ScanError::new(
|
|
*mark,
|
|
"while parsing a tag directive, did not find expected '!'",
|
|
));
|
|
}
|
|
Ok(string)
|
|
}
|
|
|
|
fn scan_tag_uri(
|
|
&mut self,
|
|
directive: bool,
|
|
_is_secondary: bool,
|
|
head: &str,
|
|
mark: &Marker,
|
|
) -> Result<String, ScanError> {
|
|
let mut length = head.len();
|
|
let mut string = String::new();
|
|
|
|
// Copy the head if needed.
|
|
// Note that we don't copy the leading '!' character.
|
|
if length > 1 {
|
|
string.extend(head.chars().skip(1));
|
|
}
|
|
|
|
self.lookahead(1);
|
|
/*
|
|
* The set of characters that may appear in URI is as follows:
|
|
*
|
|
* '0'-'9', 'A'-'Z', 'a'-'z', '_', '-', ';', '/', '?', ':', '@', '&',
|
|
* '=', '+', '$', ',', '.', '!', '~', '*', '\'', '(', ')', '[', ']',
|
|
* '%'.
|
|
*/
|
|
while match self.ch() {
|
|
';' | '/' | '?' | ':' | '@' | '&' => true,
|
|
'=' | '+' | '$' | ',' | '.' | '!' | '~' | '*' | '\'' | '(' | ')' | '[' | ']' => true,
|
|
'%' => true,
|
|
c if is_alpha(c) => true,
|
|
_ => false,
|
|
} {
|
|
// Check if it is a URI-escape sequence.
|
|
if self.ch() == '%' {
|
|
string.push(self.scan_uri_escapes(directive, mark)?);
|
|
} else {
|
|
string.push(self.ch());
|
|
self.skip();
|
|
}
|
|
|
|
length += 1;
|
|
self.lookahead(1);
|
|
}
|
|
|
|
if length == 0 {
|
|
return Err(ScanError::new(
|
|
*mark,
|
|
"while parsing a tag, did not find expected tag URI",
|
|
));
|
|
}
|
|
|
|
Ok(string)
|
|
}
|
|
|
|
fn scan_uri_escapes(&mut self, _directive: bool, mark: &Marker) -> Result<char, ScanError> {
|
|
let mut width = 0usize;
|
|
let mut code = 0u32;
|
|
loop {
|
|
self.lookahead(3);
|
|
|
|
if !(self.ch() == '%' && is_hex(self.buffer[1]) && is_hex(self.buffer[2])) {
|
|
return Err(ScanError::new(
|
|
*mark,
|
|
"while parsing a tag, did not find URI escaped octet",
|
|
));
|
|
}
|
|
|
|
let octet = (as_hex(self.buffer[1]) << 4) + as_hex(self.buffer[2]);
|
|
if width == 0 {
|
|
width = match octet {
|
|
_ if octet & 0x80 == 0x00 => 1,
|
|
_ if octet & 0xE0 == 0xC0 => 2,
|
|
_ if octet & 0xF0 == 0xE0 => 3,
|
|
_ if octet & 0xF8 == 0xF0 => 4,
|
|
_ => {
|
|
return Err(ScanError::new(
|
|
*mark,
|
|
"while parsing a tag, found an incorrect leading UTF-8 octet",
|
|
));
|
|
}
|
|
};
|
|
code = octet;
|
|
} else {
|
|
if octet & 0xc0 != 0x80 {
|
|
return Err(ScanError::new(
|
|
*mark,
|
|
"while parsing a tag, found an incorrect trailing UTF-8 octet",
|
|
));
|
|
}
|
|
code = (code << 8) + octet;
|
|
}
|
|
|
|
self.skip();
|
|
self.skip();
|
|
self.skip();
|
|
|
|
width -= 1;
|
|
if width == 0 {
|
|
break;
|
|
}
|
|
}
|
|
|
|
match char::from_u32(code) {
|
|
Some(ch) => Ok(ch),
|
|
None => Err(ScanError::new(
|
|
*mark,
|
|
"while parsing a tag, found an invalid UTF-8 codepoint",
|
|
)),
|
|
}
|
|
}
|
|
|
|
fn fetch_anchor(&mut self, alias: bool) -> ScanResult {
|
|
self.save_simple_key()?;
|
|
self.disallow_simple_key();
|
|
|
|
let tok = self.scan_anchor(alias)?;
|
|
|
|
self.tokens.push_back(tok);
|
|
|
|
Ok(())
|
|
}
|
|
|
|
fn scan_anchor(&mut self, alias: bool) -> Result<Token, ScanError> {
|
|
let mut string = String::new();
|
|
let start_mark = self.mark;
|
|
|
|
self.skip();
|
|
self.lookahead(1);
|
|
while is_alpha(self.ch()) || self.ch_is(':') {
|
|
string.push(self.ch());
|
|
self.skip();
|
|
self.lookahead(1);
|
|
}
|
|
|
|
if string.is_empty()
|
|
|| match self.ch() {
|
|
c if is_blankz(c) => false,
|
|
'?' | ',' | ']' | '}' | '%' | '@' | '`' => false,
|
|
_ => true,
|
|
}
|
|
{
|
|
return Err(ScanError::new(start_mark, "while scanning an anchor or alias, did not find expected alphabetic or numeric character"));
|
|
}
|
|
|
|
if alias {
|
|
Ok(Token(start_mark, TokenType::Alias(string)))
|
|
} else {
|
|
Ok(Token(start_mark, TokenType::Anchor(string)))
|
|
}
|
|
}
|
|
|
|
fn fetch_flow_collection_start(&mut self, tok: TokenType) -> ScanResult {
|
|
// The indicators '[' and '{' may start a simple key.
|
|
self.save_simple_key()?;
|
|
|
|
self.increase_flow_level()?;
|
|
|
|
self.allow_simple_key();
|
|
|
|
let start_mark = self.mark;
|
|
self.skip();
|
|
|
|
self.tokens.push_back(Token(start_mark, tok));
|
|
Ok(())
|
|
}
|
|
|
|
fn fetch_flow_collection_end(&mut self, tok: TokenType) -> ScanResult {
|
|
self.remove_simple_key()?;
|
|
self.decrease_flow_level();
|
|
|
|
self.disallow_simple_key();
|
|
|
|
let start_mark = self.mark;
|
|
self.skip();
|
|
|
|
self.tokens.push_back(Token(start_mark, tok));
|
|
Ok(())
|
|
}
|
|
|
|
fn fetch_flow_entry(&mut self) -> ScanResult {
|
|
self.remove_simple_key()?;
|
|
self.allow_simple_key();
|
|
|
|
let start_mark = self.mark;
|
|
self.skip();
|
|
|
|
self.tokens
|
|
.push_back(Token(start_mark, TokenType::FlowEntry));
|
|
Ok(())
|
|
}
|
|
|
|
fn increase_flow_level(&mut self) -> ScanResult {
|
|
self.simple_keys.push(SimpleKey::new(Marker::new(0, 0, 0)));
|
|
self.flow_level = self
|
|
.flow_level
|
|
.checked_add(1)
|
|
.ok_or_else(|| ScanError::new(self.mark, "recursion limit exceeded"))?;
|
|
Ok(())
|
|
}
|
|
fn decrease_flow_level(&mut self) {
|
|
if self.flow_level > 0 {
|
|
self.flow_level -= 1;
|
|
self.simple_keys.pop().unwrap();
|
|
}
|
|
}
|
|
|
|
fn fetch_block_entry(&mut self) -> ScanResult {
|
|
if self.flow_level == 0 {
|
|
// Check if we are allowed to start a new entry.
|
|
if !self.simple_key_allowed {
|
|
return Err(ScanError::new(
|
|
self.mark,
|
|
"block sequence entries are not allowed in this context",
|
|
));
|
|
}
|
|
|
|
let mark = self.mark;
|
|
// generate BLOCK-SEQUENCE-START if indented
|
|
self.roll_indent(mark.col, None, TokenType::BlockSequenceStart, mark);
|
|
} else {
|
|
// - * only allowed in block
|
|
return Err(ScanError::new(
|
|
self.mark,
|
|
r#""-" is only valid inside a block"#,
|
|
));
|
|
}
|
|
self.remove_simple_key()?;
|
|
self.allow_simple_key();
|
|
|
|
let start_mark = self.mark;
|
|
self.skip();
|
|
|
|
self.tokens
|
|
.push_back(Token(start_mark, TokenType::BlockEntry));
|
|
Ok(())
|
|
}
|
|
|
|
fn fetch_document_indicator(&mut self, t: TokenType) -> ScanResult {
|
|
self.unroll_indent(-1);
|
|
self.remove_simple_key()?;
|
|
self.disallow_simple_key();
|
|
|
|
let mark = self.mark;
|
|
|
|
self.skip();
|
|
self.skip();
|
|
self.skip();
|
|
|
|
self.tokens.push_back(Token(mark, t));
|
|
Ok(())
|
|
}
|
|
|
|
fn fetch_block_scalar(&mut self, literal: bool) -> ScanResult {
|
|
self.save_simple_key()?;
|
|
self.allow_simple_key();
|
|
let tok = self.scan_block_scalar(literal)?;
|
|
|
|
self.tokens.push_back(tok);
|
|
Ok(())
|
|
}
|
|
|
|
#[allow(clippy::too_many_lines)]
|
|
fn scan_block_scalar(&mut self, literal: bool) -> Result<Token, ScanError> {
|
|
let start_mark = self.mark;
|
|
let mut chomping: i32 = 0;
|
|
let mut increment: usize = 0;
|
|
let mut indent: usize = 0;
|
|
let mut trailing_blank: bool;
|
|
let mut leading_blank: bool = false;
|
|
|
|
let mut string = String::new();
|
|
let mut leading_break = String::new();
|
|
let mut trailing_breaks = String::new();
|
|
|
|
// skip '|' or '>'
|
|
self.skip();
|
|
self.lookahead(1);
|
|
|
|
if self.ch() == '+' || self.ch() == '-' {
|
|
if self.ch() == '+' {
|
|
chomping = 1;
|
|
} else {
|
|
chomping = -1;
|
|
}
|
|
self.skip();
|
|
self.lookahead(1);
|
|
if is_digit(self.ch()) {
|
|
if self.ch() == '0' {
|
|
return Err(ScanError::new(
|
|
start_mark,
|
|
"while scanning a block scalar, found an indentation indicator equal to 0",
|
|
));
|
|
}
|
|
increment = (self.ch() as usize) - ('0' as usize);
|
|
self.skip();
|
|
}
|
|
} else if is_digit(self.ch()) {
|
|
if self.ch() == '0' {
|
|
return Err(ScanError::new(
|
|
start_mark,
|
|
"while scanning a block scalar, found an indentation indicator equal to 0",
|
|
));
|
|
}
|
|
|
|
increment = (self.ch() as usize) - ('0' as usize);
|
|
self.skip();
|
|
self.lookahead(1);
|
|
if self.ch() == '+' || self.ch() == '-' {
|
|
if self.ch() == '+' {
|
|
chomping = 1;
|
|
} else {
|
|
chomping = -1;
|
|
}
|
|
self.skip();
|
|
}
|
|
}
|
|
|
|
// Eat whitespaces and comments to the end of the line.
|
|
self.lookahead(1);
|
|
|
|
while is_blank(self.ch()) {
|
|
self.skip();
|
|
self.lookahead(1);
|
|
}
|
|
|
|
if self.ch() == '#' {
|
|
while !is_breakz(self.ch()) {
|
|
self.skip();
|
|
self.lookahead(1);
|
|
}
|
|
}
|
|
|
|
// Check if we are at the end of the line.
|
|
if !is_breakz(self.ch()) {
|
|
return Err(ScanError::new(
|
|
start_mark,
|
|
"while scanning a block scalar, did not find expected comment or line break",
|
|
));
|
|
}
|
|
|
|
if is_break(self.ch()) {
|
|
self.lookahead(2);
|
|
self.skip_line();
|
|
}
|
|
|
|
if increment > 0 {
|
|
indent = if self.indent >= 0 {
|
|
(self.indent + increment as isize) as usize
|
|
} else {
|
|
increment
|
|
}
|
|
}
|
|
// Scan the leading line breaks and determine the indentation level if needed.
|
|
self.block_scalar_breaks(&mut indent, &mut trailing_breaks)?;
|
|
|
|
self.lookahead(1);
|
|
|
|
let start_mark = self.mark;
|
|
|
|
while self.mark.col == indent && !is_z(self.ch()) {
|
|
// We are at the beginning of a non-empty line.
|
|
trailing_blank = is_blank(self.ch());
|
|
if !literal && !leading_break.is_empty() && !leading_blank && !trailing_blank {
|
|
if trailing_breaks.is_empty() {
|
|
string.push(' ');
|
|
}
|
|
leading_break.clear();
|
|
} else {
|
|
string.push_str(&leading_break);
|
|
leading_break.clear();
|
|
}
|
|
|
|
string.push_str(&trailing_breaks);
|
|
trailing_breaks.clear();
|
|
|
|
leading_blank = is_blank(self.ch());
|
|
|
|
while !is_breakz(self.ch()) {
|
|
string.push(self.ch());
|
|
self.skip();
|
|
self.lookahead(1);
|
|
}
|
|
// break on EOF
|
|
if is_z(self.ch()) {
|
|
break;
|
|
}
|
|
|
|
self.lookahead(2);
|
|
self.read_break(&mut leading_break);
|
|
|
|
// Eat the following indentation spaces and line breaks.
|
|
self.block_scalar_breaks(&mut indent, &mut trailing_breaks)?;
|
|
}
|
|
|
|
// Chomp the tail.
|
|
if chomping != -1 {
|
|
string.push_str(&leading_break);
|
|
}
|
|
|
|
if chomping == 1 {
|
|
string.push_str(&trailing_breaks);
|
|
}
|
|
|
|
if literal {
|
|
Ok(Token(
|
|
start_mark,
|
|
TokenType::Scalar(TScalarStyle::Literal, string),
|
|
))
|
|
} else {
|
|
Ok(Token(
|
|
start_mark,
|
|
TokenType::Scalar(TScalarStyle::Foled, string),
|
|
))
|
|
}
|
|
}
|
|
|
|
fn block_scalar_breaks(&mut self, indent: &mut usize, breaks: &mut String) -> ScanResult {
|
|
let mut max_indent = 0;
|
|
loop {
|
|
self.lookahead(1);
|
|
while (*indent == 0 || self.mark.col < *indent) && self.buffer[0] == ' ' {
|
|
self.skip();
|
|
self.lookahead(1);
|
|
}
|
|
|
|
if self.mark.col > max_indent {
|
|
max_indent = self.mark.col;
|
|
}
|
|
|
|
// Check for a tab character messing the indentation.
|
|
if (*indent == 0 || self.mark.col < *indent) && self.buffer[0] == '\t' {
|
|
return Err(ScanError::new(self.mark,
|
|
"while scanning a block scalar, found a tab character where an indentation space is expected"));
|
|
}
|
|
|
|
if !is_break(self.ch()) {
|
|
break;
|
|
}
|
|
|
|
self.lookahead(2);
|
|
// Consume the line break.
|
|
self.read_break(breaks);
|
|
}
|
|
|
|
if *indent == 0 {
|
|
*indent = max_indent;
|
|
if *indent < (self.indent + 1) as usize {
|
|
*indent = (self.indent + 1) as usize;
|
|
}
|
|
if *indent < 1 {
|
|
*indent = 1;
|
|
}
|
|
}
|
|
Ok(())
|
|
}
|
|
|
|
fn fetch_flow_scalar(&mut self, single: bool) -> ScanResult {
|
|
self.save_simple_key()?;
|
|
self.disallow_simple_key();
|
|
|
|
let tok = self.scan_flow_scalar(single)?;
|
|
|
|
// From spec: To ensure JSON compatibility, if a key inside a flow mapping is JSON-like,
|
|
// YAML allows the following value to be specified adjacent to the “:”.
|
|
self.adjacent_value_allowed_at = self.mark.index;
|
|
|
|
self.tokens.push_back(tok);
|
|
Ok(())
|
|
}
|
|
|
|
#[allow(clippy::too_many_lines)]
|
|
fn scan_flow_scalar(&mut self, single: bool) -> Result<Token, ScanError> {
|
|
let start_mark = self.mark;
|
|
|
|
let mut string = String::new();
|
|
let mut leading_break = String::new();
|
|
let mut trailing_breaks = String::new();
|
|
let mut whitespaces = String::new();
|
|
let mut leading_blanks;
|
|
|
|
/* Eat the left quote. */
|
|
self.skip();
|
|
|
|
loop {
|
|
/* Check for a document indicator. */
|
|
self.lookahead(4);
|
|
|
|
if self.mark.col == 0
|
|
&& (((self.buffer[0] == '-') && (self.buffer[1] == '-') && (self.buffer[2] == '-'))
|
|
|| ((self.buffer[0] == '.')
|
|
&& (self.buffer[1] == '.')
|
|
&& (self.buffer[2] == '.')))
|
|
&& is_blankz(self.buffer[3])
|
|
{
|
|
return Err(ScanError::new(
|
|
start_mark,
|
|
"while scanning a quoted scalar, found unexpected document indicator",
|
|
));
|
|
}
|
|
|
|
if is_z(self.ch()) {
|
|
return Err(ScanError::new(
|
|
start_mark,
|
|
"while scanning a quoted scalar, found unexpected end of stream",
|
|
));
|
|
}
|
|
|
|
self.lookahead(2);
|
|
|
|
leading_blanks = false;
|
|
// Consume non-blank characters.
|
|
|
|
while !is_blankz(self.ch()) {
|
|
match self.ch() {
|
|
// Check for an escaped single quote.
|
|
'\'' if self.buffer[1] == '\'' && single => {
|
|
string.push('\'');
|
|
self.skip();
|
|
self.skip();
|
|
}
|
|
// Check for the right quote.
|
|
'\'' if single => break,
|
|
'"' if !single => break,
|
|
// Check for an escaped line break.
|
|
'\\' if !single && is_break(self.buffer[1]) => {
|
|
self.lookahead(3);
|
|
self.skip();
|
|
self.skip_line();
|
|
leading_blanks = true;
|
|
break;
|
|
}
|
|
// Check for an escape sequence.
|
|
'\\' if !single => {
|
|
let mut code_length = 0usize;
|
|
match self.buffer[1] {
|
|
'0' => string.push('\0'),
|
|
'a' => string.push('\x07'),
|
|
'b' => string.push('\x08'),
|
|
't' | '\t' => string.push('\t'),
|
|
'n' => string.push('\n'),
|
|
'v' => string.push('\x0b'),
|
|
'f' => string.push('\x0c'),
|
|
'r' => string.push('\x0d'),
|
|
'e' => string.push('\x1b'),
|
|
' ' => string.push('\x20'),
|
|
'"' => string.push('"'),
|
|
'\'' => string.push('\''),
|
|
'\\' => string.push('\\'),
|
|
// NEL (#x85)
|
|
'N' => string.push(char::from_u32(0x85).unwrap()),
|
|
// #xA0
|
|
'_' => string.push(char::from_u32(0xA0).unwrap()),
|
|
// LS (#x2028)
|
|
'L' => string.push(char::from_u32(0x2028).unwrap()),
|
|
// PS (#x2029)
|
|
'P' => string.push(char::from_u32(0x2029).unwrap()),
|
|
'x' => code_length = 2,
|
|
'u' => code_length = 4,
|
|
'U' => code_length = 8,
|
|
_ => {
|
|
return Err(ScanError::new(
|
|
start_mark,
|
|
"while parsing a quoted scalar, found unknown escape character",
|
|
))
|
|
}
|
|
}
|
|
self.skip();
|
|
self.skip();
|
|
// Consume an arbitrary escape code.
|
|
if code_length > 0 {
|
|
self.lookahead(code_length);
|
|
let mut value = 0u32;
|
|
for i in 0..code_length {
|
|
if !is_hex(self.buffer[i]) {
|
|
return Err(ScanError::new(start_mark,
|
|
"while parsing a quoted scalar, did not find expected hexadecimal number"));
|
|
}
|
|
value = (value << 4) + as_hex(self.buffer[i]);
|
|
}
|
|
|
|
let Some(ch) = char::from_u32(value) else {
|
|
return Err(ScanError::new(start_mark, "while parsing a quoted scalar, found invalid Unicode character escape code"));
|
|
};
|
|
string.push(ch);
|
|
|
|
for _ in 0..code_length {
|
|
self.skip();
|
|
}
|
|
}
|
|
}
|
|
c => {
|
|
string.push(c);
|
|
self.skip();
|
|
}
|
|
}
|
|
self.lookahead(2);
|
|
}
|
|
self.lookahead(1);
|
|
match self.ch() {
|
|
'\'' if single => break,
|
|
'"' if !single => break,
|
|
_ => {}
|
|
}
|
|
|
|
// Consume blank characters.
|
|
while is_blank(self.ch()) || is_break(self.ch()) {
|
|
if is_blank(self.ch()) {
|
|
// Consume a space or a tab character.
|
|
if leading_blanks {
|
|
self.skip();
|
|
} else {
|
|
whitespaces.push(self.ch());
|
|
self.skip();
|
|
}
|
|
} else {
|
|
self.lookahead(2);
|
|
// Check if it is a first line break.
|
|
if leading_blanks {
|
|
self.read_break(&mut trailing_breaks);
|
|
} else {
|
|
whitespaces.clear();
|
|
self.read_break(&mut leading_break);
|
|
leading_blanks = true;
|
|
}
|
|
}
|
|
self.lookahead(1);
|
|
}
|
|
// Join the whitespaces or fold line breaks.
|
|
if leading_blanks {
|
|
if leading_break.is_empty() {
|
|
string.push_str(&leading_break);
|
|
string.push_str(&trailing_breaks);
|
|
trailing_breaks.clear();
|
|
leading_break.clear();
|
|
} else {
|
|
if trailing_breaks.is_empty() {
|
|
string.push(' ');
|
|
} else {
|
|
string.push_str(&trailing_breaks);
|
|
trailing_breaks.clear();
|
|
}
|
|
leading_break.clear();
|
|
}
|
|
} else {
|
|
string.push_str(&whitespaces);
|
|
whitespaces.clear();
|
|
}
|
|
} // loop
|
|
|
|
// Eat the right quote.
|
|
self.skip();
|
|
|
|
if single {
|
|
Ok(Token(
|
|
start_mark,
|
|
TokenType::Scalar(TScalarStyle::SingleQuoted, string),
|
|
))
|
|
} else {
|
|
Ok(Token(
|
|
start_mark,
|
|
TokenType::Scalar(TScalarStyle::DoubleQuoted, string),
|
|
))
|
|
}
|
|
}
|
|
|
|
fn fetch_plain_scalar(&mut self) -> ScanResult {
|
|
self.save_simple_key()?;
|
|
self.disallow_simple_key();
|
|
|
|
let tok = self.scan_plain_scalar()?;
|
|
|
|
self.tokens.push_back(tok);
|
|
Ok(())
|
|
}
|
|
|
|
fn scan_plain_scalar(&mut self) -> Result<Token, ScanError> {
|
|
let indent = self.indent + 1;
|
|
let start_mark = self.mark;
|
|
|
|
let mut string = String::new();
|
|
let mut leading_break = String::new();
|
|
let mut trailing_breaks = String::new();
|
|
let mut whitespaces = String::new();
|
|
let mut leading_blanks = false;
|
|
|
|
loop {
|
|
/* Check for a document indicator. */
|
|
self.lookahead(4);
|
|
|
|
if self.mark.col == 0
|
|
&& (((self.buffer[0] == '-') && (self.buffer[1] == '-') && (self.buffer[2] == '-'))
|
|
|| ((self.buffer[0] == '.')
|
|
&& (self.buffer[1] == '.')
|
|
&& (self.buffer[2] == '.')))
|
|
&& is_blankz(self.buffer[3])
|
|
{
|
|
break;
|
|
}
|
|
|
|
if self.ch() == '#' {
|
|
break;
|
|
}
|
|
while !is_blankz(self.ch()) {
|
|
// indicators can end a plain scalar, see 7.3.3. Plain Style
|
|
match self.ch() {
|
|
':' if is_blankz(self.buffer[1])
|
|
|| (self.flow_level > 0 && is_flow(self.buffer[1])) =>
|
|
{
|
|
break;
|
|
}
|
|
',' | '[' | ']' | '{' | '}' if self.flow_level > 0 => break,
|
|
_ => {}
|
|
}
|
|
|
|
if leading_blanks || !whitespaces.is_empty() {
|
|
if leading_blanks {
|
|
if leading_break.is_empty() {
|
|
string.push_str(&leading_break);
|
|
string.push_str(&trailing_breaks);
|
|
trailing_breaks.clear();
|
|
leading_break.clear();
|
|
} else {
|
|
if trailing_breaks.is_empty() {
|
|
string.push(' ');
|
|
} else {
|
|
string.push_str(&trailing_breaks);
|
|
trailing_breaks.clear();
|
|
}
|
|
leading_break.clear();
|
|
}
|
|
leading_blanks = false;
|
|
} else {
|
|
string.push_str(&whitespaces);
|
|
whitespaces.clear();
|
|
}
|
|
}
|
|
|
|
string.push(self.ch());
|
|
self.skip();
|
|
self.lookahead(2);
|
|
}
|
|
// is the end?
|
|
if !(is_blank(self.ch()) || is_break(self.ch())) {
|
|
break;
|
|
}
|
|
self.lookahead(1);
|
|
|
|
while is_blank(self.ch()) || is_break(self.ch()) {
|
|
if is_blank(self.ch()) {
|
|
if leading_blanks && (self.mark.col as isize) < indent && self.ch() == '\t' {
|
|
return Err(ScanError::new(
|
|
start_mark,
|
|
"while scanning a plain scalar, found a tab",
|
|
));
|
|
}
|
|
|
|
if leading_blanks {
|
|
self.skip();
|
|
} else {
|
|
whitespaces.push(self.ch());
|
|
self.skip();
|
|
}
|
|
} else {
|
|
self.lookahead(2);
|
|
// Check if it is a first line break
|
|
if leading_blanks {
|
|
self.read_break(&mut trailing_breaks);
|
|
} else {
|
|
whitespaces.clear();
|
|
self.read_break(&mut leading_break);
|
|
leading_blanks = true;
|
|
}
|
|
}
|
|
self.lookahead(1);
|
|
}
|
|
|
|
// check indentation level
|
|
if self.flow_level == 0 && (self.mark.col as isize) < indent {
|
|
break;
|
|
}
|
|
}
|
|
|
|
if leading_blanks {
|
|
self.allow_simple_key();
|
|
}
|
|
|
|
Ok(Token(
|
|
start_mark,
|
|
TokenType::Scalar(TScalarStyle::Plain, string),
|
|
))
|
|
}
|
|
|
|
fn fetch_key(&mut self) -> ScanResult {
|
|
let start_mark = self.mark;
|
|
if self.flow_level == 0 {
|
|
// Check if we are allowed to start a new key (not necessarily simple).
|
|
if !self.simple_key_allowed {
|
|
return Err(ScanError::new(
|
|
self.mark,
|
|
"mapping keys are not allowed in this context",
|
|
));
|
|
}
|
|
self.roll_indent(
|
|
start_mark.col,
|
|
None,
|
|
TokenType::BlockMappingStart,
|
|
start_mark,
|
|
);
|
|
}
|
|
|
|
self.remove_simple_key()?;
|
|
|
|
if self.flow_level == 0 {
|
|
self.allow_simple_key();
|
|
} else {
|
|
self.disallow_simple_key();
|
|
}
|
|
|
|
self.skip();
|
|
self.tokens.push_back(Token(start_mark, TokenType::Key));
|
|
Ok(())
|
|
}
|
|
|
|
fn fetch_value(&mut self) -> ScanResult {
|
|
let sk = self.simple_keys.last().unwrap().clone();
|
|
let start_mark = self.mark;
|
|
if sk.possible {
|
|
// insert simple key
|
|
let tok = Token(sk.mark, TokenType::Key);
|
|
let tokens_parsed = self.tokens_parsed;
|
|
self.insert_token(sk.token_number - tokens_parsed, tok);
|
|
|
|
// Add the BLOCK-MAPPING-START token if needed.
|
|
self.roll_indent(
|
|
sk.mark.col,
|
|
Some(sk.token_number),
|
|
TokenType::BlockMappingStart,
|
|
start_mark,
|
|
);
|
|
|
|
self.simple_keys.last_mut().unwrap().possible = false;
|
|
self.disallow_simple_key();
|
|
} else {
|
|
// The ':' indicator follows a complex key.
|
|
if self.flow_level == 0 {
|
|
if !self.simple_key_allowed {
|
|
return Err(ScanError::new(
|
|
start_mark,
|
|
"mapping values are not allowed in this context",
|
|
));
|
|
}
|
|
|
|
self.roll_indent(
|
|
start_mark.col,
|
|
None,
|
|
TokenType::BlockMappingStart,
|
|
start_mark,
|
|
);
|
|
}
|
|
|
|
if self.flow_level == 0 {
|
|
self.allow_simple_key();
|
|
} else {
|
|
self.disallow_simple_key();
|
|
}
|
|
}
|
|
self.skip();
|
|
self.tokens.push_back(Token(start_mark, TokenType::Value));
|
|
|
|
Ok(())
|
|
}
|
|
|
|
fn roll_indent(&mut self, col: usize, number: Option<usize>, tok: TokenType, mark: Marker) {
|
|
if self.flow_level > 0 {
|
|
return;
|
|
}
|
|
|
|
if self.indent < col as isize {
|
|
self.indents.push(self.indent);
|
|
self.indent = col as isize;
|
|
let tokens_parsed = self.tokens_parsed;
|
|
match number {
|
|
Some(n) => self.insert_token(n - tokens_parsed, Token(mark, tok)),
|
|
None => self.tokens.push_back(Token(mark, tok)),
|
|
}
|
|
}
|
|
}
|
|
|
|
fn unroll_indent(&mut self, col: isize) {
|
|
if self.flow_level > 0 {
|
|
return;
|
|
}
|
|
while self.indent > col {
|
|
self.tokens.push_back(Token(self.mark, TokenType::BlockEnd));
|
|
self.indent = self.indents.pop().unwrap();
|
|
}
|
|
}
|
|
|
|
fn save_simple_key(&mut self) -> Result<(), ScanError> {
|
|
let required = self.flow_level > 0 && self.indent == (self.mark.col as isize);
|
|
if self.simple_key_allowed {
|
|
let mut sk = SimpleKey::new(self.mark);
|
|
sk.possible = true;
|
|
sk.required = required;
|
|
sk.token_number = self.tokens_parsed + self.tokens.len();
|
|
|
|
self.remove_simple_key()?;
|
|
|
|
self.simple_keys.pop();
|
|
self.simple_keys.push(sk);
|
|
}
|
|
Ok(())
|
|
}
|
|
|
|
fn remove_simple_key(&mut self) -> ScanResult {
|
|
let last = self.simple_keys.last_mut().unwrap();
|
|
if last.possible && last.required {
|
|
return Err(ScanError::new(self.mark, "simple key expected"));
|
|
}
|
|
|
|
last.possible = false;
|
|
Ok(())
|
|
}
|
|
}
|