2212 lines
68 KiB
Rust
2212 lines
68 KiB
Rust
#![allow(clippy::cast_possible_wrap)]
|
|
#![allow(clippy::cast_sign_loss)]
|
|
|
|
use std::collections::VecDeque;
|
|
use std::error::Error;
|
|
use std::{char, fmt};
|
|
|
|
/// Text encoding of a YAML stream.
///
/// UTF-8 is the only variant; it is the encoding the scanner reports in its `StreamStart` token.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TEncoding {
    /// UTF-8 encoded input.
    Utf8,
}
|
|
|
|
/// The presentation style of a scalar.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TScalarStyle {
    /// No particular style.
    Any,
    /// Unquoted scalar.
    Plain,
    /// Scalar enclosed in single quotes (`'`).
    SingleQuoted,
    /// Scalar enclosed in double quotes (`"`).
    DoubleQuoted,

    /// Literal block scalar (introduced by `|`).
    Literal,
    /// Folded block scalar (introduced by `>`).
    ///
    /// The name is a misspelling of `Folded`; it is part of the public API and is kept as-is so
    /// existing callers keep compiling.
    Foled,
}
|
|
|
|
/// A location in a yaml document.
///
/// The scanner starts at index 0, line 1, column 0 and advances the marker once per consumed
/// character.
#[derive(Clone, Copy, PartialEq, Debug, Eq)]
pub struct Marker {
    /// Number of characters consumed before this position.
    index: usize,
    /// Line of this position.
    line: usize,
    /// Zero-based column of this position within its line.
    col: usize,
}

impl Marker {
    /// Build a marker from its raw components.
    fn new(index: usize, line: usize, col: usize) -> Marker {
        Self { index, line, col }
    }

    /// Return the index (in characters, not bytes) of the marker in the source.
    ///
    /// The scanner increments this by one for every `char` it consumes, so for non-ASCII input it
    /// is not a byte offset.
    #[must_use]
    pub fn index(&self) -> usize {
        self.index
    }

    /// Return the line of the marker in the source.
    #[must_use]
    pub fn line(&self) -> usize {
        self.line
    }

    /// Return the column of the marker in the source (starting from 0).
    #[must_use]
    pub fn col(&self) -> usize {
        self.col
    }
}
|
|
|
|
/// An error that occured while scanning.
|
|
#[derive(Clone, PartialEq, Debug, Eq)]
|
|
pub struct ScanError {
|
|
mark: Marker,
|
|
info: String,
|
|
}
|
|
|
|
impl ScanError {
|
|
/// Create a new error from a location and an error string.
|
|
#[must_use]
|
|
pub fn new(loc: Marker, info: &str) -> ScanError {
|
|
ScanError {
|
|
mark: loc,
|
|
info: info.to_owned(),
|
|
}
|
|
}
|
|
|
|
/// Return the marker pointing to the error in the source.
|
|
#[must_use]
|
|
pub fn marker(&self) -> &Marker {
|
|
&self.mark
|
|
}
|
|
|
|
/// Return the information string describing the error that happened.
|
|
#[must_use]
|
|
pub fn info(&self) -> &str {
|
|
self.info.as_ref()
|
|
}
|
|
}
|
|
|
|
impl Error for ScanError {
|
|
fn description(&self) -> &str {
|
|
self.info.as_ref()
|
|
}
|
|
|
|
fn cause(&self) -> Option<&dyn Error> {
|
|
None
|
|
}
|
|
}
|
|
|
|
impl fmt::Display for ScanError {
|
|
// col starts from 0
|
|
fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
|
|
write!(
|
|
formatter,
|
|
"{} at line {} column {}",
|
|
self.info,
|
|
self.mark.line,
|
|
self.mark.col + 1
|
|
)
|
|
}
|
|
}
|
|
|
|
#[derive(Clone, PartialEq, Debug, Eq)]
|
|
pub enum TokenType {
|
|
NoToken,
|
|
/// The start of the stream. Sent first, before even [`DocumentStart`].
|
|
StreamStart(TEncoding),
|
|
/// The end of the stream, EOF.
|
|
StreamEnd,
|
|
VersionDirective(
|
|
/// Major
|
|
u32,
|
|
/// Minor
|
|
u32,
|
|
),
|
|
TagDirective(
|
|
/// Handle
|
|
String,
|
|
/// Prefix
|
|
String,
|
|
),
|
|
/// The start of a YAML document (`---`).
|
|
DocumentStart,
|
|
/// The end of a YAML document (`...`).
|
|
DocumentEnd,
|
|
/// The start of a sequence block.
|
|
///
|
|
/// Sequence blocks are arrays starting with a `-`.
|
|
BlockSequenceStart,
|
|
/// The start of a sequence mapping.
|
|
///
|
|
/// Sequence mappings are "dictionaries" with "key: value" entries.
|
|
BlockMappingStart,
|
|
/// End of the corresponding `BlockSequenceStart` or `BlockMappingStart`.
|
|
BlockEnd,
|
|
/// Start of an inline array (`[ a, b ]`).
|
|
FlowSequenceStart,
|
|
/// End of an inline array.
|
|
FlowSequenceEnd,
|
|
/// Start of an inline mapping (`{ a: b, c: d }`).
|
|
FlowMappingStart,
|
|
/// End of an inline mapping.
|
|
FlowMappingEnd,
|
|
/// An entry in a block sequence (c.f.: [`TokenType::BlockSequenceStart`]).
|
|
BlockEntry,
|
|
/// An entry in a flow sequence (c.f.: [`TokenType::FlowSequenceStart`]).
|
|
FlowEntry,
|
|
/// A key in a mapping.
|
|
Key,
|
|
/// A value in a mapping.
|
|
Value,
|
|
/// A reference to an anchor.
|
|
Alias(String),
|
|
/// A YAML anchor (`&`/`*`).
|
|
Anchor(String),
|
|
/// A YAML tag (starting with bangs `!`).
|
|
Tag(
|
|
/// The handle of the tag.
|
|
String,
|
|
/// The suffix of the tag.
|
|
String,
|
|
),
|
|
/// A regular YAML scalar.
|
|
Scalar(TScalarStyle, String),
|
|
}
|
|
|
|
/// A token produced by the scanner: the [`Marker`] recorded for it and its [`TokenType`].
#[derive(Clone, PartialEq, Debug, Eq)]
pub struct Token(pub Marker, pub TokenType);
|
|
|
|
#[derive(Clone, PartialEq, Debug, Eq)]
|
|
struct SimpleKey {
|
|
possible: bool,
|
|
required: bool,
|
|
token_number: usize,
|
|
mark: Marker,
|
|
}
|
|
|
|
impl SimpleKey {
|
|
fn new(mark: Marker) -> SimpleKey {
|
|
SimpleKey {
|
|
possible: false,
|
|
required: false,
|
|
token_number: 0,
|
|
mark,
|
|
}
|
|
}
|
|
}
|
|
|
|
/// An indentation level on the stack of indentations.
#[derive(Clone, Debug, Default)]
struct Indent {
    /// The former indentation level.
    indent: isize,
    /// Whether, upon closing, this indent generates a `BlockEnd` token.
    ///
    /// There are levels of indentation which do not start a block. Examples of this would be:
    /// ```yaml
    /// -
    ///   foo # ok
    /// -
    /// bar # ko, bar needs to be indented further than the `-`.
    /// - [
    ///   baz, # ok
    /// quux # ko, quux needs to be indented further than the '-'.
    /// ] # ko, the closing bracket needs to be indented further than the `-`.
    /// ```
    ///
    /// The indentation level created by the `-` is for a single entry in the sequence. Emitting a
    /// `BlockEnd` when this indentation block ends would generate one `BlockEnd` per entry in the
    /// sequence, although we must have exactly one to end the sequence.
    needs_block_end: bool,
}
|
|
|
|
/// The YAML tokenizer: reads characters from `T` and produces [`Token`]s.
#[derive(Debug)]
#[allow(clippy::struct_excessive_bools)]
pub struct Scanner<T> {
    /// The reader, providing the characters to scan.
    rdr: T,
    /// The position of the cursor within the reader.
    mark: Marker,
    /// Buffer for tokens to be read.
    tokens: VecDeque<Token>,
    /// Buffer for the next characters to consume.
    buffer: VecDeque<char>,
    /// The last error that happened.
    error: Option<ScanError>,

    /// Whether we have already emitted the `StreamStart` token.
    stream_start_produced: bool,
    /// Whether we have already emitted the `StreamEnd` token.
    stream_end_produced: bool,
    /// Cursor index at which, inside a flow collection, a `:` is accepted as a value indicator
    /// even when it is not followed by a blank or a flow character.
    adjacent_value_allowed_at: usize,
    /// Whether a simple key could potentially start at the current position.
    ///
    /// Simple keys are the opposite of complex keys which are keys starting with `?`.
    simple_key_allowed: bool,
    /// Stack of simple key candidates: one entry is pushed when the stream starts and one for
    /// each flow collection that is entered.
    simple_keys: Vec<SimpleKey>,
    /// The current indentation level.
    indent: isize,
    /// List of all block indentation levels we are in (except the current one).
    indents: Vec<Indent>,
    /// Level of nesting of flow collections (sequences and mappings).
    flow_level: u8,
    /// Number of tokens already handed out by `next_token`.
    tokens_parsed: usize,
    /// Whether `tokens` currently holds a token that is ready to be handed out.
    token_available: bool,
    /// Whether all characters encountered since the last newline were whitespace.
    leading_whitespace: bool,
    /// Whether we started a flow mapping.
    ///
    /// This is used to detect implicit flow mapping starts such as:
    /// ```yaml
    /// [ : foo ] # { null: "foo" }
    /// ```
    flow_mapping_started: bool,
    /// Whether we currently are in an implicit flow mapping.
    implicit_flow_mapping: bool,
}
|
|
|
|
impl<T: Iterator<Item = char>> Iterator for Scanner<T> {
|
|
type Item = Token;
|
|
fn next(&mut self) -> Option<Token> {
|
|
if self.error.is_some() {
|
|
return None;
|
|
}
|
|
match self.next_token() {
|
|
Ok(Some(tok)) => {
|
|
if std::env::var("YAMLRUST_DEBUG").is_ok() {
|
|
eprintln!(
|
|
" \x1B[;32m\u{21B3} {:?} \x1B[;36m{:?}\x1B[;m",
|
|
tok.1, tok.0
|
|
);
|
|
}
|
|
Some(tok)
|
|
}
|
|
Ok(tok) => tok,
|
|
Err(e) => {
|
|
self.error = Some(e);
|
|
None
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Check whether the character is nil (`\0`).
///
/// The scanner's lookahead pads the input with `\0` once the reader is exhausted, so this also
/// detects the end of input.
#[inline]
fn is_z(c: char) -> bool {
    matches!(c, '\0')
}
|
|
|
|
/// Check whether the character is a line break (`\r` or `\n`).
#[inline]
fn is_break(c: char) -> bool {
    matches!(c, '\n' | '\r')
}
|
|
|
|
/// Check whether the character is nil or a line break (`\0`, `\r`, `\n`).
|
|
#[inline]
|
|
fn is_breakz(c: char) -> bool {
|
|
is_break(c) || is_z(c)
|
|
}
|
|
|
|
/// Check whether the character is a whitespace (` ` or `\t`).
#[inline]
fn is_blank(c: char) -> bool {
    matches!(c, ' ' | '\t')
}
|
|
|
|
/// Check whether the character is nil, a linebreak or a whitespace.
|
|
///
|
|
/// `\0`, ` `, `\t`, `\n`, `\r`
|
|
#[inline]
|
|
fn is_blankz(c: char) -> bool {
|
|
is_blank(c) || is_breakz(c)
|
|
}
|
|
|
|
/// Check whether the character is an ascii digit (`0` to `9`).
#[inline]
fn is_digit(c: char) -> bool {
    matches!(c, '0'..='9')
}
|
|
|
|
/// Check whether the character is an ascii digit, an ascii letter, `_` or `-`.
#[inline]
fn is_alpha(c: char) -> bool {
    c.is_ascii_alphanumeric() || matches!(c, '_' | '-')
}
|
|
|
|
/// Check whether the character is a hexadecimal character (case insensitive).
#[inline]
fn is_hex(c: char) -> bool {
    c.is_ascii_hexdigit()
}
|
|
|
|
/// Convert the hexadecimal digit to an integer.
///
/// # Panics
/// If `c` is not one of `0-9`, `a-f` or `A-F`. Callers are expected to check with `is_hex`
/// first.
#[inline]
fn as_hex(c: char) -> u32 {
    match c.to_digit(16) {
        Some(value) => value,
        None => unreachable!(),
    }
}
|
|
|
|
/// Check whether the character is a YAML flow character (one of `,[]{}`).
#[inline]
fn is_flow(c: char) -> bool {
    ",[]{}".contains(c)
}
|
|
|
|
/// Check whether the character is the BOM character (`U+FEFF`).
#[inline]
fn is_bom(c: char) -> bool {
    matches!(c, '\u{FEFF}')
}
|
|
|
|
/// Check whether the character is a YAML non-breaking character.
|
|
#[inline]
|
|
fn is_yaml_non_break(c: char) -> bool {
|
|
// TODO(ethiraric, 28/12/2023): is_printable
|
|
!is_break(c) && !is_bom(c)
|
|
}
|
|
|
|
/// Check whether the character is NOT a YAML whitespace (` ` / `\t`).
|
|
#[inline]
|
|
fn is_yaml_non_space(c: char) -> bool {
|
|
is_yaml_non_break(c) && !is_blank(c)
|
|
}
|
|
|
|
/// Check whether the character is a valid YAML anchor name character.
|
|
#[inline]
|
|
fn is_anchor_char(c: char) -> bool {
|
|
is_yaml_non_space(c) && !is_flow(c) && !is_z(c)
|
|
}
|
|
|
|
/// Result of a scanning step that produces no value: `Ok(())` on success, a [`ScanError`]
/// otherwise.
pub type ScanResult = Result<(), ScanError>;
|
|
|
|
impl<T: Iterator<Item = char>> Scanner<T> {
|
|
/// Creates the YAML tokenizer.
|
|
pub fn new(rdr: T) -> Scanner<T> {
|
|
Scanner {
|
|
rdr,
|
|
buffer: VecDeque::new(),
|
|
mark: Marker::new(0, 1, 0),
|
|
tokens: VecDeque::new(),
|
|
error: None,
|
|
|
|
stream_start_produced: false,
|
|
stream_end_produced: false,
|
|
adjacent_value_allowed_at: 0,
|
|
simple_key_allowed: true,
|
|
simple_keys: Vec::new(),
|
|
indent: -1,
|
|
indents: Vec::new(),
|
|
flow_level: 0,
|
|
tokens_parsed: 0,
|
|
token_available: false,
|
|
leading_whitespace: true,
|
|
flow_mapping_started: false,
|
|
implicit_flow_mapping: false,
|
|
}
|
|
}
|
|
|
|
#[inline]
|
|
pub fn get_error(&self) -> Option<ScanError> {
|
|
self.error.as_ref().map(std::clone::Clone::clone)
|
|
}
|
|
|
|
/// Fill `self.buffer` with at least `count` characters.
|
|
///
|
|
/// The characters that are extracted this way are not consumed but only placed in the buffer.
|
|
#[inline]
|
|
fn lookahead(&mut self, count: usize) {
|
|
if self.buffer.len() >= count {
|
|
return;
|
|
}
|
|
for _ in 0..(count - self.buffer.len()) {
|
|
self.buffer.push_back(self.rdr.next().unwrap_or('\0'));
|
|
}
|
|
}
|
|
|
|
/// Consume the next character. Remove from buffer and update mark.
|
|
#[inline]
|
|
fn skip(&mut self) {
|
|
let c = self.buffer.pop_front().unwrap();
|
|
|
|
self.mark.index += 1;
|
|
if c == '\n' {
|
|
self.leading_whitespace = true;
|
|
self.mark.line += 1;
|
|
self.mark.col = 0;
|
|
} else {
|
|
// TODO(ethiraric, 20/12/2023): change to `self.leading_whitespace &= is_blank(c)`?
|
|
if self.leading_whitespace && !is_blank(c) {
|
|
self.leading_whitespace = false;
|
|
}
|
|
self.mark.col += 1;
|
|
}
|
|
}
|
|
|
|
/// Consume a linebreak (either CR, LF or CRLF), if any. Do nothing if there's none.
|
|
#[inline]
|
|
fn skip_line(&mut self) {
|
|
if self.buffer[0] == '\r' && self.buffer[1] == '\n' {
|
|
self.skip();
|
|
self.skip();
|
|
} else if is_break(self.buffer[0]) {
|
|
self.skip();
|
|
}
|
|
}
|
|
|
|
/// Return the next character in the buffer.
///
/// The character is not consumed.
///
/// # Panics
/// If the buffer is empty (no prior lookahead).
#[inline]
fn ch(&self) -> char {
    self.buffer[0]
}
|
|
|
|
/// Look for the next character and return it.
|
|
///
|
|
/// The character is not consumed.
|
|
/// Equivalent to calling [`Self::lookahead`] and [`Self::ch`].
|
|
#[inline]
|
|
fn look_ch(&mut self) -> char {
|
|
self.lookahead(1);
|
|
self.ch()
|
|
}
|
|
|
|
/// Consume and return the next character.
|
|
///
|
|
/// Equivalent to calling [`Self::ch`] and [`Self::skip`].
|
|
#[inline]
|
|
fn ch_skip(&mut self) -> char {
|
|
let ret = self.ch();
|
|
self.skip();
|
|
ret
|
|
}
|
|
|
|
/// Return whether the next character is `c`.
|
|
#[inline]
|
|
fn ch_is(&self, c: char) -> bool {
|
|
self.buffer[0] == c
|
|
}
|
|
|
|
#[allow(dead_code)]
|
|
#[inline]
|
|
fn eof(&self) -> bool {
|
|
self.ch_is('\0')
|
|
}
|
|
|
|
/// Return whether the `StreamStart` token has already been produced.
#[inline]
pub fn stream_started(&self) -> bool {
    self.stream_start_produced
}
|
|
|
|
/// Return whether the `StreamEnd` token has already been handed out.
#[inline]
pub fn stream_ended(&self) -> bool {
    self.stream_end_produced
}
|
|
|
|
/// Return the current position of the cursor within the reader.
#[inline]
pub fn mark(&self) -> Marker {
    self.mark
}
|
|
|
|
// Read and consume a line break (either `\r`, `\n` or `\r\n`).
|
|
//
|
|
// A `\n` is pushed into `s`.
|
|
//
|
|
// # Panics
|
|
// If the next characters do not correspond to a line break.
|
|
#[inline]
|
|
fn read_break(&mut self, s: &mut String) {
|
|
if self.buffer[0] == '\r' && self.buffer[1] == '\n' {
|
|
s.push('\n');
|
|
self.skip();
|
|
self.skip();
|
|
} else if self.buffer[0] == '\r' || self.buffer[0] == '\n' {
|
|
s.push('\n');
|
|
self.skip();
|
|
} else {
|
|
unreachable!();
|
|
}
|
|
}
|
|
|
|
/// Insert a token at the given position.
|
|
fn insert_token(&mut self, pos: usize, tok: Token) {
|
|
let old_len = self.tokens.len();
|
|
assert!(pos <= old_len);
|
|
self.tokens.insert(pos, tok);
|
|
}
|
|
|
|
/// Record that a simple key may start at the current position.
fn allow_simple_key(&mut self) {
    self.simple_key_allowed = true;
}
|
|
|
|
/// Record that a simple key may not start at the current position.
fn disallow_simple_key(&mut self) {
    self.simple_key_allowed = false;
}
|
|
|
|
pub fn fetch_next_token(&mut self) -> ScanResult {
|
|
self.lookahead(1);
|
|
// eprintln!("--> fetch_next_token Cur {:?} {:?}", self.mark, self.ch());
|
|
|
|
if !self.stream_start_produced {
|
|
self.fetch_stream_start();
|
|
return Ok(());
|
|
}
|
|
self.skip_to_next_token()?;
|
|
|
|
if std::env::var("YAMLRUST_DEBUG").is_ok() {
|
|
eprintln!(
|
|
" \x1B[38;5;244m\u{2192} fetch_next_token after whitespace {:?} {:?}\x1B[m",
|
|
self.mark,
|
|
self.ch()
|
|
);
|
|
}
|
|
|
|
self.stale_simple_keys()?;
|
|
|
|
let mark = self.mark;
|
|
self.unroll_indent(mark.col as isize);
|
|
|
|
self.lookahead(4);
|
|
|
|
if is_z(self.ch()) {
|
|
self.fetch_stream_end()?;
|
|
return Ok(());
|
|
}
|
|
|
|
// Is it a directive?
|
|
if self.mark.col == 0 && self.ch_is('%') {
|
|
return self.fetch_directive();
|
|
}
|
|
|
|
if self.mark.col == 0
|
|
&& self.buffer[0] == '-'
|
|
&& self.buffer[1] == '-'
|
|
&& self.buffer[2] == '-'
|
|
&& is_blankz(self.buffer[3])
|
|
{
|
|
self.fetch_document_indicator(TokenType::DocumentStart)?;
|
|
return Ok(());
|
|
}
|
|
|
|
if self.mark.col == 0
|
|
&& self.buffer[0] == '.'
|
|
&& self.buffer[1] == '.'
|
|
&& self.buffer[2] == '.'
|
|
&& is_blankz(self.buffer[3])
|
|
{
|
|
self.fetch_document_indicator(TokenType::DocumentEnd)?;
|
|
return Ok(());
|
|
}
|
|
|
|
let c = self.buffer[0];
|
|
let nc = self.buffer[1];
|
|
match c {
|
|
'[' => self.fetch_flow_collection_start(TokenType::FlowSequenceStart),
|
|
'{' => self.fetch_flow_collection_start(TokenType::FlowMappingStart),
|
|
']' => self.fetch_flow_collection_end(TokenType::FlowSequenceEnd),
|
|
'}' => self.fetch_flow_collection_end(TokenType::FlowMappingEnd),
|
|
',' => self.fetch_flow_entry(),
|
|
'-' if is_blankz(nc) => self.fetch_block_entry(),
|
|
'?' if is_blankz(nc) => self.fetch_key(),
|
|
':' if is_blankz(nc)
|
|
|| (self.flow_level > 0
|
|
&& (is_flow(nc) || self.mark.index == self.adjacent_value_allowed_at)) =>
|
|
{
|
|
self.fetch_value()
|
|
}
|
|
// Is it an alias?
|
|
'*' => self.fetch_anchor(true),
|
|
// Is it an anchor?
|
|
'&' => self.fetch_anchor(false),
|
|
'!' => self.fetch_tag(),
|
|
// Is it a literal scalar?
|
|
'|' if self.flow_level == 0 => self.fetch_block_scalar(true),
|
|
// Is it a folded scalar?
|
|
'>' if self.flow_level == 0 => self.fetch_block_scalar(false),
|
|
'\'' => self.fetch_flow_scalar(true),
|
|
'"' => self.fetch_flow_scalar(false),
|
|
// plain scalar
|
|
'-' if !is_blankz(nc) => self.fetch_plain_scalar(),
|
|
':' | '?' if !is_blankz(nc) && self.flow_level == 0 => self.fetch_plain_scalar(),
|
|
'%' | '@' | '`' => Err(ScanError::new(
|
|
self.mark,
|
|
&format!("unexpected character: `{c}'"),
|
|
)),
|
|
_ => self.fetch_plain_scalar(),
|
|
}
|
|
}
|
|
|
|
pub fn next_token(&mut self) -> Result<Option<Token>, ScanError> {
|
|
if self.stream_end_produced {
|
|
return Ok(None);
|
|
}
|
|
|
|
if !self.token_available {
|
|
self.fetch_more_tokens()?;
|
|
}
|
|
let t = self.tokens.pop_front().unwrap();
|
|
self.token_available = false;
|
|
self.tokens_parsed += 1;
|
|
|
|
if let TokenType::StreamEnd = t.1 {
|
|
self.stream_end_produced = true;
|
|
}
|
|
Ok(Some(t))
|
|
}
|
|
|
|
/// Fetch tokens until the first buffered token can safely be handed out.
///
/// More input is scanned while the token buffer is empty, or while a still-possible simple key
/// refers to the next token to be handed out. On success `token_available` is set.
///
/// # Errors
/// Returns an error if expiring simple keys or scanning a token fails.
pub fn fetch_more_tokens(&mut self) -> ScanResult {
    loop {
        let need_more = if self.tokens.is_empty() {
            true
        } else {
            self.stale_simple_keys()?;
            let next_number = self.tokens_parsed;
            self.simple_keys
                .iter()
                .any(|sk| sk.possible && sk.token_number == next_number)
        };

        if !need_more {
            break;
        }
        self.fetch_next_token()?;
    }
    self.token_available = true;

    Ok(())
}
|
|
|
|
fn stale_simple_keys(&mut self) -> ScanResult {
|
|
for sk in &mut self.simple_keys {
|
|
if sk.possible
|
|
&& (sk.mark.line < self.mark.line || sk.mark.index + 1024 < self.mark.index)
|
|
{
|
|
if sk.required {
|
|
return Err(ScanError::new(self.mark, "simple key expect ':'"));
|
|
}
|
|
sk.possible = false;
|
|
}
|
|
}
|
|
Ok(())
|
|
}
|
|
|
|
/// Skip over all whitespace and comments until the next token.
|
|
///
|
|
/// # Errors
|
|
/// This function returns an error if a tabulation is encountered where there should not be
|
|
/// one.
|
|
fn skip_to_next_token(&mut self) -> ScanResult {
|
|
loop {
|
|
// TODO(chenyh) BOM
|
|
match self.look_ch() {
|
|
' ' => self.skip(),
|
|
// Tabs may not be used as indentation.
|
|
// "Indentation" only exists as long as a block is started, but does not exist
|
|
// inside of flow-style constructs. Tabs are allowed as part of leading
|
|
// whitespaces outside of indentation.
|
|
// If a flow-style construct is in an indented block, its contents must still be
|
|
// indented. Also, tabs are allowed anywhere in it if it has no content.
|
|
'\t' if self.is_within_block()
|
|
&& self.leading_whitespace
|
|
&& (self.mark.col as isize) < self.indent =>
|
|
{
|
|
self.skip_ws_to_eol(SkipTabs::Yes);
|
|
// If we have content on that line with a tab, return an error.
|
|
if !is_breakz(self.ch()) {
|
|
return Err(ScanError::new(
|
|
self.mark,
|
|
"tabs disallowed within this context (block indentation)",
|
|
));
|
|
}
|
|
}
|
|
'\t' => self.skip(),
|
|
'\n' | '\r' => {
|
|
self.lookahead(2);
|
|
self.skip_line();
|
|
if self.flow_level == 0 {
|
|
self.allow_simple_key();
|
|
}
|
|
}
|
|
'#' => {
|
|
while !is_breakz(self.ch()) {
|
|
self.skip();
|
|
self.lookahead(1);
|
|
}
|
|
}
|
|
_ => break,
|
|
}
|
|
}
|
|
Ok(())
|
|
}
|
|
|
|
/// Skip over YAML whitespace (` `, `\n`, `\r`).
|
|
///
|
|
/// # Errors
|
|
/// This function returns an error if no whitespace was found.
|
|
fn skip_yaml_whitespace(&mut self) -> ScanResult {
|
|
let mut need_whitespace = true;
|
|
loop {
|
|
match self.look_ch() {
|
|
' ' => {
|
|
self.skip();
|
|
|
|
need_whitespace = false;
|
|
}
|
|
'\n' | '\r' => {
|
|
self.lookahead(2);
|
|
self.skip_line();
|
|
if self.flow_level == 0 {
|
|
self.allow_simple_key();
|
|
}
|
|
need_whitespace = false;
|
|
}
|
|
'#' => {
|
|
while !is_breakz(self.ch()) {
|
|
self.skip();
|
|
self.lookahead(1);
|
|
}
|
|
}
|
|
_ => break,
|
|
}
|
|
}
|
|
|
|
if need_whitespace {
|
|
Err(ScanError::new(self.mark(), "expected whitespace"))
|
|
} else {
|
|
Ok(())
|
|
}
|
|
}
|
|
|
|
/// Skip yaml whitespace at most up to eol. Also skips comments.
|
|
fn skip_ws_to_eol(&mut self, skip_tabs: SkipTabs) -> SkipTabs {
|
|
let mut encountered_tab = false;
|
|
let mut has_yaml_ws = false;
|
|
loop {
|
|
match self.look_ch() {
|
|
' ' => {
|
|
has_yaml_ws = true;
|
|
self.skip();
|
|
}
|
|
'\t' if skip_tabs != SkipTabs::No => {
|
|
encountered_tab = true;
|
|
self.skip();
|
|
}
|
|
'#' => {
|
|
while !is_breakz(self.look_ch()) {
|
|
self.skip();
|
|
}
|
|
}
|
|
_ => break,
|
|
}
|
|
}
|
|
|
|
SkipTabs::Result(encountered_tab, has_yaml_ws)
|
|
}
|
|
|
|
fn fetch_stream_start(&mut self) {
|
|
let mark = self.mark;
|
|
self.indent = -1;
|
|
self.stream_start_produced = true;
|
|
self.allow_simple_key();
|
|
self.tokens
|
|
.push_back(Token(mark, TokenType::StreamStart(TEncoding::Utf8)));
|
|
self.simple_keys.push(SimpleKey::new(Marker::new(0, 0, 0)));
|
|
}
|
|
|
|
fn fetch_stream_end(&mut self) -> ScanResult {
|
|
// force new line
|
|
if self.mark.col != 0 {
|
|
self.mark.col = 0;
|
|
self.mark.line += 1;
|
|
}
|
|
|
|
self.unroll_indent(-1);
|
|
self.remove_simple_key()?;
|
|
self.disallow_simple_key();
|
|
|
|
self.tokens
|
|
.push_back(Token(self.mark, TokenType::StreamEnd));
|
|
Ok(())
|
|
}
|
|
|
|
/// Scan a directive and push its token.
///
/// All indentation levels are unrolled, the current simple key is removed and simple keys are
/// disallowed before scanning.
///
/// # Errors
/// Returns an error if removing the simple key or scanning the directive fails.
fn fetch_directive(&mut self) -> ScanResult {
    self.unroll_indent(-1);
    self.remove_simple_key()?;
    self.simple_key_allowed = false;

    let directive = self.scan_directive()?;
    self.tokens.push_back(directive);
    Ok(())
}
|
|
|
|
fn scan_directive(&mut self) -> Result<Token, ScanError> {
|
|
let start_mark = self.mark;
|
|
self.skip();
|
|
|
|
let name = self.scan_directive_name()?;
|
|
let tok = match name.as_ref() {
|
|
"YAML" => self.scan_version_directive_value(&start_mark)?,
|
|
"TAG" => self.scan_tag_directive_value(&start_mark)?,
|
|
// XXX This should be a warning instead of an error
|
|
_ => {
|
|
// skip current line
|
|
self.lookahead(1);
|
|
while !is_breakz(self.ch()) {
|
|
self.skip();
|
|
self.lookahead(1);
|
|
}
|
|
// XXX return an empty TagDirective token
|
|
Token(
|
|
start_mark,
|
|
TokenType::TagDirective(String::new(), String::new()),
|
|
)
|
|
// return Err(ScanError::new(start_mark,
|
|
// "while scanning a directive, found unknown directive name"))
|
|
}
|
|
};
|
|
self.lookahead(1);
|
|
|
|
while is_blank(self.ch()) {
|
|
self.skip();
|
|
self.lookahead(1);
|
|
}
|
|
|
|
if self.ch() == '#' {
|
|
while !is_breakz(self.ch()) {
|
|
self.skip();
|
|
self.lookahead(1);
|
|
}
|
|
}
|
|
|
|
if !is_breakz(self.ch()) {
|
|
return Err(ScanError::new(
|
|
start_mark,
|
|
"while scanning a directive, did not find expected comment or line break",
|
|
));
|
|
}
|
|
|
|
// Eat a line break
|
|
if is_break(self.ch()) {
|
|
self.lookahead(2);
|
|
self.skip_line();
|
|
}
|
|
|
|
Ok(tok)
|
|
}
|
|
|
|
fn scan_version_directive_value(&mut self, mark: &Marker) -> Result<Token, ScanError> {
|
|
self.lookahead(1);
|
|
|
|
while is_blank(self.ch()) {
|
|
self.skip();
|
|
self.lookahead(1);
|
|
}
|
|
|
|
let major = self.scan_version_directive_number(mark)?;
|
|
|
|
if self.ch() != '.' {
|
|
return Err(ScanError::new(
|
|
*mark,
|
|
"while scanning a YAML directive, did not find expected digit or '.' character",
|
|
));
|
|
}
|
|
|
|
self.skip();
|
|
|
|
let minor = self.scan_version_directive_number(mark)?;
|
|
|
|
Ok(Token(*mark, TokenType::VersionDirective(major, minor)))
|
|
}
|
|
|
|
fn scan_directive_name(&mut self) -> Result<String, ScanError> {
|
|
let start_mark = self.mark;
|
|
let mut string = String::new();
|
|
self.lookahead(1);
|
|
while is_alpha(self.ch()) {
|
|
string.push(self.ch());
|
|
self.skip();
|
|
self.lookahead(1);
|
|
}
|
|
|
|
if string.is_empty() {
|
|
return Err(ScanError::new(
|
|
start_mark,
|
|
"while scanning a directive, could not find expected directive name",
|
|
));
|
|
}
|
|
|
|
if !is_blankz(self.ch()) {
|
|
return Err(ScanError::new(
|
|
start_mark,
|
|
"while scanning a directive, found unexpected non-alphabetical character",
|
|
));
|
|
}
|
|
|
|
Ok(string)
|
|
}
|
|
|
|
fn scan_version_directive_number(&mut self, mark: &Marker) -> Result<u32, ScanError> {
|
|
let mut val = 0u32;
|
|
let mut length = 0usize;
|
|
self.lookahead(1);
|
|
while is_digit(self.ch()) {
|
|
if length + 1 > 9 {
|
|
return Err(ScanError::new(
|
|
*mark,
|
|
"while scanning a YAML directive, found extremely long version number",
|
|
));
|
|
}
|
|
length += 1;
|
|
val = val * 10 + ((self.ch() as u32) - ('0' as u32));
|
|
self.skip();
|
|
self.lookahead(1);
|
|
}
|
|
|
|
if length == 0 {
|
|
return Err(ScanError::new(
|
|
*mark,
|
|
"while scanning a YAML directive, did not find expected version number",
|
|
));
|
|
}
|
|
|
|
Ok(val)
|
|
}
|
|
|
|
fn scan_tag_directive_value(&mut self, mark: &Marker) -> Result<Token, ScanError> {
|
|
self.lookahead(1);
|
|
/* Eat whitespaces. */
|
|
while is_blank(self.ch()) {
|
|
self.skip();
|
|
self.lookahead(1);
|
|
}
|
|
let handle = self.scan_tag_handle(true, mark)?;
|
|
|
|
self.lookahead(1);
|
|
/* Eat whitespaces. */
|
|
while is_blank(self.ch()) {
|
|
self.skip();
|
|
self.lookahead(1);
|
|
}
|
|
|
|
let is_secondary = handle == "!!";
|
|
let prefix = self.scan_tag_uri(true, is_secondary, "", mark)?;
|
|
|
|
self.lookahead(1);
|
|
|
|
if is_blankz(self.ch()) {
|
|
Ok(Token(*mark, TokenType::TagDirective(handle, prefix)))
|
|
} else {
|
|
Err(ScanError::new(
|
|
*mark,
|
|
"while scanning TAG, did not find expected whitespace or line break",
|
|
))
|
|
}
|
|
}
|
|
|
|
/// Scan a tag and push its token.
///
/// A tag may start a simple key, so the candidate is saved first; simple keys are then
/// disallowed.
///
/// # Errors
/// Returns an error if saving the simple key or scanning the tag fails.
fn fetch_tag(&mut self) -> ScanResult {
    self.save_simple_key()?;
    self.simple_key_allowed = false;

    let tag = self.scan_tag()?;
    self.tokens.push_back(tag);
    Ok(())
}
|
|
|
|
fn scan_tag(&mut self) -> Result<Token, ScanError> {
|
|
let start_mark = self.mark;
|
|
let mut handle = String::new();
|
|
let mut suffix;
|
|
|
|
// Check if the tag is in the canonical form (verbatim).
|
|
self.lookahead(2);
|
|
|
|
if self.buffer[1] == '<' {
|
|
// Eat '!<'
|
|
self.skip();
|
|
self.skip();
|
|
suffix = self.scan_tag_uri(false, false, "", &start_mark)?;
|
|
|
|
if self.ch() != '>' {
|
|
return Err(ScanError::new(
|
|
start_mark,
|
|
"while scanning a tag, did not find the expected '>'",
|
|
));
|
|
}
|
|
|
|
self.skip();
|
|
} else {
|
|
// The tag has either the '!suffix' or the '!handle!suffix'
|
|
handle = self.scan_tag_handle(false, &start_mark)?;
|
|
// Check if it is, indeed, handle.
|
|
if handle.len() >= 2 && handle.starts_with('!') && handle.ends_with('!') {
|
|
// A tag handle starting with "!!" is a secondary tag handle.
|
|
let is_secondary_handle = handle == "!!";
|
|
suffix = self.scan_tag_uri(false, is_secondary_handle, "", &start_mark)?;
|
|
} else {
|
|
suffix = self.scan_tag_uri(false, false, &handle, &start_mark)?;
|
|
handle = "!".to_owned();
|
|
// A special case: the '!' tag. Set the handle to '' and the
|
|
// suffix to '!'.
|
|
if suffix.is_empty() {
|
|
handle.clear();
|
|
suffix = "!".to_owned();
|
|
}
|
|
}
|
|
}
|
|
|
|
if is_blankz(self.look_ch()) {
|
|
// XXX: ex 7.2, an empty scalar can follow a secondary tag
|
|
Ok(Token(start_mark, TokenType::Tag(handle, suffix)))
|
|
} else {
|
|
Err(ScanError::new(
|
|
start_mark,
|
|
"while scanning a tag, did not find expected whitespace or line break",
|
|
))
|
|
}
|
|
}
|
|
|
|
fn scan_tag_handle(&mut self, directive: bool, mark: &Marker) -> Result<String, ScanError> {
|
|
let mut string = String::new();
|
|
if self.look_ch() != '!' {
|
|
return Err(ScanError::new(
|
|
*mark,
|
|
"while scanning a tag, did not find expected '!'",
|
|
));
|
|
}
|
|
|
|
string.push(self.ch_skip());
|
|
|
|
while is_alpha(self.look_ch()) {
|
|
string.push(self.ch_skip());
|
|
}
|
|
|
|
// Check if the trailing character is '!' and copy it.
|
|
if self.ch() == '!' {
|
|
string.push(self.ch_skip());
|
|
} else if directive && string != "!" {
|
|
// It's either the '!' tag or not really a tag handle. If it's a %TAG
|
|
// directive, it's an error. If it's a tag token, it must be a part of
|
|
// URI.
|
|
return Err(ScanError::new(
|
|
*mark,
|
|
"while parsing a tag directive, did not find expected '!'",
|
|
));
|
|
}
|
|
Ok(string)
|
|
}
|
|
|
|
fn scan_tag_uri(
|
|
&mut self,
|
|
directive: bool,
|
|
_is_secondary: bool,
|
|
head: &str,
|
|
mark: &Marker,
|
|
) -> Result<String, ScanError> {
|
|
let mut length = head.len();
|
|
let mut string = String::new();
|
|
|
|
// Copy the head if needed.
|
|
// Note that we don't copy the leading '!' character.
|
|
if length > 1 {
|
|
string.extend(head.chars().skip(1));
|
|
}
|
|
|
|
/*
|
|
* The set of characters that may appear in URI is as follows:
|
|
*
|
|
* '0'-'9', 'A'-'Z', 'a'-'z', '_', '-', ';', '/', '?', ':', '@', '&',
|
|
* '=', '+', '$', ',', '.', '!', '~', '*', '\'', '(', ')', '[', ']',
|
|
* '%'.
|
|
*/
|
|
while match self.look_ch() {
|
|
';' | '/' | '?' | ':' | '@' | '&' => true,
|
|
'=' | '+' | '$' | ',' | '.' | '!' | '~' | '*' | '\'' | '(' | ')' | '[' | ']' => true,
|
|
'%' => true,
|
|
c if is_alpha(c) => true,
|
|
_ => false,
|
|
} {
|
|
// Check if it is a URI-escape sequence.
|
|
if self.ch() == '%' {
|
|
string.push(self.scan_uri_escapes(directive, mark)?);
|
|
} else {
|
|
string.push(self.ch());
|
|
self.skip();
|
|
}
|
|
|
|
length += 1;
|
|
}
|
|
|
|
if length == 0 {
|
|
return Err(ScanError::new(
|
|
*mark,
|
|
"while parsing a tag, did not find expected tag URI",
|
|
));
|
|
}
|
|
|
|
Ok(string)
|
|
}
|
|
|
|
fn scan_uri_escapes(&mut self, _directive: bool, mark: &Marker) -> Result<char, ScanError> {
|
|
let mut width = 0usize;
|
|
let mut code = 0u32;
|
|
loop {
|
|
self.lookahead(3);
|
|
|
|
if !(self.ch() == '%' && is_hex(self.buffer[1]) && is_hex(self.buffer[2])) {
|
|
return Err(ScanError::new(
|
|
*mark,
|
|
"while parsing a tag, did not find URI escaped octet",
|
|
));
|
|
}
|
|
|
|
let octet = (as_hex(self.buffer[1]) << 4) + as_hex(self.buffer[2]);
|
|
if width == 0 {
|
|
width = match octet {
|
|
_ if octet & 0x80 == 0x00 => 1,
|
|
_ if octet & 0xE0 == 0xC0 => 2,
|
|
_ if octet & 0xF0 == 0xE0 => 3,
|
|
_ if octet & 0xF8 == 0xF0 => 4,
|
|
_ => {
|
|
return Err(ScanError::new(
|
|
*mark,
|
|
"while parsing a tag, found an incorrect leading UTF-8 octet",
|
|
));
|
|
}
|
|
};
|
|
code = octet;
|
|
} else {
|
|
if octet & 0xc0 != 0x80 {
|
|
return Err(ScanError::new(
|
|
*mark,
|
|
"while parsing a tag, found an incorrect trailing UTF-8 octet",
|
|
));
|
|
}
|
|
code = (code << 8) + octet;
|
|
}
|
|
|
|
self.skip();
|
|
self.skip();
|
|
self.skip();
|
|
|
|
width -= 1;
|
|
if width == 0 {
|
|
break;
|
|
}
|
|
}
|
|
|
|
match char::from_u32(code) {
|
|
Some(ch) => Ok(ch),
|
|
None => Err(ScanError::new(
|
|
*mark,
|
|
"while parsing a tag, found an invalid UTF-8 codepoint",
|
|
)),
|
|
}
|
|
}
|
|
|
|
/// Scan an anchor (`alias == false`) or an alias (`alias == true`) and push its token.
///
/// The candidate simple key is saved first; simple keys are then disallowed.
///
/// # Errors
/// Returns an error if saving the simple key or scanning the anchor fails.
fn fetch_anchor(&mut self, alias: bool) -> ScanResult {
    self.save_simple_key()?;
    self.simple_key_allowed = false;

    let anchor = self.scan_anchor(alias)?;
    self.tokens.push_back(anchor);
    Ok(())
}
|
|
|
|
fn scan_anchor(&mut self, alias: bool) -> Result<Token, ScanError> {
|
|
let mut string = String::new();
|
|
let start_mark = self.mark;
|
|
|
|
self.skip();
|
|
while is_anchor_char(self.look_ch()) {
|
|
string.push(self.ch());
|
|
self.skip();
|
|
}
|
|
|
|
if string.is_empty() {
|
|
return Err(ScanError::new(start_mark, "while scanning an anchor or alias, did not find expected alphabetic or numeric character"));
|
|
}
|
|
|
|
if alias {
|
|
Ok(Token(start_mark, TokenType::Alias(string)))
|
|
} else {
|
|
Ok(Token(start_mark, TokenType::Anchor(string)))
|
|
}
|
|
}
|
|
|
|
fn fetch_flow_collection_start(&mut self, tok: TokenType) -> ScanResult {
|
|
// The indicators '[' and '{' may start a simple key.
|
|
self.save_simple_key()?;
|
|
|
|
self.increase_flow_level()?;
|
|
|
|
self.allow_simple_key();
|
|
|
|
let start_mark = self.mark;
|
|
self.skip();
|
|
|
|
if tok == TokenType::FlowMappingStart {
|
|
self.flow_mapping_started = true;
|
|
}
|
|
|
|
self.tokens.push_back(Token(start_mark, tok));
|
|
Ok(())
|
|
}
|
|
|
|
fn fetch_flow_collection_end(&mut self, tok: TokenType) -> ScanResult {
|
|
self.remove_simple_key()?;
|
|
self.decrease_flow_level();
|
|
|
|
self.disallow_simple_key();
|
|
|
|
self.end_implicit_mapping(self.mark);
|
|
|
|
let start_mark = self.mark;
|
|
self.skip();
|
|
|
|
self.tokens.push_back(Token(start_mark, tok));
|
|
Ok(())
|
|
}
|
|
|
|
/// Push the `FlowEntry` token and skip over the `,`.
|
|
fn fetch_flow_entry(&mut self) -> ScanResult {
|
|
self.remove_simple_key()?;
|
|
self.allow_simple_key();
|
|
|
|
self.end_implicit_mapping(self.mark);
|
|
|
|
let start_mark = self.mark;
|
|
self.skip();
|
|
|
|
self.tokens
|
|
.push_back(Token(start_mark, TokenType::FlowEntry));
|
|
Ok(())
|
|
}
|
|
|
|
fn increase_flow_level(&mut self) -> ScanResult {
|
|
self.simple_keys.push(SimpleKey::new(Marker::new(0, 0, 0)));
|
|
self.flow_level = self
|
|
.flow_level
|
|
.checked_add(1)
|
|
.ok_or_else(|| ScanError::new(self.mark, "recursion limit exceeded"))?;
|
|
Ok(())
|
|
}
|
|
|
|
fn decrease_flow_level(&mut self) {
|
|
if self.flow_level > 0 {
|
|
self.flow_level -= 1;
|
|
self.simple_keys.pop().unwrap();
|
|
}
|
|
}
|
|
|
|
/// Push the `Block*` token(s) and skip over the `-`.
|
|
///
|
|
/// Add an indentation level and push a `BlockSequenceStart` token if needed, then push a
|
|
/// `BlockEntry` token.
|
|
/// This function only skips over the `-` and does not fetch the entry value.
|
|
fn fetch_block_entry(&mut self) -> ScanResult {
|
|
if self.flow_level > 0 {
|
|
// - * only allowed in block
|
|
return Err(ScanError::new(
|
|
self.mark,
|
|
r#""-" is only valid inside a block"#,
|
|
));
|
|
}
|
|
// Check if we are allowed to start a new entry.
|
|
if !self.simple_key_allowed {
|
|
return Err(ScanError::new(
|
|
self.mark,
|
|
"block sequence entries are not allowed in this context",
|
|
));
|
|
}
|
|
|
|
// Skip over the `-`.
|
|
let mark = self.mark;
|
|
self.skip();
|
|
|
|
// generate BLOCK-SEQUENCE-START if indented
|
|
self.roll_indent(mark.col, None, TokenType::BlockSequenceStart, mark);
|
|
let found_tabs = self.skip_ws_to_eol(SkipTabs::Yes).found_tabs();
|
|
self.lookahead(2);
|
|
if found_tabs && self.buffer[0] == '-' && is_blankz(self.buffer[1]) {
|
|
return Err(ScanError::new(
|
|
self.mark,
|
|
"'-' must be followed by a valid YAML whitespace",
|
|
));
|
|
}
|
|
|
|
self.skip_ws_to_eol(SkipTabs::No);
|
|
if is_break(self.look_ch()) || is_flow(self.ch()) {
|
|
self.roll_one_col_indent();
|
|
}
|
|
|
|
self.remove_simple_key()?;
|
|
self.allow_simple_key();
|
|
|
|
self.tokens
|
|
.push_back(Token(self.mark, TokenType::BlockEntry));
|
|
|
|
Ok(())
|
|
}
|
|
|
|
fn fetch_document_indicator(&mut self, t: TokenType) -> ScanResult {
|
|
self.unroll_indent(-1);
|
|
self.remove_simple_key()?;
|
|
self.disallow_simple_key();
|
|
|
|
let mark = self.mark;
|
|
|
|
self.skip();
|
|
self.skip();
|
|
self.skip();
|
|
|
|
self.tokens.push_back(Token(mark, t));
|
|
Ok(())
|
|
}
|
|
|
|
/// Scan a block scalar and queue the resulting token.
///
/// `literal` is forwarded to [`Self::scan_block_scalar`]: `true` yields a literal-style
/// scalar, `false` a folded-style one. A simple key is allowed after the scalar.
///
/// # Errors
/// Propagates any error from saving the simple key or from scanning the scalar.
fn fetch_block_scalar(&mut self, literal: bool) -> ScanResult {
    self.save_simple_key()?;
    self.allow_simple_key();

    let tok = self.scan_block_scalar(literal)?;
    self.tokens.push_back(tok);
    Ok(())
}
|
|
|
|
#[allow(clippy::too_many_lines)]
|
|
fn scan_block_scalar(&mut self, literal: bool) -> Result<Token, ScanError> {
|
|
let start_mark = self.mark;
|
|
let mut chomping: i32 = 0;
|
|
let mut increment: usize = 0;
|
|
let mut indent: usize = 0;
|
|
let mut trailing_blank: bool;
|
|
let mut leading_blank: bool = false;
|
|
|
|
let mut string = String::new();
|
|
let mut leading_break = String::new();
|
|
let mut trailing_breaks = String::new();
|
|
|
|
// skip '|' or '>'
|
|
self.skip();
|
|
self.unroll_non_block_indents();
|
|
|
|
if self.look_ch() == '+' || self.ch() == '-' {
|
|
if self.ch() == '+' {
|
|
chomping = 1;
|
|
} else {
|
|
chomping = -1;
|
|
}
|
|
self.skip();
|
|
if is_digit(self.look_ch()) {
|
|
if self.ch() == '0' {
|
|
return Err(ScanError::new(
|
|
start_mark,
|
|
"while scanning a block scalar, found an indentation indicator equal to 0",
|
|
));
|
|
}
|
|
increment = (self.ch() as usize) - ('0' as usize);
|
|
self.skip();
|
|
}
|
|
} else if is_digit(self.ch()) {
|
|
if self.ch() == '0' {
|
|
return Err(ScanError::new(
|
|
start_mark,
|
|
"while scanning a block scalar, found an indentation indicator equal to 0",
|
|
));
|
|
}
|
|
|
|
increment = (self.ch() as usize) - ('0' as usize);
|
|
self.skip();
|
|
self.lookahead(1);
|
|
if self.ch() == '+' || self.ch() == '-' {
|
|
if self.ch() == '+' {
|
|
chomping = 1;
|
|
} else {
|
|
chomping = -1;
|
|
}
|
|
self.skip();
|
|
}
|
|
}
|
|
|
|
self.skip_ws_to_eol(SkipTabs::Yes);
|
|
|
|
// Check if we are at the end of the line.
|
|
if !is_breakz(self.ch()) {
|
|
return Err(ScanError::new(
|
|
start_mark,
|
|
"while scanning a block scalar, did not find expected comment or line break",
|
|
));
|
|
}
|
|
|
|
if is_break(self.ch()) {
|
|
self.lookahead(2);
|
|
self.skip_line();
|
|
}
|
|
|
|
if self.look_ch() == '\t' {
|
|
return Err(ScanError::new(
|
|
start_mark,
|
|
"a block scalar content cannot start with a tab",
|
|
));
|
|
}
|
|
|
|
if increment > 0 {
|
|
indent = if self.indent >= 0 {
|
|
(self.indent + increment as isize) as usize
|
|
} else {
|
|
increment
|
|
}
|
|
}
|
|
|
|
// Scan the leading line breaks and determine the indentation level if needed.
|
|
if indent == 0 {
|
|
self.skip_block_scalar_first_line_indent(&mut indent, &mut trailing_breaks);
|
|
} else {
|
|
self.skip_block_scalar_indent(indent, &mut trailing_breaks);
|
|
}
|
|
|
|
self.lookahead(1);
|
|
|
|
let start_mark = self.mark;
|
|
|
|
while self.mark.col == indent && !is_z(self.ch()) {
|
|
// We are at the beginning of a non-empty line.
|
|
trailing_blank = is_blank(self.ch());
|
|
if !literal && !leading_break.is_empty() && !leading_blank && !trailing_blank {
|
|
if trailing_breaks.is_empty() {
|
|
string.push(' ');
|
|
}
|
|
leading_break.clear();
|
|
} else {
|
|
string.push_str(&leading_break);
|
|
leading_break.clear();
|
|
}
|
|
|
|
string.push_str(&trailing_breaks);
|
|
trailing_breaks.clear();
|
|
|
|
leading_blank = is_blank(self.ch());
|
|
|
|
while !is_breakz(self.ch()) {
|
|
string.push(self.ch());
|
|
self.skip();
|
|
self.lookahead(1);
|
|
}
|
|
// break on EOF
|
|
if is_z(self.ch()) {
|
|
break;
|
|
}
|
|
|
|
self.lookahead(2);
|
|
self.read_break(&mut leading_break);
|
|
|
|
// Eat the following indentation spaces and line breaks.
|
|
self.skip_block_scalar_indent(indent, &mut trailing_breaks);
|
|
}
|
|
|
|
// Chomp the tail.
|
|
if chomping != -1 {
|
|
string.push_str(&leading_break);
|
|
}
|
|
|
|
if chomping == 1 {
|
|
string.push_str(&trailing_breaks);
|
|
}
|
|
|
|
if literal {
|
|
Ok(Token(
|
|
start_mark,
|
|
TokenType::Scalar(TScalarStyle::Literal, string),
|
|
))
|
|
} else {
|
|
Ok(Token(
|
|
start_mark,
|
|
TokenType::Scalar(TScalarStyle::Foled, string),
|
|
))
|
|
}
|
|
}
|
|
|
|
/// Skip the block scalar indentation and empty lines.
|
|
fn skip_block_scalar_indent(&mut self, indent: usize, breaks: &mut String) {
|
|
loop {
|
|
// Consume all spaces. Tabs cannot be used as indentation.
|
|
while self.mark.col < indent && self.look_ch() == ' ' {
|
|
self.skip();
|
|
}
|
|
|
|
// If our current line is empty, skip over the break and continue looping.
|
|
if is_break(self.look_ch()) {
|
|
self.lookahead(2);
|
|
self.read_break(breaks);
|
|
} else {
|
|
// Otherwise, we have a content line. Return control.
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Determine the indentation level for a block scalar from the first line of its contents.
|
|
///
|
|
/// The function skips over whitespace-only lines and sets `indent` to the the longest
|
|
/// whitespace line that was encountered.
|
|
fn skip_block_scalar_first_line_indent(&mut self, indent: &mut usize, breaks: &mut String) {
|
|
let mut max_indent = 0;
|
|
loop {
|
|
// Consume all spaces. Tabs cannot be used as indentation.
|
|
while self.look_ch() == ' ' {
|
|
self.skip();
|
|
}
|
|
|
|
if self.mark.col > max_indent {
|
|
max_indent = self.mark.col;
|
|
}
|
|
|
|
if is_break(self.look_ch()) {
|
|
// If our current line is empty, skip over the break and continue looping.
|
|
self.lookahead(2);
|
|
self.read_break(breaks);
|
|
} else {
|
|
// Otherwise, we have a content line. Return control.
|
|
break;
|
|
}
|
|
}
|
|
|
|
*indent = max_indent.max((self.indent + 1) as usize).max(1);
|
|
}
|
|
|
|
/// Scan a quoted scalar and queue the resulting token.
///
/// `single` is forwarded to [`Self::scan_flow_scalar`]: `true` for a single-quoted scalar,
/// `false` for a double-quoted one. No simple key may directly follow the scalar.
///
/// # Errors
/// Propagates any error from saving the simple key or from scanning the scalar.
fn fetch_flow_scalar(&mut self, single: bool) -> ScanResult {
    self.save_simple_key()?;
    self.disallow_simple_key();

    let tok = self.scan_flow_scalar(single)?;

    // From spec: To ensure JSON compatibility, if a key inside a flow mapping is JSON-like,
    // YAML allows the following value to be specified adjacent to the ":".
    // Remember the index right after the closing quote for that purpose.
    self.adjacent_value_allowed_at = self.mark.index;

    self.tokens.push_back(tok);
    Ok(())
}
|
|
|
|
#[allow(clippy::too_many_lines)]
|
|
fn scan_flow_scalar(&mut self, single: bool) -> Result<Token, ScanError> {
|
|
let start_mark = self.mark;
|
|
|
|
let mut string = String::new();
|
|
let mut leading_break = String::new();
|
|
let mut trailing_breaks = String::new();
|
|
let mut whitespaces = String::new();
|
|
let mut leading_blanks;
|
|
|
|
/* Eat the left quote. */
|
|
self.skip();
|
|
|
|
loop {
|
|
/* Check for a document indicator. */
|
|
self.lookahead(4);
|
|
|
|
if self.mark.col == 0
|
|
&& (((self.buffer[0] == '-') && (self.buffer[1] == '-') && (self.buffer[2] == '-'))
|
|
|| ((self.buffer[0] == '.')
|
|
&& (self.buffer[1] == '.')
|
|
&& (self.buffer[2] == '.')))
|
|
&& is_blankz(self.buffer[3])
|
|
{
|
|
return Err(ScanError::new(
|
|
start_mark,
|
|
"while scanning a quoted scalar, found unexpected document indicator",
|
|
));
|
|
}
|
|
|
|
if is_z(self.ch()) {
|
|
return Err(ScanError::new(
|
|
start_mark,
|
|
"while scanning a quoted scalar, found unexpected end of stream",
|
|
));
|
|
}
|
|
|
|
self.lookahead(2);
|
|
|
|
leading_blanks = false;
|
|
|
|
// Consume non-blank characters.
|
|
while !is_blankz(self.ch()) {
|
|
match self.ch() {
|
|
// Check for an escaped single quote.
|
|
'\'' if self.buffer[1] == '\'' && single => {
|
|
string.push('\'');
|
|
self.skip();
|
|
self.skip();
|
|
}
|
|
// Check for the right quote.
|
|
'\'' if single => break,
|
|
'"' if !single => break,
|
|
// Check for an escaped line break.
|
|
'\\' if !single && is_break(self.buffer[1]) => {
|
|
self.lookahead(3);
|
|
self.skip();
|
|
self.skip_line();
|
|
leading_blanks = true;
|
|
break;
|
|
}
|
|
// Check for an escape sequence.
|
|
'\\' if !single => {
|
|
let mut code_length = 0usize;
|
|
match self.buffer[1] {
|
|
'0' => string.push('\0'),
|
|
'a' => string.push('\x07'),
|
|
'b' => string.push('\x08'),
|
|
't' | '\t' => string.push('\t'),
|
|
'n' => string.push('\n'),
|
|
'v' => string.push('\x0b'),
|
|
'f' => string.push('\x0c'),
|
|
'r' => string.push('\x0d'),
|
|
'e' => string.push('\x1b'),
|
|
' ' => string.push('\x20'),
|
|
'"' => string.push('"'),
|
|
'\'' => string.push('\''),
|
|
'\\' => string.push('\\'),
|
|
// NEL (#x85)
|
|
'N' => string.push(char::from_u32(0x85).unwrap()),
|
|
// #xA0
|
|
'_' => string.push(char::from_u32(0xA0).unwrap()),
|
|
// LS (#x2028)
|
|
'L' => string.push(char::from_u32(0x2028).unwrap()),
|
|
// PS (#x2029)
|
|
'P' => string.push(char::from_u32(0x2029).unwrap()),
|
|
'x' => code_length = 2,
|
|
'u' => code_length = 4,
|
|
'U' => code_length = 8,
|
|
_ => {
|
|
return Err(ScanError::new(
|
|
start_mark,
|
|
"while parsing a quoted scalar, found unknown escape character",
|
|
))
|
|
}
|
|
}
|
|
self.skip();
|
|
self.skip();
|
|
// Consume an arbitrary escape code.
|
|
if code_length > 0 {
|
|
self.lookahead(code_length);
|
|
let mut value = 0u32;
|
|
for i in 0..code_length {
|
|
if !is_hex(self.buffer[i]) {
|
|
return Err(ScanError::new(start_mark,
|
|
"while parsing a quoted scalar, did not find expected hexadecimal number"));
|
|
}
|
|
value = (value << 4) + as_hex(self.buffer[i]);
|
|
}
|
|
|
|
let Some(ch) = char::from_u32(value) else {
|
|
return Err(ScanError::new(start_mark, "while parsing a quoted scalar, found invalid Unicode character escape code"));
|
|
};
|
|
string.push(ch);
|
|
|
|
for _ in 0..code_length {
|
|
self.skip();
|
|
}
|
|
}
|
|
}
|
|
c => {
|
|
string.push(c);
|
|
self.skip();
|
|
}
|
|
}
|
|
self.lookahead(2);
|
|
}
|
|
self.lookahead(1);
|
|
match self.ch() {
|
|
'\'' if single => break,
|
|
'"' if !single => break,
|
|
_ => {}
|
|
}
|
|
|
|
// Consume blank characters.
|
|
while is_blank(self.ch()) || is_break(self.ch()) {
|
|
if is_blank(self.ch()) {
|
|
// Consume a space or a tab character.
|
|
if leading_blanks {
|
|
if self.ch() == '\t' && (self.mark.col as isize) < self.indent {
|
|
return Err(ScanError::new(
|
|
self.mark,
|
|
"tab cannot be used as indentation",
|
|
));
|
|
}
|
|
self.skip();
|
|
} else {
|
|
whitespaces.push(self.ch());
|
|
self.skip();
|
|
}
|
|
} else {
|
|
self.lookahead(2);
|
|
// Check if it is a first line break.
|
|
if leading_blanks {
|
|
self.read_break(&mut trailing_breaks);
|
|
} else {
|
|
whitespaces.clear();
|
|
self.read_break(&mut leading_break);
|
|
leading_blanks = true;
|
|
}
|
|
}
|
|
self.lookahead(1);
|
|
}
|
|
// Join the whitespaces or fold line breaks.
|
|
if leading_blanks {
|
|
if leading_break.is_empty() {
|
|
string.push_str(&leading_break);
|
|
string.push_str(&trailing_breaks);
|
|
trailing_breaks.clear();
|
|
leading_break.clear();
|
|
} else {
|
|
if trailing_breaks.is_empty() {
|
|
string.push(' ');
|
|
} else {
|
|
string.push_str(&trailing_breaks);
|
|
trailing_breaks.clear();
|
|
}
|
|
leading_break.clear();
|
|
}
|
|
} else {
|
|
string.push_str(&whitespaces);
|
|
whitespaces.clear();
|
|
}
|
|
} // loop
|
|
|
|
// Eat the right quote.
|
|
self.skip();
|
|
|
|
if single {
|
|
Ok(Token(
|
|
start_mark,
|
|
TokenType::Scalar(TScalarStyle::SingleQuoted, string),
|
|
))
|
|
} else {
|
|
Ok(Token(
|
|
start_mark,
|
|
TokenType::Scalar(TScalarStyle::DoubleQuoted, string),
|
|
))
|
|
}
|
|
}
|
|
|
|
/// Scan a plain (unquoted) scalar and queue the resulting token.
///
/// A simple key may start at the current position, so one is saved first; no simple key may
/// follow directly afterwards.
///
/// # Errors
/// Propagates any error from saving the simple key or from scanning the scalar.
fn fetch_plain_scalar(&mut self) -> ScanResult {
    self.save_simple_key()?;
    self.disallow_simple_key();

    let tok = self.scan_plain_scalar()?;
    self.tokens.push_back(tok);
    Ok(())
}
|
|
|
|
fn scan_plain_scalar(&mut self) -> Result<Token, ScanError> {
|
|
self.unroll_non_block_indents();
|
|
let indent = self.indent + 1;
|
|
let start_mark = self.mark;
|
|
|
|
let mut string = String::new();
|
|
let mut leading_break = String::new();
|
|
let mut trailing_breaks = String::new();
|
|
let mut whitespaces = String::new();
|
|
let mut leading_blanks = true;
|
|
|
|
loop {
|
|
/* Check for a document indicator. */
|
|
self.lookahead(4);
|
|
|
|
if self.mark.col == 0
|
|
&& (((self.buffer[0] == '-') && (self.buffer[1] == '-') && (self.buffer[2] == '-'))
|
|
|| ((self.buffer[0] == '.')
|
|
&& (self.buffer[1] == '.')
|
|
&& (self.buffer[2] == '.')))
|
|
&& is_blankz(self.buffer[3])
|
|
{
|
|
break;
|
|
}
|
|
|
|
if self.ch() == '#' {
|
|
break;
|
|
}
|
|
while !is_blankz(self.ch()) {
|
|
// indicators can end a plain scalar, see 7.3.3. Plain Style
|
|
match self.ch() {
|
|
':' if is_blankz(self.buffer[1])
|
|
|| (self.flow_level > 0 && is_flow(self.buffer[1])) =>
|
|
{
|
|
break;
|
|
}
|
|
',' | '[' | ']' | '{' | '}' if self.flow_level > 0 => break,
|
|
_ => {}
|
|
}
|
|
|
|
if leading_blanks || !whitespaces.is_empty() {
|
|
if leading_blanks {
|
|
if leading_break.is_empty() {
|
|
string.push_str(&leading_break);
|
|
string.push_str(&trailing_breaks);
|
|
trailing_breaks.clear();
|
|
leading_break.clear();
|
|
} else {
|
|
if trailing_breaks.is_empty() {
|
|
string.push(' ');
|
|
} else {
|
|
string.push_str(&trailing_breaks);
|
|
trailing_breaks.clear();
|
|
}
|
|
leading_break.clear();
|
|
}
|
|
leading_blanks = false;
|
|
} else {
|
|
string.push_str(&whitespaces);
|
|
whitespaces.clear();
|
|
}
|
|
}
|
|
|
|
string.push(self.ch());
|
|
self.skip();
|
|
self.lookahead(2);
|
|
}
|
|
// is the end?
|
|
if !(is_blank(self.ch()) || is_break(self.ch())) {
|
|
break;
|
|
}
|
|
|
|
while is_blank(self.look_ch()) || is_break(self.ch()) {
|
|
if is_blank(self.ch()) {
|
|
if leading_blanks && (self.mark.col as isize) < indent && self.ch() == '\t' {
|
|
// If our line contains only whitespace, this is not an error.
|
|
// Skip over it.
|
|
self.skip_ws_to_eol(SkipTabs::Yes);
|
|
if is_breakz(self.ch()) {
|
|
continue;
|
|
}
|
|
return Err(ScanError::new(
|
|
start_mark,
|
|
"while scanning a plain scalar, found a tab",
|
|
));
|
|
}
|
|
|
|
if !leading_blanks {
|
|
whitespaces.push(self.ch());
|
|
}
|
|
self.skip();
|
|
} else {
|
|
self.lookahead(2);
|
|
// Check if it is a first line break
|
|
if leading_blanks {
|
|
self.read_break(&mut trailing_breaks);
|
|
} else {
|
|
whitespaces.clear();
|
|
self.read_break(&mut leading_break);
|
|
leading_blanks = true;
|
|
}
|
|
}
|
|
}
|
|
|
|
// check indentation level
|
|
if self.flow_level == 0 && (self.mark.col as isize) < indent {
|
|
break;
|
|
}
|
|
}
|
|
|
|
if leading_blanks {
|
|
self.allow_simple_key();
|
|
}
|
|
|
|
Ok(Token(
|
|
start_mark,
|
|
TokenType::Scalar(TScalarStyle::Plain, string),
|
|
))
|
|
}
|
|
|
|
fn fetch_key(&mut self) -> ScanResult {
|
|
let start_mark = self.mark;
|
|
if self.flow_level == 0 {
|
|
// Check if we are allowed to start a new key (not necessarily simple).
|
|
if !self.simple_key_allowed {
|
|
return Err(ScanError::new(
|
|
self.mark,
|
|
"mapping keys are not allowed in this context",
|
|
));
|
|
}
|
|
self.roll_indent(
|
|
start_mark.col,
|
|
None,
|
|
TokenType::BlockMappingStart,
|
|
start_mark,
|
|
);
|
|
} else {
|
|
// The parser, upon receiving a `Key`, will insert a `MappingStart` event.
|
|
self.flow_mapping_started = true;
|
|
}
|
|
|
|
self.remove_simple_key()?;
|
|
|
|
if self.flow_level == 0 {
|
|
self.allow_simple_key();
|
|
} else {
|
|
self.disallow_simple_key();
|
|
}
|
|
|
|
self.skip();
|
|
self.skip_yaml_whitespace()?;
|
|
if self.ch() == '\t' {
|
|
return Err(ScanError::new(
|
|
self.mark(),
|
|
"tabs disallowed in this context",
|
|
));
|
|
}
|
|
self.tokens.push_back(Token(start_mark, TokenType::Key));
|
|
Ok(())
|
|
}
|
|
|
|
/// Fetch a value from a mapping (after a `:`).
|
|
fn fetch_value(&mut self) -> ScanResult {
|
|
let sk = self.simple_keys.last().unwrap().clone();
|
|
let start_mark = self.mark;
|
|
self.implicit_flow_mapping = self.flow_level > 0 && !self.flow_mapping_started;
|
|
|
|
// Skip over ':'.
|
|
self.skip();
|
|
if self.look_ch() == '\t'
|
|
&& !self.skip_ws_to_eol(SkipTabs::Yes).has_valid_yaml_ws()
|
|
&& (self.ch() == '-' || is_alpha(self.ch()))
|
|
{
|
|
return Err(ScanError::new(
|
|
self.mark,
|
|
"':' must be followed by a valid YAML whitespace",
|
|
));
|
|
}
|
|
|
|
if sk.possible {
|
|
// insert simple key
|
|
let tok = Token(sk.mark, TokenType::Key);
|
|
let tokens_parsed = self.tokens_parsed;
|
|
self.insert_token(sk.token_number - tokens_parsed, tok);
|
|
if self.implicit_flow_mapping {
|
|
self.insert_token(
|
|
sk.token_number - tokens_parsed,
|
|
Token(self.mark, TokenType::FlowMappingStart),
|
|
);
|
|
}
|
|
|
|
// Add the BLOCK-MAPPING-START token if needed.
|
|
self.roll_indent(
|
|
sk.mark.col,
|
|
Some(sk.token_number),
|
|
TokenType::BlockMappingStart,
|
|
start_mark,
|
|
);
|
|
self.roll_one_col_indent();
|
|
|
|
self.simple_keys.last_mut().unwrap().possible = false;
|
|
self.disallow_simple_key();
|
|
} else {
|
|
if self.implicit_flow_mapping {
|
|
self.tokens
|
|
.push_back(Token(self.mark, TokenType::FlowMappingStart));
|
|
}
|
|
// The ':' indicator follows a complex key.
|
|
if self.flow_level == 0 {
|
|
if !self.simple_key_allowed {
|
|
return Err(ScanError::new(
|
|
start_mark,
|
|
"mapping values are not allowed in this context",
|
|
));
|
|
}
|
|
|
|
self.roll_indent(
|
|
start_mark.col,
|
|
None,
|
|
TokenType::BlockMappingStart,
|
|
start_mark,
|
|
);
|
|
}
|
|
self.roll_one_col_indent();
|
|
|
|
if self.flow_level == 0 {
|
|
self.allow_simple_key();
|
|
} else {
|
|
self.disallow_simple_key();
|
|
}
|
|
}
|
|
self.tokens.push_back(Token(start_mark, TokenType::Value));
|
|
|
|
Ok(())
|
|
}
|
|
|
|
/// Add an indentation level to the stack with the given block token, if needed.
|
|
///
|
|
/// An indentation level is added only if:
|
|
/// - We are not in a flow-style construct (which don't have indentation per-se).
|
|
/// - The current column is further indented than the last indent we have registered.
|
|
fn roll_indent(&mut self, col: usize, number: Option<usize>, tok: TokenType, mark: Marker) {
|
|
if self.flow_level > 0 {
|
|
return;
|
|
}
|
|
|
|
// If the last indent was a non-block indent, remove it.
|
|
// This means that we prepared an indent that we thought we wouldn't use, but realized just
|
|
// now that it is a block indent.
|
|
if self.indent <= col as isize {
|
|
if let Some(indent) = self.indents.last() {
|
|
if !indent.needs_block_end {
|
|
self.indent = indent.indent;
|
|
self.indents.pop();
|
|
}
|
|
}
|
|
}
|
|
|
|
if self.indent < col as isize {
|
|
self.indents.push(Indent {
|
|
indent: self.indent,
|
|
needs_block_end: true,
|
|
});
|
|
self.indent = col as isize;
|
|
let tokens_parsed = self.tokens_parsed;
|
|
match number {
|
|
Some(n) => self.insert_token(n - tokens_parsed, Token(mark, tok)),
|
|
None => self.tokens.push_back(Token(mark, tok)),
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Pop indentation levels from the stack as much as needed.
|
|
///
|
|
/// Indentation levels are popped from the stack while they are further indented than `col`.
|
|
/// If we are in a flow-style construct (which don't have indentation per-se), this function
|
|
/// does nothing.
|
|
fn unroll_indent(&mut self, col: isize) {
|
|
if self.flow_level > 0 {
|
|
return;
|
|
}
|
|
while self.indent > col {
|
|
let indent = self.indents.pop().unwrap();
|
|
self.indent = indent.indent;
|
|
if indent.needs_block_end {
|
|
self.tokens.push_back(Token(self.mark, TokenType::BlockEnd));
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Add an indentation level of 1 column that does not start a block.
|
|
///
|
|
/// See the documentation of [`Indent::needs_block_end`] for more details.
|
|
fn roll_one_col_indent(&mut self) {
|
|
if self.flow_level == 0 {
|
|
self.indents.push(Indent {
|
|
indent: self.indent,
|
|
needs_block_end: false,
|
|
});
|
|
self.indent += 1;
|
|
}
|
|
}
|
|
|
|
/// Unroll all last indents created with [`Self::roll_one_col_indent`].
|
|
fn unroll_non_block_indents(&mut self) {
|
|
while let Some(indent) = self.indents.last() {
|
|
if indent.needs_block_end {
|
|
break;
|
|
} else {
|
|
self.indent = indent.indent;
|
|
self.indents.pop();
|
|
}
|
|
}
|
|
}
|
|
|
|
fn save_simple_key(&mut self) -> ScanResult {
|
|
if self.simple_key_allowed {
|
|
let required = self.flow_level > 0
|
|
&& self.indent == (self.mark.col as isize)
|
|
&& self.indents.last().unwrap().needs_block_end;
|
|
let mut sk = SimpleKey::new(self.mark);
|
|
sk.possible = true;
|
|
sk.required = required;
|
|
sk.token_number = self.tokens_parsed + self.tokens.len();
|
|
|
|
self.remove_simple_key()?;
|
|
|
|
self.simple_keys.pop();
|
|
self.simple_keys.push(sk);
|
|
}
|
|
Ok(())
|
|
}
|
|
|
|
fn remove_simple_key(&mut self) -> ScanResult {
|
|
let last = self.simple_keys.last_mut().unwrap();
|
|
if last.possible && last.required {
|
|
return Err(ScanError::new(self.mark, "simple key expected"));
|
|
}
|
|
|
|
last.possible = false;
|
|
Ok(())
|
|
}
|
|
|
|
/// Return whether the scanner is inside a block but outside of a flow sequence.
|
|
fn is_within_block(&self) -> bool {
|
|
!self.indents.is_empty()
|
|
}
|
|
|
|
/// If an implicit mapping had started, end it.
|
|
fn end_implicit_mapping(&mut self, mark: Marker) {
|
|
if self.implicit_flow_mapping {
|
|
self.implicit_flow_mapping = false;
|
|
self.flow_mapping_started = false;
|
|
self.tokens
|
|
.push_back(Token(mark, TokenType::FlowMappingEnd));
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Behavior to adopt regarding treating tabs as whitespace.
///
/// Although tab is a valid yaml whitespace, it doesn't always behave the same as a space.
///
/// The same type doubles as the return value of `skip_ws_to_eol` through its `Result`
/// variant.
#[derive(Copy, Clone, Eq, PartialEq)]
enum SkipTabs {
    /// Skip all tabs as whitespace.
    Yes,
    /// Don't skip any tab. Return from the function when encountering one.
    No,
    /// Return value from the function.
    Result(
        /// Whether tabs were encountered.
        bool,
        /// Whether at least 1 valid yaml whitespace has been encountered.
        bool,
    ),
}

impl SkipTabs {
    /// Whether tabs were found while skipping whitespace.
    ///
    /// This function must be called after a call to `skip_ws_to_eol`. It returns `false` for
    /// the `Yes` and `No` variants.
    fn found_tabs(self) -> bool {
        matches!(self, SkipTabs::Result(true, _))
    }

    /// Whether a valid YAML whitespace has been found in skipped-over content.
    ///
    /// This function must be called after a call to `skip_ws_to_eol`. It returns `false` for
    /// the `Yes` and `No` variants.
    fn has_valid_yaml_ws(self) -> bool {
        matches!(self, SkipTabs::Result(_, true))
    }
}
|
|
|
|
#[cfg(test)]
mod test {
    /// An ASCII letter is accepted as an anchor character.
    #[test]
    fn test_is_anchor_char() {
        use super::is_anchor_char;
        assert!(is_anchor_char('x'));
    }
}
|