Fix indent when -
& entry have \n
in-between.
This commit is contained in:
parent
5a8c5a3d44
commit
cd2c34cabe
2 changed files with 123 additions and 31 deletions
|
@ -181,6 +181,31 @@ impl SimpleKey {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// An indentation level on the stack of indentations.
|
||||||
|
#[derive(Clone, Debug, Default)]
|
||||||
|
struct Indent {
|
||||||
|
/// The former indentation level.
|
||||||
|
indent: isize,
|
||||||
|
/// Whether, upon closing, this indent generates a `BlockEnd` token.
|
||||||
|
///
|
||||||
|
/// There are levels of indentation which do not start a block. Examples of this would be:
|
||||||
|
/// ```yaml
|
||||||
|
/// -
|
||||||
|
/// foo # ok
|
||||||
|
/// -
|
||||||
|
/// bar # ko, bar needs to be indented further than the `-`.
|
||||||
|
/// - [
|
||||||
|
/// baz, # ok
|
||||||
|
/// quux # ko, quux needs to be indented further than the '-'.
|
||||||
|
/// ] # ko, the closing bracket needs to be indented further than the `-`.
|
||||||
|
/// ```
|
||||||
|
///
|
||||||
|
/// The indentation level created by the `-` is for a single entry in the sequence. Emitting a
|
||||||
|
/// `BlockEnd` when this indentation block ends would generate one `BlockEnd` per entry in the
|
||||||
|
/// sequence, although we must have exactly one to end the sequence.
|
||||||
|
needs_block_end: bool,
|
||||||
|
}
|
||||||
|
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
#[allow(clippy::struct_excessive_bools)]
|
#[allow(clippy::struct_excessive_bools)]
|
||||||
pub struct Scanner<T> {
|
pub struct Scanner<T> {
|
||||||
|
@ -190,7 +215,9 @@ pub struct Scanner<T> {
|
||||||
buffer: VecDeque<char>,
|
buffer: VecDeque<char>,
|
||||||
error: Option<ScanError>,
|
error: Option<ScanError>,
|
||||||
|
|
||||||
|
/// Whether we have already emitted the `StreamStart` token.
|
||||||
stream_start_produced: bool,
|
stream_start_produced: bool,
|
||||||
|
/// Whether we have already emitted the `StreamEnd` token.
|
||||||
stream_end_produced: bool,
|
stream_end_produced: bool,
|
||||||
adjacent_value_allowed_at: usize,
|
adjacent_value_allowed_at: usize,
|
||||||
/// Whether a simple key could potentially start at the current position.
|
/// Whether a simple key could potentially start at the current position.
|
||||||
|
@ -198,8 +225,11 @@ pub struct Scanner<T> {
|
||||||
/// Simple keys are the opposite of complex keys which are keys starting with `?`.
|
/// Simple keys are the opposite of complex keys which are keys starting with `?`.
|
||||||
simple_key_allowed: bool,
|
simple_key_allowed: bool,
|
||||||
simple_keys: Vec<SimpleKey>,
|
simple_keys: Vec<SimpleKey>,
|
||||||
|
/// The current indentation level.
|
||||||
indent: isize,
|
indent: isize,
|
||||||
indents: Vec<isize>,
|
/// List of all block indentation levels we are in (except the current one).
|
||||||
|
indents: Vec<Indent>,
|
||||||
|
/// Level of nesting of flow sequences.
|
||||||
flow_level: u8,
|
flow_level: u8,
|
||||||
tokens_parsed: usize,
|
tokens_parsed: usize,
|
||||||
token_available: bool,
|
token_available: bool,
|
||||||
|
@ -247,7 +277,9 @@ fn is_blank(c: char) -> bool {
|
||||||
c == ' ' || c == '\t'
|
c == ' ' || c == '\t'
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Check whether the character is nil or a whitespace (`\0`, ` `, `\t`).
|
/// Check whether the character is nil, a linebreak or a whitespace.
|
||||||
|
///
|
||||||
|
/// `\0`, ` `, `\t`, `\n`, `\r`
|
||||||
#[inline]
|
#[inline]
|
||||||
fn is_blankz(c: char) -> bool {
|
fn is_blankz(c: char) -> bool {
|
||||||
is_blank(c) || is_breakz(c)
|
is_blank(c) || is_breakz(c)
|
||||||
|
@ -454,13 +486,14 @@ impl<T: Iterator<Item = char>> Scanner<T> {
|
||||||
|
|
||||||
pub fn fetch_next_token(&mut self) -> ScanResult {
|
pub fn fetch_next_token(&mut self) -> ScanResult {
|
||||||
self.lookahead(1);
|
self.lookahead(1);
|
||||||
// println!("--> fetch_next_token Cur {:?} {:?}", self.mark, self.ch());
|
// eprintln!("--> fetch_next_token Cur {:?} {:?}", self.mark, self.ch());
|
||||||
|
|
||||||
if !self.stream_start_produced {
|
if !self.stream_start_produced {
|
||||||
self.fetch_stream_start();
|
self.fetch_stream_start();
|
||||||
return Ok(());
|
return Ok(());
|
||||||
}
|
}
|
||||||
self.skip_to_next_token()?;
|
self.skip_to_next_token()?;
|
||||||
|
// eprintln!("--> fetch_next_token wo ws {:?} {:?}", self.mark, self.ch());
|
||||||
|
|
||||||
self.stale_simple_keys()?;
|
self.stale_simple_keys()?;
|
||||||
|
|
||||||
|
@ -607,17 +640,23 @@ impl<T: Iterator<Item = char>> Scanner<T> {
|
||||||
' ' => self.skip(),
|
' ' => self.skip(),
|
||||||
// Tabs may not be used as indentation.
|
// Tabs may not be used as indentation.
|
||||||
// "Indentation" only exists as long as a block is started, but does not exist
|
// "Indentation" only exists as long as a block is started, but does not exist
|
||||||
// inside of flow-style constructs. Tabs are allowed as part of leaading
|
// inside of flow-style constructs. Tabs are allowed as part of leading
|
||||||
// whitespaces outside of indentation.
|
// whitespaces outside of indentation.
|
||||||
|
// If a flow-style construct is in an indented block, its contents must still be
|
||||||
|
// indented. Also, tabs are allowed anywhere in it if it has no content.
|
||||||
'\t' if self.is_within_block()
|
'\t' if self.is_within_block()
|
||||||
&& self.leading_whitespace
|
&& self.leading_whitespace
|
||||||
&& (self.mark.col as isize) < self.indent =>
|
&& (self.mark.col as isize) < self.indent =>
|
||||||
{
|
{
|
||||||
|
self.skip_ws_to_eol(true);
|
||||||
|
// If we have content on that line with a tab, return an error.
|
||||||
|
if !is_breakz(self.ch()) {
|
||||||
return Err(ScanError::new(
|
return Err(ScanError::new(
|
||||||
self.mark,
|
self.mark,
|
||||||
"tabs disallowed within this context (block indentation)",
|
"tabs disallowed within this context (block indentation)",
|
||||||
));
|
));
|
||||||
}
|
}
|
||||||
|
}
|
||||||
'\t' => self.skip(),
|
'\t' => self.skip(),
|
||||||
'\n' | '\r' => {
|
'\n' | '\r' => {
|
||||||
self.lookahead(2);
|
self.lookahead(2);
|
||||||
|
@ -682,6 +721,23 @@ impl<T: Iterator<Item = char>> Scanner<T> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Skip YAML whitespace, at most up to the end of the line. Also skips comments.
|
||||||
|
fn skip_ws_to_eol(&mut self, skip_tab: bool) {
|
||||||
|
loop {
|
||||||
|
match self.look_ch() {
|
||||||
|
' ' => self.skip(),
|
||||||
|
'\t' if skip_tab => self.skip(),
|
||||||
|
'#' => {
|
||||||
|
while !is_breakz(self.ch()) {
|
||||||
|
self.skip();
|
||||||
|
self.lookahead(1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
_ => break,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
fn fetch_stream_start(&mut self) {
|
fn fetch_stream_start(&mut self) {
|
||||||
let mark = self.mark;
|
let mark = self.mark;
|
||||||
self.indent = -1;
|
self.indent = -1;
|
||||||
|
@ -1153,6 +1209,7 @@ impl<T: Iterator<Item = char>> Scanner<T> {
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Push the `FlowEntry` token and skip over the `,`.
|
||||||
fn fetch_flow_entry(&mut self) -> ScanResult {
|
fn fetch_flow_entry(&mut self) -> ScanResult {
|
||||||
self.remove_simple_key()?;
|
self.remove_simple_key()?;
|
||||||
self.allow_simple_key();
|
self.allow_simple_key();
|
||||||
|
@ -1173,6 +1230,7 @@ impl<T: Iterator<Item = char>> Scanner<T> {
|
||||||
.ok_or_else(|| ScanError::new(self.mark, "recursion limit exceeded"))?;
|
.ok_or_else(|| ScanError::new(self.mark, "recursion limit exceeded"))?;
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
fn decrease_flow_level(&mut self) {
|
fn decrease_flow_level(&mut self) {
|
||||||
if self.flow_level > 0 {
|
if self.flow_level > 0 {
|
||||||
self.flow_level -= 1;
|
self.flow_level -= 1;
|
||||||
|
@ -1180,8 +1238,19 @@ impl<T: Iterator<Item = char>> Scanner<T> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Push the `Block*` token(s) and skip over the `-`.
|
||||||
|
///
|
||||||
|
/// Add an indentation level and push a `BlockSequenceStart` token if needed, then push a
|
||||||
|
/// `BlockEntry` token.
|
||||||
|
/// This function only skips over the `-` and does not fetch the entry value.
|
||||||
fn fetch_block_entry(&mut self) -> ScanResult {
|
fn fetch_block_entry(&mut self) -> ScanResult {
|
||||||
if self.flow_level == 0 {
|
if self.flow_level > 0 {
|
||||||
|
// - * only allowed in block
|
||||||
|
return Err(ScanError::new(
|
||||||
|
self.mark,
|
||||||
|
r#""-" is only valid inside a block"#,
|
||||||
|
));
|
||||||
|
}
|
||||||
// Check if we are allowed to start a new entry.
|
// Check if we are allowed to start a new entry.
|
||||||
if !self.simple_key_allowed {
|
if !self.simple_key_allowed {
|
||||||
return Err(ScanError::new(
|
return Err(ScanError::new(
|
||||||
|
@ -1190,24 +1259,27 @@ impl<T: Iterator<Item = char>> Scanner<T> {
|
||||||
));
|
));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Skip over the `-`.
|
||||||
let mark = self.mark;
|
let mark = self.mark;
|
||||||
|
self.skip();
|
||||||
|
|
||||||
// generate BLOCK-SEQUENCE-START if indented
|
// generate BLOCK-SEQUENCE-START if indented
|
||||||
self.roll_indent(mark.col, None, TokenType::BlockSequenceStart, mark);
|
self.roll_indent(mark.col, None, TokenType::BlockSequenceStart, mark);
|
||||||
} else {
|
self.skip_ws_to_eol(false);
|
||||||
// - * only allowed in block
|
if is_break(self.look_ch()) || is_flow(self.ch()) {
|
||||||
return Err(ScanError::new(
|
self.indents.push(Indent {
|
||||||
self.mark,
|
indent: self.indent,
|
||||||
r#""-" is only valid inside a block"#,
|
needs_block_end: false,
|
||||||
));
|
});
|
||||||
|
self.indent += 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
self.remove_simple_key()?;
|
self.remove_simple_key()?;
|
||||||
self.allow_simple_key();
|
self.allow_simple_key();
|
||||||
|
|
||||||
let start_mark = self.mark;
|
|
||||||
self.skip();
|
|
||||||
|
|
||||||
self.tokens
|
self.tokens
|
||||||
.push_back(Token(start_mark, TokenType::BlockEntry));
|
.push_back(Token(self.mark, TokenType::BlockEntry));
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1809,6 +1881,7 @@ impl<T: Iterator<Item = char>> Scanner<T> {
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Fetch a value from a mapping (after a `:`).
|
||||||
fn fetch_value(&mut self) -> ScanResult {
|
fn fetch_value(&mut self) -> ScanResult {
|
||||||
let sk = self.simple_keys.last().unwrap().clone();
|
let sk = self.simple_keys.last().unwrap().clone();
|
||||||
let start_mark = self.mark;
|
let start_mark = self.mark;
|
||||||
|
@ -1868,8 +1941,23 @@ impl<T: Iterator<Item = char>> Scanner<T> {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// If the last indent was a non-block indent, remove it.
|
||||||
|
// This means that we prepared an indent that we thought we wouldn't use, but realized just
|
||||||
|
// now that it is a block indent.
|
||||||
|
if self.indent == col as isize {
|
||||||
|
if let Some(indent) = self.indents.last() {
|
||||||
|
if !indent.needs_block_end {
|
||||||
|
self.indent = indent.indent;
|
||||||
|
self.indents.pop();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if self.indent < col as isize {
|
if self.indent < col as isize {
|
||||||
self.indents.push(self.indent);
|
self.indents.push(Indent {
|
||||||
|
indent: self.indent,
|
||||||
|
needs_block_end: true,
|
||||||
|
});
|
||||||
self.indent = col as isize;
|
self.indent = col as isize;
|
||||||
let tokens_parsed = self.tokens_parsed;
|
let tokens_parsed = self.tokens_parsed;
|
||||||
match number {
|
match number {
|
||||||
|
@ -1889,14 +1977,19 @@ impl<T: Iterator<Item = char>> Scanner<T> {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
while self.indent > col {
|
while self.indent > col {
|
||||||
|
let indent = self.indents.pop().unwrap();
|
||||||
|
self.indent = indent.indent;
|
||||||
|
if indent.needs_block_end {
|
||||||
self.tokens.push_back(Token(self.mark, TokenType::BlockEnd));
|
self.tokens.push_back(Token(self.mark, TokenType::BlockEnd));
|
||||||
self.indent = self.indents.pop().unwrap();
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn save_simple_key(&mut self) -> ScanResult {
|
fn save_simple_key(&mut self) -> ScanResult {
|
||||||
let required = self.flow_level > 0 && self.indent == (self.mark.col as isize);
|
|
||||||
if self.simple_key_allowed {
|
if self.simple_key_allowed {
|
||||||
|
let required = self.flow_level > 0
|
||||||
|
&& self.indent == (self.mark.col as isize)
|
||||||
|
&& self.indents.last().unwrap().needs_block_end;
|
||||||
let mut sk = SimpleKey::new(self.mark);
|
let mut sk = SimpleKey::new(self.mark);
|
||||||
sk.possible = true;
|
sk.possible = true;
|
||||||
sk.required = required;
|
sk.required = required;
|
||||||
|
@ -1922,6 +2015,6 @@ impl<T: Iterator<Item = char>> Scanner<T> {
|
||||||
|
|
||||||
/// Return whether the scanner is inside a block but outside of a flow sequence.
|
/// Return whether the scanner is inside an indented block (i.e. the indentation stack is not empty).
|
||||||
fn is_within_block(&self) -> bool {
|
fn is_within_block(&self) -> bool {
|
||||||
!self.indents.is_empty() && self.flow_level == 0
|
!self.indents.is_empty()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -299,7 +299,6 @@ fn expected_events(expected_tree: &str) -> Vec<String> {
|
||||||
static EXPECTED_FAILURES: &[&str] = &[
|
static EXPECTED_FAILURES: &[&str] = &[
|
||||||
// These seem to be plain bugs
|
// These seem to be plain bugs
|
||||||
// TAB as start of plain scalar instead of whitespace
|
// TAB as start of plain scalar instead of whitespace
|
||||||
"Y79Y-03", // unexpected pass
|
|
||||||
"Y79Y-04", // unexpected pass
|
"Y79Y-04", // unexpected pass
|
||||||
"Y79Y-05", // unexpected pass
|
"Y79Y-05", // unexpected pass
|
||||||
// TABs in whitespace-only lines
|
// TABs in whitespace-only lines
|
||||||
|
|
Loading…
Reference in a new issue