Avoid a trip to self.buffer
.
`self.buffer` is a `VecDeque<char>`, meaning that characters are stored on 4B. When reading as we used to do, this means that every 1 byte character we read was turned into 4 bytes, which was turned into 1 byte in `String::extend`. Instead of going through `self.buffer`, use a local `String` to store the characters before pushing them to `string`.
This commit is contained in:
parent
2471c5793a
commit
28893c4567
1 changed files with 59 additions and 35 deletions
|
@ -503,15 +503,13 @@ impl<T: Iterator<Item = char>> Scanner<T> {
|
||||||
self.ch()
|
self.ch()
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Read a character from the input stream, place it in the buffer and return it.
|
/// Read a character from the input stream, returning it directly.
|
||||||
///
|
///
|
||||||
/// No character is consumed. The character returned is the one at the back of the buffer (the
|
/// The buffer is bypassed and `self.mark` would need to be updated manually.
|
||||||
/// one we just read from the input stream).
|
|
||||||
#[inline]
|
#[inline]
|
||||||
fn read_ch(&mut self) -> char {
|
#[must_use]
|
||||||
let c = self.rdr.next().unwrap_or('\0');
|
fn raw_read_ch(&mut self) -> char {
|
||||||
self.buffer.push_back(c);
|
self.rdr.next().unwrap_or('\0')
|
||||||
c
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Return whether the next character is `c`.
|
/// Return whether the next character is `c`.
|
||||||
|
@ -1614,6 +1612,7 @@ impl<T: Iterator<Item = char>> Scanner<T> {
|
||||||
));
|
));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
let mut line_buffer = String::with_capacity(100);
|
||||||
let start_mark = self.mark;
|
let start_mark = self.mark;
|
||||||
while self.mark.col == indent && !is_z(self.ch()) {
|
while self.mark.col == indent && !is_z(self.ch()) {
|
||||||
if indent == 0 {
|
if indent == 0 {
|
||||||
|
@ -1640,34 +1639,7 @@ impl<T: Iterator<Item = char>> Scanner<T> {
|
||||||
|
|
||||||
leading_blank = is_blank(self.ch());
|
leading_blank = is_blank(self.ch());
|
||||||
|
|
||||||
// Start by evaluating characters in the buffer.
|
self.scan_block_scalar_content_line(&mut string, &mut line_buffer);
|
||||||
while !self.buffer.is_empty() && !is_breakz(self.ch()) {
|
|
||||||
string.push(self.ch());
|
|
||||||
// We may technically skip non-blank characters. However, the only distinction is
|
|
||||||
// to determine what is leading whitespace and what is not. Here, we read the
|
|
||||||
// contents of the line until either eof or a linebreak. We know we will not read
|
|
||||||
// `self.leading_whitespace` until the end of the line, where it will be reset.
|
|
||||||
// This allows us to call a slightly less expensive function.
|
|
||||||
self.skip_blank();
|
|
||||||
}
|
|
||||||
|
|
||||||
// All characters that were in the buffer were consumed. We need to check if more
|
|
||||||
// follow.
|
|
||||||
if self.buffer.is_empty() {
|
|
||||||
// We will read all consecutive non-breakz characters into `self.buffer` before
|
|
||||||
// pushing them all in `string` instead of moving them one by one.
|
|
||||||
while !is_breakz(self.read_ch()) {}
|
|
||||||
// The last character from the buffer is a breakz. We must not insert it.
|
|
||||||
let last_char = self.buffer.pop_back().unwrap();
|
|
||||||
// We need to manually update our position; we won't call a `skip` function.
|
|
||||||
self.mark.col += self.buffer.len();
|
|
||||||
self.mark.index += self.buffer.len();
|
|
||||||
string.reserve(self.buffer.len());
|
|
||||||
string.extend(self.buffer.iter());
|
|
||||||
// Put back our breakz character, we didn't consume this one.
|
|
||||||
self.buffer.clear();
|
|
||||||
self.buffer.push_back(last_char);
|
|
||||||
}
|
|
||||||
|
|
||||||
// break on EOF
|
// break on EOF
|
||||||
if is_z(self.ch()) {
|
if is_z(self.ch()) {
|
||||||
|
@ -1699,6 +1671,58 @@ impl<T: Iterator<Item = char>> Scanner<T> {
|
||||||
Ok(Token(start_mark, TokenType::Scalar(style, string)))
|
Ok(Token(start_mark, TokenType::Scalar(style, string)))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Retrieve the contents of the line, parsing it as a block scalar.
|
||||||
|
///
|
||||||
|
/// The contents will be appended to `string`. `line_buffer` is used as a temporary buffer to
|
||||||
|
/// store bytes before pushing them to `string` and thus avoiding reallocating more than
|
||||||
|
/// necessary. `line_buffer` is assumed to be empty upon calling this function. It will be
|
||||||
|
/// `clear`ed before the end of the function.
|
||||||
|
///
|
||||||
|
/// This function assumed the first character to read is the first content character in the
|
||||||
|
/// line. This function does not consume the line break character(s) after the line.
|
||||||
|
fn scan_block_scalar_content_line(&mut self, string: &mut String, line_buffer: &mut String) {
|
||||||
|
// Start by evaluating characters in the buffer.
|
||||||
|
while !self.buffer.is_empty() && !is_breakz(self.ch()) {
|
||||||
|
string.push(self.ch());
|
||||||
|
// We may technically skip non-blank characters. However, the only distinction is
|
||||||
|
// to determine what is leading whitespace and what is not. Here, we read the
|
||||||
|
// contents of the line until either eof or a linebreak. We know we will not read
|
||||||
|
// `self.leading_whitespace` until the end of the line, where it will be reset.
|
||||||
|
// This allows us to call a slightly less expensive function.
|
||||||
|
self.skip_blank();
|
||||||
|
}
|
||||||
|
|
||||||
|
// All characters that were in the buffer were consumed. We need to check if more
|
||||||
|
// follow.
|
||||||
|
if self.buffer.is_empty() {
|
||||||
|
// We will read all consecutive non-breakz characters. We push them into a
|
||||||
|
// temporary buffer. The main difference with going through `self.buffer` is that
|
||||||
|
// characters are appended here as their real size (1B for ascii, or up to 4 bytes for
|
||||||
|
// UTF-8). We can then use the internal `line_buffer` `Vec` to push data into `string`
|
||||||
|
// (using `String::push_str`).
|
||||||
|
let mut c = self.raw_read_ch();
|
||||||
|
while !is_breakz(c) {
|
||||||
|
line_buffer.push(c);
|
||||||
|
c = self.raw_read_ch();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Our last character read is stored in `c`. It is either an EOF or a break. In any
|
||||||
|
// case, we need to push it back into `self.buffer` so it may be properly read
|
||||||
|
// after. We must not insert it in `string`.
|
||||||
|
self.buffer.push_back(c);
|
||||||
|
|
||||||
|
// We need to manually update our position; we haven't called a `skip` function.
|
||||||
|
self.mark.col += line_buffer.len();
|
||||||
|
self.mark.index += line_buffer.len();
|
||||||
|
|
||||||
|
// We can now append our bytes to our `string`.
|
||||||
|
string.reserve(line_buffer.as_bytes().len());
|
||||||
|
string.push_str(line_buffer);
|
||||||
|
// This clears the _contents_ without touching the _capacity_.
|
||||||
|
line_buffer.clear();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// Skip the block scalar indentation and empty lines.
|
/// Skip the block scalar indentation and empty lines.
|
||||||
fn skip_block_scalar_indent(&mut self, indent: usize, breaks: &mut String) {
|
fn skip_block_scalar_indent(&mut self, indent: usize, breaks: &mut String) {
|
||||||
loop {
|
loop {
|
||||||
|
|
Loading…
Reference in a new issue