Switch from `encoding` to `encoding_rs`.
See https://github.com/rustsec/advisory-db/issues/1605.
This commit is contained in:
parent
c5c9f4af16
commit
ee92e953f1
3 changed files with 155 additions and 19 deletions
|
@ -1,5 +1,27 @@
|
|||
# Changelog
|
||||
|
||||
## Upcoming
|
||||
### Breaking changes
|
||||
- The `encoding` library has been replaced with `encoding_rs`. If you use the
|
||||
`trap` of `YamlDecoder`, this change will make your code not compile.
|
||||
An additional enum `YAMLDecodingTrap` has been added to abstract the
|
||||
underlying library and avoid breaking changes in the future. This
|
||||
additionally lifts the `encoding` dependency on _your_ project if you were
|
||||
using that feature.
|
||||
- The `encoding::types::DecoderTrap` has been replaced with `YAMLDecodingTrap`.
|
||||
- The signature of the function for `YAMLDecodingTrap::Call` has changed:
|
||||
```rs
|
||||
// Before, with `encoding::types::DecoderTrap::Call`
|
||||
fn(_: &mut encoding::RawDecoder, _: &[u8], _: &mut encoding::StringWriter) -> bool;
|
||||
// Now, with `YAMLDecodingTrap::Call`
|
||||
fn(_: u8, _: u8, _: &[u8], _: &mut String) -> ControlFlow<Cow<'static, str>>;
|
||||
```
|
||||
Please refer to the `YAMLDecodingTrapFn` documentation for more details.
|
||||
|
||||
**Features**:
|
||||
|
||||
**Development**:
|
||||
|
||||
## v0.7.0
|
||||
|
||||
**Features**:
|
||||
|
|
|
@ -15,7 +15,7 @@ edition = "2021"
|
|||
|
||||
[dependencies]
|
||||
arraydeque = "0.5.1"
|
||||
encoding = "0.2"
|
||||
encoding_rs = "0.8.33"
|
||||
hashlink = "0.8"
|
||||
|
||||
[dev-dependencies]
|
||||
|
|
|
@ -2,8 +2,11 @@
|
|||
|
||||
#![allow(clippy::module_name_repetitions)]
|
||||
|
||||
use std::borrow::Cow;
|
||||
use std::ops::ControlFlow;
|
||||
use std::{collections::BTreeMap, convert::TryFrom, mem, ops::Index};
|
||||
|
||||
use encoding_rs::{Decoder, DecoderResult, Encoding};
|
||||
use hashlink::LinkedHashMap;
|
||||
|
||||
use crate::parser::{Event, MarkedEventReceiver, Parser, Tag};
|
||||
|
@ -238,11 +241,51 @@ impl YamlLoader {
|
|||
}
|
||||
}
|
||||
|
||||
/// The signature of the function to call when using [`YAMLDecodingTrap::Call`].
|
||||
///
|
||||
/// The arguments are as follows:
|
||||
/// * `malformation_length`: The length of the sequence the decoder failed to decode.
|
||||
/// * `bytes_read_after_malformation`: The number of lookahead bytes the decoder consumed after
|
||||
/// the malformation.
|
||||
/// * `input_at_malformation`: The input buffer, starting at the malformed sequence.
|
||||
/// The first `malformation_length` bytes are the problematic sequence. The following
|
||||
/// `bytes_read_after_malformation` bytes are already stored in the decoder and will not be
|
||||
/// re-fed.
|
||||
/// * `output`: The string decoded so far, which the function may append to.
|
||||
///
|
||||
/// The function may modify `output` as it sees fit. For instance, one could recreate the
|
||||
/// behavior of [`YAMLDecodingTrap::Ignore`] by leaving `output` untouched, that of
|
||||
/// [`YAMLDecodingTrap::Replace`] by pushing a `\u{FFFD}` into `output` and that of
|
||||
/// [`YAMLDecodingTrap::Strict`] by returning [`ControlFlow::Break`].
|
||||
///
|
||||
/// # Returns
|
||||
/// [`ControlFlow::Continue`] if decoding may continue, or [`ControlFlow::Break`] carrying an error
|
||||
/// message if decoding must be aborted. An empty message is replaced by a generic one.
|
||||
pub type YAMLDecodingTrapFn = fn(
|
||||
malformation_length: u8,
|
||||
bytes_read_after_malformation: u8,
|
||||
input_at_malformation: &[u8],
|
||||
output: &mut String,
|
||||
) -> ControlFlow<Cow<'static, str>>;
|
||||
|
||||
/// The behavior [`YamlDecoder`] must have when an decoding error occurs.
|
||||
#[derive(Copy, Clone, PartialEq, Eq)]
|
||||
pub enum YAMLDecodingTrap {
|
||||
/// Ignore the offending bytes, remove them from the output.
|
||||
Ignore,
|
||||
/// Error out.
|
||||
Strict,
|
||||
/// Replace them with the Unicode REPLACEMENT CHARACTER.
|
||||
Replace,
|
||||
/// Call the user-supplied function upon decoding malformation.
|
||||
Call(YAMLDecodingTrapFn),
|
||||
}
|
||||
|
||||
/// `YamlDecoder` is a `YamlLoader` builder that allows you to supply your own encoding error trap.
|
||||
/// For example, to read a YAML file while ignoring Unicode decoding errors you can set the
|
||||
/// `encoding_trap` to `YAMLDecodingTrap::Ignore`.
|
||||
/// ```rust
|
||||
/// use yaml_rust2::yaml::YamlDecoder;
|
||||
/// use yaml_rust2::yaml::{YamlDecoder, YAMLDecodingTrap};
|
||||
///
|
||||
/// let string = b"---
|
||||
/// a\xa9: 1
|
||||
|
@ -250,13 +293,13 @@ impl YamlLoader {
|
|||
/// c: [1, 2]
|
||||
/// ";
|
||||
/// let out = YamlDecoder::read(string as &[u8])
|
||||
/// .encoding_trap(encoding::DecoderTrap::Ignore)
|
||||
/// .encoding_trap(YAMLDecodingTrap::Ignore)
|
||||
/// .decode()
|
||||
/// .unwrap();
|
||||
/// ```
|
||||
pub struct YamlDecoder<T: std::io::Read> {
|
||||
source: T,
|
||||
trap: encoding::types::DecoderTrap,
|
||||
trap: YAMLDecodingTrap,
|
||||
}
|
||||
|
||||
impl<T: std::io::Read> YamlDecoder<T> {
|
||||
|
@ -264,12 +307,12 @@ impl<T: std::io::Read> YamlDecoder<T> {
|
|||
pub fn read(source: T) -> YamlDecoder<T> {
|
||||
YamlDecoder {
|
||||
source,
|
||||
trap: encoding::DecoderTrap::Strict,
|
||||
trap: YAMLDecodingTrap::Strict,
|
||||
}
|
||||
}
|
||||
|
||||
/// Set the behavior of the decoder when the encoding is invalid.
|
||||
pub fn encoding_trap(&mut self, trap: encoding::types::DecoderTrap) -> &mut Self {
|
||||
pub fn encoding_trap(&mut self, trap: YAMLDecodingTrap) -> &mut Self {
|
||||
self.trap = trap;
|
||||
self
|
||||
}
|
||||
|
@ -282,13 +325,84 @@ impl<T: std::io::Read> YamlDecoder<T> {
|
|||
let mut buffer = Vec::new();
|
||||
self.source.read_to_end(&mut buffer)?;
|
||||
|
||||
// Decodes the input buffer using either UTF-8, UTF-16LE or UTF-16BE depending on the BOM codepoint.
|
||||
// If the buffer doesn't start with a BOM codepoint, it will use a fallback encoding obtained by
|
||||
// detect_utf16_endianness.
|
||||
let (res, _) =
|
||||
encoding::types::decode(&buffer, self.trap, detect_utf16_endianness(&buffer));
|
||||
let s = res.map_err(LoadError::Decode)?;
|
||||
YamlLoader::load_from_str(&s).map_err(LoadError::Scan)
|
||||
// Check if the `encoding_rs` library can detect encoding from the BOM, otherwise use
|
||||
// `detect_utf16_endianness`.
|
||||
let (encoding, _) =
|
||||
Encoding::for_bom(&buffer).unwrap_or_else(|| (detect_utf16_endianness(&buffer), 2));
|
||||
let mut decoder = encoding.new_decoder();
|
||||
let mut output = String::new();
|
||||
|
||||
// Decode the input buffer.
|
||||
decode_loop(&buffer, &mut output, &mut decoder, self.trap)?;
|
||||
|
||||
YamlLoader::load_from_str(&output).map_err(LoadError::Scan)
|
||||
}
|
||||
}
|
||||
|
||||
/// Perform a loop of [`Decoder::decode_to_string`], reallocating `output` if needed.
|
||||
fn decode_loop(
|
||||
input: &[u8],
|
||||
output: &mut String,
|
||||
decoder: &mut Decoder,
|
||||
trap: YAMLDecodingTrap,
|
||||
) -> Result<(), LoadError> {
|
||||
output.reserve(input.len());
|
||||
let mut total_bytes_read = 0;
|
||||
|
||||
loop {
|
||||
match decoder.decode_to_string_without_replacement(&input[total_bytes_read..], output, true)
|
||||
{
|
||||
// If the input is empty, we processed the whole input.
|
||||
(DecoderResult::InputEmpty, _) => break Ok(()),
|
||||
// If the output is full, we must reallocate.
|
||||
(DecoderResult::OutputFull, bytes_read) => {
|
||||
total_bytes_read += bytes_read;
|
||||
// The output is already reserved to the size of the input. We slowly resize. Here,
|
||||
// we're expecting that 10% of bytes will double in size when converting to UTF-8.
|
||||
output.reserve(input.len() / 10);
|
||||
}
|
||||
(DecoderResult::Malformed(malformed_len, bytes_after_malformed), bytes_read) => {
|
||||
total_bytes_read += bytes_read;
|
||||
match trap {
|
||||
// Ignore (skip over) malformed character.
|
||||
YAMLDecodingTrap::Ignore => {}
|
||||
// Replace them with the Unicode REPLACEMENT CHARACTER.
|
||||
YAMLDecodingTrap::Replace => {
|
||||
output.push('\u{FFFD}');
|
||||
}
|
||||
// Otherwise error, getting as much context as possible.
|
||||
YAMLDecodingTrap::Strict => {
|
||||
let malformed_len = malformed_len as usize;
|
||||
let bytes_after_malformed = bytes_after_malformed as usize;
|
||||
let byte_idx = total_bytes_read - (malformed_len + bytes_after_malformed);
|
||||
let malformed_sequence = &input[byte_idx..byte_idx + malformed_len];
|
||||
|
||||
break Err(LoadError::Decode(Cow::Owned(format!(
|
||||
"Invalid character sequence at {byte_idx}: {malformed_sequence:?}",
|
||||
))));
|
||||
}
|
||||
YAMLDecodingTrap::Call(callback) => {
|
||||
let byte_idx =
|
||||
total_bytes_read - ((malformed_len + bytes_after_malformed) as usize);
|
||||
let malformed_sequence =
|
||||
&input[byte_idx..byte_idx + malformed_len as usize];
|
||||
if let ControlFlow::Break(error) = callback(
|
||||
malformed_len,
|
||||
bytes_after_malformed,
|
||||
&input[byte_idx..],
|
||||
output,
|
||||
) {
|
||||
if error.is_empty() {
|
||||
break Err(LoadError::Decode(Cow::Owned(format!(
|
||||
"Invalid character sequence at {byte_idx}: {malformed_sequence:?}",
|
||||
))));
|
||||
}
|
||||
break Err(LoadError::Decode(error));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -301,15 +415,15 @@ impl<T: std::io::Read> YamlDecoder<T> {
|
|||
/// This allows the encoding to be deduced by the pattern of null (#x00) characters.
|
||||
///
|
||||
/// See spec at <https://yaml.org/spec/1.2/spec.html#id2771184>
|
||||
fn detect_utf16_endianness(b: &[u8]) -> encoding::types::EncodingRef {
|
||||
fn detect_utf16_endianness(b: &[u8]) -> &'static Encoding {
|
||||
if b.len() > 1 && (b[0] != b[1]) {
|
||||
if b[0] == 0 {
|
||||
return encoding::all::UTF_16BE;
|
||||
return encoding_rs::UTF_16BE;
|
||||
} else if b[1] == 0 {
|
||||
return encoding::all::UTF_16LE;
|
||||
return encoding_rs::UTF_16LE;
|
||||
}
|
||||
}
|
||||
encoding::all::UTF_8
|
||||
encoding_rs::UTF_8
|
||||
}
|
||||
|
||||
macro_rules! define_as (
|
||||
|
@ -550,7 +664,7 @@ impl Iterator for YamlIter {
|
|||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::{Yaml, YamlDecoder};
|
||||
use super::{YAMLDecodingTrap, Yaml, YamlDecoder};
|
||||
|
||||
#[test]
|
||||
fn test_read_bom() {
|
||||
|
@ -623,7 +737,7 @@ b: 2.2
|
|||
c: [1, 2]
|
||||
";
|
||||
let out = YamlDecoder::read(s as &[u8])
|
||||
.encoding_trap(encoding::DecoderTrap::Ignore)
|
||||
.encoding_trap(YAMLDecodingTrap::Ignore)
|
||||
.decode()
|
||||
.unwrap();
|
||||
let doc = &out[0];
|
||||
|
|
Loading…
Reference in a new issue