Split yaml.rs
into sizeable files.
This commit is contained in:
parent
976007017d
commit
2b8eb3f62b
5 changed files with 533 additions and 520 deletions
|
@ -1,4 +1,4 @@
|
||||||
use saphyr::yaml;
|
use saphyr::{Yaml, YamlLoader};
|
||||||
use std::env;
|
use std::env;
|
||||||
use std::fs::File;
|
use std::fs::File;
|
||||||
use std::io::prelude::*;
|
use std::io::prelude::*;
|
||||||
|
@ -9,14 +9,14 @@ fn print_indent(indent: usize) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn dump_node(doc: &yaml::Yaml, indent: usize) {
|
fn dump_node(doc: &Yaml, indent: usize) {
|
||||||
match *doc {
|
match *doc {
|
||||||
yaml::Yaml::Array(ref v) => {
|
Yaml::Array(ref v) => {
|
||||||
for x in v {
|
for x in v {
|
||||||
dump_node(x, indent + 1);
|
dump_node(x, indent + 1);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
yaml::Yaml::Hash(ref h) => {
|
Yaml::Hash(ref h) => {
|
||||||
for (k, v) in h {
|
for (k, v) in h {
|
||||||
print_indent(indent);
|
print_indent(indent);
|
||||||
println!("{k:?}:");
|
println!("{k:?}:");
|
||||||
|
@ -36,7 +36,7 @@ fn main() {
|
||||||
let mut s = String::new();
|
let mut s = String::new();
|
||||||
f.read_to_string(&mut s).unwrap();
|
f.read_to_string(&mut s).unwrap();
|
||||||
|
|
||||||
let docs = yaml::YamlLoader::load_from_str(&s).unwrap();
|
let docs = YamlLoader::load_from_str(&s).unwrap();
|
||||||
for doc in &docs {
|
for doc in &docs {
|
||||||
println!("---");
|
println!("---");
|
||||||
dump_node(doc, 0);
|
dump_node(doc, 0);
|
||||||
|
|
289
saphyr/src/encoding.rs
Normal file
289
saphyr/src/encoding.rs
Normal file
|
@ -0,0 +1,289 @@
|
||||||
|
//! Encoding utilities. Available only with the `encoding` feature.
|
||||||
|
|
||||||
|
use std::{borrow::Cow, ops::ControlFlow};
|
||||||
|
|
||||||
|
use encoding_rs::{Decoder, DecoderResult, Encoding};
|
||||||
|
|
||||||
|
use crate::{loader::LoadError, Yaml, YamlLoader};
|
||||||
|
|
||||||
|
/// The signature of the function to call when using [`YAMLDecodingTrap::Call`].
|
||||||
|
///
|
||||||
|
/// The arguments are as follows:
|
||||||
|
/// * `malformation_length`: The length of the sequence the decoder failed to decode.
|
||||||
|
/// * `bytes_read_after_malformation`: The number of lookahead bytes the decoder consumed after
|
||||||
|
/// the malformation.
|
||||||
|
/// * `input_at_malformation`: What the input buffer is at the malformation.
|
||||||
|
/// This is the buffer starting at the malformation. The first `malformation_length` bytes are
|
||||||
|
/// the problematic sequence. The following `bytes_read_after_malformation` are already stored
|
||||||
|
/// in the decoder and will not be re-fed.
|
||||||
|
/// * `output`: The output string.
|
||||||
|
///
|
||||||
|
/// The function must modify `output` as it feels is best. For instance, one could recreate the
|
||||||
|
/// behavior of [`YAMLDecodingTrap::Ignore`] with an empty function, [`YAMLDecodingTrap::Replace`]
|
||||||
|
/// by pushing a `\u{FFFD}` into `output` and [`YAMLDecodingTrap::Strict`] by returning
|
||||||
|
/// [`ControlFlow::Break`].
|
||||||
|
///
|
||||||
|
/// # Returns
|
||||||
|
/// The function must return [`ControlFlow::Continue`] if decoding may continue or
|
||||||
|
/// [`ControlFlow::Break`] if decoding must be aborted. An optional error string may be supplied.
|
||||||
|
pub type YAMLDecodingTrapFn = fn(
|
||||||
|
malformation_length: u8,
|
||||||
|
bytes_read_after_malformation: u8,
|
||||||
|
input_at_malformation: &[u8],
|
||||||
|
output: &mut String,
|
||||||
|
) -> ControlFlow<Cow<'static, str>>;
|
||||||
|
|
||||||
|
/// The behavior [`YamlDecoder`] must have when a decoding error occurs.
|
||||||
|
#[derive(Copy, Clone, PartialEq, Eq)]
|
||||||
|
pub enum YAMLDecodingTrap {
|
||||||
|
/// Ignore the offending bytes, remove them from the output.
|
||||||
|
Ignore,
|
||||||
|
/// Error out.
|
||||||
|
Strict,
|
||||||
|
/// Replace them with the Unicode REPLACEMENT CHARACTER.
|
||||||
|
Replace,
|
||||||
|
/// Call the user-supplied function upon decoding malformation.
|
||||||
|
Call(YAMLDecodingTrapFn),
|
||||||
|
}
|
||||||
|
|
||||||
|
/// `YamlDecoder` is a `YamlLoader` builder that allows you to supply your own encoding error trap.
|
||||||
|
/// For example, to read a YAML file while ignoring Unicode decoding errors you can set the
|
||||||
|
/// `encoding_trap` to [`YAMLDecodingTrap::Ignore`].
|
||||||
|
/// ```rust
|
||||||
|
/// use saphyr::{YamlDecoder, YAMLDecodingTrap};
|
||||||
|
///
|
||||||
|
/// let string = b"---
|
||||||
|
/// a\xa9: 1
|
||||||
|
/// b: 2.2
|
||||||
|
/// c: [1, 2]
|
||||||
|
/// ";
|
||||||
|
/// let out = YamlDecoder::read(string as &[u8])
|
||||||
|
/// .encoding_trap(YAMLDecodingTrap::Ignore)
|
||||||
|
/// .decode()
|
||||||
|
/// .unwrap();
|
||||||
|
/// ```
|
||||||
|
pub struct YamlDecoder<T: std::io::Read> {
|
||||||
|
/// The input stream.
|
||||||
|
source: T,
|
||||||
|
/// The behavior to adopt when encountering a malformed encoding.
|
||||||
|
trap: YAMLDecodingTrap,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<T: std::io::Read> YamlDecoder<T> {
|
||||||
|
/// Create a `YamlDecoder` decoding the given source.
|
||||||
|
pub fn read(source: T) -> YamlDecoder<T> {
|
||||||
|
YamlDecoder {
|
||||||
|
source,
|
||||||
|
trap: YAMLDecodingTrap::Strict,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Set the behavior of the decoder when the encoding is invalid.
|
||||||
|
pub fn encoding_trap(&mut self, trap: YAMLDecodingTrap) -> &mut Self {
|
||||||
|
self.trap = trap;
|
||||||
|
self
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Run the decode operation with the source and trap the `YamlDecoder` was built with.
|
||||||
|
///
|
||||||
|
/// # Errors
|
||||||
|
/// Returns `LoadError` when decoding fails.
|
||||||
|
pub fn decode(&mut self) -> Result<Vec<Yaml>, LoadError> {
|
||||||
|
let mut buffer = Vec::new();
|
||||||
|
self.source.read_to_end(&mut buffer)?;
|
||||||
|
|
||||||
|
// Check if the `encoding` library can detect encoding from the BOM, otherwise use
|
||||||
|
// `detect_utf16_endianness`.
|
||||||
|
let (encoding, _) =
|
||||||
|
Encoding::for_bom(&buffer).unwrap_or_else(|| (detect_utf16_endianness(&buffer), 2));
|
||||||
|
let mut decoder = encoding.new_decoder();
|
||||||
|
let mut output = String::new();
|
||||||
|
|
||||||
|
// Decode the input buffer.
|
||||||
|
decode_loop(&buffer, &mut output, &mut decoder, self.trap)?;
|
||||||
|
|
||||||
|
YamlLoader::load_from_str(&output).map_err(LoadError::Scan)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Perform a loop of [`Decoder::decode_to_string`], reallocating `output` if needed.
|
||||||
|
fn decode_loop(
|
||||||
|
input: &[u8],
|
||||||
|
output: &mut String,
|
||||||
|
decoder: &mut Decoder,
|
||||||
|
trap: YAMLDecodingTrap,
|
||||||
|
) -> Result<(), LoadError> {
|
||||||
|
use crate::loader::LoadError;
|
||||||
|
|
||||||
|
output.reserve(input.len());
|
||||||
|
let mut total_bytes_read = 0;
|
||||||
|
|
||||||
|
loop {
|
||||||
|
match decoder.decode_to_string_without_replacement(&input[total_bytes_read..], output, true)
|
||||||
|
{
|
||||||
|
// If the input is empty, we processed the whole input.
|
||||||
|
(DecoderResult::InputEmpty, _) => break Ok(()),
|
||||||
|
// If the output is full, we must reallocate.
|
||||||
|
(DecoderResult::OutputFull, bytes_read) => {
|
||||||
|
total_bytes_read += bytes_read;
|
||||||
|
// The output is already reserved to the size of the input. We slowly resize. Here,
|
||||||
|
// we're expecting that 10% of bytes will double in size when converting to UTF-8.
|
||||||
|
output.reserve(input.len() / 10);
|
||||||
|
}
|
||||||
|
(DecoderResult::Malformed(malformed_len, bytes_after_malformed), bytes_read) => {
|
||||||
|
total_bytes_read += bytes_read;
|
||||||
|
match trap {
|
||||||
|
// Ignore (skip over) malformed character.
|
||||||
|
YAMLDecodingTrap::Ignore => {}
|
||||||
|
// Replace them with the Unicode REPLACEMENT CHARACTER.
|
||||||
|
YAMLDecodingTrap::Replace => {
|
||||||
|
output.push('\u{FFFD}');
|
||||||
|
}
|
||||||
|
// Otherwise error, getting as much context as possible.
|
||||||
|
YAMLDecodingTrap::Strict => {
|
||||||
|
let malformed_len = malformed_len as usize;
|
||||||
|
let bytes_after_malformed = bytes_after_malformed as usize;
|
||||||
|
let byte_idx = total_bytes_read - (malformed_len + bytes_after_malformed);
|
||||||
|
let malformed_sequence = &input[byte_idx..byte_idx + malformed_len];
|
||||||
|
|
||||||
|
break Err(LoadError::Decode(Cow::Owned(format!(
|
||||||
|
"Invalid character sequence at {byte_idx}: {malformed_sequence:?}",
|
||||||
|
))));
|
||||||
|
}
|
||||||
|
YAMLDecodingTrap::Call(callback) => {
|
||||||
|
let byte_idx =
|
||||||
|
total_bytes_read - ((malformed_len + bytes_after_malformed) as usize);
|
||||||
|
let malformed_sequence =
|
||||||
|
&input[byte_idx..byte_idx + malformed_len as usize];
|
||||||
|
if let ControlFlow::Break(error) = callback(
|
||||||
|
malformed_len,
|
||||||
|
bytes_after_malformed,
|
||||||
|
&input[byte_idx..],
|
||||||
|
output,
|
||||||
|
) {
|
||||||
|
if error.is_empty() {
|
||||||
|
break Err(LoadError::Decode(Cow::Owned(format!(
|
||||||
|
"Invalid character sequence at {byte_idx}: {malformed_sequence:?}",
|
||||||
|
))));
|
||||||
|
}
|
||||||
|
break Err(LoadError::Decode(error));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// The encoding crate knows how to tell apart UTF-8 from UTF-16LE and utf-16BE, when the
|
||||||
|
/// bytestream starts with BOM codepoint.
|
||||||
|
/// However, it doesn't even attempt to guess the UTF-16 endianness of the input bytestream since
|
||||||
|
/// in the general case the bytestream could start with a codepoint that uses both bytes.
|
||||||
|
///
|
||||||
|
/// The YAML-1.2 spec mandates that the first character of a YAML document is an ASCII character.
|
||||||
|
/// This allows the encoding to be deduced by the pattern of null (#x00) characters.
|
||||||
|
//
|
||||||
|
/// See spec at <https://yaml.org/spec/1.2/spec.html#id2771184>
|
||||||
|
fn detect_utf16_endianness(b: &[u8]) -> &'static Encoding {
|
||||||
|
if b.len() > 1 && (b[0] != b[1]) {
|
||||||
|
if b[0] == 0 {
|
||||||
|
return encoding_rs::UTF_16BE;
|
||||||
|
} else if b[1] == 0 {
|
||||||
|
return encoding_rs::UTF_16LE;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
encoding_rs::UTF_8
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
mod test {
|
||||||
|
use super::{YAMLDecodingTrap, Yaml, YamlDecoder};
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_read_bom() {
|
||||||
|
let s = b"\xef\xbb\xbf---
|
||||||
|
a: 1
|
||||||
|
b: 2.2
|
||||||
|
c: [1, 2]
|
||||||
|
";
|
||||||
|
let out = YamlDecoder::read(s as &[u8]).decode().unwrap();
|
||||||
|
let doc = &out[0];
|
||||||
|
assert_eq!(doc["a"].as_i64().unwrap(), 1i64);
|
||||||
|
assert!((doc["b"].as_f64().unwrap() - 2.2f64).abs() <= f64::EPSILON);
|
||||||
|
assert_eq!(doc["c"][1].as_i64().unwrap(), 2i64);
|
||||||
|
assert!(doc["d"][0].is_badvalue());
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
fn test_read_utf16le() {
    // UTF-16LE input starting with a BOM (`\xff\xfe`).
    let s = b"\xff\xfe-\x00-\x00-\x00
\x00a\x00:\x00 \x001\x00
\x00b\x00:\x00 \x002\x00.\x002\x00
\x00c\x00:\x00 \x00[\x001\x00,\x00 \x002\x00]\x00
\x00";
    let out = YamlDecoder::read(s as &[u8]).decode().unwrap();
    let doc = &out[0];
    println!("GOT: {doc:?}");
    assert_eq!(doc["a"].as_i64().unwrap(), 1i64);
    // Compare the absolute difference: without `abs()`, any value below 2.2 would pass.
    assert!((doc["b"].as_f64().unwrap() - 2.2f64).abs() <= f64::EPSILON);
    assert_eq!(doc["c"][1].as_i64().unwrap(), 2i64);
    assert!(doc["d"][0].is_badvalue());
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_read_utf16be() {
|
||||||
|
let s = b"\xfe\xff\x00-\x00-\x00-\x00
|
||||||
|
\x00a\x00:\x00 \x001\x00
|
||||||
|
\x00b\x00:\x00 \x002\x00.\x002\x00
|
||||||
|
\x00c\x00:\x00 \x00[\x001\x00,\x00 \x002\x00]\x00
|
||||||
|
";
|
||||||
|
let out = YamlDecoder::read(s as &[u8]).decode().unwrap();
|
||||||
|
let doc = &out[0];
|
||||||
|
println!("GOT: {doc:?}");
|
||||||
|
assert_eq!(doc["a"].as_i64().unwrap(), 1i64);
|
||||||
|
assert!((doc["b"].as_f64().unwrap() - 2.2f64).abs() <= f64::EPSILON);
|
||||||
|
assert_eq!(doc["c"][1].as_i64().unwrap(), 2i64);
|
||||||
|
assert!(doc["d"][0].is_badvalue());
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_read_utf16le_nobom() {
|
||||||
|
let s = b"-\x00-\x00-\x00
|
||||||
|
\x00a\x00:\x00 \x001\x00
|
||||||
|
\x00b\x00:\x00 \x002\x00.\x002\x00
|
||||||
|
\x00c\x00:\x00 \x00[\x001\x00,\x00 \x002\x00]\x00
|
||||||
|
\x00";
|
||||||
|
let out = YamlDecoder::read(s as &[u8]).decode().unwrap();
|
||||||
|
let doc = &out[0];
|
||||||
|
println!("GOT: {doc:?}");
|
||||||
|
assert_eq!(doc["a"].as_i64().unwrap(), 1i64);
|
||||||
|
assert!((doc["b"].as_f64().unwrap() - 2.2f64).abs() <= f64::EPSILON);
|
||||||
|
assert_eq!(doc["c"][1].as_i64().unwrap(), 2i64);
|
||||||
|
assert!(doc["d"][0].is_badvalue());
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_read_trap() {
|
||||||
|
let s = b"---
|
||||||
|
a\xa9: 1
|
||||||
|
b: 2.2
|
||||||
|
c: [1, 2]
|
||||||
|
";
|
||||||
|
let out = YamlDecoder::read(s as &[u8])
|
||||||
|
.encoding_trap(YAMLDecodingTrap::Ignore)
|
||||||
|
.decode()
|
||||||
|
.unwrap();
|
||||||
|
let doc = &out[0];
|
||||||
|
println!("GOT: {doc:?}");
|
||||||
|
assert_eq!(doc["a"].as_i64().unwrap(), 1i64);
|
||||||
|
assert!((doc["b"].as_f64().unwrap() - 2.2f64).abs() <= f64::EPSILON);
|
||||||
|
assert_eq!(doc["c"][1].as_i64().unwrap(), 2i64);
|
||||||
|
assert!(doc["d"][0].is_badvalue());
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_or() {
|
||||||
|
assert_eq!(Yaml::Null.or(Yaml::Integer(3)), Yaml::Integer(3));
|
||||||
|
assert_eq!(Yaml::Integer(3).or(Yaml::Integer(7)), Yaml::Integer(3));
|
||||||
|
}
|
||||||
|
}
|
|
@ -43,16 +43,20 @@
|
||||||
|
|
||||||
#![warn(missing_docs, clippy::pedantic)]
|
#![warn(missing_docs, clippy::pedantic)]
|
||||||
|
|
||||||
pub(crate) mod char_traits;
|
mod char_traits;
|
||||||
pub mod emitter;
|
mod emitter;
|
||||||
pub mod yaml;
|
mod loader;
|
||||||
|
mod yaml;
|
||||||
|
|
||||||
// Re-export main components.
|
// Re-export main components.
|
||||||
pub use crate::emitter::YamlEmitter;
|
pub use crate::emitter::YamlEmitter;
|
||||||
pub use crate::yaml::{Array, Hash, Yaml, YamlLoader};
|
pub use crate::loader::YamlLoader;
|
||||||
|
pub use crate::yaml::{Array, Hash, Yaml};
|
||||||
|
|
||||||
#[cfg(feature = "encoding")]
|
#[cfg(feature = "encoding")]
|
||||||
pub use crate::yaml::{YAMLDecodingTrap, YAMLDecodingTrapFn, YamlDecoder};
|
mod encoding;
|
||||||
|
#[cfg(feature = "encoding")]
|
||||||
|
pub use crate::encoding::{YAMLDecodingTrap, YAMLDecodingTrapFn, YamlDecoder};
|
||||||
|
|
||||||
// Re-export `ScanError` as it is used as part of our public API and we want consumers to be able
|
// Re-export `ScanError` as it is used as part of our public API and we want consumers to be able
|
||||||
// to inspect it (e.g. perform a `match`). They wouldn't be able without it.
|
// to inspect it (e.g. perform a `match`). They wouldn't be able without it.
|
||||||
|
|
227
saphyr/src/loader.rs
Normal file
227
saphyr/src/loader.rs
Normal file
|
@ -0,0 +1,227 @@
|
||||||
|
//! The default loader.
|
||||||
|
|
||||||
|
use std::collections::BTreeMap;
|
||||||
|
|
||||||
|
use saphyr_parser::{Event, MarkedEventReceiver, Marker, Parser, ScanError, TScalarStyle, Tag};
|
||||||
|
|
||||||
|
use crate::{Hash, Yaml};
|
||||||
|
|
||||||
|
/// Main structure for quickly parsing YAML.
|
||||||
|
///
|
||||||
|
/// See [`YamlLoader::load_from_str`].
|
||||||
|
#[derive(Default)]
|
||||||
|
#[allow(clippy::module_name_repetitions)]
|
||||||
|
pub struct YamlLoader {
|
||||||
|
/// The different YAML documents that are loaded.
|
||||||
|
docs: Vec<Yaml>,
|
||||||
|
// states
|
||||||
|
// (current node, anchor_id) tuple
|
||||||
|
doc_stack: Vec<(Yaml, usize)>,
|
||||||
|
key_stack: Vec<Yaml>,
|
||||||
|
anchor_map: BTreeMap<usize, Yaml>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl MarkedEventReceiver for YamlLoader {
|
||||||
|
fn on_event(&mut self, ev: Event, _: Marker) {
|
||||||
|
// println!("EV {:?}", ev);
|
||||||
|
match ev {
|
||||||
|
Event::DocumentStart | Event::Nothing | Event::StreamStart | Event::StreamEnd => {
|
||||||
|
// do nothing
|
||||||
|
}
|
||||||
|
Event::DocumentEnd => {
|
||||||
|
match self.doc_stack.len() {
|
||||||
|
// empty document
|
||||||
|
0 => self.docs.push(Yaml::BadValue),
|
||||||
|
1 => self.docs.push(self.doc_stack.pop().unwrap().0),
|
||||||
|
_ => unreachable!(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Event::SequenceStart(aid, _) => {
|
||||||
|
self.doc_stack.push((Yaml::Array(Vec::new()), aid));
|
||||||
|
}
|
||||||
|
Event::SequenceEnd => {
|
||||||
|
let node = self.doc_stack.pop().unwrap();
|
||||||
|
self.insert_new_node(node);
|
||||||
|
}
|
||||||
|
Event::MappingStart(aid, _) => {
|
||||||
|
self.doc_stack.push((Yaml::Hash(Hash::new()), aid));
|
||||||
|
self.key_stack.push(Yaml::BadValue);
|
||||||
|
}
|
||||||
|
Event::MappingEnd => {
|
||||||
|
self.key_stack.pop().unwrap();
|
||||||
|
let node = self.doc_stack.pop().unwrap();
|
||||||
|
self.insert_new_node(node);
|
||||||
|
}
|
||||||
|
Event::Scalar(v, style, aid, tag) => {
|
||||||
|
let node = if style != TScalarStyle::Plain {
|
||||||
|
Yaml::String(v)
|
||||||
|
} else if let Some(Tag {
|
||||||
|
ref handle,
|
||||||
|
ref suffix,
|
||||||
|
}) = tag
|
||||||
|
{
|
||||||
|
if handle == "tag:yaml.org,2002:" {
|
||||||
|
match suffix.as_ref() {
|
||||||
|
"bool" => {
|
||||||
|
// "true" or "false"
|
||||||
|
match v.parse::<bool>() {
|
||||||
|
Err(_) => Yaml::BadValue,
|
||||||
|
Ok(v) => Yaml::Boolean(v),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
"int" => match v.parse::<i64>() {
|
||||||
|
Err(_) => Yaml::BadValue,
|
||||||
|
Ok(v) => Yaml::Integer(v),
|
||||||
|
},
|
||||||
|
"float" => match parse_f64(&v) {
|
||||||
|
Some(_) => Yaml::Real(v),
|
||||||
|
None => Yaml::BadValue,
|
||||||
|
},
|
||||||
|
"null" => match v.as_ref() {
|
||||||
|
"~" | "null" => Yaml::Null,
|
||||||
|
_ => Yaml::BadValue,
|
||||||
|
},
|
||||||
|
_ => Yaml::String(v),
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
Yaml::String(v)
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// Datatype is not specified, or unrecognized
|
||||||
|
Yaml::from_str(&v)
|
||||||
|
};
|
||||||
|
|
||||||
|
self.insert_new_node((node, aid));
|
||||||
|
}
|
||||||
|
Event::Alias(id) => {
|
||||||
|
let n = match self.anchor_map.get(&id) {
|
||||||
|
Some(v) => v.clone(),
|
||||||
|
None => Yaml::BadValue,
|
||||||
|
};
|
||||||
|
self.insert_new_node((n, 0));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// println!("DOC {:?}", self.doc_stack);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// An error that happened when loading a YAML document.
|
||||||
|
#[derive(Debug)]
|
||||||
|
pub enum LoadError {
|
||||||
|
/// An I/O error.
|
||||||
|
IO(std::io::Error),
|
||||||
|
/// An error within the scanner. This indicates a malformed YAML input.
|
||||||
|
Scan(ScanError),
|
||||||
|
/// A decoding error (e.g.: Invalid UTF-8).
|
||||||
|
Decode(std::borrow::Cow<'static, str>),
|
||||||
|
}
|
||||||
|
|
||||||
|
impl From<std::io::Error> for LoadError {
|
||||||
|
fn from(error: std::io::Error) -> Self {
|
||||||
|
LoadError::IO(error)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl std::error::Error for LoadError {
|
||||||
|
fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
|
||||||
|
Some(match &self {
|
||||||
|
LoadError::IO(e) => e,
|
||||||
|
LoadError::Scan(e) => e,
|
||||||
|
LoadError::Decode(_) => return None,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl std::fmt::Display for LoadError {
|
||||||
|
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||||
|
match self {
|
||||||
|
LoadError::IO(e) => e.fmt(f),
|
||||||
|
LoadError::Scan(e) => e.fmt(f),
|
||||||
|
LoadError::Decode(e) => e.fmt(f),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl YamlLoader {
|
||||||
|
fn insert_new_node(&mut self, node: (Yaml, usize)) {
|
||||||
|
// valid anchor id starts from 1
|
||||||
|
if node.1 > 0 {
|
||||||
|
self.anchor_map.insert(node.1, node.0.clone());
|
||||||
|
}
|
||||||
|
if self.doc_stack.is_empty() {
|
||||||
|
self.doc_stack.push(node);
|
||||||
|
} else {
|
||||||
|
let parent = self.doc_stack.last_mut().unwrap();
|
||||||
|
match *parent {
|
||||||
|
(Yaml::Array(ref mut v), _) => v.push(node.0),
|
||||||
|
(Yaml::Hash(ref mut h), _) => {
|
||||||
|
let cur_key = self.key_stack.last_mut().unwrap();
|
||||||
|
// current node is a key
|
||||||
|
if cur_key.is_badvalue() {
|
||||||
|
*cur_key = node.0;
|
||||||
|
// current node is a value
|
||||||
|
} else {
|
||||||
|
let mut newkey = Yaml::BadValue;
|
||||||
|
std::mem::swap(&mut newkey, cur_key);
|
||||||
|
h.insert(newkey, node.0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
_ => unreachable!(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Load the given string as a set of YAML documents.
|
||||||
|
///
|
||||||
|
/// The `source` is interpreted as YAML documents and is parsed. Parsing succeeds if and only
|
||||||
|
/// if all documents are parsed successfully. An error in a latter document prevents the former
|
||||||
|
/// from being returned.
|
||||||
|
/// # Errors
|
||||||
|
/// Returns `ScanError` when loading fails.
|
||||||
|
pub fn load_from_str(source: &str) -> Result<Vec<Yaml>, ScanError> {
|
||||||
|
Self::load_from_iter(source.chars())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Load the contents of the given iterator as a set of YAML documents.
|
||||||
|
///
|
||||||
|
/// The `source` is interpreted as YAML documents and is parsed. Parsing succeeds if and only
|
||||||
|
/// if all documents are parsed successfully. An error in a latter document prevents the former
|
||||||
|
/// from being returned.
|
||||||
|
/// # Errors
|
||||||
|
/// Returns `ScanError` when loading fails.
|
||||||
|
pub fn load_from_iter<I: Iterator<Item = char>>(source: I) -> Result<Vec<Yaml>, ScanError> {
|
||||||
|
let mut parser = Parser::new(source);
|
||||||
|
Self::load_from_parser(&mut parser)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Load the contents from the specified Parser as a set of YAML documents.
|
||||||
|
///
|
||||||
|
/// Parsing succeeds if and only if all documents are parsed successfully.
|
||||||
|
/// An error in a latter document prevents the former from being returned.
|
||||||
|
/// # Errors
|
||||||
|
/// Returns `ScanError` when loading fails.
|
||||||
|
pub fn load_from_parser<I: Iterator<Item = char>>(
|
||||||
|
parser: &mut Parser<I>,
|
||||||
|
) -> Result<Vec<Yaml>, ScanError> {
|
||||||
|
let mut loader = YamlLoader::default();
|
||||||
|
parser.load(&mut loader, true)?;
|
||||||
|
Ok(loader.docs)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Return a reference to the parsed Yaml documents.
|
||||||
|
#[must_use]
|
||||||
|
pub fn documents(&self) -> &[Yaml] {
|
||||||
|
&self.docs
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Parse a scalar as an `f64`, following the YAML Core schema.
///
/// The special spellings of the infinities (`.inf`, `.Inf`, `.INF`, optionally signed) and of
/// not-a-number (`.nan`, `.NaN`, `.NAN`) are matched explicitly. Anything else is handed to
/// [`str::parse`].
///
/// Returns `None` if `v` is not recognized as a floating point number.
///
/// See: <https://github.com/chyh1990/yaml-rust/issues/51>
pub(crate) fn parse_f64(v: &str) -> Option<f64> {
    match v {
        ".inf" | ".Inf" | ".INF" | "+.inf" | "+.Inf" | "+.INF" => Some(f64::INFINITY),
        "-.inf" | "-.Inf" | "-.INF" => Some(f64::NEG_INFINITY),
        // `.NaN` is the mixed-case spelling of the Core schema. The bare `NaN` is not part of
        // the schema but has always been accepted here, so it is kept for compatibility.
        ".nan" | ".NaN" | ".NAN" | "NaN" => Some(f64::NAN),
        _ => v.parse::<f64>().ok(),
    }
}
|
|
@ -2,15 +2,11 @@
|
||||||
|
|
||||||
#![allow(clippy::module_name_repetitions)]
|
#![allow(clippy::module_name_repetitions)]
|
||||||
|
|
||||||
use std::borrow::Cow;
|
use std::{convert::TryFrom, ops::Index, ops::IndexMut};
|
||||||
use std::ops::ControlFlow;
|
|
||||||
use std::{collections::BTreeMap, convert::TryFrom, mem, ops::Index, ops::IndexMut};
|
|
||||||
|
|
||||||
#[cfg(feature = "encoding")]
|
|
||||||
use encoding_rs::{Decoder, DecoderResult, Encoding};
|
|
||||||
use hashlink::LinkedHashMap;
|
use hashlink::LinkedHashMap;
|
||||||
|
|
||||||
use saphyr_parser::{Event, MarkedEventReceiver, Marker, Parser, ScanError, TScalarStyle, Tag};
|
use crate::loader::parse_f64;
|
||||||
|
|
||||||
/// A YAML node is stored as this `Yaml` enumeration, which provides an easy way to
|
/// A YAML node is stored as this `Yaml` enumeration, which provides an easy way to
|
||||||
/// access your YAML document.
|
/// access your YAML document.
|
||||||
|
@ -60,416 +56,6 @@ pub type Array = Vec<Yaml>;
|
||||||
/// The type contained in the `Yaml::Hash` variant. This corresponds to YAML mappings.
|
/// The type contained in the `Yaml::Hash` variant. This corresponds to YAML mappings.
|
||||||
pub type Hash = LinkedHashMap<Yaml, Yaml>;
|
pub type Hash = LinkedHashMap<Yaml, Yaml>;
|
||||||
|
|
||||||
// parse f64 as Core schema
|
|
||||||
// See: https://github.com/chyh1990/yaml-rust/issues/51
|
|
||||||
fn parse_f64(v: &str) -> Option<f64> {
|
|
||||||
match v {
|
|
||||||
".inf" | ".Inf" | ".INF" | "+.inf" | "+.Inf" | "+.INF" => Some(f64::INFINITY),
|
|
||||||
"-.inf" | "-.Inf" | "-.INF" => Some(f64::NEG_INFINITY),
|
|
||||||
".nan" | "NaN" | ".NAN" => Some(f64::NAN),
|
|
||||||
_ => v.parse::<f64>().ok(),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Main structure for quickly parsing YAML.
|
|
||||||
///
|
|
||||||
/// See [`YamlLoader::load_from_str`].
|
|
||||||
#[derive(Default)]
|
|
||||||
pub struct YamlLoader {
|
|
||||||
/// The different YAML documents that are loaded.
|
|
||||||
docs: Vec<Yaml>,
|
|
||||||
// states
|
|
||||||
// (current node, anchor_id) tuple
|
|
||||||
doc_stack: Vec<(Yaml, usize)>,
|
|
||||||
key_stack: Vec<Yaml>,
|
|
||||||
anchor_map: BTreeMap<usize, Yaml>,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl MarkedEventReceiver for YamlLoader {
|
|
||||||
fn on_event(&mut self, ev: Event, _: Marker) {
|
|
||||||
// println!("EV {:?}", ev);
|
|
||||||
match ev {
|
|
||||||
Event::DocumentStart | Event::Nothing | Event::StreamStart | Event::StreamEnd => {
|
|
||||||
// do nothing
|
|
||||||
}
|
|
||||||
Event::DocumentEnd => {
|
|
||||||
match self.doc_stack.len() {
|
|
||||||
// empty document
|
|
||||||
0 => self.docs.push(Yaml::BadValue),
|
|
||||||
1 => self.docs.push(self.doc_stack.pop().unwrap().0),
|
|
||||||
_ => unreachable!(),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Event::SequenceStart(aid, _) => {
|
|
||||||
self.doc_stack.push((Yaml::Array(Vec::new()), aid));
|
|
||||||
}
|
|
||||||
Event::SequenceEnd => {
|
|
||||||
let node = self.doc_stack.pop().unwrap();
|
|
||||||
self.insert_new_node(node);
|
|
||||||
}
|
|
||||||
Event::MappingStart(aid, _) => {
|
|
||||||
self.doc_stack.push((Yaml::Hash(Hash::new()), aid));
|
|
||||||
self.key_stack.push(Yaml::BadValue);
|
|
||||||
}
|
|
||||||
Event::MappingEnd => {
|
|
||||||
self.key_stack.pop().unwrap();
|
|
||||||
let node = self.doc_stack.pop().unwrap();
|
|
||||||
self.insert_new_node(node);
|
|
||||||
}
|
|
||||||
Event::Scalar(v, style, aid, tag) => {
|
|
||||||
let node = if style != TScalarStyle::Plain {
|
|
||||||
Yaml::String(v)
|
|
||||||
} else if let Some(Tag {
|
|
||||||
ref handle,
|
|
||||||
ref suffix,
|
|
||||||
}) = tag
|
|
||||||
{
|
|
||||||
if handle == "tag:yaml.org,2002:" {
|
|
||||||
match suffix.as_ref() {
|
|
||||||
"bool" => {
|
|
||||||
// "true" or "false"
|
|
||||||
match v.parse::<bool>() {
|
|
||||||
Err(_) => Yaml::BadValue,
|
|
||||||
Ok(v) => Yaml::Boolean(v),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
"int" => match v.parse::<i64>() {
|
|
||||||
Err(_) => Yaml::BadValue,
|
|
||||||
Ok(v) => Yaml::Integer(v),
|
|
||||||
},
|
|
||||||
"float" => match parse_f64(&v) {
|
|
||||||
Some(_) => Yaml::Real(v),
|
|
||||||
None => Yaml::BadValue,
|
|
||||||
},
|
|
||||||
"null" => match v.as_ref() {
|
|
||||||
"~" | "null" => Yaml::Null,
|
|
||||||
_ => Yaml::BadValue,
|
|
||||||
},
|
|
||||||
_ => Yaml::String(v),
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
Yaml::String(v)
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
// Datatype is not specified, or unrecognized
|
|
||||||
Yaml::from_str(&v)
|
|
||||||
};
|
|
||||||
|
|
||||||
self.insert_new_node((node, aid));
|
|
||||||
}
|
|
||||||
Event::Alias(id) => {
|
|
||||||
let n = match self.anchor_map.get(&id) {
|
|
||||||
Some(v) => v.clone(),
|
|
||||||
None => Yaml::BadValue,
|
|
||||||
};
|
|
||||||
self.insert_new_node((n, 0));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// println!("DOC {:?}", self.doc_stack);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// An error that happened when loading a YAML document.
|
|
||||||
#[derive(Debug)]
|
|
||||||
pub enum LoadError {
|
|
||||||
/// An I/O error.
|
|
||||||
IO(std::io::Error),
|
|
||||||
/// An error within the scanner. This indicates a malformed YAML input.
|
|
||||||
Scan(ScanError),
|
|
||||||
/// A decoding error (e.g.: Invalid UTF_8).
|
|
||||||
Decode(std::borrow::Cow<'static, str>),
|
|
||||||
}
|
|
||||||
|
|
||||||
impl From<std::io::Error> for LoadError {
|
|
||||||
fn from(error: std::io::Error) -> Self {
|
|
||||||
LoadError::IO(error)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl std::error::Error for LoadError {
|
|
||||||
fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
|
|
||||||
Some(match &self {
|
|
||||||
LoadError::IO(e) => e,
|
|
||||||
LoadError::Scan(e) => e,
|
|
||||||
LoadError::Decode(_) => return None,
|
|
||||||
})
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl std::fmt::Display for LoadError {
|
|
||||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
|
||||||
match self {
|
|
||||||
LoadError::IO(e) => e.fmt(f),
|
|
||||||
LoadError::Scan(e) => e.fmt(f),
|
|
||||||
LoadError::Decode(e) => e.fmt(f),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl YamlLoader {
|
|
||||||
fn insert_new_node(&mut self, node: (Yaml, usize)) {
|
|
||||||
// valid anchor id starts from 1
|
|
||||||
if node.1 > 0 {
|
|
||||||
self.anchor_map.insert(node.1, node.0.clone());
|
|
||||||
}
|
|
||||||
if self.doc_stack.is_empty() {
|
|
||||||
self.doc_stack.push(node);
|
|
||||||
} else {
|
|
||||||
let parent = self.doc_stack.last_mut().unwrap();
|
|
||||||
match *parent {
|
|
||||||
(Yaml::Array(ref mut v), _) => v.push(node.0),
|
|
||||||
(Yaml::Hash(ref mut h), _) => {
|
|
||||||
let cur_key = self.key_stack.last_mut().unwrap();
|
|
||||||
// current node is a key
|
|
||||||
if cur_key.is_badvalue() {
|
|
||||||
*cur_key = node.0;
|
|
||||||
// current node is a value
|
|
||||||
} else {
|
|
||||||
let mut newkey = Yaml::BadValue;
|
|
||||||
mem::swap(&mut newkey, cur_key);
|
|
||||||
h.insert(newkey, node.0);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
_ => unreachable!(),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Load the given string as a set of YAML documents.
|
|
||||||
///
|
|
||||||
/// The `source` is interpreted as YAML documents and is parsed. Parsing succeeds if and only
|
|
||||||
/// if all documents are parsed successfully. An error in a latter document prevents the former
|
|
||||||
/// from being returned.
|
|
||||||
/// # Errors
|
|
||||||
/// Returns `ScanError` when loading fails.
|
|
||||||
pub fn load_from_str(source: &str) -> Result<Vec<Yaml>, ScanError> {
|
|
||||||
Self::load_from_iter(source.chars())
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Load the contents of the given iterator as a set of YAML documents.
|
|
||||||
///
|
|
||||||
/// The `source` is interpreted as YAML documents and is parsed. Parsing succeeds if and only
|
|
||||||
/// if all documents are parsed successfully. An error in a latter document prevents the former
|
|
||||||
/// from being returned.
|
|
||||||
/// # Errors
|
|
||||||
/// Returns `ScanError` when loading fails.
|
|
||||||
pub fn load_from_iter<I: Iterator<Item = char>>(source: I) -> Result<Vec<Yaml>, ScanError> {
|
|
||||||
let mut parser = Parser::new(source);
|
|
||||||
Self::load_from_parser(&mut parser)
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Load the contents from the specified Parser as a set of YAML documents.
|
|
||||||
///
|
|
||||||
/// Parsing succeeds if and only if all documents are parsed successfully.
|
|
||||||
/// An error in a latter document prevents the former from being returned.
|
|
||||||
/// # Errors
|
|
||||||
/// Returns `ScanError` when loading fails.
|
|
||||||
pub fn load_from_parser<I: Iterator<Item = char>>(
|
|
||||||
parser: &mut Parser<I>,
|
|
||||||
) -> Result<Vec<Yaml>, ScanError> {
|
|
||||||
let mut loader = YamlLoader::default();
|
|
||||||
parser.load(&mut loader, true)?;
|
|
||||||
Ok(loader.docs)
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Return a reference to the parsed Yaml documents.
|
|
||||||
#[must_use]
|
|
||||||
pub fn documents(&self) -> &[Yaml] {
|
|
||||||
&self.docs
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Signature of the callback invoked when [`YAMLDecodingTrap::Call`] is in use.
///
/// # Arguments
/// * `malformation_length`: The length of the sequence the decoder failed to decode.
/// * `bytes_read_after_malformation`: The number of lookahead bytes the decoder consumed after
///   the malformation.
/// * `input_at_malformation`: The input buffer, starting at the malformation. Its first
///   `malformation_length` bytes are the problematic sequence. The following
///   `bytes_read_after_malformation` bytes are already stored in the decoder and will not be
///   re-fed.
/// * `output`: The output string.
///
/// The callback is free to modify `output` however it sees fit. An empty function recreates
/// the behavior of [`YAMLDecodingTrap::Ignore`], pushing a `\u{FFFD}` into `output` recreates
/// [`YAMLDecodingTrap::Replace`], and returning [`ControlFlow::Break`] recreates
/// [`YAMLDecodingTrap::Strict`].
///
/// # Returns
/// [`ControlFlow::Continue`] if decoding may continue, or [`ControlFlow::Break`] if decoding
/// must be aborted. An optional error string may be supplied with the latter.
#[cfg(feature = "encoding")]
pub type YAMLDecodingTrapFn = fn(
    malformation_length: u8,
    bytes_read_after_malformation: u8,
    input_at_malformation: &[u8],
    output: &mut String,
) -> ControlFlow<Cow<'static, str>>;
|
|
||||||
|
|
||||||
/// What a [`YamlDecoder`] does when a decoding error occurs.
#[cfg(feature = "encoding")]
#[derive(Copy, Clone, PartialEq, Eq)]
pub enum YAMLDecodingTrap {
    /// Drop the offending bytes: they do not appear in the output.
    Ignore,
    /// Abort decoding with an error.
    Strict,
    /// Put the Unicode REPLACEMENT CHARACTER in place of the offending bytes.
    Replace,
    /// Hand the malformation over to the user-supplied function.
    Call(YAMLDecodingTrapFn),
}
|
|
||||||
|
|
||||||
/// `YamlDecoder` is a `YamlLoader` builder that allows you to supply your own encoding error trap.
///
/// For example, to read a YAML file while ignoring Unicode decoding errors you can set the
/// trap to [`YAMLDecodingTrap::Ignore`] through [`YamlDecoder::encoding_trap`].
///
/// ```rust
/// use saphyr::{YamlDecoder, YAMLDecodingTrap};
///
/// let string = b"---
/// a\xa9: 1
/// b: 2.2
/// c: [1, 2]
/// ";
/// let out = YamlDecoder::read(string as &[u8])
///     .encoding_trap(YAMLDecodingTrap::Ignore)
///     .decode()
///     .unwrap();
/// ```
#[cfg(feature = "encoding")]
pub struct YamlDecoder<T: std::io::Read> {
    /// The reader the raw bytes are pulled from.
    source: T,
    /// The behavior to adopt when a malformed sequence is met.
    trap: YAMLDecodingTrap,
}
|
|
||||||
|
|
||||||
#[cfg(feature = "encoding")]
impl<T: std::io::Read> YamlDecoder<T> {
    /// Build a `YamlDecoder` that reads from the given source.
    ///
    /// The trap starts out as [`YAMLDecodingTrap::Strict`].
    pub fn read(source: T) -> YamlDecoder<T> {
        let trap = YAMLDecodingTrap::Strict;
        Self { source, trap }
    }

    /// Choose what the decoder does when the encoding is invalid.
    pub fn encoding_trap(&mut self, trap: YAMLDecodingTrap) -> &mut Self {
        self.trap = trap;
        self
    }

    /// Read the whole source, decode it with the configured trap and load the result as YAML.
    ///
    /// # Errors
    /// Returns `LoadError` when reading the source, decoding it or scanning the decoded text
    /// fails.
    pub fn decode(&mut self) -> Result<Vec<Yaml>, LoadError> {
        let mut raw = Vec::new();
        self.source.read_to_end(&mut raw)?;

        // Use the encoding announced by a BOM when there is one; otherwise fall back to
        // `detect_utf16_endianness`.
        let encoding = match Encoding::for_bom(&raw) {
            Some((from_bom, _bom_len)) => from_bom,
            None => detect_utf16_endianness(&raw),
        };

        // Decode the raw bytes into a string.
        let mut decoder = encoding.new_decoder();
        let mut text = String::new();
        decode_loop(&raw, &mut text, &mut decoder, self.trap)?;

        match YamlLoader::load_from_str(&text) {
            Ok(docs) => Ok(docs),
            Err(scan_error) => Err(LoadError::Scan(scan_error)),
        }
    }
}
|
|
||||||
|
|
||||||
/// Perform a loop of [`Decoder::decode_to_string_without_replacement`], reallocating `output` if
/// needed.
///
/// Malformed sequences are handled according to `trap`: skipped for
/// [`YAMLDecodingTrap::Ignore`], replaced by `\u{FFFD}` for [`YAMLDecodingTrap::Replace`],
/// reported as an error for [`YAMLDecodingTrap::Strict`] and forwarded to the callback for
/// [`YAMLDecodingTrap::Call`].
///
/// # Errors
/// Returns `LoadError::Decode` when a malformed sequence is met and `trap` is
/// [`YAMLDecodingTrap::Strict`], or when the callback of a [`YAMLDecodingTrap::Call`] returns
/// [`ControlFlow::Break`]. If the callback supplies an empty message, the same message as in
/// the strict case is used.
#[cfg(feature = "encoding")]
fn decode_loop(
    input: &[u8],
    output: &mut String,
    decoder: &mut Decoder,
    trap: YAMLDecodingTrap,
) -> Result<(), LoadError> {
    // Smallest number of bytes `output` grows by when the decoder reports it is full.
    const MIN_OUTPUT_GROWTH: usize = 16;

    output.reserve(input.len());
    let mut total_bytes_read = 0;

    loop {
        match decoder.decode_to_string_without_replacement(&input[total_bytes_read..], output, true)
        {
            // If the input is empty, we processed the whole input.
            (DecoderResult::InputEmpty, _) => break Ok(()),
            // If the output is full, we must reallocate.
            (DecoderResult::OutputFull, bytes_read) => {
                total_bytes_read += bytes_read;
                // The output is already reserved to the size of the input. We slowly resize. Here,
                // we're expecting that 10% of bytes will double in size when converting to UTF-8.
                // The growth is clamped from below: for inputs shorter than 10 bytes,
                // `input.len() / 10` is 0 and `reserve(0)` would not grow anything, leaving the
                // loop stuck on `OutputFull`.
                output.reserve((input.len() / 10).max(MIN_OUTPUT_GROWTH));
            }
            (DecoderResult::Malformed(malformed_len, bytes_after_malformed), bytes_read) => {
                total_bytes_read += bytes_read;
                match trap {
                    // Ignore (skip over) malformed character.
                    YAMLDecodingTrap::Ignore => {}
                    // Replace them with the Unicode REPLACEMENT CHARACTER.
                    YAMLDecodingTrap::Replace => output.push('\u{FFFD}'),
                    // Otherwise error, getting as much context as possible.
                    YAMLDecodingTrap::Strict => {
                        let sequence_len = usize::from(malformed_len);
                        let byte_idx = total_bytes_read
                            - (sequence_len + usize::from(bytes_after_malformed));
                        let malformed_sequence = &input[byte_idx..byte_idx + sequence_len];

                        break Err(LoadError::Decode(Cow::Owned(format!(
                            "Invalid character sequence at {byte_idx}: {malformed_sequence:?}",
                        ))));
                    }
                    YAMLDecodingTrap::Call(callback) => {
                        // Widen to `usize` before adding, as in the strict case.
                        let sequence_len = usize::from(malformed_len);
                        let byte_idx = total_bytes_read
                            - (sequence_len + usize::from(bytes_after_malformed));
                        let malformed_sequence = &input[byte_idx..byte_idx + sequence_len];

                        if let ControlFlow::Break(error) = callback(
                            malformed_len,
                            bytes_after_malformed,
                            &input[byte_idx..],
                            output,
                        ) {
                            // An empty message from the callback falls back to the default one.
                            let message = if error.is_empty() {
                                Cow::Owned(format!(
                                    "Invalid character sequence at {byte_idx}: {malformed_sequence:?}",
                                ))
                            } else {
                                error
                            };
                            break Err(LoadError::Decode(message));
                        }
                    }
                }
            }
        }
    }
}
|
|
||||||
|
|
||||||
/// Guess the encoding of a bytestream from its first two bytes.
///
/// When the bytestream starts with a BOM codepoint, the encoding crate knows how to tell apart
/// UTF-8 from UTF-16LE and UTF-16BE. However, it doesn't even attempt to guess the UTF-16
/// endianness of the input bytestream, since in the general case the bytestream could start with
/// a codepoint that uses both bytes.
///
/// The YAML-1.2 spec mandates that the first character of a YAML document is an ASCII character.
/// This allows the encoding to be deduced by the pattern of null (#x00) characters: a null byte
/// followed by a non-null byte yields UTF-16BE, a non-null byte followed by a null byte yields
/// UTF-16LE, and anything else yields UTF-8.
///
/// See spec at <https://yaml.org/spec/1.2/spec.html#id2771184>
#[cfg(feature = "encoding")]
fn detect_utf16_endianness(b: &[u8]) -> &'static Encoding {
    match b {
        [0, second, ..] if *second != 0 => encoding_rs::UTF_16BE,
        [first, 0, ..] if *first != 0 => encoding_rs::UTF_16LE,
        _ => encoding_rs::UTF_8,
    }
}
|
|
||||||
|
|
||||||
macro_rules! define_as (
|
macro_rules! define_as (
|
||||||
($name:ident, $t:ident, $yt:ident) => (
|
($name:ident, $t:ident, $yt:ident) => (
|
||||||
/// Get a copy of the inner object in the YAML enum if it is a `$t`.
|
/// Get a copy of the inner object in the YAML enum if it is a `$t`.
|
||||||
|
@ -623,7 +209,7 @@ impl Yaml {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg_attr(feature = "cargo-clippy", allow(clippy::should_implement_trait))]
|
#[allow(clippy::should_implement_trait)]
|
||||||
impl Yaml {
|
impl Yaml {
|
||||||
/// Convert a string to a [`Yaml`] node.
|
/// Convert a string to a [`Yaml`] node.
|
||||||
///
|
///
|
||||||
|
@ -757,96 +343,3 @@ impl Iterator for YamlIter {
|
||||||
self.yaml.next()
|
self.yaml.next()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(test)]
mod test {
    use super::{YAMLDecodingTrap, Yaml, YamlDecoder};

    /// A UTF-8 input starting with a BOM is decoded and loaded.
    #[test]
    fn test_read_bom() {
        let s = b"\xef\xbb\xbf---
a: 1
b: 2.2
c: [1, 2]
";
        let out = YamlDecoder::read(s as &[u8]).decode().unwrap();
        let doc = &out[0];
        assert_eq!(doc["a"].as_i64().unwrap(), 1i64);
        assert!((doc["b"].as_f64().unwrap() - 2.2f64).abs() <= f64::EPSILON);
        assert_eq!(doc["c"][1].as_i64().unwrap(), 2i64);
        assert!(doc["d"][0].is_badvalue());
    }

    /// A UTF-16LE input starting with a BOM is decoded and loaded.
    #[test]
    fn test_read_utf16le() {
        let s = b"\xff\xfe-\x00-\x00-\x00
\x00a\x00:\x00 \x001\x00
\x00b\x00:\x00 \x002\x00.\x002\x00
\x00c\x00:\x00 \x00[\x001\x00,\x00 \x002\x00]\x00
\x00";
        let out = YamlDecoder::read(s as &[u8]).decode().unwrap();
        let doc = &out[0];
        println!("GOT: {doc:?}");
        assert_eq!(doc["a"].as_i64().unwrap(), 1i64);
        // Compare the absolute difference: without `.abs()`, any value below 2.2 would pass.
        assert!((doc["b"].as_f64().unwrap() - 2.2f64).abs() <= f64::EPSILON);
        assert_eq!(doc["c"][1].as_i64().unwrap(), 2i64);
        assert!(doc["d"][0].is_badvalue());
    }

    /// A UTF-16BE input starting with a BOM is decoded and loaded.
    #[test]
    fn test_read_utf16be() {
        let s = b"\xfe\xff\x00-\x00-\x00-\x00
\x00a\x00:\x00 \x001\x00
\x00b\x00:\x00 \x002\x00.\x002\x00
\x00c\x00:\x00 \x00[\x001\x00,\x00 \x002\x00]\x00
";
        let out = YamlDecoder::read(s as &[u8]).decode().unwrap();
        let doc = &out[0];
        println!("GOT: {doc:?}");
        assert_eq!(doc["a"].as_i64().unwrap(), 1i64);
        assert!((doc["b"].as_f64().unwrap() - 2.2f64).abs() <= f64::EPSILON);
        assert_eq!(doc["c"][1].as_i64().unwrap(), 2i64);
        assert!(doc["d"][0].is_badvalue());
    }

    /// A UTF-16LE input without a BOM is decoded and loaded.
    #[test]
    fn test_read_utf16le_nobom() {
        let s = b"-\x00-\x00-\x00
\x00a\x00:\x00 \x001\x00
\x00b\x00:\x00 \x002\x00.\x002\x00
\x00c\x00:\x00 \x00[\x001\x00,\x00 \x002\x00]\x00
\x00";
        let out = YamlDecoder::read(s as &[u8]).decode().unwrap();
        let doc = &out[0];
        println!("GOT: {doc:?}");
        assert_eq!(doc["a"].as_i64().unwrap(), 1i64);
        assert!((doc["b"].as_f64().unwrap() - 2.2f64).abs() <= f64::EPSILON);
        assert_eq!(doc["c"][1].as_i64().unwrap(), 2i64);
        assert!(doc["d"][0].is_badvalue());
    }

    /// With the `Ignore` trap, the invalid `\xa9` byte is dropped and the key reads `a`.
    #[test]
    fn test_read_trap() {
        let s = b"---
a\xa9: 1
b: 2.2
c: [1, 2]
";
        let out = YamlDecoder::read(s as &[u8])
            .encoding_trap(YAMLDecodingTrap::Ignore)
            .decode()
            .unwrap();
        let doc = &out[0];
        println!("GOT: {doc:?}");
        assert_eq!(doc["a"].as_i64().unwrap(), 1i64);
        assert!((doc["b"].as_f64().unwrap() - 2.2f64).abs() <= f64::EPSILON);
        assert_eq!(doc["c"][1].as_i64().unwrap(), 2i64);
        assert!(doc["d"][0].is_badvalue());
    }

    /// `Yaml::or` returns its argument for `Null` and the receiver for an integer.
    #[test]
    fn test_or() {
        assert_eq!(Yaml::Null.or(Yaml::Integer(3)), Yaml::Integer(3));
        assert_eq!(Yaml::Integer(3).or(Yaml::Integer(7)), Yaml::Integer(3));
    }
}
|
|
||||||
|
|
Loading…
Reference in a new issue