saphyr-serde/saphyr/src/yaml.rs

827 lines
28 KiB
Rust
Raw Normal View History

2024-03-20 14:50:48 +00:00
//! YAML objects manipulation utilities.
2023-08-11 23:54:46 +00:00
#![allow(clippy::module_name_repetitions)]
use std::borrow::Cow;
use std::ops::ControlFlow;
use std::{collections::BTreeMap, convert::TryFrom, mem, ops::Index, ops::IndexMut};
2024-02-08 06:12:14 +00:00
use encoding_rs::{Decoder, DecoderResult, Encoding};
2020-05-26 10:35:06 +00:00
use hashlink::LinkedHashMap;
2024-02-08 06:12:14 +00:00
2023-11-19 00:09:41 +00:00
use crate::parser::{Event, MarkedEventReceiver, Parser, Tag};
2023-12-20 23:14:08 +00:00
use crate::scanner::{Marker, ScanError, TScalarStyle};
2015-05-24 06:27:42 +00:00
2016-03-10 09:55:21 +00:00
/// A YAML node is stored as this `Yaml` enumeration, which provides an easy way to
2015-05-31 09:59:43 +00:00
/// access your YAML document.
2015-06-29 16:31:22 +00:00
///
2015-05-31 09:59:43 +00:00
/// # Examples
2015-06-29 16:31:22 +00:00
///
2015-05-31 09:59:43 +00:00
/// ```
2024-02-08 06:12:14 +00:00
/// use yaml_rust2::Yaml;
2015-05-31 09:59:43 +00:00
/// let foo = Yaml::from_str("-123"); // convert the string to the appropriate YAML type
/// assert_eq!(foo.as_i64().unwrap(), -123);
2015-06-29 16:31:22 +00:00
///
2016-03-10 09:55:21 +00:00
/// // iterate over an Array
2015-05-31 09:59:43 +00:00
/// let vec = Yaml::Array(vec![Yaml::Integer(1), Yaml::Integer(2)]);
/// for v in vec.as_vec().unwrap() {
/// assert!(v.as_i64().is_some());
/// }
/// ```
#[derive(Clone, PartialEq, PartialOrd, Debug, Eq, Ord, Hash)]
2015-05-24 06:27:42 +00:00
pub enum Yaml {
2016-03-10 09:55:21 +00:00
/// Float types are stored as String and parsed on demand.
/// Note that `f64` does NOT implement Eq trait and can NOT be stored in `BTreeMap`.
Real(String),
2016-03-10 09:55:21 +00:00
/// YAML int is stored as i64.
2015-05-30 14:39:50 +00:00
Integer(i64),
2016-03-10 09:55:21 +00:00
/// YAML scalar.
String(String),
2016-03-10 09:55:21 +00:00
/// YAML bool, e.g. `true` or `false`.
2015-05-24 06:27:42 +00:00
Boolean(bool),
2016-03-10 09:55:21 +00:00
/// YAML array, can be accessed as a `Vec`.
Array(Array),
2017-01-28 04:50:52 +00:00
/// YAML hash, can be accessed as a `LinkedHashMap`.
///
/// Insertion order will match the order of insertion into the map.
Hash(Hash),
2015-05-31 09:59:43 +00:00
/// Alias, not fully supported yet.
2015-05-28 17:56:03 +00:00
Alias(usize),
2016-03-10 09:55:21 +00:00
/// YAML null, e.g. `null` or `~`.
2015-05-24 06:27:42 +00:00
Null,
2016-03-10 09:55:21 +00:00
/// Accessing a nonexistent node via the Index trait returns `BadValue`. This
/// simplifies error handling in the calling code. Invalid type conversion also
/// returns `BadValue`.
2015-05-24 18:16:28 +00:00
BadValue,
2015-05-24 06:27:42 +00:00
}
2024-03-20 14:50:48 +00:00
/// The type contained in the `Yaml::Array` variant. This corresponds to YAML sequences.
2015-05-24 06:27:42 +00:00
pub type Array = Vec<Yaml>;
2024-03-20 14:50:48 +00:00
/// The type contained in the `Yaml::Hash` variant. This corresponds to YAML mappings.
2017-01-28 04:50:52 +00:00
pub type Hash = LinkedHashMap<Yaml, Yaml>;
2015-05-24 06:27:42 +00:00
2017-05-13 13:22:19 +00:00
/// Parse `v` as a floating point number following the YAML 1.2 Core schema.
///
/// The special spellings of infinity (`.inf`, `.Inf`, `.INF`, optionally signed) and of
/// not-a-number (`.nan`, `.NaN`, `.NAN`) are recognized explicitly. Any other input is handed
/// to [`str::parse`], provided it contains at least one ASCII digit.
///
/// # Return
/// `Some(value)` if `v` is a float of the Core schema, `None` otherwise.
// See: https://github.com/chyh1990/yaml-rust/issues/51
fn parse_f64(v: &str) -> Option<f64> {
    match v {
        ".inf" | ".Inf" | ".INF" | "+.inf" | "+.Inf" | "+.INF" => Some(f64::INFINITY),
        "-.inf" | "-.Inf" | "-.INF" => Some(f64::NEG_INFINITY),
        ".nan" | ".NaN" | ".NAN" => Some(f64::NAN),
        // `str::parse::<f64>` also accepts digit-less words such as "inf", "infinity" and
        // "nan". The Core schema treats those as plain strings, so a digit is required.
        _ if v.bytes().any(|byte| byte.is_ascii_digit()) => v.parse::<f64>().ok(),
        _ => None,
    }
}
/// Main structure for quickly parsing YAML.
///
/// See [`YamlLoader::load_from_str`].
#[derive(Default)]
2015-05-26 18:50:51 +00:00
pub struct YamlLoader {
2024-03-20 14:50:48 +00:00
/// The different YAML documents that are loaded.
2015-05-26 18:50:51 +00:00
docs: Vec<Yaml>,
// states
2015-12-16 07:10:02 +00:00
// (current node, anchor_id) tuple
doc_stack: Vec<(Yaml, usize)>,
2015-05-26 18:50:51 +00:00
key_stack: Vec<Yaml>,
2015-12-16 07:10:02 +00:00
anchor_map: BTreeMap<usize, Yaml>,
2015-05-26 18:50:51 +00:00
}
impl MarkedEventReceiver for YamlLoader {
fn on_event(&mut self, ev: Event, _: Marker) {
2015-05-30 14:39:50 +00:00
// println!("EV {:?}", ev);
match ev {
2024-03-17 09:18:39 +00:00
Event::DocumentStart | Event::Nothing | Event::StreamStart | Event::StreamEnd => {
2015-05-26 18:50:51 +00:00
// do nothing
2018-09-15 16:49:04 +00:00
}
2015-05-26 18:50:51 +00:00
Event::DocumentEnd => {
match self.doc_stack.len() {
// empty document
0 => self.docs.push(Yaml::BadValue),
2015-12-16 07:10:02 +00:00
1 => self.docs.push(self.doc_stack.pop().unwrap().0),
2018-09-15 16:49:04 +00:00
_ => unreachable!(),
2015-05-26 18:50:51 +00:00
}
2018-09-15 16:49:04 +00:00
}
2023-11-19 13:40:01 +00:00
Event::SequenceStart(aid, _) => {
2015-12-16 07:10:02 +00:00
self.doc_stack.push((Yaml::Array(Vec::new()), aid));
2018-09-15 16:49:04 +00:00
}
2015-05-26 18:50:51 +00:00
Event::SequenceEnd => {
let node = self.doc_stack.pop().unwrap();
self.insert_new_node(node);
2018-09-15 16:49:04 +00:00
}
2023-11-19 00:09:41 +00:00
Event::MappingStart(aid, _) => {
2015-12-16 07:10:02 +00:00
self.doc_stack.push((Yaml::Hash(Hash::new()), aid));
2015-05-26 18:50:51 +00:00
self.key_stack.push(Yaml::BadValue);
2018-09-15 16:49:04 +00:00
}
2015-05-26 18:50:51 +00:00
Event::MappingEnd => {
self.key_stack.pop().unwrap();
let node = self.doc_stack.pop().unwrap();
self.insert_new_node(node);
2018-09-15 16:49:04 +00:00
}
Event::Scalar(v, style, aid, tag) => {
2015-05-26 18:50:51 +00:00
let node = if style != TScalarStyle::Plain {
Yaml::String(v)
2023-11-19 13:40:01 +00:00
} else if let Some(Tag {
ref handle,
ref suffix,
}) = tag
{
2023-11-19 15:00:19 +00:00
if handle == "tag:yaml.org,2002:" {
match suffix.as_ref() {
"bool" => {
// "true" or "false"
match v.parse::<bool>() {
Err(_) => Yaml::BadValue,
2018-09-15 16:49:04 +00:00
Ok(v) => Yaml::Boolean(v),
}
2018-09-15 16:49:04 +00:00
}
"int" => match v.parse::<i64>() {
Err(_) => Yaml::BadValue,
Ok(v) => Yaml::Integer(v),
},
2018-09-15 16:49:04 +00:00
"float" => match parse_f64(&v) {
Some(_) => Yaml::Real(v),
None => Yaml::BadValue,
},
2018-09-15 16:49:04 +00:00
"null" => match v.as_ref() {
"~" | "null" => Yaml::Null,
_ => Yaml::BadValue,
},
2018-09-15 16:49:04 +00:00
_ => Yaml::String(v),
}
} else {
Yaml::String(v)
2015-05-26 18:50:51 +00:00
}
} else {
// Datatype is not specified, or unrecognized
Yaml::from_str(&v)
2015-05-26 18:50:51 +00:00
};
2015-12-16 07:10:02 +00:00
self.insert_new_node((node, aid));
2018-09-15 16:49:04 +00:00
}
2015-05-28 17:56:03 +00:00
Event::Alias(id) => {
2015-12-16 07:10:02 +00:00
let n = match self.anchor_map.get(&id) {
Some(v) => v.clone(),
None => Yaml::BadValue,
};
self.insert_new_node((n, 0));
2015-05-28 17:56:03 +00:00
}
2015-05-26 18:50:51 +00:00
}
// println!("DOC {:?}", self.doc_stack);
}
}
2024-03-20 14:50:48 +00:00
/// An error that happened when loading a YAML document.
#[derive(Debug)]
pub enum LoadError {
2024-03-20 14:50:48 +00:00
/// An I/O error.
IO(std::io::Error),
2024-03-20 14:50:48 +00:00
/// An error within the scanner. This indicates a malformed YAML input.
Scan(ScanError),
2024-03-20 14:50:48 +00:00
/// A decoding error (e.g.: Invalid UTF_8).
Decode(std::borrow::Cow<'static, str>),
}
impl From<std::io::Error> for LoadError {
fn from(error: std::io::Error) -> Self {
LoadError::IO(error)
}
}
2015-05-26 18:50:51 +00:00
impl YamlLoader {
2015-12-16 07:10:02 +00:00
fn insert_new_node(&mut self, node: (Yaml, usize)) {
// valid anchor id starts from 1
if node.1 > 0 {
self.anchor_map.insert(node.1, node.0.clone());
}
2016-03-10 12:49:02 +00:00
if self.doc_stack.is_empty() {
self.doc_stack.push(node);
} else {
2015-05-26 18:50:51 +00:00
let parent = self.doc_stack.last_mut().unwrap();
match *parent {
2015-12-16 07:10:02 +00:00
(Yaml::Array(ref mut v), _) => v.push(node.0),
(Yaml::Hash(ref mut h), _) => {
2018-01-02 06:57:27 +00:00
let cur_key = self.key_stack.last_mut().unwrap();
2015-05-26 18:50:51 +00:00
// current node is a key
if cur_key.is_badvalue() {
2015-12-16 07:10:02 +00:00
*cur_key = node.0;
2015-05-26 18:50:51 +00:00
// current node is a value
} else {
let mut newkey = Yaml::BadValue;
mem::swap(&mut newkey, cur_key);
2015-12-16 07:10:02 +00:00
h.insert(newkey, node.0);
2015-05-26 18:50:51 +00:00
}
2018-09-15 16:49:04 +00:00
}
2015-05-26 18:50:51 +00:00
_ => unreachable!(),
}
}
}
/// Load the given string as a set of YAML documents.
///
/// The `source` is interpreted as YAML documents and is parsed. Parsing succeeds if and only
/// if all documents are parsed successfully. An error in a latter document prevents the former
/// from being returned.
/// # Errors
/// Returns `ScanError` when loading fails.
2018-09-15 16:49:04 +00:00
pub fn load_from_str(source: &str) -> Result<Vec<Yaml>, ScanError> {
Self::load_from_iter(source.chars())
}
/// Load the contents of the given iterator as a set of YAML documents.
///
/// The `source` is interpreted as YAML documents and is parsed. Parsing succeeds if and only
/// if all documents are parsed successfully. An error in a latter document prevents the former
/// from being returned.
/// # Errors
/// Returns `ScanError` when loading fails.
pub fn load_from_iter<I: Iterator<Item = char>>(source: I) -> Result<Vec<Yaml>, ScanError> {
let mut parser = Parser::new(source);
Self::load_from_parser(&mut parser)
}
/// Load the contents from the specified Parser as a set of YAML documents.
///
/// Parsing succeeds if and only if all documents are parsed successfully.
/// An error in a latter document prevents the former from being returned.
/// # Errors
/// Returns `ScanError` when loading fails.
pub fn load_from_parser<I: Iterator<Item = char>>(
parser: &mut Parser<I>,
) -> Result<Vec<Yaml>, ScanError> {
let mut loader = YamlLoader::default();
2018-09-16 06:58:48 +00:00
parser.load(&mut loader, true)?;
2015-05-26 18:50:51 +00:00
Ok(loader.docs)
}
/// Return a reference to the parsed Yaml documents.
#[must_use]
pub fn documents(&self) -> &[Yaml] {
&self.docs
}
}
/// The signature of the callback used by [`YAMLDecodingTrap::Call`].
///
/// The callback receives the following arguments:
///   * `malformation_length`: The length of the sequence the decoder failed to decode.
///   * `bytes_read_after_malformation`: The number of lookahead bytes the decoder consumed
///     after the malformation.
///   * `input_at_malformation`: The input buffer, starting at the malformation. Its first
///     `malformation_length` bytes are the problematic sequence. The following
///     `bytes_read_after_malformation` bytes are already stored in the decoder and will not
///     be re-fed.
///   * `output`: The output string.
///
/// The callback is free to modify `output` as it sees fit. An empty function reproduces
/// [`YAMLDecodingTrap::Ignore`], pushing a `\u{FFFD}` into `output` reproduces
/// [`YAMLDecodingTrap::Replace`], and returning [`ControlFlow::Break`] reproduces
/// [`YAMLDecodingTrap::Strict`].
///
/// # Returns
/// [`ControlFlow::Continue`] if decoding may continue, or [`ControlFlow::Break`] if decoding
/// must be aborted. An optional error string may be supplied with the latter.
pub type YAMLDecodingTrapFn = fn(
    malformation_length: u8,
    bytes_read_after_malformation: u8,
    input_at_malformation: &[u8],
    output: &mut String,
) -> ControlFlow<Cow<'static, str>>;
/// The behavior [`YamlDecoder`] must have when a decoding error occurs.
#[derive(Clone, Copy, PartialEq, Eq)]
pub enum YAMLDecodingTrap {
    /// Skip the offending bytes: they are left out of the output.
    Ignore,
    /// Abort decoding with an error.
    Strict,
    /// Substitute the Unicode REPLACEMENT CHARACTER for the offending bytes.
    Replace,
    /// Hand the malformation over to the user-supplied function.
    Call(YAMLDecodingTrapFn),
}
2024-03-19 17:18:59 +00:00
/// `YamlDecoder` is a `YamlLoader` builder that allows you to supply your own encoding error
/// trap.
///
/// For example, to read a YAML file while ignoring Unicode decoding errors you can set the
/// `encoding_trap` to [`YAMLDecodingTrap::Ignore`].
/// ```rust
/// use yaml_rust2::yaml::{YamlDecoder, YAMLDecodingTrap};
///
/// let string = b"---
/// a\xa9: 1
/// b: 2.2
/// c: [1, 2]
/// ";
/// let out = YamlDecoder::read(string as &[u8])
///     .encoding_trap(YAMLDecodingTrap::Ignore)
///     .decode()
///     .unwrap();
/// ```
pub struct YamlDecoder<T: std::io::Read> {
    /// Where the raw bytes are read from.
    source: T,
    /// What to do when a byte sequence cannot be decoded.
    trap: YAMLDecodingTrap,
}
impl<T: std::io::Read> YamlDecoder<T> {
2024-03-20 14:50:48 +00:00
/// Create a `YamlDecoder` decoding the given source.
pub fn read(source: T) -> YamlDecoder<T> {
YamlDecoder {
source,
trap: YAMLDecodingTrap::Strict,
}
}
2024-03-20 14:50:48 +00:00
/// Set the behavior of the decoder when the encoding is invalid.
pub fn encoding_trap(&mut self, trap: YAMLDecodingTrap) -> &mut Self {
self.trap = trap;
self
}
2024-03-20 14:50:48 +00:00
/// Run the decode operation with the source and trap the `YamlDecoder` was built with.
///
/// # Errors
/// Returns `LoadError` when decoding fails.
pub fn decode(&mut self) -> Result<Vec<Yaml>, LoadError> {
let mut buffer = Vec::new();
self.source.read_to_end(&mut buffer)?;
// Check if the `encoding` library can detect encoding from the BOM, otherwise use
// `detect_utf16_endianness`.
let (encoding, _) =
Encoding::for_bom(&buffer).unwrap_or_else(|| (detect_utf16_endianness(&buffer), 2));
let mut decoder = encoding.new_decoder();
let mut output = String::new();
// Decode the input buffer.
decode_loop(&buffer, &mut output, &mut decoder, self.trap)?;
YamlLoader::load_from_str(&output).map_err(LoadError::Scan)
}
}
/// Perform a loop of [`Decoder::decode_to_string`], reallocating `output` if needed.
fn decode_loop(
input: &[u8],
output: &mut String,
decoder: &mut Decoder,
trap: YAMLDecodingTrap,
) -> Result<(), LoadError> {
output.reserve(input.len());
let mut total_bytes_read = 0;
loop {
match decoder.decode_to_string_without_replacement(&input[total_bytes_read..], output, true)
{
// If the input is empty, we processed the whole input.
(DecoderResult::InputEmpty, _) => break Ok(()),
// If the output is full, we must reallocate.
(DecoderResult::OutputFull, bytes_read) => {
total_bytes_read += bytes_read;
// The output is already reserved to the size of the input. We slowly resize. Here,
// we're expecting that 10% of bytes will double in size when converting to UTF-8.
output.reserve(input.len() / 10);
}
(DecoderResult::Malformed(malformed_len, bytes_after_malformed), bytes_read) => {
total_bytes_read += bytes_read;
match trap {
// Ignore (skip over) malformed character.
YAMLDecodingTrap::Ignore => {}
// Replace them with the Unicode REPLACEMENT CHARACTER.
YAMLDecodingTrap::Replace => {
output.push('\u{FFFD}');
}
// Otherwise error, getting as much context as possible.
YAMLDecodingTrap::Strict => {
let malformed_len = malformed_len as usize;
let bytes_after_malformed = bytes_after_malformed as usize;
let byte_idx = total_bytes_read - (malformed_len + bytes_after_malformed);
let malformed_sequence = &input[byte_idx..byte_idx + malformed_len];
break Err(LoadError::Decode(Cow::Owned(format!(
"Invalid character sequence at {byte_idx}: {malformed_sequence:?}",
))));
}
YAMLDecodingTrap::Call(callback) => {
let byte_idx =
total_bytes_read - ((malformed_len + bytes_after_malformed) as usize);
let malformed_sequence =
&input[byte_idx..byte_idx + malformed_len as usize];
if let ControlFlow::Break(error) = callback(
malformed_len,
bytes_after_malformed,
&input[byte_idx..],
output,
) {
if error.is_empty() {
break Err(LoadError::Decode(Cow::Owned(format!(
"Invalid character sequence at {byte_idx}: {malformed_sequence:?}",
))));
}
break Err(LoadError::Decode(error));
}
}
}
}
}
}
}
/// Guess the encoding of a bytestream that does not start with a BOM.
///
/// The encoding crate knows how to tell apart UTF-8 from UTF-16LE and UTF-16BE when the
/// bytestream starts with a BOM codepoint. However, it doesn't even attempt to guess the
/// UTF-16 endianness of the input bytestream since in the general case the bytestream could
/// start with a codepoint that uses both bytes.
///
/// The YAML-1.2 spec mandates that the first character of a YAML document is an ASCII
/// character. This allows the encoding to be deduced by the pattern of null (#x00) characters:
/// a null first byte followed by a non-null byte selects UTF-16BE, a non-null first byte
/// followed by a null byte selects UTF-16LE, and anything else selects UTF-8.
///
/// See spec at <https://yaml.org/spec/1.2/spec.html#id2771184>
fn detect_utf16_endianness(b: &[u8]) -> &'static Encoding {
    match b {
        [0, second, ..] if *second != 0 => encoding_rs::UTF_16BE,
        [first, 0, ..] if *first != 0 => encoding_rs::UTF_16LE,
        _ => encoding_rs::UTF_8,
    }
}
2015-05-24 17:34:18 +00:00
// Generate a by-value accessor named `$name` for the `Copy` payload `$t` of `Yaml::$yt`.
macro_rules! define_as {
    ($name:ident, $t:ident, $yt:ident) => {
        /// Copy the inner object out of the YAML enum if it is a `$t`.
        ///
        /// # Return
        /// `Some($t)` holding a copy of the contained `$t` when the variant of `self` is
        /// `Yaml::$yt`, `None` otherwise.
        #[must_use]
        pub fn $name(&self) -> Option<$t> {
            if let Yaml::$yt(inner) = *self {
                Some(inner)
            } else {
                None
            }
        }
    };
}
// Generate a borrowing accessor named `$name` returning `$t` for the payload of `Yaml::$yt`.
macro_rules! define_as_ref {
    ($name:ident, $t:ty, $yt:ident) => {
        /// Borrow the inner object of the YAML enum if it is a `$t`.
        ///
        /// # Return
        /// `Some(&$t)` referring to the contained `$t` when the variant of `self` is
        /// `Yaml::$yt`, `None` otherwise.
        #[must_use]
        pub fn $name(&self) -> Option<$t> {
            match self {
                Yaml::$yt(inner) => Some(inner),
                _ => None,
            }
        }
    };
}
// Generate a mutably borrowing accessor named `$name` returning `$t` for the payload of
// `Yaml::$yt`.
macro_rules! define_as_mut_ref {
    ($name:ident, $t:ty, $yt:ident) => {
        /// Mutably borrow the inner object of the YAML enum if it is a `$t`.
        ///
        /// # Return
        /// `Some(&mut $t)` referring to the contained `$t` when the variant of `self` is
        /// `Yaml::$yt`, `None` otherwise.
        #[must_use]
        pub fn $name(&mut self) -> Option<$t> {
            match self {
                Yaml::$yt(inner) => Some(inner),
                _ => None,
            }
        }
    };
}
// Generate a consuming accessor named `$name` returning the payload `$t` of `Yaml::$yt`.
macro_rules! define_into {
    ($name:ident, $t:ty, $yt:ident) => {
        /// Take the inner object out of the YAML enum if it is a `$t`.
        ///
        /// # Return
        /// `Some($t)` holding the contained `$t` when the variant of `self` is `Yaml::$yt`,
        /// `None` otherwise.
        #[must_use]
        pub fn $name(self) -> Option<$t> {
            if let Yaml::$yt(inner) = self {
                Some(inner)
            } else {
                None
            }
        }
    };
}
2015-05-24 17:34:18 +00:00
impl Yaml {
define_as!(as_bool, bool, Boolean);
2015-05-30 14:39:50 +00:00
define_as!(as_i64, i64, Integer);
2015-05-24 17:34:18 +00:00
define_as_ref!(as_str, &str, String);
define_as_ref!(as_hash, &Hash, Hash);
define_as_ref!(as_vec, &Array, Array);
define_as_mut_ref!(as_mut_hash, &mut Hash, Hash);
define_as_mut_ref!(as_mut_vec, &mut Array, Array);
define_into!(into_bool, bool, Boolean);
2016-08-08 21:52:24 +00:00
define_into!(into_i64, i64, Integer);
define_into!(into_string, String, String);
define_into!(into_hash, Hash, Hash);
define_into!(into_vec, Array, Array);
/// Return whether `self` is a [`Yaml::Null`] node.
2023-08-11 23:54:46 +00:00
#[must_use]
2015-05-24 17:34:18 +00:00
pub fn is_null(&self) -> bool {
2023-08-11 23:54:46 +00:00
matches!(*self, Yaml::Null)
2015-05-24 17:34:18 +00:00
}
/// Return whether `self` is a [`Yaml::BadValue`] node.
2023-08-11 23:54:46 +00:00
#[must_use]
2015-05-24 18:16:28 +00:00
pub fn is_badvalue(&self) -> bool {
2023-08-11 23:54:46 +00:00
matches!(*self, Yaml::BadValue)
2015-05-24 18:16:28 +00:00
}
/// Return whether `self` is a [`Yaml::Array`] node.
2023-08-11 23:54:46 +00:00
#[must_use]
pub fn is_array(&self) -> bool {
2023-08-11 23:54:46 +00:00
matches!(*self, Yaml::Array(_))
}
/// Return the `f64` value contained in this YAML node.
///
/// If the node is not a [`Yaml::Real`] YAML node or its contents is not a valid `f64` string,
/// `None` is returned.
2023-08-11 23:54:46 +00:00
#[must_use]
2015-05-30 14:39:50 +00:00
pub fn as_f64(&self) -> Option<f64> {
2023-08-11 23:54:46 +00:00
if let Yaml::Real(ref v) = self {
parse_f64(v)
} else {
None
2015-05-24 17:34:18 +00:00
}
}
/// Return the `f64` value contained in this YAML node.
///
/// If the node is not a [`Yaml::Real`] YAML node or its contents is not a valid `f64` string,
/// `None` is returned.
2023-08-11 23:54:46 +00:00
#[must_use]
pub fn into_f64(self) -> Option<f64> {
self.as_f64()
}
/// If a value is null or otherwise bad (see variants), consume it and
/// replace it with a given value `other`. Otherwise, return self unchanged.
///
/// ```
/// use yaml_rust2::yaml::Yaml;
///
/// assert_eq!(Yaml::BadValue.or(Yaml::Integer(3)), Yaml::Integer(3));
/// assert_eq!(Yaml::Integer(3).or(Yaml::BadValue), Yaml::Integer(3));
/// ```
#[must_use]
pub fn or(self, other: Self) -> Self {
match self {
Yaml::BadValue | Yaml::Null => other,
this => this,
}
}
/// See `or` for behavior. This performs the same operations, but with
/// borrowed values for less linear pipelines.
#[must_use]
pub fn borrowed_or<'a>(&'a self, other: &'a Self) -> &'a Self {
match self {
Yaml::BadValue | Yaml::Null => other,
this => this,
}
}
}
2015-05-24 18:16:28 +00:00
2023-08-11 23:54:46 +00:00
#[cfg_attr(feature = "cargo-clippy", allow(clippy::should_implement_trait))]
impl Yaml {
/// Convert a string to a [`Yaml`] node.
///
/// [`Yaml`] does not implement [`std::str::FromStr`] since conversion may not fail. This
/// function falls back to [`Yaml::String`] if nothing else matches.
///
/// # Examples
/// ```
2024-02-08 06:12:14 +00:00
/// # use yaml_rust2::yaml::Yaml;
/// assert!(matches!(Yaml::from_str("42"), Yaml::Integer(42)));
/// assert!(matches!(Yaml::from_str("0x2A"), Yaml::Integer(42)));
/// assert!(matches!(Yaml::from_str("0o52"), Yaml::Integer(42)));
/// assert!(matches!(Yaml::from_str("~"), Yaml::Null));
/// assert!(matches!(Yaml::from_str("null"), Yaml::Null));
/// assert!(matches!(Yaml::from_str("true"), Yaml::Boolean(true)));
/// assert!(matches!(Yaml::from_str("3.14"), Yaml::Real(_)));
/// assert!(matches!(Yaml::from_str("foo"), Yaml::String(_)));
/// ```
2023-08-11 23:54:46 +00:00
#[must_use]
2015-05-30 14:39:50 +00:00
pub fn from_str(v: &str) -> Yaml {
2023-08-11 23:54:46 +00:00
if let Some(number) = v.strip_prefix("0x") {
if let Ok(i) = i64::from_str_radix(number, 16) {
2020-06-01 12:34:13 +00:00
return Yaml::Integer(i);
2016-02-07 21:52:20 +00:00
}
2023-08-11 23:54:46 +00:00
} else if let Some(number) = v.strip_prefix("0o") {
if let Ok(i) = i64::from_str_radix(number, 8) {
2020-06-01 12:34:13 +00:00
return Yaml::Integer(i);
2016-02-07 21:52:20 +00:00
}
2023-08-11 23:54:46 +00:00
} else if let Some(number) = v.strip_prefix('+') {
if let Ok(i) = number.parse::<i64>() {
2020-06-01 12:34:13 +00:00
return Yaml::Integer(i);
}
2016-02-07 22:21:05 +00:00
}
2015-05-30 14:39:50 +00:00
match v {
"~" | "null" => Yaml::Null,
"true" => Yaml::Boolean(true),
"false" => Yaml::Boolean(false),
2023-08-11 23:54:46 +00:00
_ => {
if let Ok(integer) = v.parse::<i64>() {
Yaml::Integer(integer)
} else if parse_f64(v).is_some() {
Yaml::Real(v.to_owned())
} else {
Yaml::String(v.to_owned())
}
}
2015-05-30 14:39:50 +00:00
}
2015-05-24 18:16:28 +00:00
}
2015-05-24 17:34:18 +00:00
}
2015-05-24 18:16:28 +00:00
/// The node handed out by the `Index` implementations when the requested node is missing.
static BAD_VALUE: Yaml = Yaml::BadValue;

impl<'a> Index<&'a str> for Yaml {
    type Output = Yaml;

    /// Look up the value stored under the string key `idx`.
    ///
    /// A reference to a `Yaml::BadValue` node is returned if `self` is not a mapping or if
    /// the mapping has no such key.
    fn index(&self, idx: &'a str) -> &Yaml {
        let key = Yaml::String(idx.to_owned());
        self.as_hash()
            .and_then(|mapping| mapping.get(&key))
            .unwrap_or(&BAD_VALUE)
    }
}
impl<'a> IndexMut<&'a str> for Yaml {
    /// Mutably access the value stored under the string key `idx`.
    ///
    /// # Panics
    /// This function panics if `self` is not a [`Yaml::Hash`], or if the mapping does not
    /// contain [`Yaml::String`]`(idx)` as a key.
    fn index_mut(&mut self, idx: &'a str) -> &mut Yaml {
        let key = Yaml::String(idx.to_owned());
        match self {
            Yaml::Hash(mapping) => mapping.get_mut(&key).unwrap(),
            _ => panic!("Not a hash type"),
        }
    }
}
2015-05-24 18:16:28 +00:00
impl Index<usize> for Yaml {
type Output = Yaml;
fn index(&self, idx: usize) -> &Yaml {
if let Some(v) = self.as_vec() {
v.get(idx).unwrap_or(&BAD_VALUE)
} else if let Some(v) = self.as_hash() {
2024-02-08 06:12:14 +00:00
let key = Yaml::Integer(i64::try_from(idx).unwrap());
v.get(&key).unwrap_or(&BAD_VALUE)
} else {
&BAD_VALUE
2015-05-24 18:16:28 +00:00
}
}
}
impl IndexMut<usize> for Yaml {
    /// Perform indexing if `self` is a sequence or a mapping.
    ///
    /// # Panics
    /// This function panics if the index given is out of range (as per [`IndexMut`]). If
    /// `self` is a [`Yaml::Array`], this is when the index is bigger or equal to the length
    /// of the underlying `Vec`. If `self` is a [`Yaml::Hash`], this is when the mapping does
    /// not contain [`Yaml::Integer`]`(idx)` as a key.
    ///
    /// This function also panics if `self` is not a [`Yaml::Array`] nor a [`Yaml::Hash`].
    fn index_mut(&mut self, idx: usize) -> &mut Yaml {
        match self {
            Yaml::Array(sequence) => &mut sequence[idx],
            Yaml::Hash(mapping) => {
                let integer_key = i64::try_from(idx).unwrap();
                mapping.get_mut(&Yaml::Integer(integer_key)).unwrap()
            }
            _ => panic!("Attempting to index but `self` is not a sequence nor a mapping"),
        }
    }
}
2016-08-08 21:31:36 +00:00
impl IntoIterator for Yaml {
type Item = Yaml;
type IntoIter = YamlIter;
fn into_iter(self) -> Self::IntoIter {
2016-09-22 08:54:51 +00:00
YamlIter {
2023-08-11 23:54:46 +00:00
yaml: self.into_vec().unwrap_or_default().into_iter(),
2016-09-22 08:54:51 +00:00
}
2016-08-08 21:31:36 +00:00
}
}
/// An iterator over a [`Yaml`] node.
2016-08-08 21:31:36 +00:00
pub struct YamlIter {
yaml: std::vec::IntoIter<Yaml>,
2016-08-08 21:31:36 +00:00
}
impl Iterator for YamlIter {
    type Item = Yaml;

    /// Yield the next element, or `None` once every element has been returned.
    fn next(&mut self) -> Option<Yaml> {
        let remaining = &mut self.yaml;
        remaining.next()
    }
}
#[cfg(test)]
mod test {
    use super::{YAMLDecodingTrap, Yaml, YamlDecoder};

    /// A UTF-8 input starting with a BOM is decoded and loaded.
    #[test]
    fn test_read_bom() {
        let s = b"\xef\xbb\xbf---
a: 1
b: 2.2
c: [1, 2]
";
        let out = YamlDecoder::read(s as &[u8]).decode().unwrap();
        let doc = &out[0];
        assert_eq!(doc["a"].as_i64().unwrap(), 1i64);
        assert!((doc["b"].as_f64().unwrap() - 2.2f64).abs() <= f64::EPSILON);
        assert_eq!(doc["c"][1].as_i64().unwrap(), 2i64);
        assert!(doc["d"][0].is_badvalue());
    }

    /// A UTF-16LE input starting with a BOM is decoded and loaded.
    #[test]
    fn test_read_utf16le() {
        let s = b"\xff\xfe-\x00-\x00-\x00
\x00a\x00:\x00 \x001\x00
\x00b\x00:\x00 \x002\x00.\x002\x00
\x00c\x00:\x00 \x00[\x001\x00,\x00 \x002\x00]\x00
\x00";
        let out = YamlDecoder::read(s as &[u8]).decode().unwrap();
        let doc = &out[0];
        println!("GOT: {doc:?}");
        assert_eq!(doc["a"].as_i64().unwrap(), 1i64);
        // Compare the absolute difference: without `.abs()` any value below 2.2 would pass.
        assert!((doc["b"].as_f64().unwrap() - 2.2f64).abs() <= f64::EPSILON);
        assert_eq!(doc["c"][1].as_i64().unwrap(), 2i64);
        assert!(doc["d"][0].is_badvalue());
    }

    /// A UTF-16BE input starting with a BOM is decoded and loaded.
    #[test]
    fn test_read_utf16be() {
        let s = b"\xfe\xff\x00-\x00-\x00-\x00
\x00a\x00:\x00 \x001\x00
\x00b\x00:\x00 \x002\x00.\x002\x00
\x00c\x00:\x00 \x00[\x001\x00,\x00 \x002\x00]\x00
";
        let out = YamlDecoder::read(s as &[u8]).decode().unwrap();
        let doc = &out[0];
        println!("GOT: {doc:?}");
        assert_eq!(doc["a"].as_i64().unwrap(), 1i64);
        assert!((doc["b"].as_f64().unwrap() - 2.2f64).abs() <= f64::EPSILON);
        assert_eq!(doc["c"][1].as_i64().unwrap(), 2i64);
        assert!(doc["d"][0].is_badvalue());
    }

    /// A UTF-16LE input without a BOM is recognized from its pattern of null bytes.
    #[test]
    fn test_read_utf16le_nobom() {
        let s = b"-\x00-\x00-\x00
\x00a\x00:\x00 \x001\x00
\x00b\x00:\x00 \x002\x00.\x002\x00
\x00c\x00:\x00 \x00[\x001\x00,\x00 \x002\x00]\x00
\x00";
        let out = YamlDecoder::read(s as &[u8]).decode().unwrap();
        let doc = &out[0];
        println!("GOT: {doc:?}");
        assert_eq!(doc["a"].as_i64().unwrap(), 1i64);
        assert!((doc["b"].as_f64().unwrap() - 2.2f64).abs() <= f64::EPSILON);
        assert_eq!(doc["c"][1].as_i64().unwrap(), 2i64);
        assert!(doc["d"][0].is_badvalue());
    }

    /// With the `Ignore` trap, an invalid byte is dropped and loading goes on.
    #[test]
    fn test_read_trap() {
        let s = b"---
a\xa9: 1
b: 2.2
c: [1, 2]
";
        let out = YamlDecoder::read(s as &[u8])
            .encoding_trap(YAMLDecodingTrap::Ignore)
            .decode()
            .unwrap();
        let doc = &out[0];
        println!("GOT: {doc:?}");
        assert_eq!(doc["a"].as_i64().unwrap(), 1i64);
        assert!((doc["b"].as_f64().unwrap() - 2.2f64).abs() <= f64::EPSILON);
        assert_eq!(doc["c"][1].as_i64().unwrap(), 2i64);
        assert!(doc["d"][0].is_badvalue());
    }

    /// `Yaml::or` only replaces null or bad values.
    #[test]
    fn test_or() {
        assert_eq!(Yaml::Null.or(Yaml::Integer(3)), Yaml::Integer(3));
        assert_eq!(Yaml::Integer(3).or(Yaml::Integer(7)), Yaml::Integer(3));
    }
}