saphyr-serde/saphyr/src/loader.rs

228 lines
8 KiB
Rust
Raw Normal View History

2024-06-10 16:05:25 +00:00
//! The default loader.
use std::collections::BTreeMap;
use saphyr_parser::{Event, MarkedEventReceiver, Marker, Parser, ScanError, TScalarStyle, Tag};
use crate::{Hash, Yaml};
/// Main structure for quickly parsing YAML.
///
/// See [`YamlLoader::load_from_str`].
#[derive(Default)]
#[allow(clippy::module_name_repetitions)]
pub struct YamlLoader {
/// The different YAML documents that are loaded.
docs: Vec<Yaml>,
// states
// (current node, anchor_id) tuple
doc_stack: Vec<(Yaml, usize)>,
key_stack: Vec<Yaml>,
anchor_map: BTreeMap<usize, Yaml>,
}
impl MarkedEventReceiver for YamlLoader {
fn on_event(&mut self, ev: Event, _: Marker) {
// println!("EV {:?}", ev);
match ev {
Event::DocumentStart | Event::Nothing | Event::StreamStart | Event::StreamEnd => {
// do nothing
}
Event::DocumentEnd => {
match self.doc_stack.len() {
// empty document
0 => self.docs.push(Yaml::BadValue),
1 => self.docs.push(self.doc_stack.pop().unwrap().0),
_ => unreachable!(),
}
}
Event::SequenceStart(aid, _) => {
self.doc_stack.push((Yaml::Array(Vec::new()), aid));
}
Event::SequenceEnd => {
let node = self.doc_stack.pop().unwrap();
self.insert_new_node(node);
}
Event::MappingStart(aid, _) => {
self.doc_stack.push((Yaml::Hash(Hash::new()), aid));
self.key_stack.push(Yaml::BadValue);
}
Event::MappingEnd => {
self.key_stack.pop().unwrap();
let node = self.doc_stack.pop().unwrap();
self.insert_new_node(node);
}
Event::Scalar(v, style, aid, tag) => {
let node = if style != TScalarStyle::Plain {
Yaml::String(v)
} else if let Some(Tag {
ref handle,
ref suffix,
}) = tag
{
if handle == "tag:yaml.org,2002:" {
match suffix.as_ref() {
"bool" => {
// "true" or "false"
match v.parse::<bool>() {
Err(_) => Yaml::BadValue,
Ok(v) => Yaml::Boolean(v),
}
}
"int" => match v.parse::<i64>() {
Err(_) => Yaml::BadValue,
Ok(v) => Yaml::Integer(v),
},
"float" => match parse_f64(&v) {
Some(_) => Yaml::Real(v),
None => Yaml::BadValue,
},
"null" => match v.as_ref() {
"~" | "null" => Yaml::Null,
_ => Yaml::BadValue,
},
_ => Yaml::String(v),
}
} else {
Yaml::String(v)
}
} else {
// Datatype is not specified, or unrecognized
Yaml::from_str(&v)
};
self.insert_new_node((node, aid));
}
Event::Alias(id) => {
let n = match self.anchor_map.get(&id) {
Some(v) => v.clone(),
None => Yaml::BadValue,
};
self.insert_new_node((n, 0));
}
}
// println!("DOC {:?}", self.doc_stack);
}
}
/// An error that happened when loading a YAML document.
#[derive(Debug)]
pub enum LoadError {
/// An I/O error.
IO(std::io::Error),
/// An error within the scanner. This indicates a malformed YAML input.
Scan(ScanError),
/// A decoding error (e.g.: Invalid UTF-8).
Decode(std::borrow::Cow<'static, str>),
}
impl From<std::io::Error> for LoadError {
fn from(error: std::io::Error) -> Self {
LoadError::IO(error)
}
}
impl std::error::Error for LoadError {
fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
Some(match &self {
LoadError::IO(e) => e,
LoadError::Scan(e) => e,
LoadError::Decode(_) => return None,
})
}
}
impl std::fmt::Display for LoadError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
LoadError::IO(e) => e.fmt(f),
LoadError::Scan(e) => e.fmt(f),
LoadError::Decode(e) => e.fmt(f),
}
}
}
impl YamlLoader {
fn insert_new_node(&mut self, node: (Yaml, usize)) {
// valid anchor id starts from 1
if node.1 > 0 {
self.anchor_map.insert(node.1, node.0.clone());
}
if self.doc_stack.is_empty() {
self.doc_stack.push(node);
} else {
let parent = self.doc_stack.last_mut().unwrap();
match *parent {
(Yaml::Array(ref mut v), _) => v.push(node.0),
(Yaml::Hash(ref mut h), _) => {
let cur_key = self.key_stack.last_mut().unwrap();
// current node is a key
if cur_key.is_badvalue() {
*cur_key = node.0;
// current node is a value
} else {
let mut newkey = Yaml::BadValue;
std::mem::swap(&mut newkey, cur_key);
h.insert(newkey, node.0);
}
}
_ => unreachable!(),
}
}
}
/// Load the given string as a set of YAML documents.
///
/// The `source` is interpreted as YAML documents and is parsed. Parsing succeeds if and only
/// if all documents are parsed successfully. An error in a latter document prevents the former
/// from being returned.
/// # Errors
/// Returns `ScanError` when loading fails.
pub fn load_from_str(source: &str) -> Result<Vec<Yaml>, ScanError> {
Self::load_from_iter(source.chars())
}
/// Load the contents of the given iterator as a set of YAML documents.
///
/// The `source` is interpreted as YAML documents and is parsed. Parsing succeeds if and only
/// if all documents are parsed successfully. An error in a latter document prevents the former
/// from being returned.
/// # Errors
/// Returns `ScanError` when loading fails.
pub fn load_from_iter<I: Iterator<Item = char>>(source: I) -> Result<Vec<Yaml>, ScanError> {
let mut parser = Parser::new(source);
Self::load_from_parser(&mut parser)
}
/// Load the contents from the specified Parser as a set of YAML documents.
///
/// Parsing succeeds if and only if all documents are parsed successfully.
/// An error in a latter document prevents the former from being returned.
/// # Errors
/// Returns `ScanError` when loading fails.
pub fn load_from_parser<I: Iterator<Item = char>>(
parser: &mut Parser<I>,
) -> Result<Vec<Yaml>, ScanError> {
let mut loader = YamlLoader::default();
parser.load(&mut loader, true)?;
Ok(loader.docs)
}
/// Return a reference to the parsed Yaml documents.
#[must_use]
pub fn documents(&self) -> &[Yaml] {
&self.docs
}
}
// parse f64 as Core schema
// See: https://github.com/chyh1990/yaml-rust/issues/51
pub(crate) fn parse_f64(v: &str) -> Option<f64> {
match v {
".inf" | ".Inf" | ".INF" | "+.inf" | "+.Inf" | "+.INF" => Some(f64::INFINITY),
"-.inf" | "-.Inf" | "-.INF" => Some(f64::NEG_INFINITY),
".nan" | "NaN" | ".NAN" => Some(f64::NAN),
_ => v.parse::<f64>().ok(),
}
}