Prepare the ground for annotated parsing.

* Make `YamlLoader` generic on the type of the `Node`. This is required
   because deeper nodes need to have annotations too.
 * Add a `LoadableYamlNode` trait, required for YAML node types to be
   loaded by `YamlLoader`. It contains methods required by `YamlLoader`
   during loading.
 * Implement `LoadableYamlNode` for `Yaml`.
 * Take `load_from_str` out of `YamlLoader` for parsing non-annotated
   nodes. This spares every user from having to specify the generics in
   `YamlLoader::<Yaml>::load_from_str`.
This commit is contained in:
Ethiraric 2024-06-13 18:30:03 +02:00
parent 425f00ceb8
commit d2caaf2ab3
11 changed files with 218 additions and 131 deletions

View file

@ -2,7 +2,14 @@
## Upcoming
**Features**
**Breaking Changes**:
- Move `load_from_*` methods out of the `YamlLoader`. `YamlLoader` now has
a generic parameter, and moving those functions out of it spares users from
having to manually specify the generic in `YamlLoader::<Yaml>::load_from_str`.
Manipulating the `YamlLoader` directly was not common.
**Features**:
- ([#19](https://github.com/Ethiraric/yaml-rust2/pull/19)) `Yaml` now
implements `IndexMut<usize>` and `IndexMut<&'a str>`. These functions may not

View file

@ -1,4 +1,4 @@
use saphyr::{Yaml, YamlLoader};
use saphyr::{load_from_str, Yaml};
use std::env;
use std::fs::File;
use std::io::prelude::*;
@ -36,7 +36,7 @@ fn main() {
let mut s = String::new();
f.read_to_string(&mut s).unwrap();
let docs = YamlLoader::load_from_str(&s).unwrap();
let docs = load_from_str(&s).unwrap();
for doc in &docs {
println!("---");
dump_node(doc, 0);

View file

@ -36,9 +36,9 @@ impl From<fmt::Error> for EmitError {
/// The YAML serializer.
///
/// ```
/// # use saphyr::{YamlLoader, YamlEmitter};
/// # use saphyr::{load_from_str, YamlEmitter};
/// let input_string = "a: b\nc: d";
/// let yaml = YamlLoader::load_from_str(input_string).unwrap();
/// let yaml = load_from_str(input_string).unwrap();
///
/// let mut output = String::new();
/// YamlEmitter::new(&mut output).dump(&yaml[0]).unwrap();
@ -159,10 +159,10 @@ impl<'a> YamlEmitter<'a> {
/// # Examples
///
/// ```rust
/// use saphyr::{Yaml, YamlEmitter, YamlLoader};
/// use saphyr::{Yaml, YamlEmitter, load_from_str};
///
/// let input = r#"{foo: "bar!\nbar!", baz: 42}"#;
/// let parsed = YamlLoader::load_from_str(input).unwrap();
/// let parsed = load_from_str(input).unwrap();
/// eprintln!("{:?}", parsed);
///
/// let mut output = String::new();
@ -410,12 +410,11 @@ fn need_quotes(string: &str) -> bool {
#[cfg(test)]
mod test {
use super::YamlEmitter;
use crate::YamlLoader;
#[test]
fn test_multiline_string() {
let input = r#"{foo: "bar!\nbar!", baz: 42}"#;
let parsed = YamlLoader::load_from_str(input).unwrap();
let parsed = crate::load_from_str(input).unwrap();
let mut output = String::new();
let mut emitter = YamlEmitter::new(&mut output);
emitter.multiline_strings(true);

View file

@ -4,7 +4,7 @@ use std::{borrow::Cow, ops::ControlFlow};
use encoding_rs::{Decoder, DecoderResult, Encoding};
use crate::{loader::LoadError, Yaml, YamlLoader};
use crate::{loader::LoadError, Yaml};
/// The signature of the function to call when using [`YAMLDecodingTrap::Call`].
///
@ -102,7 +102,7 @@ impl<T: std::io::Read> YamlDecoder<T> {
// Decode the input buffer.
decode_loop(&buffer, &mut output, &mut decoder, self.trap)?;
YamlLoader::load_from_str(&output).map_err(LoadError::Scan)
crate::load_from_str(&output).map_err(LoadError::Scan)
}
}

View file

@ -21,9 +21,9 @@
//! Parse a string into `Vec<Yaml>` and then serialize it as a YAML string.
//!
//! ```
//! use saphyr::{YamlLoader, YamlEmitter};
//! use saphyr::{load_from_str, YamlEmitter};
//!
//! let docs = YamlLoader::load_from_str("[1, 2, 3]").unwrap();
//! let docs = load_from_str("[1, 2, 3]").unwrap();
//! let doc = &docs[0]; // select the first YAML document
//! assert_eq!(doc[0].as_i64().unwrap(), 1); // access elements by index
//!
@ -55,7 +55,9 @@ mod yaml;
// Re-export main components.
pub use crate::annotated::{AnnotatedArray, AnnotatedHash, YamlData};
pub use crate::emitter::YamlEmitter;
pub use crate::loader::YamlLoader;
pub use crate::loader::{
load_from_iter, load_from_parser, load_from_str, LoadableYamlNode, YamlLoader,
};
pub use crate::yaml::{Array, Hash, Yaml, YamlIter};
#[cfg(feature = "encoding")]

View file

@ -2,26 +2,84 @@
use std::collections::BTreeMap;
use hashlink::LinkedHashMap;
use saphyr_parser::{Event, MarkedEventReceiver, Marker, Parser, ScanError, TScalarStyle, Tag};
use crate::{Hash, Yaml};
/// Main structure for quickly parsing YAML.
/// Load the given string as a set of YAML documents.
///
/// See [`YamlLoader::load_from_str`].
#[derive(Default)]
#[allow(clippy::module_name_repetitions)]
pub struct YamlLoader {
/// The different YAML documents that are loaded.
docs: Vec<Yaml>,
// states
// (current node, anchor_id) tuple
doc_stack: Vec<(Yaml, usize)>,
key_stack: Vec<Yaml>,
anchor_map: BTreeMap<usize, Yaml>,
/// The `source` is interpreted as a stream of YAML documents. Parsing succeeds if and only if
/// every document is parsed successfully: an error in a later document prevents the earlier
/// ones from being returned.
/// # Errors
/// Returns `ScanError` when loading fails.
pub fn load_from_str(source: &str) -> Result<Vec<Yaml>, ScanError> {
    // Feed the string to the iterator-based loader one `char` at a time.
    let chars = source.chars();
    load_from_iter(chars)
}
impl MarkedEventReceiver for YamlLoader {
/// Load the characters yielded by the given iterator as a set of YAML documents.
///
/// The `source` is interpreted as a stream of YAML documents. Parsing succeeds if and only if
/// every document is parsed successfully: an error in a later document prevents the earlier
/// ones from being returned.
/// # Errors
/// Returns `ScanError` when loading fails.
pub fn load_from_iter<I: Iterator<Item = char>>(source: I) -> Result<Vec<Yaml>, ScanError> {
    // The parser is only needed for the duration of this call.
    load_from_parser(&mut Parser::new(source))
}
/// Load the contents produced by the specified `Parser` as a set of YAML documents.
///
/// Parsing succeeds if and only if every document is parsed successfully: an error in a later
/// document prevents the earlier ones from being returned.
/// # Errors
/// Returns `ScanError` when loading fails.
pub fn load_from_parser<I: Iterator<Item = char>>(
    parser: &mut Parser<I>,
) -> Result<Vec<Yaml>, ScanError> {
    // Non-annotated loading always builds plain `Yaml` nodes.
    let mut receiver = YamlLoader::<Yaml>::default();
    // NOTE(review): the `true` flag presumably asks the parser to load every document of the
    // stream rather than only the first one; confirm against `saphyr_parser`.
    parser.load(&mut receiver, true)?;
    Ok(receiver.docs)
}
/// Main structure for quickly parsing YAML.
///
/// The loader is generic over the type of node it builds, which must implement
/// [`LoadableYamlNode`]. It receives parser events and assembles them into nodes.
///
/// See [`load_from_str`].
#[allow(clippy::module_name_repetitions)]
pub struct YamlLoader<Node>
where
Node: LoadableYamlNode,
{
/// The different YAML documents that are loaded.
docs: Vec<Node>,
// Loading state.
/// Stack of the nodes currently being built, as `(current node, anchor_id)` tuples.
doc_stack: Vec<(Node, usize)>,
/// One slot per mapping currently being built. A slot holds the key waiting for its value,
/// or a `BadValue` node when the next inserted node is to be used as a key.
key_stack: Vec<Node>,
/// Clones of the anchored nodes, indexed by anchor id, looked up when an alias is met.
anchor_map: BTreeMap<usize, Node>,
}
// `#[derive(Default)]` would add a `Node: Default` bound to the generated impl. The loader only
// needs empty containers, so `Default` is implemented by hand without that bound.
impl<Node> Default for YamlLoader<Node>
where
    Node: LoadableYamlNode,
{
    /// Create a loader with no loaded document and empty loading state.
    fn default() -> Self {
        Self {
            docs: Vec::new(),
            doc_stack: Vec::new(),
            key_stack: Vec::new(),
            anchor_map: BTreeMap::new(),
        }
    }
}
impl<Node> MarkedEventReceiver for YamlLoader<Node>
where
Node: LoadableYamlNode,
{
fn on_event(&mut self, ev: Event, _: Marker) {
// println!("EV {:?}", ev);
match ev {
@ -31,21 +89,21 @@ impl MarkedEventReceiver for YamlLoader {
Event::DocumentEnd => {
match self.doc_stack.len() {
// empty document
0 => self.docs.push(Yaml::BadValue),
0 => self.docs.push(Yaml::BadValue.into()),
1 => self.docs.push(self.doc_stack.pop().unwrap().0),
_ => unreachable!(),
}
}
Event::SequenceStart(aid, _) => {
self.doc_stack.push((Yaml::Array(Vec::new()), aid));
self.doc_stack.push((Yaml::Array(Vec::new()).into(), aid));
}
Event::SequenceEnd => {
let node = self.doc_stack.pop().unwrap();
self.insert_new_node(node);
}
Event::MappingStart(aid, _) => {
self.doc_stack.push((Yaml::Hash(Hash::new()), aid));
self.key_stack.push(Yaml::BadValue);
self.doc_stack.push((Yaml::Hash(Hash::new()).into(), aid));
self.key_stack.push(Yaml::BadValue.into());
}
Event::MappingEnd => {
self.key_stack.pop().unwrap();
@ -91,17 +149,47 @@ impl MarkedEventReceiver for YamlLoader {
Yaml::from_str(&v)
};
self.insert_new_node((node, aid));
self.insert_new_node((node.into(), aid));
}
Event::Alias(id) => {
let n = match self.anchor_map.get(&id) {
Some(v) => v.clone(),
None => Yaml::BadValue,
None => Yaml::BadValue.into(),
};
self.insert_new_node((n, 0));
}
}
// println!("DOC {:?}", self.doc_stack);
}
}
impl<Node> YamlLoader<Node>
where
    Node: LoadableYamlNode,
{
    /// Insert a freshly built node into the document under construction.
    ///
    /// `node` is a `(node, anchor_id)` tuple. If the anchor id is non-zero, a clone of the node
    /// is recorded in the anchor map. The node is then attached to the innermost node being
    /// built:
    ///   - if no node is being built, it becomes the root and is pushed onto the stack;
    ///   - if the parent is an array, it is appended to it;
    ///   - if the parent is a hash, it is stored as the pending key when no key is pending, or
    ///     inserted as the value of the pending key otherwise.
    ///
    /// # Panics
    /// Panics if the innermost node being built is neither an array nor a hash, or if a hash is
    /// being built without a matching key slot. Both indicate an inconsistent event stream.
    fn insert_new_node(&mut self, node: (Node, usize)) {
        // valid anchor id starts from 1
        if node.1 > 0 {
            self.anchor_map.insert(node.1, node.0.clone());
        }
        if let Some(parent) = self.doc_stack.last_mut() {
            let parent_node = &mut parent.0;
            if parent_node.is_array() {
                parent_node.array_mut().push(node.0);
            } else if parent_node.is_hash() {
                let cur_key = self
                    .key_stack
                    .last_mut()
                    .expect("a key slot is pushed for every mapping being built");
                if cur_key.is_badvalue() {
                    // No key is pending: the current node is a key.
                    *cur_key = node.0;
                } else {
                    // A key is pending: the current node is its value. `take` resets the slot
                    // to `BadValue` so that the next node is read as a key again.
                    parent_node.hash_mut().insert(cur_key.take(), node.0);
                }
            } else {
                // Only arrays and hashes can contain other nodes. Failing loudly here avoids
                // silently dropping `node`.
                unreachable!();
            }
        } else {
            // Nothing is being built yet: this node is the root of the document.
            self.doc_stack.push(node);
        }
    }
}
@ -142,76 +230,70 @@ impl std::fmt::Display for LoadError {
}
}
impl YamlLoader {
fn insert_new_node(&mut self, node: (Yaml, usize)) {
// valid anchor id starts from 1
if node.1 > 0 {
self.anchor_map.insert(node.1, node.0.clone());
}
if self.doc_stack.is_empty() {
self.doc_stack.push(node);
} else {
let parent = self.doc_stack.last_mut().unwrap();
match *parent {
(Yaml::Array(ref mut v), _) => v.push(node.0),
(Yaml::Hash(ref mut h), _) => {
let cur_key = self.key_stack.last_mut().unwrap();
// current node is a key
if cur_key.is_badvalue() {
*cur_key = node.0;
// current node is a value
} else {
let mut newkey = Yaml::BadValue;
std::mem::swap(&mut newkey, cur_key);
h.insert(newkey, node.0);
}
}
_ => unreachable!(),
}
}
}
/// Load the given string as a set of YAML documents.
/// A trait providing methods used by the [`YamlLoader`].
///
/// The `source` is interpreted as YAML documents and is parsed. Parsing succeeds if and only
/// if all documents are parsed successfully. An error in a latter document prevents the former
/// from being returned.
/// # Errors
/// Returns `ScanError` when loading fails.
pub fn load_from_str(source: &str) -> Result<Vec<Yaml>, ScanError> {
Self::load_from_iter(source.chars())
}
/// This trait must be implemented on YAML node types (i.e.: [`Yaml`] and annotated YAML nodes). It
/// provides the necessary methods for [`YamlLoader`] to load data into the node.
pub trait LoadableYamlNode: From<Yaml> + Clone + std::hash::Hash + Eq {
/// Return whether the YAML node is an array.
fn is_array(&self) -> bool;
/// Load the contents of the given iterator as a set of YAML documents.
/// Return whether the YAML node is a hash.
fn is_hash(&self) -> bool;
/// Return whether the YAML node is `BadValue`.
fn is_badvalue(&self) -> bool;
/// Retrieve the array variant of the YAML node.
///
/// The `source` is interpreted as YAML documents and is parsed. Parsing succeeds if and only
/// if all documents are parsed successfully. An error in a latter document prevents the former
/// from being returned.
/// # Errors
/// Returns `ScanError` when loading fails.
pub fn load_from_iter<I: Iterator<Item = char>>(source: I) -> Result<Vec<Yaml>, ScanError> {
let mut parser = Parser::new(source);
Self::load_from_parser(&mut parser)
}
/// # Panics
/// This function panics if `self` is not an array.
fn array_mut(&mut self) -> &mut Vec<Self>;
/// Load the contents from the specified Parser as a set of YAML documents.
/// Retrieve the hash variant of the YAML node.
///
/// Parsing succeeds if and only if all documents are parsed successfully.
/// An error in a latter document prevents the former from being returned.
/// # Errors
/// Returns `ScanError` when loading fails.
pub fn load_from_parser<I: Iterator<Item = char>>(
parser: &mut Parser<I>,
) -> Result<Vec<Yaml>, ScanError> {
let mut loader = YamlLoader::default();
parser.load(&mut loader, true)?;
Ok(loader.docs)
}
/// # Panics
/// This function panics if `self` is not a hash.
fn hash_mut(&mut self) -> &mut LinkedHashMap<Self, Self>;
/// Return a reference to the parsed Yaml documents.
/// Take the contained node out of `Self`, leaving a `BadValue` in its place.
#[must_use]
pub fn documents(&self) -> &[Yaml] {
&self.docs
fn take(&mut self) -> Self;
}
impl LoadableYamlNode for Yaml {
    /// Return whether this node is the `Array` variant.
    fn is_array(&self) -> bool {
        matches!(*self, Yaml::Array(..))
    }

    /// Return whether this node is the `Hash` variant.
    fn is_hash(&self) -> bool {
        matches!(*self, Yaml::Hash(..))
    }

    /// Return whether this node is the `BadValue` variant.
    fn is_badvalue(&self) -> bool {
        matches!(*self, Yaml::BadValue)
    }

    /// Borrow the elements of an `Array` node mutably.
    ///
    /// # Panics
    /// Panics if the node is not an array.
    fn array_mut(&mut self) -> &mut Vec<Self> {
        match self {
            Yaml::Array(items) => items,
            _ => panic!("Called array_mut on a non-array"),
        }
    }

    /// Borrow the entries of a `Hash` node mutably.
    ///
    /// # Panics
    /// Panics if the node is not a hash.
    fn hash_mut(&mut self) -> &mut LinkedHashMap<Self, Self> {
        match self {
            Yaml::Hash(entries) => entries,
            _ => panic!("Called hash_mut on a non-hash"),
        }
    }

    /// Move the node out of `self`, leaving `Yaml::BadValue` in its place.
    fn take(&mut self) -> Self {
        std::mem::replace(self, Yaml::BadValue)
    }
}

View file

@ -1,7 +1,7 @@
#![allow(clippy::bool_assert_comparison)]
#![allow(clippy::float_cmp)]
use saphyr::{Yaml, YamlEmitter, YamlLoader};
use saphyr::{load_from_str, Yaml, YamlEmitter};
#[test]
fn test_api() {
@ -29,7 +29,7 @@ fn test_api() {
- name: Staff
damage: 3
";
let docs = YamlLoader::load_from_str(s).unwrap();
let docs = load_from_str(s).unwrap();
let doc = &docs[0];
assert_eq!(doc[0]["name"].as_str().unwrap(), "Ogre");
@ -50,7 +50,7 @@ a: 1
b: 2.2
c: [1, 2]
";
let out = YamlLoader::load_from_str(s).unwrap();
let out = load_from_str(s).unwrap();
let doc = &out[0];
assert_eq!(doc["a"].as_i64().unwrap(), 1i64);
assert_eq!(doc["b"].as_f64().unwrap(), 2.2f64);
@ -66,7 +66,7 @@ a1: &DEFAULT
b2: d
a2: *DEFAULT
";
let out = YamlLoader::load_from_str(s).unwrap();
let out = load_from_str(s).unwrap();
let doc = &out[0];
assert_eq!(doc["a2"]["b1"].as_i64().unwrap(), 4);
}
@ -78,7 +78,7 @@ a1: &DEFAULT
b1: 4
b2: *DEFAULT
";
let out = YamlLoader::load_from_str(s).unwrap();
let out = load_from_str(s).unwrap();
let doc = &out[0];
assert_eq!(doc["a1"]["b2"], Yaml::BadValue);
}
@ -114,7 +114,7 @@ fn test_plain_datatype() {
- +12345
- [ true, false ]
";
let out = YamlLoader::load_from_str(s).unwrap();
let out = load_from_str(s).unwrap();
let doc = &out[0];
assert_eq!(doc[0].as_str().unwrap(), "string");
@ -171,7 +171,7 @@ fn test_plain_datatype_with_into_methods() {
- .NAN
- !!float .INF
";
let mut out = YamlLoader::load_from_str(s).unwrap().into_iter();
let mut out = load_from_str(s).unwrap().into_iter();
let mut doc = out.next().unwrap().into_iter();
assert_eq!(doc.next().unwrap().into_string().unwrap(), "string");
@ -203,7 +203,7 @@ b: ~
a: ~
c: ~
";
let out = YamlLoader::load_from_str(s).unwrap();
let out = load_from_str(s).unwrap();
let first = out.into_iter().next().unwrap();
let mut iter = first.into_hash().unwrap().into_iter();
assert_eq!(
@ -229,7 +229,7 @@ fn test_integer_key() {
1:
important: false
";
let out = YamlLoader::load_from_str(s).unwrap();
let out = load_from_str(s).unwrap();
let first = out.into_iter().next().unwrap();
assert_eq!(first[0]["important"].as_bool().unwrap(), true);
}

View file

@ -1,4 +1,4 @@
use saphyr::{YamlEmitter, YamlLoader};
use saphyr::{load_from_str, YamlEmitter};
#[allow(clippy::similar_names)]
#[test]
@ -16,7 +16,7 @@ a4:
- 2
";
let docs = YamlLoader::load_from_str(s).unwrap();
let docs = load_from_str(s).unwrap();
let doc = &docs[0];
let mut writer = String::new();
{
@ -25,7 +25,7 @@ a4:
}
println!("original:\n{s}");
println!("emitted:\n{writer}");
let docs_new = match YamlLoader::load_from_str(&writer) {
let docs_new = match load_from_str(&writer) {
Ok(y) => y,
Err(e) => panic!("{}", e),
};
@ -55,14 +55,14 @@ products:
{}:
empty hash key
";
let docs = YamlLoader::load_from_str(s).unwrap();
let docs = load_from_str(s).unwrap();
let doc = &docs[0];
let mut writer = String::new();
{
let mut emitter = YamlEmitter::new(&mut writer);
emitter.dump(doc).unwrap();
}
let docs_new = match YamlLoader::load_from_str(&writer) {
let docs_new = match load_from_str(&writer) {
Ok(y) => y,
Err(e) => panic!("{}", e),
};
@ -106,7 +106,7 @@ x: test
y: avoid quoting here
z: string with spaces"#;
let docs = YamlLoader::load_from_str(s).unwrap();
let docs = load_from_str(s).unwrap();
let doc = &docs[0];
let mut writer = String::new();
{
@ -164,7 +164,7 @@ null0: ~
bool0: true
bool1: false"#;
let docs = YamlLoader::load_from_str(input).unwrap();
let docs = load_from_str(input).unwrap();
let doc = &docs[0];
let mut writer = String::new();
{
@ -212,7 +212,7 @@ e:
h: []"
};
let docs = YamlLoader::load_from_str(s).unwrap();
let docs = load_from_str(s).unwrap();
let doc = &docs[0];
let mut writer = String::new();
{
@ -234,7 +234,7 @@ a:
- - e
- f";
let docs = YamlLoader::load_from_str(s).unwrap();
let docs = load_from_str(s).unwrap();
let doc = &docs[0];
let mut writer = String::new();
{
@ -258,7 +258,7 @@ a:
- - f
- - e";
let docs = YamlLoader::load_from_str(s).unwrap();
let docs = load_from_str(s).unwrap();
let doc = &docs[0];
let mut writer = String::new();
{
@ -280,7 +280,7 @@ a:
d:
e: f";
let docs = YamlLoader::load_from_str(s).unwrap();
let docs = load_from_str(s).unwrap();
let doc = &docs[0];
let mut writer = String::new();
{

View file

@ -3,7 +3,7 @@ extern crate quickcheck;
use quickcheck::TestResult;
use saphyr::{Yaml, YamlEmitter, YamlLoader};
use saphyr::{load_from_str, Yaml, YamlEmitter};
quickcheck! {
fn test_check_weird_keys(xs: Vec<String>) -> TestResult {
@ -13,7 +13,7 @@ quickcheck! {
let mut emitter = YamlEmitter::new(&mut out_str);
emitter.dump(&input).unwrap();
}
match YamlLoader::load_from_str(&out_str) {
match load_from_str(&out_str) {
Ok(output) => TestResult::from_bool(output.len() == 1 && input == output[0]),
Err(err) => TestResult::error(err.to_string()),
}

View file

@ -1,4 +1,4 @@
use saphyr::{Hash, Yaml, YamlEmitter, YamlLoader};
use saphyr::{load_from_str, Hash, Yaml, YamlEmitter};
#[test]
fn test_mapvec_legal() {
@ -53,5 +53,5 @@ fn test_mapvec_legal() {
// - 6
// ```
YamlLoader::load_from_str(&out_str).unwrap();
load_from_str(&out_str).unwrap();
}

View file

@ -1,10 +1,10 @@
use saphyr::{Yaml, YamlEmitter, YamlLoader};
use saphyr::{load_from_str, Yaml, YamlEmitter};
fn roundtrip(original: &Yaml) {
let mut emitted = String::new();
YamlEmitter::new(&mut emitted).dump(original).unwrap();
let documents = YamlLoader::load_from_str(&emitted).unwrap();
let documents = load_from_str(&emitted).unwrap();
println!("emitted {emitted}");
assert_eq!(documents.len(), 1);
@ -12,12 +12,12 @@ fn roundtrip(original: &Yaml) {
}
fn double_roundtrip(original: &str) {
let parsed = YamlLoader::load_from_str(original).unwrap();
let parsed = load_from_str(original).unwrap();
let mut serialized = String::new();
YamlEmitter::new(&mut serialized).dump(&parsed[0]).unwrap();
let reparsed = YamlLoader::load_from_str(&serialized).unwrap();
let reparsed = load_from_str(&serialized).unwrap();
assert_eq!(parsed, reparsed);
}
@ -55,15 +55,12 @@ fn test_numberlike_strings() {
/// Example from <https://github.com/chyh1990/yaml-rust/issues/133>
#[test]
fn test_issue133() {
let doc = YamlLoader::load_from_str("\"0x123\"")
.unwrap()
.pop()
.unwrap();
let doc = load_from_str("\"0x123\"").unwrap().pop().unwrap();
assert_eq!(doc, Yaml::String("0x123".to_string()));
let mut out_str = String::new();
YamlEmitter::new(&mut out_str).dump(&doc).unwrap();
let doc2 = YamlLoader::load_from_str(&out_str).unwrap().pop().unwrap();
let doc2 = load_from_str(&out_str).unwrap().pop().unwrap();
assert_eq!(doc, doc2); // This failed because the type has changed to a number now
}