From 842d536cb0fe6c6d399d90dde0014d0d34c8f2d1 Mon Sep 17 00:00:00 2001
From: Ethiraric
Date: Thu, 13 Jun 2024 22:23:05 +0200
Subject: [PATCH] Implement `LoadableYamlNode` for `MarkedYaml`.

A few changes have had to be made to `LoadableYamlNode`:
  * The `From<Yaml>` requirement has been removed as it can be error-prone. It
    was not a direct conversion as it is unable to handle `Yaml::Hash` or
    `Yaml::Array` with a non-empty array/map.
  * Instead, `from_bare_yaml` was added, which does essentially the same as
    `From` but does not leak for users of the library.
  * `with_marker` has been added to populate the marker for the `Node`. The
    function is empty for `Yaml`.

`load_from_*` methods have been added to `MarkedYaml` for convenience.
They load YAML using the markers.

The markers returned from `saphyr-parser` are not all correct, meaning
that tests are kind of useless for now as they will fail due to bugs
outside of the scope of this library.
---
 saphyr/CHANGELOG.md                 |   8 ++
 saphyr/Cargo.toml                   |   2 +-
 saphyr/src/annotated.rs             |   9 +-
 saphyr/src/annotated/marked_yaml.rs | 152 ++++++++++++++++++++++++++++
 saphyr/src/lib.rs                   |   6 +-
 saphyr/src/loader.rs                |  59 ++++++++---
 saphyr/src/yaml.rs                  |   1 +
 7 files changed, 222 insertions(+), 15 deletions(-)
 create mode 100644 saphyr/src/annotated/marked_yaml.rs

diff --git a/saphyr/CHANGELOG.md b/saphyr/CHANGELOG.md
index b3ac065..e87aa0a 100644
--- a/saphyr/CHANGELOG.md
+++ b/saphyr/CHANGELOG.md
@@ -29,6 +29,14 @@
   already use this. Users of the original `yaml-rust` crate may freely disable
   this feature (`cargo <...> --no-default-features`) and lower MSRV to 1.65.0.
 
+- Load with metadata
+
+  The `YamlLoader` now supports adding metadata alongside the nodes. For now,
+  the only one supported is the `Marker`, pointing to the position in the input
+  stream of the start of the node.
+
+  This feature is extensible and should later allow adding comments.
+
 ## v0.8.0
 
 **Breaking Changes**:
diff --git a/saphyr/Cargo.toml b/saphyr/Cargo.toml
index 4b62419..dd5f7a2 100644
--- a/saphyr/Cargo.toml
+++ b/saphyr/Cargo.toml
@@ -22,7 +22,7 @@ encoding = [ "dep:encoding_rs" ]
 
 [dependencies]
 arraydeque = "0.5.1"
-saphyr-parser = "0.0.1"
+saphyr-parser = "0.0.2"
 encoding_rs = { version = "0.8.33", optional = true }
 hashlink = "0.8"
 
diff --git a/saphyr/src/annotated.rs b/saphyr/src/annotated.rs
index 930e37e..21ba8f3 100644
--- a/saphyr/src/annotated.rs
+++ b/saphyr/src/annotated.rs
@@ -1,12 +1,17 @@
 //! Utilities for extracting YAML with certain metadata.
 
+pub mod marked_yaml;
+
 use std::ops::{Index, IndexMut};
 
 use hashlink::LinkedHashMap;
 
 use crate::loader::parse_f64;
 
-/// A YAML node without annotation. See [`Yaml`], you probably want that.
+/// YAML data for nodes that will contain annotations.
+///
+/// If you want a YAML node without annotations, see [`Yaml`].
+/// If you want a YAML node with annotations, see types using [`YamlData`] such as [`MarkedYaml`].
 ///
 /// Unlike [`Yaml`] which only supports storing data, [`YamlData`] allows storing metadata
 /// alongside the YAML data. It is unlikely one would build it directly; it is mostly intended to
@@ -32,6 +37,7 @@ use crate::loader::parse_f64;
 /// * Indexing cannot return `BadValue` and will panic instead.
 ///
 /// [`Yaml`]: crate::Yaml
+/// [`MarkedYaml`]: marked_yaml::MarkedYaml
 #[derive(Clone, PartialEq, PartialOrd, Debug, Eq, Ord, Hash)]
 pub enum YamlData<Node>
 where
@@ -93,6 +99,7 @@ where
     define_is!(is_array, Self::Array(_));
     define_is!(is_badvalue, Self::BadValue);
     define_is!(is_boolean, Self::Boolean(_));
+    define_is!(is_hash, Self::Hash(_));
     define_is!(is_integer, Self::Integer(_));
     define_is!(is_null, Self::Null);
     define_is!(is_real, Self::Real(_));
diff --git a/saphyr/src/annotated/marked_yaml.rs b/saphyr/src/annotated/marked_yaml.rs
new file mode 100644
index 0000000..1c86072
--- /dev/null
+++ b/saphyr/src/annotated/marked_yaml.rs
@@ -0,0 +1,152 @@
+//! A YAML node with position in the source document.
+//!
+//! This is set aside so as to not clutter `annotated.rs`.
+
+use hashlink::LinkedHashMap;
+use saphyr_parser::{Marker, Parser, ScanError};
+
+use crate::{LoadableYamlNode, Yaml, YamlData, YamlLoader};
+
+/// A YAML node with [`Marker`]s pointing to the start of the node.
+///
+/// This structure does not implement functions to operate on the YAML object. To access those,
+/// refer to the [`Self::data`] field.
+#[derive(Clone, Debug)]
+pub struct MarkedYaml {
+    /// The marker pointing to the start of the node.
+    ///
+    /// The marker is relative to the start of the input stream that was given to the parser, not
+    /// to the start of the document within the input stream.
+    pub marker: Marker,
+    /// The YAML contents of the node.
+    pub data: YamlData<MarkedYaml>,
+}
+
+impl MarkedYaml {
+    /// Load the given string as an array of YAML documents.
+    ///
+    /// See the function [`load_from_str`] for more details.
+    ///
+    /// # Errors
+    /// Returns `ScanError` when loading fails.
+    ///
+    /// [`load_from_str`]: `crate::load_from_str`
+    pub fn load_from_str(source: &str) -> Result<Vec<Self>, ScanError> {
+        Self::load_from_iter(source.chars())
+    }
+
+    /// Load the contents of the given iterator as an array of YAML documents.
+    ///
+    /// See the function [`load_from_iter`] for more details.
+    ///
+    /// # Errors
+    /// Returns `ScanError` when loading fails.
+    ///
+    /// [`load_from_iter`]: `crate::load_from_iter`
+    pub fn load_from_iter<I: Iterator<Item = char>>(source: I) -> Result<Vec<Self>, ScanError> {
+        let mut parser = Parser::new(source);
+        Self::load_from_parser(&mut parser)
+    }
+
+    /// Load the contents from the specified [`Parser`] as an array of YAML documents.
+    ///
+    /// See the function [`load_from_parser`] for more details.
+    ///
+    /// # Errors
+    /// Returns `ScanError` when loading fails.
+    ///
+    /// [`load_from_parser`]: `crate::load_from_parser`
+    pub fn load_from_parser<I: Iterator<Item = char>>(
+        parser: &mut Parser<I>,
+    ) -> Result<Vec<Self>, ScanError> {
+        let mut loader = YamlLoader::<Self>::default();
+        parser.load(&mut loader, true)?;
+        Ok(loader.into_documents())
+    }
+}
+
+impl PartialEq for MarkedYaml {
+    fn eq(&self, other: &Self) -> bool {
+        self.data.eq(&other.data)
+    }
+}
+
+// `Eq` (with `Hash`) lets `MarkedYaml` be a `LinkedHashMap` key; both ignore `marker`.
+impl Eq for MarkedYaml {}
+
+impl std::hash::Hash for MarkedYaml {
+    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
+        self.data.hash(state);
+    }
+}
+
+impl From<YamlData<MarkedYaml>> for MarkedYaml {
+    fn from(value: YamlData<MarkedYaml>) -> Self {
+        Self {
+            marker: Marker::default(),
+            data: value,
+        }
+    }
+}
+
+impl LoadableYamlNode for MarkedYaml {
+    fn from_bare_yaml(yaml: Yaml) -> Self {
+        Self {
+            marker: Marker::default(),
+            data: match yaml {
+                Yaml::Real(x) => YamlData::Real(x),
+                Yaml::Integer(x) => YamlData::Integer(x),
+                Yaml::String(x) => YamlData::String(x),
+                Yaml::Boolean(x) => YamlData::Boolean(x),
+                // Array and Hash will always have their container empty.
+                Yaml::Array(_) => YamlData::Array(vec![]),
+                Yaml::Hash(_) => YamlData::Hash(LinkedHashMap::new()),
+                Yaml::Alias(x) => YamlData::Alias(x),
+                Yaml::Null => YamlData::Null,
+                Yaml::BadValue => YamlData::BadValue,
+            },
+        }
+    }
+
+    fn is_array(&self) -> bool {
+        self.data.is_array()
+    }
+
+    fn is_hash(&self) -> bool {
+        self.data.is_hash()
+    }
+
+    fn is_badvalue(&self) -> bool {
+        self.data.is_badvalue()
+    }
+
+    fn array_mut(&mut self) -> &mut Vec<Self> {
+        if let YamlData::Array(x) = &mut self.data {
+            x
+        } else {
+            panic!("Called array_mut on a non-array");
+        }
+    }
+
+    fn hash_mut(&mut self) -> &mut LinkedHashMap<Self, Self> {
+        if let YamlData::Hash(x) = &mut self.data {
+            x
+        } else {
+            panic!("Called hash_mut on a non-hash");
+        }
+    }
+
+    fn take(&mut self) -> Self {
+        let mut taken_out = MarkedYaml {
+            marker: Marker::default(),
+            data: YamlData::BadValue,
+        };
+        std::mem::swap(&mut taken_out, self);
+        taken_out
+    }
+
+    fn with_marker(mut self, marker: Marker) -> Self {
+        self.marker = marker;
+        self
+    }
+}
diff --git a/saphyr/src/lib.rs b/saphyr/src/lib.rs
index f41cdd4..1f431f3 100644
--- a/saphyr/src/lib.rs
+++ b/saphyr/src/lib.rs
@@ -52,7 +52,9 @@ mod loader;
 mod yaml;
 
 // Re-export main components.
-pub use crate::annotated::{AnnotatedArray, AnnotatedHash, YamlData};
+pub use crate::annotated::{
+    marked_yaml::MarkedYaml, AnnotatedArray, AnnotatedHash, AnnotatedYamlIter, YamlData,
+};
 pub use crate::emitter::YamlEmitter;
 pub use crate::loader::{
     load_from_iter, load_from_parser, load_from_str, LoadableYamlNode, YamlLoader,
@@ -67,3 +69,5 @@ pub use crate::encoding::{YAMLDecodingTrap, YAMLDecodingTrapFn, YamlDecoder};
 // Re-export `ScanError` as it is used as part of our public API and we want consumers to be able
 // to inspect it (e.g. perform a `match`). They wouldn't be able without it.
 pub use saphyr_parser::ScanError;
+// Re-export [`Marker`] which is used for annotated YAMLs.
+pub use saphyr_parser::Marker;
diff --git a/saphyr/src/loader.rs b/saphyr/src/loader.rs
index 0ea0b34..d6ececd 100644
--- a/saphyr/src/loader.rs
+++ b/saphyr/src/loader.rs
@@ -50,7 +50,7 @@ pub fn load_from_iter<I: Iterator<Item = char>>(source: I) -> Result<Vec<Yaml>,
     load_from_parser(&mut parser)
 }
 
-/// Load the contents from the specified Parser as an array of YAML documents.
+/// Load the contents from the specified [`Parser`] as an array of YAML documents.
 ///
 /// See [`load_from_str`] for details.
 ///
@@ -104,8 +104,7 @@ impl<Node> MarkedEventReceiver for YamlLoader<Node>
 where
     Node: LoadableYamlNode,
 {
-    fn on_event(&mut self, ev: Event, _: Marker) {
-        // println!("EV {:?}", ev);
+    fn on_event(&mut self, ev: Event, marker: Marker) {
         match ev {
             Event::DocumentStart | Event::Nothing | Event::StreamStart | Event::StreamEnd => {
                 // do nothing
@@ -113,21 +112,29 @@ where
             Event::DocumentEnd => {
                 match self.doc_stack.len() {
                     // empty document
-                    0 => self.docs.push(Yaml::BadValue.into()),
+                    0 => self
+                        .docs
+                        .push(Node::from_bare_yaml(Yaml::BadValue).with_marker(marker)),
                     1 => self.docs.push(self.doc_stack.pop().unwrap().0),
                     _ => unreachable!(),
                 }
             }
             Event::SequenceStart(aid, _) => {
-                self.doc_stack.push((Yaml::Array(Vec::new()).into(), aid));
+                self.doc_stack.push((
+                    Node::from_bare_yaml(Yaml::Array(Vec::new())).with_marker(marker),
+                    aid,
+                ));
             }
             Event::SequenceEnd => {
                 let node = self.doc_stack.pop().unwrap();
                 self.insert_new_node(node);
             }
             Event::MappingStart(aid, _) => {
-                self.doc_stack.push((Yaml::Hash(Hash::new()).into(), aid));
-                self.key_stack.push(Yaml::BadValue.into());
+                self.doc_stack.push((
+                    Node::from_bare_yaml(Yaml::Hash(Hash::new())).with_marker(marker),
+                    aid,
+                ));
+                self.key_stack.push(Node::from_bare_yaml(Yaml::BadValue));
             }
             Event::MappingEnd => {
                 self.key_stack.pop().unwrap();
@@ -172,15 +179,14 @@ where
                     // Datatype is not specified, or unrecognized
                     Yaml::from_str(&v)
                 };
-
-                self.insert_new_node((node.into(), aid));
+                self.insert_new_node((Node::from_bare_yaml(node).with_marker(marker), aid));
             }
             Event::Alias(id) => {
                 let n = match self.anchor_map.get(&id) {
                     Some(v) => v.clone(),
-                    None => Yaml::BadValue.into(),
+                    None => Node::from_bare_yaml(Yaml::BadValue),
                 };
-                self.insert_new_node((n, 0));
+                self.insert_new_node((n.with_marker(marker), 0));
             }
         }
     }
@@ -215,6 +221,12 @@ where
             }
         }
     }
+
+    /// Return the document nodes from `self`, consuming it in the process.
+    #[must_use]
+    pub fn into_documents(self) -> Vec<Node> {
+        self.docs
+    }
 }
 
 /// An error that happened when loading a YAML document.
@@ -258,7 +270,19 @@ impl std::fmt::Display for LoadError {
 ///
 /// This trait must be implemented on YAML node types (i.e.: [`Yaml`] and annotated YAML nodes). It
 /// provides the necessary methods for [`YamlLoader`] to load data into the node.
-pub trait LoadableYamlNode: From<Yaml> + Clone + std::hash::Hash + Eq {
+pub trait LoadableYamlNode: Clone + std::hash::Hash + Eq {
+    /// Create an instance of `Self` from a [`Yaml`].
+    ///
+    /// Nodes must implement this to be built. The optional metadata that they contain will be
+    /// later provided by the loader and can be default initialized. The [`Yaml`] object passed as
+    /// parameter may be of the [`Array`] or [`Hash`] variants. In this event, the inner container
+    /// will always be empty. There is no need to traverse all elements to convert them from
+    /// [`Yaml`] to `Self`.
+    ///
+    /// [`Array`]: `Yaml::Array`
+    /// [`Hash`]: `Yaml::Hash`
+    fn from_bare_yaml(yaml: Yaml) -> Self;
+
     /// Return whether the YAML node is an array.
     fn is_array(&self) -> bool;
 
@@ -283,9 +307,20 @@ pub trait LoadableYamlNode: From<Yaml> + Clone + std::hash::Hash + Eq {
     /// Take the contained node out of `Self`, leaving a `BadValue` in its place.
     #[must_use]
     fn take(&mut self) -> Self;
+
+    /// Provide the marker for the node (builder-style).
+    #[inline]
+    #[must_use]
+    fn with_marker(self, _: Marker) -> Self {
+        self
+    }
 }
 
 impl LoadableYamlNode for Yaml {
+    fn from_bare_yaml(yaml: Yaml) -> Self {
+        yaml
+    }
+
     fn is_array(&self) -> bool {
         matches!(self, Yaml::Array(_))
     }
diff --git a/saphyr/src/yaml.rs b/saphyr/src/yaml.rs
index 5ac883f..f15ba00 100644
--- a/saphyr/src/yaml.rs
+++ b/saphyr/src/yaml.rs
@@ -77,6 +77,7 @@ impl Yaml {
     define_is!(is_array, Self::Array(_));
     define_is!(is_badvalue, Self::BadValue);
     define_is!(is_boolean, Self::Boolean(_));
+    define_is!(is_hash, Self::Hash(_));
     define_is!(is_integer, Self::Integer(_));
     define_is!(is_null, Self::Null);
     define_is!(is_real, Self::Real(_));