From 425f00ceb84eb7a38eef3c841fab32b2635398cf Mon Sep 17 00:00:00 2001 From: Ethiraric Date: Mon, 10 Jun 2024 22:39:13 +0200 Subject: [PATCH] Add base support for annotated YAML objects. --- saphyr/README.md | 2 + saphyr/src/annotated.rs | 276 ++++++++++++++++++++++++++++++++++++++++ saphyr/src/lib.rs | 7 +- saphyr/src/macros.rs | 85 +++++++++++++ saphyr/src/yaml.rs | 112 +++------------- 5 files changed, 390 insertions(+), 92 deletions(-) create mode 100644 saphyr/src/annotated.rs create mode 100644 saphyr/src/macros.rs diff --git a/saphyr/README.md b/saphyr/README.md index 1b417b9..c1df782 100644 --- a/saphyr/README.md +++ b/saphyr/README.md @@ -72,6 +72,8 @@ Note that `saphyr::Yaml` implements `Index<&'a str>` and `Index`: * `Index<&'a str>` assumes the container is a string to value map * otherwise, `Yaml::BadValue` is returned +Note that `annotated::YamlData` cannot return `BadValue` and will panic. + If your document does not conform to this convention (e.g. map with complex type key), you can use the `Yaml::as_XXX` family API of functions to access your objects. diff --git a/saphyr/src/annotated.rs b/saphyr/src/annotated.rs new file mode 100644 index 0000000..930e37e --- /dev/null +++ b/saphyr/src/annotated.rs @@ -0,0 +1,276 @@ +//! Utilities for extracting YAML with certain metadata. + +use std::ops::{Index, IndexMut}; + +use hashlink::LinkedHashMap; + +use crate::loader::parse_f64; + +/// A YAML node without annotation. See [`Yaml`], you probably want that. +/// +/// Unlike [`Yaml`] which only supports storing data, [`YamlData`] allows storing metadata +/// alongside the YAML data. It is unlikely one would build it directly; it is mostly intended to +/// be used, for instance, when parsing a YAML where retrieving markers / comments is relevant. +/// +/// This definition is recursive. Each annotated node will be a structure storing the annotations +/// and the YAML data. 
We need to have a distinct enumeration from [`Yaml`] because the type for +/// the `Array` and `Hash` variants is dependent on that structure. +/// +/// If we had written [`YamlData`] as: +/// ```ignore +/// pub enum YamlData { +/// // ... +/// Array(Vec), +/// Hash(LinkedHashMap), +/// // ... +/// } +/// ``` +/// we would have stored metadata for the root node only. All subsequent nodes would be [`Yaml`], +/// which does not contain any annotation. +/// +/// Notable differences with [`Yaml`]: +/// * Indexing cannot return `BadValue` and will panic instead. +/// +/// [`Yaml`]: crate::Yaml +#[derive(Clone, PartialEq, PartialOrd, Debug, Eq, Ord, Hash)] +pub enum YamlData +where + Node: std::hash::Hash + std::cmp::Eq + From, +{ + /// Float types are stored as String and parsed on demand. + /// Note that `f64` does NOT implement Eq trait and can NOT be stored in `BTreeMap`. + Real(String), + /// YAML int is stored as i64. + Integer(i64), + /// YAML scalar. + String(String), + /// YAML bool, e.g. `true` or `false`. + Boolean(bool), + /// YAML array, can be accessed as a `Vec`. + Array(AnnotatedArray), + /// YAML hash, can be accessed as a `LinkedHashMap`. + /// + /// Insertion order will match the order of insertion into the map. + Hash(AnnotatedHash), + /// Alias, not fully supported yet. + Alias(usize), + /// YAML null, e.g. `null` or `~`. + Null, + /// Accessing a nonexistent node via the Index trait returns `BadValue`. This + /// simplifies error handling in the calling code. Invalid type conversion also + /// returns `BadValue`. + BadValue, +} + +/// The type contained in the [`YamlData::Array`] variant. This corresponds to YAML sequences. +#[allow(clippy::module_name_repetitions)] +pub type AnnotatedArray = Vec; +/// The type contained in the [`YamlData::Hash`] variant. This corresponds to YAML mappings. 
+#[allow(clippy::module_name_repetitions)] +pub type AnnotatedHash = LinkedHashMap; + +impl YamlData +where + Node: std::hash::Hash + std::cmp::Eq + From, +{ + define_as!(as_bool, bool, Boolean); + define_as!(as_i64, i64, Integer); + + define_as_ref!(as_hash, &AnnotatedHash, Hash); + define_as_ref!(as_str, &str, String); + define_as_ref!(as_vec, &AnnotatedArray, Array); + + define_as_mut_ref!(as_mut_hash, &mut AnnotatedHash, Hash); + define_as_mut_ref!(as_mut_vec, &mut AnnotatedArray, Array); + + define_into!(into_bool, bool, Boolean); + define_into!(into_hash, AnnotatedHash, Hash); + define_into!(into_i64, i64, Integer); + define_into!(into_string, String, String); + define_into!(into_vec, AnnotatedArray, Array); + + define_is!(is_alias, Self::Alias(_)); + define_is!(is_array, Self::Array(_)); + define_is!(is_badvalue, Self::BadValue); + define_is!(is_boolean, Self::Boolean(_)); + define_is!(is_integer, Self::Integer(_)); + define_is!(is_null, Self::Null); + define_is!(is_real, Self::Real(_)); + define_is!(is_string, Self::String(_)); + + /// Return the `f64` value contained in this YAML node. + /// + /// If the node is not a [`YamlData::Real`] YAML node or its contents is not a valid `f64` + /// string, `None` is returned. + #[must_use] + pub fn as_f64(&self) -> Option { + if let Self::Real(ref v) = self { + parse_f64(v) + } else { + None + } + } + + /// Return the `f64` value contained in this YAML node. + /// + /// If the node is not a [`YamlData::Real`] YAML node or its contents is not a valid `f64` + /// string, `None` is returned. + #[must_use] + pub fn into_f64(self) -> Option { + self.as_f64() + } + + /// If a value is null or otherwise bad (see variants), consume it and + /// replace it with a given value `other`. Otherwise, return self unchanged. + /// + /// See [`Yaml::or`] for examples. 
+ /// + /// [`Yaml::or`]: crate::Yaml::or + #[must_use] + pub fn or(self, other: Self) -> Self { + match self { + Self::BadValue | Self::Null => other, + this => this, + } + } + + /// See [`Self::or`] for behavior. + /// + /// This performs the same operations, but with borrowed values for less linear pipelines. + #[must_use] + pub fn borrowed_or<'a>(&'a self, other: &'a Self) -> &'a Self { + match self { + Self::BadValue | Self::Null => other, + this => this, + } + } +} + +// NOTE(ethiraric, 10/06/2024): We cannot create a "generic static" variable which would act as a +// `BAD_VALUE`. This means that, unlike for `Yaml`, we have to make the indexing method panic. + +impl<'a, Node> Index<&'a str> for YamlData +where + Node: std::hash::Hash + std::cmp::Eq + From, +{ + type Output = Node; + + /// Perform indexing if `self` is a mapping. + /// + /// # Panics + /// This function panics if the key given does not exist within `self` (as per [`Index`]). + /// + /// This function also panics if `self` is not a [`YamlData::Hash`]. + fn index(&self, idx: &'a str) -> &Node { + let key = Self::String(idx.to_owned()); + match self.as_hash() { + Some(h) => h.get(&key.into()).unwrap(), + None => panic!("{idx}: key does not exist"), + } + } +} + +impl<'a, Node> IndexMut<&'a str> for YamlData +where + Node: std::hash::Hash + std::cmp::Eq + From, +{ + /// Perform indexing if `self` is a mapping. + /// + /// # Panics + /// This function panics if the key given does not exist within `self` (as per [`Index`]). + /// + /// This function also panics if `self` is not a [`YamlData::Hash`]. + fn index_mut(&mut self, idx: &'a str) -> &mut Node { + let key = Self::String(idx.to_owned()); + match self.as_mut_hash() { + Some(h) => h.get_mut(&key.into()).unwrap(), + None => panic!("Not a hash type"), + } + } +} + +impl Index for YamlData +where + Node: std::hash::Hash + std::cmp::Eq + From, +{ + type Output = Node; + + /// Perform indexing if `self` is a sequence or a mapping. 
+ /// + /// # Panics + /// This function panics if the index given is out of range (as per [`Index`]). If `self` is a + /// [`YamlData::Array`], this is when the index is bigger or equal to the length of the + /// underlying `Vec`. If `self` is a [`YamlData::Hash`], this is when the mapping sequence does + /// not contain [`YamlData::Integer`]`(idx)` as a key. + /// + /// This function also panics if `self` is not a [`YamlData::Array`] nor a [`YamlData::Hash`]. + fn index(&self, idx: usize) -> &Node { + if let Some(v) = self.as_vec() { + v.get(idx).unwrap() + } else if let Some(v) = self.as_hash() { + let key = Self::Integer(i64::try_from(idx).unwrap()); + v.get(&key.into()).unwrap() + } else { + panic!("{idx}: Index out of bounds"); + } + } +} + +impl IndexMut for YamlData +where + Node: std::hash::Hash + std::cmp::Eq + From, +{ + /// Perform indexing if `self` is a sequence or a mapping. + /// + /// # Panics + /// This function panics if the index given is out of range (as per [`IndexMut`]). If `self` is + /// a [`YamlData::Array`], this is when the index is bigger or equal to the length of the + /// underlying `Vec`. If `self` is a [`YamlData::Hash`], this is when the mapping sequence does + /// not contain [`YamlData::Integer`]`(idx)` as a key. + /// + /// This function also panics if `self` is not a [`YamlData::Array`] nor a [`YamlData::Hash`]. 
+ fn index_mut(&mut self, idx: usize) -> &mut Node { + match self { + Self::Array(sequence) => sequence.index_mut(idx), + Self::Hash(mapping) => { + let key = Self::Integer(i64::try_from(idx).unwrap()); + mapping.get_mut(&key.into()).unwrap() + } + _ => panic!("Attempting to index but `self` is not a sequence nor a mapping"), + } + } +} + +impl IntoIterator for YamlData +where + Node: std::hash::Hash + std::cmp::Eq + From, +{ + type Item = Node; + type IntoIter = AnnotatedYamlIter; + + fn into_iter(self) -> Self::IntoIter { + Self::IntoIter { + yaml: self.into_vec().unwrap_or_default().into_iter(), + } + } +} + +/// An iterator over a [`YamlData`] node. +#[allow(clippy::module_name_repetitions)] +pub struct AnnotatedYamlIter +where + Node: std::hash::Hash + std::cmp::Eq + From>, +{ + yaml: std::vec::IntoIter, +} + +impl Iterator for AnnotatedYamlIter +where + Node: std::hash::Hash + std::cmp::Eq + From>, +{ + type Item = Node; + + fn next(&mut self) -> Option { + self.yaml.next() + } +} diff --git a/saphyr/src/lib.rs b/saphyr/src/lib.rs index ede027b..43cd0b8 100644 --- a/saphyr/src/lib.rs +++ b/saphyr/src/lib.rs @@ -43,15 +43,20 @@ #![warn(missing_docs, clippy::pedantic)] +#[macro_use] +mod macros; + +mod annotated; mod char_traits; mod emitter; mod loader; mod yaml; // Re-export main components. +pub use crate::annotated::{AnnotatedArray, AnnotatedHash, YamlData}; pub use crate::emitter::YamlEmitter; pub use crate::loader::YamlLoader; -pub use crate::yaml::{Array, Hash, Yaml}; +pub use crate::yaml::{Array, Hash, Yaml, YamlIter}; #[cfg(feature = "encoding")] mod encoding; diff --git a/saphyr/src/macros.rs b/saphyr/src/macros.rs new file mode 100644 index 0000000..a455736 --- /dev/null +++ b/saphyr/src/macros.rs @@ -0,0 +1,85 @@ +/// Generate `as_TYPE` methods for the [`crate::Yaml`] enum. +macro_rules! define_as ( + ($fn_name:ident, $t:ident, $variant:ident) => ( +/// Get a copy of the inner object in the YAML enum if it is a `$t`. 
+/// +/// # Return +/// If the variant of `self` is `Self::$variant`, return `Some($t)` with a copy of the `$t` contained. +/// Otherwise, return `None`. +#[must_use] +pub fn $fn_name(&self) -> Option<$t> { + match *self { + Self::$variant(v) => Some(v), + _ => None + } +} + ); +); + +/// Generate `as_TYPE` methods for the [`crate::Yaml`] enum, returning references. +macro_rules! define_as_ref ( + ($fn_name:ident, $t:ty, $variant:ident) => ( +/// Get a reference to the inner object in the YAML enum if it is a `$t`. +/// +/// # Return +/// If the variant of `self` is `Self::$variant`, return `Some(&$t)` with the `$t` contained. Otherwise, +/// return `None`. +#[must_use] +pub fn $fn_name(&self) -> Option<$t> { + match *self { + Self::$variant(ref v) => Some(v), + _ => None + } +} + ); +); + +/// Generate `as_TYPE` methods for the [`crate::Yaml`] enum, returning mutable references. +macro_rules! define_as_mut_ref ( + ($fn_name:ident, $t:ty, $variant:ident) => ( +/// Get a mutable reference to the inner object in the YAML enum if it is a `$t`. +/// +/// # Return +/// If the variant of `self` is `Self::$variant`, return `Some(&mut $t)` with the `$t` contained. +/// Otherwise, return `None`. +#[must_use] +pub fn $fn_name(&mut self) -> Option<$t> { + match *self { + Self::$variant(ref mut v) => Some(v), + _ => None + } +} + ); +); + +/// Generate `into_TYPE` methods for the [`crate::Yaml`] enum. +macro_rules! define_into ( + ($fn_name:ident, $t:ty, $variant:ident) => ( +/// Get the inner object in the YAML enum if it is a `$t`. +/// +/// # Return +/// If the variant of `self` is `Self::$variant`, return `Some($t)` with the `$t` contained. Otherwise, +/// return `None`. +#[must_use] +pub fn $fn_name(self) -> Option<$t> { + match self { + Self::$variant(v) => Some(v), + _ => None + } +} + ); +); + +/// Generate `is_TYPE` methods for the [`crate::Yaml`] enum. +macro_rules! 
define_is ( + ($fn_name:ident, $variant:pat) => ( +/// Check whether the YAML enum contains the given variant. +/// +/// # Return +/// If the variant of `self` is `Self::$variant`, return `true`. Otherwise, return `false`. +#[must_use] +pub fn $fn_name(&self) -> bool { + matches!(self, $variant) +} + ); +); diff --git a/saphyr/src/yaml.rs b/saphyr/src/yaml.rs index acd8f68..5ac883f 100644 --- a/saphyr/src/yaml.rs +++ b/saphyr/src/yaml.rs @@ -56,108 +56,31 @@ pub type Array = Vec; /// The type contained in the `Yaml::Hash` variant. This corresponds to YAML mappings. pub type Hash = LinkedHashMap; -macro_rules! define_as ( - ($name:ident, $t:ident, $yt:ident) => ( -/// Get a copy of the inner object in the YAML enum if it is a `$t`. -/// -/// # Return -/// If the variant of `self` is `Yaml::$yt`, return `Some($t)` with a copy of the `$t` contained. -/// Otherwise, return `None`. -#[must_use] -pub fn $name(&self) -> Option<$t> { - match *self { - Yaml::$yt(v) => Some(v), - _ => None - } -} - ); -); - -macro_rules! define_as_ref ( - ($name:ident, $t:ty, $yt:ident) => ( -/// Get a reference to the inner object in the YAML enum if it is a `$t`. -/// -/// # Return -/// If the variant of `self` is `Yaml::$yt`, return `Some(&$t)` with the `$t` contained. Otherwise, -/// return `None`. -#[must_use] -pub fn $name(&self) -> Option<$t> { - match *self { - Yaml::$yt(ref v) => Some(v), - _ => None - } -} - ); -); - -macro_rules! define_as_mut_ref ( - ($name:ident, $t:ty, $yt:ident) => ( -/// Get a mutable reference to the inner object in the YAML enum if it is a `$t`. -/// -/// # Return -/// If the variant of `self` is `Yaml::$yt`, return `Some(&mut $t)` with the `$t` contained. -/// Otherwise, return `None`. -#[must_use] -pub fn $name(&mut self) -> Option<$t> { - match *self { - Yaml::$yt(ref mut v) => Some(v), - _ => None - } -} - ); -); - -macro_rules! define_into ( - ($name:ident, $t:ty, $yt:ident) => ( -/// Get the inner object in the YAML enum if it is a `$t`. 
-/// -/// # Return -/// If the variant of `self` is `Yaml::$yt`, return `Some($t)` with the `$t` contained. Otherwise, -/// return `None`. -#[must_use] -pub fn $name(self) -> Option<$t> { - match self { - Yaml::$yt(v) => Some(v), - _ => None - } -} - ); -); - impl Yaml { define_as!(as_bool, bool, Boolean); define_as!(as_i64, i64, Integer); - define_as_ref!(as_str, &str, String); define_as_ref!(as_hash, &Hash, Hash); + define_as_ref!(as_str, &str, String); define_as_ref!(as_vec, &Array, Array); define_as_mut_ref!(as_mut_hash, &mut Hash, Hash); define_as_mut_ref!(as_mut_vec, &mut Array, Array); define_into!(into_bool, bool, Boolean); + define_into!(into_hash, Hash, Hash); define_into!(into_i64, i64, Integer); define_into!(into_string, String, String); - define_into!(into_hash, Hash, Hash); define_into!(into_vec, Array, Array); - /// Return whether `self` is a [`Yaml::Null`] node. - #[must_use] - pub fn is_null(&self) -> bool { - matches!(*self, Yaml::Null) - } - - /// Return whether `self` is a [`Yaml::BadValue`] node. - #[must_use] - pub fn is_badvalue(&self) -> bool { - matches!(*self, Yaml::BadValue) - } - - /// Return whether `self` is a [`Yaml::Array`] node. - #[must_use] - pub fn is_array(&self) -> bool { - matches!(*self, Yaml::Array(_)) - } + define_is!(is_alias, Self::Alias(_)); + define_is!(is_array, Self::Array(_)); + define_is!(is_badvalue, Self::BadValue); + define_is!(is_boolean, Self::Boolean(_)); + define_is!(is_integer, Self::Integer(_)); + define_is!(is_null, Self::Null); + define_is!(is_real, Self::Real(_)); + define_is!(is_string, Self::String(_)); /// Return the `f64` value contained in this YAML node. /// @@ -198,8 +121,9 @@ impl Yaml { } } - /// See `or` for behavior. This performs the same operations, but with - /// borrowed values for less linear pipelines. + /// See [`Self::or`] for behavior. + /// + /// This performs the same operations, but with borrowed values for less linear pipelines. 
#[must_use] pub fn borrowed_or<'a>(&'a self, other: &'a Self) -> &'a Self { match self { @@ -274,6 +198,12 @@ impl<'a> Index<&'a str> for Yaml { } impl<'a> IndexMut<&'a str> for Yaml { + /// Perform indexing if `self` is a mapping. + /// + /// # Panics + /// This function panics if the key given does not exist within `self` (as per [`Index`]). + /// + /// This function also panics if `self` is not a [`Yaml::Hash`]. fn index_mut(&mut self, idx: &'a str) -> &mut Yaml { let key = Yaml::String(idx.to_owned()); match self.as_mut_hash() { @@ -302,9 +232,9 @@ impl IndexMut for Yaml { /// Perform indexing if `self` is a sequence or a mapping. /// /// # Panics - /// This function panics if the index given is out of range (as per [`IndexMut`]). If `self` i + /// This function panics if the index given is out of range (as per [`IndexMut`]). If `self` is /// a [`Yaml::Array`], this is when the index is bigger or equal to the length of the - /// underlying `Vec`. If `self` is a [`Yaml::Hash`], this is when the mapping sequence does no + /// underlying `Vec`. If `self` is a [`Yaml::Hash`], this is when the mapping sequence does not /// contain [`Yaml::Integer`]`(idx)` as a key. /// /// This function also panics if `self` is not a [`Yaml::Array`] nor a [`Yaml::Hash`].