diff --git a/parser/justfile b/parser/justfile index 238ded7..58c8c4d 100644 --- a/parser/justfile +++ b/parser/justfile @@ -5,6 +5,7 @@ before_commit: cargo build --all-targets cargo test cargo test --release + cargo test --doc cargo build --profile=release-lto --package gen_large_yaml --bin gen_large_yaml --manifest-path tools/gen_large_yaml/Cargo.toml ethi_bench: diff --git a/parser/src/emitter.rs b/parser/src/emitter.rs index 0c365a8..15f8cab 100644 --- a/parser/src/emitter.rs +++ b/parser/src/emitter.rs @@ -1,13 +1,16 @@ +//! YAML serialization helpers. + use crate::char_traits; use crate::yaml::{Hash, Yaml}; use std::convert::From; use std::error::Error; use std::fmt::{self, Display}; +/// An error when emitting YAML. #[derive(Copy, Clone, Debug)] pub enum EmitError { + /// A formatting error. FmtError(fmt::Error), - BadHashmapKey, } impl Error for EmitError { @@ -20,7 +23,6 @@ impl Display for EmitError { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { match *self { EmitError::FmtError(ref err) => Display::fmt(err, formatter), - EmitError::BadHashmapKey => formatter.write_str("bad hashmap key"), } } } @@ -31,6 +33,20 @@ impl From for EmitError { } } +/// The YAML serializer. +/// +/// ``` +/// # use yaml_rust2::{YamlLoader, YamlEmitter}; +/// let input_string = "a: b\nc: d"; +/// let yaml = YamlLoader::load_from_str(input_string).unwrap(); +/// +/// let mut output = String::new(); +/// YamlEmitter::new(&mut output).dump(&yaml[0]).unwrap(); +/// +/// assert_eq!(output, r#"--- +/// a: b +/// c: d"#); +/// ``` #[allow(clippy::module_name_repetitions)] pub struct YamlEmitter<'a> { writer: &'a mut dyn fmt::Write, @@ -40,6 +56,7 @@ pub struct YamlEmitter<'a> { multiline_strings: bool, } +/// A convenience alias for emitter functions that may fail without returning a value. 
pub type EmitResult = Result<(), EmitError>; // from serialize::json @@ -106,6 +123,7 @@ fn escape_str(wr: &mut dyn fmt::Write, v: &str) -> Result<(), fmt::Error> { } impl<'a> YamlEmitter<'a> { + /// Create a new emitter serializing into `writer`. pub fn new(writer: &'a mut dyn fmt::Write) -> YamlEmitter { YamlEmitter { writer, diff --git a/parser/src/lib.rs b/parser/src/lib.rs index a6e49b5..9ca9a45 100644 --- a/parser/src/lib.rs +++ b/parser/src/lib.rs @@ -30,14 +30,7 @@ //! //! ``` -#![warn(clippy::pedantic)] -#![allow( - clippy::match_same_arms, - clippy::should_implement_trait, - clippy::missing_errors_doc, - clippy::missing_panics_doc, - clippy::redundant_else -)] +#![warn(missing_docs, clippy::pedantic)] extern crate hashlink; diff --git a/parser/src/parser.rs b/parser/src/parser.rs index 7543e97..b834c38 100644 --- a/parser/src/parser.rs +++ b/parser/src/parser.rs @@ -1,3 +1,9 @@ +//! Home to the YAML Parser. +//! +//! The parser takes input from the [`crate::scanner::Scanner`], performs final checks for YAML +//! compliance, and emits a stream of events that can be used by the [`crate::YamlLoader`] to +//! construct the [`crate::Yaml`] object. + use crate::scanner::{Marker, ScanError, Scanner, TScalarStyle, Token, TokenType}; use std::collections::HashMap; @@ -53,19 +59,23 @@ pub enum Event { ), /// Value, style, anchor_id, tag Scalar(String, TScalarStyle, usize, Option), + /// The start of a YAML sequence (array). SequenceStart( /// The anchor ID of the start of the squence. usize, /// An optional tag Option, ), + /// The end of a YAML sequence (array). SequenceEnd, + /// The start of a YAML mapping (object, hash). MappingStart( /// The anchor ID of the start of the mapping. usize, /// An optional tag Option, ), + /// The end of a YAML mapping (object, hash). MappingEnd, } @@ -195,6 +205,7 @@ impl MarkedEventReceiver for R { } } +/// A convenience alias for a `Result` of a parser event. 
pub type ParseResult = Result<(Event, Marker), ScanError>; impl> Parser { diff --git a/parser/src/scanner.rs b/parser/src/scanner.rs index d6e9788..556ca9b 100644 --- a/parser/src/scanner.rs +++ b/parser/src/scanner.rs @@ -1,3 +1,11 @@ +//! Home to the YAML Scanner. +//! +//! The scanner is the lowest-level parsing utility. It is the lexer / tokenizer, reading input a +//! character at a time and emitting tokens that can later be interpreted by the [`crate::parser`] +//! to check for more context and validity. +//! +//! Due to the grammar of YAML, the scanner has to have some context and is not error-free. + #![allow(clippy::cast_possible_wrap)] #![allow(clippy::cast_sign_loss)] @@ -10,19 +18,26 @@ use crate::char_traits::{ is_flow, is_hex, is_tag_char, is_uri_char, is_z, }; +/// The encoding of the input. Currently, only UTF-8 is supported. #[derive(Clone, Copy, PartialEq, Debug, Eq)] pub enum TEncoding { + /// UTF-8 encoding. Utf8, } +/// The style in which the scalar was written in the YAML document. #[derive(Clone, Copy, PartialEq, Debug, Eq)] pub enum TScalarStyle { - Any, + /// A YAML plain scalar. Plain, + /// A YAML single quoted scalar. SingleQuoted, + /// A YAML double quoted scalar. DoubleQuoted, + /// A YAML literal block (`|` block). Literal, + /// A YAML folded block (`>` block). Folded, } @@ -120,17 +135,18 @@ impl fmt::Display for ScanError { /// The contents of a scanner token. #[derive(Clone, PartialEq, Debug, Eq)] pub enum TokenType { - NoToken, /// The start of the stream. Sent first, before even [`DocumentStart`]. StreamStart(TEncoding), /// The end of the stream, EOF. StreamEnd, + /// A YAML version directive. VersionDirective( /// Major u32, /// Minor u32, ), + /// A YAML tag directive (e.g.: `!!str`, `!foo!bar`, ...). TagDirective( /// Handle String, @@ -394,6 +410,7 @@ impl> Iterator for Scanner { } } +/// A convenience alias for scanner functions that may fail without returning a value. 
pub type ScanResult = Result<(), ScanError>; impl> Scanner { @@ -532,16 +549,19 @@ impl> Scanner { self.buffer[0] == c } + /// Return whether the [`TokenType::StreamStart`] token has been emitted. #[inline] pub fn stream_started(&self) -> bool { self.stream_start_produced } + /// Return whether the [`TokenType::StreamEnd`] token has been emitted. #[inline] pub fn stream_ended(&self) -> bool { self.stream_end_produced } + /// Get the current position in the input stream. #[inline] pub fn mark(&self) -> Marker { self.mark diff --git a/parser/src/yaml.rs b/parser/src/yaml.rs index c1cecd7..e310795 100644 --- a/parser/src/yaml.rs +++ b/parser/src/yaml.rs @@ -1,3 +1,5 @@ +//! YAML object manipulation utilities. + #![allow(clippy::module_name_repetitions)] use std::{collections::BTreeMap, convert::TryFrom, mem, ops::Index}; @@ -50,7 +52,9 @@ pub enum Yaml { BadValue, } +/// The type contained in the `Yaml::Array` variant. This corresponds to YAML sequences. pub type Array = Vec; +/// The type contained in the `Yaml::Hash` variant. This corresponds to YAML mappings. pub type Hash = LinkedHashMap; // parse f64 as Core schema @@ -69,6 +73,7 @@ fn parse_f64(v: &str) -> Option { /// See [`YamlLoader::load_from_str`]. #[derive(Default)] pub struct YamlLoader { + /// The different YAML documents that are loaded. docs: Vec, // states // (current node, anchor_id) tuple @@ -161,10 +166,14 @@ impl MarkedEventReceiver for YamlLoader { } } +/// An error that happened when loading a YAML document. #[derive(Debug)] pub enum LoadError { + /// An I/O error. IO(std::io::Error), + /// An error within the scanner. This indicates a malformed YAML input. Scan(ScanError), + /// A decoding error (e.g.: invalid UTF-8). Decode(std::borrow::Cow<'static, str>), } @@ -251,6 +260,7 @@ pub struct YamlDecoder { } impl YamlDecoder { + /// Create a `YamlDecoder` decoding the given source. 
pub fn read(source: T) -> YamlDecoder { YamlDecoder { source, @@ -258,11 +268,14 @@ impl YamlDecoder { } } + /// Set the behavior of the decoder when the encoding is invalid. pub fn encoding_trap(&mut self, trap: encoding::types::DecoderTrap) -> &mut Self { self.trap = trap; self } + /// Run the decode operation with the source and trap the `YamlDecoder` was built with. + /// /// # Errors /// Returns `LoadError` when decoding fails. pub fn decode(&mut self) -> Result, LoadError> { @@ -301,6 +314,11 @@ fn detect_utf16_endianness(b: &[u8]) -> encoding::types::EncodingRef { macro_rules! define_as ( ($name:ident, $t:ident, $yt:ident) => ( +/// Get a copy of the inner object in the YAML enum if it is a `$t`. +/// +/// # Return +/// If the variant of `self` is `Yaml::$yt`, return `Some($t)` with a copy of the `$t` contained. +/// Otherwise, return `None`. #[must_use] pub fn $name(&self) -> Option<$t> { match *self { @@ -313,6 +331,11 @@ pub fn $name(&self) -> Option<$t> { macro_rules! define_as_ref ( ($name:ident, $t:ty, $yt:ident) => ( +/// Get a reference to the inner object in the YAML enum if it is a `$t`. +/// +/// # Return +/// If the variant of `self` is `Yaml::$yt`, return `Some(&$t)` with the `$t` contained. Otherwise, +/// return `None`. #[must_use] pub fn $name(&self) -> Option<$t> { match *self { @@ -325,6 +348,11 @@ pub fn $name(&self) -> Option<$t> { macro_rules! define_into ( ($name:ident, $t:ty, $yt:ident) => ( +/// Get the inner object in the YAML enum if it is a `$t`. +/// +/// # Return +/// If the variant of `self` is `Yaml::$yt`, return `Some($t)` with the `$t` contained. Otherwise, +/// return `None`. 
#[must_use] pub fn $name(self) -> Option<$t> { match self { diff --git a/parser/tests/yaml-test-suite.rs b/parser/tests/yaml-test-suite.rs index befebc1..e7ad10e 100644 --- a/parser/tests/yaml-test-suite.rs +++ b/parser/tests/yaml-test-suite.rs @@ -163,7 +163,6 @@ impl EventReceiver for EventReporter { TScalarStyle::DoubleQuoted => r#"""#, TScalarStyle::Literal => "|", TScalarStyle::Folded => ">", - TScalarStyle::Any => unreachable!(), }; format!( "=VAL{}{} {}{}",