diff --git a/parser/Cargo.toml b/parser/Cargo.toml index 6eb3390..2823412 100644 --- a/parser/Cargo.toml +++ b/parser/Cargo.toml @@ -1,32 +1,32 @@ [package] -name = "yaml-rust2" -version = "0.8.0" +name = "saphyr-parser" +version = "0.0.1" authors = [ "Yuheng Chen ", "Ethiraric ", "David Aguilar " ] -documentation = "https://docs.rs/yaml-rust2" +documentation = "https://docs.rs/saphyr-parser" +keywords = [ "yaml", "parser", "deserialization" ] +categories = [ "encoding", "parser-implementations" ] license = "MIT OR Apache-2.0" description = "A fully YAML 1.2 compliant YAML parser" -repository = "https://github.com/Ethiraric/yaml-rust2" +repository = "https://github.com/saphyr-rs/saphyr-parser" readme = "README.md" edition = "2021" rust-version = "1.70.0" [features] -default = [ "encoding" ] debug_prints = [] -encoding = [ "dep:encoding_rs" ] [dependencies] arraydeque = "0.5.1" -encoding_rs = { version = "0.8.33", optional = true } hashlink = "0.8" [dev-dependencies] libtest-mimic = "0.3.0" quickcheck = "1.0" +yaml-rust2 = "0.8.0" [profile.release-lto] inherits = "release" @@ -39,11 +39,3 @@ harness = false [[bin]] name = "dump_events" path = "tools/dump_events.rs" - -[[bin]] -name = "time_parse" -path = "tools/time_parse.rs" - -[[bin]] -name = "run_bench" -path = "tools/run_bench.rs" diff --git a/parser/README.md b/parser/README.md index d9f12ad..76a3591 100644 --- a/parser/README.md +++ b/parser/README.md @@ -1,81 +1,32 @@ -# yaml-rust2 +# saphyr-parser -[yaml-rust2](https://github.com/Ethiraric/yaml-rust2) is a fully compliant YAML 1.2 -implementation written in pure Rust. +[saphyr-parser](https://github.com/saphyr-rs/saphyr-parser) is a fully compliant YAML 1.2 +parser implementation written in pure Rust. + +**If you want to load to a YAML Rust structure or manipulate YAML objects, use +`saphyr` instead of `saphyr-parser`. 
This crate contains only the parser.** This work is based on [`yaml-rust`](https://github.com/chyh1990/yaml-rust) with fixes towards being compliant to the [YAML test suite](https://github.com/yaml/yaml-test-suite/). `yaml-rust`'s parser is heavily influenced by `libyaml` and `yaml-cpp`. -`yaml-rust2` is a pure Rust YAML 1.2 implementation that benefits from the +`saphyr-parser` is a pure Rust YAML 1.2 implementation that benefits from the memory safety and other benefits from the Rust language. -## Quick Start - -Add the following to the Cargo.toml of your project: +## Installing +Add the following to your Cargo.toml: ```toml [dependencies] -yaml-rust2 = "0.8" +saphyr-parser = "0.0.1" +``` +or use `cargo add` to get the latest version automatically: +```sh +cargo add saphyr-parser ``` -Use `yaml_rust2::YamlLoader` to load YAML documents and access them as `Yaml` objects: - -```rust -use yaml_rust2::{YamlLoader, YamlEmitter}; - -fn main() { - let s = -" -foo: - - list1 - - list2 -bar: - - 1 - - 2.0 -"; - let docs = YamlLoader::load_from_str(s).unwrap(); - - // Multi document support, doc is a yaml::Yaml - let doc = &docs[0]; - - // Debug support - println!("{:?}", doc); - - // Index access for map & array - assert_eq!(doc["foo"][0].as_str().unwrap(), "list1"); - assert_eq!(doc["bar"][1].as_f64().unwrap(), 2.0); - - // Array/map-like accesses are checked and won't panic. - // They will return `BadValue` if the access is invalid. 
- assert!(doc["INVALID_KEY"][100].is_badvalue()); - - // Dump the YAML object - let mut out_str = String::new(); - { - let mut emitter = YamlEmitter::new(&mut out_str); - emitter.dump(doc).unwrap(); // dump the YAML object to a String - } - println!("{}", out_str); -} -``` - -Note that `yaml_rust2::Yaml` implements `Index<&'a str>` and `Index`: - -* `Index` assumes the container is an array -* `Index<&'a str>` assumes the container is a string to value map -* otherwise, `Yaml::BadValue` is returned - -If your document does not conform to this convention (e.g. map with complex -type key), you can use the `Yaml::as_XXX` family API of functions to access -your objects. - -## Features - -* Pure Rust -* `Vec`/`HashMap` access API -* Low-level YAML events emission +## TODO how-to ## Security @@ -89,21 +40,6 @@ This implementation is fully compatible with the YAML 1.2 specification. In order to help with compliance, `yaml-rust2` tests against (and passes) the [YAML test suite](https://github.com/yaml/yaml-test-suite/). -## Upgrading from yaml-rust - -You can use `yaml-rust2` as a drop-in replacement for the original `yaml-rust` crate. - -```toml -[dependencies] -yaml-rust = { version = "#.#", package = "yaml-rust2" } -``` - -This `Cargo.toml` declaration allows you to refer to this crate as `yaml_rust` in your code. - -```rust -use yaml_rust::{YamlLoader, YamlEmitter}; -``` - ## License Licensed under either of @@ -122,10 +58,9 @@ You can find licences in the [`.licenses`](.licenses) subfolder. ## Contribution -[Fork this repository](https://github.com/Ethiraric/yaml-rust2/fork) and -[Create a Pull Request on Github](https://github.com/Ethiraric/yaml-rust2/compare/master...Ethiraric:yaml-rust2:master). +[Fork this repository](https://github.com/saphyr-rs/saphyr-parser/fork) and +[Create a Pull Request on Github](https://github.com/saphyr-rs/saphyr-parser/compare/master...saphyr-rs:saphyr-parser:master). 
You may need to click on "compare across forks" and select your fork's branch. -Make sure that `Ethiraric` is selected as the base repository, not `chyh1990`. Unless you explicitly state otherwise, any contribution intentionally submitted for inclusion in the work by you, as defined in the Apache-2.0 license, shall @@ -133,10 +68,10 @@ be dual licensed as above, without any additional terms or conditions. ## Links -* [yaml-rust2 source code repository](https://github.com/Ethiraric/yaml-rust2) +* [saphyr-parser source code repository](https://github.com/saphyr-rs/saphyr-parser) -* [yaml-rust2 releases on crates.io](https://crates.io/crates/yaml-rust2) +* [saphyr-parser releases on crates.io](https://crates.io/crates/saphyr-parser) -* [yaml-rust2 documentation on docs.rs](https://docs.rs/yaml-rust2/latest/yaml_rust2/) +* [saphyr-parser documentation on docs.rs](https://docs.rs/saphyr-parser/latest/saphyr_parser/) * [yaml-test-suite](https://github.com/yaml/yaml-test-suite) diff --git a/parser/appveyor.yml b/parser/appveyor.yml deleted file mode 100644 index 5b3ee4c..0000000 --- a/parser/appveyor.yml +++ /dev/null @@ -1,65 +0,0 @@ -clone_depth: 1 - -branches: - only: - - master - -environment: - LLVM_VERSION: 9.0.1 - PLATFORM: x64 - matrix: - - channel: stable - target: i686-pc-windows-msvc - type: msvc - - channel: stable - target: x86_64-pc-windows-msvc - type: msvc - - channel: stable - target: i686-pc-windows-gnu - type: gnu - - channel: stable - target: x86_64-pc-windows-gnu - type: gnu - - channel: nightly - target: i686-pc-windows-msvc - type: msvc - - channel: nightly - target: x86_64-pc-windows-msvc - type: msvc - - channel: nightly - target: i686-pc-windows-gnu - type: gnu - - channel: nightly - target: x86_64-pc-windows-gnu - type: gnu - -install: - - if %PLATFORM% == x86 (set RUST_PLATFORM=i686&set MINGW_BITS=32) else (set RUST_PLATFORM=x86_64&set MINGW_BITS=64) - - ps: >- - If ($env:target -eq 'x86_64-pc-windows-gnu') { - $env:PATH += 
';C:\msys64\mingw64\bin' - } ElseIf ($env:target -eq 'i686-pc-windows-gnu') { - $env:PATH += ';C:\msys64\mingw32\bin' - } - - appveyor DownloadFile https://win.rustup.rs/ -FileName rustup-init.exe - - rustup-init -yv --default-toolchain %channel% --default-host %target% - - set PATH=%PATH%;%USERPROFILE%\.cargo\bin - - rustc -vV - - cargo -vV - # Install LLVM for GNU - - if %type%==gnu set PATH=C:\msys64\mingw%MINGW_BITS%\bin;C:\msys64\usr\bin;%PATH% - - if %type%==gnu set "MINGW_URL=http://repo.msys2.org/mingw/%RUST_PLATFORM%/mingw-w64-%RUST_PLATFORM%" - - if %type%==gnu set "URL_VER=%LLVM_VERSION%-1-any.pkg.tar.xz" - - if %type%==gnu bash -lc "pacman -U --noconfirm $MINGW_URL-clang-$URL_VER $MINGW_URL-llvm-$URL_VER" - - if %type%==gnu bash -lc "clang --version" - # Use preinstalled LLVM for MSVC - - if %type%==msvc set PATH=%PATH%;C:\Program Files\LLVM\bin - - if %type%==msvc where clang - - if %type%==msvc clang --version - -build_script: - - cargo build -vv -test_script: - - cargo test -vv -deploy: off - diff --git a/parser/examples/dump_yaml.rs b/parser/examples/dump_yaml.rs deleted file mode 100644 index 1c3c452..0000000 --- a/parser/examples/dump_yaml.rs +++ /dev/null @@ -1,44 +0,0 @@ -use std::env; -use std::fs::File; -use std::io::prelude::*; -use yaml_rust2::yaml; - -fn print_indent(indent: usize) { - for _ in 0..indent { - print!(" "); - } -} - -fn dump_node(doc: &yaml::Yaml, indent: usize) { - match *doc { - yaml::Yaml::Array(ref v) => { - for x in v { - dump_node(x, indent + 1); - } - } - yaml::Yaml::Hash(ref h) => { - for (k, v) in h { - print_indent(indent); - println!("{k:?}:"); - dump_node(v, indent + 1); - } - } - _ => { - print_indent(indent); - println!("{doc:?}"); - } - } -} - -fn main() { - let args: Vec<_> = env::args().collect(); - let mut f = File::open(&args[1]).unwrap(); - let mut s = String::new(); - f.read_to_string(&mut s).unwrap(); - - let docs = yaml::YamlLoader::load_from_str(&s).unwrap(); - for doc in &docs { - println!("---"); - 
dump_node(doc, 0); - } -} diff --git a/parser/justfile b/parser/justfile index f33ee69..0c36a63 100644 --- a/parser/justfile +++ b/parser/justfile @@ -1,4 +1,5 @@ before_commit: + cargo fmt --check cargo clippy --release --all-targets -- -D warnings cargo clippy --all-targets -- -D warnings cargo build --release --all-targets diff --git a/parser/src/char_traits.rs b/parser/src/char_traits.rs index 82f81bd..4a08da1 100644 --- a/parser/src/char_traits.rs +++ b/parser/src/char_traits.rs @@ -109,16 +109,3 @@ pub(crate) fn is_uri_char(c: char) -> bool { pub(crate) fn is_tag_char(c: char) -> bool { is_uri_char(c) && !is_flow(c) && c != '!' } - -/// Check if the string can be expressed a valid literal block scalar. -/// The YAML spec supports all of the following in block literals except `#xFEFF`: -/// ```no_compile -/// #x9 | #xA | [#x20-#x7E] /* 8 bit */ -/// | #x85 | [#xA0-#xD7FF] | [#xE000-#xFFFD] /* 16 bit */ -/// | [#x10000-#x10FFFF] /* 32 bit */ -/// ``` -#[inline] -pub(crate) fn is_valid_literal_block_scalar(string: &str) -> bool { - string.chars().all(|character: char| - matches!(character, '\t' | '\n' | '\x20'..='\x7e' | '\u{0085}' | '\u{00a0}'..='\u{d7fff}')) -} diff --git a/parser/src/debug.rs b/parser/src/debug.rs index c1411cb..a35d05b 100644 --- a/parser/src/debug.rs +++ b/parser/src/debug.rs @@ -2,8 +2,8 @@ //! //! Debugging is governed by two conditions: //! 1. The build mode. Debugging code is not emitted in release builds and thus not available. -//! 2. The `YAMLALL_DEBUG` environment variable. If built in debug mode, the program must be fed -//! the `YAMLALL_DEBUG` variable in its environment. While debugging code is present in debug +//! 2. The `SAPHYR_DEBUG` environment variable. If built in debug mode, the program must be fed +//! the `SAPHYR_DEBUG` variable in its environment. While debugging code is present in debug //! build, debug helpers will only trigger if that variable is set when running the program. 
// If a debug build, use stuff in the debug submodule. @@ -36,6 +36,6 @@ mod debug { #[cfg(debug_assertions)] pub fn enabled() -> bool { static ENABLED: OnceLock = OnceLock::new(); - *ENABLED.get_or_init(|| std::env::var("YAMLRUST2_DEBUG").is_ok()) + *ENABLED.get_or_init(|| std::env::var("SAPHYR_DEBUG").is_ok()) } } diff --git a/parser/src/emitter.rs b/parser/src/emitter.rs deleted file mode 100644 index 48c8b5c..0000000 --- a/parser/src/emitter.rs +++ /dev/null @@ -1,422 +0,0 @@ -//! YAML serialization helpers. - -use crate::char_traits; -use crate::yaml::{Hash, Yaml}; -use std::convert::From; -use std::error::Error; -use std::fmt::{self, Display}; - -/// An error when emitting YAML. -#[derive(Copy, Clone, Debug)] -pub enum EmitError { - /// A formatting error. - FmtError(fmt::Error), -} - -impl Error for EmitError { - fn cause(&self) -> Option<&dyn Error> { - None - } -} - -impl Display for EmitError { - fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { - match *self { - EmitError::FmtError(ref err) => Display::fmt(err, formatter), - } - } -} - -impl From for EmitError { - fn from(f: fmt::Error) -> Self { - EmitError::FmtError(f) - } -} - -/// The YAML serializer. -/// -/// ``` -/// # use yaml_rust2::{YamlLoader, YamlEmitter}; -/// let input_string = "a: b\nc: d"; -/// let yaml = YamlLoader::load_from_str(input_string).unwrap(); -/// -/// let mut output = String::new(); -/// YamlEmitter::new(&mut output).dump(&yaml[0]).unwrap(); -/// -/// assert_eq!(output, r#"--- -/// a: b -/// c: d"#); -/// ``` -#[allow(clippy::module_name_repetitions)] -pub struct YamlEmitter<'a> { - writer: &'a mut dyn fmt::Write, - best_indent: usize, - compact: bool, - level: isize, - multiline_strings: bool, -} - -/// A convenience alias for emitter functions that may fail without returning a value. 
-pub type EmitResult = Result<(), EmitError>; - -// from serialize::json -fn escape_str(wr: &mut dyn fmt::Write, v: &str) -> Result<(), fmt::Error> { - wr.write_str("\"")?; - - let mut start = 0; - - for (i, byte) in v.bytes().enumerate() { - let escaped = match byte { - b'"' => "\\\"", - b'\\' => "\\\\", - b'\x00' => "\\u0000", - b'\x01' => "\\u0001", - b'\x02' => "\\u0002", - b'\x03' => "\\u0003", - b'\x04' => "\\u0004", - b'\x05' => "\\u0005", - b'\x06' => "\\u0006", - b'\x07' => "\\u0007", - b'\x08' => "\\b", - b'\t' => "\\t", - b'\n' => "\\n", - b'\x0b' => "\\u000b", - b'\x0c' => "\\f", - b'\r' => "\\r", - b'\x0e' => "\\u000e", - b'\x0f' => "\\u000f", - b'\x10' => "\\u0010", - b'\x11' => "\\u0011", - b'\x12' => "\\u0012", - b'\x13' => "\\u0013", - b'\x14' => "\\u0014", - b'\x15' => "\\u0015", - b'\x16' => "\\u0016", - b'\x17' => "\\u0017", - b'\x18' => "\\u0018", - b'\x19' => "\\u0019", - b'\x1a' => "\\u001a", - b'\x1b' => "\\u001b", - b'\x1c' => "\\u001c", - b'\x1d' => "\\u001d", - b'\x1e' => "\\u001e", - b'\x1f' => "\\u001f", - b'\x7f' => "\\u007f", - _ => continue, - }; - - if start < i { - wr.write_str(&v[start..i])?; - } - - wr.write_str(escaped)?; - - start = i + 1; - } - - if start != v.len() { - wr.write_str(&v[start..])?; - } - - wr.write_str("\"")?; - Ok(()) -} - -impl<'a> YamlEmitter<'a> { - /// Create a new emitter serializing into `writer`. - pub fn new(writer: &'a mut dyn fmt::Write) -> YamlEmitter { - YamlEmitter { - writer, - best_indent: 2, - compact: true, - level: -1, - multiline_strings: false, - } - } - - /// Set 'compact inline notation' on or off, as described for block - /// [sequences](http://www.yaml.org/spec/1.2/spec.html#id2797382) - /// and - /// [mappings](http://www.yaml.org/spec/1.2/spec.html#id2798057). - /// - /// In this form, blocks cannot have any properties (such as anchors - /// or tags), which should be OK, because this emitter doesn't - /// (currently) emit those anyways. 
- pub fn compact(&mut self, compact: bool) { - self.compact = compact; - } - - /// Determine if this emitter is using 'compact inline notation'. - #[must_use] - pub fn is_compact(&self) -> bool { - self.compact - } - - /// Render strings containing multiple lines in [literal style]. - /// - /// # Examples - /// - /// ```rust - /// use yaml_rust2::{Yaml, YamlEmitter, YamlLoader}; - /// - /// let input = r#"{foo: "bar!\nbar!", baz: 42}"#; - /// let parsed = YamlLoader::load_from_str(input).unwrap(); - /// eprintln!("{:?}", parsed); - /// - /// let mut output = String::new(); - /// let mut emitter = YamlEmitter::new(&mut output); - /// emitter.multiline_strings(true); - /// emitter.dump(&parsed[0]).unwrap(); - /// assert_eq!(output.as_str(), "\ - /// --- - /// foo: | - /// bar! - /// bar! - /// baz: 42"); - /// ``` - /// - /// [literal style]: https://yaml.org/spec/1.2/spec.html#id2795688 - pub fn multiline_strings(&mut self, multiline_strings: bool) { - self.multiline_strings = multiline_strings; - } - - /// Determine if this emitter will emit multiline strings when appropriate. - #[must_use] - pub fn is_multiline_strings(&self) -> bool { - self.multiline_strings - } - - /// Dump Yaml to an output stream. - /// # Errors - /// Returns `EmitError` when an error occurs. 
- pub fn dump(&mut self, doc: &Yaml) -> EmitResult { - // write DocumentStart - writeln!(self.writer, "---")?; - self.level = -1; - self.emit_node(doc) - } - - fn write_indent(&mut self) -> EmitResult { - if self.level <= 0 { - return Ok(()); - } - for _ in 0..self.level { - for _ in 0..self.best_indent { - write!(self.writer, " ")?; - } - } - Ok(()) - } - - fn emit_node(&mut self, node: &Yaml) -> EmitResult { - match *node { - Yaml::Array(ref v) => self.emit_array(v), - Yaml::Hash(ref h) => self.emit_hash(h), - Yaml::String(ref v) => { - if self.multiline_strings - && v.contains('\n') - && char_traits::is_valid_literal_block_scalar(v) - { - write!(self.writer, "|")?; - self.level += 1; - for line in v.lines() { - writeln!(self.writer)?; - self.write_indent()?; - // It's literal text, so don't escape special chars. - write!(self.writer, "{line}")?; - } - self.level -= 1; - } else if need_quotes(v) { - escape_str(self.writer, v)?; - } else { - write!(self.writer, "{v}")?; - } - Ok(()) - } - Yaml::Boolean(v) => { - if v { - self.writer.write_str("true")?; - } else { - self.writer.write_str("false")?; - } - Ok(()) - } - Yaml::Integer(v) => { - write!(self.writer, "{v}")?; - Ok(()) - } - Yaml::Real(ref v) => { - write!(self.writer, "{v}")?; - Ok(()) - } - Yaml::Null | Yaml::BadValue => { - write!(self.writer, "~")?; - Ok(()) - } - // XXX(chenyh) Alias - Yaml::Alias(_) => Ok(()), - } - } - - fn emit_array(&mut self, v: &[Yaml]) -> EmitResult { - if v.is_empty() { - write!(self.writer, "[]")?; - } else { - self.level += 1; - for (cnt, x) in v.iter().enumerate() { - if cnt > 0 { - writeln!(self.writer)?; - self.write_indent()?; - } - write!(self.writer, "-")?; - self.emit_val(true, x)?; - } - self.level -= 1; - } - Ok(()) - } - - fn emit_hash(&mut self, h: &Hash) -> EmitResult { - if h.is_empty() { - self.writer.write_str("{}")?; - } else { - self.level += 1; - for (cnt, (k, v)) in h.iter().enumerate() { - let complex_key = matches!(*k, Yaml::Hash(_) | Yaml::Array(_)); - 
if cnt > 0 { - writeln!(self.writer)?; - self.write_indent()?; - } - if complex_key { - write!(self.writer, "?")?; - self.emit_val(true, k)?; - writeln!(self.writer)?; - self.write_indent()?; - write!(self.writer, ":")?; - self.emit_val(true, v)?; - } else { - self.emit_node(k)?; - write!(self.writer, ":")?; - self.emit_val(false, v)?; - } - } - self.level -= 1; - } - Ok(()) - } - - /// Emit a yaml as a hash or array value: i.e., which should appear - /// following a ":" or "-", either after a space, or on a new line. - /// If `inline` is true, then the preceding characters are distinct - /// and short enough to respect the compact flag. - fn emit_val(&mut self, inline: bool, val: &Yaml) -> EmitResult { - match *val { - Yaml::Array(ref v) => { - if (inline && self.compact) || v.is_empty() { - write!(self.writer, " ")?; - } else { - writeln!(self.writer)?; - self.level += 1; - self.write_indent()?; - self.level -= 1; - } - self.emit_array(v) - } - Yaml::Hash(ref h) => { - if (inline && self.compact) || h.is_empty() { - write!(self.writer, " ")?; - } else { - writeln!(self.writer)?; - self.level += 1; - self.write_indent()?; - self.level -= 1; - } - self.emit_hash(h) - } - _ => { - write!(self.writer, " ")?; - self.emit_node(val) - } - } - } -} - -/// Check if the string requires quoting. -/// Strings starting with any of the following characters must be quoted. -/// :, &, *, ?, |, -, <, >, =, !, %, @ -/// Strings containing any of the following characters must be quoted. 
-/// {, }, \[, t \], ,, #, ` -/// -/// If the string contains any of the following control characters, it must be escaped with double quotes: -/// \0, \x01, \x02, \x03, \x04, \x05, \x06, \a, \b, \t, \n, \v, \f, \r, \x0e, \x0f, \x10, \x11, \x12, \x13, \x14, \x15, \x16, \x17, \x18, \x19, \x1a, \e, \x1c, \x1d, \x1e, \x1f, \N, \_, \L, \P -/// -/// Finally, there are other cases when the strings must be quoted, no matter if you're using single or double quotes: -/// * When the string is true or false (otherwise, it would be treated as a boolean value); -/// * When the string is null or ~ (otherwise, it would be considered as a null value); -/// * When the string looks like a number, such as integers (e.g. 2, 14, etc.), floats (e.g. 2.6, 14.9) and exponential numbers (e.g. 12e7, etc.) (otherwise, it would be treated as a numeric value); -/// * When the string looks like a date (e.g. 2014-12-31) (otherwise it would be automatically converted into a Unix timestamp). -#[allow(clippy::doc_markdown)] -fn need_quotes(string: &str) -> bool { - fn need_quotes_spaces(string: &str) -> bool { - string.starts_with(' ') || string.ends_with(' ') - } - - string.is_empty() - || need_quotes_spaces(string) - || string.starts_with(|character: char| { - matches!( - character, - '&' | '*' | '?' | '|' | '-' | '<' | '>' | '=' | '!' | '%' | '@' - ) - }) - || string.contains(|character: char| { - matches!(character, ':' - | '{' - | '}' - | '[' - | ']' - | ',' - | '#' - | '`' - | '\"' - | '\'' - | '\\' - | '\0'..='\x06' - | '\t' - | '\n' - | '\r' - | '\x0e'..='\x1a' - | '\x1c'..='\x1f') - }) - || [ - // http://yaml.org/type/bool.html - // Note: 'y', 'Y', 'n', 'N', is not quoted deliberately, as in libyaml. PyYAML also parse - // them as string, not booleans, although it is violating the YAML 1.1 specification. - // See https://github.com/dtolnay/serde-yaml/pull/83#discussion_r152628088. 
- "yes", "Yes", "YES", "no", "No", "NO", "True", "TRUE", "true", "False", "FALSE", - "false", "on", "On", "ON", "off", "Off", "OFF", - // http://yaml.org/type/null.html - "null", "Null", "NULL", "~", - ] - .contains(&string) - || string.starts_with('.') - || string.starts_with("0x") - || string.parse::().is_ok() - || string.parse::().is_ok() -} - -#[cfg(test)] -mod test { - use super::YamlEmitter; - use crate::YamlLoader; - - #[test] - fn test_multiline_string() { - let input = r#"{foo: "bar!\nbar!", baz: 42}"#; - let parsed = YamlLoader::load_from_str(input).unwrap(); - let mut output = String::new(); - let mut emitter = YamlEmitter::new(&mut output); - emitter.multiline_strings(true); - emitter.dump(&parsed[0]).unwrap(); - } -} diff --git a/parser/src/lib.rs b/parser/src/lib.rs index d430177..a820a75 100644 --- a/parser/src/lib.rs +++ b/parser/src/lib.rs @@ -2,40 +2,27 @@ // Copyright 2023, Ethiraric. // See the LICENSE file at the top-level directory of this distribution. -//! YAML 1.2 implementation in pure Rust. +//! YAML 1.2 parser implementation in pure Rust. +//! +//! **If you want to load to a YAML Rust structure or manipulate YAML objects, use `saphyr` instead +//! of `saphyr-parser`. This crate contains only the parser.** +//! +//! This is a YAML 1.2 parser implementation and low-level parsing API for YAML. It allows users to +//! fetch a stream of YAML events from a stream of characters/bytes. //! //! # Usage //! -//! This crate is [on github](https://github.com/Ethiraric/yaml-rust2) and can be used by adding -//! `yaml-rust2` to the dependencies in your project's `Cargo.toml`. +//! This crate is [on github](https://github.com/saphyr-rs/saphyr-parser) and can be used by adding +//! `saphyr-parser` to the dependencies in your project's `Cargo.toml`. //! //! ```toml //! [dependencies] -//! yaml-rust2 = "0.8.0" -//! ``` -//! -//! # Examples -//! Parse a string into `Vec` and then serialize it as a YAML string. -//! -//! ``` -//! 
use yaml_rust2::{YamlLoader, YamlEmitter}; -//! -//! let docs = YamlLoader::load_from_str("[1, 2, 3]").unwrap(); -//! let doc = &docs[0]; // select the first YAML document -//! assert_eq!(doc[0].as_i64().unwrap(), 1); // access elements by index -//! -//! let mut out_str = String::new(); -//! let mut emitter = YamlEmitter::new(&mut out_str); -//! emitter.dump(doc).unwrap(); // dump the YAML object to a String -//! +//! saphyr-parser = "0.0.1" //! ``` //! //! # Features //! **Note:** With all features disabled, this crate's MSRV is `1.65.0`. //! -//! #### `encoding` (_enabled by default_) -//! Enables encoding-aware decoding of Yaml documents. -//! //! The MSRV for this feature is `1.70.0`. //! //! #### `debug_prints` @@ -47,18 +34,11 @@ #![warn(missing_docs, clippy::pedantic)] -extern crate hashlink; - pub(crate) mod char_traits; #[macro_use] pub(crate) mod debug; -pub mod emitter; pub mod parser; pub mod scanner; -pub mod yaml; -// reexport key APIs -pub use crate::emitter::{EmitError, YamlEmitter}; -pub use crate::parser::Event; -pub use crate::scanner::ScanError; -pub use crate::yaml::{Yaml, YamlLoader}; +pub use crate::parser::{Event, EventReceiver, MarkedEventReceiver, Parser, Tag}; +pub use crate::scanner::{Marker, ScanError, TScalarStyle}; diff --git a/parser/src/parser.rs b/parser/src/parser.rs index 59869a2..f2be076 100644 --- a/parser/src/parser.rs +++ b/parser/src/parser.rs @@ -1,23 +1,20 @@ //! Home to the YAML Parser. //! //! The parser takes input from the [`crate::scanner::Scanner`], performs final checks for YAML -//! compliance, and emits a stream of tokens that can be used by the [`crate::YamlLoader`] to -//! construct the [`crate::Yaml`] object. +//! compliance, and emits a stream of YAML events. This stream can for instance be used to create +//! YAML objects. 
use crate::scanner::{Marker, ScanError, Scanner, TScalarStyle, Token, TokenType}; use std::collections::HashMap; #[derive(Clone, Copy, PartialEq, Debug, Eq)] enum State { - /// We await the start of the stream. StreamStart, ImplicitDocumentStart, DocumentStart, DocumentContent, DocumentEnd, BlockNode, - // BlockNodeOrIndentlessSequence, - // FlowNode, BlockSequenceFirstEntry, BlockSequenceEntry, IndentlessSequenceEntry, @@ -165,7 +162,7 @@ pub struct Parser { /// /// # Example /// ``` -/// # use yaml_rust2::parser::{Event, EventReceiver, Parser}; +/// # use saphyr_parser::{Event, EventReceiver, Parser}; /// # /// /// Sink of events. Collects them into an array. /// struct EventSink { @@ -1088,10 +1085,17 @@ impl> Parser { } } +impl> Iterator for Parser { + type Item = Result<(Event, Marker), ScanError>; + + fn next(&mut self) -> Option { + Some(self.next_token()) + } +} + #[cfg(test)] mod test { use super::{Event, Parser}; - use crate::YamlLoader; #[test] fn test_peek_eq_parse() { @@ -1126,18 +1130,29 @@ foo: "bar" --- !t!2 &2 baz: "qux" "#; - let mut parser = Parser::new_from_str(text).keep_tags(true); - let result = YamlLoader::load_from_parser(&mut parser); - assert!(result.is_ok()); - let docs = result.unwrap(); - assert_eq!(docs.len(), 2); - let yaml = &docs[0]; - assert_eq!(yaml["foo"].as_str(), Some("bar")); - let yaml = &docs[1]; - assert_eq!(yaml["baz"].as_str(), Some("qux")); + for x in Parser::new_from_str(text).keep_tags(true) { + let x = x.unwrap(); + match x.0 { + Event::StreamEnd => break, + Event::MappingStart(_, tag) => { + let tag = tag.unwrap(); + assert_eq!(tag.handle, "tag:test,2024:"); + } + _ => (), + } + } - let mut parser = Parser::new_from_str(text).keep_tags(false); - let result = YamlLoader::load_from_parser(&mut parser); - assert!(result.is_err()); + for x in Parser::new_from_str(text).keep_tags(false) { + match x { + Err(..) 
=> { + // Test successful + return; + } + Ok((Event::StreamEnd, _)) => { + panic!("Test failed, did not encounter error") + } + _ => (), + } + } } } diff --git a/parser/src/yaml.rs b/parser/src/yaml.rs deleted file mode 100644 index 3c429d5..0000000 --- a/parser/src/yaml.rs +++ /dev/null @@ -1,833 +0,0 @@ -//! YAML objects manipulation utilities. - -#![allow(clippy::module_name_repetitions)] - -use std::borrow::Cow; -use std::ops::ControlFlow; -use std::{collections::BTreeMap, convert::TryFrom, mem, ops::Index, ops::IndexMut}; - -#[cfg(feature = "encoding")] -use encoding_rs::{Decoder, DecoderResult, Encoding}; -use hashlink::LinkedHashMap; - -use crate::parser::{Event, MarkedEventReceiver, Parser, Tag}; -use crate::scanner::{Marker, ScanError, TScalarStyle}; - -/// A YAML node is stored as this `Yaml` enumeration, which provides an easy way to -/// access your YAML document. -/// -/// # Examples -/// -/// ``` -/// use yaml_rust2::Yaml; -/// let foo = Yaml::from_str("-123"); // convert the string to the appropriate YAML type -/// assert_eq!(foo.as_i64().unwrap(), -123); -/// -/// // iterate over an Array -/// let vec = Yaml::Array(vec![Yaml::Integer(1), Yaml::Integer(2)]); -/// for v in vec.as_vec().unwrap() { -/// assert!(v.as_i64().is_some()); -/// } -/// ``` -#[derive(Clone, PartialEq, PartialOrd, Debug, Eq, Ord, Hash)] -pub enum Yaml { - /// Float types are stored as String and parsed on demand. - /// Note that `f64` does NOT implement Eq trait and can NOT be stored in `BTreeMap`. - Real(String), - /// YAML int is stored as i64. - Integer(i64), - /// YAML scalar. - String(String), - /// YAML bool, e.g. `true` or `false`. - Boolean(bool), - /// YAML array, can be accessed as a `Vec`. - Array(Array), - /// YAML hash, can be accessed as a `LinkedHashMap`. - /// - /// Insertion order will match the order of insertion into the map. - Hash(Hash), - /// Alias, not fully supported yet. - Alias(usize), - /// YAML null, e.g. `null` or `~`. 
- Null, - /// Accessing a nonexistent node via the Index trait returns `BadValue`. This - /// simplifies error handling in the calling code. Invalid type conversion also - /// returns `BadValue`. - BadValue, -} - -/// The type contained in the `Yaml::Array` variant. This corresponds to YAML sequences. -pub type Array = Vec; -/// The type contained in the `Yaml::Hash` variant. This corresponds to YAML mappings. -pub type Hash = LinkedHashMap; - -// parse f64 as Core schema -// See: https://github.com/chyh1990/yaml-rust/issues/51 -fn parse_f64(v: &str) -> Option { - match v { - ".inf" | ".Inf" | ".INF" | "+.inf" | "+.Inf" | "+.INF" => Some(f64::INFINITY), - "-.inf" | "-.Inf" | "-.INF" => Some(f64::NEG_INFINITY), - ".nan" | "NaN" | ".NAN" => Some(f64::NAN), - _ => v.parse::().ok(), - } -} - -/// Main structure for quickly parsing YAML. -/// -/// See [`YamlLoader::load_from_str`]. -#[derive(Default)] -pub struct YamlLoader { - /// The different YAML documents that are loaded. - docs: Vec, - // states - // (current node, anchor_id) tuple - doc_stack: Vec<(Yaml, usize)>, - key_stack: Vec, - anchor_map: BTreeMap, -} - -impl MarkedEventReceiver for YamlLoader { - fn on_event(&mut self, ev: Event, _: Marker) { - // println!("EV {:?}", ev); - match ev { - Event::DocumentStart | Event::Nothing | Event::StreamStart | Event::StreamEnd => { - // do nothing - } - Event::DocumentEnd => { - match self.doc_stack.len() { - // empty document - 0 => self.docs.push(Yaml::BadValue), - 1 => self.docs.push(self.doc_stack.pop().unwrap().0), - _ => unreachable!(), - } - } - Event::SequenceStart(aid, _) => { - self.doc_stack.push((Yaml::Array(Vec::new()), aid)); - } - Event::SequenceEnd => { - let node = self.doc_stack.pop().unwrap(); - self.insert_new_node(node); - } - Event::MappingStart(aid, _) => { - self.doc_stack.push((Yaml::Hash(Hash::new()), aid)); - self.key_stack.push(Yaml::BadValue); - } - Event::MappingEnd => { - self.key_stack.pop().unwrap(); - let node = 
self.doc_stack.pop().unwrap(); - self.insert_new_node(node); - } - Event::Scalar(v, style, aid, tag) => { - let node = if style != TScalarStyle::Plain { - Yaml::String(v) - } else if let Some(Tag { - ref handle, - ref suffix, - }) = tag - { - if handle == "tag:yaml.org,2002:" { - match suffix.as_ref() { - "bool" => { - // "true" or "false" - match v.parse::() { - Err(_) => Yaml::BadValue, - Ok(v) => Yaml::Boolean(v), - } - } - "int" => match v.parse::() { - Err(_) => Yaml::BadValue, - Ok(v) => Yaml::Integer(v), - }, - "float" => match parse_f64(&v) { - Some(_) => Yaml::Real(v), - None => Yaml::BadValue, - }, - "null" => match v.as_ref() { - "~" | "null" => Yaml::Null, - _ => Yaml::BadValue, - }, - _ => Yaml::String(v), - } - } else { - Yaml::String(v) - } - } else { - // Datatype is not specified, or unrecognized - Yaml::from_str(&v) - }; - - self.insert_new_node((node, aid)); - } - Event::Alias(id) => { - let n = match self.anchor_map.get(&id) { - Some(v) => v.clone(), - None => Yaml::BadValue, - }; - self.insert_new_node((n, 0)); - } - } - // println!("DOC {:?}", self.doc_stack); - } -} - -/// An error that happened when loading a YAML document. -#[derive(Debug)] -pub enum LoadError { - /// An I/O error. - IO(std::io::Error), - /// An error within the scanner. This indicates a malformed YAML input. - Scan(ScanError), - /// A decoding error (e.g.: Invalid UTF_8). 
- Decode(std::borrow::Cow<'static, str>), -} - -impl From for LoadError { - fn from(error: std::io::Error) -> Self { - LoadError::IO(error) - } -} - -impl YamlLoader { - fn insert_new_node(&mut self, node: (Yaml, usize)) { - // valid anchor id starts from 1 - if node.1 > 0 { - self.anchor_map.insert(node.1, node.0.clone()); - } - if self.doc_stack.is_empty() { - self.doc_stack.push(node); - } else { - let parent = self.doc_stack.last_mut().unwrap(); - match *parent { - (Yaml::Array(ref mut v), _) => v.push(node.0), - (Yaml::Hash(ref mut h), _) => { - let cur_key = self.key_stack.last_mut().unwrap(); - // current node is a key - if cur_key.is_badvalue() { - *cur_key = node.0; - // current node is a value - } else { - let mut newkey = Yaml::BadValue; - mem::swap(&mut newkey, cur_key); - h.insert(newkey, node.0); - } - } - _ => unreachable!(), - } - } - } - - /// Load the given string as a set of YAML documents. - /// - /// The `source` is interpreted as YAML documents and is parsed. Parsing succeeds if and only - /// if all documents are parsed successfully. An error in a latter document prevents the former - /// from being returned. - /// # Errors - /// Returns `ScanError` when loading fails. - pub fn load_from_str(source: &str) -> Result, ScanError> { - Self::load_from_iter(source.chars()) - } - - /// Load the contents of the given iterator as a set of YAML documents. - /// - /// The `source` is interpreted as YAML documents and is parsed. Parsing succeeds if and only - /// if all documents are parsed successfully. An error in a latter document prevents the former - /// from being returned. - /// # Errors - /// Returns `ScanError` when loading fails. - pub fn load_from_iter>(source: I) -> Result, ScanError> { - let mut parser = Parser::new(source); - Self::load_from_parser(&mut parser) - } - - /// Load the contents from the specified Parser as a set of YAML documents. - /// - /// Parsing succeeds if and only if all documents are parsed successfully. 
- /// An error in a latter document prevents the former from being returned. - /// # Errors - /// Returns `ScanError` when loading fails. - pub fn load_from_parser>( - parser: &mut Parser, - ) -> Result, ScanError> { - let mut loader = YamlLoader::default(); - parser.load(&mut loader, true)?; - Ok(loader.docs) - } - - /// Return a reference to the parsed Yaml documents. - #[must_use] - pub fn documents(&self) -> &[Yaml] { - &self.docs - } -} - -/// The signature of the function to call when using [`YAMLDecodingTrap::Call`]. -/// -/// The arguments are as follows: -/// * `malformation_length`: The length of the sequence the decoder failed to decode. -/// * `bytes_read_after_malformation`: The number of lookahead bytes the decoder consumed after -/// the malformation. -/// * `input_at_malformation`: What the input buffer is at the malformation. -/// This is the buffer starting at the malformation. The first `malformation_length` bytes are -/// the problematic sequence. The following `bytes_read_after_malformation` are already stored -/// in the decoder and will not be re-fed. -/// * `output`: The output string. -/// -/// The function must modify `output` as it feels is best. For instance, one could recreate the -/// behavior of [`YAMLDecodingTrap::Ignore`] with an empty function, [`YAMLDecodingTrap::Replace`] -/// by pushing a `\u{FFFD}` into `output` and [`YAMLDecodingTrap::Strict`] by returning -/// [`ControlFlow::Break`]. -/// -/// # Returns -/// The function must return [`ControlFlow::Continue`] if decoding may continue or -/// [`ControlFlow::Break`] if decoding must be aborted. An optional error string may be supplied. -#[cfg(feature = "encoding")] -pub type YAMLDecodingTrapFn = fn( - malformation_length: u8, - bytes_read_after_malformation: u8, - input_at_malformation: &[u8], - output: &mut String, -) -> ControlFlow>; - -/// The behavior [`YamlDecoder`] must have when an decoding error occurs. 
-#[cfg(feature = "encoding")] -#[derive(Copy, Clone, PartialEq, Eq)] -pub enum YAMLDecodingTrap { - /// Ignore the offending bytes, remove them from the output. - Ignore, - /// Error out. - Strict, - /// Replace them with the Unicode REPLACEMENT CHARACTER. - Replace, - /// Call the user-supplied function upon decoding malformation. - Call(YAMLDecodingTrapFn), -} - -/// `YamlDecoder` is a `YamlLoader` builder that allows you to supply your own encoding error trap. -/// For example, to read a YAML file while ignoring Unicode decoding errors you can set the -/// `encoding_trap` to `encoding::DecoderTrap::Ignore`. -/// ```rust -/// use yaml_rust2::yaml::{YamlDecoder, YAMLDecodingTrap}; -/// -/// let string = b"--- -/// a\xa9: 1 -/// b: 2.2 -/// c: [1, 2] -/// "; -/// let out = YamlDecoder::read(string as &[u8]) -/// .encoding_trap(YAMLDecodingTrap::Ignore) -/// .decode() -/// .unwrap(); -/// ``` -#[cfg(feature = "encoding")] -pub struct YamlDecoder { - source: T, - trap: YAMLDecodingTrap, -} - -#[cfg(feature = "encoding")] -impl YamlDecoder { - /// Create a `YamlDecoder` decoding the given source. - pub fn read(source: T) -> YamlDecoder { - YamlDecoder { - source, - trap: YAMLDecodingTrap::Strict, - } - } - - /// Set the behavior of the decoder when the encoding is invalid. - pub fn encoding_trap(&mut self, trap: YAMLDecodingTrap) -> &mut Self { - self.trap = trap; - self - } - - /// Run the decode operation with the source and trap the `YamlDecoder` was built with. - /// - /// # Errors - /// Returns `LoadError` when decoding fails. - pub fn decode(&mut self) -> Result, LoadError> { - let mut buffer = Vec::new(); - self.source.read_to_end(&mut buffer)?; - - // Check if the `encoding` library can detect encoding from the BOM, otherwise use - // `detect_utf16_endianness`. 
- let (encoding, _) = - Encoding::for_bom(&buffer).unwrap_or_else(|| (detect_utf16_endianness(&buffer), 2)); - let mut decoder = encoding.new_decoder(); - let mut output = String::new(); - - // Decode the input buffer. - decode_loop(&buffer, &mut output, &mut decoder, self.trap)?; - - YamlLoader::load_from_str(&output).map_err(LoadError::Scan) - } -} - -/// Perform a loop of [`Decoder::decode_to_string`], reallocating `output` if needed. -#[cfg(feature = "encoding")] -fn decode_loop( - input: &[u8], - output: &mut String, - decoder: &mut Decoder, - trap: YAMLDecodingTrap, -) -> Result<(), LoadError> { - output.reserve(input.len()); - let mut total_bytes_read = 0; - - loop { - match decoder.decode_to_string_without_replacement(&input[total_bytes_read..], output, true) - { - // If the input is empty, we processed the whole input. - (DecoderResult::InputEmpty, _) => break Ok(()), - // If the output is full, we must reallocate. - (DecoderResult::OutputFull, bytes_read) => { - total_bytes_read += bytes_read; - // The output is already reserved to the size of the input. We slowly resize. Here, - // we're expecting that 10% of bytes will double in size when converting to UTF-8. - output.reserve(input.len() / 10); - } - (DecoderResult::Malformed(malformed_len, bytes_after_malformed), bytes_read) => { - total_bytes_read += bytes_read; - match trap { - // Ignore (skip over) malformed character. - YAMLDecodingTrap::Ignore => {} - // Replace them with the Unicode REPLACEMENT CHARACTER. - YAMLDecodingTrap::Replace => { - output.push('\u{FFFD}'); - } - // Otherwise error, getting as much context as possible. 
- YAMLDecodingTrap::Strict => { - let malformed_len = malformed_len as usize; - let bytes_after_malformed = bytes_after_malformed as usize; - let byte_idx = total_bytes_read - (malformed_len + bytes_after_malformed); - let malformed_sequence = &input[byte_idx..byte_idx + malformed_len]; - - break Err(LoadError::Decode(Cow::Owned(format!( - "Invalid character sequence at {byte_idx}: {malformed_sequence:?}", - )))); - } - YAMLDecodingTrap::Call(callback) => { - let byte_idx = - total_bytes_read - ((malformed_len + bytes_after_malformed) as usize); - let malformed_sequence = - &input[byte_idx..byte_idx + malformed_len as usize]; - if let ControlFlow::Break(error) = callback( - malformed_len, - bytes_after_malformed, - &input[byte_idx..], - output, - ) { - if error.is_empty() { - break Err(LoadError::Decode(Cow::Owned(format!( - "Invalid character sequence at {byte_idx}: {malformed_sequence:?}", - )))); - } - break Err(LoadError::Decode(error)); - } - } - } - } - } - } -} - -/// The encoding crate knows how to tell apart UTF-8 from UTF-16LE and utf-16BE, when the -/// bytestream starts with BOM codepoint. -/// However, it doesn't even attempt to guess the UTF-16 endianness of the input bytestream since -/// in the general case the bytestream could start with a codepoint that uses both bytes. -/// -/// The YAML-1.2 spec mandates that the first character of a YAML document is an ASCII character. -/// This allows the encoding to be deduced by the pattern of null (#x00) characters. -// -/// See spec at -#[cfg(feature = "encoding")] -fn detect_utf16_endianness(b: &[u8]) -> &'static Encoding { - if b.len() > 1 && (b[0] != b[1]) { - if b[0] == 0 { - return encoding_rs::UTF_16BE; - } else if b[1] == 0 { - return encoding_rs::UTF_16LE; - } - } - encoding_rs::UTF_8 -} - -macro_rules! define_as ( - ($name:ident, $t:ident, $yt:ident) => ( -/// Get a copy of the inner object in the YAML enum if it is a `$t`. 
-/// -/// # Return -/// If the variant of `self` is `Yaml::$yt`, return `Some($t)` with a copy of the `$t` contained. -/// Otherwise, return `None`. -#[must_use] -pub fn $name(&self) -> Option<$t> { - match *self { - Yaml::$yt(v) => Some(v), - _ => None - } -} - ); -); - -macro_rules! define_as_ref ( - ($name:ident, $t:ty, $yt:ident) => ( -/// Get a reference to the inner object in the YAML enum if it is a `$t`. -/// -/// # Return -/// If the variant of `self` is `Yaml::$yt`, return `Some(&$t)` with the `$t` contained. Otherwise, -/// return `None`. -#[must_use] -pub fn $name(&self) -> Option<$t> { - match *self { - Yaml::$yt(ref v) => Some(v), - _ => None - } -} - ); -); - -macro_rules! define_as_mut_ref ( - ($name:ident, $t:ty, $yt:ident) => ( -/// Get a mutable reference to the inner object in the YAML enum if it is a `$t`. -/// -/// # Return -/// If the variant of `self` is `Yaml::$yt`, return `Some(&mut $t)` with the `$t` contained. -/// Otherwise, return `None`. -#[must_use] -pub fn $name(&mut self) -> Option<$t> { - match *self { - Yaml::$yt(ref mut v) => Some(v), - _ => None - } -} - ); -); - -macro_rules! define_into ( - ($name:ident, $t:ty, $yt:ident) => ( -/// Get the inner object in the YAML enum if it is a `$t`. -/// -/// # Return -/// If the variant of `self` is `Yaml::$yt`, return `Some($t)` with the `$t` contained. Otherwise, -/// return `None`. 
-#[must_use] -pub fn $name(self) -> Option<$t> { - match self { - Yaml::$yt(v) => Some(v), - _ => None - } -} - ); -); - -impl Yaml { - define_as!(as_bool, bool, Boolean); - define_as!(as_i64, i64, Integer); - - define_as_ref!(as_str, &str, String); - define_as_ref!(as_hash, &Hash, Hash); - define_as_ref!(as_vec, &Array, Array); - - define_as_mut_ref!(as_mut_hash, &mut Hash, Hash); - define_as_mut_ref!(as_mut_vec, &mut Array, Array); - - define_into!(into_bool, bool, Boolean); - define_into!(into_i64, i64, Integer); - define_into!(into_string, String, String); - define_into!(into_hash, Hash, Hash); - define_into!(into_vec, Array, Array); - - /// Return whether `self` is a [`Yaml::Null`] node. - #[must_use] - pub fn is_null(&self) -> bool { - matches!(*self, Yaml::Null) - } - - /// Return whether `self` is a [`Yaml::BadValue`] node. - #[must_use] - pub fn is_badvalue(&self) -> bool { - matches!(*self, Yaml::BadValue) - } - - /// Return whether `self` is a [`Yaml::Array`] node. - #[must_use] - pub fn is_array(&self) -> bool { - matches!(*self, Yaml::Array(_)) - } - - /// Return the `f64` value contained in this YAML node. - /// - /// If the node is not a [`Yaml::Real`] YAML node or its contents is not a valid `f64` string, - /// `None` is returned. - #[must_use] - pub fn as_f64(&self) -> Option { - if let Yaml::Real(ref v) = self { - parse_f64(v) - } else { - None - } - } - - /// Return the `f64` value contained in this YAML node. - /// - /// If the node is not a [`Yaml::Real`] YAML node or its contents is not a valid `f64` string, - /// `None` is returned. - #[must_use] - pub fn into_f64(self) -> Option { - self.as_f64() - } - - /// If a value is null or otherwise bad (see variants), consume it and - /// replace it with a given value `other`. Otherwise, return self unchanged. 
- /// - /// ``` - /// use yaml_rust2::yaml::Yaml; - /// - /// assert_eq!(Yaml::BadValue.or(Yaml::Integer(3)), Yaml::Integer(3)); - /// assert_eq!(Yaml::Integer(3).or(Yaml::BadValue), Yaml::Integer(3)); - /// ``` - #[must_use] - pub fn or(self, other: Self) -> Self { - match self { - Yaml::BadValue | Yaml::Null => other, - this => this, - } - } - - /// See `or` for behavior. This performs the same operations, but with - /// borrowed values for less linear pipelines. - #[must_use] - pub fn borrowed_or<'a>(&'a self, other: &'a Self) -> &'a Self { - match self { - Yaml::BadValue | Yaml::Null => other, - this => this, - } - } -} - -#[cfg_attr(feature = "cargo-clippy", allow(clippy::should_implement_trait))] -impl Yaml { - /// Convert a string to a [`Yaml`] node. - /// - /// [`Yaml`] does not implement [`std::str::FromStr`] since conversion may not fail. This - /// function falls back to [`Yaml::String`] if nothing else matches. - /// - /// # Examples - /// ``` - /// # use yaml_rust2::yaml::Yaml; - /// assert!(matches!(Yaml::from_str("42"), Yaml::Integer(42))); - /// assert!(matches!(Yaml::from_str("0x2A"), Yaml::Integer(42))); - /// assert!(matches!(Yaml::from_str("0o52"), Yaml::Integer(42))); - /// assert!(matches!(Yaml::from_str("~"), Yaml::Null)); - /// assert!(matches!(Yaml::from_str("null"), Yaml::Null)); - /// assert!(matches!(Yaml::from_str("true"), Yaml::Boolean(true))); - /// assert!(matches!(Yaml::from_str("3.14"), Yaml::Real(_))); - /// assert!(matches!(Yaml::from_str("foo"), Yaml::String(_))); - /// ``` - #[must_use] - pub fn from_str(v: &str) -> Yaml { - if let Some(number) = v.strip_prefix("0x") { - if let Ok(i) = i64::from_str_radix(number, 16) { - return Yaml::Integer(i); - } - } else if let Some(number) = v.strip_prefix("0o") { - if let Ok(i) = i64::from_str_radix(number, 8) { - return Yaml::Integer(i); - } - } else if let Some(number) = v.strip_prefix('+') { - if let Ok(i) = number.parse::() { - return Yaml::Integer(i); - } - } - match v { - "~" | 
"null" => Yaml::Null, - "true" => Yaml::Boolean(true), - "false" => Yaml::Boolean(false), - _ => { - if let Ok(integer) = v.parse::() { - Yaml::Integer(integer) - } else if parse_f64(v).is_some() { - Yaml::Real(v.to_owned()) - } else { - Yaml::String(v.to_owned()) - } - } - } - } -} - -static BAD_VALUE: Yaml = Yaml::BadValue; -impl<'a> Index<&'a str> for Yaml { - type Output = Yaml; - - fn index(&self, idx: &'a str) -> &Yaml { - let key = Yaml::String(idx.to_owned()); - match self.as_hash() { - Some(h) => h.get(&key).unwrap_or(&BAD_VALUE), - None => &BAD_VALUE, - } - } -} - -impl<'a> IndexMut<&'a str> for Yaml { - fn index_mut(&mut self, idx: &'a str) -> &mut Yaml { - let key = Yaml::String(idx.to_owned()); - match self.as_mut_hash() { - Some(h) => h.get_mut(&key).unwrap(), - None => panic!("Not a hash type"), - } - } -} - -impl Index for Yaml { - type Output = Yaml; - - fn index(&self, idx: usize) -> &Yaml { - if let Some(v) = self.as_vec() { - v.get(idx).unwrap_or(&BAD_VALUE) - } else if let Some(v) = self.as_hash() { - let key = Yaml::Integer(i64::try_from(idx).unwrap()); - v.get(&key).unwrap_or(&BAD_VALUE) - } else { - &BAD_VALUE - } - } -} - -impl IndexMut for Yaml { - /// Perform indexing if `self` is a sequence or a mapping. - /// - /// # Panics - /// This function panics if the index given is out of range (as per [`IndexMut`]). If `self` i - /// a [`Yaml::Array`], this is when the index is bigger or equal to the length of the - /// underlying `Vec`. If `self` is a [`Yaml::Hash`], this is when the mapping sequence does no - /// contain [`Yaml::Integer`]`(idx)` as a key. - /// - /// This function also panics if `self` is not a [`Yaml::Array`] nor a [`Yaml::Hash`]. 
- fn index_mut(&mut self, idx: usize) -> &mut Yaml { - match self { - Yaml::Array(sequence) => sequence.index_mut(idx), - Yaml::Hash(mapping) => { - let key = Yaml::Integer(i64::try_from(idx).unwrap()); - mapping.get_mut(&key).unwrap() - } - _ => panic!("Attempting to index but `self` is not a sequence nor a mapping"), - } - } -} - -impl IntoIterator for Yaml { - type Item = Yaml; - type IntoIter = YamlIter; - - fn into_iter(self) -> Self::IntoIter { - YamlIter { - yaml: self.into_vec().unwrap_or_default().into_iter(), - } - } -} - -/// An iterator over a [`Yaml`] node. -pub struct YamlIter { - yaml: std::vec::IntoIter, -} - -impl Iterator for YamlIter { - type Item = Yaml; - - fn next(&mut self) -> Option { - self.yaml.next() - } -} - -#[cfg(test)] -mod test { - use super::{YAMLDecodingTrap, Yaml, YamlDecoder}; - - #[test] - fn test_read_bom() { - let s = b"\xef\xbb\xbf--- -a: 1 -b: 2.2 -c: [1, 2] -"; - let out = YamlDecoder::read(s as &[u8]).decode().unwrap(); - let doc = &out[0]; - assert_eq!(doc["a"].as_i64().unwrap(), 1i64); - assert!((doc["b"].as_f64().unwrap() - 2.2f64).abs() <= f64::EPSILON); - assert_eq!(doc["c"][1].as_i64().unwrap(), 2i64); - assert!(doc["d"][0].is_badvalue()); - } - - #[test] - fn test_read_utf16le() { - let s = b"\xff\xfe-\x00-\x00-\x00 -\x00a\x00:\x00 \x001\x00 -\x00b\x00:\x00 \x002\x00.\x002\x00 -\x00c\x00:\x00 \x00[\x001\x00,\x00 \x002\x00]\x00 -\x00"; - let out = YamlDecoder::read(s as &[u8]).decode().unwrap(); - let doc = &out[0]; - println!("GOT: {doc:?}"); - assert_eq!(doc["a"].as_i64().unwrap(), 1i64); - assert!((doc["b"].as_f64().unwrap() - 2.2f64) <= f64::EPSILON); - assert_eq!(doc["c"][1].as_i64().unwrap(), 2i64); - assert!(doc["d"][0].is_badvalue()); - } - - #[test] - fn test_read_utf16be() { - let s = b"\xfe\xff\x00-\x00-\x00-\x00 -\x00a\x00:\x00 \x001\x00 -\x00b\x00:\x00 \x002\x00.\x002\x00 -\x00c\x00:\x00 \x00[\x001\x00,\x00 \x002\x00]\x00 -"; - let out = YamlDecoder::read(s as &[u8]).decode().unwrap(); - let doc = 
&out[0]; - println!("GOT: {doc:?}"); - assert_eq!(doc["a"].as_i64().unwrap(), 1i64); - assert!((doc["b"].as_f64().unwrap() - 2.2f64).abs() <= f64::EPSILON); - assert_eq!(doc["c"][1].as_i64().unwrap(), 2i64); - assert!(doc["d"][0].is_badvalue()); - } - - #[test] - fn test_read_utf16le_nobom() { - let s = b"-\x00-\x00-\x00 -\x00a\x00:\x00 \x001\x00 -\x00b\x00:\x00 \x002\x00.\x002\x00 -\x00c\x00:\x00 \x00[\x001\x00,\x00 \x002\x00]\x00 -\x00"; - let out = YamlDecoder::read(s as &[u8]).decode().unwrap(); - let doc = &out[0]; - println!("GOT: {doc:?}"); - assert_eq!(doc["a"].as_i64().unwrap(), 1i64); - assert!((doc["b"].as_f64().unwrap() - 2.2f64).abs() <= f64::EPSILON); - assert_eq!(doc["c"][1].as_i64().unwrap(), 2i64); - assert!(doc["d"][0].is_badvalue()); - } - - #[test] - fn test_read_trap() { - let s = b"--- -a\xa9: 1 -b: 2.2 -c: [1, 2] -"; - let out = YamlDecoder::read(s as &[u8]) - .encoding_trap(YAMLDecodingTrap::Ignore) - .decode() - .unwrap(); - let doc = &out[0]; - println!("GOT: {doc:?}"); - assert_eq!(doc["a"].as_i64().unwrap(), 1i64); - assert!((doc["b"].as_f64().unwrap() - 2.2f64).abs() <= f64::EPSILON); - assert_eq!(doc["c"][1].as_i64().unwrap(), 2i64); - assert!(doc["d"][0].is_badvalue()); - } - - #[test] - fn test_or() { - assert_eq!(Yaml::Null.or(Yaml::Integer(3)), Yaml::Integer(3)); - assert_eq!(Yaml::Integer(3).or(Yaml::Integer(7)), Yaml::Integer(3)); - } -} diff --git a/parser/tests/basic.rs b/parser/tests/basic.rs index b769c2b..ebb39cf 100644 --- a/parser/tests/basic.rs +++ b/parser/tests/basic.rs @@ -1,47 +1,23 @@ #![allow(clippy::bool_assert_comparison)] #![allow(clippy::float_cmp)] -use std::vec; -use yaml_rust2::{Yaml, YamlEmitter, YamlLoader}; +use saphyr_parser::{Event, Parser, ScanError, TScalarStyle}; -#[test] -fn test_api() { - let s = " -# from yaml-cpp example -- name: Ogre - position: [0, 5, 0] - powers: - - name: Club - damage: 10 - - name: Fist - damage: 8 -- name: Dragon - position: [1, 0, 10] - powers: - - name: Fire Breath - 
damage: 25 - - name: Claws - damage: 15 -- name: Wizard - position: [5, -3, 0] - powers: - - name: Acid Rain - damage: 50 - - name: Staff - damage: 3 -"; - let docs = YamlLoader::load_from_str(s).unwrap(); - let doc = &docs[0]; - - assert_eq!(doc[0]["name"].as_str().unwrap(), "Ogre"); - - let mut writer = String::new(); - { - let mut emitter = YamlEmitter::new(&mut writer); - emitter.dump(doc).unwrap(); +/// Run the parser through the string. +/// +/// # Returns +/// This functions returns the events if parsing succeeds, the error the parser returned otherwise. +fn run_parser(input: &str) -> Result, ScanError> { + let mut events = vec![]; + for x in Parser::new_from_str(input) { + let x = x?; + let end = x.0 == Event::StreamEnd; + events.push(x.0); + if end { + break; + } } - - assert!(!writer.is_empty()); + Ok(events) } #[test] @@ -52,9 +28,7 @@ scalar key: [1, 2]] key1:a2 "; - let Err(error) = YamlLoader::load_from_str(s) else { - panic!() - }; + let Err(error) = run_parser(s) else { panic!() }; assert_eq!( error.info(), "mapping values are not allowed in this context" @@ -65,176 +39,143 @@ key1:a2 ); } -#[test] -fn test_coerce() { - let s = "--- -a: 1 -b: 2.2 -c: [1, 2] -"; - let out = YamlLoader::load_from_str(s).unwrap(); - let doc = &out[0]; - assert_eq!(doc["a"].as_i64().unwrap(), 1i64); - assert_eq!(doc["b"].as_f64().unwrap(), 2.2f64); - assert_eq!(doc["c"][1].as_i64().unwrap(), 2i64); - assert!(doc["d"][0].is_badvalue()); -} - #[test] fn test_empty_doc() { - let s: String = String::new(); - YamlLoader::load_from_str(&s).unwrap(); - let s: String = "---".to_owned(); - assert_eq!(YamlLoader::load_from_str(&s).unwrap()[0], Yaml::Null); + assert_eq!( + run_parser("").unwrap(), + [Event::StreamStart, Event::StreamEnd] + ); + + assert_eq!( + run_parser("---").unwrap(), + [ + Event::StreamStart, + Event::DocumentStart, + Event::Scalar("~".to_string(), TScalarStyle::Plain, 0, None), + Event::DocumentEnd, + Event::StreamEnd, + ] + ); } #[test] -fn test_parser() { - 
let s: String = " -# comment -a0 bb: val -a1: - b1: 4 - b2: d -a2: 4 # i'm comment -a3: [1, 2, 3] -a4: - - - a1 - - a2 - - 2 -a5: 'single_quoted' -a6: \"double_quoted\" -a7: 你好 -" - .to_owned(); - let out = YamlLoader::load_from_str(&s).unwrap(); - let doc = &out[0]; - assert_eq!(doc["a7"].as_str().unwrap(), "你好"); +fn test_utf() { + assert_eq!( + run_parser("a: 你好").unwrap(), + [ + Event::StreamStart, + Event::DocumentStart, + Event::MappingStart(0, None), + Event::Scalar("a".to_string(), TScalarStyle::Plain, 0, None), + Event::Scalar("你好".to_string(), TScalarStyle::Plain, 0, None), + Event::MappingEnd, + Event::DocumentEnd, + Event::StreamEnd, + ] + ); +} + +#[test] +fn test_comments() { + let s = " +# This is a comment +a: b # This is another comment +## + # +"; + + assert_eq!( + run_parser(s).unwrap(), + [ + Event::StreamStart, + Event::DocumentStart, + Event::MappingStart(0, None), + Event::Scalar("a".to_string(), TScalarStyle::Plain, 0, None), + Event::Scalar("b".to_string(), TScalarStyle::Plain, 0, None), + Event::MappingEnd, + Event::DocumentEnd, + Event::StreamEnd, + ] + ); +} + +#[test] +fn test_quoting() { + let s = " +- plain +- 'squote' +- \"dquote\" +"; + + assert_eq!( + run_parser(s).unwrap(), + [ + Event::StreamStart, + Event::DocumentStart, + Event::SequenceStart(0, None), + Event::Scalar("plain".to_string(), TScalarStyle::Plain, 0, None), + Event::Scalar("squote".to_string(), TScalarStyle::SingleQuoted, 0, None), + Event::Scalar("dquote".to_string(), TScalarStyle::DoubleQuoted, 0, None), + Event::SequenceEnd, + Event::DocumentEnd, + Event::StreamEnd, + ] + ); } #[test] fn test_multi_doc() { let s = " -'a scalar' +a scalar --- -'a scalar' +a scalar --- -'a scalar' +a scalar "; - let out = YamlLoader::load_from_str(s).unwrap(); - assert_eq!(out.len(), 3); -} - -#[test] -fn test_anchor() { - let s = " -a1: &DEFAULT - b1: 4 - b2: d -a2: *DEFAULT -"; - let out = YamlLoader::load_from_str(s).unwrap(); - let doc = &out[0]; - 
assert_eq!(doc["a2"]["b1"].as_i64().unwrap(), 4); -} - -#[test] -fn test_bad_anchor() { - let s = " -a1: &DEFAULT - b1: 4 - b2: *DEFAULT -"; - let out = YamlLoader::load_from_str(s).unwrap(); - let doc = &out[0]; - assert_eq!(doc["a1"]["b2"], Yaml::BadValue); + assert_eq!( + run_parser(s).unwrap(), + [ + Event::StreamStart, + Event::DocumentStart, + Event::Scalar("a scalar".to_string(), TScalarStyle::Plain, 0, None), + Event::DocumentEnd, + Event::DocumentStart, + Event::Scalar("a scalar".to_string(), TScalarStyle::Plain, 0, None), + Event::DocumentEnd, + Event::DocumentStart, + Event::Scalar("a scalar".to_string(), TScalarStyle::Plain, 0, None), + Event::DocumentEnd, + Event::StreamEnd, + ] + ); } #[test] fn test_github_27() { // https://github.com/chyh1990/yaml-rust/issues/27 - let s = "&a"; - let out = YamlLoader::load_from_str(s).unwrap(); - let doc = &out[0]; - assert_eq!(doc.as_str().unwrap(), ""); -} - -#[test] -fn test_plain_datatype() { - let s = " -- 'string' -- \"string\" -- string -- 123 -- -321 -- 1.23 -- -1e4 -- ~ -- null -- true -- false -- !!str 0 -- !!int 100 -- !!float 2 -- !!null ~ -- !!bool true -- !!bool false -- 0xFF -# bad values -- !!int string -- !!float string -- !!bool null -- !!null val -- 0o77 -- [ 0xF, 0xF ] -- +12345 -- [ true, false ] -"; - let out = YamlLoader::load_from_str(s).unwrap(); - let doc = &out[0]; - - assert_eq!(doc[0].as_str().unwrap(), "string"); - assert_eq!(doc[1].as_str().unwrap(), "string"); - assert_eq!(doc[2].as_str().unwrap(), "string"); - assert_eq!(doc[3].as_i64().unwrap(), 123); - assert_eq!(doc[4].as_i64().unwrap(), -321); - assert_eq!(doc[5].as_f64().unwrap(), 1.23); - assert_eq!(doc[6].as_f64().unwrap(), -1e4); - assert!(doc[7].is_null()); - assert!(doc[8].is_null()); - assert_eq!(doc[9].as_bool().unwrap(), true); - assert_eq!(doc[10].as_bool().unwrap(), false); - assert_eq!(doc[11].as_str().unwrap(), "0"); - assert_eq!(doc[12].as_i64().unwrap(), 100); - assert_eq!(doc[13].as_f64().unwrap(), 2.0); - 
assert!(doc[14].is_null()); - assert_eq!(doc[15].as_bool().unwrap(), true); - assert_eq!(doc[16].as_bool().unwrap(), false); - assert_eq!(doc[17].as_i64().unwrap(), 255); - assert!(doc[18].is_badvalue()); - assert!(doc[19].is_badvalue()); - assert!(doc[20].is_badvalue()); - assert!(doc[21].is_badvalue()); - assert_eq!(doc[22].as_i64().unwrap(), 63); - assert_eq!(doc[23][0].as_i64().unwrap(), 15); - assert_eq!(doc[23][1].as_i64().unwrap(), 15); - assert_eq!(doc[24].as_i64().unwrap(), 12345); - assert!(doc[25][0].as_bool().unwrap()); - assert!(!doc[25][1].as_bool().unwrap()); + assert_eq!( + run_parser("&a").unwrap(), + [ + Event::StreamStart, + Event::DocumentStart, + Event::Scalar(String::new(), TScalarStyle::Plain, 1, None), + Event::DocumentEnd, + Event::StreamEnd, + ] + ); } #[test] fn test_bad_hyphen() { // See: https://github.com/chyh1990/yaml-rust/issues/23 - let s = "{-"; - assert!(YamlLoader::load_from_str(s).is_err()); + assert!(run_parser("{-").is_err()); } #[test] fn test_issue_65() { // See: https://github.com/chyh1990/yaml-rust/issues/65 let b = "\n\"ll\\\"ll\\\r\n\"ll\\\"ll\\\r\r\r\rU\r\r\rU"; - assert!(YamlLoader::load_from_str(b).is_err()); + assert!(run_parser(b).is_err()); } #[test] @@ -242,201 +183,104 @@ fn test_issue_65_mwe() { // A MWE for `test_issue_65`. The error over there is that there is invalid trailing content // after a double quoted string. 
let b = r#""foo" l"#; - assert!(YamlLoader::load_from_str(b).is_err()); + assert!(run_parser(b).is_err()); } #[test] fn test_bad_docstart() { - assert!(YamlLoader::load_from_str("---This used to cause an infinite loop").is_ok()); + assert!(run_parser("---This used to cause an infinite loop").is_ok()); assert_eq!( - YamlLoader::load_from_str("----"), - Ok(vec![Yaml::String(String::from("----"))]) + run_parser("----").unwrap(), + [ + Event::StreamStart, + Event::DocumentStart, + Event::Scalar("----".to_string(), TScalarStyle::Plain, 0, None), + Event::DocumentEnd, + Event::StreamEnd, + ] ); - assert_eq!( - YamlLoader::load_from_str("--- #here goes a comment"), - Ok(vec![Yaml::Null]) - ); - assert_eq!( - YamlLoader::load_from_str("---- #here goes a comment"), - Ok(vec![Yaml::String(String::from("----"))]) - ); -} -#[test] -fn test_plain_datatype_with_into_methods() { - let s = " -- 'string' -- \"string\" -- string -- 123 -- -321 -- 1.23 -- -1e4 -- true -- false -- !!str 0 -- !!int 100 -- !!float 2 -- !!bool true -- !!bool false -- 0xFF -- 0o77 -- +12345 -- -.INF -- .NAN -- !!float .INF -"; - let mut out = YamlLoader::load_from_str(s).unwrap().into_iter(); - let mut doc = out.next().unwrap().into_iter(); - - assert_eq!(doc.next().unwrap().into_string().unwrap(), "string"); - assert_eq!(doc.next().unwrap().into_string().unwrap(), "string"); - assert_eq!(doc.next().unwrap().into_string().unwrap(), "string"); - assert_eq!(doc.next().unwrap().into_i64().unwrap(), 123); - assert_eq!(doc.next().unwrap().into_i64().unwrap(), -321); - assert_eq!(doc.next().unwrap().into_f64().unwrap(), 1.23); - assert_eq!(doc.next().unwrap().into_f64().unwrap(), -1e4); - assert_eq!(doc.next().unwrap().into_bool().unwrap(), true); - assert_eq!(doc.next().unwrap().into_bool().unwrap(), false); - assert_eq!(doc.next().unwrap().into_string().unwrap(), "0"); - assert_eq!(doc.next().unwrap().into_i64().unwrap(), 100); - assert_eq!(doc.next().unwrap().into_f64().unwrap(), 2.0); - 
assert_eq!(doc.next().unwrap().into_bool().unwrap(), true); - assert_eq!(doc.next().unwrap().into_bool().unwrap(), false); - assert_eq!(doc.next().unwrap().into_i64().unwrap(), 255); - assert_eq!(doc.next().unwrap().into_i64().unwrap(), 63); - assert_eq!(doc.next().unwrap().into_i64().unwrap(), 12345); - assert_eq!(doc.next().unwrap().into_f64().unwrap(), f64::NEG_INFINITY); - assert!(doc.next().unwrap().into_f64().is_some()); - assert_eq!(doc.next().unwrap().into_f64().unwrap(), f64::INFINITY); -} - -#[test] -fn test_hash_order() { - let s = "--- -b: ~ -a: ~ -c: ~ -"; - let out = YamlLoader::load_from_str(s).unwrap(); - let first = out.into_iter().next().unwrap(); - let mut iter = first.into_hash().unwrap().into_iter(); assert_eq!( - Some((Yaml::String("b".to_owned()), Yaml::Null)), - iter.next() + run_parser("--- #comment").unwrap(), + [ + Event::StreamStart, + Event::DocumentStart, + Event::Scalar("~".to_string(), TScalarStyle::Plain, 0, None), + Event::DocumentEnd, + Event::StreamEnd, + ] ); - assert_eq!( - Some((Yaml::String("a".to_owned()), Yaml::Null)), - iter.next() - ); - assert_eq!( - Some((Yaml::String("c".to_owned()), Yaml::Null)), - iter.next() - ); - assert_eq!(None, iter.next()); -} -#[test] -fn test_integer_key() { - let s = " -0: - important: true -1: - important: false -"; - let out = YamlLoader::load_from_str(s).unwrap(); - let first = out.into_iter().next().unwrap(); - assert_eq!(first[0]["important"].as_bool().unwrap(), true); + assert_eq!( + run_parser("---- #comment").unwrap(), + [ + Event::StreamStart, + Event::DocumentStart, + Event::Scalar("----".to_string(), TScalarStyle::Plain, 0, None), + Event::DocumentEnd, + Event::StreamEnd, + ] + ); } #[test] fn test_indentation_equality() { - let four_spaces = YamlLoader::load_from_str( + let four_spaces = run_parser( r" hash: with: indentations ", ) - .unwrap() - .into_iter() - .next() .unwrap(); - let two_spaces = YamlLoader::load_from_str( + let two_spaces = run_parser( r" hash: with: 
indentations ", ) - .unwrap() - .into_iter() - .next() .unwrap(); - let one_space = YamlLoader::load_from_str( + let one_space = run_parser( r" hash: with: indentations ", ) - .unwrap() - .into_iter() - .next() .unwrap(); - let mixed_spaces = YamlLoader::load_from_str( + let mixed_spaces = run_parser( r" hash: with: indentations ", ) - .unwrap() - .into_iter() - .next() .unwrap(); - assert_eq!(four_spaces, two_spaces); - assert_eq!(two_spaces, one_space); - assert_eq!(four_spaces, mixed_spaces); -} - -#[test] -fn test_two_space_indentations() { - // https://github.com/kbknapp/clap-rs/issues/965 - - let s = r" -subcommands: - - server: - about: server related commands -subcommands2: - - server: - about: server related commands -subcommands3: - - server: - about: server related commands - "; - - let out = YamlLoader::load_from_str(s).unwrap(); - let doc = &out.into_iter().next().unwrap(); - - println!("{doc:#?}"); - assert_eq!(doc["subcommands"][0]["server"], Yaml::Null); - assert!(doc["subcommands2"][0]["server"].as_hash().is_some()); - assert!(doc["subcommands3"][0]["server"].as_hash().is_some()); + for (((a, b), c), d) in four_spaces + .iter() + .zip(two_spaces.iter()) + .zip(one_space.iter()) + .zip(mixed_spaces.iter()) + { + assert!(a == b); + assert!(a == c); + assert!(a == d); + } } #[test] fn test_recursion_depth_check_objects() { let s = "{a:".repeat(10_000) + &"}".repeat(10_000); - assert!(YamlLoader::load_from_str(&s).is_err()); + assert!(run_parser(&s).is_err()); } #[test] fn test_recursion_depth_check_arrays() { let s = "[".repeat(10_000) + &"]".repeat(10_000); - assert!(YamlLoader::load_from_str(&s).is_err()); + assert!(run_parser(&s).is_err()); } diff --git a/parser/tests/emitter.rs b/parser/tests/emitter.rs deleted file mode 100644 index c085a56..0000000 --- a/parser/tests/emitter.rs +++ /dev/null @@ -1,294 +0,0 @@ -use yaml_rust2::{YamlEmitter, YamlLoader}; - -#[allow(clippy::similar_names)] -#[test] -fn test_emit_simple() { - let s = " -# comment 
-a0 bb: val -a1: - b1: 4 - b2: d -a2: 4 # i'm comment -a3: [1, 2, 3] -a4: - - [a1, a2] - - 2 -"; - - let docs = YamlLoader::load_from_str(s).unwrap(); - let doc = &docs[0]; - let mut writer = String::new(); - { - let mut emitter = YamlEmitter::new(&mut writer); - emitter.dump(doc).unwrap(); - } - println!("original:\n{s}"); - println!("emitted:\n{writer}"); - let docs_new = match YamlLoader::load_from_str(&writer) { - Ok(y) => y, - Err(e) => panic!("{}", e), - }; - let doc_new = &docs_new[0]; - - assert_eq!(doc, doc_new); -} - -#[test] -fn test_emit_complex() { - let s = r" -catalogue: - product: &coffee { name: Coffee, price: 2.5 , unit: 1l } - product: &cookies { name: Cookies!, price: 3.40 , unit: 400g} - -products: - *coffee : - amount: 4 - *cookies : - amount: 4 - [1,2,3,4]: - array key - 2.4: - real key - true: - bool key - {}: - empty hash key - "; - let docs = YamlLoader::load_from_str(s).unwrap(); - let doc = &docs[0]; - let mut writer = String::new(); - { - let mut emitter = YamlEmitter::new(&mut writer); - emitter.dump(doc).unwrap(); - } - let docs_new = match YamlLoader::load_from_str(&writer) { - Ok(y) => y, - Err(e) => panic!("{}", e), - }; - let new_doc = &docs_new[0]; - assert_eq!(doc, new_doc); -} - -#[test] -fn test_emit_avoid_quotes() { - let s = r#"--- -a7: 你好 -boolean: "true" -boolean2: "false" -date: 2014-12-31 -empty_string: "" -empty_string1: " " -empty_string2: " a" -empty_string3: " a " -exp: "12e7" -field: ":" -field2: "{" -field3: "\\" -field4: "\n" -field5: "can't avoid quote" -float: "2.6" -int: "4" -nullable: "null" -nullable2: "~" -products: - "*coffee": - amount: 4 - "*cookies": - amount: 4 - ".milk": - amount: 1 - "2.4": real key - "[1,2,3,4]": array key - "true": bool key - "{}": empty hash key -x: test -y: avoid quoting here -z: string with spaces"#; - - let docs = YamlLoader::load_from_str(s).unwrap(); - let doc = &docs[0]; - let mut writer = String::new(); - { - let mut emitter = YamlEmitter::new(&mut writer); - 
emitter.dump(doc).unwrap(); - } - - assert_eq!(s, writer, "actual:\n\n{writer}\n"); -} - -#[test] -fn emit_quoted_bools() { - let input = r#"--- -string0: yes -string1: no -string2: "true" -string3: "false" -string4: "~" -null0: ~ -[true, false]: real_bools -[True, TRUE, False, FALSE, y,Y,yes,Yes,YES,n,N,no,No,NO,on,On,ON,off,Off,OFF]: false_bools -bool0: true -bool1: false"#; - let expected = r#"--- -string0: "yes" -string1: "no" -string2: "true" -string3: "false" -string4: "~" -null0: ~ -? - true - - false -: real_bools -? - "True" - - "TRUE" - - "False" - - "FALSE" - - y - - Y - - "yes" - - "Yes" - - "YES" - - n - - N - - "no" - - "No" - - "NO" - - "on" - - "On" - - "ON" - - "off" - - "Off" - - "OFF" -: false_bools -bool0: true -bool1: false"#; - - let docs = YamlLoader::load_from_str(input).unwrap(); - let doc = &docs[0]; - let mut writer = String::new(); - { - let mut emitter = YamlEmitter::new(&mut writer); - emitter.dump(doc).unwrap(); - } - - assert_eq!( - expected, writer, - "expected:\n{expected}\nactual:\n{writer}\n", - ); -} - -#[test] -fn test_empty_and_nested() { - test_empty_and_nested_flag(false); -} - -#[test] -fn test_empty_and_nested_compact() { - test_empty_and_nested_flag(true); -} - -fn test_empty_and_nested_flag(compact: bool) { - let s = if compact { - r"--- -a: - b: - c: hello - d: {} -e: - - f - - g - - h: []" - } else { - r"--- -a: - b: - c: hello - d: {} -e: - - f - - g - - - h: []" - }; - - let docs = YamlLoader::load_from_str(s).unwrap(); - let doc = &docs[0]; - let mut writer = String::new(); - { - let mut emitter = YamlEmitter::new(&mut writer); - emitter.compact(compact); - emitter.dump(doc).unwrap(); - } - - assert_eq!(s, writer); -} - -#[test] -fn test_nested_arrays() { - let s = r"--- -a: - - b - - - c - - d - - - e - - f"; - - let docs = YamlLoader::load_from_str(s).unwrap(); - let doc = &docs[0]; - let mut writer = String::new(); - { - let mut emitter = YamlEmitter::new(&mut writer); - emitter.dump(doc).unwrap(); - } - 
println!("original:\n{s}"); - println!("emitted:\n{writer}"); - - assert_eq!(s, writer); -} - -#[test] -fn test_deeply_nested_arrays() { - let s = r"--- -a: - - b - - - c - - d - - - e - - - f - - - e"; - - let docs = YamlLoader::load_from_str(s).unwrap(); - let doc = &docs[0]; - let mut writer = String::new(); - { - let mut emitter = YamlEmitter::new(&mut writer); - emitter.dump(doc).unwrap(); - } - println!("original:\n{s}"); - println!("emitted:\n{writer}"); - - assert_eq!(s, writer); -} - -#[test] -fn test_nested_hashes() { - let s = r"--- -a: - b: - c: - d: - e: f"; - - let docs = YamlLoader::load_from_str(s).unwrap(); - let doc = &docs[0]; - let mut writer = String::new(); - { - let mut emitter = YamlEmitter::new(&mut writer); - emitter.dump(doc).unwrap(); - } - println!("original:\n{s}"); - println!("emitted:\n{writer}"); - - assert_eq!(s, writer); -} diff --git a/parser/tests/quickcheck.rs b/parser/tests/quickcheck.rs deleted file mode 100644 index fdf2549..0000000 --- a/parser/tests/quickcheck.rs +++ /dev/null @@ -1,21 +0,0 @@ -extern crate yaml_rust2; -#[macro_use] -extern crate quickcheck; - -use quickcheck::TestResult; -use yaml_rust2::{Yaml, YamlEmitter, YamlLoader}; - -quickcheck! { - fn test_check_weird_keys(xs: Vec) -> TestResult { - let mut out_str = String::new(); - let input = Yaml::Array(xs.into_iter().map(Yaml::String).collect()); - { - let mut emitter = YamlEmitter::new(&mut out_str); - emitter.dump(&input).unwrap(); - } - match YamlLoader::load_from_str(&out_str) { - Ok(output) => TestResult::from_bool(output.len() == 1 && input == output[0]), - Err(err) => TestResult::error(err.to_string()), - } - } -} diff --git a/parser/tests/scanner.rs b/parser/tests/scanner.rs deleted file mode 100644 index 0a09517..0000000 --- a/parser/tests/scanner.rs +++ /dev/null @@ -1,440 +0,0 @@ -#![allow(clippy::enum_glob_use)] - -use yaml_rust2::{scanner::TokenType::*, scanner::*}; - -macro_rules! 
next { - ($p:ident, $tk:pat) => {{ - let tok = $p.next().unwrap(); - match tok.1 { - $tk => {} - _ => panic!("unexpected token: {:?}", tok), - } - }}; -} - -macro_rules! next_scalar { - ($p:ident, $tk:expr, $v:expr) => {{ - let tok = $p.next().unwrap(); - match tok.1 { - Scalar(style, ref v) => { - assert_eq!(style, $tk); - assert_eq!(v, $v); - } - _ => panic!("unexpected token: {:?}", tok), - } - }}; -} - -macro_rules! end { - ($p:ident) => {{ - assert_eq!($p.next(), None); - }}; -} -/// test cases in libyaml scanner.c -#[test] -fn test_empty() { - let s = ""; - let mut p = Scanner::new(s.chars()); - next!(p, StreamStart(..)); - next!(p, StreamEnd); - end!(p); -} - -#[test] -fn test_scalar() { - let s = "a scalar"; - let mut p = Scanner::new(s.chars()); - next!(p, StreamStart(..)); - next!(p, Scalar(TScalarStyle::Plain, _)); - next!(p, StreamEnd); - end!(p); -} - -#[test] -fn test_explicit_scalar() { - let s = "--- -'a scalar' -... -"; - let mut p = Scanner::new(s.chars()); - next!(p, StreamStart(..)); - next!(p, DocumentStart); - next!(p, Scalar(TScalarStyle::SingleQuoted, _)); - next!(p, DocumentEnd); - next!(p, StreamEnd); - end!(p); -} - -#[test] -fn test_multiple_documents() { - let s = " -'a scalar' ---- -'a scalar' ---- -'a scalar' -"; - let mut p = Scanner::new(s.chars()); - next!(p, StreamStart(..)); - next!(p, Scalar(TScalarStyle::SingleQuoted, _)); - next!(p, DocumentStart); - next!(p, Scalar(TScalarStyle::SingleQuoted, _)); - next!(p, DocumentStart); - next!(p, Scalar(TScalarStyle::SingleQuoted, _)); - next!(p, StreamEnd); - end!(p); -} - -#[test] -fn test_a_flow_sequence() { - let s = "[item 1, item 2, item 3]"; - let mut p = Scanner::new(s.chars()); - next!(p, StreamStart(..)); - next!(p, FlowSequenceStart); - next_scalar!(p, TScalarStyle::Plain, "item 1"); - next!(p, FlowEntry); - next!(p, Scalar(TScalarStyle::Plain, _)); - next!(p, FlowEntry); - next!(p, Scalar(TScalarStyle::Plain, _)); - next!(p, FlowSequenceEnd); - next!(p, StreamEnd); - end!(p); 
-} - -#[test] -fn test_a_flow_mapping() { - let s = " -{ - a simple key: a value, # Note that the KEY token is produced. - ? a complex key: another value, -} -"; - let mut p = Scanner::new(s.chars()); - next!(p, StreamStart(..)); - next!(p, FlowMappingStart); - next!(p, Key); - next!(p, Scalar(TScalarStyle::Plain, _)); - next!(p, Value); - next!(p, Scalar(TScalarStyle::Plain, _)); - next!(p, FlowEntry); - next!(p, Key); - next_scalar!(p, TScalarStyle::Plain, "a complex key"); - next!(p, Value); - next!(p, Scalar(TScalarStyle::Plain, _)); - next!(p, FlowEntry); - next!(p, FlowMappingEnd); - next!(p, StreamEnd); - end!(p); -} - -#[test] -fn test_block_sequences() { - let s = " -- item 1 -- item 2 -- - - item 3.1 - - item 3.2 -- - key 1: value 1 - key 2: value 2 -"; - let mut p = Scanner::new(s.chars()); - next!(p, StreamStart(..)); - next!(p, BlockSequenceStart); - next!(p, BlockEntry); - next_scalar!(p, TScalarStyle::Plain, "item 1"); - next!(p, BlockEntry); - next_scalar!(p, TScalarStyle::Plain, "item 2"); - next!(p, BlockEntry); - next!(p, BlockSequenceStart); - next!(p, BlockEntry); - next_scalar!(p, TScalarStyle::Plain, "item 3.1"); - next!(p, BlockEntry); - next_scalar!(p, TScalarStyle::Plain, "item 3.2"); - next!(p, BlockEnd); - next!(p, BlockEntry); - next!(p, BlockMappingStart); - next!(p, Key); - next_scalar!(p, TScalarStyle::Plain, "key 1"); - next!(p, Value); - next_scalar!(p, TScalarStyle::Plain, "value 1"); - next!(p, Key); - next_scalar!(p, TScalarStyle::Plain, "key 2"); - next!(p, Value); - next_scalar!(p, TScalarStyle::Plain, "value 2"); - next!(p, BlockEnd); - next!(p, BlockEnd); - next!(p, StreamEnd); - end!(p); -} - -#[test] -fn test_block_mappings() { - let s = " -a simple key: a value # The KEY token is produced here. -? 
a complex key -: another value -a mapping: - key 1: value 1 - key 2: value 2 -a sequence: - - item 1 - - item 2 -"; - let mut p = Scanner::new(s.chars()); - next!(p, StreamStart(..)); - next!(p, BlockMappingStart); - next!(p, Key); - next!(p, Scalar(_, _)); - next!(p, Value); - next!(p, Scalar(_, _)); - next!(p, Key); - next!(p, Scalar(_, _)); - next!(p, Value); - next!(p, Scalar(_, _)); - next!(p, Key); - next!(p, Scalar(_, _)); - next!(p, Value); // libyaml comment seems to be wrong - next!(p, BlockMappingStart); - next!(p, Key); - next!(p, Scalar(_, _)); - next!(p, Value); - next!(p, Scalar(_, _)); - next!(p, Key); - next!(p, Scalar(_, _)); - next!(p, Value); - next!(p, Scalar(_, _)); - next!(p, BlockEnd); - next!(p, Key); - next!(p, Scalar(_, _)); - next!(p, Value); - next!(p, BlockSequenceStart); - next!(p, BlockEntry); - next!(p, Scalar(_, _)); - next!(p, BlockEntry); - next!(p, Scalar(_, _)); - next!(p, BlockEnd); - next!(p, BlockEnd); - next!(p, StreamEnd); - end!(p); -} - -#[test] -fn test_no_block_sequence_start() { - let s = " -key: -- item 1 -- item 2 -"; - let mut p = Scanner::new(s.chars()); - next!(p, StreamStart(..)); - next!(p, BlockMappingStart); - next!(p, Key); - next_scalar!(p, TScalarStyle::Plain, "key"); - next!(p, Value); - next!(p, BlockEntry); - next_scalar!(p, TScalarStyle::Plain, "item 1"); - next!(p, BlockEntry); - next_scalar!(p, TScalarStyle::Plain, "item 2"); - next!(p, BlockEnd); - next!(p, StreamEnd); - end!(p); -} - -#[test] -fn test_collections_in_sequence() { - let s = " -- - item 1 - - item 2 -- key 1: value 1 - key 2: value 2 -- ? 
complex key - : complex value -"; - let mut p = Scanner::new(s.chars()); - next!(p, StreamStart(..)); - next!(p, BlockSequenceStart); - next!(p, BlockEntry); - next!(p, BlockSequenceStart); - next!(p, BlockEntry); - next_scalar!(p, TScalarStyle::Plain, "item 1"); - next!(p, BlockEntry); - next_scalar!(p, TScalarStyle::Plain, "item 2"); - next!(p, BlockEnd); - next!(p, BlockEntry); - next!(p, BlockMappingStart); - next!(p, Key); - next_scalar!(p, TScalarStyle::Plain, "key 1"); - next!(p, Value); - next_scalar!(p, TScalarStyle::Plain, "value 1"); - next!(p, Key); - next_scalar!(p, TScalarStyle::Plain, "key 2"); - next!(p, Value); - next_scalar!(p, TScalarStyle::Plain, "value 2"); - next!(p, BlockEnd); - next!(p, BlockEntry); - next!(p, BlockMappingStart); - next!(p, Key); - next_scalar!(p, TScalarStyle::Plain, "complex key"); - next!(p, Value); - next_scalar!(p, TScalarStyle::Plain, "complex value"); - next!(p, BlockEnd); - next!(p, BlockEnd); - next!(p, StreamEnd); - end!(p); -} - -#[test] -fn test_collections_in_mapping() { - let s = " -? a sequence -: - item 1 - - item 2 -? 
a mapping -: key 1: value 1 - key 2: value 2 -"; - let mut p = Scanner::new(s.chars()); - next!(p, StreamStart(..)); - next!(p, BlockMappingStart); - next!(p, Key); - next_scalar!(p, TScalarStyle::Plain, "a sequence"); - next!(p, Value); - next!(p, BlockSequenceStart); - next!(p, BlockEntry); - next_scalar!(p, TScalarStyle::Plain, "item 1"); - next!(p, BlockEntry); - next_scalar!(p, TScalarStyle::Plain, "item 2"); - next!(p, BlockEnd); - next!(p, Key); - next_scalar!(p, TScalarStyle::Plain, "a mapping"); - next!(p, Value); - next!(p, BlockMappingStart); - next!(p, Key); - next_scalar!(p, TScalarStyle::Plain, "key 1"); - next!(p, Value); - next_scalar!(p, TScalarStyle::Plain, "value 1"); - next!(p, Key); - next_scalar!(p, TScalarStyle::Plain, "key 2"); - next!(p, Value); - next_scalar!(p, TScalarStyle::Plain, "value 2"); - next!(p, BlockEnd); - next!(p, BlockEnd); - next!(p, StreamEnd); - end!(p); -} - -#[test] -fn test_spec_ex7_3() { - let s = " -{ - ? foo :, - : bar, -} -"; - let mut p = Scanner::new(s.chars()); - next!(p, StreamStart(..)); - next!(p, FlowMappingStart); - next!(p, Key); - next_scalar!(p, TScalarStyle::Plain, "foo"); - next!(p, Value); - next!(p, FlowEntry); - next!(p, Value); - next_scalar!(p, TScalarStyle::Plain, "bar"); - next!(p, FlowEntry); - next!(p, FlowMappingEnd); - next!(p, StreamEnd); - end!(p); -} - -#[test] -fn test_plain_scalar_starting_with_indicators_in_flow() { - // "Plain scalars must not begin with most indicators, as this would cause ambiguity with - // other YAML constructs. However, the “:”, “?” and “-” indicators may be used as the first - // character if followed by a non-space “safe” character, as this causes no ambiguity." 
- - let s = "{a: :b}"; - let mut p = Scanner::new(s.chars()); - next!(p, StreamStart(..)); - next!(p, FlowMappingStart); - next!(p, Key); - next_scalar!(p, TScalarStyle::Plain, "a"); - next!(p, Value); - next_scalar!(p, TScalarStyle::Plain, ":b"); - next!(p, FlowMappingEnd); - next!(p, StreamEnd); - end!(p); - - let s = "{a: ?b}"; - let mut p = Scanner::new(s.chars()); - next!(p, StreamStart(..)); - next!(p, FlowMappingStart); - next!(p, Key); - next_scalar!(p, TScalarStyle::Plain, "a"); - next!(p, Value); - next_scalar!(p, TScalarStyle::Plain, "?b"); - next!(p, FlowMappingEnd); - next!(p, StreamEnd); - end!(p); -} - -#[test] -fn test_plain_scalar_starting_with_indicators_in_block() { - let s = ":a"; - let mut p = Scanner::new(s.chars()); - next!(p, StreamStart(..)); - next_scalar!(p, TScalarStyle::Plain, ":a"); - next!(p, StreamEnd); - end!(p); - - let s = "?a"; - let mut p = Scanner::new(s.chars()); - next!(p, StreamStart(..)); - next_scalar!(p, TScalarStyle::Plain, "?a"); - next!(p, StreamEnd); - end!(p); -} - -#[test] -fn test_plain_scalar_containing_indicators_in_block() { - let s = "a:,b"; - let mut p = Scanner::new(s.chars()); - next!(p, StreamStart(..)); - next_scalar!(p, TScalarStyle::Plain, "a:,b"); - next!(p, StreamEnd); - end!(p); - - let s = ":,b"; - let mut p = Scanner::new(s.chars()); - next!(p, StreamStart(..)); - next_scalar!(p, TScalarStyle::Plain, ":,b"); - next!(p, StreamEnd); - end!(p); -} - -#[test] -fn test_scanner_cr() { - let s = "---\r\n- tok1\r\n- tok2"; - let mut p = Scanner::new(s.chars()); - next!(p, StreamStart(..)); - next!(p, DocumentStart); - next!(p, BlockSequenceStart); - next!(p, BlockEntry); - next_scalar!(p, TScalarStyle::Plain, "tok1"); - next!(p, BlockEntry); - next_scalar!(p, TScalarStyle::Plain, "tok2"); - next!(p, BlockEnd); - next!(p, StreamEnd); - end!(p); -} - -#[test] -fn test_uri() { - // TODO -} - -#[test] -fn test_uri_escapes() { - // TODO -} diff --git a/parser/tests/spec_test.rs b/parser/tests/spec_test.rs index 
ecf1327..4cffbc0 100644 --- a/parser/tests/spec_test.rs +++ b/parser/tests/spec_test.rs @@ -1,9 +1,8 @@ #![allow(dead_code)] #![allow(non_upper_case_globals)] -extern crate yaml_rust2; +extern crate saphyr_parser; -use yaml_rust2::parser::{Event, EventReceiver, Parser}; -use yaml_rust2::scanner::TScalarStyle; +use saphyr_parser::{Event, EventReceiver, Parser, TScalarStyle}; // These names match the names used in the C++ test suite. #[cfg_attr(feature = "cargo-clippy", allow(clippy::enum_variant_names))] @@ -68,67 +67,3 @@ macro_rules! assert_next { // auto generated from handler_spec_test.cpp include!("specexamples.rs.inc"); include!("spec_test.rs.inc"); - -// hand-crafted tests -//#[test] -//fn test_hc_alias() { -//} - -#[test] -fn test_mapvec_legal() { - use yaml_rust2::yaml::{Hash, Yaml}; - use yaml_rust2::{YamlEmitter, YamlLoader}; - - // Emitting a `map>, _>` should result in legal yaml that - // we can parse. - - let key = vec![Yaml::Integer(1), Yaml::Integer(2), Yaml::Integer(3)]; - - let mut keyhash = Hash::new(); - keyhash.insert(Yaml::String("key".into()), Yaml::Array(key)); - - let val = vec![Yaml::Integer(4), Yaml::Integer(5), Yaml::Integer(6)]; - - let mut hash = Hash::new(); - hash.insert(Yaml::Hash(keyhash), Yaml::Array(val)); - - let mut out_str = String::new(); - { - let mut emitter = YamlEmitter::new(&mut out_str); - emitter.dump(&Yaml::Hash(hash)).unwrap(); - } - - // At this point, we are tempted to naively render like this: - // - // ```yaml - // --- - // {key: - // - 1 - // - 2 - // - 3}: - // - 4 - // - 5 - // - 6 - // ``` - // - // However, this doesn't work, because the key sequence [1, 2, 3] is - // rendered in block mode, which is not legal (as far as I can tell) - // inside the flow mode of the key. We need to either fully render - // everything that's in a key in flow mode (which may make for some - // long lines), or use the explicit map identifier '?': - // - // ```yaml - // --- - // ? 
- // key: - // - 1 - // - 2 - // - 3 - // : - // - 4 - // - 5 - // - 6 - // ``` - - YamlLoader::load_from_str(&out_str).unwrap(); -} diff --git a/parser/tests/test_round_trip.rs b/parser/tests/test_round_trip.rs deleted file mode 100644 index 5f0a7a1..0000000 --- a/parser/tests/test_round_trip.rs +++ /dev/null @@ -1,82 +0,0 @@ -extern crate yaml_rust2; - -use yaml_rust2::{Yaml, YamlEmitter, YamlLoader}; - -fn roundtrip(original: &Yaml) { - let mut emitted = String::new(); - YamlEmitter::new(&mut emitted).dump(original).unwrap(); - - let documents = YamlLoader::load_from_str(&emitted).unwrap(); - println!("emitted {emitted}"); - - assert_eq!(documents.len(), 1); - assert_eq!(documents[0], *original); -} - -fn double_roundtrip(original: &str) { - let parsed = YamlLoader::load_from_str(original).unwrap(); - - let mut serialized = String::new(); - YamlEmitter::new(&mut serialized).dump(&parsed[0]).unwrap(); - - let reparsed = YamlLoader::load_from_str(&serialized).unwrap(); - - assert_eq!(parsed, reparsed); -} - -#[test] -fn test_escape_character() { - let y = Yaml::String("\x1b".to_owned()); - roundtrip(&y); -} - -#[test] -fn test_colon_in_string() { - let y = Yaml::String("x: %".to_owned()); - roundtrip(&y); -} - -#[test] -fn test_numberlike_strings() { - let docs = [ - r#"x: "1234""#, - r#"x: "01234""#, - r#""1234""#, - r#""01234""#, - r#"" 01234""#, - r#""0x1234""#, - r#"" 0x1234""#, - ]; - - for doc in &docs { - roundtrip(&Yaml::String((*doc).to_string())); - double_roundtrip(doc); - } -} - -/// Example from -#[test] -fn test_issue133() { - let doc = YamlLoader::load_from_str("\"0x123\"") - .unwrap() - .pop() - .unwrap(); - assert_eq!(doc, Yaml::String("0x123".to_string())); - - let mut out_str = String::new(); - YamlEmitter::new(&mut out_str).dump(&doc).unwrap(); - let doc2 = YamlLoader::load_from_str(&out_str).unwrap().pop().unwrap(); - assert_eq!(doc, doc2); // This failed because the type has changed to a number now -} - -#[test] -fn test_newline() { - let y = 
Yaml::Array(vec![Yaml::String("\n".to_owned())]); - roundtrip(&y); -} - -#[test] -fn test_crlf() { - let y = Yaml::Array(vec![Yaml::String("\r\n".to_owned())]); - roundtrip(&y); -} diff --git a/parser/tests/yaml-test-suite.rs b/parser/tests/yaml-test-suite.rs index 818083f..5067514 100644 --- a/parser/tests/yaml-test-suite.rs +++ b/parser/tests/yaml-test-suite.rs @@ -2,11 +2,7 @@ use std::fs::{self, DirEntry}; use libtest_mimic::{run_tests, Arguments, Outcome, Test}; -use yaml_rust2::{ - parser::{Event, EventReceiver, Parser, Tag}, - scanner::TScalarStyle, - yaml, ScanError, Yaml, YamlLoader, -}; +use saphyr_parser::{Event, EventReceiver, Parser, ScanError, TScalarStyle, Tag}; type Result> = std::result::Result; @@ -76,6 +72,8 @@ fn run_yaml_test(test: &Test) -> Outcome { } fn load_tests_from_file(entry: &DirEntry) -> Result>> { + use yaml_rust2::{yaml, Yaml, YamlLoader}; + let file_name = entry.file_name().to_string_lossy().to_string(); let test_name = file_name .strip_suffix(".yaml") @@ -123,7 +121,15 @@ fn load_tests_from_file(entry: &DirEntry) -> Result>> { fn parse_to_events(source: &str) -> Result, ScanError> { let mut reporter = EventReporter::new(); - Parser::new_from_str(source).load(&mut reporter, true)?; + for x in Parser::new_from_str(source) { + match x? { + (Event::StreamEnd, _) => { + reporter.on_event(Event::StreamEnd); + break; + } + (x, _) => reporter.on_event(x), + } + } Ok(reporter.events) } diff --git a/parser/tools/README.md b/parser/tools/README.md index 7728a0f..48d6232 100644 --- a/parser/tools/README.md +++ b/parser/tools/README.md @@ -1,6 +1,6 @@ -# `yaml-rust2` tools +# `saphyr-parser` tools This directory contains tools that are used to develop the crate. -Due to dependency management, only some of them are available as binaries from the `yaml-rust2` crate. +Due to dependency management, only some of them are available as binaries from the `saphyr-parser` crate. 
| Tool | Invocation | |------|------------| @@ -14,7 +14,7 @@ Due to dependency management, only some of them are available as binaries from t See the [dedicated README file](./bench_compare/README.md). ## `dump_events` -This is a debugging helper for the parser. It outputs events emitted by the parser for a given file. This can be paired with the `YAMLRUST2_DEBUG` environment variable to have an in-depth overview of which steps the scanner and the parser are taking. +This is a debugging helper for the parser. It outputs events emitted by the parser for a given file. This can be paired with the `SAPHYR_DEBUG` environment variable to have an in-depth overview of which steps the scanner and the parser are taking. ### Example Consider the following `input.yaml` YAML file: @@ -48,7 +48,7 @@ Running `cargo run --bin dump_events -- input.yaml` outputs: ↳ StreamEnd ``` -Running `YAMLRUST2_DEBUG=1 cargo run --bin dump_events -- input.yaml` outputs much more details: +Running `SAPHYR_DEBUG=1 cargo run --bin dump_events -- input.yaml` outputs much more details:
Full output @@ -174,7 +174,7 @@ The generated files are the following: All generated files are meant to be between 200 and 250 MiB in size. -This tool depends on external dependencies that are not part of `yaml-rust2`'s dependencies or `dev-dependencies` and as such can't be called through `cargo run` directly. A dedicated `cargo gen_large_yaml` alias can be used to generate the benchmark files. +This tool depends on external dependencies that are not part of `saphyr-parser`'s dependencies or `dev-dependencies` and as such can't be called through `cargo run` directly. A dedicated `cargo gen_large_yaml` alias can be used to generate the benchmark files. ## `run_bench` This is a benchmarking helper that runs the parser on the given file a given number of times and is able to extract simple metrics out of the results. The `--output-yaml` flag can be specified to make the output a YAML file that can be fed into other tools. @@ -192,7 +192,7 @@ Max: 1.633045284s 95%: 1.633045284s $> cargo run --release --bin run_bench -- bench_yaml/big.yaml 10 --output-yaml -parser: yaml-rust2 +parser: saphyr input: bench_yaml/big.yaml average: 1649847674 min: 1648277149 diff --git a/parser/tools/bench_compare/README.md b/parser/tools/bench_compare/README.md index b9e990b..8e2c1c5 100644 --- a/parser/tools/bench_compare/README.md +++ b/parser/tools/bench_compare/README.md @@ -69,7 +69,7 @@ Max : 1.597028s # This will be read by this tool. # This must output a YAML as described below. 
$> run_bench ../file.yaml 10 --output-yaml -parser: yaml-rust2 +parser: saphyr input: ../file.yaml average: 1620303590 min: 1611632108 @@ -110,7 +110,7 @@ yaml_output_dir = "yaml_output" # The directory in which `run_bench`'s yamls are csv_output = "benchmark.csv" # The CSV output aggregating times for each parser and file [[parsers]] # A parser, can be repeated as many times as there are parsers -name = "yaml-rust2" # The name of the parser (used for logging) +name = "saphyr" # The name of the parser (used for logging) path = "target/release/" # The path in which the parsers' `run_bench` and `time_parse` are # If there is another parser, another block can be added diff --git a/parser/tools/dump_events.rs b/parser/tools/dump_events.rs index 747e9b9..16908b9 100644 --- a/parser/tools/dump_events.rs +++ b/parser/tools/dump_events.rs @@ -1,7 +1,8 @@ use std::env; use std::fs::File; use std::io::prelude::*; -use yaml_rust2::{ + +use saphyr_parser::{ parser::{MarkedEventReceiver, Parser}, scanner::Marker, Event, diff --git a/parser/tools/gen_large_yaml/Cargo.toml b/parser/tools/gen_large_yaml/Cargo.toml index d57bdea..d8526fb 100644 --- a/parser/tools/gen_large_yaml/Cargo.toml +++ b/parser/tools/gen_large_yaml/Cargo.toml @@ -11,7 +11,6 @@ readme = "README.md" edition = "2018" [dependencies] -yaml-rust2 = { path = "../.." 
} rand = { version = "0.8.5", features = [ "small_rng" ] } lipsum = "0.9.0" diff --git a/parser/tools/run_bench.rs b/parser/tools/run_bench.rs index 795f7bc..f194e35 100644 --- a/parser/tools/run_bench.rs +++ b/parser/tools/run_bench.rs @@ -1,7 +1,7 @@ #![allow(clippy::cast_possible_truncation, clippy::cast_precision_loss)] use std::{env, fs::File, io::prelude::*}; -use yaml_rust2::{ +use saphyr_parser::{ parser::{MarkedEventReceiver, Parser}, scanner::Marker, Event, diff --git a/parser/tools/time_parse.rs b/parser/tools/time_parse.rs index 1555dde..02778bd 100644 --- a/parser/tools/time_parse.rs +++ b/parser/tools/time_parse.rs @@ -1,7 +1,7 @@ use std::env; use std::fs::File; use std::io::prelude::*; -use yaml_rust2::{ +use saphyr_parser::{ parser::{MarkedEventReceiver, Parser}, scanner::Marker, Event,