Remove tools from examples.
Add documentation for those tools, and make it so that `gen_large_yaml` generates a predetermined set of files instead of outputting to its standard output.
This commit is contained in:
parent
1c36ffbc4d
commit
861dfb6497
7 changed files with 799 additions and 0 deletions
188
bench/tools/README.md
Normal file
188
bench/tools/README.md
Normal file
|
@ -0,0 +1,188 @@
|
|||
# `yaml-rust2` tools
|
||||
This directory contains tools that are used to develop the crate.
|
||||
Due to dependency management, only some of them are available as binaries from the `yaml-rust2` crate.
|
||||
|
||||
| Tool | Invocation |
|
||||
|------|------------|
|
||||
| `dump_events` | `cargo run --bin dump_events -- [...]` |
|
||||
| `gen_large_yaml` | `cargo gen_large_yaml` |
|
||||
| `time_parse` | `cargo run --bin time_parse -- [...]` |
|
||||
|
||||
## `dump_events`
|
||||
This is a debugging helper for the parser. It outputs events emitted by the parser for a given file. This can be paired with the `YAMLRUST2_DEBUG` environment variable to have an in-depth overview of which steps the scanner and the parser are taking.
|
||||
|
||||
### Example
|
||||
Consider the following `input.yaml` YAML file:
|
||||
```yaml
|
||||
- foo: bar
|
||||
- baz:
|
||||
c: [3, 4, 5]
|
||||
```
|
||||
|
||||
Running `cargo run --bin dump_events -- input.yaml` outputs:
|
||||
```
|
||||
↳ StreamStart
|
||||
↳ DocumentStart
|
||||
↳ SequenceStart(0, None)
|
||||
↳ MappingStart(0, None)
|
||||
↳ Scalar("foo", Plain, 0, None)
|
||||
↳ Scalar("bar", Plain, 0, None)
|
||||
↳ MappingEnd
|
||||
↳ MappingStart(0, None)
|
||||
↳ Scalar("baz", Plain, 0, None)
|
||||
↳ Scalar("~", Plain, 0, None)
|
||||
↳ Scalar("c", Plain, 0, None)
|
||||
↳ SequenceStart(0, None)
|
||||
↳ Scalar("3", Plain, 0, None)
|
||||
↳ Scalar("4", Plain, 0, None)
|
||||
↳ Scalar("5", Plain, 0, None)
|
||||
↳ SequenceEnd
|
||||
↳ MappingEnd
|
||||
↳ SequenceEnd
|
||||
↳ DocumentEnd
|
||||
↳ StreamEnd
|
||||
```
|
||||
|
||||
Running `YAMLRUST2_DEBUG=1 cargo run --bin dump_events -- input.yaml` outputs much more details:
|
||||
<details>
|
||||
<summary> Full output </summary>
|
||||
|
||||
```
|
||||
Parser state: StreamStart
|
||||
↳ StreamStart(Utf8) Marker { index: 0, line: 1, col: 0 }
|
||||
↳ StreamStart
|
||||
|
||||
Parser state: ImplicitDocumentStart
|
||||
→ fetch_next_token after whitespace Marker { index: 0, line: 1, col: 0 } '-'
|
||||
↳ BlockSequenceStart Marker { index: 0, line: 1, col: 0 }
|
||||
↳ DocumentStart
|
||||
|
||||
Parser state: BlockNode
|
||||
↳ SequenceStart(0, None)
|
||||
|
||||
Parser state: BlockSequenceFirstEntry
|
||||
↳ BlockEntry Marker { index: 2, line: 1, col: 2 }
|
||||
→ fetch_next_token after whitespace Marker { index: 2, line: 1, col: 2 } 'f'
|
||||
→ fetch_next_token after whitespace Marker { index: 5, line: 1, col: 5 } ':'
|
||||
↳ BlockMappingStart Marker { index: 5, line: 1, col: 5 }
|
||||
↳ MappingStart(0, None)
|
||||
|
||||
Parser state: BlockMappingFirstKey
|
||||
↳ Key Marker { index: 2, line: 1, col: 2 }
|
||||
↳ Scalar(Plain, "foo") Marker { index: 2, line: 1, col: 2 }
|
||||
↳ Scalar("foo", Plain, 0, None)
|
||||
|
||||
Parser state: BlockMappingValue
|
||||
↳ Value Marker { index: 5, line: 1, col: 5 }
|
||||
→ fetch_next_token after whitespace Marker { index: 7, line: 1, col: 7 } 'b'
|
||||
↳ Scalar(Plain, "bar") Marker { index: 7, line: 1, col: 7 }
|
||||
↳ Scalar("bar", Plain, 0, None)
|
||||
|
||||
Parser state: BlockMappingKey
|
||||
→ fetch_next_token after whitespace Marker { index: 11, line: 2, col: 0 } '-'
|
||||
↳ BlockEnd Marker { index: 11, line: 2, col: 0 }
|
||||
↳ MappingEnd
|
||||
|
||||
Parser state: BlockSequenceEntry
|
||||
↳ BlockEntry Marker { index: 13, line: 2, col: 2 }
|
||||
→ fetch_next_token after whitespace Marker { index: 13, line: 2, col: 2 } 'b'
|
||||
→ fetch_next_token after whitespace Marker { index: 16, line: 2, col: 5 } ':'
|
||||
↳ BlockMappingStart Marker { index: 16, line: 2, col: 5 }
|
||||
↳ MappingStart(0, None)
|
||||
|
||||
Parser state: BlockMappingFirstKey
|
||||
↳ Key Marker { index: 13, line: 2, col: 2 }
|
||||
↳ Scalar(Plain, "baz") Marker { index: 13, line: 2, col: 2 }
|
||||
↳ Scalar("baz", Plain, 0, None)
|
||||
|
||||
Parser state: BlockMappingValue
|
||||
↳ Value Marker { index: 16, line: 2, col: 5 }
|
||||
→ fetch_next_token after whitespace Marker { index: 20, line: 3, col: 2 } 'c'
|
||||
→ fetch_next_token after whitespace Marker { index: 21, line: 3, col: 3 } ':'
|
||||
↳ Key Marker { index: 20, line: 3, col: 2 }
|
||||
↳ Scalar("~", Plain, 0, None)
|
||||
|
||||
Parser state: BlockMappingKey
|
||||
↳ Scalar(Plain, "c") Marker { index: 20, line: 3, col: 2 }
|
||||
↳ Scalar("c", Plain, 0, None)
|
||||
|
||||
Parser state: BlockMappingValue
|
||||
↳ Value Marker { index: 21, line: 3, col: 3 }
|
||||
→ fetch_next_token after whitespace Marker { index: 23, line: 3, col: 5 } '['
|
||||
↳ FlowSequenceStart Marker { index: 23, line: 3, col: 5 }
|
||||
↳ SequenceStart(0, None)
|
||||
|
||||
Parser state: FlowSequenceFirstEntry
|
||||
→ fetch_next_token after whitespace Marker { index: 24, line: 3, col: 6 } '3'
|
||||
→ fetch_next_token after whitespace Marker { index: 25, line: 3, col: 7 } ','
|
||||
↳ Scalar(Plain, "3") Marker { index: 24, line: 3, col: 6 }
|
||||
↳ Scalar("3", Plain, 0, None)
|
||||
|
||||
Parser state: FlowSequenceEntry
|
||||
↳ FlowEntry Marker { index: 25, line: 3, col: 7 }
|
||||
→ fetch_next_token after whitespace Marker { index: 27, line: 3, col: 9 } '4'
|
||||
→ fetch_next_token after whitespace Marker { index: 28, line: 3, col: 10 } ','
|
||||
↳ Scalar(Plain, "4") Marker { index: 27, line: 3, col: 9 }
|
||||
↳ Scalar("4", Plain, 0, None)
|
||||
|
||||
Parser state: FlowSequenceEntry
|
||||
↳ FlowEntry Marker { index: 28, line: 3, col: 10 }
|
||||
→ fetch_next_token after whitespace Marker { index: 30, line: 3, col: 12 } '5'
|
||||
→ fetch_next_token after whitespace Marker { index: 31, line: 3, col: 13 } ']'
|
||||
↳ Scalar(Plain, "5") Marker { index: 30, line: 3, col: 12 }
|
||||
↳ Scalar("5", Plain, 0, None)
|
||||
|
||||
Parser state: FlowSequenceEntry
|
||||
↳ FlowSequenceEnd Marker { index: 31, line: 3, col: 13 }
|
||||
↳ SequenceEnd
|
||||
|
||||
Parser state: BlockMappingKey
|
||||
→ fetch_next_token after whitespace Marker { index: 33, line: 4, col: 0 } '\0'
|
||||
↳ BlockEnd Marker { index: 33, line: 4, col: 0 }
|
||||
↳ MappingEnd
|
||||
|
||||
Parser state: BlockSequenceEntry
|
||||
↳ BlockEnd Marker { index: 33, line: 4, col: 0 }
|
||||
↳ SequenceEnd
|
||||
|
||||
Parser state: DocumentEnd
|
||||
↳ StreamEnd Marker { index: 33, line: 4, col: 0 }
|
||||
↳ DocumentEnd
|
||||
|
||||
Parser state: DocumentStart
|
||||
↳ StreamEnd
|
||||
```
|
||||
|
||||
</details>
|
||||
|
||||
While this cannot be shown in Markdown, the output is colored so that it is a bit easier to read.
|
||||
|
||||
## `gen_large_yaml`
|
||||
It is hard to find large (100+MiB) real-world YAML files that could be used to benchmark a parser. This utility generates multiple large files that are meant to stress the parser with different layouts of YAML files. The resulting files do not look like anything that would be encountered in production, but can serve as a base to test several features of a YAML parser.
|
||||
|
||||
The generated files are the following:
|
||||
|
||||
- `big.yaml`: A large array of records with few fields. One of the fields is a description, a large text block scalar spanning multiple lines. Most of the scanning happens in block scalars.
|
||||
- `nested.yaml`: Very short key-value pairs that nest deeply.
|
||||
- `small_objects.yaml`: A large array of small mappings, each with 2 key-value pairs.
|
||||
- `strings_array.yaml`: A large array of lipsum one-liners (~150-175 characters in length).
|
||||
|
||||
All generated files are meant to be between 200 and 250 MiB in size.
|
||||
|
||||
This tool depends on external dependencies that are not part of `yaml-rust2`'s dependencies or `dev-dependencies` and as such can't be called through `cargo run` directly. A dedicated `cargo gen_large_yaml` alias can be used to generate the benchmark files.
|
||||
|
||||
## `time_parse`
|
||||
This is a benchmarking helper that times how long it takes for the parser to emit all events. It calls the parser on the given input file, receives parsing events and then immediately discards them. It is advised to run this tool with `--release`.
|
||||
|
||||
### Examples
|
||||
Loading a small file could output the following:
|
||||
```sh
|
||||
$> cargo run --release --bin time_parse -- input.yaml
|
||||
Loaded 0MiB in 14.189µs
|
||||
```
|
||||
|
||||
While loading a larger file could output the following:
|
||||
```sh
|
||||
$> cargo run --release --bin time_parse -- bench_yaml/big.yaml
|
||||
Loaded 220MiB in 1.612677853s
|
||||
```
|
38
bench/tools/dump_events.rs
Normal file
38
bench/tools/dump_events.rs
Normal file
|
@ -0,0 +1,38 @@
|
|||
use std::env;
|
||||
use std::fs::File;
|
||||
use std::io::prelude::*;
|
||||
use yaml_rust2::{
|
||||
parser::{MarkedEventReceiver, Parser},
|
||||
scanner::Marker,
|
||||
Event,
|
||||
};
|
||||
|
||||
#[derive(Debug)]
|
||||
struct EventSink {
|
||||
events: Vec<(Event, Marker)>,
|
||||
}
|
||||
|
||||
impl MarkedEventReceiver for EventSink {
|
||||
fn on_event(&mut self, ev: Event, mark: Marker) {
|
||||
eprintln!(" \x1B[;34m\u{21B3} {:?}\x1B[;m", &ev);
|
||||
self.events.push((ev, mark));
|
||||
}
|
||||
}
|
||||
|
||||
fn str_to_events(yaml: &str) -> Vec<(Event, Marker)> {
|
||||
let mut sink = EventSink { events: Vec::new() };
|
||||
let mut parser = Parser::new(yaml.chars());
|
||||
// Load events using our sink as the receiver.
|
||||
parser.load(&mut sink, true).unwrap();
|
||||
sink.events
|
||||
}
|
||||
|
||||
fn main() {
|
||||
let args: Vec<_> = env::args().collect();
|
||||
let mut f = File::open(&args[1]).unwrap();
|
||||
let mut s = String::new();
|
||||
f.read_to_string(&mut s).unwrap();
|
||||
|
||||
// dbg!(str_to_events(&s));
|
||||
str_to_events(&s);
|
||||
}
|
20
bench/tools/gen_large_yaml/Cargo.toml
Normal file
20
bench/tools/gen_large_yaml/Cargo.toml
Normal file
|
@ -0,0 +1,20 @@
|
|||
[package]
|
||||
name = "gen_large_yaml"
|
||||
version = "0.5.0"
|
||||
authors = [
|
||||
"Ethiraric <ethiraric@gmail.com>"
|
||||
]
|
||||
license = "MIT OR Apache-2.0"
|
||||
description = "A helper to generate large YAML files"
|
||||
repository = "https://github.com/Ethiraric/yaml-rust2"
|
||||
readme = "README.md"
|
||||
edition = "2018"
|
||||
|
||||
[dependencies]
|
||||
yaml-rust2 = { version = "0.5.0", path = "../../" }
|
||||
rand = "0.8.5"
|
||||
lipsum = "0.9.0"
|
||||
|
||||
[profile.release-lto]
|
||||
inherits = "release"
|
||||
lto = true
|
156
bench/tools/gen_large_yaml/src/gen.rs
Normal file
156
bench/tools/gen_large_yaml/src/gen.rs
Normal file
|
@ -0,0 +1,156 @@
|
|||
#![allow(clippy::too_many_arguments)]
|
||||
|
||||
use rand::{distributions::Alphanumeric, rngs::ThreadRng, Rng};
|
||||
|
||||
/// Generate a string with hexadecimal digits of the specified length.
|
||||
pub fn hex_string(rng: &mut ThreadRng, len: usize) -> String {
|
||||
const DIGITS: &[u8] = b"0123456789abcdef";
|
||||
string_from_set(rng, len, len + 1, DIGITS)
|
||||
}
|
||||
|
||||
/// Generate an e-mail address.
|
||||
pub fn email(rng: &mut ThreadRng, len_lo: usize, len_hi: usize) -> String {
|
||||
const CHARSET: &[u8] = b"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ-_.0123456789";
|
||||
format!(
|
||||
"{}@example.com",
|
||||
string_from_set(rng, len_lo, len_hi, CHARSET)
|
||||
)
|
||||
}
|
||||
|
||||
/// Generate a random URL.
|
||||
pub fn url(
|
||||
rng: &mut ThreadRng,
|
||||
scheme: &str,
|
||||
n_paths_lo: usize,
|
||||
n_paths_hi: usize,
|
||||
path_len_lo: usize,
|
||||
path_len_hi: usize,
|
||||
extension: Option<&str>,
|
||||
) -> String {
|
||||
let mut string = format!("{scheme}://example.com");
|
||||
for _ in 0..rng.gen_range(n_paths_lo..n_paths_hi) {
|
||||
string.push('/');
|
||||
string.push_str(&alnum_string(rng, path_len_lo, path_len_hi));
|
||||
}
|
||||
if let Some(extension) = extension {
|
||||
string.push('.');
|
||||
string.push_str(extension);
|
||||
}
|
||||
string
|
||||
}
|
||||
|
||||
/// Generate a random integer.
|
||||
pub fn integer(rng: &mut ThreadRng, lo: i64, hi: i64) -> i64 {
|
||||
rng.gen_range(lo..hi)
|
||||
}
|
||||
|
||||
/// Generate an alphanumeric string with a length between `lo_len` and `hi_len`.
|
||||
pub fn alnum_string(rng: &mut ThreadRng, lo_len: usize, hi_len: usize) -> String {
|
||||
let len = rng.gen_range(lo_len..hi_len);
|
||||
rng.sample_iter(&Alphanumeric)
|
||||
.take(len)
|
||||
.map(char::from)
|
||||
.collect()
|
||||
}
|
||||
|
||||
/// Generate a string with hexadecimal digits of the specified length.
|
||||
pub fn string_from_set(rng: &mut ThreadRng, len_lo: usize, len_hi: usize, set: &[u8]) -> String {
|
||||
(0..rng.gen_range(len_lo..len_hi))
|
||||
.map(|_| set[rng.gen_range(0..set.len())] as char)
|
||||
.collect()
|
||||
}
|
||||
|
||||
/// Generate a lipsum paragraph.
|
||||
pub fn paragraph(
|
||||
rng: &mut ThreadRng,
|
||||
lines_lo: usize,
|
||||
lines_hi: usize,
|
||||
wps_lo: usize,
|
||||
wps_hi: usize,
|
||||
line_maxcol: usize,
|
||||
) -> Vec<String> {
|
||||
let mut ret = Vec::new();
|
||||
let nlines = rng.gen_range(lines_lo..lines_hi);
|
||||
|
||||
while ret.len() < nlines {
|
||||
let words_in_sentence = rng.gen_range(wps_lo..wps_hi);
|
||||
let mut sentence = lipsum::lipsum_words_with_rng(rng.clone(), words_in_sentence);
|
||||
|
||||
if let Some(last_line) = ret.pop() {
|
||||
sentence = format!("{last_line} {sentence}");
|
||||
}
|
||||
|
||||
while sentence.len() > line_maxcol {
|
||||
let last_space_idx = line_maxcol
|
||||
- sentence[0..line_maxcol]
|
||||
.chars()
|
||||
.rev()
|
||||
.position(char::is_whitespace)
|
||||
.unwrap();
|
||||
ret.push(sentence[0..last_space_idx].to_string());
|
||||
sentence = sentence[last_space_idx + 1..].to_string();
|
||||
}
|
||||
if !sentence.is_empty() {
|
||||
ret.push(sentence);
|
||||
}
|
||||
}
|
||||
|
||||
ret
|
||||
}
|
||||
|
||||
/// Generate a full name.
|
||||
pub fn full_name(rng: &mut ThreadRng, len_lo: usize, len_hi: usize) -> String {
|
||||
format!(
|
||||
"{} {}",
|
||||
name(rng, len_lo, len_hi),
|
||||
name(rng, len_lo, len_hi)
|
||||
)
|
||||
}
|
||||
|
||||
/// Generate a name.
|
||||
pub fn name(rng: &mut ThreadRng, len_lo: usize, len_hi: usize) -> String {
|
||||
const UPPER: &[u8] = b"ABCDEFGHIJKLMNOPQRSTUVWXYZ";
|
||||
const LOWER: &[u8] = b"abcdefghijklmnopqrstuvwxyz";
|
||||
|
||||
let len = rng.gen_range(len_lo..len_hi);
|
||||
let mut ret = String::new();
|
||||
ret.push(UPPER[rng.gen_range(0..UPPER.len())] as char);
|
||||
ret.push_str(string_from_set(rng, len, len + 1, LOWER).as_str());
|
||||
|
||||
ret
|
||||
}
|
||||
|
||||
/// Generate a set of words.
|
||||
pub fn words(rng: &mut ThreadRng, words_lo: usize, words_hi: usize) -> String {
|
||||
let nwords = rng.gen_range(words_lo..words_hi);
|
||||
lipsum::lipsum_words_with_rng(rng.clone(), nwords).replace(|c| "-\'\",*:".contains(c), "")
|
||||
}
|
||||
|
||||
/// Generate a lipsum text.
|
||||
///
|
||||
/// Texts are composed of some paragraphs and empty lines between them.
|
||||
pub fn text(
|
||||
rng: &mut ThreadRng,
|
||||
paragraphs_lo: usize,
|
||||
paragraphs_hi: usize,
|
||||
lines_lo: usize,
|
||||
lines_hi: usize,
|
||||
wps_lo: usize,
|
||||
wps_hi: usize,
|
||||
line_maxcol: usize,
|
||||
) -> Vec<String> {
|
||||
let mut ret = Vec::new();
|
||||
let mut first = true;
|
||||
|
||||
for _ in 0..rng.gen_range(paragraphs_lo..paragraphs_hi) {
|
||||
if first {
|
||||
first = false;
|
||||
} else {
|
||||
ret.push(String::new());
|
||||
}
|
||||
|
||||
ret.extend(paragraph(rng, lines_lo, lines_hi, wps_lo, wps_hi, line_maxcol).into_iter());
|
||||
}
|
||||
|
||||
ret
|
||||
}
|
255
bench/tools/gen_large_yaml/src/main.rs
Normal file
255
bench/tools/gen_large_yaml/src/main.rs
Normal file
|
@ -0,0 +1,255 @@
|
|||
#![allow(dead_code)]
|
||||
|
||||
mod gen;
|
||||
mod nested;
|
||||
|
||||
use std::collections::HashMap;
|
||||
use std::fs::File;
|
||||
use std::io::BufWriter;
|
||||
use std::path::Path;
|
||||
|
||||
use rand::{rngs::ThreadRng, Rng};
|
||||
|
||||
/// The path into which the generated YAML files will be written.
|
||||
const OUTPUT_DIR: &str = "bench_yaml";
|
||||
|
||||
fn main() -> std::io::Result<()> {
|
||||
let mut generator = Generator::new();
|
||||
let output_path = Path::new(OUTPUT_DIR);
|
||||
if !output_path.is_dir() {
|
||||
std::fs::create_dir(output_path).unwrap();
|
||||
}
|
||||
|
||||
println!("Generating big.yaml");
|
||||
let mut out = BufWriter::new(File::create(output_path.join("big.yaml")).unwrap());
|
||||
generator.gen_record_array(&mut out, 100_000, 100_001)?;
|
||||
|
||||
println!("Generating nested.yaml");
|
||||
let mut out = BufWriter::new(File::create(output_path.join("nested.yaml")).unwrap());
|
||||
nested::create_deep_object(&mut out, 5_000_000)?;
|
||||
|
||||
println!("Generating small_objects.yaml");
|
||||
let mut out = BufWriter::new(File::create(output_path.join("small_objects.yaml")).unwrap());
|
||||
generator.gen_authors_array(&mut out, 4_000_000, 4_000_001)?;
|
||||
|
||||
println!("Generating strings_array.yaml");
|
||||
let mut out = BufWriter::new(File::create(output_path.join("strings_array.yaml")).unwrap());
|
||||
generator.gen_strings_array(&mut out, 1_300_000, 1_300_001, 10, 40)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// YAML Generator.
///
/// Holds the RNG state and the indentation stack shared by the writing
/// helpers (`nl`, `write_lines`, ...).
struct Generator {
    /// The RNG state.
    rng: ThreadRng,
    /// The stack of indentations (in columns); the top is the current one.
    indents: Vec<usize>,
}
|
||||
|
||||
/// A one-shot field-value generator: writes one value to `W` using the
/// generator's RNG and indentation state.
type GenFn<W> = dyn FnOnce(&mut Generator, &mut W) -> std::io::Result<()>;
|
||||
|
||||
impl Generator {
|
||||
/// Create a new generator.
|
||||
fn new() -> Self {
|
||||
Generator {
|
||||
rng: rand::thread_rng(),
|
||||
indents: vec![0],
|
||||
}
|
||||
}
|
||||
|
||||
/// Generate an array of records as per [`Self::gen_record_object`].
|
||||
fn gen_record_array<W: std::io::Write>(
|
||||
&mut self,
|
||||
writer: &mut W,
|
||||
items_lo: usize,
|
||||
items_hi: usize,
|
||||
) -> std::io::Result<()> {
|
||||
self.gen_array(writer, items_lo, items_hi, Generator::gen_record_object)
|
||||
}
|
||||
|
||||
/// Generate an array of lipsum one-liners.
|
||||
fn gen_strings_array<W: std::io::Write>(
|
||||
&mut self,
|
||||
writer: &mut W,
|
||||
items_lo: usize,
|
||||
items_hi: usize,
|
||||
words_lo: usize,
|
||||
words_hi: usize,
|
||||
) -> std::io::Result<()> {
|
||||
self.gen_array(writer, items_lo, items_hi, |gen, writer| {
|
||||
write!(writer, "{}", gen::words(&mut gen.rng, words_lo, words_hi))
|
||||
})
|
||||
}
|
||||
|
||||
    /// Generate a YAML object/mapping containing a record.
    ///
    /// Fields are description, authors, hash, version, home, repository and pdf.
    /// The `description` field is a long string and puts a lot of weight in plain scalar / block
    /// scalar parsing.
    fn gen_record_object<W: std::io::Write>(&mut self, writer: &mut W) -> std::io::Result<()> {
        let mut fields = HashMap::<String, Box<GenFn<W>>>::new();
        // `description`: a literal block scalar (`|`) of several lipsum
        // paragraphs, indented 2 columns deeper.
        fields.insert(
            "description".to_string(),
            Box::new(|gen, w| {
                write!(w, "|")?;
                gen.push_indent(2);
                gen.nl(w)?;
                let indent = gen.indent();
                // Keep the text within 80 columns, indent included.
                let text = gen::text(&mut gen.rng, 1, 9, 3, 8, 10, 20, 80 - indent);
                gen.write_lines(w, &text)?;
                gen.pop_indent();
                Ok(())
            }),
        );

        // `authors`: a nested array of 1 to 9 author objects.
        fields.insert(
            "authors".to_string(),
            Box::new(|gen, w| {
                gen.push_indent(2);
                gen.nl(w)?;
                gen.gen_authors_array(w, 1, 10)?;
                gen.pop_indent();
                Ok(())
            }),
        );

        // Remaining fields are single-line scalars.
        fields.insert(
            "hash".to_string(),
            Box::new(|gen, w| write!(w, "{}", gen::hex_string(&mut gen.rng, 64))),
        );
        fields.insert(
            "version".to_string(),
            Box::new(|gen, w| write!(w, "{}", gen::integer(&mut gen.rng, 1, 9))),
        );
        fields.insert(
            "home".to_string(),
            Box::new(|gen, w| write!(w, "{}", gen::url(&mut gen.rng, "https", 0, 1, 0, 0, None))),
        );
        fields.insert(
            "repository".to_string(),
            Box::new(|gen, w| write!(w, "{}", gen::url(&mut gen.rng, "git", 1, 4, 10, 20, None))),
        );
        fields.insert(
            "pdf".to_string(),
            Box::new(|gen, w| {
                write!(
                    w,
                    "{}",
                    gen::url(&mut gen.rng, "https", 1, 4, 10, 30, Some("pdf"))
                )
            }),
        );
        self.gen_object(writer, fields)
    }
|
||||
|
||||
/// Generate an array of authors as per [`Self::gen_author_object`].
|
||||
fn gen_authors_array<W: std::io::Write>(
|
||||
&mut self,
|
||||
writer: &mut W,
|
||||
items_lo: usize,
|
||||
items_hi: usize,
|
||||
) -> std::io::Result<()> {
|
||||
self.gen_array(writer, items_lo, items_hi, Generator::gen_author_object)
|
||||
}
|
||||
|
||||
/// Generate a small object with 2 string fields.
|
||||
fn gen_author_object<W: std::io::Write>(&mut self, writer: &mut W) -> std::io::Result<()> {
|
||||
let mut fields = HashMap::<String, Box<GenFn<W>>>::new();
|
||||
fields.insert(
|
||||
"name".to_string(),
|
||||
Box::new(|gen, w| write!(w, "{}", gen::full_name(&mut gen.rng, 10, 15))),
|
||||
);
|
||||
fields.insert(
|
||||
"email".to_string(),
|
||||
Box::new(|gen, w| write!(w, "{}", gen::email(&mut gen.rng, 1, 9))),
|
||||
);
|
||||
self.gen_object(writer, fields)
|
||||
}
|
||||
|
||||
/// Generate a YAML array/sequence containing nodes generated by the given function.
|
||||
fn gen_array<W: std::io::Write, F: FnMut(&mut Generator, &mut W) -> std::io::Result<()>>(
|
||||
&mut self,
|
||||
writer: &mut W,
|
||||
len_lo: usize,
|
||||
len_hi: usize,
|
||||
mut obj_creator: F,
|
||||
) -> std::io::Result<()> {
|
||||
let mut first = true;
|
||||
for _ in 0..self.rng.gen_range(len_lo..len_hi) {
|
||||
if first {
|
||||
first = false;
|
||||
} else {
|
||||
self.nl(writer)?;
|
||||
}
|
||||
write!(writer, "- ")?;
|
||||
self.push_indent(2);
|
||||
(obj_creator)(self, writer)?;
|
||||
self.pop_indent();
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Create a Yaml object with some fields in it.
|
||||
fn gen_object<W: std::io::Write>(
|
||||
&mut self,
|
||||
writer: &mut W,
|
||||
fields: HashMap<String, Box<GenFn<W>>>,
|
||||
) -> std::io::Result<()> {
|
||||
let mut first = true;
|
||||
for (key, f) in fields {
|
||||
if first {
|
||||
first = false;
|
||||
} else {
|
||||
self.nl(writer)?;
|
||||
}
|
||||
write!(writer, "{key}: ")?;
|
||||
f(self, writer)?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Write the given lines at the right indentation.
|
||||
fn write_lines<W: std::io::Write>(
|
||||
&mut self,
|
||||
writer: &mut W,
|
||||
lines: &[String],
|
||||
) -> std::io::Result<()> {
|
||||
let mut first = true;
|
||||
|
||||
for line in lines {
|
||||
if first {
|
||||
first = false;
|
||||
} else {
|
||||
self.nl(writer)?;
|
||||
}
|
||||
write!(writer, "{line}")?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Write a new line to the writer and indent.
|
||||
fn nl<W: std::io::Write>(&mut self, writer: &mut W) -> std::io::Result<()> {
|
||||
writeln!(writer)?;
|
||||
for _ in 0..self.indent() {
|
||||
write!(writer, " ")?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Return the given indent.
|
||||
fn indent(&self) -> usize {
|
||||
*self.indents.last().unwrap()
|
||||
}
|
||||
|
||||
/// Push a new indent with the given relative offset.
|
||||
fn push_indent(&mut self, offset: usize) {
|
||||
self.indents.push(self.indent() + offset);
|
||||
}
|
||||
|
||||
    /// Pops the last indent.
    ///
    /// The initial zero-column level pushed by `new` must never be popped.
    fn pop_indent(&mut self) {
        self.indents.pop();
        assert!(!self.indents.is_empty());
    }
|
||||
}
|
111
bench/tools/gen_large_yaml/src/nested.rs
Normal file
111
bench/tools/gen_large_yaml/src/nested.rs
Normal file
|
@ -0,0 +1,111 @@
|
|||
use std::{cell::RefCell, rc::Rc};
|
||||
|
||||
use rand::{rngs::ThreadRng, Rng};
|
||||
|
||||
/// Create a deep object with the given amount of nodes.
|
||||
pub fn create_deep_object<W: std::io::Write>(
|
||||
writer: &mut W,
|
||||
n_nodes: usize,
|
||||
) -> std::io::Result<()> {
|
||||
let mut tree = Tree::new();
|
||||
for _ in 0..n_nodes {
|
||||
tree.push_node();
|
||||
}
|
||||
tree.write_to(writer)
|
||||
}
|
||||
|
||||
/// An n-tree.
///
/// The algorithm used to generate a potentially deep object is to create a tree, one node at a
/// time, where each node is put as a child of a random existing node in the tree.
struct Tree {
    /// Handle to the root node (also present in `nodes`).
    root: Rc<RefCell<Node>>,
    /// Array of all the nodes in the tree, including the root node.
    nodes: Vec<Rc<RefCell<Node>>>,
    /// The RNG state.
    rng: ThreadRng,
}
|
||||
|
||||
/// A node in a tree.
struct Node {
    /// All the children of the node (empty for leaves).
    children: Vec<Rc<RefCell<Node>>>,
}
|
||||
|
||||
impl Tree {
|
||||
/// Create a new tree.
|
||||
fn new() -> Self {
|
||||
let root = Node::new_rc_refcell();
|
||||
Tree {
|
||||
root: root.clone(),
|
||||
nodes: vec![root],
|
||||
rng: rand::thread_rng(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Add a new node as a child of a random node in the tree.
|
||||
fn push_node(&mut self) {
|
||||
let new_node = Node::new_rc_refcell();
|
||||
let n_nodes = self.nodes.len();
|
||||
let parent = &mut self.nodes[self.rng.gen_range(0..n_nodes)];
|
||||
(**parent).borrow_mut().push_child(new_node.clone());
|
||||
self.nodes.push(new_node);
|
||||
}
|
||||
|
||||
/// Write the YAML representation of the tree to `writer`.
|
||||
fn write_to<W: std::io::Write>(&self, writer: &mut W) -> std::io::Result<()> {
|
||||
(*self.root).borrow().write_to(writer, 0)
|
||||
}
|
||||
}
|
||||
|
||||
impl Node {
|
||||
/// Create a new node.
|
||||
fn new() -> Self {
|
||||
Node { children: vec![] }
|
||||
}
|
||||
|
||||
fn new_rc_refcell() -> Rc<RefCell<Self>> {
|
||||
Rc::new(RefCell::new(Self::new()))
|
||||
}
|
||||
|
||||
/// Append a child to the node.
|
||||
fn push_child(&mut self, child: Rc<RefCell<Self>>) {
|
||||
self.children.push(child);
|
||||
}
|
||||
|
||||
/// Write the YAML representation of the node to `writer`.
|
||||
fn write_to<W: std::io::Write>(&self, writer: &mut W, indent: usize) -> std::io::Result<()> {
|
||||
if self.children.is_empty() {
|
||||
write_n(writer, ' ', indent)?;
|
||||
writer.write_all(b"a: 1\n")?;
|
||||
} else {
|
||||
for (n, child) in self.children.iter().enumerate() {
|
||||
write_n(writer, ' ', indent)?;
|
||||
write_id_for_number(writer, n)?;
|
||||
writer.write_all(b":\n")?;
|
||||
(**child).borrow().write_to(writer, indent + 2)?;
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
/// Write the character `c` to `out`, `n` times.
fn write_n<W: std::io::Write>(out: &mut W, c: char, n: usize) -> std::io::Result<()> {
    (0..n).try_for_each(|_| write!(out, "{c}"))
}
|
||||
|
||||
/// Write a valid identifier for the given number to `out`.
///
/// The identifier is `n + 1` written in base 27 over the alphabet `[_a-z]`,
/// least significant digit first; distinct inputs yield distinct identifiers.
fn write_id_for_number<W: std::io::Write>(out: &mut W, mut n: usize) -> std::io::Result<()> {
    const DIGITS: &[u8] = b"_abcdefghijklmnopqrstuvwxyz";
    // Shift by one so that `n == 0` still produces a non-empty identifier.
    n += 1;
    loop {
        write!(out, "{}", DIGITS[n % DIGITS.len()] as char)?;
        n /= DIGITS.len();
        if n == 0 {
            break;
        }
    }
    Ok(())
}
|
31
bench/tools/time_parse.rs
Normal file
31
bench/tools/time_parse.rs
Normal file
|
@ -0,0 +1,31 @@
|
|||
use std::env;
|
||||
use std::fs::File;
|
||||
use std::io::prelude::*;
|
||||
use yaml_rust2::{
|
||||
parser::{MarkedEventReceiver, Parser},
|
||||
scanner::Marker,
|
||||
Event,
|
||||
};
|
||||
|
||||
/// A sink which discards any event sent.
struct NullSink {}

impl MarkedEventReceiver for NullSink {
    // Intentionally empty: we only want to time the parser, not process events.
    fn on_event(&mut self, _: Event, _: Marker) {}
}
|
||||
|
||||
fn main() {
|
||||
let args: Vec<_> = env::args().collect();
|
||||
let mut f = File::open(&args[1]).unwrap();
|
||||
let mut s = String::new();
|
||||
f.read_to_string(&mut s).unwrap();
|
||||
|
||||
let mut sink = NullSink {};
|
||||
let mut parser = Parser::new(s.chars());
|
||||
|
||||
// Load events using our sink as the receiver.
|
||||
let begin = std::time::Instant::now();
|
||||
parser.load(&mut sink, true).unwrap();
|
||||
let end = std::time::Instant::now();
|
||||
println!("Loaded {}MiB in {:?}", s.len() / 1024 / 1024, end - begin);
|
||||
}
|
Loading…
Reference in a new issue