Make gen_large_yaml
reproducible.
* Use a seedable RNG so that every run produces the same number sequence. * Replace `HashMap`s with `Vec`s to avoid non-deterministic iteration order.
This commit is contained in:
parent
e8415713ab
commit
869a2d1a15
4 changed files with 91 additions and 82 deletions
|
@ -12,7 +12,7 @@ edition = "2018"
|
|||
|
||||
[dependencies]
|
||||
yaml-rust2 = { version = "0.7.0", path = "../../" }
|
||||
rand = "0.8.5"
|
||||
rand = { version = "0.8.5", features = [ "small_rng" ] }
|
||||
lipsum = "0.9.0"
|
||||
|
||||
[profile.release-lto]
|
||||
|
|
|
@ -1,15 +1,15 @@
|
|||
#![allow(clippy::too_many_arguments)]
|
||||
|
||||
use rand::{distributions::Alphanumeric, rngs::ThreadRng, Rng};
|
||||
use rand::{distributions::Alphanumeric, rngs::SmallRng, Rng};
|
||||
|
||||
/// Generate a string with hexadecimal digits of the specified length.
|
||||
pub fn hex_string(rng: &mut ThreadRng, len: usize) -> String {
|
||||
pub fn hex_string(rng: &mut SmallRng, len: usize) -> String {
|
||||
const DIGITS: &[u8] = b"0123456789abcdef";
|
||||
string_from_set(rng, len, len + 1, DIGITS)
|
||||
}
|
||||
|
||||
/// Generate an e-mail address.
|
||||
pub fn email(rng: &mut ThreadRng, len_lo: usize, len_hi: usize) -> String {
|
||||
pub fn email(rng: &mut SmallRng, len_lo: usize, len_hi: usize) -> String {
|
||||
const CHARSET: &[u8] = b"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ-_.0123456789";
|
||||
format!(
|
||||
"{}@example.com",
|
||||
|
@ -19,7 +19,7 @@ pub fn email(rng: &mut ThreadRng, len_lo: usize, len_hi: usize) -> String {
|
|||
|
||||
/// Generate a random URL.
|
||||
pub fn url(
|
||||
rng: &mut ThreadRng,
|
||||
rng: &mut SmallRng,
|
||||
scheme: &str,
|
||||
n_paths_lo: usize,
|
||||
n_paths_hi: usize,
|
||||
|
@ -40,12 +40,12 @@ pub fn url(
|
|||
}
|
||||
|
||||
/// Generate a random integer.
|
||||
pub fn integer(rng: &mut ThreadRng, lo: i64, hi: i64) -> i64 {
|
||||
pub fn integer(rng: &mut SmallRng, lo: i64, hi: i64) -> i64 {
|
||||
rng.gen_range(lo..hi)
|
||||
}
|
||||
|
||||
/// Generate an alphanumeric string with a length between `lo_len` and `hi_len`.
|
||||
pub fn alnum_string(rng: &mut ThreadRng, lo_len: usize, hi_len: usize) -> String {
|
||||
pub fn alnum_string(rng: &mut SmallRng, lo_len: usize, hi_len: usize) -> String {
|
||||
let len = rng.gen_range(lo_len..hi_len);
|
||||
rng.sample_iter(&Alphanumeric)
|
||||
.take(len)
|
||||
|
@ -54,7 +54,7 @@ pub fn alnum_string(rng: &mut ThreadRng, lo_len: usize, hi_len: usize) -> String
|
|||
}
|
||||
|
||||
/// Generate a string of characters picked from `set`, with a length in `len_lo..len_hi`.
|
||||
pub fn string_from_set(rng: &mut ThreadRng, len_lo: usize, len_hi: usize, set: &[u8]) -> String {
|
||||
pub fn string_from_set(rng: &mut SmallRng, len_lo: usize, len_hi: usize, set: &[u8]) -> String {
|
||||
(0..rng.gen_range(len_lo..len_hi))
|
||||
.map(|_| set[rng.gen_range(0..set.len())] as char)
|
||||
.collect()
|
||||
|
@ -62,7 +62,7 @@ pub fn string_from_set(rng: &mut ThreadRng, len_lo: usize, len_hi: usize, set: &
|
|||
|
||||
/// Generate a lipsum paragraph.
|
||||
pub fn paragraph(
|
||||
rng: &mut ThreadRng,
|
||||
rng: &mut SmallRng,
|
||||
lines_lo: usize,
|
||||
lines_hi: usize,
|
||||
wps_lo: usize,
|
||||
|
@ -99,7 +99,7 @@ pub fn paragraph(
|
|||
}
|
||||
|
||||
/// Generate a full name.
|
||||
pub fn full_name(rng: &mut ThreadRng, len_lo: usize, len_hi: usize) -> String {
|
||||
pub fn full_name(rng: &mut SmallRng, len_lo: usize, len_hi: usize) -> String {
|
||||
format!(
|
||||
"{} {}",
|
||||
name(rng, len_lo, len_hi),
|
||||
|
@ -108,7 +108,7 @@ pub fn full_name(rng: &mut ThreadRng, len_lo: usize, len_hi: usize) -> String {
|
|||
}
|
||||
|
||||
/// Generate a name.
|
||||
pub fn name(rng: &mut ThreadRng, len_lo: usize, len_hi: usize) -> String {
|
||||
pub fn name(rng: &mut SmallRng, len_lo: usize, len_hi: usize) -> String {
|
||||
const UPPER: &[u8] = b"ABCDEFGHIJKLMNOPQRSTUVWXYZ";
|
||||
const LOWER: &[u8] = b"abcdefghijklmnopqrstuvwxyz";
|
||||
|
||||
|
@ -121,7 +121,7 @@ pub fn name(rng: &mut ThreadRng, len_lo: usize, len_hi: usize) -> String {
|
|||
}
|
||||
|
||||
/// Generate a set of words.
|
||||
pub fn words(rng: &mut ThreadRng, words_lo: usize, words_hi: usize) -> String {
|
||||
pub fn words(rng: &mut SmallRng, words_lo: usize, words_hi: usize) -> String {
|
||||
let nwords = rng.gen_range(words_lo..words_hi);
|
||||
lipsum::lipsum_words_with_rng(rng.clone(), nwords).replace(|c| "-\'\",*:".contains(c), "")
|
||||
}
|
||||
|
@ -130,7 +130,7 @@ pub fn words(rng: &mut ThreadRng, words_lo: usize, words_hi: usize) -> String {
|
|||
///
|
||||
/// Texts are composed of some paragraphs and empty lines between them.
|
||||
pub fn text(
|
||||
rng: &mut ThreadRng,
|
||||
rng: &mut SmallRng,
|
||||
paragraphs_lo: usize,
|
||||
paragraphs_hi: usize,
|
||||
lines_lo: usize,
|
||||
|
|
|
@ -3,12 +3,11 @@
|
|||
mod gen;
|
||||
mod nested;
|
||||
|
||||
use std::collections::HashMap;
|
||||
use std::fs::File;
|
||||
use std::io::BufWriter;
|
||||
use std::path::Path;
|
||||
|
||||
use rand::{rngs::ThreadRng, Rng};
|
||||
use rand::{rngs::SmallRng, Rng, SeedableRng};
|
||||
|
||||
/// The path into which the generated YAML files will be written.
|
||||
const OUTPUT_DIR: &str = "bench_yaml";
|
||||
|
@ -41,7 +40,10 @@ fn main() -> std::io::Result<()> {
|
|||
/// YAML Generator.
|
||||
struct Generator {
|
||||
/// The RNG state.
|
||||
rng: ThreadRng,
|
||||
///
|
||||
/// We don't need to be cryptographically secure. [`SmallRng`] also implements the
|
||||
/// [`SeedableRng`] trait, allowing runs to be predictable.
|
||||
rng: SmallRng,
|
||||
/// The stack of indentations.
|
||||
indents: Vec<usize>,
|
||||
}
|
||||
|
@ -52,7 +54,7 @@ impl Generator {
|
|||
/// Create a new generator.
|
||||
fn new() -> Self {
|
||||
Generator {
|
||||
rng: rand::thread_rng(),
|
||||
rng: SmallRng::seed_from_u64(42),
|
||||
indents: vec![0],
|
||||
}
|
||||
}
|
||||
|
@ -87,58 +89,61 @@ impl Generator {
|
|||
/// The `description` field is a long string and puts a lot of weight in plain scalar / block
|
||||
/// scalar parsing.
|
||||
fn gen_record_object<W: std::io::Write>(&mut self, writer: &mut W) -> std::io::Result<()> {
|
||||
let mut fields = HashMap::<String, Box<GenFn<W>>>::new();
|
||||
fields.insert(
|
||||
"description".to_string(),
|
||||
Box::new(|gen, w| {
|
||||
write!(w, "|")?;
|
||||
gen.push_indent(2);
|
||||
gen.nl(w)?;
|
||||
let indent = gen.indent();
|
||||
let text = gen::text(&mut gen.rng, 1, 9, 3, 8, 10, 20, 80 - indent);
|
||||
gen.write_lines(w, &text)?;
|
||||
gen.pop_indent();
|
||||
Ok(())
|
||||
}),
|
||||
);
|
||||
|
||||
fields.insert(
|
||||
"authors".to_string(),
|
||||
Box::new(|gen, w| {
|
||||
gen.push_indent(2);
|
||||
gen.nl(w)?;
|
||||
gen.gen_authors_array(w, 1, 10)?;
|
||||
gen.pop_indent();
|
||||
Ok(())
|
||||
}),
|
||||
);
|
||||
|
||||
fields.insert(
|
||||
"hash".to_string(),
|
||||
Box::new(|gen, w| write!(w, "{}", gen::hex_string(&mut gen.rng, 64))),
|
||||
);
|
||||
fields.insert(
|
||||
"version".to_string(),
|
||||
Box::new(|gen, w| write!(w, "{}", gen::integer(&mut gen.rng, 1, 9))),
|
||||
);
|
||||
fields.insert(
|
||||
"home".to_string(),
|
||||
Box::new(|gen, w| write!(w, "{}", gen::url(&mut gen.rng, "https", 0, 1, 0, 0, None))),
|
||||
);
|
||||
fields.insert(
|
||||
"repository".to_string(),
|
||||
Box::new(|gen, w| write!(w, "{}", gen::url(&mut gen.rng, "git", 1, 4, 10, 20, None))),
|
||||
);
|
||||
fields.insert(
|
||||
"pdf".to_string(),
|
||||
Box::new(|gen, w| {
|
||||
write!(
|
||||
w,
|
||||
"{}",
|
||||
gen::url(&mut gen.rng, "https", 1, 4, 10, 30, Some("pdf"))
|
||||
)
|
||||
}),
|
||||
);
|
||||
let fields: Vec<(String, Box<GenFn<W>>)> = vec![
|
||||
(
|
||||
"description".to_string(),
|
||||
Box::new(|gen, w| {
|
||||
write!(w, "|")?;
|
||||
gen.push_indent(2);
|
||||
gen.nl(w)?;
|
||||
let indent = gen.indent();
|
||||
let text = gen::text(&mut gen.rng, 1, 9, 3, 8, 10, 20, 80 - indent);
|
||||
gen.write_lines(w, &text)?;
|
||||
gen.pop_indent();
|
||||
Ok(())
|
||||
}),
|
||||
),
|
||||
(
|
||||
"authors".to_string(),
|
||||
Box::new(|gen, w| {
|
||||
gen.push_indent(2);
|
||||
gen.nl(w)?;
|
||||
gen.gen_authors_array(w, 1, 10)?;
|
||||
gen.pop_indent();
|
||||
Ok(())
|
||||
}),
|
||||
),
|
||||
(
|
||||
"hash".to_string(),
|
||||
Box::new(|gen, w| write!(w, "{}", gen::hex_string(&mut gen.rng, 64))),
|
||||
),
|
||||
(
|
||||
"version".to_string(),
|
||||
Box::new(|gen, w| write!(w, "{}", gen::integer(&mut gen.rng, 1, 9))),
|
||||
),
|
||||
(
|
||||
"home".to_string(),
|
||||
Box::new(|gen, w| {
|
||||
write!(w, "{}", gen::url(&mut gen.rng, "https", 0, 1, 0, 0, None))
|
||||
}),
|
||||
),
|
||||
(
|
||||
"repository".to_string(),
|
||||
Box::new(|gen, w| {
|
||||
write!(w, "{}", gen::url(&mut gen.rng, "git", 1, 4, 10, 20, None))
|
||||
}),
|
||||
),
|
||||
(
|
||||
"pdf".to_string(),
|
||||
Box::new(|gen, w| {
|
||||
write!(
|
||||
w,
|
||||
"{}",
|
||||
gen::url(&mut gen.rng, "https", 1, 4, 10, 30, Some("pdf"))
|
||||
)
|
||||
}),
|
||||
),
|
||||
];
|
||||
self.gen_object(writer, fields)
|
||||
}
|
||||
|
||||
|
@ -154,15 +159,16 @@ impl Generator {
|
|||
|
||||
/// Generate a small object with 2 string fields.
|
||||
fn gen_author_object<W: std::io::Write>(&mut self, writer: &mut W) -> std::io::Result<()> {
|
||||
let mut fields = HashMap::<String, Box<GenFn<W>>>::new();
|
||||
fields.insert(
|
||||
"name".to_string(),
|
||||
Box::new(|gen, w| write!(w, "{}", gen::full_name(&mut gen.rng, 10, 15))),
|
||||
);
|
||||
fields.insert(
|
||||
"email".to_string(),
|
||||
Box::new(|gen, w| write!(w, "{}", gen::email(&mut gen.rng, 1, 9))),
|
||||
);
|
||||
let fields: Vec<(String, Box<GenFn<W>>)> = vec![
|
||||
(
|
||||
"name".to_string(),
|
||||
Box::new(|gen, w| write!(w, "{}", gen::full_name(&mut gen.rng, 10, 15))),
|
||||
),
|
||||
(
|
||||
"email".to_string(),
|
||||
Box::new(|gen, w| write!(w, "{}", gen::email(&mut gen.rng, 1, 9))),
|
||||
),
|
||||
];
|
||||
self.gen_object(writer, fields)
|
||||
}
|
||||
|
||||
|
@ -193,7 +199,7 @@ impl Generator {
|
|||
fn gen_object<W: std::io::Write>(
|
||||
&mut self,
|
||||
writer: &mut W,
|
||||
fields: HashMap<String, Box<GenFn<W>>>,
|
||||
fields: Vec<(String, Box<GenFn<W>>)>,
|
||||
) -> std::io::Result<()> {
|
||||
let mut first = true;
|
||||
for (key, f) in fields {
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
use std::{cell::RefCell, rc::Rc};
|
||||
|
||||
use rand::{rngs::ThreadRng, Rng};
|
||||
use rand::{rngs::SmallRng, Rng, SeedableRng};
|
||||
|
||||
/// Create a deep object with the given amount of nodes.
|
||||
pub fn create_deep_object<W: std::io::Write>(
|
||||
|
@ -24,7 +24,10 @@ struct Tree {
|
|||
/// Array of all the nodes in the tree, including the root node.
|
||||
nodes: Vec<Rc<RefCell<Node>>>,
|
||||
/// The RNG state.
|
||||
rng: ThreadRng,
|
||||
///
|
||||
/// We don't need to be cryptographically secure. [`SmallRng`] also implements the
|
||||
/// [`SeedableRng`] trait, allowing runs to be predictable.
|
||||
rng: SmallRng,
|
||||
}
|
||||
|
||||
/// A node in a tree.
|
||||
|
@ -40,7 +43,7 @@ impl Tree {
|
|||
Tree {
|
||||
root: root.clone(),
|
||||
nodes: vec![root],
|
||||
rng: rand::thread_rng(),
|
||||
rng: SmallRng::seed_from_u64(42),
|
||||
}
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in a new issue