Make gen_large_yaml reproducible.

* Use a seedable RNG so that we can have the same number sequence.
* Replace `HashMap`s with `Vec`s to avoid non-deterministic iteration order.
This commit is contained in:
Ethiraric 2024-03-20 23:07:08 +01:00
parent 60bf9c80ba
commit 0f97d2b9e4
4 changed files with 91 additions and 82 deletions

View file

@ -12,7 +12,7 @@ edition = "2018"
[dependencies] [dependencies]
yaml-rust2 = { version = "0.7.0", path = "../../" } yaml-rust2 = { version = "0.7.0", path = "../../" }
rand = "0.8.5" rand = { version = "0.8.5", features = [ "small_rng" ] }
lipsum = "0.9.0" lipsum = "0.9.0"
[profile.release-lto] [profile.release-lto]

View file

@ -1,15 +1,15 @@
#![allow(clippy::too_many_arguments)] #![allow(clippy::too_many_arguments)]
use rand::{distributions::Alphanumeric, rngs::ThreadRng, Rng}; use rand::{distributions::Alphanumeric, rngs::SmallRng, Rng};
/// Generate a string with hexadecimal digits of the specified length. /// Generate a string with hexadecimal digits of the specified length.
pub fn hex_string(rng: &mut ThreadRng, len: usize) -> String { pub fn hex_string(rng: &mut SmallRng, len: usize) -> String {
const DIGITS: &[u8] = b"0123456789abcdef"; const DIGITS: &[u8] = b"0123456789abcdef";
string_from_set(rng, len, len + 1, DIGITS) string_from_set(rng, len, len + 1, DIGITS)
} }
/// Generate an e-mail address. /// Generate an e-mail address.
pub fn email(rng: &mut ThreadRng, len_lo: usize, len_hi: usize) -> String { pub fn email(rng: &mut SmallRng, len_lo: usize, len_hi: usize) -> String {
const CHARSET: &[u8] = b"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ-_.0123456789"; const CHARSET: &[u8] = b"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ-_.0123456789";
format!( format!(
"{}@example.com", "{}@example.com",
@ -19,7 +19,7 @@ pub fn email(rng: &mut ThreadRng, len_lo: usize, len_hi: usize) -> String {
/// Generate a random URL. /// Generate a random URL.
pub fn url( pub fn url(
rng: &mut ThreadRng, rng: &mut SmallRng,
scheme: &str, scheme: &str,
n_paths_lo: usize, n_paths_lo: usize,
n_paths_hi: usize, n_paths_hi: usize,
@ -40,12 +40,12 @@ pub fn url(
} }
/// Generate a random integer. /// Generate a random integer.
pub fn integer(rng: &mut ThreadRng, lo: i64, hi: i64) -> i64 { pub fn integer(rng: &mut SmallRng, lo: i64, hi: i64) -> i64 {
rng.gen_range(lo..hi) rng.gen_range(lo..hi)
} }
/// Generate an alphanumeric string with a length between `lo_len` and `hi_len`. /// Generate an alphanumeric string with a length between `lo_len` and `hi_len`.
pub fn alnum_string(rng: &mut ThreadRng, lo_len: usize, hi_len: usize) -> String { pub fn alnum_string(rng: &mut SmallRng, lo_len: usize, hi_len: usize) -> String {
let len = rng.gen_range(lo_len..hi_len); let len = rng.gen_range(lo_len..hi_len);
rng.sample_iter(&Alphanumeric) rng.sample_iter(&Alphanumeric)
.take(len) .take(len)
@ -54,7 +54,7 @@ pub fn alnum_string(rng: &mut ThreadRng, lo_len: usize, hi_len: usize) -> String
} }
/// Generate a string of characters drawn from `set`, with a length in `len_lo..len_hi`. /// Generate a string of characters drawn from `set`, with a length in `len_lo..len_hi`.
pub fn string_from_set(rng: &mut ThreadRng, len_lo: usize, len_hi: usize, set: &[u8]) -> String { pub fn string_from_set(rng: &mut SmallRng, len_lo: usize, len_hi: usize, set: &[u8]) -> String {
(0..rng.gen_range(len_lo..len_hi)) (0..rng.gen_range(len_lo..len_hi))
.map(|_| set[rng.gen_range(0..set.len())] as char) .map(|_| set[rng.gen_range(0..set.len())] as char)
.collect() .collect()
@ -62,7 +62,7 @@ pub fn string_from_set(rng: &mut ThreadRng, len_lo: usize, len_hi: usize, set: &
/// Generate a lipsum paragraph. /// Generate a lipsum paragraph.
pub fn paragraph( pub fn paragraph(
rng: &mut ThreadRng, rng: &mut SmallRng,
lines_lo: usize, lines_lo: usize,
lines_hi: usize, lines_hi: usize,
wps_lo: usize, wps_lo: usize,
@ -99,7 +99,7 @@ pub fn paragraph(
} }
/// Generate a full name. /// Generate a full name.
pub fn full_name(rng: &mut ThreadRng, len_lo: usize, len_hi: usize) -> String { pub fn full_name(rng: &mut SmallRng, len_lo: usize, len_hi: usize) -> String {
format!( format!(
"{} {}", "{} {}",
name(rng, len_lo, len_hi), name(rng, len_lo, len_hi),
@ -108,7 +108,7 @@ pub fn full_name(rng: &mut ThreadRng, len_lo: usize, len_hi: usize) -> String {
} }
/// Generate a name. /// Generate a name.
pub fn name(rng: &mut ThreadRng, len_lo: usize, len_hi: usize) -> String { pub fn name(rng: &mut SmallRng, len_lo: usize, len_hi: usize) -> String {
const UPPER: &[u8] = b"ABCDEFGHIJKLMNOPQRSTUVWXYZ"; const UPPER: &[u8] = b"ABCDEFGHIJKLMNOPQRSTUVWXYZ";
const LOWER: &[u8] = b"abcdefghijklmnopqrstuvwxyz"; const LOWER: &[u8] = b"abcdefghijklmnopqrstuvwxyz";
@ -121,7 +121,7 @@ pub fn name(rng: &mut ThreadRng, len_lo: usize, len_hi: usize) -> String {
} }
/// Generate a set of words. /// Generate a set of words.
pub fn words(rng: &mut ThreadRng, words_lo: usize, words_hi: usize) -> String { pub fn words(rng: &mut SmallRng, words_lo: usize, words_hi: usize) -> String {
let nwords = rng.gen_range(words_lo..words_hi); let nwords = rng.gen_range(words_lo..words_hi);
lipsum::lipsum_words_with_rng(rng.clone(), nwords).replace(|c| "-\'\",*:".contains(c), "") lipsum::lipsum_words_with_rng(rng.clone(), nwords).replace(|c| "-\'\",*:".contains(c), "")
} }
@ -130,7 +130,7 @@ pub fn words(rng: &mut ThreadRng, words_lo: usize, words_hi: usize) -> String {
/// ///
/// Texts are composed of some paragraphs and empty lines between them. /// Texts are composed of some paragraphs and empty lines between them.
pub fn text( pub fn text(
rng: &mut ThreadRng, rng: &mut SmallRng,
paragraphs_lo: usize, paragraphs_lo: usize,
paragraphs_hi: usize, paragraphs_hi: usize,
lines_lo: usize, lines_lo: usize,

View file

@ -3,12 +3,11 @@
mod gen; mod gen;
mod nested; mod nested;
use std::collections::HashMap;
use std::fs::File; use std::fs::File;
use std::io::BufWriter; use std::io::BufWriter;
use std::path::Path; use std::path::Path;
use rand::{rngs::ThreadRng, Rng}; use rand::{rngs::SmallRng, Rng, SeedableRng};
/// The path into which the generated YAML files will be written. /// The path into which the generated YAML files will be written.
const OUTPUT_DIR: &str = "bench_yaml"; const OUTPUT_DIR: &str = "bench_yaml";
@ -41,7 +40,10 @@ fn main() -> std::io::Result<()> {
/// YAML Generator. /// YAML Generator.
struct Generator { struct Generator {
/// The RNG state. /// The RNG state.
rng: ThreadRng, ///
/// We don't need to be cryptographically secure. [`SmallRng`] also implements the
/// [`SeedableRng`] trait, allowing runs to be predictable.
rng: SmallRng,
/// The stack of indentations. /// The stack of indentations.
indents: Vec<usize>, indents: Vec<usize>,
} }
@ -52,7 +54,7 @@ impl Generator {
/// Create a new generator. /// Create a new generator.
fn new() -> Self { fn new() -> Self {
Generator { Generator {
rng: rand::thread_rng(), rng: SmallRng::seed_from_u64(42),
indents: vec![0], indents: vec![0],
} }
} }
@ -87,58 +89,61 @@ impl Generator {
/// The `description` field is a long string and puts a lot of weight in plain scalar / block /// The `description` field is a long string and puts a lot of weight in plain scalar / block
/// scalar parsing. /// scalar parsing.
fn gen_record_object<W: std::io::Write>(&mut self, writer: &mut W) -> std::io::Result<()> { fn gen_record_object<W: std::io::Write>(&mut self, writer: &mut W) -> std::io::Result<()> {
let mut fields = HashMap::<String, Box<GenFn<W>>>::new(); let fields: Vec<(String, Box<GenFn<W>>)> = vec![
fields.insert( (
"description".to_string(), "description".to_string(),
Box::new(|gen, w| { Box::new(|gen, w| {
write!(w, "|")?; write!(w, "|")?;
gen.push_indent(2); gen.push_indent(2);
gen.nl(w)?; gen.nl(w)?;
let indent = gen.indent(); let indent = gen.indent();
let text = gen::text(&mut gen.rng, 1, 9, 3, 8, 10, 20, 80 - indent); let text = gen::text(&mut gen.rng, 1, 9, 3, 8, 10, 20, 80 - indent);
gen.write_lines(w, &text)?; gen.write_lines(w, &text)?;
gen.pop_indent(); gen.pop_indent();
Ok(()) Ok(())
}), }),
); ),
(
fields.insert( "authors".to_string(),
"authors".to_string(), Box::new(|gen, w| {
Box::new(|gen, w| { gen.push_indent(2);
gen.push_indent(2); gen.nl(w)?;
gen.nl(w)?; gen.gen_authors_array(w, 1, 10)?;
gen.gen_authors_array(w, 1, 10)?; gen.pop_indent();
gen.pop_indent(); Ok(())
Ok(()) }),
}), ),
); (
"hash".to_string(),
fields.insert( Box::new(|gen, w| write!(w, "{}", gen::hex_string(&mut gen.rng, 64))),
"hash".to_string(), ),
Box::new(|gen, w| write!(w, "{}", gen::hex_string(&mut gen.rng, 64))), (
); "version".to_string(),
fields.insert( Box::new(|gen, w| write!(w, "{}", gen::integer(&mut gen.rng, 1, 9))),
"version".to_string(), ),
Box::new(|gen, w| write!(w, "{}", gen::integer(&mut gen.rng, 1, 9))), (
); "home".to_string(),
fields.insert( Box::new(|gen, w| {
"home".to_string(), write!(w, "{}", gen::url(&mut gen.rng, "https", 0, 1, 0, 0, None))
Box::new(|gen, w| write!(w, "{}", gen::url(&mut gen.rng, "https", 0, 1, 0, 0, None))), }),
); ),
fields.insert( (
"repository".to_string(), "repository".to_string(),
Box::new(|gen, w| write!(w, "{}", gen::url(&mut gen.rng, "git", 1, 4, 10, 20, None))), Box::new(|gen, w| {
); write!(w, "{}", gen::url(&mut gen.rng, "git", 1, 4, 10, 20, None))
fields.insert( }),
"pdf".to_string(), ),
Box::new(|gen, w| { (
write!( "pdf".to_string(),
w, Box::new(|gen, w| {
"{}", write!(
gen::url(&mut gen.rng, "https", 1, 4, 10, 30, Some("pdf")) w,
) "{}",
}), gen::url(&mut gen.rng, "https", 1, 4, 10, 30, Some("pdf"))
); )
}),
),
];
self.gen_object(writer, fields) self.gen_object(writer, fields)
} }
@ -154,15 +159,16 @@ impl Generator {
/// Generate a small object with 2 string fields. /// Generate a small object with 2 string fields.
fn gen_author_object<W: std::io::Write>(&mut self, writer: &mut W) -> std::io::Result<()> { fn gen_author_object<W: std::io::Write>(&mut self, writer: &mut W) -> std::io::Result<()> {
let mut fields = HashMap::<String, Box<GenFn<W>>>::new(); let fields: Vec<(String, Box<GenFn<W>>)> = vec![
fields.insert( (
"name".to_string(), "name".to_string(),
Box::new(|gen, w| write!(w, "{}", gen::full_name(&mut gen.rng, 10, 15))), Box::new(|gen, w| write!(w, "{}", gen::full_name(&mut gen.rng, 10, 15))),
); ),
fields.insert( (
"email".to_string(), "email".to_string(),
Box::new(|gen, w| write!(w, "{}", gen::email(&mut gen.rng, 1, 9))), Box::new(|gen, w| write!(w, "{}", gen::email(&mut gen.rng, 1, 9))),
); ),
];
self.gen_object(writer, fields) self.gen_object(writer, fields)
} }
@ -193,7 +199,7 @@ impl Generator {
fn gen_object<W: std::io::Write>( fn gen_object<W: std::io::Write>(
&mut self, &mut self,
writer: &mut W, writer: &mut W,
fields: HashMap<String, Box<GenFn<W>>>, fields: Vec<(String, Box<GenFn<W>>)>,
) -> std::io::Result<()> { ) -> std::io::Result<()> {
let mut first = true; let mut first = true;
for (key, f) in fields { for (key, f) in fields {

View file

@ -1,6 +1,6 @@
use std::{cell::RefCell, rc::Rc}; use std::{cell::RefCell, rc::Rc};
use rand::{rngs::ThreadRng, Rng}; use rand::{rngs::SmallRng, Rng, SeedableRng};
/// Create a deep object with the given amount of nodes. /// Create a deep object with the given amount of nodes.
pub fn create_deep_object<W: std::io::Write>( pub fn create_deep_object<W: std::io::Write>(
@ -24,7 +24,10 @@ struct Tree {
/// Array of all the nodes in the tree, including the root node. /// Array of all the nodes in the tree, including the root node.
nodes: Vec<Rc<RefCell<Node>>>, nodes: Vec<Rc<RefCell<Node>>>,
/// The RNG state. /// The RNG state.
rng: ThreadRng, ///
/// We don't need to be cryptographically secure. [`SmallRng`] also implements the
/// [`SeedableRng`] trait, allowing runs to be predictable.
rng: SmallRng,
} }
/// A node in a tree. /// A node in a tree.
@ -40,7 +43,7 @@ impl Tree {
Tree { Tree {
root: root.clone(), root: root.clone(),
nodes: vec![root], nodes: vec![root],
rng: rand::thread_rng(), rng: SmallRng::seed_from_u64(42),
} }
} }