diff --git a/parser/tools/gen_large_yaml/Cargo.toml b/parser/tools/gen_large_yaml/Cargo.toml index 750a4c4..a70e779 100644 --- a/parser/tools/gen_large_yaml/Cargo.toml +++ b/parser/tools/gen_large_yaml/Cargo.toml @@ -12,7 +12,7 @@ edition = "2018" [dependencies] yaml-rust2 = { version = "0.7.0", path = "../../" } -rand = "0.8.5" +rand = { version = "0.8.5", features = [ "small_rng" ] } lipsum = "0.9.0" [profile.release-lto] diff --git a/parser/tools/gen_large_yaml/src/gen.rs b/parser/tools/gen_large_yaml/src/gen.rs index 2a7dffe..78d16ba 100644 --- a/parser/tools/gen_large_yaml/src/gen.rs +++ b/parser/tools/gen_large_yaml/src/gen.rs @@ -1,15 +1,15 @@ #![allow(clippy::too_many_arguments)] -use rand::{distributions::Alphanumeric, rngs::ThreadRng, Rng}; +use rand::{distributions::Alphanumeric, rngs::SmallRng, Rng}; /// Generate a string with hexadecimal digits of the specified length. -pub fn hex_string(rng: &mut ThreadRng, len: usize) -> String { +pub fn hex_string(rng: &mut SmallRng, len: usize) -> String { const DIGITS: &[u8] = b"0123456789abcdef"; string_from_set(rng, len, len + 1, DIGITS) } /// Generate an e-mail address. -pub fn email(rng: &mut ThreadRng, len_lo: usize, len_hi: usize) -> String { +pub fn email(rng: &mut SmallRng, len_lo: usize, len_hi: usize) -> String { const CHARSET: &[u8] = b"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ-_.0123456789"; format!( "{}@example.com", @@ -19,7 +19,7 @@ pub fn email(rng: &mut ThreadRng, len_lo: usize, len_hi: usize) -> String { /// Generate a random URL. pub fn url( - rng: &mut ThreadRng, + rng: &mut SmallRng, scheme: &str, n_paths_lo: usize, n_paths_hi: usize, @@ -40,12 +40,12 @@ pub fn url( } /// Generate a random integer. -pub fn integer(rng: &mut ThreadRng, lo: i64, hi: i64) -> i64 { +pub fn integer(rng: &mut SmallRng, lo: i64, hi: i64) -> i64 { rng.gen_range(lo..hi) } /// Generate an alphanumeric string with a length between `lo_len` and `hi_len`. 
-pub fn alnum_string(rng: &mut ThreadRng, lo_len: usize, hi_len: usize) -> String { +pub fn alnum_string(rng: &mut SmallRng, lo_len: usize, hi_len: usize) -> String { let len = rng.gen_range(lo_len..hi_len); rng.sample_iter(&Alphanumeric) .take(len) @@ -54,7 +54,7 @@ pub fn alnum_string(rng: &mut ThreadRng, lo_len: usize, hi_len: usize) -> String } /// Generate a string with hexadecimal digits of the specified length. -pub fn string_from_set(rng: &mut ThreadRng, len_lo: usize, len_hi: usize, set: &[u8]) -> String { +pub fn string_from_set(rng: &mut SmallRng, len_lo: usize, len_hi: usize, set: &[u8]) -> String { (0..rng.gen_range(len_lo..len_hi)) .map(|_| set[rng.gen_range(0..set.len())] as char) .collect() @@ -62,7 +62,7 @@ pub fn string_from_set(rng: &mut ThreadRng, len_lo: usize, len_hi: usize, set: & /// Generate a lipsum paragraph. pub fn paragraph( - rng: &mut ThreadRng, + rng: &mut SmallRng, lines_lo: usize, lines_hi: usize, wps_lo: usize, @@ -99,7 +99,7 @@ pub fn paragraph( } /// Generate a full name. -pub fn full_name(rng: &mut ThreadRng, len_lo: usize, len_hi: usize) -> String { +pub fn full_name(rng: &mut SmallRng, len_lo: usize, len_hi: usize) -> String { format!( "{} {}", name(rng, len_lo, len_hi), @@ -108,7 +108,7 @@ pub fn full_name(rng: &mut ThreadRng, len_lo: usize, len_hi: usize) -> String { } /// Generate a name. -pub fn name(rng: &mut ThreadRng, len_lo: usize, len_hi: usize) -> String { +pub fn name(rng: &mut SmallRng, len_lo: usize, len_hi: usize) -> String { const UPPER: &[u8] = b"ABCDEFGHIJKLMNOPQRSTUVWXYZ"; const LOWER: &[u8] = b"abcdefghijklmnopqrstuvwxyz"; @@ -121,7 +121,7 @@ pub fn name(rng: &mut ThreadRng, len_lo: usize, len_hi: usize) -> String { } /// Generate a set of words. 
-pub fn words(rng: &mut ThreadRng, words_lo: usize, words_hi: usize) -> String { +pub fn words(rng: &mut SmallRng, words_lo: usize, words_hi: usize) -> String { let nwords = rng.gen_range(words_lo..words_hi); lipsum::lipsum_words_with_rng(rng.clone(), nwords).replace(|c| "-\'\",*:".contains(c), "") } @@ -130,7 +130,7 @@ pub fn words(rng: &mut ThreadRng, words_lo: usize, words_hi: usize) -> String { /// /// Texts are composed of some paragraphs and empty lines between them. pub fn text( - rng: &mut ThreadRng, + rng: &mut SmallRng, paragraphs_lo: usize, paragraphs_hi: usize, lines_lo: usize, diff --git a/parser/tools/gen_large_yaml/src/main.rs b/parser/tools/gen_large_yaml/src/main.rs index 1c7ee6e..86423bf 100644 --- a/parser/tools/gen_large_yaml/src/main.rs +++ b/parser/tools/gen_large_yaml/src/main.rs @@ -3,12 +3,11 @@ mod gen; mod nested; -use std::collections::HashMap; use std::fs::File; use std::io::BufWriter; use std::path::Path; -use rand::{rngs::ThreadRng, Rng}; +use rand::{rngs::SmallRng, Rng, SeedableRng}; /// The path into which the generated YAML files will be written. const OUTPUT_DIR: &str = "bench_yaml"; @@ -41,7 +40,10 @@ fn main() -> std::io::Result<()> { /// YAML Generator. struct Generator { /// The RNG state. - rng: ThreadRng, + /// + /// We don't need to be cryptographically secure. [`SmallRng`] also implements the + /// [`SeedableRng`] trait, allowing runs to be predictable. + rng: SmallRng, /// The stack of indentations. indents: Vec, } @@ -52,7 +54,7 @@ impl Generator { /// Create a new generator. fn new() -> Self { Generator { - rng: rand::thread_rng(), + rng: SmallRng::seed_from_u64(42), indents: vec![0], } } @@ -87,58 +89,61 @@ impl Generator { /// The `description` field is a long string and puts a lot of weight in plain scalar / block /// scalar parsing. 
fn gen_record_object(&mut self, writer: &mut W) -> std::io::Result<()> { - let mut fields = HashMap::>>::new(); - fields.insert( - "description".to_string(), - Box::new(|gen, w| { - write!(w, "|")?; - gen.push_indent(2); - gen.nl(w)?; - let indent = gen.indent(); - let text = gen::text(&mut gen.rng, 1, 9, 3, 8, 10, 20, 80 - indent); - gen.write_lines(w, &text)?; - gen.pop_indent(); - Ok(()) - }), - ); - - fields.insert( - "authors".to_string(), - Box::new(|gen, w| { - gen.push_indent(2); - gen.nl(w)?; - gen.gen_authors_array(w, 1, 10)?; - gen.pop_indent(); - Ok(()) - }), - ); - - fields.insert( - "hash".to_string(), - Box::new(|gen, w| write!(w, "{}", gen::hex_string(&mut gen.rng, 64))), - ); - fields.insert( - "version".to_string(), - Box::new(|gen, w| write!(w, "{}", gen::integer(&mut gen.rng, 1, 9))), - ); - fields.insert( - "home".to_string(), - Box::new(|gen, w| write!(w, "{}", gen::url(&mut gen.rng, "https", 0, 1, 0, 0, None))), - ); - fields.insert( - "repository".to_string(), - Box::new(|gen, w| write!(w, "{}", gen::url(&mut gen.rng, "git", 1, 4, 10, 20, None))), - ); - fields.insert( - "pdf".to_string(), - Box::new(|gen, w| { - write!( - w, - "{}", - gen::url(&mut gen.rng, "https", 1, 4, 10, 30, Some("pdf")) - ) - }), - ); + let fields: Vec<(String, Box>)> = vec![ + ( + "description".to_string(), + Box::new(|gen, w| { + write!(w, "|")?; + gen.push_indent(2); + gen.nl(w)?; + let indent = gen.indent(); + let text = gen::text(&mut gen.rng, 1, 9, 3, 8, 10, 20, 80 - indent); + gen.write_lines(w, &text)?; + gen.pop_indent(); + Ok(()) + }), + ), + ( + "authors".to_string(), + Box::new(|gen, w| { + gen.push_indent(2); + gen.nl(w)?; + gen.gen_authors_array(w, 1, 10)?; + gen.pop_indent(); + Ok(()) + }), + ), + ( + "hash".to_string(), + Box::new(|gen, w| write!(w, "{}", gen::hex_string(&mut gen.rng, 64))), + ), + ( + "version".to_string(), + Box::new(|gen, w| write!(w, "{}", gen::integer(&mut gen.rng, 1, 9))), + ), + ( + "home".to_string(), + Box::new(|gen, w| { + 
write!(w, "{}", gen::url(&mut gen.rng, "https", 0, 1, 0, 0, None)) + }), + ), + ( + "repository".to_string(), + Box::new(|gen, w| { + write!(w, "{}", gen::url(&mut gen.rng, "git", 1, 4, 10, 20, None)) + }), + ), + ( + "pdf".to_string(), + Box::new(|gen, w| { + write!( + w, + "{}", + gen::url(&mut gen.rng, "https", 1, 4, 10, 30, Some("pdf")) + ) + }), + ), + ]; self.gen_object(writer, fields) } @@ -154,15 +159,16 @@ impl Generator { /// Generate a small object with 2 string fields. fn gen_author_object(&mut self, writer: &mut W) -> std::io::Result<()> { - let mut fields = HashMap::>>::new(); - fields.insert( - "name".to_string(), - Box::new(|gen, w| write!(w, "{}", gen::full_name(&mut gen.rng, 10, 15))), - ); - fields.insert( - "email".to_string(), - Box::new(|gen, w| write!(w, "{}", gen::email(&mut gen.rng, 1, 9))), - ); + let fields: Vec<(String, Box>)> = vec![ + ( + "name".to_string(), + Box::new(|gen, w| write!(w, "{}", gen::full_name(&mut gen.rng, 10, 15))), + ), + ( + "email".to_string(), + Box::new(|gen, w| write!(w, "{}", gen::email(&mut gen.rng, 1, 9))), + ), + ]; self.gen_object(writer, fields) } @@ -193,7 +199,7 @@ impl Generator { fn gen_object( &mut self, writer: &mut W, - fields: HashMap>>, + fields: Vec<(String, Box>)>, ) -> std::io::Result<()> { let mut first = true; for (key, f) in fields { diff --git a/parser/tools/gen_large_yaml/src/nested.rs b/parser/tools/gen_large_yaml/src/nested.rs index f54b55c..db93ff9 100644 --- a/parser/tools/gen_large_yaml/src/nested.rs +++ b/parser/tools/gen_large_yaml/src/nested.rs @@ -1,6 +1,6 @@ use std::{cell::RefCell, rc::Rc}; -use rand::{rngs::ThreadRng, Rng}; +use rand::{rngs::SmallRng, Rng, SeedableRng}; /// Create a deep object with the given amount of nodes. pub fn create_deep_object( @@ -24,7 +24,10 @@ struct Tree { /// Array of all the nodes in the tree, including the root node. nodes: Vec>>, /// The RNG state. - rng: ThreadRng, + /// + /// We don't need to be cryptographically secure. 
[`SmallRng`] also implements the + /// [`SeedableRng`] trait, allowing runs to be predictable. + rng: SmallRng, } /// A node in a tree. @@ -40,7 +43,7 @@ impl Tree { Tree { root: root.clone(), nodes: vec![root], - rng: rand::thread_rng(), + rng: SmallRng::seed_from_u64(42), } }