#![allow(dead_code)] mod gen; mod nested; use std::fs::File; use std::io::BufWriter; use std::path::Path; use rand::{rngs::SmallRng, Rng, SeedableRng}; /// The path into which the generated YAML files will be written. const OUTPUT_DIR: &str = "bench_yaml"; fn main() -> std::io::Result<()> { let mut generator = Generator::new(); let output_path = Path::new(OUTPUT_DIR); if !output_path.is_dir() { std::fs::create_dir(output_path).unwrap(); } println!("Generating big.yaml"); let mut out = BufWriter::new(File::create(output_path.join("big.yaml")).unwrap()); generator.gen_record_array(&mut out, 100_000, 100_001)?; println!("Generating nested.yaml"); let mut out = BufWriter::new(File::create(output_path.join("nested.yaml")).unwrap()); nested::create_deep_object(&mut out, 1_100_000)?; println!("Generating small_objects.yaml"); let mut out = BufWriter::new(File::create(output_path.join("small_objects.yaml")).unwrap()); generator.gen_authors_array(&mut out, 4_000_000, 4_000_001)?; println!("Generating strings_array.yaml"); let mut out = BufWriter::new(File::create(output_path.join("strings_array.yaml")).unwrap()); generator.gen_strings_array(&mut out, 1_300_000, 1_300_001, 10, 40)?; Ok(()) } /// YAML Generator. struct Generator { /// The RNG state. /// /// We don't need to be cryptographically secure. [`SmallRng`] also implements the /// [`SeedableRng`] trait, allowing runs to be predictable. rng: SmallRng, /// The stack of indentations. indents: Vec, } type GenFn = dyn FnOnce(&mut Generator, &mut W) -> std::io::Result<()>; impl Generator { /// Create a new generator. fn new() -> Self { Generator { rng: SmallRng::seed_from_u64(42), indents: vec![0], } } /// Generate an array of records as per [`Self::gen_record_object`]. fn gen_record_array( &mut self, writer: &mut W, items_lo: usize, items_hi: usize, ) -> std::io::Result<()> { self.gen_array(writer, items_lo, items_hi, Generator::gen_record_object) } /// Generate an array of lipsum one-liners. fn gen_strings_array( &mut self, writer: &mut W, items_lo: usize, items_hi: usize, words_lo: usize, words_hi: usize, ) -> std::io::Result<()> { self.gen_array(writer, items_lo, items_hi, |gen, writer| { write!(writer, "{}", gen::words(&mut gen.rng, words_lo, words_hi)) }) } /// Generate a YAML object/mapping containing a record. /// /// Fields are description, hash, version, home, repository and pdf. /// The `description` field is a long string and puts a lot of weight in plain scalar / block /// scalar parsing. fn gen_record_object(&mut self, writer: &mut W) -> std::io::Result<()> { let fields: Vec<(String, Box>)> = vec![ ( "description".to_string(), Box::new(|gen, w| { write!(w, "|")?; gen.push_indent(2); gen.nl(w)?; let indent = gen.indent(); let text = gen::text(&mut gen.rng, 1, 9, 3, 8, 10, 20, 80 - indent); gen.write_lines(w, &text)?; gen.pop_indent(); Ok(()) }), ), ( "authors".to_string(), Box::new(|gen, w| { gen.push_indent(2); gen.nl(w)?; gen.gen_authors_array(w, 1, 10)?; gen.pop_indent(); Ok(()) }), ), ( "hash".to_string(), Box::new(|gen, w| write!(w, "{}", gen::hex_string(&mut gen.rng, 64))), ), ( "version".to_string(), Box::new(|gen, w| write!(w, "{}", gen::integer(&mut gen.rng, 1, 9))), ), ( "home".to_string(), Box::new(|gen, w| { write!(w, "{}", gen::url(&mut gen.rng, "https", 0, 1, 0, 0, None)) }), ), ( "repository".to_string(), Box::new(|gen, w| { write!(w, "{}", gen::url(&mut gen.rng, "git", 1, 4, 10, 20, None)) }), ), ( "pdf".to_string(), Box::new(|gen, w| { write!( w, "{}", gen::url(&mut gen.rng, "https", 1, 4, 10, 30, Some("pdf")) ) }), ), ]; self.gen_object(writer, fields) } /// Generate an array of authors as per [`Self::gen_author_object`]. fn gen_authors_array( &mut self, writer: &mut W, items_lo: usize, items_hi: usize, ) -> std::io::Result<()> { self.gen_array(writer, items_lo, items_hi, Generator::gen_author_object) } /// Generate a small object with 2 string fields. fn gen_author_object(&mut self, writer: &mut W) -> std::io::Result<()> { let fields: Vec<(String, Box>)> = vec![ ( "name".to_string(), Box::new(|gen, w| write!(w, "{}", gen::full_name(&mut gen.rng, 10, 15))), ), ( "email".to_string(), Box::new(|gen, w| write!(w, "{}", gen::email(&mut gen.rng, 1, 9))), ), ]; self.gen_object(writer, fields) } /// Generate a YAML array/sequence containing nodes generated by the given function. fn gen_array std::io::Result<()>>( &mut self, writer: &mut W, len_lo: usize, len_hi: usize, mut obj_creator: F, ) -> std::io::Result<()> { let mut first = true; for _ in 0..self.rng.gen_range(len_lo..len_hi) { if first { first = false; } else { self.nl(writer)?; } write!(writer, "- ")?; self.push_indent(2); (obj_creator)(self, writer)?; self.pop_indent(); } Ok(()) } /// Create a Yaml object with some fields in it. fn gen_object( &mut self, writer: &mut W, fields: Vec<(String, Box>)>, ) -> std::io::Result<()> { let mut first = true; for (key, f) in fields { if first { first = false; } else { self.nl(writer)?; } write!(writer, "{key}: ")?; f(self, writer)?; } Ok(()) } /// Write the given lines at the right indentation. fn write_lines( &mut self, writer: &mut W, lines: &[String], ) -> std::io::Result<()> { let mut first = true; for line in lines { if first { first = false; } else { self.nl(writer)?; } write!(writer, "{line}")?; } Ok(()) } /// Write a new line to the writer and indent. fn nl(&mut self, writer: &mut W) -> std::io::Result<()> { writeln!(writer)?; for _ in 0..self.indent() { write!(writer, " ")?; } Ok(()) } /// Return the given indent. fn indent(&self) -> usize { *self.indents.last().unwrap() } /// Push a new indent with the given relative offset. fn push_indent(&mut self, offset: usize) { self.indents.push(self.indent() + offset); } /// Pops the last indent. fn pop_indent(&mut self) { self.indents.pop(); assert!(!self.indents.is_empty()); } }