From 36e8b06e8288149af0759561382756ff492ef7f0 Mon Sep 17 00:00:00 2001
From: Ethiraric
Date: Tue, 23 Jan 2024 22:48:20 +0100
Subject: [PATCH] Add benchmarking tools.

---
 saphyr/Cargo.toml                            |   2 +
 saphyr/examples/gen_large_yaml_array/gen.rs  | 150 +++++++++++++
 saphyr/examples/gen_large_yaml_array/main.rs | 214 +++++++++++++++++++
 saphyr/examples/time_parse.rs                |  31 +++
 4 files changed, 397 insertions(+)
 create mode 100644 saphyr/examples/gen_large_yaml_array/gen.rs
 create mode 100644 saphyr/examples/gen_large_yaml_array/main.rs
 create mode 100644 saphyr/examples/time_parse.rs

diff --git a/saphyr/Cargo.toml b/saphyr/Cargo.toml
index 209b969..bf409e1 100644
--- a/saphyr/Cargo.toml
+++ b/saphyr/Cargo.toml
@@ -18,6 +18,8 @@ linked-hash-map = "0.5.3"
 [dev-dependencies]
 libtest-mimic = "0.3.0"
 quickcheck = "0.9"
+rand = "0.8.5"
+lipsum = "0.9.0"
 
 [profile.release-lto]
 inherits = "release"
diff --git a/saphyr/examples/gen_large_yaml_array/gen.rs b/saphyr/examples/gen_large_yaml_array/gen.rs
new file mode 100644
index 0000000..b687371
--- /dev/null
+++ b/saphyr/examples/gen_large_yaml_array/gen.rs
@@ -0,0 +1,150 @@
+#![allow(clippy::too_many_arguments)]
+
+use rand::{distributions::Alphanumeric, rngs::ThreadRng, Rng};
+
+/// Generate a string of exactly `len` lowercase hexadecimal digits.
+pub fn hex_string(rng: &mut ThreadRng, len: usize) -> String {
+    const DIGITS: &[u8] = b"0123456789abcdef";
+    string_from_set(rng, len, len + 1, DIGITS) // `len..len + 1` only contains `len`.
+}
+
+/// Generate an `example.com` e-mail address whose local part has `len_lo..len_hi` characters.
+pub fn email(rng: &mut ThreadRng, len_lo: usize, len_hi: usize) -> String {
+    const CHARSET: &[u8] = b"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ-_.0123456789";
+    format!(
+        "{}@example.com",
+        string_from_set(rng, len_lo, len_hi, CHARSET)
+    )
+}
+
+/// Generate a URL on `example.com` with random alphanumeric path segments and optional extension.
+pub fn url(
+    rng: &mut ThreadRng,
+    scheme: &str,
+    n_paths_lo: usize,
+    n_paths_hi: usize,
+    path_len_lo: usize,
+    path_len_hi: usize,
+    extension: Option<&str>,
+) -> String {
+    let mut string = format!("{scheme}://example.com");
+    for _ in 0..rng.gen_range(n_paths_lo..n_paths_hi) {
+        string.push('/');
+        string.push_str(&alnum_string(rng, path_len_lo, path_len_hi));
+    }
+    if let Some(extension) = extension {
+        string.push('.');
+        string.push_str(extension);
+    }
+    string
+}
+
+/// Generate an integer in `lo..hi` (end excluded); `gen_range` panics if the range is empty.
+pub fn integer(rng: &mut ThreadRng, lo: i64, hi: i64) -> i64 {
+    rng.gen_range(lo..hi)
+}
+
+/// Generate an alphanumeric string whose length is picked in `lo_len..hi_len` (end excluded).
+pub fn alnum_string(rng: &mut ThreadRng, lo_len: usize, hi_len: usize) -> String {
+    let len = rng.gen_range(lo_len..hi_len);
+    rng.sample_iter(&Alphanumeric)
+        .take(len)
+        .map(char::from)
+        .collect()
+}
+
+/// Generate a string of `len_lo..len_hi` (end excluded) characters, each picked from `set`.
+pub fn string_from_set(rng: &mut ThreadRng, len_lo: usize, len_hi: usize, set: &[u8]) -> String {
+    (0..rng.gen_range(len_lo..len_hi))
+        .map(|_| set[rng.gen_range(0..set.len())] as char) // bytes are cast to `char` one by one
+        .collect()
+}
+
+/// Generate a lipsum paragraph: at least `lines_lo` lines of at most `line_maxcol` bytes each.
+pub fn paragraph(
+    rng: &mut ThreadRng,
+    lines_lo: usize,
+    lines_hi: usize,
+    wps_lo: usize,
+    wps_hi: usize,
+    line_maxcol: usize,
+) -> Vec<String> {
+    let mut ret = Vec::new();
+    let nlines = rng.gen_range(lines_lo..lines_hi);
+
+    while ret.len() < nlines {
+        let words_in_sentence = rng.gen_range(wps_lo..wps_hi);
+        let mut sentence = lipsum::lipsum_words_with_rng(rng.clone(), words_in_sentence);
+        // Continue on the last, possibly short, line rather than starting a new one.
+        if let Some(last_line) = ret.pop() {
+            sentence = format!("{last_line} {sentence}");
+        }
+
+        while sentence.len() > line_maxcol {
+            // Break the line at the last whitespace found in the first `line_maxcol` bytes.
+            // The whitespace itself is dropped: it is neither kept on the line nor carried over.
+            // NOTE(review): byte offsets are used as is; presumes lipsum only yields ASCII.
+            let last_space_idx = sentence[..line_maxcol]
+                .rfind(char::is_whitespace)
+                .expect("a word is too long to fit in `line_maxcol` columns");
+            ret.push(sentence[..last_space_idx].to_string());
+            sentence = sentence[last_space_idx + 1..].to_string();
+        }
+        if !sentence.is_empty() {
+            ret.push(sentence);
+        }
+    }
+
+    ret
+}
+
+/// Generate a full name: two names as per [`name`], separated by a single space.
+pub fn full_name(rng: &mut ThreadRng, len_lo: usize, len_hi: usize) -> String {
+    format!(
+        "{} {}",
+        name(rng, len_lo, len_hi),
+        name(rng, len_lo, len_hi)
+    )
+}
+
+/// Generate a name: one uppercase ASCII letter followed by `len_lo..len_hi` lowercase ones.
+pub fn name(rng: &mut ThreadRng, len_lo: usize, len_hi: usize) -> String {
+    const UPPER: &[u8] = b"ABCDEFGHIJKLMNOPQRSTUVWXYZ";
+    const LOWER: &[u8] = b"abcdefghijklmnopqrstuvwxyz";
+
+    let len = rng.gen_range(len_lo..len_hi);
+    let mut ret = String::new();
+    ret.push(UPPER[rng.gen_range(0..UPPER.len())] as char);
+    ret.push_str(string_from_set(rng, len, len + 1, LOWER).as_str());
+
+    ret
+}
+
+/// Generate a lipsum text of `paragraphs_lo..paragraphs_hi` (end excluded) paragraphs.
+///
+/// Paragraphs are generated by [`paragraph`] and separated from each other by one empty line.
+pub fn text(
+    rng: &mut ThreadRng,
+    paragraphs_lo: usize,
+    paragraphs_hi: usize,
+    lines_lo: usize,
+    lines_hi: usize,
+    wps_lo: usize,
+    wps_hi: usize,
+    line_maxcol: usize,
+) -> Vec<String> {
+    let mut ret = Vec::new();
+    let mut first = true;
+
+    for _ in 0..rng.gen_range(paragraphs_lo..paragraphs_hi) {
+        if first {
+            first = false;
+        } else {
+            ret.push(String::new()); // The empty line separating two paragraphs.
+        }
+
+        ret.extend(paragraph(rng, lines_lo, lines_hi, wps_lo, wps_hi, line_maxcol));
+    }
+
+    ret
+}
diff --git a/saphyr/examples/gen_large_yaml_array/main.rs b/saphyr/examples/gen_large_yaml_array/main.rs
new file mode 100644
index 0000000..3d21c48
--- /dev/null
+++ b/saphyr/examples/gen_large_yaml_array/main.rs
@@ -0,0 +1,214 @@
+#![allow(dead_code)]
+
+mod gen;
+
+use std::collections::HashMap;
+
+use rand::{rngs::ThreadRng, Rng};
+
+/// Print a YAML array of 100 000 records to the standard output.
+fn main() -> std::fmt::Result {
+    let mut s = String::new();
+    let mut g = Generator::new();
+    g.gen_record_array(&mut s, 100_000, 100_001)?;
+    println!("{s}");
+    Ok(())
+}
+
+/// YAML generator: a random source plus the bookkeeping needed to indent the output.
+struct Generator {
+    /// The RNG state.
+    rng: ThreadRng,
+    /// The stack of indentations; the last entry is the current one, in columns.
+    indents: Vec<usize>,
+}
+
+/// A boxable, one-shot callback that writes the value of a field to the writer.
+type GenFn<W> = dyn FnOnce(&mut Generator, &mut W) -> std::fmt::Result;
+
+impl Generator {
+    /// Create a new generator, starting at indentation 0.
+    fn new() -> Self {
+        Generator {
+            rng: rand::thread_rng(),
+            indents: vec![0],
+        }
+    }
+
+    /// Generate an array of `items_lo..items_hi` records as per [`Self::gen_record_object`].
+    fn gen_record_array<W: std::fmt::Write>(
+        &mut self,
+        writer: &mut W,
+        items_lo: usize,
+        items_hi: usize,
+    ) -> std::fmt::Result {
+        self.gen_array(writer, items_lo, items_hi, Generator::gen_record_object)
+    }
+
+    /// Generate a YAML object/mapping containing a record.
+    ///
+    /// Fields are description, authors, hash, version, home, repository and pdf.
+    fn gen_record_object<W: std::fmt::Write>(&mut self, writer: &mut W) -> std::fmt::Result {
+        let mut fields = HashMap::<String, Box<GenFn<W>>>::new();
+        fields.insert(
+            "description".to_string(),
+            Box::new(|gen, w| {
+                write!(w, "|")?;
+                gen.push_indent(2);
+                gen.nl(w)?;
+                let indent = gen.indent();
+                let text = gen::text(&mut gen.rng, 1, 9, 3, 8, 10, 20, 80 - indent);
+                gen.write_lines(w, &text)?;
+                gen.pop_indent();
+                Ok(())
+            }),
+        );
+
+        fields.insert(
+            "authors".to_string(),
+            Box::new(|gen, w| {
+                gen.push_indent(2);
+                gen.nl(w)?;
+                gen.gen_authors_array(w, 1, 10)?;
+                gen.pop_indent();
+                Ok(())
+            }),
+        );
+        fields.insert(
+            "hash".to_string(),
+            Box::new(|gen, w| write!(w, "{}", gen::hex_string(&mut gen.rng, 64))),
+        );
+        fields.insert(
+            "version".to_string(),
+            Box::new(|gen, w| write!(w, "{}", gen::integer(&mut gen.rng, 1, 9))),
+        );
+        fields.insert(
+            "home".to_string(),
+            Box::new(|gen, w| write!(w, "{}", gen::url(&mut gen.rng, "https", 0, 1, 0, 0, None))),
+        );
+        fields.insert(
+            "repository".to_string(),
+            Box::new(|gen, w| write!(w, "{}", gen::url(&mut gen.rng, "git", 1, 4, 10, 20, None))),
+        );
+        fields.insert(
+            "pdf".to_string(),
+            Box::new(|gen, w| {
+                write!(
+                    w,
+                    "{}",
+                    gen::url(&mut gen.rng, "https", 1, 4, 10, 30, Some("pdf"))
+                )
+            }),
+        );
+        self.gen_object(writer, fields)
+    }
+
+    /// Generate an array of `items_lo..items_hi` authors as per [`Self::gen_author_object`].
+    fn gen_authors_array<W: std::fmt::Write>(
+        &mut self,
+        writer: &mut W,
+        items_lo: usize,
+        items_hi: usize,
+    ) -> std::fmt::Result {
+        self.gen_array(writer, items_lo, items_hi, Generator::gen_author_object)
+    }
+
+    /// Generate a YAML object/mapping describing an author, with fields name and email.
+    fn gen_author_object<W: std::fmt::Write>(&mut self, writer: &mut W) -> std::fmt::Result {
+        let mut fields = HashMap::<String, Box<GenFn<W>>>::new();
+        fields.insert(
+            "name".to_string(),
+            Box::new(|gen, w| write!(w, "{}", gen::full_name(&mut gen.rng, 10, 15))),
+        );
+        fields.insert(
+            "email".to_string(),
+            Box::new(|gen, w| write!(w, "{}", gen::email(&mut gen.rng, 1, 9))),
+        );
+        self.gen_object(writer, fields)
+    }
+
+    /// Generate a YAML array/sequence of `len_lo..len_hi` nodes generated by the given function.
+    fn gen_array<W: std::fmt::Write, F: FnMut(&mut Generator, &mut W) -> std::fmt::Result>(
+        &mut self,
+        writer: &mut W,
+        len_lo: usize,
+        len_hi: usize,
+        mut obj_creator: F,
+    ) -> std::fmt::Result {
+        let mut first = true;
+        for _ in 0..self.rng.gen_range(len_lo..len_hi) {
+            if first {
+                first = false;
+            } else {
+                self.nl(writer)?;
+            }
+            write!(writer, "- ")?;
+            self.push_indent(2); // Nested content must line up after the "- " marker.
+            obj_creator(self, writer)?;
+            self.pop_indent();
+        }
+        Ok(())
+    }
+
+    /// Write a YAML mapping with the given fields, in the (arbitrary) iteration order of the map.
+    fn gen_object<W: std::fmt::Write>(
+        &mut self,
+        writer: &mut W,
+        fields: HashMap<String, Box<GenFn<W>>>,
+    ) -> std::fmt::Result {
+        let mut first = true;
+        for (key, f) in fields {
+            if first {
+                first = false;
+            } else {
+                self.nl(writer)?;
+            }
+            write!(writer, "{key}: ")?;
+            f(self, writer)?;
+        }
+        Ok(())
+    }
+
+    /// Write the given lines, moving to a new indented line (see [`Self::nl`]) between them.
+    fn write_lines<W: std::fmt::Write>(
+        &mut self,
+        writer: &mut W,
+        lines: &[String],
+    ) -> std::fmt::Result {
+        let mut first = true;
+        for line in lines {
+            if first {
+                first = false;
+            } else {
+                self.nl(writer)?;
+            }
+            write!(writer, "{line}")?;
+        }
+        Ok(())
+    }
+
+    /// Write a line break to the writer, followed by the current indentation as spaces.
+    fn nl<W: std::fmt::Write>(&mut self, writer: &mut W) -> std::fmt::Result {
+        writeln!(writer)?;
+        for _ in 0..self.indent() {
+            write!(writer, " ")?;
+        }
+        Ok(())
+    }
+
+    /// Return the current indent, i.e. the top of the indent stack.
+    fn indent(&self) -> usize {
+        *self.indents.last().expect("the indent stack is never empty")
+    }
+
+    /// Push a new indent, `offset` columns deeper than the current one.
+    fn push_indent(&mut self, offset: usize) {
+        self.indents.push(self.indent() + offset);
+    }
+
+    /// Pop the last indent; the base indent set up by [`Self::new`] must remain on the stack.
+    fn pop_indent(&mut self) {
+        self.indents.pop();
+        assert!(!self.indents.is_empty(), "popped the base indent");
+    }
+}
diff --git a/saphyr/examples/time_parse.rs b/saphyr/examples/time_parse.rs
new file mode 100644
index 0000000..9b551e1
--- /dev/null
+++ b/saphyr/examples/time_parse.rs
@@ -0,0 +1,31 @@
+use std::env;
+use std::fs::File;
+use std::io::prelude::*;
+use yaml_rust2::{
+    parser::{MarkedEventReceiver, Parser},
+    scanner::Marker,
+    Event,
+};
+
+/// A sink which discards any event sent.
+struct NullSink {}
+
+impl MarkedEventReceiver for NullSink {
+    fn on_event(&mut self, _: Event, _: Marker) {}
+}
+
+/// Time how long it takes to parse the YAML file whose path is the first command-line argument.
+fn main() {
+    let path = env::args().nth(1).expect("usage: time_parse FILE");
+    let mut f = File::open(&path).unwrap();
+    let mut s = String::new();
+    f.read_to_string(&mut s).unwrap();
+
+    let mut parser = Parser::new(s.chars());
+
+    // Time the parsing only: the file has already been read into memory.
+    let begin = std::time::Instant::now();
+    parser.load(&mut NullSink {}, true).unwrap();
+    let elapsed = begin.elapsed();
+    println!("Loaded {}MiB in {:?}", s.len() / 1024 / 1024, elapsed);
+}