Add benchmarking tools.

This commit is contained in:
Ethiraric 2024-01-23 22:48:20 +01:00
parent 6e5286c2e1
commit 36e8b06e82
4 changed files with 397 additions and 0 deletions

View file

@ -18,6 +18,8 @@ linked-hash-map = "0.5.3"
[dev-dependencies] [dev-dependencies]
libtest-mimic = "0.3.0" libtest-mimic = "0.3.0"
quickcheck = "0.9" quickcheck = "0.9"
rand = "0.8.5"
lipsum = "0.9.0"
[profile.release-lto] [profile.release-lto]
inherits = "release" inherits = "release"

View file

@ -0,0 +1,150 @@
#![allow(clippy::too_many_arguments)]
use rand::{distributions::Alphanumeric, rngs::ThreadRng, Rng};
/// Build a random string made of exactly `len` lowercase hexadecimal digits.
pub fn hex_string(rng: &mut ThreadRng, len: usize) -> String {
    const HEX_DIGITS: &[u8] = b"0123456789abcdef";
    // The upper bound of the length range is exclusive: `len..len + 1` always yields `len`.
    let len_hi = len + 1;
    string_from_set(rng, len, len_hi, HEX_DIGITS)
}
/// Build a random e-mail address in the `example.com` domain.
///
/// The local part is drawn from ASCII letters, digits, `-`, `_` and `.`; its length is picked
/// in `len_lo..len_hi` (upper bound excluded).
pub fn email(rng: &mut ThreadRng, len_lo: usize, len_hi: usize) -> String {
    const LOCAL_PART_CHARS: &[u8] =
        b"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ-_.0123456789";
    let mut address = string_from_set(rng, len_lo, len_hi, LOCAL_PART_CHARS);
    address.push_str("@example.com");
    address
}
/// Build a random URL on the `example.com` host.
///
/// The URL has between `n_paths_lo` and `n_paths_hi` path segments, each an alphanumeric
/// string whose length lies between `path_len_lo` and `path_len_hi` (all upper bounds are
/// excluded). When `extension` is provided, `.` followed by the extension is appended at the
/// very end of the URL.
pub fn url(
    rng: &mut ThreadRng,
    scheme: &str,
    n_paths_lo: usize,
    n_paths_hi: usize,
    path_len_lo: usize,
    path_len_hi: usize,
    extension: Option<&str>,
) -> String {
    let mut out = format!("{scheme}://example.com");

    let n_segments = rng.gen_range(n_paths_lo..n_paths_hi);
    for _ in 0..n_segments {
        let segment = alnum_string(rng, path_len_lo, path_len_hi);
        out.push('/');
        out.push_str(&segment);
    }

    if let Some(ext) = extension {
        out.push('.');
        out.push_str(ext);
    }

    out
}
/// Pick a random integer in `lo..hi` (`hi` itself is excluded).
pub fn integer(rng: &mut ThreadRng, lo: i64, hi: i64) -> i64 {
    let bounds = lo..hi;
    rng.gen_range(bounds)
}
/// Build a random alphanumeric string.
///
/// The length is picked in `lo_len..hi_len` (upper bound excluded).
pub fn alnum_string(rng: &mut ThreadRng, lo_len: usize, hi_len: usize) -> String {
    let len = rng.gen_range(lo_len..hi_len);
    let mut out = String::with_capacity(len);
    for _ in 0..len {
        let byte: u8 = rng.sample(Alphanumeric);
        out.push(char::from(byte));
    }
    out
}
/// Build a random string whose characters are all drawn from `set`.
///
/// The length is picked in `len_lo..len_hi` (upper bound excluded). Every byte of `set` is
/// turned into the `char` with the same code point, and each position is picked
/// independently.
pub fn string_from_set(rng: &mut ThreadRng, len_lo: usize, len_hi: usize, set: &[u8]) -> String {
    let len = rng.gen_range(len_lo..len_hi);
    let mut out = String::with_capacity(len);
    for _ in 0..len {
        let idx = rng.gen_range(0..set.len());
        out.push(char::from(set[idx]));
    }
    out
}
/// Generate a lipsum paragraph, wrapped into lines of at most `line_maxcol` bytes.
///
/// Sentences of `wps_lo..wps_hi` words are generated until the paragraph holds at least a
/// number of lines picked in `lines_lo..lines_hi` (upper bounds excluded). Each new sentence
/// continues on the last line produced so far, so the paragraph can end up longer than the
/// picked number of lines.
///
/// Lines are broken on whitespace, and the whitespace character at the break is dropped. A
/// single word wider than `line_maxcol` cannot be wrapped and is kept whole on its own line.
pub fn paragraph(
    rng: &mut ThreadRng,
    lines_lo: usize,
    lines_hi: usize,
    wps_lo: usize,
    wps_hi: usize,
    line_maxcol: usize,
) -> Vec<String> {
    let mut ret = Vec::new();
    let nlines = rng.gen_range(lines_lo..lines_hi);

    while ret.len() < nlines {
        let words_in_sentence = rng.gen_range(wps_lo..wps_hi);
        // Cloning a `ThreadRng` only creates another handle to the same thread-local
        // generator, so this does not replay the same random sequence.
        let mut sentence = lipsum::lipsum_words_with_rng(rng.clone(), words_in_sentence);

        // Continue on the last line so that consecutive sentences flow into each other.
        if let Some(last_line) = ret.pop() {
            sentence = format!("{last_line} {sentence}");
        }

        while sentence.len() > line_maxcol {
            // Never slice in the middle of a multi-byte character.
            let mut window = line_maxcol;
            while !sentence.is_char_boundary(window) {
                window -= 1;
            }

            // Prefer the last whitespace that fits on the line. If the first word alone is
            // too wide, break right after it instead.
            let break_idx = match sentence[..window].rfind(char::is_whitespace) {
                Some(idx) => Some(idx),
                None => sentence.find(char::is_whitespace),
            };
            let break_idx = match break_idx {
                Some(idx) => idx,
                // A lone overlong word: there is nothing left to split.
                None => break,
            };

            let (line, rest) = sentence.split_at(break_idx);
            // Skip exactly the whitespace character the line was broken on.
            let mut rest = rest.chars();
            rest.next();
            let rest = rest.as_str().to_string();

            if !line.is_empty() {
                ret.push(line.to_string());
            }
            sentence = rest;
        }

        if !sentence.is_empty() {
            ret.push(sentence);
        }
    }

    ret
}
/// Build a full name: two names as produced by [`name`], separated by a single space.
///
/// `len_lo` and `len_hi` are forwarded unchanged to both calls to [`name`].
pub fn full_name(rng: &mut ThreadRng, len_lo: usize, len_hi: usize) -> String {
    let first = name(rng, len_lo, len_hi);
    let last = name(rng, len_lo, len_hi);
    format!("{first} {last}")
}
/// Build a capitalized name: one uppercase ASCII letter followed by lowercase ASCII letters.
///
/// The number of lowercase letters is picked in `len_lo..len_hi` (upper bound excluded), so
/// the returned name is one character longer than that.
pub fn name(rng: &mut ThreadRng, len_lo: usize, len_hi: usize) -> String {
    const UPPER: &[u8] = b"ABCDEFGHIJKLMNOPQRSTUVWXYZ";
    const LOWER: &[u8] = b"abcdefghijklmnopqrstuvwxyz";

    let tail_len = rng.gen_range(len_lo..len_hi);
    let initial = UPPER[rng.gen_range(0..UPPER.len())];
    // `tail_len..tail_len + 1` pins the length of the lowercase part to `tail_len`.
    let tail = string_from_set(rng, tail_len, tail_len + 1, LOWER);

    let mut ret = String::with_capacity(1 + tail.len());
    ret.push(char::from(initial));
    ret.push_str(&tail);
    ret
}
/// Generate a lipsum text as a list of lines.
///
/// The text holds a number of paragraphs picked in `paragraphs_lo..paragraphs_hi` (upper
/// bound excluded). Each paragraph is produced by [`paragraph`] with the remaining arguments,
/// and consecutive paragraphs are separated by one empty line.
pub fn text(
    rng: &mut ThreadRng,
    paragraphs_lo: usize,
    paragraphs_hi: usize,
    lines_lo: usize,
    lines_hi: usize,
    wps_lo: usize,
    wps_hi: usize,
    line_maxcol: usize,
) -> Vec<String> {
    let n_paragraphs = rng.gen_range(paragraphs_lo..paragraphs_hi);
    let mut lines = Vec::new();

    for idx in 0..n_paragraphs {
        // An empty line goes between paragraphs, not before the first one.
        if idx != 0 {
            lines.push(String::new());
        }
        let block = paragraph(rng, lines_lo, lines_hi, wps_lo, wps_hi, line_maxcol);
        lines.extend(block);
    }

    lines
}

View file

@ -0,0 +1,214 @@
#![allow(dead_code)]
mod gen;
use std::collections::HashMap;
use rand::{rngs::ThreadRng, Rng};
/// Generate an array of 100 000 records in memory and print the resulting YAML to stdout.
fn main() -> std::fmt::Result {
    let mut output = String::new();
    let mut generator = Generator::new();
    // The upper bound is exclusive, so exactly 100 000 records are generated.
    generator.gen_record_array(&mut output, 100_000, 100_001)?;
    println!("{output}");
    Ok(())
}
/// YAML Generator.
///
/// Writes randomly generated YAML to a `std::fmt::Write` sink while keeping track of the
/// indentation at which new lines must start.
struct Generator {
/// The RNG state, used for every random choice made while generating.
rng: ThreadRng,
/// The stack of indentations.
///
/// Each entry is an absolute number of spaces; the last entry is the indentation currently
/// in effect. The stack starts with a single `0` entry, which is never popped.
indents: Vec<usize>,
}
/// A one-shot callback that writes the value of a single field to the writer `W`, using the
/// generator for randomness and indentation.
type GenFn<W> = dyn FnOnce(&mut Generator, &mut W) -> std::fmt::Result;
impl Generator {
    /// Build a generator that uses the thread-local RNG and starts at indentation 0.
    fn new() -> Self {
        Self {
            rng: rand::thread_rng(),
            indents: vec![0],
        }
    }

    /// Write a YAML sequence of records, each produced by [`Self::gen_record_object`].
    ///
    /// The number of records is picked in `items_lo..items_hi` (upper bound excluded).
    fn gen_record_array<W: std::fmt::Write>(
        &mut self,
        writer: &mut W,
        items_lo: usize,
        items_hi: usize,
    ) -> std::fmt::Result {
        self.gen_array(writer, items_lo, items_hi, Self::gen_record_object)
    }

    /// Write a YAML mapping describing one record.
    ///
    /// Fields are description, authors, hash, version, home, repository and pdf. They are
    /// stored in a `HashMap`, so the order in which they are written is unspecified.
    fn gen_record_object<W: std::fmt::Write>(&mut self, writer: &mut W) -> std::fmt::Result {
        let mut fields = HashMap::<String, Box<GenFn<W>>>::new();

        fields.insert(
            String::from("description"),
            Box::new(|g, out| {
                // Block scalar: the text starts on the next line, two columns deeper.
                write!(out, "|")?;
                g.push_indent(2);
                g.nl(out)?;
                // Keep the indented text within 80 columns.
                let max_col = 80 - g.indent();
                let lines = gen::text(&mut g.rng, 1, 9, 3, 8, 10, 20, max_col);
                g.write_lines(out, &lines)?;
                g.pop_indent();
                Ok(())
            }),
        );

        fields.insert(
            String::from("authors"),
            Box::new(|g, out| {
                // The nested sequence starts on its own line, two columns deeper.
                g.push_indent(2);
                g.nl(out)?;
                g.gen_authors_array(out, 1, 10)?;
                g.pop_indent();
                Ok(())
            }),
        );

        fields.insert(
            String::from("hash"),
            Box::new(|g, out| {
                let digest = gen::hex_string(&mut g.rng, 64);
                write!(out, "{digest}")
            }),
        );

        fields.insert(
            String::from("version"),
            Box::new(|g, out| {
                let version = gen::integer(&mut g.rng, 1, 9);
                write!(out, "{version}")
            }),
        );

        fields.insert(
            String::from("home"),
            Box::new(|g, out| {
                // `0..1` path segments: the home URL never has a path.
                let home = gen::url(&mut g.rng, "https", 0, 1, 0, 0, None);
                write!(out, "{home}")
            }),
        );

        fields.insert(
            String::from("repository"),
            Box::new(|g, out| {
                let repository = gen::url(&mut g.rng, "git", 1, 4, 10, 20, None);
                write!(out, "{repository}")
            }),
        );

        fields.insert(
            String::from("pdf"),
            Box::new(|g, out| {
                let pdf = gen::url(&mut g.rng, "https", 1, 4, 10, 30, Some("pdf"));
                write!(out, "{pdf}")
            }),
        );

        self.gen_object(writer, fields)
    }

    /// Write a YAML sequence of authors, each produced by [`Self::gen_author_object`].
    ///
    /// The number of authors is picked in `items_lo..items_hi` (upper bound excluded).
    fn gen_authors_array<W: std::fmt::Write>(
        &mut self,
        writer: &mut W,
        items_lo: usize,
        items_hi: usize,
    ) -> std::fmt::Result {
        self.gen_array(writer, items_lo, items_hi, Self::gen_author_object)
    }

    /// Write a YAML mapping describing one author, with a `name` and an `email` field.
    ///
    /// As the fields are stored in a `HashMap`, the order in which they are written is
    /// unspecified.
    fn gen_author_object<W: std::fmt::Write>(&mut self, writer: &mut W) -> std::fmt::Result {
        let mut fields = HashMap::<String, Box<GenFn<W>>>::new();

        fields.insert(
            String::from("name"),
            Box::new(|g, out| {
                let full_name = gen::full_name(&mut g.rng, 10, 15);
                write!(out, "{full_name}")
            }),
        );

        fields.insert(
            String::from("email"),
            Box::new(|g, out| {
                let email = gen::email(&mut g.rng, 1, 9);
                write!(out, "{email}")
            }),
        );

        self.gen_object(writer, fields)
    }

    /// Write a YAML array/sequence whose items are produced by `obj_creator`.
    ///
    /// The number of items is picked in `len_lo..len_hi` (upper bound excluded). Every item
    /// starts with `- ` and is generated with the indentation increased by 2, so that
    /// multi-line items line up after the dash.
    fn gen_array<W: std::fmt::Write, F: FnMut(&mut Generator, &mut W) -> std::fmt::Result>(
        &mut self,
        writer: &mut W,
        len_lo: usize,
        len_hi: usize,
        mut obj_creator: F,
    ) -> std::fmt::Result {
        let count = self.rng.gen_range(len_lo..len_hi);
        for idx in 0..count {
            // Items are separated by a line break; none is written before the first item.
            if idx != 0 {
                self.nl(writer)?;
            }
            write!(writer, "- ")?;
            self.push_indent(2);
            obj_creator(self, writer)?;
            self.pop_indent();
        }
        Ok(())
    }

    /// Write a YAML object/mapping with the given fields.
    ///
    /// Each field is written as `key: ` followed by whatever its callback writes. Fields are
    /// written in the iteration order of the map.
    fn gen_object<W: std::fmt::Write>(
        &mut self,
        writer: &mut W,
        fields: HashMap<String, Box<GenFn<W>>>,
    ) -> std::fmt::Result {
        for (idx, (key, write_value)) in fields.into_iter().enumerate() {
            // Fields are separated by a line break; none is written before the first field.
            if idx != 0 {
                self.nl(writer)?;
            }
            write!(writer, "{key}: ")?;
            write_value(self, writer)?;
        }
        Ok(())
    }

    /// Write the given lines, starting each line but the first at the current indentation.
    ///
    /// The first line is written wherever the writer currently stands.
    fn write_lines<W: std::fmt::Write>(
        &mut self,
        writer: &mut W,
        lines: &[String],
    ) -> std::fmt::Result {
        let mut remaining = lines.iter();
        if let Some(first_line) = remaining.next() {
            writer.write_str(first_line)?;
            for line in remaining {
                self.nl(writer)?;
                writer.write_str(line)?;
            }
        }
        Ok(())
    }

    /// Write a line break to the writer, followed by the current indentation in spaces.
    fn nl<W: std::fmt::Write>(&mut self, writer: &mut W) -> std::fmt::Result {
        writeln!(writer)?;
        // Padding an empty string to `width` writes exactly `width` spaces.
        write!(writer, "{:width$}", "", width = self.indent())
    }

    /// Return the current indent, i.e. the top of the indent stack.
    fn indent(&self) -> usize {
        self.indents.last().copied().unwrap()
    }

    /// Push a new indent, `offset` columns deeper than the current one.
    fn push_indent(&mut self, offset: usize) {
        let deeper = self.indent() + offset;
        self.indents.push(deeper);
    }

    /// Pop the last indent.
    ///
    /// Panics if this removes the root indent, which must always stay on the stack.
    fn pop_indent(&mut self) {
        self.indents.pop();
        assert!(!self.indents.is_empty());
    }
}

View file

@ -0,0 +1,31 @@
use std::env;
use std::fs::File;
use std::io::prelude::*;
use yaml_rust2::{
parser::{MarkedEventReceiver, Parser},
scanner::Marker,
Event,
};
/// A sink which discards any event sent.
///
/// It holds no state; it is handed to the parser as the event receiver so that events are
/// produced but not stored anywhere.
struct NullSink {}
impl MarkedEventReceiver for NullSink {
    /// Ignore the event and its marker.
    fn on_event(&mut self, _event: Event, _marker: Marker) {}
}
/// Parse the YAML file given as first command-line argument and print how long parsing took.
///
/// Only the call to `Parser::load` is timed; reading the file into memory is not. Events are
/// sent to a [`NullSink`], so nothing is built from them.
///
/// Exits with status 2 when no file is given, and with status 1 when the file cannot be read
/// or is not valid YAML.
fn main() {
    let args: Vec<_> = env::args().collect();
    let path = match args.get(1) {
        Some(path) => path,
        None => {
            let program = args.first().map(String::as_str).unwrap_or("<program>");
            eprintln!("Usage: {program} <file.yaml>");
            std::process::exit(2);
        }
    };

    let mut s = String::new();
    let read = File::open(path).and_then(|mut f| f.read_to_string(&mut s));
    if let Err(err) = read {
        eprintln!("Failed to read {path}: {err}");
        std::process::exit(1);
    }

    let mut sink = NullSink {};
    let mut parser = Parser::new(s.chars());

    // Load events using our sink as the receiver.
    let begin = std::time::Instant::now();
    let result = parser.load(&mut sink, true);
    // Stop the clock before doing any error reporting.
    let elapsed = begin.elapsed();

    if let Err(err) = result {
        eprintln!("Failed to parse {path}: {err}");
        std::process::exit(1);
    }

    println!("Loaded {}MiB in {:?}", s.len() / 1024 / 1024, elapsed);
}