From d997b53c8d4bf9589a8315f5bb422d2f0d6b216f Mon Sep 17 00:00:00 2001
From: Ethiraric
Date: Sat, 19 Oct 2024 18:53:51 +0200
Subject: [PATCH] Add fuzz test and fix it.

---
 parser/src/scanner.rs |  4 ++--
 parser/tests/fuzz.rs  | 42 ++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 44 insertions(+), 2 deletions(-)
 create mode 100644 parser/tests/fuzz.rs

diff --git a/parser/src/scanner.rs b/parser/src/scanner.rs
index 0509181..e97361e 100644
--- a/parser/src/scanner.rs
+++ b/parser/src/scanner.rs
@@ -2249,7 +2249,7 @@ impl Scanner {
             }
 
             // Process blank characters.
-            self.input.lookahead(1);
+            self.input.lookahead(2);
             while self.input.next_is_blank_or_break() {
                 if self.input.next_is_blank() {
                     if !self.leading_whitespace {
@@ -2280,7 +2280,7 @@ impl Scanner {
                         self.leading_whitespace = true;
                     }
                 }
-                self.input.lookahead(1);
+                self.input.lookahead(2);
             }
 
             // check indentation level
diff --git a/parser/tests/fuzz.rs b/parser/tests/fuzz.rs
new file mode 100644
index 0000000..3076c8c
--- /dev/null
+++ b/parser/tests/fuzz.rs
@@ -0,0 +1,42 @@
+use core::str;
+
+use saphyr_parser::{Event, Parser, ScanError};
+
+/// Run the parser through the string.
+///
+/// The parser is run through both the `StrInput` and `BufferedInput` variants. The resulting
+/// events are then compared and must match.
+///
+/// # Returns
+/// This function returns the events if parsing succeeds, the error the parser returned otherwise.
+///
+/// # Panics
+/// This function panics if there is a mismatch between the 2 parser invocations with the different
+/// input traits.
+fn run_parser(input: &str) -> Result<Vec<Event>, ScanError> {
+    let mut str_events = vec![];
+    let mut iter_events = vec![];
+
+    for x in Parser::new_from_str(input) {
+        str_events.push(x?.0);
+    }
+    for x in Parser::new_from_iter(input.chars()) {
+        iter_events.push(x?.0);
+    }
+
+    assert_eq!(str_events, iter_events);
+
+    Ok(str_events)
+}
+
+#[test]
+fn fuzz_1() {
+    // Crashing with an index out-of-bounds error.
+    // In `scan_plain_scalar`, we would lookahead 1 and call `skip_break`, which requires a
+    // lookahead of 2.
+    let raw_input: &[u8] = &[
+        1, 39, 110, 117, 108, 108, 34, 13, 13, 13, 13, 13, 10, 13, 13, 13, 13,
+    ];
+    let s = str::from_utf8(raw_input).unwrap();
+    let _ = run_parser(s);
+}