From e6c4d042e4507e9344e80b7ea6dc872868610b1c Mon Sep 17 00:00:00 2001 From: Ethiraric Date: Sun, 20 Oct 2024 16:56:12 +0200 Subject: [PATCH] Fix use of byte count instead of char count. --- parser/src/scanner.rs | 5 +++-- parser/tests/fuzz.rs | 11 +++++++++++ 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/parser/src/scanner.rs b/parser/src/scanner.rs index 606be85..17ca133 100644 --- a/parser/src/scanner.rs +++ b/parser/src/scanner.rs @@ -1775,8 +1775,9 @@ impl Scanner { } // We need to manually update our position; we haven't called a `skip` function. - self.mark.col += line_buffer.len(); - self.mark.index += line_buffer.len(); + let n_chars = line_buffer.chars().count(); + self.mark.col += n_chars; + self.mark.index += n_chars; // We can now append our bytes to our `string`. string.reserve(line_buffer.as_bytes().len()); diff --git a/parser/tests/fuzz.rs b/parser/tests/fuzz.rs index 3d22c21..6341c79 100644 --- a/parser/tests/fuzz.rs +++ b/parser/tests/fuzz.rs @@ -74,3 +74,14 @@ fn fuzz_2() { let s = str::from_utf8(raw_input).unwrap(); let _ = run_parser(s); } + +#[test] +fn fuzz_3() { + // Span mismatch when parsing with `StrInput` and `BufferedInput`. + // In block scalars, there was a section in which we took the byte count rather than the char + // count to update the index. The issue didn't happen with `StrInput` as the buffer was always + // full and the offending code was never executed. + let raw_input: &[u8] = &[124, 13, 32, 210, 180, 65]; + let s = str::from_utf8(raw_input).unwrap(); + let _ = run_parser(s); +}