From 368b865f2e583ab93fe5c2f7615696cfe850f7b0 Mon Sep 17 00:00:00 2001 From: Ethiraric Date: Thu, 28 Nov 2024 02:39:29 +0100 Subject: [PATCH] Add a lower-level `YamlEmitter`. --- saphyr/CHANGELOG.md | 4 + saphyr/src/emitter.rs | 324 +++-------------- saphyr/src/emitter/event.rs | 700 ++++++++++++++++++++++++++++++++++++ saphyr/src/lib.rs | 7 +- saphyr/tests/emitter.rs | 243 +++++++------ 5 files changed, 892 insertions(+), 386 deletions(-) create mode 100644 saphyr/src/emitter/event.rs diff --git a/saphyr/CHANGELOG.md b/saphyr/CHANGELOG.md index a0bc01d..e52c3b8 100644 --- a/saphyr/CHANGELOG.md +++ b/saphyr/CHANGELOG.md @@ -2,6 +2,10 @@ ## Upcoming +- Add `EventYamlEmitter` and its types for a lower-level YAML serializing API. + This API is now a building block used in the `YamlEmitter`. It allows + emitting YAML without the need to build a `Yaml` object. + ## v0.0.3 Skipping version `v0.0.2` to align this crate's version with that of diff --git a/saphyr/src/emitter.rs b/saphyr/src/emitter.rs index d94637f..d3e956e 100644 --- a/saphyr/src/emitter.rs +++ b/saphyr/src/emitter.rs @@ -6,11 +6,16 @@ use std::{ fmt::{self, Display}, }; +use saphyr_parser::TScalarStyle; + use crate::{ char_traits, + emitter::event::{EmitterEvent, EventYamlEmitter}, yaml::{Hash, Yaml}, }; +pub(crate) mod event; + /// The YAML serializer. /// /// ``` @@ -27,29 +32,17 @@ use crate::{ /// ``` #[allow(clippy::module_name_repetitions)] pub struct YamlEmitter<'a> { - /// The output stream in which we output YAML. - writer: &'a mut dyn fmt::Write, - /// Whether compact in-line notation is on or off. - /// - /// See [`Self::compact`]. - compact: bool, - /// The current non-flow nesting level. - level: isize, - /// Whether we render multiline strings in literal style. - /// - /// See [`Self::multiline_strings`]. - multiline_strings: bool, + /// The inner emitter, using the lower-level event API. 
+ event_emitter: EventYamlEmitter<'a>, } impl<'a> YamlEmitter<'a> { /// Create a new emitter serializing into `writer`. pub fn new(writer: &'a mut dyn fmt::Write) -> Self { YamlEmitter { - writer, - compact: true, - level: -1, - multiline_strings: false, + event_emitter: EventYamlEmitter::new(writer), } + // While we could emit the `StreamStart` event, the `EventYamlEmitter` ignores it. } /// Set 'compact in-line notation' on or off, as described for block @@ -57,222 +50,102 @@ impl<'a> YamlEmitter<'a> { /// and /// [mappings](http://www.yaml.org/spec/1.2/spec.html#id2798057). /// - /// In this form, blocks cannot have any properties (such as anchors - /// or tags), which should be OK, because this emitter doesn't - /// (currently) emit those anyways. - /// - /// TODO(ethiraric, 2024/04/02): We can support those now. + /// See [`EventYamlEmitter::compact`]. pub fn compact(&mut self, compact: bool) { - self.compact = compact; + self.event_emitter.compact(compact); } /// Determine if this emitter is using 'compact in-line notation'. /// - /// See [`Self::compact`]. + /// See [`EventYamlEmitter::compact`]. #[must_use] pub fn is_compact(&self) -> bool { - self.compact + self.event_emitter.is_compact() } /// Render strings containing multiple lines in [literal style]. /// - /// # Examples - /// - /// ```rust - /// # use saphyr::{Yaml, YamlEmitter}; - /// # - /// let input = r#"{foo: "bar\nbar", baz: 42}"#; - /// let parsed = Yaml::load_from_str(input).unwrap(); - /// - /// let mut output = String::new(); - /// let mut emitter = YamlEmitter::new(&mut output); - /// emitter.multiline_strings(true); - /// emitter.dump(&parsed[0]).unwrap(); - /// assert_eq!(output.as_str(), "\ - /// --- - /// foo: |- - /// bar - /// bar - /// baz: 42"); - /// ``` - /// - /// [literal style]: https://yaml.org/spec/1.2/spec.html#id2795688 + /// See [`EventYamlEmitter::multiline_strings`]. 
pub fn multiline_strings(&mut self, multiline_strings: bool) { - self.multiline_strings = multiline_strings; + self.event_emitter.multiline_strings(multiline_strings); } /// Determine if this emitter will emit multiline strings when appropriate. /// - /// See [`Self::multiline_strings`]. + /// See [`EventYamlEmitter::multiline_strings`]. #[must_use] pub fn is_multiline_strings(&self) -> bool { - self.multiline_strings + self.event_emitter.is_multiline_strings() } - /// Dump Yaml to an output stream. + /// Dump the given YAML node as a single document to the inner output stream. /// /// # Errors /// Returns [`EmitError`] when an error occurs. pub fn dump(&mut self, doc: &Yaml) -> EmitResult { - // write DocumentStart - writeln!(self.writer, "---")?; - self.level = -1; - self.emit_node(doc) - } - - fn write_indent(&mut self) -> EmitResult { - if self.level <= 0 { - return Ok(()); - } - for _ in 0..self.level { - write!(self.writer, " ")?; - } - Ok(()) + self.event_emitter.on_document_start(true)?; + self.emit_node(doc)?; + self.event_emitter.on_document_end(false) } + /// Emit a YAML node. 
fn emit_node(&mut self, node: &Yaml) -> EmitResult { match *node { Yaml::Array(ref v) => self.emit_array(v), Yaml::Hash(ref h) => self.emit_hash(h), Yaml::String(ref v) => { - if self.multiline_strings + let style = if self.event_emitter.is_multiline_strings() && v.contains('\n') && char_traits::is_valid_literal_block_scalar(v) { - self.emit_literal_block(v)?; - } else if need_quotes(v) { - escape_str(self.writer, v)?; + TScalarStyle::Literal + } else if needs_quotes(v) { + TScalarStyle::DoubleQuoted } else { - write!(self.writer, "{v}")?; - } - Ok(()) + TScalarStyle::Plain + }; + self.event_emitter.on_scalar(v, style) } Yaml::Boolean(v) => { - if v { - self.writer.write_str("true")?; - } else { - self.writer.write_str("false")?; - } - Ok(()) + let repr = if v { "true" } else { "false" }; + self.event_emitter.on_scalar(repr, TScalarStyle::Plain) } Yaml::Integer(v) => { - write!(self.writer, "{v}")?; - Ok(()) + let repr = v.to_string(); + self.event_emitter.on_scalar(&repr, TScalarStyle::Plain) } Yaml::Real(ref v) => { - write!(self.writer, "{v}")?; - Ok(()) - } - Yaml::Null | Yaml::BadValue => { - write!(self.writer, "~")?; - Ok(()) + let repr = v.to_string(); + self.event_emitter.on_scalar(&repr, TScalarStyle::Plain) } + Yaml::Null | Yaml::BadValue => self.event_emitter.on_scalar("~", TScalarStyle::Plain), // XXX(chenyh) Alias Yaml::Alias(_) => Ok(()), } } - fn emit_literal_block(&mut self, v: &str) -> EmitResult { - let ends_with_newline = v.ends_with('\n'); - if ends_with_newline { - self.writer.write_str("|")?; - } else { - self.writer.write_str("|-")?; + /// Emit a YAML sequence. + fn emit_array(&mut self, sequence: &[Yaml]) -> EmitResult { + self.event_emitter + .on_event(EmitterEvent::SequenceStart(None))?; + for node in sequence { + self.emit_node(node)?; } - - self.level += 1; - // lines() will omit the last line if it is empty. 
- for line in v.lines() { - writeln!(self.writer)?; - self.write_indent()?; - // It's literal text, so don't escape special chars. - self.writer.write_str(line)?; - } - self.level -= 1; + self.event_emitter.on_event(EmitterEvent::SequenceEnd)?; Ok(()) } - fn emit_array(&mut self, v: &[Yaml]) -> EmitResult { - if v.is_empty() { - write!(self.writer, "[]")?; - } else { - self.level += 1; - for (cnt, x) in v.iter().enumerate() { - if cnt > 0 { - writeln!(self.writer)?; - self.write_indent()?; - } - write!(self.writer, "-")?; - self.emit_val(true, x)?; - } - self.level -= 1; + /// Emit a YAML mapping. + fn emit_hash(&mut self, mapping: &Hash) -> EmitResult { + self.event_emitter + .on_event(EmitterEvent::MappingStart(None))?; + for (key, value) in mapping { + self.emit_node(key)?; + self.emit_node(value)?; } + self.event_emitter.on_event(EmitterEvent::MappingEnd)?; Ok(()) } - - fn emit_hash(&mut self, h: &Hash) -> EmitResult { - if h.is_empty() { - self.writer.write_str("{}")?; - } else { - self.level += 1; - for (cnt, (k, v)) in h.iter().enumerate() { - let complex_key = matches!(*k, Yaml::Hash(_) | Yaml::Array(_)); - if cnt > 0 { - writeln!(self.writer)?; - self.write_indent()?; - } - if complex_key { - write!(self.writer, "?")?; - self.emit_val(true, k)?; - writeln!(self.writer)?; - self.write_indent()?; - write!(self.writer, ":")?; - self.emit_val(true, v)?; - } else { - self.emit_node(k)?; - write!(self.writer, ":")?; - self.emit_val(false, v)?; - } - } - self.level -= 1; - } - Ok(()) - } - - /// Emit a yaml as a hash or array value: i.e., which should appear - /// following a ":" or "-", either after a space, or on a new line. - /// If `inline` is true, then the preceding characters are distinct - /// and short enough to respect the compact flag. 
- fn emit_val(&mut self, inline: bool, val: &Yaml) -> EmitResult { - match *val { - Yaml::Array(ref v) => { - if (inline && self.compact) || v.is_empty() { - write!(self.writer, " ")?; - } else { - writeln!(self.writer)?; - self.level += 1; - self.write_indent()?; - self.level -= 1; - } - self.emit_array(v) - } - Yaml::Hash(ref h) => { - if (inline && self.compact) || h.is_empty() { - write!(self.writer, " ")?; - } else { - writeln!(self.writer)?; - self.level += 1; - self.write_indent()?; - self.level -= 1; - } - self.emit_hash(h) - } - _ => { - write!(self.writer, " ")?; - self.emit_node(val) - } - } - } } /// A convenience alias for emitter functions that may fail without returning a value. @@ -283,6 +156,8 @@ pub type EmitResult = Result<(), EmitError>; pub enum EmitError { /// A formatting error. FmtError(fmt::Error), + /// An error in the sequence of event the emitter received. + EventError(&'static str), } impl Error for EmitError { @@ -295,6 +170,7 @@ impl Display for EmitError { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { match *self { EmitError::FmtError(ref err) => Display::fmt(err, formatter), + EmitError::EventError(msg) => Display::fmt(msg, formatter), } } } @@ -306,6 +182,7 @@ impl From for EmitError { } /// Check if the string requires quoting. +/// /// Strings starting with any of the following characters must be quoted. /// :, &, *, ?, |, -, <, >, =, !, %, @ /// Strings containing any of the following characters must be quoted. @@ -320,19 +197,15 @@ impl From for EmitError { /// * When the string looks like a number, such as integers (e.g. 2, 14, etc.), floats (e.g. 2.6, 14.9) and exponential numbers (e.g. 12e7, etc.) (otherwise, it would be treated as a numeric value); /// * When the string looks like a date (e.g. 2014-12-31) (otherwise it would be automatically converted into a Unix timestamp). 
#[allow(clippy::doc_markdown)] -fn need_quotes(string: &str) -> bool { - fn need_quotes_spaces(string: &str) -> bool { - string.starts_with(' ') || string.ends_with(' ') - } - +fn needs_quotes(string: &str) -> bool { string.is_empty() - || need_quotes_spaces(string) || string.starts_with(|character: char| { matches!( character, - '&' | '*' | '?' | '|' | '-' | '<' | '>' | '=' | '!' | '%' | '@' + ' ' | '&' | '*' | '?' | '|' | '-' | '<' | '>' | '=' | '!' | '%' | '@' ) }) + || string.ends_with(' ') // `starts_with(' ')`tested above || string.contains(|character: char| { matches!(character, ':' | '{' @@ -368,84 +241,3 @@ fn need_quotes(string: &str) -> bool { || string.parse::().is_ok() || string.parse::().is_ok() } - -/// Write the escaped double-quoted string into the given writer. -// from serialize::json -fn escape_str(wr: &mut dyn fmt::Write, v: &str) -> Result<(), fmt::Error> { - wr.write_str("\"")?; - - let mut start = 0; - - for (i, byte) in v.bytes().enumerate() { - let escaped = match byte { - b'"' => "\\\"", - b'\\' => "\\\\", - b'\x00' => "\\u0000", - b'\x01' => "\\u0001", - b'\x02' => "\\u0002", - b'\x03' => "\\u0003", - b'\x04' => "\\u0004", - b'\x05' => "\\u0005", - b'\x06' => "\\u0006", - b'\x07' => "\\u0007", - b'\x08' => "\\b", - b'\t' => "\\t", - b'\n' => "\\n", - b'\x0b' => "\\u000b", - b'\x0c' => "\\f", - b'\r' => "\\r", - b'\x0e' => "\\u000e", - b'\x0f' => "\\u000f", - b'\x10' => "\\u0010", - b'\x11' => "\\u0011", - b'\x12' => "\\u0012", - b'\x13' => "\\u0013", - b'\x14' => "\\u0014", - b'\x15' => "\\u0015", - b'\x16' => "\\u0016", - b'\x17' => "\\u0017", - b'\x18' => "\\u0018", - b'\x19' => "\\u0019", - b'\x1a' => "\\u001a", - b'\x1b' => "\\u001b", - b'\x1c' => "\\u001c", - b'\x1d' => "\\u001d", - b'\x1e' => "\\u001e", - b'\x1f' => "\\u001f", - b'\x7f' => "\\u007f", - _ => continue, - }; - - if start < i { - wr.write_str(&v[start..i])?; - } - - wr.write_str(escaped)?; - - start = i + 1; - } - - if start != v.len() { - wr.write_str(&v[start..])?; - 
} - - wr.write_str("\"")?; - Ok(()) -} - -#[cfg(test)] -mod test { - use crate::Yaml; - - use super::YamlEmitter; - - #[test] - fn test_multiline_string() { - let input = r#"{foo: "bar!\nbar!", baz: 42}"#; - let parsed = Yaml::load_from_str(input).unwrap(); - let mut output = String::new(); - let mut emitter = YamlEmitter::new(&mut output); - emitter.multiline_strings(true); - emitter.dump(&parsed[0]).unwrap(); - } -} diff --git a/saphyr/src/emitter/event.rs b/saphyr/src/emitter/event.rs new file mode 100644 index 0000000..a0baf48 --- /dev/null +++ b/saphyr/src/emitter/event.rs @@ -0,0 +1,700 @@ +//! Home to the [`EventYamlEmitter`] and its associated types. + +use std::fmt; + +use saphyr_parser::TScalarStyle; + +use crate::emitter::{EmitError, EmitResult}; + +/// A lower-level YAML serializer that is fed events instead of a fully constructed object. +/// +/// This serializer is a building block for [`YamlEmitter`]. It takes [`EmitterEvent`]s and builds +/// the output on the go. If the destination is not an in-memory buffer, then this emitter is a +/// more lightweight alternative (in terms of memory footprint) as it does not need to work with a +/// [`Yaml`] instance. +/// +/// Events are expected to be coherent. 
The emitter won't panic, but may behave unexpectedly +/// namely if: +/// - Documents aren't started properly ([`DocumentStart`]) +/// - There is an imbalance in collection starting and ending events +/// +/// # Example +/// ``` +/// use saphyr::{EmitterEvent, EventYamlEmitter, TScalarStyle}; +/// +/// let mut output = String::new(); +/// let mut emitter = EventYamlEmitter::new(&mut output); +/// emitter.on_event(EmitterEvent::DocumentStart(true)); +/// emitter.on_event(EmitterEvent::MappingStart(None)); +/// emitter.on_scalar("a", TScalarStyle::Plain); +/// emitter.on_event(EmitterEvent::SequenceStart(None)); +/// emitter.on_scalar("b", TScalarStyle::Plain); +/// emitter.on_scalar("c", TScalarStyle::Plain); +/// emitter.on_event(EmitterEvent::SequenceEnd); +/// emitter.on_event(EmitterEvent::MappingEnd); +/// emitter.on_event(EmitterEvent::DocumentEnd(false)); +/// assert_eq!(output, r#"--- +/// a: +/// - b +/// - c"#); +/// ``` +/// +/// [`DocumentStart`]: EmitterEvent::DocumentStart +/// [`YamlEmitter`]: crate::emitter::YamlEmitter +/// [`Yaml`]: crate::Yaml +#[allow(clippy::module_name_repetitions)] +pub struct EventYamlEmitter<'a> { + /// The output stream in which we output YAML. + writer: &'a mut dyn fmt::Write, + /// Whether compact in-line notation is on or off. + /// + /// See [`Self::compact`]. + compact: bool, + /// Whether we render multiline strings in literal style. + /// + /// See [`Self::multiline_strings`]. + multiline_strings: bool, + /// How many spaces are added to a nested indentation level. + indent_step: u32, + /// The nesting of non-flow collections we are in. + /// + /// We can derive the indentation level from the number of elements that this vec holds. + collections: Vec, + /// The current state of the emitter. + state: EmitterState, +} + +impl<'a> EventYamlEmitter<'a> { + /// Create a new emitter serializing into `writer`. 
+ pub fn new(writer: &'a mut dyn fmt::Write) -> Self { + Self { + writer, + compact: true, + multiline_strings: false, + indent_step: 2, + collections: vec![], + state: EmitterState::Init, + } + } + + /// Set 'compact in-line notation' on or off, as described for block + /// [sequences](http://www.yaml.org/spec/1.2/spec.html#id2797382) + /// and + /// [mappings](http://www.yaml.org/spec/1.2/spec.html#id2798057). + /// + /// In this form, blocks cannot have any properties (such as anchors + /// or tags), which should be OK, because this emitter doesn't + /// (currently) emit those anyways. + /// + /// TODO(ethiraric, 2024/04/02): We can support those now. + pub fn compact(&mut self, compact: bool) { + self.compact = compact; + } + + /// Determine if this emitter is using 'compact in-line notation'. + /// + /// See [`Self::compact`]. + #[must_use] + pub fn is_compact(&self) -> bool { + self.compact + } + + /// Render strings containing multiple lines in [literal style]. + /// + /// # Examples + /// + /// ```rust + /// # use saphyr::{Yaml, YamlEmitter}; + /// # + /// let input = r#"{foo: "bar\nbar", baz: 42}"#; + /// let parsed = Yaml::load_from_str(input).unwrap(); + /// + /// let mut output = String::new(); + /// let mut emitter = YamlEmitter::new(&mut output); + /// emitter.multiline_strings(true); + /// emitter.dump(&parsed[0]).unwrap(); + /// assert_eq!(output.as_str(), "\ + /// --- + /// foo: |- + /// bar + /// bar + /// baz: 42"); + /// ``` + /// + /// [literal style]: https://yaml.org/spec/1.2/spec.html#id2795688 + pub fn multiline_strings(&mut self, multiline_strings: bool) { + self.multiline_strings = multiline_strings; + } + + /// Determine if this emitter will emit multiline strings when appropriate. + /// + /// See [`Self::multiline_strings`]. + #[must_use] + pub fn is_multiline_strings(&self) -> bool { + self.multiline_strings + } + + /// Set how many spaces are added to a nested indentation level. 
+ pub fn indent_step(&mut self, indent_step: u32) { + self.indent_step = indent_step; + } + + /// Get how many spaces are added to a nested indentation level. + #[must_use] + pub fn get_indent_step(&self) -> u32 { + self.indent_step + } + + /// A convenience function for [`on_event`] with a [`Scalar`] event. + /// + /// # Errors + /// Returns an error if outputting to the writer fails. + /// + /// [`on_event`]: Self::on_event + /// [`Scalar`]: EmitterEvent::Scalar + pub fn on_scalar(&mut self, value: &str, style: TScalarStyle) -> EmitResult { + self.on_scalar_impl(&Scalar { + tag: None, + value, + style, + }) + } + + /// Feed a new event into the emitter. + /// + /// # Errors + /// Returns an error if the given event is incoherent with the preceding sequence of events or + /// if writing to the output writer failed. + pub fn on_event(&mut self, event: EmitterEvent) -> EmitResult { + match event { + EmitterEvent::StreamStart | EmitterEvent::StreamEnd => {} + EmitterEvent::DocumentStart(explicit) => self.on_document_start(explicit)?, + EmitterEvent::DocumentEnd(explicit) => self.on_document_end(explicit)?, + EmitterEvent::Scalar(scalar) => self.on_scalar_impl(&scalar)?, + EmitterEvent::SequenceStart(tag) => { + self.on_collection_start(CollectionKind::Sequence(SequenceState::Empty), &tag)?; + } + EmitterEvent::MappingStart(tag) => { + self.on_collection_start(CollectionKind::Mapping(MappingState::Empty), &tag)?; + } + EmitterEvent::SequenceEnd => { + // The value to `Sequence` here does not matter. We won't match against it. + self.on_collection_end(CollectionKind::Sequence(SequenceState::Empty))?; + } + EmitterEvent::MappingEnd => { + // The value to `Mapping` here does not matter. We won't match against it. + self.on_collection_end(CollectionKind::Mapping(MappingState::ExpectsKey))?; + } + } + Ok(()) + } + + /// Check the state allows starting a document and emit `---` if asked. + /// + /// # Errors + /// Returns an error if outputting to the writer fails. 
+ pub fn on_document_start(&mut self, explicit: bool) -> EmitResult { + // If the document was implicitly ended, we still need to emit a document start. + if explicit || self.state == EmitterState::DocumentEnded(Implicit) { + writeln!(self.writer, "---")?; + } + self.state = EmitterState::DocumentStarted; + Ok(()) + } + + /// Check the state allows ending a document and emit `...` if asked. + /// + /// # Errors + /// Returns an error if outputting to the writer fails. + pub fn on_document_end(&mut self, explicit: bool) -> EmitResult { + if explicit { + write!(self.writer, "...")?; + } + self.state = EmitterState::DocumentEnded(if explicit { Explicit } else { Implicit }); + Ok(()) + } + + /// Start a new collection. + fn on_collection_start(&mut self, kind: CollectionKind, _tag: &Option) -> EmitResult { + // Emit newline and indent only if needed. We don't emit it: + // - If we just started the document; this would make every emitted string with a root + // collection start with a newline. + // - If our collection is a value in a mapping. Otherwise, our collections would look like: + // a + // : + // - b + if !matches!( + self.state, + EmitterState::MappingExpectingValue | EmitterState::DocumentStarted + ) { + self.emit_lnindent()?; + } + + match self.state { + EmitterState::InSequence => { + // Do not emit a space if we are not in compact mode. Otherwise, there would be a + // trailing space ($ marks eol): + // a:$ + // - $ + // foo: bar$ + if self.compact { + write!(self.writer, "- ")?; + } else { + write!(self.writer, "-")?; + } + } + EmitterState::MappingExpectingKey => { + write!(self.writer, "? 
")?; + } + EmitterState::MappingExpectingValue => { + write!(self.writer, ":")?; + } + _ => {} + }; + + self.collections.push(kind); + self.state = match kind { + CollectionKind::Mapping(_) => EmitterState::MappingExpectingKey, + CollectionKind::Sequence(_) => EmitterState::InSequence, + }; + Ok(()) + } + + /// Check the collection end matches an associated collection start. + /// + /// # Errors + /// This function returns an error if there is a mismatch or imbalance in the collection start + /// and the collection end. + fn on_collection_end(&mut self, ev: CollectionKind) -> EmitResult { + use CollectionKind as Kind; // Shorthand to avoid awkward newlines in matches. + + if let Some(kind) = self.collections.pop() { + match (kind, ev) { + (Kind::Mapping(_), Kind::Sequence(_)) | (Kind::Sequence(_), Kind::Mapping(_)) => { + // We have either started a sequence and closed a mapping, or opened a mapping and + // closed a sequence. + return Err(EmitError::EventError("mismatch in collection start/end")); + } + (Kind::Mapping(MappingState::ExpectsValue), _) => { + return Err(EmitError::EventError( + "last mapping pair is missing its value", + )) + } + (Kind::Sequence(SequenceState::Empty), Kind::Sequence(_)) => { + // If the sequence is empty, we still need to emit it. + if self.at_mapping_value() { + // This prints the following space: + // v + // a: [] + write!(self.writer, " []")?; + } else { + write!(self.writer, "[]")?; + } + } + (Kind::Mapping(MappingState::Empty), Kind::Mapping(_)) => { + // If the mapping is empty, we still need to emit it. 
+ if self.at_mapping_value() { + // This prints the following space: + // v + // a: {} + write!(self.writer, " {{}}")?; + } else { + write!(self.writer, "{{}}")?; + } + } + (Kind::Sequence(_), Kind::Sequence(_)) + | (Kind::Mapping(MappingState::ExpectsKey), Kind::Mapping(_)) => {} + } + self.advance_state_with_new_item(); + + // If we are now expecting a mapping value, this means that our collection was a + // complex mapping key. This newline corresponds to that at the `#` below: + // ? - foo + // - bar# + // : baz + if self.state == EmitterState::MappingExpectingValue { + self.emit_lnindent()?; + } + + Ok(()) + } else { + // Can't end a collection if we haven't started any. + Err(EmitError::EventError( + "collection end with no matching collection start", + )) + } + } + + /// Display the given scalar. + /// + /// # Errors + /// Returns an error if outputting to the writer fails. + fn on_scalar_impl(&mut self, scalar: &Scalar) -> EmitResult { + // Don't emit the newline if we are ... + if !( + // At the beginning of the document or just after a `:` in a mapping. + matches!( + self.state, + EmitterState::MappingExpectingValue | EmitterState::DocumentStarted + ) + // Or at the first value in a sequence. + || self.at_sequence_start() + // Or at the first value of a mapping in the root document. + || (self.at_mapping_start() && self.collections.len() == 1) + // Or in compact mode where we could omit a newline (see + // `at_mapping_start_in_sequence`). + || (self.compact && self.at_mapping_start_in_sequence()) + ) { + self.emit_lnindent()?; + } + + // Write preceding tokens for collections. 
+ match self.state { + EmitterState::InSequence => { + if self.at_sequence_start() && self.in_sequence_a_mapping_value() { + // This is the newline that is inserted where the hash is in the example below: + // + // a:# + // - b + self.emit_lnindent()?; + } + write!(self.writer, "- ")?; + } + EmitterState::MappingExpectingValue => { + write!(self.writer, ": ")?; + } + _ => {} + } + + match scalar.style { + TScalarStyle::Plain => write!(self.writer, "{}", scalar.value)?, + TScalarStyle::SingleQuoted => todo!(), // TODO(ethiraric, 24/11/2024) + TScalarStyle::DoubleQuoted => emit_double_quoted_string(self.writer, scalar.value)?, + TScalarStyle::Literal | TScalarStyle::Folded => self.emit_literal_block(scalar)?, + } + + self.advance_state_with_new_item(); + Ok(()) + } + + /// Update the internal state when we have fully constructed a item. + /// + /// This must be called when we receive a scalar (which is an item) and when we receive a + /// collection end event (the collection is an item, which can be a key, a value or an item in + /// a sequence). In the latter case, it must be called _after_ the ending collection has been + /// removed from `self.indent`. + fn advance_state_with_new_item(&mut self) { + if let Some(last_indent) = self.collections.last_mut() { + // If we are in a collection, update its state. + match last_indent { + // If we had a value in a mapping, expect a key, and vice-versa. + CollectionKind::Mapping(MappingState::ExpectsValue) => { + *last_indent = CollectionKind::Mapping(MappingState::ExpectsKey); + self.state = EmitterState::MappingExpectingKey; + } + CollectionKind::Mapping(MappingState::ExpectsKey | MappingState::Empty) => { + *last_indent = CollectionKind::Mapping(MappingState::ExpectsValue); + self.state = EmitterState::MappingExpectingValue; + } + // If we had a sequence, then it no longer is empty. 
+ CollectionKind::Sequence(_) => { + *last_indent = CollectionKind::Sequence(SequenceState::NonEmpty); + // If we were in a mapping inside a sequence, `self.state` would be + // `MappingExpectingKey`. We need to reset it to `InSequence`. + self.state = EmitterState::InSequence; + } + } + } else { + // If we no longer have any open collection, this means we have reached the top-level + // scope. Our document is fully emitted. + self.state = EmitterState::DocumentEmitted; + } + } + + /// Emit the given value as a literal block. + /// + /// The emitter must be positioned prior to the `|` or `|-`. + fn emit_literal_block(&mut self, scalar: &Scalar) -> EmitResult { + let ends_with_newline = scalar.value.ends_with('\n'); + if ends_with_newline { + self.writer.write_str("|")?; + } else { + self.writer.write_str("|-")?; + } + + // lines() will omit the last line if it is empty. + for line in scalar.value.lines() { + // TODO(ethiraric, 24/11/2024): Handle folded scalars. + self.emit_lnindent()?; + // Indent the block further than its parent node. + write!(self.writer, " ")?; + // It's literal text, so don't escape special chars. + self.writer.write_str(line)?; + } + Ok(()) + } + + /// Emit a new line and indentation for it. + fn emit_lnindent(&mut self) -> EmitResult { + writeln!(self.writer)?; + self.emit_indent() + } + + /// Emit an amount of spaces equal to the current indentation. + fn emit_indent(&mut self) -> EmitResult { + for _ in 0..self.collections.len().saturating_sub(1) { + write!(self.writer, " ",)?; + } + Ok(()) + } + + /// Return true if we are outputting a sequence as a value in a mapping. + /// + /// Checks that the inner-most collection is a sequence whose immediate parent is a mapping. + /// Also check that this sequence is a value in the parent mapping (i.e.: not a complex key). 
+ fn in_sequence_a_mapping_value(&self) -> bool { + let len = self.collections.len(); + len >= 2 + && matches!(self.collections[len - 1], CollectionKind::Sequence(_)) + && matches!( + self.collections[len - 2], + CollectionKind::Mapping(MappingState::ExpectsValue) + ) + } + + /// Return true if the inner-most collection is a mapping expecting a value. + fn at_mapping_value(&self) -> bool { + matches!( + self.collections.last(), + Some(CollectionKind::Mapping(MappingState::ExpectsValue)) + ) + } + + /// Return true if the inner-most collection is a yet-empty sequence. + fn at_sequence_start(&self) -> bool { + matches!( + self.collections.last(), + Some(CollectionKind::Sequence(SequenceState::Empty)) + ) + } + + /// Return true if the inner-most collection is a yet-empty mapping. + fn at_mapping_start(&self) -> bool { + matches!( + self.collections.last(), + Some(CollectionKind::Mapping(MappingState::Empty)) + ) + } + + /// Return true if we are at the first key in a mapping whose immediate parent is a sequence. + /// + /// Checks that the inner-most collection is a yet-empty mapping whose immediate parent is a + /// sequence. + /// + /// Example: + /// ```yaml + /// - a: b + /// ``` + /// Prior to emitting `a`, this function would return true. + fn at_mapping_start_in_sequence(&self) -> bool { + let len = self.collections.len(); + len >= 2 + && self.at_mapping_start() + && matches!( + self.collections.get(self.collections.len() - 2), + Some(CollectionKind::Sequence(_)) + ) + } +} + +/// The state of the emitter. +#[derive(PartialEq, Eq, Copy, Clone)] +enum EmitterState { + /// We have just built an emitter. + Init, + /// We have started a new document (explicitly or implicitly) and are waiting for its contents. + DocumentStarted, + /// We have ended a document (explicitly or implicitly). + DocumentEnded(Explicity), + /// We have finished emitting the document, but have not yet received a [`DocumentEnd`]. 
+ /// + /// A YAML document is always a single item, whether it be a mapping, a sequence or a scalar. + /// When we reach the end of that item, we enter the [`DocumentEmitted`] state. + /// + /// [`DocumentEnd`]: EmitterEvent::DocumentEnd + /// [`DocumentEmitted`]: EmitterState::DocumentEmitted + DocumentEmitted, + /// Our inner-most collection is a sequence. + InSequence, + /// Our inner-most collection is a mapping. It expects a key (or mapping end) next. + MappingExpectingKey, + /// Our inner-most collection is a mapping. It expects a value next. + MappingExpectingValue, +} + +/// The kind of collection we opened in the emitter. +/// +/// This serves for tracking whether the events we receive are correct. +#[derive(PartialEq, Eq, Copy, Clone, Debug)] +enum CollectionKind { + /// We opened a mapping. + Mapping(MappingState), + /// We opened a sequence. + Sequence(SequenceState), +} + +/// The state of an opened mapping in the emitter. +#[derive(PartialEq, Eq, Copy, Clone, Debug)] +enum MappingState { + /// The mapping has not yet gotten a key-value pair. + /// + /// In this state, the mapping expects a key. It is different from [`ExpectsKey`] in that it is + /// used to know when to emit empty mappings (`{}`). + /// + /// [`ExpectsKey`]: MappingState::ExpectsKey + Empty, + /// The mapping was just opened or has successfully received pairs. + /// + /// If the next event is a scalar, it will be a key. + ExpectsKey, + /// The mapping has received a key but not its associated value yet. + /// + /// If the next event is a scalar, it will be a value. + ExpectsValue, +} + +/// The state of an opened sequence in the emitter. +/// +/// We need to track this in case we need to emit an empty sequence. If we don't emit it, we would +/// read it back as a null value. +#[derive(PartialEq, Eq, Copy, Clone, Debug)] +enum SequenceState { + /// The sequence is empty. + Empty, + /// At least one item has been added to the sequence. 
+ NonEmpty, +} + +/// Fancy boolean value for whether something is implicit or explicit. +#[derive(PartialEq, Eq, Copy, Clone)] +enum Explicity { + /// Explicit. + Explicit, + /// Implicit. + Implicit, +} +use Explicity::{Explicit, Implicit}; + +/// YAML events to send the emitter. +#[allow(clippy::module_name_repetitions)] +#[derive(Debug)] +pub enum EmitterEvent<'a> { + /// The stream started. This must be the first event sent. + StreamStart, + /// The stream has ended. The emitter performs final routines. + StreamEnd, + /// A document has started. + DocumentStart( + /// Whether the document is explicitly or implicitly started. + bool, + ), + /// The current document has ended. + DocumentEnd( + /// Whether the document is explicitly or implicitly ended. + bool, + ), + /// Emit a scalar. + Scalar(Scalar<'a>), + /// Start a sequence. + SequenceStart( + /// An optional YAML tag to the sequence. + Option, + ), + /// End a sequence. + SequenceEnd, + /// Start a mapping. + MappingStart( + /// An optional YAML tag to the mapping. + Option, + ), + /// End a mapping. + MappingEnd, +} + +/// A scalar to emit. +// TODO(ethiraric, 2024/11/11): Use it in `saphyr-parser` to replace `Boolean`, `Real`, ... +#[derive(Debug)] +pub struct Scalar<'a> { + /// An optional YAML tag to the scalar. + pub tag: Option, + /// The literal value of the scalar. + /// + /// If the scalar is not a string (number, boolean, ...) it must be stringified. + pub value: &'a str, + /// The style in which to emit the scalar. + pub style: TScalarStyle, +} + +/// Write the escaped double-quoted string into the given writer. 
+// from serialize::json +fn emit_double_quoted_string(wr: &mut dyn fmt::Write, v: &str) -> Result<(), fmt::Error> { + wr.write_str("\"")?; + + let mut start = 0; + + for (i, byte) in v.bytes().enumerate() { + let escaped = match byte { + b'"' => "\\\"", + b'\\' => "\\\\", + b'\x00' => "\\u0000", + b'\x01' => "\\u0001", + b'\x02' => "\\u0002", + b'\x03' => "\\u0003", + b'\x04' => "\\u0004", + b'\x05' => "\\u0005", + b'\x06' => "\\u0006", + b'\x07' => "\\u0007", + b'\x08' => "\\b", + b'\t' => "\\t", + b'\n' => "\\n", + b'\x0b' => "\\u000b", + b'\x0c' => "\\f", + b'\r' => "\\r", + b'\x0e' => "\\u000e", + b'\x0f' => "\\u000f", + b'\x10' => "\\u0010", + b'\x11' => "\\u0011", + b'\x12' => "\\u0012", + b'\x13' => "\\u0013", + b'\x14' => "\\u0014", + b'\x15' => "\\u0015", + b'\x16' => "\\u0016", + b'\x17' => "\\u0017", + b'\x18' => "\\u0018", + b'\x19' => "\\u0019", + b'\x1a' => "\\u001a", + b'\x1b' => "\\u001b", + b'\x1c' => "\\u001c", + b'\x1d' => "\\u001d", + b'\x1e' => "\\u001e", + b'\x1f' => "\\u001f", + b'\x7f' => "\\u007f", + _ => continue, + }; + + if start < i { + wr.write_str(&v[start..i])?; + } + + wr.write_str(escaped)?; + + start = i + 1; + } + + if start != v.len() { + wr.write_str(&v[start..])?; + } + + wr.write_str("\"")?; + Ok(()) +} diff --git a/saphyr/src/lib.rs b/saphyr/src/lib.rs index 5309312..b3574a9 100644 --- a/saphyr/src/lib.rs +++ b/saphyr/src/lib.rs @@ -55,7 +55,10 @@ mod yaml; pub use crate::annotated::{ marked_yaml::MarkedYaml, AnnotatedArray, AnnotatedHash, AnnotatedYamlIter, YamlData, }; -pub use crate::emitter::YamlEmitter; +pub use crate::emitter::{ + event::{EmitterEvent, EventYamlEmitter}, + YamlEmitter, +}; pub use crate::loader::{LoadableYamlNode, YamlLoader}; pub use crate::yaml::{Array, Hash, Yaml, YamlIter}; @@ -69,3 +72,5 @@ pub use crate::encoding::{YAMLDecodingTrap, YAMLDecodingTrapFn, YamlDecoder}; pub use saphyr_parser::ScanError; // Re-export [`Marker`] which is used for annotated YAMLs. 
pub use saphyr_parser::Marker; +// Re-export [`TScalarStyle`] which is used for the event emitter. +pub use saphyr_parser::TScalarStyle; diff --git a/saphyr/tests/emitter.rs b/saphyr/tests/emitter.rs index fdf1acb..6d2c769 100644 --- a/saphyr/tests/emitter.rs +++ b/saphyr/tests/emitter.rs @@ -1,5 +1,57 @@ use saphyr::{Yaml, YamlEmitter}; +/// Test in sequence the parser, emitter and parser with the given input. +/// +/// 1. Pass the input through the loader and build a YAML object from it. +/// 2. Pass the newly created YAML object through the emitter. +/// 3. Pass the emitted string through the loader and build another YAML object from it. +/// 4. Assert that the YAML objects from 1. and 3. are the same. +/// 5. Return the string from 2. so the caller can ensure its formatting. +/// +/// The assertion done in this function is purely on the contents of the YAML objects and not on +/// their presentation. +/// +/// This function additionally prints to stdout the input string and the resulting string from 2. +/// +/// The configuration function `config` allows the caller to potentially change some settings in +/// the emitter prior to emitting. +fn raw_roundtrip(input: &str, config: Config) -> String { + let original_docs = Yaml::load_from_str(input).unwrap(); + let original_doc = &original_docs[0]; + let mut emitted_string = String::new(); + { + let mut emitter = YamlEmitter::new(&mut emitted_string); + config(&mut emitter); + emitter.dump(original_doc).unwrap(); + } + println!("original:\n{input}"); + println!("emitted:\n{emitted_string}"); + + let emitted_docs = Yaml::load_from_str(&emitted_string).unwrap(); + assert_eq!(original_docs, emitted_docs); + + emitted_string +} + +/// [`raw_roundtrip`] with default configuration. +fn roundtrip(input: &str) -> String { + raw_roundtrip(input, |_| {}) +} + +/// Like [`roundtrip`] but with the [compact flag] disabled.
+/// +/// [compact flag]: `YamlEmitter::compact` +fn roundtrip_not_compact(input: &str) -> String { + raw_roundtrip(input, |emitter| emitter.compact(false)) +} + +/// Like [`roundtrip`] but with the [multiline strings flag] enabled. +/// +/// [multiline strings flag]: `YamlEmitter::multiline_strings` +fn roundtrip_multiline(input: &str) -> String { + raw_roundtrip(input, |emitter| emitter.multiline_strings(true)) +} + #[allow(clippy::similar_names)] #[test] fn test_emit_simple() { @@ -16,22 +68,7 @@ a4: - 2 "; - let docs = Yaml::load_from_str(s).unwrap(); - let doc = &docs[0]; - let mut writer = String::new(); - { - let mut emitter = YamlEmitter::new(&mut writer); - emitter.dump(doc).unwrap(); - } - println!("original:\n{s}"); - println!("emitted:\n{writer}"); - let docs_new = match Yaml::load_from_str(&writer) { - Ok(y) => y, - Err(e) => panic!("{}", e), - }; - let doc_new = &docs_new[0]; - - assert_eq!(doc, doc_new); + roundtrip(s); } #[test] @@ -55,19 +92,8 @@ products: {}: empty hash key "; - let docs = Yaml::load_from_str(s).unwrap(); - let doc = &docs[0]; - let mut writer = String::new(); - { - let mut emitter = YamlEmitter::new(&mut writer); - emitter.dump(doc).unwrap(); - } - let docs_new = match Yaml::load_from_str(&writer) { - Ok(y) => y, - Err(e) => panic!("{}", e), - }; - let new_doc = &docs_new[0]; - assert_eq!(doc, new_doc); + + roundtrip(s); } #[test] @@ -106,15 +132,7 @@ x: test y: avoid quoting here z: string with spaces"#; - let docs = Yaml::load_from_str(s).unwrap(); - let doc = &docs[0]; - let mut writer = String::new(); - { - let mut emitter = YamlEmitter::new(&mut writer); - emitter.dump(doc).unwrap(); - } - - assert_eq!(s, writer, "actual:\n\n{writer}\n"); + assert_eq!(roundtrip(s), s); } #[test] @@ -164,43 +182,12 @@ null0: ~ bool0: true bool1: false"#; - let docs = Yaml::load_from_str(input).unwrap(); - let doc = &docs[0]; - let mut writer = String::new(); - { - let mut emitter = YamlEmitter::new(&mut writer); - emitter.dump(doc).unwrap(); 
- } - - assert_eq!( - expected, writer, - "expected:\n{expected}\nactual:\n{writer}\n", - ); + assert_eq!(roundtrip(input), expected); } #[test] -fn test_empty_and_nested() { - test_empty_and_nested_flag(false); -} - -#[test] -fn test_empty_and_nested_compact() { - test_empty_and_nested_flag(true); -} - -fn test_empty_and_nested_flag(compact: bool) { - let s = if compact { - r"--- -a: - b: - c: hello - d: {} -e: - - f - - g - - h: []" - } else { - r"--- +fn test_empty_and_nested_not_compact() { + let s = r"--- a: b: c: hello @@ -209,19 +196,31 @@ e: - f - g - - h: []" - }; + h: []"; + assert_eq!(roundtrip_not_compact(s), s); +} - let docs = Yaml::load_from_str(s).unwrap(); - let doc = &docs[0]; - let mut writer = String::new(); - { - let mut emitter = YamlEmitter::new(&mut writer); - emitter.compact(compact); - emitter.dump(doc).unwrap(); - } +#[test] +fn test_empty_and_nested_compact() { + let s = r"--- +a: + b: + c: hello + d: {} +e: + - f + - g + - h: []"; + assert_eq!(roundtrip(s), s); +} - assert_eq!(s, writer); +#[test] +fn test_interleaved_mappings_and_sequences() { + let input = r"--- +a: + - b: + - c: d"; + assert_eq!(roundtrip(input), input); } #[test] @@ -233,18 +232,7 @@ a: - d - - e - f"; - - let docs = Yaml::load_from_str(s).unwrap(); - let doc = &docs[0]; - let mut writer = String::new(); - { - let mut emitter = YamlEmitter::new(&mut writer); - emitter.dump(doc).unwrap(); - } - println!("original:\n{s}"); - println!("emitted:\n{writer}"); - - assert_eq!(s, writer); + assert_eq!(roundtrip(s), s); } #[test] @@ -257,18 +245,7 @@ a: - - e - - f - - e"; - - let docs = Yaml::load_from_str(s).unwrap(); - let doc = &docs[0]; - let mut writer = String::new(); - { - let mut emitter = YamlEmitter::new(&mut writer); - emitter.dump(doc).unwrap(); - } - println!("original:\n{s}"); - println!("emitted:\n{writer}"); - - assert_eq!(s, writer); + assert_eq!(roundtrip(s), s); } #[test] @@ -279,16 +256,44 @@ a: c: d: e: f"; - - let docs = 
Yaml::load_from_str(s).unwrap(); - let doc = &docs[0]; - let mut writer = String::new(); - { - let mut emitter = YamlEmitter::new(&mut writer); - emitter.dump(doc).unwrap(); - } - println!("original:\n{s}"); - println!("emitted:\n{writer}"); - - assert_eq!(s, writer); + assert_eq!(roundtrip(s), s); +} + +#[test] +fn test_empty_sequence() { + let s = r"--- +[]"; + assert_eq!(roundtrip(s), s); +} + +#[test] +fn test_empty_mapping() { + let s = r"--- +{}"; + assert_eq!(roundtrip(s), s); +} + +#[test] +fn test_root_sequence() { + let s = r"--- +- a"; + assert_eq!(roundtrip(s), s); +} + +#[test] +fn test_root_mapping() { + let s = r"--- +a: b"; + assert_eq!(roundtrip(s), s); +} + +#[test] +fn test_multiline_string() { + let input = r#"{foo: "bar!\nbar!", baz: 42}"#; + let expected = r"--- +foo: |- + bar! + bar! +baz: 42"; + assert_eq!(roundtrip_multiline(input), expected); }