//! YAML serialization helpers. use crate::char_traits; use crate::yaml::{Hash, Yaml}; use std::convert::From; use std::error::Error; use std::fmt::{self, Display}; /// An error when emitting YAML. #[derive(Copy, Clone, Debug)] pub enum EmitError { /// A formatting error. FmtError(fmt::Error), } impl Error for EmitError { fn cause(&self) -> Option<&dyn Error> { None } } impl Display for EmitError { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { match *self { EmitError::FmtError(ref err) => Display::fmt(err, formatter), } } } impl From for EmitError { fn from(f: fmt::Error) -> Self { EmitError::FmtError(f) } } /// The YAML serializer. /// /// ``` /// # use yaml_rust2::{YamlLoader, YamlEmitter}; /// let input_string = "a: b\nc: d"; /// let yaml = YamlLoader::load_from_str(input_string).unwrap(); /// /// let mut output = String::new(); /// YamlEmitter::new(&mut output).dump(&yaml[0]).unwrap(); /// /// assert_eq!(output, r#"--- /// a: b /// c: d"#); /// ``` #[allow(clippy::module_name_repetitions)] pub struct YamlEmitter<'a> { writer: &'a mut dyn fmt::Write, best_indent: usize, compact: bool, level: isize, multiline_strings: bool, } /// A convenience alias for emitter functions that may fail without returning a value. pub type EmitResult = Result<(), EmitError>; // from serialize::json fn escape_str(wr: &mut dyn fmt::Write, v: &str) -> Result<(), fmt::Error> { wr.write_str("\"")?; let mut start = 0; for (i, byte) in v.bytes().enumerate() { let escaped = match byte { b'"' => "\\\"", b'\\' => "\\\\", b'\x00' => "\\u0000", b'\x01' => "\\u0001", b'\x02' => "\\u0002", b'\x03' => "\\u0003", b'\x04' => "\\u0004", b'\x05' => "\\u0005", b'\x06' => "\\u0006", b'\x07' => "\\u0007", b'\x08' => "\\b", b'\t' => "\\t", b'\n' => "\\n", b'\x0b' => "\\u000b", b'\x0c' => "\\f", b'\r' => "\\r", b'\x0e' => "\\u000e", b'\x0f' => "\\u000f", b'\x10' => "\\u0010", b'\x11' => "\\u0011", b'\x12' => "\\u0012", b'\x13' => "\\u0013", b'\x14' => "\\u0014", b'\x15' => "\\u0015", b'\x16' => "\\u0016", b'\x17' => "\\u0017", b'\x18' => "\\u0018", b'\x19' => "\\u0019", b'\x1a' => "\\u001a", b'\x1b' => "\\u001b", b'\x1c' => "\\u001c", b'\x1d' => "\\u001d", b'\x1e' => "\\u001e", b'\x1f' => "\\u001f", b'\x7f' => "\\u007f", _ => continue, }; if start < i { wr.write_str(&v[start..i])?; } wr.write_str(escaped)?; start = i + 1; } if start != v.len() { wr.write_str(&v[start..])?; } wr.write_str("\"")?; Ok(()) } impl<'a> YamlEmitter<'a> { /// Create a nwe emitter serializing into `writer`. pub fn new(writer: &'a mut dyn fmt::Write) -> YamlEmitter { YamlEmitter { writer, best_indent: 2, compact: true, level: -1, multiline_strings: false, } } /// Set 'compact inline notation' on or off, as described for block /// [sequences](http://www.yaml.org/spec/1.2/spec.html#id2797382) /// and /// [mappings](http://www.yaml.org/spec/1.2/spec.html#id2798057). /// /// In this form, blocks cannot have any properties (such as anchors /// or tags), which should be OK, because this emitter doesn't /// (currently) emit those anyways. pub fn compact(&mut self, compact: bool) { self.compact = compact; } /// Determine if this emitter is using 'compact inline notation'. #[must_use] pub fn is_compact(&self) -> bool { self.compact } /// Render strings containing multiple lines in [literal style]. /// /// # Examples /// /// ```rust /// use yaml_rust2::{Yaml, YamlEmitter, YamlLoader}; /// /// let input = r#"{foo: "bar!\nbar!", baz: 42}"#; /// let parsed = YamlLoader::load_from_str(input).unwrap(); /// eprintln!("{:?}", parsed); /// /// let mut output = String::new(); /// let mut emitter = YamlEmitter::new(&mut output); /// emitter.multiline_strings(true); /// emitter.dump(&parsed[0]).unwrap(); /// assert_eq!(output.as_str(), "\ /// --- /// foo: | /// bar! /// bar! /// baz: 42"); /// ``` /// /// [literal style]: https://yaml.org/spec/1.2/spec.html#id2795688 pub fn multiline_strings(&mut self, multiline_strings: bool) { self.multiline_strings = multiline_strings; } /// Determine if this emitter will emit multiline strings when appropriate. #[must_use] pub fn is_multiline_strings(&self) -> bool { self.multiline_strings } /// Dump Yaml to an output stream. /// # Errors /// Returns `EmitError` when an error occurs. pub fn dump(&mut self, doc: &Yaml) -> EmitResult { // write DocumentStart writeln!(self.writer, "---")?; self.level = -1; self.emit_node(doc) } fn write_indent(&mut self) -> EmitResult { if self.level <= 0 { return Ok(()); } for _ in 0..self.level { for _ in 0..self.best_indent { write!(self.writer, " ")?; } } Ok(()) } fn emit_node(&mut self, node: &Yaml) -> EmitResult { match *node { Yaml::Array(ref v) => self.emit_array(v), Yaml::Hash(ref h) => self.emit_hash(h), Yaml::String(ref v) => { if self.multiline_strings && v.contains('\n') && char_traits::is_valid_literal_block_scalar(v) { write!(self.writer, "|")?; self.level += 1; for line in v.lines() { writeln!(self.writer)?; self.write_indent()?; // It's literal text, so don't escape special chars. write!(self.writer, "{line}")?; } self.level -= 1; } else if need_quotes(v) { escape_str(self.writer, v)?; } else { write!(self.writer, "{v}")?; } Ok(()) } Yaml::Boolean(v) => { if v { self.writer.write_str("true")?; } else { self.writer.write_str("false")?; } Ok(()) } Yaml::Integer(v) => { write!(self.writer, "{v}")?; Ok(()) } Yaml::Real(ref v) => { write!(self.writer, "{v}")?; Ok(()) } Yaml::Null | Yaml::BadValue => { write!(self.writer, "~")?; Ok(()) } // XXX(chenyh) Alias Yaml::Alias(_) => Ok(()), } } fn emit_array(&mut self, v: &[Yaml]) -> EmitResult { if v.is_empty() { write!(self.writer, "[]")?; } else { self.level += 1; for (cnt, x) in v.iter().enumerate() { if cnt > 0 { writeln!(self.writer)?; self.write_indent()?; } write!(self.writer, "-")?; self.emit_val(true, x)?; } self.level -= 1; } Ok(()) } fn emit_hash(&mut self, h: &Hash) -> EmitResult { if h.is_empty() { self.writer.write_str("{}")?; } else { self.level += 1; for (cnt, (k, v)) in h.iter().enumerate() { let complex_key = matches!(*k, Yaml::Hash(_) | Yaml::Array(_)); if cnt > 0 { writeln!(self.writer)?; self.write_indent()?; } if complex_key { write!(self.writer, "?")?; self.emit_val(true, k)?; writeln!(self.writer)?; self.write_indent()?; write!(self.writer, ":")?; self.emit_val(true, v)?; } else { self.emit_node(k)?; write!(self.writer, ":")?; self.emit_val(false, v)?; } } self.level -= 1; } Ok(()) } /// Emit a yaml as a hash or array value: i.e., which should appear /// following a ":" or "-", either after a space, or on a new line. /// If `inline` is true, then the preceding characters are distinct /// and short enough to respect the compact flag. fn emit_val(&mut self, inline: bool, val: &Yaml) -> EmitResult { match *val { Yaml::Array(ref v) => { if (inline && self.compact) || v.is_empty() { write!(self.writer, " ")?; } else { writeln!(self.writer)?; self.level += 1; self.write_indent()?; self.level -= 1; } self.emit_array(v) } Yaml::Hash(ref h) => { if (inline && self.compact) || h.is_empty() { write!(self.writer, " ")?; } else { writeln!(self.writer)?; self.level += 1; self.write_indent()?; self.level -= 1; } self.emit_hash(h) } _ => { write!(self.writer, " ")?; self.emit_node(val) } } } } /// Check if the string requires quoting. /// Strings starting with any of the following characters must be quoted. /// :, &, *, ?, |, -, <, >, =, !, %, @ /// Strings containing any of the following characters must be quoted. /// {, }, [, ], ,, #, ` /// /// If the string contains any of the following control characters, it must be escaped with double quotes: /// \0, \x01, \x02, \x03, \x04, \x05, \x06, \a, \b, \t, \n, \v, \f, \r, \x0e, \x0f, \x10, \x11, \x12, \x13, \x14, \x15, \x16, \x17, \x18, \x19, \x1a, \e, \x1c, \x1d, \x1e, \x1f, \N, \_, \L, \P /// /// Finally, there are other cases when the strings must be quoted, no matter if you're using single or double quotes: /// * When the string is true or false (otherwise, it would be treated as a boolean value); /// * When the string is null or ~ (otherwise, it would be considered as a null value); /// * When the string looks like a number, such as integers (e.g. 2, 14, etc.), floats (e.g. 2.6, 14.9) and exponential numbers (e.g. 12e7, etc.) (otherwise, it would be treated as a numeric value); /// * When the string looks like a date (e.g. 2014-12-31) (otherwise it would be automatically converted into a Unix timestamp). #[allow(clippy::doc_markdown)] fn need_quotes(string: &str) -> bool { fn need_quotes_spaces(string: &str) -> bool { string.starts_with(' ') || string.ends_with(' ') } string.is_empty() || need_quotes_spaces(string) || string.starts_with(|character: char| { matches!( character, '&' | '*' | '?' | '|' | '-' | '<' | '>' | '=' | '!' | '%' | '@' ) }) || string.contains(|character: char| { matches!(character, ':' | '{' | '}' | '[' | ']' | ',' | '#' | '`' | '\"' | '\'' | '\\' | '\0'..='\x06' | '\t' | '\n' | '\r' | '\x0e'..='\x1a' | '\x1c'..='\x1f') }) || [ // http://yaml.org/type/bool.html // Note: 'y', 'Y', 'n', 'N', is not quoted deliberately, as in libyaml. PyYAML also parse // them as string, not booleans, although it is violating the YAML 1.1 specification. // See https://github.com/dtolnay/serde-yaml/pull/83#discussion_r152628088. "yes", "Yes", "YES", "no", "No", "NO", "True", "TRUE", "true", "False", "FALSE", "false", "on", "On", "ON", "off", "Off", "OFF", // http://yaml.org/type/null.html "null", "Null", "NULL", "~", ] .contains(&string) || string.starts_with('.') || string.starts_with("0x") || string.parse::().is_ok() || string.parse::().is_ok() } #[cfg(test)] mod test { use super::YamlEmitter; use crate::YamlLoader; #[test] fn test_multiline_string() { let input = r#"{foo: "bar!\nbar!", baz: 42}"#; let parsed = YamlLoader::load_from_str(input).unwrap(); let mut output = String::new(); let mut emitter = YamlEmitter::new(&mut output); emitter.multiline_strings(true); emitter.dump(&parsed[0]).unwrap(); } }