saphyr-serde/saphyr/src/emitter.rs

624 lines
16 KiB
Rust
Raw Normal View History

use std::fmt::{self, Display};
2015-05-31 04:56:45 +00:00
use std::convert::From;
use std::error::Error;
2017-05-08 18:35:53 +00:00
use yaml::{Hash, Yaml};
2015-05-31 04:56:45 +00:00
2015-05-31 04:56:45 +00:00
/// Errors that can be reported while emitting a YAML document.
#[derive(Copy, Clone, Debug)]
pub enum EmitError {
    /// The underlying writer reported a formatting error.
    FmtError(fmt::Error),
    /// Displayed as "bad hashmap key". This variant is not constructed
    /// anywhere in this file.
    BadHashmapKey,
}
impl Error for EmitError {
    /// Short description of the error.
    ///
    /// For `FmtError` this is the wrapped error's own description; for
    /// `BadHashmapKey` it is the fixed text "bad hashmap key".
    // `Error::description` is deprecated in std, but it is still implemented
    // (and delegated to) so existing callers keep receiving the same text.
    #[allow(deprecated)]
    fn description(&self) -> &str {
        match *self {
            EmitError::FmtError(ref err) => err.description(),
            EmitError::BadHashmapKey => "bad hashmap key",
        }
    }

    /// Always `None`: the wrapped `fmt::Error` is already surfaced through
    /// `description` and `Display`, so it is not reported a second time as
    /// a cause.
    fn cause(&self) -> Option<&dyn Error> {
        None
    }
}
impl Display for EmitError {
    /// Format the error for users: a wrapped `fmt::Error` is displayed as
    /// itself, `BadHashmapKey` as the text "bad hashmap key".
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match self {
            EmitError::FmtError(inner) => Display::fmt(inner, f),
            EmitError::BadHashmapKey => f.write_str("bad hashmap key"),
        }
    }
}
2015-05-31 04:56:45 +00:00
impl From<fmt::Error> for EmitError {
fn from(f: fmt::Error) -> Self {
EmitError::FmtError(f)
}
}
/// Writes a YAML node tree as text into a borrowed `fmt::Write` sink.
pub struct YamlEmitter<'a> {
    /// Sink that receives the emitted text.
    writer: &'a mut dyn fmt::Write,
    /// Number of spaces written per nesting level.
    best_indent: usize,
    /// Whether nested blocks use the compact inline notation.
    compact: bool,
    /// Current nesting depth; `-1` until the first collection is entered.
    level: isize,
}
/// Result of an emit operation: `Ok(())` on success, otherwise an `EmitError`.
pub type EmitResult = Result<(), EmitError>;
/// Write `v` to `wr` as a double-quoted scalar.
///
/// The text is wrapped in `"`. Inside, `"` and `\` are backslash-escaped, the
/// control bytes that have a short form use it (`\b`, `\t`, `\n`, `\f`,
/// `\r`), and every other byte in `0x00..=0x1f`, as well as `0x7f`, is
/// written as `\u00XX` with lowercase hex digits. All remaining bytes are
/// copied through unchanged.
///
/// # Errors
///
/// Returns the `fmt::Error` reported by `wr`.
// Adapted from serialize::json.
fn escape_str(wr: &mut dyn fmt::Write, v: &str) -> Result<(), fmt::Error> {
    wr.write_str("\"")?;

    // Start of the pending run of bytes that need no escaping.
    let mut start = 0;

    for (i, byte) in v.bytes().enumerate() {
        // `Some(text)` for bytes with a fixed escape, `None` for control
        // bytes that are spelled out numerically; anything else is skipped
        // and stays part of the pending run.
        let short_escape = match byte {
            b'"' => Some("\\\""),
            b'\\' => Some("\\\\"),
            b'\x08' => Some("\\b"),
            b'\t' => Some("\\t"),
            b'\n' => Some("\\n"),
            b'\x0c' => Some("\\f"),
            b'\r' => Some("\\r"),
            b'\x00'..=b'\x1f' | b'\x7f' => None,
            _ => continue,
        };

        // Flush the unescaped run. Every escaped byte is ASCII, so both
        // `start` and `i` always fall on UTF-8 character boundaries.
        if start < i {
            wr.write_str(&v[start..i])?;
        }

        match short_escape {
            Some(text) => wr.write_str(text)?,
            None => write!(wr, "\\u{:04x}", byte)?,
        }
        start = i + 1;
    }

    if start != v.len() {
        wr.write_str(&v[start..])?;
    }
    wr.write_str("\"")
}
impl<'a> YamlEmitter<'a> {
pub fn new(writer: &'a mut fmt::Write) -> YamlEmitter {
YamlEmitter {
writer: writer,
best_indent: 2,
compact: true,
2015-05-31 04:56:45 +00:00
level: -1
}
}
/// Set 'compact inline notation' on or off, as described for block
/// [sequences](http://www.yaml.org/spec/1.2/spec.html#id2797382)
/// and
/// [mappings](http://www.yaml.org/spec/1.2/spec.html#id2798057).
///
/// In this form, blocks cannot have any properties (such as anchors
/// or tags), which should be OK, because this emitter doesn't
/// (currently) emit those anyways.
pub fn compact(&mut self, compact: bool) {
self.compact = compact;
}
/// Determine if this emitter is using 'compact inline notation'.
pub fn is_compact(&self) -> bool {
self.compact
}
2015-05-31 04:56:45 +00:00
pub fn dump(&mut self, doc: &Yaml) -> EmitResult {
// write DocumentStart
try!(write!(self.writer, "---\n"));
self.level = -1;
self.emit_node(doc)
}
fn write_indent(&mut self) -> EmitResult {
if self.level <= 0 { return Ok(()); }
for _ in 0..self.level {
for _ in 0..self.best_indent {
try!(write!(self.writer, " "));
}
}
Ok(())
}
fn emit_node(&mut self, node: &Yaml) -> EmitResult {
match *node {
2017-05-02 07:54:46 +00:00
Yaml::Array(ref v) => self.emit_array(v),
Yaml::Hash(ref h) => self.emit_hash(h),
Yaml::String(ref v) => {
if need_quotes(v) {
try!(escape_str(self.writer, v));
}
else {
try!(write!(self.writer, "{}", v));
}
2015-05-31 04:56:45 +00:00
Ok(())
},
Yaml::Boolean(v) => {
2015-05-31 04:56:45 +00:00
if v {
try!(self.writer.write_str("true"));
} else {
try!(self.writer.write_str("false"));
}
Ok(())
},
Yaml::Integer(v) => {
2015-05-31 04:56:45 +00:00
try!(write!(self.writer, "{}", v));
Ok(())
},
Yaml::Real(ref v) => {
2015-05-31 04:56:45 +00:00
try!(write!(self.writer, "{}", v));
Ok(())
},
Yaml::Null | Yaml::BadValue => {
2015-05-31 04:56:45 +00:00
try!(write!(self.writer, "~"));
Ok(())
},
// XXX(chenyh) Alias
_ => { Ok(()) }
}
}
2017-05-02 07:54:46 +00:00
2017-05-08 18:35:53 +00:00
fn emit_array(&mut self, v: &[Yaml]) -> EmitResult {
2017-05-02 07:54:46 +00:00
if v.is_empty() {
try!(write!(self.writer, "[]"));
} else {
self.level += 1;
2017-05-02 07:54:46 +00:00
for (cnt, x) in v.iter().enumerate() {
if cnt > 0 {
try!(write!(self.writer, "\n"));
try!(self.write_indent());
2017-05-02 07:54:46 +00:00
}
try!(write!(self.writer, "-"));
try!(self.emit_val(true, x));
2017-05-02 07:54:46 +00:00
}
self.level -= 1;
2017-05-02 07:54:46 +00:00
}
Ok(())
}
fn emit_hash(&mut self, h: &Hash) -> EmitResult {
if h.is_empty() {
try!(self.writer.write_str("{}"));
} else {
self.level += 1;
for (cnt, (k, v)) in h.iter().enumerate() {
let complex_key = match *k {
Yaml::Hash(_) | Yaml::Array(_) => true,
_ => false,
};
2017-05-02 07:54:46 +00:00
if cnt > 0 {
try!(write!(self.writer, "\n"));
try!(self.write_indent());
}
if complex_key {
try!(write!(self.writer, "?"));
try!(self.emit_val(true, k));
try!(write!(self.writer, "\n"));
try!(self.write_indent());
try!(write!(self.writer, ":"));
try!(self.emit_val(true, v));
} else {
try!(self.emit_node(k));
try!(write!(self.writer, ":"));
try!(self.emit_val(false, v));
2017-05-02 07:54:46 +00:00
}
}
self.level -= 1;
}
Ok(())
}
/// Emit a yaml as a hash or array value: i.e., which should appear
/// following a ":" or "-", either after a space, or on a new line.
/// If `inline` is true, then the preceeding characters are distinct
/// and short enough to respect the compact flag.
fn emit_val(&mut self, inline: bool, val: &Yaml) -> EmitResult {
match *val {
Yaml::Array(ref v) => {
if (inline && self.compact) || v.is_empty() {
try!(write!(self.writer, " "));
} else {
try!(write!(self.writer, "\n"));
self.level += 1;
try!(self.write_indent());
self.level -= 1;
}
self.emit_array(v)
},
Yaml::Hash(ref h) => {
if (inline && self.compact) || h.is_empty() {
try!(write!(self.writer, " "));
} else {
try!(write!(self.writer, "\n"));
self.level += 1;
try!(self.write_indent());
self.level -= 1;
}
self.emit_hash(h)
},
_ => {
try!(write!(self.writer, " "));
self.emit_node(val)
}
}
}
2015-05-31 04:56:45 +00:00
}
/// Check if the string requires quoting.
///
/// A string is quoted when emitting it as a plain scalar would change its
/// meaning or produce invalid output:
///
/// * it is empty, or starts or ends with a space;
/// * it starts with one of `:`, `&`, `*`, `?`, `|`, `-`, `<`, `>`, `=`, `!`,
///   `%`, `@`, or with `.`;
/// * it contains one of `{`, `}`, `[`, `]`, `,`, `#`, `` ` ``, `"`, `'`, `\`;
/// * it contains a control character in `\x00..=\x1f` (this includes `\a`,
///   `\b`, `\t`, `\n`, `\v`, `\f`, `\r` and `\e`) or `\x7f`; these must be
///   escaped inside double quotes;
/// * it contains `": "` or ends with `:`, which would otherwise be read back
///   as a mapping entry;
/// * it is one of the boolean or null spellings listed in the function body
///   (otherwise it would be read back as a boolean or null value);
/// * it parses as an `i64` or `f64`, e.g. `2`, `2.6`, `12e7` (otherwise it
///   would be read back as a number).
///
/// Date-like strings such as `2014-12-31` are not treated specially and stay
/// unquoted.
fn need_quotes(string: &str) -> bool {
    fn need_quotes_spaces(string: &str) -> bool {
        string.starts_with(' ') || string.ends_with(' ')
    }

    // Characters that may not open a plain scalar.
    fn is_leading_indicator(character: char) -> bool {
        match character {
            ':' | '&' | '*' | '?' | '|' | '-' | '<' | '>' | '=' | '!' | '%' | '@' => true,
            _ => false,
        }
    }

    // Characters that force quoting wherever they appear. The control range
    // is deliberately the same set of bytes that `escape_str` escapes.
    fn is_unsafe_anywhere(character: char) -> bool {
        match character {
            '{' | '}' | '[' | ']' | ',' | '#' | '`' | '\"' | '\'' | '\\' | '\x7f' => true,
            _ => character <= '\x1f',
        }
    }

    string.is_empty()
        || need_quotes_spaces(string)
        || string.starts_with(is_leading_indicator)
        || string.contains(is_unsafe_anywhere)
        // A colon followed by a space (or ending the scalar) is the mapping
        // value indicator.
        || string.contains(": ")
        || string.ends_with(':')
        || [
            // http://yaml.org/type/bool.html
            // Note: 'y', 'Y', 'n', 'N' are deliberately not quoted, as in libyaml. PyYAML also
            // parses them as strings, not booleans, although this violates the YAML 1.1
            // specification.
            // See https://github.com/dtolnay/serde-yaml/pull/83#discussion_r152628088.
            "yes", "Yes", "YES", "no", "No", "NO",
            "True", "TRUE", "true", "False", "FALSE", "false",
            "on", "On", "ON", "off", "Off", "OFF",
            // http://yaml.org/type/null.html
            "null", "Null", "NULL", "~",
        ]
        .contains(&string)
        || string.starts_with('.')
        || string.parse::<i64>().is_ok()
        || string.parse::<f64>().is_ok()
}
2015-05-31 04:56:45 +00:00
#[cfg(test)]
mod test {
2015-05-31 04:56:45 +00:00
use super::*;
2017-05-08 17:44:17 +00:00
use YamlLoader;
2015-05-31 04:56:45 +00:00
/// Round-trip check on a small document: loads `s`, dumps its first document
/// with a default `YamlEmitter`, loads the emitted text again and asserts that
/// the re-parsed document equals the original one.
// NOTE(review): in this copy the nested lines of the YAML literal carry no
// leading indentation, and bare timestamp lines appear between statements and
// inside the literal. Both look like residue of a history view rather than
// part of the test; confirm against the original file.
#[test]
fn test_emit_simple() {
let s = "
# comment
a0 bb: val
a1:
b1: 4
b2: d
a2: 4 # i'm comment
a3: [1, 2, 3]
a4:
- [a1, a2]
2015-05-31 04:56:45 +00:00
- 2
";
2015-05-31 04:56:45 +00:00
let docs = YamlLoader::load_from_str(&s).unwrap();
let doc = &docs[0];
let mut writer = String::new();
{
let mut emitter = YamlEmitter::new(&mut writer);
emitter.dump(doc).unwrap();
}
println!("original:\n{}", s);
println!("emitted:\n{}", writer);
let docs_new = match YamlLoader::load_from_str(&writer) {
Ok(y) => y,
Err(e) => panic!(format!("{}", e))
};
2015-05-31 04:56:45 +00:00
let doc_new = &docs_new[0];
assert_eq!(doc, doc_new);
}
/// Round-trip check on a document that uses anchors, aliases and non-string
/// keys (array, real, boolean, empty hash): loads `s`, dumps its first
/// document, loads the emitted text again and asserts both documents are equal.
// NOTE(review): the nested lines of the YAML literal carry no leading
// indentation in this copy; confirm the fixture against the original file.
#[test]
fn test_emit_complex() {
let s = r#"
cataloge:
product: &coffee { name: Coffee, price: 2.5 , unit: 1l }
product: &cookies { name: Cookies!, price: 3.40 , unit: 400g}
products:
*coffee:
amount: 4
*cookies:
amount: 4
[1,2,3,4]:
array key
2.4:
real key
true:
bool key
{}:
empty hash key
"#;
let docs = YamlLoader::load_from_str(&s).unwrap();
let doc = &docs[0];
let mut writer = String::new();
{
let mut emitter = YamlEmitter::new(&mut writer);
emitter.dump(doc).unwrap();
}
let docs_new = match YamlLoader::load_from_str(&writer) {
Ok(y) => y,
Err(e) => panic!(format!("{}", e))
};
let doc_new = &docs_new[0];
assert_eq!(doc, doc_new);
}
/// Exact-output check for the quoting rules: the emitted text must equal the
/// input `s` byte for byte, so every value that is quoted in `s` has to be
/// quoted by the emitter and every plain value has to stay plain.
// NOTE(review): this test compares text exactly, but in this copy the nested
// `products` entries carry no leading indentation, `a7:` has no value, and
// bare timestamp lines sit inside the literal. These look like residue of a
// history view; confirm the fixture against the original file.
#[test]
fn test_emit_avoid_quotes() {
let s = r#"---
a7:
boolean: "true"
boolean2: "false"
date: 2014-12-31
2016-11-10 07:31:07 +00:00
empty_string: ""
empty_string1: " "
empty_string2: " a"
empty_string3: " a "
exp: "12e7"
field: ":"
field2: "{"
field3: "\\"
field4: "\n"
2018-01-02 07:55:39 +00:00
field5: "can't avoid quote"
float: "2.6"
int: "4"
nullable: "null"
nullable2: "~"
2017-05-02 07:54:46 +00:00
products:
"*coffee":
amount: 4
2017-05-02 07:54:46 +00:00
"*cookies":
amount: 4
".milk":
amount: 1
"2.4": real key
"[1,2,3,4]": array key
"true": bool key
"{}": empty hash key
x: test
2018-01-02 07:55:39 +00:00
y: avoid quoting here
2017-05-22 17:30:01 +00:00
z: string with spaces"#;
let docs = YamlLoader::load_from_str(&s).unwrap();
let doc = &docs[0];
let mut writer = String::new();
{
let mut emitter = YamlEmitter::new(&mut writer);
emitter.dump(doc).unwrap();
}
2017-05-02 07:54:46 +00:00
assert_eq!(s, writer, "actual:\n\n{}\n", writer);
}
2017-05-22 17:30:01 +00:00
/// Exact-output check for boolean-like and null-like strings: loads `input`,
/// dumps its first document and asserts the text equals `expected`, in which
/// the string spellings of booleans and `~` are quoted while real booleans,
/// the real null and the single letters `y`/`Y`/`n`/`N` stay plain.
// NOTE(review): this test compares text exactly, but in this copy the
// continuation lines of the `? - ...` sequences carry no leading indentation
// and bare timestamp lines sit inside `expected`. Confirm the fixture against
// the original file.
#[test]
fn emit_quoted_bools() {
let input = r#"---
string0: yes
string1: no
string2: "true"
string3: "false"
string4: "~"
null0: ~
[true, false]: real_bools
[True, TRUE, False, FALSE, y,Y,yes,Yes,YES,n,N,no,No,NO,on,On,ON,off,Off,OFF]: false_bools
bool0: true
bool1: false"#;
let expected = r#"---
string0: "yes"
string1: "no"
string2: "true"
string3: "false"
string4: "~"
null0: ~
? - true
- false
: real_bools
? - "True"
- "TRUE"
- "False"
- "FALSE"
2018-01-02 07:55:39 +00:00
- y
- Y
- "yes"
- "Yes"
- "YES"
2018-01-02 07:55:39 +00:00
- n
- N
- "no"
- "No"
- "NO"
- "on"
- "On"
- "ON"
- "off"
- "Off"
- "OFF"
: false_bools
2017-05-22 17:30:01 +00:00
bool0: true
bool1: false"#;
let docs = YamlLoader::load_from_str(&input).unwrap();
let doc = &docs[0];
let mut writer = String::new();
{
let mut emitter = YamlEmitter::new(&mut writer);
emitter.dump(doc).unwrap();
}
2018-01-02 07:55:39 +00:00
assert_eq!(expected, writer, "expected:\n{}\nactual:\n{}\n", expected, writer);
2017-05-22 17:30:01 +00:00
}
2017-05-02 07:54:46 +00:00
/// Runs the shared empty/nested collection check with compact notation off.
#[test]
fn test_empty_and_nested() {
    let compact = false;
    test_empty_and_nested_flag(compact)
}
/// Runs the shared empty/nested collection check with compact notation on.
#[test]
fn test_empty_and_nested_compact() {
    let compact = true;
    test_empty_and_nested_flag(compact)
}
/// Shared body of the empty/nested collection tests.
///
/// Picks the fixture matching `compact`, loads it, dumps the first document
/// with an emitter whose compact flag is set to `compact`, and asserts that
/// the emitted text equals the fixture exactly. The two fixtures differ only
/// in how the hash nested in the last sequence item is laid out (`- h: []`
/// versus `-` followed by `h: []` on its own line).
// NOTE(review): this helper compares text exactly, but in this copy the
// nested lines of both literals carry no leading indentation and bare
// timestamp lines appear in the body. Confirm against the original file.
fn test_empty_and_nested_flag(compact: bool) {
let s = if compact { r#"---
a:
b:
c: hello
d: {}
e:
- f
- g
- h: []"# } else { r#"---
2017-05-02 07:54:46 +00:00
a:
b:
c: hello
d: {}
e:
- f
- g
-
h: []"# };
2017-05-02 07:54:46 +00:00
let docs = YamlLoader::load_from_str(&s).unwrap();
let doc = &docs[0];
let mut writer = String::new();
{
let mut emitter = YamlEmitter::new(&mut writer);
emitter.compact(compact);
2017-05-02 07:54:46 +00:00
emitter.dump(doc).unwrap();
}
assert_eq!(s, writer);
}
/// Exact-output check for sequences nested inside sequences: loads `s`, dumps
/// its first document with a default emitter and asserts the emitted text
/// equals `s`.
// NOTE(review): this test compares text exactly, but the continuation lines
// of the nested sequences carry no leading indentation in this copy; confirm
// the fixture against the original file.
#[test]
fn test_nested_arrays() {
let s = r#"---
a:
- b
- - c
- d
- - e
- f"#;
let docs = YamlLoader::load_from_str(&s).unwrap();
let doc = &docs[0];
let mut writer = String::new();
{
let mut emitter = YamlEmitter::new(&mut writer);
emitter.dump(doc).unwrap();
}
println!("original:\n{}", s);
println!("emitted:\n{}", writer);
assert_eq!(s, writer);
}
/// Exact-output check for sequences nested several levels deep: loads `s`,
/// dumps its first document with a default emitter and asserts the emitted
/// text equals `s`.
// NOTE(review): this test compares text exactly, but the continuation lines
// of the nested sequences carry no leading indentation in this copy; confirm
// the fixture against the original file.
#[test]
fn test_deeply_nested_arrays() {
let s = r#"---
a:
- b
- - c
- d
- - e
- - f
- - e"#;
let docs = YamlLoader::load_from_str(&s).unwrap();
let doc = &docs[0];
let mut writer = String::new();
{
let mut emitter = YamlEmitter::new(&mut writer);
emitter.dump(doc).unwrap();
}
println!("original:\n{}", s);
println!("emitted:\n{}", writer);
assert_eq!(s, writer);
}
/// Exact-output check for mappings nested inside mappings: loads `s`, dumps
/// its first document with a default emitter and asserts the emitted text
/// equals `s`.
// NOTE(review): this test compares text exactly, but the nested keys carry
// no leading indentation in this copy; confirm the fixture against the
// original file.
#[test]
fn test_nested_hashes() {
let s = r#"---
a:
b:
c:
d:
e: f"#;
let docs = YamlLoader::load_from_str(&s).unwrap();
let doc = &docs[0];
let mut writer = String::new();
{
let mut emitter = YamlEmitter::new(&mut writer);
emitter.dump(doc).unwrap();
}
println!("original:\n{}", s);
println!("emitted:\n{}", writer);
assert_eq!(s, writer);
}
2015-05-31 04:56:45 +00:00
}