Move char is_xxx
fn to their own file.
This commit is contained in:
parent
ed591e86ca
commit
06936742f2
3 changed files with 123 additions and 113 deletions
111
parser/src/char_traits.rs
Normal file
111
parser/src/char_traits.rs
Normal file
|
@ -0,0 +1,111 @@
|
|||
//! Holds functions to determine if a character belongs to a specific character set.
|
||||
|
||||
/// Check whether the character is nil (`\0`).
///
/// Returns `true` only when `c` is the NUL character.
#[inline]
pub(crate) fn is_z(c: char) -> bool {
    matches!(c, '\0')
}
|
||||
|
||||
/// Check whether the character is a line break (`\r` or `\n`).
///
/// Returns `true` for a carriage return or a line feed, `false` otherwise.
#[inline]
pub(crate) fn is_break(c: char) -> bool {
    matches!(c, '\n' | '\r')
}
|
||||
|
||||
/// Check whether the character is nil or a line break (`\0`, `\r`, `\n`).
|
||||
#[inline]
|
||||
pub(crate) fn is_breakz(c: char) -> bool {
|
||||
is_break(c) || is_z(c)
|
||||
}
|
||||
|
||||
/// Check whether the character is a whitespace (` ` or `\t`).
///
/// Line breaks are not considered blank by this function.
#[inline]
pub(crate) fn is_blank(c: char) -> bool {
    matches!(c, ' ' | '\t')
}
|
||||
|
||||
/// Check whether the character is nil, a linebreak or a whitespace.
|
||||
///
|
||||
/// `\0`, ` `, `\t`, `\n`, `\r`
|
||||
#[inline]
|
||||
pub(crate) fn is_blankz(c: char) -> bool {
|
||||
is_blank(c) || is_breakz(c)
|
||||
}
|
||||
|
||||
/// Check whether the character is an ASCII digit (`0` through `9`).
///
/// Non-ASCII digits are rejected.
#[inline]
pub(crate) fn is_digit(c: char) -> bool {
    matches!(c, '0'..='9')
}
|
||||
|
||||
/// Check whether the character is a digit, letter, `_` or `-`.
///
/// Only ASCII digits and ASCII letters are accepted.
#[inline]
pub(crate) fn is_alpha(c: char) -> bool {
    c.is_ascii_alphanumeric() || matches!(c, '_' | '-')
}
|
||||
|
||||
/// Check whether the character is a hexadecimal character (case insensitive).
///
/// Accepts `0-9`, `a-f` and `A-F`.
#[inline]
pub(crate) fn is_hex(c: char) -> bool {
    c.is_ascii_hexdigit()
}
|
||||
|
||||
/// Convert the hexadecimal digit to an integer.
///
/// Accepts `0-9`, `a-f` and `A-F` and returns the corresponding value in `0..=15`.
///
/// # Panics
/// Reaches `unreachable!()` if `c` is not a hexadecimal digit.
#[inline]
pub(crate) fn as_hex(c: char) -> u32 {
    match c.to_digit(16) {
        Some(value) => value,
        None => unreachable!(),
    }
}
|
||||
|
||||
/// Check whether the character is a YAML flow character (one of `,[]{}`).
///
/// Returns `true` for `,`, `[`, `]`, `{` and `}` only.
#[inline]
pub(crate) fn is_flow(c: char) -> bool {
    matches!(c, ',' | '[' | ']' | '{' | '}')
}
|
||||
|
||||
/// Check whether the character is the BOM character (`U+FEFF`).
#[inline]
pub(crate) fn is_bom(c: char) -> bool {
    matches!(c, '\u{FEFF}')
}
|
||||
|
||||
/// Check whether the character is a YAML non-breaking character.
|
||||
#[inline]
|
||||
pub(crate) fn is_yaml_non_break(c: char) -> bool {
|
||||
// TODO(ethiraric, 28/12/2023): is_printable
|
||||
!is_break(c) && !is_bom(c)
|
||||
}
|
||||
|
||||
/// Check whether the character is NOT a YAML whitespace (` ` / `\t`).
|
||||
#[inline]
|
||||
pub(crate) fn is_yaml_non_space(c: char) -> bool {
|
||||
is_yaml_non_break(c) && !is_blank(c)
|
||||
}
|
||||
|
||||
/// Check whether the character is a valid YAML anchor name character.
|
||||
#[inline]
|
||||
pub(crate) fn is_anchor_char(c: char) -> bool {
|
||||
is_yaml_non_space(c) && !is_flow(c) && !is_z(c)
|
||||
}
|
||||
|
||||
/// Check whether the character is a valid word character.
|
||||
#[inline]
|
||||
pub(crate) fn is_word_char(c: char) -> bool {
|
||||
is_alpha(c) && c != '_'
|
||||
}
|
||||
|
||||
/// Check whether the character is a valid URI character.
|
||||
#[inline]
|
||||
pub(crate) fn is_uri_char(c: char) -> bool {
|
||||
is_word_char(c) || "#;/?:@&=+$,_.!~*\'()[]%".contains(c)
|
||||
}
|
||||
|
||||
/// Check whether the character is a valid tag character.
|
||||
#[inline]
|
||||
pub(crate) fn is_tag_char(c: char) -> bool {
|
||||
is_uri_char(c) && !is_flow(c) && c != '!'
|
||||
}
|
|
@ -45,6 +45,7 @@
|
|||
|
||||
extern crate linked_hash_map;
|
||||
|
||||
pub(crate) mod char_traits;
|
||||
pub mod emitter;
|
||||
pub mod parser;
|
||||
pub mod scanner;
|
||||
|
|
|
@ -1,9 +1,12 @@
|
|||
#![allow(clippy::cast_possible_wrap)]
|
||||
#![allow(clippy::cast_sign_loss)]
|
||||
|
||||
use std::collections::VecDeque;
|
||||
use std::error::Error;
|
||||
use std::{char, fmt};
|
||||
use std::{char, collections::VecDeque, error::Error, fmt};
|
||||
|
||||
use crate::char_traits::{
|
||||
as_hex, is_alpha, is_anchor_char, is_blank, is_blankz, is_break, is_breakz, is_digit, is_flow,
|
||||
is_hex, is_tag_char, is_uri_char, is_z,
|
||||
};
|
||||
|
||||
#[derive(Clone, Copy, PartialEq, Debug, Eq)]
|
||||
pub enum TEncoding {
|
||||
|
@ -24,8 +27,11 @@ pub enum TScalarStyle {
|
|||
/// A location in a YAML document.
#[derive(Clone, Copy, PartialEq, Debug, Eq)]
pub struct Marker {
    /// The index (in chars) in the input string.
    index: usize,
    /// The line (1-indexed).
    line: usize,
    /// The column (1-indexed).
    col: usize,
}
|
||||
|
||||
|
@ -56,7 +62,9 @@ impl Marker {
|
|||
/// An error that occured while scanning.
|
||||
#[derive(Clone, PartialEq, Debug, Eq)]
|
||||
pub struct ScanError {
|
||||
/// The position at which the error happened in the source.
|
||||
mark: Marker,
|
||||
/// Human-readable details about the error.
|
||||
info: String,
|
||||
}
|
||||
|
||||
|
@ -373,116 +381,6 @@ impl<T: Iterator<Item = char>> Iterator for Scanner<T> {
|
|||
}
|
||||
}
|
||||
|
||||
/// Check whether the character is nil (`\0`).
///
/// Returns `true` only when `c` is the NUL character.
#[inline]
fn is_z(c: char) -> bool {
    matches!(c, '\0')
}
|
||||
|
||||
/// Check whether the character is a line break (`\r` or `\n`).
///
/// Returns `true` for a carriage return or a line feed, `false` otherwise.
#[inline]
fn is_break(c: char) -> bool {
    matches!(c, '\n' | '\r')
}
|
||||
|
||||
/// Check whether the character is nil or a line break (`\0`, `\r`, `\n`).
|
||||
#[inline]
|
||||
fn is_breakz(c: char) -> bool {
|
||||
is_break(c) || is_z(c)
|
||||
}
|
||||
|
||||
/// Check whether the character is a whitespace (` ` or `\t`).
///
/// Line breaks are not considered blank by this function.
#[inline]
fn is_blank(c: char) -> bool {
    matches!(c, ' ' | '\t')
}
|
||||
|
||||
/// Check whether the character is nil, a linebreak or a whitespace.
|
||||
///
|
||||
/// `\0`, ` `, `\t`, `\n`, `\r`
|
||||
#[inline]
|
||||
fn is_blankz(c: char) -> bool {
|
||||
is_blank(c) || is_breakz(c)
|
||||
}
|
||||
|
||||
/// Check whether the character is an ASCII digit (`0` through `9`).
///
/// Non-ASCII digits are rejected.
#[inline]
fn is_digit(c: char) -> bool {
    matches!(c, '0'..='9')
}
|
||||
|
||||
/// Check whether the character is a digit, letter, `_` or `-`.
///
/// Only ASCII digits and ASCII letters are accepted.
#[inline]
fn is_alpha(c: char) -> bool {
    c.is_ascii_alphanumeric() || matches!(c, '_' | '-')
}
|
||||
|
||||
/// Check whether the character is a hexadecimal character (case insensitive).
///
/// Accepts `0-9`, `a-f` and `A-F`.
#[inline]
fn is_hex(c: char) -> bool {
    c.is_ascii_hexdigit()
}
|
||||
|
||||
/// Convert the hexadecimal digit to an integer.
///
/// Accepts `0-9`, `a-f` and `A-F` and returns the corresponding value in `0..=15`.
///
/// # Panics
/// Reaches `unreachable!()` if `c` is not a hexadecimal digit.
#[inline]
fn as_hex(c: char) -> u32 {
    match c.to_digit(16) {
        Some(value) => value,
        None => unreachable!(),
    }
}
|
||||
|
||||
/// Check whether the character is a YAML flow character (one of `,[]{}`).
///
/// Returns `true` for `,`, `[`, `]`, `{` and `}` only.
#[inline]
fn is_flow(c: char) -> bool {
    matches!(c, ',' | '[' | ']' | '{' | '}')
}
|
||||
|
||||
/// Check whether the character is the BOM character (`U+FEFF`).
#[inline]
fn is_bom(c: char) -> bool {
    matches!(c, '\u{FEFF}')
}
|
||||
|
||||
/// Check whether the character is a YAML non-breaking character.
|
||||
#[inline]
|
||||
fn is_yaml_non_break(c: char) -> bool {
|
||||
// TODO(ethiraric, 28/12/2023): is_printable
|
||||
!is_break(c) && !is_bom(c)
|
||||
}
|
||||
|
||||
/// Check whether the character is NOT a YAML whitespace (` ` / `\t`).
|
||||
#[inline]
|
||||
fn is_yaml_non_space(c: char) -> bool {
|
||||
is_yaml_non_break(c) && !is_blank(c)
|
||||
}
|
||||
|
||||
/// Check whether the character is a valid YAML anchor name character.
|
||||
#[inline]
|
||||
fn is_anchor_char(c: char) -> bool {
|
||||
is_yaml_non_space(c) && !is_flow(c) && !is_z(c)
|
||||
}
|
||||
|
||||
/// Check whether the character is a valid word character.
|
||||
#[inline]
|
||||
fn is_word_char(c: char) -> bool {
|
||||
is_alpha(c) && c != '_'
|
||||
}
|
||||
|
||||
/// Check whether the character is a valid URI character.
|
||||
#[inline]
|
||||
fn is_uri_char(c: char) -> bool {
|
||||
is_word_char(c) || "#;/?:@&=+$,_.!~*\'()[]%".contains(c)
|
||||
}
|
||||
|
||||
/// Check whether the character is a valid tag character.
|
||||
#[inline]
|
||||
fn is_tag_char(c: char) -> bool {
|
||||
is_uri_char(c) && !is_flow(c) && c != '!'
|
||||
}
|
||||
|
||||
/// Alias for a `Result` that carries no value on success and a [`ScanError`] on failure.
pub type ScanResult = Result<(), ScanError>;
|
||||
|
||||
impl<T: Iterator<Item = char>> Scanner<T> {
|
||||
|
|
Loading…
Reference in a new issue