Move char is_xxx
fn to their own file.
This commit is contained in:
parent
ed591e86ca
commit
06936742f2
3 changed files with 123 additions and 113 deletions
111
parser/src/char_traits.rs
Normal file
111
parser/src/char_traits.rs
Normal file
|
@ -0,0 +1,111 @@
|
||||||
|
//! Holds functions to determine if a character belongs to a specific character set.
|
||||||
|
|
||||||
|
/// Tell whether `c` is the nil character (`\0`).
///
/// Returns `true` only for `'\0'`; every other character yields `false`.
#[inline]
pub(crate) fn is_z(c: char) -> bool {
    matches!(c, '\0')
}
|
||||||
|
|
||||||
|
/// Tell whether `c` is a line break, i.e. a line feed (`\n`) or a carriage return (`\r`).
#[inline]
pub(crate) fn is_break(c: char) -> bool {
    matches!(c, '\n' | '\r')
}
|
||||||
|
|
||||||
|
/// Check whether the character is nil or a line break (`\0`, `\r`, `\n`).
|
||||||
|
#[inline]
|
||||||
|
pub(crate) fn is_breakz(c: char) -> bool {
|
||||||
|
is_break(c) || is_z(c)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Tell whether `c` is a blank, i.e. a space (` `) or a horizontal tab (`\t`).
#[inline]
pub(crate) fn is_blank(c: char) -> bool {
    matches!(c, ' ' | '\t')
}
|
||||||
|
|
||||||
|
/// Check whether the character is nil, a linebreak or a whitespace.
|
||||||
|
///
|
||||||
|
/// `\0`, ` `, `\t`, `\n`, `\r`
|
||||||
|
#[inline]
|
||||||
|
pub(crate) fn is_blankz(c: char) -> bool {
|
||||||
|
is_blank(c) || is_breakz(c)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Tell whether `c` is an ASCII decimal digit (`0` through `9`).
///
/// Digits from other scripts are not accepted.
#[inline]
pub(crate) fn is_digit(c: char) -> bool {
    matches!(c, '0'..='9')
}
|
||||||
|
|
||||||
|
/// Tell whether `c` is an ASCII letter, an ASCII digit, an underscore (`_`) or a dash (`-`).
#[inline]
pub(crate) fn is_alpha(c: char) -> bool {
    c.is_ascii_alphanumeric() || matches!(c, '_' | '-')
}
|
||||||
|
|
||||||
|
/// Tell whether `c` is an ASCII hexadecimal digit.
///
/// Accepts `0`-`9`, `a`-`f` and `A`-`F`, so the check is case insensitive.
#[inline]
pub(crate) fn is_hex(c: char) -> bool {
    c.is_ascii_hexdigit()
}
|
||||||
|
|
||||||
|
/// Return the numeric value (0 to 15) of the hexadecimal digit `c`.
///
/// Both lowercase and uppercase letters are understood.
///
/// # Panics
/// Panics if `c` is not an ASCII hexadecimal digit. Callers are expected to have validated the
/// character (e.g. with `is_hex`) beforehand.
#[inline]
pub(crate) fn as_hex(c: char) -> u32 {
    // With a radix of 16, `to_digit` accepts exactly `0-9`, `a-f` and `A-F`.
    match c.to_digit(16) {
        Some(value) => value,
        None => unreachable!(),
    }
}
|
||||||
|
|
||||||
|
/// Tell whether `c` is one of the YAML flow characters: `,`, `[`, `]`, `{` or `}`.
#[inline]
pub(crate) fn is_flow(c: char) -> bool {
    [',', '[', ']', '{', '}'].contains(&c)
}
|
||||||
|
|
||||||
|
/// Tell whether `c` is the byte order mark character (`U+FEFF`).
#[inline]
pub(crate) fn is_bom(c: char) -> bool {
    const BOM: char = '\u{FEFF}';
    c == BOM
}
|
||||||
|
|
||||||
|
/// Check whether the character is a YAML non-breaking character.
|
||||||
|
#[inline]
|
||||||
|
pub(crate) fn is_yaml_non_break(c: char) -> bool {
|
||||||
|
// TODO(ethiraric, 28/12/2023): is_printable
|
||||||
|
!is_break(c) && !is_bom(c)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Check whether the character is NOT a YAML whitespace (` ` / `\t`).
|
||||||
|
#[inline]
|
||||||
|
pub(crate) fn is_yaml_non_space(c: char) -> bool {
|
||||||
|
is_yaml_non_break(c) && !is_blank(c)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Check whether the character is a valid YAML anchor name character.
|
||||||
|
#[inline]
|
||||||
|
pub(crate) fn is_anchor_char(c: char) -> bool {
|
||||||
|
is_yaml_non_space(c) && !is_flow(c) && !is_z(c)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Check whether the character is a valid word character.
|
||||||
|
#[inline]
|
||||||
|
pub(crate) fn is_word_char(c: char) -> bool {
|
||||||
|
is_alpha(c) && c != '_'
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Check whether the character is a valid URI character.
|
||||||
|
#[inline]
|
||||||
|
pub(crate) fn is_uri_char(c: char) -> bool {
|
||||||
|
is_word_char(c) || "#;/?:@&=+$,_.!~*\'()[]%".contains(c)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Check whether the character is a valid tag character.
|
||||||
|
#[inline]
|
||||||
|
pub(crate) fn is_tag_char(c: char) -> bool {
|
||||||
|
is_uri_char(c) && !is_flow(c) && c != '!'
|
||||||
|
}
|
|
@ -45,6 +45,7 @@
|
||||||
|
|
||||||
extern crate linked_hash_map;
|
extern crate linked_hash_map;
|
||||||
|
|
||||||
|
pub(crate) mod char_traits;
|
||||||
pub mod emitter;
|
pub mod emitter;
|
||||||
pub mod parser;
|
pub mod parser;
|
||||||
pub mod scanner;
|
pub mod scanner;
|
||||||
|
|
|
@ -1,9 +1,12 @@
|
||||||
#![allow(clippy::cast_possible_wrap)]
|
#![allow(clippy::cast_possible_wrap)]
|
||||||
#![allow(clippy::cast_sign_loss)]
|
#![allow(clippy::cast_sign_loss)]
|
||||||
|
|
||||||
use std::collections::VecDeque;
|
use std::{char, collections::VecDeque, error::Error, fmt};
|
||||||
use std::error::Error;
|
|
||||||
use std::{char, fmt};
|
use crate::char_traits::{
|
||||||
|
as_hex, is_alpha, is_anchor_char, is_blank, is_blankz, is_break, is_breakz, is_digit, is_flow,
|
||||||
|
is_hex, is_tag_char, is_uri_char, is_z,
|
||||||
|
};
|
||||||
|
|
||||||
#[derive(Clone, Copy, PartialEq, Debug, Eq)]
|
#[derive(Clone, Copy, PartialEq, Debug, Eq)]
|
||||||
pub enum TEncoding {
|
pub enum TEncoding {
|
||||||
|
@ -24,8 +27,11 @@ pub enum TScalarStyle {
|
||||||
/// A location in a yaml document.
|
/// A location in a yaml document.
|
||||||
#[derive(Clone, Copy, PartialEq, Debug, Eq)]
|
#[derive(Clone, Copy, PartialEq, Debug, Eq)]
|
||||||
pub struct Marker {
|
pub struct Marker {
|
||||||
|
/// The index (in chars) in the input string.
|
||||||
index: usize,
|
index: usize,
|
||||||
|
/// The line (1-indexed).
|
||||||
line: usize,
|
line: usize,
|
||||||
|
/// The column (1-indexed).
|
||||||
col: usize,
|
col: usize,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -56,7 +62,9 @@ impl Marker {
|
||||||
/// An error that occurred while scanning.
|
/// An error that occurred while scanning.
|
||||||
#[derive(Clone, PartialEq, Debug, Eq)]
|
#[derive(Clone, PartialEq, Debug, Eq)]
|
||||||
pub struct ScanError {
|
pub struct ScanError {
|
||||||
|
/// The position at which the error happened in the source.
|
||||||
mark: Marker,
|
mark: Marker,
|
||||||
|
/// Human-readable details about the error.
|
||||||
info: String,
|
info: String,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -373,116 +381,6 @@ impl<T: Iterator<Item = char>> Iterator for Scanner<T> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Tell whether `c` is the nil character (`\0`).
///
/// Returns `true` only for `'\0'`; every other character yields `false`.
#[inline]
fn is_z(c: char) -> bool {
    matches!(c, '\0')
}
|
|
||||||
|
|
||||||
/// Tell whether `c` is a line break, i.e. a line feed (`\n`) or a carriage return (`\r`).
#[inline]
fn is_break(c: char) -> bool {
    matches!(c, '\n' | '\r')
}
|
|
||||||
|
|
||||||
/// Check whether the character is nil or a line break (`\0`, `\r`, `\n`).
|
|
||||||
#[inline]
|
|
||||||
fn is_breakz(c: char) -> bool {
|
|
||||||
is_break(c) || is_z(c)
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Tell whether `c` is a blank, i.e. a space (` `) or a horizontal tab (`\t`).
#[inline]
fn is_blank(c: char) -> bool {
    matches!(c, ' ' | '\t')
}
|
|
||||||
|
|
||||||
/// Check whether the character is nil, a linebreak or a whitespace.
|
|
||||||
///
|
|
||||||
/// `\0`, ` `, `\t`, `\n`, `\r`
|
|
||||||
#[inline]
|
|
||||||
fn is_blankz(c: char) -> bool {
|
|
||||||
is_blank(c) || is_breakz(c)
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Tell whether `c` is an ASCII decimal digit (`0` through `9`).
///
/// Digits from other scripts are not accepted.
#[inline]
fn is_digit(c: char) -> bool {
    matches!(c, '0'..='9')
}
|
|
||||||
|
|
||||||
/// Tell whether `c` is an ASCII letter, an ASCII digit, an underscore (`_`) or a dash (`-`).
#[inline]
fn is_alpha(c: char) -> bool {
    c.is_ascii_alphanumeric() || matches!(c, '_' | '-')
}
|
|
||||||
|
|
||||||
/// Tell whether `c` is an ASCII hexadecimal digit.
///
/// Accepts `0`-`9`, `a`-`f` and `A`-`F`, so the check is case insensitive.
#[inline]
fn is_hex(c: char) -> bool {
    c.is_ascii_hexdigit()
}
|
|
||||||
|
|
||||||
/// Return the numeric value (0 to 15) of the hexadecimal digit `c`.
///
/// Both lowercase and uppercase letters are understood.
///
/// # Panics
/// Panics if `c` is not an ASCII hexadecimal digit. Callers are expected to have validated the
/// character (e.g. with `is_hex`) beforehand.
#[inline]
fn as_hex(c: char) -> u32 {
    // With a radix of 16, `to_digit` accepts exactly `0-9`, `a-f` and `A-F`.
    match c.to_digit(16) {
        Some(value) => value,
        None => unreachable!(),
    }
}
|
|
||||||
|
|
||||||
/// Tell whether `c` is one of the YAML flow characters: `,`, `[`, `]`, `{` or `}`.
#[inline]
fn is_flow(c: char) -> bool {
    [',', '[', ']', '{', '}'].contains(&c)
}
|
|
||||||
|
|
||||||
/// Tell whether `c` is the byte order mark character (`U+FEFF`).
#[inline]
fn is_bom(c: char) -> bool {
    const BOM: char = '\u{FEFF}';
    c == BOM
}
|
|
||||||
|
|
||||||
/// Check whether the character is a YAML non-breaking character.
|
|
||||||
#[inline]
|
|
||||||
fn is_yaml_non_break(c: char) -> bool {
|
|
||||||
// TODO(ethiraric, 28/12/2023): is_printable
|
|
||||||
!is_break(c) && !is_bom(c)
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Check whether the character is NOT a YAML whitespace (` ` / `\t`).
|
|
||||||
#[inline]
|
|
||||||
fn is_yaml_non_space(c: char) -> bool {
|
|
||||||
is_yaml_non_break(c) && !is_blank(c)
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Check whether the character is a valid YAML anchor name character.
|
|
||||||
#[inline]
|
|
||||||
fn is_anchor_char(c: char) -> bool {
|
|
||||||
is_yaml_non_space(c) && !is_flow(c) && !is_z(c)
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Check whether the character is a valid word character.
|
|
||||||
#[inline]
|
|
||||||
fn is_word_char(c: char) -> bool {
|
|
||||||
is_alpha(c) && c != '_'
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Check whether the character is a valid URI character.
|
|
||||||
#[inline]
|
|
||||||
fn is_uri_char(c: char) -> bool {
|
|
||||||
is_word_char(c) || "#;/?:@&=+$,_.!~*\'()[]%".contains(c)
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Check whether the character is a valid tag character.
|
|
||||||
#[inline]
|
|
||||||
fn is_tag_char(c: char) -> bool {
|
|
||||||
is_uri_char(c) && !is_flow(c) && c != '!'
|
|
||||||
}
|
|
||||||
|
|
||||||
pub type ScanResult = Result<(), ScanError>;
|
pub type ScanResult = Result<(), ScanError>;
|
||||||
|
|
||||||
impl<T: Iterator<Item = char>> Scanner<T> {
|
impl<T: Iterator<Item = char>> Scanner<T> {
|
||||||
|
|
Loading…
Reference in a new issue