hash: fix encoding of both stored hash strings and binary files.

direct utf-8 encoding does not work for files whose contents are not necessarily valid utf-8.
This commit is contained in:
elliot speck 2024-09-30 20:39:17 +10:00
parent 5ed39cf926
commit 7a3db5424f
Signed by: arcayr
SSH key fingerprint: SHA256:ACNNWlqwQA5pfEvX1dnTlr8r4fdg1taXA0lae2FSjto
4 changed files with 30 additions and 18 deletions

View file

@ -1,16 +1,15 @@
//! an ia hash is made up of two parts: the hash algorithm and the hash itself. //! an ia hash is made up of two parts: the hash algorithm and the hash itself.
//! this is simply to allow forward-compatibility. //! this is simply to allow forward-compatibility.
//! hashes can be deserialised from strings representing the format `algorithm:value`. //! hashes can be deserialised from strings representing the format `algorithm:value`.
//! hash values are stored as a slice of bytes, returned as a base 16 encoded string
//! when required to be presented as a string output.
use crate::error; use crate::error;
use digest::{Digest, DynDigest}; use digest::DynDigest;
use serde::{ use serde::{de::Visitor, Deserialize, Deserializer, Serialize, Serializer};
de::Visitor,
Deserialize, Deserializer, Serialize, Serializer,
};
use std::{ use std::{
fmt::Display, fmt::Display,
io::Write, io::{Read, Write},
str::FromStr, str::FromStr,
}; };
@ -26,17 +25,25 @@ pub enum HashAlgorithm {
} }
#[derive(Clone, Debug, Eq, PartialEq)] #[derive(Clone, Debug, Eq, PartialEq)]
pub struct HashValue(Vec<u8>); pub struct HashValue(Box<[u8]>);
impl HashValue { impl HashValue {
pub fn new(val: Box<[u8]>) -> Self { pub fn new<T: AsRef<[u8]>>(val: T) -> Self {
Self(val.as_ref().to_vec()) Self(val.as_ref().to_owned().into_boxed_slice())
} }
} }
impl Display for HashValue { impl Display for HashValue {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "{}", hex::encode(self.0.as_slice())) match String::from_utf8(self.0.to_vec()) {
Ok(s) => write!(f, "{}", s),
Err(_) => write!(
f,
"{}",
base16ct::lower::encode_string(&self.0)
// String::from_utf8_lossy(self.0.to_vec().as_slice())
),
}
} }
} }
@ -51,7 +58,7 @@ pub struct Hash {
impl Display for Hash { impl Display for Hash {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "{}:{}", self.algorithm, self.value) write!(f, "{}:{}", self.algorithm, self.value.to_string())
} }
} }
@ -73,7 +80,7 @@ impl Hash {
pub fn new(alg: HashAlgorithm, val: String) -> Result<Self, error::Hash> { pub fn new(alg: HashAlgorithm, val: String) -> Result<Self, error::Hash> {
Ok(Self { Ok(Self {
algorithm: alg, algorithm: alg,
value: HashValue(val.as_bytes().to_owned()), value: HashValue::new(val.as_str().as_bytes()),
}) })
} }
} }
@ -135,6 +142,6 @@ impl Serialize for Hash {
where where
S: Serializer, S: Serializer,
{ {
serializer.serialize_str(&format!("{}:{}", self.algorithm, self.value)) serializer.serialize_str(&format!("{}", self.to_string()))
} }
} }

View file

@ -100,11 +100,15 @@ impl File {
self.reset().unwrap(); self.reset().unwrap();
let mut hasher = Hash::hasher_for(&alg)?; let mut hasher = Hash::hasher_for(&alg)?;
io::copy(self, &mut hasher).map_err(|_| error::Hash::Internal)?; // io::copy(self, &mut hasher)
let hash_value = hasher.finalize_reset(); // .map_err(|_| error::Hash::Internal)
hasher.flush().map_err(|_| error::Hash::Internal)?; // .and_then(|_| hasher.flush().map_err(|_| error::Hash::Internal))?;
let hash_value = HashValue::new(hash_value); hasher.update(buf.as_bytes());
let hash_bytes = hasher.finalize();
let hash_value = HashValue::new(&hash_bytes);
Ok(Hash { Ok(Hash {
algorithm: alg, algorithm: alg,

View file

@ -60,7 +60,6 @@ impl<'a> Fetch<'a> {
); );
let mut file = fetcher.fetch(source, self.prefix).unwrap(); let mut file = fetcher.fetch(source, self.prefix).unwrap();
println!("{:?}", file.hash(HashAlgorithm::Sha2).unwrap());
Ok(file) Ok(file)
} }

View file

@ -60,6 +60,8 @@ fn can_fetch() {
#[test] #[test]
fn can_hash_source_file() { fn can_hash_source_file() {
let mut input_bytes = vec![];
test_source_file().read_to_end(&mut input_bytes).unwrap();
assert_eq!( assert_eq!(
test_source_file() test_source_file()
.hash(ia::HashAlgorithm::Sha2) .hash(ia::HashAlgorithm::Sha2)