From 7a3db5424fb8bacf887c5a9607ff14b87f562d89 Mon Sep 17 00:00:00 2001 From: arcayr <git@arcayr.online> Date: Mon, 30 Sep 2024 20:39:17 +1000 Subject: [PATCH] hash: fix encoding both stored hashstrings and binary files. direct utf-8 encoding does not work for files that are not necessarily utf-8. --- crates/ia/src/hash.rs | 33 +++++++++++++++++------------ crates/ia/src/lib.rs | 12 +++++++---- crates/ia/src/phase/fetch/mod.rs | 1 - crates/tests/src/phase/fetch/mod.rs | 2 ++ 4 files changed, 30 insertions(+), 18 deletions(-) diff --git a/crates/ia/src/hash.rs b/crates/ia/src/hash.rs index 2b5c006..f75d381 100644 --- a/crates/ia/src/hash.rs +++ b/crates/ia/src/hash.rs @@ -1,16 +1,15 @@ //! an ia hash is made up of two parts: the hash algorithm and the hash itself. //! this is simply to allow forward-compatibility. //! hashes can be deserialised from strings representing the format `algorithm:value`. +//! hash values are stored as a slice of bytes, returned as a base 16 encoded string +//! when required to be presented as a string output. use crate::error; -use digest::{Digest, DynDigest}; -use serde::{ - de::Visitor, - Deserialize, Deserializer, Serialize, Serializer, -}; +use digest::DynDigest; +use serde::{de::Visitor, Deserialize, Deserializer, Serialize, Serializer}; use std::{ fmt::Display, - io::Write, + io::{Read, Write}, str::FromStr, }; @@ -26,17 +25,25 @@ pub enum HashAlgorithm { } #[derive(Clone, Debug, Eq, PartialEq)] -pub struct HashValue(Vec<u8>); +pub struct HashValue(Box<[u8]>); impl HashValue { - pub fn new(val: Box<[u8]>) -> Self { - Self(val.as_ref().to_vec()) + pub fn new<T: AsRef<[u8]>>(val: T) -> Self { + Self(val.as_ref().to_owned().into_boxed_slice()) } } impl Display for HashValue { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "{}", hex::encode(self.0.as_slice())) + match String::from_utf8(self.0.to_vec()) { + Ok(s) => write!(f, "{}", s), + Err(_) => write!( + f, + "{}", + base16ct::lower::encode_string(&self.0) + // String::from_utf8_lossy(self.0.to_vec().as_slice()) + ), + } } } @@ -51,7 +58,7 @@ pub struct Hash { impl Display for Hash { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "{}:{}", self.algorithm, self.value) + write!(f, "{}:{}", self.algorithm, self.value.to_string()) } } @@ -73,7 +80,7 @@ impl Hash { pub fn new(alg: HashAlgorithm, val: String) -> Result<Self, error::Hash> { Ok(Self { algorithm: alg, - value: HashValue(val.as_bytes().to_owned()), + value: HashValue::new(val.as_str().as_bytes()), }) } } @@ -135,6 +142,6 @@ impl Serialize for Hash { where S: Serializer, { - serializer.serialize_str(&format!("{}:{}", self.algorithm, self.value)) + serializer.serialize_str(&format!("{}", self.to_string())) } } diff --git a/crates/ia/src/lib.rs b/crates/ia/src/lib.rs index e345966..6e10c5e 100644 --- a/crates/ia/src/lib.rs +++ b/crates/ia/src/lib.rs @@ -100,11 +100,15 @@ impl File { self.reset().unwrap(); let mut hasher = Hash::hasher_for(&alg)?; - io::copy(self, &mut hasher).map_err(|_| error::Hash::Internal)?; - let hash_value = hasher.finalize_reset(); - hasher.flush().map_err(|_| error::Hash::Internal)?; + // io::copy(self, &mut hasher) + // .map_err(|_| error::Hash::Internal) + // .and_then(|_| hasher.flush().map_err(|_| error::Hash::Internal))?; - let hash_value = HashValue::new(hash_value); + hasher.update(buf.as_bytes()); + + let hash_bytes = hasher.finalize(); + + let hash_value = HashValue::new(&hash_bytes); Ok(Hash { algorithm: alg, diff --git a/crates/ia/src/phase/fetch/mod.rs b/crates/ia/src/phase/fetch/mod.rs index 8213e1c..396059d 100644 --- a/crates/ia/src/phase/fetch/mod.rs +++ b/crates/ia/src/phase/fetch/mod.rs @@ -60,7 +60,6 @@ impl<'a> Fetch<'a> { ); let mut file = fetcher.fetch(source, self.prefix).unwrap(); - println!("{:?}", file.hash(HashAlgorithm::Sha2).unwrap()); Ok(file) } diff --git a/crates/tests/src/phase/fetch/mod.rs b/crates/tests/src/phase/fetch/mod.rs index 969cc36..0b50d65 100644 --- a/crates/tests/src/phase/fetch/mod.rs +++ b/crates/tests/src/phase/fetch/mod.rs @@ -60,6 +60,8 @@ fn can_fetch() { #[test] fn can_hash_source_file() { + let mut input_bytes = vec![]; + test_source_file().read_to_end(&mut input_bytes).unwrap(); assert_eq!( test_source_file() .hash(ia::HashAlgorithm::Sha2)