hash: fix encoding of both stored hash strings and binary files.

direct utf-8 encoding does not work for files that are not guaranteed to be valid utf-8.
This commit is contained in:
elliot speck 2024-09-30 20:39:17 +10:00
parent 5ed39cf926
commit 7a3db5424f
Signed by: arcayr
SSH key fingerprint: SHA256:ACNNWlqwQA5pfEvX1dnTlr8r4fdg1taXA0lae2FSjto
4 changed files with 30 additions and 18 deletions

View file

@ -1,16 +1,15 @@
//! an ia hash is made up of two parts: the hash algorithm and the hash itself.
//! this is simply to allow forward-compatibility.
//! hashes can be deserialised from strings representing the format `algorithm:value`.
//! hash values are stored as a slice of bytes, returned as a base 16 encoded string
//! when required to be presented as a string output.
use crate::error;
use digest::{Digest, DynDigest};
use serde::{
de::Visitor,
Deserialize, Deserializer, Serialize, Serializer,
};
use digest::DynDigest;
use serde::{de::Visitor, Deserialize, Deserializer, Serialize, Serializer};
use std::{
fmt::Display,
io::Write,
io::{Read, Write},
str::FromStr,
};
@ -26,17 +25,25 @@ pub enum HashAlgorithm {
}
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct HashValue(Vec<u8>);
pub struct HashValue(Box<[u8]>);
impl HashValue {
pub fn new(val: Box<[u8]>) -> Self {
Self(val.as_ref().to_vec())
pub fn new<T: AsRef<[u8]>>(val: T) -> Self {
Self(val.as_ref().to_owned().into_boxed_slice())
}
}
impl Display for HashValue {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "{}", hex::encode(self.0.as_slice()))
match String::from_utf8(self.0.to_vec()) {
Ok(s) => write!(f, "{}", s),
Err(_) => write!(
f,
"{}",
base16ct::lower::encode_string(&self.0)
// String::from_utf8_lossy(self.0.to_vec().as_slice())
),
}
}
}
@ -51,7 +58,7 @@ pub struct Hash {
impl Display for Hash {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "{}:{}", self.algorithm, self.value)
write!(f, "{}:{}", self.algorithm, self.value.to_string())
}
}
@ -73,7 +80,7 @@ impl Hash {
pub fn new(alg: HashAlgorithm, val: String) -> Result<Self, error::Hash> {
Ok(Self {
algorithm: alg,
value: HashValue(val.as_bytes().to_owned()),
value: HashValue::new(val.as_str().as_bytes()),
})
}
}
@ -135,6 +142,6 @@ impl Serialize for Hash {
where
S: Serializer,
{
serializer.serialize_str(&format!("{}:{}", self.algorithm, self.value))
serializer.serialize_str(&format!("{}", self.to_string()))
}
}

View file

@ -100,11 +100,15 @@ impl File {
self.reset().unwrap();
let mut hasher = Hash::hasher_for(&alg)?;
io::copy(self, &mut hasher).map_err(|_| error::Hash::Internal)?;
let hash_value = hasher.finalize_reset();
hasher.flush().map_err(|_| error::Hash::Internal)?;
// io::copy(self, &mut hasher)
// .map_err(|_| error::Hash::Internal)
// .and_then(|_| hasher.flush().map_err(|_| error::Hash::Internal))?;
let hash_value = HashValue::new(hash_value);
hasher.update(buf.as_bytes());
let hash_bytes = hasher.finalize();
let hash_value = HashValue::new(&hash_bytes);
Ok(Hash {
algorithm: alg,

View file

@ -60,7 +60,6 @@ impl<'a> Fetch<'a> {
);
let mut file = fetcher.fetch(source, self.prefix).unwrap();
println!("{:?}", file.hash(HashAlgorithm::Sha2).unwrap());
Ok(file)
}

View file

@ -60,6 +60,8 @@ fn can_fetch() {
#[test]
fn can_hash_source_file() {
let mut input_bytes = vec![];
test_source_file().read_to_end(&mut input_bytes).unwrap();
assert_eq!(
test_source_file()
.hash(ia::HashAlgorithm::Sha2)