From 7a3db5424fb8bacf887c5a9607ff14b87f562d89 Mon Sep 17 00:00:00 2001
From: arcayr <git@arcayr.online>
Date: Mon, 30 Sep 2024 20:39:17 +1000
Subject: [PATCH] hash: fix encoding of both stored hash strings and binary files.

direct utf-8 encoding does not work for files whose contents are not guaranteed to be valid utf-8.
---
 crates/ia/src/hash.rs               | 33 +++++++++++++++++------------
 crates/ia/src/lib.rs                | 12 +++++++----
 crates/ia/src/phase/fetch/mod.rs    |  1 -
 crates/tests/src/phase/fetch/mod.rs |  2 ++
 4 files changed, 30 insertions(+), 18 deletions(-)

diff --git a/crates/ia/src/hash.rs b/crates/ia/src/hash.rs
index 2b5c006..f75d381 100644
--- a/crates/ia/src/hash.rs
+++ b/crates/ia/src/hash.rs
@@ -1,16 +1,15 @@
 //! an ia hash is made up of two parts: the hash algorithm and the hash itself.
 //! this is simply to allow forward-compatibility.
 //! hashes can be deserialised from strings representing the format `algorithm:value`.
+//! hash values are stored as a boxed slice of bytes. when displayed, values that are valid
+//! utf-8 are written as-is; any other value is written as a lowercase base 16 string.
 
 use crate::error;
-use digest::{Digest, DynDigest};
-use serde::{
-    de::Visitor,
-    Deserialize, Deserializer, Serialize, Serializer,
-};
+use digest::DynDigest;
+use serde::{de::Visitor, Deserialize, Deserializer, Serialize, Serializer};
 use std::{
     fmt::Display,
-    io::Write,
+    io::{Read, Write},
     str::FromStr,
 };
 
@@ -26,17 +25,25 @@ pub enum HashAlgorithm {
 }
 
 #[derive(Clone, Debug, Eq, PartialEq)]
-pub struct HashValue(Vec<u8>);
+pub struct HashValue(Box<[u8]>);
 
 impl HashValue {
-    pub fn new(val: Box<[u8]>) -> Self {
-        Self(val.as_ref().to_vec())
+    pub fn new<T: AsRef<[u8]>>(val: T) -> Self {
+        Self(val.as_ref().to_owned().into_boxed_slice())
     }
 }
 
 impl Display for HashValue {
     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        write!(f, "{}", hex::encode(self.0.as_slice()))
+        match String::from_utf8(self.0.to_vec()) {
+            Ok(s) => write!(f, "{}", s),
+            Err(_) => write!(
+                f,
+                "{}",
+                base16ct::lower::encode_string(&self.0)
+                // String::from_utf8_lossy(self.0.to_vec().as_slice())
+            ),
+        }
     }
 }
 
@@ -51,7 +58,7 @@ pub struct Hash {
 
 impl Display for Hash {
     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        write!(f, "{}:{}", self.algorithm, self.value)
+        write!(f, "{}:{}", self.algorithm, self.value.to_string())
     }
 }
 
@@ -73,7 +80,7 @@ impl Hash {
     pub fn new(alg: HashAlgorithm, val: String) -> Result<Self, error::Hash> {
         Ok(Self {
             algorithm: alg,
-            value: HashValue(val.as_bytes().to_owned()),
+            value: HashValue::new(val.as_str().as_bytes()),
         })
     }
 }
@@ -135,6 +142,6 @@ impl Serialize for Hash {
     where
         S: Serializer,
     {
-        serializer.serialize_str(&format!("{}:{}", self.algorithm, self.value))
+        serializer.serialize_str(&format!("{}", self.to_string()))
     }
 }
diff --git a/crates/ia/src/lib.rs b/crates/ia/src/lib.rs
index e345966..6e10c5e 100644
--- a/crates/ia/src/lib.rs
+++ b/crates/ia/src/lib.rs
@@ -100,11 +100,15 @@ impl File {
         self.reset().unwrap();
 
         let mut hasher = Hash::hasher_for(&alg)?;
-        io::copy(self, &mut hasher).map_err(|_| error::Hash::Internal)?;
-        let hash_value = hasher.finalize_reset();
-        hasher.flush().map_err(|_| error::Hash::Internal)?;
+        // io::copy(self, &mut hasher)
+        //     .map_err(|_| error::Hash::Internal)
+        //     .and_then(|_| hasher.flush().map_err(|_| error::Hash::Internal))?;
 
-        let hash_value = HashValue::new(hash_value);
+        hasher.update(buf.as_bytes());
+
+        let hash_bytes = hasher.finalize();
+
+        let hash_value = HashValue::new(&hash_bytes);
 
         Ok(Hash {
             algorithm: alg,
diff --git a/crates/ia/src/phase/fetch/mod.rs b/crates/ia/src/phase/fetch/mod.rs
index 8213e1c..396059d 100644
--- a/crates/ia/src/phase/fetch/mod.rs
+++ b/crates/ia/src/phase/fetch/mod.rs
@@ -60,7 +60,6 @@ impl<'a> Fetch<'a> {
         );
 
         let mut file = fetcher.fetch(source, self.prefix).unwrap();
-        println!("{:?}", file.hash(HashAlgorithm::Sha2).unwrap());
         Ok(file)
     }
 
diff --git a/crates/tests/src/phase/fetch/mod.rs b/crates/tests/src/phase/fetch/mod.rs
index 969cc36..0b50d65 100644
--- a/crates/tests/src/phase/fetch/mod.rs
+++ b/crates/tests/src/phase/fetch/mod.rs
@@ -60,6 +60,8 @@ fn can_fetch() {
 
 #[test]
 fn can_hash_source_file() {
+    let mut input_bytes = vec![];
+    test_source_file().read_to_end(&mut input_bytes).unwrap();
     assert_eq!(
         test_source_file()
             .hash(ia::HashAlgorithm::Sha2)