//! Content Identifier (CID) - Hash-based content addressing //! //! Every file in Synor Storage is identified by its cryptographic hash, //! not by location. This enables content verification and deduplication. use serde::{Deserialize, Serialize}; use std::fmt; /// Hash algorithm identifiers (multihash compatible) #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)] #[repr(u8)] pub enum HashType { /// SHA2-256 (0x12) Sha256 = 0x12, /// Keccak-256 (0x1B) Keccak256 = 0x1B, /// Blake3 (0x1E) - Synor default Blake3 = 0x1E, } impl Default for HashType { fn default() -> Self { Self::Blake3 } } /// Content Identifier - uniquely identifies content by hash #[derive(Clone, PartialEq, Eq, Hash, Serialize, Deserialize)] pub struct ContentId { /// Hash algorithm used pub hash_type: HashType, /// Hash digest (32 bytes) pub digest: [u8; 32], /// Content size in bytes pub size: u64, } impl ContentId { /// Create a new CID from content bytes using Blake3 pub fn from_content(data: &[u8]) -> Self { let hash = blake3::hash(data); Self { hash_type: HashType::Blake3, digest: *hash.as_bytes(), size: data.len() as u64, } } /// Create a new CID from content bytes using SHA256 pub fn from_content_sha256(data: &[u8]) -> Self { use sha2::{Sha256, Digest}; let mut hasher = Sha256::new(); hasher.update(data); let result = hasher.finalize(); let mut digest = [0u8; 32]; digest.copy_from_slice(&result); Self { hash_type: HashType::Sha256, digest, size: data.len() as u64, } } /// Verify that content matches this CID pub fn verify(&self, data: &[u8]) -> bool { if data.len() as u64 != self.size { return false; } match self.hash_type { HashType::Blake3 => { let hash = blake3::hash(data); hash.as_bytes() == &self.digest } HashType::Sha256 => { use sha2::{Sha256, Digest}; let mut hasher = Sha256::new(); hasher.update(data); let result = hasher.finalize(); result.as_slice() == &self.digest } HashType::Keccak256 => { // TODO: Implement Keccak256 verification false } } } /// Encode CID as a string (synor1...) pub fn to_string_repr(&self) -> String { let mut bytes = Vec::with_capacity(34); bytes.push(self.hash_type as u8); bytes.push(32); // digest length bytes.extend_from_slice(&self.digest); format!("synor1{}", bs58::encode(&bytes).into_string()) } /// Parse CID from string representation pub fn from_string(s: &str) -> Result { if !s.starts_with("synor1") { return Err(CidParseError::InvalidPrefix); } let encoded = &s[6..]; // Skip "synor1" let bytes = bs58::decode(encoded) .into_vec() .map_err(|_| CidParseError::InvalidBase58)?; if bytes.len() < 34 { return Err(CidParseError::InvalidLength); } let hash_type = match bytes[0] { 0x12 => HashType::Sha256, 0x1B => HashType::Keccak256, 0x1E => HashType::Blake3, _ => return Err(CidParseError::UnknownHashType), }; let digest_len = bytes[1] as usize; if digest_len != 32 || bytes.len() < 2 + digest_len { return Err(CidParseError::InvalidLength); } let mut digest = [0u8; 32]; digest.copy_from_slice(&bytes[2..34]); Ok(Self { hash_type, digest, size: 0, // Size not encoded in string }) } /// Get the digest as hex string pub fn digest_hex(&self) -> String { hex::encode(self.digest) } /// Create CID for an empty file pub fn empty() -> Self { Self::from_content(&[]) } } impl fmt::Debug for ContentId { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.debug_struct("ContentId") .field("hash_type", &self.hash_type) .field("digest", &self.digest_hex()) .field("size", &self.size) .finish() } } impl fmt::Display for ContentId { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!(f, "{}", self.to_string_repr()) } } /// Errors when parsing CID from string #[derive(Debug, Clone, PartialEq, Eq)] pub enum CidParseError { /// Missing "synor1" prefix InvalidPrefix, /// Invalid base58 encoding InvalidBase58, /// Invalid length InvalidLength, /// Unknown hash type UnknownHashType, } impl std::error::Error for CidParseError {} impl fmt::Display for CidParseError { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { Self::InvalidPrefix => write!(f, "CID must start with 'synor1'"), Self::InvalidBase58 => write!(f, "Invalid base58 encoding"), Self::InvalidLength => write!(f, "Invalid CID length"), Self::UnknownHashType => write!(f, "Unknown hash type"), } } } #[cfg(test)] mod tests { use super::*; #[test] fn test_cid_from_content() { let data = b"Hello, Synor Storage!"; let cid = ContentId::from_content(data); assert_eq!(cid.hash_type, HashType::Blake3); assert_eq!(cid.size, data.len() as u64); assert!(cid.verify(data)); } #[test] fn test_cid_verification_fails_wrong_data() { let data = b"Hello, Synor Storage!"; let cid = ContentId::from_content(data); assert!(!cid.verify(b"Wrong data")); } #[test] fn test_cid_string_roundtrip() { let data = b"Test content for CID"; let cid = ContentId::from_content(data); let s = cid.to_string_repr(); assert!(s.starts_with("synor1")); let parsed = ContentId::from_string(&s).unwrap(); assert_eq!(cid.hash_type, parsed.hash_type); assert_eq!(cid.digest, parsed.digest); } #[test] fn test_cid_display() { let data = b"Display test"; let cid = ContentId::from_content(data); let display = format!("{}", cid); assert!(display.starts_with("synor1")); } #[test] fn test_cid_sha256() { let data = b"SHA256 test"; let cid = ContentId::from_content_sha256(data); assert_eq!(cid.hash_type, HashType::Sha256); assert!(cid.verify(data)); } #[test] fn test_empty_cid() { let cid = ContentId::empty(); assert_eq!(cid.size, 0); assert!(cid.verify(&[])); } }