//! Erasure Coding for fault-tolerant storage //! //! Uses Reed-Solomon coding to add redundancy to chunks. //! Allows recovery of data even if some shards are lost. use reed_solomon_erasure::galois_8::ReedSolomon; use serde::{Deserialize, Serialize}; use crate::cid::ContentId; use crate::error::{Error, Result}; /// Default number of data shards pub const DEFAULT_DATA_SHARDS: usize = 10; /// Default number of parity shards pub const DEFAULT_PARITY_SHARDS: usize = 4; /// Erasure coding configuration #[derive(Debug, Clone)] pub struct ErasureConfig { /// Number of data shards (original data pieces) pub data_shards: usize, /// Number of parity shards (redundancy pieces) pub parity_shards: usize, } impl Default for ErasureConfig { fn default() -> Self { Self { data_shards: DEFAULT_DATA_SHARDS, parity_shards: DEFAULT_PARITY_SHARDS, } } } impl ErasureConfig { /// Total number of shards pub fn total_shards(&self) -> usize { self.data_shards + self.parity_shards } /// Maximum shards that can be lost while still recovering pub fn fault_tolerance(&self) -> usize { self.parity_shards } } /// A single shard of encoded data #[derive(Debug, Clone, Serialize, Deserialize)] pub struct Shard { /// Shard index (0..total_shards) pub index: usize, /// Whether this is a data shard (vs parity) pub is_data: bool, /// Shard content #[serde(with = "serde_bytes")] pub data: Vec, /// Hash of shard data pub hash: [u8; 32], } impl Shard { /// Create a new shard pub fn new(index: usize, is_data: bool, data: Vec) -> Self { let hash = *blake3::hash(&data).as_bytes(); Self { index, is_data, data, hash, } } /// Verify shard integrity pub fn verify(&self) -> bool { let computed = *blake3::hash(&self.data).as_bytes(); computed == self.hash } } /// Erasure-coded chunk #[derive(Debug, Clone, Serialize, Deserialize)] pub struct EncodedChunk { /// Original chunk CID pub chunk_cid: ContentId, /// Original data size (before padding) pub original_size: usize, /// Shard size (padded to be divisible by data_shards) pub shard_size: usize, /// Configuration used for encoding pub data_shards: usize, pub parity_shards: usize, /// All shards pub shards: Vec, } /// Erasure encoder/decoder pub struct ErasureCoder { config: ErasureConfig, rs: ReedSolomon, } impl ErasureCoder { /// Create a new erasure coder with default config pub fn new() -> Result { Self::with_config(ErasureConfig::default()) } /// Create a new erasure coder with custom config pub fn with_config(config: ErasureConfig) -> Result { let rs = ReedSolomon::new(config.data_shards, config.parity_shards) .map_err(|e| Error::ErasureCoding(format!("Failed to create RS coder: {}", e)))?; Ok(Self { config, rs }) } /// Encode data into shards with parity pub fn encode(&self, data: &[u8], chunk_cid: ContentId) -> Result { let original_size = data.len(); // Pad data to be divisible by data_shards let shard_size = (data.len() + self.config.data_shards - 1) / self.config.data_shards; let padded_size = shard_size * self.config.data_shards; let mut padded_data = data.to_vec(); padded_data.resize(padded_size, 0); // Split into data shards let mut shards: Vec> = padded_data .chunks(shard_size) .map(|c| c.to_vec()) .collect(); // Add parity shards (initially empty) for _ in 0..self.config.parity_shards { shards.push(vec![0u8; shard_size]); } // Encode parity self.rs.encode(&mut shards) .map_err(|e| Error::ErasureCoding(format!("Encoding failed: {}", e)))?; // Create shard structs let shard_structs: Vec = shards .into_iter() .enumerate() .map(|(i, data)| { Shard::new(i, i < self.config.data_shards, data) }) .collect(); Ok(EncodedChunk { chunk_cid, original_size, shard_size, data_shards: self.config.data_shards, parity_shards: self.config.parity_shards, shards: shard_structs, }) } /// Decode shards back to original data /// Some shards can be None (missing) as long as enough remain pub fn decode(&self, encoded: &EncodedChunk) -> Result> { let total = encoded.data_shards + encoded.parity_shards; // Prepare shards (Some for present, None for missing) let mut shards: Vec>> = vec![None; total]; let mut present_count = 0; for shard in &encoded.shards { if shard.index < total && shard.verify() { shards[shard.index] = Some(shard.data.clone()); present_count += 1; } } // Check if we have enough shards if present_count < encoded.data_shards { return Err(Error::ErasureCoding(format!( "Not enough shards: have {}, need {}", present_count, encoded.data_shards ))); } // Reconstruct missing shards self.rs.reconstruct(&mut shards) .map_err(|e| Error::ErasureCoding(format!("Reconstruction failed: {}", e)))?; // Combine data shards let mut result = Vec::with_capacity(encoded.original_size); for i in 0..encoded.data_shards { if let Some(ref shard_data) = shards[i] { result.extend_from_slice(shard_data); } else { return Err(Error::ErasureCoding("Reconstruction incomplete".into())); } } // Trim padding result.truncate(encoded.original_size); Ok(result) } } impl Default for ErasureCoder { fn default() -> Self { Self::new().expect("Default erasure config should work") } } #[cfg(test)] mod tests { use super::*; #[test] fn test_encode_decode() { let coder = ErasureCoder::new().unwrap(); let data = b"Hello, erasure coding!"; let cid = ContentId::from_content(data); let encoded = coder.encode(data, cid).unwrap(); assert_eq!(encoded.shards.len(), 14); // 10 data + 4 parity let decoded = coder.decode(&encoded).unwrap(); assert_eq!(decoded, data); } #[test] fn test_recovery_with_missing_shards() { let coder = ErasureCoder::new().unwrap(); let data = b"Test data for recovery with some missing shards"; let cid = ContentId::from_content(data); let mut encoded = coder.encode(data, cid).unwrap(); // Remove 4 shards (max we can lose with 4 parity) encoded.shards.remove(0); encoded.shards.remove(2); encoded.shards.remove(4); encoded.shards.remove(6); let decoded = coder.decode(&encoded).unwrap(); assert_eq!(decoded, data); } #[test] fn test_too_many_missing_shards() { let coder = ErasureCoder::new().unwrap(); let data = b"Test data"; let cid = ContentId::from_content(data); let mut encoded = coder.encode(data, cid).unwrap(); // Remove 5 shards (more than 4 parity can handle) for _ in 0..5 { encoded.shards.remove(0); } let result = coder.decode(&encoded); assert!(result.is_err()); } #[test] fn test_shard_verification() { let shard = Shard::new(0, true, b"test data".to_vec()); assert!(shard.verify()); } #[test] fn test_small_config() { let config = ErasureConfig { data_shards: 2, parity_shards: 1, }; let coder = ErasureCoder::with_config(config).unwrap(); let data = b"Small test"; let cid = ContentId::from_content(data); let encoded = coder.encode(data, cid).unwrap(); assert_eq!(encoded.shards.len(), 3); let decoded = coder.decode(&encoded).unwrap(); assert_eq!(decoded, data); } }