synor/crates/synor-storage/src/erasure.rs
Gulshan Yadav 63d2d44e75 chore: apply clippy auto-fixes to reduce warnings
- Applied clippy --fix to synor-storage (19 fixes)
- Applied clippy --fix to synor-zk (2 fixes)
- Simplified code patterns and removed redundant operations
2026-01-26 21:16:10 +05:30

286 lines
8.3 KiB
Rust

//! Erasure Coding for fault-tolerant storage
//!
//! Uses Reed-Solomon coding to add redundancy to chunks.
//! Allows recovery of data even if some shards are lost.
use reed_solomon_erasure::galois_8::ReedSolomon;
use serde::{Deserialize, Serialize};
use crate::cid::ContentId;
use crate::error::{Error, Result};
/// Default number of data shards
pub const DEFAULT_DATA_SHARDS: usize = 10;
/// Default number of parity shards
pub const DEFAULT_PARITY_SHARDS: usize = 4;

/// Erasure coding configuration.
///
/// With `data_shards = k` and `parity_shards = m`, a chunk is split into
/// `k` data pieces plus `m` redundancy pieces; any `k` of the `k + m`
/// shards suffice to reconstruct the original data.
// Copy/PartialEq/Eq are derived: the struct is two `usize`s, and equality
// lets callers compare a chunk's recorded geometry against a coder's.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct ErasureConfig {
    /// Number of data shards (original data pieces)
    pub data_shards: usize,
    /// Number of parity shards (redundancy pieces)
    pub parity_shards: usize,
}
impl Default for ErasureConfig {
fn default() -> Self {
Self {
data_shards: DEFAULT_DATA_SHARDS,
parity_shards: DEFAULT_PARITY_SHARDS,
}
}
}
impl ErasureConfig {
    /// Total number of shards produced per chunk (data plus parity).
    pub fn total_shards(&self) -> usize {
        let Self { data_shards, parity_shards } = self;
        data_shards + parity_shards
    }

    /// Maximum number of shards that may be lost while the chunk remains
    /// recoverable — exactly the parity shard count.
    pub fn fault_tolerance(&self) -> usize {
        self.parity_shards
    }
}
/// A single shard of encoded data.
///
/// One of the `data_shards + parity_shards` pieces produced by encoding a
/// chunk. Each shard carries a BLAKE3 hash of its own content so corruption
/// can be detected before reconstruction is attempted.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Shard {
    /// Shard index (0..total_shards)
    pub index: usize,
    /// Whether this is a data shard (vs parity)
    pub is_data: bool,
    /// Shard content
    // serde_bytes serializes Vec<u8> as a compact byte string rather than a
    // sequence of individual integers.
    #[serde(with = "serde_bytes")]
    pub data: Vec<u8>,
    /// Hash of shard data
    // BLAKE3 digest of `data`, computed at construction, checked by verify().
    pub hash: [u8; 32],
}
impl Shard {
/// Create a new shard
pub fn new(index: usize, is_data: bool, data: Vec<u8>) -> Self {
let hash = *blake3::hash(&data).as_bytes();
Self {
index,
is_data,
data,
hash,
}
}
/// Verify shard integrity
pub fn verify(&self) -> bool {
let computed = *blake3::hash(&self.data).as_bytes();
computed == self.hash
}
}
/// Erasure-coded chunk
///
/// The self-describing result of encoding one chunk: the shards plus the
/// parameters needed to decode them again.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EncodedChunk {
    /// Original chunk CID
    pub chunk_cid: ContentId,
    /// Original data size (before padding)
    pub original_size: usize,
    /// Shard size (padded to be divisible by data_shards)
    pub shard_size: usize,
    /// Number of data shards used for encoding
    pub data_shards: usize,
    /// Number of parity shards used for encoding
    pub parity_shards: usize,
    /// All shards
    pub shards: Vec<Shard>,
}
/// Erasure encoder/decoder
///
/// Wraps a Reed-Solomon codec built for one fixed shard geometry.
pub struct ErasureCoder {
    // Shard-count configuration the codec was constructed with.
    config: ErasureConfig,
    // Reed-Solomon codec over GF(2^8).
    rs: ReedSolomon,
}
impl ErasureCoder {
/// Create a new erasure coder with default config
pub fn new() -> Result<Self> {
Self::with_config(ErasureConfig::default())
}
/// Create a new erasure coder with custom config
pub fn with_config(config: ErasureConfig) -> Result<Self> {
let rs = ReedSolomon::new(config.data_shards, config.parity_shards)
.map_err(|e| Error::ErasureCoding(format!("Failed to create RS coder: {}", e)))?;
Ok(Self { config, rs })
}
/// Encode data into shards with parity
pub fn encode(&self, data: &[u8], chunk_cid: ContentId) -> Result<EncodedChunk> {
let original_size = data.len();
// Pad data to be divisible by data_shards
let shard_size = data.len().div_ceil(self.config.data_shards);
let padded_size = shard_size * self.config.data_shards;
let mut padded_data = data.to_vec();
padded_data.resize(padded_size, 0);
// Split into data shards
let mut shards: Vec<Vec<u8>> = padded_data
.chunks(shard_size)
.map(|c| c.to_vec())
.collect();
// Add parity shards (initially empty)
for _ in 0..self.config.parity_shards {
shards.push(vec![0u8; shard_size]);
}
// Encode parity
self.rs.encode(&mut shards)
.map_err(|e| Error::ErasureCoding(format!("Encoding failed: {}", e)))?;
// Create shard structs
let shard_structs: Vec<Shard> = shards
.into_iter()
.enumerate()
.map(|(i, data)| {
Shard::new(i, i < self.config.data_shards, data)
})
.collect();
Ok(EncodedChunk {
chunk_cid,
original_size,
shard_size,
data_shards: self.config.data_shards,
parity_shards: self.config.parity_shards,
shards: shard_structs,
})
}
/// Decode shards back to original data
/// Some shards can be None (missing) as long as enough remain
pub fn decode(&self, encoded: &EncodedChunk) -> Result<Vec<u8>> {
let total = encoded.data_shards + encoded.parity_shards;
// Prepare shards (Some for present, None for missing)
let mut shards: Vec<Option<Vec<u8>>> = vec![None; total];
let mut present_count = 0;
for shard in &encoded.shards {
if shard.index < total && shard.verify() {
shards[shard.index] = Some(shard.data.clone());
present_count += 1;
}
}
// Check if we have enough shards
if present_count < encoded.data_shards {
return Err(Error::ErasureCoding(format!(
"Not enough shards: have {}, need {}",
present_count, encoded.data_shards
)));
}
// Reconstruct missing shards
self.rs.reconstruct(&mut shards)
.map_err(|e| Error::ErasureCoding(format!("Reconstruction failed: {}", e)))?;
// Combine data shards
let mut result = Vec::with_capacity(encoded.original_size);
for i in 0..encoded.data_shards {
if let Some(ref shard_data) = shards[i] {
result.extend_from_slice(shard_data);
} else {
return Err(Error::ErasureCoding("Reconstruction incomplete".into()));
}
}
// Trim padding
result.truncate(encoded.original_size);
Ok(result)
}
}
impl Default for ErasureCoder {
fn default() -> Self {
Self::new().expect("Default erasure config should work")
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Round-trip with every shard intact.
    #[test]
    fn test_encode_decode() {
        let coder = ErasureCoder::new().unwrap();
        let data = b"Hello, erasure coding!";
        let cid = ContentId::from_content(data);
        let encoded = coder.encode(data, cid).unwrap();
        // 10 data + 4 parity
        assert_eq!(encoded.shards.len(), 14);
        assert_eq!(coder.decode(&encoded).unwrap(), data);
    }

    /// Losing exactly `parity_shards` shards must still decode.
    #[test]
    fn test_recovery_with_missing_shards() {
        let coder = ErasureCoder::new().unwrap();
        let data = b"Test data for recovery with some missing shards";
        let cid = ContentId::from_content(data);
        let mut encoded = coder.encode(data, cid).unwrap();
        // Drop 4 shards — the maximum tolerable with 4 parity shards.
        for idx in [0, 2, 4, 6] {
            encoded.shards.remove(idx);
        }
        assert_eq!(coder.decode(&encoded).unwrap(), data);
    }

    /// Losing more shards than the parity count must fail.
    #[test]
    fn test_too_many_missing_shards() {
        let coder = ErasureCoder::new().unwrap();
        let data = b"Test data";
        let cid = ContentId::from_content(data);
        let mut encoded = coder.encode(data, cid).unwrap();
        // Drop 5 shards — one more than the 4 parity shards can cover.
        encoded.shards.drain(..5);
        assert!(coder.decode(&encoded).is_err());
    }

    /// A freshly built shard verifies against its own hash.
    #[test]
    fn test_shard_verification() {
        assert!(Shard::new(0, true, b"test data".to_vec()).verify());
    }

    /// Non-default geometry: 2 data + 1 parity.
    #[test]
    fn test_small_config() {
        let config = ErasureConfig {
            data_shards: 2,
            parity_shards: 1,
        };
        let coder = ErasureCoder::with_config(config).unwrap();
        let data = b"Small test";
        let cid = ContentId::from_content(data);
        let encoded = coder.encode(data, cid).unwrap();
        assert_eq!(encoded.shards.len(), 3);
        assert_eq!(coder.decode(&encoded).unwrap(), data);
    }
}