Complete implementation of the Synor Storage Layer (L2) for decentralized content storage. This enables permanent, censorship-resistant storage of any file type, including Next.js apps, Flutter apps, and arbitrary data.

Core modules:
- cid.rs: content addressing with Blake3/SHA256 hashing (synor1... format)
- chunker.rs: file chunking for parallel upload/download (1 MB chunks)
- erasure.rs: Reed-Solomon erasure coding (10+4 shards) for fault tolerance
- proof.rs: storage proofs with Merkle trees for verification
- deal.rs: storage deals and market economics (3 pricing tiers)

Infrastructure:
- node/: storage node service with P2P networking and local storage
- gateway/: HTTP gateway for browser access with LRU caching
- Docker deployment with nginx load balancer

Architecture:
- Operates as an L2 alongside the Synor L1 blockchain
- Storage proofs are verified on-chain for reward distribution
- Up to 4 shards per chunk can be lost and the data is still recoverable
- Gateway URLs: /synor1<cid> for content access

All 28 unit tests passing.

286 lines · 8.3 KiB · Rust
//! Erasure coding for fault-tolerant storage.
//!
//! Uses Reed-Solomon coding to add redundancy to chunks, allowing
//! recovery of the original data even if some shards are lost.

use reed_solomon_erasure::galois_8::ReedSolomon;
|
|
use serde::{Deserialize, Serialize};
|
|
use crate::cid::ContentId;
|
|
use crate::error::{Error, Result};
|
|
|
|
/// Default number of data shards (original data pieces per chunk)
pub const DEFAULT_DATA_SHARDS: usize = 10;

/// Default number of parity shards (redundancy pieces per chunk);
/// this is also the default fault tolerance in shards
pub const DEFAULT_PARITY_SHARDS: usize = 4;

/// Erasure coding configuration
///
/// A chunk is split into `data_shards` pieces and extended with
/// `parity_shards` redundant pieces; any `data_shards` of the total
/// suffice to reconstruct the original data.
#[derive(Debug, Clone)]
pub struct ErasureConfig {
    /// Number of data shards (original data pieces)
    pub data_shards: usize,
    /// Number of parity shards (redundancy pieces)
    pub parity_shards: usize,
}

impl Default for ErasureConfig {
|
|
fn default() -> Self {
|
|
Self {
|
|
data_shards: DEFAULT_DATA_SHARDS,
|
|
parity_shards: DEFAULT_PARITY_SHARDS,
|
|
}
|
|
}
|
|
}
|
|
|
|
impl ErasureConfig {
|
|
/// Total number of shards
|
|
pub fn total_shards(&self) -> usize {
|
|
self.data_shards + self.parity_shards
|
|
}
|
|
|
|
/// Maximum shards that can be lost while still recovering
|
|
pub fn fault_tolerance(&self) -> usize {
|
|
self.parity_shards
|
|
}
|
|
}
|
|
|
|
/// A single shard of encoded data
///
/// Carries its own position and integrity hash, so shards can be stored
/// and transmitted independently and re-slotted by index at decode time.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Shard {
    /// Shard index (0..total_shards); data shards come before parity shards
    pub index: usize,
    /// Whether this is a data shard (vs parity)
    pub is_data: bool,
    /// Shard content
    // serde_bytes serializes Vec<u8> as a compact byte string rather
    // than an element-by-element sequence
    #[serde(with = "serde_bytes")]
    pub data: Vec<u8>,
    /// Blake3 hash of shard data, used by `verify()` to detect corruption
    pub hash: [u8; 32],
}

impl Shard {
|
|
/// Create a new shard
|
|
pub fn new(index: usize, is_data: bool, data: Vec<u8>) -> Self {
|
|
let hash = *blake3::hash(&data).as_bytes();
|
|
Self {
|
|
index,
|
|
is_data,
|
|
data,
|
|
hash,
|
|
}
|
|
}
|
|
|
|
/// Verify shard integrity
|
|
pub fn verify(&self) -> bool {
|
|
let computed = *blake3::hash(&self.data).as_bytes();
|
|
computed == self.hash
|
|
}
|
|
}
|
|
|
|
/// Erasure-coded chunk
///
/// Self-describing: records the encoding parameters and the original
/// length, so decoding does not depend on out-of-band configuration and
/// the zero padding added during encoding can be stripped.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EncodedChunk {
    /// Original chunk CID
    pub chunk_cid: ContentId,
    /// Original data size in bytes (before padding)
    pub original_size: usize,
    /// Shard size in bytes (data is padded to be divisible by data_shards)
    pub shard_size: usize,
    /// Number of data shards used for encoding
    pub data_shards: usize,
    /// Number of parity shards used for encoding
    pub parity_shards: usize,
    /// All shards (data shards first, then parity; indexed 0..total)
    pub shards: Vec<Shard>,
}

/// Erasure encoder/decoder
///
/// Wraps a Reed-Solomon codec pre-sized for the shard counts in `config`.
pub struct ErasureCoder {
    /// Shard-count configuration this coder was built with
    config: ErasureConfig,
    /// Reed-Solomon codec instance (GF(2^8) arithmetic)
    rs: ReedSolomon,
}

impl ErasureCoder {
|
|
/// Create a new erasure coder with default config
|
|
pub fn new() -> Result<Self> {
|
|
Self::with_config(ErasureConfig::default())
|
|
}
|
|
|
|
/// Create a new erasure coder with custom config
|
|
pub fn with_config(config: ErasureConfig) -> Result<Self> {
|
|
let rs = ReedSolomon::new(config.data_shards, config.parity_shards)
|
|
.map_err(|e| Error::ErasureCoding(format!("Failed to create RS coder: {}", e)))?;
|
|
|
|
Ok(Self { config, rs })
|
|
}
|
|
|
|
/// Encode data into shards with parity
|
|
pub fn encode(&self, data: &[u8], chunk_cid: ContentId) -> Result<EncodedChunk> {
|
|
let original_size = data.len();
|
|
|
|
// Pad data to be divisible by data_shards
|
|
let shard_size = (data.len() + self.config.data_shards - 1) / self.config.data_shards;
|
|
let padded_size = shard_size * self.config.data_shards;
|
|
|
|
let mut padded_data = data.to_vec();
|
|
padded_data.resize(padded_size, 0);
|
|
|
|
// Split into data shards
|
|
let mut shards: Vec<Vec<u8>> = padded_data
|
|
.chunks(shard_size)
|
|
.map(|c| c.to_vec())
|
|
.collect();
|
|
|
|
// Add parity shards (initially empty)
|
|
for _ in 0..self.config.parity_shards {
|
|
shards.push(vec![0u8; shard_size]);
|
|
}
|
|
|
|
// Encode parity
|
|
self.rs.encode(&mut shards)
|
|
.map_err(|e| Error::ErasureCoding(format!("Encoding failed: {}", e)))?;
|
|
|
|
// Create shard structs
|
|
let shard_structs: Vec<Shard> = shards
|
|
.into_iter()
|
|
.enumerate()
|
|
.map(|(i, data)| {
|
|
Shard::new(i, i < self.config.data_shards, data)
|
|
})
|
|
.collect();
|
|
|
|
Ok(EncodedChunk {
|
|
chunk_cid,
|
|
original_size,
|
|
shard_size,
|
|
data_shards: self.config.data_shards,
|
|
parity_shards: self.config.parity_shards,
|
|
shards: shard_structs,
|
|
})
|
|
}
|
|
|
|
/// Decode shards back to original data
|
|
/// Some shards can be None (missing) as long as enough remain
|
|
pub fn decode(&self, encoded: &EncodedChunk) -> Result<Vec<u8>> {
|
|
let total = encoded.data_shards + encoded.parity_shards;
|
|
|
|
// Prepare shards (Some for present, None for missing)
|
|
let mut shards: Vec<Option<Vec<u8>>> = vec![None; total];
|
|
let mut present_count = 0;
|
|
|
|
for shard in &encoded.shards {
|
|
if shard.index < total && shard.verify() {
|
|
shards[shard.index] = Some(shard.data.clone());
|
|
present_count += 1;
|
|
}
|
|
}
|
|
|
|
// Check if we have enough shards
|
|
if present_count < encoded.data_shards {
|
|
return Err(Error::ErasureCoding(format!(
|
|
"Not enough shards: have {}, need {}",
|
|
present_count, encoded.data_shards
|
|
)));
|
|
}
|
|
|
|
// Reconstruct missing shards
|
|
self.rs.reconstruct(&mut shards)
|
|
.map_err(|e| Error::ErasureCoding(format!("Reconstruction failed: {}", e)))?;
|
|
|
|
// Combine data shards
|
|
let mut result = Vec::with_capacity(encoded.original_size);
|
|
for i in 0..encoded.data_shards {
|
|
if let Some(ref shard_data) = shards[i] {
|
|
result.extend_from_slice(shard_data);
|
|
} else {
|
|
return Err(Error::ErasureCoding("Reconstruction incomplete".into()));
|
|
}
|
|
}
|
|
|
|
// Trim padding
|
|
result.truncate(encoded.original_size);
|
|
|
|
Ok(result)
|
|
}
|
|
}
|
|
|
|
impl Default for ErasureCoder {
|
|
fn default() -> Self {
|
|
Self::new().expect("Default erasure config should work")
|
|
}
|
|
}
|
|
|
|
#[cfg(test)]
mod tests {
    use super::*;

    // Round-trip: encode then decode with no losses recovers the input.
    #[test]
    fn test_encode_decode() {
        let coder = ErasureCoder::new().unwrap();
        let data = b"Hello, erasure coding!";
        let cid = ContentId::from_content(data);

        let encoded = coder.encode(data, cid).unwrap();
        assert_eq!(encoded.shards.len(), 14); // 10 data + 4 parity

        let decoded = coder.decode(&encoded).unwrap();
        assert_eq!(decoded, data);
    }

    // Losing exactly `parity_shards` shards must still be recoverable.
    #[test]
    fn test_recovery_with_missing_shards() {
        let coder = ErasureCoder::new().unwrap();
        let data = b"Test data for recovery with some missing shards";
        let cid = ContentId::from_content(data);

        let mut encoded = coder.encode(data, cid).unwrap();

        // Remove 4 shards (max we can lose with 4 parity).
        // NOTE: Vec::remove shifts later elements left, so these four
        // calls actually drop the shards with original indices 0, 3, 6,
        // and 9 — decode still works because each Shard carries its own
        // `index`, independent of its position in the Vec.
        encoded.shards.remove(0);
        encoded.shards.remove(2);
        encoded.shards.remove(4);
        encoded.shards.remove(6);

        let decoded = coder.decode(&encoded).unwrap();
        assert_eq!(decoded, data);
    }

    // Losing more shards than the parity count must fail, not corrupt.
    #[test]
    fn test_too_many_missing_shards() {
        let coder = ErasureCoder::new().unwrap();
        let data = b"Test data";
        let cid = ContentId::from_content(data);

        let mut encoded = coder.encode(data, cid).unwrap();

        // Remove 5 shards (more than 4 parity can handle)
        for _ in 0..5 {
            encoded.shards.remove(0);
        }

        let result = coder.decode(&encoded);
        assert!(result.is_err());
    }

    // A freshly built shard must pass its own integrity check.
    #[test]
    fn test_shard_verification() {
        let shard = Shard::new(0, true, b"test data".to_vec());
        assert!(shard.verify());
    }

    // Non-default shard counts (2 data + 1 parity) round-trip correctly.
    #[test]
    fn test_small_config() {
        let config = ErasureConfig {
            data_shards: 2,
            parity_shards: 1,
        };
        let coder = ErasureCoder::with_config(config).unwrap();

        let data = b"Small test";
        let cid = ContentId::from_content(data);

        let encoded = coder.encode(data, cid).unwrap();
        assert_eq!(encoded.shards.len(), 3);

        let decoded = coder.decode(&encoded).unwrap();
        assert_eq!(decoded, data);
    }
}