synor/crates/synor-storage/src/chunker.rs
Gulshan Yadav f5bdef2691 feat(storage): add Synor Storage L2 decentralized storage layer
Complete implementation of the Synor Storage Layer (L2) for decentralized
content storage. This enables permanent, censorship-resistant storage of
any file type including Next.js apps, Flutter apps, and arbitrary data.

Core modules:
- cid.rs: Content addressing with Blake3/SHA256 hashing (synor1... format)
- chunker.rs: File chunking for parallel upload/download (1MB chunks)
- erasure.rs: Reed-Solomon erasure coding (10+4 shards) for fault tolerance
- proof.rs: Storage proofs with Merkle trees for verification
- deal.rs: Storage deals and market economics (3 pricing tiers)

Infrastructure:
- node/: Storage node service with P2P networking and local storage
- gateway/: HTTP gateway for browser access with LRU caching
- Docker deployment with nginx load balancer

Architecture:
- Operates as L2 alongside Synor L1 blockchain
- Storage proofs verified on-chain for reward distribution
- Can lose 4 shards per chunk and still recover data
- Gateway URLs: /synor1<cid> for content access

All 28 unit tests passing.
2026-01-10 11:42:03 +05:30

282 lines
7.5 KiB
Rust

//! File chunking for large file storage
//!
//! Files are split into fixed-size chunks for:
//! - Parallel upload/download
//! - Efficient deduplication
//! - Erasure coding application
use crate::cid::ContentId;
use serde::{Deserialize, Serialize};
/// Default chunk size: 1 MB (1024 * 1024 bytes).
///
/// Used by `ChunkerConfig::default()`; override via `Chunker::with_config`.
pub const DEFAULT_CHUNK_SIZE: usize = 1024 * 1024;
/// A single fixed-size piece of a file, carrying its own content ID so it
/// can be verified independently of the rest of the file.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Chunk {
    /// Zero-based chunk index within the file.
    pub index: u32,
    /// Content ID derived from `data` at construction time.
    pub cid: ContentId,
    /// Raw chunk bytes.
    // serde_bytes serializes Vec<u8> as a byte string instead of a
    // sequence of integers (smaller, faster for binary formats).
    #[serde(with = "serde_bytes")]
    pub data: Vec<u8>,
    /// Byte offset of this chunk within the original file.
    pub offset: u64,
}
impl Chunk {
/// Create a new chunk
pub fn new(index: u32, data: Vec<u8>, offset: u64) -> Self {
let cid = ContentId::from_content(&data);
Self {
index,
cid,
data,
offset,
}
}
/// Verify chunk integrity
pub fn verify(&self) -> bool {
self.cid.verify(&self.data)
}
/// Get chunk size
pub fn size(&self) -> usize {
self.data.len()
}
}
/// Configuration for [`Chunker`].
#[derive(Debug, Clone)]
pub struct ChunkerConfig {
    /// Size of each chunk in bytes. Every chunk except possibly the last
    /// has exactly this many bytes. Must be non-zero (a zero value makes
    /// `Chunker::chunk_count` divide by zero).
    pub chunk_size: usize,
}
impl Default for ChunkerConfig {
fn default() -> Self {
Self {
chunk_size: DEFAULT_CHUNK_SIZE,
}
}
}
/// File chunker — splits byte buffers into fixed-size [`Chunk`]s and
/// reassembles them, verifying integrity along the way.
pub struct Chunker {
    // Held by value; cheap to clone via ChunkerConfig's derive(Clone).
    config: ChunkerConfig,
}
impl Chunker {
    /// Create a chunker with the default configuration (1 MB chunks).
    pub fn new() -> Self {
        Self {
            config: ChunkerConfig::default(),
        }
    }

    /// Create a chunker with a custom configuration.
    pub fn with_config(config: ChunkerConfig) -> Self {
        Self { config }
    }

    /// Split `data` into chunks of `config.chunk_size` bytes.
    ///
    /// Every chunk except possibly the last is exactly `chunk_size` bytes.
    /// Empty input yields an empty vector.
    pub fn chunk(&self, data: &[u8]) -> Vec<Chunk> {
        data.chunks(self.config.chunk_size)
            .enumerate()
            .map(|(i, piece)| {
                // i * chunk_size cannot overflow usize: the offset of an
                // in-memory slice is bounded by data.len().
                Chunk::new(i as u32, piece.to_vec(), (i * self.config.chunk_size) as u64)
            })
            .collect()
    }

    /// Reassemble chunks into the original data.
    ///
    /// Chunks may arrive in any order; they are sorted by index first.
    ///
    /// # Errors
    /// - [`ReassembleError::MissingChunk`] if the indices are not the
    ///   contiguous run `0..n` (a duplicated index is also reported this
    ///   way, since it displaces a later index).
    /// - [`ReassembleError::InvalidChunk`] if any chunk fails CID
    ///   verification.
    pub fn reassemble(&self, chunks: &[Chunk]) -> Result<Vec<u8>, ReassembleError> {
        if chunks.is_empty() {
            return Ok(Vec::new());
        }
        // Sort borrowed references by index; the caller's slice is untouched.
        let mut sorted: Vec<_> = chunks.iter().collect();
        sorted.sort_by_key(|c| c.index);
        // Indices must be exactly 0, 1, ..., n-1.
        for (i, chunk) in sorted.iter().enumerate() {
            if chunk.index != i as u32 {
                return Err(ReassembleError::MissingChunk(i as u32));
            }
        }
        // Verify every chunk before committing to the allocation below.
        for chunk in &sorted {
            if !chunk.verify() {
                return Err(ReassembleError::InvalidChunk(chunk.index));
            }
        }
        // Single exact-size allocation, then append in index order.
        let total_size: usize = sorted.iter().map(|c| c.data.len()).sum();
        let mut result = Vec::with_capacity(total_size);
        for chunk in sorted {
            result.extend_from_slice(&chunk.data);
        }
        Ok(result)
    }

    /// Number of chunks a file of `file_size` bytes will produce
    /// (ceiling division by the configured chunk size). Returns 0 for an
    /// empty file.
    ///
    /// # Panics
    /// Panics if `config.chunk_size` is 0.
    pub fn chunk_count(&self, file_size: u64) -> u32 {
        // Compute entirely in u64: the previous `file_size as usize` cast
        // silently truncated sizes above 4 GiB on 32-bit targets.
        let chunk_size = self.config.chunk_size as u64;
        let count = file_size / chunk_size + u64::from(file_size % chunk_size != 0);
        // Files needing more than u32::MAX chunks (> ~4 PiB at 1 MB each)
        // are outside this API's design range.
        count as u32
    }
}
impl Default for Chunker {
fn default() -> Self {
Self::new()
}
}
/// Errors during chunk reassembly (see [`Chunker::reassemble`]).
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ReassembleError {
    /// The chunk expected at this index is absent (or an earlier index
    /// is duplicated, displacing it).
    MissingChunk(u32),
    /// The chunk at this index failed CID integrity verification.
    InvalidChunk(u32),
}
impl std::error::Error for ReassembleError {}
impl std::fmt::Display for ReassembleError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Self::MissingChunk(i) => write!(f, "Missing chunk at index {}", i),
Self::InvalidChunk(i) => write!(f, "Chunk {} failed verification", i),
}
}
}
/// Metadata describing a file that has been split into chunks.
///
/// Holds only CIDs and sizes — no chunk payloads — so it can be stored
/// and transmitted cheaply alongside the chunks themselves.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ChunkedFile {
    /// CID of the complete (unchunked) file.
    pub cid: ContentId,
    /// Total file size in bytes.
    pub size: u64,
    /// Number of chunks the file was split into.
    pub chunk_count: u32,
    /// Size of each chunk in bytes (except possibly the last).
    // Taken from the first chunk; see ChunkedFile::from_chunks.
    pub chunk_size: usize,
    /// CIDs of each chunk, in file order.
    pub chunk_cids: Vec<ContentId>,
}
impl ChunkedFile {
/// Create metadata from chunks
pub fn from_chunks(chunks: &[Chunk], original_cid: ContentId) -> Self {
Self {
cid: original_cid,
size: chunks.iter().map(|c| c.data.len() as u64).sum(),
chunk_count: chunks.len() as u32,
chunk_size: if chunks.is_empty() { 0 } else { chunks[0].data.len() },
chunk_cids: chunks.iter().map(|c| c.cid.clone()).collect(),
}
}
}
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_chunk_small_file() {
        // Input smaller than the default 1 MB chunk -> exactly one chunk.
        let chunker = Chunker::new();
        let payload = b"Small file that fits in one chunk";
        let pieces = chunker.chunk(payload);
        assert_eq!(pieces.len(), 1);
        assert_eq!(pieces[0].data, payload);
        assert!(pieces[0].verify());
    }

    #[test]
    fn test_chunk_large_file() {
        // Tiny chunk size forces a multi-chunk split.
        let chunker = Chunker::with_config(ChunkerConfig { chunk_size: 10 });
        let payload = b"This is a longer file that will be split into chunks";
        let pieces = chunker.chunk(payload);
        assert!(pieces.len() > 1);
        // Every chunk must pass integrity verification.
        assert!(pieces.iter().all(Chunk::verify));
    }

    #[test]
    fn test_reassemble() {
        // Round trip: chunk then reassemble must reproduce the input.
        let chunker = Chunker::with_config(ChunkerConfig { chunk_size: 10 });
        let original = b"This is a test file for chunking and reassembly";
        let rebuilt = chunker.reassemble(&chunker.chunk(original)).unwrap();
        assert_eq!(rebuilt, original);
    }

    #[test]
    fn test_reassemble_missing_chunk() {
        // Dropping an interior chunk must surface MissingChunk.
        let chunker = Chunker::with_config(ChunkerConfig { chunk_size: 10 });
        let payload = b"Test data for missing chunk test case here";
        let mut pieces = chunker.chunk(payload);
        pieces.remove(1);
        let outcome = chunker.reassemble(&pieces);
        assert!(matches!(outcome, Err(ReassembleError::MissingChunk(_))));
    }

    #[test]
    fn test_chunk_count() {
        // Ceiling division against a 100-byte chunk size.
        let chunker = Chunker::with_config(ChunkerConfig { chunk_size: 100 });
        let cases = [(0, 0), (50, 1), (100, 1), (101, 2), (250, 3)];
        for (size, expected) in cases {
            assert_eq!(chunker.chunk_count(size), expected);
        }
    }

    #[test]
    fn test_empty_file() {
        // Empty input chunks to nothing and reassembles to nothing.
        let chunker = Chunker::new();
        let pieces = chunker.chunk(&[]);
        assert!(pieces.is_empty());
        let rebuilt = chunker.reassemble(&pieces).unwrap();
        assert!(rebuilt.is_empty());
    }
}