//! Bytecode Compression and Deduplication. //! //! Optimizes contract storage through: //! //! - **Zstd Compression**: 40-70% size reduction for WASM bytecode //! - **Code Deduplication**: Hash-based fragment sharing across contracts //! - **Delta Encoding**: Efficient storage of contract upgrades //! //! # Benefits //! //! - Reduced storage costs for deployers //! - Faster network sync (smaller blocks) //! - Lower memory footprint for node operators //! - Efficient contract upgrade storage use std::collections::HashMap; use std::sync::Arc; use parking_lot::RwLock; use crate::{ContractId, VmError}; /// Compression level for bytecode storage. #[derive(Clone, Copy, Debug, Default, PartialEq, Eq)] pub enum CompressionLevel { /// No compression. None, /// Fast compression (level 1). Fast, /// Balanced compression (level 3). #[default] Balanced, /// Maximum compression (level 19). Maximum, } impl CompressionLevel { /// Returns the zstd compression level. fn zstd_level(&self) -> i32 { match self { CompressionLevel::None => 0, CompressionLevel::Fast => 1, CompressionLevel::Balanced => 3, CompressionLevel::Maximum => 19, } } } /// Configuration for bytecode compression. #[derive(Clone, Debug)] pub struct CompressionConfig { /// Compression level. pub level: CompressionLevel, /// Enable deduplication. pub deduplication: bool, /// Minimum fragment size for deduplication (bytes). pub min_fragment_size: usize, /// Maximum fragment size (bytes). pub max_fragment_size: usize, /// Enable delta encoding for upgrades. pub delta_encoding: bool, } impl Default for CompressionConfig { fn default() -> Self { Self { level: CompressionLevel::Balanced, deduplication: true, min_fragment_size: 256, max_fragment_size: 16384, delta_encoding: true, } } } /// Compressed bytecode with metadata. #[derive(Clone, Debug)] pub struct CompressedBytecode { /// Original size before compression. pub original_size: usize, /// Compressed data. pub data: Vec, /// Compression ratio (compressed/original). 
pub ratio: f64, /// Content hash for deduplication. pub hash: [u8; 32], /// Fragment references (if deduplicated). pub fragments: Vec, } /// Reference to a shared code fragment. #[derive(Clone, Debug)] pub struct FragmentRef { /// Fragment hash. pub hash: [u8; 32], /// Offset in the original bytecode. pub offset: usize, /// Length of the fragment. pub length: usize, } /// A shared code fragment. #[derive(Clone, Debug)] pub struct CodeFragment { /// Fragment hash. pub hash: [u8; 32], /// Fragment data. pub data: Vec, /// Reference count. pub ref_count: u32, } /// Statistics for compression operations. #[derive(Clone, Debug, Default)] pub struct CompressionStats { /// Total bytes before compression. pub total_original: u64, /// Total bytes after compression. pub total_compressed: u64, /// Number of contracts compressed. pub contracts_compressed: u64, /// Number of shared fragments. pub shared_fragments: u64, /// Bytes saved through deduplication. pub dedup_savings: u64, } impl CompressionStats { /// Returns the overall compression ratio. pub fn compression_ratio(&self) -> f64 { if self.total_original == 0 { 1.0 } else { self.total_compressed as f64 / self.total_original as f64 } } /// Returns total bytes saved. pub fn bytes_saved(&self) -> u64 { self.total_original.saturating_sub(self.total_compressed) + self.dedup_savings } } /// The bytecode compressor. pub struct BytecodeCompressor { /// Configuration. config: CompressionConfig, /// Shared fragment store. fragments: RwLock>>, /// Statistics. stats: RwLock, } impl BytecodeCompressor { /// Creates a new compressor with default config. pub fn new() -> Self { Self::with_config(CompressionConfig::default()) } /// Creates with custom configuration. pub fn with_config(config: CompressionConfig) -> Self { Self { config, fragments: RwLock::new(HashMap::new()), stats: RwLock::new(CompressionStats::default()), } } /// Compresses bytecode. 
pub fn compress(&self, bytecode: &[u8]) -> Result { let original_size = bytecode.len(); let hash = blake3::hash(bytecode).into(); // Compress with zstd let compressed = if self.config.level != CompressionLevel::None { zstd::encode_all(bytecode, self.config.level.zstd_level()) .map_err(|e| VmError::ExecutionError(format!("Compression failed: {}", e)))? } else { bytecode.to_vec() }; let ratio = compressed.len() as f64 / original_size as f64; // Extract fragments for deduplication let fragments = if self.config.deduplication { self.extract_fragments(bytecode) } else { Vec::new() }; // Update stats { let mut stats = self.stats.write(); stats.total_original += original_size as u64; stats.total_compressed += compressed.len() as u64; stats.contracts_compressed += 1; } Ok(CompressedBytecode { original_size, data: compressed, ratio, hash, fragments, }) } /// Decompresses bytecode. pub fn decompress(&self, compressed: &CompressedBytecode) -> Result, VmError> { if self.config.level == CompressionLevel::None { return Ok(compressed.data.clone()); } zstd::decode_all(compressed.data.as_slice()) .map_err(|e| VmError::ExecutionError(format!("Decompression failed: {}", e))) } /// Extracts reusable fragments from bytecode. 
fn extract_fragments(&self, bytecode: &[u8]) -> Vec { let mut fragments = Vec::new(); // Use content-defined chunking (simplified rolling hash) let min_size = self.config.min_fragment_size; let max_size = self.config.max_fragment_size; if bytecode.len() < min_size { return fragments; } let mut offset = 0; while offset < bytecode.len() { let end = (offset + max_size).min(bytecode.len()); let chunk_end = self.find_chunk_boundary(&bytecode[offset..end], min_size); let chunk = &bytecode[offset..offset + chunk_end]; let hash: [u8; 32] = blake3::hash(chunk).into(); // Store fragment if new, otherwise increment ref count let mut frags = self.fragments.write(); use std::collections::hash_map::Entry; match frags.entry(hash) { Entry::Vacant(e) => { e.insert(Arc::new(CodeFragment { hash, data: chunk.to_vec(), ref_count: 1, })); } Entry::Occupied(mut e) => { // Increment ref count for existing fragment if let Some(inner) = Arc::get_mut(e.get_mut()) { inner.ref_count += 1; } } } fragments.push(FragmentRef { hash, offset, length: chunk_end, }); offset += chunk_end; } // Update stats { let mut stats = self.stats.write(); stats.shared_fragments = self.fragments.read().len() as u64; } fragments } /// Finds a chunk boundary using a simplified rolling hash. fn find_chunk_boundary(&self, data: &[u8], min_size: usize) -> usize { if data.len() <= min_size { return data.len(); } // Use a simple pattern: look for WASM section boundaries // WASM sections start with a single byte ID followed by LEB128 length for i in min_size..data.len() { // Common WASM section markers if i > 0 && matches!(data[i - 1], 0x00 | 0x01 | 0x02 | 0x03 | 0x07 | 0x0A) { // Likely a section boundary return i; } } data.len() } /// Computes delta between two bytecode versions. 
pub fn compute_delta(&self, old: &[u8], new: &[u8]) -> Result { if !self.config.delta_encoding { return Err(VmError::ExecutionError( "Delta encoding disabled".to_string(), )); } // Simple delta: find common prefix/suffix let prefix_len = old .iter() .zip(new.iter()) .take_while(|(a, b)| a == b) .count(); let suffix_len = old .iter() .rev() .zip(new.iter().rev()) .take_while(|(a, b)| a == b) .count(); // Ensure suffix doesn't overlap with prefix let suffix_len = suffix_len.min(old.len().saturating_sub(prefix_len)); let suffix_len = suffix_len.min(new.len().saturating_sub(prefix_len)); let changed_start = prefix_len; let old_changed_end = old.len() - suffix_len; let new_changed_end = new.len() - suffix_len; let replacement = new[changed_start..new_changed_end].to_vec(); let old_hash: [u8; 32] = blake3::hash(old).into(); let new_hash: [u8; 32] = blake3::hash(new).into(); Ok(DeltaPatch { old_hash, new_hash, prefix_len, suffix_len, removed_len: old_changed_end - changed_start, replacement, }) } /// Applies a delta patch to reconstruct new bytecode. pub fn apply_delta(&self, old: &[u8], patch: &DeltaPatch) -> Result, VmError> { // Verify old hash let old_hash: [u8; 32] = blake3::hash(old).into(); if old_hash != patch.old_hash { return Err(VmError::ExecutionError( "Delta patch: old hash mismatch".to_string(), )); } let prefix = &old[..patch.prefix_len]; let suffix_start = old.len() - patch.suffix_len; let suffix = &old[suffix_start..]; let mut new = Vec::with_capacity(prefix.len() + patch.replacement.len() + suffix.len()); new.extend_from_slice(prefix); new.extend_from_slice(&patch.replacement); new.extend_from_slice(suffix); // Verify new hash let new_hash: [u8; 32] = blake3::hash(&new).into(); if new_hash != patch.new_hash { return Err(VmError::ExecutionError( "Delta patch: new hash mismatch".to_string(), )); } Ok(new) } /// Returns compression statistics. pub fn stats(&self) -> CompressionStats { self.stats.read().clone() } /// Returns number of shared fragments. 
pub fn fragment_count(&self) -> usize { self.fragments.read().len() } /// Clears fragment cache. pub fn clear_fragments(&self) { self.fragments.write().clear(); self.stats.write().shared_fragments = 0; } } impl Default for BytecodeCompressor { fn default() -> Self { Self::new() } } /// A delta patch between two bytecode versions. #[derive(Clone, Debug)] pub struct DeltaPatch { /// Hash of old bytecode. pub old_hash: [u8; 32], /// Hash of new bytecode. pub new_hash: [u8; 32], /// Length of unchanged prefix. pub prefix_len: usize, /// Length of unchanged suffix. pub suffix_len: usize, /// Length of removed section. pub removed_len: usize, /// Replacement data. pub replacement: Vec, } impl DeltaPatch { /// Returns the size of the patch. pub fn size(&self) -> usize { 64 + 24 + self.replacement.len() // hashes + lengths + data } /// Returns estimated savings compared to full bytecode. pub fn savings(&self, new_size: usize) -> usize { new_size.saturating_sub(self.size()) } } /// Compressed contract storage. pub struct CompressedContractStore { /// Compressor instance. compressor: BytecodeCompressor, /// Stored contracts. contracts: RwLock>, /// Delta patches for upgrades. patches: RwLock>, } impl CompressedContractStore { /// Creates a new store. pub fn new() -> Self { Self::with_config(CompressionConfig::default()) } /// Creates with custom configuration. pub fn with_config(config: CompressionConfig) -> Self { Self { compressor: BytecodeCompressor::with_config(config), contracts: RwLock::new(HashMap::new()), patches: RwLock::new(HashMap::new()), } } /// Stores a contract. pub fn store(&self, id: ContractId, bytecode: &[u8]) -> Result { let compressed = self.compressor.compress(bytecode)?; self.contracts.write().insert(id, compressed.clone()); Ok(compressed) } /// Retrieves a contract. 
pub fn get(&self, id: &ContractId) -> Result<Vec<u8>, VmError> {
    let contracts = self.contracts.read();
    let compressed = contracts
        .get(id)
        .ok_or(VmError::ContractNotFound(*id))?;
    self.compressor.decompress(compressed)
}

/// Stores an upgrade as a delta.
///
/// The full compressed bytecode for `new_id` is stored as well, so the
/// upgraded contract stays directly retrievable without applying the
/// patch chain.
pub fn store_upgrade(
    &self,
    old_id: ContractId,
    new_id: ContractId,
    new_bytecode: &[u8],
) -> Result<(), VmError> {
    let old_bytecode = self.get(&old_id)?;
    let patch = self.compressor.compute_delta(&old_bytecode, new_bytecode)?;

    // Store the patch keyed by the (old, new) pair.
    self.patches.write().insert((old_id, new_id), patch);

    // Also store the full compressed form for direct access.
    self.store(new_id, new_bytecode)?;

    Ok(())
}

/// Returns store statistics.
pub fn stats(&self) -> CompressionStats {
    self.compressor.stats()
}

/// Returns number of stored contracts.
pub fn contract_count(&self) -> usize {
    self.contracts.read().len()
}
}

impl Default for CompressedContractStore {
    fn default() -> Self {
        Self::new()
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a deterministic contract id filled with `id` bytes.
    fn make_contract(id: u8) -> ContractId {
        ContractId::from_bytes([id; 32])
    }

    /// Minimal WASM-like payload (magic + version + zero padding) that
    /// compresses well.
    fn sample_wasm() -> Vec<u8> {
        let mut wasm = vec![
            0x00, 0x61, 0x73, 0x6D, // WASM magic
            0x01, 0x00, 0x00, 0x00, // Version 1
        ];
        // Padding makes the payload highly compressible.
        wasm.extend(vec![0x00; 1000]);
        wasm
    }

    #[test]
    fn test_compression() {
        let compressor = BytecodeCompressor::new();
        let wasm = sample_wasm();

        let compressed = compressor.compress(&wasm).unwrap();
        assert!(compressed.ratio < 1.0); // Should be smaller
        assert_eq!(compressed.original_size, wasm.len());
    }

    #[test]
    fn test_roundtrip() {
        let compressor = BytecodeCompressor::new();
        let wasm = sample_wasm();

        let compressed = compressor.compress(&wasm).unwrap();
        let decompressed = compressor.decompress(&compressed).unwrap();
        assert_eq!(wasm, decompressed);
    }

    #[test]
    fn test_no_compression() {
        let config = CompressionConfig {
            level: CompressionLevel::None,
            ..Default::default()
        };
        let compressor = BytecodeCompressor::with_config(config);
        let wasm = sample_wasm();

        let compressed = compressor.compress(&wasm).unwrap();
        assert_eq!(compressed.data.len(), wasm.len());
        assert_eq!(compressed.ratio, 1.0);
    }

    #[test]
    fn test_delta_patch() {
        let compressor = BytecodeCompressor::new();
        let old = b"Hello, World!";
        let new = b"Hello, Rust!";

        let patch = compressor.compute_delta(old, new).unwrap();
        let reconstructed = compressor.apply_delta(old, &patch).unwrap();
        assert_eq!(new.as_slice(), reconstructed.as_slice());
    }

    #[test]
    fn test_delta_savings() {
        let compressor = BytecodeCompressor::new();

        // Large, nearly identical bytecodes.
        let mut old = vec![0u8; 10000];
        old[0..4].copy_from_slice(&[0x00, 0x61, 0x73, 0x6D]);
        let mut new = old.clone();
        new[5000..5010].copy_from_slice(&[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]);

        let patch = compressor.compute_delta(&old, &new).unwrap();
        // Patch should be much smaller than the full bytecode.
        assert!(patch.size() < new.len() / 2);
    }

    #[test]
    fn test_store() {
        let store = CompressedContractStore::new();
        let id = make_contract(1);
        let wasm = sample_wasm();

        store.store(id, &wasm).unwrap();
        let retrieved = store.get(&id).unwrap();
        assert_eq!(wasm, retrieved);
    }

    #[test]
    fn test_stats() {
        let compressor = BytecodeCompressor::new();
        let wasm = sample_wasm();

        compressor.compress(&wasm).unwrap();
        let stats = compressor.stats();
        assert_eq!(stats.contracts_compressed, 1);
        assert!(stats.compression_ratio() < 1.0);
    }

    #[test]
    fn test_compression_levels() {
        let wasm = sample_wasm();
        let configs = [
            CompressionLevel::Fast,
            CompressionLevel::Balanced,
            CompressionLevel::Maximum,
        ];

        let mut sizes = Vec::new();
        for level in configs {
            let config = CompressionConfig {
                level,
                deduplication: false,
                ..Default::default()
            };
            let compressor = BytecodeCompressor::with_config(config);
            let compressed = compressor.compress(&wasm).unwrap();
            sizes.push(compressed.data.len());
        }

        // Maximum should be smallest (or equal).
        assert!(sizes[2] <= sizes[0]);
    }
}