// synor/crates/synor-vm/src/compression.rs
// 2026-02-02 05:58:22 +05:30
// 630 lines, 18 KiB, Rust
//! Bytecode Compression and Deduplication.
//!
//! Optimizes contract storage through:
//!
//! - **Zstd Compression**: 40-70% size reduction for WASM bytecode
//! - **Code Deduplication**: Hash-based fragment sharing across contracts
//! - **Delta Encoding**: Efficient storage of contract upgrades
//!
//! # Benefits
//!
//! - Reduced storage costs for deployers
//! - Faster network sync (smaller blocks)
//! - Lower memory footprint for node operators
//! - Efficient contract upgrade storage
use std::collections::HashMap;
use std::sync::Arc;
use parking_lot::RwLock;
use crate::{ContractId, VmError};
/// Compression level for bytecode storage.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
pub enum CompressionLevel {
    /// No compression.
    None,
    /// Fast compression (level 1).
    Fast,
    /// Balanced compression (level 3).
    #[default]
    Balanced,
    /// Maximum compression (level 19).
    Maximum,
}

impl CompressionLevel {
    /// Maps this level to the numeric zstd compression level.
    ///
    /// `None` maps to 0, but `compress` short-circuits before calling zstd
    /// when compression is disabled, so zstd never sees that value.
    fn zstd_level(&self) -> i32 {
        match *self {
            Self::None => 0,
            Self::Fast => 1,
            Self::Balanced => 3,
            Self::Maximum => 19,
        }
    }
}
/// Configuration for bytecode compression.
///
/// See the `Default` impl for the default values.
#[derive(Clone, Debug)]
pub struct CompressionConfig {
    /// Compression level handed to zstd.
    pub level: CompressionLevel,
    /// Enable hash-based fragment deduplication.
    pub deduplication: bool,
    /// Minimum fragment size for deduplication (bytes); inputs smaller
    /// than this are not chunked at all.
    pub min_fragment_size: usize,
    /// Maximum fragment size (bytes); chunking never emits a larger fragment.
    pub max_fragment_size: usize,
    /// Enable delta encoding for contract upgrades.
    pub delta_encoding: bool,
}
impl Default for CompressionConfig {
fn default() -> Self {
Self {
level: CompressionLevel::Balanced,
deduplication: true,
min_fragment_size: 256,
max_fragment_size: 16384,
delta_encoding: true,
}
}
}
/// Compressed bytecode with metadata.
///
/// Produced by `BytecodeCompressor::compress`; `hash` is computed over the
/// *original* (uncompressed) bytes.
#[derive(Clone, Debug)]
pub struct CompressedBytecode {
    /// Original size before compression, in bytes.
    pub original_size: usize,
    /// Compressed data (the raw input bytes when compression is disabled).
    pub data: Vec<u8>,
    /// Compression ratio (compressed/original); lower is better.
    pub ratio: f64,
    /// blake3 content hash of the original bytecode.
    pub hash: [u8; 32],
    /// Fragment references (empty when deduplication is disabled).
    pub fragments: Vec<FragmentRef>,
}
/// Reference to a shared code fragment in the compressor's fragment store.
#[derive(Clone, Debug)]
pub struct FragmentRef {
    /// blake3 hash of the fragment bytes (key into the fragment store).
    pub hash: [u8; 32],
    /// Byte offset of the fragment within the original bytecode.
    pub offset: usize,
    /// Length of the fragment in bytes.
    pub length: usize,
}
/// A shared code fragment, stored once and referenced by many contracts.
#[derive(Clone, Debug)]
pub struct CodeFragment {
    /// blake3 hash of `data`.
    pub hash: [u8; 32],
    /// Fragment bytes.
    pub data: Vec<u8>,
    /// Number of times this fragment has been referenced.
    // NOTE(review): incremented via `Arc::get_mut`, which fails silently
    // when other clones of the Arc are alive — treat this as a lower bound.
    pub ref_count: u32,
}
/// Statistics for compression operations.
#[derive(Clone, Debug, Default)]
pub struct CompressionStats {
    /// Total bytes before compression.
    pub total_original: u64,
    /// Total bytes after compression.
    pub total_compressed: u64,
    /// Number of contracts compressed.
    pub contracts_compressed: u64,
    /// Number of shared fragments.
    pub shared_fragments: u64,
    /// Bytes saved through deduplication.
    pub dedup_savings: u64,
}

impl CompressionStats {
    /// Overall compression ratio (compressed/original).
    ///
    /// Defined as `1.0` when nothing has been compressed yet, so callers
    /// never see a division by zero.
    pub fn compression_ratio(&self) -> f64 {
        match self.total_original {
            0 => 1.0,
            original => self.total_compressed as f64 / original as f64,
        }
    }

    /// Total bytes saved by compression plus deduplication.
    pub fn bytes_saved(&self) -> u64 {
        let compression_savings = self.total_original.saturating_sub(self.total_compressed);
        self.dedup_savings + compression_savings
    }
}
/// The bytecode compressor.
///
/// All mutable state is behind `parking_lot::RwLock`s, so a single instance
/// can be shared across threads through `&self`.
pub struct BytecodeCompressor {
    /// Configuration (fixed at construction time).
    config: CompressionConfig,
    /// Shared fragment store, keyed by blake3 fragment hash.
    fragments: RwLock<HashMap<[u8; 32], Arc<CodeFragment>>>,
    /// Running compression statistics.
    stats: RwLock<CompressionStats>,
}
impl BytecodeCompressor {
    /// Creates a new compressor with the default configuration.
    pub fn new() -> Self {
        Self::with_config(CompressionConfig::default())
    }

    /// Creates a compressor with a custom configuration.
    pub fn with_config(config: CompressionConfig) -> Self {
        Self {
            config,
            fragments: RwLock::new(HashMap::new()),
            stats: RwLock::new(CompressionStats::default()),
        }
    }

    /// Compresses bytecode with zstd and, when deduplication is enabled,
    /// registers its fragments in the shared fragment store.
    ///
    /// # Errors
    ///
    /// Returns `VmError::ExecutionError` if zstd compression fails.
    pub fn compress(&self, bytecode: &[u8]) -> Result<CompressedBytecode, VmError> {
        let original_size = bytecode.len();
        // The hash covers the *original* bytes so decompression can verify
        // its result against it.
        let hash = blake3::hash(bytecode).into();
        // Compress with zstd unless compression is disabled.
        let compressed = if self.config.level != CompressionLevel::None {
            zstd::encode_all(bytecode, self.config.level.zstd_level())
                .map_err(|e| VmError::ExecutionError(format!("Compression failed: {}", e)))?
        } else {
            bytecode.to_vec()
        };
        // Guard empty input: `len / 0` would make the ratio NaN.
        let ratio = if original_size == 0 {
            1.0
        } else {
            compressed.len() as f64 / original_size as f64
        };
        // Extract fragments for deduplication.
        let fragments = if self.config.deduplication {
            self.extract_fragments(bytecode)
        } else {
            Vec::new()
        };
        // Update running totals under a short-lived write lock.
        {
            let mut stats = self.stats.write();
            stats.total_original += original_size as u64;
            stats.total_compressed += compressed.len() as u64;
            stats.contracts_compressed += 1;
        }
        Ok(CompressedBytecode {
            original_size,
            data: compressed,
            ratio,
            hash,
            fragments,
        })
    }

    /// Decompresses bytecode and verifies the content hash of the result.
    ///
    /// # Errors
    ///
    /// Returns `VmError::ExecutionError` if zstd decoding fails, or if the
    /// decompressed bytes do not match the hash recorded at compression time
    /// (corruption, or a compressor configured with a different level than
    /// the one that produced `compressed`).
    pub fn decompress(&self, compressed: &CompressedBytecode) -> Result<Vec<u8>, VmError> {
        let raw = if self.config.level == CompressionLevel::None {
            compressed.data.clone()
        } else {
            zstd::decode_all(compressed.data.as_slice())
                .map_err(|e| VmError::ExecutionError(format!("Decompression failed: {}", e)))?
        };
        // Integrity check: `compressed.hash` was taken over the original
        // bytes in `compress`, so any mismatch here is a hard error rather
        // than silently wrong bytecode.
        let actual: [u8; 32] = blake3::hash(&raw).into();
        if actual != compressed.hash {
            return Err(VmError::ExecutionError(
                "Decompression failed: content hash mismatch".to_string(),
            ));
        }
        Ok(raw)
    }

    /// Splits bytecode into content-defined chunks, records each chunk in
    /// the shared fragment store, and returns references into the input.
    fn extract_fragments(&self, bytecode: &[u8]) -> Vec<FragmentRef> {
        let mut fragments = Vec::new();
        let min_size = self.config.min_fragment_size;
        let max_size = self.config.max_fragment_size;
        // Inputs smaller than one minimum fragment are not worth chunking.
        if bytecode.len() < min_size {
            return fragments;
        }
        let mut dedup_savings = 0u64;
        {
            // Take the fragment-store write lock once for the whole pass
            // instead of re-acquiring it for every chunk.
            let mut frags = self.fragments.write();
            use std::collections::hash_map::Entry;
            let mut offset = 0;
            while offset < bytecode.len() {
                let end = (offset + max_size).min(bytecode.len());
                let chunk_end = self.find_chunk_boundary(&bytecode[offset..end], min_size);
                let chunk = &bytecode[offset..offset + chunk_end];
                let hash: [u8; 32] = blake3::hash(chunk).into();
                // Store the fragment if new, otherwise count the shared bytes.
                match frags.entry(hash) {
                    Entry::Vacant(e) => {
                        e.insert(Arc::new(CodeFragment {
                            hash,
                            data: chunk.to_vec(),
                            ref_count: 1,
                        }));
                    }
                    Entry::Occupied(mut e) => {
                        // The chunk's bytes are already stored, so they count
                        // as deduplication savings.
                        dedup_savings += chunk.len() as u64;
                        // NOTE(review): `Arc::get_mut` fails silently while
                        // other clones of this fragment are alive, so
                        // `ref_count` is only a lower bound; an atomic
                        // counter would make it exact.
                        if let Some(inner) = Arc::get_mut(e.get_mut()) {
                            inner.ref_count += 1;
                        }
                    }
                }
                fragments.push(FragmentRef {
                    hash,
                    offset,
                    length: chunk_end,
                });
                offset += chunk_end;
            }
        }
        // Update stats after releasing the fragment write lock.
        {
            let mut stats = self.stats.write();
            stats.shared_fragments = self.fragments.read().len() as u64;
            stats.dedup_savings += dedup_savings;
        }
        fragments
    }

    /// Finds a chunk boundary at or after `min_size`.
    ///
    /// Heuristic: WASM sections start with a one-byte section ID, so a
    /// position right after a common section-ID byte is treated as a likely
    /// boundary. Falls back to the full slice length when none is found.
    fn find_chunk_boundary(&self, data: &[u8], min_size: usize) -> usize {
        if data.len() <= min_size {
            return data.len();
        }
        for i in min_size..data.len() {
            // Common WASM section IDs: custom(0), type(1), import(2),
            // function(3), export(7), code(10).
            if i > 0 && matches!(data[i - 1], 0x00 | 0x01 | 0x02 | 0x03 | 0x07 | 0x0A) {
                // Likely a section boundary.
                return i;
            }
        }
        data.len()
    }

    /// Computes a delta between two bytecode versions as a single
    /// prefix/suffix-preserving replacement patch.
    ///
    /// # Errors
    ///
    /// Returns `VmError::ExecutionError` when delta encoding is disabled in
    /// the configuration.
    pub fn compute_delta(&self, old: &[u8], new: &[u8]) -> Result<DeltaPatch, VmError> {
        if !self.config.delta_encoding {
            return Err(VmError::ExecutionError(
                "Delta encoding disabled".to_string(),
            ));
        }
        // Longest common prefix.
        let prefix_len = old
            .iter()
            .zip(new.iter())
            .take_while(|(a, b)| a == b)
            .count();
        // Longest common suffix.
        let suffix_len = old
            .iter()
            .rev()
            .zip(new.iter().rev())
            .take_while(|(a, b)| a == b)
            .count();
        // Clamp the suffix so it never overlaps the prefix in either input.
        let suffix_len = suffix_len.min(old.len().saturating_sub(prefix_len));
        let suffix_len = suffix_len.min(new.len().saturating_sub(prefix_len));
        let changed_start = prefix_len;
        let old_changed_end = old.len() - suffix_len;
        let new_changed_end = new.len() - suffix_len;
        let replacement = new[changed_start..new_changed_end].to_vec();
        let old_hash: [u8; 32] = blake3::hash(old).into();
        let new_hash: [u8; 32] = blake3::hash(new).into();
        Ok(DeltaPatch {
            old_hash,
            new_hash,
            prefix_len,
            suffix_len,
            removed_len: old_changed_end - changed_start,
            replacement,
        })
    }

    /// Applies a delta patch to reconstruct the new bytecode.
    ///
    /// # Errors
    ///
    /// Returns `VmError::ExecutionError` when `old` does not match the
    /// patch's recorded old hash, when the patch lengths are out of bounds
    /// for `old`, or when the reconstructed bytes fail the new-hash check.
    pub fn apply_delta(&self, old: &[u8], patch: &DeltaPatch) -> Result<Vec<u8>, VmError> {
        // Verify we are patching the bytecode this patch was computed from.
        let old_hash: [u8; 32] = blake3::hash(old).into();
        if old_hash != patch.old_hash {
            return Err(VmError::ExecutionError(
                "Delta patch: old hash mismatch".to_string(),
            ));
        }
        // Validate the patch lengths before slicing so a malformed patch
        // yields an error instead of a panic (slice OOB / usize underflow).
        let covered = patch
            .prefix_len
            .checked_add(patch.suffix_len)
            .ok_or_else(|| VmError::ExecutionError("Delta patch: length overflow".to_string()))?;
        if covered > old.len() {
            return Err(VmError::ExecutionError(
                "Delta patch: prefix/suffix exceed old bytecode".to_string(),
            ));
        }
        let prefix = &old[..patch.prefix_len];
        let suffix = &old[old.len() - patch.suffix_len..];
        let mut new = Vec::with_capacity(prefix.len() + patch.replacement.len() + suffix.len());
        new.extend_from_slice(prefix);
        new.extend_from_slice(&patch.replacement);
        new.extend_from_slice(suffix);
        // Verify the reconstruction against the recorded new hash.
        let new_hash: [u8; 32] = blake3::hash(&new).into();
        if new_hash != patch.new_hash {
            return Err(VmError::ExecutionError(
                "Delta patch: new hash mismatch".to_string(),
            ));
        }
        Ok(new)
    }

    /// Returns a snapshot of the compression statistics.
    pub fn stats(&self) -> CompressionStats {
        self.stats.read().clone()
    }

    /// Returns the number of distinct shared fragments.
    pub fn fragment_count(&self) -> usize {
        self.fragments.read().len()
    }

    /// Clears the fragment cache and resets the shared-fragment counter.
    pub fn clear_fragments(&self) {
        self.fragments.write().clear();
        self.stats.write().shared_fragments = 0;
    }
}
impl Default for BytecodeCompressor {
fn default() -> Self {
Self::new()
}
}
/// A delta patch between two bytecode versions.
#[derive(Clone, Debug)]
pub struct DeltaPatch {
    /// Hash of old bytecode.
    pub old_hash: [u8; 32],
    /// Hash of new bytecode.
    pub new_hash: [u8; 32],
    /// Length of unchanged prefix.
    pub prefix_len: usize,
    /// Length of unchanged suffix.
    pub suffix_len: usize,
    /// Length of removed section.
    pub removed_len: usize,
    /// Replacement data.
    pub replacement: Vec<u8>,
}

impl DeltaPatch {
    /// Estimated serialized size of this patch in bytes.
    pub fn size(&self) -> usize {
        // Two 32-byte hashes plus three 8-byte length fields plus payload.
        const HASH_BYTES: usize = 64;
        const LENGTH_BYTES: usize = 24;
        HASH_BYTES + LENGTH_BYTES + self.replacement.len()
    }

    /// Estimated bytes saved versus storing the full new bytecode.
    pub fn savings(&self, new_size: usize) -> usize {
        new_size.saturating_sub(self.size())
    }
}
/// Compressed contract storage.
///
/// Wraps a `BytecodeCompressor` with per-contract storage and delta patches
/// for upgrades; all maps are behind `RwLock`s for shared use via `&self`.
pub struct CompressedContractStore {
    /// Compressor instance shared by all stored contracts.
    compressor: BytecodeCompressor,
    /// Stored contracts, keyed by contract id.
    contracts: RwLock<HashMap<ContractId, CompressedBytecode>>,
    /// Delta patches for upgrades, keyed by (old id, new id).
    patches: RwLock<HashMap<(ContractId, ContractId), DeltaPatch>>,
}
impl CompressedContractStore {
/// Creates a new store.
pub fn new() -> Self {
Self::with_config(CompressionConfig::default())
}
/// Creates with custom configuration.
pub fn with_config(config: CompressionConfig) -> Self {
Self {
compressor: BytecodeCompressor::with_config(config),
contracts: RwLock::new(HashMap::new()),
patches: RwLock::new(HashMap::new()),
}
}
/// Stores a contract.
pub fn store(&self, id: ContractId, bytecode: &[u8]) -> Result<CompressedBytecode, VmError> {
let compressed = self.compressor.compress(bytecode)?;
self.contracts.write().insert(id, compressed.clone());
Ok(compressed)
}
/// Retrieves a contract.
pub fn get(&self, id: &ContractId) -> Result<Vec<u8>, VmError> {
let contracts = self.contracts.read();
let compressed = contracts.get(id).ok_or(VmError::ContractNotFound(*id))?;
self.compressor.decompress(compressed)
}
/// Stores an upgrade as a delta.
pub fn store_upgrade(
&self,
old_id: ContractId,
new_id: ContractId,
new_bytecode: &[u8],
) -> Result<(), VmError> {
let old_bytecode = self.get(&old_id)?;
let patch = self.compressor.compute_delta(&old_bytecode, new_bytecode)?;
// Store patch
self.patches.write().insert((old_id, new_id), patch);
// Also store full compressed for direct access
self.store(new_id, new_bytecode)?;
Ok(())
}
/// Returns store statistics.
pub fn stats(&self) -> CompressionStats {
self.compressor.stats()
}
/// Returns number of stored contracts.
pub fn contract_count(&self) -> usize {
self.contracts.read().len()
}
}
impl Default for CompressedContractStore {
fn default() -> Self {
Self::new()
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a deterministic `ContractId` whose 32 bytes all equal `id`.
    fn make_contract(id: u8) -> ContractId {
        ContractId::from_bytes([id; 32])
    }

    /// Builds a minimal WASM header followed by 1000 zero bytes so the
    /// payload is highly compressible.
    fn sample_wasm() -> Vec<u8> {
        // Minimal WASM with some repeated content
        let mut wasm = vec![
            0x00, 0x61, 0x73, 0x6D, // WASM magic
            0x01, 0x00, 0x00, 0x00, // Version 1
        ];
        // Add some padding to make it compressible
        wasm.extend(vec![0x00; 1000]);
        wasm
    }

    // Compressing the zero-padded sample must shrink it and record the
    // original size.
    #[test]
    fn test_compression() {
        let compressor = BytecodeCompressor::new();
        let wasm = sample_wasm();
        let compressed = compressor.compress(&wasm).unwrap();
        assert!(compressed.ratio < 1.0); // Should be smaller
        assert_eq!(compressed.original_size, wasm.len());
    }

    // compress followed by decompress must be lossless.
    #[test]
    fn test_roundtrip() {
        let compressor = BytecodeCompressor::new();
        let wasm = sample_wasm();
        let compressed = compressor.compress(&wasm).unwrap();
        let decompressed = compressor.decompress(&compressed).unwrap();
        assert_eq!(wasm, decompressed);
    }

    // With CompressionLevel::None the stored data is the raw input and the
    // ratio is exactly 1.0.
    #[test]
    fn test_no_compression() {
        let config = CompressionConfig {
            level: CompressionLevel::None,
            ..Default::default()
        };
        let compressor = BytecodeCompressor::with_config(config);
        let wasm = sample_wasm();
        let compressed = compressor.compress(&wasm).unwrap();
        assert_eq!(compressed.data.len(), wasm.len());
        assert_eq!(compressed.ratio, 1.0);
    }

    // A patch computed between two versions must reconstruct the new
    // version exactly when applied to the old one.
    #[test]
    fn test_delta_patch() {
        let compressor = BytecodeCompressor::new();
        let old = b"Hello, World!";
        let new = b"Hello, Rust!";
        let patch = compressor.compute_delta(old, new).unwrap();
        let reconstructed = compressor.apply_delta(old, &patch).unwrap();
        assert_eq!(new.as_slice(), reconstructed.as_slice());
    }

    // For a small localized change, the patch must be far smaller than the
    // full new bytecode.
    #[test]
    fn test_delta_savings() {
        let compressor = BytecodeCompressor::new();
        // Large similar bytecodes
        let mut old = vec![0u8; 10000];
        old[0..4].copy_from_slice(&[0x00, 0x61, 0x73, 0x6D]);
        let mut new = old.clone();
        new[5000..5010].copy_from_slice(&[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]);
        let patch = compressor.compute_delta(&old, &new).unwrap();
        // Patch should be much smaller than full bytecode
        assert!(patch.size() < new.len() / 2);
    }

    // Store round-trip: what goes into the store comes back out unchanged.
    #[test]
    fn test_store() {
        let store = CompressedContractStore::new();
        let id = make_contract(1);
        let wasm = sample_wasm();
        store.store(id, &wasm).unwrap();
        let retrieved = store.get(&id).unwrap();
        assert_eq!(wasm, retrieved);
    }

    // One compress call must be reflected in the running statistics.
    #[test]
    fn test_stats() {
        let compressor = BytecodeCompressor::new();
        let wasm = sample_wasm();
        compressor.compress(&wasm).unwrap();
        let stats = compressor.stats();
        assert_eq!(stats.contracts_compressed, 1);
        assert!(stats.compression_ratio() < 1.0);
    }

    // Higher compression levels should never produce larger output than
    // lower ones for the same input.
    #[test]
    fn test_compression_levels() {
        let wasm = sample_wasm();
        let configs = [
            CompressionLevel::Fast,
            CompressionLevel::Balanced,
            CompressionLevel::Maximum,
        ];
        let mut sizes = Vec::new();
        for level in configs {
            let config = CompressionConfig {
                level,
                // Deduplication is irrelevant to output size here.
                deduplication: false,
                ..Default::default()
            };
            let compressor = BytecodeCompressor::with_config(config);
            let compressed = compressor.compress(&wasm).unwrap();
            sizes.push(compressed.data.len());
        }
        // Maximum should be smallest (or equal)
        assert!(sizes[2] <= sizes[0]);
    }
}