//! Synor Compute L2 - Heterogeneous Multi-Processor Compute Platform //! //! Provides decentralized compute services with: //! //! - **Heterogeneous Scheduling**: CPU + GPU + TPU + NPU + LPU working simultaneously //! - **Consumer Device Mesh**: Mobile, browser, desktop devices contributing compute //! - **90% Cost Reduction**: Zero margins, spot markets, electricity arbitrage //! - **10x Speed**: Caching, speculative execution, optimal processor assignment //! //! # Architecture //! //! ```text //! ┌─────────────────────────────────────────────────────────────────────────────┐ //! │ SYNOR COMPUTE L2 │ //! ├─────────────────────────────────────────────────────────────────────────────┤ //! │ │ //! │ ┌─────────────────────────────────────────────────────────────────────────┐ │ //! │ │ TASK DECOMPOSER │ │ //! │ │ Analyzes workload → Identifies subtasks → Maps to optimal processors │ │ //! │ └─────────────────────────────────────────────────────────────────────────┘ │ //! │ │ │ //! │ ▼ │ //! │ ┌─────────────────────────────────────────────────────────────────────────┐ │ //! │ │ HETEROGENEOUS SCHEDULER │ │ //! │ │ ┌──────┐ ┌──────┐ ┌──────┐ ┌──────┐ ┌──────┐ ┌──────┐ │ │ //! │ │ │ CPU │ │ GPU │ │ TPU │ │ NPU │ │ LPU │ │Custom│ │ │ //! │ │ │Queue │ │Queue │ │Queue │ │Queue │ │Queue │ │Queue │ │ │ //! │ │ └──────┘ └──────┘ └──────┘ └──────┘ └──────┘ └──────┘ │ │ //! │ └─────────────────────────────────────────────────────────────────────────┘ │ //! │ │ //! │ ┌─────────────────────────────────────────────────────────────────────────┐ │ //! │ │ UNIFIED MEMORY FABRIC │ │ //! │ │ Zero-copy data sharing │ Automatic placement │ Cache coherency │ │ //! │ └─────────────────────────────────────────────────────────────────────────┘ │ //! │ │ //! └─────────────────────────────────────────────────────────────────────────────┘ //! ``` //! //! # Pricing //! //! | Resource | Unit | Price (SYNOR) | //! |----------|------|---------------| //! | GPU (consumer) | hour | 0.10 | //! | GPU (datacenter) | hour | 0.50-4.00 | //! | CPU | core/hour | 0.02 | //! | Memory | GB/hour | 0.005 | //! | Inference | 1M tokens | 0.10 | #![allow(dead_code)] pub mod device; pub mod error; pub mod market; pub mod memory; pub mod model; pub mod processor; pub mod scheduler; pub mod task; pub use device::{ DeviceCapabilities, DeviceId, DeviceInfo, DeviceRegistry, DeviceStatus, DeviceType, }; pub use error::ComputeError; pub use market::{ Auction, AuctionId, CloudComparison, CpuTier as MarketCpuTier, GpuTier as MarketGpuTier, MarketStats, Order, OrderBook, OrderId, OrderSide, OrderType, PricingEngine, ProviderListing, ResourceType, SpotMarket, Trade, }; pub use memory::{MemoryManager, TensorHandle, TransferPath, UnifiedMemory}; pub use model::{ ModelCategory, ModelFormat, ModelId, ModelInfo, ModelRegistry, ModelUploadRequest, ModelUploadResponse, }; pub use processor::{ ComputeThroughput, CpuVariant, GpuVariant, NpuVariant, Operation, OperationType, Processor, ProcessorCapabilities, ProcessorId, ProcessorType, TpuVersion, }; pub use scheduler::{ HeterogeneousScheduler, LoadBalancer, Schedule, ScheduleResult, TaskAssignment, WorkQueue, }; pub use task::{ ComputeTask, DecomposedWorkload, Task, TaskDecomposer, TaskId, TaskPriority, TaskResult, TaskStatus, }; use serde::{Deserialize, Serialize}; use std::collections::HashMap; use std::sync::Arc; use parking_lot::RwLock; /// Compute node identifier. #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, Serialize, Deserialize)] pub struct NodeId(pub u64); impl std::fmt::Display for NodeId { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!(f, "node_{}", self.0) } } /// Job identifier. #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, Serialize, Deserialize)] pub struct JobId(pub [u8; 32]); impl JobId { /// Creates a new job ID. pub fn new() -> Self { use rand::Rng; let mut bytes = [0u8; 32]; rand::thread_rng().fill(&mut bytes); JobId(bytes) } /// Creates from bytes. pub fn from_bytes(bytes: [u8; 32]) -> Self { JobId(bytes) } } impl Default for JobId { fn default() -> Self { Self::new() } } impl std::fmt::Display for JobId { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!(f, "job_{}", hex::encode(&self.0[..8])) } } /// Compute job specification. #[derive(Clone, Debug, Serialize, Deserialize)] pub struct ComputeJob { /// Job ID. pub id: JobId, /// Owner address. pub owner: [u8; 32], /// Job type. pub job_type: JobType, /// Resource requirements. pub resources: ResourceRequirements, /// Input data reference (CID). pub input_cid: Option, /// Maximum budget (in atomic SYNOR). pub max_budget: u64, /// Priority level. pub priority: JobPriority, /// Created timestamp. pub created_at: u64, /// Deadline (optional). pub deadline: Option, } /// Job type classification. #[derive(Clone, Debug, Serialize, Deserialize)] pub enum JobType { /// AI/ML training job. Training { framework: MlFramework, model_cid: String, dataset_cid: String, epochs: u32, batch_size: u32, }, /// AI/ML inference job. Inference { model_cid: String, input_format: String, batch_size: u32, }, /// Container workload. Container { image: String, command: Vec, env: HashMap, }, /// Serverless function. Serverless { runtime: FunctionRuntime, code_cid: String, handler: String, }, /// General compute (WASM). Wasm { module_cid: String, entrypoint: String, }, } /// ML framework specification. #[derive(Clone, Debug, Serialize, Deserialize)] pub enum MlFramework { PyTorch { version: String }, TensorFlow { version: String }, JAX { version: String }, ONNX, } /// Function runtime. #[derive(Clone, Debug, Serialize, Deserialize)] pub enum FunctionRuntime { Node20, Python312, Rust, Go, Custom { image: String }, } /// Job priority levels. #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)] pub enum JobPriority { /// Background job, can be preempted. Background = 0, /// Normal priority. Normal = 1, /// High priority, faster scheduling. High = 2, /// Critical, guaranteed resources. Critical = 3, } impl Default for JobPriority { fn default() -> Self { JobPriority::Normal } } /// Resource requirements for a job. #[derive(Clone, Debug, Default, Serialize, Deserialize)] pub struct ResourceRequirements { /// Minimum CPU cores. pub min_cpu_cores: f32, /// Minimum memory (GB). pub min_memory_gb: f32, /// GPU requirements. pub gpu: Option, /// Preferred processor types (in priority order). pub preferred_processors: Vec, /// Maximum latency (ms) - for inference. pub max_latency_ms: Option, /// Requires distributed execution. pub distributed: bool, } /// GPU resource requirements. #[derive(Clone, Debug, Serialize, Deserialize)] pub struct GpuRequirements { /// Minimum number of GPUs. pub min_count: u32, /// Maximum number of GPUs. pub max_count: u32, /// Minimum VRAM per GPU (GB). pub min_vram_gb: u32, /// Minimum compute capability. pub min_compute_capability: Option<(u8, u8)>, /// Allow GPU sharing (MPS/MIG). pub allow_sharing: bool, } /// Job execution status. #[derive(Clone, Debug, Serialize, Deserialize)] pub enum JobStatus { /// Queued, waiting for resources. Queued, /// Resources allocated, starting. Starting, /// Running. Running { progress: f32, assigned_nodes: Vec, }, /// Completed successfully. Completed { result_cid: String, duration_ms: u64, cost: u64, }, /// Failed. Failed { error: String }, /// Cancelled by user. Cancelled, } /// Compute node registration. #[derive(Clone, Debug, Serialize, Deserialize)] pub struct ComputeNode { /// Node ID. pub id: NodeId, /// Owner address. pub owner: [u8; 32], /// Available processors. pub processors: Vec, /// Total memory (GB). pub total_memory_gb: f32, /// Available memory (GB). pub available_memory_gb: f32, /// Network bandwidth (Gbps). pub bandwidth_gbps: f32, /// Geographic region. pub region: String, /// Stake amount (for PoS). pub stake: u64, /// Reputation score (0-100). pub reputation: u32, /// Current status. pub status: NodeStatus, } /// Processor information on a node. #[derive(Clone, Debug, Serialize, Deserialize)] pub struct ProcessorInfo { /// Processor ID (local to node). pub id: ProcessorId, /// Processor type. pub processor_type: ProcessorType, /// Capabilities. pub capabilities: ProcessorCapabilities, /// Current utilization (0.0 - 1.0). pub utilization: f32, /// Current temperature (Celsius). pub temperature: Option, } /// Node status. #[derive(Clone, Copy, Debug, PartialEq, Eq, Serialize, Deserialize)] pub enum NodeStatus { /// Online and accepting jobs. Online, /// Online but not accepting new jobs. Draining, /// Offline. Offline, /// Maintenance mode. Maintenance, } /// Compute cluster manager. pub struct ComputeCluster { /// Registered nodes. nodes: RwLock>, /// Device registry. device_registry: Arc, /// Heterogeneous scheduler. scheduler: Arc, /// Spot market. spot_market: Arc, /// Memory manager. memory_manager: Arc, /// Active jobs. jobs: RwLock>, } impl ComputeCluster { /// Creates a new compute cluster. pub fn new() -> Self { let device_registry = Arc::new(DeviceRegistry::new()); let scheduler = Arc::new(HeterogeneousScheduler::new(device_registry.clone())); let spot_market = Arc::new(SpotMarket::new()); let memory_manager = Arc::new(MemoryManager::new()); Self { nodes: RwLock::new(HashMap::new()), device_registry, scheduler, spot_market, memory_manager, jobs: RwLock::new(HashMap::new()), } } /// Registers a compute node. pub fn register_node(&self, node: ComputeNode) -> Result<(), ComputeError> { let id = node.id; // Register processors with device registry for proc in &node.processors { self.device_registry.register_processor(id, proc.clone())?; } self.nodes.write().insert(id, node); Ok(()) } /// Unregisters a compute node. pub fn unregister_node(&self, node_id: NodeId) -> Result<(), ComputeError> { self.device_registry.unregister_node(node_id)?; self.nodes.write().remove(&node_id); Ok(()) } /// Submits a job for execution. pub async fn submit_job(&self, job: ComputeJob) -> Result { let job_id = job.id; // Decompose job into tasks let tasks = self.decompose_job(&job)?; // Schedule tasks let schedule = self.scheduler.schedule(tasks).await?; // Store job self.jobs.write().insert(job_id, job); // Execute schedule (async) tokio::spawn({ let scheduler = self.scheduler.clone(); async move { let _ = scheduler.execute(&schedule.schedule).await; } }); Ok(job_id) } /// Gets job status. pub fn get_job_status(&self, job_id: &JobId) -> Option { self.jobs.read().get(job_id).map(|_| JobStatus::Queued) } /// Cancels a job. pub fn cancel_job(&self, job_id: &JobId) -> Result<(), ComputeError> { if self.jobs.write().remove(job_id).is_some() { Ok(()) } else { Err(ComputeError::JobNotFound(*job_id)) } } /// Gets cluster statistics. pub fn stats(&self) -> ClusterStats { let nodes = self.nodes.read(); let jobs = self.jobs.read(); let total_nodes = nodes.len(); let online_nodes = nodes .values() .filter(|n| n.status == NodeStatus::Online) .count(); let total_gpus: usize = nodes .values() .flat_map(|n| &n.processors) .filter(|p| matches!(p.processor_type, ProcessorType::Gpu(_))) .count(); let total_memory: f32 = nodes.values().map(|n| n.total_memory_gb).sum(); ClusterStats { total_nodes, online_nodes, total_gpus, total_memory_gb: total_memory, active_jobs: jobs.len(), queued_jobs: jobs.values().filter(|_| true).count(), // Simplified } } /// Decomposes a job into schedulable tasks. fn decompose_job(&self, job: &ComputeJob) -> Result, ComputeError> { let decomposer = TaskDecomposer::new(); decomposer.decompose(job) } } impl Default for ComputeCluster { fn default() -> Self { Self::new() } } /// Cluster statistics. #[derive(Clone, Debug, Default, Serialize, Deserialize)] pub struct ClusterStats { /// Total registered nodes. pub total_nodes: usize, /// Online nodes. pub online_nodes: usize, /// Total GPUs across cluster. pub total_gpus: usize, /// Total memory (GB). pub total_memory_gb: f32, /// Active jobs. pub active_jobs: usize, /// Queued jobs. pub queued_jobs: usize, } /// Pricing calculator for compute operations. #[derive(Clone, Debug)] pub struct ComputePricing { /// GPU cost per hour by type. pub gpu_hourly: HashMap, /// CPU cost per core-hour. pub cpu_core_hour: u64, /// Memory cost per GB-hour. pub memory_gb_hour: u64, /// Network egress per GB. pub network_egress_gb: u64, /// Inference per million tokens. pub inference_per_million_tokens: u64, } /// GPU pricing tiers. #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, Serialize, Deserialize)] pub enum GpuTier { /// Consumer GPUs (RTX 30xx, 40xx). Consumer, /// Professional GPUs (RTX A series). Professional, /// Data center GPUs (A100). DataCenter, /// Latest generation (H100). Premium, } impl Default for ComputePricing { fn default() -> Self { let mut gpu_hourly = HashMap::new(); gpu_hourly.insert(GpuTier::Consumer, 100_000_000); // 0.10 SYNOR gpu_hourly.insert(GpuTier::Professional, 300_000_000); // 0.30 SYNOR gpu_hourly.insert(GpuTier::DataCenter, 2_000_000_000); // 2.00 SYNOR gpu_hourly.insert(GpuTier::Premium, 4_000_000_000); // 4.00 SYNOR Self { gpu_hourly, cpu_core_hour: 20_000_000, // 0.02 SYNOR memory_gb_hour: 5_000_000, // 0.005 SYNOR network_egress_gb: 50_000_000, // 0.05 SYNOR inference_per_million_tokens: 100_000_000, // 0.10 SYNOR } } } impl ComputePricing { /// Estimates cost for a job. pub fn estimate(&self, job: &ComputeJob, duration_hours: f32) -> u64 { let mut cost = 0u64; // CPU cost cost += (self.cpu_core_hour as f32 * job.resources.min_cpu_cores * duration_hours) as u64; // Memory cost cost += (self.memory_gb_hour as f32 * job.resources.min_memory_gb * duration_hours) as u64; // GPU cost if let Some(gpu) = &job.resources.gpu { let tier = GpuTier::Consumer; // Simplified let gpu_cost = self.gpu_hourly.get(&tier).unwrap_or(&100_000_000); cost += (*gpu_cost as f32 * gpu.min_count as f32 * duration_hours) as u64; } cost } } #[cfg(test)] mod tests { use super::*; #[test] fn test_job_id() { let id1 = JobId::new(); let id2 = JobId::new(); assert_ne!(id1.0, id2.0); } #[test] fn test_compute_cluster() { let cluster = ComputeCluster::new(); let stats = cluster.stats(); assert_eq!(stats.total_nodes, 0); } #[test] fn test_pricing() { let pricing = ComputePricing::default(); let job = ComputeJob { id: JobId::new(), owner: [0u8; 32], job_type: JobType::Inference { model_cid: "model123".to_string(), input_format: "json".to_string(), batch_size: 32, }, resources: ResourceRequirements { min_cpu_cores: 4.0, min_memory_gb: 16.0, gpu: Some(GpuRequirements { min_count: 1, max_count: 1, min_vram_gb: 16, min_compute_capability: None, allow_sharing: false, }), ..Default::default() }, input_cid: None, max_budget: 1_000_000_000, priority: JobPriority::Normal, created_at: 0, deadline: None, }; let cost = pricing.estimate(&job, 1.0); assert!(cost > 0); } #[test] fn test_node_registration() { let cluster = ComputeCluster::new(); let node = ComputeNode { id: NodeId(1), owner: [1u8; 32], processors: vec![ProcessorInfo { id: ProcessorId(0), processor_type: ProcessorType::Cpu(CpuVariant::X86_64 { avx: processor::AvxSupport::Avx512, }), capabilities: ProcessorCapabilities::default(), utilization: 0.0, temperature: Some(45.0), }], total_memory_gb: 64.0, available_memory_gb: 60.0, bandwidth_gbps: 10.0, region: "us-east".to_string(), stake: 1000, reputation: 100, status: NodeStatus::Online, }; cluster.register_node(node).unwrap(); assert_eq!(cluster.stats().total_nodes, 1); } }