//! Synor Compute L2 - Heterogeneous Multi-Processor Compute Platform
//!
//! Provides decentralized compute services with:
//!
//! - **Heterogeneous Scheduling**: CPU + GPU + TPU + NPU + LPU working simultaneously
//! - **Consumer Device Mesh**: Mobile, browser, desktop devices contributing compute
//! - **90% Cost Reduction**: Zero margins, spot markets, electricity arbitrage
//! - **10x Speed**: Caching, speculative execution, optimal processor assignment
//!
//! # Architecture
//!
//! ```text
//! ┌─────────────────────────────────────────────────────────────────────────────┐
//! │                         SYNOR COMPUTE L2                                     │
//! ├─────────────────────────────────────────────────────────────────────────────┤
//! │                                                                              │
//! │  ┌─────────────────────────────────────────────────────────────────────────┐ │
//! │  │                      TASK DECOMPOSER                                     │ │
//! │  │  Analyzes workload → Identifies subtasks → Maps to optimal processors    │ │
//! │  └─────────────────────────────────────────────────────────────────────────┘ │
//! │                                    │                                         │
//! │                                    ▼                                         │
//! │  ┌─────────────────────────────────────────────────────────────────────────┐ │
//! │  │                    HETEROGENEOUS SCHEDULER                               │ │
//! │  │  ┌──────┐ ┌──────┐ ┌──────┐ ┌──────┐ ┌──────┐ ┌──────┐                 │ │
//! │  │  │ CPU  │ │ GPU  │ │ TPU  │ │ NPU  │ │ LPU  │ │Custom│                 │ │
//! │  │  │Queue │ │Queue │ │Queue │ │Queue │ │Queue │ │Queue │                 │ │
//! │  │  └──────┘ └──────┘ └──────┘ └──────┘ └──────┘ └──────┘                 │ │
//! │  └─────────────────────────────────────────────────────────────────────────┘ │
//! │                                                                              │
//! │  ┌─────────────────────────────────────────────────────────────────────────┐ │
//! │  │                    UNIFIED MEMORY FABRIC                                 │ │
//! │  │  Zero-copy data sharing │ Automatic placement │ Cache coherency          │ │
//! │  └─────────────────────────────────────────────────────────────────────────┘ │
//! │                                                                              │
//! └─────────────────────────────────────────────────────────────────────────────┘
//! ```
//!
//! # Pricing
//!
//! | Resource | Unit | Price (SYNOR) |
//! |----------|------|---------------|
//! | GPU (consumer) | hour | 0.10 |
//! | GPU (datacenter) | hour | 0.50-4.00 |
//! | CPU | core/hour | 0.02 |
//! | Memory | GB/hour | 0.005 |
//! | Inference | 1M tokens | 0.10 |

#![allow(dead_code)]

pub mod device;
pub mod error;
pub mod market;
pub mod memory;
pub mod model;
pub mod processor;
pub mod scheduler;
pub mod task;

pub use device::{
    DeviceCapabilities, DeviceId, DeviceInfo, DeviceRegistry, DeviceStatus, DeviceType,
};
pub use error::ComputeError;
pub use market::{
    Auction, AuctionId, CloudComparison, CpuTier as MarketCpuTier, GpuTier as MarketGpuTier,
    MarketStats, Order, OrderBook, OrderId, OrderSide, OrderType, PricingEngine, ProviderListing,
    ResourceType, SpotMarket, Trade,
};
pub use memory::{MemoryManager, TensorHandle, TransferPath, UnifiedMemory};
pub use model::{
    ModelCategory, ModelFormat, ModelId, ModelInfo, ModelRegistry, ModelUploadRequest,
    ModelUploadResponse,
};
pub use processor::{
    ComputeThroughput, CpuVariant, GpuVariant, NpuVariant, Operation, OperationType, Processor,
    ProcessorCapabilities, ProcessorId, ProcessorType, TpuVersion,
};
pub use scheduler::{
    HeterogeneousScheduler, LoadBalancer, Schedule, ScheduleResult, TaskAssignment, WorkQueue,
};
pub use task::{
    ComputeTask, DecomposedWorkload, Task, TaskDecomposer, TaskId, TaskPriority, TaskResult,
    TaskStatus,
};

use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::sync::Arc;

use parking_lot::RwLock;

/// Compute node identifier.
///
/// Newtype over a `u64`. It is used as the key of the cluster's node map and
/// is rendered as `node_<n>` by its `Display` implementation.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub struct NodeId(pub u64);

impl std::fmt::Display for NodeId {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "node_{}", self.0)
    }
}

/// Job identifier.
///
/// Wraps 32 raw bytes. `JobId::new` fills them from the thread-local random
/// number generator; `Display` prints only the first 8 bytes, hex-encoded.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub struct JobId(pub [u8; 32]);

impl JobId {
    /// Creates a new job ID.
    pub fn new() -> Self {
        use rand::Rng;
        let mut bytes = [0u8; 32];
        rand::thread_rng().fill(&mut bytes);
        JobId(bytes)
    }

    /// Creates from bytes.
    pub fn from_bytes(bytes: [u8; 32]) -> Self {
        JobId(bytes)
    }
}

impl Default for JobId {
    fn default() -> Self {
        Self::new()
    }
}

impl std::fmt::Display for JobId {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "job_{}", hex::encode(&self.0[..8]))
    }
}

/// Compute job specification.
///
/// Describes what to run (`job_type`), what it needs (`resources`) and the
/// limits attached to it (`max_budget`, `deadline`). Jobs are handed to the
/// cluster through `ComputeCluster::submit_job`.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct ComputeJob {
    /// Job ID; the cluster stores the job under this key.
    pub id: JobId,
    /// Owner address (32 raw bytes).
    pub owner: [u8; 32],
    /// Job type, including the type-specific parameters.
    pub job_type: JobType,
    /// Resource requirements.
    pub resources: ResourceRequirements,
    /// Input data reference (CID), if the job has input data.
    pub input_cid: Option<String>,
    /// Maximum budget (in atomic SYNOR).
    pub max_budget: u64,
    /// Priority level.
    pub priority: JobPriority,
    /// Created timestamp.
    // NOTE(review): unit and epoch are not established in this file — confirm.
    pub created_at: u64,
    /// Deadline (optional).
    // NOTE(review): presumably expressed in the same unit as `created_at` — confirm.
    pub deadline: Option<u64>,
}

/// Job type classification.
///
/// Each variant carries the parameters specific to that kind of workload.
/// Fields ending in `_cid` are references to externally stored artifacts
/// (presumably content identifiers, like `ComputeJob::input_cid` — confirm).
#[derive(Clone, Debug, Serialize, Deserialize)]
pub enum JobType {
    /// AI/ML training job.
    Training {
        /// Framework (and version, where applicable) the job uses.
        framework: MlFramework,
        /// Reference to the model.
        model_cid: String,
        /// Reference to the dataset.
        dataset_cid: String,
        /// Number of epochs.
        epochs: u32,
        /// Batch size.
        batch_size: u32,
    },
    /// AI/ML inference job.
    Inference {
        /// Reference to the model.
        model_cid: String,
        /// Input format name (free-form string; the tests in this file use `"json"`).
        input_format: String,
        /// Batch size.
        batch_size: u32,
    },
    /// Container workload.
    Container {
        /// Container image reference.
        image: String,
        /// Command and its arguments.
        command: Vec<String>,
        /// Environment variables, keyed by name.
        env: HashMap<String, String>,
    },
    /// Serverless function.
    Serverless {
        /// Runtime the function runs on.
        runtime: FunctionRuntime,
        /// Reference to the function code.
        code_cid: String,
        /// Handler name.
        handler: String,
    },
    /// General compute (WASM).
    Wasm {
        /// Reference to the WASM module.
        module_cid: String,
        /// Entry point name.
        entrypoint: String,
    },
}

/// ML framework specification.
///
/// Used by `JobType::Training`. All variants except `ONNX` carry a free-form
/// version string; no format for that string is enforced here.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub enum MlFramework {
    /// PyTorch, with the requested version.
    PyTorch { version: String },
    /// TensorFlow, with the requested version.
    TensorFlow { version: String },
    /// JAX, with the requested version.
    JAX { version: String },
    /// ONNX (no version parameter).
    ONNX,
}

/// Function runtime.
///
/// Selects the runtime of a `JobType::Serverless` job.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub enum FunctionRuntime {
    /// Node.js runtime (presumably version 20, going by the variant name).
    Node20,
    /// Python runtime (presumably version 3.12, going by the variant name).
    Python312,
    /// Rust runtime.
    Rust,
    /// Go runtime.
    Go,
    /// User-supplied runtime identified by an image reference.
    Custom { image: String },
}

/// Job priority levels.
///
/// The derived ordering follows the declaration order, so
/// `Background < Normal < High < Critical`. The default is `Normal`.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)]
pub enum JobPriority {
    /// Background job, can be preempted.
    Background = 0,
    /// Normal priority.
    Normal = 1,
    /// High priority, faster scheduling.
    High = 2,
    /// Critical, guaranteed resources.
    Critical = 3,
}

impl Default for JobPriority {
    fn default() -> Self {
        JobPriority::Normal
    }
}

/// Resource requirements for a job.
///
/// The derived `Default` yields zero CPU/memory, no GPU requirement, no
/// processor preference, no latency bound and non-distributed execution.
/// `ComputePricing::estimate` bills `min_cpu_cores`, `min_memory_gb` and, when
/// present, `gpu.min_count`.
#[derive(Clone, Debug, Default, Serialize, Deserialize)]
pub struct ResourceRequirements {
    /// Minimum CPU cores (fractional values are representable).
    pub min_cpu_cores: f32,
    /// Minimum memory (GB).
    pub min_memory_gb: f32,
    /// GPU requirements; `None` means the job does not ask for GPUs.
    pub gpu: Option<GpuRequirements>,
    /// Preferred processor types (in priority order).
    pub preferred_processors: Vec<ProcessorType>,
    /// Maximum latency (ms) - for inference.
    pub max_latency_ms: Option<u32>,
    /// Requires distributed execution.
    pub distributed: bool,
}

/// GPU resource requirements.
///
/// Nothing in this type enforces `min_count <= max_count`; callers are
/// responsible for supplying a consistent range.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct GpuRequirements {
    /// Minimum number of GPUs. This is the count used for cost estimation.
    pub min_count: u32,
    /// Maximum number of GPUs.
    pub max_count: u32,
    /// Minimum VRAM per GPU (GB).
    pub min_vram_gb: u32,
    /// Minimum compute capability.
    // NOTE(review): presumably a (major, minor) pair — confirm against the scheduler.
    pub min_compute_capability: Option<(u8, u8)>,
    /// Allow GPU sharing (MPS/MIG).
    pub allow_sharing: bool,
}

/// Job execution status.
///
/// Note that `ComputeCluster::get_job_status` in this file currently reports
/// `Queued` for every stored job; the other variants are not produced here.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub enum JobStatus {
    /// Queued, waiting for resources.
    Queued,
    /// Resources allocated, starting.
    Starting,
    /// Running.
    Running {
        /// Progress of the job.
        // NOTE(review): scale (0.0-1.0 vs. percent) is not established here — confirm.
        progress: f32,
        /// Nodes the job is assigned to.
        assigned_nodes: Vec<NodeId>,
    },
    /// Completed successfully.
    Completed {
        /// Reference (CID) to the result.
        result_cid: String,
        /// Run time in milliseconds.
        duration_ms: u64,
        /// Cost charged for the job.
        // NOTE(review): presumably atomic SYNOR, like `ComputeJob::max_budget` — confirm.
        cost: u64,
    },
    /// Failed.
    Failed { error: String },
    /// Cancelled by user.
    Cancelled,
}

/// Compute node registration.
///
/// Passed to `ComputeCluster::register_node`, which registers every entry of
/// `processors` with the device registry and then stores the node under `id`.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct ComputeNode {
    /// Node ID.
    pub id: NodeId,
    /// Owner address (32 raw bytes).
    pub owner: [u8; 32],
    /// Available processors.
    pub processors: Vec<ProcessorInfo>,
    /// Total memory (GB). Summed across nodes for `ClusterStats::total_memory_gb`.
    pub total_memory_gb: f32,
    /// Available memory (GB).
    pub available_memory_gb: f32,
    /// Network bandwidth (Gbps).
    pub bandwidth_gbps: f32,
    /// Geographic region (free-form string; the tests in this file use `"us-east"`).
    pub region: String,
    /// Stake amount (for PoS).
    pub stake: u64,
    /// Reputation score (0-100). The range is not enforced by the `u32` type.
    pub reputation: u32,
    /// Current status.
    pub status: NodeStatus,
}

/// Processor information on a node.
///
/// One entry per processor in `ComputeNode::processors`. Entries whose
/// `processor_type` is a `ProcessorType::Gpu(_)` are counted as GPUs in the
/// cluster statistics.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct ProcessorInfo {
    /// Processor ID (local to node).
    pub id: ProcessorId,
    /// Processor type.
    pub processor_type: ProcessorType,
    /// Capabilities.
    pub capabilities: ProcessorCapabilities,
    /// Current utilization (0.0 - 1.0). The range is not enforced by the type.
    pub utilization: f32,
    /// Current temperature (Celsius), when a reading is available.
    pub temperature: Option<f32>,
}

/// Node status.
///
/// Only `Online` nodes are counted in `ClusterStats::online_nodes`.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Serialize, Deserialize)]
pub enum NodeStatus {
    /// Online and accepting jobs.
    Online,
    /// Online but not accepting new jobs.
    Draining,
    /// Offline.
    Offline,
    /// Maintenance mode.
    Maintenance,
}

/// Compute cluster manager.
///
/// Owns the node and job tables (each behind its own `RwLock`) and shared
/// handles to the device registry, scheduler, spot market and memory manager.
/// All methods take `&self`; mutation goes through the locks.
pub struct ComputeCluster {
    /// Registered nodes, keyed by node ID.
    nodes: RwLock<HashMap<NodeId, ComputeNode>>,
    /// Device registry. The same handle is also given to the scheduler in `new`.
    device_registry: Arc<DeviceRegistry>,
    /// Heterogeneous scheduler.
    scheduler: Arc<HeterogeneousScheduler>,
    /// Spot market. Constructed in `new`; not referenced by the methods visible in this file.
    spot_market: Arc<SpotMarket>,
    /// Memory manager. Constructed in `new`; not referenced by the methods visible in this file.
    memory_manager: Arc<MemoryManager>,
    /// Active jobs, keyed by job ID. Entries are added by `submit_job` and
    /// removed only by `cancel_job` in the code visible here.
    jobs: RwLock<HashMap<JobId, ComputeJob>>,
}

impl ComputeCluster {
    /// Creates a new compute cluster.
    pub fn new() -> Self {
        let device_registry = Arc::new(DeviceRegistry::new());
        let scheduler = Arc::new(HeterogeneousScheduler::new(device_registry.clone()));
        let spot_market = Arc::new(SpotMarket::new());
        let memory_manager = Arc::new(MemoryManager::new());

        Self {
            nodes: RwLock::new(HashMap::new()),
            device_registry,
            scheduler,
            spot_market,
            memory_manager,
            jobs: RwLock::new(HashMap::new()),
        }
    }

    /// Registers a compute node.
    pub fn register_node(&self, node: ComputeNode) -> Result<(), ComputeError> {
        let id = node.id;

        // Register processors with device registry
        for proc in &node.processors {
            self.device_registry.register_processor(id, proc.clone())?;
        }

        self.nodes.write().insert(id, node);
        Ok(())
    }

    /// Unregisters a compute node.
    pub fn unregister_node(&self, node_id: NodeId) -> Result<(), ComputeError> {
        self.device_registry.unregister_node(node_id)?;
        self.nodes.write().remove(&node_id);
        Ok(())
    }

    /// Submits a job for execution.
    pub async fn submit_job(&self, job: ComputeJob) -> Result<JobId, ComputeError> {
        let job_id = job.id;

        // Decompose job into tasks
        let tasks = self.decompose_job(&job)?;

        // Schedule tasks
        let schedule = self.scheduler.schedule(tasks).await?;

        // Store job
        self.jobs.write().insert(job_id, job);

        // Execute schedule (async)
        tokio::spawn({
            let scheduler = self.scheduler.clone();
            async move {
                let _ = scheduler.execute(&schedule.schedule).await;
            }
        });

        Ok(job_id)
    }

    /// Gets job status.
    pub fn get_job_status(&self, job_id: &JobId) -> Option<JobStatus> {
        self.jobs.read().get(job_id).map(|_| JobStatus::Queued)
    }

    /// Cancels a job.
    pub fn cancel_job(&self, job_id: &JobId) -> Result<(), ComputeError> {
        if self.jobs.write().remove(job_id).is_some() {
            Ok(())
        } else {
            Err(ComputeError::JobNotFound(*job_id))
        }
    }

    /// Gets cluster statistics.
    pub fn stats(&self) -> ClusterStats {
        let nodes = self.nodes.read();
        let jobs = self.jobs.read();

        let total_nodes = nodes.len();
        let online_nodes = nodes
            .values()
            .filter(|n| n.status == NodeStatus::Online)
            .count();

        let total_gpus: usize = nodes
            .values()
            .flat_map(|n| &n.processors)
            .filter(|p| matches!(p.processor_type, ProcessorType::Gpu(_)))
            .count();

        let total_memory: f32 = nodes.values().map(|n| n.total_memory_gb).sum();

        ClusterStats {
            total_nodes,
            online_nodes,
            total_gpus,
            total_memory_gb: total_memory,
            active_jobs: jobs.len(),
            queued_jobs: jobs.values().filter(|_| true).count(), // Simplified
        }
    }

    /// Decomposes a job into schedulable tasks.
    fn decompose_job(&self, job: &ComputeJob) -> Result<Vec<Task>, ComputeError> {
        let decomposer = TaskDecomposer::new();
        decomposer.decompose(job)
    }
}

impl Default for ComputeCluster {
    fn default() -> Self {
        Self::new()
    }
}

/// Cluster statistics.
///
/// Point-in-time snapshot produced by `ComputeCluster::stats`.
#[derive(Clone, Debug, Default, Serialize, Deserialize)]
pub struct ClusterStats {
    /// Total registered nodes.
    pub total_nodes: usize,
    /// Online nodes (status is exactly `NodeStatus::Online`).
    pub online_nodes: usize,
    /// Total GPUs across cluster (processors of type `ProcessorType::Gpu(_)`).
    pub total_gpus: usize,
    /// Total memory (GB), summed over all registered nodes regardless of status.
    pub total_memory_gb: f32,
    /// Active jobs. Currently the number of jobs stored in the cluster.
    pub active_jobs: usize,
    /// Queued jobs. Currently computed as the same count as `active_jobs`.
    pub queued_jobs: usize,
}

/// Pricing calculator for compute operations.
///
/// All prices are integers in atomic SYNOR units. Going by the comments on
/// the default values, 1 SYNOR corresponds to 1_000_000_000 atomic units.
/// `estimate` uses `gpu_hourly`, `cpu_core_hour` and `memory_gb_hour`; the
/// egress and inference prices are not used by the code in this file.
#[derive(Clone, Debug)]
pub struct ComputePricing {
    /// GPU cost per hour by type.
    pub gpu_hourly: HashMap<GpuTier, u64>,
    /// CPU cost per core-hour.
    pub cpu_core_hour: u64,
    /// Memory cost per GB-hour.
    pub memory_gb_hour: u64,
    /// Network egress per GB.
    pub network_egress_gb: u64,
    /// Inference per million tokens.
    pub inference_per_million_tokens: u64,
}

/// GPU pricing tiers.
///
/// Key type of `ComputePricing::gpu_hourly`. Note that
/// `ComputePricing::estimate` currently always prices GPUs at the `Consumer`
/// tier.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum GpuTier {
    /// Consumer GPUs (RTX 30xx, 40xx).
    Consumer,
    /// Professional GPUs (RTX A series).
    Professional,
    /// Data center GPUs (A100).
    DataCenter,
    /// Latest generation (H100).
    Premium,
}

impl Default for ComputePricing {
    fn default() -> Self {
        let mut gpu_hourly = HashMap::new();
        gpu_hourly.insert(GpuTier::Consumer, 100_000_000); // 0.10 SYNOR
        gpu_hourly.insert(GpuTier::Professional, 300_000_000); // 0.30 SYNOR
        gpu_hourly.insert(GpuTier::DataCenter, 2_000_000_000); // 2.00 SYNOR
        gpu_hourly.insert(GpuTier::Premium, 4_000_000_000); // 4.00 SYNOR

        Self {
            gpu_hourly,
            cpu_core_hour: 20_000_000,                 // 0.02 SYNOR
            memory_gb_hour: 5_000_000,                 // 0.005 SYNOR
            network_egress_gb: 50_000_000,             // 0.05 SYNOR
            inference_per_million_tokens: 100_000_000, // 0.10 SYNOR
        }
    }
}

impl ComputePricing {
    /// Estimates the cost of running `job` for `duration_hours`, in atomic
    /// SYNOR units.
    ///
    /// The estimate is the sum of up to three components, each truncated to a
    /// whole atomic unit before being added:
    ///
    /// - CPU: `cpu_core_hour * min_cpu_cores * duration_hours`
    /// - Memory: `memory_gb_hour * min_memory_gb * duration_hours`
    /// - GPU (only when the job requests GPUs):
    ///   hourly rate * `min_count` * `duration_hours`
    ///
    /// The GPU tier is not yet derived from the job: the `Consumer` rate is
    /// always used, falling back to 100_000_000 if that tier is missing from
    /// `gpu_hourly`. Network egress and inference prices are not included.
    ///
    /// Arithmetic is done in `f64`. `f32` carries only 24 bits of mantissa, so
    /// totals around 1e12 atomic units would be rounded to multiples of
    /// hundreds of thousands of units. Components are combined with saturating
    /// addition, so an extreme input yields `u64::MAX` instead of overflowing.
    /// A negative or NaN component contributes 0.
    pub fn estimate(&self, job: &ComputeJob, duration_hours: f32) -> u64 {
        let hours = f64::from(duration_hours);
        let resources = &job.resources;

        // Float-to-integer `as` casts saturate: negative/NaN -> 0, too large -> u64::MAX.
        let cpu_cost =
            (self.cpu_core_hour as f64 * f64::from(resources.min_cpu_cores) * hours) as u64;

        let memory_cost =
            (self.memory_gb_hour as f64 * f64::from(resources.min_memory_gb) * hours) as u64;

        let gpu_cost = match &resources.gpu {
            Some(gpu) => {
                let tier = GpuTier::Consumer; // Simplified
                let hourly = self.gpu_hourly.get(&tier).copied().unwrap_or(100_000_000);
                (hourly as f64 * f64::from(gpu.min_count) * hours) as u64
            }
            None => 0,
        };

        cpu_cost
            .saturating_add(memory_cost)
            .saturating_add(gpu_cost)
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Inference job asking for 4 cores, 16 GB of memory and exactly one GPU.
    fn single_gpu_inference_job() -> ComputeJob {
        let gpu = GpuRequirements {
            min_count: 1,
            max_count: 1,
            min_vram_gb: 16,
            min_compute_capability: None,
            allow_sharing: false,
        };

        let resources = ResourceRequirements {
            min_cpu_cores: 4.0,
            min_memory_gb: 16.0,
            gpu: Some(gpu),
            ..Default::default()
        };

        ComputeJob {
            id: JobId::new(),
            owner: [0u8; 32],
            job_type: JobType::Inference {
                model_cid: "model123".to_string(),
                input_format: "json".to_string(),
                batch_size: 32,
            },
            resources,
            input_cid: None,
            max_budget: 1_000_000_000,
            priority: JobPriority::Normal,
            created_at: 0,
            deadline: None,
        }
    }

    /// Online node with a single AVX-512 x86_64 CPU and 64 GB of memory.
    fn single_cpu_node() -> ComputeNode {
        let cpu = ProcessorInfo {
            id: ProcessorId(0),
            processor_type: ProcessorType::Cpu(CpuVariant::X86_64 {
                avx: processor::AvxSupport::Avx512,
            }),
            capabilities: ProcessorCapabilities::default(),
            utilization: 0.0,
            temperature: Some(45.0),
        };

        ComputeNode {
            id: NodeId(1),
            owner: [1u8; 32],
            processors: vec![cpu],
            total_memory_gb: 64.0,
            available_memory_gb: 60.0,
            bandwidth_gbps: 10.0,
            region: "us-east".to_string(),
            stake: 1000,
            reputation: 100,
            status: NodeStatus::Online,
        }
    }

    /// Two freshly generated IDs must differ.
    #[test]
    fn test_job_id() {
        let (first, second) = (JobId::new(), JobId::new());
        assert_ne!(first.0, second.0);
    }

    /// A new cluster has no registered nodes.
    #[test]
    fn test_compute_cluster() {
        let cluster = ComputeCluster::new();
        assert_eq!(cluster.stats().total_nodes, 0);
    }

    /// One hour of a CPU + memory + GPU job has a non-zero estimated cost.
    #[test]
    fn test_pricing() {
        let pricing = ComputePricing::default();
        let job = single_gpu_inference_job();

        let cost = pricing.estimate(&job, 1.0);
        assert!(cost > 0);
    }

    /// Registering one node is reflected in the cluster statistics.
    #[test]
    fn test_node_registration() {
        let cluster = ComputeCluster::new();

        cluster.register_node(single_cpu_node()).unwrap();

        let stats = cluster.stats();
        assert_eq!(stats.total_nodes, 1);
    }
}