---
# Synor Compute Layer - Docker Compose
# Heterogeneous compute orchestration for AI/ML workloads
#
# Topology defined in this file:
#   - 1 orchestrator       (container port 17200, health probed on 17202)
#   - 2 CPU workers        (container ports 17210, 17211)
#   - 1 WASM worker        (container port 17212)
#   - 1 spot market        (container port 17220)
#   - Redis and Prometheus as supporting services
# All services join the `synor-compute-net` bridge network. Host ports are
# remapped into the 172xx range (see each `ports:` entry).

# NOTE(review): Docker Compose v2 treats the top-level `version` key as
# obsolete and ignores it; kept for compatibility with older tooling.
version: '3.9'

services:
  # Compute Orchestrator (schedules tasks across workers)
  compute-orchestrator:
    build:
      context: .
      dockerfile: docker/compute-node/Dockerfile
    container_name: synor-compute-orchestrator
    hostname: compute-orchestrator
    restart: unless-stopped
    environment:
      - RUST_LOG=info
      - NODE_TYPE=orchestrator
      - LISTEN_ADDR=0.0.0.0:17200
      # NOTE(review): 17213 is listed here but no service in this file
      # listens on it (workers use 17210-17212) - confirm it is intended.
      - WORKER_PORTS=17210,17211,17212,17213
      - BALANCING_STRATEGY=balanced
      - MAX_QUEUE_SIZE=10000
    ports:
      - "17250:17200"  # Compute API
      - "17252:17202"  # Metrics/Health
    networks:
      - synor-compute-net
    volumes:
      - compute-orchestrator-data:/data/compute
    # Short-form depends_on only orders container start-up; it does not wait
    # for the workers' health checks to pass.
    depends_on:
      - compute-worker-cpu-1
      - compute-worker-cpu-2
    # Probes 17202, not the 17200 set in LISTEN_ADDR; presumably the binary
    # serves metrics/health on a separate port - TODO confirm.
    # Requires `curl` to be present in the image.
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:17202/health"]
      interval: 30s
      timeout: 5s
      retries: 3
      start_period: 30s

  # CPU Worker Node 1 (x86-64 AVX2)
  compute-worker-cpu-1:
    build:
      context: .
      dockerfile: docker/compute-node/Dockerfile
    container_name: synor-compute-worker-cpu-1
    hostname: compute-worker-cpu-1
    restart: unless-stopped
    environment:
      - RUST_LOG=info
      - NODE_TYPE=worker
      - PROCESSOR_TYPE=cpu
      - CPU_VARIANT=x86_64_avx2
      - LISTEN_ADDR=0.0.0.0:17210
      # Resolved through the compose network via the orchestrator's hostname.
      - ORCHESTRATOR_URL=http://compute-orchestrator:17200
      - MAX_CONCURRENT_TASKS=8
      - WORK_STEAL_ENABLED=true
    ports:
      - "17260:17210"  # Worker API
    networks:
      - synor-compute-net
    volumes:
      - compute-worker-cpu-1-data:/data/compute
    deploy:
      resources:
        limits:
          cpus: '4'
          memory: 8G
        reservations:
          cpus: '2'
          memory: 4G
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:17210/health"]
      interval: 30s
      timeout: 5s
      retries: 3

  # CPU Worker Node 2 (x86-64 AVX2)
  # Same configuration as worker 1 apart from names, ports and data volume.
  compute-worker-cpu-2:
    build:
      context: .
      dockerfile: docker/compute-node/Dockerfile
    container_name: synor-compute-worker-cpu-2
    hostname: compute-worker-cpu-2
    restart: unless-stopped
    environment:
      - RUST_LOG=info
      - NODE_TYPE=worker
      - PROCESSOR_TYPE=cpu
      - CPU_VARIANT=x86_64_avx2
      - LISTEN_ADDR=0.0.0.0:17211
      - ORCHESTRATOR_URL=http://compute-orchestrator:17200
      - MAX_CONCURRENT_TASKS=8
      - WORK_STEAL_ENABLED=true
    ports:
      - "17261:17211"  # Worker API
    networks:
      - synor-compute-net
    volumes:
      - compute-worker-cpu-2-data:/data/compute
    deploy:
      resources:
        limits:
          cpus: '4'
          memory: 8G
        reservations:
          cpus: '2'
          memory: 4G
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:17211/health"]
      interval: 30s
      timeout: 5s
      retries: 3

  # WASM Worker (browser-compatible compute)
  # Smaller resource envelope and lower concurrency than the CPU workers.
  # Not listed in the orchestrator's depends_on, so start order relative to
  # the orchestrator is unspecified.
  compute-worker-wasm:
    build:
      context: .
      dockerfile: docker/compute-node/Dockerfile
    container_name: synor-compute-worker-wasm
    hostname: compute-worker-wasm
    restart: unless-stopped
    environment:
      - RUST_LOG=info
      - NODE_TYPE=worker
      - PROCESSOR_TYPE=wasm
      - LISTEN_ADDR=0.0.0.0:17212
      - ORCHESTRATOR_URL=http://compute-orchestrator:17200
      - MAX_CONCURRENT_TASKS=4
    ports:
      - "17262:17212"  # Worker API
    networks:
      - synor-compute-net
    volumes:
      - compute-worker-wasm-data:/data/compute
    deploy:
      resources:
        limits:
          cpus: '2'
          memory: 2G
        reservations:
          cpus: '1'
          memory: 1G
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:17212/health"]
      interval: 30s
      timeout: 5s
      retries: 3

  # Spot Market Service
  compute-spot-market:
    build:
      context: .
      dockerfile: docker/compute-node/Dockerfile
    container_name: synor-compute-spot-market
    hostname: compute-spot-market
    restart: unless-stopped
    environment:
      - RUST_LOG=info
      - NODE_TYPE=market
      - LISTEN_ADDR=0.0.0.0:17220
      - ORCHESTRATOR_URL=http://compute-orchestrator:17200
      - AUCTION_INTERVAL_MS=5000
      - MIN_BID_MICRO=100
    ports:
      - "17270:17220"  # Market API
    networks:
      - synor-compute-net
    volumes:
      - compute-spot-market-data:/data/compute
    # Started after the orchestrator container (start order only, not health).
    depends_on:
      - compute-orchestrator
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:17220/health"]
      interval: 30s
      timeout: 5s
      retries: 3

  # Redis for task queue and caching
  # Flags: append-only-file persistence on, memory capped at 1gb, and
  # allkeys-lru eviction once the cap is reached.
  # NOTE(review): allkeys-lru may evict any key, including queue entries,
  # under memory pressure - confirm this is acceptable for the task queue.
  # NOTE(review): no service in this file is given a Redis address through
  # its environment; presumably it is configured elsewhere - verify.
  compute-redis:
    image: redis:7-alpine
    container_name: synor-compute-redis
    hostname: compute-redis
    restart: unless-stopped
    command: redis-server --appendonly yes --maxmemory 1gb --maxmemory-policy allkeys-lru
    # NOTE(review): published on the host with no password set in `command`.
    ports:
      - "17280:6379"  # Redis port (remapped)
    networks:
      - synor-compute-net
    volumes:
      - compute-redis-data:/data
    healthcheck:
      test: ["CMD", "redis-cli", "ping"]
      interval: 15s
      timeout: 5s
      retries: 3

  # Prometheus metrics
  # NOTE(review): `latest` is an unpinned tag; builds are not reproducible.
  compute-prometheus:
    image: prom/prometheus:latest
    container_name: synor-compute-prometheus
    hostname: compute-prometheus
    restart: unless-stopped
    # NOTE(review): --web.enable-lifecycle turns on the HTTP reload/quit
    # endpoints, and the UI port is published to the host below.
    command:
      - '--config.file=/etc/prometheus/prometheus.yml'
      - '--storage.tsdb.path=/prometheus'
      - '--web.enable-lifecycle'
    volumes:
      # Scrape configuration is bind-mounted read-only from the repository.
      - ./docker/compute-node/prometheus.yml:/etc/prometheus/prometheus.yml:ro
      - compute-prometheus-data:/prometheus
    ports:
      - "17290:9090"  # Prometheus UI (remapped)
    networks:
      - synor-compute-net
    healthcheck:
      test: ["CMD", "wget", "-q", "--spider", "http://localhost:9090/-/healthy"]
      interval: 30s
      timeout: 5s
      retries: 3

# Named volumes, one per stateful service, all on the local driver.
volumes:
  compute-orchestrator-data:
    driver: local
  compute-worker-cpu-1-data:
    driver: local
  compute-worker-cpu-2-data:
    driver: local
  compute-worker-wasm-data:
    driver: local
  compute-spot-market-data:
    driver: local
  compute-redis-data:
    driver: local
  compute-prometheus-data:
    driver: local

# Single bridge network shared by every service, with a fixed subnet.
networks:
  synor-compute-net:
    driver: bridge
    ipam:
      config:
        - subnet: 172.23.0.0/16