Add complete SDK implementations for accessing Synor Compute: JavaScript/TypeScript SDK (sdk/js/): - Full async/await API with TypeScript types - Tensor operations: matmul, conv2d, attention - Model inference with streaming support - WebSocket-based job monitoring - Browser and Node.js compatible Python SDK (sdk/python/): - Async/await with aiohttp - NumPy integration for tensors - Context managers for cleanup - Type hints throughout - PyPI-ready package structure Go SDK (sdk/go/): - Idiomatic Go with context support - Efficient binary tensor serialization - HTTP client with configurable timeouts - Zero external dependencies (stdlib only) All SDKs support: - Matrix multiplication (FP64 to INT4 precision) - Convolution operations (2D, 3D) - Flash attention - LLM inference - Spot pricing queries - Job polling and cancellation - Heterogeneous compute targeting (CPU/GPU/TPU/NPU/LPU)
298 lines
8.9 KiB
Python
"""Synor Compute Client."""
|
|
|
|
import logging
from typing import Any, Optional

import httpx
import numpy as np

from .job import ComputeJob
from .tensor import Tensor
from .types import (
    BalancingStrategy,
    JobResult,
    Precision,
    PricingInfo,
    ProcessorType,
    SynorConfig,
    TaskPriority,
)
|
|
|
|
|
|
# Module-level logger so debug output integrates with the host app's logging
# configuration instead of writing straight to stdout.
_logger = logging.getLogger(__name__)


class SynorCompute:
    """
    Main Synor Compute client.

    Async client for the Synor Compute REST API: tensor operations
    (matmul, conv2d, attention), model inference, pricing/capacity
    queries, and generic job submission with polling/cancellation.

    Example:
        >>> compute = SynorCompute(api_key="sk_...")
        >>> result = await compute.matmul(
        ...     a=np.random.randn(1024, 1024),
        ...     b=np.random.randn(1024, 1024),
        ...     precision=Precision.FP16
        ... )
    """

    def __init__(
        self,
        api_key: str,
        endpoint: str = "https://compute.synor.cc/api/v1",
        strategy: BalancingStrategy = BalancingStrategy.BALANCED,
        precision: Precision = Precision.FP32,
        timeout: float = 30.0,
        debug: bool = False,
    ):
        """
        Create a client.

        Args:
            api_key: Bearer token used to authenticate every request.
            endpoint: Base URL of the Synor Compute API.
            strategy: Default balancing strategy for submitted jobs.
            precision: Default compute precision for tensor ops.
            timeout: Per-request timeout in seconds.
            debug: Emit request logging at DEBUG level when True.
        """
        self.config = SynorConfig(
            api_key=api_key,
            endpoint=endpoint,
            strategy=strategy,
            precision=precision,
            timeout=timeout,
            debug=debug,
        )
        # One shared connection pool for the client's lifetime; closed in close().
        self._client = httpx.AsyncClient(
            base_url=endpoint,
            headers={
                "Authorization": f"Bearer {api_key}",
                "Content-Type": "application/json",
            },
            timeout=timeout,
        )

    async def __aenter__(self) -> "SynorCompute":
        return self

    async def __aexit__(self, *args: Any) -> None:
        await self.close()

    async def close(self) -> None:
        """Close the client and release the underlying HTTP connection pool."""
        await self._client.aclose()

    async def matmul(
        self,
        a: np.ndarray | Tensor,
        b: np.ndarray | Tensor,
        precision: Optional[Precision] = None,
        processor: Optional[ProcessorType] = None,
        priority: TaskPriority = TaskPriority.NORMAL,
    ) -> JobResult:
        """
        Matrix multiplication on distributed compute.

        Args:
            a: First matrix
            b: Second matrix
            precision: Compute precision (defaults to the client's configured precision)
            processor: Preferred processor type
            priority: Task priority

        Returns:
            Job result with computed matrix
        """
        tensor_a = Tensor.from_numpy(a) if isinstance(a, np.ndarray) else a
        tensor_b = Tensor.from_numpy(b) if isinstance(b, np.ndarray) else b

        job = await self.submit_job(
            "matmul",
            {
                "a": tensor_a.serialize(),
                "b": tensor_b.serialize(),
                "precision": (precision or self.config.precision).value,
                "processor": processor.value if processor else None,
                "priority": priority.value,
            },
        )
        return await job.wait()

    async def conv2d(
        self,
        input: np.ndarray | Tensor,
        kernel: np.ndarray | Tensor,
        stride: int | tuple[int, int] = 1,
        padding: int | tuple[int, int] = 0,
        precision: Optional[Precision] = None,
    ) -> JobResult:
        """
        2D Convolution on distributed compute.

        Args:
            input: Input tensor [batch, channels, height, width]
            kernel: Convolution kernel
            stride: Stride (scalar or per-axis tuple)
            padding: Padding (scalar or per-axis tuple)
            precision: Compute precision (defaults to the client's configured precision)

        Returns:
            Job result with convolved tensor
        """
        tensor_input = Tensor.from_numpy(input) if isinstance(input, np.ndarray) else input
        tensor_kernel = Tensor.from_numpy(kernel) if isinstance(kernel, np.ndarray) else kernel

        job = await self.submit_job(
            "conv2d",
            {
                "input": tensor_input.serialize(),
                "kernel": tensor_kernel.serialize(),
                "stride": stride,
                "padding": padding,
                "precision": (precision or self.config.precision).value,
            },
        )
        return await job.wait()

    async def attention(
        self,
        query: np.ndarray | Tensor,
        key: np.ndarray | Tensor,
        value: np.ndarray | Tensor,
        num_heads: int,
        flash: bool = True,
        precision: Optional[Precision] = None,
    ) -> JobResult:
        """
        Self-attention on distributed compute.

        Args:
            query: Query tensor
            key: Key tensor
            value: Value tensor
            num_heads: Number of attention heads
            flash: Use FlashAttention (selects the "flash_attention" operation)
            precision: Compute precision (defaults to the client's configured precision)

        Returns:
            Job result with attention output
        """
        q = Tensor.from_numpy(query) if isinstance(query, np.ndarray) else query
        k = Tensor.from_numpy(key) if isinstance(key, np.ndarray) else key
        v = Tensor.from_numpy(value) if isinstance(value, np.ndarray) else value

        job = await self.submit_job(
            "flash_attention" if flash else "self_attention",
            {
                "query": q.serialize(),
                "key": k.serialize(),
                "value": v.serialize(),
                "num_heads": num_heads,
                "precision": (precision or self.config.precision).value,
            },
        )
        return await job.wait()

    async def inference(
        self,
        model: str,
        input: str | np.ndarray | Tensor,
        max_tokens: int = 256,
        temperature: float = 0.7,
        top_k: int = 50,
        strategy: Optional[BalancingStrategy] = None,
        stream: bool = False,
    ) -> JobResult:
        """
        Run inference on a model.

        Args:
            model: Model name or CID
            input: Input prompt or tensor
            max_tokens: Maximum tokens to generate
            temperature: Sampling temperature
            top_k: Top-k sampling
            strategy: Balancing strategy (defaults to the client's configured strategy)
            stream: Stream responses
                NOTE(review): `stream` is accepted but not forwarded in the job
                payload — confirm whether streaming is handled elsewhere.

        Returns:
            Job result with inference output
        """
        # Prompts go through as-is; tensors are serialized to the wire format.
        input_data: str | dict
        if isinstance(input, str):
            input_data = input
        elif isinstance(input, np.ndarray):
            input_data = Tensor.from_numpy(input).serialize()
        else:
            input_data = input.serialize()

        job = await self.submit_job(
            "inference",
            {
                "model": model,
                "input": input_data,
                "max_tokens": max_tokens,
                "temperature": temperature,
                "top_k": top_k,
                "strategy": (strategy or self.config.strategy).value,
            },
        )
        return await job.wait()

    async def get_pricing(self) -> list[PricingInfo]:
        """Get current pricing for all processor types."""
        response = await self._request("GET", "/pricing")
        return [PricingInfo(**p) for p in response["pricing"]]

    async def get_processor_pricing(self, processor: ProcessorType) -> PricingInfo:
        """Get pricing for a specific processor type."""
        response = await self._request("GET", f"/pricing/{processor.value}")
        return PricingInfo(**response)

    async def get_capacity(self) -> dict[str, Any]:
        """Get available compute capacity."""
        return await self._request("GET", "/capacity")

    def set_strategy(self, strategy: BalancingStrategy) -> None:
        """Set the default balancing strategy."""
        self.config.strategy = strategy

    async def submit_job(self, operation: str, params: dict[str, Any]) -> ComputeJob:
        """
        Submit a generic compute job.

        Args:
            operation: Server-side operation name (e.g. "matmul").
            params: JSON-serializable operation parameters.

        Returns:
            A ComputeJob handle for polling/awaiting the result.
        """
        response = await self._request(
            "POST",
            "/jobs",
            {
                "operation": operation,
                "params": params,
                "strategy": self.config.strategy.value,
            },
        )
        return ComputeJob(response["job_id"], self)

    async def get_job_status(self, job_id: str) -> JobResult:
        """Get job status."""
        response = await self._request("GET", f"/jobs/{job_id}")
        return JobResult(**response)

    async def cancel_job(self, job_id: str) -> None:
        """Cancel a job."""
        await self._request("DELETE", f"/jobs/{job_id}")

    async def _request(
        self,
        method: str,
        path: str,
        data: Optional[dict[str, Any]] = None,
    ) -> dict[str, Any]:
        """
        Make an API request and return the decoded JSON body.

        Raises:
            SynorError: for any response with status >= 400.
        """
        if self.config.debug:
            # Lazy %-style args so formatting is skipped when DEBUG is off.
            _logger.debug("[SynorCompute] %s %s", method, path)

        response = await self._client.request(
            method,
            path,
            json=data,
        )

        if response.status_code >= 400:
            # Error bodies are not guaranteed to be JSON (e.g. an HTML page
            # from a gateway) or to be a dict — fall back to the HTTP reason
            # phrase rather than letting a decode error mask the failure.
            message = "Request failed"
            if response.content:
                try:
                    payload = response.json()
                except ValueError:
                    payload = None
                if isinstance(payload, dict):
                    message = payload.get("message", message)
            elif response.reason_phrase:
                message = response.reason_phrase
            raise SynorError(message, response.status_code)

        return response.json()
|
|
|
|
|
|
class SynorError(Exception):
|
|
"""Synor SDK error."""
|
|
|
|
def __init__(self, message: str, status_code: Optional[int] = None, code: Optional[str] = None):
|
|
super().__init__(message)
|
|
self.status_code = status_code
|
|
self.code = code
|