Expands SDK support to 8 additional languages/frameworks:

- Java SDK with Maven/OkHttp/Jackson
- Kotlin SDK with Gradle/Ktor/kotlinx.serialization
- Swift SDK with Swift Package Manager/async-await
- C SDK with CMake/libcurl
- C++ SDK with CMake/Modern C++20
- C# SDK with .NET 8.0/HttpClient
- Ruby SDK with Bundler/Faraday
- Rust SDK with Cargo/reqwest/tokio

All SDKs include:

- Tensor operations (matmul, conv2d, attention)
- LLM inference with streaming support
- Model registry, pricing, and usage APIs
- Builder patterns where idiomatic
- Full type safety
216 lines
5.4 KiB
Ruby
# frozen_string_literal: true
|
|
|
|
require "faraday"
|
|
require "json"
|
|
|
|
module SynorCompute
  # Synor Compute SDK Client
  #
  # Thin Faraday-based HTTP client for the Synor Compute API. All request
  # methods raise ClientClosedError once #close has been called.
  #
  # @example
  #   client = SynorCompute::Client.new(api_key: 'your-api-key')
  #
  #   # Matrix multiplication
  #   result = client.matmul(tensor_a, tensor_b, processor: :gpu)
  #
  #   # LLM inference
  #   response = client.inference('llama-3-70b', 'Hello!')
  #
  class Client
    # @return [Config] the resolved client configuration
    attr_reader :config

    # Creates a new client and its underlying Faraday connection.
    #
    # @param api_key [String, nil] API key, forwarded to Config
    # @param options [Hash] additional Config options (base_url, timeout,
    #   default_precision/processor/priority, ...)
    # @raise [ArgumentError] when no API key is available after Config resolution
    def initialize(api_key: nil, **options)
      @config = Config.new(api_key: api_key, **options)
      raise ArgumentError, "API key is required" unless @config.api_key

      @conn = Faraday.new(url: @config.base_url) do |f|
        f.request :json  # encode request bodies as JSON
        f.response :json # parse JSON responses into hashes
        f.options.timeout = @config.timeout
        f.headers["Authorization"] = "Bearer #{@config.api_key}"
        f.headers["X-SDK-Version"] = "ruby/#{VERSION}"
      end
      @closed = false
    end

    # ==================== Matrix Operations ====================

    # Submits a matrix-multiplication job (a x b).
    #
    # @param a [#shape, #data, #dtype] left operand tensor
    # @param b [#shape, #data, #dtype] right operand tensor
    # @param precision [Symbol, nil] overrides Config#default_precision
    # @param processor [Symbol, nil] overrides Config#default_processor
    # @param priority [Symbol, nil] overrides Config#default_priority
    # @return [JobResult]
    # @raise [ClientClosedError] if the client has been closed
    def matmul(a, b, precision: nil, processor: nil, priority: nil)
      check_closed!

      body = {
        operation: "matmul",
        a: tensor_to_hash(a),
        b: tensor_to_hash(b),
        precision: (precision || @config.default_precision).to_s,
        processor: (processor || @config.default_processor).to_s,
        priority: (priority || @config.default_priority).to_s
      }

      response = @conn.post("/compute", body)
      parse_job_result(response.body)
    end

    # Submits a 2-D convolution job.
    #
    # @param input [#shape, #data, #dtype] input tensor
    # @param kernel [#shape, #data, #dtype] convolution kernel
    # @param stride [Array(Integer, Integer)] per-axis stride
    # @param padding [Array(Integer, Integer)] per-axis padding
    # @param precision [Symbol, nil] overrides Config#default_precision
    # @param processor [Symbol, nil] overrides Config#default_processor
    # @return [JobResult]
    # @raise [ClientClosedError] if the client has been closed
    def conv2d(input, kernel, stride: [1, 1], padding: [0, 0], precision: nil, processor: nil)
      check_closed!

      body = {
        operation: "conv2d",
        input: tensor_to_hash(input),
        kernel: tensor_to_hash(kernel),
        stride: stride,
        padding: padding,
        precision: (precision || @config.default_precision).to_s,
        # BUGFIX: the processor keyword was accepted but silently dropped;
        # send it in the payload exactly as matmul does.
        processor: (processor || @config.default_processor).to_s
      }

      response = @conn.post("/compute", body)
      parse_job_result(response.body)
    end

    # Submits a multi-head attention job.
    #
    # @param query [#shape, #data, #dtype] query tensor
    # @param key [#shape, #data, #dtype] key tensor
    # @param value [#shape, #data, #dtype] value tensor
    # @param num_heads [Integer] number of attention heads
    # @param flash [Boolean] request flash-attention kernels
    # @param precision [Symbol, nil] defaults to Precision::FP16 (not the
    #   Config default) — NOTE(review): presumably intentional for flash
    #   attention; confirm against the API docs.
    # @param processor [Symbol, nil] overrides Config#default_processor
    # @return [JobResult]
    # @raise [ClientClosedError] if the client has been closed
    def attention(query, key, value, num_heads: 8, flash: true, precision: nil, processor: nil)
      check_closed!

      body = {
        operation: "attention",
        query: tensor_to_hash(query),
        key: tensor_to_hash(key),
        value: tensor_to_hash(value),
        num_heads: num_heads,
        flash: flash,
        precision: (precision || Precision::FP16).to_s,
        # BUGFIX: the processor keyword was accepted but silently dropped;
        # send it in the payload exactly as matmul does.
        processor: (processor || @config.default_processor).to_s
      }

      response = @conn.post("/compute", body)
      parse_job_result(response.body)
    end

    # ==================== LLM Inference ====================

    # Runs a blocking (non-streaming) LLM completion.
    #
    # @param model [String] model identifier, e.g. 'llama-3-70b'
    # @param prompt [String] input prompt
    # @param max_tokens [Integer] generation cap
    # @param temperature [Float] sampling temperature
    # @param top_p [Float] nucleus-sampling threshold
    # @param top_k [Integer] top-k sampling cutoff
    # @param processor [Symbol, nil] only sent when given (server picks otherwise)
    # @return [JobResult]
    # @raise [ClientClosedError] if the client has been closed
    def inference(model, prompt, max_tokens: 256, temperature: 0.7, top_p: 0.9, top_k: 50, processor: nil)
      check_closed!

      body = {
        operation: "inference",
        model: model,
        prompt: prompt,
        max_tokens: max_tokens,
        temperature: temperature,
        top_p: top_p,
        top_k: top_k
      }
      body[:processor] = processor.to_s if processor

      response = @conn.post("/inference", body)
      parse_job_result(response.body)
    end

    # Streams an LLM completion, yielding each generated token as it arrives.
    #
    # The server responds with SSE-style "data: ..." lines; each JSON payload's
    # "token" field is yielded. Malformed JSON chunks are skipped; a
    # "data: [DONE]" sentinel ends consumption of the current chunk.
    #
    # @param model [String] model identifier
    # @param prompt [String] input prompt
    # @param max_tokens [Integer] generation cap
    # @param temperature [Float] sampling temperature
    # @yieldparam token [String] next generated token
    # @raise [ArgumentError] when no block is given
    # @raise [ClientClosedError] if the client has been closed
    def inference_stream(model, prompt, max_tokens: 256, temperature: 0.7, &block)
      check_closed!
      raise ArgumentError, "Block required for streaming" unless block_given?

      body = {
        operation: "inference",
        model: model,
        prompt: prompt,
        max_tokens: max_tokens,
        temperature: temperature,
        stream: true
      }

      @conn.post("/inference/stream", body) do |req|
        req.options.on_data = proc do |chunk, _|
          chunk.each_line do |line|
            next unless line.start_with?("data: ")

            data = line[6..].strip
            break if data == "[DONE]"

            begin
              json = JSON.parse(data)
              yield json["token"] if json["token"]
            rescue JSON::ParserError
              # Skip malformed JSON
            end
          end
        end
      end
    end

    # ==================== Model Registry ====================

    # Lists available models, optionally filtered by category.
    #
    # @param category [String, Symbol, nil] server-side category filter
    # @return [Array<ModelInfo>]
    # @raise [ClientClosedError] if the client has been closed
    def list_models(category: nil)
      check_closed!

      # BUGFIX: pass the filter as a Faraday params hash so the value is
      # URL-encoded, instead of interpolating it raw into the path.
      params = {}
      params[:category] = category if category
      response = @conn.get("/models", params)
      response.body["models"].map { |m| ModelInfo.from_hash(m) }
    end

    # Fetches metadata for a single model.
    #
    # @param model_id [String] model identifier
    # @return [ModelInfo]
    # @raise [ClientClosedError] if the client has been closed
    def get_model(model_id)
      check_closed!

      response = @conn.get("/models/#{model_id}")
      ModelInfo.from_hash(response.body)
    end

    # Full-text search over the model registry.
    #
    # @param query [String] search terms
    # @return [Array<ModelInfo>]
    # @raise [ClientClosedError] if the client has been closed
    def search_models(query)
      check_closed!

      response = @conn.get("/models/search", q: query)
      response.body["models"].map { |m| ModelInfo.from_hash(m) }
    end

    # ==================== Pricing & Usage ====================

    # Retrieves the current price list.
    #
    # @return [Array<PricingInfo>]
    # @raise [ClientClosedError] if the client has been closed
    def get_pricing
      check_closed!

      response = @conn.get("/pricing")
      response.body["pricing"].map { |p| PricingInfo.from_hash(p) }
    end

    # Retrieves usage statistics for the authenticated account.
    #
    # @return [UsageStats]
    # @raise [ClientClosedError] if the client has been closed
    def get_usage
      check_closed!

      response = @conn.get("/usage")
      UsageStats.from_hash(response.body)
    end

    # ==================== Health Check ====================

    # Pings the service. Never raises: any transport or parse error is
    # reported as unhealthy. Deliberately usable even after #close.
    #
    # @return [Boolean] true when the service reports itself healthy
    def health_check
      response = @conn.get("/health")
      response.body["status"] == "healthy"
    rescue StandardError
      false
    end

    # ==================== Lifecycle ====================

    # Marks the client closed and releases the underlying connection if the
    # Faraday adapter supports it. Subsequent API calls raise ClientClosedError.
    def close
      @closed = true
      @conn.close if @conn.respond_to?(:close)
    end

    # @return [Boolean] whether #close has been called
    def closed?
      @closed
    end

    private

    # Guard used by every API method; raises once the client is closed.
    # @raise [ClientClosedError]
    def check_closed!
      raise ClientClosedError, "Client has been closed" if @closed
    end

    # Serializes a tensor-like object (responding to #shape, #data, #dtype)
    # into the wire format expected by the compute endpoints.
    def tensor_to_hash(tensor)
      {
        shape: tensor.shape,
        data: tensor.data,
        dtype: tensor.dtype.to_s
      }
    end

    # Wraps a parsed response body in a JobResult value object.
    def parse_job_result(body)
      JobResult.from_hash(body)
    end
  end
end
|