Expands SDK support to 8 additional languages/frameworks:

- Java SDK with Maven/OkHttp/Jackson
- Kotlin SDK with Gradle/Ktor/kotlinx.serialization
- Swift SDK with Swift Package Manager/async-await
- C SDK with CMake/libcurl
- C++ SDK with CMake/Modern C++20
- C# SDK with .NET 8.0/HttpClient
- Ruby SDK with Bundler/Faraday
- Rust SDK with Cargo/reqwest/tokio

All SDKs include:

- Tensor operations (matmul, conv2d, attention)
- LLM inference with streaming support
- Model registry, pricing, and usage APIs
- Builder patterns where idiomatic
- Full type safety
216 lines
5.4 KiB
Ruby
# frozen_string_literal: true
|
|
|
|
require "faraday"
|
|
require "json"
|
|
|
|
module SynorCompute
  # Synor Compute SDK Client
  #
  # Thin Faraday-based HTTP client for the Synor Compute API. All request
  # methods raise ClientClosedError once #close has been called.
  #
  # @example
  #   client = SynorCompute::Client.new(api_key: 'your-api-key')
  #
  #   # Matrix multiplication
  #   result = client.matmul(tensor_a, tensor_b, processor: :gpu)
  #
  #   # LLM inference
  #   response = client.inference('llama-3-70b', 'Hello!')
  #
  class Client
    # @return [Config] the resolved client configuration
    attr_reader :config

    # Creates a new client and its underlying Faraday connection.
    #
    # @param api_key [String, nil] API key, forwarded to Config
    # @param options [Hash] additional Config options (base_url, timeout,
    #   default_precision/processor/priority, ...)
    # @raise [ArgumentError] when no API key is available after Config resolution
    def initialize(api_key: nil, **options)
      @config = Config.new(api_key: api_key, **options)
      raise ArgumentError, "API key is required" unless @config.api_key

      @conn = Faraday.new(url: @config.base_url) do |f|
        f.request :json  # encode request bodies as JSON
        f.response :json # parse JSON responses into hashes
        f.options.timeout = @config.timeout
        f.headers["Authorization"] = "Bearer #{@config.api_key}"
        f.headers["X-SDK-Version"] = "ruby/#{VERSION}"
      end
      @closed = false
    end

    # ==================== Matrix Operations ====================

    # Submits a matrix-multiplication job (a x b).
    #
    # @param a [#shape, #data, #dtype] left operand tensor
    # @param b [#shape, #data, #dtype] right operand tensor
    # @param precision [Symbol, nil] overrides Config#default_precision
    # @param processor [Symbol, nil] overrides Config#default_processor
    # @param priority [Symbol, nil] overrides Config#default_priority
    # @return [JobResult]
    # @raise [ClientClosedError] if the client has been closed
    def matmul(a, b, precision: nil, processor: nil, priority: nil)
      check_closed!

      body = {
        operation: "matmul",
        a: tensor_to_hash(a),
        b: tensor_to_hash(b),
        precision: (precision || @config.default_precision).to_s,
        processor: (processor || @config.default_processor).to_s,
        priority: (priority || @config.default_priority).to_s
      }

      response = @conn.post("/compute", body)
      parse_job_result(response.body)
    end

    # Submits a 2-D convolution job.
    #
    # @param input [#shape, #data, #dtype] input tensor
    # @param kernel [#shape, #data, #dtype] convolution kernel
    # @param stride [Array(Integer, Integer)] per-axis stride
    # @param padding [Array(Integer, Integer)] per-axis padding
    # @param precision [Symbol, nil] overrides Config#default_precision
    # @param processor [Symbol, nil] overrides Config#default_processor
    # @return [JobResult]
    # @raise [ClientClosedError] if the client has been closed
    def conv2d(input, kernel, stride: [1, 1], padding: [0, 0], precision: nil, processor: nil)
      check_closed!

      body = {
        operation: "conv2d",
        input: tensor_to_hash(input),
        kernel: tensor_to_hash(kernel),
        stride: stride,
        padding: padding,
        precision: (precision || @config.default_precision).to_s,
        # BUGFIX: the processor keyword was accepted but silently dropped;
        # send it in the payload exactly as matmul does.
        processor: (processor || @config.default_processor).to_s
      }

      response = @conn.post("/compute", body)
      parse_job_result(response.body)
    end

    # Submits a multi-head attention job.
    #
    # @param query [#shape, #data, #dtype] query tensor
    # @param key [#shape, #data, #dtype] key tensor
    # @param value [#shape, #data, #dtype] value tensor
    # @param num_heads [Integer] number of attention heads
    # @param flash [Boolean] request flash-attention kernels
    # @param precision [Symbol, nil] defaults to Precision::FP16 (not the
    #   Config default) — NOTE(review): presumably intentional for flash
    #   attention; confirm against the API docs.
    # @param processor [Symbol, nil] overrides Config#default_processor
    # @return [JobResult]
    # @raise [ClientClosedError] if the client has been closed
    def attention(query, key, value, num_heads: 8, flash: true, precision: nil, processor: nil)
      check_closed!

      body = {
        operation: "attention",
        query: tensor_to_hash(query),
        key: tensor_to_hash(key),
        value: tensor_to_hash(value),
        num_heads: num_heads,
        flash: flash,
        precision: (precision || Precision::FP16).to_s,
        # BUGFIX: the processor keyword was accepted but silently dropped;
        # send it in the payload exactly as matmul does.
        processor: (processor || @config.default_processor).to_s
      }

      response = @conn.post("/compute", body)
      parse_job_result(response.body)
    end

    # ==================== LLM Inference ====================

    # Runs a blocking (non-streaming) LLM completion.
    #
    # @param model [String] model identifier, e.g. 'llama-3-70b'
    # @param prompt [String] input prompt
    # @param max_tokens [Integer] generation cap
    # @param temperature [Float] sampling temperature
    # @param top_p [Float] nucleus-sampling threshold
    # @param top_k [Integer] top-k sampling cutoff
    # @param processor [Symbol, nil] only sent when given (server picks otherwise)
    # @return [JobResult]
    # @raise [ClientClosedError] if the client has been closed
    def inference(model, prompt, max_tokens: 256, temperature: 0.7, top_p: 0.9, top_k: 50, processor: nil)
      check_closed!

      body = {
        operation: "inference",
        model: model,
        prompt: prompt,
        max_tokens: max_tokens,
        temperature: temperature,
        top_p: top_p,
        top_k: top_k
      }
      body[:processor] = processor.to_s if processor

      response = @conn.post("/inference", body)
      parse_job_result(response.body)
    end

    # Streams an LLM completion, yielding each generated token as it arrives.
    #
    # The server responds with SSE-style "data: ..." lines; each JSON payload's
    # "token" field is yielded. Malformed JSON chunks are skipped; a
    # "data: [DONE]" sentinel ends consumption of the current chunk.
    #
    # @param model [String] model identifier
    # @param prompt [String] input prompt
    # @param max_tokens [Integer] generation cap
    # @param temperature [Float] sampling temperature
    # @yieldparam token [String] next generated token
    # @raise [ArgumentError] when no block is given
    # @raise [ClientClosedError] if the client has been closed
    def inference_stream(model, prompt, max_tokens: 256, temperature: 0.7, &block)
      check_closed!
      raise ArgumentError, "Block required for streaming" unless block_given?

      body = {
        operation: "inference",
        model: model,
        prompt: prompt,
        max_tokens: max_tokens,
        temperature: temperature,
        stream: true
      }

      @conn.post("/inference/stream", body) do |req|
        req.options.on_data = proc do |chunk, _|
          chunk.each_line do |line|
            next unless line.start_with?("data: ")

            data = line[6..].strip
            break if data == "[DONE]"

            begin
              json = JSON.parse(data)
              yield json["token"] if json["token"]
            rescue JSON::ParserError
              # Skip malformed JSON
            end
          end
        end
      end
    end

    # ==================== Model Registry ====================

    # Lists available models, optionally filtered by category.
    #
    # @param category [String, Symbol, nil] server-side category filter
    # @return [Array<ModelInfo>]
    # @raise [ClientClosedError] if the client has been closed
    def list_models(category: nil)
      check_closed!

      # BUGFIX: pass the filter as a Faraday params hash so the value is
      # URL-encoded, instead of interpolating it raw into the path.
      params = {}
      params[:category] = category if category
      response = @conn.get("/models", params)
      response.body["models"].map { |m| ModelInfo.from_hash(m) }
    end

    # Fetches metadata for a single model.
    #
    # @param model_id [String] model identifier
    # @return [ModelInfo]
    # @raise [ClientClosedError] if the client has been closed
    def get_model(model_id)
      check_closed!

      response = @conn.get("/models/#{model_id}")
      ModelInfo.from_hash(response.body)
    end

    # Full-text search over the model registry.
    #
    # @param query [String] search terms
    # @return [Array<ModelInfo>]
    # @raise [ClientClosedError] if the client has been closed
    def search_models(query)
      check_closed!

      response = @conn.get("/models/search", q: query)
      response.body["models"].map { |m| ModelInfo.from_hash(m) }
    end

    # ==================== Pricing & Usage ====================

    # Retrieves the current price list.
    #
    # @return [Array<PricingInfo>]
    # @raise [ClientClosedError] if the client has been closed
    def get_pricing
      check_closed!

      response = @conn.get("/pricing")
      response.body["pricing"].map { |p| PricingInfo.from_hash(p) }
    end

    # Retrieves usage statistics for the authenticated account.
    #
    # @return [UsageStats]
    # @raise [ClientClosedError] if the client has been closed
    def get_usage
      check_closed!

      response = @conn.get("/usage")
      UsageStats.from_hash(response.body)
    end

    # ==================== Health Check ====================

    # Pings the service. Never raises: any transport or parse error is
    # reported as unhealthy. Deliberately usable even after #close.
    #
    # @return [Boolean] true when the service reports itself healthy
    def health_check
      response = @conn.get("/health")
      response.body["status"] == "healthy"
    rescue StandardError
      false
    end

    # ==================== Lifecycle ====================

    # Marks the client closed and releases the underlying connection if the
    # Faraday adapter supports it. Subsequent API calls raise ClientClosedError.
    def close
      @closed = true
      @conn.close if @conn.respond_to?(:close)
    end

    # @return [Boolean] whether #close has been called
    def closed?
      @closed
    end

    private

    # Guard used by every API method; raises once the client is closed.
    # @raise [ClientClosedError]
    def check_closed!
      raise ClientClosedError, "Client has been closed" if @closed
    end

    # Serializes a tensor-like object (responding to #shape, #data, #dtype)
    # into the wire format expected by the compute endpoints.
    def tensor_to_hash(tensor)
      {
        shape: tensor.shape,
        data: tensor.data,
        dtype: tensor.dtype.to_s
      }
    end

    # Wraps a parsed response body in a JobResult value object.
    def parse_job_result(body)
      JobResult.from_hash(body)
    end
  end
end
|