synor/sdk/flutter/example/example.dart
Gulshan Yadav cb071a7a3b feat(sdk/flutter): add dataset upload APIs and comprehensive examples
Add comprehensive dataset management to the Flutter SDK including:
- Dataset formats: JSONL, CSV, Parquet, Arrow, HuggingFace, TFRecord, WebDataset, Text, ImageFolder, Custom
- Dataset types: text completion, instruction tuning, chat, Q&A, classification, NER, vision, audio
- Upload methods: uploadDataset, uploadDatasetFromFile, createDatasetFromRecords
- Management APIs: listDatasets, getDataset, deleteDataset
- Dataset preprocessing: splitting, shuffling, deduplication, tokenization
- Complete examples showing all formats and use cases
2026-01-11 16:47:47 +05:30

519 lines
16 KiB
Dart

import 'dart:convert';
import 'dart:io';
import 'package:synor_compute/synor_compute.dart';
/// Example usage of Synor Compute SDK for Flutter/Dart.
///
/// Initializes a client from the `SYNOR_API_KEY` environment variable
/// (falling back to a placeholder), checks service health, then runs each
/// example in order before disposing the client.
void main() async {
  // Initialize client with API key and optional defaults.
  final client = SynorCompute(
    apiKey: Platform.environment['SYNOR_API_KEY'] ?? 'your-api-key',
    defaultProcessor: ProcessorType.auto,
    defaultPrecision: Precision.fp32,
    defaultPriority: Priority.normal,
  );
  try {
    // Check service health before running anything else.
    final healthy = await client.healthCheck();
    print('Service healthy: $healthy\n');
    // The examples, in presentation order (1-9).
    final examples = <Future<void> Function(SynorCompute)>[
      matrixMultiplicationExample, // 1: Matrix multiplication
      tensorOperationsExample, // 2: Tensor operations
      llmInferenceExample, // 3: LLM inference
      streamingInferenceExample, // 4: Streaming inference
      pricingExample, // 5: Pricing and usage
      modelRegistryExample, // 6: List available models
      trainingExample, // 7: Training a model
      customModelExample, // 8: Custom model upload
      datasetUploadExamples, // 9: Dataset upload formats
    ];
    for (final example in examples) {
      await example(client);
    }
  } finally {
    // Always dispose client to release resources.
    client.dispose();
  }
}
/// Demonstrates an offloaded matrix multiplication.
///
/// Multiplies two random matrices remotely (GPU, FP16, high priority) and
/// reports the result shape, timing, cost, and processor on success.
Future<void> matrixMultiplicationExample(SynorCompute client) async {
  print('=== Matrix Multiplication ===');
  // Random operands with compatible inner dimensions (512).
  final lhs = Tensor.rand([256, 512]);
  final rhs = Tensor.rand([512, 256]);
  print('A: ${lhs.shape}');
  print('B: ${rhs.shape}');
  // Request GPU execution at reduced precision with elevated priority.
  final opts = MatMulOptions(
    precision: Precision.fp16,
    processor: ProcessorType.gpu,
    priority: Priority.high,
  );
  final response = await client.matmul(lhs, rhs, options: opts);
  if (!response.isSuccess) {
    print('Error: ${response.error}');
  } else {
    print('Result: ${response.result!.shape}');
    print('Execution time: ${response.executionTimeMs}ms');
    print('Cost: \$${response.cost?.toStringAsFixed(6)}');
    print('Processor: ${response.processor?.value}');
  }
  print('');
}
/// Demonstrates local (client-side) tensor construction and math.
///
/// Covers random sampling, identity/linspace/arange constructors, reshape
/// and transpose, activation functions, and softmax.
Future<void> tensorOperationsExample(SynorCompute client) async {
  print('=== Tensor Operations ===');
  // Draw 100 samples from a standard normal distribution.
  final sample = Tensor.randn([100], mean: 0.0, std: 1.0);
  print('Random normal tensor: mean=${sample.mean().toStringAsFixed(4)}, '
      'std=${sample.std().toStringAsFixed(4)}');
  // 4x4 identity matrix.
  final identity = Tensor.eye(4);
  print('Identity matrix:\n${identity.toNestedList()}');
  // Five evenly spaced values over [0, 10].
  final spaced = Tensor.linspace(0, 10, 5);
  print('Linspace [0, 10, 5]: ${spaced.toNestedList()}');
  // Build 0..11 and view it as a 3x4 matrix.
  final grid = Tensor.arange(0, 12).reshape([3, 4]);
  print('Reshaped [0..12] to [3,4]:\n${grid.toNestedList()}');
  // Swap the matrix axes.
  final flipped = grid.transpose();
  print('Transposed to ${flipped.shape}');
  // Elementwise activation functions on a small signed vector.
  final signed = Tensor(shape: [5], data: [-2.0, -1.0, 0.0, 1.0, 2.0]);
  print('ReLU of $signed: ${signed.relu().toNestedList()}');
  print('Sigmoid of $signed: ${signed.sigmoid().toNestedList()}');
  // Normalize logits into a probability distribution.
  final scores = Tensor(shape: [4], data: [1.0, 2.0, 3.0, 4.0]);
  print('Softmax of $scores: ${scores.softmax().toNestedList()}');
  print('');
}
/// Demonstrates a single (non-streaming) LLM completion.
///
/// Asks a short factual question with low temperature and a small token
/// budget, then prints the response and latency (or the error).
Future<void> llmInferenceExample(SynorCompute client) async {
  print('=== LLM Inference ===');
  // Low temperature keeps the answer focused; LPU suits LLM workloads.
  final opts = InferenceOptions(
    maxTokens: 10,
    temperature: 0.1,
    processor: ProcessorType.lpu, // Use LPU for LLM
  );
  final reply = await client.inference(
    'llama-3-70b',
    'What is the capital of France? Answer in one word.',
    options: opts,
  );
  if (reply.isSuccess) {
    print('Response: ${reply.result}');
    print('Time: ${reply.executionTimeMs}ms');
  } else {
    print('Error: ${reply.error}');
  }
  print('');
}
/// Demonstrates token-by-token streaming inference.
///
/// Requests a short generation and writes each streamed token to stdout
/// as it arrives, without interleaving newlines.
Future<void> streamingInferenceExample(SynorCompute client) async {
  print('=== Streaming Inference ===');
  print('Response: ');
  final stream = client.inferenceStream(
    'llama-3-70b',
    'Write a short poem about distributed computing.',
    options: InferenceOptions(maxTokens: 100, temperature: 0.7),
  );
  // Drain the stream; stdout.write avoids a newline per token.
  await for (final chunk in stream) {
    stdout.write(chunk);
  }
  print('\n');
}
/// Demonstrates querying spot pricing and account usage statistics.
///
/// Prints the current per-processor spot prices, then aggregate job and
/// cost statistics for the account.
Future<void> pricingExample(SynorCompute client) async {
  print('=== Pricing Information ===');
  final quotes = await client.getPricing();
  print('Current spot prices:');
  // One line per processor type: price, capacity, utilization.
  for (final quote in quotes) {
    print(' ${quote.processor.value.toUpperCase().padRight(8)}: '
        '\$${quote.pricePerSecond.toStringAsFixed(6)}/sec, '
        '${quote.availableUnits} units available, '
        '${quote.utilizationPercent.toStringAsFixed(1)}% utilized');
  }
  print('');
  // Aggregate job/cost statistics for this account.
  final stats = await client.getUsage();
  print('Usage Statistics:');
  print(' Total jobs: ${stats.totalJobs}');
  print(' Completed: ${stats.completedJobs}');
  print(' Failed: ${stats.failedJobs}');
  print(' Total compute time: ${stats.totalComputeSeconds.toStringAsFixed(2)}s');
  print(' Total cost: \$${stats.totalCost.toStringAsFixed(4)}');
  print('');
}
/// Demonstrates browsing, searching, and inspecting the model registry.
///
/// Lists all models, filters by category (LLM, embedding, image
/// generation), searches by keyword, and fetches one model's details.
Future<void> modelRegistryExample(SynorCompute client) async {
  print('=== Model Registry ===');
  // Full catalogue size.
  final catalogue = await client.listModels();
  print('Total available models: ${catalogue.length}');
  // Narrow the listing to large language models; show the first five.
  final languageModels = await client.listModels(category: ModelCategory.llm);
  print('\nAvailable LLMs:');
  for (final entry in languageModels.take(5)) {
    print(' ${entry.id.padRight(20)} ${entry.formattedParameters.padRight(8)} '
        '${entry.name}');
  }
  // Free-text search across the registry.
  final hits = await client.searchModels('llama');
  print('\nSearch "llama": ${hits.length} results');
  // Fetch the full record for one specific model.
  final details = await client.getModel('llama-3-70b');
  print('\nModel details for ${details.name}:');
  print(' Parameters: ${details.formattedParameters}');
  print(' Context length: ${details.contextLength}');
  print(' Format: ${details.format.value}');
  print(' Recommended processor: ${details.recommendedProcessor.value}');
  print(' License: ${details.license}');
  // Category-filtered listings: embeddings, then image generation.
  final embedders = await client.listModels(category: ModelCategory.embedding);
  print('\nAvailable embedding models:');
  for (final entry in embedders) {
    print(' ${entry.id} - ${entry.name}');
  }
  final painters =
      await client.listModels(category: ModelCategory.imageGeneration);
  print('\nAvailable image generation models:');
  for (final entry in painters) {
    print(' ${entry.id} - ${entry.name}');
  }
  print('');
}
/// Training example - train/fine-tune a model.
///
/// Walks through the three-step workflow: upload a JSONL dataset, fine-tune
/// `llama-3-8b` on it, then query the newly trained model by its CID.
///
/// Fix: the Step-3 test inference now checks `isSuccess` before printing the
/// result, consistent with every other example in this file (previously a
/// failed inference would print a null result with no error).
Future<void> trainingExample(SynorCompute client) async {
  print('=== Model Training ===');
  // ========== STEP 1: Upload your dataset ==========
  print('Step 1: Uploading training dataset...\n');
  // JSONL format (most common for LLM fine-tuning): one JSON object per line.
  final jsonlData = '''
{"prompt": "What is the capital of France?", "completion": "Paris"}
{"prompt": "Translate 'hello' to Spanish", "completion": "hola"}
{"prompt": "What is 2 + 2?", "completion": "4"}
{"prompt": "Who wrote Romeo and Juliet?", "completion": "William Shakespeare"}
''';
  final dataset = await client.uploadDataset(
    utf8.encode(jsonlData),
    DatasetUploadOptions(
      name: 'qa-training-data',
      description: 'Question-answering training dataset',
      format: DatasetFormat.jsonl,
      type: DatasetType.textCompletion,
      // Seeded 80/10/10 split for reproducible train/val/test partitions.
      split: DatasetSplit(train: 0.8, validation: 0.1, test: 0.1, seed: 42),
      preprocessing: DatasetPreprocessing(
        maxLength: 2048,
        shuffle: true,
        deduplicate: true,
      ),
    ),
  );
  print('Dataset uploaded!');
  print(' CID: ${dataset.cid}');
  print(' Total samples: ${dataset.totalSamples}');
  print(' Train/Val/Test: ${dataset.trainSamples}/${dataset.validationSamples}/${dataset.testSamples}');
  print(' Schema: ${dataset.schema}');
  // ========== STEP 2: Fine-tune the model ==========
  print('\nStep 2: Fine-tuning llama-3-8b on dataset...\n');
  final result = await client.fineTune(
    baseModel: 'llama-3-8b',
    datasetCid: dataset.cid, // Use the CID from upload
    outputAlias: 'my-qa-model',
    options: TrainingOptions(
      framework: MlFramework.pytorch,
      epochs: 3,
      batchSize: 8,
      learningRate: 0.00002,
      optimizer: 'adamw',
      hyperparameters: {
        'weight_decay': 0.01,
        'warmup_steps': 100,
        'gradient_accumulation_steps': 4,
      },
      checkpointEvery: 500,
      processor: ProcessorType.gpu,
      priority: Priority.high,
    ),
  );
  if (result.isSuccess) {
    final training = result.result!;
    print('Training completed!');
    print(' New model CID: ${training.modelCid}');
    print(' Final loss: ${training.finalLoss.toStringAsFixed(4)}');
    print(' Duration: ${training.durationMs / 1000}s');
    print(' Cost: \$${training.cost.toStringAsFixed(4)}');
    // ========== STEP 3: Use your trained model ==========
    print('\nStep 3: Testing trained model...\n');
    final inference = await client.inference(
      training.modelCid,
      'What is the capital of Germany?',
      options: InferenceOptions(maxTokens: 50),
    );
    // Check the outcome like the other examples do, instead of printing a
    // possibly-null result on failure.
    if (inference.isSuccess) {
      print('Response: ${inference.result}');
    } else {
      print('Inference failed: ${inference.error}');
    }
  } else {
    print('Training failed: ${result.error}');
  }
  print('');
}
/// Dataset upload examples - shows all supported formats
///
/// Prints usage snippets for each supported dataset format, performs two
/// real uploads (an in-memory records dataset and a CSV dataset), and
/// enumerates every [DatasetFormat] and [DatasetType] value with a short
/// description. Formats 3-5 are shown as snippets only; nothing is
/// uploaded for them here.
Future<void> datasetUploadExamples(SynorCompute client) async {
print('=== Dataset Upload Examples ===\n');
// ========== FORMAT 1: JSONL (JSON Lines) ==========
print('Format 1: JSONL - One JSON object per line');
print('''
// Text completion format
{"prompt": "Hello", "completion": "Hi there!"}
// Instruction tuning format
{"instruction": "Summarize", "input": "Long text...", "output": "Summary"}
// Chat format
{"messages": [{"role": "user", "content": "Hi"}, {"role": "assistant", "content": "Hello!"}]}
''');
// Example: Instruction tuning dataset
// Built directly from in-memory maps; the returned metadata includes the
// dataset's content identifier (CID).
final instructionData = await client.createDatasetFromRecords(
name: 'instruction-dataset',
records: [
{
'instruction': 'Summarize the following text',
'input': 'The quick brown fox jumps over the lazy dog.',
'output': 'A fox jumps over a dog.'
},
{
'instruction': 'Translate to French',
'input': 'Hello world',
'output': 'Bonjour le monde'
},
],
type: DatasetType.instructionTuning,
);
print('Instruction dataset CID: ${instructionData.cid}');
// ========== FORMAT 2: CSV ==========
print('\nFormat 2: CSV - Comma-separated values with headers');
print('''
prompt,completion
"What is AI?","Artificial Intelligence is..."
"Define ML","Machine Learning is..."
''');
final csvData = '''
prompt,completion
"What is AI?","Artificial Intelligence is the simulation of human intelligence"
"Define ML","Machine Learning is a subset of AI that learns from data"
''';
// columnMapping renames the CSV headers to the field names the service
// expects (presumably 'input'/'output' — confirm against the SDK docs).
final csvDataset = await client.uploadDataset(
utf8.encode(csvData),
DatasetUploadOptions(
name: 'csv-dataset',
format: DatasetFormat.csv,
type: DatasetType.textCompletion,
columnMapping: {'prompt': 'input', 'completion': 'output'},
),
);
print('CSV dataset CID: ${csvDataset.cid}');
// ========== FORMAT 3: Parquet (for large datasets) ==========
// Snippet only — no upload is performed for this format.
print('\nFormat 3: Parquet - Efficient columnar format for large datasets');
print(' - Best for datasets > 1GB');
print(' - Supports compression');
print(' - Fast random access');
print('''
final parquetDataset = await client.uploadDatasetFromFile(
'/path/to/dataset.parquet',
DatasetUploadOptions(
name: 'large-dataset',
format: DatasetFormat.parquet,
type: DatasetType.textCompletion,
),
);
''');
// ========== FORMAT 4: HuggingFace ==========
print('\nFormat 4: HuggingFace datasets format');
print(' - Compatible with datasets library');
print(' - Automatic schema detection');
// ========== FORMAT 5: Image folder ==========
// Directory-per-class layout for image classification datasets.
print('\nFormat 5: Image folder structure');
print('''
dataset/
├── train/
│ ├── cat/
│ │ ├── img001.jpg
│ │ └── img002.jpg
│ └── dog/
│ ├── img001.jpg
│ └── img002.jpg
└── val/
├── cat/
└── dog/
''');
// ========== ALL SUPPORTED FORMATS ==========
// Exhaustive switch expression over DatasetFormat: a newly added enum
// value would surface here as a compile-time error.
print('\nAll supported dataset formats:');
for (final format in DatasetFormat.values) {
final description = switch (format) {
DatasetFormat.jsonl => 'JSON Lines - one JSON per line (recommended for text)',
DatasetFormat.csv => 'CSV - comma-separated with headers',
DatasetFormat.parquet => 'Parquet - columnar format for large datasets',
DatasetFormat.arrow => 'Apache Arrow - in-memory format',
DatasetFormat.huggingface => 'HuggingFace datasets format',
DatasetFormat.tfrecord => 'TFRecord - TensorFlow format',
DatasetFormat.webdataset => 'WebDataset - PyTorch streaming format',
DatasetFormat.text => 'Plain text - one sample per line',
DatasetFormat.imagefolder => 'Image folder structure',
DatasetFormat.custom => 'Custom binary format',
};
print(' ${format.value.padRight(15)} - $description');
}
// ========== ALL DATASET TYPES ==========
// Same pattern for DatasetType: exhaustive, so the listing stays in sync
// with the enum.
print('\nAll supported dataset types:');
for (final type in DatasetType.values) {
final description = switch (type) {
DatasetType.textCompletion => 'prompt → completion pairs',
DatasetType.instructionTuning => 'instruction + input → output',
DatasetType.chat => 'multi-turn conversations',
DatasetType.questionAnswering => 'question → answer pairs',
DatasetType.textClassification => 'text → label',
DatasetType.ner => 'named entity recognition',
DatasetType.imageClassification => 'image → label',
DatasetType.objectDetection => 'image → bounding boxes',
DatasetType.imageSegmentation => 'image → mask',
DatasetType.imageText => 'image-text pairs (CLIP, etc.)',
DatasetType.audioTranscription => 'audio → text',
DatasetType.custom => 'custom format',
};
print(' ${type.value.padRight(22)} - $description');
}
print('');
}
/// Custom model upload example
///
/// Prints a walkthrough (as a code snippet — no network call is made for
/// the upload itself) showing how to export a Python-trained model to ONNX
/// and upload it, then lists every supported [ModelFormat] and
/// [ModelCategory] value.
Future<void> customModelExample(SynorCompute client) async {
print('=== Custom Model Upload ===');
// Example: Upload a custom ONNX model
// In practice, you'd read this from a file:
// final modelBytes = await File('my_model.onnx').readAsBytes();
// For demonstration, we'll show the API structure
print('To upload your own Python-trained model:');
print('''
1. Train your model in Python:
import torch
model = MyModel()
# ... train model ...
torch.onnx.export(model, dummy_input, "my_model.onnx")
2. Upload to Synor Compute:
final modelBytes = await File('my_model.onnx').readAsBytes();
final result = await client.uploadModel(
modelBytes,
ModelUploadOptions(
name: 'my-custom-model',
description: 'My custom trained model',
category: ModelCategory.custom,
format: ModelFormat.onnx,
alias: 'my-model', // Optional shortcut name
isPublic: false, // Keep private
license: 'Proprietary',
),
);
print('Uploaded! CID: \${result.cid}');
3. Use for inference:
final result = await client.inference(
result.cid, // or 'my-model' if you set an alias
'Your input data',
);
''');
// Supported model formats — enumerated from the SDK's ModelFormat enum.
print('Supported model formats:');
for (final format in ModelFormat.values) {
print(' - ${format.value}');
}
// Supported categories — enumerated from the SDK's ModelCategory enum.
print('\nSupported model categories:');
for (final category in ModelCategory.values) {
print(' - ${category.value}');
}
print('');
}