Add comprehensive dataset management to the Flutter SDK including: - Dataset formats: JSONL, CSV, Parquet, Arrow, HuggingFace, TFRecord, WebDataset, Text, ImageFolder, Custom - Dataset types: text completion, instruction tuning, chat, Q&A, classification, NER, vision, audio - Upload methods: uploadDataset, uploadDatasetFromFile, createDatasetFromRecords - Management APIs: listDatasets, getDataset, deleteDataset - Dataset preprocessing: splitting, shuffling, deduplication, tokenization - Complete examples showing all formats and use cases
519 lines · 16 KiB · Dart
import 'dart:convert';
|
|
import 'dart:io';
|
|
|
|
import 'package:synor_compute/synor_compute.dart';
|
|
|
|
/// Example usage of Synor Compute SDK for Flutter/Dart
void main() async {
  // Initialize client with API key
  final client = SynorCompute(
    apiKey: Platform.environment['SYNOR_API_KEY'] ?? 'your-api-key',
    // Optional: customize defaults
    defaultProcessor: ProcessorType.auto,
    defaultPrecision: Precision.fp32,
    defaultPriority: Priority.normal,
  );

  try {
    // Verify the service is reachable before running any examples.
    final isHealthy = await client.healthCheck();
    print('Service healthy: $isHealthy\n');

    // Run every example sequentially; each one is independent of the rest.
    final examples = <Future<void> Function(SynorCompute)>[
      matrixMultiplicationExample, // Example 1: Matrix multiplication
      tensorOperationsExample, // Example 2: Tensor operations
      llmInferenceExample, // Example 3: LLM inference
      streamingInferenceExample, // Example 4: Streaming inference
      pricingExample, // Example 5: Pricing and usage
      modelRegistryExample, // Example 6: List available models
      trainingExample, // Example 7: Training a model
      customModelExample, // Example 8: Custom model upload
      datasetUploadExamples, // Example 9: Dataset upload formats
    ];
    for (final example in examples) {
      await example(client);
    }
  } finally {
    // Always dispose client to release resources.
    client.dispose();
  }
}
|
|
|
|
/// Matrix multiplication example
///
/// Multiplies two random matrices remotely (GPU, FP16, high priority) and
/// prints the result shape, timing, cost, and processor used.
Future<void> matrixMultiplicationExample(SynorCompute client) async {
  print('=== Matrix Multiplication ===');

  // Two random matrices with a matching inner dimension (512).
  final lhs = Tensor.rand([256, 512]);
  final rhs = Tensor.rand([512, 256]);

  print('A: ${lhs.shape}');
  print('B: ${rhs.shape}');

  // Request GPU execution at FP16 precision with high priority.
  final mulOptions = MatMulOptions(
    precision: Precision.fp16,
    processor: ProcessorType.gpu,
    priority: Priority.high,
  );
  final response = await client.matmul(lhs, rhs, options: mulOptions);

  if (!response.isSuccess) {
    print('Error: ${response.error}');
  } else {
    print('Result: ${response.result!.shape}');
    print('Execution time: ${response.executionTimeMs}ms');
    print('Cost: \$${response.cost?.toStringAsFixed(6)}');
    print('Processor: ${response.processor?.value}');
  }
  print('');
}
|
|
|
|
/// Local tensor operations example
///
/// Demonstrates tensor construction and math helpers that run entirely
/// locally; [client] is unused here but kept for a uniform signature.
Future<void> tensorOperationsExample(SynorCompute client) async {
  print('=== Tensor Operations ===');

  // Sample 100 values from a standard normal distribution.
  final sample = Tensor.randn([100], mean: 0.0, std: 1.0);
  print('Random normal tensor: mean=${sample.mean().toStringAsFixed(4)}, '
      'std=${sample.std().toStringAsFixed(4)}');

  // 4x4 identity matrix.
  final identity = Tensor.eye(4);
  print('Identity matrix:\n${identity.toNestedList()}');

  // Five evenly spaced values from 0 to 10.
  final spaced = Tensor.linspace(0, 10, 5);
  print('Linspace [0, 10, 5]: ${spaced.toNestedList()}');

  // Build 0..11 and view it as a 3x4 matrix.
  final grid = Tensor.arange(0, 12).reshape([3, 4]);
  print('Reshaped [0..12] to [3,4]:\n${grid.toNestedList()}');

  // Swap rows and columns.
  final flipped = grid.transpose();
  print('Transposed to ${flipped.shape}');

  // Elementwise activation functions.
  final activations = Tensor(shape: [5], data: [-2.0, -1.0, 0.0, 1.0, 2.0]);
  print('ReLU of $activations: ${activations.relu().toNestedList()}');
  print('Sigmoid of $activations: ${activations.sigmoid().toNestedList()}');

  // Normalize logits into a probability distribution.
  final scores = Tensor(shape: [4], data: [1.0, 2.0, 3.0, 4.0]);
  print('Softmax of $scores: ${scores.softmax().toNestedList()}');

  print('');
}
|
|
|
|
/// LLM inference example
///
/// Sends a single prompt to a hosted LLM and prints the reply plus timing.
Future<void> llmInferenceExample(SynorCompute client) async {
  print('=== LLM Inference ===');

  // Low temperature + small token budget for a short, stable answer.
  final options = InferenceOptions(
    maxTokens: 10,
    temperature: 0.1,
    processor: ProcessorType.lpu, // Use LPU for LLM
  );
  final result = await client.inference(
    'llama-3-70b',
    'What is the capital of France? Answer in one word.',
    options: options,
  );

  if (!result.isSuccess) {
    print('Error: ${result.error}');
  } else {
    print('Response: ${result.result}');
    print('Time: ${result.executionTimeMs}ms');
  }
  print('');
}
|
|
|
|
/// Streaming inference example
///
/// Consumes tokens from the model as they arrive and echoes each one to
/// stdout as-is.
Future<void> streamingInferenceExample(SynorCompute client) async {
  print('=== Streaming Inference ===');
  print('Response: ');

  final tokenStream = client.inferenceStream(
    'llama-3-70b',
    'Write a short poem about distributed computing.',
    options: InferenceOptions(
      maxTokens: 100,
      temperature: 0.7,
    ),
  );
  // stdout.write avoids the per-token newline that print() would add.
  await for (final chunk in tokenStream) {
    stdout.write(chunk);
  }

  print('\n');
}
|
|
|
|
/// Pricing and usage example
///
/// Fetches current spot prices per processor type, then the caller's
/// aggregate usage statistics.
Future<void> pricingExample(SynorCompute client) async {
  print('=== Pricing Information ===');

  final spotPrices = await client.getPricing();

  print('Current spot prices:');
  for (final entry in spotPrices) {
    print(' ${entry.processor.value.toUpperCase().padRight(8)}: '
        '\$${entry.pricePerSecond.toStringAsFixed(6)}/sec, '
        '${entry.availableUnits} units available, '
        '${entry.utilizationPercent.toStringAsFixed(1)}% utilized');
  }

  print('');

  // Account-level usage totals.
  final usage = await client.getUsage();
  print('Usage Statistics:');
  print(' Total jobs: ${usage.totalJobs}');
  print(' Completed: ${usage.completedJobs}');
  print(' Failed: ${usage.failedJobs}');
  print(' Total compute time: ${usage.totalComputeSeconds.toStringAsFixed(2)}s');
  print(' Total cost: \$${usage.totalCost.toStringAsFixed(4)}');
  print('');
}
|
|
|
|
/// Model registry example - list available models
///
/// Shows listing, category filtering, free-text search, and fetching the
/// detailed metadata of a single model.
Future<void> modelRegistryExample(SynorCompute client) async {
  print('=== Model Registry ===');

  // Full catalogue, no filter.
  final catalogue = await client.listModels();
  print('Total available models: ${catalogue.length}');

  // Restrict to large language models and show the first five.
  final llms = await client.listModels(category: ModelCategory.llm);
  print('\nAvailable LLMs:');
  for (final entry in llms.take(5)) {
    print(' ${entry.id.padRight(20)} ${entry.formattedParameters.padRight(8)} '
        '${entry.name}');
  }

  // Free-text search across the registry.
  final matches = await client.searchModels('llama');
  print('\nSearch "llama": ${matches.length} results');

  // Detailed metadata for one specific model.
  final details = await client.getModel('llama-3-70b');
  print('\nModel details for ${details.name}:');
  print(' Parameters: ${details.formattedParameters}');
  print(' Context length: ${details.contextLength}');
  print(' Format: ${details.format.value}');
  print(' Recommended processor: ${details.recommendedProcessor.value}');
  print(' License: ${details.license}');

  // Embedding models.
  final embedders = await client.listModels(category: ModelCategory.embedding);
  print('\nAvailable embedding models:');
  for (final entry in embedders) {
    print(' ${entry.id} - ${entry.name}');
  }

  // Image generation models.
  final imageModels =
      await client.listModels(category: ModelCategory.imageGeneration);
  print('\nAvailable image generation models:');
  for (final entry in imageModels) {
    print(' ${entry.id} - ${entry.name}');
  }

  print('');
}
|
|
|
|
/// Training example - train/fine-tune a model
///
/// Walks through the full fine-tuning workflow: upload a JSONL dataset,
/// launch a fine-tuning job against a base model, then run one test
/// inference against the newly produced model CID.
Future<void> trainingExample(SynorCompute client) async {
  print('=== Model Training ===');

  // ========== STEP 1: Upload your dataset ==========
  print('Step 1: Uploading training dataset...\n');

  // Example 1: JSONL format (most common for LLM fine-tuning)
  // Each line is a standalone JSON object with prompt/completion fields.
  final jsonlData = '''
{"prompt": "What is the capital of France?", "completion": "Paris"}
{"prompt": "Translate 'hello' to Spanish", "completion": "hola"}
{"prompt": "What is 2 + 2?", "completion": "4"}
{"prompt": "Who wrote Romeo and Juliet?", "completion": "William Shakespeare"}
''';

  // Upload raw UTF-8 bytes plus metadata describing format, task type,
  // the train/val/test split, and preprocessing to apply.
  final dataset = await client.uploadDataset(
    utf8.encode(jsonlData),
    DatasetUploadOptions(
      name: 'qa-training-data',
      description: 'Question-answering training dataset',
      format: DatasetFormat.jsonl,
      type: DatasetType.textCompletion,
      // 80/10/10 split; the fixed seed makes the split reproducible.
      split: DatasetSplit(train: 0.8, validation: 0.1, test: 0.1, seed: 42),
      preprocessing: DatasetPreprocessing(
        maxLength: 2048,
        shuffle: true,
        deduplicate: true,
      ),
    ),
  );

  print('Dataset uploaded!');
  print(' CID: ${dataset.cid}');
  print(' Total samples: ${dataset.totalSamples}');
  print(' Train/Val/Test: ${dataset.trainSamples}/${dataset.validationSamples}/${dataset.testSamples}');
  print(' Schema: ${dataset.schema}');

  // ========== STEP 2: Fine-tune the model ==========
  print('\nStep 2: Fine-tuning llama-3-8b on dataset...\n');

  final result = await client.fineTune(
    baseModel: 'llama-3-8b',
    datasetCid: dataset.cid, // Use the CID from upload
    outputAlias: 'my-qa-model',
    options: TrainingOptions(
      framework: MlFramework.pytorch,
      epochs: 3,
      batchSize: 8,
      learningRate: 0.00002,
      optimizer: 'adamw',
      // Extra optimizer/schedule knobs passed through to the trainer.
      hyperparameters: {
        'weight_decay': 0.01,
        'warmup_steps': 100,
        'gradient_accumulation_steps': 4,
      },
      checkpointEvery: 500,
      processor: ProcessorType.gpu,
      priority: Priority.high,
    ),
  );

  if (result.isSuccess) {
    final training = result.result!;
    print('Training completed!');
    print(' New model CID: ${training.modelCid}');
    print(' Final loss: ${training.finalLoss.toStringAsFixed(4)}');
    print(' Duration: ${training.durationMs / 1000}s');
    print(' Cost: \$${training.cost.toStringAsFixed(4)}');

    // ========== STEP 3: Use your trained model ==========
    print('\nStep 3: Testing trained model...\n');
    // The returned CID addresses the fine-tuned weights directly.
    final inference = await client.inference(
      training.modelCid,
      'What is the capital of Germany?',
      options: InferenceOptions(maxTokens: 50),
    );
    print('Response: ${inference.result}');
  } else {
    print('Training failed: ${result.error}');
  }

  print('');
}
|
|
|
|
/// Dataset upload examples - shows all supported formats
///
/// Demonstrates JSONL, CSV, Parquet, HuggingFace, and image-folder dataset
/// inputs, then enumerates every [DatasetFormat] and [DatasetType] value
/// the SDK declares.
Future<void> datasetUploadExamples(SynorCompute client) async {
  print('=== Dataset Upload Examples ===\n');

  // ========== FORMAT 1: JSONL (JSON Lines) ==========
  print('Format 1: JSONL - One JSON object per line');
  print('''
// Text completion format
{"prompt": "Hello", "completion": "Hi there!"}

// Instruction tuning format
{"instruction": "Summarize", "input": "Long text...", "output": "Summary"}

// Chat format
{"messages": [{"role": "user", "content": "Hi"}, {"role": "assistant", "content": "Hello!"}]}
''');

  // Example: Instruction tuning dataset
  // Builds a dataset directly from in-memory records; no file needed.
  final instructionData = await client.createDatasetFromRecords(
    name: 'instruction-dataset',
    records: [
      {
        'instruction': 'Summarize the following text',
        'input': 'The quick brown fox jumps over the lazy dog.',
        'output': 'A fox jumps over a dog.'
      },
      {
        'instruction': 'Translate to French',
        'input': 'Hello world',
        'output': 'Bonjour le monde'
      },
    ],
    type: DatasetType.instructionTuning,
  );
  print('Instruction dataset CID: ${instructionData.cid}');

  // ========== FORMAT 2: CSV ==========
  print('\nFormat 2: CSV - Comma-separated values with headers');
  print('''
prompt,completion
"What is AI?","Artificial Intelligence is..."
"Define ML","Machine Learning is..."
''');

  final csvData = '''
prompt,completion
"What is AI?","Artificial Intelligence is the simulation of human intelligence"
"Define ML","Machine Learning is a subset of AI that learns from data"
''';

  // columnMapping renames the CSV headers to the canonical
  // input/output fields expected by the training pipeline.
  final csvDataset = await client.uploadDataset(
    utf8.encode(csvData),
    DatasetUploadOptions(
      name: 'csv-dataset',
      format: DatasetFormat.csv,
      type: DatasetType.textCompletion,
      columnMapping: {'prompt': 'input', 'completion': 'output'},
    ),
  );
  print('CSV dataset CID: ${csvDataset.cid}');

  // ========== FORMAT 3: Parquet (for large datasets) ==========
  // Shown as a code snippet only; nothing is uploaded here.
  print('\nFormat 3: Parquet - Efficient columnar format for large datasets');
  print(' - Best for datasets > 1GB');
  print(' - Supports compression');
  print(' - Fast random access');
  print('''
final parquetDataset = await client.uploadDatasetFromFile(
'/path/to/dataset.parquet',
DatasetUploadOptions(
name: 'large-dataset',
format: DatasetFormat.parquet,
type: DatasetType.textCompletion,
),
);
''');

  // ========== FORMAT 4: HuggingFace ==========
  print('\nFormat 4: HuggingFace datasets format');
  print(' - Compatible with datasets library');
  print(' - Automatic schema detection');

  // ========== FORMAT 5: Image folder ==========
  // Class labels are inferred from the directory names.
  print('\nFormat 5: Image folder structure');
  print('''
dataset/
├── train/
│ ├── cat/
│ │ ├── img001.jpg
│ │ └── img002.jpg
│ └── dog/
│ ├── img001.jpg
│ └── img002.jpg
└── val/
├── cat/
└── dog/
''');

  // ========== ALL SUPPORTED FORMATS ==========
  // Exhaustive switch over the DatasetFormat enum; a newly added enum
  // value surfaces here as a compile-time error.
  print('\nAll supported dataset formats:');
  for (final format in DatasetFormat.values) {
    final description = switch (format) {
      DatasetFormat.jsonl => 'JSON Lines - one JSON per line (recommended for text)',
      DatasetFormat.csv => 'CSV - comma-separated with headers',
      DatasetFormat.parquet => 'Parquet - columnar format for large datasets',
      DatasetFormat.arrow => 'Apache Arrow - in-memory format',
      DatasetFormat.huggingface => 'HuggingFace datasets format',
      DatasetFormat.tfrecord => 'TFRecord - TensorFlow format',
      DatasetFormat.webdataset => 'WebDataset - PyTorch streaming format',
      DatasetFormat.text => 'Plain text - one sample per line',
      DatasetFormat.imagefolder => 'Image folder structure',
      DatasetFormat.custom => 'Custom binary format',
    };
    print(' ${format.value.padRight(15)} - $description');
  }

  // ========== ALL DATASET TYPES ==========
  // Same exhaustive-switch pattern for the DatasetType enum.
  print('\nAll supported dataset types:');
  for (final type in DatasetType.values) {
    final description = switch (type) {
      DatasetType.textCompletion => 'prompt → completion pairs',
      DatasetType.instructionTuning => 'instruction + input → output',
      DatasetType.chat => 'multi-turn conversations',
      DatasetType.questionAnswering => 'question → answer pairs',
      DatasetType.textClassification => 'text → label',
      DatasetType.ner => 'named entity recognition',
      DatasetType.imageClassification => 'image → label',
      DatasetType.objectDetection => 'image → bounding boxes',
      DatasetType.imageSegmentation => 'image → mask',
      DatasetType.imageText => 'image-text pairs (CLIP, etc.)',
      DatasetType.audioTranscription => 'audio → text',
      DatasetType.custom => 'custom format',
    };
    print(' ${type.value.padRight(22)} - $description');
  }

  print('');
}
|
|
|
|
/// Custom model upload example
///
/// Prints a walkthrough (as text only; nothing is executed) of exporting a
/// model from Python, uploading it, and running inference against it, then
/// lists every model format and category the SDK declares.
Future<void> customModelExample(SynorCompute client) async {
  print('=== Custom Model Upload ===');

  // Example: Upload a custom ONNX model
  // In practice, you'd read this from a file:
  // final modelBytes = await File('my_model.onnx').readAsBytes();

  // For demonstration, we'll show the API structure
  print('To upload your own Python-trained model:');
  print('''
1. Train your model in Python:

import torch
model = MyModel()
# ... train model ...
torch.onnx.export(model, dummy_input, "my_model.onnx")

2. Upload to Synor Compute:

final modelBytes = await File('my_model.onnx').readAsBytes();
final result = await client.uploadModel(
modelBytes,
ModelUploadOptions(
name: 'my-custom-model',
description: 'My custom trained model',
category: ModelCategory.custom,
format: ModelFormat.onnx,
alias: 'my-model', // Optional shortcut name
isPublic: false, // Keep private
license: 'Proprietary',
),
);
print('Uploaded! CID: \${result.cid}');

3. Use for inference:

final result = await client.inference(
result.cid, // or 'my-model' if you set an alias
'Your input data',
);
''');

  // Supported model formats
  print('Supported model formats:');
  for (final format in ModelFormat.values) {
    print(' - ${format.value}');
  }

  // Supported categories
  print('\nSupported model categories:');
  for (final category in ModelCategory.values) {
    print(' - ${category.value}');
  }

  print('');
}
|