Multi-model database layer for Synor blockchain: - Key-Value Store: Redis-compatible API with TTL, INCR, MGET/MSET - Document Store: MongoDB-compatible queries with filters - Vector Store: AI/RAG optimized with cosine, euclidean, dot product similarity - Time-Series Store: Metrics with downsampling and aggregations - Query Engine: Unified queries across all data models - Index Manager: B-tree, hash, unique, and compound indexes - Schema Validator: Field validation with type checking - Database Pricing: Pay-per-use model (0.1 SYNOR/GB/month) Updates roadmap with Phase 10-12 milestones: - Phase 10: Synor Database L2 - Phase 11: Economics & Billing - Phase 12: Fiat Gateway (Ramp Network integration) 41 tests passing
522 lines
14 KiB
Rust
522 lines
14 KiB
Rust
//! Index Management for efficient queries.
|
|
//!
|
|
//! Supports B-tree, hash, and vector indexes.
|
|
|
|
use crate::document::DocumentId;
|
|
use crate::error::DatabaseError;
|
|
use parking_lot::RwLock;
|
|
use serde::{Deserialize, Serialize};
|
|
use serde_json::Value as JsonValue;
|
|
use std::collections::{BTreeMap, HashMap, HashSet};
|
|
|
|
/// Index type.
|
|
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
|
|
pub enum IndexType {
|
|
/// B-tree index for range queries.
|
|
BTree,
|
|
/// Hash index for equality lookups.
|
|
Hash,
|
|
/// Full-text search index.
|
|
FullText,
|
|
/// Vector index (HNSW).
|
|
Vector,
|
|
/// Compound index on multiple fields.
|
|
Compound,
|
|
/// Unique constraint index.
|
|
Unique,
|
|
}
|
|
|
|
/// Index configuration.
|
|
#[derive(Clone, Debug, Serialize, Deserialize)]
|
|
pub struct IndexConfig {
|
|
/// Index name.
|
|
pub name: String,
|
|
/// Collection name.
|
|
pub collection: String,
|
|
/// Fields to index.
|
|
pub fields: Vec<String>,
|
|
/// Index type.
|
|
pub index_type: IndexType,
|
|
/// Whether index enforces uniqueness.
|
|
pub unique: bool,
|
|
/// Sparse index (skip null values).
|
|
pub sparse: bool,
|
|
}
|
|
|
|
impl IndexConfig {
|
|
/// Creates a new index config.
|
|
pub fn new(name: impl Into<String>, collection: impl Into<String>) -> Self {
|
|
Self {
|
|
name: name.into(),
|
|
collection: collection.into(),
|
|
fields: Vec::new(),
|
|
index_type: IndexType::BTree,
|
|
unique: false,
|
|
sparse: false,
|
|
}
|
|
}
|
|
|
|
/// Adds a field to index.
|
|
pub fn field(mut self, field: impl Into<String>) -> Self {
|
|
self.fields.push(field.into());
|
|
self
|
|
}
|
|
|
|
/// Sets index type.
|
|
pub fn index_type(mut self, t: IndexType) -> Self {
|
|
self.index_type = t;
|
|
self
|
|
}
|
|
|
|
/// Sets as unique.
|
|
pub fn unique(mut self) -> Self {
|
|
self.unique = true;
|
|
self
|
|
}
|
|
|
|
/// Sets as sparse.
|
|
pub fn sparse(mut self) -> Self {
|
|
self.sparse = true;
|
|
self
|
|
}
|
|
}
|
|
|
|
/// An index entry.
|
|
#[derive(Clone, Debug)]
|
|
struct IndexEntry {
|
|
/// Indexed value (serialized for comparison).
|
|
key: IndexKey,
|
|
/// Document IDs with this value.
|
|
doc_ids: HashSet<DocumentId>,
|
|
}
|
|
|
|
/// Comparable, hashable form of an indexed value, used as the map key inside
/// an index.
#[derive(Debug, Clone, Hash, Eq, PartialEq)]
enum IndexKey {
    /// JSON `null`.
    Null,
    /// JSON boolean.
    Bool(bool),
    /// Integer value.
    Int(i64),
    /// String value.
    String(String),
    /// Raw bytes, used for values with no dedicated variant.
    Bytes(Vec<u8>),
}
|
|
|
|
impl PartialOrd for IndexKey {
|
|
fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
|
|
Some(self.cmp(other))
|
|
}
|
|
}
|
|
|
|
impl Ord for IndexKey {
|
|
fn cmp(&self, other: &Self) -> std::cmp::Ordering {
|
|
match (self, other) {
|
|
(IndexKey::Null, IndexKey::Null) => std::cmp::Ordering::Equal,
|
|
(IndexKey::Null, _) => std::cmp::Ordering::Less,
|
|
(_, IndexKey::Null) => std::cmp::Ordering::Greater,
|
|
(IndexKey::Bool(a), IndexKey::Bool(b)) => a.cmp(b),
|
|
(IndexKey::Int(a), IndexKey::Int(b)) => a.cmp(b),
|
|
(IndexKey::String(a), IndexKey::String(b)) => a.cmp(b),
|
|
(IndexKey::Bytes(a), IndexKey::Bytes(b)) => a.cmp(b),
|
|
_ => std::cmp::Ordering::Equal,
|
|
}
|
|
}
|
|
}
|
|
|
|
impl From<&JsonValue> for IndexKey {
|
|
fn from(value: &JsonValue) -> Self {
|
|
match value {
|
|
JsonValue::Null => IndexKey::Null,
|
|
JsonValue::Bool(b) => IndexKey::Bool(*b),
|
|
JsonValue::Number(n) => IndexKey::Int(n.as_i64().unwrap_or(0)),
|
|
JsonValue::String(s) => IndexKey::String(s.clone()),
|
|
_ => IndexKey::Bytes(serde_json::to_vec(value).unwrap_or_default()),
|
|
}
|
|
}
|
|
}
|
|
|
|
/// A single index instance.
|
|
pub struct Index {
|
|
/// Index configuration.
|
|
pub config: IndexConfig,
|
|
/// B-tree index data.
|
|
btree: RwLock<BTreeMap<IndexKey, HashSet<DocumentId>>>,
|
|
/// Hash index data.
|
|
hash: RwLock<HashMap<IndexKey, HashSet<DocumentId>>>,
|
|
/// Statistics.
|
|
stats: RwLock<IndexStats>,
|
|
}
|
|
|
|
/// Usage counters kept by an index.
#[derive(Debug, Default, Clone)]
pub struct IndexStats {
    /// Number of entries recorded in the index.
    pub entries: u64,
    /// Number of lookups performed.
    pub lookups: u64,
    /// Number of lookups that returned at least one document.
    pub hits: u64,
}
|
|
|
|
impl Index {
|
|
/// Creates a new index.
|
|
pub fn new(config: IndexConfig) -> Self {
|
|
Self {
|
|
config,
|
|
btree: RwLock::new(BTreeMap::new()),
|
|
hash: RwLock::new(HashMap::new()),
|
|
stats: RwLock::new(IndexStats::default()),
|
|
}
|
|
}
|
|
|
|
/// Adds a document to the index.
|
|
pub fn insert(&self, doc_id: DocumentId, value: &JsonValue) -> Result<(), DatabaseError> {
|
|
let key = IndexKey::from(value);
|
|
|
|
// Check uniqueness if required
|
|
if self.config.unique {
|
|
let exists = match self.config.index_type {
|
|
IndexType::Hash | IndexType::Unique => {
|
|
self.hash.read().get(&key).map(|s| !s.is_empty()).unwrap_or(false)
|
|
}
|
|
_ => {
|
|
self.btree.read().get(&key).map(|s| !s.is_empty()).unwrap_or(false)
|
|
}
|
|
};
|
|
if exists {
|
|
return Err(DatabaseError::AlreadyExists(
|
|
format!("Unique constraint violation on index '{}'", self.config.name)
|
|
));
|
|
}
|
|
}
|
|
|
|
match self.config.index_type {
|
|
IndexType::Hash | IndexType::Unique => {
|
|
self.hash
|
|
.write()
|
|
.entry(key)
|
|
.or_insert_with(HashSet::new)
|
|
.insert(doc_id);
|
|
}
|
|
_ => {
|
|
self.btree
|
|
.write()
|
|
.entry(key)
|
|
.or_insert_with(HashSet::new)
|
|
.insert(doc_id);
|
|
}
|
|
}
|
|
|
|
self.stats.write().entries += 1;
|
|
Ok(())
|
|
}
|
|
|
|
/// Removes a document from the index.
|
|
pub fn remove(&self, doc_id: &DocumentId, value: &JsonValue) {
|
|
let key = IndexKey::from(value);
|
|
|
|
match self.config.index_type {
|
|
IndexType::Hash | IndexType::Unique => {
|
|
if let Some(set) = self.hash.write().get_mut(&key) {
|
|
set.remove(doc_id);
|
|
if set.is_empty() {
|
|
self.hash.write().remove(&key);
|
|
}
|
|
}
|
|
}
|
|
_ => {
|
|
if let Some(set) = self.btree.write().get_mut(&key) {
|
|
set.remove(doc_id);
|
|
if set.is_empty() {
|
|
self.btree.write().remove(&key);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Looks up documents by exact value.
|
|
pub fn lookup(&self, value: &JsonValue) -> Vec<DocumentId> {
|
|
let key = IndexKey::from(value);
|
|
self.stats.write().lookups += 1;
|
|
|
|
let result: Vec<DocumentId> = match self.config.index_type {
|
|
IndexType::Hash | IndexType::Unique => {
|
|
self.hash
|
|
.read()
|
|
.get(&key)
|
|
.map(|s| s.iter().cloned().collect())
|
|
.unwrap_or_default()
|
|
}
|
|
_ => {
|
|
self.btree
|
|
.read()
|
|
.get(&key)
|
|
.map(|s| s.iter().cloned().collect())
|
|
.unwrap_or_default()
|
|
}
|
|
};
|
|
|
|
if !result.is_empty() {
|
|
self.stats.write().hits += 1;
|
|
}
|
|
|
|
result
|
|
}
|
|
|
|
/// Range query (only for B-tree indexes).
|
|
pub fn range(&self, start: Option<&JsonValue>, end: Option<&JsonValue>) -> Vec<DocumentId> {
|
|
if self.config.index_type != IndexType::BTree {
|
|
return Vec::new();
|
|
}
|
|
|
|
self.stats.write().lookups += 1;
|
|
|
|
let btree = self.btree.read();
|
|
let start_key = start.map(IndexKey::from);
|
|
let end_key = end.map(IndexKey::from);
|
|
|
|
let mut result = Vec::new();
|
|
for (key, doc_ids) in btree.iter() {
|
|
let in_range = match (&start_key, &end_key) {
|
|
(Some(s), Some(e)) => key >= s && key <= e,
|
|
(Some(s), None) => key >= s,
|
|
(None, Some(e)) => key <= e,
|
|
(None, None) => true,
|
|
};
|
|
if in_range {
|
|
result.extend(doc_ids.iter().cloned());
|
|
}
|
|
}
|
|
|
|
if !result.is_empty() {
|
|
self.stats.write().hits += 1;
|
|
}
|
|
|
|
result
|
|
}
|
|
|
|
/// Returns index statistics.
|
|
pub fn stats(&self) -> IndexStats {
|
|
self.stats.read().clone()
|
|
}
|
|
|
|
/// Clears the index.
|
|
pub fn clear(&self) {
|
|
self.btree.write().clear();
|
|
self.hash.write().clear();
|
|
self.stats.write().entries = 0;
|
|
}
|
|
}
|
|
|
|
/// Manages indexes for a database.
|
|
pub struct IndexManager {
|
|
/// Indexes by name.
|
|
indexes: RwLock<HashMap<String, Index>>,
|
|
/// Index by collection and field.
|
|
by_collection: RwLock<HashMap<String, Vec<String>>>,
|
|
}
|
|
|
|
impl IndexManager {
|
|
/// Creates a new index manager.
|
|
pub fn new() -> Self {
|
|
Self {
|
|
indexes: RwLock::new(HashMap::new()),
|
|
by_collection: RwLock::new(HashMap::new()),
|
|
}
|
|
}
|
|
|
|
/// Creates a new index.
|
|
pub fn create_index(&self, config: IndexConfig) -> Result<(), DatabaseError> {
|
|
let name = config.name.clone();
|
|
let collection = config.collection.clone();
|
|
|
|
let mut indexes = self.indexes.write();
|
|
if indexes.contains_key(&name) {
|
|
return Err(DatabaseError::AlreadyExists(name));
|
|
}
|
|
|
|
indexes.insert(name.clone(), Index::new(config));
|
|
|
|
self.by_collection
|
|
.write()
|
|
.entry(collection)
|
|
.or_insert_with(Vec::new)
|
|
.push(name);
|
|
|
|
Ok(())
|
|
}
|
|
|
|
/// Drops an index.
|
|
pub fn drop_index(&self, name: &str) -> Result<(), DatabaseError> {
|
|
let mut indexes = self.indexes.write();
|
|
let index = indexes
|
|
.remove(name)
|
|
.ok_or_else(|| DatabaseError::IndexNotFound(name.to_string()))?;
|
|
|
|
// Remove from collection mapping
|
|
let mut by_collection = self.by_collection.write();
|
|
if let Some(names) = by_collection.get_mut(&index.config.collection) {
|
|
names.retain(|n| n != name);
|
|
}
|
|
|
|
Ok(())
|
|
}
|
|
|
|
/// Gets an index by name.
|
|
pub fn get_index(&self, name: &str) -> Option<std::sync::Arc<Index>> {
|
|
// Simplified - real impl would use Arc
|
|
None
|
|
}
|
|
|
|
/// Gets indexes for a collection.
|
|
pub fn get_collection_indexes(&self, collection: &str) -> Vec<String> {
|
|
self.by_collection
|
|
.read()
|
|
.get(collection)
|
|
.cloned()
|
|
.unwrap_or_default()
|
|
}
|
|
|
|
/// Indexes a document.
|
|
pub fn index_document(
|
|
&self,
|
|
collection: &str,
|
|
doc_id: DocumentId,
|
|
document: &JsonValue,
|
|
) -> Result<(), DatabaseError> {
|
|
let index_names = self.get_collection_indexes(collection);
|
|
let indexes = self.indexes.read();
|
|
|
|
for name in index_names {
|
|
if let Some(index) = indexes.get(&name) {
|
|
for field in &index.config.fields {
|
|
if let Some(value) = document.get(field) {
|
|
index.insert(doc_id.clone(), value)?;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
Ok(())
|
|
}
|
|
|
|
/// Removes a document from indexes.
|
|
pub fn unindex_document(
|
|
&self,
|
|
collection: &str,
|
|
doc_id: &DocumentId,
|
|
document: &JsonValue,
|
|
) {
|
|
let index_names = self.get_collection_indexes(collection);
|
|
let indexes = self.indexes.read();
|
|
|
|
for name in index_names {
|
|
if let Some(index) = indexes.get(&name) {
|
|
for field in &index.config.fields {
|
|
if let Some(value) = document.get(field) {
|
|
index.remove(doc_id, value);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Lists all indexes.
|
|
pub fn list_indexes(&self) -> Vec<IndexConfig> {
|
|
self.indexes
|
|
.read()
|
|
.values()
|
|
.map(|i| i.config.clone())
|
|
.collect()
|
|
}
|
|
}
|
|
|
|
impl Default for IndexManager {
|
|
fn default() -> Self {
|
|
Self::new()
|
|
}
|
|
}
|
|
|
|
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    /// Exact lookups and inclusive range scans on an ordered index.
    #[test]
    fn test_btree_index() {
        let index = Index::new(
            IndexConfig::new("age_idx", "users")
                .field("age")
                .index_type(IndexType::BTree),
        );

        let young = DocumentId::new();
        let middle = DocumentId::new();
        let old = DocumentId::new();

        for (id, age) in vec![(&young, 25), (&middle, 30), (&old, 35)] {
            index.insert(id.clone(), &json!(age)).unwrap();
        }

        // Exact lookup
        let exact = index.lookup(&json!(30));
        assert_eq!(exact.len(), 1);
        assert_eq!(exact[0], middle);

        // Range query
        let lower = json!(28);
        let upper = json!(36);
        let ranged = index.range(Some(&lower), Some(&upper));
        assert_eq!(ranged.len(), 2);
    }

    /// Equality lookups on a hash index, for a present and an absent key.
    #[test]
    fn test_hash_index() {
        let index = Index::new(
            IndexConfig::new("email_idx", "users")
                .field("email")
                .index_type(IndexType::Hash),
        );

        let alice = DocumentId::new();
        index
            .insert(alice.clone(), &json!("alice@example.com"))
            .unwrap();

        let present = index.lookup(&json!("alice@example.com"));
        assert_eq!(present.len(), 1);

        let absent = index.lookup(&json!("bob@example.com"));
        assert!(absent.is_empty());
    }

    /// A unique index refuses a second document for an occupied key.
    #[test]
    fn test_unique_index() {
        let index = Index::new(
            IndexConfig::new("email_unique", "users")
                .field("email")
                .index_type(IndexType::Unique)
                .unique(),
        );

        let first = DocumentId::new();
        let second = DocumentId::new();
        let email = json!("alice@example.com");

        index.insert(first, &email).unwrap();

        // Should fail - duplicate
        let duplicate = index.insert(second, &email);
        assert!(duplicate.is_err());
    }

    /// The manager registers an index and feeds documents into it.
    #[test]
    fn test_index_manager() {
        let manager = IndexManager::new();
        manager
            .create_index(IndexConfig::new("age_idx", "users").field("age"))
            .unwrap();

        let doc_id = DocumentId::new();
        let doc = json!({"name": "Alice", "age": 30});
        manager
            .index_document("users", doc_id.clone(), &doc)
            .unwrap();

        let listed = manager.list_indexes();
        assert_eq!(listed.len(), 1);
    }
}
|