//! Index Management for efficient queries.
//!
//! Supports B-tree, hash, and vector indexes.

use crate::document::DocumentId;
use crate::error::DatabaseError;
use parking_lot::RwLock;
use serde::{Deserialize, Serialize};
use serde_json::Value as JsonValue;
use std::collections::{BTreeMap, HashMap, HashSet};

/// Index type.
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
pub enum IndexType {
    /// B-tree index for range queries.
    BTree,
    /// Hash index for equality lookups.
    Hash,
    /// Full-text search index.
    FullText,
    /// Vector index (HNSW).
    Vector,
    /// Compound index on multiple fields.
    Compound,
    /// Unique constraint index.
    Unique,
}

/// Index configuration.
///
/// Built fluently: start from [`IndexConfig::new`] and chain `field`, `index_type`,
/// `unique` and `sparse` as needed.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct IndexConfig {
    /// Index name.
    pub name: String,
    /// Collection name.
    pub collection: String,
    /// Fields to index.
    pub fields: Vec<String>,
    /// Index type.
    pub index_type: IndexType,
    /// Whether index enforces uniqueness.
    pub unique: bool,
    /// Sparse index (skip null values).
    pub sparse: bool,
}

impl IndexConfig {
    /// Creates a new index config.
    ///
    /// The config starts with no fields, type [`IndexType::BTree`], and both `unique`
    /// and `sparse` switched off.
    pub fn new(name: impl Into<String>, collection: impl Into<String>) -> Self {
        Self {
            name: name.into(),
            collection: collection.into(),
            fields: Vec::new(),
            index_type: IndexType::BTree,
            unique: false,
            sparse: false,
        }
    }

    /// Adds a field to index. May be called repeatedly; fields keep insertion order.
    pub fn field(mut self, field: impl Into<String>) -> Self {
        self.fields.push(field.into());
        self
    }

    /// Sets index type.
    pub fn index_type(mut self, t: IndexType) -> Self {
        self.index_type = t;
        self
    }

    /// Sets as unique.
    pub fn unique(mut self) -> Self {
        self.unique = true;
        self
    }

    /// Sets as sparse.
    pub fn sparse(mut self) -> Self {
        self.sparse = true;
        self
    }
}

/// An index entry.
#[derive(Clone, Debug)]
struct IndexEntry {
    /// Indexed value (serialized for comparison).
    key: IndexKey,
    /// Document IDs with this value.
    doc_ids: HashSet<DocumentId>,
}

/// Index key for ordering.
#[derive(Clone, Debug, PartialEq, Eq, Hash)] enum IndexKey { Null, Bool(bool), Int(i64), String(String), Bytes(Vec), } impl PartialOrd for IndexKey { fn partial_cmp(&self, other: &Self) -> Option { Some(self.cmp(other)) } } impl Ord for IndexKey { fn cmp(&self, other: &Self) -> std::cmp::Ordering { match (self, other) { (IndexKey::Null, IndexKey::Null) => std::cmp::Ordering::Equal, (IndexKey::Null, _) => std::cmp::Ordering::Less, (_, IndexKey::Null) => std::cmp::Ordering::Greater, (IndexKey::Bool(a), IndexKey::Bool(b)) => a.cmp(b), (IndexKey::Int(a), IndexKey::Int(b)) => a.cmp(b), (IndexKey::String(a), IndexKey::String(b)) => a.cmp(b), (IndexKey::Bytes(a), IndexKey::Bytes(b)) => a.cmp(b), _ => std::cmp::Ordering::Equal, } } } impl From<&JsonValue> for IndexKey { fn from(value: &JsonValue) -> Self { match value { JsonValue::Null => IndexKey::Null, JsonValue::Bool(b) => IndexKey::Bool(*b), JsonValue::Number(n) => IndexKey::Int(n.as_i64().unwrap_or(0)), JsonValue::String(s) => IndexKey::String(s.clone()), _ => IndexKey::Bytes(serde_json::to_vec(value).unwrap_or_default()), } } } /// A single index instance. pub struct Index { /// Index configuration. pub config: IndexConfig, /// B-tree index data. btree: RwLock>>, /// Hash index data. hash: RwLock>>, /// Statistics. stats: RwLock, } /// Index statistics. #[derive(Clone, Debug, Default)] pub struct IndexStats { /// Total entries. pub entries: u64, /// Index lookups. pub lookups: u64, /// Index hits. pub hits: u64, } impl Index { /// Creates a new index. pub fn new(config: IndexConfig) -> Self { Self { config, btree: RwLock::new(BTreeMap::new()), hash: RwLock::new(HashMap::new()), stats: RwLock::new(IndexStats::default()), } } /// Adds a document to the index. 
pub fn insert(&self, doc_id: DocumentId, value: &JsonValue) -> Result<(), DatabaseError> { let key = IndexKey::from(value); // Check uniqueness if required if self.config.unique { let exists = match self.config.index_type { IndexType::Hash | IndexType::Unique => { self.hash.read().get(&key).map(|s| !s.is_empty()).unwrap_or(false) } _ => { self.btree.read().get(&key).map(|s| !s.is_empty()).unwrap_or(false) } }; if exists { return Err(DatabaseError::AlreadyExists( format!("Unique constraint violation on index '{}'", self.config.name) )); } } match self.config.index_type { IndexType::Hash | IndexType::Unique => { self.hash .write() .entry(key) .or_insert_with(HashSet::new) .insert(doc_id); } _ => { self.btree .write() .entry(key) .or_insert_with(HashSet::new) .insert(doc_id); } } self.stats.write().entries += 1; Ok(()) } /// Removes a document from the index. pub fn remove(&self, doc_id: &DocumentId, value: &JsonValue) { let key = IndexKey::from(value); match self.config.index_type { IndexType::Hash | IndexType::Unique => { if let Some(set) = self.hash.write().get_mut(&key) { set.remove(doc_id); if set.is_empty() { self.hash.write().remove(&key); } } } _ => { if let Some(set) = self.btree.write().get_mut(&key) { set.remove(doc_id); if set.is_empty() { self.btree.write().remove(&key); } } } } } /// Looks up documents by exact value. pub fn lookup(&self, value: &JsonValue) -> Vec { let key = IndexKey::from(value); self.stats.write().lookups += 1; let result: Vec = match self.config.index_type { IndexType::Hash | IndexType::Unique => { self.hash .read() .get(&key) .map(|s| s.iter().cloned().collect()) .unwrap_or_default() } _ => { self.btree .read() .get(&key) .map(|s| s.iter().cloned().collect()) .unwrap_or_default() } }; if !result.is_empty() { self.stats.write().hits += 1; } result } /// Range query (only for B-tree indexes). 
pub fn range(&self, start: Option<&JsonValue>, end: Option<&JsonValue>) -> Vec { if self.config.index_type != IndexType::BTree { return Vec::new(); } self.stats.write().lookups += 1; let btree = self.btree.read(); let start_key = start.map(IndexKey::from); let end_key = end.map(IndexKey::from); let mut result = Vec::new(); for (key, doc_ids) in btree.iter() { let in_range = match (&start_key, &end_key) { (Some(s), Some(e)) => key >= s && key <= e, (Some(s), None) => key >= s, (None, Some(e)) => key <= e, (None, None) => true, }; if in_range { result.extend(doc_ids.iter().cloned()); } } if !result.is_empty() { self.stats.write().hits += 1; } result } /// Returns index statistics. pub fn stats(&self) -> IndexStats { self.stats.read().clone() } /// Clears the index. pub fn clear(&self) { self.btree.write().clear(); self.hash.write().clear(); self.stats.write().entries = 0; } } /// Manages indexes for a database. pub struct IndexManager { /// Indexes by name. indexes: RwLock>, /// Index by collection and field. by_collection: RwLock>>, } impl IndexManager { /// Creates a new index manager. pub fn new() -> Self { Self { indexes: RwLock::new(HashMap::new()), by_collection: RwLock::new(HashMap::new()), } } /// Creates a new index. pub fn create_index(&self, config: IndexConfig) -> Result<(), DatabaseError> { let name = config.name.clone(); let collection = config.collection.clone(); let mut indexes = self.indexes.write(); if indexes.contains_key(&name) { return Err(DatabaseError::AlreadyExists(name)); } indexes.insert(name.clone(), Index::new(config)); self.by_collection .write() .entry(collection) .or_insert_with(Vec::new) .push(name); Ok(()) } /// Drops an index. 
pub fn drop_index(&self, name: &str) -> Result<(), DatabaseError> { let mut indexes = self.indexes.write(); let index = indexes .remove(name) .ok_or_else(|| DatabaseError::IndexNotFound(name.to_string()))?; // Remove from collection mapping let mut by_collection = self.by_collection.write(); if let Some(names) = by_collection.get_mut(&index.config.collection) { names.retain(|n| n != name); } Ok(()) } /// Gets an index by name. pub fn get_index(&self, name: &str) -> Option> { // Simplified - real impl would use Arc None } /// Gets indexes for a collection. pub fn get_collection_indexes(&self, collection: &str) -> Vec { self.by_collection .read() .get(collection) .cloned() .unwrap_or_default() } /// Indexes a document. pub fn index_document( &self, collection: &str, doc_id: DocumentId, document: &JsonValue, ) -> Result<(), DatabaseError> { let index_names = self.get_collection_indexes(collection); let indexes = self.indexes.read(); for name in index_names { if let Some(index) = indexes.get(&name) { for field in &index.config.fields { if let Some(value) = document.get(field) { index.insert(doc_id.clone(), value)?; } } } } Ok(()) } /// Removes a document from indexes. pub fn unindex_document( &self, collection: &str, doc_id: &DocumentId, document: &JsonValue, ) { let index_names = self.get_collection_indexes(collection); let indexes = self.indexes.read(); for name in index_names { if let Some(index) = indexes.get(&name) { for field in &index.config.fields { if let Some(value) = document.get(field) { index.remove(doc_id, value); } } } } } /// Lists all indexes. 
pub fn list_indexes(&self) -> Vec<IndexConfig> {
    // Returns a copy of every registered index's configuration, in no particular
    // order (the backing store is a hash map).
    self.indexes
        .read()
        .values()
        .map(|index| index.config.clone())
        .collect()
}
}

impl Default for IndexManager {
    fn default() -> Self {
        Self::new()
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    #[test]
    fn test_btree_index() {
        let config = IndexConfig::new("age_idx", "users")
            .field("age")
            .index_type(IndexType::BTree);
        let index = Index::new(config);

        let doc1 = DocumentId::new();
        let doc2 = DocumentId::new();
        let doc3 = DocumentId::new();

        index.insert(doc1.clone(), &json!(25)).unwrap();
        index.insert(doc2.clone(), &json!(30)).unwrap();
        index.insert(doc3.clone(), &json!(35)).unwrap();

        // Exact lookup
        let results = index.lookup(&json!(30));
        assert_eq!(results.len(), 1);
        assert_eq!(results[0], doc2);

        // Range query
        let results = index.range(Some(&json!(28)), Some(&json!(36)));
        assert_eq!(results.len(), 2);
    }

    #[test]
    fn test_hash_index() {
        let config = IndexConfig::new("email_idx", "users")
            .field("email")
            .index_type(IndexType::Hash);
        let index = Index::new(config);

        let doc1 = DocumentId::new();
        index
            .insert(doc1.clone(), &json!("alice@example.com"))
            .unwrap();

        let results = index.lookup(&json!("alice@example.com"));
        assert_eq!(results.len(), 1);

        let results = index.lookup(&json!("bob@example.com"));
        assert!(results.is_empty());
    }

    #[test]
    fn test_unique_index() {
        let config = IndexConfig::new("email_unique", "users")
            .field("email")
            .index_type(IndexType::Unique)
            .unique();
        let index = Index::new(config);

        let doc1 = DocumentId::new();
        let doc2 = DocumentId::new();

        index.insert(doc1, &json!("alice@example.com")).unwrap();

        // Should fail - duplicate
        let result = index.insert(doc2, &json!("alice@example.com"));
        assert!(result.is_err());
    }

    #[test]
    fn test_index_manager() {
        let manager = IndexManager::new();

        let config = IndexConfig::new("age_idx", "users").field("age");
        manager.create_index(config).unwrap();

        let doc_id = DocumentId::new();
        let doc = json!({"name": "Alice", "age": 30});
        manager
            .index_document("users", doc_id.clone(), &doc)
            .unwrap();

        let indexes = manager.list_indexes();
        assert_eq!(indexes.len(), 1);
    }

    #[test]
    fn test_drop_index() {
        let manager = IndexManager::new();
        manager
            .create_index(IndexConfig::new("age_idx", "users").field("age"))
            .unwrap();
        assert_eq!(
            manager.get_collection_indexes("users"),
            vec!["age_idx".to_string()]
        );

        manager.drop_index("age_idx").unwrap();
        assert!(manager.list_indexes().is_empty());
        assert!(manager.get_collection_indexes("users").is_empty());

        // Dropping an unknown index is an error.
        assert!(manager.drop_index("age_idx").is_err());
    }
}