RagBenchCapstone10 / createmilvusschema.py
Saiteja Solleti
Milvus insert and search addition
9a8353d
raw
history blame
2.23 kB
import os
from pymilvus import connections, FieldSchema, CollectionSchema, DataType, Collection
# Name of the Milvus collection this module works with.
COLLECTION_NAME = "final_ragbench_document_embeddings"

# Endpoint URI handed to the Milvus connection.
MILVUS_CLOUD_URI = "https://in03-7b4da1b7b588a88.serverless.gcp-us-west1.cloud.zilliz.com"

# Access token read from the MILVUS_TOKEN environment variable when the module
# is imported; None if the variable is not set.
milvus_token = os.environ.get("MILVUS_TOKEN")
# Function to create the Milvus DB schema (collection and vector index) used for inserting data
def CreateMilvusDbSchema() -> Collection:
    """Connect to Milvus and return the document-embedding collection.

    Opens the "default" connection to ``MILVUS_CLOUD_URI``, declares the
    collection schema (a VARCHAR primary key, a 384-dimensional float vector
    and several VARCHAR / FLOAT scalar fields), obtains the collection named
    ``COLLECTION_NAME`` and requests an HNSW index with the COSINE metric on
    the ``chunk_embedding`` field.

    Returns:
        The ``Collection`` handle for ``COLLECTION_NAME``.
    """
    # Imported locally so the module's top-level import line stays untouched.
    from pymilvus import utility

    # The module-level token is captured once at import time. Fall back to the
    # current environment so a MILVUS_TOKEN set after import is still honoured.
    token = milvus_token or os.getenv("MILVUS_TOKEN")

    connections.connect("default", uri=MILVUS_CLOUD_URI, token=token)
    print(connections.get_connection_addr("default"))

    # Define the fields for the collection
    fields = [
        # Primary key; supplied by the caller on insert (auto_id is disabled).
        FieldSchema(
            name="chunk_doc_id",
            dtype=DataType.VARCHAR,
            max_length=350,
            is_primary=True,
            auto_id=False,
        ),
        # Document ID
        FieldSchema(name="doc_id", dtype=DataType.VARCHAR, max_length=300),
        # Vector field (embedding), fixed at 384 dimensions
        FieldSchema(name="chunk_embedding", dtype=DataType.FLOAT_VECTOR, dim=384),
        # Context relevance score
        FieldSchema(name="context_relevance", dtype=DataType.FLOAT),
        # Context utilization score
        FieldSchema(name="context_utilization", dtype=DataType.FLOAT),
        # Adherence score
        FieldSchema(name="adherence", dtype=DataType.FLOAT),
        # Dataset name
        FieldSchema(name="dataset_name", dtype=DataType.VARCHAR, max_length=300),
        # Relevance score
        FieldSchema(name="relevance_score", dtype=DataType.FLOAT),
        # Utilization score
        FieldSchema(name="utilization_score", dtype=DataType.FLOAT),
        # Completeness score
        FieldSchema(name="completeness_score", dtype=DataType.FLOAT),
    ]

    # Define the collection schema
    schema = CollectionSchema(fields, description="RAG Bench document vector collection")

    # Record whether the collection is already present so the final message
    # does not claim a creation that never happened.
    already_existed = utility.has_collection(COLLECTION_NAME, using="default")

    # Create the collection in Milvus (or obtain a handle to the existing one)
    collection = Collection(name=COLLECTION_NAME, schema=schema)

    # Create an optimized index for fast vector search.
    # NOTE(review): when the collection already existed this re-issues the same
    # index request; Milvus is expected to accept an identical request as a
    # no-op -- confirm against the deployed server version.
    collection.create_index(
        "chunk_embedding",
        {
            "index_type": "HNSW",  # Hierarchical Navigable Small World index
            "metric_type": "COSINE",  # Cosine similarity for vector search
            "params": {"M": 16, "efConstruction": 200},  # HNSW parameters
        },
    )

    if already_existed:
        print(f"Collection '{COLLECTION_NAME}' already exists; reusing it.")
    else:
        print(f"Collection '{COLLECTION_NAME}' created successfully.")
    return collection