Saiteja Solleti committed on
Commit
e8e78ae
·
1 Parent(s): 39560b9

milvus schema addition

Browse files
Files changed (3) hide show
  1. app.py +7 -0
  2. createmilvusschema.py +47 -0
  3. crudmilvus.py +2 -1
app.py CHANGED
@@ -2,6 +2,7 @@ import gradio as gr
2
  import os
3
 
4
  from loaddataset import ExtractRagBenchData
 
5
  from model import generate_response
6
  from huggingface_hub import login
7
  from huggingface_hub import whoami
@@ -13,6 +14,12 @@ login(hf_token)
13
 
14
  rag_extracted_data = ExtractRagBenchData()
15
 
 
 
 
 
 
 
16
  print(rag_extracted_data.head(5))
17
 
18
  def chatbot(prompt):
 
2
  import os
3
 
4
  from loaddataset import ExtractRagBenchData
5
+ from createmilvusschema import CreateMilvusDbSchema
6
  from model import generate_response
7
  from huggingface_hub import login
8
  from huggingface_hub import whoami
 
14
 
15
  rag_extracted_data = ExtractRagBenchData()
16
 
17
+ #invoke create milvus db function
18
+ try:
19
+ db_collection = CreateMilvusDbSchema()
20
+ except Exception as e:
21
+ print(f"Error creating Milvus DB schema: {e}")
22
+
23
  print(rag_extracted_data.head(5))
24
 
25
  def chatbot(prompt):
createmilvusschema.py ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from pymilvus import connections, FieldSchema, CollectionSchema, DataType, Collection
3
+
4
+ milvus_token = os.getenv("MILVUS_TOKEN")
5
+
6
+ COLLECTION_NAME = "final_ragbench_document_embeddings"
7
+ MILVUS_CLOUD_URI = "https://in03-7b4da1b7b588a88.serverless.gcp-us-west1.cloud.zilliz.com"
8
+ connections.connect("default", uri=MILVUS_CLOUD_URI, token=milvus_token)
9
+
# Function to create the Milvus DB schema that the data is inserted into
def CreateMilvusDbSchema() -> Collection:
    """Create (or reuse) the RAG Bench embeddings collection and its vector index.

    Connects to the Milvus endpoint configured by ``MILVUS_CLOUD_URI`` and
    ``milvus_token``, declares the collection schema, binds it to
    ``COLLECTION_NAME`` and makes sure an HNSW/COSINE index exists on the
    ``chunk_embedding`` field.

    The function is safe to call on every application start: the index is
    only created when the embedding field has none yet, and the printed
    status distinguishes a freshly created collection from a reused one.

    Returns:
        The ``Collection`` handle bound to ``COLLECTION_NAME``.

    Raises:
        pymilvus exceptions are propagated unchanged (connection failures,
        or a schema mismatch with an already existing collection).
    """
    # Imported locally because the module header only pulls in the ORM classes.
    from pymilvus import utility

    embedding_field = "chunk_embedding"

    # Re-issue the connect call so the function does not depend on the
    # import-time connection still being registered under the "default" alias.
    connections.connect("default", uri=MILVUS_CLOUD_URI, token=milvus_token)
    print(connections.get_connection_addr("default"))

    # Remember whether the collection pre-dates this call so the status
    # message below does not claim a creation that never happened.
    already_exists = utility.has_collection(COLLECTION_NAME)

    # Define the fields for the collection
    fields = [
        FieldSchema(name="chunk_doc_id", dtype=DataType.VARCHAR, max_length=350, is_primary=True, auto_id=False),  # Primary Key
        FieldSchema(name="doc_id", dtype=DataType.VARCHAR, max_length=300),  # Document ID
        FieldSchema(name=embedding_field, dtype=DataType.FLOAT_VECTOR, dim=384),  # Vector Field (embedding)
        FieldSchema(name="context_relevance", dtype=DataType.FLOAT),  # Context Relevance Score
        FieldSchema(name="context_utilization", dtype=DataType.FLOAT),  # Context Utilization Score
        FieldSchema(name="adherence", dtype=DataType.FLOAT),  # Adherence Score
        FieldSchema(name="dataset_name", dtype=DataType.VARCHAR, max_length=300),  # Dataset Name
        FieldSchema(name="relevance_score", dtype=DataType.FLOAT),  # Relevance Score
        FieldSchema(name="utilization_score", dtype=DataType.FLOAT),  # Utilization Score
        FieldSchema(name="completeness_score", dtype=DataType.FLOAT),  # Completeness Score
    ]

    # Define the collection schema
    schema = CollectionSchema(fields, description="RAG Bench document vector collection")

    # Always pass the schema: for an existing collection this keeps the
    # original behaviour of surfacing a schema mismatch as an exception.
    collection = Collection(name=COLLECTION_NAME, schema=schema)

    # Build the vector index only when the embedding field has none yet.
    has_embedding_index = any(
        index.field_name == embedding_field for index in collection.indexes
    )
    if not has_embedding_index:
        collection.create_index(
            embedding_field,
            {
                "index_type": "HNSW",  # Hierarchical Navigable Small World (HNSW) index
                "metric_type": "COSINE",  # Cosine similarity for vector search
                "params": {"M": 16, "efConstruction": 200},  # HNSW parameters
            },
        )

    if already_exists:
        print(f"Collection '{COLLECTION_NAME}' already exists; reusing it.")
    else:
        print(f"Collection '{COLLECTION_NAME}' created successfully.")
    return collection
crudmilvus.py CHANGED
@@ -9,4 +9,5 @@ MILVUS_CLOUD_URI = "https://in03-7b4da1b7b588a88.serverless.gcp-us-west1.cloud.z
9
  connections.connect("default", uri=MILVUS_CLOUD_URI, token=milvus_token)
10
 
11
  # Verify connection
12
- print(connections.get_connection_addr("default"))
 
 
9
  connections.connect("default", uri=MILVUS_CLOUD_URI, token=milvus_token)
10
 
11
  # Verify connection
12
+ print(connections.get_connection_addr("default"))
13
+