Spaces:
Running
Running
Saiteja Solleti
committed on
Commit
·
e8e78ae
1
Parent(s):
39560b9
Milvus schema addition
Browse files
- app.py +7 -0
- createmilvusschema.py +47 -0
- crudmilvus.py +2 -1
app.py
CHANGED
@@ -2,6 +2,7 @@ import gradio as gr
|
|
2 |
import os
|
3 |
|
4 |
from loaddataset import ExtractRagBenchData
|
|
|
5 |
from model import generate_response
|
6 |
from huggingface_hub import login
|
7 |
from huggingface_hub import whoami
|
@@ -13,6 +14,12 @@ login(hf_token)
|
|
13 |
|
14 |
rag_extracted_data = ExtractRagBenchData()
|
15 |
|
|
|
|
|
|
|
|
|
|
|
|
|
16 |
print(rag_extracted_data.head(5))
|
17 |
|
18 |
def chatbot(prompt):
|
|
|
2 |
import os
|
3 |
|
4 |
from loaddataset import ExtractRagBenchData
|
5 |
+
from createmilvusschema import CreateMilvusDbSchema
|
6 |
from model import generate_response
|
7 |
from huggingface_hub import login
|
8 |
from huggingface_hub import whoami
|
|
|
14 |
|
15 |
rag_extracted_data = ExtractRagBenchData()
|
16 |
|
17 |
+
# Invoke the function that creates the Milvus DB schema
|
18 |
+
try:
|
19 |
+
db_collection = CreateMilvusDbSchema()
|
20 |
+
except Exception as e:
|
21 |
+
print(f"Error creating Milvus DB schema: {e}")
|
22 |
+
|
23 |
print(rag_extracted_data.head(5))
|
24 |
|
25 |
def chatbot(prompt):
|
createmilvusschema.py
ADDED
@@ -0,0 +1,47 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
from pymilvus import connections, FieldSchema, CollectionSchema, DataType, Collection
|
3 |
+
|
4 |
+
milvus_token = os.getenv("MILVUS_TOKEN")
|
5 |
+
|
6 |
+
COLLECTION_NAME = "final_ragbench_document_embeddings"
|
7 |
+
MILVUS_CLOUD_URI = "https://in03-7b4da1b7b588a88.serverless.gcp-us-west1.cloud.zilliz.com"
|
8 |
+
connections.connect("default", uri=MILVUS_CLOUD_URI, token=milvus_token)
|
9 |
+
|
10 |
+
# Function that creates the Milvus DB schema into which the data is inserted
|
11 |
+
def CreateMilvusDbSchema():
    """Define the RAG Bench embeddings collection in Milvus and index it.

    Connects the "default" alias to ``MILVUS_CLOUD_URI`` using
    ``milvus_token``, prints the resolved connection address, declares the
    collection fields, instantiates the ``COLLECTION_NAME`` collection with
    that schema and requests an HNSW/COSINE index on the embedding field.

    Returns:
        The ``Collection`` object bound to ``COLLECTION_NAME``.
    """
    connections.connect("default", uri=MILVUS_CLOUD_URI, token=milvus_token)
    print(connections.get_connection_addr("default"))

    def float_field(field_name):
        # Every score column is a plain FLOAT scalar.
        return FieldSchema(name=field_name, dtype=DataType.FLOAT)

    def varchar_field(field_name, length, **extra):
        # String columns differ only in their maximum length and key flags.
        return FieldSchema(
            name=field_name, dtype=DataType.VARCHAR, max_length=length, **extra
        )

    vector_field_name = "chunk_embedding"

    # Field order is kept exactly as declared in the schema definition.
    fields = [
        varchar_field("chunk_doc_id", 350, is_primary=True, auto_id=False),  # primary key
        varchar_field("doc_id", 300),  # document ID
        FieldSchema(name=vector_field_name, dtype=DataType.FLOAT_VECTOR, dim=384),  # embedding
        float_field("context_relevance"),
        float_field("context_utilization"),
        float_field("adherence"),
        varchar_field("dataset_name", 300),
        float_field("relevance_score"),
        float_field("utilization_score"),
        float_field("completeness_score"),
    ]

    schema = CollectionSchema(fields, description="RAG Bench document vector collection")
    collection = Collection(name=COLLECTION_NAME, schema=schema)

    # HNSW graph index searched with cosine similarity.
    hnsw_index = {
        "index_type": "HNSW",
        "metric_type": "COSINE",
        "params": {"M": 16, "efConstruction": 200},
    }
    collection.create_index(vector_field_name, hnsw_index)

    print(f"Collection '{COLLECTION_NAME}' created successfully.")
    return collection
|
crudmilvus.py
CHANGED
@@ -9,4 +9,5 @@ MILVUS_CLOUD_URI = "https://in03-7b4da1b7b588a88.serverless.gcp-us-west1.cloud.z
|
|
9 |
connections.connect("default", uri=MILVUS_CLOUD_URI, token=milvus_token)
|
10 |
|
11 |
# Verify connection
|
12 |
-
print(connections.get_connection_addr("default"))
|
|
|
|
9 |
connections.connect("default", uri=MILVUS_CLOUD_URI, token=milvus_token)
|
10 |
|
11 |
# Verify connection
|
12 |
+
print(connections.get_connection_addr("default"))
|
13 |
+
|