# Captured page header (not part of the script): Spaces: Runtime error | File size: 2,340 Bytes | 6ab617c
import os
from dotenv import load_dotenv
load_dotenv()
import yaml
import argparse
from policy_rag.text_utils import DocLoader
from policy_rag.vectorstore_utils import QdrantVectorstoreHelper
from policy_rag.app_utils import (
CHUNK_METHOD,
EMBEDDING_MODEL_SOURCE,
get_chunk_func,
get_embedding_model
)
from policy_rag.eval_utils import eval_on_ls_dataset
# Keys every YAML config must define; each one is read by run_config().
REQUIRED_CONFIG_KEYS = (
    'data_dir',
    'chunk_method',
    'ls_project',
    'ls_dataset_name',
    'ls_experiment_name',
    'vectorstore_model',
    'metrics',
)


def resolve_config_paths(config=None, config_dir=None):
    """Return the ordered list of YAML config files selected by the CLI flags.

    Args:
        config: Path to a single YAML config file, or None.
        config_dir: Directory whose ``*.yml`` / ``*.yaml`` files should all
            be run, or None.

    Returns:
        A list of path strings: ``config`` first (when given), followed by
        the YAML files directly inside ``config_dir`` in sorted filename
        order. The list is empty when neither argument selects a file.

    Raises:
        NotADirectoryError: If ``config_dir`` is given but is not a directory.
    """
    paths = []
    if config is not None:
        paths.append(config)
    if config_dir is not None:
        if not os.path.isdir(config_dir):
            raise NotADirectoryError(
                f'--config_dir is not a directory: {config_dir}'
            )
        for name in sorted(os.listdir(config_dir)):
            candidate = os.path.join(config_dir, name)
            if name.lower().endswith(('.yml', '.yaml')) and os.path.isfile(candidate):
                paths.append(candidate)
    return paths


def load_config(config_path):
    """Read one YAML config file and check that it has every required key.

    Args:
        config_path: Path to the YAML file.

    Returns:
        The parsed top-level mapping.

    Raises:
        ValueError: If the file does not contain a mapping at the top level
            (for example, an empty file parses to None).
        KeyError: If any key in REQUIRED_CONFIG_KEYS is missing.
    """
    with open(config_path, 'r', encoding='utf-8') as file:
        config_yml = yaml.safe_load(file)

    if not isinstance(config_yml, dict):
        raise ValueError(
            f'{config_path}: expected a YAML mapping at the top level'
        )

    missing = [key for key in REQUIRED_CONFIG_KEYS if key not in config_yml]
    if missing:
        raise KeyError(
            f"{config_path}: missing required config keys: {', '.join(missing)}"
        )
    return config_yml


def run_config(config_path):
    """Run the load -> chunk -> index -> evaluate pipeline for one config file.

    Args:
        config_path: Path to a YAML config file accepted by load_config().

    Returns:
        Whatever eval_on_ls_dataset() returns for this experiment.
    """
    config_yml = load_config(config_path)

    data_dir = config_yml['data_dir']
    chunk_method = config_yml['chunk_method']
    ls_project = config_yml['ls_project']
    ls_dataset_name = config_yml['ls_dataset_name']
    ls_experiment_name = config_yml['ls_experiment_name']
    vectorstore_model = config_yml['vectorstore_model']
    metrics = config_yml['metrics']

    os.environ['LANGCHAIN_PROJECT'] = ls_project
    os.environ['LANGCHAIN_TRACING_V2'] = 'false'

    # Load Raw Data
    print('Loading Docs')
    loader = DocLoader()
    docs = loader.load_dir(data_dir)

    # Chunk Docs
    print('Chunking Docs')
    chunk_func, chunk_func_args = get_chunk_func(chunk_method)
    print(chunk_func_args)
    chunks = chunk_func(docs=docs, **chunk_func_args)
    print(f"len of chunks: {len(chunks)}")

    # Load chunks into vectorstore
    print('Creating Qdrant Collection and Getting Retriever')
    # Build the embedding model once and reuse it for indexing and retrieval;
    # the original constructed it twice from the same config.
    embedding_model = get_embedding_model(vectorstore_model)
    qdrant_vectorstore = QdrantVectorstoreHelper()
    qdrant_vectorstore.create_cloud_vectorstore(
        chunks=chunks,
        collection_name=ls_experiment_name,
        embedding_model=embedding_model,
        vector_size=vectorstore_model['vector_size']
    )
    retriever = qdrant_vectorstore.get_retriever(
        collection_name=ls_experiment_name,
        embedding_model=embedding_model,
        k=3
    )

    # Run RAGAS Evaluation in LangSmith
    return eval_on_ls_dataset(
        metrics=metrics,
        retriever=retriever,
        ls_dataset_name=ls_dataset_name,
        ls_project_name=ls_project,
        ls_experiment_name=ls_experiment_name
    )


def main(argv=None):
    """Parse command-line flags and run every selected config file.

    Args:
        argv: Argument list to parse; None means use sys.argv[1:].

    Exits with an argparse usage error (status 2) when neither flag selects
    a config file, instead of failing later inside open().
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--config', default=None, help='YAML config file to run')
    parser.add_argument('--config_dir', default=None, help='Directory of YAML config files to run')
    args = parser.parse_args(argv)

    try:
        config_paths = resolve_config_paths(args.config, args.config_dir)
    except NotADirectoryError as err:
        parser.error(str(err))

    if not config_paths:
        parser.error('provide --config and/or a --config_dir containing YAML files')

    for config_path in config_paths:
        print(f'Running config: {config_path}')
        run_config(config_path)


if __name__ == "__main__":
    main()