import os
from operator import itemgetter

from dotenv import load_dotenv
from langchain_community.vectorstores import FAISS
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_openai import ChatOpenAI
from sentence_transformers import SentenceTransformer


def setup_rag_pipeline(training_documents, model_path, base_model_name,
                       retriever_k, llm_model, llm_temperature):
    """Build a RAG chain over `training_documents` using a fine-tuned embedding model.

    Returns the assembled chain and the underlying retriever. Note that
    `base_model_name` is accepted for interface compatibility but is not
    used inside this function.
    """
    # Load environment variables (expects HF_TOKEN for private Hugging Face repos)
    load_dotenv()
    hf_token = os.getenv("HF_TOKEN")

    # Load the fine-tuned model once to verify access and warm the local cache;
    # `token` replaces the deprecated `use_auth_token` argument
    fine_tuned_model = SentenceTransformer(model_path, token=hf_token)

    # Create embeddings using the fine-tuned model
    fine_tuned_embeddings = HuggingFaceEmbeddings(
        model_name=model_path,
        model_kwargs={"device": "cpu"},
    )

    # Index the documents and expose a top-k similarity retriever
    vectorstore = FAISS.from_documents(training_documents, fine_tuned_embeddings)
    retriever = vectorstore.as_retriever(search_kwargs={"k": retriever_k})

    RAG_PROMPT = """
You are an AI assistant specializing in the EU AI Act. Given the context and
question below, provide a concise and accurate answer. If the information is
not in the context, state that you don't have enough information to answer.

Context: {context}

Question: {question}

Answer:
"""

    rag_prompt_template = ChatPromptTemplate.from_template(RAG_PROMPT)
    rag_llm = ChatOpenAI(model=llm_model, temperature=llm_temperature)

    # Retrieve documents for the question, pass the retrieved context through
    # unchanged, then generate an answer; the chain returns both the response
    # and the context it was grounded on.
    rag_chain = (
        {"context": itemgetter("question") | retriever,
         "question": itemgetter("question")}
        | RunnablePassthrough.assign(context=itemgetter("context"))
        | {"response": rag_prompt_template | rag_llm | StrOutputParser(),
           "context": itemgetter("context")}
    )
    return rag_chain, retriever
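
# A minimal usage sketch of the pipeline above. The document texts, the
# Hugging Face repo id, the base model name, and the OpenAI model name below
# are placeholders, not values from the original project; running this also
# assumes OPENAI_API_KEY (and HF_TOKEN, if the repo is private) are set in
# the environment or a .env file.
if __name__ == "__main__":
    from langchain_core.documents import Document

    # Placeholder corpus standing in for the real EU AI Act documents
    docs = [
        Document(page_content="Article 5 of the EU AI Act prohibits certain "
                              "AI practices, such as social scoring by public "
                              "authorities."),
        Document(page_content="High-risk AI systems must undergo conformity "
                              "assessments before being placed on the market."),
    ]

    chain, retriever = setup_rag_pipeline(
        training_documents=docs,
        model_path="your-username/your-fine-tuned-embeddings",  # placeholder repo id
        base_model_name="sentence-transformers/all-MiniLM-L6-v2",  # placeholder; unused
        retriever_k=2,
        llm_model="gpt-4o-mini",  # placeholder OpenAI model
        llm_temperature=0,
    )

    result = chain.invoke({"question": "Which AI practices does the EU AI Act prohibit?"})
    print(result["response"])   # generated answer
    print(result["context"])    # the retrieved Documents the answer was grounded on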