LLama2_gguf / qa.py
Jayem-11's picture
Upload 4 files
58b29d7 verified
from langchain.document_loaders import DirectoryLoader, TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.llms import CTransformers
from langchain import PromptTemplate
from langchain.chains import RetrievalQA
from langchain.vectorstores import FAISS
import time
# Load every *.txt file found directly under ./infotext.
loader = DirectoryLoader("./infotext", glob="*.txt", loader_cls=TextLoader)
documents = loader.load()

# Split the documents into small overlapping chunks before embedding.
# The splitter's defaults (4000-character chunks, 200 overlap) are much
# larger than what all-MiniLM-L6-v2 can encode: that model truncates input
# beyond 256 word pieces, so most of a default-sized chunk would never
# influence its embedding. Small chunks also keep the retrieved context
# (k chunks stuffed into one prompt) within a modest LLM context window.
splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
texts = splitter.split_documents(documents)

# Sentence-embedding model used to vectorise the chunks; runs on CPU.
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
    model_kwargs={"device": "cpu"},
)

# Create the FAISS index from the chunks and persist it under ./faiss.
db = FAISS.from_documents(texts, embeddings)
db.save_local("faiss")
# Prompt text sent to the model. The {context} and {question} placeholders
# are filled in at query time; every line ends with a newline, including
# the final "Helpful answer:" line.
template = (
    "Use the following pieces of information to answer the user's question.\n"
    "If you don't know the answer, just say that you don't know, don't try to make up an answer.\n"
    "Context: {context}\n"
    "Question: {question}\n"
    "Only return the helpful answer below and nothing else.\n"
    "Helpful answer:\n"
)
# Generation settings handed to the model wrapper: at most 256 new tokens
# per answer, sampled at a very low temperature (0.01).
config = dict(max_new_tokens=256, temperature=0.01)

# Load the quantised Llama-2 13B chat model through CTransformers.
llm = CTransformers(
    model="TheBloke/Llama-2-13B-chat-GGML",
    model_file="llama-2-13b-chat.ggmlv3.q2_K.bin",
    model_type="llama",
    config=config,
)
# Re-open the index that was persisted under ./faiss. The embedding model
# is configured exactly as it was when the index was built (same model
# name, CPU device) so that query vectors are comparable with stored ones.
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
    model_kwargs=dict(device="cpu"),
)
db = FAISS.load_local("faiss", embeddings)
# Bind the prompt text to its two placeholders, and expose the vector store
# as a retriever configured with k=2 (two matches fetched per query).
prompt = PromptTemplate(
    input_variables=["context", "question"],
    template=template,
)
retriever = db.as_retriever(search_kwargs={"k": 2})
def query(question):
    """Answer *question* with the retrieval-QA chain and time the call.

    A "stuff" RetrievalQA chain is assembled from the module-level ``llm``,
    ``retriever`` and ``prompt`` objects, then invoked with the question.

    Args:
        question: The question text, passed to the chain as its ``query``
            input.

    Returns:
        A two-item list ``[response, time_elapsed]`` where ``response`` is
        the chain's ``"result"`` value and ``time_elapsed`` is the number of
        seconds (float) spent inside the chain call itself; building the
        chain is not included in the timing.
    """
    model = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=retriever,
        return_source_documents=True,
        chain_type_kwargs={"prompt": prompt},
    )

    # perf_counter() is monotonic, so the measured duration cannot be skewed
    # (or go negative) if the system clock is adjusted during a long
    # generation, unlike a difference of two time.time() readings.
    time_start = time.perf_counter()
    output = model({"query": question})
    time_elapsed = time.perf_counter() - time_start

    response = output["result"]
    return [response, time_elapsed]