# Scraped Hugging Face file-view header (not part of the program):
#   author: AI-RESEARCHER-2024 | commit: "Update app.py" (6565328, verified)
#   file size: 2.35 kB
import os
import gradio as gr
from llama_index.core import SimpleDirectoryReader, VectorStoreIndex
from langchain_community.embeddings import HuggingFaceEmbeddings
from llama_index.llms.llama_cpp import LlamaCPP
from llama_index.llms.llama_cpp.llama_utils import (
messages_to_prompt,
completion_to_prompt,
)
# Direct-download ("resolve") URL of the GGUF weights file on the Hugging Face
# Hub. The host is the canonical "huggingface.co" with no trailing dot, so the
# request's Host header and TLS hostname match the Hub's own links.
model_url = (
    "https://huggingface.co/bartowski/Llama-3.2-3B-Instruct-GGUF"
    "/resolve/main/Llama-3.2-3B-Instruct-Q4_K_M.gguf"
)
llm = LlamaCPP(
    # Passing a URL lets LlamaCPP download the model file automatically.
    model_url=model_url,
    # Prompt builders imported from llama_utils.
    # NOTE(review): these helpers were labelled as producing Llama2-format
    # prompts, while model_url points at a Llama 3.2 Instruct model --
    # confirm the prompt template matches the model.
    messages_to_prompt=messages_to_prompt,
    completion_to_prompt=completion_to_prompt,
    # Generation settings.
    temperature=0.1,
    max_new_tokens=256,
    context_window=2048,
    # Extra keyword arguments forwarded to __call__() (none needed).
    generate_kwargs={},
    # Extra keyword arguments forwarded to __init__(); n_gpu_layers must be
    # at least 1 for the GPU to be used.
    model_kwargs={"n_gpu_layers": 1},
    verbose=True,
)
# Embedding model handed to the vector index when it is built.
embeddings = HuggingFaceEmbeddings(
    model_name="BAAI/bge-small-en-v1.5",
)
def initialize_index():
    """Build a query engine over the PDF files in the ``data`` directory.

    The PDFs are loaded, indexed into a vector store using the module-level
    ``embeddings`` model, and exposed through a query engine that answers
    with the module-level ``llm``.

    Returns:
        The query engine created from the freshly built index.
    """
    # Only files with a .pdf extension are picked up from ./data.
    pdf_documents = SimpleDirectoryReader(
        input_dir="data",
        required_exts=[".pdf"],
    ).load_data()

    vector_index = VectorStoreIndex.from_documents(
        pdf_documents,
        embed_model=embeddings,
    )

    # Answers are generated by the Llama model configured above.
    return vector_index.as_query_engine(llm=llm)
# Build the query engine once, when the module is loaded; process_query()
# reads this module-level name for every incoming message.
query_engine = initialize_index()
def process_query(
    message: str,
    history: list[tuple[str, str]],
) -> str:
    """Answer a chat message using the RAG query engine.

    Args:
        message: The user's question.
        history: Earlier chat turns passed in by the chat interface. Unused:
            every question is answered on its own.

    Returns:
        The query engine's response converted to text, or a string of the
        form ``"Error processing query: <details>"`` if anything fails.
    """
    import logging  # function-scope import keeps this block self-contained

    try:
        answer = query_engine.query(message)
        return str(answer)
    except Exception as exc:
        # UI boundary: the user still gets a readable message, but the full
        # traceback is logged so failures can be diagnosed afterwards.
        logging.getLogger(__name__).exception("Error processing query")
        return f"Error processing query: {exc}"
# Chat UI: every submitted message is answered by process_query.
# (The undo_btn / clear_btn options are intentionally not set.)
demo = gr.ChatInterface(
    fn=process_query,
    title="PDF Question Answering with RAG + Llama",
    description="Ask questions about the content of the loaded PDF documents using Llama model",
)

# Start the interface only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch(debug=True)