# import gradio as gr
# gr.load("models/HuggingFaceH4/zephyr-7b-alpha").launch()
import os
import numpy as np
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer
import faiss
# Step 1: Load Precomputed Embeddings and Metadata
def load_embeddings(embeddings_folder='embeddings'):
    all_embeddings = []
    metadata = []
    for file in os.listdir(embeddings_folder):
        if file.endswith('.npy'):
            embedding_path = os.path.join(embeddings_folder, file)
            embedding = np.load(embedding_path)  # Shape: (27, 384) per file
            all_embeddings.append(embedding)
            # Metadata corresponds to each .npy file
            meta_info = file.replace('.npy', '')  # Example: 'course_1'
            metadata.extend([meta_info] * embedding.shape[0])  # Repeat metadata for each sub-embedding
    if not all_embeddings:
        raise FileNotFoundError(f"No .npy files found in '{embeddings_folder}'")
    # Stack the per-file arrays into a single (n * 27, 384) matrix
    all_embeddings = np.vstack(all_embeddings)
    return all_embeddings, metadata
embeddings, metadata = load_embeddings()
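# Illustrative sanity check (not in the original script): every row of the
# embedding matrix should have exactly one corresponding metadata entry.
assert embeddings.shape[0] == len(metadata)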
# Step 2: Set Up FAISS Index with Flattened Embeddings
dimension = embeddings.shape[1]  # Embedding dimension (384 here)
index = faiss.IndexFlatL2(dimension)
index.add(embeddings.astype('float32'))  # FAISS expects float32 vectors
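# IndexFlatL2 performs exact brute-force L2 search, which is fine at this
# scale; approximate indexes (e.g. faiss.IndexIVFFlat) only pay off on much
# larger corpora.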
# Step 3: Load the Language Model
# model_name = "HuggingFaceH4/zephyr-7b-alpha"
# tokenizer = AutoTokenizer.from_pretrained(model_name)
# model = AutoModelForCausalLM.from_pretrained(model_name)
model_name = "TheBloke/zephyr-7B-beta-GPTQ"
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)
model = AutoModelForCausalLM.from_pretrained(model_name, device_map="balanced", trust_remote_code=False)
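# Note (setup assumption): loading GPTQ-quantized weights through
# transformers additionally requires the optimum and auto-gptq packages.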
# Step 4: Define the Retrieval Function
def retrieve_documents(query, top_k=3):
    # Heuristic query embedding: average the stored vectors whose metadata
    # name contains the query string (no separate query encoder is loaded)
    matching = [embeddings[i] for i, meta in enumerate(metadata) if query.lower() in meta.lower()]
    if not matching:  # avoid np.mean over an empty list, which yields NaN
        matching = list(embeddings)
    query_embedding = np.mean(matching, axis=0).astype('float32')
    distances, indices = index.search(query_embedding.reshape(1, -1), top_k)
    return [metadata[idx] for idx in indices[0]]
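# Alternative retrieval sketch (an assumption, not the original app's method):
# if the precomputed vectors came from a 384-dim sentence-transformers encoder
# such as "all-MiniLM-L6-v2", encoding the query with that same model is more
# robust than the substring heuristic above.
def retrieve_documents_encoded(query, top_k=3):
    from sentence_transformers import SentenceTransformer  # assumed available
    encoder = SentenceTransformer("all-MiniLM-L6-v2")  # hypothetical: must match the offline encoder
    query_embedding = encoder.encode([query]).astype('float32')  # shape (1, 384)
    distances, indices = index.search(query_embedding, top_k)
    return [metadata[idx] for idx in indices[0]]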
# Step 5: Define the Response Generation Function
def generate_response(query):
    retrieved_docs = retrieve_documents(query)
    context = " ".join(retrieved_docs)
    input_text = f"Context: {context}\n\nQuestion: {query}\n\nAnswer:"
    inputs = tokenizer(input_text, return_tensors="pt").to(model.device)  # keep tensors on the model's device
    output = model.generate(**inputs, max_new_tokens=512)  # cap generated tokens, not total length
    answer = tokenizer.decode(output[0], skip_special_tokens=True)
    return answer
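# Optional refinement (sketch, not in the original code): Zephyr is a
# chat-tuned model, so wrapping the prompt with the tokenizer's chat template
# usually yields cleaner answers than a raw completion prompt, e.g.:
#
#   messages = [{"role": "user", "content": input_text}]
#   prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)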
# Step 6: Create Gradio Interface
def gradio_interface(query):
    response = generate_response(query)
    return response

iface = gr.Interface(
    fn=gradio_interface,
    inputs=gr.Textbox(lines=2, placeholder="Enter your query here..."),
    outputs="text",
    title="RAG-based Course Search",
    description="Enter a query to search for relevant courses using Retrieval Augmented Generation."
)
if __name__ == "__main__":
    iface.launch()