CHATBOT1 / app.py
FridayMaster's picture
Update app.py
9042b33 verified
raw
history blame
2.33 kB
import os
import pandas as pd
import PyPDF2
import spacy
import faiss
from sentence_transformers import SentenceTransformer
import torch
import gradio as gr
# Extract the raw text from a PDF file
def extract_text_from_pdf(pdf_path):
    """Return the text of every page of a PDF, concatenated in page order.

    Args:
        pdf_path: Filesystem path of the PDF file to read.

    Returns:
        A single string holding the extracted text of all pages. No
        separator is inserted between pages.
    """
    with open(pdf_path, 'rb') as pdf_file:
        pdf_reader = PyPDF2.PdfReader(pdf_file)
        # Join once instead of growing a string with ``+=`` per page.
        # ``or ""`` is defensive: a page that yields no text must not
        # break the concatenation.
        return "".join(page.extract_text() or "" for page in pdf_reader.pages)
# Path to your PDF file
pdf_path = 'FridayMaster/UBANTUMANUAL/Getting Started with Ubuntu 16.04.pdf'

# Maximum number of whitespace-separated words per retrieval passage.
CHUNK_WORDS = 200


def _split_into_chunks(text, chunk_words=CHUNK_WORDS):
    """Split *text* into consecutive passages of at most *chunk_words* words."""
    words = text.split()
    return [
        ' '.join(words[start:start + chunk_words])
        for start in range(0, len(words), chunk_words)
    ]


# Extract text from the PDF
pdf_text = extract_text_from_pdf(pdf_path)

# Convert the text to a DataFrame with one passage per row. Keeping the whole
# manual in a single row would leave exactly one vector to search, so every
# query would return the entire document. If the PDF produced no words at
# all, fall back to a single row so the frame is never empty.
df = pd.DataFrame({'text': _split_into_chunks(pdf_text) or [pdf_text]})
# Load the custom embedding model
class CustomEmbeddingModel:
    """Thin wrapper around a ``SentenceTransformer`` model."""

    def __init__(self, model_name):
        """Load the sentence-transformers model identified by *model_name*."""
        self.model = SentenceTransformer(model_name)

    def embed_text(self, text):
        """Encode *text* and return its embedding as a CPU tensor.

        Args:
            text: The input forwarded unchanged to ``self.model.encode``.

        Returns:
            The tensor produced by ``encode(..., convert_to_tensor=True)``,
            detached and moved to the CPU.
        """
        embedding = self.model.encode(text, convert_to_tensor=True)
        # The encoder may place its output on an accelerator; callers convert
        # the result with ``.numpy()``, which only works for CPU tensors.
        return embedding.detach().cpu()
# Checkpoint handed to the embedding wrapper; replace with your model name.
_EMBEDDING_MODEL_NAME = 'distilbert-base-uncased'
embedding_model = CustomEmbeddingModel(_EMBEDDING_MODEL_NAME)

# spaCy English pipeline used for text preprocessing.
_SPACY_PIPELINE_NAME = "en_core_web_sm"
nlp = spacy.load(_SPACY_PIPELINE_NAME)
def preprocess_text(text):
    """Lemmatize *text*, keeping only its alphabetic tokens.

    Runs the module-level spaCy pipeline ``nlp`` over *text*, drops every
    token for which ``is_alpha`` is false, lower-cases the lemma of each
    remaining token and returns the lemmas joined by single spaces.
    """
    parsed = nlp(text)
    lemmas = (tok.lemma_.lower() for tok in parsed if tok.is_alpha)
    return ' '.join(lemmas)
# Apply preprocessing and embedding
df['text'] = df['text'].apply(preprocess_text)
df['text_embeddings'] = df['text'].apply(embedding_model.embed_text)

# Create a FAISS index sized from the embeddings themselves instead of
# assuming a fixed dimensionality.
embeddings = torch.stack(df['text_embeddings'].tolist())
# FAISS consumes float32 NumPy arrays; move the tensor to the CPU first
# because ``.numpy()` is only available for CPU tensors.
embedding_matrix = (
    embeddings.detach().cpu().to(torch.float32).contiguous().numpy()
)
faiss_index = faiss.IndexFlatL2(embedding_matrix.shape[1])
faiss_index.add(embedding_matrix)
# ``index`` used to be a separate, never-populated index; keep the name as an
# alias so the module still exposes it.
index = faiss_index
# Function to generate a response
def generate_response(prompt):
    """Return the indexed text closest to *prompt*.

    Args:
        prompt: The user's query string. ``None`` and blank strings are
            answered with a short hint instead of being searched.

    Returns:
        The ``'text'`` value of the nearest row of ``df`` according to
        ``faiss_index``, or a short message when there is nothing to search
        for or nothing was found.
    """
    if not prompt or not prompt.strip():
        return "Please enter a question."

    # The indexed rows were passed through ``preprocess_text`` before being
    # embedded, so give the query the same treatment. If nothing alphabetic
    # survives, fall back to the raw prompt rather than embedding "".
    query_text = preprocess_text(prompt) or prompt
    query_embedding = embedding_model.embed_text(query_text).unsqueeze(0)
    # FAISS consumes float32 NumPy arrays, and ``.numpy()`` needs a CPU tensor.
    query_matrix = (
        query_embedding.detach().cpu().to(torch.float32).contiguous().numpy()
    )
    _distances, indices = faiss_index.search(query_matrix, k=1)

    best_row = int(indices[0][0])
    # FAISS reports -1 when it has no neighbour to return; without this check
    # ``df.iloc[-1]`` would silently hand back the last row.
    if best_row < 0:
        return "No matching passage was found."
    return df.iloc[best_row]['text']
# Gradio interface
# Input and output widgets for the web UI.
_query_box = gr.Textbox(label="Enter your query", placeholder="Ask about Ubuntu...")
_answer_box = gr.Textbox(label="Response")

# Wire the retrieval function to the widgets.
iface = gr.Interface(
    fn=generate_response,
    inputs=_query_box,
    outputs=_answer_box,
    title="Ubuntu Manual Chatbot",
    description="Ask questions about the Ubuntu manual.",
)

# Start the web server only when the file is run as a script.
if __name__ == "__main__":
    iface.launch()