Spaces:
Sleeping
Sleeping
import os | |
import pickle | |
import gradio as gr | |
from crawler import ContentCrawler | |
from rag import RAGEngine | |
# Locations of the on-disk caches: the pickled crawl results ("chunks") and the
# pickled embeddings computed from them.
chunks = "./data/chunks.pkl"
embeddings = "./data/embeddings.pkl"


def _dump_pickle(obj, path):
    """Pickle *obj* to *path*, creating the parent directory if needed.

    The data is first written to a temporary sibling file and then moved into
    place with os.replace, so an interrupted run cannot leave a truncated
    cache file that would make the next start-up fail while unpickling.
    """
    os.makedirs(os.path.dirname(path) or ".", exist_ok=True)
    tmp_path = f"{path}.tmp"
    with open(tmp_path, "wb") as f:
        pickle.dump(obj, f)
    os.replace(tmp_path, path)


# NOTE(security): pickle.load can execute arbitrary code contained in the file.
# Both cache files must only ever be files produced by this script itself.

# Load the crawled chunks from the cache, or crawl the website and cache them.
if os.path.exists(chunks):
    print("Loading chunks")
    with open(chunks, "rb") as f:
        results = pickle.load(f)
else:
    print("Chunks file not found. Crawling the website...")
    # Crawl the documentation site starting from its base URL.
    base_url = "https://doc-publik.entrouvert.com/"
    crawler = ContentCrawler(base_url)
    results = crawler.crawl()
    # Persist the crawl so later start-ups can skip it.
    _dump_pickle(results, chunks)

# Initialize the RAG engine with the loaded (or freshly crawled) chunks.
rag_engine = RAGEngine(results)

# Load the embeddings from the cache, or compute them and cache them.
if os.path.exists(embeddings):
    print("Loading embeddings")
    with open(embeddings, "rb") as f:
        rag_engine.embeddings = pickle.load(f)
else:
    print("Creating embeddings")
    rag_engine.index_documents()
    _dump_pickle(rag_engine.embeddings, embeddings)
# Answer questions using the RAG engine.
# The "urls" field of the result is also retrieved and rendered as clickable Markdown links.
def answer_question(question):
    """Answer *question* with the RAG engine, streaming Markdown output.

    This is a generator: it first yields a placeholder loading message, then
    yields the final text. When the engine result has a non-empty "urls"
    entry, the URLs are appended as a Markdown bullet list of links under a
    "Sources" heading.

    Args:
        question: The question text passed to the RAG engine.

    Yields:
        The loading message, followed by either the formatted answer or an
        error message if an exception was raised while producing it.
    """
    import logging  # local import so the file's import header stays untouched

    # Show a loading message immediately.
    yield "Chargement en cours..."
    try:
        result = rag_engine.rag(question, top_k=5)
        # Extract the answer text and the associated source URLs.
        response = result.get("response", "")
        urls = result.get("urls", [])
        # Append the sources as Markdown links when any URLs are present.
        if urls:
            links_md = "\n".join(f"- [{url}]({url})" for url in urls)
            markdown_output = f"{response}\n\n**Sources:**\n{links_md}"
        else:
            markdown_output = response
    except Exception as e:
        # UI boundary: report the failure to the user, but also log the full
        # traceback so the error is not silently lost on the server side.
        logging.getLogger(__name__).exception("answer_question failed")
        markdown_output = f"Une erreur est survenue: {e}"
    # Send the final answer (or the error message).
    yield markdown_output
# Build the Q&A web UI: one text box for the question, a Markdown area for the
# answer, and flagging turned off.
iface = gr.Interface(
    title="Publik Q&A",
    description="Poser des questions sur Publik",
    fn=answer_question,
    inputs=gr.Textbox(label="Votre question"),
    outputs=gr.Markdown(label="Réponse"),
    flagging_mode="never",
)

# Start serving the interface.
iface.launch()