Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -12,6 +12,7 @@ from huggingface_hub import login
|
|
12 |
import arxiv
|
13 |
import numpy as np
|
14 |
import torch # Add torch to explicitly set the device
|
|
|
15 |
|
16 |
# Access the Hugging Face token from the environment variable
|
17 |
HF_TOKEN = os.getenv("HF_Token")
|
@@ -63,7 +64,6 @@ def download_youtube_video(video_url, output_dir, title=None):
|
|
63 |
try:
|
64 |
with YoutubeDL(ydl_opts) as ydl:
|
65 |
info = ydl.extract_info(video_url, download=True)
|
66 |
-
# Extract the final downloaded file path
|
67 |
downloaded_file = ydl.prepare_filename(info)
|
68 |
return downloaded_file
|
69 |
except Exception as e:
|
@@ -81,22 +81,18 @@ def fetch_and_download_youtube_video(query, output_dir="./videos"):
|
|
81 |
|
82 |
try:
|
83 |
with YoutubeDL(ydl_opts) as ydl:
|
84 |
-
# Perform a search for the query on YouTube
|
85 |
search_results = ydl.extract_info(f"ytsearch:{query}", download=False)
|
86 |
-
|
87 |
if 'entries' not in search_results or len(search_results['entries']) == 0:
|
88 |
print(f"No YouTube results found for query: '{query}'")
|
89 |
return []
|
90 |
|
91 |
-
video_info = search_results['entries'][0]
|
92 |
video_title = video_info.get("title", "unknown_title")
|
93 |
video_url = video_info.get("webpage_url", None)
|
94 |
-
|
95 |
if not video_url:
|
96 |
print("No URL found for the video.")
|
97 |
return []
|
98 |
|
99 |
-
# Download the video
|
100 |
local_path = download_youtube_video(video_url, output_dir, title=video_title)
|
101 |
if not local_path:
|
102 |
return []
|
@@ -108,24 +104,21 @@ def fetch_and_download_youtube_video(query, output_dir="./videos"):
|
|
108 |
print(f"Error fetching YouTube video for query '{query}': {e}")
|
109 |
return []
|
110 |
|
111 |
-
from arxiv import Client, Search, SortCriterion
|
112 |
-
|
113 |
def fetch_from_arxiv(query="machine learning", max_results=2, output_dir="./papers"):
|
114 |
"""Fetch papers from arXiv and download their PDFs."""
|
115 |
print(f"Fetching papers for query: {query}")
|
116 |
-
client = Client()
|
117 |
-
search = Search(
|
118 |
query=query,
|
119 |
max_results=max_results,
|
120 |
-
sort_by=SortCriterion.Relevance
|
121 |
)
|
122 |
metadata = []
|
123 |
for i, result in enumerate(client.results(search)):
|
124 |
-
pdf_url = result.pdf_url
|
125 |
filename = f"{query.replace(' ', '_')}_arxiv_{i}.pdf"
|
126 |
local_path = os.path.join(output_dir, filename)
|
127 |
try:
|
128 |
-
# Download the PDF
|
129 |
response = requests.get(pdf_url)
|
130 |
if response.status_code == 200:
|
131 |
with open(local_path, 'wb') as f:
|
@@ -182,8 +175,21 @@ def hybrid_rag_system_with_llama(query):
|
|
182 |
final_response = generate_llama_response(query, context)
|
183 |
return final_results, final_response
|
184 |
|
185 |
-
#
|
186 |
-
query
|
187 |
-
|
188 |
-
|
189 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
12 |
import arxiv
|
13 |
import numpy as np
|
14 |
import torch # Add torch to explicitly set the device
|
15 |
+
import gradio as gr
|
16 |
|
17 |
# Access the Hugging Face token from the environment variable
|
18 |
HF_TOKEN = os.getenv("HF_Token")
|
|
|
64 |
try:
|
65 |
with YoutubeDL(ydl_opts) as ydl:
|
66 |
info = ydl.extract_info(video_url, download=True)
|
|
|
67 |
downloaded_file = ydl.prepare_filename(info)
|
68 |
return downloaded_file
|
69 |
except Exception as e:
|
|
|
81 |
|
82 |
try:
|
83 |
with YoutubeDL(ydl_opts) as ydl:
|
|
|
84 |
search_results = ydl.extract_info(f"ytsearch:{query}", download=False)
|
|
|
85 |
if 'entries' not in search_results or len(search_results['entries']) == 0:
|
86 |
print(f"No YouTube results found for query: '{query}'")
|
87 |
return []
|
88 |
|
89 |
+
video_info = search_results['entries'][0]
|
90 |
video_title = video_info.get("title", "unknown_title")
|
91 |
video_url = video_info.get("webpage_url", None)
|
|
|
92 |
if not video_url:
|
93 |
print("No URL found for the video.")
|
94 |
return []
|
95 |
|
|
|
96 |
local_path = download_youtube_video(video_url, output_dir, title=video_title)
|
97 |
if not local_path:
|
98 |
return []
|
|
|
104 |
print(f"Error fetching YouTube video for query '{query}': {e}")
|
105 |
return []
|
106 |
|
|
|
|
|
107 |
def fetch_from_arxiv(query="machine learning", max_results=2, output_dir="./papers"):
|
108 |
"""Fetch papers from arXiv and download their PDFs."""
|
109 |
print(f"Fetching papers for query: {query}")
|
110 |
+
client = arxiv.Client()
|
111 |
+
search = arxiv.Search(
|
112 |
query=query,
|
113 |
max_results=max_results,
|
114 |
+
sort_by=arxiv.SortCriterion.Relevance
|
115 |
)
|
116 |
metadata = []
|
117 |
for i, result in enumerate(client.results(search)):
|
118 |
+
pdf_url = result.pdf_url
|
119 |
filename = f"{query.replace(' ', '_')}_arxiv_{i}.pdf"
|
120 |
local_path = os.path.join(output_dir, filename)
|
121 |
try:
|
|
|
122 |
response = requests.get(pdf_url)
|
123 |
if response.status_code == 200:
|
124 |
with open(local_path, 'wb') as f:
|
|
|
175 |
final_response = generate_llama_response(query, context)
|
176 |
return final_results, final_response
|
177 |
|
178 |
+
# Define Gradio interface
|
179 |
+
def gradio_interface(query):
    """Gradio wrapper for the hybrid RAG system.

    Args:
        query: Text entered by the user in the Gradio textbox.

    Returns:
        The generated response text produced by
        ``hybrid_rag_system_with_llama``, or a short prompt asking for
        input when the query is empty or only whitespace.
    """
    # Reject blank input up front instead of handing it to the pipeline.
    if not query or not query.strip():
        return "Please enter a query."
    # The pipeline returns (final_results, final_response); only the
    # generated text is shown in the UI.
    _, final_response = hybrid_rag_system_with_llama(query)
    return final_response
|
183 |
+
|
184 |
+
# Create Gradio app
|
185 |
+
# Build the input and output widgets first, then assemble the app from them.
_query_box = gr.Textbox(label="Enter your query", placeholder="e.g., short easy machine learning")
_response_box = gr.Textbox(label="Generated Response")

interface = gr.Interface(
    fn=gradio_interface,
    inputs=_query_box,
    outputs=_response_box,
    title="Hybrid RAG System with LLaMA",
    description="Enter a query to retrieve relevant resources and generate a response using LLaMA.",
)

# Launch the app only when this file is executed as a script.
if __name__ == "__main__":
    interface.launch()
|