Spaces:
Sleeping
Sleeping
wjbmattingly
committed on
Commit
·
289ba91
1
Parent(s):
c27b5cf
Add application file
Browse files
app.py
ADDED
@@ -0,0 +1,289 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from dotenv import load_dotenv
|
2 |
+
import os
|
3 |
+
import pandas as pd
|
4 |
+
|
5 |
+
# Load environment variables from .env file
|
6 |
+
load_dotenv()
|
7 |
+
|
8 |
+
import gradio as gr
|
9 |
+
from weaviate.classes.query import QueryReference
|
10 |
+
import weaviate
|
11 |
+
from sentence_transformers import SentenceTransformer
|
12 |
+
from weaviate.auth import Auth
|
13 |
+
|
14 |
+
# Sentence-embedding model; search_papers() calls model.encode() on each query.
model = SentenceTransformer("all-MiniLM-L6-v2")

# Weaviate Cloud connection settings, read from the process environment
# (load_dotenv() has already been called above, so .env values are visible).
# Either value is None when the corresponding variable is not set.
WEAVIATE_URL = os.environ.get("WEAVIATE_URL")
WEAVIATE_API_KEY = os.environ.get("WEAVIATE_API_KEY")

# Number of result cards rendered on one page of results.
RESULTS_PER_PAGE = 5
|
21 |
+
|
22 |
+
# Add custom CSS near the top of the file
|
23 |
+
custom_css = """
|
24 |
+
.container {
|
25 |
+
max-width: 1000px !important;
|
26 |
+
margin: 0 auto !important;
|
27 |
+
padding: 2rem !important;
|
28 |
+
background-color: #f8fafc !important; /* Light blue-gray background */
|
29 |
+
}
|
30 |
+
|
31 |
+
.search-box {
|
32 |
+
margin-bottom: 2rem !important;
|
33 |
+
}
|
34 |
+
|
35 |
+
.search-button {
|
36 |
+
background-color: #0f172a !important; /* Deep blue */
|
37 |
+
color: #ffffff !important;
|
38 |
+
border-radius: 6px !important;
|
39 |
+
transition: background-color 0.3s ease !important;
|
40 |
+
}
|
41 |
+
|
42 |
+
.search-button:hover {
|
43 |
+
background-color: #1e293b !important; /* Slightly lighter blue on hover */
|
44 |
+
}
|
45 |
+
|
46 |
+
.pagination-button {
|
47 |
+
background-color: #ffffff !important;
|
48 |
+
color: #0f172a !important;
|
49 |
+
border: 1px solid #cbd5e1 !important;
|
50 |
+
border-radius: 6px !important;
|
51 |
+
min-width: 100px !important;
|
52 |
+
transition: all 0.3s ease !important;
|
53 |
+
}
|
54 |
+
|
55 |
+
.pagination-button:hover {
|
56 |
+
background-color: #f1f5f9 !important;
|
57 |
+
border-color: #94a3b8 !important;
|
58 |
+
}
|
59 |
+
|
60 |
+
.paper-card {
|
61 |
+
border: 1px solid #e2e8f0 !important;
|
62 |
+
border-radius: 12px !important;
|
63 |
+
margin-bottom: 1.5rem !important;
|
64 |
+
box-shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.1) !important;
|
65 |
+
background: #ffffff !important;
|
66 |
+
transition: transform 0.2s ease, box-shadow 0.2s ease !important;
|
67 |
+
}
|
68 |
+
|
69 |
+
.paper-card:hover {
|
70 |
+
transform: translateY(-2px) !important;
|
71 |
+
box-shadow: 0 6px 12px -2px rgba(0, 0, 0, 0.15) !important;
|
72 |
+
}
|
73 |
+
|
74 |
+
.card-header {
|
75 |
+
background: #f1f5f9 !important;
|
76 |
+
padding: 1.25rem !important;
|
77 |
+
border-bottom: 1px solid #e2e8f0 !important;
|
78 |
+
border-radius: 12px 12px 0 0 !important;
|
79 |
+
cursor: pointer !important;
|
80 |
+
}
|
81 |
+
|
82 |
+
.card-header h3 {
|
83 |
+
color: #0f172a !important; /* Darker text for better contrast */
|
84 |
+
font-size: 1.1rem !important;
|
85 |
+
margin: 0 !important;
|
86 |
+
font-weight: 600 !important;
|
87 |
+
}
|
88 |
+
|
89 |
+
.card-content {
|
90 |
+
padding: 1.25rem !important;
|
91 |
+
color: #0f172a !important; /* Changed from #334155 to darker color */
|
92 |
+
line-height: 1.6 !important;
|
93 |
+
}
|
94 |
+
|
95 |
+
/* Additional styles for better typography and links */
|
96 |
+
a {
|
97 |
+
color: #2563eb !important;
|
98 |
+
text-decoration: none !important;
|
99 |
+
transition: color 0.2s ease !important;
|
100 |
+
}
|
101 |
+
|
102 |
+
a:hover {
|
103 |
+
color: #1d4ed8 !important;
|
104 |
+
}
|
105 |
+
|
106 |
+
/* Style for the main title */
|
107 |
+
h1 {
|
108 |
+
color: #0f172a !important;
|
109 |
+
font-weight: 700 !important;
|
110 |
+
margin-bottom: 2rem !important;
|
111 |
+
}
|
112 |
+
|
113 |
+
/* Style for the search input */
|
114 |
+
.gradio-textbox input {
|
115 |
+
border: 2px solid #e2e8f0 !important;
|
116 |
+
border-radius: 8px !important;
|
117 |
+
padding: 0.75rem !important;
|
118 |
+
transition: border-color 0.3s ease !important;
|
119 |
+
}
|
120 |
+
|
121 |
+
.gradio-textbox input:focus {
|
122 |
+
border-color: #2563eb !important;
|
123 |
+
outline: none !important;
|
124 |
+
box-shadow: 0 0 0 3px rgba(37, 99, 235, 0.1) !important;
|
125 |
+
}
|
126 |
+
|
127 |
+
/* Make sure all text content has good contrast */
|
128 |
+
p, span, label {
|
129 |
+
color: #0f172a !important; /* Consistent dark color for all text */
|
130 |
+
}
|
131 |
+
|
132 |
+
/* Style for labels and other UI text */
|
133 |
+
.gradio-textbox label {
|
134 |
+
color: #0f172a !important;
|
135 |
+
font-weight: 500 !important;
|
136 |
+
}
|
137 |
+
|
138 |
+
/* Page label styling */
|
139 |
+
.gradio-label {
|
140 |
+
color: #0f172a !important;
|
141 |
+
font-weight: 500 !important;
|
142 |
+
font-size: 0.875rem !important; /* Smaller font size */
|
143 |
+
}
|
144 |
+
|
145 |
+
/* Make sure author links maintain proper color */
|
146 |
+
.card-content a {
|
147 |
+
color: #2563eb !important;
|
148 |
+
}
|
149 |
+
"""
|
150 |
+
|
151 |
+
def _author_links(work):
    """Return de-duplicated HTML anchors for the authors referenced by *work*.

    Each anchor links to the author's ``open_alex_id`` and shows the
    ``display_name``. Both values are HTML-escaped because they come from the
    database and are later injected verbatim into the results page.
    Duplicates are removed while keeping first-seen order, so the rendered
    author list is stable between runs.
    """
    import html  # local import: `html` is not among the module-level imports

    # NOTE(review): guarding against `references` being None is defensive;
    # confirm what the Weaviate client returns for objects without references.
    references = work.references or {}
    authors = references.get('authors')
    if not authors:
        return []

    links = []
    for author in authors.objects:
        author_url = html.escape(str(author.properties['open_alex_id']), quote=True)
        author_name = html.escape(str(author.properties['display_name']))
        links.append(
            f"<a href='{author_url}' target='_blank' "
            f"style='color: #2563eb !important;'>{author_name}</a>"
        )
    # dict.fromkeys de-duplicates like set() but preserves insertion order.
    return list(dict.fromkeys(links))


def search_papers(query):
    """Run a semantic search for *query* against the Weaviate "Work" collection.

    The query is embedded with the module-level sentence-transformer ``model``
    and sent as a near-vector search (up to 1000 hits, with each work's
    ``authors`` references resolved).

    Args:
        query: Free-text search string.

    Returns:
        A ``(df, count)`` pair. ``df`` is a DataFrame with the columns
        ``title``, ``work_url``, ``abstract`` and ``authors`` (authors is an
        HTML fragment of comma-separated links); ``count`` is its row count.
        When the query is empty or nothing matches, ``(None, 0)`` is returned
        so callers can always unpack two values and pass ``df`` straight to
        ``format_page``.
    """
    if not query:
        return None, 0

    vector_query = model.encode(query)

    client = weaviate.connect_to_weaviate_cloud(
        cluster_url=WEAVIATE_URL,
        auth_credentials=Auth.api_key(WEAVIATE_API_KEY),
    )
    try:
        work_collection = client.collections.get("Work")

        # Fetch every hit in one request; paging happens client-side.
        response = work_collection.query.near_vector(
            near_vector=vector_query,
            return_properties=["title", "abstract", "open_alex_id"],
            limit=1000,  # Adjust this based on your needs
            return_references=[
                QueryReference(
                    link_on="authors",
                    return_properties=["display_name", "open_alex_id", "concept_ids"],
                )
            ],
        )

        if not response.objects:
            return None, 0

        results = [
            {
                'title': work.properties['title'],
                'work_url': work.properties['open_alex_id'],
                'abstract': work.properties['abstract'],
                'authors': ', '.join(_author_links(work)),
            }
            for work in response.objects
        ]
    finally:
        # Always release the connection, including on early return or error.
        client.close()

    return pd.DataFrame(results), len(results)
|
198 |
+
|
199 |
+
def _escape_text(value):
    """HTML-escape *value* for safe insertion into markup; None becomes ''."""
    import html  # local import: `html` is not among the module-level imports

    if value is None:
        return ""
    return html.escape(str(value), quote=True)


def format_page(df, page_num, results_per_page=None):
    """Render one page of search results as HTML.

    Args:
        df: DataFrame with ``title``, ``work_url``, ``abstract`` and
            ``authors`` columns, or None when there are no results.
        page_num: 1-based page number to render.
        results_per_page: Cards per page. Defaults to the module-level
            ``RESULTS_PER_PAGE`` when omitted.

    Returns:
        ``(results_html, page_label_html)``. For ``df is None`` the pair is
        the "No results found" message and a "Page 0 of 0" label.

    Title, abstract and work URL are HTML-escaped because they originate from
    the database. The ``authors`` column is inserted as-is: it already holds
    HTML anchors built by the search step.
    """
    if df is None:
        return "No results found", '<div style="text-align: center; margin: 1rem 0; color: #0f172a;">Page 0 of 0</div>'

    page_size = RESULTS_PER_PAGE if results_per_page is None else results_per_page

    start_idx = (page_num - 1) * page_size
    page_df = df.iloc[start_idx:start_idx + page_size]

    # Ceiling division: a partially filled last page still counts as a page.
    total_pages = (len(df) + page_size - 1) // page_size

    cards = []
    # Numbering continues across pages, hence start=start_idx + 1.
    for i, row in enumerate(page_df.itertuples(), start=start_idx + 1):
        title = _escape_text(row.title)
        abstract = _escape_text(row.abstract)
        work_url = _escape_text(row.work_url)
        # Clicking the header toggles the visibility of the card body.
        cards.append(f"""
        <div class="paper-card">
            <div class="card-header"
                 onclick="this.nextElementSibling.style.display = this.nextElementSibling.style.display === 'none' ? 'block' : 'none'">
                <h3>{i}. {title}</h3>
            </div>
            <div class="card-content" style="display:none;">
                <p style="color: #0f172a !important;"><b style="color: #0f172a !important;">Authors:</b> <span style="color: #0f172a !important;">{row.authors}</span></p>
                <p>{abstract}</p>
                <p><a href="{work_url}" target="_blank"
                      style="color: #2563eb !important; text-decoration: none;">View on OpenAlex →</a></p>
            </div>
        </div>
        """)

    results_html = "".join(cards)
    return results_html, f'<div style="text-align: center; margin: 1rem 0; color: #0f172a;">Page {page_num} of {total_pages}</div>'
|
227 |
+
|
228 |
+
# Modified Gradio interface
|
229 |
+
# NOTE(review): the widget nesting below was reconstructed from a source whose
# indentation had been stripped — confirm the layout matches the intended UI.
with gr.Blocks(css=custom_css) as demo:
    # Label shown before any search and for an empty query.
    initial_page_label = '<div style="text-align: center; margin: 1rem 0; color: #0f172a;">Page 1 of 1</div>'

    with gr.Column(elem_classes="container"):
        gr.Markdown("# MENA Open-Alex Semantic Search")

        with gr.Column(elem_classes="search-box"):
            query_input = gr.Textbox(
                label="Enter your query:",
                placeholder="Search for papers..."
            )
            search_button = gr.Button("Search", elem_classes="search-button")

        # Results display
        results_output = gr.HTML()
        page_label = gr.HTML(value=initial_page_label)

        # Pagination controls
        with gr.Row():
            prev_button = gr.Button("Previous", elem_classes="pagination-button")
            next_button = gr.Button("Next", elem_classes="pagination-button")

        # Per-session state: current page number and the full result set.
        page_number = gr.State(value=1)
        results_df = gr.State(value=None)

    def search_with_page(query, page):
        """Run a new search and show its first page.

        `page` is accepted because it is wired as an input but is ignored:
        a new search always restarts at page 1.

        Returns (results_html, page_label_html, df, page_number).
        """
        if not query or not query.strip():
            return "Please enter a search query", initial_page_label, None, 1

        outcome = search_papers(query)
        # Anything other than a DataFrame in the first slot means there is
        # nothing to page through; format_page(None, ...) renders that case.
        df = outcome[0] if isinstance(outcome[0], pd.DataFrame) else None
        return (*format_page(df, 1), df, 1)

    def prev_page(query, page, df):
        """Step back one page, staying on page 1 when already there.

        Returns (results_html, page_label_html, page_number).
        """
        target = page - 1 if page > 1 else page
        return (*format_page(df, target), target)

    def next_page(query, page, df):
        """Step forward one page, staying put on the last page.

        Returns (results_html, page_label_html, page_number).
        """
        if df is None:
            # No search has produced results yet; len(None) would raise.
            return (*format_page(df, page), page)

        total_pages = (len(df) + RESULTS_PER_PAGE - 1) // RESULTS_PER_PAGE
        target = page + 1 if page < total_pages else page
        return (*format_page(df, target), target)

    # Event wiring
    search_button.click(
        fn=search_with_page,
        inputs=[query_input, page_number],
        outputs=[results_output, page_label, results_df, page_number]
    )

    prev_button.click(
        fn=prev_page,
        inputs=[query_input, page_number, results_df],
        outputs=[results_output, page_label, page_number]
    )

    next_button.click(
        fn=next_page,
        inputs=[query_input, page_number, results_df],
        outputs=[results_output, page_label, page_number]
    )

demo.launch()
|