Runtime error
Update app.py
app.py
CHANGED
@@ -7,8 +7,7 @@ def get_matches(query, db_name="miread_contrastive"):
     """
     Wrapper to call the similarity search on the required index
     """
-    matches = vecdbs[index_names.index(
-        db_name)].similarity_search_with_score(query, k=60)
+    matches = vecdbs[index_names.index(db_name)].similarity_search_with_score(query, k=60)
     return matches


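For context, similarity_search_with_score() returns (Document, score) pairs, and with a stock FAISS index the score is a distance, so smaller values mean closer matches (the normalisation in the next hunk inverts this). A minimal, self-contained sketch of that return shape; the embedding model and texts below are placeholders, not the ones this Space ships:

from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS

# Placeholder model and texts, only to show the shape of the return value.
embedder = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
toy_db = FAISS.from_texts(["spiking neural networks", "fMRI decoding"], embedder)
for doc, score in toy_db.similarity_search_with_score("spiking networks", k=2):
    print(doc.page_content, round(score.item(), 3))  # lower distance = closer match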
@@ -19,69 +18,50 @@ def inference(query, model="miread_contrastive"):
     """
     matches = get_matches(query, model)
     auth_counts = {}
+    j_bucket = {}
+    author_table = []  # Author table
+    abstract_table = []  # Abstract table
+
+    # Calculate normalized scores
     scores = [round(match[1].item(), 3) for match in matches]
-    min_score = min(scores)
-        else:
-            j_bucket[journal] += score
-
-        # For authors
-        record = [i+1,
-                  score,
-                  author,
-                  title,
-                  link,
-                  date]
+    min_score, max_score = min(scores), max(scores)
+    normaliser = lambda x: round(1 - (x-min_score)/max_score, 3)
+
+    for i, (doc, score) in enumerate(matches):
+        norm_score = round(normaliser(round(score.item(), 3)), 3)
+        metadata = doc.metadata
+
+        # Extract metadata
+        title = metadata['title']
+        author = metadata['authors'][0].title()
+        date = metadata.get('date', 'None')
+        link = metadata.get('link', 'None')
+        submitter = metadata.get('submitter', 'None')
+        journal = metadata['journal'].strip() if metadata['journal'] else 'None'
+
+        # Update journal scores
+        if journal != 'None':
+            j_bucket[journal] = j_bucket.get(journal, 0) + norm_score
+
+        # Build author table (limit 2 entries per author)
         if auth_counts.get(author, 0) < 2:
-        # For abstracts
-        record = [i+1,
-                  title,
-                  author,
-                  submitter,
-                  journal,
-                  date,
-                  link,
-                  score
-                  ]
-        a_table.append(record)
+            author_table.append([i+1, norm_score, author, title, link, date])
+            auth_counts[author] = auth_counts.get(author, 0) + 1
+
+        # Build abstract table
+        abstract_table.append([i+1, title, author, submitter, journal, date, link, norm_score])

+    # Build journal table
     del j_bucket['None']
-    j_table = [[i+1, item[0], item[1]] for i, item in enumerate(j_table)]
-    j_output = gr.Dataframe.update(value=j_table, visible=True)
-    n_output = gr.Dataframe.update(value=n_table, visible=True)
-    a_output = gr.Dataframe.update(value=a_table, visible=True)
+    journal_table = [[i+1, j, s] for i, (j, s) in enumerate(
+        sorted(j_bucket.items(), key=lambda x: x[1], reverse=True)
+    )]

-    return [
+    return [
+        gr.Dataframe.update(value=abstract_table, visible=True),
+        gr.Dataframe.update(value=journal_table, visible=True),
+        gr.Dataframe.update(value=author_table, visible=True)
+    ]


 index_names = ["miread_large", "miread_contrastive", "scibert_contrastive"]
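The rewritten inference() turns raw FAISS distances into descending relevance scores with 1 - (x - min)/max, so the closest match maps to 1.0 and the farthest to min/max rather than 0. A self-contained sketch of that normaliser on made-up distances (real values come from similarity_search_with_score()):

scores = [0.512, 0.687, 0.734, 1.102]  # illustrative L2 distances, lower = closer
min_score, max_score = min(scores), max(scores)
normaliser = lambda x: round(1 - (x-min_score)/max_score, 3)
print([normaliser(s) for s in scores])  # [1.0, 0.841, 0.799, 0.465]

One side effect worth flagging: the loop now skips journals equal to 'None', so the unchanged del j_bucket['None'] line can raise a KeyError whenever that key was never created (a plausible source of the Space's runtime-error status); j_bucket.pop('None', None) would be the defensive equivalent.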
@@ -96,9 +76,10 @@ faiss_embedders = [HuggingFaceEmbeddings(
     model_name=name,
     model_kwargs=model_kwargs,
     encode_kwargs=encode_kwargs) for name in model_names]
+vecdbs = [
+    FAISS.load_local(index_name, faiss_embedder)
+    for index_name, faiss_embedder in zip(index_names, faiss_embedders)
+]

 with gr.Blocks(theme=gr.themes.Soft()) as demo:
     gr.Markdown("# NBDT Recommendation Engine for Editors")
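The new vecdbs comprehension expects each entry of index_names to be a FAISS index folder saved next to the app. A hedged sketch of how such a folder could be produced with the same LangChain calls; the model name, texts, and metadata values below are placeholders, though the metadata keys match what inference() reads:

from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS

embedder = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")  # placeholder model
texts = ["Example abstract one.", "Example abstract two."]
metas = [
    {"title": "Paper one", "authors": ["First Author"], "journal": "Journal A",
     "date": "2023-01-01", "link": "https://example.org/1", "submitter": "editor@example.org"},
    {"title": "Paper two", "authors": ["Second Author"], "journal": "Journal B",
     "date": "2023-02-01", "link": "https://example.org/2", "submitter": "editor@example.org"},
]
FAISS.from_texts(texts, embedder, metadatas=metas).save_local("miread_contrastive")  # folder name matches index_names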
@@ -142,22 +123,20 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
         visible=False
     )

-    action_btn1.click(
-            abst,
-        ],
+    action_btn1.click(
+        fn=lambda x: inference(x, index_names[0]),
+        inputs=[abst],
         outputs=[a_output, j_output, n_output],
-        api_name="neurojane"
+        api_name="neurojane"
+    )
+    action_btn2.click(
+        fn=lambda x: inference(x, index_names[1]),
+        inputs=[abst],
         outputs=[a_output, j_output, n_output],
         api_name="neurojane")
-    action_btn3.click(
-            abst,
-        ],
+    action_btn3.click(
+        fn=lambda x: inference(x, index_names[2]),
+        inputs=[abst,],
         outputs=[a_output, j_output, n_output],
         api_name="neurojane")

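Because the click handlers register an api_name, the Space also exposes a named API endpoint. A hedged sketch of calling it with gradio_client; the Space id is a placeholder (the real "user/space" path is not shown in this diff), and note that all three buttons reuse api_name="neurojane", so depending on the Gradio version only one endpoint may keep that exact name:

from gradio_client import Client

client = Client("user/space-name")  # placeholder Space id
result = client.predict(
    "We report a cortical circuit for ...",  # text for the abst box
    api_name="/neurojane",
)
# result holds the three dataframe payloads, in the order of outputs=[a_output, j_output, n_output]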