titipata committed on
Commit 5f5d98b · verified · 1 Parent(s): 5392e43

Update app.py

Files changed (1)
  1. app.py +55 -76
app.py CHANGED
@@ -7,8 +7,7 @@ def get_matches(query, db_name="miread_contrastive"):
     """
     Wrapper to call the similarity search on the required index
     """
-    matches = vecdbs[index_names.index(
-        db_name)].similarity_search_with_score(query, k=60)
+    matches = vecdbs[index_names.index(db_name)].similarity_search_with_score(query, k=60)
     return matches

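For context on the call being reflowed here: `similarity_search_with_score` on a LangChain FAISS store returns a list of `(Document, distance)` pairs, the distance usually being a NumPy float32, which is why the code below calls `score.item()`. A minimal sketch of that data shape, with fabricated entries rather than the app's real metadata:

# Sketch only: the (Document, score) shape that get_matches() returns.
# Entries are fabricated; the real metadata comes from the FAISS indexes below.
import numpy as np
from langchain.schema import Document

matches = [
    (Document(page_content="abstract text A",
              metadata={"title": "Paper A", "authors": ["doe, jane"], "journal": "eLife"}),
     np.float32(0.42)),
    (Document(page_content="abstract text B",
              metadata={"title": "Paper B", "authors": ["roe, rex"], "journal": None}),
     np.float32(0.55)),
]
for doc, score in matches:  # inference() unpacks the pairs the same way
    print(doc.metadata["title"], round(score.item(), 3))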
 
@@ -19,69 +18,50 @@ def inference(query, model="miread_contrastive"):
     """
     matches = get_matches(query, model)
     auth_counts = {}
-    j_bucket = {}
-    n_table = []
-    a_table = []
+    journal_bucket = {}
+    author_table = []    # Author table
+    abstract_table = []  # Abstract table
+
+    # Calculate normalized scores
     scores = [round(match[1].item(), 3) for match in matches]
-    min_score = min(scores)
-    max_score = max(scores)
-    def normaliser(x): return round(1 - (x-min_score)/max_score, 3)
-    for i, match in enumerate(matches):
-        doc = match[0]
-        score = round(normaliser(round(match[1].item(), 3)), 3)
-        title = doc.metadata['title']
-        author = doc.metadata['authors'][0].title()
-        date = doc.metadata.get('date', 'None')
-        link = doc.metadata.get('link', 'None')
-        submitter = doc.metadata.get('submitter', 'None')
-        journal = doc.metadata['journal']
-        if (journal is None or journal.strip() == ''):
-            journal = 'None'
-        else:
-            journal = journal.strip()
-
-        # For journals
-        if journal not in j_bucket:
-            j_bucket[journal] = score
-        else:
-            j_bucket[journal] += score
-
-        # For authors
-        record = [i+1,
-                  score,
-                  author,
-                  title,
-                  link,
-                  date]
+    min_score, max_score = min(scores), max(scores)
+    normaliser = lambda x: round(1 - (x-min_score)/max_score, 3)
+
+    for i, (doc, score) in enumerate(matches):
+        norm_score = round(normaliser(round(score.item(), 3)), 3)
+        metadata = doc.metadata
+
+        # Extract metadata
+        title = metadata['title']
+        author = metadata['authors'][0].title()
+        date = metadata.get('date', 'None')
+        link = metadata.get('link', 'None')
+        submitter = metadata.get('submitter', 'None')
+        journal = (metadata['journal'] or '').strip() or 'None'
+
+        # Update journal scores
+        if journal != 'None':
+            journal_bucket[journal] = journal_bucket.get(journal, 0) + norm_score
+
+        # Build author table (limit 2 entries per author)
         if auth_counts.get(author, 0) < 2:
-            n_table.append(record)
-            if auth_counts.get(author, 0) == 0:
-                auth_counts[author] = 1
-            else:
-                auth_counts[author] += 1
-
-        # For abstracts
-        record = [i+1,
-                  title,
-                  author,
-                  submitter,
-                  journal,
-                  date,
-                  link,
-                  score
-                  ]
-        a_table.append(record)
+            author_table.append([i+1, norm_score, author, title, link, date])
+            auth_counts[author] = auth_counts.get(author, 0) + 1
+
+        # Build abstract table
+        abstract_table.append([i+1, title, author, submitter, journal, date, link, norm_score])

-    del j_bucket['None']
-    j_table = sorted([[journal, round(score, 3)] for journal,
-                      score in j_bucket.items()],
-                     key=lambda x: x[1], reverse=True)
-    j_table = [[i+1, item[0], item[1]] for i, item in enumerate(j_table)]
-    j_output = gr.Dataframe.update(value=j_table, visible=True)
-    n_output = gr.Dataframe.update(value=n_table, visible=True)
-    a_output = gr.Dataframe.update(value=a_table, visible=True)
+    # Build journal table
+    journal_bucket.pop('None', None)  # unnamed journals never enter the bucket; pop defensively
+    journal_table = [[i+1, j, s] for i, (j, s) in enumerate(
+        sorted(journal_bucket.items(), key=lambda x: x[1], reverse=True)
+    )]

-    return [a_output, j_output, n_output]
+    return [
+        gr.Dataframe.update(value=abstract_table, visible=True),
+        gr.Dataframe.update(value=journal_table, visible=True),
+        gr.Dataframe.update(value=author_table, visible=True)
+    ]


 index_names = ["miread_large", "miread_contrastive", "scibert_contrastive"]
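The reworked loop turns FAISS distances into similarity-like scores with `1 - (x - min_score)/max_score` and accumulates them per journal. That arithmetic can be checked in isolation; the distances and journal names below are made up:

# Stand-alone check of the normalisation and journal aggregation in inference().
# Distances and journal names are invented for illustration only.
scores = [0.412, 0.530, 0.711, 0.902]          # FAISS distances, lower = closer
min_score, max_score = min(scores), max(scores)
normaliser = lambda x: round(1 - (x - min_score) / max_score, 3)

journal_bucket = {}
journals = ["eLife", "J Neurosci", "eLife", "None"]
for dist, journal in zip(scores, journals):
    if journal != 'None':
        journal_bucket[journal] = journal_bucket.get(journal, 0) + normaliser(dist)

# Rank journals by accumulated normalised score, as the app does.
journal_table = [[i + 1, j, round(s, 3)] for i, (j, s) in
                 enumerate(sorted(journal_bucket.items(), key=lambda x: x[1], reverse=True))]
print(journal_table)  # [[1, 'eLife', 1.669], [2, 'J Neurosci', 0.869]]

Note that the divisor is max_score rather than (max_score - min_score), so the weakest match maps to 1 - (max - min)/max instead of exactly 0; this matches both the old and the new code.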
@@ -96,9 +76,10 @@ faiss_embedders = [HuggingFaceEmbeddings(
     model_name=name,
     model_kwargs=model_kwargs,
     encode_kwargs=encode_kwargs) for name in model_names]
-
-vecdbs = [FAISS.load_local(index_name, faiss_embedder)
-          for index_name, faiss_embedder in zip(index_names, faiss_embedders)]
+vecdbs = [
+    FAISS.load_local(index_name, faiss_embedder)
+    for index_name, faiss_embedder in zip(index_names, faiss_embedders)
+]

 with gr.Blocks(theme=gr.themes.Soft()) as demo:
     gr.Markdown("# NBDT Recommendation Engine for Editors")
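`FAISS.load_local` assumes each index directory (`miread_large`, etc.) was previously written with `save_local` using the matching embedder. A small sketch of that round trip, with a placeholder folder name and embedding model rather than the app's real artefacts:

# Sketch of the save_local / load_local round trip the app relies on.
# "toy_index" and the MiniLM model are placeholders, not the app's indexes.
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS

embedder = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
    model_kwargs={"device": "cpu"},
    encode_kwargs={"normalize_embeddings": False},
)
FAISS.from_texts(["first abstract", "second abstract"], embedder).save_local("toy_index")

vecdb = FAISS.load_local("toy_index", embedder)   # same call pattern as in app.py
print(vecdb.similarity_search_with_score("abstract", k=1))

Newer LangChain releases also require an explicit allow_dangerous_deserialization=True flag on load_local; the two-argument form above matches the older API that app.py uses.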
@@ -142,22 +123,20 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
         visible=False
     )

-    action_btn1.click(fn=lambda x: inference(x, index_names[0]),
-                      inputs=[
-                          abst,
-                      ],
+    action_btn1.click(
+        fn=lambda x: inference(x, index_names[0]),
+        inputs=[abst],
                       outputs=[a_output, j_output, n_output],
-                      api_name="neurojane")
-    action_btn2.click(fn=lambda x: inference(x, index_names[1]),
-                      inputs=[
-                          abst,
-                      ],
+        api_name="neurojane"
+    )
+    action_btn2.click(
+        fn=lambda x: inference(x, index_names[1]),
+        inputs=[abst],
                       outputs=[a_output, j_output, n_output],
                       api_name="neurojane")
-    action_btn3.click(fn=lambda x: inference(x, index_names[2]),
-                      inputs=[
-                          abst,
-                      ],
+    action_btn3.click(
+        fn=lambda x: inference(x, index_names[2]),
+        inputs=[abst],
                       outputs=[a_output, j_output, n_output],
                       api_name="neurojane")

 
142