titipata committed on
Commit 5f5d98b · verified · 1 Parent(s): 5392e43

Update app.py

Files changed (1)
  1. app.py +55 -76
app.py CHANGED
@@ -7,8 +7,7 @@ def get_matches(query, db_name="miread_contrastive"):
     """
     Wrapper to call the similarity search on the required index
     """
-    matches = vecdbs[index_names.index(
-        db_name)].similarity_search_with_score(query, k=60)
+    matches = vecdbs[index_names.index(db_name)].similarity_search_with_score(query, k=60)
     return matches

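For context on the call being reflowed here: `similarity_search_with_score` on a LangChain FAISS store returns a list of `(Document, distance)` pairs, the distance usually being a NumPy float32, which is why the code below calls `score.item()`. A minimal sketch of that data shape, with fabricated entries rather than the app's real metadata:

# Sketch only: the (Document, score) shape that get_matches() returns.
# Entries are fabricated; the real metadata comes from the FAISS indexes below.
import numpy as np
from langchain.schema import Document

matches = [
    (Document(page_content="abstract text A",
              metadata={"title": "Paper A", "authors": ["doe, jane"], "journal": "eLife"}),
     np.float32(0.42)),
    (Document(page_content="abstract text B",
              metadata={"title": "Paper B", "authors": ["roe, rex"], "journal": None}),
     np.float32(0.55)),
]
for doc, score in matches:  # inference() unpacks the pairs the same way
    print(doc.metadata["title"], round(score.item(), 3))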
 
@@ -19,69 +18,50 @@ def inference(query, model="miread_contrastive"):
     """
     matches = get_matches(query, model)
     auth_counts = {}
-    j_bucket = {}
-    n_table = []
-    a_table = []
+    journal_bucket = {}
+    author_table = []    # Author table
+    abstract_table = []  # Abstract table
+
+    # Calculate normalized scores
     scores = [round(match[1].item(), 3) for match in matches]
-    min_score = min(scores)
-    max_score = max(scores)
-    def normaliser(x): return round(1 - (x-min_score)/max_score, 3)
-    for i, match in enumerate(matches):
-        doc = match[0]
-        score = round(normaliser(round(match[1].item(), 3)), 3)
-        title = doc.metadata['title']
-        author = doc.metadata['authors'][0].title()
-        date = doc.metadata.get('date', 'None')
-        link = doc.metadata.get('link', 'None')
-        submitter = doc.metadata.get('submitter', 'None')
-        journal = doc.metadata['journal']
-        if (journal is None or journal.strip() == ''):
-            journal = 'None'
-        else:
-            journal = journal.strip()
-
-        # For journals
-        if journal not in j_bucket:
-            j_bucket[journal] = score
-        else:
-            j_bucket[journal] += score
-
-        # For authors
-        record = [i+1,
-                  score,
-                  author,
-                  title,
-                  link,
-                  date]
+    min_score, max_score = min(scores), max(scores)
+    normaliser = lambda x: round(1 - (x-min_score)/max_score, 3)
+
+    for i, (doc, score) in enumerate(matches):
+        norm_score = round(normaliser(round(score.item(), 3)), 3)
+        metadata = doc.metadata
+
+        # Extract metadata
+        title = metadata['title']
+        author = metadata['authors'][0].title()
+        date = metadata.get('date', 'None')
+        link = metadata.get('link', 'None')
+        submitter = metadata.get('submitter', 'None')
+        journal = (metadata['journal'] or '').strip() or 'None'
+
+        # Update journal scores
+        if journal != 'None':
+            journal_bucket[journal] = journal_bucket.get(journal, 0) + norm_score
+
+        # Build author table (limit 2 entries per author)
         if auth_counts.get(author, 0) < 2:
-            n_table.append(record)
-            if auth_counts.get(author, 0) == 0:
-                auth_counts[author] = 1
-            else:
-                auth_counts[author] += 1
-
-        # For abstracts
-        record = [i+1,
-                  title,
-                  author,
-                  submitter,
-                  journal,
-                  date,
-                  link,
-                  score
-                  ]
-        a_table.append(record)
+            author_table.append([i+1, norm_score, author, title, link, date])
+            auth_counts[author] = auth_counts.get(author, 0) + 1
+
+        # Build abstract table
+        abstract_table.append([i+1, title, author, submitter, journal, date, link, norm_score])

-    del j_bucket['None']
-    j_table = sorted([[journal, round(score, 3)] for journal,
-                      score in j_bucket.items()],
-                     key=lambda x: x[1], reverse=True)
-    j_table = [[i+1, item[0], item[1]] for i, item in enumerate(j_table)]
-    j_output = gr.Dataframe.update(value=j_table, visible=True)
-    n_output = gr.Dataframe.update(value=n_table, visible=True)
-    a_output = gr.Dataframe.update(value=a_table, visible=True)
+    # Build journal table
+    journal_bucket.pop('None', None)  # unnamed journals never enter the bucket; pop defensively
+    journal_table = [[i+1, j, s] for i, (j, s) in enumerate(
+        sorted(journal_bucket.items(), key=lambda x: x[1], reverse=True)
+    )]

-    return [a_output, j_output, n_output]
+    return [
+        gr.Dataframe.update(value=abstract_table, visible=True),
+        gr.Dataframe.update(value=journal_table, visible=True),
+        gr.Dataframe.update(value=author_table, visible=True)
+    ]


 index_names = ["miread_large", "miread_contrastive", "scibert_contrastive"]
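The reworked loop turns FAISS distances into similarity-like scores with `1 - (x - min_score)/max_score` and accumulates them per journal. That arithmetic can be checked in isolation; the distances and journal names below are made up:

# Stand-alone check of the normalisation and journal aggregation in inference().
# Distances and journal names are invented for illustration only.
scores = [0.412, 0.530, 0.711, 0.902]          # FAISS distances, lower = closer
min_score, max_score = min(scores), max(scores)
normaliser = lambda x: round(1 - (x - min_score) / max_score, 3)

journal_bucket = {}
journals = ["eLife", "J Neurosci", "eLife", "None"]
for dist, journal in zip(scores, journals):
    if journal != 'None':
        journal_bucket[journal] = journal_bucket.get(journal, 0) + normaliser(dist)

# Rank journals by accumulated normalised score, as the app does.
journal_table = [[i + 1, j, round(s, 3)] for i, (j, s) in
                 enumerate(sorted(journal_bucket.items(), key=lambda x: x[1], reverse=True))]
print(journal_table)  # [[1, 'eLife', 1.669], [2, 'J Neurosci', 0.869]]

Note that the divisor is max_score rather than (max_score - min_score), so the weakest match maps to 1 - (max - min)/max instead of exactly 0; this matches both the old and the new code.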
@@ -96,9 +76,10 @@ faiss_embedders = [HuggingFaceEmbeddings(
     model_name=name,
     model_kwargs=model_kwargs,
     encode_kwargs=encode_kwargs) for name in model_names]
-
-vecdbs = [FAISS.load_local(index_name, faiss_embedder)
-          for index_name, faiss_embedder in zip(index_names, faiss_embedders)]
+vecdbs = [
+    FAISS.load_local(index_name, faiss_embedder)
+    for index_name, faiss_embedder in zip(index_names, faiss_embedders)
+]

 with gr.Blocks(theme=gr.themes.Soft()) as demo:
     gr.Markdown("# NBDT Recommendation Engine for Editors")
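`FAISS.load_local` assumes each index directory (`miread_large`, etc.) was previously written with `save_local` using the matching embedder. A small sketch of that round trip, with a placeholder folder name and embedding model rather than the app's real artefacts:

# Sketch of the save_local / load_local round trip the app relies on.
# "toy_index" and the MiniLM model are placeholders, not the app's indexes.
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS

embedder = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
    model_kwargs={"device": "cpu"},
    encode_kwargs={"normalize_embeddings": False},
)
FAISS.from_texts(["first abstract", "second abstract"], embedder).save_local("toy_index")

vecdb = FAISS.load_local("toy_index", embedder)   # same call pattern as in app.py
print(vecdb.similarity_search_with_score("abstract", k=1))

Newer LangChain releases also require an explicit allow_dangerous_deserialization=True flag on load_local; the two-argument form above matches the older API that app.py uses.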
@@ -142,22 +123,20 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
         visible=False
     )

-    action_btn1.click(fn=lambda x: inference(x, index_names[0]),
-                      inputs=[
-                          abst,
-                      ],
+    action_btn1.click(
+        fn=lambda x: inference(x, index_names[0]),
+        inputs=[abst],
                       outputs=[a_output, j_output, n_output],
-                      api_name="neurojane")
-    action_btn2.click(fn=lambda x: inference(x, index_names[1]),
-                      inputs=[
-                          abst,
-                      ],
+        api_name="neurojane"
+    )
+    action_btn2.click(
+        fn=lambda x: inference(x, index_names[1]),
+        inputs=[abst],
                       outputs=[a_output, j_output, n_output],
                       api_name="neurojane")
-    action_btn3.click(fn=lambda x: inference(x, index_names[2]),
-                      inputs=[
-                          abst,
-                      ],
+    action_btn3.click(
+        fn=lambda x: inference(x, index_names[2]),
+        inputs=[abst],
                       outputs=[a_output, j_output, n_output],
                       api_name="neurojane")

 
142