import gradio as gr
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings


def get_matches(query, db_name="miread_contrastive"):
    """Wrapper to call the similarity search on the required index."""
    matches = vecdbs[index_names.index(db_name)].similarity_search_with_score(query, k=60)
    return matches


def inference(query, model="miread_contrastive"):
    """
    Process the matches retrieved by get_matches() and return Gradio update
    commands for the abstracts, journals and authors tabular outputs.
    """
    matches = get_matches(query, model)
    auth_counts = {}
    j_bucket = {}
    author_table = []    # Author table
    abstract_table = []  # Abstract table

    # Calculate normalized scores (lower FAISS distance -> higher score)
    scores = [round(match[1].item(), 3) for match in matches]
    min_score, max_score = min(scores), max(scores)
    normaliser = lambda x: round(1 - (x - min_score) / max_score, 3)

    for i, (doc, score) in enumerate(matches):
        norm_score = round(normaliser(round(score.item(), 3)), 3)
        metadata = doc.metadata

        # Extract metadata
        title = metadata['title']
        author = metadata['authors'][0].title()
        date = metadata.get('date', 'None')
        link = metadata.get('link', 'None')
        submitter = metadata.get('submitter', 'None')
        journal = metadata['journal'].strip() if metadata['journal'] else 'None'

        # Update journal scores
        if journal != 'None':
            j_bucket[journal] = j_bucket.get(journal, 0) + norm_score

        # Build author table (limit 2 entries per author)
        if auth_counts.get(author, 0) < 2:
            author_table.append([i + 1, norm_score, author, title, link, date])
            auth_counts[author] = auth_counts.get(author, 0) + 1

        # Build abstract table
        abstract_table.append(
            [i + 1, title, author, submitter, journal, date, link, norm_score])

    # Build journal table (drop the unnamed-journal bucket if present)
    j_bucket.pop('None', None)
    journal_table = [[i + 1, j, s] for i, (j, s) in enumerate(
        sorted(j_bucket.items(), key=lambda x: x[1], reverse=True))]

    return [
        gr.Dataframe.update(value=abstract_table, visible=True),
        gr.Dataframe.update(value=journal_table, visible=True),
        gr.Dataframe.update(value=author_table, visible=True),
    ]


index_names = ["miread_large", "miread_contrastive", "scibert_contrastive"]
model_names = [
    "biodatlab/MIReAD-Neuro-Large",
    "biodatlab/MIReAD-Neuro-Contrastive",
    "biodatlab/SciBERT-Neuro-Contrastive",
]
model_kwargs = {'device': 'cpu'}
encode_kwargs = {'normalize_embeddings': False}
faiss_embedders = [HuggingFaceEmbeddings(
    model_name=name,
    model_kwargs=model_kwargs,
    encode_kwargs=encode_kwargs) for name in model_names]

vecdbs = [FAISS.load_local(index_name, faiss_embedder)
          for index_name, faiss_embedder in zip(index_names, faiss_embedders)]
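
# A minimal sketch (not called by this app) of how the local FAISS indices
# loaded above could have been built with the same embedders. build_faiss_index()
# and the `records` field names are assumptions chosen to mirror the metadata
# keys used in inference(), not the actual NBDT indexing pipeline.
def build_faiss_index(records, embedder, index_name):
    """Embed abstract records into a FAISS index and save it locally."""
    from langchain.docstore.document import Document

    docs = [
        Document(
            page_content=rec["abstract"],
            metadata={
                "title": rec["title"],
                "authors": rec["authors"],
                "journal": rec.get("journal", ""),
                "date": rec.get("date", "None"),
                "link": rec.get("link", "None"),
                "submitter": rec.get("submitter", "None"),
            },
        )
        for rec in records
    ]
    db = FAISS.from_documents(docs, embedder)
    db.save_local(index_name)
    return db
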
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# NBDT Recommendation Engine for Editors")
    gr.Markdown(
        "NBDT Recommendation Engine for Editors is a tool for neuroscience author/abstract/journal "
        "recommendation built for NBDT journal editors. It aims to help an editor find reviewers, "
        "abstracts, and journals similar to a given submitted abstract. "
        "To find a recommendation, paste a `title[SEP]abstract` or `abstract` in the text box below "
        "and click the appropriate \"Find Matches\" button. "
        "Then, browse the Authors/Abstracts/Journals tabs for the suggested lists. "
        "The data in our current demo includes authors associated with the NBDT Journal. "
        "We will update the data monthly to keep the publications up to date.")
    abst = gr.Textbox(label="Abstract", lines=10)

    action_btn1 = gr.Button(value="Find Matches with MIReAD-Neuro-Large")
    action_btn2 = gr.Button(value="Find Matches with MIReAD-Neuro-Contrastive")
    action_btn3 = gr.Button(value="Find Matches with SciBERT-Neuro-Contrastive")

    with gr.Tab("Authors"):
        n_output = gr.Dataframe(
            headers=['No.', 'Score', 'Name', 'Title', 'Link', 'Date'],
            datatype=['number', 'number', 'str', 'str', 'str', 'str'],
            col_count=(6, "fixed"),
            wrap=True,
            visible=False)
    with gr.Tab("Abstracts"):
        a_output = gr.Dataframe(
            headers=['No.', 'Title', 'Author', 'Corresponding Author',
                     'Journal', 'Date', 'Link', 'Score'],
            datatype=['number', 'str', 'str', 'str',
                      'str', 'str', 'str', 'number'],
            col_count=(8, "fixed"),
            wrap=True,
            visible=False)
    with gr.Tab("Journals"):
        j_output = gr.Dataframe(
            headers=['No.', 'Name', 'Score'],
            datatype=['number', 'str', 'number'],
            col_count=(3, "fixed"),
            wrap=True,
            visible=False)

    action_btn1.click(
        fn=lambda x: inference(x, index_names[0]),
        inputs=[abst],
        outputs=[a_output, j_output, n_output],
        api_name="neurojane")
    action_btn2.click(
        fn=lambda x: inference(x, index_names[1]),
        inputs=[abst],
        outputs=[a_output, j_output, n_output],
        api_name="neurojane")
    action_btn3.click(
        fn=lambda x: inference(x, index_names[2]),
        inputs=[abst],
        outputs=[a_output, j_output, n_output],
        api_name="neurojane")

demo.launch(debug=True)
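

# A minimal sketch (not called anywhere in this script) of querying the running
# demo programmatically from a separate Python session via gradio_client. The
# local URL is an assumption based on the default launch() settings, and because
# all three buttons above register api_name="neurojane", Gradio may expose the
# later handlers under deduplicated names such as "neurojane_1".
def query_demo(abstract, url="http://127.0.0.1:7860"):
    """Send an abstract to the running demo and return the result tables."""
    from gradio_client import Client

    client = Client(url)
    return client.predict(abstract, api_name="/neurojane")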