Spaces:
Running
Running
Carlo Moro
Implement app.py for reranking and deduplication, and add requirements for strive-ranker library
3d745f2
from strive.reranker import Reranker, EmbeddingType, deduplicate_results | |
import gradio as gr | |
def rerank_and_deduplicate(query, corpus_text):
    """Rank the corpus lines against a query and format the merged results.

    The corpus text is split on newlines into one document per non-blank
    line. The documents are ranked twice, once with a textual-embedding
    ``Reranker`` and once with a semantic-embedding ``Reranker``; the two
    result lists are concatenated and passed to ``deduplicate_results``
    with ``top_k=50``.

    Args:
        query: The search query forwarded to both rerankers.
        corpus_text: Newline-separated documents. ``None``, an empty string,
            or text made only of blank lines is treated as an empty corpus.

    Returns:
        One line per deduplicated result in the form
        ``"<text> (Score: <score to 4 decimals>)"``, joined with newlines,
        or an empty string when the corpus has no documents.
    """
    # One document per non-blank line, with surrounding whitespace removed.
    stripped_lines = (line.strip() for line in (corpus_text or "").split("\n"))
    corpus = [line for line in stripped_lines if line]

    if not corpus:
        # Nothing to rank: skip building the rerankers and avoid asking them
        # for top_k=0 over an empty document list.
        return ""

    # NOTE(review): both rerankers are rebuilt on every call; if construction
    # is expensive it may be worth creating them once -- confirm with the
    # strive library before changing.
    textual_reranker = Reranker(embedding_type=EmbeddingType.textual)
    semantic_reranker = Reranker(embedding_type=EmbeddingType.semantic)

    # Ask each reranker for a ranking of the entire corpus.
    textual_results = textual_reranker.rerank_documents(
        query, corpus, top_k=len(corpus)
    )
    semantic_results = semantic_reranker.rerank_documents(
        query, corpus, top_k=len(corpus)
    )

    # Each document can now appear once per reranker, so the combined list is
    # handed to deduplicate_results, capped at 50 entries.
    merged_results = textual_results + semantic_results
    deduplicated_results = deduplicate_results(merged_results, top_k=50)

    # Results are consumed as (text, score) pairs.
    return "\n".join(
        f"{text} (Score: {score:.4f})" for text, score in deduplicated_results
    )
# Create the widgets up front, in the same order they are handed to the
# interface: the query box, the corpus box, then the results box.
query_input = gr.Textbox(label="Query", placeholder="Enter your query here")
corpus_input = gr.Textbox(
    label="Corpus",
    placeholder="Enter one sentence per line",
    lines=10,
)
results_output = gr.Textbox(label="Top Ranked Results")

# Wire the widgets to the reranking callback defined in this file.
app = gr.Interface(
    fn=rerank_and_deduplicate,
    inputs=[query_input, corpus_input],
    outputs=results_output,
    title="STRIVE: Semantic Tokenized Ranking via Vectorization & Embeddings",
    description="Enter a query and multiple sentences to test the reranking algorithm.",
)

# Launch the interface when this file is executed.
app.launch()