Spaces:
Running
Running
Carlo Moro
committed on
Commit
·
3d745f2
1
Parent(s):
72bec54
Implement app.py for reranking and deduplication, and add requirements for strive-ranker library
Browse files
- app.py +28 -0
- requirements.txt +1 -0
app.py
ADDED
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from strive.reranker import Reranker, EmbeddingType, deduplicate_results
|
2 |
+
import gradio as gr
|
3 |
+
|
4 |
+
def rerank_and_deduplicate(query, corpus_text):
    """Rank corpus lines against a query with two rerankers and merge the results.

    The corpus is given as free text with one document per line. Each line
    is stripped and blank lines are dropped. The remaining documents are
    ranked twice, once with a textual-embedding reranker and once with a
    semantic-embedding reranker; the two result lists are concatenated and
    passed to ``deduplicate_results``, keeping at most 50 entries.

    Args:
        query: The search query to rank the documents against.
        corpus_text: Newline-separated documents. ``None`` or text with no
            non-blank lines is treated as an empty corpus.

    Returns:
        A newline-joined string with one ``"<text> (Score: <score>)"`` entry
        per result (score shown with four decimals), or an empty string when
        the corpus contains no documents.
    """
    raw_lines = (corpus_text or "").split("\n")
    corpus = [line.strip() for line in raw_lines if line.strip()]

    # Nothing to rank: skip building the rerankers and calling them with an
    # empty document list and top_k=0.
    if not corpus:
        return ""

    # NOTE(review): both rerankers are constructed on every call. If
    # construction is expensive (e.g. loads a model), consider creating them
    # once and reusing them -- confirm against the strive-ranker docs.
    textual_reranker = Reranker(embedding_type=EmbeddingType.textual)
    semantic_reranker = Reranker(embedding_type=EmbeddingType.semantic)

    # Ask each reranker for a score for every document, not just a top slice,
    # so the merge step sees both rankings in full.
    top_k = len(corpus)
    textual_results = textual_reranker.rerank_documents(query, corpus, top_k=top_k)
    semantic_results = semantic_reranker.rerank_documents(query, corpus, top_k=top_k)

    merged_results = textual_results + semantic_results
    deduplicated_results = deduplicate_results(merged_results, top_k=50)

    # Each result is unpacked as a (text, score) pair.
    return "\n".join(
        f"{text} (Score: {score:.4f})" for text, score in deduplicated_results
    )
|
16 |
+
|
17 |
+
# Gradio UI: a query box and a multi-line corpus box feed
# rerank_and_deduplicate, whose formatted output is shown in a text box.
query_input = gr.Textbox(label="Query", placeholder="Enter your query here")
corpus_input = gr.Textbox(
    label="Corpus",
    placeholder="Enter one sentence per line",
    lines=10,
)
results_output = gr.Textbox(label="Top Ranked Results")

app = gr.Interface(
    fn=rerank_and_deduplicate,
    inputs=[query_input, corpus_input],
    outputs=results_output,
    title="STRIVE: Semantic Tokenized Ranking via Vectorization & Embeddings",
    description="Enter a query and multiple sentences to test the reranking algorithm.",
)

# Start the web server as soon as this module is executed.
app.launch()
|
requirements.txt
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
strive-ranker
|