Carlo Moro committed on
Commit
3d745f2
·
1 Parent(s): 72bec54

Implement app.py for reranking and deduplication, and add requirements for strive-ranker library

Browse files
Files changed (2) hide show
  1. app.py +28 -0
  2. requirements.txt +1 -0
app.py ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from strive.reranker import Reranker, EmbeddingType, deduplicate_results
2
+ import gradio as gr
3
+
4
def rerank_and_deduplicate(query, corpus_text, top_k=50):
    """Rerank a newline-separated corpus against a query and format the results.

    Each non-blank line of ``corpus_text`` is treated as one document
    (surrounding whitespace is stripped). The documents are ranked twice,
    once with a textual ``Reranker`` and once with a semantic one, the two
    result lists are concatenated, and ``deduplicate_results`` trims the
    merged list.

    Args:
        query: The query string the documents are ranked against.
        corpus_text: Documents to rank, one per line. ``None`` or text with
            no non-blank lines yields an empty result.
        top_k: Maximum number of results requested from
            ``deduplicate_results``. Defaults to 50.

    Returns:
        One line per result in the form ``"<text> (Score: <score>)"`` with
        the score formatted to four decimals, joined by newlines; an empty
        string when the corpus contains no documents.
    """
    stripped_lines = (line.strip() for line in (corpus_text or "").split("\n"))
    corpus = [line for line in stripped_lines if line]

    # Nothing to rank: return early instead of building the rerankers and
    # asking them for top_k=0 results over an empty corpus.
    if not corpus:
        return ""

    textual_reranker = Reranker(embedding_type=EmbeddingType.textual)
    semantic_reranker = Reranker(embedding_type=EmbeddingType.semantic)

    # Ask each reranker for the full corpus so no document is dropped before
    # the two rankings are merged.
    textual_results = textual_reranker.rerank_documents(query, corpus, top_k=len(corpus))
    semantic_results = semantic_reranker.rerank_documents(query, corpus, top_k=len(corpus))

    merged_results = textual_results + semantic_results
    deduplicated_results = deduplicate_results(merged_results, top_k=top_k)

    # NOTE(review): assumes deduplicate_results yields (text, score) pairs,
    # as the unpacking below requires; confirm against the library.
    return "\n".join(
        f"{text} (Score: {score:.4f})" for text, score in deduplicated_results
    )
16
+
17
# Input widgets: a single-line query box and a multi-line corpus box in
# which every line is one candidate document.
_query_box = gr.Textbox(label="Query", placeholder="Enter your query here")
_corpus_box = gr.Textbox(
    label="Corpus",
    placeholder="Enter one sentence per line",
    lines=10,
)

# Output widget that shows the formatted, ranked results.
_results_box = gr.Textbox(label="Top Ranked Results")

# Wire the reranking function to the widgets.
app = gr.Interface(
    fn=rerank_and_deduplicate,
    inputs=[_query_box, _corpus_box],
    outputs=_results_box,
    title="STRIVE: Semantic Tokenized Ranking via Vectorization & Embeddings",
    description="Enter a query and multiple sentences to test the reranking algorithm.",
)

# Start the Gradio server as soon as this script is executed.
app.launch()
requirements.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ strive-ranker