davidberenstein1957 HF staff commited on
Commit
fc223d6
·
verified ·
1 Parent(s): bc0a26f

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +78 -0
app.py ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from huggingface_hub import get_token, InferenceClient
3
+ from sentence_transformers import CrossEncoder
4
+
5
# Shared clients, created once at import time.
# Authenticates with the locally saved Hugging Face token.
inference_client = InferenceClient(api_key=get_token())

# BUG FIX: the original loaded "sentence-transformers/all-MiniLM-L12-v2", which
# is a bi-encoder (SentenceTransformer) checkpoint. Wrapping it in CrossEncoder
# attaches a newly initialized classification head, so its relevance scores are
# effectively random. Use a checkpoint actually trained as a cross-encoder
# reranker instead; the .predict([[query, passage], ...]) interface is unchanged.
reranker = CrossEncoder("cross-encoder/ms-marco-MiniLM-L12-v2")
7
+
8
+
9
def query_and_rerank_documents(query: str, k_retrieved: int = 10):
    """Retrieve candidate chunks for *query*, then re-order them by reranker score.

    Returns the retrieved DataFrame with duplicates (by "chunk") removed and a
    new "rank" column holding the cross-encoder score, sorted best-first.
    """
    # NOTE(review): `similarity_search` is not defined anywhere in this file —
    # presumably imported/injected elsewhere. Verify it returns a DataFrame
    # with a "chunk" column before deploying.
    hits = similarity_search(query, k_retrieved).drop_duplicates("chunk")
    pairs = [[query, chunk] for chunk in hits["chunk"]]
    hits["rank"] = reranker.predict(pairs)
    return hits.sort_values(by="rank", ascending=False)
15
+
16
+
17
def generate_response_api(query: str):
    """Ask the chat model to answer *query* (the context is already embedded in it).

    Returns:
        The assistant's reply as plain text.
    """
    messages = [
        {
            "role": "system",
            "content": "You will receive a query and context. Only return the answer based on the context without mentioning the context.",
        },
        {"role": "user", "content": query},
    ]
    completion = inference_client.chat.completions.create(
        model="HuggingFaceTB/SmolLM2-360M-Instruct", messages=messages, max_tokens=2000
    )

    # BUG FIX: return the message *text*, not the whole message object — the
    # original returned `.message`, whose repr (role, tool calls, ...) would be
    # rendered verbatim in the gr.Textbox output.
    return completion.choices[0].message.content
30
+
31
+
32
def rag_pipeline(query: str, k_retrieved: int = 10, k_reranked: int = 5):
    """Full RAG flow: retrieve + rerank chunks, then answer from the top ones.

    Returns a (response, documents) pair: the model's answer and the full
    reranked retrieval DataFrame (for display/debugging).
    """
    docs = query_and_rerank_documents(query, k_retrieved=k_retrieved)
    top_chunks = docs["chunk"].to_list()[:k_reranked]
    prompt = f"Context: {top_chunks}\n\nQuery: {query}"
    return generate_response_api(prompt), docs
38
+
39
+
40
# Gradio UI: query box + retrieval/rerank sliders -> answer text + document table.
with gr.Blocks() as demo:
    gr.Markdown("""# RAG Hub Datasets

Part of [smol blueprint](https://github.com/davidberenstein1957/smol-blueprint) - a smol blueprint for AI development, focusing on practical examples of RAG, information extraction, analysis and fine-tuning in the age of LLMs.""")

    with gr.Row():
        query_input = gr.Textbox(
            label="Query", placeholder="Enter your question here...", lines=3
        )

    with gr.Row():
        with gr.Column():
            retrieve_slider = gr.Slider(
                minimum=1,
                maximum=20,
                value=10,
                label="Number of documents to retrieve",
            )
        with gr.Column():
            rerank_slider = gr.Slider(
                minimum=1,
                maximum=10,
                value=5,
                label="Number of documents to use after reranking",
            )

    submit_btn = gr.Button("Submit")
    response_output = gr.Textbox(label="Response", lines=10)
    documents_output = gr.Dataframe(
        label="Documents", headers=["chunk", "url", "distance", "rank"], wrap=True
    )

    submit_btn.click(
        # BUG FIX: the original passed `fn=rag_interface`, a name defined
        # nowhere in this file — building the UI would raise NameError at
        # startup. `rag_pipeline` is the intended handler; its two return
        # values map onto the two outputs below.
        fn=rag_pipeline,
        inputs=[query_input, retrieve_slider, rerank_slider],
        outputs=[response_output, documents_output],
    )

demo.launch()