capradeepgujaran
committed on
Commit
•
5e44f25
1
Parent(s):
7733a10
Update app.py
Browse files
app.py
CHANGED
@@ -5,8 +5,6 @@ import fitz # PyMuPDF for reading PDF files
|
|
5 |
import pytesseract
|
6 |
from PIL import Image
|
7 |
import docx # for reading .docx files
|
8 |
-
from ragchecker import RAGResults, RAGChecker
|
9 |
-
from ragchecker.metrics import all_metrics
|
10 |
from llama_index.core import VectorStoreIndex, Document
|
11 |
from llama_index.embeddings.openai import OpenAIEmbedding
|
12 |
from llama_index.llms.openai import OpenAI
|
@@ -24,7 +22,7 @@ load_dotenv()
|
|
24 |
|
25 |
# Initialize global variables
|
26 |
vector_index = None
|
27 |
-
query_log = [] # Store queries and results for
|
28 |
|
29 |
# Function to handle PDF and OCR for scanned PDFs
|
30 |
def load_pdf_manually(pdf_path):
|
@@ -102,7 +100,10 @@ def query_app(query, model_name, use_rag_checker, openai_api_key):
|
|
102 |
return "Please provide a valid OpenAI API Key.", None
|
103 |
|
104 |
# Initialize the LLM with the selected model and provided API key
|
105 |
-
|
|
|
|
|
|
|
106 |
|
107 |
# Create a query engine and query the indexed documents
|
108 |
response_synthesizer = get_response_synthesizer(llm=llm)
|
@@ -123,39 +124,8 @@ def query_app(query, model_name, use_rag_checker, openai_api_key):
|
|
123 |
"retrieved_context": [{"text": doc.text} for doc in response.source_nodes]
|
124 |
})
|
125 |
|
126 |
-
#
|
127 |
-
|
128 |
-
|
129 |
-
# Calculate BERTScore if RAGChecker is selected
|
130 |
-
if use_rag_checker:
|
131 |
-
try:
|
132 |
-
rag_results = RAGResults.from_dict({"results": query_log})
|
133 |
-
evaluator = RAGChecker(
|
134 |
-
extractor_name="openai/gpt-4o-mini",
|
135 |
-
checker_name="openai/gpt-4o-mini",
|
136 |
-
batch_size_extractor=32, # Removed any instances where max_workers might be passed
|
137 |
-
batch_size_checker=32
|
138 |
-
)
|
139 |
-
evaluator.evaluate(rag_results, all_metrics)
|
140 |
-
metrics = rag_results.metrics
|
141 |
-
|
142 |
-
# Calculate BERTScore as an additional metric
|
143 |
-
gt_answer = ["Placeholder ground truth answer"] # Replace with actual ground truth
|
144 |
-
candidate = [generated_response]
|
145 |
-
|
146 |
-
P, R, F1 = bert_score(candidate, gt_answer, lang="en", verbose=False)
|
147 |
-
metrics['bertscore'] = {
|
148 |
-
"precision": P.mean().item() * 100,
|
149 |
-
"recall": R.mean().item() * 100,
|
150 |
-
"f1": F1.mean().item() * 100
|
151 |
-
}
|
152 |
-
except Exception as e:
|
153 |
-
metrics['error'] = f"Error calculating metrics: {e}"
|
154 |
-
|
155 |
-
if use_rag_checker:
|
156 |
-
return generated_response, metrics
|
157 |
-
else:
|
158 |
-
return generated_response, None
|
159 |
|
160 |
# Define the Gradio interface
|
161 |
def main():
|
@@ -185,7 +155,7 @@ def main():
|
|
185 |
value="gpt-4o",
|
186 |
label="Select Model"
|
187 |
)
|
188 |
-
rag_checkbox = gr.Checkbox(label="Use RAG Checker", value=
|
189 |
query_button = gr.Button("Ask")
|
190 |
with gr.Column():
|
191 |
answer_output = gr.Textbox(label="Answer", interactive=False)
|
|
|
5 |
import pytesseract
|
6 |
from PIL import Image
|
7 |
import docx # for reading .docx files
|
|
|
|
|
8 |
from llama_index.core import VectorStoreIndex, Document
|
9 |
from llama_index.embeddings.openai import OpenAIEmbedding
|
10 |
from llama_index.llms.openai import OpenAI
|
|
|
22 |
|
23 |
# Initialize global variables
|
24 |
vector_index = None
|
25 |
+
query_log = [] # Store queries and results for logging purposes
|
26 |
|
27 |
# Function to handle PDF and OCR for scanned PDFs
|
28 |
def load_pdf_manually(pdf_path):
|
|
|
100 |
return "Please provide a valid OpenAI API Key.", None
|
101 |
|
102 |
# Initialize the LLM with the selected model and provided API key
|
103 |
+
try:
|
104 |
+
llm = OpenAI(model=model_name, api_key=openai_api_key)
|
105 |
+
except Exception as e:
|
106 |
+
return f"Error initializing the OpenAI model: {e}", None
|
107 |
|
108 |
# Create a query engine and query the indexed documents
|
109 |
response_synthesizer = get_response_synthesizer(llm=llm)
|
|
|
124 |
"retrieved_context": [{"text": doc.text} for doc in response.source_nodes]
|
125 |
})
|
126 |
|
127 |
+
# RAG Checker evaluation has been removed: use_rag_checker is ignored and no metrics are returned
|
128 |
+
return generated_response, None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
129 |
|
130 |
# Define the Gradio interface
|
131 |
def main():
|
|
|
155 |
value="gpt-4o",
|
156 |
label="Select Model"
|
157 |
)
|
158 |
+
rag_checkbox = gr.Checkbox(label="Use RAG Checker", value=False) # Set RAG Checker default to False
|
159 |
query_button = gr.Button("Ask")
|
160 |
with gr.Column():
|
161 |
answer_output = gr.Textbox(label="Answer", interactive=False)
|