Adwitiya_quizbot_2.0

Running

App Files Files Community

NCTCMumbai committed on Sep 10, 2024

Commit

8febad9

verified ·

1 Parent(s): 8ba0205

Update app.py

Browse files

Files changed (1) hide show

app.py +313 -122

app.py CHANGED Viewed

@@ -389,21 +389,259 @@
 # QUIZBOT.queue()
 # QUIZBOT.launch(debug=True)
-##############??????????????????????????????
 import pandas as pd
 import json
 import gradio as gr
 from pathlib import Path
 from ragatouille import RAGPretrainedModel
 from gradio_client import Client
-from jinja2 import Environment, FileSystemLoader
 from tempfile import NamedTemporaryFile
 VECTOR_COLUMN_NAME = "vector"
 TEXT_COLUMN_NAME = "text"
-#proj_dir = Path(__file__).parent
-proj_dir = Path.cwd()
-# Setting up the logging
 import logging
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
@@ -411,15 +649,8 @@ logger = logging.getLogger(__name__)
 # Replace Mixtral client with Qwen Client
 client = Client("Qwen/Qwen1.5-110B-Chat-demo")
-# Set up the template environment with the templates directory
-env = Environment(loader=FileSystemLoader(proj_dir / 'templates'))
-# Load the templates directly from the environment
-template = env.get_template('template.j2')
-template_html = env.get_template('template_html.j2')
 def system_instructions(question_difficulty, topic, documents_str):
-    return f"""<s> [INST] You are a great teacher and your task is to create 10 questions with 4 choices with {question_difficulty} difficulty about the topic request "{topic}" only from the below given documents, {documents_str}. Then create answers. Index in JSON format, the questions as "Q#":"" to "Q#":"", the four choices as "Q#:C1":"" to "Q#:C4":"", and the answers as "A#":"Q#:C#" to "A#":"Q#:C#". example is 'A10':'Q10:C3' [/INST]"""
 # RA
 RAG_db = gr.State()
@@ -427,7 +658,7 @@ RAG_db = gr.State()
 def json_to_excel(output_json):
     # Initialize list for DataFrame
     data = []
-    gr.Warning('Generating Shareable file link..',duration=30)
     for i in range(1, 11):  # Assuming there are 10 questions
         question_key = f"Q{i}"
         answer_key = f"A{i}"
@@ -467,134 +698,93 @@ def json_to_excel(output_json):
         "Time in seconds",
         "Image Link"
     ])
-    # Create a temporary file and save the DataFrame to it
     temp_file = NamedTemporaryFile(delete=False, suffix=".xlsx")
     df.to_excel(temp_file.name, index=False)
-    # # Save to Excel file
-    # excel_path = proj_dir / "quiz_questions.xlsx"
-    # df.to_excel(str(excel_path), index=False)
     return temp_file.name
 with gr.Blocks(title="Quiz Maker", theme=gr.themes.Default(primary_hue="green", secondary_hue="green"), css="style.css") as QUIZBOT:
-    def load_model():
-        RAG = RAGPretrainedModel.from_pretrained("colbert-ir/colbertv2.0")
-        RAG_db.value = RAG.from_index('.ragatouille/colbert/indexes/cbseclass10index')
-        return 'Ready to Go!!'
-    with gr.Column(scale=4):
-        gr.HTML("""
-        <center>
-            <h1><span style="color: purple;">ADWITIYA</span> Customs Manual Quizbot</h1>
-            <h2>Generative AI-powered Capacity building for Training Officers</h2>
-            <i>⚠️ NACIN Faculties create quiz from any topic dynamically for classroom evaluation after their sessions ! ⚠️</i>
-        </center>
-        """)
-    with gr.Column(scale=2):
-        load_btn = gr.Button("Click to Load!🚀")
-        load_text = gr.Textbox()
-        load_btn.click(load_model, [], load_text)
     topic = gr.Textbox(label="Enter the Topic for Quiz", placeholder="Write any topic/details from Customs Manual")
     with gr.Row():
-        radio = gr.Radio(["easy", "average", "hard"], label="How difficult should the quiz be?")
     generate_quiz_btn = gr.Button("Generate Quiz!🚀")
     quiz_msg = gr.Textbox()
     question_radios = [gr.Radio(visible=False) for _ in range(10)]
-    #@gr.dependencies.GPU
-    @generate_quiz_btn.click(inputs=[radio, topic], outputs=[quiz_msg] + question_radios + [gr.File(label="Download Excel")], api_name="generate_quiz")
-    def generate_quiz(question_difficulty, topic):
         top_k_rank = 10
-        RAG_db_ = RAG_db.value
-        documents_full = RAG_db_.search(topic, k=top_k_rank)
-        gr.Warning('Generation of Quiz may take 1 to 2 minutes. Please wait.',duration=60)
-        question_radio_list = []
-        excel_file = None
-        count = 0
-        while count <= 3:
-            try:
-                documents = [item['content'] for item in documents_full]
-                document_summaries = [f"[DOCUMENT {i + 1}]: {summary}{count}" for i, summary in enumerate(documents)]
-                documents_str = '\n'.join(document_summaries)
-                formatted_prompt = system_instructions(question_difficulty, topic, documents_str)
-                print(formatted_prompt)
-                # Use Qwen Client for quiz generation
-                response = client.predict(
-                    query=formatted_prompt,
-                    history=[],
-                    system="You are a helpful assistant.",
-                    api_name="/model_chat"
-                )
-                print(response)
-                response1 = response[1][0][1]
-                # Find the first and last curly braces
-                start_index = response1.find('{')
-                end_index = response1.rfind('}')
-                # Extract only the JSON part
-                if start_index != -1 and end_index != -1:
-                    cleaned_response = response1[start_index:end_index + 1]
-                    # Try parsing the cleaned JSON
-                    try:
-                        output_json = json.loads(cleaned_response)
-                        print('Parsed JSON:', output_json)
-                        global quiz_data
-                        quiz_data = output_json
-                        # Generate the Excel file
-                        excel_file = json_to_excel(output_json)
-                        for question_num in range(1, 11):
-                            question_key = f"Q{question_num}"
-                            answer_key = f"A{question_num}"
-                            question = quiz_data.get(question_key)
-                            answer = quiz_data.get(quiz_data.get(answer_key))
-                            if not question or not answer:
-                                continue
-                            choice_keys = [f"{question_key}:C{i}" for i in range(1, 5)]
-                            choice_list = [quiz_data.get(choice_key, "Choice not found") for choice_key in choice_keys]
-                            radio = gr.Radio(choices=choice_list, label=question, visible=True, interactive=True)
-                            question_radio_list.append(radio)
-                        print('question_radio_list', question_radio_list)
-                        if len(question_radio_list) == 10:
-                            break
-                        else:
-                            print('10 questions not generated. Trying again!')
-                            count += 1
-                            continue
-                    except json.JSONDecodeError as e:
-                        print(f"Failed to decode JSON: {e}")
-                else:
-                    print("No valid JSON found in the response.")
-            except Exception as e:
-                count += 1
-                print(f"Exception occurred: {e}")
-                if count == 3:
-                    print('Retry exhausted')
-                    gr.Warning('Sorry. Please try with another topic!')
-                else:
-                    print(f"Trying again.. {count} time... please wait")
                     continue
-        return ['Quiz Generated!'] + question_radio_list + [excel_file]
     check_button = gr.Button("Check Score")
     score_textbox = gr.Markdown()
@@ -624,3 +814,4 @@ with gr.Blocks(title="Quiz Maker", theme=gr.themes.Default(primary_hue="green",
 QUIZBOT.queue()
 QUIZBOT.launch(debug=True)

 # QUIZBOT.queue()
 # QUIZBOT.launch(debug=True)
+# ##############??????????????????????????????
+# import pandas as pd
+# import json
+# import gradio as gr
+# from pathlib import Path
+# from ragatouille import RAGPretrainedModel
+# from gradio_client import Client
+# from jinja2 import Environment, FileSystemLoader
+# from tempfile import NamedTemporaryFile
+# VECTOR_COLUMN_NAME = "vector"
+# TEXT_COLUMN_NAME = "text"
+# #proj_dir = Path(__file__).parent
+# proj_dir = Path.cwd()
+# # Setting up the logging
+# import logging
+# logging.basicConfig(level=logging.INFO)
+# logger = logging.getLogger(__name__)
+# # Replace Mixtral client with Qwen Client
+# client = Client("Qwen/Qwen1.5-110B-Chat-demo")
+# # Set up the template environment with the templates directory
+# env = Environment(loader=FileSystemLoader(proj_dir / 'templates'))
+# # Load the templates directly from the environment
+# template = env.get_template('template.j2')
+# template_html = env.get_template('template_html.j2')
+# def system_instructions(question_difficulty, topic, documents_str):
+#     return f"""<s> [INST] You are a great teacher and your task is to create 10 questions with 4 choices with {question_difficulty} difficulty about the topic request "{topic}" only from the below given documents, {documents_str}. Then create answers. Index in JSON format, the questions as "Q#":"" to "Q#":"", the four choices as "Q#:C1":"" to "Q#:C4":"", and the answers as "A#":"Q#:C#" to "A#":"Q#:C#". example is 'A10':'Q10:C3' [/INST]"""
+# # RA
+# RAG_db = gr.State()
+# def json_to_excel(output_json):
+#     # Initialize list for DataFrame
+#     data = []
+#     gr.Warning('Generating Shareable file link..',duration=30)
+#     for i in range(1, 11):  # Assuming there are 10 questions
+#         question_key = f"Q{i}"
+#         answer_key = f"A{i}"
+#         question = output_json.get(question_key, '')
+#         correct_answer_key = output_json.get(answer_key, '')
+#         correct_answer = correct_answer_key.split(':')[-1] if correct_answer_key else ''
+#         # Extract options
+#         option_keys = [f"{question_key}:C{i}" for i in range(1, 6)]
+#         options = [output_json.get(key, '') for key in option_keys]
+#         # Add data row
+#         data.append([
+#             question,                     # Question Text
+#             "Multiple Choice",            # Question Type
+#             options[0],                   # Option 1
+#             options[1],                   # Option 2
+#             options[2] if len(options) > 2 else '',  # Option 3
+#             options[3] if len(options) > 3 else '',  # Option 4
+#             options[4] if len(options) > 4 else '',  # Option 5
+#             correct_answer,               # Correct Answer
+#             30,                           # Time in seconds
+#             ''                            # Image Link
+#         ])
+#     # Create DataFrame
+#     df = pd.DataFrame(data, columns=[
+#         "Question Text",
+#         "Question Type",
+#         "Option 1",
+#         "Option 2",
+#         "Option 3",
+#         "Option 4",
+#         "Option 5",
+#         "Correct Answer",
+#         "Time in seconds",
+#         "Image Link"
+#     ])
+#     # Create a temporary file and save the DataFrame to it
+#     temp_file = NamedTemporaryFile(delete=False, suffix=".xlsx")
+#     df.to_excel(temp_file.name, index=False)
+#     # # Save to Excel file
+#     # excel_path = proj_dir / "quiz_questions.xlsx"
+#     # df.to_excel(str(excel_path), index=False)
+#     return temp_file.name
+# with gr.Blocks(title="Quiz Maker", theme=gr.themes.Default(primary_hue="green", secondary_hue="green"), css="style.css") as QUIZBOT:
+#     def load_model():
+#         RAG = RAGPretrainedModel.from_pretrained("colbert-ir/colbertv2.0")
+#         RAG_db.value = RAG.from_index('.ragatouille/colbert/indexes/cbseclass10index')
+#         return 'Ready to Go!!'
+#     with gr.Column(scale=4):
+#         gr.HTML("""
+#         <center>
+#             <h1><span style="color: purple;">ADWITIYA</span> Customs Manual Quizbot</h1>
+#             <h2>Generative AI-powered Capacity building for Training Officers</h2>
+#             <i>⚠️ NACIN Faculties create quiz from any topic dynamically for classroom evaluation after their sessions ! ⚠️</i>
+#         </center>
+#         """)
+#     with gr.Column(scale=2):
+#         load_btn = gr.Button("Click to Load!🚀")
+#         load_text = gr.Textbox()
+#         load_btn.click(load_model, [], load_text)
+#     topic = gr.Textbox(label="Enter the Topic for Quiz", placeholder="Write any topic/details from Customs Manual")
+#     with gr.Row():
+#         radio = gr.Radio(["easy", "average", "hard"], label="How difficult should the quiz be?")
+#     generate_quiz_btn = gr.Button("Generate Quiz!🚀")
+#     quiz_msg = gr.Textbox()
+#     question_radios = [gr.Radio(visible=False) for _ in range(10)]
+#     #@gr.dependencies.GPU
+#     @generate_quiz_btn.click(inputs=[radio, topic], outputs=[quiz_msg] + question_radios + [gr.File(label="Download Excel")], api_name="generate_quiz")
+#     def generate_quiz(question_difficulty, topic):
+#         top_k_rank = 10
+#         RAG_db_ = RAG_db.value
+#         documents_full = RAG_db_.search(topic, k=top_k_rank)
+#         gr.Warning('Generation of Quiz may take 1 to 2 minutes. Please wait.',duration=60)
+#         question_radio_list = []
+#         excel_file = None
+#         count = 0
+#         while count <= 3:
+#             try:
+#                 documents = [item['content'] for item in documents_full]
+#                 document_summaries = [f"[DOCUMENT {i + 1}]: {summary}{count}" for i, summary in enumerate(documents)]
+#                 documents_str = '\n'.join(document_summaries)
+#                 formatted_prompt = system_instructions(question_difficulty, topic, documents_str)
+#                 print(formatted_prompt)
+#                 # Use Qwen Client for quiz generation
+#                 response = client.predict(
+#                     query=formatted_prompt,
+#                     history=[],
+#                     system="You are a helpful assistant.",
+#                     api_name="/model_chat"
+#                 )
+#                 print(response)
+#                 response1 = response[1][0][1]
+#                 # Find the first and last curly braces
+#                 start_index = response1.find('{')
+#                 end_index = response1.rfind('}')
+#                 # Extract only the JSON part
+#                 if start_index != -1 and end_index != -1:
+#                     cleaned_response = response1[start_index:end_index + 1]
+#                     # Try parsing the cleaned JSON
+#                     try:
+#                         output_json = json.loads(cleaned_response)
+#                         print('Parsed JSON:', output_json)
+#                         global quiz_data
+#                         quiz_data = output_json
+#                         # Generate the Excel file
+#                         excel_file = json_to_excel(output_json)
+#                         for question_num in range(1, 11):
+#                             question_key = f"Q{question_num}"
+#                             answer_key = f"A{question_num}"
+#                             question = quiz_data.get(question_key)
+#                             answer = quiz_data.get(quiz_data.get(answer_key))
+#                             if not question or not answer:
+#                                 continue
+#                             choice_keys = [f"{question_key}:C{i}" for i in range(1, 5)]
+#                             choice_list = [quiz_data.get(choice_key, "Choice not found") for choice_key in choice_keys]
+#                             radio = gr.Radio(choices=choice_list, label=question, visible=True, interactive=True)
+#                             question_radio_list.append(radio)
+#                         print('question_radio_list', question_radio_list)
+#                         if len(question_radio_list) == 10:
+#                             break
+#                         else:
+#                             print('10 questions not generated. Trying again!')
+#                             count += 1
+#                             continue
+#                     except json.JSONDecodeError as e:
+#                         print(f"Failed to decode JSON: {e}")
+#                 else:
+#                     print("No valid JSON found in the response.")
+#             except Exception as e:
+#                 count += 1
+#                 print(f"Exception occurred: {e}")
+#                 if count == 3:
+#                     print('Retry exhausted')
+#                     gr.Warning('Sorry. Please try with another topic!')
+#                 else:
+#                     print(f"Trying again.. {count} time... please wait")
+#                     continue
+#         return ['Quiz Generated!'] + question_radio_list + [excel_file]
+#     check_button = gr.Button("Check Score")
+#     score_textbox = gr.Markdown()
+#     @check_button.click(inputs=question_radios, outputs=score_textbox)
+#     def compare_answers(*user_answers):
+#         user_answer_list = list(user_answers)
+#         answers_list = []
+#         for question_num in range(1, 20):
+#             answer_key = f"A{question_num}"
+#             answer = quiz_data.get(quiz_data.get(answer_key))
+#             if not answer:
+#                 break
+#             answers_list.append(answer)
+#         score = sum(1 for item in user_answer_list if item in answers_list)
+#         if score > 7:
+#             message = f"### Excellent! You got {score} out of 10!"
+#         elif score > 5:
+#             message = f"### Good! You got {score} out of 10!"
+#         else:
+#             message = f"### You got {score} out of 10! Don't worry. You can prepare well and try better next time!"
+#         return message
+# QUIZBOT.queue()
+# QUIZBOT.launch(debug=True)
+#?????????????????????????????????
 import pandas as pd
 import json
 import gradio as gr
 from pathlib import Path
 from ragatouille import RAGPretrainedModel
 from gradio_client import Client
 from tempfile import NamedTemporaryFile
+from sentence_transformers import CrossEncoder
+import numpy as np
+from time import perf_counter
 VECTOR_COLUMN_NAME = "vector"
 TEXT_COLUMN_NAME = "text"
+proj_dir = Path.cwd()
+# Set up logging
 import logging
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
 # Replace Mixtral client with Qwen Client
 client = Client("Qwen/Qwen1.5-110B-Chat-demo")
 def system_instructions(question_difficulty, topic, documents_str):
+    return f"""<s> [INST] You are a great teacher and your task is to create 10 questions with 4 choices with {question_difficulty} difficulty about the topic request "{topic}" only from the below given documents, {documents_str}. Then create answers. Index in JSON format, the questions as "Q#":"" to "Q#":"", the four choices as "Q#:C1":"" to "Q#:C4":"", and the answers as "A#":"Q#:C#" to "A#":"Q#:C#". Example: 'A10':'Q10:C3' [/INST]"""
 # RA
 RAG_db = gr.State()
 def json_to_excel(output_json):
     # Initialize list for DataFrame
     data = []
+    gr.Warning('Generating Shareable file link..', duration=30)
     for i in range(1, 11):  # Assuming there are 10 questions
         question_key = f"Q{i}"
         answer_key = f"A{i}"
         "Time in seconds",
         "Image Link"
     ])
     temp_file = NamedTemporaryFile(delete=False, suffix=".xlsx")
     df.to_excel(temp_file.name, index=False)
     return temp_file.name
 with gr.Blocks(title="Quiz Maker", theme=gr.themes.Default(primary_hue="green", secondary_hue="green"), css="style.css") as QUIZBOT:
     topic = gr.Textbox(label="Enter the Topic for Quiz", placeholder="Write any topic/details from Customs Manual")
     with gr.Row():
+        difficulty_radio = gr.Radio(["easy", "average", "hard"], label="How difficult should the quiz be?")
+        model_radio = gr.Radio(choices=['(FAST) MiniLM-L6v2', '(ACCURATE) BGE reranker', '(HIGH ACCURATE) ColBERT'],
+                               value='(ACCURATE) BGE reranker', label="Embeddings",
+                               info="First query to ColBERT may take a little time")
     generate_quiz_btn = gr.Button("Generate Quiz!🚀")
     quiz_msg = gr.Textbox()
     question_radios = [gr.Radio(visible=False) for _ in range(10)]
+    @generate_quiz_btn.click(inputs=[difficulty_radio, topic, model_radio], outputs=[quiz_msg] + question_radios + [gr.File(label="Download Excel")])
+    def generate_quiz(question_difficulty, topic, cross_encoder):
         top_k_rank = 10
+        documents = []
+        gr.Warning('Generating Quiz may take 1-2 minutes. Please wait.', duration=60)
+        if cross_encoder == '(HIGH ACCURATE) ColBERT':
+            gr.Warning('Retrieving using ColBERT.. First-time query will take a minute for model to load.. please wait')
+            RAG = RAGPretrainedModel.from_pretrained("colbert-ir/colbertv2.0")
+            RAG_db.value = RAG.from_index('.ragatouille/colbert/indexes/cbseclass10index')
+            documents_full = RAG_db.value.search(topic, k=top_k_rank)
+            documents = [item['content'] for item in documents_full]
+        else:
+            document_start = perf_counter()
+            query_vec = retriever.encode(topic)
+            doc1 = table.search(query_vec, vector_column_name=VECTOR_COLUMN_NAME).limit(top_k_rank)
+            documents = table.search(query_vec, vector_column_name=VECTOR_COLUMN_NAME).limit(top_k_rank).to_list()
+            documents = [doc[TEXT_COLUMN_NAME] for doc in documents]
+            query_doc_pair = [[topic, doc] for doc in documents]
+            if cross_encoder == '(FAST) MiniLM-L6v2':
+                cross_encoder1 = CrossEncoder('cross-encoder/ms-marco-MiniLM-L-6-v2')
+            elif cross_encoder == '(ACCURATE) BGE reranker':
+                cross_encoder1 = CrossEncoder('BAAI/bge-reranker-base')
+            cross_scores = cross_encoder1.predict(query_doc_pair)
+            sim_scores_argsort = list(reversed(np.argsort(cross_scores)))
+            documents = [documents[idx] for idx in sim_scores_argsort[:top_k_rank]]
+        formatted_prompt = system_instructions(question_difficulty, topic, '\n'.join(documents))
+        try:
+            response = client.predict(query=formatted_prompt, history=[], system="You are a helpful assistant.", api_name="/model_chat")
+            response1 = response[1][0][1]
+            # Extract JSON
+            start_index = response1.find('{')
+            end_index = response1.rfind('}')
+            cleaned_response = response1[start_index:end_index + 1] if start_index != -1 and end_index != -1 else ''
+            output_json = json.loads(cleaned_response)
+            # Generate the Excel file
+            excel_file = json_to_excel(output_json)
+            question_radio_list = []
+            for question_num in range(1, 11):
+                question_key = f"Q{question_num}"
+                answer_key = f"A{question_num}"
+                question = output_json.get(question_key)
+                answer = output_json.get(output_json.get(answer_key))
+                if not question or not answer:
                     continue
+                choice_keys = [f"{question_key}:C{i}" for i in range(1, 5)]
+                choice_list = [output_json.get(choice_key, "Choice not found") for choice_key in choice_keys]
+                radio = gr.Radio(choices=choice_list, label=question, visible=True, interactive=True)
+                question_radio_list.append(radio)
+            return ['Quiz Generated!'] + question_radio_list + [excel_file]
+        except json.JSONDecodeError as e:
+            print(f"Failed to decode JSON: {e}")
     check_button = gr.Button("Check Score")
     score_textbox = gr.Markdown()
 QUIZBOT.queue()
 QUIZBOT.launch(debug=True)