Spaces:

Presidentlin
/

Aidan-Bench

Running

App Files Community

Presidentlin committed on Aug 13

Commit

c77c9f7

•

1 Parent(s): 8bbf037

x

Browse files

Files changed (2) hide show

__pycache__/main.cpython-310.pyc +0 -0
main.py +15 -5

__pycache__/main.cpython-310.pyc CHANGED Viewed

Binary files a/__pycache__/main.cpython-310.pyc and b/__pycache__/main.cpython-310.pyc differ

main.py CHANGED Viewed

@@ -7,7 +7,6 @@ import threading
 import streamlit as st  # Import Streamlit
 import queue
 def generate_answer(question, previous_answers, model_name, open_router_key, openai_api_key):
     """Generates an answer to a question using the specified language model."""
     gen_prompt = create_gen_prompt(question, previous_answers)
@@ -38,6 +37,7 @@ def evaluate_answer(question, new_answer, open_router_key, openai_api_key):
 def process_question(question, model_name, open_router_key, openai_api_key, result_queue):
     start_time = time.time()
     previous_answers = []
     question_novelty = 0
@@ -52,12 +52,15 @@ def process_question(question, model_name, open_router_key, openai_api_key, resu
                 break
             if coherence_score <= 3:
                 break
             novelty_score = get_novelty_score(new_answer, previous_answers, openai_api_key)
             if novelty_score < 0.1:
                 break
             # Append results to the queue instead of using st.write
@@ -66,7 +69,15 @@ def process_question(question, model_name, open_router_key, openai_api_key, resu
                 "question": question,
                 "answer": new_answer,
                 "coherence_score": coherence_score,
-                "novelty_score": novelty_score
             })
             previous_answers.append(new_answer)
@@ -75,7 +86,6 @@ def process_question(question, model_name, open_router_key, openai_api_key, resu
     except Exception as e:
         result_queue.put({"type": "error", "message": str(e)})
     time_taken = time.time() - start_time
     result_queue.put({
         "type": "summary",
@@ -84,7 +94,6 @@ def process_question(question, model_name, open_router_key, openai_api_key, resu
         "time_taken": time_taken
     })
     return question_novelty, [
         {
             "question": question,
@@ -144,6 +153,7 @@ def benchmark_model_multithreaded(model_name, questions, open_router_key, openai
                     st.write(f"<span style='color:green'>Coherence Score: {result['coherence_score']}</span>",
                              unsafe_allow_html=True)
                     st.write(f"**Novelty Score:** {result['novelty_score']}")
                 elif result["type"] == "summary":
                     st.write(f"<span style='color:blue'>Total novelty score for question '{result['question']}': {result['total_novelty']}</span>",
                              unsafe_allow_html=True)

 import streamlit as st  # Import Streamlit
 import queue
 def generate_answer(question, previous_answers, model_name, open_router_key, openai_api_key):
     """Generates an answer to a question using the specified language model."""
     gen_prompt = create_gen_prompt(question, previous_answers)
 def process_question(question, model_name, open_router_key, openai_api_key, result_queue):
     start_time = time.time()
+    # st.write(f"<span style='color:red'>{question}</span>", unsafe_allow_html=True)
     previous_answers = []
     question_novelty = 0
                 break
             if coherence_score <= 3:
+                # st.write("<span style='color:yellow'>Output is incoherent. Moving to next question.</span>",
+                #          unsafe_allow_html=True)
                 break
             novelty_score = get_novelty_score(new_answer, previous_answers, openai_api_key)
             if novelty_score < 0.1:
+                # st.write("<span style='color:yellow'>Output is redundant. Moving to next question.</span>",
+                #          unsafe_allow_html=True)
                 break
             # Append results to the queue instead of using st.write
                 "question": question,
                 "answer": new_answer,
                 "coherence_score": coherence_score,
+                "novelty_score": novelty_score,
+                "results": [
+                    {
+                        "question": question,
+                        "answers": previous_answers.copy() + [new_answer],  # Include the new answer
+                        "coherence_score": coherence_score,
+                        "novelty_score": question_novelty + novelty_score  # Accumulate novelty score
+                    }
+                ]
             })
             previous_answers.append(new_answer)
     except Exception as e:
         result_queue.put({"type": "error", "message": str(e)})
     time_taken = time.time() - start_time
     result_queue.put({
         "type": "summary",
         "time_taken": time_taken
     })
     return question_novelty, [
         {
             "question": question,
                     st.write(f"<span style='color:green'>Coherence Score: {result['coherence_score']}</span>",
                              unsafe_allow_html=True)
                     st.write(f"**Novelty Score:** {result['novelty_score']}")
+                    results.extend(result["results"])  # Add results here
                 elif result["type"] == "summary":
                     st.write(f"<span style='color:blue'>Total novelty score for question '{result['question']}': {result['total_novelty']}</span>",
                              unsafe_allow_html=True)