Presidentlin committed
Commit 187c8cf
1 Parent(s): fe9a872
__pycache__/main.cpython-310.pyc CHANGED
Binary files a/__pycache__/main.cpython-310.pyc and b/__pycache__/main.cpython-310.pyc differ
 
__pycache__/models.cpython-310.pyc CHANGED
Binary files a/__pycache__/models.cpython-310.pyc and b/__pycache__/models.cpython-310.pyc differ
 
app.py CHANGED
@@ -9,7 +9,6 @@ st.set_page_config(page_title="Aidan Bench - Generator")
 
 st.title("Aidan Bench - Generator")
 
-
 # API Key Inputs with Security and User Experience Enhancements
 st.warning("Please keep your API keys secure and confidential. This app does not store or log your API keys.")
 
@@ -94,9 +93,13 @@ if st.session_state.open_router_key and st.session_state.openai_api_key:
         st.session_state.user_questions = []
 
     # Threshold Sliders
-    st.subheader("Threshold Sliders")
-    coherence_threshold = st.slider("Coherence Threshold (0-5):", 0, 5, 3)
-    novelty_threshold = st.slider("Novelty Threshold (0-1):", 0.0, 1.0, 0.1)
+    st.sidebar.subheader("Threshold Sliders")
+    coherence_threshold = st.sidebar.slider("Coherence Threshold (0-5):", 0, 5, 3)
+    novelty_threshold = st.sidebar.slider("Novelty Threshold (0-1):", 0.0, 1.0, 0.1)
+
+    st.sidebar.subheader("Temp Sliders")
+    temp_threshold = st.sidebar.slider("Temperature (0-2):", 0.0, 2.0, 1.0)
+    top_p = st.sidebar.slider("Top P (0-1):", 0.0, 1.0, 1.0)
 
     # Workflow Selection
     workflow = st.radio("Select Workflow:", ["Use Predefined Questions", "Use User-Defined Questions"])
@@ -159,9 +162,9 @@ if st.session_state.open_router_key and st.session_state.openai_api_key:
 
         # Benchmarking logic using the chosen execution mode
         if execution_mode == "Sequential":
-            question_results = benchmark_model_sequential(model_name, selected_questions, st.session_state.open_router_key, st.session_state.openai_api_key,judge_model_name,coherence_threshold,novelty_threshold)
+            question_results = benchmark_model_sequential(model_name, selected_questions, st.session_state.open_router_key, st.session_state.openai_api_key,judge_model_name,coherence_threshold,novelty_threshold,temp_threshold,top_p)
        else: # Multithreaded
-            question_results = benchmark_model_multithreaded(model_name, selected_questions, st.session_state.open_router_key, st.session_state.openai_api_key, max_threads, judge_model_name, coherence_threshold,novelty_threshold)
+            question_results = benchmark_model_multithreaded(model_name, selected_questions, st.session_state.open_router_key, st.session_state.openai_api_key, max_threads, judge_model_name, coherence_threshold,novelty_threshold,temp_threshold,top_p)
 
        results.extend(question_results)
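
The app.py changes move the threshold sliders into the sidebar and add two sampling controls, temperature and top P, which are forwarded to both benchmark entry points. A minimal standalone sketch of that plumbing, assuming only streamlit, with a hypothetical benchmark_stub standing in for the real benchmark functions in main.py:

import streamlit as st

# Sidebar controls mirroring the diff above.
st.sidebar.subheader("Threshold Sliders")
coherence_threshold = st.sidebar.slider("Coherence Threshold (0-5):", 0, 5, 3)
novelty_threshold = st.sidebar.slider("Novelty Threshold (0-1):", 0.0, 1.0, 0.1)

st.sidebar.subheader("Temp Sliders")
temp_threshold = st.sidebar.slider("Temperature (0-2):", 0.0, 2.0, 1.0)
top_p = st.sidebar.slider("Top P (0-1):", 0.0, 1.0, 1.0)

def benchmark_stub(questions, coherence_threshold, novelty_threshold, temperature, top_p):
    # Hypothetical stand-in for benchmark_model_sequential / _multithreaded:
    # it only echoes the settings that would be passed to the model.
    return [{"question": q, "temperature": temperature, "top_p": top_p} for q in questions]

if st.button("Run"):
    st.write(benchmark_stub(["example question"], coherence_threshold,
                            novelty_threshold, temp_threshold, top_p))

Because the controls live in st.sidebar they stay visible in both workflows, and Streamlit reruns the script on every slider change, so the values passed on the next run are always the current ones.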
main.py CHANGED
@@ -7,25 +7,25 @@ import threading
 import streamlit as st # Import Streamlit
 import queue
 
-def generate_answer(question, previous_answers, model_name, open_router_key, openai_api_key):
+def generate_answer(question, previous_answers, model_name, open_router_key, openai_api_key,temperature,top_p):
     """Generates an answer to a question using the specified language model."""
     gen_prompt = create_gen_prompt(question, previous_answers)
     try:
         new_answer = chat_with_model(prompt=gen_prompt, model=model_name, open_router_key=open_router_key,
-                                     openai_api_key=openai_api_key)
+                                     openai_api_key=openai_api_key,temperature=temperature,top_p=top_p)
         return new_answer
     except Exception as e:
         st.error(f"Error generating answer: {str(e)}") # Use st.error
         return None
 
 
-def evaluate_answer(question, new_answer, open_router_key, openai_api_key, judge_model_name):
+def evaluate_answer(question, new_answer, open_router_key, openai_api_key, judge_model_name,temperature,top_p):
     """Evaluates the coherence and novelty of an answer."""
     judge_prompt = create_judge_prompt(question, new_answer)
     judge = judge_model_name # Use the judge_model_name passed to the function
     try:
         judge_response = chat_with_model(prompt=judge_prompt, model=judge, open_router_key=open_router_key,
-                                         openai_api_key=openai_api_key)
+                                         openai_api_key=openai_api_key,temperature=temperature,top_p=top_p)
         coherence_score = int(judge_response.split("<coherence_score>")[1].split("</coherence_score>")[0])
         return coherence_score
     except Exception as e:
@@ -33,18 +33,18 @@ def evaluate_answer(question, new_answer, open_router_key, openai_api_key, judge
         return None
 
 
-def process_question(question, model_name, open_router_key, openai_api_key, result_queue, judge_model_name,coherence_threshold,novelty_threshold):
+def process_question(question, model_name, open_router_key, openai_api_key, result_queue, judge_model_name,coherence_threshold,novelty_threshold,temperature,top_p):
     start_time = time.time()
     previous_answers = []
     question_novelty = 0
 
     try:
         while True:
-            new_answer = generate_answer(question, previous_answers, model_name, open_router_key, openai_api_key)
+            new_answer = generate_answer(question, previous_answers, model_name, open_router_key, openai_api_key, temperature,top_p)
             if new_answer is None:
                 break
 
-            coherence_score = evaluate_answer(question, new_answer, open_router_key, openai_api_key, judge_model_name)
+            coherence_score = evaluate_answer(question, new_answer, open_router_key, openai_api_key, judge_model_name,temperature,top_p)
             if coherence_score is None:
                 break
 
@@ -126,7 +126,7 @@ def get_novelty_score(new_answer: str, previous_answers: list, openai_api_key):
     return novelty
 
 
-def benchmark_model_multithreaded(model_name, questions, open_router_key, openai_api_key, max_threads=None, judge_model_name=None,coherence_threshold=None,novelty_threshold=None):
+def benchmark_model_multithreaded(model_name, questions, open_router_key, openai_api_key, max_threads=None, judge_model_name=None,coherence_threshold=None,novelty_threshold=None,temperature=0,top_p=0):
     novelty_score = 0
     results = []
     result_queue = queue.Queue() # Create a queue for communication
@@ -140,7 +140,7 @@ def benchmark_model_multithreaded(model_name, questions, open_router_key, openai
     with ThreadPoolExecutor(max_workers=max_workers) as executor:
         # Submit tasks to the thread pool
         future_to_question = {
-            executor.submit(process_question, question, model_name, open_router_key, openai_api_key, result_queue, judge_model_name,coherence_threshold,novelty_threshold): question
+            executor.submit(process_question, question, model_name, open_router_key, openai_api_key, result_queue, judge_model_name,coherence_threshold,novelty_threshold,temperature,top_p): question
             for question in questions
         }
 
@@ -185,12 +185,12 @@ def benchmark_model_multithreaded(model_name, questions, open_router_key, openai
     return results
 
 
-def benchmark_model_sequential(model_name, questions, open_router_key, openai_api_key, judge_model_name,coherence_threshold,novelty_threshold):
+def benchmark_model_sequential(model_name, questions, open_router_key, openai_api_key, judge_model_name,coherence_threshold,novelty_threshold,temperature,top_p):
     novelty_score = 0
     results = []
 
     for i, question in enumerate(questions):
-        for result in process_question(question, model_name, open_router_key, openai_api_key, None, judge_model_name,coherence_threshold,novelty_threshold):
+        for result in process_question(question, model_name, open_router_key, openai_api_key, None, judge_model_name,coherence_threshold,novelty_threshold,temperature,top_p):
             if result["type"] == "answer":
                 st.write(f"**Question:** {result['question']}")
                 st.write(f"**New Answer:**\n{result['answer']}")
models.py CHANGED
@@ -5,7 +5,7 @@ from retry import retry
 
 
 @retry(tries=3)
-def chat_with_model(prompt, model, open_router_key=None, openai_api_key=None, max_tokens=4000, temperature=0):
+def chat_with_model(prompt, model, open_router_key=None, openai_api_key=None, max_tokens=4000, temperature=0,top_p=0):
     if open_router_key:
         client = OpenAI(
             api_key=open_router_key,
@@ -25,7 +25,8 @@ def chat_with_model(prompt, model, open_router_key=None, openai_api_key=None, ma
             }
         ],
         max_tokens=max_tokens,
-        temperature=temperature
+        temperature=temperature,
+        top_p=top_p
     )
     return response.choices[0].message.content
 
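
In models.py the new top_p parameter defaults to 0 and is passed straight through to the chat completion call. A sketch of the resulting call shape, assuming the openai>=1.x client that the OpenAI(...) construction above suggests; the single-key signature is a simplification, since the real function selects OpenRouter or OpenAI depending on which key is set:

from openai import OpenAI

def chat_with_model_sketch(prompt, model, api_key, max_tokens=4000, temperature=0, top_p=0):
    client = OpenAI(api_key=api_key)
    response = client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": prompt}],
        max_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,  # new in this commit
    )
    return response.choices[0].message.content

One caveat: a top_p default of 0 effectively restricts nucleus sampling to the most likely token, so any existing caller that omits top_p now gets near-greedy output. The app.py sliders default both temperature and top_p to 1.0, which is ordinary sampling, and the OpenAI documentation generally recommends adjusting temperature or top_p, not both.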