import os

import gradio as gr
from openai import OpenAI

from optillm.cot_reflection import cot_reflection
from optillm.leap import leap
from optillm.plansearch import plansearch
from optillm.rstar import RStar
from optillm.rto import round_trip_optimization
from optillm.self_consistency import advanced_self_consistency_approach
from optillm.z3_solver import Z3SolverSystem

# OpenRouter API key, read from the environment (set as a secret on the Space).
API_KEY = os.environ.get("OPENROUTER_API_KEY")
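
# Requests fail at call time if the key is missing; a hypothetical startup
# guard could be:
# if not API_KEY:
#     raise RuntimeError("Set the OPENROUTER_API_KEY environment variable")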


def compare_responses(message, model1, approach1, model2, approach2, system_message, max_tokens, temperature, top_p):
    # Run the same prompt through two model/approach pairs for side-by-side comparison.
    response1 = respond(message, [], model1, approach1, system_message, max_tokens, temperature, top_p)
    response2 = respond(message, [], model2, approach2, system_message, max_tokens, temperature, top_p)
    return response1, response2


def respond(message, history, model, approach, system_message, max_tokens, temperature, top_p):
    client = OpenAI(api_key=API_KEY, base_url="https://openrouter.ai/api/v1")

    # Rebuild the OpenAI-style message list from the chat history.
    messages = [{"role": "system", "content": system_message}]
    for user_msg, assistant_msg in history:
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})
    messages.append({"role": "user", "content": message})

    if approach == "none":
        # Plain chat completion with no optimization approach applied.
        response = client.chat.completions.create(
            extra_headers={
                "HTTP-Referer": "https://github.com/codelion/optillm",
                "X-Title": "optillm",
            },
            model=model,
            messages=messages,
            max_tokens=max_tokens,
            temperature=temperature,
            top_p=top_p,
        )
        return response.choices[0].message.content

    # Dispatch to the selected optillm optimization approach.
    system_prompt = system_message
    initial_query = message
    if approach == "rto":
        final_response = round_trip_optimization(system_prompt, initial_query, client, model)
    elif approach == "z3":
        z3_solver = Z3SolverSystem(system_prompt, client, model)
        final_response = z3_solver.process_query(initial_query)
    elif approach == "self_consistency":
        final_response = advanced_self_consistency_approach(system_prompt, initial_query, client, model)
    elif approach == "rstar":
        rstar = RStar(system_prompt, client, model)
        final_response = rstar.solve(initial_query)
    elif approach == "cot_reflection":
        final_response = cot_reflection(system_prompt, initial_query, client, model)
    elif approach == "plansearch":
        final_response = plansearch(system_prompt, initial_query, client, model)
    elif approach == "leap":
        final_response = leap(system_prompt, initial_query, client, model)
    else:
        final_response = f"Unknown approach: {approach}"
    return final_response
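
# Example call with hypothetical values, mirroring what the UI passes:
#   respond("What is 2+2?", [], "meta-llama/llama-3.1-8b-instruct:free",
#           "cot_reflection", "You are a helpful assistant.", 512, 0.7, 0.95)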

# Streaming variant (left unwired in the original template); with the OpenAI
# client this would use stream=True and yield partial responses:
# response = ""
# for chunk in client.chat.completions.create(
#     model=model,
#     messages=messages,
#     max_tokens=max_tokens,
#     temperature=temperature,
#     top_p=top_p,
#     stream=True,
# ):
#     response += chunk.choices[0].delta.content or ""
#     yield response


def create_model_dropdown():
    # Free-tier models available through OpenRouter.
    return gr.Dropdown(
        ["nousresearch/hermes-3-llama-3.1-405b:free", "meta-llama/llama-3.1-8b-instruct:free",
         "qwen/qwen-2-7b-instruct:free", "google/gemma-2-9b-it:free", "mistralai/mistral-7b-instruct:free",
         "mistralai/pixtral-12b:free", "qwen/qwen-2-vl-7b-instruct:free", "google/gemini-flash-8b-1.5-exp",
         "google/gemini-flash-1.5-exp", "google/gemini-pro-1.5-exp"],
        value="nousresearch/hermes-3-llama-3.1-405b:free", label="Model"
    )


def create_approach_dropdown():
    # "none" calls the model directly; the rest are optillm optimization approaches.
    return gr.Dropdown(
        ["none", "leap", "plansearch", "rstar", "cot_reflection", "rto", "self_consistency", "z3"],
        value="none", label="Approach"
    )


with gr.Blocks() as demo:
    gr.Markdown("# optillm - LLM Optimization Comparison")

    # Generation settings shared by the Chat and Compare tabs.
    with gr.Row():
        system_message = gr.Textbox(value="", label="System message")
        max_tokens = gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens")
        temperature = gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature")
        top_p = gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)")

    with gr.Tabs():
        with gr.TabItem("Chat"):
            model = create_model_dropdown()
            approach = create_approach_dropdown()
            chatbot = gr.Chatbot()
            msg = gr.Textbox()
            with gr.Row():
                submit = gr.Button("Submit")
                clear = gr.Button("Clear")

            def user(user_message, history):
                # Append the user's turn to the history and clear the input box.
                return "", history + [[user_message, None]]

            def bot(history, model, approach, system_message, max_tokens, temperature, top_p):
                # Fill in the assistant's reply for the most recent user turn.
                user_message = history[-1][0]
                bot_message = respond(user_message, history[:-1], model, approach, system_message, max_tokens, temperature, top_p)
                history[-1][1] = bot_message
                return history

            # Submitting via Enter or the button first records the user turn,
            # then generates the reply.
            msg.submit(user, [msg, chatbot], [msg, chatbot]).then(
                bot, [chatbot, model, approach, system_message, max_tokens, temperature, top_p], chatbot
            )
            submit.click(user, [msg, chatbot], [msg, chatbot]).then(
                bot, [chatbot, model, approach, system_message, max_tokens, temperature, top_p], chatbot
            )
            clear.click(lambda: None, None, chatbot, queue=False)
        with gr.TabItem("Compare"):
            # Same prompt, two model/approach pairs, answers shown side by side.
            with gr.Row():
                model1 = create_model_dropdown()
                approach1 = create_approach_dropdown()
                model2 = create_model_dropdown()
                approach2 = create_approach_dropdown()
            compare_input = gr.Textbox(label="Enter your message for comparison")
            compare_button = gr.Button("Compare")
            with gr.Row():
                output1 = gr.Textbox(label="Response 1")
                output2 = gr.Textbox(label="Response 2")
            compare_button.click(
                compare_responses,
                inputs=[compare_input, model1, approach1, model2, approach2, system_message, max_tokens, temperature, top_p],
                outputs=[output1, output2]
            )
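
# Launch the Gradio app. To run outside the Space (hypothetical invocation):
#   OPENROUTER_API_KEY=sk-or-... python app.py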
if __name__ == "__main__":
    demo.launch()