Spaces:
Runtime error
Runtime error
cleanup, fix elo timestamp ordering, add guanaco 13b
Browse files
- app.py +118 -118
- calculate_elo.py +3 -2
app.py
CHANGED
@@ -123,6 +123,7 @@ AVAILABLE_MODELS = {
|
|
123 |
"lmsys-vicuna-13b": ("2nlb32ydkaz6yd", prompt_chat),
|
124 |
"supercot-13b": ("0be7865dwxpwqk", prompt_instruct, ["Instruction:"]),
|
125 |
"mpt-7b-instruct": ("jpqbvnyluj18b0", prompt_instruct),
|
|
|
126 |
}
|
127 |
|
128 |
_memoized_models = defaultdict()
|
@@ -275,15 +276,15 @@ with gr.Blocks() as arena:
|
|
275 |
- [💵 Consider Donating on our Patreon](http://patreon.com/OpenAccessAICollective)
|
276 |
- Join us on [Discord](https://discord.gg/PugNNHAF5r)
|
277 |
""")
|
278 |
-
with gr.Tab("Chatbot"):
|
279 |
with gr.Row():
|
280 |
with gr.Column():
|
281 |
-
|
282 |
with gr.Column():
|
283 |
-
|
284 |
with gr.Row():
|
285 |
-
choose1 = gr.Button(value="👈 Prefer left", variant="secondary", visible=False).style(full_width=True)
|
286 |
-
choose2 = gr.Button(value="👉 Prefer right", variant="secondary", visible=False).style(full_width=True)
|
287 |
choose3 = gr.Button(value="🤝 Tie", variant="secondary", visible=False).style(full_width=True)
|
288 |
choose4 = gr.Button(value="👎 Both are bad", variant="secondary", visible=False).style(full_width=True)
|
289 |
with gr.Row():
|
@@ -293,133 +294,132 @@ with gr.Blocks() as arena:
|
|
293 |
dismiss_reveal = gr.Button(value="Dismiss & Continue", variant="secondary", visible=False).style(full_width=True)
|
294 |
with gr.Row():
|
295 |
with gr.Column():
|
296 |
-
|
297 |
label="What do you want to ask?",
|
298 |
placeholder="Ask me anything.",
|
299 |
lines=3,
|
300 |
)
|
301 |
with gr.Column():
|
302 |
-
|
303 |
"", label="Persona Tags", interactive=True, visible=True, placeholder="Tell us about how you are judging the quality. ex: #CoT #SFW #NSFW #helpful #ethical #creativity", lines=2)
|
304 |
-
|
305 |
start_message, label="System Message", interactive=True, visible=True, placeholder="system prompt", lines=8)
|
306 |
|
307 |
-
|
308 |
"", label="Assistant Nudge", interactive=True, visible=True, placeholder="the first words of the assistant response to nudge them in the right direction.", lines=2)
|
309 |
with gr.Row():
|
310 |
-
|
311 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
312 |
with gr.Tab("Leaderboard"):
|
313 |
with gr.Column():
|
314 |
leaderboard_markdown = gr.Markdown(f"""{leaderboard_intro}
|
315 |
{dataset_to_markdown()}
|
316 |
""")
|
317 |
-
|
318 |
-
|
319 |
-
|
320 |
-
refresh.click(fn=refresh_md, inputs=[], outputs=[leaderboard_markdown])
|
321 |
-
|
322 |
-
clear.click(lambda: None, None, chatbot1, queue=False)
|
323 |
-
clear.click(lambda: None, None, chatbot2, queue=False)
|
324 |
-
clear.click(lambda: None, None, message, queue=False)
|
325 |
-
clear.click(lambda: None, None, nudge_msg, queue=False)
|
326 |
-
|
327 |
-
submit_click_event = submit.click(
|
328 |
-
lambda *args: (
|
329 |
-
gr.update(visible=False, interactive=False),
|
330 |
-
gr.update(visible=False),
|
331 |
-
gr.update(visible=False),
|
332 |
-
),
|
333 |
-
inputs=[], outputs=[message, clear, submit], queue=True
|
334 |
-
).then(
|
335 |
-
fn=user, inputs=[message, nudge_msg, chatbot1, chatbot2], outputs=[message, nudge_msg, chatbot1, chatbot2], queue=True
|
336 |
-
).then(
|
337 |
-
fn=chat, inputs=[chatbot1, chatbot2, system_msg], outputs=[chatbot1, chatbot2, message, reveal1, reveal2, state], queue=True
|
338 |
-
).then(
|
339 |
-
lambda *args: (
|
340 |
-
gr.update(visible=False, interactive=False),
|
341 |
-
gr.update(visible=True),
|
342 |
-
gr.update(visible=True),
|
343 |
-
gr.update(visible=True),
|
344 |
-
gr.update(visible=True),
|
345 |
-
gr.update(visible=False),
|
346 |
-
gr.update(visible=False),
|
347 |
-
),
|
348 |
-
inputs=[message, nudge_msg, system_msg], outputs=[message, choose1, choose2, choose3, choose4, clear, submit], queue=True
|
349 |
-
)
|
350 |
-
|
351 |
-
choose1_click_event = choose1.click(
|
352 |
-
fn=chosen_one_first, inputs=[chatbot1, chatbot2, system_msg, nudge_msg, rlhf_persona, state], outputs=[], queue=True
|
353 |
-
).then(
|
354 |
-
lambda *args: (
|
355 |
-
gr.update(visible=False),
|
356 |
-
gr.update(visible=False),
|
357 |
-
gr.update(visible=False),
|
358 |
-
gr.update(visible=False),
|
359 |
-
gr.update(visible=True),
|
360 |
-
gr.update(visible=True),
|
361 |
-
gr.update(visible=True),
|
362 |
-
),
|
363 |
-
inputs=[], outputs=[choose1, choose2, choose3, choose4, dismiss_reveal, reveal1, reveal2], queue=True
|
364 |
-
)
|
365 |
-
|
366 |
-
choose2_click_event = choose2.click(
|
367 |
-
fn=chosen_one_second, inputs=[chatbot1, chatbot2, system_msg, nudge_msg, rlhf_persona, state], outputs=[], queue=True
|
368 |
-
).then(
|
369 |
-
lambda *args: (
|
370 |
-
gr.update(visible=False),
|
371 |
-
gr.update(visible=False),
|
372 |
-
gr.update(visible=False),
|
373 |
-
gr.update(visible=False),
|
374 |
-
gr.update(visible=True),
|
375 |
-
gr.update(visible=True),
|
376 |
-
gr.update(visible=True),
|
377 |
-
),
|
378 |
-
inputs=[], outputs=[choose1, choose2, choose3, choose4, dismiss_reveal, reveal1, reveal2], queue=True
|
379 |
-
)
|
380 |
-
|
381 |
-
choose3_click_event = choose3.click(
|
382 |
-
fn=chosen_one_tie, inputs=[chatbot1, chatbot2, system_msg, nudge_msg, rlhf_persona, state], outputs=[], queue=True
|
383 |
-
).then(
|
384 |
-
lambda *args: (
|
385 |
-
gr.update(visible=False),
|
386 |
-
gr.update(visible=False),
|
387 |
-
gr.update(visible=False),
|
388 |
-
gr.update(visible=False),
|
389 |
-
gr.update(visible=True),
|
390 |
-
gr.update(visible=True),
|
391 |
-
gr.update(visible=True),
|
392 |
-
),
|
393 |
-
inputs=[], outputs=[choose1, choose2, choose3, choose4, dismiss_reveal, reveal1, reveal2], queue=True
|
394 |
-
)
|
395 |
-
|
396 |
-
choose4_click_event = choose4.click(
|
397 |
-
fn=chosen_one_suck, inputs=[chatbot1, chatbot2, system_msg, nudge_msg, rlhf_persona, state], outputs=[], queue=True
|
398 |
-
).then(
|
399 |
-
lambda *args: (
|
400 |
-
gr.update(visible=False),
|
401 |
-
gr.update(visible=False),
|
402 |
-
gr.update(visible=False),
|
403 |
-
gr.update(visible=False),
|
404 |
-
gr.update(visible=True),
|
405 |
-
gr.update(visible=True),
|
406 |
-
gr.update(visible=True),
|
407 |
-
),
|
408 |
-
inputs=[], outputs=[choose1, choose2, choose3, choose4, dismiss_reveal, reveal1, reveal2], queue=True
|
409 |
-
)
|
410 |
-
|
411 |
-
dismiss_click_event = dismiss_reveal.click(
|
412 |
-
lambda *args: (
|
413 |
-
gr.update(visible=True, interactive=True),
|
414 |
-
gr.update(visible=False),
|
415 |
-
gr.update(visible=True),
|
416 |
-
gr.update(visible=True),
|
417 |
-
gr.update(visible=False),
|
418 |
-
gr.update(visible=False),
|
419 |
-
None,
|
420 |
-
None,
|
421 |
-
),
|
422 |
-
inputs=[], outputs=[message, dismiss_reveal, clear, submit, reveal1, reveal2, chatbot1, chatbot2], queue=True
|
423 |
-
)
|
424 |
|
425 |
arena.queue(concurrency_count=5, max_size=16).launch(debug=True, server_name="0.0.0.0", server_port=7860)
|
|
|
123 |
"lmsys-vicuna-13b": ("2nlb32ydkaz6yd", prompt_chat),
|
124 |
"supercot-13b": ("0be7865dwxpwqk", prompt_instruct, ["Instruction:"]),
|
125 |
"mpt-7b-instruct": ("jpqbvnyluj18b0", prompt_instruct),
|
126 |
+
"guanaco-13b": ("yxl8w98z017mw2", prompt_instruct),
|
127 |
}
|
128 |
|
129 |
_memoized_models = defaultdict()
|
|
|
276 |
- [💵 Consider Donating on our Patreon](http://patreon.com/OpenAccessAICollective)
|
277 |
- Join us on [Discord](https://discord.gg/PugNNHAF5r)
|
278 |
""")
|
279 |
+
with gr.Tab("Chatbot Arena"):
|
280 |
with gr.Row():
|
281 |
with gr.Column():
|
282 |
+
arena_chatbot1 = gr.Chatbot(label="Chatbot A")
|
283 |
with gr.Column():
|
284 |
+
arena_chatbot2 = gr.Chatbot(label="Chatbot B")
|
285 |
with gr.Row():
|
286 |
+
choose1 = gr.Button(value="👈 Prefer left (A)", variant="secondary", visible=False).style(full_width=True)
|
287 |
+
choose2 = gr.Button(value="👉 Prefer right (B)", variant="secondary", visible=False).style(full_width=True)
|
288 |
choose3 = gr.Button(value="🤝 Tie", variant="secondary", visible=False).style(full_width=True)
|
289 |
choose4 = gr.Button(value="👎 Both are bad", variant="secondary", visible=False).style(full_width=True)
|
290 |
with gr.Row():
|
|
|
294 |
dismiss_reveal = gr.Button(value="Dismiss & Continue", variant="secondary", visible=False).style(full_width=True)
|
295 |
with gr.Row():
|
296 |
with gr.Column():
|
297 |
+
arena_message = gr.Textbox(
|
298 |
label="What do you want to ask?",
|
299 |
placeholder="Ask me anything.",
|
300 |
lines=3,
|
301 |
)
|
302 |
with gr.Column():
|
303 |
+
arena_rlhf_persona = gr.Textbox(
|
304 |
"", label="Persona Tags", interactive=True, visible=True, placeholder="Tell us about how you are judging the quality. ex: #CoT #SFW #NSFW #helpful #ethical #creativity", lines=2)
|
305 |
+
arena_system_msg = gr.Textbox(
|
306 |
start_message, label="System Message", interactive=True, visible=True, placeholder="system prompt", lines=8)
|
307 |
|
308 |
+
arena_nudge_msg = gr.Textbox(
|
309 |
"", label="Assistant Nudge", interactive=True, visible=True, placeholder="the first words of the assistant response to nudge them in the right direction.", lines=2)
|
310 |
with gr.Row():
|
311 |
+
arena_submit = gr.Button(value="Send message", variant="secondary").style(full_width=True)
|
312 |
+
arena_clear = gr.Button(value="New topic", variant="secondary").style(full_width=False)
|
313 |
+
state = gr.State({})
|
314 |
+
|
315 |
+
arena_clear.click(lambda: None, None, arena_chatbot1, queue=False)
|
316 |
+
arena_clear.click(lambda: None, None, arena_chatbot2, queue=False)
|
317 |
+
arena_clear.click(lambda: None, None, arena_message, queue=False)
|
318 |
+
arena_clear.click(lambda: None, None, arena_nudge_msg, queue=False)
|
319 |
+
|
320 |
+
submit_click_event = arena_submit.click(
|
321 |
+
lambda *args: (
|
322 |
+
gr.update(visible=False, interactive=False),
|
323 |
+
gr.update(visible=False),
|
324 |
+
gr.update(visible=False),
|
325 |
+
),
|
326 |
+
inputs=[], outputs=[arena_message, arena_clear, arena_submit], queue=True
|
327 |
+
).then(
|
328 |
+
fn=user, inputs=[arena_message, arena_nudge_msg, arena_chatbot1, arena_chatbot2], outputs=[arena_message, arena_nudge_msg, arena_chatbot1, arena_chatbot2], queue=True
|
329 |
+
).then(
|
330 |
+
fn=chat, inputs=[arena_chatbot1, arena_chatbot2, arena_system_msg], outputs=[arena_chatbot1, arena_chatbot2, arena_message, reveal1, reveal2, state], queue=True
|
331 |
+
).then(
|
332 |
+
lambda *args: (
|
333 |
+
gr.update(visible=False, interactive=False),
|
334 |
+
gr.update(visible=True),
|
335 |
+
gr.update(visible=True),
|
336 |
+
gr.update(visible=True),
|
337 |
+
gr.update(visible=True),
|
338 |
+
gr.update(visible=False),
|
339 |
+
gr.update(visible=False),
|
340 |
+
),
|
341 |
+
inputs=[arena_message, arena_nudge_msg, arena_system_msg], outputs=[arena_message, choose1, choose2, choose3, choose4, arena_clear, arena_submit], queue=True
|
342 |
+
)
|
343 |
+
|
344 |
+
choose1_click_event = choose1.click(
|
345 |
+
fn=chosen_one_first, inputs=[arena_chatbot1, arena_chatbot2, arena_system_msg, arena_nudge_msg, arena_rlhf_persona, state], outputs=[], queue=True
|
346 |
+
).then(
|
347 |
+
lambda *args: (
|
348 |
+
gr.update(visible=False),
|
349 |
+
gr.update(visible=False),
|
350 |
+
gr.update(visible=False),
|
351 |
+
gr.update(visible=False),
|
352 |
+
gr.update(visible=True),
|
353 |
+
gr.update(visible=True),
|
354 |
+
gr.update(visible=True),
|
355 |
+
),
|
356 |
+
inputs=[], outputs=[choose1, choose2, choose3, choose4, dismiss_reveal, reveal1, reveal2], queue=True
|
357 |
+
)
|
358 |
+
|
359 |
+
choose2_click_event = choose2.click(
|
360 |
+
fn=chosen_one_second, inputs=[arena_chatbot1, arena_chatbot2, arena_system_msg, arena_nudge_msg, arena_rlhf_persona, state], outputs=[], queue=True
|
361 |
+
).then(
|
362 |
+
lambda *args: (
|
363 |
+
gr.update(visible=False),
|
364 |
+
gr.update(visible=False),
|
365 |
+
gr.update(visible=False),
|
366 |
+
gr.update(visible=False),
|
367 |
+
gr.update(visible=True),
|
368 |
+
gr.update(visible=True),
|
369 |
+
gr.update(visible=True),
|
370 |
+
),
|
371 |
+
inputs=[], outputs=[choose1, choose2, choose3, choose4, dismiss_reveal, reveal1, reveal2], queue=True
|
372 |
+
)
|
373 |
+
|
374 |
+
choose3_click_event = choose3.click(
|
375 |
+
fn=chosen_one_tie, inputs=[arena_chatbot1, arena_chatbot2, arena_system_msg, arena_nudge_msg, arena_rlhf_persona, state], outputs=[], queue=True
|
376 |
+
).then(
|
377 |
+
lambda *args: (
|
378 |
+
gr.update(visible=False),
|
379 |
+
gr.update(visible=False),
|
380 |
+
gr.update(visible=False),
|
381 |
+
gr.update(visible=False),
|
382 |
+
gr.update(visible=True),
|
383 |
+
gr.update(visible=True),
|
384 |
+
gr.update(visible=True),
|
385 |
+
),
|
386 |
+
inputs=[], outputs=[choose1, choose2, choose3, choose4, dismiss_reveal, reveal1, reveal2], queue=True
|
387 |
+
)
|
388 |
+
|
389 |
+
choose4_click_event = choose4.click(
|
390 |
+
fn=chosen_one_suck, inputs=[arena_chatbot1, arena_chatbot2, arena_system_msg, arena_nudge_msg, arena_rlhf_persona, state], outputs=[], queue=True
|
391 |
+
).then(
|
392 |
+
lambda *args: (
|
393 |
+
gr.update(visible=False),
|
394 |
+
gr.update(visible=False),
|
395 |
+
gr.update(visible=False),
|
396 |
+
gr.update(visible=False),
|
397 |
+
gr.update(visible=True),
|
398 |
+
gr.update(visible=True),
|
399 |
+
gr.update(visible=True),
|
400 |
+
),
|
401 |
+
inputs=[], outputs=[choose1, choose2, choose3, choose4, dismiss_reveal, reveal1, reveal2], queue=True
|
402 |
+
)
|
403 |
+
|
404 |
+
dismiss_click_event = dismiss_reveal.click(
|
405 |
+
lambda *args: (
|
406 |
+
gr.update(visible=True, interactive=True),
|
407 |
+
gr.update(visible=False),
|
408 |
+
gr.update(visible=True),
|
409 |
+
gr.update(visible=True),
|
410 |
+
gr.update(visible=False),
|
411 |
+
gr.update(visible=False),
|
412 |
+
None,
|
413 |
+
None,
|
414 |
+
),
|
415 |
+
inputs=[], outputs=[arena_message, dismiss_reveal, arena_clear, arena_submit, reveal1, reveal2, arena_chatbot1, arena_chatbot2], queue=True
|
416 |
+
)
|
417 |
with gr.Tab("Leaderboard"):
|
418 |
with gr.Column():
|
419 |
leaderboard_markdown = gr.Markdown(f"""{leaderboard_intro}
|
420 |
{dataset_to_markdown()}
|
421 |
""")
|
422 |
+
leaderboad_refresh = gr.Button(value="Refresh Leaderboard", variant="secondary").style(full_width=True)
|
423 |
+
leaderboad_refresh.click(fn=refresh_md, inputs=[], outputs=[leaderboard_markdown])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
424 |
|
425 |
arena.queue(concurrency_count=5, max_size=16).launch(debug=True, server_name="0.0.0.0", server_port=7860)
|
calculate_elo.py
CHANGED
@@ -2,6 +2,7 @@ import logging
|
|
2 |
import os
|
3 |
from datetime import datetime
|
4 |
from decimal import Decimal
|
|
|
5 |
|
6 |
import boto3
|
7 |
from boto3.dynamodb.conditions import Attr, Key
|
@@ -261,8 +262,8 @@ def _backfill_logs():
|
|
261 |
|
262 |
def main():
|
263 |
last_processed_timestamp = get_last_processed_timestamp()
|
264 |
-
battles = get_unprocessed_battles(last_processed_timestamp)
|
265 |
-
|
266 |
elo_scores = {}
|
267 |
|
268 |
for battle in battles:
|
|
|
2 |
import os
|
3 |
from datetime import datetime
|
4 |
from decimal import Decimal
|
5 |
+
from typing import List
|
6 |
|
7 |
import boto3
|
8 |
from boto3.dynamodb.conditions import Attr, Key
|
|
|
262 |
|
263 |
def main():
|
264 |
last_processed_timestamp = get_last_processed_timestamp()
|
265 |
+
battles: List[dict] = get_unprocessed_battles(last_processed_timestamp)
|
266 |
+
battles = sorted(battles, key=lambda x: x['timestamp'])
|
267 |
elo_scores = {}
|
268 |
|
269 |
for battle in battles:
|