File size: 4,801 Bytes
72c7698 37b927a 2c4ffd2 72c7698 2c4ffd2 37b927a 2c4ffd2 72c7698 2c4ffd2 72c7698 2c4ffd2 72c7698 37b927a 2c4ffd2 72c7698 2c4ffd2 72c7698 2c4ffd2 72c7698 2c4ffd2 72c7698 2c4ffd2 72c7698 6bbb741 aff4f62 72c7698 6bbb741 72c7698 7e788c4 72c7698 6bbb741 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 |
import gradio as gr
import json
import hashlib
import sqlite3
import os
class JSONLViewer:
    """Step through the records of a JSONL file and persist a review status per record.

    All records are loaded into memory on construction. Statuses are stored
    in a SQLite table named ``states`` keyed by an MD5 hash of the record's
    canonical JSON form.
    """

    def __init__(self, data_file_path, db_path):
        """Load the records from *data_file_path* and create the status table in *db_path*."""
        self.data_file_path = data_file_path
        self.db_path = db_path
        self.current_index = 0
        self.data = []
        self.load_data()
        self.init_db()

    def load_data(self):
        """Parse one JSON document per non-blank line of the data file into ``self.data``.

        Raises:
            OSError: if the data file cannot be opened.
            json.JSONDecodeError: if a non-blank line is not valid JSON.
        """
        with open(self.data_file_path, 'r', encoding='utf-8') as file:
            # Blank or whitespace-only lines are not records; passing them to
            # json.loads would raise, so they are skipped.
            self.data = [json.loads(line) for line in file if line.strip()]

    def init_db(self):
        """Create the ``states`` table if it does not exist yet."""
        conn = sqlite3.connect(self.db_path)
        try:
            conn.execute(
                "CREATE TABLE IF NOT EXISTS states "
                "(record_id TEXT PRIMARY KEY, status TEXT)"
            )
            conn.commit()
        finally:
            # Close even when the statement fails so the handle is not leaked.
            conn.close()

    def get_current_record(self):
        """Return the record at ``current_index``, or ``None`` if the index is out of range."""
        if 0 <= self.current_index < len(self.data):
            return self.data[self.current_index]
        return None

    def get_record_id(self, record):
        """Return the MD5 hex digest of *record* serialized as JSON with sorted keys.

        Sorting the keys makes the identifier independent of key order.
        """
        record_str = json.dumps(record, sort_keys=True)
        return hashlib.md5(record_str.encode()).hexdigest()

    def get_status(self, record_id):
        """Return the stored status for *record_id*, or ``""`` when none is stored."""
        conn = sqlite3.connect(self.db_path)
        try:
            row = conn.execute(
                "SELECT status FROM states WHERE record_id = ?", (record_id,)
            ).fetchone()
        finally:
            conn.close()
        return row[0] if row else ""

    def set_status(self, record_id, status):
        """Insert or overwrite the status stored for *record_id*."""
        conn = sqlite3.connect(self.db_path)
        try:
            conn.execute(
                "INSERT OR REPLACE INTO states (record_id, status) VALUES (?, ?)",
                (record_id, status),
            )
            conn.commit()
        finally:
            conn.close()

    def move_prev(self):
        """Step back one record (staying put at the first) and return the current record."""
        if self.current_index > 0:
            self.current_index -= 1
        return self.get_current_record()

    def move_next(self):
        """Step forward one record (staying put at the last) and return the current record."""
        if self.current_index < len(self.data) - 1:
            self.current_index += 1
        return self.get_current_record()
# Module-wide viewer instance used by the UI callbacks; constructing it reads
# the JSONL file and creates the status table.
viewer = JSONLViewer(
    data_file_path='plik.jsonl',
    db_path='states.db',
)
def update_ui(record):
    """Turn *record* into the six values shown by the UI.

    Returns a tuple ``(instruction, chosen, rejected, chosen_score,
    rejected_score, status)``. Any field missing from the record is returned
    as ``""``; a falsy record (``None`` or empty) yields six empty strings.
    """
    if not record:
        return "", "", "", "", "", ""

    # Use .get so a record without a 'conversations' key is treated like one
    # with an empty conversation list, matching how the other optional
    # fields are handled below.
    conversations = record.get('conversations')
    instruction = conversations[0]['value'] if conversations else ""
    chosen = record['chosen']['value'] if 'chosen' in record else ""
    rejected = record['rejected']['value'] if 'rejected' in record else ""
    chosen_score = record['chosen_score'] if 'chosen_score' in record else ""
    rejected_score = record['rejected_score'] if 'rejected_score' in record else ""

    # The review status is looked up by the record's content hash.
    record_id = viewer.get_record_id(record)
    status = viewer.get_status(record_id)
    return instruction, chosen, rejected, chosen_score, rejected_score, status
def on_prev():
    """Step the viewer back one record and return the UI values for it."""
    return update_ui(viewer.move_prev())
def on_next():
    """Step the viewer forward one record and return the UI values for it."""
    return update_ui(viewer.move_next())
def on_ok():
    """Store the status "ok" for the current record and return refreshed UI values."""
    current = viewer.get_current_record()
    if current:
        viewer.set_status(viewer.get_record_id(current), "ok")
    return update_ui(current)
def on_rejected():
    """Store the status "rejected" for the current record and return refreshed UI values."""
    current = viewer.get_current_record()
    if current:
        viewer.set_status(viewer.get_record_id(current), "rejected")
    return update_ui(current)
with gr.Blocks(css="button.ok-button { background-color: #4CAF50 !important; }") as demo:
    # Display fields for the record currently selected in the viewer.
    instruction = gr.Textbox(label="INSTRUCTION", lines=2, max_lines=2)
    chosen = gr.Textbox(label="CHOSEN", lines=12, max_lines=12)
    rejected = gr.Textbox(label="REJECTED", lines=12, max_lines=12)
    chosen_score = gr.Number(label="CHOSEN SCORE")
    rejected_score = gr.Number(label="REJECTED SCORE")
    status = gr.Textbox(label="STATUS")

    # Navigation and review buttons share one row.
    with gr.Row():
        prev_btn = gr.Button("PREV")
        next_btn = gr.Button("NEXT")
        ok_btn = gr.Button("OK", elem_classes="ok-button")
        rejected_btn = gr.Button("REJECTED", variant="stop")

    # Every callback returns the same six values, in this order.
    record_outputs = [instruction, chosen, rejected, chosen_score, rejected_score, status]

    prev_btn.click(on_prev, outputs=record_outputs)
    next_btn.click(on_next, outputs=record_outputs)
    ok_btn.click(on_ok, outputs=record_outputs)
    rejected_btn.click(on_rejected, outputs=record_outputs)

    # Fill the fields on every page load instead of baking the first record
    # in at import time: the viewer's position and the stored statuses change
    # while the app runs, so a reload must show the record the buttons will
    # actually act on.
    demo.load(lambda: update_ui(viewer.get_current_record()), outputs=record_outputs)
if __name__ == "__main__":
    # Start the Gradio server only when this file is run as a script.
    demo.launch()