File size: 4,801 Bytes
72c7698
 
37b927a
2c4ffd2
 
72c7698
 
2c4ffd2
37b927a
2c4ffd2
72c7698
 
 
2c4ffd2
72c7698
 
2c4ffd2
 
 
 
 
 
 
 
 
 
72c7698
 
 
 
 
 
37b927a
 
 
 
2c4ffd2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
72c7698
 
 
 
 
 
 
 
 
 
 
2c4ffd2
72c7698
 
 
 
 
 
 
 
2c4ffd2
 
72c7698
 
 
 
 
 
 
 
 
 
 
 
 
2c4ffd2
 
 
72c7698
 
 
 
2c4ffd2
 
 
72c7698
 
6bbb741
aff4f62
 
 
72c7698
 
 
 
 
 
 
6bbb741
 
72c7698
 
 
 
7e788c4
72c7698
 
 
 
 
 
 
 
 
 
 
 
6bbb741
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
import gradio as gr
import json
import hashlib
import sqlite3
import os

class JSONLViewer:
    """Browse the records of a JSONL file and persist a status per record.

    All records are read into memory at construction time. Statuses live in a
    SQLite table named ``states`` and are keyed by a hash of the record's
    content, so a status is tied to what the record contains rather than to
    its position in the file.
    """

    def __init__(self, data_file_path, db_path):
        """Load all records and make sure the status table exists.

        Args:
            data_file_path: Path of the UTF-8 encoded JSONL file to browse.
            db_path: Path of the SQLite database file holding the statuses.
        """
        self.data_file_path = data_file_path
        self.db_path = db_path
        self.current_index = 0
        self.data = []
        self.load_data()
        self.init_db()

    def load_data(self):
        """Parse every non-blank line of the data file as one JSON record.

        Raises:
            OSError: If the data file cannot be opened.
            json.JSONDecodeError: If a non-blank line is not valid JSON.
        """
        with open(self.data_file_path, 'r', encoding='utf-8') as file:
            # Skip blank lines (e.g. an empty line at the end of the file):
            # json.loads() raises on empty input.
            self.data = [json.loads(line) for line in file if line.strip()]

    def _execute(self, sql, params=(), commit=False):
        """Run one statement on a short-lived connection.

        The connection is closed even when the statement raises, so a failing
        query does not leak an open database handle.

        Args:
            sql: SQL statement, using ``?`` placeholders for values.
            params: Values bound to the placeholders.
            commit: Whether to commit after executing the statement.

        Returns:
            The first result row, or None when the statement yields no rows.
        """
        conn = sqlite3.connect(self.db_path)
        try:
            row = conn.execute(sql, params).fetchone()
            if commit:
                conn.commit()
            return row
        finally:
            conn.close()

    def init_db(self):
        """Create the ``states`` table if it does not exist yet."""
        self._execute('''CREATE TABLE IF NOT EXISTS states
                     (record_id TEXT PRIMARY KEY, status TEXT)''', commit=True)

    def get_current_record(self):
        """Return the record at ``current_index``, or None if out of range."""
        if 0 <= self.current_index < len(self.data):
            return self.data[self.current_index]
        return None

    def get_record_id(self, record):
        """Return a hex MD5 digest identifying ``record`` by its content.

        Keys are sorted before serialising so that two records with the same
        content but different key order get the same id.
        """
        record_str = json.dumps(record, sort_keys=True)
        return hashlib.md5(record_str.encode()).hexdigest()

    def get_status(self, record_id):
        """Return the stored status for ``record_id``, or "" if none is stored."""
        result = self._execute(
            "SELECT status FROM states WHERE record_id = ?", (record_id,))
        return result[0] if result else ""

    def set_status(self, record_id, status):
        """Store ``status`` for ``record_id``, replacing any previous value."""
        self._execute(
            "INSERT OR REPLACE INTO states (record_id, status) VALUES (?, ?)",
            (record_id, status), commit=True)

    def move_prev(self):
        """Step back one record (staying on the first) and return the current record."""
        if self.current_index > 0:
            self.current_index -= 1
        return self.get_current_record()

    def move_next(self):
        """Step forward one record (staying on the last) and return the current record."""
        if self.current_index < len(self.data) - 1:
            self.current_index += 1
        return self.get_current_record()

# Single module-level viewer instance used by the UI callbacks in this file.
viewer = JSONLViewer(data_file_path='plik.jsonl', db_path='states.db')

def update_ui(record):
    """Map a record to the values shown by the six UI components.

    Args:
        record: A parsed JSONL record (dict), or a falsy value when there is
            nothing to display.

    Returns:
        Tuple ``(instruction, chosen, rejected, chosen_score, rejected_score,
        status)``. Missing text fields are ``""``; missing scores are ``None``.
    """
    if not record:
        # The score slots feed gr.Number components in this file, so "no
        # value" is None rather than a non-numeric empty string.
        return "", "", "", None, None, ""

    # Treat a missing 'conversations' key like an empty list, consistent with
    # how the other optional fields are handled.
    conversations = record.get('conversations')
    instruction = conversations[0]['value'] if conversations else ""
    chosen = record['chosen']['value'] if 'chosen' in record else ""
    rejected = record['rejected']['value'] if 'rejected' in record else ""
    chosen_score = record.get('chosen_score')
    rejected_score = record.get('rejected_score')
    status = viewer.get_status(viewer.get_record_id(record))
    return instruction, chosen, rejected, chosen_score, rejected_score, status

def on_prev():
    """Step the shared viewer back one record and return the refreshed UI values."""
    return update_ui(viewer.move_prev())

def on_next():
    """Step the shared viewer forward one record and return the refreshed UI values."""
    return update_ui(viewer.move_next())

def on_ok():
    """Store status "ok" for the current record and return the refreshed UI values."""
    current = viewer.get_current_record()
    if not current:
        # Nothing to mark; still return values so the UI stays consistent.
        return update_ui(current)
    viewer.set_status(viewer.get_record_id(current), "ok")
    return update_ui(current)

def on_rejected():
    """Store status "rejected" for the current record and return the refreshed UI values."""
    current = viewer.get_current_record()
    if not current:
        # Nothing to mark; still return values so the UI stays consistent.
        return update_ui(current)
    viewer.set_status(viewer.get_record_id(current), "rejected")
    return update_ui(current)

# Build the review UI: six read-out components plus navigation/marking buttons.
with gr.Blocks(css="button.ok-button { background-color: #4CAF50 !important; }") as demo:
    instruction = gr.Textbox(label="INSTRUCTION", lines=2, max_lines=2)
    chosen = gr.Textbox(label="CHOSEN", lines=12, max_lines=12)
    rejected = gr.Textbox(label="REJECTED", lines=12, max_lines=12)
    chosen_score = gr.Number(label="CHOSEN SCORE")
    rejected_score = gr.Number(label="REJECTED SCORE")
    status = gr.Textbox(label="STATUS")

    with gr.Row():
        prev_btn = gr.Button("PREV")
        next_btn = gr.Button("NEXT")
        ok_btn = gr.Button("OK", elem_classes="ok-button")
        rejected_btn = gr.Button("REJECTED", variant="stop")

    # Every callback refreshes the same six components, in the order in which
    # update_ui() returns its values.
    outputs = [instruction, chosen, rejected, chosen_score, rejected_score, status]

    prev_btn.click(on_prev, outputs=outputs)
    next_btn.click(on_next, outputs=outputs)
    ok_btn.click(on_ok, outputs=outputs)
    rejected_btn.click(on_rejected, outputs=outputs)

    # Fill the components on every page load instead of baking the first
    # record into the layout at build time. The viewer's current_index lives
    # in this process and survives page reloads, so a freshly loaded page must
    # display the record that OK / REJECTED will actually act on.
    demo.load(lambda: update_ui(viewer.get_current_record()), outputs=outputs)

if __name__ == "__main__":
    demo.launch()