Spaces:
Sleeping
Sleeping
Hiroaki Ogasawara
committed on
Commit
·
e642ef1
1
Parent(s):
fb1a07e
chore: separate load script
Browse files
app.py
CHANGED
@@ -9,6 +9,21 @@ from utils import evaluate, report
|
|
9 |
from transformers import AutoTokenizer
|
10 |
|
11 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
12 |
def process_jsonl_file(jsonl_file_path: str, api_key: str):
|
13 |
try:
|
14 |
content = open(jsonl_file_path, "r", encoding="utf-8").readlines()
|
@@ -32,7 +47,7 @@ def process_jsonl_file(jsonl_file_path: str, api_key: str):
|
|
32 |
) as temp_file:
|
33 |
temp_file.write(html_content)
|
34 |
output_file = temp_file.name
|
35 |
-
|
36 |
output_csv = None
|
37 |
keys = json_data[0].keys()
|
38 |
with tempfile.NamedTemporaryFile(
|
@@ -53,8 +68,7 @@ def process_jsonl_file(jsonl_file_path: str, api_key: str):
|
|
53 |
return None, None, e
|
54 |
|
55 |
|
56 |
-
|
57 |
-
with gr.Blocks() as reporting:
|
58 |
jsonl_input = gr.File(label="JSONLファイルをアップロード")
|
59 |
api_key_input = gr.Textbox(
|
60 |
label="GeminiのAPIキー(スコアのセルフ評価を行う場合)", type="password"
|
@@ -72,6 +86,8 @@ with gr.Blocks() as reporting:
|
|
72 |
outputs=[output_file, output_csv, output_text],
|
73 |
)
|
74 |
|
|
|
|
|
75 |
llm_jp_3 = "llm-jp/llm-jp-3-1.8b"
|
76 |
gemma_2 = "google/gemma-2-2b"
|
77 |
|
@@ -102,20 +118,8 @@ def tokenize_text(text: str, tokenizer_name: str):
|
|
102 |
token_count = len(tokens)
|
103 |
return f"<p>{tokenized_text}</p><p>Token Count: {token_count}</p>"
|
104 |
|
105 |
-
# https://x.com/abidlabs/status/1721548226250371264/photo/1
|
106 |
-
ga_script = """
|
107 |
-
<!-- Google tag (gtag.js) -->
|
108 |
-
<script async src="https://www.googletagmanager.com/gtag/js?id=G-0SHLFV3PV0"></script>
|
109 |
-
<script>
|
110 |
-
window.dataLayer = window.dataLayer || [];
|
111 |
-
function gtag(){dataLayer.push(arguments);}
|
112 |
-
gtag('js', new Date());
|
113 |
-
|
114 |
-
gtag('config', 'G-0SHLFV3PV0');
|
115 |
-
</script>
|
116 |
-
"""
|
117 |
|
118 |
-
with gr.Blocks(
|
119 |
with gr.Row():
|
120 |
tokenizer_dropdown = gr.Dropdown(
|
121 |
label="Tokenizerを選択", choices=tokenizer_names, value=tokenizer_names[0]
|
|
|
9 |
from transformers import AutoTokenizer
|
10 |
|
11 |
|
12 |
+
# https://x.com/abidlabs/status/1721548226250371264/photo/1
|
13 |
+
# https://github.com/gradio-app/gradio/issues/5954
|
14 |
+
ga_script = """
|
15 |
+
<script async src="https://www.googletagmanager.com/gtag/js?id=G-0SHLFV3PV0"></script>
|
16 |
+
"""
|
17 |
+
ga_load = """
|
18 |
+
function() {
|
19 |
+
window.dataLayer = window.dataLayer || [];
|
20 |
+
function gtag(){dataLayer.push(arguments);}
|
21 |
+
gtag('js', new Date());
|
22 |
+
|
23 |
+
gtag('config', 'G-0SHLFV3PV0');
|
24 |
+
}
|
25 |
+
"""
|
26 |
+
|
27 |
def process_jsonl_file(jsonl_file_path: str, api_key: str):
|
28 |
try:
|
29 |
content = open(jsonl_file_path, "r", encoding="utf-8").readlines()
|
|
|
47 |
) as temp_file:
|
48 |
temp_file.write(html_content)
|
49 |
output_file = temp_file.name
|
50 |
+
|
51 |
output_csv = None
|
52 |
keys = json_data[0].keys()
|
53 |
with tempfile.NamedTemporaryFile(
|
|
|
68 |
return None, None, e
|
69 |
|
70 |
|
71 |
+
with gr.Blocks(head=ga_script) as reporting:
|
|
|
72 |
jsonl_input = gr.File(label="JSONLファイルをアップロード")
|
73 |
api_key_input = gr.Textbox(
|
74 |
label="GeminiのAPIキー(スコアのセルフ評価を行う場合)", type="password"
|
|
|
86 |
outputs=[output_file, output_csv, output_text],
|
87 |
)
|
88 |
|
89 |
+
reporting.load(None, js=ga_load)
|
90 |
+
|
91 |
llm_jp_3 = "llm-jp/llm-jp-3-1.8b"
|
92 |
gemma_2 = "google/gemma-2-2b"
|
93 |
|
|
|
118 |
token_count = len(tokens)
|
119 |
return f"<p>{tokenized_text}</p><p>Token Count: {token_count}</p>"
|
120 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
121 |
|
122 |
+
with gr.Blocks() as tokenization:
|
123 |
with gr.Row():
|
124 |
tokenizer_dropdown = gr.Dropdown(
|
125 |
label="Tokenizerを選択", choices=tokenizer_names, value=tokenizer_names[0]
|