Liu Yiwen commited on
Commit
4570f48
·
0 Parent(s):

lotsa viewer v0.1.0

Browse files
.vscode/settings.json ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "python.testing.unittestArgs": [
3
+ "-v",
4
+ "-s",
5
+ ".",
6
+ "-p",
7
+ "*_test.py"
8
+ ],
9
+ "python.testing.pytestEnabled": false,
10
+ "python.testing.unittestEnabled": true
11
+ }
README.md ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Datasets Explorer
3
+ emoji: 📖
4
+ colorFrom: pink
5
+ colorTo: purple
6
+ sdk: gradio
7
+ sdk_version: 4.36.0
8
+ app_file: app.py
9
+ pinned: false
10
+ ---
11
+
12
+ # 📖 Datasets Explorer
13
+
14
+ Access any slice of data of any dataset on the [Hugging Face Dataset Hub](https://huggingface.co/datasets)
15
+
16
+ Run:
17
+
18
+ ```bash
19
+ gradio app.py
20
+ ```
__pycache__/app.cpython-310.pyc ADDED
Binary file (12.4 kB). View file
 
__pycache__/data.cpython-310.pyc ADDED
Binary file (1.18 kB). View file
 
app.py ADDED
@@ -0,0 +1,429 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import base64
2
+ import copy
3
+ from datetime import datetime, timedelta
4
+ from io import BytesIO
5
+ import random
6
+ import gradio as gr
7
+ from functools import lru_cache
8
+ from hffs.fs import HfFileSystem
9
+ from typing import List, Tuple, Callable
10
+ from matplotlib import pyplot as plt
11
+ import pandas as pd
12
+ import numpy as np
13
+ import pyarrow as pa
14
+ import pyarrow.parquet as pq
15
+ from functools import partial
16
+ from tqdm.contrib.concurrent import thread_map
17
+ from datasets import Features, Image, Audio, Sequence
18
+ from fastapi import FastAPI, Response
19
+ import uvicorn
20
+ import os
21
+ from gradio_datetimerange import DateTimeRange
22
+
23
class AppError(RuntimeError):
    """Failure whose message is meant to be shown to the user.

    The Gradio handlers in this file catch this exception type and render
    ``str(err)`` in the error panel.
    """
25
+
26
+
27
# Base URL used to build links to the /image and /audio routes.
# DEV selects the local server; otherwise the Space's own host is used when the
# SPACE_HOST environment variable is set, falling back to the original
# deployment's host so existing setups keep working.
if os.getenv("DEV"):
    APP_URL = "http://127.0.0.1:7860"
else:
    APP_URL = f"https://{os.getenv('SPACE_HOST') or 'lhoestq-datasets-explorer.hf.space'}"
# Number of dataset rows shown per page.
PAGE_SIZE = 5
# Upper bound on the number of blobs kept in _blobs_cache.
MAX_CACHED_BLOBS = PAGE_SIZE * 10
# Number of pre-built time-series plot slots in the UI; each row of a page is
# written to one slot, so this must not be smaller than PAGE_SIZE.
TIME_PLOTS_NUM = PAGE_SIZE
# blob id -> raw bytes; dict insertion order doubles as the eviction order.
_blobs_cache = {}
32
+
33
+ #####################################################
34
+ # Utils
35
+ #####################################################
36
def ndarray_to_base64(ndarray):
    """Plot a one-dimensional array and return the PNG as a Base64 data URI.

    Args:
        ndarray: the values to draw; passed straight to ``plt.plot``.

    Returns:
        A ``data:image/png;base64,...`` string containing the rendered plot.
    """
    # Create the figure.
    fig = plt.figure(figsize=(8, 4))
    try:
        plt.plot(ndarray)
        plt.title("Vector Plot")
        plt.xlabel("Index")
        plt.ylabel("Value")
        plt.tight_layout()

        # Render the image into an in-memory byte stream.
        buffer = BytesIO()
        plt.savefig(buffer, format="png")
    finally:
        # Always release the figure, even when plotting or saving fails,
        # so figures do not accumulate in pyplot's global registry.
        plt.close(fig)

    # Encode as a Base64 string.
    base64_str = base64.b64encode(buffer.getvalue()).decode('utf-8')
    return f"data:image/png;base64,{base64_str}"
57
+
58
def flatten_ndarray_column(df, column_name):
    """Split an array-valued column into one column per leading-axis entry.

    Each cell is first normalised by ``flatten_ndarray``; entry ``i`` along
    the first axis of the normalised value is then stored in a new column
    named ``f"{column_name}_{i}"``. Cells with fewer entries than the longest
    one get ``np.nan`` in the missing columns.

    Args:
        df: the DataFrame to extend. It is modified in place.
        column_name: name of the column holding the arrays.

    Returns:
        The same ``df`` object, with the new columns added. An empty frame is
        returned unchanged.
    """
    def flatten_ndarray(ndarray):
        # Object arrays (arrays of arrays) are normalised recursively and
        # stacked along the first axis.
        if isinstance(ndarray, np.ndarray) and ndarray.dtype == 'O':
            return np.concatenate([flatten_ndarray(subarray) for subarray in ndarray])
        # A plain 1-D array becomes a single row of shape (1, n).
        elif isinstance(ndarray, np.ndarray) and ndarray.ndim == 1:
            return np.expand_dims(ndarray, axis=0)
        return ndarray

    flattened_data = df[column_name].apply(flatten_ndarray)
    if flattened_data.empty:
        # max() of an empty sequence raises ValueError; nothing to split.
        return df
    max_length = max(flattened_data.apply(len))

    for i in range(max_length):
        # Bind i as a default so the lambda does not depend on the loop variable.
        df[f'{column_name}_{i}'] = flattened_data.apply(lambda x, i=i: x[i] if i < len(x) else np.nan)

    return df
73
+ #####################################################
74
+ # Define routes for image and audio files
75
+ #####################################################
76
+
77
app = FastAPI()


@app.get(
    "/image",
    responses={200: {"content": {"image/png": {}}}},
    response_class=Response,
)
def image(id: str):
    """Serve a cached blob as a PNG image.

    Args:
        id: the blob id, taken from the ``id`` query parameter.

    Returns:
        The blob bytes with media type ``image/png``, or an empty 404
        response when the id is unknown (never pushed, or already evicted
        from the cache) or the cached value is ``None``.
    """
    try:
        blob = get_blob(id)
    except KeyError:
        # Unknown ids used to surface as an unhandled KeyError (HTTP 500).
        return Response(status_code=404)
    if blob is None:
        return Response(status_code=404)
    return Response(content=blob, media_type="image/png")
88
+
89
+
90
@app.get(
    "/audio",
    responses={200: {"content": {"audio/wav": {}}}},
    response_class=Response,
)
def audio(id: str):
    """Serve a cached blob as WAV audio.

    Args:
        id: the blob id, taken from the ``id`` query parameter.

    Returns:
        The blob bytes with media type ``audio/wav``, or an empty 404
        response when the id is unknown (never pushed, or already evicted
        from the cache) or the cached value is ``None``.
    """
    try:
        blob = get_blob(id)
    except KeyError:
        # Unknown ids used to surface as an unhandled KeyError (HTTP 500).
        return Response(status_code=404)
    if blob is None:
        return Response(status_code=404)
    return Response(content=blob, media_type="audio/wav")
98
+
99
+
100
def push_blob(blob: bytes, blob_id: str) -> str:
    """Store a blob in the module cache under ``blob_id`` and return the id.

    Re-pushing an existing id moves it to the newest position. When the cache
    grows past ``MAX_CACHED_BLOBS`` the oldest entry is dropped.
    """
    # Remove-then-insert so an existing id ends up last in insertion order.
    _blobs_cache.pop(blob_id, None)
    _blobs_cache[blob_id] = blob
    if len(_blobs_cache) > MAX_CACHED_BLOBS:
        oldest_id = next(iter(_blobs_cache))
        del _blobs_cache[oldest_id]
    return blob_id
108
+
109
+
110
def get_blob(blob_id: str) -> bytes:
    """Return the cached blob stored under ``blob_id``.

    Raises:
        KeyError: if no blob is cached under that id.
    """
    cached = _blobs_cache[blob_id]
    return cached
113
+
114
+
115
def blobs_to_urls(blobs: List[bytes], type: str, prefix: str) -> List[str]:
    """Cache the blobs and return one URL per blob pointing at this app.

    Args:
        blobs: the raw payloads, cached under ids ``f"{prefix}-{i}"``.
        type: route name used as the URL path (e.g. ``"image"``).
        prefix: id prefix shared by all blobs of this call.

    Returns:
        URLs of the form ``APP_URL/<type>?id=<blob id>``, in input order.
    """
    from urllib.parse import urlencode

    blob_ids = [push_blob(blob, f"{prefix}-{i}") for i, blob in enumerate(blobs)]
    # Ids are built from dataset/config/split/column names, which may contain
    # spaces, '&', '#', etc.; percent-encode them so the query string (and the
    # markdown image link built from it) stays valid.
    return [f"{APP_URL}/{type}?{urlencode({'id': blob_id})}" for blob_id in blob_ids]
118
+
119
+
120
+ #####################################################
121
+ # List configs, splits and parquet files
122
+ #####################################################
123
+
124
+
125
@lru_cache(maxsize=128)
def get_parquet_fs(dataset: str) -> HfFileSystem:
    """Open the ``refs/convert/parquet`` revision of a dataset repository.

    Args:
        dataset: the dataset repository name.

    Returns:
        A filesystem rooted at the dataset's Parquet export.

    Raises:
        AppError: if the filesystem cannot be opened or listed, or if the
            root of the revision contains a non-hidden regular file (which
            this code treats as "no Parquet export").
    """
    message = f"Parquet export doesn't exist for '{dataset}'."
    try:
        fs = HfFileSystem(dataset, repo_type="dataset", revision="refs/convert/parquet")
        has_root_files = any(fs.isfile(path) for path in fs.ls("") if not path.startswith("."))
    except Exception as err:
        # Was a bare `except:`, which also swallowed KeyboardInterrupt and
        # SystemExit; keep the original cause attached for debugging.
        raise AppError(message) from err
    if has_root_files:
        raise AppError(message)
    return fs
134
+
135
+
136
+
137
@lru_cache(maxsize=128)
def get_parquet_configs(dataset: str) -> List[str]:
    """List the directories at the root of the dataset's Parquet export.

    Each root directory is reported as one config name.
    """
    fs = get_parquet_fs(dataset)
    configs = []
    for path in fs.ls(""):
        if fs.isdir(path):
            configs.append(path)
    return configs
141
+
142
+
143
def _sorted_split_key(split: str) -> str:
    """Sort key that places splits starting with "train" before all others."""
    if split.startswith("train"):
        # chr(0) sorts before every other character.
        return chr(0) + split
    return split
145
+
146
+
147
@lru_cache(maxsize=128)
def get_parquet_splits(dataset: str, config: str) -> List[str]:
    """List the split names of one config of the dataset's Parquet export.

    Args:
        dataset: the dataset repository name.
        config: the config directory to list.

    Returns:
        The split names (second path component of each sub-directory of
        ``config``), ordered with ``_sorted_split_key`` so that "train"
        splits come first.
    """
    fs = get_parquet_fs(dataset)
    splits = [path.split("/")[1] for path in fs.ls(config) if fs.isdir(path)]
    # The key helper was defined for this purpose but never applied, so the
    # default split picked by the UI depended on the listing order.
    return sorted(splits, key=_sorted_split_key)
151
+
152
+
153
+ #####################################################
154
+ # Index and query Parquet data
155
+ #####################################################
156
+
157
+
158
# One zero-argument callable per Parquet row group; calling it reads that group.
RowGroupReaders = List[Callable[[], pa.Table]]


@lru_cache(maxsize=128)
def index(dataset: str, config: str, split: str) -> Tuple[np.ndarray, RowGroupReaders, int, Features]:
    """Index the row groups of every Parquet file of a dataset split.

    Args:
        dataset: the dataset repository name.
        config: the config directory inside the Parquet export.
        split: the split directory inside ``config``.

    Returns:
        A tuple ``(rg_offsets, rg_readers, max_page, features)``:
        cumulative row counts per row group, one reader per row group, the
        number of pages of ``PAGE_SIZE`` rows, and the features derived from
        the first file's Arrow schema.

    Raises:
        AppError: if no Parquet file matches (invalid config or split), or
            if the split contains no rows.
    """
    fs = get_parquet_fs(dataset)
    sources = fs.glob(f"{config}/{split}/*.parquet")
    if not sources:
        if config not in get_parquet_configs(dataset):
            raise AppError(f"Invalid config {config}. Available configs are: {', '.join(get_parquet_configs(dataset))}.")
        else:
            raise AppError(f"Invalid split {split}. Available splits are: {', '.join(get_parquet_splits(dataset, config))}.")
    desc = f"{dataset}/{config}/{split}"
    # Open the file footers concurrently.
    all_pf: List[pq.ParquetFile] = thread_map(partial(pq.ParquetFile, filesystem=fs), sources, desc=desc, unit="pq")
    features = Features.from_arrow_schema(all_pf[0].schema.to_arrow_schema())
    row_groups = [(pf, i) for pf in all_pf for i in range(pf.metadata.num_row_groups)]
    rg_offsets = np.cumsum([pf.metadata.row_group(i).num_rows for pf, i in row_groups])
    if len(rg_offsets) == 0 or rg_offsets[-1] == 0:
        # Without this guard rg_offsets[-1] raises IndexError on files that
        # have no row groups, and a zero-row split yields max_page == 0.
        raise AppError(f"Split {split} of config {config} is empty.")
    rg_readers = [partial(pf.read_row_group, i) for pf, i in row_groups]
    # Ceiling division of the total row count by PAGE_SIZE, as a plain int.
    max_page = int(1 + (rg_offsets[-1] - 1) // PAGE_SIZE)
    return rg_offsets, rg_readers, max_page, features
177
+
178
+
179
def query(page: int, page_size: int, rg_offsets: np.ndarray, rg_readers: RowGroupReaders) -> pd.DataFrame:
    """Read the rows of one page from the indexed row groups.

    Args:
        page: 1-based page number.
        page_size: number of rows per page.
        rg_offsets: cumulative row counts, one entry per row group.
        rg_readers: one zero-argument reader per row group, in the same order.

    Returns:
        The page's rows as a pandas DataFrame (at most ``page_size`` rows).
    """
    total_rows = rg_offsets[-1]
    start_row = (page - 1) * page_size
    # Both bounds are inclusive, so the last row of the page is
    # page * page_size - 1; using page * page_size here made a page that ends
    # exactly on a row-group boundary read the following row group as well.
    end_row = min(page * page_size, total_rows) - 1
    # rg_offsets[start_rg - 1] <= start_row < rg_offsets[start_rg]
    # rg_offsets[end_rg - 1] <= end_row < rg_offsets[end_rg]
    start_rg, end_rg = np.searchsorted(rg_offsets, [start_row, end_row], side="right")  # both included
    pa_table = pa.concat_tables([rg_readers[i]() for i in range(start_rg, end_rg + 1)])
    # Position of start_row inside the first row group that was read.
    offset = start_row - (rg_offsets[start_rg - 1] if start_rg > 0 else 0)
    pa_table = pa_table.slice(offset, page_size)
    return pa_table.to_pandas()
188
+
189
+
190
def sanitize_inputs(dataset: str, config: str, split: str, page: str) -> Tuple[str, str, str, int]:
    """Validate the user-supplied selection and convert the page to an int.

    Args:
        dataset: the dataset repository name; must be non-empty.
        config: the config name; must be non-empty.
        split: the split name; must be non-empty.
        page: the page number as text (or an int); must be a positive integer.

    Returns:
        ``(dataset, config, split, page)`` with ``page`` as an ``int``.

    Raises:
        AppError: if the page is not a positive integer or any name is empty.
    """
    # Explicit checks instead of `assert` (stripped under -O) and a bare
    # `except:` (which also caught KeyboardInterrupt/SystemExit).
    try:
        page_number = int(page)
    except (TypeError, ValueError):
        raise AppError(f"Bad page: {page}") from None
    if page_number <= 0:
        raise AppError(f"Bad page: {page_number}")
    if not dataset:
        raise AppError("Empty dataset name")
    if not config:
        raise AppError(f"Empty config. Available configs are: {', '.join(get_parquet_configs(dataset))}.")
    if not split:
        raise AppError(f"Empty split. Available splits are: {', '.join(get_parquet_splits(dataset, config))}.")
    return dataset, config, split, page_number
203
+
204
+
205
@lru_cache(maxsize=128)
def get_page_df(dataset: str, config: str, split: str, page: str) -> Tuple[pd.DataFrame, int, Features]:
    """Load one page of a dataset split as a DataFrame.

    Returns:
        ``(df, max_page, features)`` for the requested page.

    Raises:
        AppError: if the inputs are invalid or the page is past the last one.
    """
    dataset, config, split, page = sanitize_inputs(dataset, config, split, page)
    offsets, readers, max_page, features = index(dataset, config, split)
    if page > max_page:
        raise AppError(f"Page {page} does not exist")
    page_df = query(page, PAGE_SIZE, rg_offsets=offsets, rg_readers=readers)
    return page_df, max_page, features
213
+
214
+
215
+ #####################################################
216
+ # Format results
217
+ #####################################################
218
+
219
def get_page(dataset: str, config: str, split: str, page: str) -> Tuple[str, int, str]:
    """Load one page and format it for display.

    For every dataset except ``'Salesforce/lotsa_data'`` the page is rendered
    as a markdown table: image columns become markdown image links served by
    this app, float32 sequence columns become inline Base64 plots, and
    image/audio/binary columns that cannot be rendered are dropped and listed
    in the info message. For ``'Salesforce/lotsa_data'`` the DataFrame itself
    is returned (not a string) so the caller can plot it.

    Returns:
        ``(result, max_page, info)`` where ``result`` is the markdown text or
        the DataFrame as described above.

    Raises:
        AppError: propagated from ``get_page_df``.
    """
    df_, max_page, features = get_page_df(dataset, config, split, page)
    # get_page_df is lru_cached: work on a copy so the cached frame is not mutated.
    df = copy.deepcopy(df_)
    if dataset == 'Salesforce/lotsa_data':
        # Columns are left untouched here, so there is nothing to report.
        return df, max_page, ""

    unsupported_columns = []
    for column, feature in features.items():
        if isinstance(feature, Image):
            blob_type = "image"  # TODO: support audio - right now it seems that the markdown renderer in gradio doesn't support audio and shows nothing
            blob_urls = blobs_to_urls([item.get("bytes") if isinstance(item, dict) else None for item in df[column]], blob_type, prefix=f"{dataset}-{config}-{split}-{page}-{column}")
            # Drop and re-add so the rendered column moves to the end.
            df = df.drop([column], axis=1)
            df[column] = [f"![]({url})" for url in blob_urls]
        elif any(bad_type in str(feature) for bad_type in ["Image(", "Audio(", "'binary'"]):
            unsupported_columns.append(column)
            df = df.drop([column], axis=1)
        elif isinstance(feature, Sequence):
            # The inner feature is not guaranteed to carry a dtype, so look it
            # up defensively instead of raising AttributeError.
            if getattr(feature.feature, "dtype", None) == 'float32':
                # Plot the values directly and embed the image as Base64.
                base64_srcs = [ndarray_to_base64(vec) for vec in df[column]]
                df = df.drop([column], axis=1)
                df[column] = [f"![]({src})" for src in base64_srcs]
    info = "" if not unsupported_columns else f"Some columns are not supported yet: {unsupported_columns}"
    return df.reset_index().to_markdown(index=False), max_page, info
245
+
246
+
247
+ #####################################################
248
+ # Gradio app
249
+ #####################################################
250
+
251
+
252
# NOTE(review): the nesting of the layout containers below was reconstructed
# from the statement order; confirm it matches the intended layout.
with gr.Blocks() as demo:
    gr.Markdown("# 📖 Datasets Explorer\n\nAccess any slice of data of any dataset on the [Hugging Face Dataset Hub](https://huggingface.co/datasets)")
    gr.Markdown("This is the dataset viewer from parquet export demo before the feature was added on the Hugging Face website.")
    cp_dataset = gr.Textbox("Salesforce/lotsa_data", label="Pick a dataset", placeholder="competitions/aiornot")
    cp_go = gr.Button("Explore")
    cp_config = gr.Dropdown(["plain_text"], value="plain_text", label="Config", visible=False)
    cp_split = gr.Dropdown(["train", "validation"], value="train", label="Split", visible=False)
    cp_goto_next_page = gr.Button("Next page", visible=False)
    cp_error = gr.Markdown("", visible=False)
    cp_info = gr.Markdown("", visible=False)
    cp_result = gr.Markdown("", visible=False)

    # Placeholder data shown in the plots until a dataset page is loaded.
    now = datetime.now()
    df = pd.DataFrame({
        'time': [now - timedelta(minutes=5*i) for i in range(25)] + [now],
        'price': np.random.randint(100, 1000, 26),
        'origin': [random.choice(["DFW", "DAL", "HOU"]) for _ in range(26)],
        'destination': [random.choice(["JFK", "LGA", "EWR"]) for _ in range(26)],
    })

    # Plot slots are created up front: one label, date-range picker and line
    # plot per slot. Each row of a loaded page is written into one slot.
    componets = []
    for _ in range(TIME_PLOTS_NUM):
        with gr.Row():
            textbox = gr.Textbox("名称或说明")
            with gr.Column():
                daterange = DateTimeRange(["now - 24h", "now"])
                plot1 = gr.LinePlot(df, x="time", y="price", color="origin")
                daterange.bind([plot1])
        comp = {
            "textbox": textbox,
            "daterange": daterange,
            "plot1": plot1,
        }
        componets.append(comp)

    with gr.Row():
        cp_page = gr.Textbox("1", label="Page", placeholder="1", visible=False)
        cp_goto_page = gr.Button("Go to page", visible=False)

    def show_error(message: str) -> dict:
        """Return the component updates that display ``message`` as an error."""
        return {
            cp_error: gr.update(visible=True, value=f"## ❌ Error:\n\n{message}"),
            cp_info: gr.update(visible=False, value=""),
            cp_result: gr.update(visible=False, value=""),
        }

    def _series_length(target) -> int:
        """Return the number of time steps of a ``target`` cell.

        A cell whose first element is itself array-like holds one series per
        entry, so the length of that first series is used; otherwise the cell
        is a single series and its own length is used.
        """
        first = target[0]
        return len(first) if np.ndim(first) > 0 else len(target)

    def show_dataset_at_config_and_split_and_page(dataset: str, config: str, split: str, page: str) -> dict:
        """Load one page and return the component updates that display it.

        ``'Salesforce/lotsa_data'`` rows are drawn into the plot slots; any
        other dataset is rendered as a markdown table in ``cp_result``.
        """
        try:
            ret = {}
            if dataset != 'Salesforce/lotsa_data':
                markdown_result, max_page, info = get_page(dataset, config, split, page)
                ret[cp_result] = gr.update(visible=True, value=markdown_result)
            else:
                page_df, max_page, info = get_page(dataset, config, split, page)
                # Build the time axis of each row from its start, frequency
                # and series length. _series_length also handles 1-D targets,
                # for which len(row['target'][0]) used to raise TypeError.
                page_df['timestamp'] = page_df.apply(
                    lambda row: pd.date_range(start=row['start'], periods=_series_length(row['target']), freq=row['freq']).to_pydatetime().tolist(),
                    axis=1,
                )
                page_df = flatten_ndarray_column(page_df, 'target')
                # Drop the original start, freq and target columns.
                page_df.drop(columns=['start', 'freq', 'target'], inplace=True)
                if 'past_feat_dynamic_real' in page_df.columns:
                    page_df.drop(columns=['past_feat_dynamic_real'], inplace=True)
                # zip() pairs each row with a slot and stops at the shorter of
                # the two, so a page can never index past the available slots.
                for comp, (_, rows) in zip(componets, page_df.iterrows()):
                    item_id = rows['item_id']
                    timestamps = rows['timestamp']
                    row_frame = rows.drop('item_id').to_frame().T
                    # One line per time step: explode every list/array column.
                    df_expanded = row_frame.apply(pd.Series.explode).reset_index(drop=True).fillna(0)
                    ret.update({
                        comp["textbox"]: gr.update(value=f"item_id: {item_id}"),
                        comp["daterange"]: gr.update(value=[timestamps[0], timestamps[-1]]),
                        comp["plot1"]: gr.update(value=df_expanded, x="timestamp", y="target_0"),
                    })
            info = f"({info})" if info else ""
            return {
                **ret,
                cp_info: gr.update(visible=True, value=f"Page {page}/{max_page} {info}"),
                cp_error: gr.update(visible=False, value=""),
            }
        except AppError as err:
            return show_error(str(err))

    def show_dataset_at_config_and_split_and_next_page(dataset: str, config: str, split: str, page: str) -> dict:
        """Display the page following ``page`` and update the page box."""
        try:
            next_page = str(int(page) + 1)
        except (TypeError, ValueError):
            # A non-numeric page box used to raise an uncaught ValueError.
            return show_error(f"Bad page: {page}")
        try:
            return {
                **show_dataset_at_config_and_split_and_page(dataset, config, split, next_page),
                cp_page: gr.update(value=next_page, visible=True),
            }
        except AppError as err:
            return show_error(str(err))

    def show_dataset_at_config_and_split(dataset: str, config: str, split: str) -> dict:
        """Display page 1 of a split and reveal the paging controls."""
        try:
            return {
                **show_dataset_at_config_and_split_and_page(dataset, config, split, "1"),
                cp_page: gr.update(value="1", visible=True),
                cp_goto_page: gr.update(visible=True),
                cp_goto_next_page: gr.update(visible=True),
            }
        except AppError as err:
            return show_error(str(err))

    def show_dataset_at_config(dataset: str, config: str) -> dict:
        """Display the first split of a config and fill the split dropdown."""
        try:
            splits = get_parquet_splits(dataset, config)
            if not splits:
                raise AppError(f"Dataset {dataset} with config {config} has no splits.")
            split = splits[0]
            return {
                **show_dataset_at_config_and_split(dataset, config, split),
                # The dropdown is only shown when there is a choice to make.
                cp_split: gr.update(value=split, choices=splits, visible=len(splits) > 1),
            }
        except AppError as err:
            return show_error(str(err))

    def show_dataset(dataset: str) -> dict:
        """Display the first config of a dataset and fill the config dropdown."""
        try:
            configs = get_parquet_configs(dataset)
            if not configs:
                raise AppError(f"Dataset {dataset} has no configs.")
            config = configs[0]
            return {
                **show_dataset_at_config(dataset, config),
                # The dropdown is only shown when there is a choice to make.
                cp_config: gr.update(value=config, choices=configs, visible=len(configs) > 1),
            }
        except AppError as err:
            return show_error(str(err))

    # NOTE: generating the components dynamically (a gr.render function
    # triggered by cp_go.click that created the gr.LinePlot objects) was tried
    # and hit a bug: the app hangs in show_dataset. The plot slots are
    # therefore created statically above.

    all_outputs = [cp_config, cp_split, cp_page, cp_goto_page, cp_goto_next_page, cp_result, cp_info, cp_error]
    for comp in componets:
        all_outputs += list(comp.values())
    cp_go.click(show_dataset, inputs=[cp_dataset], outputs=all_outputs)
    cp_config.change(show_dataset_at_config, inputs=[cp_dataset, cp_config], outputs=all_outputs)
    cp_split.change(show_dataset_at_config_and_split, inputs=[cp_dataset, cp_config, cp_split], outputs=all_outputs)
    cp_goto_page.click(show_dataset_at_config_and_split_and_page, inputs=[cp_dataset, cp_config, cp_split, cp_page], outputs=all_outputs)
    cp_goto_next_page.click(show_dataset_at_config_and_split_and_next_page, inputs=[cp_dataset, cp_config, cp_split, cp_page], outputs=all_outputs)
417
+
418
+
419
if __name__ == "__main__":

    # Mount the Gradio UI at the root of the FastAPI app (which also serves
    # the /image and /audio routes) and run it with uvicorn.
    app = gr.mount_gradio_app(app, demo, path="/")
    uvicorn.run(app, host="127.0.0.1", port=7860)


# Requirements / known limitations:
# - Multivariate targets cannot be drawn on one plot at the same time. Options: a selector that shows one variable at a time; forcing a distinct color per variable; or one plot per variable (dynamically generated plots cannot be positioned).
# - Components cannot be generated dynamically.
# - There is no aggregation or summary-statistics functionality.
# - Support calling other libraries.
data.py ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import pandas as pd
3
+ import numpy as np
4
+ import random
5
+ from gradio_datetimerange import DateTimeRange
6
+ from datetime import datetime, timedelta
7
# Reference time from which the sample timestamps are generated.
now = datetime.now()

# 25 synthetic rows, five minutes apart going back from `now`, with random
# prices and randomly chosen origin/destination codes.
df = pd.DataFrame({
    'time': [now - timedelta(minutes=5*i) for i in range(25)],
    'price': np.random.randint(100, 1000, 25),
    'origin': [random.choice(["DFW", "DAL", "HOU"]) for _ in range(25)],
    'destination': [random.choice(["JFK", "LGA", "EWR"]) for _ in range(25)],
})

if __name__ == "__main__":
    with gr.Blocks() as demo:
        # One date-range picker bound to both plots
        # (presumably it drives their x-axis range - confirm against the component docs).
        daterange = DateTimeRange(["now - 24h", "now"])
        plot1 = gr.LinePlot(df, x="time", y="price")
        plot2 = gr.LinePlot(df, x="time", y="price", color="origin")
        daterange.bind([plot1, plot2])

    demo.launch(share=True)
    # Debug output: the types of the two component classes.
    print(type(DateTimeRange))
    print(type(gr.LinePlot))
flagged/log.csv ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ X Axis Data,Y Axis Data,output,flag,username,timestamp
2
+ "{""headers"": [""1""], ""data"": [[""""]], ""metadata"": null}","{""headers"": [""1""], ""data"": [[""""]], ""metadata"": null}",,,,2025-01-19 00:16:06.944352
3
+ "{""headers"": [""1""], ""data"": [[""""]], ""metadata"": null}","{""headers"": [""1""], ""data"": [[""""]], ""metadata"": null}","{""type"": ""plotly"", ""plot"": ""{\""data\"":[{\""x\"":[\""\""],\""y\"":[\""\""],\""type\"":\""bar\""}],\""layout\"":{\""template\"":{\""data\"":{\""histogram2dcontour\"":[{\""type\"":\""histogram2dcontour\"",\""colorbar\"":{\""outlinewidth\"":0,\""ticks\"":\""\""},\""colorscale\"":[[0.0,\""#0d0887\""],[0.1111111111111111,\""#46039f\""],[0.2222222222222222,\""#7201a8\""],[0.3333333333333333,\""#9c179e\""],[0.4444444444444444,\""#bd3786\""],[0.5555555555555556,\""#d8576b\""],[0.6666666666666666,\""#ed7953\""],[0.7777777777777778,\""#fb9f3a\""],[0.8888888888888888,\""#fdca26\""],[1.0,\""#f0f921\""]]}],\""choropleth\"":[{\""type\"":\""choropleth\"",\""colorbar\"":{\""outlinewidth\"":0,\""ticks\"":\""\""}}],\""histogram2d\"":[{\""type\"":\""histogram2d\"",\""colorbar\"":{\""outlinewidth\"":0,\""ticks\"":\""\""},\""colorscale\"":[[0.0,\""#0d0887\""],[0.1111111111111111,\""#46039f\""],[0.2222222222222222,\""#7201a8\""],[0.3333333333333333,\""#9c179e\""],[0.4444444444444444,\""#bd3786\""],[0.5555555555555556,\""#d8576b\""],[0.6666666666666666,\""#ed7953\""],[0.7777777777777778,\""#fb9f3a\""],[0.8888888888888888,\""#fdca26\""],[1.0,\""#f0f921\""]]}],\""heatmap\"":[{\""type\"":\""heatmap\"",\""colorbar\"":{\""outlinewidth\"":0,\""ticks\"":\""\""},\""colorscale\"":[[0.0,\""#0d0887\""],[0.1111111111111111,\""#46039f\""],[0.2222222222222222,\""#7201a8\""],[0.3333333333333333,\""#9c179e\""],[0.4444444444444444,\""#bd3786\""],[0.5555555555555556,\""#d8576b\""],[0.6666666666666666,\""#ed7953\""],[0.7777777777777778,\""#fb9f3a\""],[0.8888888888888888,\""#fdca26\""],[1.0,\""#f0f921\""]]}],\""heatmapgl\"":[{\""type\"":\""heatmapgl\"",\""colorbar\"":{\""outlinewidth\"":0,\""ticks\"":\""\""},\""colorscale\"":[[0.0,\""#0d0887\""],[0.1111111111111111,\""#46039f\""],[0.2222222222222222,\""#7201a8\""],[0.3333333333333333,\""#9c179e\""],[0.4444444444444444,\
""#bd3786\""],[0.5555555555555556,\""#d8576b\""],[0.6666666666666666,\""#ed7953\""],[0.7777777777777778,\""#fb9f3a\""],[0.8888888888888888,\""#fdca26\""],[1.0,\""#f0f921\""]]}],\""contourcarpet\"":[{\""type\"":\""contourcarpet\"",\""colorbar\"":{\""outlinewidth\"":0,\""ticks\"":\""\""}}],\""contour\"":[{\""type\"":\""contour\"",\""colorbar\"":{\""outlinewidth\"":0,\""ticks\"":\""\""},\""colorscale\"":[[0.0,\""#0d0887\""],[0.1111111111111111,\""#46039f\""],[0.2222222222222222,\""#7201a8\""],[0.3333333333333333,\""#9c179e\""],[0.4444444444444444,\""#bd3786\""],[0.5555555555555556,\""#d8576b\""],[0.6666666666666666,\""#ed7953\""],[0.7777777777777778,\""#fb9f3a\""],[0.8888888888888888,\""#fdca26\""],[1.0,\""#f0f921\""]]}],\""surface\"":[{\""type\"":\""surface\"",\""colorbar\"":{\""outlinewidth\"":0,\""ticks\"":\""\""},\""colorscale\"":[[0.0,\""#0d0887\""],[0.1111111111111111,\""#46039f\""],[0.2222222222222222,\""#7201a8\""],[0.3333333333333333,\""#9c179e\""],[0.4444444444444444,\""#bd3786\""],[0.5555555555555556,\""#d8576b\""],[0.6666666666666666,\""#ed7953\""],[0.7777777777777778,\""#fb9f3a\""],[0.8888888888888888,\""#fdca26\""],[1.0,\""#f0f921\""]]}],\""mesh3d\"":[{\""type\"":\""mesh3d\"",\""colorbar\"":{\""outlinewidth\"":0,\""ticks\"":\""\""}}],\""scatter\"":[{\""fillpattern\"":{\""fillmode\"":\""overlay\"",\""size\"":10,\""solidity\"":0.2},\""type\"":\""scatter\""}],\""parcoords\"":[{\""type\"":\""parcoords\"",\""line\"":{\""colorbar\"":{\""outlinewidth\"":0,\""ticks\"":\""\""}}}],\""scatterpolargl\"":[{\""type\"":\""scatterpolargl\"",\""marker\"":{\""colorbar\"":{\""outlinewidth\"":0,\""ticks\"":\""\""}}}],\""bar\"":[{\""error_x\"":{\""color\"":\""#2a3f5f\""},\""error_y\"":{\""color\"":\""#2a3f5f\""},\""marker\"":{\""line\"":{\""color\"":\""#E5ECF6\"",\""width\"":0.5},\""pattern\"":{\""fillmode\"":\""overlay\"",\""size\"":10,\""solidity\"":0.2}},\""type\"":\""bar\""}],\""scattergeo\"":[{\""type\"":\""scattergeo\"",\""marker\"":{\""colorbar\"":{\""outlinewidth\"":0
,\""ticks\"":\""\""}}}],\""scatterpolar\"":[{\""type\"":\""scatterpolar\"",\""marker\"":{\""colorbar\"":{\""outlinewidth\"":0,\""ticks\"":\""\""}}}],\""histogram\"":[{\""marker\"":{\""pattern\"":{\""fillmode\"":\""overlay\"",\""size\"":10,\""solidity\"":0.2}},\""type\"":\""histogram\""}],\""scattergl\"":[{\""type\"":\""scattergl\"",\""marker\"":{\""colorbar\"":{\""outlinewidth\"":0,\""ticks\"":\""\""}}}],\""scatter3d\"":[{\""type\"":\""scatter3d\"",\""line\"":{\""colorbar\"":{\""outlinewidth\"":0,\""ticks\"":\""\""}},\""marker\"":{\""colorbar\"":{\""outlinewidth\"":0,\""ticks\"":\""\""}}}],\""scattermapbox\"":[{\""type\"":\""scattermapbox\"",\""marker\"":{\""colorbar\"":{\""outlinewidth\"":0,\""ticks\"":\""\""}}}],\""scatterternary\"":[{\""type\"":\""scatterternary\"",\""marker\"":{\""colorbar\"":{\""outlinewidth\"":0,\""ticks\"":\""\""}}}],\""scattercarpet\"":[{\""type\"":\""scattercarpet\"",\""marker\"":{\""colorbar\"":{\""outlinewidth\"":0,\""ticks\"":\""\""}}}],\""carpet\"":[{\""aaxis\"":{\""endlinecolor\"":\""#2a3f5f\"",\""gridcolor\"":\""white\"",\""linecolor\"":\""white\"",\""minorgridcolor\"":\""white\"",\""startlinecolor\"":\""#2a3f5f\""},\""baxis\"":{\""endlinecolor\"":\""#2a3f5f\"",\""gridcolor\"":\""white\"",\""linecolor\"":\""white\"",\""minorgridcolor\"":\""white\"",\""startlinecolor\"":\""#2a3f5f\""},\""type\"":\""carpet\""}],\""table\"":[{\""cells\"":{\""fill\"":{\""color\"":\""#EBF0F8\""},\""line\"":{\""color\"":\""white\""}},\""header\"":{\""fill\"":{\""color\"":\""#C8D4E3\""},\""line\"":{\""color\"":\""white\""}},\""type\"":\""table\""}],\""barpolar\"":[{\""marker\"":{\""line\"":{\""color\"":\""#E5ECF6\"",\""width\"":0.5},\""pattern\"":{\""fillmode\"":\""overlay\"",\""size\"":10,\""solidity\"":0.2}},\""type\"":\""barpolar\""}],\""pie\"":[{\""automargin\"":true,\""type\"":\""pie\""}]},\""layout\"":{\""autotypenumbers\"":\""strict\"",\""colorway\"":[\""#636efa\"",\""#EF553B\"",\""#00cc96\"",\""#ab63fa\"",\""#FFA15A\"",\""#19d3f3\"",\""#FF6692\"",\""
#B6E880\"",\""#FF97FF\"",\""#FECB52\""],\""font\"":{\""color\"":\""#2a3f5f\""},\""hovermode\"":\""closest\"",\""hoverlabel\"":{\""align\"":\""left\""},\""paper_bgcolor\"":\""white\"",\""plot_bgcolor\"":\""#E5ECF6\"",\""polar\"":{\""bgcolor\"":\""#E5ECF6\"",\""angularaxis\"":{\""gridcolor\"":\""white\"",\""linecolor\"":\""white\"",\""ticks\"":\""\""},\""radialaxis\"":{\""gridcolor\"":\""white\"",\""linecolor\"":\""white\"",\""ticks\"":\""\""}},\""ternary\"":{\""bgcolor\"":\""#E5ECF6\"",\""aaxis\"":{\""gridcolor\"":\""white\"",\""linecolor\"":\""white\"",\""ticks\"":\""\""},\""baxis\"":{\""gridcolor\"":\""white\"",\""linecolor\"":\""white\"",\""ticks\"":\""\""},\""caxis\"":{\""gridcolor\"":\""white\"",\""linecolor\"":\""white\"",\""ticks\"":\""\""}},\""coloraxis\"":{\""colorbar\"":{\""outlinewidth\"":0,\""ticks\"":\""\""}},\""colorscale\"":{\""sequential\"":[[0.0,\""#0d0887\""],[0.1111111111111111,\""#46039f\""],[0.2222222222222222,\""#7201a8\""],[0.3333333333333333,\""#9c179e\""],[0.4444444444444444,\""#bd3786\""],[0.5555555555555556,\""#d8576b\""],[0.6666666666666666,\""#ed7953\""],[0.7777777777777778,\""#fb9f3a\""],[0.8888888888888888,\""#fdca26\""],[1.0,\""#f0f921\""]],\""sequentialminus\"":[[0.0,\""#0d0887\""],[0.1111111111111111,\""#46039f\""],[0.2222222222222222,\""#7201a8\""],[0.3333333333333333,\""#9c179e\""],[0.4444444444444444,\""#bd3786\""],[0.5555555555555556,\""#d8576b\""],[0.6666666666666666,\""#ed7953\""],[0.7777777777777778,\""#fb9f3a\""],[0.8888888888888888,\""#fdca26\""],[1.0,\""#f0f921\""]],\""diverging\"":[[0,\""#8e0152\""],[0.1,\""#c51b7d\""],[0.2,\""#de77ae\""],[0.3,\""#f1b6da\""],[0.4,\""#fde0ef\""],[0.5,\""#f7f7f7\""],[0.6,\""#e6f5d0\""],[0.7,\""#b8e186\""],[0.8,\""#7fbc41\""],[0.9,\""#4d9221\""],[1,\""#276419\""]]},\""xaxis\"":{\""gridcolor\"":\""white\"",\""linecolor\"":\""white\"",\""ticks\"":\""\"",\""title\"":{\""standoff\"":15},\""zerolinecolor\"":\""white\"",\""automargin\"":true,\""zerolinewidth\"":2},\""yaxis\"":{\""gridcolor\"":\""wh
ite\"",\""linecolor\"":\""white\"",\""ticks\"":\""\"",\""title\"":{\""standoff\"":15},\""zerolinecolor\"":\""white\"",\""automargin\"":true,\""zerolinewidth\"":2},\""scene\"":{\""xaxis\"":{\""backgroundcolor\"":\""#E5ECF6\"",\""gridcolor\"":\""white\"",\""linecolor\"":\""white\"",\""showbackground\"":true,\""ticks\"":\""\"",\""zerolinecolor\"":\""white\"",\""gridwidth\"":2},\""yaxis\"":{\""backgroundcolor\"":\""#E5ECF6\"",\""gridcolor\"":\""white\"",\""linecolor\"":\""white\"",\""showbackground\"":true,\""ticks\"":\""\"",\""zerolinecolor\"":\""white\"",\""gridwidth\"":2},\""zaxis\"":{\""backgroundcolor\"":\""#E5ECF6\"",\""gridcolor\"":\""white\"",\""linecolor\"":\""white\"",\""showbackground\"":true,\""ticks\"":\""\"",\""zerolinecolor\"":\""white\"",\""gridwidth\"":2}},\""shapedefaults\"":{\""line\"":{\""color\"":\""#2a3f5f\""}},\""annotationdefaults\"":{\""arrowcolor\"":\""#2a3f5f\"",\""arrowhead\"":0,\""arrowwidth\"":1},\""geo\"":{\""bgcolor\"":\""white\"",\""landcolor\"":\""#E5ECF6\"",\""subunitcolor\"":\""white\"",\""showland\"":true,\""showlakes\"":true,\""lakecolor\"":\""white\""},\""title\"":{\""x\"":0.05},\""mapbox\"":{\""style\"":\""light\""}}}}}""}",,,2025-01-19 00:16:17.909362
4
+ "{""headers"": [""1""], ""data"": [[""""]], ""metadata"": null}","{""headers"": [""1""], ""data"": [[""""]], ""metadata"": null}",,,,2025-01-19 00:20:32.556642
flatten_ndarray.py ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import numpy as np
3
+
4
def flatten_ndarray_column(df, column_name):
    """Split an array-valued column into one column per leading-axis entry.

    Each cell is first normalised by ``flatten_ndarray``; entry ``i`` along
    the first axis of the normalised value is then stored in a new column
    named ``f"{column_name}_{i}"``. Cells with fewer entries than the longest
    one get ``np.nan`` in the missing columns.

    Args:
        df: the DataFrame to extend. It is modified in place.
        column_name: name of the column holding the arrays.

    Returns:
        The same ``df`` object, with the new columns added. An empty frame is
        returned unchanged.
    """
    def flatten_ndarray(ndarray):
        # Object arrays (arrays of arrays) are normalised recursively and
        # stacked along the first axis.
        if isinstance(ndarray, np.ndarray) and ndarray.dtype == 'O':
            return np.concatenate([flatten_ndarray(subarray) for subarray in ndarray])
        # A plain 1-D array becomes a single row of shape (1, n).
        elif isinstance(ndarray, np.ndarray) and ndarray.ndim == 1:
            return np.expand_dims(ndarray, axis=0)
        return ndarray

    flattened_data = df[column_name].apply(flatten_ndarray)
    if flattened_data.empty:
        # max() of an empty sequence raises ValueError; nothing to split.
        return df
    max_length = max(flattened_data.apply(len))

    for i in range(max_length):
        # Bind i as a default so the lambda does not depend on the loop variable.
        df[f'{column_name}_{i}'] = flattened_data.apply(lambda x, i=i: x[i] if i < len(x) else np.nan)

    return df
19
+
20
# Example usage
if __name__ == "__main__":
    # Build a sample DataFrame: one cell made from two equal-length arrays
    # and one cell holding a plain 1-D array.
    data = {
        'target': [np.array([np.array([1, 2]), np.array([3, 4])]), np.array([5, 6, 7])]
    }
    df = pd.DataFrame(data)

    # Split the nested ndarrays of the target column into separate columns.
    df = flatten_ndarray_column(df, 'target')
    print(df)
gitattributes ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tflite filter=lfs diff=lfs merge=lfs -text
29
+ *.tgz filter=lfs diff=lfs merge=lfs -text
30
+ *.wasm filter=lfs diff=lfs merge=lfs -text
31
+ *.xz filter=lfs diff=lfs merge=lfs -text
32
+ *.zip filter=lfs diff=lfs merge=lfs -text
33
+ *.zst filter=lfs diff=lfs merge=lfs -text
34
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
line_plot.py ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ from fastapi import FastAPI
3
+ import plotly.graph_objects as go
4
+ import gradio as gr
5
+ import uvicorn
6
+
7
def create_plot(df):
    """Build a Plotly line chart from a DataFrame.

    The first column of ``df`` supplies the x values; every remaining column
    is drawn as its own line, named after the column.
    """
    x_values = df[df.columns[0]]
    fig = go.Figure()
    for series_name in df.columns[1:]:
        trace = go.Scatter(x=x_values, y=df[series_name], mode='lines', name=series_name)
        fig.add_trace(trace)

    # Legend configuration: horizontal, anchored above the plot at the right.
    legend_options = dict(
        title="Variables",
        orientation="h",
        yanchor="bottom",
        y=1.02,
        xanchor="right",
        x=1,
    )
    fig.update_layout(legend=legend_options, xaxis_title='Time', yaxis_title='Values')
    return fig
26
+
27
# Build the Gradio interface.
demo = gr.Blocks()
with demo:
    # Sample data: six daily timestamps and two value series.
    data = {
        'time': pd.date_range(start='2023-01-01', periods=6, freq='D'),
        'y1': [0, 1, 4, 9, 16, 25],
        'y2': [0, 1, 2, 3, 4, 5]
    }
    df = pd.DataFrame(data)
    plot = create_plot(df)
    gr.Plot(plot)

# Run the Gradio interface, mounted at the root of a FastAPI app.
if __name__ == "__main__":
    app = FastAPI()
    app = gr.mount_gradio_app(app, demo, path="/")
    uvicorn.run(app, host="127.0.0.1", port=7860)
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ pyarrow
2
+ fsspec[http]
3
+ tqdm
4
+ datasets
5
+ git+https://github.com/huggingface/hffs.git@63298cde9f994a0ab16c3ba89c5f7a9d140f20b2
6
+ tabulate
tempCodeRunnerFile.py ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
if __name__ == "__main__":
    # The launch code below is disabled.
    # app = FastAPI()
    # app = gr.mount_gradio_app(app, demo, path="/")
    # uvicorn.run(app, host="127.0.0.1", port=7860)
    pass  # an `if` body containing only comments is a syntax error
your_script.py ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import altair as alt
2
+ from fastapi import FastAPI
3
+ import gradio as gr
4
+ import numpy as np
5
+ import pandas as pd
6
+ import uvicorn
7
+ from vega_datasets import data
8
+
9
+
10
def plot(v, a):
    """Compute the trajectory of a projectile launched from the ground.

    Args:
        v: launch speed.
        a: launch angle in degrees.

    Returns:
        A DataFrame with columns ``x`` and ``y`` holding 40 points sampled
        evenly in time from launch to landing.
    """
    g = 9.81
    # Degrees to radians; np.pi replaces the 3.14 approximation, which skewed
    # every point slightly (e.g. the final height did not return to zero).
    theta = a / 180 * np.pi
    # Total time of flight.
    tmax = ((2 * v) * np.sin(theta)) / g
    timemat = tmax * np.linspace(0, 1, 40)

    x = (v * timemat) * np.cos(theta)
    y = ((v * timemat) * np.sin(theta)) - ((0.5 * g) * (timemat**2))
    df = pd.DataFrame({"x": x, "y": y})
    return df
20
+
21
demo = gr.Blocks()

with demo:
    gr.Markdown(
        r"Let's do some kinematics! Choose the speed and angle to see the trajectory. Remember that the range $R = v_0^2 \cdot \frac{\sin(2\theta)}{g}$"
    )

    # Inputs for plot(): launch speed and launch angle in degrees.
    with gr.Row():
        speed = gr.Slider(1, 30, 25, label="Speed")
        angle = gr.Slider(0, 90, 45, label="Angle")
    # Line plot that receives the DataFrame returned by plot().
    output = gr.LinePlot(
        x="x",
        y="y",
        overlay_point=True,
        tooltip=["x", "y"],
        x_lim=[0, 100],
        y_lim=[0, 60],
        width=350,
        height=300,
    )
    btn = gr.Button(value="Run")
    btn.click(plot, [speed, angle], output)

# Serve the Gradio UI from the root of a FastAPI app.
if __name__ == "__main__":
    app = FastAPI()
    app = gr.mount_gradio_app(app, demo, path="/")
    uvicorn.run(app, host="127.0.0.1", port=7860)