Spaces:
Running
Running
Liu Yiwen
committed on
Commit
·
e6303fa
1
Parent(s):
1d5265e
losta_explorer v0.3.0
Browse files- __pycache__/utils.cpython-311.pyc +0 -0
- app.py +58 -40
- user_input.txt +1 -0
- utils.py +65 -29
__pycache__/utils.cpython-311.pyc
CHANGED
Binary files a/__pycache__/utils.cpython-311.pyc and b/__pycache__/utils.cpython-311.pyc differ
|
|
app.py
CHANGED
@@ -21,9 +21,9 @@ class AppError(RuntimeError):
|
|
21 |
|
22 |
|
23 |
APP_URL = "http://127.0.0.1:7860" if os.getenv("DEV") else "https://Kamarov-lotsa-explorer.hf.space"
|
24 |
-
PAGE_SIZE =
|
25 |
MAX_CACHED_BLOBS = PAGE_SIZE * 10
|
26 |
-
TIME_PLOTS_NUM =
|
27 |
_blobs_cache = {}
|
28 |
|
29 |
|
@@ -209,29 +209,37 @@ def get_page(dataset: str, config: str, split: str, page: str) -> Tuple[str, int
|
|
209 |
with gr.Blocks() as demo:
|
210 |
# 初始化组件
|
211 |
gr.Markdown("A tool for interactive observation of lotsa dataset, extended from lhoestq/datasets-explorer")
|
212 |
-
cp_dataset = gr.Textbox("Salesforce/lotsa_data", label="Pick a dataset",
|
213 |
cp_go = gr.Button("Explore")
|
214 |
cp_config = gr.Dropdown(["plain_text"], value="plain_text", label="Config", visible=False)
|
215 |
cp_split = gr.Dropdown(["train", "validation"], value="train", label="Split", visible=False)
|
216 |
-
cp_goto_next_page = gr.Button("Next page", visible=False)
|
217 |
cp_error = gr.Markdown("", visible=False)
|
218 |
cp_info = gr.Markdown("", visible=False)
|
219 |
cp_result = gr.Markdown("", visible=False)
|
220 |
-
|
221 |
# 初始化Salesforce/lotsa_data数据集展示使用的组件
|
222 |
-
componets = []
|
223 |
-
for _ in range(TIME_PLOTS_NUM):
|
224 |
-
with gr.Row():
|
225 |
-
with gr.Column(scale=2):
|
226 |
-
textbox = gr.Textbox("名称或说明")
|
227 |
-
statistics_textbox = gr.DataFrame()
|
228 |
-
with gr.Column(scale=3):
|
229 |
-
plot = gr.Plot()
|
230 |
-
componets.append({"textbox": textbox, "statistics_textbox": statistics_textbox, "plot": plot})
|
231 |
-
|
232 |
with gr.Row():
|
233 |
-
|
234 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
235 |
|
236 |
def show_error(message: str) -> dict:
|
237 |
return {
|
@@ -240,7 +248,7 @@ with gr.Blocks() as demo:
|
|
240 |
cp_result: gr.update(visible=False, value=""),
|
241 |
}
|
242 |
|
243 |
-
def show_dataset_at_config_and_split_and_page(dataset: str, config: str, split: str, page: str) -> dict:
|
244 |
try:
|
245 |
ret = {}
|
246 |
if dataset != 'Salesforce/lotsa_data':
|
@@ -248,21 +256,23 @@ with gr.Blocks() as demo:
|
|
248 |
ret[cp_result] = gr.update(visible=True, value=markdown_result)
|
249 |
else:
|
250 |
# 对Salesforce/lotsa_data数据集进行特殊处理
|
251 |
-
|
252 |
-
|
253 |
-
|
254 |
-
|
|
|
|
|
|
|
|
|
255 |
# 将单行的DataFrame展开为新的DataFrame
|
256 |
-
df_without_index =
|
257 |
df_expanded = df_without_index.apply(pd.Series.explode).reset_index(drop=True).fillna(0)
|
258 |
-
|
259 |
-
|
260 |
-
|
261 |
-
componets[i]["statistics_textbox"]: gr.update(value=df_statistics),
|
262 |
-
componets[i]["plot"]: gr.update(value=create_plot(df_expanded))
|
263 |
-
})
|
264 |
return {
|
265 |
-
|
|
|
266 |
cp_info: gr.update(visible=True, value=f"Page {page}/{max_page} {info}"),
|
267 |
cp_error: gr.update(visible=False, value="")
|
268 |
}
|
@@ -274,7 +284,7 @@ with gr.Blocks() as demo:
|
|
274 |
next_page = str(int(page) + 1)
|
275 |
return {
|
276 |
**show_dataset_at_config_and_split_and_page(dataset, config, split, next_page),
|
277 |
-
cp_page: gr.update(value=next_page, visible=True),
|
278 |
}
|
279 |
except AppError as err:
|
280 |
return show_error(str(err))
|
@@ -283,9 +293,10 @@ with gr.Blocks() as demo:
|
|
283 |
try:
|
284 |
return {
|
285 |
**show_dataset_at_config_and_split_and_page(dataset, config, split, "1"),
|
286 |
-
|
287 |
-
|
288 |
-
|
|
|
289 |
}
|
290 |
except AppError as err:
|
291 |
return show_error(str(err))
|
@@ -318,17 +329,24 @@ with gr.Blocks() as demo:
|
|
318 |
except AppError as err:
|
319 |
return show_error(str(err))
|
320 |
|
321 |
-
|
322 |
-
|
323 |
-
|
|
|
|
|
|
|
|
|
|
|
324 |
cp_go.click(show_dataset, inputs=[cp_dataset], outputs=all_outputs)
|
325 |
cp_config.change(show_dataset_at_config, inputs=[cp_dataset, cp_config], outputs=all_outputs)
|
326 |
cp_split.change(show_dataset_at_config_and_split, inputs=[cp_dataset, cp_config, cp_split], outputs=all_outputs)
|
327 |
-
cp_goto_page.click(show_dataset_at_config_and_split_and_page, inputs=[cp_dataset, cp_config, cp_split, cp_page], outputs=all_outputs)
|
328 |
-
cp_goto_next_page.click(show_dataset_at_config_and_split_and_next_page, inputs=[cp_dataset, cp_config, cp_split, cp_page], outputs=all_outputs)
|
|
|
|
|
329 |
|
330 |
|
331 |
if __name__ == "__main__":
|
332 |
|
333 |
app = gr.mount_gradio_app(app, demo, path="/")
|
334 |
-
uvicorn.run(app, host="
|
|
|
21 |
|
22 |
|
23 |
APP_URL = "http://127.0.0.1:7860" if os.getenv("DEV") else "https://Kamarov-lotsa-explorer.hf.space"
|
24 |
+
PAGE_SIZE = 1
|
25 |
MAX_CACHED_BLOBS = PAGE_SIZE * 10
|
26 |
+
TIME_PLOTS_NUM = 1
|
27 |
_blobs_cache = {}
|
28 |
|
29 |
|
|
|
209 |
with gr.Blocks() as demo:
|
210 |
# 初始化组件
|
211 |
gr.Markdown("A tool for interactive observation of lotsa dataset, extended from lhoestq/datasets-explorer")
|
212 |
+
cp_dataset = gr.Textbox("Salesforce/lotsa_data", label="Pick a dataset", interactive=False)
|
213 |
cp_go = gr.Button("Explore")
|
214 |
cp_config = gr.Dropdown(["plain_text"], value="plain_text", label="Config", visible=False)
|
215 |
cp_split = gr.Dropdown(["train", "validation"], value="train", label="Split", visible=False)
|
216 |
+
# cp_goto_next_page = gr.Button("Next page", visible=False)
|
217 |
cp_error = gr.Markdown("", visible=False)
|
218 |
cp_info = gr.Markdown("", visible=False)
|
219 |
cp_result = gr.Markdown("", visible=False)
|
220 |
+
tot_samples = 0
|
221 |
# 初始化Salesforce/lotsa_data数据集展示使用的组件
|
222 |
+
# componets = []
|
223 |
+
# for _ in range(TIME_PLOTS_NUM):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
224 |
with gr.Row():
|
225 |
+
with gr.Column(scale=3):
|
226 |
+
select_box = gr.Dropdown(choices=["items"], label="Select some items", multiselect=True, interactive=True)
|
227 |
+
with gr.Column(scale=1):
|
228 |
+
select_buttom = gr.Button("Show selected items")
|
229 |
+
with gr.Row():
|
230 |
+
with gr.Column(scale=2):
|
231 |
+
statistics_textbox = gr.DataFrame()
|
232 |
+
with gr.Column(scale=3):
|
233 |
+
plot = gr.Plot()
|
234 |
+
user_input_text = gr.Textbox(placeholder="输入一些内容")
|
235 |
+
# componets.append({"select_box": select_box,
|
236 |
+
# "statistics_textbox": statistics_textbox,
|
237 |
+
# "user_input_text": user_input_text,
|
238 |
+
# "plot": plot})
|
239 |
+
|
240 |
+
# with gr.Row():
|
241 |
+
# cp_page = gr.Textbox("1", label="Page", placeholder="1", visible=False)
|
242 |
+
# cp_goto_page = gr.Button("Go to page", visible=False)
|
243 |
|
244 |
def show_error(message: str) -> dict:
|
245 |
return {
|
|
|
248 |
cp_result: gr.update(visible=False, value=""),
|
249 |
}
|
250 |
|
251 |
+
def show_dataset_at_config_and_split_and_page(dataset: str, config: str, split: str, page: str|List[str]) -> dict:
|
252 |
try:
|
253 |
ret = {}
|
254 |
if dataset != 'Salesforce/lotsa_data':
|
|
|
256 |
ret[cp_result] = gr.update(visible=True, value=markdown_result)
|
257 |
else:
|
258 |
# 对Salesforce/lotsa_data数据集进行特殊处理
|
259 |
+
if type(page) == str:
|
260 |
+
page = [page]
|
261 |
+
df_list, id_list = [], []
|
262 |
+
for i, page in enumerate(page):
|
263 |
+
df, max_page, info = get_page(dataset, config, split, page)
|
264 |
+
df = clean_up_df(df)
|
265 |
+
row = df.iloc[0]
|
266 |
+
id_list.append(row['item_id'])
|
267 |
# 将单行的DataFrame展开为新的DataFrame
|
268 |
+
df_without_index = row.drop('item_id').to_frame().T
|
269 |
df_expanded = df_without_index.apply(pd.Series.explode).reset_index(drop=True).fillna(0)
|
270 |
+
df_list.append(df_expanded)
|
271 |
+
global tot_samples
|
272 |
+
tot_samples = max_page
|
|
|
|
|
|
|
273 |
return {
|
274 |
+
statistics_textbox: gr.update(value=create_statistic(df_list, id_list)),
|
275 |
+
plot: gr.update(value=create_plot(df_list, id_list)),
|
276 |
cp_info: gr.update(visible=True, value=f"Page {page}/{max_page} {info}"),
|
277 |
cp_error: gr.update(visible=False, value="")
|
278 |
}
|
|
|
284 |
next_page = str(int(page) + 1)
|
285 |
return {
|
286 |
**show_dataset_at_config_and_split_and_page(dataset, config, split, next_page),
|
287 |
+
# cp_page: gr.update(value=next_page, visible=True),
|
288 |
}
|
289 |
except AppError as err:
|
290 |
return show_error(str(err))
|
|
|
293 |
try:
|
294 |
return {
|
295 |
**show_dataset_at_config_and_split_and_page(dataset, config, split, "1"),
|
296 |
+
select_box: gr.update(choices=[f"{i+1}" for i in range(tot_samples)], value=["1"]),
|
297 |
+
# cp_page: gr.update(value="1", visible=True),
|
298 |
+
# cp_goto_page: gr.update(visible=True),
|
299 |
+
# cp_goto_next_page: gr.update(visible=True),
|
300 |
}
|
301 |
except AppError as err:
|
302 |
return show_error(str(err))
|
|
|
329 |
except AppError as err:
|
330 |
return show_error(str(err))
|
331 |
|
332 |
+
def save_to_file(user_input):
    """
    Persist the text submitted in the user input textbox.

    Overwrites ``user_input.txt`` (resolved against the current working
    directory) with ``user_input``. Nothing is returned.

    The file is written as UTF-8 explicitly: without an ``encoding`` argument
    ``open`` falls back to the platform default, which can fail on (or
    garble) non-ASCII text such as Chinese input.
    """
    with open("user_input.txt", "w", encoding="utf-8") as file:
        file.write(user_input)
|
335 |
+
|
336 |
+
all_outputs = [cp_config, cp_split,
|
337 |
+
# cp_page, cp_goto_page, cp_goto_next_page,
|
338 |
+
cp_result, cp_info, cp_error,
|
339 |
+
select_box, select_buttom, statistics_textbox, user_input_text, plot]
|
340 |
cp_go.click(show_dataset, inputs=[cp_dataset], outputs=all_outputs)
|
341 |
cp_config.change(show_dataset_at_config, inputs=[cp_dataset, cp_config], outputs=all_outputs)
|
342 |
cp_split.change(show_dataset_at_config_and_split, inputs=[cp_dataset, cp_config, cp_split], outputs=all_outputs)
|
343 |
+
# cp_goto_page.click(show_dataset_at_config_and_split_and_page, inputs=[cp_dataset, cp_config, cp_split, cp_page], outputs=all_outputs)
|
344 |
+
# cp_goto_next_page.click(show_dataset_at_config_and_split_and_next_page, inputs=[cp_dataset, cp_config, cp_split, cp_page], outputs=all_outputs)
|
345 |
+
user_input_text.submit(save_to_file, inputs=user_input_text)
|
346 |
+
select_buttom.click(show_dataset_at_config_and_split_and_page, inputs=[cp_dataset, cp_config, cp_split, select_box], outputs=all_outputs)
|
347 |
|
348 |
|
349 |
if __name__ == "__main__":
|
350 |
|
351 |
app = gr.mount_gradio_app(app, demo, path="/")
|
352 |
+
uvicorn.run(app, host="127.0.0.1", port=7860)
|
user_input.txt
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
1234
|
utils.py
CHANGED
@@ -52,19 +52,20 @@ def flatten_ndarray_column(df, column_name):
|
|
52 |
|
53 |
return df
|
54 |
|
55 |
-
def create_plot(
|
56 |
"""
|
57 |
-
|
58 |
"""
|
59 |
fig = go.Figure()
|
60 |
-
for
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
|
66 |
-
|
67 |
-
|
|
|
68 |
|
69 |
# 配置图例
|
70 |
fig.update_layout(
|
@@ -81,26 +82,33 @@ def create_plot(df):
|
|
81 |
)
|
82 |
return fig
|
83 |
|
84 |
-
def create_statistic(
|
85 |
"""
|
86 |
-
|
87 |
"""
|
88 |
-
|
89 |
-
|
90 |
-
|
91 |
-
|
92 |
-
|
93 |
-
|
94 |
-
|
95 |
-
|
96 |
-
|
97 |
-
|
98 |
-
|
99 |
-
|
100 |
-
|
101 |
-
|
102 |
-
|
103 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
104 |
|
105 |
def clean_up_df(df: pd.DataFrame) -> pd.DataFrame:
|
106 |
"""
|
@@ -116,4 +124,32 @@ def clean_up_df(df: pd.DataFrame) -> pd.DataFrame:
|
|
116 |
df.drop(columns=['start', 'freq', 'target'], inplace=True)
|
117 |
if 'past_feat_dynamic_real' in df.columns:
|
118 |
df.drop(columns=['past_feat_dynamic_real'], inplace=True)
|
119 |
-
return df
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
52 |
|
53 |
return df
|
54 |
|
55 |
+
def create_plot(dfs:list[pd.DataFrame], ids:list[str]):
|
56 |
"""
|
57 |
+
创建一个包含所有传入 DataFrame 的线图。
|
58 |
"""
|
59 |
fig = go.Figure()
|
60 |
+
for df, df_id in zip(dfs, ids):
|
61 |
+
for i, column in enumerate(df.columns[1:]):
|
62 |
+
fig.add_trace(go.Scatter(
|
63 |
+
x=df[df.columns[0]],
|
64 |
+
y=df[column],
|
65 |
+
mode='lines',
|
66 |
+
name=f"item_{df_id} - {column}",
|
67 |
+
visible=True if i == 0 else 'legendonly'
|
68 |
+
))
|
69 |
|
70 |
# 配置图例
|
71 |
fig.update_layout(
|
|
|
82 |
)
|
83 |
return fig
|
84 |
|
85 |
+
def create_statistic(dfs: list[pd.DataFrame], ids: list[str]):
    """
    Compute summary statistics for a list of DataFrames.

    For every DataFrame the first column is skipped and the mean, standard
    deviation, maximum and minimum of each remaining column are computed and
    rounded to two decimals.

    Args:
        dfs: DataFrames to summarise.
        ids: Labels paired positionally with ``dfs``; each label is used as
            the prefix of the ``Variables`` entries (``"<id>_<column>"``).
            Pairing uses ``zip``, so surplus entries in the longer list are
            ignored.

    Returns:
        A DataFrame with the columns ``Variables``, ``mean``, ``std``,
        ``max`` and ``min`` holding one row per summarised column, in input
        order. If there is nothing to summarise, an empty DataFrame with the
        same columns is returned.
    """
    stat_columns = ['Variables', 'mean', 'std', 'max', 'min']
    stats_list = []

    # `item_id` rather than `id`, so the builtin is not shadowed.
    for df, item_id in zip(dfs, ids):
        # Everything after the first column is treated as a value column.
        df_values = df.iloc[:, 1:]

        stats_list.append(pd.DataFrame({
            'Variables': [f"{item_id}_{col}" for col in df_values.columns],
            'mean': df_values.mean().round(2).values,
            'std': df_values.std().round(2).values,
            'max': df_values.max().round(2).values,
            'min': df_values.min().round(2).values,
        }))

    # pd.concat raises ValueError on an empty list; return an empty table
    # with the expected columns instead.
    if not stats_list:
        return pd.DataFrame(columns=stat_columns)

    return pd.concat(stats_list, ignore_index=True)
|
112 |
|
113 |
def clean_up_df(df: pd.DataFrame) -> pd.DataFrame:
|
114 |
"""
|
|
|
124 |
df.drop(columns=['start', 'freq', 'target'], inplace=True)
|
125 |
if 'past_feat_dynamic_real' in df.columns:
|
126 |
df.drop(columns=['past_feat_dynamic_real'], inplace=True)
|
127 |
+
return df
|
128 |
+
|
129 |
+
if __name__ == '__main__':
    # Manual smoke test: build two small frames and render them together.

    # Sample data; the first column of each frame is the x axis.
    data1 = {
        'Time': ['2023-01-01', '2023-01-02', '2023-01-03'],
        'Value1': [10, 15, 20],
        'Value2': [20, 25, 30]
    }

    data2 = {
        'Time': ['2023-01-01', '2023-01-02', '2023-01-03'],
        'Value3': [5, 10, 15],
        'Value4': [15, 20, 25]
    }

    df1 = pd.DataFrame(data1)
    df2 = pd.DataFrame(data2)

    # Convert the time column to datetime values.
    df1['Time'] = pd.to_datetime(df1['Time'])
    df2['Time'] = pd.to_datetime(df2['Time'])

    # create_plot takes a list of frames plus a parallel list of ids;
    # passing the two frames positionally would zip their column labels
    # instead of plotting them.
    fig = create_plot([df1, df2], ["1", "2"])

    # Display the figure.
    fig.show()
|