Liu Yiwen committed on
Commit
e6303fa
·
1 Parent(s): 1d5265e

losta_explorer v0.3.0

Browse files
Files changed (4) hide show
  1. __pycache__/utils.cpython-311.pyc +0 -0
  2. app.py +58 -40
  3. user_input.txt +1 -0
  4. utils.py +65 -29
__pycache__/utils.cpython-311.pyc CHANGED
Binary files a/__pycache__/utils.cpython-311.pyc and b/__pycache__/utils.cpython-311.pyc differ
 
app.py CHANGED
@@ -21,9 +21,9 @@ class AppError(RuntimeError):
21
 
22
 
23
  APP_URL = "http://127.0.0.1:7860" if os.getenv("DEV") else "https://Kamarov-lotsa-explorer.hf.space"
24
- PAGE_SIZE = 5
25
  MAX_CACHED_BLOBS = PAGE_SIZE * 10
26
- TIME_PLOTS_NUM = 5
27
  _blobs_cache = {}
28
 
29
 
@@ -209,29 +209,37 @@ def get_page(dataset: str, config: str, split: str, page: str) -> Tuple[str, int
209
  with gr.Blocks() as demo:
210
  # 初始化组件
211
  gr.Markdown("A tool for interactive observation of lotsa dataset, extended from lhoestq/datasets-explorer")
212
- cp_dataset = gr.Textbox("Salesforce/lotsa_data", label="Pick a dataset", placeholder="competitions/aiornot")
213
  cp_go = gr.Button("Explore")
214
  cp_config = gr.Dropdown(["plain_text"], value="plain_text", label="Config", visible=False)
215
  cp_split = gr.Dropdown(["train", "validation"], value="train", label="Split", visible=False)
216
- cp_goto_next_page = gr.Button("Next page", visible=False)
217
  cp_error = gr.Markdown("", visible=False)
218
  cp_info = gr.Markdown("", visible=False)
219
  cp_result = gr.Markdown("", visible=False)
220
-
221
  # 初始化Salesforce/lotsa_data数据集展示使用的组件
222
- componets = []
223
- for _ in range(TIME_PLOTS_NUM):
224
- with gr.Row():
225
- with gr.Column(scale=2):
226
- textbox = gr.Textbox("名称或说明")
227
- statistics_textbox = gr.DataFrame()
228
- with gr.Column(scale=3):
229
- plot = gr.Plot()
230
- componets.append({"textbox": textbox, "statistics_textbox": statistics_textbox, "plot": plot})
231
-
232
  with gr.Row():
233
- cp_page = gr.Textbox("1", label="Page", placeholder="1", visible=False)
234
- cp_goto_page = gr.Button("Go to page", visible=False)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
235
 
236
  def show_error(message: str) -> dict:
237
  return {
@@ -240,7 +248,7 @@ with gr.Blocks() as demo:
240
  cp_result: gr.update(visible=False, value=""),
241
  }
242
 
243
- def show_dataset_at_config_and_split_and_page(dataset: str, config: str, split: str, page: str) -> dict:
244
  try:
245
  ret = {}
246
  if dataset != 'Salesforce/lotsa_data':
@@ -248,21 +256,23 @@ with gr.Blocks() as demo:
248
  ret[cp_result] = gr.update(visible=True, value=markdown_result)
249
  else:
250
  # 对Salesforce/lotsa_data数据集进行特殊处理
251
- df, max_page, info = get_page(dataset, config, split, page)
252
- df = clean_up_df(df)
253
- for i, rows in df.iterrows():
254
- index = rows['item_id']
 
 
 
 
255
  # 将单行的DataFrame展开为新的DataFrame
256
- df_without_index = rows.drop('item_id').to_frame().T
257
  df_expanded = df_without_index.apply(pd.Series.explode).reset_index(drop=True).fillna(0)
258
- df_statistics = create_statistic(df_expanded)
259
- ret.update({
260
- componets[i]["textbox"]: gr.update(value=f"item_id: {index}"),
261
- componets[i]["statistics_textbox"]: gr.update(value=df_statistics),
262
- componets[i]["plot"]: gr.update(value=create_plot(df_expanded))
263
- })
264
  return {
265
- **ret,
 
266
  cp_info: gr.update(visible=True, value=f"Page {page}/{max_page} {info}"),
267
  cp_error: gr.update(visible=False, value="")
268
  }
@@ -274,7 +284,7 @@ with gr.Blocks() as demo:
274
  next_page = str(int(page) + 1)
275
  return {
276
  **show_dataset_at_config_and_split_and_page(dataset, config, split, next_page),
277
- cp_page: gr.update(value=next_page, visible=True),
278
  }
279
  except AppError as err:
280
  return show_error(str(err))
@@ -283,9 +293,10 @@ with gr.Blocks() as demo:
283
  try:
284
  return {
285
  **show_dataset_at_config_and_split_and_page(dataset, config, split, "1"),
286
- cp_page: gr.update(value="1", visible=True),
287
- cp_goto_page: gr.update(visible=True),
288
- cp_goto_next_page: gr.update(visible=True),
 
289
  }
290
  except AppError as err:
291
  return show_error(str(err))
@@ -318,17 +329,24 @@ with gr.Blocks() as demo:
318
  except AppError as err:
319
  return show_error(str(err))
320
 
321
- all_outputs = [cp_config, cp_split, cp_page, cp_goto_page, cp_goto_next_page, cp_result, cp_info, cp_error]
322
- for componet in componets:
323
- all_outputs += list(componet.values())
 
 
 
 
 
324
  cp_go.click(show_dataset, inputs=[cp_dataset], outputs=all_outputs)
325
  cp_config.change(show_dataset_at_config, inputs=[cp_dataset, cp_config], outputs=all_outputs)
326
  cp_split.change(show_dataset_at_config_and_split, inputs=[cp_dataset, cp_config, cp_split], outputs=all_outputs)
327
- cp_goto_page.click(show_dataset_at_config_and_split_and_page, inputs=[cp_dataset, cp_config, cp_split, cp_page], outputs=all_outputs)
328
- cp_goto_next_page.click(show_dataset_at_config_and_split_and_next_page, inputs=[cp_dataset, cp_config, cp_split, cp_page], outputs=all_outputs)
 
 
329
 
330
 
331
  if __name__ == "__main__":
332
 
333
  app = gr.mount_gradio_app(app, demo, path="/")
334
- uvicorn.run(app, host="0.0.0.0", port=7860)
 
21
 
22
 
23
  APP_URL = "http://127.0.0.1:7860" if os.getenv("DEV") else "https://Kamarov-lotsa-explorer.hf.space"
24
+ PAGE_SIZE = 1
25
  MAX_CACHED_BLOBS = PAGE_SIZE * 10
26
+ TIME_PLOTS_NUM = 1
27
  _blobs_cache = {}
28
 
29
 
 
209
  with gr.Blocks() as demo:
210
  # 初始化组件
211
  gr.Markdown("A tool for interactive observation of lotsa dataset, extended from lhoestq/datasets-explorer")
212
+ cp_dataset = gr.Textbox("Salesforce/lotsa_data", label="Pick a dataset", interactive=False)
213
  cp_go = gr.Button("Explore")
214
  cp_config = gr.Dropdown(["plain_text"], value="plain_text", label="Config", visible=False)
215
  cp_split = gr.Dropdown(["train", "validation"], value="train", label="Split", visible=False)
216
+ # cp_goto_next_page = gr.Button("Next page", visible=False)
217
  cp_error = gr.Markdown("", visible=False)
218
  cp_info = gr.Markdown("", visible=False)
219
  cp_result = gr.Markdown("", visible=False)
220
+ tot_samples = 0
221
  # 初始化Salesforce/lotsa_data数据集展示使用的组件
222
+ # componets = []
223
+ # for _ in range(TIME_PLOTS_NUM):
 
 
 
 
 
 
 
 
224
  with gr.Row():
225
+ with gr.Column(scale=3):
226
+ select_box = gr.Dropdown(choices=["items"], label="Select some items", multiselect=True, interactive=True)
227
+ with gr.Column(scale=1):
228
+ select_buttom = gr.Button("Show selected items")
229
+ with gr.Row():
230
+ with gr.Column(scale=2):
231
+ statistics_textbox = gr.DataFrame()
232
+ with gr.Column(scale=3):
233
+ plot = gr.Plot()
234
+ user_input_text = gr.Textbox(placeholder="输入一些内容")
235
+ # componets.append({"select_box": select_box,
236
+ # "statistics_textbox": statistics_textbox,
237
+ # "user_input_text": user_input_text,
238
+ # "plot": plot})
239
+
240
+ # with gr.Row():
241
+ # cp_page = gr.Textbox("1", label="Page", placeholder="1", visible=False)
242
+ # cp_goto_page = gr.Button("Go to page", visible=False)
243
 
244
  def show_error(message: str) -> dict:
245
  return {
 
248
  cp_result: gr.update(visible=False, value=""),
249
  }
250
 
251
+ def show_dataset_at_config_and_split_and_page(dataset: str, config: str, split: str, page: str|List[str]) -> dict:
252
  try:
253
  ret = {}
254
  if dataset != 'Salesforce/lotsa_data':
 
256
  ret[cp_result] = gr.update(visible=True, value=markdown_result)
257
  else:
258
  # 对Salesforce/lotsa_data数据集进行特殊处理
259
+ if type(page) == str:
260
+ page = [page]
261
+ df_list, id_list = [], []
262
+ for i, page in enumerate(page):
263
+ df, max_page, info = get_page(dataset, config, split, page)
264
+ df = clean_up_df(df)
265
+ row = df.iloc[0]
266
+ id_list.append(row['item_id'])
267
  # 将单行的DataFrame展开为新的DataFrame
268
+ df_without_index = row.drop('item_id').to_frame().T
269
  df_expanded = df_without_index.apply(pd.Series.explode).reset_index(drop=True).fillna(0)
270
+ df_list.append(df_expanded)
271
+ global tot_samples
272
+ tot_samples = max_page
 
 
 
273
  return {
274
+ statistics_textbox: gr.update(value=create_statistic(df_list, id_list)),
275
+ plot: gr.update(value=create_plot(df_list, id_list)),
276
  cp_info: gr.update(visible=True, value=f"Page {page}/{max_page} {info}"),
277
  cp_error: gr.update(visible=False, value="")
278
  }
 
284
  next_page = str(int(page) + 1)
285
  return {
286
  **show_dataset_at_config_and_split_and_page(dataset, config, split, next_page),
287
+ # cp_page: gr.update(value=next_page, visible=True),
288
  }
289
  except AppError as err:
290
  return show_error(str(err))
 
293
  try:
294
  return {
295
  **show_dataset_at_config_and_split_and_page(dataset, config, split, "1"),
296
+ select_box: gr.update(choices=[f"{i+1}" for i in range(tot_samples)], value=["1"]),
297
+ # cp_page: gr.update(value="1", visible=True),
298
+ # cp_goto_page: gr.update(visible=True),
299
+ # cp_goto_next_page: gr.update(visible=True),
300
  }
301
  except AppError as err:
302
  return show_error(str(err))
 
329
  except AppError as err:
330
  return show_error(str(err))
331
 
332
def save_to_file(user_input):
    """
    Overwrite ``user_input.txt`` with the text submitted by the user.

    Args:
        user_input: Text to persist. ``None`` is stored as an empty file
            instead of raising ``TypeError`` inside ``file.write``.

    The file lives in the current working directory and is truncated on
    every call, so only the most recent submission is kept.
    """
    text = "" if user_input is None else user_input
    # Explicit UTF-8: the default encoding is locale-dependent, and non-ASCII
    # input (e.g. Chinese text) would fail or be garbled on non-UTF-8 locales.
    with open("user_input.txt", "w", encoding="utf-8") as file:
        file.write(text)
335
+
336
+ all_outputs = [cp_config, cp_split,
337
+ # cp_page, cp_goto_page, cp_goto_next_page,
338
+ cp_result, cp_info, cp_error,
339
+ select_box, select_buttom, statistics_textbox, user_input_text, plot]
340
  cp_go.click(show_dataset, inputs=[cp_dataset], outputs=all_outputs)
341
  cp_config.change(show_dataset_at_config, inputs=[cp_dataset, cp_config], outputs=all_outputs)
342
  cp_split.change(show_dataset_at_config_and_split, inputs=[cp_dataset, cp_config, cp_split], outputs=all_outputs)
343
+ # cp_goto_page.click(show_dataset_at_config_and_split_and_page, inputs=[cp_dataset, cp_config, cp_split, cp_page], outputs=all_outputs)
344
+ # cp_goto_next_page.click(show_dataset_at_config_and_split_and_next_page, inputs=[cp_dataset, cp_config, cp_split, cp_page], outputs=all_outputs)
345
+ user_input_text.submit(save_to_file, inputs=user_input_text)
346
+ select_buttom.click(show_dataset_at_config_and_split_and_page, inputs=[cp_dataset, cp_config, cp_split, select_box], outputs=all_outputs)
347
 
348
 
349
  if __name__ == "__main__":
350
 
351
  app = gr.mount_gradio_app(app, demo, path="/")
352
+ uvicorn.run(app, host="127.0.0.1", port=7860)
user_input.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ 1234
utils.py CHANGED
@@ -52,19 +52,20 @@ def flatten_ndarray_column(df, column_name):
52
 
53
  return df
54
 
55
- def create_plot(df):
56
  """
57
- 创建一个包含所有列的线图。
58
  """
59
  fig = go.Figure()
60
- for i, column in enumerate(df.columns[1:]):
61
- fig.add_trace(go.Scatter(
62
- x=df[df.columns[0]],
63
- y=df[column],
64
- mode='lines',
65
- name=column,
66
- visible=True if i == 0 else 'legendonly'
67
- ))
 
68
 
69
  # 配置图例
70
  fig.update_layout(
@@ -81,26 +82,33 @@ def create_plot(df):
81
  )
82
  return fig
83
 
84
- def create_statistic(df):
85
  """
86
- 计算数据集的统计信息。
87
  """
88
- df_values = df.iloc[:, 1:]
89
- # 计算统计值
90
- mean_values = df_values.mean()
91
- std_values = df_values.std()
92
- max_values = df_values.max()
93
- min_values = df_values.min()
94
-
95
- # 将这些统计信息合并成一个新的DataFrame
96
- stats_df = pd.DataFrame({
97
- 'Variables': df_values.columns,
98
- 'mean': mean_values.values,
99
- 'std': std_values.values,
100
- 'max': max_values.values,
101
- 'min': min_values.values
102
- })
103
- return stats_df
 
 
 
 
 
 
 
104
 
105
  def clean_up_df(df: pd.DataFrame) -> pd.DataFrame:
106
  """
@@ -116,4 +124,32 @@ def clean_up_df(df: pd.DataFrame) -> pd.DataFrame:
116
  df.drop(columns=['start', 'freq', 'target'], inplace=True)
117
  if 'past_feat_dynamic_real' in df.columns:
118
  df.drop(columns=['past_feat_dynamic_real'], inplace=True)
119
- return df
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
52
 
53
  return df
54
 
55
+ def create_plot(dfs:list[pd.DataFrame], ids:list[str]):
56
  """
57
+ 创建一个包含所有传入 DataFrame 的线图。
58
  """
59
  fig = go.Figure()
60
+ for df, df_id in zip(dfs, ids):
61
+ for i, column in enumerate(df.columns[1:]):
62
+ fig.add_trace(go.Scatter(
63
+ x=df[df.columns[0]],
64
+ y=df[column],
65
+ mode='lines',
66
+ name=f"item_{df_id} - {column}",
67
+ visible=True if i == 0 else 'legendonly'
68
+ ))
69
 
70
  # 配置图例
71
  fig.update_layout(
 
82
  )
83
  return fig
84
 
85
def create_statistic(dfs: list[pd.DataFrame], ids: list[str]):
    """
    Compute summary statistics for a list of DataFrames.

    For every DataFrame the first column is skipped, and the mean, standard
    deviation, maximum and minimum of each remaining column are computed and
    rounded to two decimals.

    Args:
        dfs: DataFrames to summarise.
        ids: Identifiers paired with ``dfs`` by position. Each identifier is
            prefixed to the column names of its DataFrame in the
            ``Variables`` column (``"<id>_<column>"``). Pairing uses ``zip``,
            so surplus entries in the longer list are ignored.

    Returns:
        A DataFrame with the columns ``Variables``, ``mean``, ``std``,
        ``max`` and ``min``, holding one row per (id, column) pair. When no
        DataFrames are supplied, an empty DataFrame with those columns is
        returned.
    """
    stat_columns = ['Variables', 'mean', 'std', 'max', 'min']
    stats_list = []

    for df, df_id in zip(dfs, ids):
        # The first column is not summarised.
        df_values = df.iloc[:, 1:]

        # Collect the per-column statistics of this DataFrame into one table.
        stats_list.append(pd.DataFrame({
            'Variables': [f"{df_id}_{col}" for col in df_values.columns],
            'mean': df_values.mean().round(2).values,
            'std': df_values.std().round(2).values,
            'max': df_values.max().round(2).values,
            'min': df_values.min().round(2).values
        }))

    # pd.concat raises ValueError on an empty list (e.g. nothing selected),
    # so return an empty table with the expected columns instead.
    if not stats_list:
        return pd.DataFrame(columns=stat_columns)

    # Merge the statistics of all DataFrames into a single table.
    return pd.concat(stats_list, ignore_index=True)
112
 
113
  def clean_up_df(df: pd.DataFrame) -> pd.DataFrame:
114
  """
 
124
  df.drop(columns=['start', 'freq', 'target'], inplace=True)
125
  if 'past_feat_dynamic_real' in df.columns:
126
  df.drop(columns=['past_feat_dynamic_real'], inplace=True)
127
+ return df
128
+
129
if __name__ == '__main__':

    # Manual smoke test: build two small frames and plot them together.
    data1 = {
        'Time': ['2023-01-01', '2023-01-02', '2023-01-03'],
        'Value1': [10, 15, 20],
        'Value2': [20, 25, 30]
    }

    data2 = {
        'Time': ['2023-01-01', '2023-01-02', '2023-01-03'],
        'Value3': [5, 10, 15],
        'Value4': [15, 20, 25]
    }

    df1 = pd.DataFrame(data1)
    df2 = pd.DataFrame(data2)

    # Convert the time column to datetime.
    df1['Time'] = pd.to_datetime(df1['Time'])
    df2['Time'] = pd.to_datetime(df2['Time'])

    # create_plot takes a list of DataFrames plus a parallel list of ids;
    # passing the two frames positionally would zip their column names and
    # fail on `str.columns`.
    fig = create_plot([df1, df2], ["1", "2"])

    # Show the figure.
    fig.show()