polinaeterna HF staff committed on
Commit
8782f16
·
verified ·
1 Parent(s): a1fafd0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +3 -1
app.py CHANGED
@@ -19,7 +19,7 @@ from transformers import AutoModel, AutoTokenizer, AutoConfig
19
  from tqdm import tqdm
20
 
21
 
22
- logging.basicConfig(level=logging.DEBUG, format="%(asctime)s - %(levelname)s - %(message)s")
23
 
24
 
25
  session = requests.Session()
@@ -90,6 +90,7 @@ def run_quality_check(dataset, column, batch_size, num_examples):
90
  config = "default" if "default" in info_resp["dataset_info"] else next(iter(info_resp["dataset_info"]))
91
  split = "train" if "train" in info_resp["dataset_info"][config]["splits"] else next(
92
  iter(info_resp["dataset_info"][config]["splits"]))
 
93
  try:
94
  data = pl.read_parquet(f"hf://datasets/{dataset}@~parquet/{config}/{split}/0000.parquet", columns=[column])
95
  except pl.exceptions.ComputeError:
@@ -101,6 +102,7 @@ def run_quality_check(dataset, column, batch_size, num_examples):
101
  except Exception as error:
102
  yield f"❌ {error}", gr.BarPlot(), pd.DataFrame(), pd.DataFrame(), pd.DataFrame(), pd.DataFrame(),
103
  return
 
104
 
105
  texts = [text[:10000] for text in data[column].to_list()]
106
  # texts_sample = data.sample(100, shuffle=True, seed=16).to_pandas()
 
19
  from tqdm import tqdm
20
 
21
 
22
+ logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
23
 
24
 
25
  session = requests.Session()
 
90
  config = "default" if "default" in info_resp["dataset_info"] else next(iter(info_resp["dataset_info"]))
91
  split = "train" if "train" in info_resp["dataset_info"][config]["splits"] else next(
92
  iter(info_resp["dataset_info"][config]["splits"]))
93
+ logging.info(f"Fetching data for {dataset} {config} {split}")
94
  try:
95
  data = pl.read_parquet(f"hf://datasets/{dataset}@~parquet/{config}/{split}/0000.parquet", columns=[column])
96
  except pl.exceptions.ComputeError:
 
102
  except Exception as error:
103
  yield f"❌ {error}", gr.BarPlot(), pd.DataFrame(), pd.DataFrame(), pd.DataFrame(), pd.DataFrame(),
104
  return
105
+ logging.info("Data fetched.")
106
 
107
  texts = [text[:10000] for text in data[column].to_list()]
108
  # texts_sample = data.sample(100, shuffle=True, seed=16).to_pandas()