Richard Guo committed on
Commit
de5bc26
·
1 Parent(s): ae7183e

fixed indexable field via cast

Browse files
Files changed (1) hide show
  1. build_map.py +6 -7
build_map.py CHANGED
@@ -143,16 +143,14 @@ def upload_dataset_to_atlas(dataset_dict,
143
 
144
 
145
  # return longest string field from 5 samples
 
146
  if indexed_field is None:
147
  longest_length = 0
148
  for field in string_fields:
149
  length = 0
150
- for i in range(len(dataset_dict["head"])):
151
- ex = dataset_dict["head"].take([i])
152
- print(ex)
153
- if ex[field]:
154
- print(ex[field])
155
- length += len(ex[field].split())
156
  if length > longest_length:
157
  longest_length = length
158
  indexed_field = field
@@ -262,4 +260,5 @@ if __name__ == "__main__":
262
  project_name = "huggingface_auto_upload_test-dolly-15k"
263
 
264
  dataset_dict = load_dataset_and_metadata(dataset_name)
265
- print(upload_dataset_to_atlas(dataset_dict, project_name=project_name))
 
 
143
 
144
 
145
  # return longest string field from 5 samples
146
+ head = dataset_dict["head"]
147
  if indexed_field is None:
148
  longest_length = 0
149
  for field in string_fields:
150
  length = 0
151
+ for i in range(len(head)):
152
+ if head[field][i]:
153
+ length += len(str(head[field][i]).split())
 
 
 
154
  if length > longest_length:
155
  longest_length = length
156
  indexed_field = field
 
260
  project_name = "huggingface_auto_upload_test-dolly-15k"
261
 
262
  dataset_dict = load_dataset_and_metadata(dataset_name)
263
+ api_token = "ODdPKqJHYci4Gq4jnCC5-VR0L-rnIdfIy-6djgC4CTPCJ"
264
+ print(upload_dataset_to_atlas(dataset_dict, api_token, project_name=project_name))