davidberenstein1957 HF staff committed on
Commit
b06a781
·
1 Parent(s): bf5627a

add labelling

Browse files
src/synthetic_dataset_generator/apps/base.py CHANGED
@@ -79,8 +79,10 @@ def validate_push_to_hub(org_name, repo_name):
79
 
80
  def combine_datasets(repo_id: str, dataset: Dataset) -> Dataset:
81
  try:
82
- dataset = load_dataset(repo_id, split="train")
83
- return concatenate_datasets([dataset, dataset])
 
 
84
  except Exception:
85
  return dataset
86
 
 
79
 
80
  def combine_datasets(repo_id: str, dataset: Dataset) -> Dataset:
81
  try:
82
+ new_dataset = load_dataset(
83
+ repo_id, split="train", download_mode="force_redownload"
84
+ )
85
+ return concatenate_datasets([dataset, new_dataset])
86
  except Exception:
87
  return dataset
88
 
src/synthetic_dataset_generator/apps/textcat.py CHANGED
@@ -105,7 +105,7 @@ def generate_dataset(
105
  is_sample=is_sample,
106
  )
107
  labeller_generator = get_labeller_generator(
108
- system_prompt=f"{system_prompt}. Optional labels: {', '.join(labels)}. Only select actually relevant labels.",
109
  labels=labels,
110
  multi_label=multi_label,
111
  )
 
105
  is_sample=is_sample,
106
  )
107
  labeller_generator = get_labeller_generator(
108
+ system_prompt=f"{system_prompt}. Optional labels: {', '.join(labels)}. Only apply relevant labels. Applying less labels is better than applying too many labels.",
109
  labels=labels,
110
  multi_label=multi_label,
111
  )