Commit
·
b06a781
1
Parent(s):
bf5627a
add labelling
Browse files
src/synthetic_dataset_generator/apps/base.py
CHANGED
@@ -79,8 +79,10 @@ def validate_push_to_hub(org_name, repo_name):
 
 def combine_datasets(repo_id: str, dataset: Dataset) -> Dataset:
     try:
-
-
+        new_dataset = load_dataset(
+            repo_id, split="train", download_mode="force_redownload"
+        )
+        return concatenate_datasets([dataset, new_dataset])
     except Exception:
         return dataset
 
src/synthetic_dataset_generator/apps/textcat.py
CHANGED
@@ -105,7 +105,7 @@ def generate_dataset(
         is_sample=is_sample,
     )
     labeller_generator = get_labeller_generator(
-        system_prompt=f"{system_prompt}. Optional labels: {', '.join(labels)}. Only
+        system_prompt=f"{system_prompt}. Optional labels: {', '.join(labels)}. Only apply relevant labels. Applying less labels is better than applying too many labels.",
         labels=labels,
         multi_label=multi_label,
     )