Commit
·
545e114
1
Parent(s):
4983843
add logic to push pipeline code to hub
Browse files
src/synthetic_dataset_generator/apps/eval.py
CHANGED
@@ -19,6 +19,7 @@ from huggingface_hub import HfApi, repo_exists
|
|
19 |
|
20 |
from synthetic_dataset_generator.apps.base import (
|
21 |
hide_success_message,
|
|
|
22 |
show_success_message,
|
23 |
validate_argilla_user_workspace_dataset,
|
24 |
validate_push_to_hub,
|
@@ -346,7 +347,12 @@ def evaluate_sample_dataset(
|
|
346 |
|
347 |
|
348 |
def push_dataset_to_hub(
|
349 |
-
dataframe: pd.DataFrame,
|
|
|
|
|
|
|
|
|
|
|
350 |
):
|
351 |
repo_id = validate_push_to_hub(org_name, repo_name)
|
352 |
distiset = Distiset({"default": Dataset.from_pandas(dataframe)})
|
@@ -357,6 +363,7 @@ def push_dataset_to_hub(
|
|
357 |
token=oauth_token.token,
|
358 |
create_pr=False,
|
359 |
)
|
|
|
360 |
|
361 |
|
362 |
def push_dataset(
|
@@ -371,6 +378,7 @@ def push_dataset(
|
|
371 |
response_instruction_response: str,
|
372 |
prompt_template: str,
|
373 |
structured_output: dict,
|
|
|
374 |
oauth_token: Union[gr.OAuthToken, None] = None,
|
375 |
progress=gr.Progress(),
|
376 |
) -> pd.DataFrame:
|
@@ -385,7 +393,9 @@ def push_dataset(
|
|
385 |
structured_output=structured_output,
|
386 |
num_rows=num_rows,
|
387 |
)
|
388 |
-
push_dataset_to_hub(
|
|
|
|
|
389 |
try:
|
390 |
progress(0.1, desc="Setting up user and workspace")
|
391 |
hf_user = HfApi().whoami(token=oauth_token.token)["name"]
|
@@ -854,6 +864,7 @@ with gr.Blocks() as app:
|
|
854 |
response_instruction_response,
|
855 |
prompt_template,
|
856 |
structured_output,
|
|
|
857 |
],
|
858 |
outputs=[success_message],
|
859 |
show_progress=True,
|
|
|
19 |
|
20 |
from synthetic_dataset_generator.apps.base import (
|
21 |
hide_success_message,
|
22 |
+
push_pipeline_code_to_hub,
|
23 |
show_success_message,
|
24 |
validate_argilla_user_workspace_dataset,
|
25 |
validate_push_to_hub,
|
|
|
347 |
|
348 |
|
349 |
def push_dataset_to_hub(
|
350 |
+
dataframe: pd.DataFrame,
|
351 |
+
org_name: str,
|
352 |
+
repo_name: str,
|
353 |
+
oauth_token,
|
354 |
+
private: bool,
|
355 |
+
pipeline_code: str,
|
356 |
):
|
357 |
repo_id = validate_push_to_hub(org_name, repo_name)
|
358 |
distiset = Distiset({"default": Dataset.from_pandas(dataframe)})
|
|
|
363 |
token=oauth_token.token,
|
364 |
create_pr=False,
|
365 |
)
|
366 |
+
push_pipeline_code_to_hub(pipeline_code, org_name, repo_name, oauth_token.token)
|
367 |
|
368 |
|
369 |
def push_dataset(
|
|
|
378 |
response_instruction_response: str,
|
379 |
prompt_template: str,
|
380 |
structured_output: dict,
|
381 |
+
pipeline_code: str,
|
382 |
oauth_token: Union[gr.OAuthToken, None] = None,
|
383 |
progress=gr.Progress(),
|
384 |
) -> pd.DataFrame:
|
|
|
393 |
structured_output=structured_output,
|
394 |
num_rows=num_rows,
|
395 |
)
|
396 |
+
push_dataset_to_hub(
|
397 |
+
dataframe, org_name, repo_name, oauth_token, private, pipeline_code
|
398 |
+
)
|
399 |
try:
|
400 |
progress(0.1, desc="Setting up user and workspace")
|
401 |
hf_user = HfApi().whoami(token=oauth_token.token)["name"]
|
|
|
864 |
response_instruction_response,
|
865 |
prompt_template,
|
866 |
structured_output,
|
867 |
+
pipeline_code,
|
868 |
],
|
869 |
outputs=[success_message],
|
870 |
show_progress=True,
|
src/synthetic_dataset_generator/apps/sft.py
CHANGED
@@ -11,6 +11,7 @@ from huggingface_hub import HfApi
|
|
11 |
|
12 |
from synthetic_dataset_generator.apps.base import (
|
13 |
hide_success_message,
|
|
|
14 |
show_success_message,
|
15 |
validate_argilla_user_workspace_dataset,
|
16 |
validate_push_to_hub,
|
@@ -202,7 +203,14 @@ def generate_dataset(
|
|
202 |
return dataframe
|
203 |
|
204 |
|
205 |
-
def push_dataset_to_hub(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
206 |
repo_id = validate_push_to_hub(org_name, repo_name)
|
207 |
original_dataframe = dataframe.copy(deep=True)
|
208 |
dataframe = convert_dataframe_messages(dataframe)
|
@@ -214,6 +222,7 @@ def push_dataset_to_hub(dataframe, org_name, repo_name, oauth_token, private):
|
|
214 |
token=oauth_token.token,
|
215 |
create_pr=False,
|
216 |
)
|
|
|
217 |
return original_dataframe
|
218 |
|
219 |
|
@@ -225,6 +234,7 @@ def push_dataset(
|
|
225 |
num_rows: int = 10,
|
226 |
private: bool = False,
|
227 |
temperature: float = 0.9,
|
|
|
228 |
oauth_token: Union[gr.OAuthToken, None] = None,
|
229 |
progress=gr.Progress(),
|
230 |
) -> pd.DataFrame:
|
@@ -234,7 +244,9 @@ def push_dataset(
|
|
234 |
num_rows=num_rows,
|
235 |
temperature=temperature,
|
236 |
)
|
237 |
-
push_dataset_to_hub(
|
|
|
|
|
238 |
try:
|
239 |
progress(0.1, desc="Setting up user and workspace")
|
240 |
hf_user = HfApi().whoami(token=oauth_token.token)["name"]
|
@@ -528,6 +540,7 @@ with gr.Blocks() as app:
|
|
528 |
num_rows,
|
529 |
private,
|
530 |
temperature,
|
|
|
531 |
],
|
532 |
outputs=[success_message],
|
533 |
show_progress=True,
|
|
|
11 |
|
12 |
from synthetic_dataset_generator.apps.base import (
|
13 |
hide_success_message,
|
14 |
+
push_pipeline_code_to_hub,
|
15 |
show_success_message,
|
16 |
validate_argilla_user_workspace_dataset,
|
17 |
validate_push_to_hub,
|
|
|
203 |
return dataframe
|
204 |
|
205 |
|
206 |
+
def push_dataset_to_hub(
|
207 |
+
dataframe: pd.DataFrame,
|
208 |
+
org_name: str,
|
209 |
+
repo_name: str,
|
210 |
+
oauth_token: gr.OAuthToken,
|
211 |
+
private: bool,
|
212 |
+
pipeline_code: str,
|
213 |
+
):
|
214 |
repo_id = validate_push_to_hub(org_name, repo_name)
|
215 |
original_dataframe = dataframe.copy(deep=True)
|
216 |
dataframe = convert_dataframe_messages(dataframe)
|
|
|
222 |
token=oauth_token.token,
|
223 |
create_pr=False,
|
224 |
)
|
225 |
+
push_pipeline_code_to_hub(pipeline_code, org_name, repo_name, oauth_token.token)
|
226 |
return original_dataframe
|
227 |
|
228 |
|
|
|
234 |
num_rows: int = 10,
|
235 |
private: bool = False,
|
236 |
temperature: float = 0.9,
|
237 |
+
pipeline_code: str = "",
|
238 |
oauth_token: Union[gr.OAuthToken, None] = None,
|
239 |
progress=gr.Progress(),
|
240 |
) -> pd.DataFrame:
|
|
|
244 |
num_rows=num_rows,
|
245 |
temperature=temperature,
|
246 |
)
|
247 |
+
push_dataset_to_hub(
|
248 |
+
dataframe, org_name, repo_name, oauth_token, private, pipeline_code
|
249 |
+
)
|
250 |
try:
|
251 |
progress(0.1, desc="Setting up user and workspace")
|
252 |
hf_user = HfApi().whoami(token=oauth_token.token)["name"]
|
|
|
540 |
num_rows,
|
541 |
private,
|
542 |
temperature,
|
543 |
+
pipeline_code,
|
544 |
],
|
545 |
outputs=[success_message],
|
546 |
show_progress=True,
|
src/synthetic_dataset_generator/apps/textcat.py
CHANGED
@@ -11,6 +11,7 @@ from huggingface_hub import HfApi
|
|
11 |
|
12 |
from src.synthetic_dataset_generator.apps.base import (
|
13 |
hide_success_message,
|
|
|
14 |
show_success_message,
|
15 |
validate_argilla_user_workspace_dataset,
|
16 |
validate_push_to_hub,
|
@@ -172,6 +173,7 @@ def push_dataset_to_hub(
|
|
172 |
labels: List[str] = None,
|
173 |
oauth_token: Union[gr.OAuthToken, None] = None,
|
174 |
private: bool = False,
|
|
|
175 |
):
|
176 |
repo_id = validate_push_to_hub(org_name, repo_name)
|
177 |
labels = get_preprocess_labels(labels)
|
@@ -195,6 +197,7 @@ def push_dataset_to_hub(
|
|
195 |
token=oauth_token.token,
|
196 |
create_pr=False,
|
197 |
)
|
|
|
198 |
|
199 |
|
200 |
def push_dataset(
|
@@ -208,6 +211,7 @@ def push_dataset(
|
|
208 |
labels: List[str] = None,
|
209 |
private: bool = False,
|
210 |
temperature: float = 0.8,
|
|
|
211 |
oauth_token: Union[gr.OAuthToken, None] = None,
|
212 |
progress=gr.Progress(),
|
213 |
) -> pd.DataFrame:
|
@@ -221,7 +225,14 @@ def push_dataset(
|
|
221 |
temperature=temperature,
|
222 |
)
|
223 |
push_dataset_to_hub(
|
224 |
-
dataframe,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
225 |
)
|
226 |
|
227 |
dataframe = dataframe[
|
@@ -544,6 +555,7 @@ with gr.Blocks() as app:
|
|
544 |
labels,
|
545 |
private,
|
546 |
temperature,
|
|
|
547 |
],
|
548 |
outputs=[success_message],
|
549 |
show_progress=True,
|
|
|
11 |
|
12 |
from src.synthetic_dataset_generator.apps.base import (
|
13 |
hide_success_message,
|
14 |
+
push_pipeline_code_to_hub,
|
15 |
show_success_message,
|
16 |
validate_argilla_user_workspace_dataset,
|
17 |
validate_push_to_hub,
|
|
|
173 |
labels: List[str] = None,
|
174 |
oauth_token: Union[gr.OAuthToken, None] = None,
|
175 |
private: bool = False,
|
176 |
+
pipeline_code: str = "",
|
177 |
):
|
178 |
repo_id = validate_push_to_hub(org_name, repo_name)
|
179 |
labels = get_preprocess_labels(labels)
|
|
|
197 |
token=oauth_token.token,
|
198 |
create_pr=False,
|
199 |
)
|
200 |
+
push_pipeline_code_to_hub(pipeline_code, org_name, repo_name, oauth_token.token)
|
201 |
|
202 |
|
203 |
def push_dataset(
|
|
|
211 |
labels: List[str] = None,
|
212 |
private: bool = False,
|
213 |
temperature: float = 0.8,
|
214 |
+
pipeline_code: str = "",
|
215 |
oauth_token: Union[gr.OAuthToken, None] = None,
|
216 |
progress=gr.Progress(),
|
217 |
) -> pd.DataFrame:
|
|
|
225 |
temperature=temperature,
|
226 |
)
|
227 |
push_dataset_to_hub(
|
228 |
+
dataframe,
|
229 |
+
org_name,
|
230 |
+
repo_name,
|
231 |
+
num_labels,
|
232 |
+
labels,
|
233 |
+
oauth_token,
|
234 |
+
private,
|
235 |
+
pipeline_code,
|
236 |
)
|
237 |
|
238 |
dataframe = dataframe[
|
|
|
555 |
labels,
|
556 |
private,
|
557 |
temperature,
|
558 |
+
pipeline_code,
|
559 |
],
|
560 |
outputs=[success_message],
|
561 |
show_progress=True,
|
src/synthetic_dataset_generator/utils.py
CHANGED
@@ -39,14 +39,13 @@ def list_orgs(oauth_token: Union[OAuthToken, None] = None):
|
|
39 |
organizations = [org for org in organizations if org != data["name"]]
|
40 |
organizations = [data["name"]] + organizations
|
41 |
except Exception as e:
|
42 |
-
data = whoami(oauth_token.token)
|
43 |
warnings.warn(str(e))
|
44 |
gr.Info(
|
45 |
"Your user token does not have the necessary permissions to push to organizations."
|
46 |
"Please check your OAuth permissions in https://huggingface.co/settings/connected-applications."
|
47 |
"Update yout token permissions to include repo.write: https://huggingface.co/settings/tokens."
|
48 |
)
|
49 |
-
return [
|
50 |
|
51 |
return organizations
|
52 |
|
|
|
39 |
organizations = [org for org in organizations if org != data["name"]]
|
40 |
organizations = [data["name"]] + organizations
|
41 |
except Exception as e:
|
|
|
42 |
warnings.warn(str(e))
|
43 |
gr.Info(
|
44 |
"Your user token does not have the necessary permissions to push to organizations."
|
45 |
"Please check your OAuth permissions in https://huggingface.co/settings/connected-applications."
|
46 |
"Update yout token permissions to include repo.write: https://huggingface.co/settings/tokens."
|
47 |
)
|
48 |
+
return []
|
49 |
|
50 |
return organizations
|
51 |
|