davidberenstein1957 HF staff committed on
Commit
545e114
·
1 Parent(s): 4983843

add logic to push pipeline code to hub

Browse files
src/synthetic_dataset_generator/apps/eval.py CHANGED
@@ -19,6 +19,7 @@ from huggingface_hub import HfApi, repo_exists
19
 
20
  from synthetic_dataset_generator.apps.base import (
21
  hide_success_message,
 
22
  show_success_message,
23
  validate_argilla_user_workspace_dataset,
24
  validate_push_to_hub,
@@ -346,7 +347,12 @@ def evaluate_sample_dataset(
346
 
347
 
348
  def push_dataset_to_hub(
349
- dataframe: pd.DataFrame, org_name: str, repo_name: str, oauth_token, private
 
 
 
 
 
350
  ):
351
  repo_id = validate_push_to_hub(org_name, repo_name)
352
  distiset = Distiset({"default": Dataset.from_pandas(dataframe)})
@@ -357,6 +363,7 @@ def push_dataset_to_hub(
357
  token=oauth_token.token,
358
  create_pr=False,
359
  )
 
360
 
361
 
362
  def push_dataset(
@@ -371,6 +378,7 @@ def push_dataset(
371
  response_instruction_response: str,
372
  prompt_template: str,
373
  structured_output: dict,
 
374
  oauth_token: Union[gr.OAuthToken, None] = None,
375
  progress=gr.Progress(),
376
  ) -> pd.DataFrame:
@@ -385,7 +393,9 @@ def push_dataset(
385
  structured_output=structured_output,
386
  num_rows=num_rows,
387
  )
388
- push_dataset_to_hub(dataframe, org_name, repo_name, oauth_token, private)
 
 
389
  try:
390
  progress(0.1, desc="Setting up user and workspace")
391
  hf_user = HfApi().whoami(token=oauth_token.token)["name"]
@@ -854,6 +864,7 @@ with gr.Blocks() as app:
854
  response_instruction_response,
855
  prompt_template,
856
  structured_output,
 
857
  ],
858
  outputs=[success_message],
859
  show_progress=True,
 
19
 
20
  from synthetic_dataset_generator.apps.base import (
21
  hide_success_message,
22
+ push_pipeline_code_to_hub,
23
  show_success_message,
24
  validate_argilla_user_workspace_dataset,
25
  validate_push_to_hub,
 
347
 
348
 
349
  def push_dataset_to_hub(
350
+ dataframe: pd.DataFrame,
351
+ org_name: str,
352
+ repo_name: str,
353
+ oauth_token,
354
+ private: bool,
355
+ pipeline_code: str,
356
  ):
357
  repo_id = validate_push_to_hub(org_name, repo_name)
358
  distiset = Distiset({"default": Dataset.from_pandas(dataframe)})
 
363
  token=oauth_token.token,
364
  create_pr=False,
365
  )
366
+ push_pipeline_code_to_hub(pipeline_code, org_name, repo_name, oauth_token.token)
367
 
368
 
369
  def push_dataset(
 
378
  response_instruction_response: str,
379
  prompt_template: str,
380
  structured_output: dict,
381
+ pipeline_code: str,
382
  oauth_token: Union[gr.OAuthToken, None] = None,
383
  progress=gr.Progress(),
384
  ) -> pd.DataFrame:
 
393
  structured_output=structured_output,
394
  num_rows=num_rows,
395
  )
396
+ push_dataset_to_hub(
397
+ dataframe, org_name, repo_name, oauth_token, private, pipeline_code
398
+ )
399
  try:
400
  progress(0.1, desc="Setting up user and workspace")
401
  hf_user = HfApi().whoami(token=oauth_token.token)["name"]
 
864
  response_instruction_response,
865
  prompt_template,
866
  structured_output,
867
+ pipeline_code,
868
  ],
869
  outputs=[success_message],
870
  show_progress=True,
src/synthetic_dataset_generator/apps/sft.py CHANGED
@@ -11,6 +11,7 @@ from huggingface_hub import HfApi
11
 
12
  from synthetic_dataset_generator.apps.base import (
13
  hide_success_message,
 
14
  show_success_message,
15
  validate_argilla_user_workspace_dataset,
16
  validate_push_to_hub,
@@ -202,7 +203,14 @@ def generate_dataset(
202
  return dataframe
203
 
204
 
205
- def push_dataset_to_hub(dataframe, org_name, repo_name, oauth_token, private):
 
 
 
 
 
 
 
206
  repo_id = validate_push_to_hub(org_name, repo_name)
207
  original_dataframe = dataframe.copy(deep=True)
208
  dataframe = convert_dataframe_messages(dataframe)
@@ -214,6 +222,7 @@ def push_dataset_to_hub(dataframe, org_name, repo_name, oauth_token, private):
214
  token=oauth_token.token,
215
  create_pr=False,
216
  )
 
217
  return original_dataframe
218
 
219
 
@@ -225,6 +234,7 @@ def push_dataset(
225
  num_rows: int = 10,
226
  private: bool = False,
227
  temperature: float = 0.9,
 
228
  oauth_token: Union[gr.OAuthToken, None] = None,
229
  progress=gr.Progress(),
230
  ) -> pd.DataFrame:
@@ -234,7 +244,9 @@ def push_dataset(
234
  num_rows=num_rows,
235
  temperature=temperature,
236
  )
237
- push_dataset_to_hub(dataframe, org_name, repo_name, oauth_token, private)
 
 
238
  try:
239
  progress(0.1, desc="Setting up user and workspace")
240
  hf_user = HfApi().whoami(token=oauth_token.token)["name"]
@@ -528,6 +540,7 @@ with gr.Blocks() as app:
528
  num_rows,
529
  private,
530
  temperature,
 
531
  ],
532
  outputs=[success_message],
533
  show_progress=True,
 
11
 
12
  from synthetic_dataset_generator.apps.base import (
13
  hide_success_message,
14
+ push_pipeline_code_to_hub,
15
  show_success_message,
16
  validate_argilla_user_workspace_dataset,
17
  validate_push_to_hub,
 
203
  return dataframe
204
 
205
 
206
+ def push_dataset_to_hub(
207
+ dataframe: pd.DataFrame,
208
+ org_name: str,
209
+ repo_name: str,
210
+ oauth_token: gr.OAuthToken,
211
+ private: bool,
212
+ pipeline_code: str,
213
+ ):
214
  repo_id = validate_push_to_hub(org_name, repo_name)
215
  original_dataframe = dataframe.copy(deep=True)
216
  dataframe = convert_dataframe_messages(dataframe)
 
222
  token=oauth_token.token,
223
  create_pr=False,
224
  )
225
+ push_pipeline_code_to_hub(pipeline_code, org_name, repo_name, oauth_token.token)
226
  return original_dataframe
227
 
228
 
 
234
  num_rows: int = 10,
235
  private: bool = False,
236
  temperature: float = 0.9,
237
+ pipeline_code: str = "",
238
  oauth_token: Union[gr.OAuthToken, None] = None,
239
  progress=gr.Progress(),
240
  ) -> pd.DataFrame:
 
244
  num_rows=num_rows,
245
  temperature=temperature,
246
  )
247
+ push_dataset_to_hub(
248
+ dataframe, org_name, repo_name, oauth_token, private, pipeline_code
249
+ )
250
  try:
251
  progress(0.1, desc="Setting up user and workspace")
252
  hf_user = HfApi().whoami(token=oauth_token.token)["name"]
 
540
  num_rows,
541
  private,
542
  temperature,
543
+ pipeline_code,
544
  ],
545
  outputs=[success_message],
546
  show_progress=True,
src/synthetic_dataset_generator/apps/textcat.py CHANGED
@@ -11,6 +11,7 @@ from huggingface_hub import HfApi
11
 
12
  from src.synthetic_dataset_generator.apps.base import (
13
  hide_success_message,
 
14
  show_success_message,
15
  validate_argilla_user_workspace_dataset,
16
  validate_push_to_hub,
@@ -172,6 +173,7 @@ def push_dataset_to_hub(
172
  labels: List[str] = None,
173
  oauth_token: Union[gr.OAuthToken, None] = None,
174
  private: bool = False,
 
175
  ):
176
  repo_id = validate_push_to_hub(org_name, repo_name)
177
  labels = get_preprocess_labels(labels)
@@ -195,6 +197,7 @@ def push_dataset_to_hub(
195
  token=oauth_token.token,
196
  create_pr=False,
197
  )
 
198
 
199
 
200
  def push_dataset(
@@ -208,6 +211,7 @@ def push_dataset(
208
  labels: List[str] = None,
209
  private: bool = False,
210
  temperature: float = 0.8,
 
211
  oauth_token: Union[gr.OAuthToken, None] = None,
212
  progress=gr.Progress(),
213
  ) -> pd.DataFrame:
@@ -221,7 +225,14 @@ def push_dataset(
221
  temperature=temperature,
222
  )
223
  push_dataset_to_hub(
224
- dataframe, org_name, repo_name, num_labels, labels, oauth_token, private
 
 
 
 
 
 
 
225
  )
226
 
227
  dataframe = dataframe[
@@ -544,6 +555,7 @@ with gr.Blocks() as app:
544
  labels,
545
  private,
546
  temperature,
 
547
  ],
548
  outputs=[success_message],
549
  show_progress=True,
 
11
 
12
  from src.synthetic_dataset_generator.apps.base import (
13
  hide_success_message,
14
+ push_pipeline_code_to_hub,
15
  show_success_message,
16
  validate_argilla_user_workspace_dataset,
17
  validate_push_to_hub,
 
173
  labels: List[str] = None,
174
  oauth_token: Union[gr.OAuthToken, None] = None,
175
  private: bool = False,
176
+ pipeline_code: str = "",
177
  ):
178
  repo_id = validate_push_to_hub(org_name, repo_name)
179
  labels = get_preprocess_labels(labels)
 
197
  token=oauth_token.token,
198
  create_pr=False,
199
  )
200
+ push_pipeline_code_to_hub(pipeline_code, org_name, repo_name, oauth_token.token)
201
 
202
 
203
  def push_dataset(
 
211
  labels: List[str] = None,
212
  private: bool = False,
213
  temperature: float = 0.8,
214
+ pipeline_code: str = "",
215
  oauth_token: Union[gr.OAuthToken, None] = None,
216
  progress=gr.Progress(),
217
  ) -> pd.DataFrame:
 
225
  temperature=temperature,
226
  )
227
  push_dataset_to_hub(
228
+ dataframe,
229
+ org_name,
230
+ repo_name,
231
+ num_labels,
232
+ labels,
233
+ oauth_token,
234
+ private,
235
+ pipeline_code,
236
  )
237
 
238
  dataframe = dataframe[
 
555
  labels,
556
  private,
557
  temperature,
558
+ pipeline_code,
559
  ],
560
  outputs=[success_message],
561
  show_progress=True,
src/synthetic_dataset_generator/utils.py CHANGED
@@ -39,14 +39,13 @@ def list_orgs(oauth_token: Union[OAuthToken, None] = None):
39
  organizations = [org for org in organizations if org != data["name"]]
40
  organizations = [data["name"]] + organizations
41
  except Exception as e:
42
- data = whoami(oauth_token.token)
43
  warnings.warn(str(e))
44
  gr.Info(
45
  "Your user token does not have the necessary permissions to push to organizations."
46
  "Please check your OAuth permissions in https://huggingface.co/settings/connected-applications."
47
  "Update yout token permissions to include repo.write: https://huggingface.co/settings/tokens."
48
  )
49
- return [data["name"]]
50
 
51
  return organizations
52
 
 
39
  organizations = [org for org in organizations if org != data["name"]]
40
  organizations = [data["name"]] + organizations
41
  except Exception as e:
 
42
  warnings.warn(str(e))
43
  gr.Info(
44
  "Your user token does not have the necessary permissions to push to organizations."
45
  "Please check your OAuth permissions in https://huggingface.co/settings/connected-applications."
46
  "Update yout token permissions to include repo.write: https://huggingface.co/settings/tokens."
47
  )
48
+ return []
49
 
50
  return organizations
51