#  Copyright 2021-present, the Recognai S.L. team.
#
#  Licensed under the Apache License, Version 2.0 (the "License");
#  you may not use this file except in compliance with the License.
#  You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
#  Unless required by applicable law or agreed to in writing, software
#  distributed under the License is distributed on an "AS IS" BASIS,
#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#  See the License for the specific language governing permissions and
#  limitations under the License.

import sys
import time

import argilla as rg
import pandas as pd
import requests
from argilla.labeling.text_classification import Rule, add_rules
from datasets import load_dataset


class LoadDatasets:
    """Initialise the Argilla client and load the bundled datasets into it."""

    def __init__(self, api_key, workspace="team"):
        """Initialise the Argilla client.

        Args:
            api_key: API key passed to ``rg.init``.
            workspace: Workspace passed to ``rg.init``; defaults to ``"team"``.
        """
        rg.init(api_key=api_key, workspace=workspace)

    @staticmethod
    def load_somos():
        """Load ``somosnlp/somos-alpaca-es`` and log it as ``somos-alpaca-es``.

        The train split is converted to text-classification records, logged,
        and the dataset is then configured with its label schema in the
        ``"team"`` workspace.
        """
        print("Loading somos dataset")

        # Read the dataset from the Hub.
        dataset = load_dataset("somosnlp/somos-alpaca-es", split="train")
        # This line can be commented out if it fails.
        dataset = dataset.remove_columns("metrics")
        records = rg.DatasetForTextClassification.from_datasets(dataset)

        # Log the dataset
        rg.log(
            records,
            name="somos-alpaca-es",
            tags={"description": "SomosNLP Hackathon dataset"},
        )
        settings = rg.TextClassificationSettings(
            label_schema=["BAD INSTRUCTION", "BAD INPUT", "BAD OUTPUT", "INAPPROPRIATE", "BIASED", "ALL GOOD"]
        )
        rg.configure_dataset(name="somos-alpaca-es", settings=settings, workspace="team")


if __name__ == "__main__":
    # Fail with a readable usage message instead of a bare IndexError.
    if len(sys.argv) < 3:
        sys.exit(f"Usage: {sys.argv[0]} <API_KEY> <LOAD_DATASETS>")

    API_KEY = sys.argv[1]
    LOAD_DATASETS = sys.argv[2]

    if LOAD_DATASETS.lower() == "none":
        print("No datasets being loaded")
    else:
        # Poll the server until it answers with HTTP 200, then load once.
        while True:
            try:
                # The timeout keeps a server that accepts the connection but
                # never responds from blocking this loop forever.
                response = requests.get("http://0.0.0.0:6900/", timeout=10)
                if response.status_code == 200:
                    ld = LoadDatasets(API_KEY)
                    ld.load_somos()
                    break
            except (requests.exceptions.ConnectionError, requests.exceptions.Timeout):
                # Server not reachable yet; retry after the pause below.
                pass
            except Exception as e:
                # Report any other failure and back off longer before retrying.
                print(e)
                time.sleep(10)

            time.sleep(5)