Update tasks/text.py

tasks/text.py  (+12 −11)
@@ -45,24 +45,14 @@ async def evaluate_text(request: TextEvaluationRequest):
     }
 
     # Load and prepare the dataset
-    dataset = load_dataset(request.dataset_name)
+    dataset = load_dataset(request.dataset_name, token=os.getenv("HF_TOKEN"))
 
     # Convert string labels to integers
     dataset = dataset.map(lambda x: {"label": LABEL_MAPPING[x["label"]]})
 
     # Split dataset
-    train_test = dataset["train"]
     test_dataset = dataset["test"]
-
-    # Start tracking emissions
-    tracker.start()
-    tracker.start_task("inference")
 
-    #--------------------------------------------------------------------------------------------
-    # YOUR MODEL INFERENCE CODE HERE
-    # Update the code below to replace the random baseline by your model inference within the inference pass where the energy consumption and emissions are tracked.
-    #--------------------------------------------------------------------------------------------
-
     # Make random predictions (placeholder for actual model inference)
     true_labels = test_dataset["label"]
     # predictions = [random.randint(0, 7) for _ in range(len(true_labels))]
@@ -78,6 +68,17 @@ async def evaluate_text(request: TextEvaluationRequest):
     model = AutoModelForSequenceClassification.from_pretrained(path_model).half().to(device)  # Model in half precision on GPU
     tokenizer = AutoTokenizer.from_pretrained(path_tokenizer)
 
+
+    # Start tracking emissions
+    tracker.start()
+    tracker.start_task("inference")
+
+    #--------------------------------------------------------------------------------------------
+    # YOUR MODEL INFERENCE CODE HERE
+    # Update the code below to replace the random baseline by your model inference within the inference pass where the energy consumption and emissions are tracked.
+    #--------------------------------------------------------------------------------------------
+
+
     # Preprocessing function
     def preprocess_function(df):
         tokenized = tokenizer(df["quote"], truncation=True)  # Removed padding here
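For context, here is a minimal sketch (not part of this commit) of how the relocated tracking block and the padding-free `preprocess_function` could be wired into the tracked inference pass. Only `tracker`, `model`, `tokenizer`, `device`, `test_dataset` and `preprocess_function` come from the diff; the `DataCollatorWithPadding`, the batch size, the argmax decoding, and the final `tracker.stop_task()` call are assumptions not shown in these hunks, and `preprocess_function` is assumed to return the tokenized encodings.

```python
import torch
from torch.utils.data import DataLoader
from transformers import DataCollatorWithPadding

# Tokenize without padding (as in preprocess_function), then pad per batch.
tokenized_test = test_dataset.map(preprocess_function, batched=True)
tokenized_test = tokenized_test.remove_columns(
    [c for c in tokenized_test.column_names if c not in ("input_ids", "attention_mask")]
)
collator = DataCollatorWithPadding(tokenizer=tokenizer)
loader = DataLoader(tokenized_test, batch_size=32, collate_fn=collator)  # batch size is an assumption

# Emissions are tracked only around the inference pass, not around model loading.
tracker.start()
tracker.start_task("inference")

model.eval()
predictions = []
with torch.no_grad():
    for batch in loader:
        batch = {k: v.to(device) for k, v in batch.items()}
        logits = model(**batch).logits  # half-precision forward pass on GPU
        predictions.extend(logits.argmax(dim=-1).cpu().tolist())

emissions_data = tracker.stop_task()  # assumed to appear later in the file, as in the codecarbon API
```

Separately, the `token=os.getenv("HF_TOKEN")` argument to `load_dataset` assumes that `os` is imported and that an `HF_TOKEN` environment variable is set (typically needed for gated or private datasets); neither is visible in these hunks.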