Update app.py
app.py CHANGED
@@ -21,31 +21,7 @@ def tokenize_function(examples):
     return tokenizer(examples["text"])
 
 
-
-def group_texts(examples):
-    # Concatenate all texts.
-    concatenated_examples = {k: sum(examples[k], []) for k in examples.keys()}
-    total_length = len(concatenated_examples[list(examples.keys())[0]])
-    # We drop the small remainder; we could pad instead of dropping if the model
-    # supported it. You can customize this part to your needs.
-    total_length = (total_length // block_size) * block_size
-    # Split by chunks of max_len.
-    result = {
-        k: [t[i : i + block_size] for i in range(0, total_length, block_size)]
-        for k, t in concatenated_examples.items()
-    }
-    result["labels"] = result["input_ids"].copy()
-    return result
-
-# Function the trainer needs to evaluate the training - with a metric
-def compute_metrics(eval_pred):
-    # Compute a metric so training progress can be measured - is it getting better?
-    metric = evaluate.load("accuracy")  # 3 available metric types: f1, roc_auc, or accuracy
-    logits, labels = eval_pred
-    predictions = np.argmax(logits, axis=-1)
-    # Call compute on metric to calculate the accuracy of your predictions.
-    # Before passing your predictions to compute, you need to convert the logits to predictions (remember all Transformers models return logits):
-    return metric.compute(predictions=predictions, references=labels)
+
 
 # Test the new model after training
 ########################################################################
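
For context, below is a minimal, self-contained sketch of how the two helpers removed in this commit (group_texts and compute_metrics) are typically exercised in the standard 🤗 Transformers recipe they come from. The distilgpt2 tokenizer, the wikitext dataset, and the block_size value are illustrative assumptions, not taken from this Space's app.py.

# Sketch only (not part of the commit); checkpoint and dataset names below
# are illustrative assumptions, not values from this Space.
import numpy as np
import evaluate
from datasets import load_dataset
from transformers import AutoTokenizer

block_size = 128
tokenizer = AutoTokenizer.from_pretrained("distilgpt2")

def tokenize_function(examples):
    return tokenizer(examples["text"])

def group_texts(examples):
    # Concatenate all texts, then re-split them into block_size chunks,
    # dropping the tail that is shorter than one block.
    concatenated = {k: sum(examples[k], []) for k in examples.keys()}
    total_length = len(concatenated[list(examples.keys())[0]])
    total_length = (total_length // block_size) * block_size
    result = {
        k: [t[i : i + block_size] for i in range(0, total_length, block_size)]
        for k, t in concatenated.items()
    }
    result["labels"] = result["input_ids"].copy()
    return result

def compute_metrics(eval_pred):
    # Convert logits to hard predictions before scoring them with the metric.
    metric = evaluate.load("accuracy")
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    return metric.compute(predictions=predictions, references=labels)

# Build fixed-length language-modelling blocks from raw text.
raw = load_dataset("wikitext", "wikitext-2-raw-v1")
lm_dataset = (
    raw.map(tokenize_function, batched=True, remove_columns=["text"])
       .map(group_texts, batched=True)
)
print(len(lm_dataset["train"][0]["input_ids"]))  # 128

# Toy check of the metric: two of the three predictions match the labels.
logits = np.array([[0.1, 0.9], [0.8, 0.2], [0.3, 0.7]])
labels = np.array([1, 0, 0])
print(compute_metrics((logits, labels)))  # {'accuracy': 0.666...}

Note that accuracy as used here scores one prediction per example, matching the classification tutorial the removed comments quote; scoring a causal LM per token would first require flattening the (batch, sequence) prediction and label arrays.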