John Graham Reynolds
committed on
Commit
·
d7ca2e1
1
Parent(s):
b0e4974
add classes to show off to files
Browse files- __init__.py +9 -0
- fixed_f1.py +40 -0
- fixed_precision.py +43 -0
- fixed_recall.py +42 -0
__init__.py
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from fixed_f1 import FixedF1
|
2 |
+
from fixed_precision import FixedPrecision
|
3 |
+
from fixed_recall import FixedRecall
|
4 |
+
|
5 |
+
# Public API of the package: the three metrics whose averaging is fixed at construction time.
__all__ = ["FixedF1", "FixedPrecision", "FixedRecall"]
|
fixed_f1.py
ADDED
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import datasets
|
2 |
+
import evaluate
|
3 |
+
from evaluate import evaluator, Metric
|
4 |
+
# from evaluate.metrics.f1 import F1
|
5 |
+
from sklearn.metrics import f1_score
|
6 |
+
|
7 |
+
# could in principle subclass F1, but ideally we can work the fix into the F1 class to maintain SOLID code
|
8 |
+
class FixedF1(evaluate.Metric):
    """F1 metric whose averaging strategy is chosen when the metric is instantiated.

    The score is computed with ``sklearn.metrics.f1_score``. ``average`` is stored
    on the instance and used for every computation, so the metric does not depend
    on keyword arguments being supplied at compute time.

    Args:
        average: Averaging strategy handed to ``f1_score``; one of ``"micro"``,
            ``"macro"``, ``"samples"``, ``"weighted"``, ``"binary"`` or ``None``.
        **kwargs: Forwarded unchanged to ``evaluate.Metric.__init__``. Passing
            ``config_name="multilabel"`` selects the sequence-valued feature
            schema declared in ``_info``.
    """

    def __init__(self, average="binary", **kwargs):
        # Forward base-class options instead of dropping them: without this,
        # ``config_name`` could never be set by the caller and the "multilabel"
        # schema in ``_info`` was unreachable.
        super().__init__(**kwargs)
        self.average = average
        # additional values passed to compute() could and probably should (?) all be passed here
        # so that the final computation is configured immediately at Metric instantiation

    def _info(self):
        """Return the metric's metadata and its expected input features."""
        # Multilabel inputs are one sequence of ints per example; every other
        # configuration takes a single int per example.
        if self.config_name == "multilabel":
            feature_spec = {
                "predictions": datasets.Sequence(datasets.Value("int32")),
                "references": datasets.Sequence(datasets.Value("int32")),
            }
        else:
            feature_spec = {
                "predictions": datasets.Value("int32"),
                "references": datasets.Value("int32"),
            }
        return evaluate.MetricInfo(
            description="Custom built F1 metric for true *multilabel* classification - the 'multilabel' config_name var in the evaluate.EvaluationModules class appears to better address multi-class classification, where features can fall under a multitude of labels. Granted, the subtlety is minimal and easily confused. This class is implemented with the intention of enabling the evaluation of multiple multilabel classification metrics at the same time using the evaluate.CombinedEvaluations.combine method.",
            citation="",
            inputs_description="'average': This parameter is required for multiclass/multilabel targets. If None, the scores for each class are returned. Otherwise, this determines the type of averaging performed on the data. Options include: {‘micro’, ‘macro’, ‘samples’, ‘weighted’, ‘binary’} or None.",
            features=datasets.Features(feature_spec),
            reference_urls=["https://scikit-learn.org/stable/modules/generated/sklearn.metrics.f1_score.html"],
        )

    # could remove specific kwargs like average, sample_weight from _compute() method of F1

    def _compute(self, predictions, references, labels=None, pos_label=1, average="binary", sample_weight=None):
        """Compute F1 for ``predictions`` against ``references``.

        Args:
            predictions: Predicted labels.
            references: Ground-truth labels.
            labels: Forwarded to ``f1_score`` as ``labels``.
            pos_label: Forwarded to ``f1_score`` as ``pos_label``.
            average: Kept for signature compatibility only; the value given at
                instantiation (``self.average``) is the one actually used.
            sample_weight: Forwarded to ``f1_score`` as ``sample_weight``.

        Returns:
            ``{"f1": value}`` where ``value`` is a ``float`` for a single score,
            otherwise the multi-element result returned by ``f1_score``.
        """
        score = f1_score(
            references,
            predictions,
            labels=labels,
            pos_label=pos_label,
            average=self.average,
            sample_weight=sample_weight,
        )
        # ``getattr`` keeps this working when the score is a plain Python float
        # (no ``.size`` attribute) rather than a NumPy scalar or array.
        if getattr(score, "size", 1) == 1:
            return {"f1": float(score)}
        return {"f1": score}
|
fixed_precision.py
ADDED
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import datasets
|
2 |
+
import evaluate
|
3 |
+
from evaluate import evaluator, Metric
|
4 |
+
# from evaluate.metrics.precision import Precision
|
5 |
+
from sklearn.metrics import precision_score
|
6 |
+
|
7 |
+
# could in principle subclass Precision, but ideally we can work the fix into the Precision class to maintain SOLID code
|
8 |
+
class FixedPrecision(evaluate.Metric):
    """Precision metric configured entirely when the metric is instantiated.

    The score is computed with ``sklearn.metrics.precision_score``. ``average``
    and ``zero_division`` are stored on the instance and used for every
    computation, so the metric does not depend on keyword arguments being
    supplied at compute time.

    Args:
        average: Averaging strategy handed to ``precision_score``; one of
            ``"micro"``, ``"macro"``, ``"samples"``, ``"weighted"``, ``"binary"``
            or ``None``.
        zero_division: Forwarded to ``precision_score`` as ``zero_division``.
        **kwargs: Forwarded unchanged to ``evaluate.Metric.__init__``. Passing
            ``config_name="multilabel"`` selects the sequence-valued feature
            schema declared in ``_info``.
    """

    def __init__(self, average="binary", zero_division="warn", **kwargs):
        # Forward base-class options instead of dropping them: without this,
        # ``config_name`` could never be set by the caller and the "multilabel"
        # schema in ``_info`` was unreachable.
        super().__init__(**kwargs)
        self.average = average
        self.zero_division = zero_division
        # additional values passed to compute() could and probably should (?) all be passed here
        # so that the final computation is configured immediately at Metric instantiation

    def _info(self):
        """Return the metric's metadata and its expected input features."""
        # Multilabel inputs are one sequence of ints per example; every other
        # configuration takes a single int per example.
        if self.config_name == "multilabel":
            feature_spec = {
                "predictions": datasets.Sequence(datasets.Value("int32")),
                "references": datasets.Sequence(datasets.Value("int32")),
            }
        else:
            feature_spec = {
                "predictions": datasets.Value("int32"),
                "references": datasets.Value("int32"),
            }
        return evaluate.MetricInfo(
            description="Custom built Precision metric for true *multilabel* classification - the 'multilabel' config_name var in the evaluate.EvaluationModules class appears to better address multi-class classification, where features can fall under a multitude of labels. Granted, the subtlety is minimal and easily confused. This class is implemented with the intention of enabling the evaluation of multiple multilabel classification metrics at the same time using the evaluate.CombinedEvaluations.combine method.",
            citation="",
            inputs_description="'average': This parameter is required for multiclass/multilabel targets. If None, the scores for each class are returned. Otherwise, this determines the type of averaging performed on the data. Options include: {‘micro’, ‘macro’, ‘samples’, ‘weighted’, ‘binary’} or None.",
            features=datasets.Features(feature_spec),
            reference_urls=["https://scikit-learn.org/stable/modules/generated/sklearn.metrics.precision_score.html"],
        )

    # could remove specific kwargs like average, sample_weight from _compute() method and simply
    # pass them to the underlying scikit-learn function in the form of a class var self.*

    def _compute(
        self, predictions, references, labels=None, pos_label=1, average="binary", sample_weight=None, zero_division="warn",
    ):
        """Compute precision for ``predictions`` against ``references``.

        Args:
            predictions: Predicted labels.
            references: Ground-truth labels.
            labels: Forwarded to ``precision_score`` as ``labels``.
            pos_label: Forwarded to ``precision_score`` as ``pos_label``.
            average: Kept for signature compatibility only; ``self.average`` is
                the value actually used.
            sample_weight: Forwarded to ``precision_score`` as ``sample_weight``.
            zero_division: Kept for signature compatibility only;
                ``self.zero_division`` is the value actually used.

        Returns:
            ``{"precision": value}`` where ``value`` is a ``float`` for a single
            score, otherwise the multi-element result returned by
            ``precision_score``.
        """
        score = precision_score(
            references,
            predictions,
            labels=labels,
            pos_label=pos_label,
            average=self.average,
            sample_weight=sample_weight,
            zero_division=self.zero_division,
        )
        # ``getattr`` keeps this working when the score is a plain Python float
        # (no ``.size`` attribute) rather than a NumPy scalar or array.
        if getattr(score, "size", 1) == 1:
            return {"precision": float(score)}
        return {"precision": score}
|
fixed_recall.py
ADDED
@@ -0,0 +1,42 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import datasets
|
2 |
+
import evaluate
|
3 |
+
from evaluate import evaluator, Metric
|
4 |
+
# from evaluate.metrics.recall import Recall
|
5 |
+
from sklearn.metrics import recall_score
|
6 |
+
|
7 |
+
# could in principle subclass Recall, but ideally we can work the fix into the Recall class to maintain SOLID code
|
8 |
+
class FixedRecall(evaluate.Metric):
    """Recall metric configured when the metric is instantiated.

    The score is computed with ``sklearn.metrics.recall_score``. ``average`` and
    ``zero_division`` are stored on the instance so the metric does not depend on
    keyword arguments being supplied at compute time.

    Args:
        average: Averaging strategy handed to ``recall_score``; one of
            ``"micro"``, ``"macro"``, ``"samples"``, ``"weighted"``, ``"binary"``
            or ``None``.
        zero_division: Default ``zero_division`` handed to ``recall_score`` when
            the compute call does not override it.
        **kwargs: Forwarded unchanged to ``evaluate.Metric.__init__``. Passing
            ``config_name="multilabel"`` selects the sequence-valued feature
            schema declared in ``_info``.
    """

    def __init__(self, average="binary", zero_division="warn", **kwargs):
        # Forward base-class options instead of dropping them: without this,
        # ``config_name`` could never be set by the caller and the "multilabel"
        # schema in ``_info`` was unreachable.
        super().__init__(**kwargs)
        self.average = average
        self.zero_division = zero_division
        # additional values passed to compute() could and probably should (?) all be passed here
        # so that the final computation is configured immediately at Metric instantiation

    def _info(self):
        """Return the metric's metadata and its expected input features."""
        # Multilabel inputs are one sequence of ints per example; every other
        # configuration takes a single int per example.
        if self.config_name == "multilabel":
            feature_spec = {
                "predictions": datasets.Sequence(datasets.Value("int32")),
                "references": datasets.Sequence(datasets.Value("int32")),
            }
        else:
            feature_spec = {
                "predictions": datasets.Value("int32"),
                "references": datasets.Value("int32"),
            }
        return evaluate.MetricInfo(
            description="Custom built Recall metric for true *multilabel* classification - the 'multilabel' config_name var in the evaluate.EvaluationModules class appears to better address multi-class classification, where features can fall under a multitude of labels. Granted, the subtlety is minimal and easily confused. This class is implemented with the intention of enabling the evaluation of multiple multilabel classification metrics at the same time using the evaluate.CombinedEvaluations.combine method.",
            citation="",
            inputs_description="'average': This parameter is required for multiclass/multilabel targets. If None, the scores for each class are returned. Otherwise, this determines the type of averaging performed on the data. Options include: {‘micro’, ‘macro’, ‘samples’, ‘weighted’, ‘binary’} or None.",
            features=datasets.Features(feature_spec),
            reference_urls=["https://scikit-learn.org/stable/modules/generated/sklearn.metrics.recall_score.html"],
        )

    # could remove specific kwargs like average, sample_weight from _compute() method and simply
    # pass them to the underlying scikit-learn function in the form of a class var self.*

    def _compute(
        self, predictions, references, labels=None, pos_label=1, average="binary", sample_weight=None, zero_division="warn",
    ):
        """Compute recall for ``predictions`` against ``references``.

        Args:
            predictions: Predicted labels.
            references: Ground-truth labels.
            labels: Forwarded to ``recall_score`` as ``labels``.
            pos_label: Forwarded to ``recall_score`` as ``pos_label``.
            average: Kept for signature compatibility only; ``self.average`` is
                the value actually used.
            sample_weight: Forwarded to ``recall_score`` as ``sample_weight``.
            zero_division: A value other than the default ``"warn"`` overrides
                ``self.zero_division`` for this call; otherwise the value given
                at instantiation is used.

        Returns:
            ``{"recall": value}`` where ``value`` is a ``float`` for a single
            score, otherwise the multi-element result returned by
            ``recall_score``.
        """
        # An explicit per-call value still wins, so existing callers that pass
        # ``zero_division`` at compute time keep their behaviour; otherwise fall
        # back to the instance-level setting.
        if isinstance(zero_division, str) and zero_division == "warn":
            effective_zero_division = self.zero_division
        else:
            effective_zero_division = zero_division
        score = recall_score(
            references,
            predictions,
            labels=labels,
            pos_label=pos_label,
            average=self.average,
            sample_weight=sample_weight,
            zero_division=effective_zero_division,
        )
        # ``getattr`` keeps this working when the score is a plain Python float
        # (no ``.size`` attribute) rather than a NumPy scalar or array.
        if getattr(score, "size", 1) == 1:
            return {"recall": float(score)}
        return {"recall": score}
|