laureBe committed on
Commit
69f6bad
·
verified ·
1 Parent(s): 5b66e1e

Upload 2 files

Browse files
Files changed (2) hide show
  1. tasks/audio.py +88 -0
  2. tasks/image.py +172 -0
tasks/audio.py ADDED
@@ -0,0 +1,88 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# Audio task endpoint: random-baseline evaluator for rainforest sound
# (chainsaw vs. environment) classification.
from fastapi import APIRouter
from datetime import datetime
from datasets import load_dataset
from sklearn.metrics import accuracy_score
import random
import os

# Project-local request schema and emissions-tracking helpers.
from .utils.evaluation import AudioEvaluationRequest
from .utils.emissions import tracker, clean_emissions_data, get_space_info

# Load environment variables (e.g. HF_TOKEN for gated datasets) from .env.
from dotenv import load_dotenv
load_dotenv()

router = APIRouter()

# Endpoint metadata surfaced in the OpenAPI docs.
DESCRIPTION = "Random Baseline"
ROUTE = "/audio"
@router.post(ROUTE, tags=["Audio Task"],
             description=DESCRIPTION)
async def evaluate_audio(request: AudioEvaluationRequest):
    """
    Evaluate audio classification for rainforest sound detection.

    Current Model: Random Baseline
    - Makes random predictions from the label space (0-1)
    - Used as a baseline for comparison
    """
    # Identity of the submitting user/space, echoed back in the results.
    username, space_url = get_space_info()

    # Label space for this task (not consumed by the random baseline;
    # kept as a reference for real model implementations).
    LABEL_MAPPING = {
        "chainsaw": 0,
        "environment": 1
    }

    # The dataset is gated, so authenticate with the HF_TOKEN env variable.
    dataset = load_dataset(request.dataset_name, token=os.getenv("HF_TOKEN"))

    # Reproducible held-out split driven by the request parameters.
    split = dataset["train"].train_test_split(test_size=request.test_size, seed=request.test_seed)
    eval_split = split["test"]

    # Energy/emissions are tracked only around the inference section below.
    tracker.start()
    tracker.start_task("inference")

    #--------------------------------------------------------------------------------------------
    # YOUR MODEL INFERENCE CODE HERE
    # Update the code below to replace the random baseline by your model inference within the inference pass where the energy consumption and emissions are tracked.
    #--------------------------------------------------------------------------------------------

    # Placeholder inference: one uniform random label per evaluation example.
    true_labels = eval_split["label"]
    predictions = [random.randint(0, 1) for _ in true_labels]

    #--------------------------------------------------------------------------------------------
    # YOUR MODEL INFERENCE STOPS HERE
    #--------------------------------------------------------------------------------------------

    # Close the tracked inference task and collect its measurements.
    emissions_data = tracker.stop_task()

    accuracy = accuracy_score(true_labels, predictions)

    # Result payload returned to the leaderboard caller.
    # NOTE(review): the * 1000 presumably converts kWh -> Wh and
    # kgCO2eq -> gCO2eq — confirm against the tracker's units.
    results = {
        "username": username,
        "space_url": space_url,
        "submission_timestamp": datetime.now().isoformat(),
        "model_description": DESCRIPTION,
        "accuracy": float(accuracy),
        "energy_consumed_wh": emissions_data.energy_consumed * 1000,
        "emissions_gco2eq": emissions_data.emissions * 1000,
        "emissions_data": clean_emissions_data(emissions_data),
        "api_route": ROUTE,
        "dataset_config": {
            "dataset_name": request.dataset_name,
            "test_size": request.test_size,
            "test_seed": request.test_seed
        }
    }

    return results
tasks/image.py ADDED
@@ -0,0 +1,172 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# Image task endpoint: random-baseline evaluator for forest fire smoke
# classification and bounding-box detection.
from fastapi import APIRouter
from datetime import datetime
from datasets import load_dataset
import numpy as np
from sklearn.metrics import accuracy_score
import random
import os

# Project-local request schema and emissions-tracking helpers.
from .utils.evaluation import ImageEvaluationRequest
from .utils.emissions import tracker, clean_emissions_data, get_space_info

# Load environment variables (e.g. HF_TOKEN for gated datasets) from .env.
from dotenv import load_dotenv
load_dotenv()

router = APIRouter()

# Endpoint metadata surfaced in the OpenAPI docs.
DESCRIPTION = "Random Baseline"
ROUTE = "/image"
def parse_boxes(annotation_string):
    """Parse every box in a whitespace-separated annotation string.

    Each box occupies 5 consecutive values (class_id, x_center, y_center,
    width, height); the class_id is dropped and only the 4 geometry values
    are kept. Incomplete trailing groups are ignored.
    """
    tokens = [float(tok) for tok in annotation_string.strip().split()]
    parsed = []
    for start in range(0, len(tokens), 5):
        chunk = tokens[start:start + 5]
        # Only complete 5-value groups count as a box.
        if len(chunk) == 5:
            parsed.append(chunk[1:])
    return parsed
def compute_iou(box1, box2):
    """Compute Intersection over Union (IoU) between two YOLO-format boxes.

    Boxes are (x_center, y_center, width, height); a small epsilon keeps
    the division defined for degenerate (zero-area) boxes.
    """
    def corners(box):
        # YOLO center/size -> (x1, y1, x2, y2) corner representation.
        cx, cy, w, h = box
        return np.array([cx - w / 2, cy - h / 2, cx + w / 2, cy + h / 2])

    a = corners(box1)
    b = corners(box2)

    # Overlap rectangle; clamped at zero when the boxes are disjoint.
    ix1 = max(a[0], b[0])
    iy1 = max(a[1], b[1])
    ix2 = min(a[2], b[2])
    iy2 = min(a[3], b[3])
    intersection = max(0, ix2 - ix1) * max(0, iy2 - iy1)

    area_a = (a[2] - a[0]) * (a[3] - a[1])
    area_b = (b[2] - b[0]) * (b[3] - b[1])
    union = area_a + area_b - intersection

    return intersection / (union + 1e-6)
def compute_max_iou(true_boxes, pred_box):
    """Return the maximum IoU between *pred_box* and any box in *true_boxes*.

    Returns 0 when *true_boxes* is empty (no ground truth to match),
    matching the original accumulator's starting value.
    """
    # max() with default=0 replaces the manual accumulator loop; same
    # result for every input, including the empty list.
    return max((compute_iou(true_box, pred_box) for true_box in true_boxes), default=0)
@router.post(ROUTE, tags=["Image Task"],
             description=DESCRIPTION)
async def evaluate_image(request: ImageEvaluationRequest):
    """
    Evaluate image classification and object detection for forest fire smoke.

    Current Model: Random Baseline
    - Makes random predictions for both classification and bounding boxes
    - Used as a baseline for comparison

    Metrics:
    - Classification accuracy: Whether an image contains smoke or not
    - Object Detection accuracy: IoU (Intersection over Union) for smoke bounding boxes
    """
    # Identity of the submitting user/space, echoed back in the results.
    username, space_url = get_space_info()

    # Load and prepare the dataset.
    # Gated dataset: authenticate with the HF_TOKEN environment variable.
    dataset = load_dataset(request.dataset_name, token=os.getenv("HF_TOKEN"))

    # Reproducible held-out split driven by the request parameters.
    train_test = dataset["train"].train_test_split(test_size=request.test_size, seed=request.test_seed)
    test_dataset = train_test["test"]

    # Start tracking emissions around the inference section only.
    tracker.start()
    tracker.start_task("inference")

    #--------------------------------------------------------------------------------------------
    # YOUR MODEL INFERENCE CODE HERE
    # Update the code below to replace the random baseline with your model inference
    #--------------------------------------------------------------------------------------------

    predictions = []       # per-image 0/1 smoke classification guesses
    true_labels = []       # per-image 0/1 ground truth (1 = annotation present)
    pred_boxes = []        # one random box per smoke-positive image
    true_boxes_list = []  # List of lists, each inner list contains boxes for one image

    for example in test_dataset:
        # Parse true annotation (YOLO format: class_id x_center y_center width height).
        # An empty annotation string means "no smoke" for this image.
        annotation = example.get("annotations", "").strip()
        has_smoke = len(annotation) > 0
        true_labels.append(int(has_smoke))

        # Make random classification prediction (fair coin).
        pred_has_smoke = random.random() > 0.5
        predictions.append(int(pred_has_smoke))

        # If there's a true box, parse it and make random box prediction.
        # NOTE(review): boxes are appended to true_boxes_list and pred_boxes
        # in lockstep here, which is what keeps the zip() below aligned —
        # preserve this invariant when replacing the baseline.
        if has_smoke:
            # Parse all true boxes from the annotation
            image_true_boxes = parse_boxes(annotation)
            true_boxes_list.append(image_true_boxes)

            # For baseline, make one random box prediction per image
            # In a real model, you might want to predict multiple boxes
            random_box = [
                random.random(),  # x_center
                random.random(),  # y_center
                random.random() * 0.5,  # width (max 0.5)
                random.random() * 0.5  # height (max 0.5)
            ]
            pred_boxes.append(random_box)

    #--------------------------------------------------------------------------------------------
    # YOUR MODEL INFERENCE STOPS HERE
    #--------------------------------------------------------------------------------------------

    # Stop tracking emissions for the inference task.
    emissions_data = tracker.stop_task()

    # Calculate classification accuracy over all images.
    classification_accuracy = accuracy_score(true_labels, predictions)

    # Calculate mean IoU for object detection (only for images with smoke).
    # For each image, we compute the max IoU between the predicted box and all true boxes.
    ious = []
    for true_boxes, pred_box in zip(true_boxes_list, pred_boxes):
        max_iou = compute_max_iou(true_boxes, pred_box)
        ious.append(max_iou)

    # 0.0 when no image in the split had a ground-truth box.
    mean_iou = float(np.mean(ious)) if ious else 0.0

    # Result payload returned to the leaderboard caller.
    # NOTE(review): the * 1000 presumably converts kWh -> Wh and
    # kgCO2eq -> gCO2eq — confirm against the tracker's units.
    results = {
        "username": username,
        "space_url": space_url,
        "submission_timestamp": datetime.now().isoformat(),
        "model_description": DESCRIPTION,
        "classification_accuracy": float(classification_accuracy),
        "mean_iou": mean_iou,
        "energy_consumed_wh": emissions_data.energy_consumed * 1000,
        "emissions_gco2eq": emissions_data.emissions * 1000,
        "emissions_data": clean_emissions_data(emissions_data),
        "api_route": ROUTE,
        "dataset_config": {
            "dataset_name": request.dataset_name,
            "test_size": request.test_size,
            "test_seed": request.test_seed
        }
    }

    return results