TheoLvs committed on
Commit
3b09640
·
1 Parent(s): fe4a4cb

Updated API

Browse files
Files changed (3) hide show
  1. requirements.txt +2 -1
  2. tasks/audio.py +8 -12
  3. tasks/image.py +148 -8
requirements.txt CHANGED
@@ -6,4 +6,5 @@ scikit-learn>=1.0.2
6
  pydantic>=1.10.0
7
  python-dotenv>=1.0.0
8
  gradio>=4.0.0
9
- requests>=2.31.0
 
 
6
  pydantic>=1.10.0
7
  python-dotenv>=1.0.0
8
  gradio>=4.0.0
9
+ requests>=2.31.0
10
+ librosa==0.10.2.post1
tasks/audio.py CHANGED
@@ -3,16 +3,21 @@ from datetime import datetime
3
  from datasets import load_dataset
4
  from sklearn.metrics import accuracy_score
5
  import random
 
6
 
7
  from .utils.evaluation import AudioEvaluationRequest
8
  from .utils.emissions import tracker, clean_emissions_data, get_space_info
9
 
 
 
 
10
  router = APIRouter()
11
 
12
  DESCRIPTION = "Random Baseline"
13
  ROUTE = "/audio"
14
 
15
 
 
16
  @router.post(ROUTE, tags=["Audio Task"],
17
  description=DESCRIPTION)
18
  async def evaluate_audio(request: AudioEvaluationRequest):
@@ -31,19 +36,10 @@ async def evaluate_audio(request: AudioEvaluationRequest):
31
  "chainsaw": 0,
32
  "environment": 1
33
  }
34
-
35
- try:
36
- from huggingface_hub import login
37
- login()
38
- except:
39
- pass
40
-
41
  # Load and prepare the dataset
42
- dataset = load_dataset(request.dataset_name)
43
-
44
- # Convert string labels to integers
45
- dataset = dataset.map(lambda x: {"label": LABEL_MAPPING[x["label"]]})
46
-
47
  # Split dataset
48
  train_test = dataset["train"].train_test_split(test_size=request.test_size, seed=request.test_seed)
49
  test_dataset = train_test["test"]
 
3
  from datasets import load_dataset
4
  from sklearn.metrics import accuracy_score
5
  import random
6
+ import os
7
 
8
  from .utils.evaluation import AudioEvaluationRequest
9
  from .utils.emissions import tracker, clean_emissions_data, get_space_info
10
 
11
+ from dotenv import load_dotenv
12
+ load_dotenv()
13
+
14
  router = APIRouter()
15
 
16
  DESCRIPTION = "Random Baseline"
17
  ROUTE = "/audio"
18
 
19
 
20
+
21
  @router.post(ROUTE, tags=["Audio Task"],
22
  description=DESCRIPTION)
23
  async def evaluate_audio(request: AudioEvaluationRequest):
 
36
  "chainsaw": 0,
37
  "environment": 1
38
  }
 
 
 
 
 
 
 
39
  # Load and prepare the dataset
40
+ # Because the dataset is gated, we need to use the HF_TOKEN environment variable to authenticate
41
+ dataset = load_dataset(request.dataset_name,token=os.getenv("HF_TOKEN"))
42
+
 
 
43
  # Split dataset
44
  train_test = dataset["train"].train_test_split(test_size=request.test_size, seed=request.test_seed)
45
  test_dataset = train_test["test"]
tasks/image.py CHANGED
@@ -1,32 +1,172 @@
1
  from fastapi import APIRouter
 
 
 
 
 
 
 
2
  from .utils.evaluation import ImageEvaluationRequest
3
- from .utils.emissions import get_space_info
 
 
 
4
 
5
  router = APIRouter()
6
 
7
  DESCRIPTION = "Random Baseline"
8
  ROUTE = "/image"
9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
  @router.post(ROUTE, tags=["Image Task"],
11
  description=DESCRIPTION)
12
  async def evaluate_image(request: ImageEvaluationRequest):
13
  """
14
- Evaluate image classification.
15
 
16
  Current Model: Random Baseline
17
- - Makes random predictions
18
  - Used as a baseline for comparison
 
 
 
 
19
  """
 
20
  username, space_url = get_space_info()
21
- return {
22
- "message": "Image evaluation endpoint not yet implemented",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
  "username": username,
24
  "space_url": space_url,
25
- "route": ROUTE,
26
  "model_description": DESCRIPTION,
27
- "received_config": {
 
 
 
 
 
 
28
  "dataset_name": request.dataset_name,
29
  "test_size": request.test_size,
30
  "test_seed": request.test_seed
31
  }
32
- }
 
 
 
1
  from fastapi import APIRouter
2
+ from datetime import datetime
3
+ from datasets import load_dataset
4
+ import numpy as np
5
+ from sklearn.metrics import accuracy_score
6
+ import random
7
+ import os
8
+
9
  from .utils.evaluation import ImageEvaluationRequest
10
+ from .utils.emissions import tracker, clean_emissions_data, get_space_info
11
+
12
+ from dotenv import load_dotenv
13
+ load_dotenv()
14
 
15
  router = APIRouter()
16
 
17
  DESCRIPTION = "Random Baseline"
18
  ROUTE = "/image"
19
 
20
def parse_boxes(annotation_string):
    """Parse every bounding box out of a single annotation string.

    The string is a whitespace-separated run of numbers in which each box
    occupies five consecutive values:
    ``class_id x_center y_center width height``.

    Args:
        annotation_string: Annotation text. ``None`` or an empty/blank
            string is treated as "no boxes".

    Returns:
        A list of ``[x_center, y_center, width, height]`` lists of floats,
        one per box, with the leading class id dropped. Trailing values that
        do not form a complete group of five are ignored.

    Raises:
        ValueError: If a token cannot be converted to ``float``.
    """
    if not annotation_string:
        return []

    values = [float(token) for token in annotation_string.split()]

    # Step through complete groups of five only; the upper bound of the range
    # excludes any trailing partial group. Index 0 of each group is the class
    # id, which is skipped.
    return [values[start + 1:start + 5] for start in range(0, len(values) - 4, 5)]
32
+
33
def compute_iou(box1, box2, eps=1e-6):
    """Compute Intersection over Union (IoU) between two YOLO format boxes.

    Args:
        box1: Sequence ``(x_center, y_center, width, height)``.
        box2: Sequence ``(x_center, y_center, width, height)``.
        eps: Small constant added to the union in the denominator so that two
            zero-area boxes do not divide by zero. The default matches the
            previously hard-coded value; pass ``0`` for an exact IoU.

    Returns:
        ``intersection / (union + eps)`` as a Python ``float``. Returns
        ``0.0`` if that denominator is exactly zero.
    """
    def yolo_to_corners(box):
        # Convert (x_center, y_center, width, height) to (x1, y1, x2, y2)
        x_center, y_center, width, height = box
        return (
            x_center - width / 2,
            y_center - height / 2,
            x_center + width / 2,
            y_center + height / 2,
        )

    left1, top1, right1, bottom1 = yolo_to_corners(box1)
    left2, top2, right2, bottom2 = yolo_to_corners(box2)

    # Overlap extent along each axis, clamped to zero when the boxes are disjoint
    overlap_w = max(0.0, min(right1, right2) - max(left1, left2))
    overlap_h = max(0.0, min(bottom1, bottom2) - max(top1, top2))
    intersection = overlap_w * overlap_h

    area1 = (right1 - left1) * (bottom1 - top1)
    area2 = (right2 - left2) * (bottom2 - top2)
    union = area1 + area2 - intersection

    denominator = union + eps
    if denominator == 0:
        # Only reachable with degenerate boxes (e.g. eps=0 and zero areas)
        return 0.0
    return float(intersection / denominator)
61
+
62
def compute_max_iou(true_boxes, pred_box):
    """Compute maximum IoU between a predicted box and all true boxes.

    Args:
        true_boxes: Iterable of ground-truth boxes, each in the format
            accepted by ``compute_iou``.
        pred_box: The predicted box, in the same format.

    Returns:
        The largest IoU found, never below ``0.0``. Returns ``0.0`` when
        ``true_boxes`` is empty.
    """
    best = max(
        (compute_iou(true_box, pred_box) for true_box in true_boxes),
        default=0.0,
    )
    # Floor at zero, as the original running maximum started from 0
    return max(0.0, best)
69
+
70
@router.post(ROUTE, tags=["Image Task"],
             description=DESCRIPTION)
async def evaluate_image(request: ImageEvaluationRequest):
    """
    Evaluate image classification and object detection for forest fire smoke.

    Current Model: Random Baseline
    - Makes random predictions for both classification and bounding boxes
    - Used as a baseline for comparison

    Metrics:
    - Classification accuracy: Whether an image contains smoke or not
    - Object Detection accuracy: IoU (Intersection over Union) for smoke bounding boxes

    Args:
        request: Supplies ``dataset_name``, ``test_size`` and ``test_seed``,
            which select the dataset and the train/test split.

    Returns:
        A dict with the submitter info, a timestamp, the two metrics, the
        tracked energy/emissions figures, the route, and the dataset
        configuration that was evaluated.
    """
    # Get space info
    username, space_url = get_space_info()

    # Load and prepare the dataset (authenticated with HF_TOKEN when it is set)
    dataset = load_dataset(request.dataset_name, token=os.getenv("HF_TOKEN"))

    # Split dataset
    train_test = dataset["train"].train_test_split(
        test_size=request.test_size, seed=request.test_seed
    )
    test_dataset = train_test["test"]

    # Start tracking emissions
    tracker.start()
    tracker.start_task("inference")

    #--------------------------------------------------------------------------------------------
    # YOUR MODEL INFERENCE CODE HERE
    # Update the code below to replace the random baseline with your model inference
    #--------------------------------------------------------------------------------------------

    predictions = []
    true_labels = []
    pred_boxes = []
    true_boxes_list = []  # List of lists, each inner list contains boxes for one image

    for example in test_dataset:
        # Parse true annotation (YOLO format: class_id x_center y_center width height).
        # `or ""` covers rows where the column is present but null: the default
        # of dict.get only applies when the key is missing entirely.
        annotation = (example.get("annotations") or "").strip()
        has_smoke = len(annotation) > 0
        true_labels.append(int(has_smoke))

        # Make random classification prediction
        pred_has_smoke = random.random() > 0.5
        predictions.append(int(pred_has_smoke))

        # If there's a true box, parse it and make random box prediction
        if has_smoke:
            # Parse all true boxes from the annotation
            image_true_boxes = parse_boxes(annotation)
            true_boxes_list.append(image_true_boxes)

            # For baseline, make one random box prediction per image
            # In a real model, you might want to predict multiple boxes
            random_box = [
                random.random(),        # x_center
                random.random(),        # y_center
                random.random() * 0.5,  # width (max 0.5)
                random.random() * 0.5,  # height (max 0.5)
            ]
            pred_boxes.append(random_box)

    #--------------------------------------------------------------------------------------------
    # YOUR MODEL INFERENCE STOPS HERE
    #--------------------------------------------------------------------------------------------

    # Stop tracking emissions
    emissions_data = tracker.stop_task()

    # Calculate classification accuracy
    classification_accuracy = accuracy_score(true_labels, predictions)

    # Calculate mean IoU for object detection (only for images with smoke)
    # For each image, we compute the max IoU between the predicted box and all true boxes
    ious = [
        compute_max_iou(true_boxes, pred_box)
        for true_boxes, pred_box in zip(true_boxes_list, pred_boxes)
    ]
    mean_iou = float(np.mean(ious)) if ious else 0.0

    # Prepare results dictionary
    # NOTE(review): the * 1000 factors presumably convert kWh -> Wh and
    # kg -> g, judging by the key names; confirm against the tracker's units.
    results = {
        "username": username,
        "space_url": space_url,
        "submission_timestamp": datetime.now().isoformat(),
        "model_description": DESCRIPTION,
        "classification_accuracy": float(classification_accuracy),
        "mean_iou": mean_iou,
        "energy_consumed_wh": emissions_data.energy_consumed * 1000,
        "emissions_gco2eq": emissions_data.emissions * 1000,
        "emissions_data": clean_emissions_data(emissions_data),
        "api_route": ROUTE,
        "dataset_config": {
            "dataset_name": request.dataset_name,
            "test_size": request.test_size,
            "test_seed": request.test_seed
        }
    }

    return results