TheoLvs committed on
Commit
fe4a4cb
·
1 Parent(s): 1c33274

Added audio task

Browse files
Files changed (2) hide show
  1. tasks/audio.py +68 -8
  2. tasks/utils/evaluation.py +2 -2
tasks/audio.py CHANGED
@@ -1,32 +1,92 @@
1
  from fastapi import APIRouter
 
 
 
 
 
2
  from .utils.evaluation import AudioEvaluationRequest
3
- from .utils.emissions import get_space_info
4
 
5
  router = APIRouter()
6
 
7
  DESCRIPTION = "Random Baseline"
8
  ROUTE = "/audio"
9
 
 
10
  @router.post(ROUTE, tags=["Audio Task"],
11
  description=DESCRIPTION)
12
  async def evaluate_audio(request: AudioEvaluationRequest):
13
  """
14
- Evaluate audio classification.
15
 
16
  Current Model: Random Baseline
17
- - Makes random predictions
18
  - Used as a baseline for comparison
19
  """
 
20
  username, space_url = get_space_info()
21
- return {
22
- "message": "Audio evaluation endpoint not yet implemented",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
  "username": username,
24
  "space_url": space_url,
 
25
  "model_description": DESCRIPTION,
26
- "route": ROUTE,
27
- "received_config": {
 
 
 
 
28
  "dataset_name": request.dataset_name,
29
  "test_size": request.test_size,
30
  "test_seed": request.test_seed
31
  }
32
- }
 
 
 
1
  from fastapi import APIRouter
2
+ from datetime import datetime
3
+ from datasets import load_dataset
4
+ from sklearn.metrics import accuracy_score
5
+ import random
6
+
7
  from .utils.evaluation import AudioEvaluationRequest
8
+ from .utils.emissions import tracker, clean_emissions_data, get_space_info
9
 
10
  router = APIRouter()
11
 
12
  DESCRIPTION = "Random Baseline"
13
  ROUTE = "/audio"
14
 
15
+
# String label -> integer class id used for scoring.
LABEL_MAPPING = {
    "chainsaw": 0,
    "environment": 1,
}


def _encode_label(label):
    """Return the integer class id for *label*.

    String names are translated through ``LABEL_MAPPING``. Values that are
    already one of the mapped class ids are returned unchanged, so a dataset
    that stores integer labels does not fail.

    Raises:
        KeyError: if *label* is neither a known name nor a known class id.
    """
    if label in LABEL_MAPPING:
        return LABEL_MAPPING[label]
    if label in LABEL_MAPPING.values():
        return label
    raise KeyError(f"Unknown label {label!r}; expected one of {sorted(LABEL_MAPPING)}")


def _login_to_hub():
    """Best-effort, non-interactive login to the Hugging Face Hub.

    Only attempts a login when the ``HF_TOKEN`` environment variable is set:
    calling ``login()`` without a token may prompt on stdin, which must never
    happen inside a request handler. Any failure (missing package, invalid
    token, network error) is logged and ignored so public datasets still load.
    """
    import logging
    import os

    token = os.environ.get("HF_TOKEN")
    if not token:
        return
    try:
        from huggingface_hub import login

        login(token=token)
    except Exception:  # deliberate best-effort: log instead of silently swallowing
        logging.getLogger(__name__).warning(
            "Hugging Face Hub login failed; continuing without it", exc_info=True
        )


@router.post(ROUTE, tags=["Audio Task"], description=DESCRIPTION)
async def evaluate_audio(request: AudioEvaluationRequest):
    """
    Evaluate audio classification for rainforest sound detection.

    Current Model: Random Baseline
    - Makes random predictions from the label space (0-1)
    - Used as a baseline for comparison

    Args:
        request: evaluation settings; ``dataset_name``, ``test_size`` and
            ``test_seed`` are read from it.

    Returns:
        A dict with the submitter info, a timestamp, the accuracy on the
        held-out split, the tracked energy/emissions figures and the dataset
        configuration that was used.
    """
    # Get space info
    username, space_url = get_space_info()

    _login_to_hub()

    # Load the dataset and convert the labels of the split we actually use.
    dataset = load_dataset(request.dataset_name)
    train_split = dataset["train"].map(lambda x: {"label": _encode_label(x["label"])})

    # Split dataset
    train_test = train_split.train_test_split(
        test_size=request.test_size, seed=request.test_seed
    )
    test_dataset = train_test["test"]

    # Start tracking emissions
    tracker.start()
    tracker.start_task("inference")
    try:
        #--------------------------------------------------------------------------------------------
        # YOUR MODEL INFERENCE CODE HERE
        # Update the code below to replace the random baseline by your model inference within the inference pass where the energy consumption and emissions are tracked.
        #--------------------------------------------------------------------------------------------

        # Make random predictions (placeholder for actual model inference)
        true_labels = test_dataset["label"]
        label_ids = list(LABEL_MAPPING.values())
        predictions = random.choices(label_ids, k=len(true_labels))

        #--------------------------------------------------------------------------------------------
        # YOUR MODEL INFERENCE STOPS HERE
        #--------------------------------------------------------------------------------------------
    finally:
        # Always close the task, even when inference raises, so the shared
        # tracker is not left with a dangling "inference" task.
        emissions_data = tracker.stop_task()

    # Calculate accuracy
    accuracy = accuracy_score(true_labels, predictions)

    # Prepare results dictionary
    results = {
        "username": username,
        "space_url": space_url,
        # NOTE(review): naive server-local time; kept as-is so the output format is unchanged.
        "submission_timestamp": datetime.now().isoformat(),
        "model_description": DESCRIPTION,
        "accuracy": float(accuracy),
        "energy_consumed_wh": emissions_data.energy_consumed * 1000,
        "emissions_gco2eq": emissions_data.emissions * 1000,
        "emissions_data": clean_emissions_data(emissions_data),
        "api_route": ROUTE,
        "dataset_config": {
            "dataset_name": request.dataset_name,
            "test_size": request.test_size,
            "test_seed": request.test_seed,
        },
    }

    return results
tasks/utils/evaluation.py CHANGED
@@ -10,9 +10,9 @@ class TextEvaluationRequest(BaseEvaluationRequest):
10
  description="The name of the dataset on HuggingFace Hub")
11
 
12
  class ImageEvaluationRequest(BaseEvaluationRequest):
13
- dataset_name: str = Field("placeholder/frugalaichallenge-image-train",
14
  description="The name of the dataset on HuggingFace Hub")
15
 
16
  class AudioEvaluationRequest(BaseEvaluationRequest):
17
- dataset_name: str = Field("placeholder/frugalaichallenge-audio-train",
18
  description="The name of the dataset on HuggingFace Hub")
 
10
  description="The name of the dataset on HuggingFace Hub")
11
 
class ImageEvaluationRequest(BaseEvaluationRequest):
    # Request model for the image task. Only the dataset default is declared
    # here; presumably the other settings come from BaseEvaluationRequest
    # (not visible in this hunk — verify).
    dataset_name: str = Field(
        default="pyronear/pyro-sdis",
        description="The name of the dataset on HuggingFace Hub",
    )
15
 
class AudioEvaluationRequest(BaseEvaluationRequest):
    # Request model for the audio task. Only the dataset default is declared
    # here; presumably the other settings come from BaseEvaluationRequest
    # (not visible in this hunk — verify).
    dataset_name: str = Field(
        default="rfcx/frugalai",
        description="The name of the dataset on HuggingFace Hub",
    )