IlayMalinyak committed
Commit 766ed77
1 Parent(s): 47127a2

sanity check

tasks/audio.py CHANGED
@@ -132,11 +132,11 @@ async def evaluate_audio(request: AudioEvaluationRequest):
 
     return results
 
-if __name__ == "__main__":
-    sample_request = AudioEvaluationRequest(
-        dataset_name="rfcx/frugalai",  # Replace with actual dataset name
-        test_size=0.2,  # Example values
-        test_seed=42
-    )
-    #
-    asyncio.run(evaluate_audio(sample_request))
+# if __name__ == "__main__":
+#     sample_request = AudioEvaluationRequest(
+#         dataset_name="rfcx/frugalai",  # Replace with actual dataset name
+#         test_size=0.2,  # Example values
+#         test_seed=42
+#     )
+#     #
+#     asyncio.run(evaluate_audio(sample_request))
tasks/models/frugal_2025-02-01/frugal_kan_features_2.pth ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d3fbc9f7a73a40a99863fbbf70e244598d2594e451f01737812b553e354541c2
+size 614523
tasks/run.py CHANGED
@@ -2,7 +2,7 @@ from torch.utils.data import DataLoader
 from .utils.data import FFTDataset, SplitDataset
 from datasets import load_dataset
 from .utils.train import Trainer, XGBoostTrainer
-from .utils.models import CNNKan, KanEncoder, CNNKanFeaturesEncoder
+from .utils.models import CNNKan, KanEncoder, CNNKanFeaturesEncoder, CNNFeaturesEncoder
 from .utils.data_utils import *
 from huggingface_hub import login
 import yaml
@@ -13,6 +13,42 @@ import pandas as pd
 import seaborn as sns
 import matplotlib.pyplot as plt
 from collections import OrderedDict
+import xgboost as xgb
+from tqdm import tqdm
+from sklearn.metrics import accuracy_score, classification_report, roc_auc_score
+from sklearn.model_selection import train_test_split
+import warnings
+
+warnings.filterwarnings("ignore")
+
+
+def create_dataframe(ds, save_name='train'):
+    # Load the cached feature table if it exists; otherwise build it from the dataset.
+    try:
+        df = pd.read_csv(f"tasks/utils/dfs/{save_name}.csv")
+    except FileNotFoundError:
+        data = []
+
+        # Iterate over the dataset
+        pbar = tqdm(enumerate(ds))
+        for i, batch in pbar:
+            label = batch['label']
+            features = batch['audio']['features']
+
+            # Flatten the nested dictionary structure
+            # (NOTE: compute_all_features now returns a flat dict, so the
+            # isinstance check below only fires for the older nested format;
+            # in practice the cached CSV above is what gets used.)
+            feature_dict = {'label': label}
+            for k, v in features.items():
+                if isinstance(v, dict):
+                    for sub_k, sub_v in v.items():
+                        feature_dict[f"{k}_{sub_k}"] = sub_v[0].item()  # each value is a 1-element tensor
+            data.append(feature_dict)
+        # Convert to DataFrame and cache it
+        df = pd.DataFrame(data)
+        print(os.getcwd())
+        df.to_csv(f"tasks/utils/dfs/{save_name}.csv", index=False)
+    X = df.drop(columns=['label'])
+    y = df['label']
+    return X, y
 
 # local_rank = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
 current_date = datetime.date.today().strftime("%Y-%m-%d")
@@ -37,18 +73,62 @@ with open("../logs//token.txt", "r") as f:
 local_rank = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
 login(api_key)
 dataset = load_dataset("rfcx/frugalai", streaming=True)
+full_ds = FFTDataset(dataset["train"], features=True)
 
-train_ds = SplitDataset(FFTDataset(dataset["train"]), is_train=True)
+train_ds = SplitDataset(FFTDataset(dataset["train"], features=True), is_train=True)
 
 train_dl = DataLoader(train_ds, batch_size=data_args.batch_size, collate_fn=collate_fn)
 
-val_ds = SplitDataset(FFTDataset(dataset["train"]), is_train=False)
+val_ds = SplitDataset(FFTDataset(dataset["train"], features=True), is_train=False)
 
 val_dl = DataLoader(val_ds, batch_size=data_args.batch_size, collate_fn=collate_fn)
 
-test_ds = FFTDataset(dataset["test"])
+test_ds = FFTDataset(dataset["test"], features=True)
 test_dl = DataLoader(test_ds, batch_size=data_args.batch_size, collate_fn=collate_fn)
 
+x, y = create_dataframe(full_ds, save_name='train_val')
+print(x.shape)
+x_train, x_val, y_train, y_val = train_test_split(x, y, test_size=0.2, random_state=42)
+
+evals_result = {}
+num_boost_round = 1000  # Upper bound on boosting rounds; early stopping picks the actual count
+
+# Watchlist to monitor performance on train and validation data
+dtrain = xgb.DMatrix(x_train, label=y_train)
+dval = xgb.DMatrix(x_val, label=y_val)
+watchlist = [(dtrain, 'train'), (dval, 'eval')]
+params = {
+    'objective': 'binary:logistic',
+    'eval_metric': 'logloss',
+    **boost_args.get_dict()
+}
+# Train the model
+xgb_model = xgb.train(
+    params,
+    dtrain,
+    num_boost_round=num_boost_round,
+    evals=watchlist,
+    early_stopping_rounds=10,  # Stop after 10 rounds with no improvement on 'eval'
+    evals_result=evals_result,
+    verbose_eval=False  # Suppress per-iteration evaluation output
+)
+
+xgb_pred = xgb_model.predict(dval, output_margin=False)  # Probability of class 1
+# xgb_pred = torch.tensor(xgb_pred, dtype=torch.float32, device=x.device).unsqueeze(1)
+y_pred = (xgb_pred >= 0.5).astype(int)
+
+accuracy = accuracy_score(y_val, y_pred)
+roc_auc = roc_auc_score(y_val, y_pred)  # NOTE: scores hard labels; passing xgb_pred would score probabilities
+
+print(f'Accuracy: {accuracy:.4f}')
+print(f'ROC AUC Score: {roc_auc:.4f}')
+# Number of boosted trees kept after early stopping
+num_xgb_features = xgb_model.best_iteration + 1
+print(num_xgb_features)
+
 # data = []
 #
 # # Iterate over the dataset
@@ -92,7 +172,8 @@ test_dl = DataLoader(test_ds, batch_size=data_args.batch_size, collate_fn=collate_fn)
 # model = DualEncoder(model_args, model_args_f, conformer_args)
 # model = FasterKAN([18000,64,64,16,1])
 # model = CNNKan(model_args, conformer_args, kan_args.get_dict())
-model = CNNKanFeaturesEncoder(model_args, mlp_args, kan_args.get_dict())
+# model = CNNKanFeaturesEncoder(xgb_model, model_args, kan_args.get_dict())
+model = CNNFeaturesEncoder(xgb_model, model_args)
 # model.kan.speed()
 # model = KanEncoder(kan_args.get_dict())
 model = model.to(local_rank)
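
The core trick in this commit is reusing the boosted trees as a feature extractor: predict(..., pred_leaf=True) returns, for each sample, the index of the leaf it reaches in every boosting round, and best_iteration + 1 is the number of trees kept by early stopping. A minimal self-contained sketch of that mechanism on toy data (all names here are illustrative, not from the repo):

    import numpy as np
    import xgboost as xgb

    # Toy data: 100 samples, 5 features, binary labels.
    rng = np.random.default_rng(0)
    X = rng.normal(size=(100, 5))
    y = (X[:, 0] > 0).astype(int)

    dtrain = xgb.DMatrix(X, label=y)
    model = xgb.train({'objective': 'binary:logistic'}, dtrain, num_boost_round=20)

    # pred_leaf=True yields one leaf index per tree: shape (n_samples, n_rounds).
    # These leaf IDs are what CNNFeaturesEncoder concatenates onto the CNN embedding.
    leaves = model.predict(dtrain, pred_leaf=True)
    print(leaves.shape)  # (100, 20)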
tasks/utils/data.py CHANGED
@@ -52,11 +52,16 @@ class SplitDataset(IterableDataset):
 
 
 class FFTDataset(IterableDataset):
-    def __init__(self, original_dataset, max_len=72000, orig_sample_rate=12000, target_sample_rate=3000):
+    def __init__(self, original_dataset,
+                 max_len=72000,
+                 orig_sample_rate=12000,
+                 target_sample_rate=3000,
+                 features=False):
         self.dataset = original_dataset
         self.resampler = T.Resample(orig_freq=orig_sample_rate, new_freq=target_sample_rate)
         self.target_sample_rate = target_sample_rate
         self.max_len = max_len
+        self.features = features
 
 
     def normalize_audio(self, audio):
@@ -89,8 +94,10 @@ class FFTDataset(IterableDataset):
             fft_data = fft(audio_data)
             magnitude = torch.abs(fft_data)
             phase = torch.angle(fft_data)
-            features = compute_all_features(audio_data, sample_rate=self.target_sample_rate)
-            features_arr = torch.tensor([v for _, v in features['frequency_domain'].items()])
+            if self.features:
+                features = compute_all_features(audio_data, sample_rate=self.target_sample_rate)
+                # features_arr = torch.tensor([v for _, v in features['frequency_domain'].items()])
+                item['audio']['features'] = features
             magnitude_centered = fftshift(magnitude)
             phase_centered = fftshift(phase)
             # cwt = features['cwt_power']
@@ -103,7 +110,6 @@ class FFTDataset(IterableDataset):
             # item['audio']['cwt_mag'] = torch.nan_to_num(cwt, 0)
             item['audio']['array'] = torch.nan_to_num(audio_data, 0)
             # item['audio']['features'] = features
-            item['audio']['features_arr'] = torch.nan_to_num(features_arr, 0)
             yield item
 
 
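For reference, a sketch of what the new features flag changes in the emitted items (absolute import path and streaming access assumed, untested here):

    from datasets import load_dataset
    from tasks.utils.data import FFTDataset  # assumed import path; the repo uses relative imports

    ds = load_dataset("rfcx/frugalai", streaming=True)
    plain = next(iter(FFTDataset(ds["train"])))
    rich = next(iter(FFTDataset(ds["train"], features=True)))
    print('features' in plain['audio'])  # False: flag off, no feature computation
    print('features' in rich['audio'])   # True: compute_all_features ran for this item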
tasks/utils/data_utils.py CHANGED
@@ -5,10 +5,10 @@ from torch.nn.utils.rnn import pad_sequence
 
 def collate_fn(batch):
     # Extract audio arrays and FFT data from the batch of dictionaries
-    audio_arrays = [torch.tensor(item['audio']['array']) for item in batch]
-    fft_arrays = [torch.tensor(item['audio']['fft_mag']) for item in batch]
+    audio_arrays = [item['audio']['array'] for item in batch]
+    fft_arrays = [item['audio']['fft_mag'] for item in batch]
     # cwt_arrays = [torch.tensor(item['audio']['cwt_mag']) for item in batch]
-    # features = [item['audio']['features'] for item in batch]
+    features = [item['audio']['features'] for item in batch]
     # features_arr = torch.stack([item['audio']['features_arr'] for item in batch])
     labels = [torch.tensor(item['label']) for item in batch]
 
@@ -22,7 +22,7 @@ def collate_fn(batch):
         'audio': {
             'array': padded_audio,
             'fft_mag': padded_fft,
-            # 'features': features,
+            'features': features,
             # 'features_arr': features_arr,
             # 'cwt_mag': padded_cwt,
         },
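
Note that features is passed through as a plain Python list of per-item dicts rather than padded into a tensor, since the models flatten it into a DataFrame themselves. It also means collate_fn now assumes every dataset was built with features=True; a hypothetical defensive variant would be:

    features = [item['audio'].get('features') for item in batch]  # None instead of KeyError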
tasks/utils/dfs/train_val.csv ADDED
The diff for this file is too large to render. See raw diff
 
tasks/utils/models.py CHANGED
@@ -3,6 +3,10 @@ import torch.nn as nn
 from .Modules.conformer import ConformerEncoder, ConformerDecoder
 from .Modules.mhsa_pro import RotaryEmbedding, ContinuousRotaryEmbedding
 from .kan.fasterkan import FasterKAN
+import numpy as np
+import xgboost as xgb
+import pandas as pd
+
 
 
 class Sine(nn.Module):
@@ -161,6 +165,46 @@ class DualEncoder(nn.Module):
         logits = torch.cat([x1, x2], dim=-1)
         return self.regressor(logits).squeeze()
 
+class CNNFeaturesEncoder(nn.Module):
+    def __init__(self, xgb_model, args, mlp_hidden=64):
+        super().__init__()
+        self.xgb_model = xgb_model
+        self.best_xgb_features = xgb_model.best_iteration + 1
+        self.backbone = CNNEncoder(args)
+        self.total_features = self.best_xgb_features + args.encoder_dims[-1]
+        self.mlp = nn.Sequential(
+            nn.Linear(self.total_features, mlp_hidden),
+            nn.BatchNorm1d(mlp_hidden),
+            nn.SiLU(),
+            nn.Linear(mlp_hidden, mlp_hidden),
+            nn.BatchNorm1d(mlp_hidden),
+            nn.SiLU(),
+            nn.Linear(mlp_hidden, 1),
+        )
+
+    def _create_features_data(self, features):
+        # Build a one-row-per-sample DataFrame from the list of per-item feature dicts
+        batch_size = len(features)
+        data = []
+        for batch_idx in range(batch_size):
+            feature_dict = {}
+            for k, v in features[batch_idx].items():
+                feature_dict[f"frequency_domain_{k}"] = v[0].item()
+            data.append(feature_dict)
+        return pd.DataFrame(data)
+
+    def forward(self, x: torch.Tensor, f) -> torch.Tensor:
+        x = self.backbone(x)
+        x = x.mean(dim=-1)
+        f_np = self._create_features_data(f)
+        dtest = xgb.DMatrix(f_np)  # Convert input to DMatrix
+        xgb_features = self.xgb_model.predict(dtest, pred_leaf=True).astype(np.float32)
+        xgb_features = torch.tensor(xgb_features, dtype=torch.float32, device=x.device)
+        x_f = torch.cat([x, xgb_features[:, :self.best_xgb_features]], dim=1)
+        return self.mlp(x_f)
+
 class CNNKan(nn.Module):
     def __init__(self, args, conformer_args, kan_args):
         super().__init__()
@@ -173,18 +217,35 @@ class CNNKan(nn.Module):
         return self.kan(x)
 
 class CNNKanFeaturesEncoder(nn.Module):
-    def __init__(self, args, mlp_args, kan_args):
+    def __init__(self, xgb_model, args, kan_args):
         super().__init__()
+        self.xgb_model = xgb_model
+        self.best_xgb_features = xgb_model.best_iteration + 1
         self.backbone = CNNEncoder(args)
-        self.mlp = MLPEncoder(mlp_args)
-        kan_args['layers_hidden'][0] += self.mlp.output_dim
+        kan_args['layers_hidden'][0] += self.best_xgb_features
         self.kan = FasterKAN(**kan_args)
 
-    def forward(self, x: torch.Tensor, f: torch.Tensor) -> torch.Tensor:
+    def _create_features_data(self, features):
+        # Same flattening as CNNFeaturesEncoder: list of dicts -> DataFrame
+        batch_size = len(features)
+        data = []
+        for batch_idx in range(batch_size):
+            feature_dict = {}
+            for k, v in features[batch_idx].items():
+                feature_dict[f"frequency_domain_{k}"] = v[0].item()
+            data.append(feature_dict)
+        return pd.DataFrame(data)
+
+    def forward(self, x: torch.Tensor, f) -> torch.Tensor:
         x = self.backbone(x)
         x = x.mean(dim=1)
-        f = self.mlp(f)
-        x_f = torch.cat([x, f], dim=-1)
+        f_np = self._create_features_data(f)
+        dtest = xgb.DMatrix(f_np)  # Convert input to DMatrix
+        xgb_features = self.xgb_model.predict(dtest, pred_leaf=True).astype(np.float32)
+        xgb_features = torch.tensor(xgb_features, dtype=torch.float32, device=x.device)
+        x_f = torch.cat([x, xgb_features[:, :self.best_xgb_features]], dim=1)
         return self.kan(x_f)
 
 class KanEncoder(nn.Module):
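
Both fusion modules cast the raw leaf indices to float and concatenate them with the pooled CNN embedding. A quick shape check of that concat with made-up sizes (cnn_dim and n_trees are illustrative):

    import torch

    B, cnn_dim, n_trees = 4, 128, 37  # batch, pooled CNN width, trees kept by early stopping
    x = torch.randn(B, cnn_dim)                            # backbone output after mean-pooling
    leaf_idx = torch.randint(0, 64, (B, n_trees)).float()  # stands in for the pred_leaf output
    x_f = torch.cat([x, leaf_idx], dim=1)
    print(x_f.shape)  # torch.Size([4, 165]) -> must match total_features / layers_hidden[0]

Treating leaf IDs as ordinal floats is a shortcut; a common alternative is one-hot encoding each tree's leaf before the concat.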
tasks/utils/train.py CHANGED
@@ -226,14 +226,14 @@ class Trainer(object):
 
     def train_batch(self, batch, batch_idx, device):
         x, fft, y = batch['audio']['array'], batch['audio']['fft_mag'], batch['label']
-        # features = batch['audio']['features_arr'].to(device).float()
+        features = batch['audio']['features']
         # cwt = batch['audio']['cwt_mag']
         x = x.to(device).float()
         fft = fft.to(device).float()
         # cwt = cwt.to(device).float()
         y = y.to(device).float()
         x_fft = torch.cat((x.unsqueeze(dim=1), fft.unsqueeze(dim=1)), dim=1)
-        y_pred = self.model(x_fft).squeeze()
+        y_pred = self.model(x_fft, features).squeeze()
         loss = self.criterion(y_pred, y)
         loss.backward()
         self.optimizer.step()
@@ -267,13 +267,15 @@ class Trainer(object):
 
     def eval_batch(self, batch, batch_idx, device):
         x, fft, y = batch['audio']['array'], batch['audio']['fft_mag'], batch['label']
+        features = batch['audio']['features']
+
         # features = batch['audio']['features_arr'].to(device).float()
         x = x.to(device).float()
         fft = fft.to(device).float()
         x_fft = torch.cat((x.unsqueeze(dim=1), fft.unsqueeze(dim=1)), dim=1)
         y = y.to(device).float()
         with torch.no_grad():
-            y_pred = self.model(x_fft).squeeze()
+            y_pred = self.model(x_fft, features).squeeze()
         loss = self.criterion(y_pred.squeeze(), y)
         probs = torch.sigmoid(y_pred)
         cls_pred = (probs > 0.5).float()
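
With this change the Trainer unconditionally calls self.model(x_fft, features), so single-input models such as CNNKan no longer match its interface. If they are still needed, a thin wrapper (a sketch, not repo code) restores compatibility:

    import torch.nn as nn

    class IgnoreFeatures(nn.Module):
        """Hypothetical adapter: wraps a single-input model to match model(x, features)."""
        def __init__(self, model: nn.Module):
            super().__init__()
            self.model = model

        def forward(self, x, features=None):
            return self.model(x)  # tabular features are simply discarded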
tasks/utils/transforms.py CHANGED
@@ -156,7 +156,6 @@ def compute_time_domain_features(audio, sample_rate, frame_length=2048, hop_leng
 
     return features
 
-
 def compute_frequency_domain_features(audio, sample_rate, n_fft=2048, hop_length=512):
     """
     Compute frequency-domain features from audio signal.
@@ -175,7 +174,6 @@ def compute_frequency_domain_features(audio, sample_rate, n_fft=2048, hop_length
             sr=sample_rate,
             n_fft=n_fft,
             hop_length=hop_length,
-
         )
         features['spectral_centroid'] = torch.FloatTensor([spectral_centroids.max()])
     except Exception as e:
@@ -188,7 +186,6 @@ def compute_frequency_domain_features(audio, sample_rate, n_fft=2048, hop_length
             sr=sample_rate,
             n_fft=n_fft,
             hop_length=hop_length,
-
         )
         features['spectral_rolloff'] = torch.FloatTensor([spectral_rolloff.max()])
     except Exception as e:
@@ -205,6 +202,7 @@ def compute_frequency_domain_features(audio, sample_rate, n_fft=2048, hop_length
         features['spectral_bandwidth'] = torch.FloatTensor([spectral_bandwidth.max()])
     except Exception as e:
         features['spectral_bandwidth'] = torch.FloatTensor([np.nan])
+
     # 4. Spectral Contrast
     try:
         spectral_contrast = librosa.feature.spectral_contrast(
@@ -240,6 +238,77 @@ def compute_frequency_domain_features(audio, sample_rate, n_fft=2048, hop_length
     except Exception as e:
         features['spectral_flux'] = torch.FloatTensor([np.nan])
 
+    # 7. MFCCs (Mel-Frequency Cepstral Coefficients)
+    try:
+        mfccs = librosa.feature.mfcc(
+            y=audio_np,
+            sr=sample_rate,
+            n_mfcc=13,  # Number of MFCCs to compute
+            n_fft=n_fft,
+            hop_length=hop_length
+        )
+        features['mfcc_mean'] = torch.FloatTensor([mfccs.mean()])
+    except Exception as e:
+        features['mfcc_mean'] = torch.FloatTensor([np.nan])
+
+    # 8. Chroma Features
+    try:
+        chroma = librosa.feature.chroma_stft(
+            y=audio_np,
+            sr=sample_rate,
+            n_fft=n_fft,
+            hop_length=hop_length
+        )
+        features['chroma_mean'] = torch.FloatTensor([chroma.mean()])
+    except Exception as e:
+        features['chroma_mean'] = torch.FloatTensor([np.nan])
+
+    # 9. Spectral Kurtosis
+    # NOTE: librosa.feature has no spectral_kurtosis; this call raises
+    # AttributeError, so the feature is always NaN (see the sketch after this diff).
+    try:
+        spectral_kurtosis = librosa.feature.spectral_kurtosis(
+            y=audio_np,
+            sr=sample_rate,
+            n_fft=n_fft,
+            hop_length=hop_length
+        )
+        features['spectral_kurtosis'] = torch.FloatTensor([spectral_kurtosis.mean()])
+    except Exception as e:
+        features['spectral_kurtosis'] = torch.FloatTensor([np.nan])
+
+    # 10. Spectral Skewness
+    # NOTE: librosa.feature has no spectral_skewness; always NaN as written.
+    try:
+        spectral_skewness = librosa.feature.spectral_skewness(
+            y=audio_np,
+            sr=sample_rate,
+            n_fft=n_fft,
+            hop_length=hop_length
+        )
+        features['spectral_skewness'] = torch.FloatTensor([spectral_skewness.mean()])
+    except Exception as e:
+        features['spectral_skewness'] = torch.FloatTensor([np.nan])
+
+    # 11. Spectral Slope
+    # NOTE: librosa.feature has no spectral_slope; always NaN as written.
+    try:
+        spectral_slope = librosa.feature.spectral_slope(
+            y=audio_np,
+            sr=sample_rate,
+            n_fft=n_fft,
+            hop_length=hop_length
+        )
+        features['spectral_slope'] = torch.FloatTensor([spectral_slope.mean()])
+    except Exception as e:
+        features['spectral_slope'] = torch.FloatTensor([np.nan])
+
+    # 12. Tonnetz (Tonal Centroid Features)
+    try:
+        tonnetz = librosa.feature.tonnetz(
+            y=audio_np,
+            sr=sample_rate
+        )
+        features['tonnetz_mean'] = torch.FloatTensor([tonnetz.mean()])
+    except Exception as e:
+        features['tonnetz_mean'] = torch.FloatTensor([np.nan])
+
     return features
 
 
@@ -267,6 +336,4 @@ def compute_all_features(audio, sample_rate, wavelet='db1', decompos_level=4):
     # features['time_domain'] = compute_time_domain_features(audio, sample_rate)
 
     # Frequency domain features
-    features['frequency_domain'] = compute_frequency_domain_features(audio, sample_rate)
-
-    return features
+    return compute_frequency_domain_features(audio, sample_rate)
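
As flagged in the diff, librosa provides no spectral_kurtosis, spectral_skewness, or spectral_slope, so features 9-11 always fall through to NaN. If those statistics are wanted, a hedged replacement computed directly from the STFT magnitude (scipy assumed available; function name is illustrative) could look like:

    import numpy as np
    import librosa
    from scipy.stats import kurtosis, skew

    def spectral_moments(audio_np, sample_rate, n_fft=2048, hop_length=512):
        """Kurtosis, skewness, and slope of the average magnitude spectrum."""
        S = np.abs(librosa.stft(audio_np, n_fft=n_fft, hop_length=hop_length))
        spec = S.mean(axis=1)  # mean magnitude per frequency bin
        freqs = librosa.fft_frequencies(sr=sample_rate, n_fft=n_fft)
        slope = np.polyfit(freqs, spec, deg=1)[0]  # slope of a linear fit to the spectrum
        return kurtosis(spec), skew(spec), slope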