huseinzol05 committed
Commit 68df9df (parent: 9572e5b)

Upload MM_LLMs

Files changed (3)
  1. config.json +2 -1
  2. model.safetensors +1 -1
  3. modeling_audio.py +5 -1
config.json CHANGED
@@ -1,5 +1,5 @@
 {
-  "_name_or_path": "audio-alignment-tinyllama/checkpoint-5800",
+  "_name_or_path": "audio-alignment-tinyllama/checkpoint-6800",
   "architectures": [
     "MM_LLMs"
   ],
@@ -203,6 +203,7 @@
     "use_weighted_layer_sum": false,
     "vocab_size": 51865
   },
+  "audio_select_layer": -2,
   "auto_map": {
     "AutoConfig": "modeling_audio.MM_LLMs_Config",
     "AutoModel": "modeling_audio.MM_LLMs"
model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b3875b2eb90aa1bd7aea8ba2faed29ed0c5896cd341c86fdad8e49e2319d02e1
+oid sha256:d69f1d7e7d57b232dead293d2c9ee96b39daf7783fcb15ba200db80b23b2c80f
 size 2817909376
modeling_audio.py CHANGED
@@ -34,11 +34,13 @@ class MM_LLMs_Config(PretrainedConfig):
         self,
         audio_config=None,
         llm_config=None,
+        audio_select_layer=-2,
         **kwargs
     ):
 
         self.audio_config = audio_config
         self.llm_config = llm_config
+        self.audio_select_layer = audio_select_layer
 
         if isinstance(self.audio_config, dict):
             audio_config["model_type"] = (
@@ -252,6 +254,8 @@ class MM_LLMs(PreTrainedModel):
         return model_inputs
 
     def encode_audio(self, audios):
-        encoded = self.audio_encoder.encoder(audios)[0]
+
+        encoded = self.audio_encoder.encoder(audios, output_hidden_states=True)
+        encoded = encoded.hidden_states[self.config.audio_select_layer]
         audio_features = self.audio_projector(encoded.transpose(1, 2).contiguous())
         return audio_features
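
The `encode_audio` change swaps the encoder's final output for an intermediate hidden state chosen by `audio_select_layer` (default -2, the second-to-last encoder block). A minimal sketch of the same selection on a standalone Whisper-style encoder, assuming `openai/whisper-tiny` as a stand-in for the repo's audio tower:

# Minimal sketch of the layer-selection change; openai/whisper-tiny is an
# assumption used as a stand-in for the repo's actual audio encoder.
import torch
from transformers import WhisperModel

encoder = WhisperModel.from_pretrained("openai/whisper-tiny").encoder
mel = torch.randn(1, 80, 3000)  # (batch, n_mels, frames) log-mel input features

out = encoder(mel, output_hidden_states=True)

# out.hidden_states holds the embedding output followed by each block's output,
# with the last entry matching out.last_hidden_state (post final layer norm).
selected = out.hidden_states[-2]    # what the new code feeds the projector
final = out.last_hidden_state       # what the old `[0]` indexing returned

print(selected.shape, final.shape)  # both (1, 1500, d_model)

Choosing -2 mirrors the common practice in vision-language models of projecting features from the penultimate layer rather than the final one.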