akhaliq HF staff committed on
Commit
3eb8d72
1 Parent(s): e2fd588

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +67 -4
app.py CHANGED
@@ -6,12 +6,21 @@ import whisper
6
  model = whisper.load_model("base")
7
 
8
 
9
-
10
 
11
  def inference(audio):
12
- result = model.transcribe(audio)
13
- print(result["text"])
14
- return result["text"]
 
 
 
 
 
 
 
 
 
 
15
 
16
 
17
  title="Whisper"
@@ -86,6 +95,60 @@ block = gr.Blocks(css=css)
86
 
87
 
88
  with block:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
89
  with gr.Group():
90
  with gr.Box():
91
  with gr.Row().style(mobile_collapse=False, equal_height=True):
 
6
  model = whisper.load_model("base")
7
 
8
 
 
9
 
10
  def inference(audio):
11
+ audio = whisper.load_audio(audio)
12
+ audio = whisper.pad_or_trim(audio)
13
+
14
+ mel = whisper.log_mel_spectrogram(audio).to(model.device)
15
+
16
+ _, probs = model.detect_language(mel)
17
+ print(f"Detected language: {max(probs, key=probs.get)}")
18
+
19
+ options = whisper.DecodingOptions()
20
+ result = whisper.decode(model, mel, options)
21
+
22
+ print(result.text)
23
+ return result.text
24
 
25
 
26
  title="Whisper"
 
95
 
96
 
97
  with block:
98
+ gr.HTML(
99
+ """
100
+ <div style="text-align: center; max-width: 650px; margin: 0 auto;">
101
+ <div
102
+ style="
103
+ display: inline-flex;
104
+ align-items: center;
105
+ gap: 0.8rem;
106
+ font-size: 1.75rem;
107
+ "
108
+ >
109
+ <svg
110
+ width="0.65em"
111
+ height="0.65em"
112
+ viewBox="0 0 115 115"
113
+ fill="none"
114
+ xmlns="http://www.w3.org/2000/svg"
115
+ >
116
+ <rect width="23" height="23" fill="white"></rect>
117
+ <rect y="69" width="23" height="23" fill="white"></rect>
118
+ <rect x="23" width="23" height="23" fill="#AEAEAE"></rect>
119
+ <rect x="23" y="69" width="23" height="23" fill="#AEAEAE"></rect>
120
+ <rect x="46" width="23" height="23" fill="white"></rect>
121
+ <rect x="46" y="69" width="23" height="23" fill="white"></rect>
122
+ <rect x="69" width="23" height="23" fill="black"></rect>
123
+ <rect x="69" y="69" width="23" height="23" fill="black"></rect>
124
+ <rect x="92" width="23" height="23" fill="#D9D9D9"></rect>
125
+ <rect x="92" y="69" width="23" height="23" fill="#AEAEAE"></rect>
126
+ <rect x="115" y="46" width="23" height="23" fill="white"></rect>
127
+ <rect x="115" y="115" width="23" height="23" fill="white"></rect>
128
+ <rect x="115" y="69" width="23" height="23" fill="#D9D9D9"></rect>
129
+ <rect x="92" y="46" width="23" height="23" fill="#AEAEAE"></rect>
130
+ <rect x="92" y="115" width="23" height="23" fill="#AEAEAE"></rect>
131
+ <rect x="92" y="69" width="23" height="23" fill="white"></rect>
132
+ <rect x="69" y="46" width="23" height="23" fill="white"></rect>
133
+ <rect x="69" y="115" width="23" height="23" fill="white"></rect>
134
+ <rect x="69" y="69" width="23" height="23" fill="#D9D9D9"></rect>
135
+ <rect x="46" y="46" width="23" height="23" fill="black"></rect>
136
+ <rect x="46" y="115" width="23" height="23" fill="black"></rect>
137
+ <rect x="46" y="69" width="23" height="23" fill="black"></rect>
138
+ <rect x="23" y="46" width="23" height="23" fill="#D9D9D9"></rect>
139
+ <rect x="23" y="115" width="23" height="23" fill="#AEAEAE"></rect>
140
+ <rect x="23" y="69" width="23" height="23" fill="black"></rect>
141
+ </svg>
142
+ <h1 style="font-weight: 900; margin-bottom: 7px;">
143
+ Whisper
144
+ </h1>
145
+ </div>
146
+ <p style="margin-bottom: 10px; font-size: 94%">
147
+ Whisper is a general-purpose speech recognition model. It is trained on a large dataset of diverse audio and is also a multi-task model that can perform multilingual speech recognition as well as speech translation and language identification.
148
+ </p>
149
+ </div>
150
+ """
151
+ )
152
  with gr.Group():
153
  with gr.Box():
154
  with gr.Row().style(mobile_collapse=False, equal_height=True):