dmaniloff committed on
Commit
bb39a26
·
1 Parent(s): 3b23541

full demo.

Browse files
Files changed (1) hide show
  1. app.py +46 -16
app.py CHANGED
@@ -4,38 +4,68 @@ import tempfile
4
  import torch
5
  import gradio as gr
6
  from transformers import pipeline
 
7
 
8
 
9
- MODEL_NAME = "openai/whisper-large-v3"
10
- BATCH_SIZE = 8
11
-
12
  device = 0 if torch.cuda.is_available() else "cpu"
13
 
 
 
 
 
14
  pipe = pipeline(
15
  task="automatic-speech-recognition",
16
- model=MODEL_NAME,
17
  chunk_length_s=30,
18
  device=device,
19
  )
20
 
21
 
22
- def transcribe(inputs, task="transcribe"):
23
- if inputs is None:
 
24
  raise gr.Error("No audio file submitted!")
25
 
26
  output = pipe(
27
- inputs,
28
- batch_size=BATCH_SIZE,
29
- generate_kwargs={"task": task},
30
  return_timestamps=True
31
  )
32
  return output["text"]
33
 
34
- demo = gr.Interface(
35
- fn=transcribe,
36
- inputs=["audio"],
37
- outputs="text",
38
- title="Transcribe Audio to Text", # Give our demo a title
39
- )
40
 
41
- demo.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
  import torch
5
  import gradio as gr
6
  from transformers import pipeline
7
+ from huggingface_hub import InferenceClient
8
 
9
 
 
 
 
10
# Run on CUDA device 0 when a GPU is available; otherwise fall back to CPU.
device = 0 if torch.cuda.is_available() else "cpu"

# Speech-to-text checkpoint: faster and very close in performance to the
# full-size "openai/whisper-large-v3".
AUDIO_MODEL_NAME = "distil-whisper/distil-large-v3"
# Chat model that is asked to organize the raw transcript.
TEXT_MODEL_NAME = "microsoft/Phi-3-mini-4k-instruct"
# batch_size handed to the speech-recognition pipeline on every call.
BATCH_SIZE = 8

# Speech-recognition pipeline; long audio is processed in 30-second chunks.
pipe = pipeline(
    task="automatic-speech-recognition",
    model=AUDIO_MODEL_NAME,
    chunk_length_s=30,
    device=device,
)

# Client used by organize_text() for chat completions. InferenceClient was
# imported but never instantiated, so the first call to organize_text() failed
# with NameError: name 'client' is not defined.
client = InferenceClient()
22
 
23
 
24
def transcribe(audio_input):
    """Transcribe an audio input to text with the speech-recognition pipeline.

    Raises ``gr.Error`` when no audio was submitted; otherwise returns the
    ``"text"`` field of the pipeline output.
    """
    if audio_input is None:
        raise gr.Error("No audio file submitted!")

    # Gather the call options in one place before invoking the pipeline.
    asr_options = {
        "batch_size": BATCH_SIZE,
        "generate_kwargs": {"task": "transcribe"},
        "return_timestamps": True,
    }
    result = pipe(audio_input, **asr_options)
    return result["text"]
36
 
 
 
 
 
 
 
37
 
38
def organize_text(meeting_transcript):
    """Ask the chat model for an organized version of a meeting transcript.

    Builds the chat messages for the transcript, requests a completion from
    ``TEXT_MODEL_NAME`` and returns the content of the first choice.
    """
    chat_prompt = build_messages(meeting_transcript)
    completion = client.chat_completion(
        chat_prompt,
        model=TEXT_MODEL_NAME,
        max_tokens=250,
        seed=430,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content
44
+
45
+
46
def build_messages(meeting_transcript) -> list:
    """Build the chat messages asking the model to organize a transcript.

    Args:
        meeting_transcript: Raw transcript text to embed in the user prompt.

    Returns:
        A two-element list of ``{"role": ..., "content": ...}`` dicts: the
        system instruction first, then the user request with the transcript.
    """
    # Fixed the "assitant" typo in the instruction that is sent to the model.
    system_input = "You are an assistant that organizes meeting minutes."
    # Built line by line so the prompt's whitespace does not depend on how the
    # source file happens to be indented.
    user_input = (
        "Take this raw meeting transcript and return an organized version.\n"
        "Here is the transcript:\n"
        f"{meeting_transcript}\n"
    )

    return [
        {"role": "system", "content": system_input},
        {"role": "user", "content": user_input},
    ]
58
+
59
def meeting_transcript_tool(audio_input):
    """Transcribe the audio, then return the organized transcript text."""
    return organize_text(transcribe(audio_input))
63
+
64
+
65
# Gradio UI: the audio input is handed to the tool as a file path, and the
# organized text is shown in a textbox with a copy button.
full_demo = gr.Interface(
    title="The Complete Meeting Transcript Tool",
    fn=meeting_transcript_tool,
    inputs=gr.Audio(type="filepath"),
    outputs=gr.Textbox(show_copy_button=True),
)

full_demo.launch()