# Page-header residue from the hosting site (kept as comments so the file parses):
#   dmaniloff's picture
#   Update app.py
#   91a7adc verified
import os
import tempfile
import torch
import gradio as gr
from transformers import pipeline
from huggingface_hub import InferenceClient
# Pick the pipeline device: index 0 when CUDA is available, otherwise the CPU.
if torch.cuda.is_available():
    device = 0
else:
    device = "cpu"

# distil-whisper is faster and very close in performance to the full-size
# "openai/whisper-large-v3".
AUDIO_MODEL_NAME = "distil-whisper/distil-large-v3"
TEXT_MODEL_NAME = "microsoft/Phi-3-mini-4k-instruct"
BATCH_SIZE = 8

# Speech-recognition pipeline shared by every transcription request.
pipe = pipeline(
    "automatic-speech-recognition",
    model=AUDIO_MODEL_NAME,
    device=device,
    chunk_length_s=30,
)

# Client used by organize_text for chat completions.
client = InferenceClient()
def transcribe(audio_input):
    """Convert the submitted audio to text with the speech-recognition pipeline.

    Args:
        audio_input: The audio to transcribe, passed straight to ``pipe``.

    Returns:
        The ``"text"`` entry of the pipeline output.

    Raises:
        gr.Error: If ``audio_input`` is ``None`` (nothing was submitted).
    """
    if audio_input is None:
        raise gr.Error("No audio file submitted!")

    asr_options = {
        "batch_size": BATCH_SIZE,
        "generate_kwargs": {"task": "transcribe"},
        "return_timestamps": True,
    }
    result = pipe(audio_input, **asr_options)
    return result["text"]
def organize_text(meeting_transcript):
    """Ask the text model for an organized version of the transcript.

    Builds the chat messages with ``build_messages``, sends them through
    ``client.chat_completion`` (fixed seed, at most 250 generated tokens) and
    returns the message content of the first choice.
    """
    completion = client.chat_completion(
        build_messages(meeting_transcript),
        model=TEXT_MODEL_NAME,
        max_tokens=250,
        seed=430,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content
def build_messages(meeting_transcript: str) -> list:
    """Build the chat messages that ask the model to organize a transcript.

    Args:
        meeting_transcript: Raw transcript text to embed in the user prompt.
            It is inserted verbatim, so braces in the transcript are safe.

    Returns:
        A two-element list of ``{"role", "content"}`` dicts: the system
        instruction followed by the user request containing the transcript.
    """
    # Typo fix: the original prompt read "assitant".
    system_input = "You are an assistant that organizes meeting minutes."
    user_input = (
        "Take this raw meeting transcript and return an organized version.\n"
        "Here is the transcript:\n"
        f"{meeting_transcript}\n"
    )
    return [
        {"role": "system", "content": system_input},
        {"role": "user", "content": user_input},
    ]
def meeting_transcript_tool(audio_input):
    """Transcribe the audio, then return the organized version of that text."""
    return organize_text(transcribe(audio_input))
# UI components: audio is handed to the tool as a file path, and the result is
# shown in a text box with a copy button.
_audio_source = gr.Audio(type="filepath")
_minutes_box = gr.Textbox(show_copy_button=True)

# Wire the audio -> organized-minutes function into a single-page interface.
full_demo = gr.Interface(
    title="Meeting Transcript Tool (Recipe from the Enterprise Hub Cookbook)",
    fn=meeting_transcript_tool,
    inputs=_audio_source,
    outputs=_minutes_box,
)

full_demo.launch()