Spaces:
Runtime error
Runtime error
File size: 1,977 Bytes
89cc85a 607b639 dcac4f2 89cc85a bb39a26 89cc85a bb39a26 89cc85a bb39a26 89cc85a 5b88c7d 89cc85a bb39a26 89cc85a bb39a26 89cc85a dcac4f2 bb39a26 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 |
import os
import tempfile
import torch
import gradio as gr
from transformers import pipeline
from huggingface_hub import InferenceClient
# Run on CUDA device 0 when torch reports a GPU, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 0
else:
    device = "cpu"

# Speech-recognition checkpoint: faster and very close in performance to the
# full-size "openai/whisper-large-v3".
AUDIO_MODEL_NAME = "distil-whisper/distil-large-v3"
# Chat model that is asked to organize the transcript.
TEXT_MODEL_NAME = "microsoft/Phi-3-mini-4k-instruct"
# Batch size handed to the speech-recognition pipeline on every call.
BATCH_SIZE = 8
# Speech-to-text pipeline, created once when the module is loaded and reused
# by every call to transcribe().
pipe = pipeline(
    "automatic-speech-recognition",
    model=AUDIO_MODEL_NAME,
    device=device,
    chunk_length_s=30,  # audio is processed in 30-second chunks
)

# Hugging Face inference client used for the chat-completion request.
client = InferenceClient()
def transcribe(audio_input):
    """Transcribe an audio recording to plain text.

    Args:
        audio_input: Audio handed unchanged to the module-level ``pipe``.
            The Gradio interface in this file supplies it as a file path.

    Returns:
        The ``"text"`` entry of the pipeline output.

    Raises:
        gr.Error: If ``audio_input`` is ``None`` (nothing was submitted).
    """
    if audio_input is None:
        raise gr.Error("No audio file submitted!")

    # Timestamps are requested although only the text is returned.
    # NOTE(review): presumably required for long-form audio; confirm before
    # removing.
    asr_options = {
        "batch_size": BATCH_SIZE,
        "generate_kwargs": {"task": "transcribe"},
        "return_timestamps": True,
    }
    result = pipe(audio_input, **asr_options)
    return result["text"]
def organize_text(meeting_transcript):
    """Ask the chat model for an organized version of a raw transcript.

    Args:
        meeting_transcript: Raw transcript text; it is embedded in the prompt
            by ``build_messages``.

    Returns:
        The message content of the first choice in the chat-completion
        response.
    """
    prompt = build_messages(meeting_transcript)
    # The reply is limited to 250 tokens and requested with a fixed seed.
    completion = client.chat_completion(
        prompt,
        model=TEXT_MODEL_NAME,
        max_tokens=250,
        seed=430,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content
def build_messages(meeting_transcript) -> list:
    """Build the chat messages that ask the model to organize a transcript.

    Args:
        meeting_transcript: Raw transcript text. It is inserted verbatim into
            the user message, so braces in the transcript are kept as-is.

    Returns:
        A two-element list of ``{"role": ..., "content": ...}`` dicts: the
        system instruction followed by the user request containing the
        transcript.
    """
    # Spelling fixed ("assitant" -> "assistant"); this text is sent to the model.
    system_input = "You are an assistant that organizes meeting minutes."
    # Assembled from explicit pieces so the prompt text does not depend on how
    # this source file is indented.
    user_input = (
        "Take this raw meeting transcript and return an organized version.\n"
        "Here is the transcript:\n"
        f"{meeting_transcript}\n"
    )
    return [
        {"role": "system", "content": system_input},
        {"role": "user", "content": user_input},
    ]
def meeting_transcript_tool(audio_input):
    """Turn a meeting recording into organized minutes.

    The audio is transcribed with ``transcribe`` and the resulting text is
    passed to ``organize_text``.

    Args:
        audio_input: Audio forwarded unchanged to ``transcribe``.

    Returns:
        Whatever ``organize_text`` returns for the transcript.
    """
    return organize_text(transcribe(audio_input))
# Gradio UI: one audio input, passed to the callback as a file path, and a text
# box with a copy button that shows the callback's return value.
full_demo = gr.Interface(
    fn=meeting_transcript_tool,
    inputs=gr.Audio(type="filepath"),
    outputs=gr.Textbox(show_copy_button=True),
    title="The Complete Meeting Transcript Tool",
)

# Start the app. The stray trailing "|" after this call was a syntax error and
# has been removed.
full_demo.launch()