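"""Meeting summarizer Gradio app.

Transcribes an uploaded audio file with Whisper and/or summarizes text into
bullet points (with an optional conclusion) using Llama 3, both through the
Hugging Face Inference API.
"""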
import gradio as gr
import os
from huggingface_hub import InferenceClient
from transformers import AutoTokenizer
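
# Hugging Face access token, read from the environment; used for all Inference API calls below.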
HUGGINGFACE_API_KEY = os.environ["HUGGINGFACE_API_KEY"]


def transcript_audio(audio_file) -> str:
    """Transcribe an audio file with Whisper via the Hugging Face Inference API."""
    model = "openai/whisper-large-v3"
    api = InferenceClient(model, token=HUGGINGFACE_API_KEY)
    result = api.automatic_speech_recognition(audio_file)
    # Recent huggingface_hub versions return an output object with a `.text` field;
    # fall back to the raw value for older versions that return a plain string.
    return result.text if hasattr(result, "text") else result


def summarize_text(text: str, bullet_points: int, conclusion: bool) -> str:
    """Summarize text into bullet points (and optionally a conclusion) with Llama 3."""
    llm_model = "meta-llama/Meta-Llama-3-70B-Instruct"
    api = InferenceClient(llm_model, token=HUGGINGFACE_API_KEY)
    tokenizer = AutoTokenizer.from_pretrained(llm_model, token=HUGGINGFACE_API_KEY)

    if conclusion:
        user_chat = f"Summarize the following text into {bullet_points} bullet points and a conclusion:\n{text}"
    else:
        user_chat = f"Summarize the following text into {bullet_points} bullet points:\n{text}"

    chat = [
        {
            "role": "system",
            "content": (
                "You are a Meeting Summarizer AI. You will help summarize the text "
                "into bullet points and a conclusion. Please return Markdown-formatted "
                "text. Remember to give it a title."
            ),
        },
        {"role": "user", "content": user_chat},
    ]
    # Render the chat messages into a Llama 3 prompt string, then generate the summary.
    prompt = tokenizer.apply_chat_template(
        chat, tokenize=False, add_generation_prompt=True
    )
    summary = api.text_generation(prompt, max_new_tokens=250, do_sample=True)
    print(summary)
    return summary


def control(audio_file, text: str, bullet_points: int, conclusion: bool) -> str:
    """Transcribe the audio file if one was uploaded, then summarize the resulting text."""
    if audio_file:
        text = transcript_audio(audio_file)
    summary = summarize_text(text, bullet_points, conclusion)
    return summary


# Simple interface: the user can either type text or upload an audio file (which is
# transcribed first) and gets back a bullet-point summary with an optional conclusion.
iface = gr.Interface(
    fn=control,
    inputs=[
        gr.components.Audio(label="Audio file", type="filepath"),
        gr.components.Textbox(lines=5, label="Text"),
        gr.components.Slider(
            minimum=1, maximum=10, value=5, step=1, label="Number of bullet points"
        ),
        gr.components.Checkbox(label="Add conclusion"),
    ],
    outputs=gr.components.Markdown(label="Summary"),
)

iface.launch()