import gradio as gr
import os
from huggingface_hub import InferenceClient
from transformers import AutoTokenizer


# Read the Hugging Face API token from the environment; raises KeyError if unset.
HUGGINGFACE_API_KEY = os.environ["HUGGINGFACE_API_KEY"]


def transcribe_audio(audio_file) -> str:
    """Transcribe an audio file to text with Whisper via the HF Inference API."""
    model = "openai/whisper-large-v3"
    api = InferenceClient(model, token=HUGGINGFACE_API_KEY)
    result = api.automatic_speech_recognition(audio_file)
    # Newer huggingface_hub versions return an output object whose transcript
    # lives in .text; older versions return the string directly.
    return result.text if hasattr(result, "text") else result
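# Example usage (hypothetical path; assumes an audio file on disk and a valid
# HUGGINGFACE_API_KEY in the environment):
#
#   transcript = transcribe_audio("meeting.wav")
#   print(transcript)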


def summarize_text(text: str, bullet_points: int, conclusion: bool) -> str:
    """Summarize text into bullet points (optionally plus a conclusion) with Llama 3."""
    llm_model = "meta-llama/Meta-Llama-3-70B-Instruct"
    api = InferenceClient(llm_model, token=HUGGINGFACE_API_KEY)
    tokenizer = AutoTokenizer.from_pretrained(llm_model, token=HUGGINGFACE_API_KEY)
    if conclusion:
        user_chat = f"Summarize the following text into {bullet_points} bullet points and a conclusion:\n{text}"
    else:
        user_chat = (
            f"Summarize the following text into {bullet_points} bullet points:\n{text}"
        )
    chat = [
        {
            "role": "system",
            "content": (
                "You are a Meeting Summarizer AI. You will help summarize the text "
                "into bullet points and a conclusion. Please return Markdown-formatted "
                "text. Remember to give it a title."
            ),
        },
        {"role": "user", "content": user_chat},
    ]
    # Render the chat into the model's prompt format without tokenizing, and
    # append the assistant header so the model starts generating the reply.
    prompt = tokenizer.apply_chat_template(
        chat, tokenize=False, add_generation_prompt=True
    )
    summary = api.text_generation(prompt, max_new_tokens=250, do_sample=True)
    return summary
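# Example usage (illustrative input; since do_sample=True the output is
# stochastic and will vary between runs):
#
#   summary = summarize_text("We agreed to ship v2 on Friday...", 3, True)
#   # -> a Markdown summary with a title, 3 bullet points, and a conclusion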


def control(audio_file, text: str, bullet_points: int, conclusion: bool) -> str:
    """Route the request: transcribe the audio if one was provided, then summarize."""
    if audio_file:
        text = transcribe_audio(audio_file)
    return summarize_text(text, bullet_points, conclusion)


# A simple interface: the user either types text or uploads an audio file
# (which is transcribed first) and gets back a Markdown summary.
iface = gr.Interface(
    fn=control,
    inputs=[
        gr.components.Audio(label="Audio file", type="filepath"),
        gr.components.Textbox(lines=5, label="Text"),
        gr.components.Slider(
            minimum=1, maximum=10, value=5, step=1, label="Number of bullet points"
        ),
        gr.components.Checkbox(label="Add conclusion"),
    ],
    outputs=gr.components.Markdown(label="Summary"),
)

iface.launch()
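# To run locally (assumes gradio, huggingface_hub, and transformers are
# installed): set HUGGINGFACE_API_KEY, then run `python app.py`. Passing
# share=True to launch() would additionally create a temporary public link:
#
#   iface.launch(share=True)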