Spaces:
Runtime error
Runtime error
dron3flyv3r
committed on
Commit
·
6d1b7ca
1
Parent(s):
a1d7b67
Add AutoTokenizer for text summarization
Browse files
- app.py +15 -2
- requirements.txt +1 -0
app.py
CHANGED
@@ -1,6 +1,7 @@
|
|
1 |
import gradio as gr
|
2 |
import os
|
3 |
from huggingface_hub import InferenceClient
|
|
|
4 |
|
5 |
# Hugging Face access token, passed as `token=` to the inference client.
# Read with os.environ[...], so a missing variable raises KeyError at import.
HUGGINGFACE_API_KEY = os.environ["HUGGINGFACE_API_KEY"]
|
6 |
|
@@ -15,19 +16,31 @@ def transcript_audio(audio_file) -> str:
|
|
15 |
def summarize_text(text: str, bullet_points: int, conclusion: bool) -> str:
|
16 |
llm_model = "google/gemma-7b-it"
|
17 |
api = InferenceClient(llm_model, token=HUGGINGFACE_API_KEY)
|
|
|
18 |
if conclusion:
|
19 |
-
|
20 |
else:
|
21 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
22 |
summary = api.text_generation(prompt, max_new_tokens=250, do_sample=True)
|
23 |
print(summary)
|
24 |
return summary
|
25 |
|
|
|
26 |
def control(audio_file, text: str, bullet_points: int, conclusion: bool) -> str:
    """Summarize an audio file's transcript, or the given text if no audio.

    Args:
        audio_file: Audio input; when truthy it is transcribed with
            ``transcript_audio`` and the transcript replaces ``text``.
        text: Text to summarize when no audio file is supplied.
        bullet_points: Forwarded unchanged to ``summarize_text``.
        conclusion: Forwarded unchanged to ``summarize_text``.

    Returns:
        Whatever ``summarize_text`` returns for the chosen text.
    """
    # Audio takes precedence over typed text when both are provided.
    if not audio_file:
        return summarize_text(text, bullet_points, conclusion)
    transcript = transcript_audio(audio_file)
    return summarize_text(transcript, bullet_points, conclusion)
|
|
|
|
|
31 |
# Make a simple interface where the user can input text and get a summary, or input an audio file and get a transcript and a summary.
|
32 |
iface = gr.Interface(
|
33 |
fn=control,
|
|
|
1 |
import gradio as gr
|
2 |
import os
|
3 |
from huggingface_hub import InferenceClient
|
4 |
+
from transformers import AutoTokenizer
|
5 |
|
6 |
# Hugging Face access token, passed as `token=` to the inference client and
# the tokenizer loader. Read with os.environ[...], so a missing variable
# raises KeyError at import.
HUGGINGFACE_API_KEY = os.environ["HUGGINGFACE_API_KEY"]
|
7 |
|
|
|
16 |
# Tokenizers already loaded by this process, keyed by model id.
_TOKENIZER_CACHE = {}


def _get_tokenizer(model_id: str):
    """Return the tokenizer for *model_id*, loading it only on first use.

    Loading the tokenizer inside ``summarize_text`` repeated the same
    ``from_pretrained`` work on every call; the loaded object is kept in a
    module-level dict and reused instead.
    """
    if model_id not in _TOKENIZER_CACHE:
        _TOKENIZER_CACHE[model_id] = AutoTokenizer.from_pretrained(
            model_id, token=HUGGINGFACE_API_KEY
        )
    return _TOKENIZER_CACHE[model_id]


def summarize_text(text: str, bullet_points: int, conclusion: bool) -> str:
    """Ask the hosted chat model to summarize *text* as bullet points.

    Args:
        text: The text to summarize; it is embedded verbatim in the request.
        bullet_points: Number of bullet points named in the instruction.
        conclusion: When true, the instruction also asks for a conclusion.

    Returns:
        The value returned by ``InferenceClient.text_generation`` for the
        formatted prompt (it is also printed to stdout).
    """
    llm_model = "google/gemma-7b-it"
    api = InferenceClient(llm_model, token=HUGGINGFACE_API_KEY)
    tokenizer = _get_tokenizer(llm_model)

    if conclusion:
        user_chat = f"Summarize the following text into {bullet_points} bullet points and a conclusion: {text}"
    else:
        user_chat = f"Summarize the following text into {bullet_points} bullet points: {text}"

    chat = [{"role": "user", "content": user_chat}]
    # Format the single user turn with the model's own chat template, as a
    # string (tokenize=False), with the generation prompt appended.
    prompt = tokenizer.apply_chat_template(
        chat, tokenize=False, add_generation_prompt=True
    )
    summary = api.text_generation(prompt, max_new_tokens=250, do_sample=True)
    print(summary)
    return summary
|
35 |
|
36 |
+
|
37 |
def control(audio_file, text: str, bullet_points: int, conclusion: bool) -> str:
    """Produce a summary from either an audio file or plain text.

    Args:
        audio_file: Audio input; when truthy its transcript (from
            ``transcript_audio``) is summarized instead of ``text``.
        text: Text to summarize when ``audio_file`` is falsy.
        bullet_points: Passed through to ``summarize_text``.
        conclusion: Passed through to ``summarize_text``.

    Returns:
        The result of ``summarize_text`` for the selected text.
    """
    # A supplied audio file overrides any text that was also entered.
    source_text = transcript_audio(audio_file) if audio_file else text
    return summarize_text(source_text, bullet_points, conclusion)
|
42 |
+
|
43 |
+
|
44 |
# Make a simple interface where the user can input text and get a summary, or input an audio file and get a transcript and a summary.
|
45 |
iface = gr.Interface(
|
46 |
fn=control,
|
requirements.txt
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
transformers
|