Spaces:
Sleeping
Sleeping
import gradio as gr | |
import whisper | |
from langchain_openai import ChatOpenAI | |
from utils import RefineDataSummarizer | |
import os | |
def transcript(file_dir, language, model_type):
    """Transcribe an uploaded media file with a locally stored Whisper model.

    Args:
        file_dir: Path of the audio/video file to transcribe. ``None`` or an
            empty string means nothing has been uploaded yet.
        language: Language of the speech, forwarded to Whisper's
            ``transcribe`` call.
        model_type: Checkpoint file name; the model is loaded from
            ``models/<model_type>``.

    Returns:
        The transcript as a single string, one segment per line, every line
        terminated by a newline.

    Raises:
        ValueError: If no file has been provided.
    """
    # Fail fast with a readable message instead of loading a model first and
    # only then failing on the missing input.
    if file_dir is None or file_dir == "":
        raise ValueError(
            "No file to transcribe: please upload an audio or video file first."
        )

    model_dir = os.path.join('models', model_type)
    model = whisper.load_model(model_dir)
    result = model.transcribe(file_dir, language=language, task='transcribe')

    # One segment per line; join avoids repeated string concatenation.
    return ''.join(f"{segment['text']}\n" for segment in result['segments'])
def upload_file(file_paths):
    """Return the uploaded file path(s) unchanged.

    Acts as a pass-through callback: whatever value it receives is handed
    back as-is.
    """
    uploaded = file_paths
    return uploaded
def summary(text, chunk_num, chunk_overlap, user_api, llm_type):
    """Summarize ``text`` with an OpenAI chat model via ``RefineDataSummarizer``.

    Args:
        text: The text to summarize.
        chunk_num: Forwarded to ``get_summarization`` as ``chunk_num``.
        chunk_overlap: Forwarded to ``get_summarization`` as ``chunk_overlap``.
        user_api: API key typed by the user. ``"Not Provided"``, an empty or
            whitespace-only string, or ``None`` all mean "use the default key
            from the ``openai_api`` environment variable".
        llm_type: Model name passed to ``ChatOpenAI``.

    Returns:
        The ``"output_text"`` entry of the summarizer's result.

    Raises:
        ValueError: If the user supplied no key and the ``openai_api``
            environment variable is unset or empty.
    """
    # The key box is pre-filled with "Not Provided" and its placeholder says an
    # empty box uses the default key, so both must fall back to the environment.
    supplied_key = (user_api or "").strip()
    if supplied_key and supplied_key != "Not Provided":
        api_key = supplied_key
    else:
        # os.getenv returns None when the variable is missing; normalise to "".
        api_key = (os.getenv("openai_api") or "").strip()

    if not api_key:
        raise ValueError(
            "No OpenAI API key available: enter your own key or set the "
            "'openai_api' environment variable."
        )

    llm = ChatOpenAI(temperature=1, openai_api_key=api_key, model_name=llm_type)
    rds = RefineDataSummarizer(llm=llm)
    result = rds.get_summarization(text, chunk_num=chunk_num, chunk_overlap=chunk_overlap)
    return result["output_text"]
# Gradio UI: upload + transcription controls and the transcript/summary
# outputs with their settings.
# NOTE(review): the nesting below was reconstructed from a source whose
# indentation was lost (assumed: two columns in one row) — confirm the layout.
with gr.Blocks() as demo:
    with gr.Row(equal_height=False):
        with gr.Column():
            file_output = gr.File()
            upload_button = gr.UploadButton(
                "Click to Upload a File",
                file_types=["audio", "video"],
                file_count="single",
            )
            # Show the uploaded file in the File component.
            upload_button.upload(upload_file, upload_button, file_output)
            language = gr.Dropdown(
                ["English", "Chinese"],
                label="Transcript Language",
                value="English",
            )
            # Checkpoint file names; transcript() loads them from "models/".
            model_type = gr.Dropdown(
                [
                    "tiny.en.pt",
                    "tiny.pt",
                    "small.en.pt",
                    "small.pt",
                    "base.en.pt",
                    "base.pt",
                    "medium.en.pt",
                    "medium.pt",
                    "large-v1.pt",
                    "large-v2.pt",
                ],
                label="Model Type",
                value="medium.en.pt",
            )
            transcript_button = gr.Button("Transcript", variant="primary")
        with gr.Column():
            transcript_text = gr.Textbox(placeholder="Transcript Result", label="Transcript")
            # Accordion labels are plain strings (previously one-element lists).
            with gr.Accordion(open=False, label="summary settings"):
                chunk_num = gr.Number(
                    precision=0, minimum=1, maximum=9999, step=1,
                    label="Chunk Number", value=1,
                )
                chunk_overlap = gr.Number(
                    precision=0, minimum=1, maximum=9999, step=1,
                    label="Chunk Overlap", value=100,
                )
            with gr.Accordion(open=False, label="llm settings"):
                user_api = gr.Textbox(
                    placeholder="If Empty, Use Default Key",
                    label="Your API Key",
                    value="Not Provided",
                )
                llm_type = gr.Dropdown(
                    [
                        "gpt-3.5-turbo",
                        "gpt-3.5-turbo-16k",
                        "gpt-4-1106-preview",
                    ],
                    label="LLM Type",
                    value="gpt-4-1106-preview",
                )
            summary_button = gr.Button("Summary", variant="primary")
            summary_text = gr.Textbox(placeholder="Summary Result", label="Summary")

    # Transcribe the uploaded file into the transcript box.
    transcript_button.click(
        fn=transcript,
        inputs=[
            file_output,
            language,
            model_type,
        ],
        outputs=[transcript_text],
    )
    # Summarize whatever is currently in the transcript box.
    summary_button.click(
        fn=summary,
        inputs=[
            transcript_text,
            chunk_num,
            chunk_overlap,
            user_api,
            llm_type,
        ],
        outputs=[summary_text],
    )

demo.launch()