|
from pytube import YouTube |
|
import whisper |
|
import os |
|
import subprocess |
|
from openai import OpenAI |
|
import ssl |
|
|
|
|
|
def download_youtube_audio(url, destination="."):
    """Download the first audio-only stream of a YouTube video as an MP3 file.

    The stream is downloaded with pytube, converted to MP3 by invoking the
    ``ffmpeg`` executable, and the originally downloaded file is removed once
    the conversion has succeeded.

    Args:
        url: URL of the YouTube video.
        destination: Directory the files are written to.

    Returns:
        Path of the resulting ``.mp3`` file.

    Raises:
        RuntimeError: If the video exposes no audio-only stream.
        subprocess.CalledProcessError: If ``ffmpeg`` exits with a non-zero
            status. The downloaded source file is kept in that case.
    """
    yt = YouTube(url)

    # SECURITY NOTE(review): this replaces the process-wide default HTTPS
    # context with one that does NOT verify certificates. It is kept so that
    # environments relying on it keep working, but it affects every later
    # HTTPS request made through the default context in this process.
    # Prefer fixing the local CA bundle and removing this line.
    ssl._create_default_https_context = ssl._create_unverified_context

    audio_stream = yt.streams.filter(only_audio=True).first()
    if audio_stream is None:
        raise RuntimeError(f"No audio-only stream available for {url}")

    out_file = audio_stream.download(output_path=destination)

    base, _ext = os.path.splitext(out_file)
    audio_file = base + '.mp3'

    # If the download already has the target name, converting would make
    # ffmpeg read and write the same file, and the removal below would then
    # delete the only copy.
    if os.path.normcase(out_file) != os.path.normcase(audio_file):
        # -y: overwrite a stale MP3 from an earlier run instead of blocking on
        # ffmpeg's interactive prompt.
        # check=True: a failed conversion raises instead of silently deleting
        # the download and returning a path to a file that does not exist.
        subprocess.run(['ffmpeg', '-y', '-i', out_file, audio_file], check=True)
        os.remove(out_file)

    print(f"Downloaded and converted to MP3: {audio_file}")
    return audio_file
|
|
|
|
|
def transcribe_audio(audio_file):
    """Transcribe an audio file with the Whisper "base" model.

    Args:
        audio_file: Path of the audio file handed to Whisper.

    Returns:
        The ``"text"`` entry of the transcription result.
    """
    transcription = whisper.load_model("base").transcribe(audio_file)
    return transcription["text"]
|
|
|
|
|
def write_text_to_file(text, filename="transcribed_text.txt"):
    """Write ``text`` to ``filename``, replacing any existing content.

    Args:
        text: String to write.
        filename: Path of the file to create or overwrite.
    """
    # Explicit UTF-8: the platform default encoding (e.g. cp1252 on Windows)
    # cannot represent every character a transcript may contain.
    with open(filename, "w", encoding="utf-8") as file:
        file.write(text)
|
|
|
|
|
def delete_file(file_path):
    """Remove the file at ``file_path`` from the filesystem."""
    os.unlink(file_path)
|
|
|
|
|
def process(url):
    """Download a YouTube video's audio, transcribe it, and summarize it.

    Args:
        url: URL of the YouTube video.

    Returns:
        The summary returned by ``summarize_text`` for the transcript.
    """
    file_path = download_youtube_audio(url)

    try:
        transcript = transcribe_audio(file_path)
    finally:
        # Remove the intermediate MP3 even when transcription fails, so a
        # failed run does not leave the audio file behind.
        delete_file(file_path)

    return summarize_text(transcript)
|
|
|
|
|
def summarize_text(prompt):
    """Ask the OpenAI chat API for a corrected, structured summary.

    The transcription is sent as the user message, preceded by a fixed system
    instruction. The API key is read from the ``OPENAI_API_KEY`` environment
    variable.

    Args:
        prompt: Transcribed text of the video.

    Returns:
        The message content of the first choice in the API response.
    """
    system_instruction = (
        'You are a model that receives a transcription of a YouTube video. '
        'Your task is to correct any words that may be incorrect based on the context, '
        'and transform it into a well-structured summary of the entire video. '
        'Your summary should highlight important details and provide additional context when necessary. '
        'Aim to be detailed, particularly when addressing non-trivial aspects of the content. '
        'The summary should encompass at least 20-30% of the original text length.'
    )

    conversation = [
        {"role": "system", "content": system_instruction},
        {"role": "user", "content": prompt},
    ]

    api = OpenAI(api_key=os.getenv('OPENAI_API_KEY'))
    completion = api.chat.completions.create(
        model="gpt-4-turbo",
        messages=conversation,
    )

    return completion.choices[0].message.content
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|