archit11 committed on
Commit
3d79800
·
verified ·
1 Parent(s): 307a45b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +26 -24
app.py CHANGED
@@ -1,3 +1,5 @@
 
 
1
  import gradio as gr
2
  import pandas as pd
3
  import yt_dlp
@@ -5,11 +7,10 @@ import os
5
  from semantic_chunkers import StatisticalChunker
6
  from semantic_router.encoders import HuggingFaceEncoder
7
  from faster_whisper import WhisperModel
8
- import spaces
9
-
10
 
11
- # Function to download YouTube audio
12
- def download_youtube_audio(url, output_path, preferred_quality="192"):
13
  ydl_opts = {
14
  'format': 'bestaudio/best',
15
  'postprocessors': [{
@@ -17,7 +18,7 @@ def download_youtube_audio(url, output_path, preferred_quality="192"):
17
  'preferredcodec': 'mp3',
18
  'preferredquality': preferred_quality,
19
  }],
20
- 'outtmpl': output_path
21
  }
22
 
23
  try:
@@ -26,25 +27,29 @@ def download_youtube_audio(url, output_path, preferred_quality="192"):
26
  video_title = info_dict.get('title', None)
27
  print(f"Downloading audio for: {video_title}")
28
 
29
- ydl.download([url])
30
- print(f"Audio file saved as: {output_path}")
31
-
32
- return output_path
 
 
33
 
34
  except yt_dlp.utils.DownloadError as e:
35
  print(f"Error downloading audio: {e}")
36
  return None
37
 
38
- # Function to transcribe audio using WhisperModel
39
- @spaces.GPU(duration=120)
40
- def transcribe(path, model_name):
41
  model = WhisperModel(model_name)
42
- print(f"Reading {path}")
43
- segments, info = model.transcribe(path)
 
 
44
  return segments
45
 
46
  # Function to process segments and convert them into a DataFrame
47
- @spaces.GPU(duration=120)
48
  def process_segments(segments):
49
  result = {}
50
  print("Processing...")
@@ -62,11 +67,13 @@ def process_segments(segments):
62
  return df
63
 
64
  # Gradio interface functions
65
- @spaces.GPU(duration=120)
66
  def generate_transcript(youtube_url, model_name="large-v3"):
67
- path = "downloaded_audio.mp3"
68
- download_youtube_audio(youtube_url, path)
69
- segments = transcribe(path, model_name)
 
 
70
  df = process_segments(segments)
71
 
72
  lis = list(df['text'])
@@ -99,25 +106,20 @@ def generate_transcript(youtube_url, model_name="large-v3"):
99
 
100
  # Function to download video using yt-dlp and generate transcript HTML
101
  def download_video(youtube_url):
102
- # Define download options
103
  ydl_opts = {
104
  'format': 'mp4',
105
  'outtmpl': 'downloaded_video.mp4',
106
  'quiet': True
107
  }
108
 
109
- # Extract video ID to check if already downloaded
110
  with yt_dlp.YoutubeDL({'quiet': True}) as ydl:
111
  info_dict = ydl.extract_info(youtube_url, download=False)
112
  video_path = 'downloaded_video.mp4'
113
 
114
- # Check if video already downloaded
115
  if not os.path.exists(video_path):
116
- # Download the video
117
  with yt_dlp.YoutubeDL(ydl_opts) as ydl:
118
  ydl.download([youtube_url])
119
 
120
- # Generate HTML for the transcript
121
  transcripts = generate_transcript(youtube_url)
122
  transcript_html = ""
123
  for t in transcripts:
 
1
+
2
+ import spaces
3
  import gradio as gr
4
  import pandas as pd
5
  import yt_dlp
 
7
  from semantic_chunkers import StatisticalChunker
8
  from semantic_router.encoders import HuggingFaceEncoder
9
  from faster_whisper import WhisperModel
10
+ import io
 
11
 
12
+ # Function to download YouTube audio and return it as a BytesIO object
13
+ def download_youtube_audio(url, preferred_quality="192"):
14
  ydl_opts = {
15
  'format': 'bestaudio/best',
16
  'postprocessors': [{
 
18
  'preferredcodec': 'mp3',
19
  'preferredquality': preferred_quality,
20
  }],
21
+ 'outtmpl': '-', # Output to stdout
22
  }
23
 
24
  try:
 
27
  video_title = info_dict.get('title', None)
28
  print(f"Downloading audio for: {video_title}")
29
 
30
+ # Download audio to a BytesIO object
31
+ audio_buffer = io.BytesIO()
32
+ ydl.download([url], audio_buffer)
33
+ audio_buffer.seek(0)
34
+ print("Audio download complete")
35
+ return audio_buffer
36
 
37
  except yt_dlp.utils.DownloadError as e:
38
  print(f"Error downloading audio: {e}")
39
  return None
40
 
41
+ # Function to transcribe audio from BytesIO using WhisperModel
42
+ @spaces.GPU
43
+ def transcribe(audio_buffer, model_name="medium"):
44
  model = WhisperModel(model_name)
45
+ print("Reading audio buffer")
46
+
47
+ # Hypothetical support for BytesIO object
48
+ segments, info = model.transcribe(audio_buffer)
49
  return segments
50
 
51
  # Function to process segments and convert them into a DataFrame
52
+ @spaces.GPU
53
  def process_segments(segments):
54
  result = {}
55
  print("Processing...")
 
67
  return df
68
 
69
  # Gradio interface functions
70
+ @spaces.GPU
71
  def generate_transcript(youtube_url, model_name="large-v3"):
72
+ audio_buffer = download_youtube_audio(youtube_url)
73
+ if audio_buffer is None:
74
+ return "Error downloading audio"
75
+
76
+ segments = transcribe(audio_buffer, model_name)
77
  df = process_segments(segments)
78
 
79
  lis = list(df['text'])
 
106
 
107
  # Function to download video using yt-dlp and generate transcript HTML
108
  def download_video(youtube_url):
 
109
  ydl_opts = {
110
  'format': 'mp4',
111
  'outtmpl': 'downloaded_video.mp4',
112
  'quiet': True
113
  }
114
 
 
115
  with yt_dlp.YoutubeDL({'quiet': True}) as ydl:
116
  info_dict = ydl.extract_info(youtube_url, download=False)
117
  video_path = 'downloaded_video.mp4'
118
 
 
119
  if not os.path.exists(video_path):
 
120
  with yt_dlp.YoutubeDL(ydl_opts) as ydl:
121
  ydl.download([youtube_url])
122
 
 
123
  transcripts = generate_transcript(youtube_url)
124
  transcript_html = ""
125
  for t in transcripts: