GoodML committed on
Commit
590726d
·
verified ·
1 Parent(s): fa09811

Testing youtube transcription for youtube videos

Browse files
Files changed (1) hide show
  1. app.py +45 -22
app.py CHANGED
@@ -6,6 +6,7 @@ from dotenv import load_dotenv
6
  from deepgram import DeepgramClient, PrerecordedOptions
7
  import tempfile
8
  import json
 
9
 
10
  import warnings
11
  warnings.filterwarnings("ignore", message="FP16 is not supported on CPU; using FP32 instead")
@@ -32,31 +33,20 @@ if not DEEPGRAM_API_KEY:
32
  GEMINI_API_ENDPOINT = "https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash-latest:generateContent"
33
  GEMINI_API_KEY = API_KEY
34
 
35
- # Load Whisper AI model at startup
36
- # print("Loading Whisper AI model..., ANIKET")
37
- # whisper_model = whisper.load_model("base") # Choose model size: tiny, base, small, medium, large
38
- # print("Whisper AI model loaded successfully, ANIKET")
39
-
40
-
41
  @app.route("/", methods=["GET"])
42
  def health_check():
43
  return jsonify({"status": "success", "message": "API is running successfully!"}), 200
44
 
45
 
46
- @app.route("/mbsa")
47
- def mbsa():
48
- return render_template("mbsa.html")
49
-
50
-
51
  @app.route('/process-audio', methods=['POST'])
52
  def process_audio():
53
- print("GOT THE PROCESS AUDIO REQUEST, ANIKET")
54
 
55
  if 'audio' not in request.files:
56
  return jsonify({"error": "No audio file provided"}), 400
57
 
58
  audio_file = request.files['audio']
59
- print("AUDIO FILE NAME: ", audio_file)
60
 
61
  temp_audio_path = None
62
  try:
@@ -67,23 +57,23 @@ def process_audio():
67
  temp_audio_path = temp_audio_file.name # Get the file path
68
  temp_audio_file.write(audio_file.read()) # Write the uploaded audio to the temp file
69
 
70
- print(f"Temporary audio file saved at: {temp_audio_path}")
71
 
72
  # Step 2: Transcribe the uploaded audio file synchronously
73
  transcription = transcribe_audio(temp_audio_path)
74
 
75
- print("BEFORE THE transcription FAILED ERROR, CHECKING IF I GOT THE TRANSCRIPTION", transcription)
76
 
77
  if not transcription:
78
  return jsonify({"error": "Audio transcription failed"}), 500
79
 
80
- print("GOT THE transcription")
81
 
82
  # Step 3: Generate structured recipe information using Gemini API synchronously
83
- print("Starting the GEMINI REQUEST TO STRUCTURE IT")
84
  structured_data = query_gemini_api(transcription)
85
 
86
- print("GOT THE STRUCTURED DATA", structured_data)
87
  # Step 4: Return the structured data
88
  return jsonify(structured_data)
89
 
@@ -97,6 +87,41 @@ def process_audio():
97
  print(f"Temporary WAV file deleted: {temp_audio_path}")
98
 
99
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
100
 
101
  def transcribe_audio(wav_file_path):
102
  """
@@ -127,7 +152,7 @@ def transcribe_audio(wav_file_path):
127
 
128
  # Check if the response is valid
129
  if response:
130
- print("Request successful! Processing response.")
131
 
132
  # Convert response to JSON string
133
  try:
@@ -152,7 +177,7 @@ def transcribe_audio(wav_file_path):
152
  transcript_file_path = "transcript_from_transcribe_audio.txt"
153
  with open(transcript_file_path, "w", encoding="utf-8") as transcript_file:
154
  transcript_file.write(transcript)
155
- print(f"Transcript saved to file: {transcript_file_path}")
156
 
157
  return transcript
158
  else:
@@ -176,7 +201,6 @@ def query_gemini_api(transcription):
176
  try:
177
  # Define the structured prompt
178
  prompt = (
179
- "Print the transcription in the response as well"
180
  "Analyze the provided cooking video transcription and extract the following structured information:\n"
181
  "1. Recipe Name: Identify the name of the dish being prepared.\n"
182
  "2. Ingredients List: Extract a detailed list of ingredients with their respective quantities (if mentioned).\n"
@@ -207,7 +231,6 @@ def query_gemini_api(transcription):
207
  f"{GEMINI_API_ENDPOINT}?key={GEMINI_API_KEY}",
208
  json=payload,
209
  headers=headers,
210
-
211
  )
212
 
213
  # Raise error if response code is not 200
 
6
  from deepgram import DeepgramClient, PrerecordedOptions
7
  import tempfile
8
  import json
9
+ from youtube_transcript_api import YouTubeTranscriptApi, YouTubeTranscriptApiException
10
 
11
  import warnings
12
  warnings.filterwarnings("ignore", message="FP16 is not supported on CPU; using FP32 instead")
 
33
  GEMINI_API_ENDPOINT = "https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash-latest:generateContent"
34
  GEMINI_API_KEY = API_KEY
35
 
 
 
 
 
 
 
36
  @app.route("/", methods=["GET"])
37
  def health_check():
38
  return jsonify({"status": "success", "message": "API is running successfully!"}), 200
39
 
40
 
 
 
 
 
 
41
  @app.route('/process-audio', methods=['POST'])
42
  def process_audio():
43
+ # print("GOT THE PROCESS AUDIO REQUEST, ANIKET")
44
 
45
  if 'audio' not in request.files:
46
  return jsonify({"error": "No audio file provided"}), 400
47
 
48
  audio_file = request.files['audio']
49
+ # print("AUDIO FILE NAME: ", audio_file)
50
 
51
  temp_audio_path = None
52
  try:
 
57
  temp_audio_path = temp_audio_file.name # Get the file path
58
  temp_audio_file.write(audio_file.read()) # Write the uploaded audio to the temp file
59
 
60
+ # print(f"Temporary audio file saved at: {temp_audio_path}")
61
 
62
  # Step 2: Transcribe the uploaded audio file synchronously
63
  transcription = transcribe_audio(temp_audio_path)
64
 
65
+ # print("BEFORE THE transcription FAILED ERROR, CHECKING IF I GOT THE TRANSCRIPTION", transcription)
66
 
67
  if not transcription:
68
  return jsonify({"error": "Audio transcription failed"}), 500
69
 
70
+ # print("GOT THE transcription")
71
 
72
  # Step 3: Generate structured recipe information using Gemini API synchronously
73
+ # print("Starting the GEMINI REQUEST TO STRUCTURE IT")
74
  structured_data = query_gemini_api(transcription)
75
 
76
+ # print("GOT THE STRUCTURED DATA", structured_data)
77
  # Step 4: Return the structured data
78
  return jsonify(structured_data)
79
 
 
87
  print(f"Temporary WAV file deleted: {temp_audio_path}")
88
 
89
 
90
def _extract_video_id(youtube_url):
    """Return the video ID embedded in a YouTube URL.

    Handles standard watch URLs (``...watch?v=<id>``), short links
    (``youtu.be/<id>``) and path-style URLs (``/embed/<id>``,
    ``/shorts/<id>``, ``/live/<id>``, ``/v/<id>``).  If none of these shapes
    match, falls back to the plain substring extraction so that any input
    that already contained ``v=`` keeps resolving as before.

    Args:
        youtube_url: The URL string supplied by the client.

    Returns:
        The extracted video ID as a string.
    """
    # Imported locally so this block does not depend on the file's import list.
    from urllib.parse import parse_qs, urlparse

    # A URL pasted without a scheme would otherwise be parsed as a bare path
    # with no hostname; a leading "//" makes urlparse treat it as a netloc.
    normalized = youtube_url if "://" in youtube_url else f"//{youtube_url}"
    parsed = urlparse(normalized)

    # Standard watch URLs carry the ID in the ``v`` query parameter; parsing
    # the query also drops any trailing "#fragment" from the ID.
    query_ids = parse_qs(parsed.query).get("v")
    if query_ids:
        return query_ids[0]

    host = (parsed.hostname or "").lower()
    path_parts = [part for part in parsed.path.split("/") if part]

    # Short links put the ID directly in the path: youtu.be/<id>
    if host in ("youtu.be", "www.youtu.be") and path_parts:
        return path_parts[0]

    # Path-style URLs: /embed/<id>, /shorts/<id>, /live/<id>, /v/<id>
    if len(path_parts) >= 2 and path_parts[0] in ("embed", "shorts", "live", "v"):
        return path_parts[1]

    # Legacy behaviour: text after the last "v=" up to the next "&".
    return youtube_url.split("v=")[-1].split("&")[0]


@app.route('/process-youtube', methods=['POST'])
def process_youtube():
    """Fetch a YouTube video's transcript and return Gemini-structured data.

    Expects a JSON object body with a ``youtube_url`` key.

    Returns:
        * 400 with ``{"error": "No YouTube URL provided"}`` when the body is
          missing, is not a JSON object, or has no usable ``youtube_url``.
        * 500 with ``{"error": "Unable to fetch transcript from YouTube"}``
          when the transcript cannot be retrieved or is empty.
        * 500 with ``{"error": <message>}`` on any other failure.
        * Otherwise the JSON-encoded result of ``query_gemini_api``.
    """
    # silent=True yields None instead of raising when the body is absent or
    # is not valid JSON, so malformed requests get the 400 below rather than
    # an unhandled error outside the try block.
    payload = request.get_json(silent=True)
    youtube_url = payload.get('youtube_url') if isinstance(payload, dict) else None

    if not isinstance(youtube_url, str) or not youtube_url.strip():
        return jsonify({"error": "No YouTube URL provided"}), 400

    try:
        # Extract the YouTube video ID from the URL
        video_id = _extract_video_id(youtube_url.strip())

        # Bound before the fetch so the emptiness check below is always safe,
        # even when the fetch raises and is only logged.
        transcript = ""
        try:
            # Fetch the transcript for the video using YouTubeTranscriptApi
            transcript_data = YouTubeTranscriptApi.get_transcript(video_id)

            # Concatenate the text from each segment into a single transcript
            transcript = " ".join(segment['text'] for segment in transcript_data)
        # NOTE(review): confirm the installed youtube_transcript_api version
        # exports YouTubeTranscriptApiException at package level.
        except YouTubeTranscriptApiException as e:
            print(f"Error fetching transcript for video ID {video_id}: {e}")

        if not transcript:
            return jsonify({"error": "Unable to fetch transcript from YouTube"}), 500

        # Send the transcript to the Gemini API for structured data
        structured_data = query_gemini_api(transcript)

        # Return the structured data
        return jsonify(structured_data)

    except Exception as e:
        # Top-level boundary for this endpoint: report the failure as JSON.
        return jsonify({"error": str(e)}), 500
123
+
124
+
125
 
126
  def transcribe_audio(wav_file_path):
127
  """
 
152
 
153
  # Check if the response is valid
154
  if response:
155
+ # print("Request successful! Processing response.")
156
 
157
  # Convert response to JSON string
158
  try:
 
177
  transcript_file_path = "transcript_from_transcribe_audio.txt"
178
  with open(transcript_file_path, "w", encoding="utf-8") as transcript_file:
179
  transcript_file.write(transcript)
180
+ # print(f"Transcript saved to file: {transcript_file_path}")
181
 
182
  return transcript
183
  else:
 
201
  try:
202
  # Define the structured prompt
203
  prompt = (
 
204
  "Analyze the provided cooking video transcription and extract the following structured information:\n"
205
  "1. Recipe Name: Identify the name of the dish being prepared.\n"
206
  "2. Ingredients List: Extract a detailed list of ingredients with their respective quantities (if mentioned).\n"
 
231
  f"{GEMINI_API_ENDPOINT}?key={GEMINI_API_KEY}",
232
  json=payload,
233
  headers=headers,
 
234
  )
235
 
236
  # Raise error if response code is not 200