Spaces:
Running
Running
Testing YouTube transcription for YouTube videos
Browse files
app.py
CHANGED
@@ -6,6 +6,7 @@ from dotenv import load_dotenv
|
|
6 |
from deepgram import DeepgramClient, PrerecordedOptions
|
7 |
import tempfile
|
8 |
import json
|
|
|
9 |
|
10 |
import warnings
|
11 |
warnings.filterwarnings("ignore", message="FP16 is not supported on CPU; using FP32 instead")
|
@@ -32,31 +33,20 @@ if not DEEPGRAM_API_KEY:
|
|
32 |
GEMINI_API_ENDPOINT = "https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash-latest:generateContent"
|
33 |
GEMINI_API_KEY = API_KEY
|
34 |
|
35 |
-
# Load Whisper AI model at startup
|
36 |
-
# print("Loading Whisper AI model..., ANIKET")
|
37 |
-
# whisper_model = whisper.load_model("base") # Choose model size: tiny, base, small, medium, large
|
38 |
-
# print("Whisper AI model loaded successfully, ANIKET")
|
39 |
-
|
40 |
-
|
41 |
@app.route("/", methods=["GET"])
def health_check():
    """Health-check endpoint: report that the API process is alive."""
    payload = {"status": "success", "message": "API is running successfully!"}
    return jsonify(payload), 200
|
44 |
|
45 |
|
46 |
-
@app.route("/mbsa")
def mbsa():
    """Serve the MBSA page from its template."""
    page = render_template("mbsa.html")
    return page
|
49 |
-
|
50 |
-
|
51 |
@app.route('/process-audio', methods=['POST'])
|
52 |
def process_audio():
|
53 |
-
print("GOT THE PROCESS AUDIO REQUEST, ANIKET")
|
54 |
|
55 |
if 'audio' not in request.files:
|
56 |
return jsonify({"error": "No audio file provided"}), 400
|
57 |
|
58 |
audio_file = request.files['audio']
|
59 |
-
print("AUDIO FILE NAME: ", audio_file)
|
60 |
|
61 |
temp_audio_path = None
|
62 |
try:
|
@@ -67,23 +57,23 @@ def process_audio():
|
|
67 |
temp_audio_path = temp_audio_file.name # Get the file path
|
68 |
temp_audio_file.write(audio_file.read()) # Write the uploaded audio to the temp file
|
69 |
|
70 |
-
print(f"Temporary audio file saved at: {temp_audio_path}")
|
71 |
|
72 |
# Step 2: Transcribe the uploaded audio file synchronously
|
73 |
transcription = transcribe_audio(temp_audio_path)
|
74 |
|
75 |
-
print("BEFORE THE transcription FAILED ERROR, CHECKING IF I GOT THE TRANSCRIPTION", transcription)
|
76 |
|
77 |
if not transcription:
|
78 |
return jsonify({"error": "Audio transcription failed"}), 500
|
79 |
|
80 |
-
print("GOT THE transcription")
|
81 |
|
82 |
# Step 3: Generate structured recipe information using Gemini API synchronously
|
83 |
-
print("Starting the GEMINI REQUEST TO STRUCTURE IT")
|
84 |
structured_data = query_gemini_api(transcription)
|
85 |
|
86 |
-
print("GOT THE STRUCTURED DATA", structured_data)
|
87 |
# Step 4: Return the structured data
|
88 |
return jsonify(structured_data)
|
89 |
|
@@ -97,6 +87,41 @@ def process_audio():
|
|
97 |
print(f"Temporary WAV file deleted: {temp_audio_path}")
|
98 |
|
99 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
100 |
|
101 |
def transcribe_audio(wav_file_path):
|
102 |
"""
|
@@ -127,7 +152,7 @@ def transcribe_audio(wav_file_path):
|
|
127 |
|
128 |
# Check if the response is valid
|
129 |
if response:
|
130 |
-
print("Request successful! Processing response.")
|
131 |
|
132 |
# Convert response to JSON string
|
133 |
try:
|
@@ -152,7 +177,7 @@ def transcribe_audio(wav_file_path):
|
|
152 |
transcript_file_path = "transcript_from_transcribe_audio.txt"
|
153 |
with open(transcript_file_path, "w", encoding="utf-8") as transcript_file:
|
154 |
transcript_file.write(transcript)
|
155 |
-
print(f"Transcript saved to file: {transcript_file_path}")
|
156 |
|
157 |
return transcript
|
158 |
else:
|
@@ -176,7 +201,6 @@ def query_gemini_api(transcription):
|
|
176 |
try:
|
177 |
# Define the structured prompt
|
178 |
prompt = (
|
179 |
-
"Print the transcription in the response as well"
|
180 |
"Analyze the provided cooking video transcription and extract the following structured information:\n"
|
181 |
"1. Recipe Name: Identify the name of the dish being prepared.\n"
|
182 |
"2. Ingredients List: Extract a detailed list of ingredients with their respective quantities (if mentioned).\n"
|
@@ -207,7 +231,6 @@ def query_gemini_api(transcription):
|
|
207 |
f"{GEMINI_API_ENDPOINT}?key={GEMINI_API_KEY}",
|
208 |
json=payload,
|
209 |
headers=headers,
|
210 |
-
|
211 |
)
|
212 |
|
213 |
# Raise error if response code is not 200
|
|
|
6 |
from deepgram import DeepgramClient, PrerecordedOptions
|
7 |
import tempfile
|
8 |
import json
|
9 |
+
from youtube_transcript_api import YouTubeTranscriptApi, YouTubeTranscriptApiException
|
10 |
|
11 |
import warnings
|
12 |
warnings.filterwarnings("ignore", message="FP16 is not supported on CPU; using FP32 instead")
|
|
|
33 |
GEMINI_API_ENDPOINT = "https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash-latest:generateContent"
|
34 |
GEMINI_API_KEY = API_KEY
|
35 |
|
|
|
|
|
|
|
|
|
|
|
|
|
36 |
@app.route("/", methods=["GET"])
|
37 |
def health_check():
|
38 |
return jsonify({"status": "success", "message": "API is running successfully!"}), 200
|
39 |
|
40 |
|
|
|
|
|
|
|
|
|
|
|
41 |
@app.route('/process-audio', methods=['POST'])
|
42 |
def process_audio():
|
43 |
+
# print("GOT THE PROCESS AUDIO REQUEST, ANIKET")
|
44 |
|
45 |
if 'audio' not in request.files:
|
46 |
return jsonify({"error": "No audio file provided"}), 400
|
47 |
|
48 |
audio_file = request.files['audio']
|
49 |
+
# print("AUDIO FILE NAME: ", audio_file)
|
50 |
|
51 |
temp_audio_path = None
|
52 |
try:
|
|
|
57 |
temp_audio_path = temp_audio_file.name # Get the file path
|
58 |
temp_audio_file.write(audio_file.read()) # Write the uploaded audio to the temp file
|
59 |
|
60 |
+
# print(f"Temporary audio file saved at: {temp_audio_path}")
|
61 |
|
62 |
# Step 2: Transcribe the uploaded audio file synchronously
|
63 |
transcription = transcribe_audio(temp_audio_path)
|
64 |
|
65 |
+
# print("BEFORE THE transcription FAILED ERROR, CHECKING IF I GOT THE TRANSCRIPTION", transcription)
|
66 |
|
67 |
if not transcription:
|
68 |
return jsonify({"error": "Audio transcription failed"}), 500
|
69 |
|
70 |
+
# print("GOT THE transcription")
|
71 |
|
72 |
# Step 3: Generate structured recipe information using Gemini API synchronously
|
73 |
+
# print("Starting the GEMINI REQUEST TO STRUCTURE IT")
|
74 |
structured_data = query_gemini_api(transcription)
|
75 |
|
76 |
+
# print("GOT THE STRUCTURED DATA", structured_data)
|
77 |
# Step 4: Return the structured data
|
78 |
return jsonify(structured_data)
|
79 |
|
|
|
87 |
print(f"Temporary WAV file deleted: {temp_audio_path}")
|
88 |
|
89 |
|
90 |
+
@app.route('/process-youtube', methods=['POST'])
def process_youtube():
    """
    Accept a JSON body {"youtube_url": "..."}, fetch the video's transcript
    with YouTubeTranscriptApi, structure it via query_gemini_api, and return
    the structured data as JSON.

    Responses:
        400 — no YouTube URL supplied in the request body.
        500 — transcript could not be fetched, or any unexpected error.
    """
    # get_json(silent=True) avoids Werkzeug's own 400 on a non-JSON body so
    # we can return our consistent error payload instead.
    body = request.get_json(silent=True) or {}
    youtube_url = body.get('youtube_url')

    if not youtube_url:
        return jsonify({"error": "No YouTube URL provided"}), 400

    try:
        # Extract the video ID: everything after "v=", dropping any trailing
        # query parameters (e.g. "&t=42s").
        # NOTE(review): this does not handle youtu.be short links — confirm
        # whether callers send those.
        video_id = youtube_url.split("v=")[-1].split("&")[0]

        # BUGFIX: bind `transcript` before the inner try. Previously, if the
        # fetch raised, `if not transcript:` hit an UnboundLocalError that was
        # swallowed by the outer except and reported as a confusing 500.
        transcript = None
        try:
            # Fetch the per-segment transcript data for the video.
            transcript_data = YouTubeTranscriptApi.get_transcript(video_id)

            # Concatenate the text from each segment into a single transcript.
            transcript = " ".join(segment['text'] for segment in transcript_data)
        except YouTubeTranscriptApiException as e:
            # Log and fall through; the emptiness check below returns a clean 500.
            print(f"Error fetching transcript for video ID {video_id}: {e}")

        if not transcript:
            return jsonify({"error": "Unable to fetch transcript from YouTube"}), 500

        # Send the transcript to the Gemini API for structured data.
        structured_data = query_gemini_api(transcript)

        # Return the structured data.
        return jsonify(structured_data)

    except Exception as e:
        # Catch-all boundary: report the failure as JSON rather than letting
        # Flask emit an HTML 500 page.
        return jsonify({"error": str(e)}), 500
|
123 |
+
|
124 |
+
|
125 |
|
126 |
def transcribe_audio(wav_file_path):
|
127 |
"""
|
|
|
152 |
|
153 |
# Check if the response is valid
|
154 |
if response:
|
155 |
+
# print("Request successful! Processing response.")
|
156 |
|
157 |
# Convert response to JSON string
|
158 |
try:
|
|
|
177 |
transcript_file_path = "transcript_from_transcribe_audio.txt"
|
178 |
with open(transcript_file_path, "w", encoding="utf-8") as transcript_file:
|
179 |
transcript_file.write(transcript)
|
180 |
+
# print(f"Transcript saved to file: {transcript_file_path}")
|
181 |
|
182 |
return transcript
|
183 |
else:
|
|
|
201 |
try:
|
202 |
# Define the structured prompt
|
203 |
prompt = (
|
|
|
204 |
"Analyze the provided cooking video transcription and extract the following structured information:\n"
|
205 |
"1. Recipe Name: Identify the name of the dish being prepared.\n"
|
206 |
"2. Ingredients List: Extract a detailed list of ingredients with their respective quantities (if mentioned).\n"
|
|
|
231 |
f"{GEMINI_API_ENDPOINT}?key={GEMINI_API_KEY}",
|
232 |
json=payload,
|
233 |
headers=headers,
|
|
|
234 |
)
|
235 |
|
236 |
# Raise error if response code is not 200
|