Spaces:
Running
Running
Added both audio and video information
Browse files
app.py
CHANGED
@@ -1,6 +1,7 @@
|
|
1 |
import os
|
2 |
import requests
|
3 |
from flask import Flask, request, jsonify, render_template
|
|
|
4 |
from dotenv import load_dotenv
|
5 |
import tempfile
|
6 |
import json
|
@@ -37,8 +38,78 @@ def health_check():
|
|
37 |
return jsonify({"status": "success", "message": "API is running successfully!"}), 200
|
38 |
|
39 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
40 |
|
41 |
-
|
|
|
42 |
"""Download video (MP4 format) from the given URL and save it to temp_video_path."""
|
43 |
response = requests.get(url, stream=True)
|
44 |
if response.status_code == 200:
|
@@ -107,16 +178,31 @@ def process_video():
|
|
107 |
# Step 1: Download the MP4 video from the provided URL
|
108 |
with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as temp_video_file:
|
109 |
temp_video_path = temp_video_file.name
|
110 |
-
|
111 |
interval = 1
|
112 |
# Step 2: get the information from the downloaded MP4 file synchronously
|
113 |
video_info = get_information_from_video_using_OCR(temp_video_path, interval)
|
114 |
|
115 |
if not video_info:
|
116 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
117 |
|
|
|
|
|
118 |
# Step 3: Generate structured recipe information using Gemini API synchronously
|
119 |
-
structured_data = query_gemini_api(video_info)
|
120 |
|
121 |
return jsonify(structured_data)
|
122 |
|
@@ -134,14 +220,14 @@ def process_video():
|
|
134 |
|
135 |
|
136 |
|
137 |
-
def query_gemini_api(
|
138 |
"""
|
139 |
Send transcription text to Gemini API and fetch structured recipe information synchronously.
|
140 |
"""
|
141 |
try:
|
142 |
# Define the structured prompt
|
143 |
prompt = (
|
144 |
-
"Analyze the provided cooking video transcription and extract the following structured information:\n"
|
145 |
"1. Recipe Name: Identify the name of the dish being prepared.\n"
|
146 |
"2. Ingredients List: Extract a detailed list of ingredients with their respective quantities (if mentioned).\n"
|
147 |
"3. Steps for Preparation: Provide a step-by-step breakdown of the recipe's preparation process, organized and numbered sequentially.\n"
|
@@ -152,7 +238,9 @@ def query_gemini_api(transcription):
|
|
152 |
"8. Special Notes or Variations: Include any specific tips, variations, or alternatives mentioned.\n"
|
153 |
"9. Festive or Thematic Relevance: Note if the recipe has any special relevance to holidays, events, or seasons.\n"
|
154 |
"Also, make sure not to provide anything else or any other information or warning or text apart from the above things mentioned."
|
155 |
-
f"Text: {
|
|
|
|
|
156 |
)
|
157 |
|
158 |
# Prepare the payload and headers
|
|
|
1 |
import os
|
2 |
import requests
|
3 |
from flask import Flask, request, jsonify, render_template
|
4 |
+
from deepgram import DeepgramClient, PrerecordedOptions
|
5 |
from dotenv import load_dotenv
|
6 |
import tempfile
|
7 |
import json
|
|
|
38 |
return jsonify({"status": "success", "message": "API is running successfully!"}), 200
|
39 |
|
40 |
|
41 |
+
def transcribe_audio(wav_file_path):
    """
    Transcribe a WAV file with the Deepgram API synchronously.

    The file at ``wav_file_path`` is always removed before this function
    returns, whether or not transcription succeeded.

    Args:
        wav_file_path (str): Path of the WAV file to read and transcribe.

    Returns:
        str: The transcript text on success (may be an empty string).
        dict: ``{"status": "error", "message": ...}`` on any failure.
        Callers must therefore check the type of the result before using
        it as text.
    """
    print("Entered the transcribe_audio function")
    try:
        # Initialize Deepgram client
        deepgram = DeepgramClient(DEEPGRAM_API_KEY)

        # The file handle must stay open while the SDK reads the buffer,
        # so the request is issued inside the ``with`` block.
        with open(wav_file_path, 'rb') as buffer_data:
            payload = {'buffer': buffer_data}

            # Configure transcription options
            options = PrerecordedOptions(
                smart_format=True, model="nova-2", language="en-US"
            )

            # Transcribe the audio
            response = deepgram.listen.prerecorded.v('1').transcribe_file(payload, options)

        # Guard clause: a falsy response carries nothing to parse.
        if not response:
            return {"status": "error", "message": "Invalid response from Deepgram."}

        # Convert response to JSON string
        try:
            data_str = response.to_json(indent=4)
        except AttributeError as e:
            return {"status": "error", "message": f"Error converting response to JSON: {e}"}

        # Parse the JSON string to a Python dictionary
        try:
            data = json.loads(data_str)
        except json.JSONDecodeError as e:
            return {"status": "error", "message": f"Error parsing JSON string: {e}"}

        # Extract the transcript. Besides a missing key, the response may
        # contain an empty "channels"/"alternatives" list (IndexError) or a
        # null where a container is expected (TypeError); report all of these
        # as an extraction failure instead of an "unexpected" error.
        try:
            transcript = data["results"]["channels"][0]["alternatives"][0]["transcript"]
        except (KeyError, IndexError, TypeError) as e:
            return {"status": "error", "message": f"Error extracting transcript: {e}"}

        print(f"Transcript obtained: {transcript}")

        # Save the transcript to a text file in the current working directory.
        # NOTE(review): this fixed path is shared by every request, so
        # concurrent requests overwrite each other's file -- confirm whether
        # this debugging artifact is still wanted.
        transcript_file_path = "transcript_from_transcribe_audio.txt"
        with open(transcript_file_path, "w", encoding="utf-8") as transcript_file:
            transcript_file.write(transcript)

        return transcript

    except FileNotFoundError:
        return {"status": "error", "message": f"Video file not found: {wav_file_path}"}
    except Exception as e:
        # Top-level boundary: any SDK/network failure becomes an error dict.
        return {"status": "error", "message": f"Unexpected error: {e}"}
    finally:
        # Clean up the temporary WAV file. Attempt the removal directly rather
        # than checking for existence first, and never let a cleanup failure
        # replace the value being returned above.
        try:
            os.remove(wav_file_path)
        except FileNotFoundError:
            pass
        except OSError as e:
            print(f"Could not delete temporary WAV file {wav_file_path}: {e}")
        else:
            print(f"Temporary WAV file deleted: {wav_file_path}")
|
109 |
+
|
110 |
|
111 |
+
|
112 |
+
def download_video(url, temp_video_path):
|
113 |
"""Download video (MP4 format) from the given URL and save it to temp_video_path."""
|
114 |
response = requests.get(url, stream=True)
|
115 |
if response.status_code == 200:
|
|
|
178 |
# Step 1: Download the MP4 video from the provided URL
|
179 |
with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as temp_video_file:
|
180 |
temp_video_path = temp_video_file.name
|
181 |
+
download_video(video_url, temp_video_path)
|
182 |
interval = 1
|
183 |
# Step 2: get the information from the downloaded MP4 file synchronously
|
184 |
video_info = get_information_from_video_using_OCR(temp_video_path, interval)
|
185 |
|
186 |
if not video_info:
|
187 |
+
video_info = ""
|
188 |
+
|
189 |
+
|
190 |
+
|
191 |
+
# Step 2: Convert the MP4 to WAV
|
192 |
+
with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_wav_file:
|
193 |
+
temp_wav_path = temp_wav_file.name
|
194 |
+
convert_mp4_to_wav(temp_video_path, temp_wav_path)
|
195 |
+
|
196 |
+
audio_info = transcribe_audio(temp_wav_path)
|
197 |
+
|
198 |
+
# If no transcription present, use an empty string
|
199 |
+
if not audio_info:
|
200 |
+
audio_info = ""
|
201 |
|
202 |
+
|
203 |
+
|
204 |
# Step 3: Generate structured recipe information using Gemini API synchronously
|
205 |
+
structured_data = query_gemini_api(video_info, audio_info)
|
206 |
|
207 |
return jsonify(structured_data)
|
208 |
|
|
|
220 |
|
221 |
|
222 |
|
223 |
+
def query_gemini_api(video_transcription, audio_transcription):
|
224 |
"""
|
225 |
Send transcription text to Gemini API and fetch structured recipe information synchronously.
|
226 |
"""
|
227 |
try:
|
228 |
# Define the structured prompt
|
229 |
prompt = (
|
230 |
+
"Analyze the provided cooking video and audio transcription combined and based on the combined information extract the following structured information:\n"
|
231 |
"1. Recipe Name: Identify the name of the dish being prepared.\n"
|
232 |
"2. Ingredients List: Extract a detailed list of ingredients with their respective quantities (if mentioned).\n"
|
233 |
"3. Steps for Preparation: Provide a step-by-step breakdown of the recipe's preparation process, organized and numbered sequentially.\n"
|
|
|
238 |
"8. Special Notes or Variations: Include any specific tips, variations, or alternatives mentioned.\n"
|
239 |
"9. Festive or Thematic Relevance: Note if the recipe has any special relevance to holidays, events, or seasons.\n"
|
240 |
"Also, make sure not to provide anything else or any other information or warning or text apart from the above things mentioned."
|
241 |
+
f"Text: {audio_transcription}\n"
|
242 |
+
f"Text: {video_transcription}\n"
|
243 |
+
|
244 |
)
|
245 |
|
246 |
# Prepare the payload and headers
|