File size: 3,689 Bytes
10781e9 274dc77 f5e715a f92198a f5e715a f92198a f5e715a 10781e9 ac0f6ea 10781e9 f5e715a 10781e9 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 |
import asyncio
import os
import subprocess
import threading
import uuid
from typing import List

import azure.cognitiveservices.speech as speechsdk
import requests
import supabase
from fastapi import FastAPI, HTTPException, Request
from pydantic import BaseModel
app = FastAPI()

# Configuration is read from the environment so that credentials never live in
# source control. Only non-secret values get a local-development default.
# NOTE(review): any key that has ever been committed to this file should be
# treated as compromised and rotated.
SUPABASE_URL = os.getenv("SUPABASE_URL", "http://127.0.0.1:54321")
SUPABASE_SERVICE_ROLE_KEY = os.getenv("SUPABASE_SERVICE_ROLE_KEY", "")
NEXT_PUBLIC_AZURE_SPEECH_KEY = os.getenv("NEXT_PUBLIC_AZURE_SPEECH_KEY", "")
NEXT_PUBLIC_AZURE_SPEECH_REGION = os.getenv(
    "NEXT_PUBLIC_AZURE_SPEECH_REGION", "westeurope"
)

# Supabase setup
supabase_client = supabase.create_client(SUPABASE_URL, SUPABASE_SERVICE_ROLE_KEY)
print(f"SUPABASE_URL: {SUPABASE_URL}")
print(f"SUPABASE_SERVICE_ROLE_KEY: {'Exists' if SUPABASE_SERVICE_ROLE_KEY else 'Missing'}")

# Azure Speech settings (the environment variables keep their NEXT_PUBLIC_
# names; the shorter aliases are what the rest of the module uses).
AZURE_SPEECH_KEY = NEXT_PUBLIC_AZURE_SPEECH_KEY
AZURE_SPEECH_REGION = NEXT_PUBLIC_AZURE_SPEECH_REGION
print(f"AZURE_SPEECH_KEY: {'Exists' if AZURE_SPEECH_KEY else 'Missing Azure Speech Key'}")
print(f"AZURE_SPEECH_REGION: {'Exists' if AZURE_SPEECH_REGION else 'Missing Azure Speech Region'}")
class YouTubeData(BaseModel):
    """Request body accepted by the ``/transcribe`` endpoint."""

    # Video URLs to download and transcribe, processed in the given order.
    youtubeUrl: List[str]
    # Identifier of the requesting user; becomes the first path segment of the
    # uploaded transcription's storage key.
    userId: str
# Function to download and save audio
async def download_and_save_audio(video_url: str, file_id: str) -> str:
    """Download the audio track of ``video_url`` as a WAV file using yt-dlp.

    Args:
        video_url: URL handed to yt-dlp.
        file_id: Unique identifier used to name the temporary file.

    Returns:
        Path of the extracted audio file, ``/tmp/<file_id>.wav``.

    Raises:
        HTTPException: Status 500 when yt-dlp is not installed or exits with
            a non-zero status.
    """
    temp_file_path = f"/tmp/{file_id}.wav"
    command = [
        "yt-dlp",
        "--extract-audio",
        "--audio-format", "wav",
        "--output", temp_file_path,
        # End-of-options marker: the URL comes from the request body, so it
        # must never be parsed as a yt-dlp option (e.g. one starting with "-").
        "--",
        video_url,
    ]
    loop = asyncio.get_running_loop()
    try:
        # subprocess.run blocks until the download finishes; run it in the
        # default executor so the event loop keeps serving other requests.
        await loop.run_in_executor(
            None, lambda: subprocess.run(command, check=True)
        )
    except FileNotFoundError as e:
        raise HTTPException(
            status_code=500, detail="yt-dlp failed: executable not found"
        ) from e
    except subprocess.CalledProcessError as e:
        raise HTTPException(status_code=500, detail=f"yt-dlp failed: {str(e)}") from e
    return temp_file_path
# Function to transcribe audio
async def transcribe_audio(file_path: str) -> str:
    """Transcribe an entire audio file with Azure Speech and return its text.

    Args:
        file_path: Path to the audio file to transcribe.

    Returns:
        The recognized utterances joined by single spaces.

    Raises:
        HTTPException: Status 500 when the service reports an error or no
            speech is recognized in the file.
    """
    # The Speech SDK call blocks until the whole file has been processed, so
    # it runs in the default executor instead of on the event loop.
    loop = asyncio.get_running_loop()
    return await loop.run_in_executor(None, _transcribe_audio_blocking, file_path)


def _transcribe_audio_blocking(file_path: str) -> str:
    """Run continuous recognition over ``file_path`` and collect every utterance."""
    speech_config = speechsdk.SpeechConfig(
        subscription=AZURE_SPEECH_KEY, region=AZURE_SPEECH_REGION
    )
    speech_config.speech_recognition_language = "en-US"
    audio_config = speechsdk.AudioConfig(filename=file_path)
    recognizer = speechsdk.SpeechRecognizer(
        speech_config=speech_config, audio_config=audio_config
    )

    segments: List[str] = []
    errors: List[str] = []
    finished = threading.Event()

    def on_recognized(evt):
        # Fired once per finalized utterance.
        result = evt.result
        if result.reason == speechsdk.ResultReason.RecognizedSpeech and result.text:
            segments.append(result.text)

    def on_canceled(evt):
        # Cancellation also covers the normal end-of-file case; only a
        # CancellationReason.Error is an actual failure.
        details = evt.cancellation_details
        if details.reason == speechsdk.CancellationReason.Error:
            errors.append(str(details.error_details))
        finished.set()

    def on_session_stopped(evt):
        finished.set()

    recognizer.recognized.connect(on_recognized)
    recognizer.canceled.connect(on_canceled)
    recognizer.session_stopped.connect(on_session_stopped)

    # recognize_once() stops after the first utterance, which would drop most
    # of a video; continuous recognition processes the whole file.
    recognizer.start_continuous_recognition()
    try:
        finished.wait()
    finally:
        recognizer.stop_continuous_recognition()

    if errors:
        raise HTTPException(
            status_code=500, detail=f"Speech recognition failed: {errors[0]}"
        )
    if not segments:
        raise HTTPException(
            status_code=500,
            detail="Speech recognition failed: no speech could be recognized",
        )
    return " ".join(segments)
# Function to upload file to Supabase
async def upload_to_supabase(file_path: str, user_id: str, file_id: str):
    """Upload a local file to the Supabase ``files`` bucket as plain text.

    The object is stored under ``<user_id>/<file_id>.txt`` and overwrites any
    existing object at that key.

    Args:
        file_path: Local path of the file to upload.
        user_id: Used as the first path segment of the storage key.
        file_id: Used as the object name (with a ``.txt`` suffix).

    Returns:
        Whatever the Supabase storage client returns for the upload.
    """
    with open(file_path, "rb") as f:
        file_data = f.read()
    # The storage client takes upload options as a single ``file_options``
    # mapping of header-style string values, not as keyword arguments.
    return supabase_client.storage.from_("files").upload(
        f"{user_id}/{file_id}.txt",
        file_data,
        file_options={"content-type": "text/plain", "upsert": "true"},
    )
@app.post("/transcribe")
async def transcribe_youtube_audio(data: YouTubeData):
    """Transcribe each requested video and upload the combined transcript.

    Every URL is downloaded and transcribed in order, the results are written
    to one text file, and that file is uploaded to Supabase storage under the
    requesting user's id.

    Args:
        data: Request body carrying the video URLs and the user id.

    Returns:
        ``{"success": True, "file": {"id": <file_id>}}`` where ``file_id`` is
        the id used for the uploaded transcription.

    Raises:
        HTTPException: Status 400 when no URL is supplied; status 500 errors
            from the download and transcription steps propagate unchanged.
    """
    if not data.youtubeUrl:
        raise HTTPException(
            status_code=400, detail="youtubeUrl must contain at least one URL"
        )

    sections = []
    for url in data.youtubeUrl:
        temp_file_path = await download_and_save_audio(url, str(uuid.uuid4()))
        try:
            transcription = await transcribe_audio(temp_file_path)
        finally:
            # Remove the audio even when transcription fails so that failed
            # requests do not fill up /tmp.
            _remove_if_exists(temp_file_path)
        sections.append(f"\nTranscription for URL ({url}):\n{transcription}\n")
    combined_transcription = "".join(sections)

    file_id = str(uuid.uuid4())
    temp_transcription_path = f"/tmp/{file_id}-transcription.txt"
    try:
        with open(temp_transcription_path, "w", encoding="utf-8") as f:
            f.write(combined_transcription)
        await upload_to_supabase(temp_transcription_path, data.userId, file_id)
    finally:
        _remove_if_exists(temp_transcription_path)

    return {"success": True, "file": {"id": file_id}}


def _remove_if_exists(path: str) -> None:
    """Delete ``path``, ignoring the case where it is already gone."""
    try:
        os.remove(path)
    except FileNotFoundError:
        pass
|