import os

import gradio as gr
from gradio_client import Client, handle_file

# Read the Hugging Face token from the environment.
HF_TOKEN = os.getenv("HF_TOKEN")

# Fail fast if the token is missing.
if HF_TOKEN is None:
    raise ValueError("Hugging Face token (HF_TOKEN) is not set in environment variables.")

# Client for the hosted Seed-VC Space.
client = Client("Plachta/Seed-VC", hf_token=HF_TOKEN)


def process_audio(
    source,
    target,
    diffusion_steps=25,
    length_adjust=1,
    inference_cfg_rate=0.7,
    f0_condition=False,
    auto_f0_adjust=True,
    pitch_shift=0
):
    """Run Seed-VC voice conversion via the remote Space API.

    Args:
        source: Filepath of the audio whose content is converted.
        target: Filepath of the reference (target-voice) audio.
        diffusion_steps: Number of diffusion steps (quality/speed trade-off).
        length_adjust: Output length scaling factor.
        inference_cfg_rate: Classifier-free guidance rate.
        f0_condition: Whether to use the F0-conditioned model.
        auto_f0_adjust: Whether to automatically adjust F0.
        pitch_shift: Pitch shift in semitones.

    Returns:
        The API result — expected to be a (stream_audio_url, full_audio_url)
        tuple matching the two Audio outputs.

    Raises:
        gr.Error: If the remote call fails; the message is shown in the UI.
    """
    try:
        # Log input file paths for debugging.
        print(f"Source file: {source}")
        print(f"Target file: {target}")

        # Log the request parameters.
        print(f"Parameters: diffusion_steps={diffusion_steps}, length_adjust={length_adjust}, "
              f"inference_cfg_rate={inference_cfg_rate}, f0_condition={f0_condition}, "
              f"auto_f0_adjust={auto_f0_adjust}, pitch_shift={pitch_shift}")

        # Remote API call.
        result = client.predict(
            source=handle_file(source),
            target=handle_file(target),
            diffusion_steps=diffusion_steps,
            length_adjust=length_adjust,
            inference_cfg_rate=inference_cfg_rate,
            f0_condition=f0_condition,
            auto_f0_adjust=auto_f0_adjust,
            pitch_shift=pitch_shift,
            api_name="/predict"
        )

        # Log the raw response.
        print(f"API Response: {result}")

        # Sanity-check the expected (stream_url, full_url) shape.
        if isinstance(result, tuple) and len(result) > 1:
            stream_audio_url, full_audio_url = result
            print(f"Stream Output URL: {stream_audio_url}")
            print(f"Full Output URL: {full_audio_url}")
        else:
            print("Unexpected response format.")

        return result  # Pass the tuple straight through to the two Audio outputs.

    except Exception as e:
        # Log full context for debugging, then surface the failure in the UI.
        # NOTE: previously this returned an error *string* as the first output,
        # which a gr.Audio component cannot render and which masked the real
        # error with a secondary Gradio failure. gr.Error is the supported way
        # to report handler errors to the user.
        error_message = f"An error occurred: {str(e)}"
        print(error_message)
        print(f"Source file: {source}")
        print(f"Target file: {target}")
        print(f"Parameters: diffusion_steps={diffusion_steps}, length_adjust={length_adjust}, "
              f"inference_cfg_rate={inference_cfg_rate}, f0_condition={f0_condition}, "
              f"auto_f0_adjust={auto_f0_adjust}, pitch_shift={pitch_shift}")
        raise gr.Error(error_message) from e


# Build the Gradio interface.
with gr.Blocks() as demo:
    gr.Markdown("# Audio Transformation with Seed-VC")

    with gr.Row():
        source_audio = gr.Audio(label="Source Audio", type="filepath")
        target_audio = gr.Audio(label="Reference Audio", type="filepath")

    diffusion_steps = gr.Slider(1, 50, value=25, label="Diffusion Steps")
    length_adjust = gr.Slider(0.5, 2, value=1, label="Length Adjust")
    inference_cfg_rate = gr.Slider(0.1, 1.0, value=0.7, label="Inference CFG Rate")
    f0_condition = gr.Checkbox(label="Use F0 conditioned model")
    auto_f0_adjust = gr.Checkbox(label="Auto F0 adjust", value=True)
    pitch_shift = gr.Slider(-12, 12, value=0, label="Pitch shift")

    output_stream = gr.Audio(label="Stream Output Audio")
    output_full = gr.Audio(label="Full Output Audio")

    run_button = gr.Button("Transform Audio")
    run_button.click(
        process_audio,
        inputs=[
            source_audio, target_audio, diffusion_steps, length_adjust,
            inference_cfg_rate, f0_condition, auto_f0_adjust, pitch_shift
        ],
        outputs=[output_stream, output_full]
    )

demo.launch()