import os

import gradio as gr
from gradio_client import Client, handle_file

# Read the Hugging Face token from the environment.
HF_TOKEN = os.getenv("HF_TOKEN")

# Fail fast if the token is missing.
if HF_TOKEN is None:
    raise ValueError("Hugging Face token (HF_TOKEN) is not set in environment variables.")

# Client for the hosted Seed-VC Space.
client = Client("Plachta/Seed-VC", hf_token=HF_TOKEN)


def process_audio(
    source,
    target,
    diffusion_steps=25,
    length_adjust=1,
    inference_cfg_rate=0.7,
    f0_condition=False,
    auto_f0_adjust=True,
    pitch_shift=0
):
    """Run Seed-VC voice conversion via the remote Space API.

    Args:
        source: Filepath of the audio whose content is converted.
        target: Filepath of the reference (target-voice) audio.
        diffusion_steps: Number of diffusion steps (quality/speed trade-off).
        length_adjust: Output length scaling factor.
        inference_cfg_rate: Classifier-free guidance rate.
        f0_condition: Whether to use the F0-conditioned model.
        auto_f0_adjust: Whether to automatically adjust F0.
        pitch_shift: Pitch shift in semitones.

    Returns:
        The API result — expected to be a (stream_audio_url, full_audio_url)
        tuple matching the two Audio outputs.

    Raises:
        gr.Error: If the remote call fails; the message is shown in the UI.
    """
    try:
        # Log input file paths for debugging.
        print(f"Source file: {source}")
        print(f"Target file: {target}")

        # Log the request parameters.
        print(f"Parameters: diffusion_steps={diffusion_steps}, length_adjust={length_adjust}, "
              f"inference_cfg_rate={inference_cfg_rate}, f0_condition={f0_condition}, "
              f"auto_f0_adjust={auto_f0_adjust}, pitch_shift={pitch_shift}")

        # Remote API call.
        result = client.predict(
            source=handle_file(source),
            target=handle_file(target),
            diffusion_steps=diffusion_steps,
            length_adjust=length_adjust,
            inference_cfg_rate=inference_cfg_rate,
            f0_condition=f0_condition,
            auto_f0_adjust=auto_f0_adjust,
            pitch_shift=pitch_shift,
            api_name="/predict"
        )

        # Log the raw response.
        print(f"API Response: {result}")

        # Sanity-check the expected (stream_url, full_url) shape.
        if isinstance(result, tuple) and len(result) > 1:
            stream_audio_url, full_audio_url = result
            print(f"Stream Output URL: {stream_audio_url}")
            print(f"Full Output URL: {full_audio_url}")
        else:
            print("Unexpected response format.")

        return result  # Pass the tuple straight through to the two Audio outputs.

    except Exception as e:
        # Log full context for debugging, then surface the failure in the UI.
        # NOTE: previously this returned an error *string* as the first output,
        # which a gr.Audio component cannot render and which masked the real
        # error with a secondary Gradio failure. gr.Error is the supported way
        # to report handler errors to the user.
        error_message = f"An error occurred: {str(e)}"
        print(error_message)
        print(f"Source file: {source}")
        print(f"Target file: {target}")
        print(f"Parameters: diffusion_steps={diffusion_steps}, length_adjust={length_adjust}, "
              f"inference_cfg_rate={inference_cfg_rate}, f0_condition={f0_condition}, "
              f"auto_f0_adjust={auto_f0_adjust}, pitch_shift={pitch_shift}")
        raise gr.Error(error_message) from e


# Build the Gradio interface.
with gr.Blocks() as demo:
    gr.Markdown("# Audio Transformation with Seed-VC")

    with gr.Row():
        source_audio = gr.Audio(label="Source Audio", type="filepath")
        target_audio = gr.Audio(label="Reference Audio", type="filepath")

    diffusion_steps = gr.Slider(1, 50, value=25, label="Diffusion Steps")
    length_adjust = gr.Slider(0.5, 2, value=1, label="Length Adjust")
    inference_cfg_rate = gr.Slider(0.1, 1.0, value=0.7, label="Inference CFG Rate")
    f0_condition = gr.Checkbox(label="Use F0 conditioned model")
    auto_f0_adjust = gr.Checkbox(label="Auto F0 adjust", value=True)
    pitch_shift = gr.Slider(-12, 12, value=0, label="Pitch shift")

    output_stream = gr.Audio(label="Stream Output Audio")
    output_full = gr.Audio(label="Full Output Audio")

    run_button = gr.Button("Transform Audio")
    run_button.click(
        process_audio,
        inputs=[
            source_audio, target_audio, diffusion_steps, length_adjust,
            inference_cfg_rate, f0_condition, auto_f0_adjust, pitch_shift
        ],
        outputs=[output_stream, output_full]
    )

demo.launch()