File size: 3,841 Bytes
37c2132
 
 
da9ac09
37c2132
 
 
 
 
 
 
da9ac09
fd2000c
 
 
 
 
 
 
 
 
 
bae29a7
d0e5c3a
 
 
112f9f7
3cb9e69
fa51e11
 
bae29a7
 
 
 
 
 
 
 
fa51e11
3cb9e69
 
 
 
 
 
fa51e11
3cb9e69
 
 
 
fa51e11
3cb9e69
 
 
 
 
d0e5c3a
bae29a7
da9ac09
 
 
37c2132
112f9f7
 
da9ac09
 
112f9f7
 
 
 
da9ac09
 
 
 
 
 
112f9f7
 
 
37c2132
 
112f9f7
37c2132
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
import gradio as gr
from gradio_client import Client, handle_file
import os
import requests

# Hugging Face access token, required both to open the gradio_client session
# and to authorize direct HTTP requests to the Space.
HF_TOKEN = os.getenv("HF_TOKEN")

# Fail fast at import time: nothing below can work without a token.
if HF_TOKEN is None:
    raise ValueError("Hugging Face token (HF_TOKEN) is not set in environment variables.")

# Persistent client for the Seed-VC voice-conversion Space.
# NOTE(review): constructed at import time, so loading this module performs a
# network handshake with the Space — confirm that is acceptable for callers.
client = Client("Plachta/Seed-VC", hf_token=HF_TOKEN)

def process_audio(
    source,
    target,
    diffusion_steps=25,
    length_adjust=1,
    inference_cfg_rate=0.7,
    f0_condition=False,
    auto_f0_adjust=True,
    pitch_shift=0
):
    """Run Seed-VC voice conversion on the remote Space and return its outputs.

    Args:
        source: Path to the source audio file (the voice to convert).
        target: Path to the reference audio file (the voice to imitate).
        diffusion_steps: Number of diffusion steps (1-50).
        length_adjust: Output length scale factor (0.5-2).
        inference_cfg_rate: Classifier-free-guidance rate (0.1-1.0).
        f0_condition: Whether to use the F0-conditioned model.
        auto_f0_adjust: Whether to auto-adjust F0 to the reference.
        pitch_shift: Pitch shift in semitones (-12 to 12).

    Returns:
        A 2-tuple suitable for the (stream, full) gr.Audio outputs, or
        (None, None) on any failure.
    """
    try:
        print(f"Source file: {source}")
        print(f"Target file: {target}")

        # gradio_client's Client.predict() is synchronous: it submits the job,
        # waits for completion, and returns the endpoint's outputs directly.
        # The original code treated this return value as an "event id" and
        # then polled /call/predict/<id> by hand (via a non-public
        # client.base_url attribute), so the real outputs were discarded and
        # the function always returned (None, None).
        result = client.predict(
            source=handle_file(source),
            target=handle_file(target),
            diffusion_steps=diffusion_steps,
            length_adjust=length_adjust,
            inference_cfg_rate=inference_cfg_rate,
            f0_condition=f0_condition,
            auto_f0_adjust=auto_f0_adjust,
            pitch_shift=pitch_shift,
            api_name="/predict"
        )

        # The Seed-VC /predict endpoint has two audio outputs (streamed and
        # full); pass them straight through to the two gr.Audio components.
        # Handle a single-value return defensively so the UI still receives
        # a 2-tuple.
        if isinstance(result, (list, tuple)) and len(result) >= 2:
            return result[0], result[1]
        return result, None

    except Exception as e:
        # Top-level boundary for the Gradio callback: log the failure and
        # degrade to empty outputs instead of crashing the UI.
        error_message = f"An error occurred: {str(e)}"
        print(error_message)
        return None, None

def download_file_with_token(url, filename):
    """Download `url` to `filename` using the HF token; return the local path.

    Args:
        url: Fully-qualified URL of the file to fetch.
        filename: Local path to write the downloaded bytes to.

    Returns:
        `filename` on success.

    Raises:
        ValueError: If the server responds with a non-200 status.
    """
    headers = {"Authorization": f"Bearer {HF_TOKEN}"}
    # Stream the body and set a timeout so a stalled server cannot hang the
    # caller. The original buffered the entire (binary audio) response in
    # memory and printed response.text, dumping raw bytes into the log.
    response = requests.get(url, headers=headers, stream=True, timeout=60)
    print(f"Requesting URL: {url}")
    print(f"HTTP Status Code: {response.status_code}")

    if response.status_code == 200:
        with open(filename, "wb") as f:
            # Write in chunks to keep memory flat for large audio files.
            for chunk in response.iter_content(chunk_size=65536):
                f.write(chunk)
        # Bug fix: the original f-string printed a literal placeholder
        # instead of interpolating the actual filename.
        print(f"File downloaded: {filename}")
        return filename
    else:
        error_detail = f"Failed to download file: {response.status_code} {response.text}"
        print(error_detail)
        raise ValueError(error_detail)

# --- Gradio UI -------------------------------------------------------------
# Component creation order inside the Blocks context defines the on-screen
# layout, so the statement order below is significant.
with gr.Blocks() as demo:
    gr.Markdown("# Audio Transformation with Token Authentication")

    # Source/reference uploads side by side; "filepath" yields local paths
    # that process_audio forwards to the Space via handle_file().
    with gr.Row():
        source_audio = gr.Audio(label="Source Audio", type="filepath")
        target_audio = gr.Audio(label="Reference Audio", type="filepath")

    # Conversion parameters, mirroring the Seed-VC /predict signature.
    diffusion_steps = gr.Slider(1, 50, value=25, label="Diffusion Steps")
    length_adjust = gr.Slider(0.5, 2, value=1, label="Length Adjust")
    inference_cfg_rate = gr.Slider(0.1, 1.0, value=0.7, label="Inference CFG Rate")
    f0_condition = gr.Checkbox(label="Use F0 conditioned model")
    auto_f0_adjust = gr.Checkbox(label="Auto F0 adjust", value=True)
    pitch_shift = gr.Slider(-12, 12, value=0, label="Pitch shift")

    # Two outputs matching process_audio's (stream, full) return tuple.
    output_stream = gr.Audio(label="Stream Output Audio")
    output_full = gr.Audio(label="Full Output Audio")

    run_button = gr.Button("Transform Audio")

    # Input order here must match process_audio's parameter order.
    run_button.click(
        process_audio,
        inputs=[
            source_audio, target_audio, diffusion_steps, length_adjust,
            inference_cfg_rate, f0_condition, auto_f0_adjust, pitch_shift
        ],
        outputs=[output_stream, output_full]
    )

# Blocking call: starts the local web server for the app.
demo.launch()