File size: 4,406 Bytes
62ccb33
fba0fd8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1af64f9
fba0fd8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
import gradio as gr
import httpx
import ormsgpack
from pydantic import BaseModel, conint
from typing import Annotated, Literal
import tempfile
import os
import json
import shutil
from datetime import datetime

# JSON file that remembers the API key and API URL between runs
CACHE_FILE = "token_cache.json"
# Folder that receives a copy of every generated audio file
CACHE_FOLDER = "cache"

# Make sure the cache folder exists. exist_ok=True replaces the
# check-then-create sequence, which could fail if the folder appeared
# between the existence check and the makedirs call.
os.makedirs(CACHE_FOLDER, exist_ok=True)

class ServeReferenceAudio(BaseModel):
    """One reference sample attached to a TTS request: audio bytes plus text."""

    # Raw content of the reference audio file (the caller reads it in binary mode).
    audio: bytes
    # Text paired with the audio; the UI describes it as the text
    # corresponding to the reference audio.
    text: str

class ServeTTSRequest(BaseModel):
    """Request body sent to the TTS endpoint.

    Instances are serialized with ormsgpack (OPT_SERIALIZE_PYDANTIC) and
    posted as ``application/msgpack`` by the request code in this file.
    The meaning of most options is defined by the server, not by this file.
    """

    # Text to convert to speech.
    text: str
    # Strict integer in the inclusive range 100..300, default 200.
    # NOTE(review): presumably the text chunk size used by the server - confirm
    # against the API documentation.
    chunk_length: Annotated[int, conint(ge=100, le=300, strict=True)] = 200
    # Requested output audio format; defaults to mp3.
    format: Literal["wav", "pcm", "mp3"] = "mp3"
    # Allowed values 64, 128 or 192; default 128.
    # NOTE(review): presumably kbps and only relevant for mp3 output - confirm.
    mp3_bitrate: Literal[64, 128, 192] = 128
    # Reference samples sent inline with the request; empty by default.
    references: list[ServeReferenceAudio] = []
    # Optional string identifier, None by default.
    # NOTE(review): presumably selects a reference stored on the server - confirm.
    reference_id: str | None = None
    # Boolean option, off by default; its effect is decided by the server.
    normalize: bool = False
    # Either "normal" (default) or "balanced"; semantics are decided by the server.
    latency: Literal["normal", "balanced"] = "normal"

def load_cached_data():
    """Return the cached ``(api_key, api_url)`` pair.

    The values are read from the JSON file at ``CACHE_FILE``. Whenever the
    file is missing, unreadable, not valid JSON, or does not contain a JSON
    object, the defaults are returned instead: an empty API key and the
    public Fish.audio TTS endpoint. A missing, null or empty entry falls
    back to its default individually.

    Returns:
        tuple[str, str]: ``(api_key, api_url)``.
    """
    default_url = 'https://api.fish.audio/v1/tts'

    try:
        with open(CACHE_FILE, 'r', encoding='utf-8') as f:
            cache = json.load(f)
    except (OSError, ValueError):
        # OSError: file missing or unreadable. ValueError covers both
        # json.JSONDecodeError and UnicodeDecodeError. This function runs
        # while the UI is being built, so a damaged cache must not prevent
        # the application from starting.
        return '', default_url

    if not isinstance(cache, dict):
        # Valid JSON but not an object (e.g. a list): nothing usable inside.
        return '', default_url

    return cache.get('api_key') or '', cache.get('api_url') or default_url

def save_cached_data(api_key, api_url):
    """Write the API key and URL to ``CACHE_FILE`` as a JSON object.

    Any previous content of the file is overwritten.
    """
    payload = {'api_key': api_key, 'api_url': api_url}
    with open(CACHE_FILE, 'w') as cache_handle:
        json.dump(payload, cache_handle)

def _stream_tts_audio(api_key, api_url, payload, sink):
    """POST *payload* to *api_url* and stream the response body into *sink*.

    Args:
        api_key: Bearer token sent in the ``authorization`` header.
        api_url: Endpoint that receives the request.
        payload: Already serialized msgpack request body.
        sink: Writable binary file object that receives the audio bytes.

    Returns:
        None on success, otherwise a user-facing error message (non-200
        status, transport failure or invalid URL).
    """
    try:
        with httpx.Client() as client:
            with client.stream(
                "POST",
                api_url,
                content=payload,
                headers={
                    "authorization": f"Bearer {api_key}",
                    "content-type": "application/msgpack",
                },
                timeout=None,
            ) as response:
                if response.status_code != 200:
                    # The body of a streamed response has to be read
                    # explicitly before .text can be used; without this,
                    # httpx raises ResponseNotRead instead of giving us the
                    # server's error message.
                    response.read()
                    return f"Error: {response.status_code} - {response.text}"
                for chunk in response.iter_bytes():
                    sink.write(chunk)
    except (httpx.HTTPError, httpx.InvalidURL) as exc:
        # Connection problems, protocol errors and malformed URLs are
        # reported in the message box rather than as an unhandled exception.
        return f"Error: request failed - {exc}"
    return None


def text_to_speech(api_key, api_url, text, reference_audio, reference_text):
    """Convert *text* to speech through the TTS API.

    The API key and URL are stored with ``save_cached_data`` before the
    request is made. The generated audio is written to a temporary file
    (returned to the caller) and a copy is placed in ``CACHE_FOLDER``.

    Args:
        api_key: API key used as bearer token.
        api_url: URL of the TTS endpoint.
        text: Text to convert.
        reference_audio: Optional uploaded reference audio. Either an object
            exposing the file path as ``.name`` or the path itself; ``None``
            when no file was uploaded.
        reference_text: Text that accompanies the reference audio.

    Returns:
        tuple: ``(audio_path, message)``. ``audio_path`` is ``None`` when
        validation or the request failed, and ``message`` then explains why.
    """
    if not api_key:
        return None, "Please enter your API key."

    if not api_url:
        return None, "Please enter the API URL."

    # Remember the API key and URL for the next launch.
    save_cached_data(api_key, api_url)

    if not text or not text.strip():
        return None, "Please enter the text to convert."

    references = []
    if reference_audio is not None:
        # Depending on the Gradio version the File component hands over an
        # object with a .name attribute or the path string itself.
        audio_path = getattr(reference_audio, "name", reference_audio)
        with open(audio_path, "rb") as f:
            audio_bytes = f.read()
        references.append(
            ServeReferenceAudio(audio=audio_bytes, text=reference_text or "")
        )

    request = ServeTTSRequest(
        text=text,
        references=references
    )
    payload = ormsgpack.packb(request, option=ormsgpack.OPT_SERIALIZE_PYDANTIC)
    # Use the requested format for file extensions so that the names always
    # match the audio data actually returned.
    extension = f".{request.format}"

    with tempfile.NamedTemporaryFile(delete=False, suffix=extension) as temp_file:
        output_filename = temp_file.name
        error_message = _stream_tts_audio(api_key, api_url, payload, temp_file)

    if error_message is not None:
        # The file was created with delete=False, so it has to be removed
        # here or every failed request would leave a file behind.
        os.remove(output_filename)
        return None, error_message

    # Build a unique file name and store a copy in the cache folder.
    # Microseconds keep two conversions within the same second from
    # overwriting each other.
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
    cache_filename = f"generate_voice_{timestamp}{extension}"
    os.makedirs(CACHE_FOLDER, exist_ok=True)
    cache_path = os.path.join(CACHE_FOLDER, cache_filename)
    shutil.copy(output_filename, cache_path)

    return output_filename, f"Text-to-speech conversion completed successfully! Saved as {cache_filename}"

# Build the web UI: credentials row, text input, optional reference sample,
# a convert button and the result widgets.
with gr.Blocks() as demo:
    gr.Markdown("# [Fish.audio](https://fish.audio) Text-to-Speech WebUI")

    # Pre-fill the credential fields with the values stored by a previous run.
    cached_api_key, cached_api_url = load_cached_data()

    with gr.Row():
        api_key = gr.Textbox(
            label="API Key",
            placeholder="Enter your Fish.audio API key here",
            value=cached_api_key,
            # The key is a secret: mask it so it is not readable on screen.
            type="password",
        )
        api_url = gr.Textbox(
            label="API URL",
            placeholder="Enter the API URL here",
            value=cached_api_url
        )

    gr.Markdown("You can get the API Key from [here](https://fish.audio/go-api)")

    with gr.Row():
        text_input = gr.Textbox(label="Text to convert", placeholder="Enter the text you want to convert to speech")

    with gr.Row():
        reference_audio = gr.File(label="Reference Audio (optional)")
        reference_text = gr.Textbox(label="Reference Text", placeholder="Enter the text corresponding to the reference audio")

    with gr.Row():
        convert_button = gr.Button("Convert to Speech")

    with gr.Row():
        output_audio = gr.Audio(label="Generated Speech")
        output_message = gr.Textbox(label="Message")

    # text_to_speech returns (audio path or None, status message), which map
    # onto the two output widgets in that order.
    convert_button.click(
        text_to_speech,
        inputs=[api_key, api_url, text_input, reference_audio, reference_text],
        outputs=[output_audio, output_message]
    )

demo.launch()