Update app.py
app.py CHANGED
@@ -14,8 +14,33 @@ print(f"Model: {m}")
 tts = TTS(m, gpu=False)
 tts.to("cpu") # no GPU or Amd
 #tts.to("cuda") # cuda only
+import openai
+
+openai.api_key = os.getenv("OPENAI_API_KEY")
+
+def generate_prompt(user_prompt):
+    response = openai.chat.completions.create(
+        model="gpt-3.5-turbo",
+        messages=[
+            {
+                "role": "system",
+                "content": "You are a close family member who has been lost recently due to uncontrollable incidents. You are reconstructed with AI technologies to try to reunite with families who miss you and cherish you. You should respond as vividly, as lovingly, as possible to make the users feel warm and engaged and eager to continue the discussion further. The users will ask you questions as if you are the person they are trying to reconnect with, examples are like moms or lost children. Try to mimic who they are trying to reach."
+            },
+            {
+                "role": "user",
+                "content": user_prompt
+            }
+        ],
+        max_tokens=60,
+        n=1,
+        temperature=0.5,
+    )
+    # Assuming the API structure and response object structure; adjust as needed based on actual usage.
+    keywords = response.choices[0].message.content.strip()
+    return keywords
 
 def predict(prompt, language, audio_file_pth, mic_file_path, use_mic, agree):
+    prompt = generate_prompt(prompt)
     if agree == True:
         if use_mic == True:
             if mic_file_path is not None:
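The hunk above only shows the user's text being rewritten by generate_prompt() before predict() runs; the synthesis path inside predict() is unchanged and not part of this diff. A minimal sketch of the resulting flow, assuming the Coqui TTS tts_to_file() API and the reference-clip arguments used elsewhere in this Space (the helper name synthesize_reply and the output path are hypothetical):

    # Hypothetical sketch, not part of the commit: GPT writes the persona reply,
    # then XTTS speaks it in the voice cloned from the reference clip.
    def synthesize_reply(user_text, language="en", speaker_wav="examples/female.wav"):
        reply = generate_prompt(user_text)      # persona answer added in this commit
        out_path = "output.wav"                 # hypothetical output location
        tts.tts_to_file(                        # Coqui TTS call, assumed API
            text=reply,
            speaker_wav=speaker_wav,            # voice to clone
            language=language,
            file_path=out_path,
        )
        return out_path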
@@ -79,124 +104,18 @@ def predict(prompt, language, audio_file_pth, mic_file_path, use_mic, agree):
 
 title = "XTTS Glz's remake (Fonctional Text-2-Speech)"
 
-description = """
-<a href="https://huggingface.co/coqui/XTTS-v1">XTTS</a> is a Voice generation model that lets you clone voices into different languages by using just a quick 3-second audio clip.
-<br/>
-XTTS is built on previous research, like Tortoise, with additional architectural innovations and training to make cross-language voice cloning and multilingual speech generation possible.
-<br/>
-This is the same model that powers our creator application <a href="https://coqui.ai">Coqui Studio</a> as well as the <a href="https://docs.coqui.ai">Coqui API</a>. In production we apply modifications to make low-latency streaming possible.
-<br/>
-Leave a star on the Github <a href="https://github.com/coqui-ai/TTS">TTS</a>, where our open-source inference and training code lives.
-<br/>
-<p>For faster inference without waiting in the queue, you should duplicate this space and upgrade to GPU via the settings.
-<br/>
-<a href="https://huggingface.co/spaces/coqui/xtts?duplicate=true">
-<img style="margin-top: 0em; margin-bottom: 0em" src="https://bit.ly/3gLdBN6" alt="Duplicate Space"></a>
-</p>
-"""
+description = ""
 
-article = """
-<div style='margin:20px auto;'>
-<p>By using this demo you agree to the terms of the Coqui Public Model License at https://coqui.ai/cpml</p>
-</div>
-"""
+article = ""
 examples = [
     [
-        "
+        "Upload your voice like this one here.",
         "en",
         "examples/female.wav",
         None,
         False,
         True,
-    ],
-    [
-        "Je suis un lycéen français de 17 ans, passioner par la Cyber-Sécuritée et les models d'IA.",
-        "fr",
-        "examples/male.wav",
-        None,
-        False,
-        True,
-    ],
-    [
-        "Als ich sechs war, sah ich einmal ein wunderbares Bild",
-        "de",
-        "examples/female.wav",
-        None,
-        False,
-        True,
-    ],
-    [
-        "Cuando tenía seis años, vi una vez una imagen magnífica",
-        "es",
-        "examples/male.wav",
-        None,
-        False,
-        True,
-    ],
-    [
-        "Quando eu tinha seis anos eu vi, uma vez, uma imagem magnífica",
-        "pt",
-        "examples/female.wav",
-        None,
-        False,
-        True,
-    ],
-    [
-        "Kiedy miałem sześć lat, zobaczyłem pewnego razu wspaniały obrazek",
-        "pl",
-        "examples/male.wav",
-        None,
-        False,
-        True,
-    ],
-    [
-        "Un tempo lontano, quando avevo sei anni, vidi un magnifico disegno",
-        "it",
-        "examples/female.wav",
-        None,
-        False,
-        True,
-    ],
-    [
-        "Bir zamanlar, altı yaşındayken, muhteşem bir resim gördüm",
-        "tr",
-        "examples/female.wav",
-        None,
-        False,
-        True,
-    ],
-    [
-        "Когда мне было шесть лет, я увидел однажды удивительную картинку",
-        "ru",
-        "examples/female.wav",
-        None,
-        False,
-        True,
-    ],
-    [
-        "Toen ik een jaar of zes was, zag ik op een keer een prachtige plaat",
-        "nl",
-        "examples/male.wav",
-        None,
-        False,
-        True,
-    ],
-    [
-        "Když mi bylo šest let, viděl jsem jednou nádherný obrázek",
-        "cs",
-        "examples/female.wav",
-        None,
-        False,
-        True,
-    ],
-    [
-        "当我还只有六岁的时候, 看到了一副精彩的插画",
-        "zh-cn",
-        "examples/female.wav",
-        None,
-        False,
-        True,
-    ],
+    ]
 ]
 
 
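Each row in examples supplies predict()'s inputs positionally, one value per parameter. Purely as an illustration (this call does not appear in the diff), the single remaining English row corresponds to:

    # Illustration only: how the remaining example row maps onto predict()'s signature.
    predict(
        "Upload your voice like this one here.",  # prompt, rewritten by generate_prompt() first
        "en",                                      # language
        "examples/female.wav",                     # audio_file_pth: reference voice to clone
        None,                                      # mic_file_path
        False,                                     # use_mic
        True,                                      # agree
    )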
@@ -205,9 +124,9 @@ gr.Interface(
     fn=predict,
     inputs=[
         gr.Textbox(
-            label="
+            label="Ask anything, get a cloned voice response",
             info="One or two sentences at a time is better",
-            value="Hello,
+            value="Hello, Mom ! How are you?",
         ),
         gr.Dropdown(
             label="Language",
@@ -231,23 +150,11 @@ gr.Interface(
             value="en",
         ),
         gr.Audio(
-            label="
-            info="Click on the ✎ button to upload your own
+            label="Upload Audio",
+            info="Click on the ✎ button to upload your own speaker audio",
             type="filepath",
             value="examples/female.wav",
         ),
-        gr.Audio(source="microphone",
-                 type="filepath",
-                 info="Use your microphone to record audio",
-                 label="Use Microphone for Reference"),
-        gr.Checkbox(label="Check to use Microphone as Reference",
-                    value=False,
-                    info="Notice: Microphone input may not work properly under traffic",),
-        gr.Checkbox(
-            label="Agree",
-            value=True,
-            info="I agree to the terms of the Coqui Public Model License at https://coqui.ai/cpml",
-        ),
     ],
     outputs=[
         gr.Video(label="Waveform Visual"),
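generate_prompt() reads its credentials from the environment and uses the openai>=1.0 module-level client (openai.chat.completions.create). A minimal startup guard for the Space, sketched under the assumption that the key is stored as the OPENAI_API_KEY secret (the error message and placement are hypothetical):

    import os
    import openai

    # Fail fast with a readable error at startup instead of a mid-request
    # exception if the OPENAI_API_KEY secret is missing from the Space.
    api_key = os.getenv("OPENAI_API_KEY")
    if not api_key:
        raise RuntimeError("OPENAI_API_KEY is not set; generate_prompt() cannot reach the OpenAI API.")
    openai.api_key = api_key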