Spaces:
Build error
Build error
Update app.py
Browse files
app.py
CHANGED
|
@@ -4,9 +4,9 @@ from TTS.utils.manage import ModelManager
|
|
| 4 |
from TTS.utils.synthesizer import Synthesizer
|
| 5 |
|
| 6 |
manager = ModelManager()
|
| 7 |
-
|
| 8 |
synthesizer = Synthesizer(
|
| 9 |
-
|
| 10 |
)
|
| 11 |
|
| 12 |
import os
|
|
@@ -142,15 +142,9 @@ SE_speaker_manager = SpeakerManager(encoder_model_path=CHECKPOINT_SE_PATH, encod
|
|
| 142 |
|
| 143 |
# Define helper function
|
| 144 |
|
| 145 |
-
def compute_spec(ref_file):
|
| 146 |
-
y, sr = librosa.load(ref_file, sr=ap.sample_rate)
|
| 147 |
-
spec = ap.spectrogram(y)
|
| 148 |
-
spec = torch.FloatTensor(spec).unsqueeze(0)
|
| 149 |
-
return spec
|
| 150 |
-
|
| 151 |
-
|
| 152 |
-
def voice_conversion(apikey, upload, audio):
|
| 153 |
|
|
|
|
|
|
|
| 154 |
openai.api_key = apikey
|
| 155 |
|
| 156 |
# load audio and pad/trim it to fit 30 seconds
|
|
@@ -186,22 +180,26 @@ def voice_conversion(apikey, upload, audio):
|
|
| 186 |
wavs = synthesizer.tts(chat_response + "。")
|
| 187 |
|
| 188 |
synthesizer.save_wav(wavs, "output.wav")
|
| 189 |
-
#tts.tts_to_file(chat_response + "。", file_path="output.wav")
|
| 190 |
-
|
| 191 |
-
target_audio = 'target.wav'
|
| 192 |
-
reference_audio = 'reference.wav'
|
| 193 |
-
driving_audio = 'driving.wav'
|
| 194 |
|
| 195 |
-
|
| 196 |
|
| 197 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 198 |
|
| 199 |
-
#data1 = np.asarray(data1, dtype=np.int16)
|
| 200 |
|
| 201 |
-
|
| 202 |
-
|
| 203 |
-
|
| 204 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 205 |
# !ffmpeg-normalize $target_audio -nt rms -t=-27 -o $target_audio -ar 16000 -f
|
| 206 |
# !ffmpeg-normalize $reference_audio -nt rms -t=-27 -o $reference_audio -ar 16000 -f
|
| 207 |
# !ffmpeg-normalize $driving_audio -nt rms -t=-27 -o $driving_audio -ar 16000 -f
|
|
@@ -245,7 +243,7 @@ def voice_conversion(apikey, upload, audio):
|
|
| 245 |
enhanced = enhance_model.enhance_batch(noisy, lengths=torch.tensor([1.]))
|
| 246 |
torchaudio.save("enhanced.wav", enhanced.cpu(), 16000)
|
| 247 |
|
| 248 |
-
return
|
| 249 |
|
| 250 |
c1=gr.Interface(
|
| 251 |
fn=voice_conversion,
|
|
@@ -278,4 +276,61 @@ c2=gr.Interface(
|
|
| 278 |
)
|
| 279 |
|
| 280 |
demo = gr.TabbedInterface([c1, c2], ["wav文件上传", "麦克风上传"], title = '🥳💬💕 - TalktoAI,随时随地,谈天说地!')
|
| 281 |
-
demo.launch(show_error = True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4 |
from TTS.utils.synthesizer import Synthesizer
|
| 5 |
|
| 6 |
manager = ModelManager()
|
| 7 |
+
model_path1, config_path, model_item = manager.download_model("tts_models/zh-CN/baker/tacotron2-DDC-GST")
|
| 8 |
synthesizer = Synthesizer(
|
| 9 |
+
model_path1, config_path, None, None, None,
|
| 10 |
)
|
| 11 |
|
| 12 |
import os
|
|
|
|
| 142 |
|
| 143 |
# Define helper function
|
| 144 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 145 |
|
| 146 |
+
def chatgpt(apikey, audio):
|
| 147 |
+
|
| 148 |
openai.api_key = apikey
|
| 149 |
|
| 150 |
# load audio and pad/trim it to fit 30 seconds
|
|
|
|
| 180 |
wavs = synthesizer.tts(chat_response + "。")
|
| 181 |
|
| 182 |
synthesizer.save_wav(wavs, "output.wav")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 183 |
|
| 184 |
+
return [result.text, chat_response, "output.wav"]
|
| 185 |
|
| 186 |
+
def compute_spec(ref_file):
|
| 187 |
+
y, sr = librosa.load(ref_file, sr=ap.sample_rate)
|
| 188 |
+
spec = ap.spectrogram(y)
|
| 189 |
+
spec = torch.FloatTensor(spec).unsqueeze(0)
|
| 190 |
+
return spec
|
| 191 |
|
|
|
|
| 192 |
|
| 193 |
+
def voice_conversion(ta, ra, da):
|
| 194 |
+
|
| 195 |
+
target_audio = 'target.wav'
|
| 196 |
+
reference_audio = 'reference.wav'
|
| 197 |
+
driving_audio = 'driving.wav'
|
| 198 |
+
|
| 199 |
+
write(target_audio, ta[0], ta[1])
|
| 200 |
+
write(reference_audio, ra[0], ra[1])
|
| 201 |
+
write(driving_audio, da[0], da[1])
|
| 202 |
+
|
| 203 |
# !ffmpeg-normalize $target_audio -nt rms -t=-27 -o $target_audio -ar 16000 -f
|
| 204 |
# !ffmpeg-normalize $reference_audio -nt rms -t=-27 -o $reference_audio -ar 16000 -f
|
| 205 |
# !ffmpeg-normalize $driving_audio -nt rms -t=-27 -o $driving_audio -ar 16000 -f
|
|
|
|
| 243 |
enhanced = enhance_model.enhance_batch(noisy, lengths=torch.tensor([1.]))
|
| 244 |
torchaudio.save("enhanced.wav", enhanced.cpu(), 16000)
|
| 245 |
|
| 246 |
+
return "enhanced.wav"
|
| 247 |
|
| 248 |
c1=gr.Interface(
|
| 249 |
fn=voice_conversion,
|
|
|
|
| 276 |
)
|
| 277 |
|
| 278 |
demo = gr.TabbedInterface([c1, c2], ["wav文件上传", "麦克风上传"], title = '🥳💬💕 - TalktoAI,随时随地,谈天说地!')
|
| 279 |
+
demo.launch(show_error = True)
|
| 280 |
+
block = gr.Blocks()
|
| 281 |
+
|
| 282 |
+
with block:
|
| 283 |
+
with gr.Group():
|
| 284 |
+
gr.Markdown(
|
| 285 |
+
""" # <center>🥳💬💕 - TalktoAI,随时随地,谈天说地!</center>
|
| 286 |
+
|
| 287 |
+
## <center>🤖 - 让有人文关怀的AI造福每一个人!AI向善,文明璀璨!TalktoAI - Enable the future!</center>
|
| 288 |
+
|
| 289 |
+
"""
|
| 290 |
+
)
|
| 291 |
+
|
| 292 |
+
with gr.Box():
|
| 293 |
+
with gr.Row().style(mobile_collapse=False, equal_height=True):
|
| 294 |
+
|
| 295 |
+
inp1 = gr.components.Textbox(lines=2, label="请填写您的OpenAI-API-key")
|
| 296 |
+
inp2 = gr.Audio(source="microphone", type="filepath",label="说些什么吧")
|
| 297 |
+
|
| 298 |
+
btn = gr.Button("开始对话吧")
|
| 299 |
+
|
| 300 |
+
yousay = gr.Textbox(lines=3, label="您的提问")
|
| 301 |
+
texts = gr.Textbox(lines=5, label="ChatGPT的回答")
|
| 302 |
+
audio_tts = gr.Audio(label="自动合成的声音")
|
| 303 |
+
|
| 304 |
+
btn.click(chatgpt, [inp1, inp2], [yousay, texts, audio_tts])
|
| 305 |
+
|
| 306 |
+
with gr.Box():
|
| 307 |
+
with gr.Row().style(mobile_collapse=False, equal_height=True):
|
| 308 |
+
inp3 = gr.Audio(source="upload", label = "请上传您喜欢的声音(wav/mp3文件, max. 30mb)", type="filepath")
|
| 309 |
+
inp4 = audio_tts
|
| 310 |
+
inp5 = audio_tts
|
| 311 |
+
|
| 312 |
+
btn1 = gr.Button("用喜欢的声音听一听吧")
|
| 313 |
+
|
| 314 |
+
out1 = gr.Audio(label="声音拟合的专属声音")
|
| 315 |
+
|
| 316 |
+
btn1.click(voice_conversion, [inp3, inp4, inp5], [out1])
|
| 317 |
+
|
| 318 |
+
gr.Markdown(
|
| 319 |
+
"""
|
| 320 |
+
|
| 321 |
+
### <center>注意❗:请不要输入或生成会对个人以及组织造成侵害的内容,此程序仅供科研、学习及娱乐使用。用户输入或生成的内容与程序开发者无关,请自觉合法合规使用,违反者一切后果自负。</center>
|
| 322 |
+
|
| 323 |
+
### <center>Model by [Raven](https://huggingface.co/spaces/BlinkDL/Raven-RWKV-7B). Thanks to [PENG Bo](https://github.com/BlinkDL). Please follow me on [Bilibili](https://space.bilibili.com/501495851?spm_id_from=333.1007.0.0).</center>
|
| 324 |
+
|
| 325 |
+
"""
|
| 326 |
+
)
|
| 327 |
+
|
| 328 |
+
gr.HTML('''
|
| 329 |
+
<div class="footer">
|
| 330 |
+
<p>🎶🖼️🎡 - It’s the intersection of technology and liberal arts that makes our hearts sing. - Steve Jobs
|
| 331 |
+
</p>
|
| 332 |
+
</div>
|
| 333 |
+
''')
|
| 334 |
+
|
| 335 |
+
|
| 336 |
+
block.launch(show_error=True)
|