ChatGPT-with-Voice-Conversion

Build error

App Files Community

Kevin676 committed on Apr 14, 2023

Commit

5ab9daa

1 Parent(s): 5ce216b

Update app.py

Browse files

Files changed (1) hide show

app.py +79 -24

app.py CHANGED Viewed

@@ -4,9 +4,9 @@ from TTS.utils.manage import ModelManager
 from TTS.utils.synthesizer import Synthesizer
 manager = ModelManager()
-model_path, config_path, model_item = manager.download_model("tts_models/zh-CN/baker/tacotron2-DDC-GST")
 synthesizer = Synthesizer(
-    model_path, config_path, None, None, None,
 )
 import os
@@ -142,15 +142,9 @@ SE_speaker_manager = SpeakerManager(encoder_model_path=CHECKPOINT_SE_PATH, encod
 # Define helper function
-def compute_spec(ref_file):
-  y, sr = librosa.load(ref_file, sr=ap.sample_rate)
-  spec = ap.spectrogram(y)
-  spec = torch.FloatTensor(spec).unsqueeze(0)
-  return spec
-def voice_conversion(apikey, upload, audio):
     openai.api_key = apikey
     # load audio and pad/trim it to fit 30 seconds
@@ -186,22 +180,26 @@ def voice_conversion(apikey, upload, audio):
     wavs = synthesizer.tts(chat_response + "。")
     synthesizer.save_wav(wavs, "output.wav")
-    #tts.tts_to_file(chat_response + "。", file_path="output.wav")
-    target_audio = 'target.wav'
-    reference_audio = 'reference.wav'
-    driving_audio = 'driving.wav'
-    rate1, data1 = wavfile.read("output.wav")
-    #data1 = (data1 * 32767).astype(np.int16)
-    #data1 = np.asarray(data1, dtype=np.int16)
-    write(target_audio, upload[0], upload[1])
-    write(reference_audio, rate1, data1)
-    write(driving_audio, rate1, data1)
   # !ffmpeg-normalize $target_audio -nt rms -t=-27 -o $target_audio -ar 16000 -f
   # !ffmpeg-normalize $reference_audio -nt rms -t=-27 -o $reference_audio -ar 16000 -f
   # !ffmpeg-normalize $driving_audio -nt rms -t=-27 -o $driving_audio -ar 16000 -f
@@ -245,7 +243,7 @@ def voice_conversion(apikey, upload, audio):
     enhanced = enhance_model.enhance_batch(noisy, lengths=torch.tensor([1.]))
     torchaudio.save("enhanced.wav", enhanced.cpu(), 16000)
-    return [result.text, chat_response, "enhanced.wav"]
 c1=gr.Interface(
     fn=voice_conversion,
@@ -278,4 +276,61 @@ c2=gr.Interface(
     )
 demo = gr.TabbedInterface([c1, c2], ["wav文件上传", "麦克风上传"], title = '🥳💬💕 - TalktoAI，随时随地，谈天说地！')
-demo.launch(show_error = True)

 from TTS.utils.synthesizer import Synthesizer
 manager = ModelManager()
+model_path1, config_path, model_item = manager.download_model("tts_models/zh-CN/baker/tacotron2-DDC-GST")
 synthesizer = Synthesizer(
+    model_path1, config_path, None, None, None,
 )
 import os
 # Define helper function
+def chatgpt(apikey, audio):
     openai.api_key = apikey
     # load audio and pad/trim it to fit 30 seconds
     wavs = synthesizer.tts(chat_response + "。")
     synthesizer.save_wav(wavs, "output.wav")
+    return [result.text, chat_response, "output.wav"]
+def compute_spec(ref_file):
+  y, sr = librosa.load(ref_file, sr=ap.sample_rate)
+  spec = ap.spectrogram(y)
+  spec = torch.FloatTensor(spec).unsqueeze(0)
+  return spec
+def voice_conversion(ta, ra, da):
+    target_audio = 'target.wav'
+    reference_audio = 'reference.wav'
+    driving_audio = 'driving.wav'
+    write(target_audio, ta[0], ta[1])
+    write(reference_audio, ra[0], ra[1])
+    write(driving_audio, da[0], da[1])
   # !ffmpeg-normalize $target_audio -nt rms -t=-27 -o $target_audio -ar 16000 -f
   # !ffmpeg-normalize $reference_audio -nt rms -t=-27 -o $reference_audio -ar 16000 -f
   # !ffmpeg-normalize $driving_audio -nt rms -t=-27 -o $driving_audio -ar 16000 -f
     enhanced = enhance_model.enhance_batch(noisy, lengths=torch.tensor([1.]))
     torchaudio.save("enhanced.wav", enhanced.cpu(), 16000)
+    return "enhanced.wav"
 c1=gr.Interface(
     fn=voice_conversion,
     )
 demo = gr.TabbedInterface([c1, c2], ["wav文件上传", "麦克风上传"], title = '🥳💬💕 - TalktoAI，随时随地，谈天说地！')
+demo.launch(show_error = True)
+block = gr.Blocks()
+with block:
+    with gr.Group():
+        gr.Markdown(
+            """ # <center>🥳💬💕 - TalktoAI，随时随地，谈天说地！</center>
+            ## <center>🤖 - 让有人文关怀的AI造福每一个人！AI向善，文明璀璨！TalktoAI - Enable the future！</center>
+      """
+        )
+        with gr.Box():
+            with gr.Row().style(mobile_collapse=False, equal_height=True):
+                inp1 = gr.components.Textbox(lines=2, label="请填写您的OpenAI-API-key")
+                inp2 = gr.Audio(source="microphone", type="filepath",label="说些什么吧")
+                btn = gr.Button("开始对话吧")
+        yousay = gr.Textbox(lines=3, label="您的提问")
+        texts = gr.Textbox(lines=5, label="ChatGPT的回答")
+        audio_tts = gr.Audio(label="自动合成的声音")
+        btn.click(chatgpt, [inp1, inp2], [yousay, texts, audio_tts])
+        with gr.Box():
+            with gr.Row().style(mobile_collapse=False, equal_height=True):
+                inp3 = gr.Audio(source="upload", label = "请上传您喜欢的声音(wav/mp3文件, max. 30mb)", type="filepath")
+                inp4 = audio_tts
+                inp5 = audio_tts
+                btn1 = gr.Button("用喜欢的声音听一听吧")
+        out1 = gr.Audio(label="声音拟合的专属声音")
+        btn1.click(voice_conversion, [inp3, inp4, inp5], [out1])
+        gr.Markdown(
+            """
+            ### <center>注意❗：请不要输入或生成会对个人以及组织造成侵害的内容，此程序仅供科研、学习及娱乐使用。用户输入或生成的内容与程序开发者无关，请自觉合法合规使用，违反者一切后果自负。</center>
+            ### <center>Model by [Raven](https://huggingface.co/spaces/BlinkDL/Raven-RWKV-7B). Thanks to [PENG Bo](https://github.com/BlinkDL). Please follow me on [Bilibili](https://space.bilibili.com/501495851?spm_id_from=333.1007.0.0).</center>
+      """
+        )
+        gr.HTML('''
+        <div class="footer">
+                    <p>🎶🖼️🎡 - It’s the intersection of technology and liberal arts that makes our hearts sing. - Steve Jobs
+                    </p>
+        </div>
+        ''')
+block.launch(show_error=True)