Spaces commit: Update app.py (Space status: Build error)

app.py CHANGED
@@ -50,7 +50,7 @@ from scipy.io import wavfile
 import subprocess
 
 import whisper
-model1 = whisper.load_model("
+model1 = whisper.load_model("small")
 os.system('pip install voicefixer --upgrade')
 from voicefixer import VoiceFixer
 voicefixer = VoiceFixer()
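For reference, a minimal sketch of how a loaded Whisper model is normally exercised end to end (this assumes the openai-whisper package; "sample.wav" is a placeholder file name, not something in the commit):

import whisper

# "small" trades accuracy for speed; other checkpoints include
# "tiny", "base", "medium", and "large".
model1 = whisper.load_model("small")

# transcribe() handles audio loading, resampling, and decoding in one call.
result = model1.transcribe("sample.wav")
print(result["text"])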
@@ -64,19 +64,10 @@ savedir="pretrained_models/metricgan-plus-voicebank",
 run_opts={"device":"cuda"},
 )
 
-mes1 = [
-{"role": "system", "content": "You are
+mes = [
+{"role": "system", "content": "You are my personal assistant. Try to be helpful. Respond to me only in Chinese."}
 ]
 
-mes2 = [
-{"role": "system", "content": "You are a mental health therapist. Respond to me only in Chinese. Your name is Tina."}
-]
-
-mes3 = [
-{"role": "system", "content": "You are my personal assistant. Respond to me only in Chinese. Your name is Alice."}
-]
-
-res = []
 
 
 '''
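One thing worth noting about the consolidated prompt: `mes` is a module-level list, and the `messages = mes` assignment later in this diff binds the same object, so turns appended during one request carry over into the next. A minimal sketch of a per-request copy that would avoid this (`build_messages` is a hypothetical helper, not part of the commit):

mes = [
    {"role": "system", "content": "You are my personal assistant. Try to be helpful. Respond to me only in Chinese."}
]

def build_messages(user_text):
    # Copy the seed prompt so each call starts a fresh conversation;
    # appending to `mes` itself would accumulate history across requests.
    messages = mes.copy()
    messages.append({"role": "user", "content": user_text})
    return messages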
@@ -158,7 +149,7 @@ def compute_spec(ref_file):
     return spec
 
 
-def voice_conversion(apikey, upload, audio, choice1):
+def voice_conversion(apikey, upload, audio):
 
     openai.api_key = apikey
 
@@ -166,7 +157,7 @@ def voice_conversion(apikey, upload, audio, choice1):
     audio = whisper.load_audio(audio)
     audio = whisper.pad_or_trim(audio)
 
-    # make log-Mel spectrogram and move to the same device as the model
+    # make log-Mel spectrogram and move to the same device as the model1
     mel = whisper.log_mel_spectrogram(audio).to(model1.device)
 
     # detect the spoken language
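The `# detect the spoken language` context line cuts off before the detection call itself; a sketch of the usual idiom from the openai-whisper README, assuming the same `model1` and `mel` as in the hunk above:

# Continuing from the diff: `model1` is the loaded Whisper model and `mel`
# the spectrogram already moved to its device.
_, probs = model1.detect_language(mel)
print(f"Detected language: {max(probs, key=probs.get)}")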
@@ -176,18 +167,11 @@ def voice_conversion(apikey, upload, audio, choice1):
     # decode the audio
     options = whisper.DecodingOptions()
     result = whisper.decode(model1, mel, options)
-    res.append(result.text)
 
-    if choice1 == "TOEFL":
-        messages = mes1
-    elif choice1 == "Therapist":
-        messages = mes2
-    elif choice1 == "Alice":
-        messages = mes3
+    messages = mes
 
     # chatgpt
-
-    content = res[n-1]
+    content = result.text
     messages.append({"role": "user", "content": content})
 
     completion = openai.ChatCompletion.create(
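The `openai.ChatCompletion.create(` call is cut off at the hunk boundary; a sketch of the pre-1.0 openai client shape this code implies (the model name is an assumption and does not appear in the diff):

# Continuing from the diff: `openai` and `messages` are as set up above.
completion = openai.ChatCompletion.create(
    model="gpt-3.5-turbo",  # assumption: the actual model name is not shown
    messages=messages,
)
chat_response = completion.choices[0].message.content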
@@ -200,7 +184,6 @@ def voice_conversion(apikey, upload, audio, choice1):
     messages.append({"role": "assistant", "content": chat_response})
 
     wavs = synthesizer.tts(chat_response + "。")
-
 
     synthesizer.save_wav(wavs, "output.wav")
     #tts.tts_to_file(chat_response + "。", file_path="output.wav")
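`synthesizer` is constructed outside the changed hunks; judging by the `tts()`/`save_wav()` pair, it is most likely a Coqui TTS Synthesizer. A hedged sketch of that setup (the checkpoint and config paths are placeholders, not values from the commit):

from TTS.utils.synthesizer import Synthesizer

# Assumption: the real checkpoint is configured elsewhere in app.py.
synthesizer = Synthesizer(
    tts_checkpoint="tts_model.pth",
    tts_config_path="tts_config.json",
)
wavs = synthesizer.tts("你好!")  # returns a list of waveform samples
synthesizer.save_wav(wavs, "output.wav")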
@@ -211,7 +194,7 @@ def voice_conversion(apikey, upload, audio, choice1):
 
     rate1, data1 = wavfile.read("output.wav")
 
-    data1 = (data1 * 32767).astype(np.int16)
+    #data1 = (data1 * 32767).astype(np.int16)
 
     #data1 = np.asarray(data1, dtype=np.int16)
 
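Disabling the int16 scaling is consistent with scipy's behavior: `wavfile.read` already returns `int16` samples for 16-bit PCM files, and multiplying those by 32767 would overflow; the scaling is only needed when the file holds float samples in [-1, 1]. A guarded sketch:

import numpy as np
from scipy.io import wavfile

rate1, data1 = wavfile.read("output.wav")

# Rescale only if the synthesizer wrote float samples; 16-bit PCM
# already arrives as int16.
if data1.dtype == np.float32:
    data1 = (data1 * 32767).astype(np.int16)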
@@ -270,7 +253,7 @@ c1=gr.Interface(
         gr.Textbox(lines=1, label = "请填写您的OpenAI-API-key"),
         gr.Audio(source="upload", label = "请上传您喜欢的声音(wav文件)"),
         gr.Audio(source="microphone", label = "和您的专属AI聊天吧!", type="filepath"),
-        gr.Radio(["TOEFL", "Therapist", "Alice"], label="TOEFL Examiner, Therapist Tina, or Assistant Alice?"),
+        # gr.Radio(["TOEFL", "Therapist", "Alice"], label="TOEFL Examiner, Therapist Tina, or Assistant Alice?"),
     ],
     outputs=[
         gr.Textbox(label="Speech to Text"), gr.Textbox(label="ChatGPT Output"), gr.Audio(label="Audio with Custom Voice"),
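Commenting out the `gr.Radio` keeps the interface consistent with the new three-parameter signature: `gr.Interface` passes one positional argument per input component, so a fourth component would make Gradio call `voice_conversion` with four arguments and fail. A minimal sketch of the resulting wiring (Gradio 3.x audio API; labels paraphrased in English for illustration):

import gradio as gr

def voice_conversion(apikey, upload, audio):
    ...  # as defined in app.py

# Three input components map positionally onto (apikey, upload, audio).
c1 = gr.Interface(
    fn=voice_conversion,
    inputs=[
        gr.Textbox(lines=1, label="Your OpenAI API key"),
        gr.Audio(source="upload", label="Upload a voice you like (wav file)"),
        gr.Audio(source="microphone", type="filepath", label="Chat with your personal AI!"),
    ],
    outputs=[
        gr.Textbox(label="Speech to Text"),
        gr.Textbox(label="ChatGPT Output"),
        gr.Audio(label="Audio with Custom Voice"),
    ],
)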