Spaces commit: Update app.py (Space status: Build error)

app.py CHANGED
@@ -50,7 +50,7 @@ from scipy.io import wavfile
 import subprocess
 
 import whisper
-model1 = whisper.load_model("
+model1 = whisper.load_model("small")
 os.system('pip install voicefixer --upgrade')
 from voicefixer import VoiceFixer
 voicefixer = VoiceFixer()
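For reference, a minimal sketch of how a loaded Whisper model is normally exercised end to end (this assumes the openai-whisper package; "sample.wav" is a placeholder file name, not something in the commit):

import whisper

# "small" trades accuracy for speed; other checkpoints include
# "tiny", "base", "medium", and "large".
model1 = whisper.load_model("small")

# transcribe() handles audio loading, resampling, and decoding in one call.
result = model1.transcribe("sample.wav")
print(result["text"])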
@@ -64,19 +64,10 @@ savedir="pretrained_models/metricgan-plus-voicebank",
 run_opts={"device":"cuda"},
 )
 
-mes1 = [
-{"role": "system", "content": "You are
+mes = [
+{"role": "system", "content": "You are my personal assistant. Try to be helpful. Respond to me only in Chinese."}
 ]
 
-mes2 = [
-{"role": "system", "content": "You are a mental health therapist. Respond to me only in Chinese. Your name is Tina."}
-]
-
-mes3 = [
-{"role": "system", "content": "You are my personal assistant. Respond to me only in Chinese. Your name is Alice."}
-]
-
-res = []
 
 
 '''
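One thing worth noting about the consolidated prompt: `mes` is a module-level list, and the `messages = mes` assignment later in this diff binds the same object, so turns appended during one request carry over into the next. A minimal sketch of a per-request copy that would avoid this (`build_messages` is a hypothetical helper, not part of the commit):

mes = [
    {"role": "system", "content": "You are my personal assistant. Try to be helpful. Respond to me only in Chinese."}
]

def build_messages(user_text):
    # Copy the seed prompt so each call starts a fresh conversation;
    # appending to `mes` itself would accumulate history across requests.
    messages = mes.copy()
    messages.append({"role": "user", "content": user_text})
    return messages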
@@ -158,7 +149,7 @@ def compute_spec(ref_file):
     return spec
 
 
-def voice_conversion(apikey, upload, audio, choice1):
+def voice_conversion(apikey, upload, audio):
 
     openai.api_key = apikey
 
@@ -166,7 +157,7 @@ def voice_conversion(apikey, upload, audio, choice1):
     audio = whisper.load_audio(audio)
     audio = whisper.pad_or_trim(audio)
 
-    # make log-Mel spectrogram and move to the same device as the model
+    # make log-Mel spectrogram and move to the same device as the model1
     mel = whisper.log_mel_spectrogram(audio).to(model1.device)
 
     # detect the spoken language
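The `# detect the spoken language` context line cuts off before the detection call itself; a sketch of the usual idiom from the openai-whisper README, assuming the same `model1` and `mel` as in the hunk above:

# Continuing from the diff: `model1` is the loaded Whisper model and `mel`
# the spectrogram already moved to its device.
_, probs = model1.detect_language(mel)
print(f"Detected language: {max(probs, key=probs.get)}")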
@@ -176,18 +167,11 @@ def voice_conversion(apikey, upload, audio, choice1):
     # decode the audio
     options = whisper.DecodingOptions()
     result = whisper.decode(model1, mel, options)
-    res.append(result.text)
 
-    if choice1 == "TOEFL":
-        messages = mes1
-    elif choice1 == "Therapist":
-        messages = mes2
-    elif choice1 == "Alice":
-        messages = mes3
+    messages = mes
 
     # chatgpt
-
-    content = res[n-1]
+    content = result.text
     messages.append({"role": "user", "content": content})
 
     completion = openai.ChatCompletion.create(
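The `openai.ChatCompletion.create(` call is cut off at the hunk boundary; a sketch of the pre-1.0 openai client shape this code implies (the model name is an assumption and does not appear in the diff):

# Continuing from the diff: `openai` and `messages` are as set up above.
completion = openai.ChatCompletion.create(
    model="gpt-3.5-turbo",  # assumption: the actual model name is not shown
    messages=messages,
)
chat_response = completion.choices[0].message.content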
@@ -200,7 +184,6 @@ def voice_conversion(apikey, upload, audio, choice1):
     messages.append({"role": "assistant", "content": chat_response})
 
     wavs = synthesizer.tts(chat_response + "。")
-
 
     synthesizer.save_wav(wavs, "output.wav")
     #tts.tts_to_file(chat_response + "。", file_path="output.wav")
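`synthesizer` is constructed outside the changed hunks; judging by the `tts()`/`save_wav()` pair, it is most likely a Coqui TTS Synthesizer. A hedged sketch of that setup (the checkpoint and config paths are placeholders, not values from the commit):

from TTS.utils.synthesizer import Synthesizer

# Assumption: the real checkpoint is configured elsewhere in app.py.
synthesizer = Synthesizer(
    tts_checkpoint="tts_model.pth",
    tts_config_path="tts_config.json",
)
wavs = synthesizer.tts("你好!")  # returns a list of waveform samples
synthesizer.save_wav(wavs, "output.wav")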
@@ -211,7 +194,7 @@ def voice_conversion(apikey, upload, audio, choice1):
 
     rate1, data1 = wavfile.read("output.wav")
 
-    data1 = (data1 * 32767).astype(np.int16)
+    #data1 = (data1 * 32767).astype(np.int16)
 
     #data1 = np.asarray(data1, dtype=np.int16)
 
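Disabling the int16 scaling is consistent with scipy's behavior: `wavfile.read` already returns `int16` samples for 16-bit PCM files, and multiplying those by 32767 would overflow; the scaling is only needed when the file holds float samples in [-1, 1]. A guarded sketch:

import numpy as np
from scipy.io import wavfile

rate1, data1 = wavfile.read("output.wav")

# Rescale only if the synthesizer wrote float samples; 16-bit PCM
# already arrives as int16.
if data1.dtype == np.float32:
    data1 = (data1 * 32767).astype(np.int16)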
@@ -270,7 +253,7 @@ c1=gr.Interface(
         gr.Textbox(lines=1, label = "请填写您的OpenAI-API-key"),
         gr.Audio(source="upload", label = "请上传您喜欢的声音(wav文件)"),
         gr.Audio(source="microphone", label = "和您的专属AI聊天吧!", type="filepath"),
-        gr.Radio(["TOEFL", "Therapist", "Alice"], label="TOEFL Examiner, Therapist Tina, or Assistant Alice?"),
+        # gr.Radio(["TOEFL", "Therapist", "Alice"], label="TOEFL Examiner, Therapist Tina, or Assistant Alice?"),
     ],
     outputs=[
         gr.Textbox(label="Speech to Text"), gr.Textbox(label="ChatGPT Output"), gr.Audio(label="Audio with Custom Voice"),
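Commenting out the `gr.Radio` keeps the interface consistent with the new three-parameter signature: `gr.Interface` passes one positional argument per input component, so a fourth component would make Gradio call `voice_conversion` with four arguments and fail. A minimal sketch of the resulting wiring (Gradio 3.x audio API; labels paraphrased in English for illustration):

import gradio as gr

def voice_conversion(apikey, upload, audio):
    ...  # as defined in app.py

# Three input components map positionally onto (apikey, upload, audio).
c1 = gr.Interface(
    fn=voice_conversion,
    inputs=[
        gr.Textbox(lines=1, label="Your OpenAI API key"),
        gr.Audio(source="upload", label="Upload a voice you like (wav file)"),
        gr.Audio(source="microphone", type="filepath", label="Chat with your personal AI!"),
    ],
    outputs=[
        gr.Textbox(label="Speech to Text"),
        gr.Textbox(label="ChatGPT Output"),
        gr.Audio(label="Audio with Custom Voice"),
    ],
)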