lihongjie
committed on
Commit
·
4ae14db
1
Parent(s):
677b20d
update
Browse files- .gitattributes +10 -0
- .gitignore +0 -0
- README.md +58 -49
- asset/{dingding.png → en_man1.mp3} +2 -2
- asset/{cross_lingual_prompt.wav → en_man1.txt} +2 -2
- asset/en_woman1.mp3 +3 -0
- asset/en_woman1.txt +3 -0
- asset/zh_man1.txt +3 -0
- asset/zh_man1.wav +3 -0
- asset/zh_man2.mp3 +3 -0
- asset/zh_man2.txt +3 -0
- asset/zh_woman1.txt +3 -0
- asset/{zero_shot_prompt.wav → zh_woman1.wav} +0 -0
- config.json +0 -0
- main_ax650 +2 -2
- prompt_files/flow_embedding_1_192.txt +0 -192
- prompt_files/flow_prompt_speech_token_1_87.txt +0 -87
- prompt_files/llm_embedding_1_192.txt +0 -192
- prompt_files/llm_prompt_speech_token_1_87.txt +0 -87
- prompt_files/prompt_speech_feat_1_174_80.txt +0 -0
- prompt_files/prompt_text_1_15.txt +0 -15
- prompt_files/text_1_38.txt +0 -38
- run.sh +3 -2
- scripts/frontend.py +22 -1
- scripts/process_prompt.py +21 -18
- {prompt_files → token2wav-axmodels}/rand_noise_1_80_300.txt +0 -0
- {prompt_files → token2wav-axmodels}/speech_window_2x8x480.txt +0 -0
.gitattributes
CHANGED
|
@@ -88,3 +88,13 @@ main_ax650 filter=lfs diff=lfs merge=lfs -text
|
|
| 88 |
token2wav-axmodels/flow_estimator_200.axmodel filter=lfs diff=lfs merge=lfs -text
|
| 89 |
token2wav-axmodels/flow.input_embedding.float16.bin filter=lfs diff=lfs merge=lfs -text
|
| 90 |
scripts/tokenizer/assets/multilingual_zh_ja_yue_char_del.tiktoken filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 88 |
token2wav-axmodels/flow_estimator_200.axmodel filter=lfs diff=lfs merge=lfs -text
|
| 89 |
token2wav-axmodels/flow.input_embedding.float16.bin filter=lfs diff=lfs merge=lfs -text
|
| 90 |
scripts/tokenizer/assets/multilingual_zh_ja_yue_char_del.tiktoken filter=lfs diff=lfs merge=lfs -text
|
| 91 |
+
asset/zh_man1.wav filter=lfs diff=lfs merge=lfs -text
|
| 92 |
+
asset/zh_man2.mp3 filter=lfs diff=lfs merge=lfs -text
|
| 93 |
+
asset/zh_woman1.txt filter=lfs diff=lfs merge=lfs -text
|
| 94 |
+
asset/en_man1.mp3 filter=lfs diff=lfs merge=lfs -text
|
| 95 |
+
asset/en_man1.txt filter=lfs diff=lfs merge=lfs -text
|
| 96 |
+
asset/en_woman1.mp3 filter=lfs diff=lfs merge=lfs -text
|
| 97 |
+
asset/en_woman1.txt filter=lfs diff=lfs merge=lfs -text
|
| 98 |
+
asset/zh_man1.txt filter=lfs diff=lfs merge=lfs -text
|
| 99 |
+
asset/zh_woman1.wav filter=lfs diff=lfs merge=lfs -text
|
| 100 |
+
asset/zh_man2.txt filter=lfs diff=lfs merge=lfs -text
|
.gitignore
CHANGED
|
File without changes
|
README.md
CHANGED
|
@@ -43,65 +43,25 @@ Download all files from this repository to the device
|
|
| 43 |
|
| 44 |
### 1. Text to Speech (Voice Cloning)
|
| 45 |
|
| 46 |
-
#### 1
|
| 47 |
|
| 48 |
-
|
| 49 |
-
|
|
|
|
| 50 |
```
|
| 51 |
pip3 install -r scripts/requirements.txt
|
| 52 |
```
|
| 53 |
|
| 54 |
-
|
| 55 |
-
```
|
| 56 |
-
pip3 install modelscope
|
| 57 |
-
modelscope download --model pengzhendong/wetext --local_dir pengzhendong/wetext
|
| 58 |
-
```
|
| 59 |
-
|
| 60 |
-
#### 2. Process Prompt Speech
|
| 61 |
-
```
|
| 62 |
-
python scripts/process_prompt.py
|
| 63 |
-
```
|
| 64 |
-
|
| 65 |
-
Pass parameters according to the actual situation.
|
| 66 |
-
```
|
| 67 |
-
args.add_argument('--model_dir', type=str, default="../../model_convert/pretrained_models/CosyVoice2-0.5B/")
|
| 68 |
-
args.add_argument('--wetext_dir', type=str, default="../../model_convert/pengzhendong/wetext/")
|
| 69 |
-
args.add_argument('--sample_rate', type=int, default=24000)
|
| 70 |
-
args.add_argument('--zero_shot_spk_id', type=str, default="")
|
| 71 |
-
args.add_argument('--tts_text', type=str, default="君不见黄河之水天上来,奔流到海不复回。君不见高堂明镜悲白发,朝如青丝暮成雪。")
|
| 72 |
-
args.add_argument('--prompt_text', type=str, default="希望你以后能够做的比我还好呦。")
|
| 73 |
-
args.add_argument('--prompt_speech', type=str, default="../../model_convert/asset/zero_shot_prompt.wav")
|
| 74 |
-
```
|
| 75 |
-
执行完上述命令,会生成类似以下的文件:
|
| 76 |
-
```
|
| 77 |
-
prompt_text_1_15.txt
|
| 78 |
-
llm_prompt_speech_token_1_87.txt
|
| 79 |
-
flow_prompt_speech_token_1_87.txt
|
| 80 |
-
prompt_speech_feat_1_174_80.txt
|
| 81 |
-
llm_embedding_1_192.txt
|
| 82 |
-
flow_embedding_1_192.txt
|
| 83 |
-
text_1_38.txt
|
| 84 |
-
rand_noise_1_80_300.txt
|
| 85 |
-
speech_window_2x8x480.txt
|
| 86 |
-
```
|
| 87 |
-
The prompt_files directory contains files generated based on the default prompt speech. You can use them directly without running this command.
|
| 88 |
-
|
| 89 |
-
#### 3. Start HTTP Tokenizer Server
|
| 90 |
```
|
| 91 |
cd scripts
|
| 92 |
python cosyvoice2_tokenizer.py --host {your host} --port {your port}
|
| 93 |
```
|
| 94 |
|
| 95 |
-
####
|
| 96 |
1) Modify the HTTP host in `run.sh`.
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
run.sh main_ax650 prompt_text_1_15.txt llm_prompt_speech_token_1_87.txt flow_prompt_speech_token_1_87.txt \
|
| 100 |
-
prompt_speech_feat_1_174_80.txt llm_embedding_1_192.txt flow_embedding_1_192.txt text_1_38.txt \
|
| 101 |
-
rand_noise_1_80_300.txt speech_window_2x8x480.txt \
|
| 102 |
-
CosyVoice-BlankEN-Ax650-prefill_512/ token2wav-axmodels/
|
| 103 |
-
```
|
| 104 |
-
3) Run `run.sh`
|
| 105 |
```shell
|
| 106 |
root@ax650 ~/Cosyvoice2 # bash run.sh
|
| 107 |
rm: cannot remove 'output*.wav': No such file or directory
|
|
@@ -163,4 +123,53 @@ text >>
|
|
| 163 |
```
|
| 164 |
|
| 165 |
Output Speech:
|
| 166 |
-
[output.wav](asset/output.wav)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 43 |
|
| 44 |
### 1. Text to Speech (Voice Cloning)
|
| 45 |
|
| 46 |
+
#### (1) Copy this project to AX650 Board
|
| 47 |
|
| 48 |
+
#### (2). Prepare Dependencies
|
| 49 |
+
|
| 50 |
+
**Running HTTP Tokenizer Server** and **Processing Prompt Speech** require these Python packages. If you run these two steps on a PC, install the packages on that PC.
|
| 51 |
```
|
| 52 |
pip3 install -r scripts/requirements.txt
|
| 53 |
```
|
| 54 |
|
| 55 |
+
#### 2. Start HTTP Tokenizer Server
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 56 |
```
|
| 57 |
cd scripts
|
| 58 |
python cosyvoice2_tokenizer.py --host {your host} --port {your port}
|
| 59 |
```
|
| 60 |
|
| 61 |
+
#### 3. Run on AX650 Board
|
| 62 |
1) Modify the HTTP host in `run.sh`.
|
| 63 |
+
|
| 64 |
+
2) Run `run.sh`
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 65 |
```shell
|
| 66 |
root@ax650 ~/Cosyvoice2 # bash run.sh
|
| 67 |
rm: cannot remove 'output*.wav': No such file or directory
|
|
|
|
| 123 |
```
|
| 124 |
|
| 125 |
Output Speech:
|
| 126 |
+
[output.wav](asset/output.wav)
|
| 127 |
+
|
| 128 |
+
|
| 129 |
+
#### Optional. Process Prompt Speech
|
| 130 |
+
If you want to clone a specific voice, perform this step.
|
| 131 |
+
|
| 132 |
+
##### (1). Download wetext
|
| 133 |
+
```
|
| 134 |
+
pip3 install modelscope
|
| 135 |
+
modelscope download --model pengzhendong/wetext --local_dir pengzhendong/wetext
|
| 136 |
+
```
|
| 137 |
+
|
| 138 |
+
##### (2). Process Prompt Speech
|
| 139 |
+
```
|
| 140 |
+
python scripts/process_prompt.py
|
| 141 |
+
```
|
| 142 |
+
|
| 143 |
+
Pass parameters according to the actual situation.
|
| 144 |
+
```
|
| 145 |
+
python scripts/process_prompt.py -h
|
| 146 |
+
|
| 147 |
+
usage: process_prompt.py [-h] [--model_dir MODEL_DIR] [--wetext_dir WETEXT_DIR] [--sample_rate SAMPLE_RATE] [--prompt_text PROMPT_TEXT] [--prompt_speech PROMPT_SPEECH]
|
| 148 |
+
[--output OUTPUT]
|
| 149 |
+
|
| 150 |
+
options:
|
| 151 |
+
-h, --help show this help message and exit
|
| 152 |
+
--model_dir MODEL_DIR
|
| 153 |
+
tokenizer configuration directionary
|
| 154 |
+
--wetext_dir WETEXT_DIR
|
| 155 |
+
path to wetext
|
| 156 |
+
--sample_rate SAMPLE_RATE
|
| 157 |
+
Sampling rate for prompt audio
|
| 158 |
+
--prompt_text PROMPT_TEXT
|
| 159 |
+
The text content of the prompt(reference) audio. Text or file path.
|
| 160 |
+
--prompt_speech PROMPT_SPEECH
|
| 161 |
+
The path to prompt(reference) audio.
|
| 162 |
+
--output OUTPUT Output data storage directory
|
| 163 |
+
```
|
| 164 |
+
|
| 165 |
+
After executing the above command, files like the following will be generated:
|
| 166 |
+
```
|
| 167 |
+
flow_embedding.txt
|
| 168 |
+
flow_prompt_speech_token.txt
|
| 169 |
+
llm_embedding.txt
|
| 170 |
+
llm_prompt_speech_token.txt
|
| 171 |
+
prompt_speech_feat.txt
|
| 172 |
+
prompt_text.txt
|
| 173 |
+
```
|
| 174 |
+
|
| 175 |
+
When you run `run.sh`, pass the output directory generated by this step as the `prompt_files` parameter of the script.
|
asset/{dingding.png → en_man1.mp3}
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:461dd4cc9cf5bf6b774a9978cc9b7ca96033b214714b12413ecfe9eb1bf03ab9
|
| 3 |
+
size 15309
|
asset/{cross_lingual_prompt.wav → en_man1.txt}
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ce5d3c2b96bf649e61817fd44c913c9abfa2314b3265ad6f115fd5c2477cc017
|
| 3 |
+
size 66
|
asset/en_woman1.mp3
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:872ff69b74b37763cfc4a49bdd39d8a2acf51f428e42e1ab9fa3dfc0c4a2e3d4
|
| 3 |
+
size 16941
|
asset/en_woman1.txt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c14383963cba5217b00603065c4c1fc4167155d5c8ae8d6b5b6b92c81b8eef6b
|
| 3 |
+
size 67
|
asset/zh_man1.txt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ac9b54ab8e18581b2fce95bd8e4f8aa4e840beec28d56304b86359e095c57bce
|
| 3 |
+
size 57
|
asset/zh_man1.wav
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:da1153fca1303cd20470317a4ba93027cc5e172214b777747215add36f41109e
|
| 3 |
+
size 1536044
|
asset/zh_man2.mp3
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cd012ac30fe1ffb5bc3e356a84f4f668a25a62c72f810ffae218f83cbcfdf53e
|
| 3 |
+
size 31761
|
asset/zh_man2.txt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0c68ca97e76f6a966fbee90d95ba2210dad6f1c07fcae0f445282b0035823472
|
| 3 |
+
size 69
|
asset/zh_woman1.txt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4ff1a7dd8cb643e4f769735733e7547ff66aa5b29d99f674131f3fb448446efa
|
| 3 |
+
size 45
|
asset/{zero_shot_prompt.wav → zh_woman1.wav}
RENAMED
|
File without changes
|
config.json
CHANGED
|
File without changes
|
main_ax650
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8d36bca7a681437db6ece77226fa6e00605d613b2ab7028de50cd525ec6575c6
|
| 3 |
+
size 6641632
|
prompt_files/flow_embedding_1_192.txt
DELETED
|
@@ -1,192 +0,0 @@
|
|
| 1 |
-
9.833880662918090820e-01
|
| 2 |
-
3.727950453758239746e-01
|
| 3 |
-
-6.032654047012329102e-01
|
| 4 |
-
-5.367950201034545898e-01
|
| 5 |
-
1.168318033218383789e+00
|
| 6 |
-
-4.463896751403808594e-01
|
| 7 |
-
-8.533740043640136719e-02
|
| 8 |
-
6.770751476287841797e-01
|
| 9 |
-
4.399921894073486328e-01
|
| 10 |
-
-8.947002887725830078e-02
|
| 11 |
-
-4.119307696819305420e-01
|
| 12 |
-
-1.400911569595336914e+00
|
| 13 |
-
1.469335317611694336e+00
|
| 14 |
-
7.800692915916442871e-01
|
| 15 |
-
6.252028942108154297e-01
|
| 16 |
-
-1.524239063262939453e+00
|
| 17 |
-
4.095870852470397949e-01
|
| 18 |
-
8.922567367553710938e-01
|
| 19 |
-
1.414063215255737305e+00
|
| 20 |
-
-3.570723533630371094e-01
|
| 21 |
-
3.816263973712921143e-01
|
| 22 |
-
-2.559853792190551758e-01
|
| 23 |
-
9.759899973869323730e-01
|
| 24 |
-
-2.347678393125534058e-01
|
| 25 |
-
-8.310836553573608398e-01
|
| 26 |
-
-1.119347572326660156e+00
|
| 27 |
-
6.822414696216583252e-02
|
| 28 |
-
1.058485746383666992e+00
|
| 29 |
-
2.381889820098876953e-01
|
| 30 |
-
-2.013707756996154785e-01
|
| 31 |
-
-4.302661716938018799e-01
|
| 32 |
-
-1.057960271835327148e+00
|
| 33 |
-
1.127839088439941406e+00
|
| 34 |
-
-1.518161177635192871e+00
|
| 35 |
-
-5.298921465873718262e-01
|
| 36 |
-
-1.788670778274536133e+00
|
| 37 |
-
-3.309334218502044678e-01
|
| 38 |
-
1.011094689369201660e+00
|
| 39 |
-
-3.399490118026733398e-01
|
| 40 |
-
-5.792245864868164062e-01
|
| 41 |
-
3.723595738410949707e-01
|
| 42 |
-
-3.795529901981353760e-02
|
| 43 |
-
-9.215813875198364258e-01
|
| 44 |
-
-2.451439201831817627e-01
|
| 45 |
-
-1.136183738708496094e+00
|
| 46 |
-
9.513977169990539551e-02
|
| 47 |
-
7.262014746665954590e-01
|
| 48 |
-
-9.598007798194885254e-01
|
| 49 |
-
-5.060364603996276855e-01
|
| 50 |
-
-2.999072074890136719e-01
|
| 51 |
-
-7.779634594917297363e-01
|
| 52 |
-
1.212495565414428711e+00
|
| 53 |
-
3.001802563667297363e-01
|
| 54 |
-
-2.383058547973632812e+00
|
| 55 |
-
1.490965783596038818e-01
|
| 56 |
-
5.186975002288818359e-02
|
| 57 |
-
1.555646419525146484e+00
|
| 58 |
-
-7.905082702636718750e-01
|
| 59 |
-
6.895875930786132812e-01
|
| 60 |
-
-7.865182161331176758e-01
|
| 61 |
-
-1.267613649368286133e+00
|
| 62 |
-
5.915310978889465332e-01
|
| 63 |
-
-3.206543624401092529e-01
|
| 64 |
-
3.275410532951354980e-01
|
| 65 |
-
-7.800404429435729980e-01
|
| 66 |
-
2.810131907463073730e-01
|
| 67 |
-
-5.581974983215332031e-02
|
| 68 |
-
-6.896089911460876465e-01
|
| 69 |
-
-1.699091911315917969e+00
|
| 70 |
-
8.533768653869628906e-01
|
| 71 |
-
-1.143321990966796875e+00
|
| 72 |
-
1.108269929885864258e+00
|
| 73 |
-
1.488067150115966797e+00
|
| 74 |
-
4.714697599411010742e-01
|
| 75 |
-
-2.468206435441970825e-01
|
| 76 |
-
-2.778674662113189697e-01
|
| 77 |
-
-5.726919770240783691e-01
|
| 78 |
-
7.966566681861877441e-01
|
| 79 |
-
3.259438872337341309e-01
|
| 80 |
-
7.238841056823730469e-01
|
| 81 |
-
1.317236185073852539e+00
|
| 82 |
-
-6.427643299102783203e-01
|
| 83 |
-
-6.616854071617126465e-01
|
| 84 |
-
3.449333608150482178e-01
|
| 85 |
-
1.523873805999755859e+00
|
| 86 |
-
-1.770880818367004395e+00
|
| 87 |
-
4.459496736526489258e-01
|
| 88 |
-
-1.308673977851867676e+00
|
| 89 |
-
-8.378249406814575195e-01
|
| 90 |
-
-7.776624560356140137e-01
|
| 91 |
-
-7.166379690170288086e-01
|
| 92 |
-
1.483591556549072266e+00
|
| 93 |
-
-1.046773791313171387e+00
|
| 94 |
-
-9.184205532073974609e-02
|
| 95 |
-
-5.694127678871154785e-01
|
| 96 |
-
-7.002854347229003906e-01
|
| 97 |
-
-5.811145305633544922e-01
|
| 98 |
-
-1.267730951309204102e+00
|
| 99 |
-
1.940409541130065918e+00
|
| 100 |
-
7.551879882812500000e-01
|
| 101 |
-
3.788790851831436157e-02
|
| 102 |
-
-1.767819404602050781e+00
|
| 103 |
-
1.966339051723480225e-01
|
| 104 |
-
2.125173091888427734e+00
|
| 105 |
-
4.033783376216888428e-01
|
| 106 |
-
-6.449738740921020508e-01
|
| 107 |
-
-7.214421778917312622e-02
|
| 108 |
-
1.038697957992553711e+00
|
| 109 |
-
-1.720039248466491699e+00
|
| 110 |
-
-5.593552589416503906e-01
|
| 111 |
-
6.905189156532287598e-01
|
| 112 |
-
1.693801283836364746e+00
|
| 113 |
-
1.025780200958251953e+00
|
| 114 |
-
1.601356863975524902e-01
|
| 115 |
-
1.841381192207336426e-03
|
| 116 |
-
-1.340688228607177734e+00
|
| 117 |
-
7.914224863052368164e-01
|
| 118 |
-
-4.111509919166564941e-01
|
| 119 |
-
-9.689708948135375977e-01
|
| 120 |
-
9.706826806068420410e-01
|
| 121 |
-
3.221712112426757812e-01
|
| 122 |
-
-1.017553806304931641e+00
|
| 123 |
-
6.374475359916687012e-01
|
| 124 |
-
-1.567446827888488770e+00
|
| 125 |
-
1.079622745513916016e+00
|
| 126 |
-
-6.838436126708984375e-01
|
| 127 |
-
-7.464203834533691406e-01
|
| 128 |
-
4.736322760581970215e-01
|
| 129 |
-
7.230627536773681641e-02
|
| 130 |
-
-1.091879606246948242e+00
|
| 131 |
-
-2.780759036540985107e-01
|
| 132 |
-
6.500254869461059570e-01
|
| 133 |
-
-1.413071602582931519e-01
|
| 134 |
-
-6.677935123443603516e-01
|
| 135 |
-
-5.637246370315551758e-01
|
| 136 |
-
1.807020783424377441e+00
|
| 137 |
-
2.142686128616333008e+00
|
| 138 |
-
4.661364853382110596e-01
|
| 139 |
-
-7.062357068061828613e-01
|
| 140 |
-
-7.115917205810546875e-01
|
| 141 |
-
1.251373767852783203e+00
|
| 142 |
-
-1.802901387214660645e+00
|
| 143 |
-
-1.352177619934082031e+00
|
| 144 |
-
-3.198754191398620605e-01
|
| 145 |
-
1.498459577560424805e-01
|
| 146 |
-
-4.831680059432983398e-01
|
| 147 |
-
7.488607764244079590e-01
|
| 148 |
-
8.024247884750366211e-01
|
| 149 |
-
7.148905396461486816e-01
|
| 150 |
-
-1.689905524253845215e-01
|
| 151 |
-
-3.437060117721557617e-01
|
| 152 |
-
1.340401619672775269e-01
|
| 153 |
-
1.683871150016784668e+00
|
| 154 |
-
1.002604246139526367e+00
|
| 155 |
-
1.308276414871215820e+00
|
| 156 |
-
-7.617053985595703125e-01
|
| 157 |
-
-2.677526175975799561e-01
|
| 158 |
-
-7.422828674316406250e-01
|
| 159 |
-
5.662541985511779785e-01
|
| 160 |
-
-9.786943793296813965e-01
|
| 161 |
-
5.175768136978149414e-01
|
| 162 |
-
-2.861405014991760254e-01
|
| 163 |
-
8.294684886932373047e-01
|
| 164 |
-
-1.999751329421997070e-01
|
| 165 |
-
1.037881255149841309e+00
|
| 166 |
-
5.464680194854736328e-01
|
| 167 |
-
-8.660980463027954102e-01
|
| 168 |
-
4.928737580776214600e-01
|
| 169 |
-
-6.311498880386352539e-01
|
| 170 |
-
3.337791562080383301e-01
|
| 171 |
-
9.849458932876586914e-01
|
| 172 |
-
-1.106900125741958618e-01
|
| 173 |
-
2.177442312240600586e-01
|
| 174 |
-
1.023627996444702148e+00
|
| 175 |
-
7.414194345474243164e-01
|
| 176 |
-
1.292455196380615234e+00
|
| 177 |
-
6.313494443893432617e-01
|
| 178 |
-
9.998620748519897461e-01
|
| 179 |
-
2.719911038875579834e-01
|
| 180 |
-
2.164029121398925781e+00
|
| 181 |
-
5.713845491409301758e-01
|
| 182 |
-
1.178232431411743164e+00
|
| 183 |
-
1.090514659881591797e-02
|
| 184 |
-
-4.316673576831817627e-01
|
| 185 |
-
-1.270594716072082520e+00
|
| 186 |
-
5.932700037956237793e-01
|
| 187 |
-
-1.272589564323425293e+00
|
| 188 |
-
9.731127023696899414e-01
|
| 189 |
-
9.898380041122436523e-01
|
| 190 |
-
-3.958564698696136475e-01
|
| 191 |
-
-5.807604193687438965e-01
|
| 192 |
-
5.031570792198181152e-01
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
prompt_files/flow_prompt_speech_token_1_87.txt
DELETED
|
@@ -1,87 +0,0 @@
|
|
| 1 |
-
1520
|
| 2 |
-
4299
|
| 3 |
-
6486
|
| 4 |
-
6486
|
| 5 |
-
6486
|
| 6 |
-
6486
|
| 7 |
-
4299
|
| 8 |
-
2031
|
| 9 |
-
5136
|
| 10 |
-
5405
|
| 11 |
-
537
|
| 12 |
-
5263
|
| 13 |
-
4528
|
| 14 |
-
4862
|
| 15 |
-
146
|
| 16 |
-
1561
|
| 17 |
-
1565
|
| 18 |
-
4795
|
| 19 |
-
5073
|
| 20 |
-
2752
|
| 21 |
-
395
|
| 22 |
-
2927
|
| 23 |
-
5589
|
| 24 |
-
6327
|
| 25 |
-
5023
|
| 26 |
-
4780
|
| 27 |
-
5591
|
| 28 |
-
2687
|
| 29 |
-
1308
|
| 30 |
-
3159
|
| 31 |
-
5832
|
| 32 |
-
5838
|
| 33 |
-
736
|
| 34 |
-
1797
|
| 35 |
-
1882
|
| 36 |
-
758
|
| 37 |
-
3749
|
| 38 |
-
2076
|
| 39 |
-
441
|
| 40 |
-
4970
|
| 41 |
-
2261
|
| 42 |
-
6378
|
| 43 |
-
5661
|
| 44 |
-
5086
|
| 45 |
-
2486
|
| 46 |
-
220
|
| 47 |
-
1107
|
| 48 |
-
3005
|
| 49 |
-
3650
|
| 50 |
-
5348
|
| 51 |
-
2511
|
| 52 |
-
1569
|
| 53 |
-
5106
|
| 54 |
-
1542
|
| 55 |
-
2139
|
| 56 |
-
1695
|
| 57 |
-
1295
|
| 58 |
-
3563
|
| 59 |
-
3805
|
| 60 |
-
5800
|
| 61 |
-
5829
|
| 62 |
-
5831
|
| 63 |
-
707
|
| 64 |
-
572
|
| 65 |
-
5672
|
| 66 |
-
3411
|
| 67 |
-
6075
|
| 68 |
-
3658
|
| 69 |
-
5192
|
| 70 |
-
4543
|
| 71 |
-
5103
|
| 72 |
-
5589
|
| 73 |
-
4943
|
| 74 |
-
527
|
| 75 |
-
860
|
| 76 |
-
3644
|
| 77 |
-
4598
|
| 78 |
-
5049
|
| 79 |
-
5061
|
| 80 |
-
5682
|
| 81 |
-
6486
|
| 82 |
-
6486
|
| 83 |
-
6486
|
| 84 |
-
6486
|
| 85 |
-
6486
|
| 86 |
-
6486
|
| 87 |
-
4299
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
prompt_files/llm_embedding_1_192.txt
DELETED
|
@@ -1,192 +0,0 @@
|
|
| 1 |
-
9.833880662918090820e-01
|
| 2 |
-
3.727950453758239746e-01
|
| 3 |
-
-6.032654047012329102e-01
|
| 4 |
-
-5.367950201034545898e-01
|
| 5 |
-
1.168318033218383789e+00
|
| 6 |
-
-4.463896751403808594e-01
|
| 7 |
-
-8.533740043640136719e-02
|
| 8 |
-
6.770751476287841797e-01
|
| 9 |
-
4.399921894073486328e-01
|
| 10 |
-
-8.947002887725830078e-02
|
| 11 |
-
-4.119307696819305420e-01
|
| 12 |
-
-1.400911569595336914e+00
|
| 13 |
-
1.469335317611694336e+00
|
| 14 |
-
7.800692915916442871e-01
|
| 15 |
-
6.252028942108154297e-01
|
| 16 |
-
-1.524239063262939453e+00
|
| 17 |
-
4.095870852470397949e-01
|
| 18 |
-
8.922567367553710938e-01
|
| 19 |
-
1.414063215255737305e+00
|
| 20 |
-
-3.570723533630371094e-01
|
| 21 |
-
3.816263973712921143e-01
|
| 22 |
-
-2.559853792190551758e-01
|
| 23 |
-
9.759899973869323730e-01
|
| 24 |
-
-2.347678393125534058e-01
|
| 25 |
-
-8.310836553573608398e-01
|
| 26 |
-
-1.119347572326660156e+00
|
| 27 |
-
6.822414696216583252e-02
|
| 28 |
-
1.058485746383666992e+00
|
| 29 |
-
2.381889820098876953e-01
|
| 30 |
-
-2.013707756996154785e-01
|
| 31 |
-
-4.302661716938018799e-01
|
| 32 |
-
-1.057960271835327148e+00
|
| 33 |
-
1.127839088439941406e+00
|
| 34 |
-
-1.518161177635192871e+00
|
| 35 |
-
-5.298921465873718262e-01
|
| 36 |
-
-1.788670778274536133e+00
|
| 37 |
-
-3.309334218502044678e-01
|
| 38 |
-
1.011094689369201660e+00
|
| 39 |
-
-3.399490118026733398e-01
|
| 40 |
-
-5.792245864868164062e-01
|
| 41 |
-
3.723595738410949707e-01
|
| 42 |
-
-3.795529901981353760e-02
|
| 43 |
-
-9.215813875198364258e-01
|
| 44 |
-
-2.451439201831817627e-01
|
| 45 |
-
-1.136183738708496094e+00
|
| 46 |
-
9.513977169990539551e-02
|
| 47 |
-
7.262014746665954590e-01
|
| 48 |
-
-9.598007798194885254e-01
|
| 49 |
-
-5.060364603996276855e-01
|
| 50 |
-
-2.999072074890136719e-01
|
| 51 |
-
-7.779634594917297363e-01
|
| 52 |
-
1.212495565414428711e+00
|
| 53 |
-
3.001802563667297363e-01
|
| 54 |
-
-2.383058547973632812e+00
|
| 55 |
-
1.490965783596038818e-01
|
| 56 |
-
5.186975002288818359e-02
|
| 57 |
-
1.555646419525146484e+00
|
| 58 |
-
-7.905082702636718750e-01
|
| 59 |
-
6.895875930786132812e-01
|
| 60 |
-
-7.865182161331176758e-01
|
| 61 |
-
-1.267613649368286133e+00
|
| 62 |
-
5.915310978889465332e-01
|
| 63 |
-
-3.206543624401092529e-01
|
| 64 |
-
3.275410532951354980e-01
|
| 65 |
-
-7.800404429435729980e-01
|
| 66 |
-
2.810131907463073730e-01
|
| 67 |
-
-5.581974983215332031e-02
|
| 68 |
-
-6.896089911460876465e-01
|
| 69 |
-
-1.699091911315917969e+00
|
| 70 |
-
8.533768653869628906e-01
|
| 71 |
-
-1.143321990966796875e+00
|
| 72 |
-
1.108269929885864258e+00
|
| 73 |
-
1.488067150115966797e+00
|
| 74 |
-
4.714697599411010742e-01
|
| 75 |
-
-2.468206435441970825e-01
|
| 76 |
-
-2.778674662113189697e-01
|
| 77 |
-
-5.726919770240783691e-01
|
| 78 |
-
7.966566681861877441e-01
|
| 79 |
-
3.259438872337341309e-01
|
| 80 |
-
7.238841056823730469e-01
|
| 81 |
-
1.317236185073852539e+00
|
| 82 |
-
-6.427643299102783203e-01
|
| 83 |
-
-6.616854071617126465e-01
|
| 84 |
-
3.449333608150482178e-01
|
| 85 |
-
1.523873805999755859e+00
|
| 86 |
-
-1.770880818367004395e+00
|
| 87 |
-
4.459496736526489258e-01
|
| 88 |
-
-1.308673977851867676e+00
|
| 89 |
-
-8.378249406814575195e-01
|
| 90 |
-
-7.776624560356140137e-01
|
| 91 |
-
-7.166379690170288086e-01
|
| 92 |
-
1.483591556549072266e+00
|
| 93 |
-
-1.046773791313171387e+00
|
| 94 |
-
-9.184205532073974609e-02
|
| 95 |
-
-5.694127678871154785e-01
|
| 96 |
-
-7.002854347229003906e-01
|
| 97 |
-
-5.811145305633544922e-01
|
| 98 |
-
-1.267730951309204102e+00
|
| 99 |
-
1.940409541130065918e+00
|
| 100 |
-
7.551879882812500000e-01
|
| 101 |
-
3.788790851831436157e-02
|
| 102 |
-
-1.767819404602050781e+00
|
| 103 |
-
1.966339051723480225e-01
|
| 104 |
-
2.125173091888427734e+00
|
| 105 |
-
4.033783376216888428e-01
|
| 106 |
-
-6.449738740921020508e-01
|
| 107 |
-
-7.214421778917312622e-02
|
| 108 |
-
1.038697957992553711e+00
|
| 109 |
-
-1.720039248466491699e+00
|
| 110 |
-
-5.593552589416503906e-01
|
| 111 |
-
6.905189156532287598e-01
|
| 112 |
-
1.693801283836364746e+00
|
| 113 |
-
1.025780200958251953e+00
|
| 114 |
-
1.601356863975524902e-01
|
| 115 |
-
1.841381192207336426e-03
|
| 116 |
-
-1.340688228607177734e+00
|
| 117 |
-
7.914224863052368164e-01
|
| 118 |
-
-4.111509919166564941e-01
|
| 119 |
-
-9.689708948135375977e-01
|
| 120 |
-
9.706826806068420410e-01
|
| 121 |
-
3.221712112426757812e-01
|
| 122 |
-
-1.017553806304931641e+00
|
| 123 |
-
6.374475359916687012e-01
|
| 124 |
-
-1.567446827888488770e+00
|
| 125 |
-
1.079622745513916016e+00
|
| 126 |
-
-6.838436126708984375e-01
|
| 127 |
-
-7.464203834533691406e-01
|
| 128 |
-
4.736322760581970215e-01
|
| 129 |
-
7.230627536773681641e-02
|
| 130 |
-
-1.091879606246948242e+00
|
| 131 |
-
-2.780759036540985107e-01
|
| 132 |
-
6.500254869461059570e-01
|
| 133 |
-
-1.413071602582931519e-01
|
| 134 |
-
-6.677935123443603516e-01
|
| 135 |
-
-5.637246370315551758e-01
|
| 136 |
-
1.807020783424377441e+00
|
| 137 |
-
2.142686128616333008e+00
|
| 138 |
-
4.661364853382110596e-01
|
| 139 |
-
-7.062357068061828613e-01
|
| 140 |
-
-7.115917205810546875e-01
|
| 141 |
-
1.251373767852783203e+00
|
| 142 |
-
-1.802901387214660645e+00
|
| 143 |
-
-1.352177619934082031e+00
|
| 144 |
-
-3.198754191398620605e-01
|
| 145 |
-
1.498459577560424805e-01
|
| 146 |
-
-4.831680059432983398e-01
|
| 147 |
-
7.488607764244079590e-01
|
| 148 |
-
8.024247884750366211e-01
|
| 149 |
-
7.148905396461486816e-01
|
| 150 |
-
-1.689905524253845215e-01
|
| 151 |
-
-3.437060117721557617e-01
|
| 152 |
-
1.340401619672775269e-01
|
| 153 |
-
1.683871150016784668e+00
|
| 154 |
-
1.002604246139526367e+00
|
| 155 |
-
1.308276414871215820e+00
|
| 156 |
-
-7.617053985595703125e-01
|
| 157 |
-
-2.677526175975799561e-01
|
| 158 |
-
-7.422828674316406250e-01
|
| 159 |
-
5.662541985511779785e-01
|
| 160 |
-
-9.786943793296813965e-01
|
| 161 |
-
5.175768136978149414e-01
|
| 162 |
-
-2.861405014991760254e-01
|
| 163 |
-
8.294684886932373047e-01
|
| 164 |
-
-1.999751329421997070e-01
|
| 165 |
-
1.037881255149841309e+00
|
| 166 |
-
5.464680194854736328e-01
|
| 167 |
-
-8.660980463027954102e-01
|
| 168 |
-
4.928737580776214600e-01
|
| 169 |
-
-6.311498880386352539e-01
|
| 170 |
-
3.337791562080383301e-01
|
| 171 |
-
9.849458932876586914e-01
|
| 172 |
-
-1.106900125741958618e-01
|
| 173 |
-
2.177442312240600586e-01
|
| 174 |
-
1.023627996444702148e+00
|
| 175 |
-
7.414194345474243164e-01
|
| 176 |
-
1.292455196380615234e+00
|
| 177 |
-
6.313494443893432617e-01
|
| 178 |
-
9.998620748519897461e-01
|
| 179 |
-
2.719911038875579834e-01
|
| 180 |
-
2.164029121398925781e+00
|
| 181 |
-
5.713845491409301758e-01
|
| 182 |
-
1.178232431411743164e+00
|
| 183 |
-
1.090514659881591797e-02
|
| 184 |
-
-4.316673576831817627e-01
|
| 185 |
-
-1.270594716072082520e+00
|
| 186 |
-
5.932700037956237793e-01
|
| 187 |
-
-1.272589564323425293e+00
|
| 188 |
-
9.731127023696899414e-01
|
| 189 |
-
9.898380041122436523e-01
|
| 190 |
-
-3.958564698696136475e-01
|
| 191 |
-
-5.807604193687438965e-01
|
| 192 |
-
5.031570792198181152e-01
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
prompt_files/llm_prompt_speech_token_1_87.txt
DELETED
|
@@ -1,87 +0,0 @@
|
|
| 1 |
-
1520
|
| 2 |
-
4299
|
| 3 |
-
6486
|
| 4 |
-
6486
|
| 5 |
-
6486
|
| 6 |
-
6486
|
| 7 |
-
4299
|
| 8 |
-
2031
|
| 9 |
-
5136
|
| 10 |
-
5405
|
| 11 |
-
537
|
| 12 |
-
5263
|
| 13 |
-
4528
|
| 14 |
-
4862
|
| 15 |
-
146
|
| 16 |
-
1561
|
| 17 |
-
1565
|
| 18 |
-
4795
|
| 19 |
-
5073
|
| 20 |
-
2752
|
| 21 |
-
395
|
| 22 |
-
2927
|
| 23 |
-
5589
|
| 24 |
-
6327
|
| 25 |
-
5023
|
| 26 |
-
4780
|
| 27 |
-
5591
|
| 28 |
-
2687
|
| 29 |
-
1308
|
| 30 |
-
3159
|
| 31 |
-
5832
|
| 32 |
-
5838
|
| 33 |
-
736
|
| 34 |
-
1797
|
| 35 |
-
1882
|
| 36 |
-
758
|
| 37 |
-
3749
|
| 38 |
-
2076
|
| 39 |
-
441
|
| 40 |
-
4970
|
| 41 |
-
2261
|
| 42 |
-
6378
|
| 43 |
-
5661
|
| 44 |
-
5086
|
| 45 |
-
2486
|
| 46 |
-
220
|
| 47 |
-
1107
|
| 48 |
-
3005
|
| 49 |
-
3650
|
| 50 |
-
5348
|
| 51 |
-
2511
|
| 52 |
-
1569
|
| 53 |
-
5106
|
| 54 |
-
1542
|
| 55 |
-
2139
|
| 56 |
-
1695
|
| 57 |
-
1295
|
| 58 |
-
3563
|
| 59 |
-
3805
|
| 60 |
-
5800
|
| 61 |
-
5829
|
| 62 |
-
5831
|
| 63 |
-
707
|
| 64 |
-
572
|
| 65 |
-
5672
|
| 66 |
-
3411
|
| 67 |
-
6075
|
| 68 |
-
3658
|
| 69 |
-
5192
|
| 70 |
-
4543
|
| 71 |
-
5103
|
| 72 |
-
5589
|
| 73 |
-
4943
|
| 74 |
-
527
|
| 75 |
-
860
|
| 76 |
-
3644
|
| 77 |
-
4598
|
| 78 |
-
5049
|
| 79 |
-
5061
|
| 80 |
-
5682
|
| 81 |
-
6486
|
| 82 |
-
6486
|
| 83 |
-
6486
|
| 84 |
-
6486
|
| 85 |
-
6486
|
| 86 |
-
6486
|
| 87 |
-
4299
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
prompt_files/prompt_speech_feat_1_174_80.txt
DELETED
|
The diff for this file is too large to render.
See raw diff
|
|
|
prompt_files/prompt_text_1_15.txt
DELETED
|
@@ -1,15 +0,0 @@
|
|
| 1 |
-
99658
|
| 2 |
-
99317
|
| 3 |
-
56568
|
| 4 |
-
23031
|
| 5 |
-
33447
|
| 6 |
-
26232
|
| 7 |
-
99521
|
| 8 |
-
99190
|
| 9 |
-
9370
|
| 10 |
-
56006
|
| 11 |
-
35946
|
| 12 |
-
97706
|
| 13 |
-
52801
|
| 14 |
-
119024
|
| 15 |
-
1773
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
prompt_files/text_1_38.txt
DELETED
|
@@ -1,38 +0,0 @@
|
|
| 1 |
-
101956
|
| 2 |
-
16530
|
| 3 |
-
88970
|
| 4 |
-
99789
|
| 5 |
-
99469
|
| 6 |
-
53930
|
| 7 |
-
52510
|
| 8 |
-
35727
|
| 9 |
-
17447
|
| 10 |
-
36407
|
| 11 |
-
3837
|
| 12 |
-
100524
|
| 13 |
-
88653
|
| 14 |
-
26939
|
| 15 |
-
55135
|
| 16 |
-
16530
|
| 17 |
-
58364
|
| 18 |
-
18397
|
| 19 |
-
1773
|
| 20 |
-
101956
|
| 21 |
-
16530
|
| 22 |
-
88970
|
| 23 |
-
44636
|
| 24 |
-
99911
|
| 25 |
-
30858
|
| 26 |
-
100811
|
| 27 |
-
100302
|
| 28 |
-
99243
|
| 29 |
-
28291
|
| 30 |
-
3837
|
| 31 |
-
99816
|
| 32 |
-
29524
|
| 33 |
-
99467
|
| 34 |
-
99691
|
| 35 |
-
109803
|
| 36 |
-
12857
|
| 37 |
-
100167
|
| 38 |
-
1773
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
run.sh
CHANGED
|
@@ -5,7 +5,7 @@ rm output*.wav
|
|
| 5 |
./main_ax650 \
|
| 6 |
--template_filename_axmodel "${LLM_DIR}/qwen2_p128_l%d_together.axmodel" \
|
| 7 |
--token2wav_axmodel_dir $TOKEN2WAV_DIR \
|
| 8 |
-
--n_timesteps
|
| 9 |
--axmodel_num 24 \
|
| 10 |
--bos 0 --eos 0 \
|
| 11 |
--filename_tokenizer_model "http://127.0.0.1:12345" \
|
|
@@ -15,7 +15,8 @@ rm output*.wav
|
|
| 15 |
--filename_llm_embed "${LLM_DIR}/llm.llm_embedding.float16.bin" \
|
| 16 |
--filename_speech_embed "${LLM_DIR}/llm.speech_embedding.float16.bin" \
|
| 17 |
--continue 0 \
|
| 18 |
-
--
|
|
|
|
| 19 |
|
| 20 |
|
| 21 |
chmod 777 output*.wav
|
|
|
|
| 5 |
./main_ax650 \
|
| 6 |
--template_filename_axmodel "${LLM_DIR}/qwen2_p128_l%d_together.axmodel" \
|
| 7 |
--token2wav_axmodel_dir $TOKEN2WAV_DIR \
|
| 8 |
+
--n_timesteps 10 \
|
| 9 |
--axmodel_num 24 \
|
| 10 |
--bos 0 --eos 0 \
|
| 11 |
--filename_tokenizer_model "http://127.0.0.1:12345" \
|
|
|
|
| 15 |
--filename_llm_embed "${LLM_DIR}/llm.llm_embedding.float16.bin" \
|
| 16 |
--filename_speech_embed "${LLM_DIR}/llm.speech_embedding.float16.bin" \
|
| 17 |
--continue 0 \
|
| 18 |
+
--prompt_files en_woman1 \
|
| 19 |
+
--text "Because he has zero capacity to respond to the two and a half hour"
|
| 20 |
|
| 21 |
|
| 22 |
chmod 777 output*.wav
|
scripts/frontend.py
CHANGED
|
@@ -32,7 +32,7 @@ try:
|
|
| 32 |
import ttsfrd
|
| 33 |
use_ttsfrd = True
|
| 34 |
except ImportError:
|
| 35 |
-
|
| 36 |
from wetext import Normalizer as ZhNormalizer
|
| 37 |
from wetext import Normalizer as EnNormalizer
|
| 38 |
use_ttsfrd = False
|
|
@@ -192,6 +192,27 @@ class CosyVoiceFrontEnd:
|
|
| 192 |
model_input['text'] = tts_text_token
|
| 193 |
model_input['text_len'] = tts_text_token_len
|
| 194 |
return model_input
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 195 |
|
| 196 |
def frontend_cross_lingual(self, tts_text, prompt_speech_16k, resample_rate, zero_shot_spk_id):
|
| 197 |
model_input = self.frontend_zero_shot(tts_text, '', prompt_speech_16k, resample_rate, zero_shot_spk_id)
|
|
|
|
| 32 |
import ttsfrd
|
| 33 |
use_ttsfrd = True
|
| 34 |
except ImportError:
|
| 35 |
+
|
| 36 |
from wetext import Normalizer as ZhNormalizer
|
| 37 |
from wetext import Normalizer as EnNormalizer
|
| 38 |
use_ttsfrd = False
|
|
|
|
| 192 |
model_input['text'] = tts_text_token
|
| 193 |
model_input['text_len'] = tts_text_token_len
|
| 194 |
return model_input
|
| 195 |
+
|
| 196 |
+
def process_prompt(self, prompt_text, prompt_speech_16k, resample_rate, zero_shot_spk_id):
|
| 197 |
+
if zero_shot_spk_id == '':
|
| 198 |
+
prompt_text_token, prompt_text_token_len = self._extract_text_token(prompt_text)
|
| 199 |
+
prompt_speech_resample = torchaudio.transforms.Resample(orig_freq=16000, new_freq=resample_rate)(prompt_speech_16k)
|
| 200 |
+
speech_feat, speech_feat_len = self._extract_speech_feat(prompt_speech_resample)
|
| 201 |
+
speech_token, speech_token_len = self._extract_speech_token(prompt_speech_16k)
|
| 202 |
+
if resample_rate == 24000:
|
| 203 |
+
# cosyvoice2, force speech_feat % speech_token = 2
|
| 204 |
+
token_len = min(int(speech_feat.shape[1] / 2), speech_token.shape[1])
|
| 205 |
+
speech_feat, speech_feat_len[:] = speech_feat[:, :2 * token_len], 2 * token_len
|
| 206 |
+
speech_token, speech_token_len[:] = speech_token[:, :token_len], token_len
|
| 207 |
+
embedding = self._extract_spk_embedding(prompt_speech_16k)
|
| 208 |
+
model_input = {'prompt_text': prompt_text_token, 'prompt_text_len': prompt_text_token_len,
|
| 209 |
+
'llm_prompt_speech_token': speech_token, 'llm_prompt_speech_token_len': speech_token_len,
|
| 210 |
+
'flow_prompt_speech_token': speech_token, 'flow_prompt_speech_token_len': speech_token_len,
|
| 211 |
+
'prompt_speech_feat': speech_feat, 'prompt_speech_feat_len': speech_feat_len,
|
| 212 |
+
'llm_embedding': embedding, 'flow_embedding': embedding}
|
| 213 |
+
else:
|
| 214 |
+
model_input = self.spk2info[zero_shot_spk_id]
|
| 215 |
+
return model_input
|
| 216 |
|
| 217 |
def frontend_cross_lingual(self, tts_text, prompt_speech_16k, resample_rate, zero_shot_spk_id):
|
| 218 |
model_input = self.frontend_zero_shot(tts_text, '', prompt_speech_16k, resample_rate, zero_shot_spk_id)
|
scripts/process_prompt.py
CHANGED
|
@@ -1,4 +1,5 @@
|
|
| 1 |
import argparse
|
|
|
|
| 2 |
import torch
|
| 3 |
import torchaudio
|
| 4 |
import numpy as np
|
|
@@ -15,15 +16,15 @@ def load_wav(wav, target_sr):
|
|
| 15 |
if __name__ == "__main__":
|
| 16 |
|
| 17 |
args = argparse.ArgumentParser()
|
| 18 |
-
args.add_argument('--model_dir', type=str, default="scripts/CosyVoice-BlankEN")
|
| 19 |
-
args.add_argument('--wetext_dir', type=str, default="pengzhendong/wetext")
|
| 20 |
-
args.add_argument('--sample_rate', type=int, default=24000)
|
| 21 |
-
args.add_argument('--
|
| 22 |
-
args.add_argument('--
|
| 23 |
-
args.add_argument('--
|
| 24 |
-
args.add_argument('--prompt_speech', type=str, default="asset/zero_shot_prompt.wav")
|
| 25 |
args = args.parse_args()
|
| 26 |
|
|
|
|
| 27 |
|
| 28 |
frontend = CosyVoiceFrontEnd(f"{args.model_dir}",
|
| 29 |
args.wetext_dir,
|
|
@@ -33,27 +34,29 @@ if __name__ == "__main__":
|
|
| 33 |
"all")
|
| 34 |
|
| 35 |
prompt_speech_16k = load_wav(args.prompt_speech, 16000)
|
| 36 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 37 |
|
| 38 |
# model_input = {'prompt_text': prompt_text_token, 'prompt_text_len': prompt_text_token_len,
|
| 39 |
# 'llm_prompt_speech_token': speech_token, 'llm_prompt_speech_token_len': speech_token_len,
|
| 40 |
# 'flow_prompt_speech_token': speech_token, 'flow_prompt_speech_token_len': speech_token_len,
|
| 41 |
# 'prompt_speech_feat': speech_feat, 'prompt_speech_feat_len': speech_feat_len,
|
| 42 |
# 'llm_embedding': embedding, 'flow_embedding': embedding}
|
| 43 |
-
|
|
|
|
| 44 |
for k, v in model_input.items():
|
| 45 |
if "_len" in k:
|
| 46 |
continue
|
| 47 |
shapes = [str(s) for s in v.shape]
|
| 48 |
shape_str = "_".join(shapes)
|
| 49 |
if v.dtype in (torch.int32, torch.int64):
|
| 50 |
-
np.savetxt(f"{
|
| 51 |
else:
|
| 52 |
-
np.savetxt(f"{
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
rand_noise = torch.randn([1, 80, 300])
|
| 56 |
-
np.savetxt("rand_noise_1_80_300.txt", rand_noise.numpy().reshape(-1), delimiter=",")
|
| 57 |
-
|
| 58 |
-
speech_window = np.hamming(2 * 8 * 480)
|
| 59 |
-
np.savetxt("speech_window_2x8x480.txt", speech_window.reshape(-1), delimiter=",")
|
|
|
|
| 1 |
import argparse
|
| 2 |
+
import os
|
| 3 |
import torch
|
| 4 |
import torchaudio
|
| 5 |
import numpy as np
|
|
|
|
| 16 |
if __name__ == "__main__":
|
| 17 |
|
| 18 |
args = argparse.ArgumentParser()
|
| 19 |
+
args.add_argument('--model_dir', type=str, default="scripts/CosyVoice-BlankEN", help="tokenizer configuration directionary")
|
| 20 |
+
args.add_argument('--wetext_dir', type=str, default="pengzhendong/wetext", help="path to wetext")
|
| 21 |
+
args.add_argument('--sample_rate', type=int, default=24000, help="Sampling rate for prompt audio")
|
| 22 |
+
args.add_argument('--prompt_text', type=str, default="希望你以后能够做的比我还好呦。", help="The text content of the prompt(reference) audio. Text or file path.")
|
| 23 |
+
args.add_argument('--prompt_speech', type=str, default="asset/zero_shot_prompt.wav", help="The path to prompt(reference) audio.")
|
| 24 |
+
args.add_argument('--output', type=str, default="prompt_files", help="Output data storage directory")
|
|
|
|
| 25 |
args = args.parse_args()
|
| 26 |
|
| 27 |
+
os.makedirs(args.output, exist_ok=True)
|
| 28 |
|
| 29 |
frontend = CosyVoiceFrontEnd(f"{args.model_dir}",
|
| 30 |
args.wetext_dir,
|
|
|
|
| 34 |
"all")
|
| 35 |
|
| 36 |
prompt_speech_16k = load_wav(args.prompt_speech, 16000)
|
| 37 |
+
zero_shot_spk_id = ""
|
| 38 |
+
|
| 39 |
+
if os.path.isfile(args.prompt_text):
|
| 40 |
+
with open(args.prompt_text, "r") as f:
|
| 41 |
+
prompt_text = f.read()
|
| 42 |
+
else:
|
| 43 |
+
prompt_text = args.prompt_text
|
| 44 |
+
print("prompt_text",prompt_text)
|
| 45 |
+
model_input = frontend.process_prompt( prompt_text, prompt_speech_16k, args.sample_rate, zero_shot_spk_id)
|
| 46 |
|
| 47 |
# model_input = {'prompt_text': prompt_text_token, 'prompt_text_len': prompt_text_token_len,
|
| 48 |
# 'llm_prompt_speech_token': speech_token, 'llm_prompt_speech_token_len': speech_token_len,
|
| 49 |
# 'flow_prompt_speech_token': speech_token, 'flow_prompt_speech_token_len': speech_token_len,
|
| 50 |
# 'prompt_speech_feat': speech_feat, 'prompt_speech_feat_len': speech_feat_len,
|
| 51 |
# 'llm_embedding': embedding, 'flow_embedding': embedding}
|
| 52 |
+
print("prompt speech token size:", model_input["flow_prompt_speech_token"].shape)
|
| 53 |
+
assert model_input["flow_prompt_speech_token"].shape[1] >=75, f"speech_token length should >= 75, bug get {model_input['flow_prompt_speech_token'].shape[1]}"
|
| 54 |
for k, v in model_input.items():
|
| 55 |
if "_len" in k:
|
| 56 |
continue
|
| 57 |
shapes = [str(s) for s in v.shape]
|
| 58 |
shape_str = "_".join(shapes)
|
| 59 |
if v.dtype in (torch.int32, torch.int64):
|
| 60 |
+
np.savetxt(f"{args.output}/{k}.txt", v.detach().cpu().numpy().reshape(-1), fmt="%d", delimiter=",")
|
| 61 |
else:
|
| 62 |
+
np.savetxt(f"{args.output}/{k}.txt", v.detach().cpu().numpy().reshape(-1), delimiter=",")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
{prompt_files → token2wav-axmodels}/rand_noise_1_80_300.txt
RENAMED
|
File without changes
|
{prompt_files → token2wav-axmodels}/speech_window_2x8x480.txt
RENAMED
|
File without changes
|