Update README.md
Browse files
README.md
CHANGED
|
@@ -127,19 +127,47 @@ pip install -r requirements.txt
|
|
| 127 |
pip install -e .
|
| 128 |
```
|
| 129 |
|
| 130 |
-
Afterwards,
|
| 131 |
-
|
| 132 |
-
```
|
| 133 |
-
|
| 134 |
-
|
| 135 |
-
|
| 136 |
-
|
| 137 |
-
|
| 138 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 139 |
```
|
| 140 |
|
| 141 |
-
|
| 142 |
-
|
| 143 |
## License
|
| 144 |
|
| 145 |
TBA
|
|
|
|
| 127 |
pip install -e .
|
| 128 |
```
|
| 129 |
|
| 130 |
+
Afterwards, run the following Python code snippet to convert text to speech.
|
| 131 |
+
|
| 132 |
+
```python
|
| 133 |
+
from boson_multimodal.serve.serve_engine import HiggsAudioServeEngine, HiggsAudioResponse
|
| 134 |
+
from boson_multimodal.data_types import ChatMLSample, Message, AudioContent
|
| 135 |
+
|
| 136 |
+
import torch
|
| 137 |
+
import torchaudio
|
| 138 |
+
import time
|
| 139 |
+
import click
|
| 140 |
+
|
| 141 |
+
MODEL_PATH = "bosonai/higgs-audio-v2-generation-3B-staging"
|
| 142 |
+
AUDIO_TOKENIZER_PATH = "bosonai/higgs-audio-v2-tokenizer-staging"
|
| 143 |
+
|
| 144 |
+
messages = [
|
| 145 |
+
Message(
|
| 146 |
+
role="system",
|
| 147 |
+
content="Generate audio following instruction.\n\n<|scene_desc_start|>\nSPEAKER0: british accent\n<|scene_desc_end|>",
|
| 148 |
+
),
|
| 149 |
+
Message(
|
| 150 |
+
role="user",
|
| 151 |
+
content="The sun rises in the east and sets in the west. This simple fact has been observed by humans for thousands of years.",
|
| 152 |
+
),
|
| 153 |
+
]
|
| 154 |
+
device = "cuda" if torch.cuda.is_available() else "cpu"
|
| 155 |
+
|
| 156 |
+
serve_engine = HiggsAudioServeEngine(MODEL_PATH, AUDIO_TOKENIZER_PATH, device=device)
|
| 157 |
+
|
| 158 |
+
output: HiggsAudioResponse = serve_engine.generate(
|
| 159 |
+
chat_ml_sample=ChatMLSample(messages=messages),
|
| 160 |
+
max_new_tokens=1024,
|
| 161 |
+
temperature=1.0,
|
| 162 |
+
top_p=0.95,
|
| 163 |
+
top_k=50,
|
| 164 |
+
stop_strings=["<|end_of_text|>", "<|eot_id|>"],
|
| 165 |
+
seed=12345,
|
| 166 |
+
)
|
| 167 |
+
torchaudio.save(f"output.wav", torch.from_numpy(output.audio)[None, :], output.sampling_rate)
|
| 168 |
```
|
| 169 |
|
| 170 |
+
You can also check https://github.com/boson-ai/higgs-audio/tree/main/examples for more example scripts.
|
|
|
|
| 171 |
## License
|
| 172 |
|
| 173 |
TBA
|