devadigaprathamesh committed
Commit a4e4f12 · verified · 1 parent: 032f637

Create config.json

Files changed (1):
  1. config.json (+64, -0)
config.json ADDED
@@ -0,0 +1,64 @@
+{
+  "app_name": "qwen-law-finetuning",
+  "model": {
+    "base_model_id": "Qwen/Qwen2.5-7B",
+    "max_seq_length": 4096,
+    "quantization": {
+      "load_in_4bit": true,
+      "attn_implementation": "flash_attention_2",
+      "dtype": "bfloat16"
+    }
+  },
+  "peft_config": {
+    "r": 32,
+    "target_modules": [
+      "q_proj",
+      "k_proj",
+      "v_proj",
+      "o_proj",
+      "gate_proj",
+      "up_proj",
+      "down_proj"
+    ],
+    "lora_alpha": 64,
+    "lora_dropout": 0.05,
+    "bias": "none",
+    "use_gradient_checkpointing": "unsloth"
+  },
+  "training": {
+    "dataset": "viber1/indian-law-dataset",
+    "output_dir": "/data/JurisQwen",
+    "checkpoint_dir": "/data/checkpoints",
+    "num_train_epochs": 3,
+    "per_device_train_batch_size": 16,
+    "gradient_accumulation_steps": 2,
+    "optimizer": "adamw_8bit",
+    "learning_rate": 2e-4,
+    "weight_decay": 0.001,
+    "lr_scheduler_type": "cosine",
+    "warmup_ratio": 0.1,
+    "precision": {
+      "bf16": true,
+      "fp16": false,
+      "tf32": true
+    }
+  },
+  "inference": {
+    "max_new_tokens": 512
+  },
+  "compute": {
+    "gpu": "A100-40GB",
+    "training_timeout_hours": 5,
+    "inference_timeout_minutes": 10
+  },
+  "prompt_template": {
+    "user_prefix": "<|im_start|>user\n",
+    "user_suffix": "<|im_end|>",
+    "assistant_prefix": "<|im_start|>assistant\n",
+    "assistant_suffix": "<|im_end|>"
+  },
+  "volumes": {
+    "path": "/data",
+    "name": "finetune-volume"
+  }
+}
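
For context, here is a minimal sketch of how the "model", "peft_config", and "training" sections of this file could be consumed by an Unsloth fine-tuning script. The script is not part of this commit: it is an assumed illustration that maps the config keys onto FastLanguageModel and TRL's SFTTrainer (whose exact signatures vary across Unsloth/TRL releases), and it assumes the dataset exposes a pre-formatted "text" column.

# Hypothetical consumer of this config.json -- not part of the commit.
import json

import torch
from datasets import load_dataset
from transformers import TrainingArguments
from trl import SFTTrainer
from unsloth import FastLanguageModel

with open("config.json") as f:
    cfg = json.load(f)

# "model" section: load the base model 4-bit quantized at the configured context length.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=cfg["model"]["base_model_id"],        # Qwen/Qwen2.5-7B
    max_seq_length=cfg["model"]["max_seq_length"],   # 4096
    load_in_4bit=cfg["model"]["quantization"]["load_in_4bit"],
    dtype=torch.bfloat16,                            # per "dtype": "bfloat16"
)

# "peft_config" section: attach LoRA adapters to the attention and MLP projections.
peft = cfg["peft_config"]
model = FastLanguageModel.get_peft_model(
    model,
    r=peft["r"],                                     # rank 32
    target_modules=peft["target_modules"],
    lora_alpha=peft["lora_alpha"],                   # 64, i.e. alpha = 2 * r
    lora_dropout=peft["lora_dropout"],
    bias=peft["bias"],
    use_gradient_checkpointing=peft["use_gradient_checkpointing"],
)

# "training" section: supervised fine-tuning on the configured dataset.
train = cfg["training"]
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=load_dataset(train["dataset"], split="train"),
    dataset_text_field="text",                       # assumed column name
    max_seq_length=cfg["model"]["max_seq_length"],
    args=TrainingArguments(
        output_dir=train["output_dir"],
        num_train_epochs=train["num_train_epochs"],
        per_device_train_batch_size=train["per_device_train_batch_size"],
        gradient_accumulation_steps=train["gradient_accumulation_steps"],
        optim=train["optimizer"],                    # adamw_8bit
        learning_rate=train["learning_rate"],
        weight_decay=train["weight_decay"],
        lr_scheduler_type=train["lr_scheduler_type"],
        warmup_ratio=train["warmup_ratio"],
        bf16=train["precision"]["bf16"],
        fp16=train["precision"]["fp16"],
        tf32=train["precision"]["tf32"],
    ),
)
trainer.train()

With per_device_train_batch_size 16 and gradient_accumulation_steps 2, the effective batch size works out to 16 × 2 = 32 sequences per optimizer step.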
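
The "prompt_template" block spells out Qwen's ChatML turn delimiters, and "inference.max_new_tokens" caps generation length. Below is a hedged sketch of how they might be applied; the helper names (format_example, build_prompt) and the question/answer field names are assumptions for illustration, not part of the commit.

# Hypothetical prompt helpers -- function and field names are assumed.
def format_example(cfg: dict, question: str, answer: str) -> str:
    """Wrap one Q/A pair in the ChatML template for training."""
    t = cfg["prompt_template"]
    return (
        f"{t['user_prefix']}{question}{t['user_suffix']}\n"
        f"{t['assistant_prefix']}{answer}{t['assistant_suffix']}"
    )

def build_prompt(cfg: dict, question: str) -> str:
    """At inference time, supply only the user turn plus the assistant
    prefix, so the model completes the assistant turn."""
    t = cfg["prompt_template"]
    return f"{t['user_prefix']}{question}{t['user_suffix']}\n{t['assistant_prefix']}"

# Generation would then be capped by the "inference" section, e.g.:
#   outputs = model.generate(**inputs, max_new_tokens=cfg["inference"]["max_new_tokens"])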