Robotics
LeRobot
Safetensors
xvla
xvla-new / train_config.json
jadechoghari's picture
jadechoghari HF Staff
Upload policy weights, train config and readme
e766d84 verified
{
"dataset": {
"repo_id": "pepijn223/bimanual-so100-handover-cube",
"root": null,
"episodes": null,
"image_transforms": {
"enable": false,
"max_num_transforms": 3,
"random_order": false,
"tfs": {
"brightness": {
"weight": 1.0,
"type": "ColorJitter",
"kwargs": {
"brightness": [
0.8,
1.2
]
}
},
"contrast": {
"weight": 1.0,
"type": "ColorJitter",
"kwargs": {
"contrast": [
0.8,
1.2
]
}
},
"saturation": {
"weight": 1.0,
"type": "ColorJitter",
"kwargs": {
"saturation": [
0.5,
1.5
]
}
},
"hue": {
"weight": 1.0,
"type": "ColorJitter",
"kwargs": {
"hue": [
-0.05,
0.05
]
}
},
"sharpness": {
"weight": 1.0,
"type": "SharpnessJitter",
"kwargs": {
"sharpness": [
0.5,
1.5
]
}
},
"affine": {
"weight": 1.0,
"type": "RandomAffine",
"kwargs": {
"degrees": [
-5.0,
5.0
],
"translate": [
0.05,
0.05
]
}
}
}
},
"revision": null,
"use_imagenet_stats": true,
"video_backend": "torchcodec",
"streaming": false
},
"env": null,
"policy": {
"type": "xvla",
"n_obs_steps": 1,
"input_features": {
"observation.images.left": {
"type": "VISUAL",
"shape": [
3,
256,
256
]
},
"observation.images.right": {
"type": "VISUAL",
"shape": [
3,
256,
256
]
},
"observation.state": {
"type": "STATE",
"shape": [
8
]
},
"observation.images.top": {
"type": "VISUAL",
"shape": [
3,
224,
224
]
}
},
"output_features": {
"action": {
"type": "ACTION",
"shape": [
20
]
}
},
"device": "cuda",
"use_amp": false,
"push_to_hub": true,
"repo_id": "xvla-new",
"private": null,
"tags": null,
"license": null,
"pretrained_path": "/fsx/jade_choghari/models/xvla-base",
"chunk_size": 30,
"n_action_steps": 30,
"normalization_mapping": {
"STATE": "IDENTITY",
"ACTION": "MEAN_STD",
"VISUAL": "IDENTITY"
},
"florence_config": {
"model_type": "florence2",
"bos_token_id": 0,
"eos_token_id": 2,
"ignore_index": -100,
"pad_token_id": 1,
"projection_dim": 1024,
"text_config": {
"vocab_size": 51289,
"activation_dropout": 0.1,
"activation_function": "gelu",
"add_bias_logits": false,
"add_final_layer_norm": false,
"attention_dropout": 0.1,
"bos_token_id": 0,
"classif_dropout": 0.1,
"classifier_dropout": 0.0,
"d_model": 1024,
"decoder_attention_heads": 16,
"decoder_ffn_dim": 4096,
"decoder_layerdrop": 0.0,
"decoder_layers": 12,
"decoder_start_token_id": 2,
"dropout": 0.1,
"early_stopping": true,
"encoder_attention_heads": 16,
"encoder_ffn_dim": 4096,
"encoder_layerdrop": 0.0,
"encoder_layers": 12,
"eos_token_id": 2,
"forced_eos_token_id": 2,
"forced_bos_token_id": 0,
"gradient_checkpointing": false,
"init_std": 0.02,
"is_encoder_decoder": true,
"label2id": {
"LABEL_0": 0,
"LABEL_1": 1,
"LABEL_2": 2
},
"max_position_embeddings": 4096,
"no_repeat_ngram_size": 3,
"normalize_before": false,
"num_hidden_layers": 12,
"pad_token_id": 1,
"scale_embedding": false,
"num_beams": 3
},
"vision_config": {
"model_type": "davit",
"drop_path_rate": 0.1,
"patch_size": [
7,
3,
3,
3
],
"patch_stride": [
4,
2,
2,
2
],
"patch_padding": [
3,
1,
1,
1
],
"patch_prenorm": [
false,
true,
true,
true
],
"enable_checkpoint": false,
"dim_embed": [
256,
512,
1024,
2048
],
"num_heads": [
8,
16,
32,
64
],
"num_groups": [
8,
16,
32,
64
],
"depths": [
1,
1,
9,
1
],
"window_size": 12,
"projection_dim": 1024,
"visual_temporal_embedding": {
"type": "COSINE",
"max_temporal_embeddings": 100
},
"image_pos_embed": {
"type": "learned_abs_2d",
"max_pos_embeddings": 50
},
"image_feature_source": [
"spatial_avg_pool",
"temporal_avg_pool"
]
},
"vocab_size": 51289,
"torch_dtype": "float32",
"is_encoder_decoder": true
},
"tokenizer_name": "facebook/bart-large",
"tokenizer_max_length": 1024,
"tokenizer_padding_side": "right",
"pad_language_to": "max_length",
"hidden_size": 1024,
"depth": 24,
"num_heads": 16,
"mlp_ratio": 4.0,
"num_domains": 30,
"len_soft_prompts": 32,
"dim_time": 32,
"max_len_seq": 512,
"use_hetero_proj": false,
"action_mode": "so101_bimanual",
"num_denoising_steps": 10,
"use_proprio": true,
"max_state_dim": 20,
"domain_feature_key": null,
"resize_imgs_with_padding": [
224,
224
],
"num_image_views": 3,
"empty_cameras": 0,
"freeze_vision_encoder": true,
"freeze_language_encoder": true,
"train_policy_transformer": true,
"train_soft_prompts": true,
"optimizer_lr": 0.0001,
"optimizer_betas": [
0.9,
0.95
],
"optimizer_eps": 1e-08,
"optimizer_weight_decay": 0.0001,
"optimizer_grad_clip_norm": 10.0,
"scheduler_warmup_steps": 1000,
"scheduler_decay_steps": 30000,
"scheduler_decay_lr": 2.5e-06
},
"output_dir": "/fsx/jade_choghari/outputs/xvla_training",
"job_name": "xvla_training",
"resume": false,
"seed": 1000,
"num_workers": 4,
"batch_size": 64,
"steps": 6000,
"eval_freq": 20000,
"log_freq": 50,
"save_checkpoint": true,
"save_freq": 3000,
"use_policy_training_preset": true,
"optimizer": {
"type": "adamw",
"lr": 0.0001,
"weight_decay": 0.0001,
"grad_clip_norm": 10.0,
"betas": [
0.9,
0.95
],
"eps": 1e-08
},
"scheduler": {
"type": "cosine_decay_with_warmup",
"num_warmup_steps": 1000,
"num_decay_steps": 30000,
"peak_lr": 0.0001,
"decay_lr": 2.5e-06
},
"eval": {
"n_episodes": 50,
"batch_size": 50,
"use_async_envs": false
},
"wandb": {
"enable": true,
"disable_artifact": false,
"project": "lerobot",
"entity": null,
"notes": null,
"run_id": "j168f031",
"mode": null
},
"checkpoint_path": null,
"rename_map": {}
}