{
  "_gradient_checkpointing": true,
  "_name_or_path": "Yysrc/LIBERO-Object",
  "action_dim": 7,
  "action_model_type": "DiT-L",
  "architectures": [
    "Mantis"
  ],
  "attn_implementation": null,
  "connector_num_hidden_layers": 12,
  "diffusion_model_id": "Efficient-Large-Model/Sana_600M_512px_diffusers",
  "future_action_window_size": 4,
  "in_channels": 32,
  "input_size": 16,
  "loss_type": "flow",
  "max_input_text_tokens": 256,
  "max_timestep_gap": 6,
  "mllm_id": "Qwen/Qwen2.5-VL-3B-Instruct",
  "model_type": "mantis",
  "modules_to_freeze": [
    "vae"
  ],
  "modules_to_unfreeze": [
    "model.mllm_backbone.model.embed_tokens",
    "model.policy_head",
    "model.transformer",
    "model.connector",
    "model.mllm_backbone"
  ],
  "noise_scheduler_id": "Efficient-Large-Model/Sana_600M_512px_diffusers",
  "num_actqueries": 6,
  "num_gapqueries": 3,
  "num_metaqueries": 9,
  "past_action_window_size": 0,
  "scheduler_id": "Efficient-Large-Model/Sana_600M_512px_diffusers",
  "system_prompt": "You will be provided with an image observation and a corresponding instruction.",
  "torch_dtype": "float32",
  "training_mode": "image_action",
  "transformers_version": "4.49.0",
  "vae_downsample_f": 32,
  "vae_id": "Efficient-Large-Model/Sana_600M_512px_diffusers"
}