---
# SFT training configuration (run: lightr1_3_EN_1024).
# NOTE(review): reconstructed from a markdown-table-garbled dump — the
# surrounding "|" pipes were extraction residue, not YAML. Nesting of
# `mixup`, `training_args`, and `training_type` restored from the dump's
# sorted key order; verify against the generating code.

add_think_tokens: true
# Chat-formatted companion model; the base model to fine-tune is
# `model_name` below.
chat_version: Qwen/Qwen2.5-1.5B-Instruct
custom_name: lightr1_3_EN_1024
dataset: Light-R1
dataset_args: {}  # explicit empty mapping: no dataset-specific overrides
hf_username: mveroe
max_length: 1024  # max sequence length (matches the "1024" in custom_name)
# Label-noise mixup settings — presumably false/true-positive rates for
# synthetic label corruption; confirm semantics against the training code.
mixup:
  FPR: 0.0
  TPR: 1.0
  generator_accuracy: 1.0
model_name: Qwen/Qwen2.5-1.5B
neptune_project: ethsri/label-noise
pretrained_model: true
seed: 42
# NOTE(review): top-level seed is 42 but set_seed is false — confirm whether
# seeding is intentionally disabled here (training_args carries its own seed).
set_seed: false
shuffle: true
# Field names below match transformers.TrainingArguments.
training_args:
  bf16: true
  ddp_find_unused_parameters: false
  gradient_accumulation_steps: 4  # effective batch = 4 x per_device_train_batch_size
  hub_strategy: all_checkpoints
  learning_rate: 5.0e-05
  logging_first_step: true
  logging_steps: 1
  logging_strategy: steps
  lr_scheduler_type: cosine
  num_train_epochs: 3
  optim: adafactor
  overwrite_output_dir: true
  per_device_train_batch_size: 4
  push_to_hub: true
  report_to: none  # plain `none` is the STRING "none" (disables reporters), not YAML null
  save_strategy: epoch
  seed: 42
  warmup_ratio: 0.1
# WARNING: python-specific tag — yaml.safe_load (and PyYAML's FullLoader)
# will reject python/object/apply; loading this file requires an unsafe
# loader and therefore trusts the file's origin. Kept as-is because the
# consumer evidently round-trips this enum via PyYAML.
training_type: !!python/object/apply:src.configs.TrainingTypes
  - sft
use_neptune: true