use_wandb = 1
seed = 3
style = "dit"
d_adapter = 768
normalize_embeddings = 1
depth = 3
embs = ["gte", "gtr", "stella", "sentence-t5", "e5", "sbert", "clip", "stella-big", "snowflake", "ember"]
n_embs_per_batch = 2
max_seq_length = 512
depth_transform = 6
lr = 5e-5
bs = 64
save_every = 400
epochs = 5.0
dataset = "nomic_unsupervised"
max_grad_norm = 10.0
gradient_accumulation_steps = 1
loss_coefficient_vsp = 0
loss_coefficient_contrastive = 1
loss_coefficient_trans = 1
loss_coefficient_cc = 0
eval_steps = 99999999999999999
cluster_size = 1024
cluster_strategy = "cluster_within_domain"
warmup_steps = 100
wandb_project = "edx-2"
wandb_name = "dit-pretrain-md-3"
save_dir = "checkpoints/{}/"
state_dict_dir = "checkpoints/dit-pretrain-md-2/model.pt"
num_params = 1140467800
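For reference, a minimal sketch of consuming this file from Python, assuming it is saved as config.toml (the path and the use of the "{}" placeholder in save_dir are assumptions, not taken from the training code; tomllib is in the standard library as of Python 3.11):

```python
import tomllib  # stdlib TOML parser, Python 3.11+

# Hypothetical path; adjust to wherever this config actually lives.
with open("config.toml", "rb") as f:  # tomllib requires binary mode
    cfg = tomllib.load(f)

# The "{}" in save_dir suggests it is formatted with the run name;
# this is an assumption about how the training code resolves it.
save_dir = cfg["save_dir"].format(cfg["wandb_name"])

print(cfg["style"], cfg["lr"], save_dir)
# dit 5e-05 checkpoints/dit-pretrain-md-3/
```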