{ "language_pair": "sw-en", "language_name": "Swahili", "model_name": "Helsinki-NLP/opus-mt-mul-en", "dataset_config": { "primary_dataset": "custom", "custom_datasets": [ { "path": "/home/brenda/chl_scratch/openchs_rnd/tasks/translation/swahili/dataset/nllb_fake.jsonl", "name": "nllb_swahili", "weight": 1, "priority": "medium" }, { "path": "/home/brenda/chl_scratch/openchs_rnd/tasks/translation/swahili/dataset/cc_fake.jsonl", "name": "ccaligned_swahili", "weight": 0.7, "priority": "medium" } ], "validation_split": 0.1, "test_split": 0.05, "max_samples": null, "shuffle": true, "seed": 42, "max_length": 500, "filter_length_ratio": true, "max_length_ratio": 2.5, "min_length_ratio": 0.4 }, "training_config": { "learning_rate": 3e-05, "batch_size": 16, "gradient_accumulation_steps": 4, "num_epochs": 0.1, "max_length": 256, "weight_decay": 0.01, "warmup_steps": 0, "warmup_ratio": 0.1, "lr_scheduler": "cosine", "save_strategy": "steps", "save_steps": 1000, "eval_strategy": "steps", "eval_steps": 1000, "logging_steps": 50, "early_stopping_patience": 5, "early_stopping_threshold": 0.001, "mixed_precision": "fp16", "gradient_checkpointing": false, "dataloader_num_workers": 4, "load_best_model_at_end": true, "metric_for_best_model": "eval_bleu", "greater_is_better": true, "label_smoothing": 0.1, "max_grad_norm": 1.0, "adam_beta1": 0.9, "adam_beta2": 0.999, "adam_epsilon": 1e-08 }, "generation_config": { "max_length": 256, "min_length": 1, "num_beams": 5, "length_penalty": 0.6, "early_stopping": true, "no_repeat_ngram_size": 4, "repetition_penalty": 1.5, "do_sample": false }, "segmentation_config": { "enabled": true, "max_segment_tokens": 450, "overlap_tokens": 100, "preserve_sentences": true, "min_segment_tokens": 50, "sentence_splitter": "nltk" }, "hallucination_detection": { "enabled": true, "max_length_ratio": 2.5, "min_length_ratio": 0.4, "repetition_threshold": 0.3, "log_hallucination_scores": false }, "evaluation_config": { "metrics": [ "bleu", "chrf", "comet_qe" ], 
"compute_comet_during_training": false, "test_size": 1000, "max_eval_samples": 5000, "save_predictions": true, "predictions_file": "./predictions/sw-en-predictions.jsonl" }, "mlflow_config": { "tracking_uri": "http://localhost:5000", "experiment_name": "swahili-english-translation", "run_name": "sw-en-nllb-ccaligned-production", "log_models": true, "log_artifacts": true, "log_metrics": true }, "deployment": { "model_output_dir": "/home/brenda/chl_scratch/translation/models/finetuned-sw-en", "save_tokenizer": true, "save_config": true, "create_model_card": true, "push_to_hub": true, "hub_model_id": "brendaogutu/sw-en-translation-test-3" }, "system_config": { "cache_dir": "/home/brenda/chl_scratch/translation/.cache", "use_cuda": true, "cuda_device": "cuda:0", "seed": 42, "deterministic": true, "num_workers": 4 } }