{
  "language_pair": "sw-en",
  "language_name": "Swahili",
  "model_name": "Helsinki-NLP/opus-mt-mul-en",
  "dataset_config": {
    "primary_dataset": "custom",
    "custom_datasets": [
      {
        "path": "/home/brenda/chl_scratch/openchs_rnd/tasks/translation/swahili/dataset/nllb_fake.jsonl",
        "name": "nllb_swahili",
        "weight": 1,
        "priority": "medium"
      },
      {
        "path": "/home/brenda/chl_scratch/openchs_rnd/tasks/translation/swahili/dataset/cc_fake.jsonl",
        "name": "ccaligned_swahili",
        "weight": 0.7,
        "priority": "medium"
      }
    ],
    "validation_split": 0.1,
    "test_split": 0.05,
    "max_samples": null,
    "shuffle": true,
    "seed": 42,
    "max_length": 500,
    "filter_length_ratio": true,
    "max_length_ratio": 2.5,
    "min_length_ratio": 0.4
  },
  "training_config": {
    "learning_rate": 3e-05,
    "batch_size": 16,
    "gradient_accumulation_steps": 4,
    "num_epochs": 0.1,
    "max_length": 256,
    "weight_decay": 0.01,
    "warmup_steps": 0,
    "warmup_ratio": 0.1,
    "lr_scheduler": "cosine",
    "save_strategy": "steps",
    "save_steps": 1000,
    "eval_strategy": "steps",
    "eval_steps": 1000,
    "logging_steps": 50,
    "early_stopping_patience": 5,
    "early_stopping_threshold": 0.001,
    "mixed_precision": "fp16",
    "gradient_checkpointing": false,
    "dataloader_num_workers": 4,
    "load_best_model_at_end": true,
    "metric_for_best_model": "eval_bleu",
    "greater_is_better": true,
    "label_smoothing": 0.1,
    "max_grad_norm": 1.0,
    "adam_beta1": 0.9,
    "adam_beta2": 0.999,
    "adam_epsilon": 1e-08
  },
  "generation_config": {
    "max_length": 256,
    "min_length": 1,
    "num_beams": 5,
    "length_penalty": 0.6,
    "early_stopping": true,
    "no_repeat_ngram_size": 4,
    "repetition_penalty": 1.5,
    "do_sample": false
  },
  "segmentation_config": {
    "enabled": true,
    "max_segment_tokens": 450,
    "overlap_tokens": 100,
    "preserve_sentences": true,
    "min_segment_tokens": 50,
    "sentence_splitter": "nltk"
  },
  "hallucination_detection": {
    "enabled": true,
    "max_length_ratio": 2.5,
    "min_length_ratio": 0.4,
    "repetition_threshold": 0.3,
    "log_hallucination_scores": false
  },
  "evaluation_config": {
    "metrics": [
      "bleu",
      "chrf",
      "comet_qe"
    ],
    "compute_comet_during_training": false,
    "test_size": 1000,
    "max_eval_samples": 5000,
    "save_predictions": true,
    "predictions_file": "./predictions/sw-en-predictions.jsonl"
  },
  "mlflow_config": {
    "tracking_uri": "http://localhost:5000",
    "experiment_name": "swahili-english-translation",
    "run_name": "sw-en-nllb-ccaligned-production",
    "log_models": true,
    "log_artifacts": true,
    "log_metrics": true
  },
  "deployment": {
    "model_output_dir": "/home/brenda/chl_scratch/translation/models/finetuned-sw-en",
    "save_tokenizer": true,
    "save_config": true,
    "create_model_card": true,
    "push_to_hub": true,
    "hub_model_id": "brendaogutu/sw-en-translation-test-3"
  },
  "system_config": {
    "cache_dir": "/home/brenda/chl_scratch/translation/.cache",
    "use_cuda": true,
    "cuda_device": "cuda:0",
    "seed": 42,
    "deterministic": true,
    "num_workers": 4
  }
}