#!/usr/bin/env python3
"""Launch a GRPO fine-tuning run inside an Axolotl Docker container.

The script performs these steps, in order:

1. Loads environment variables from ``.1.env`` and checks that the
   Hugging Face / W&B credentials it needs are present.
2. Makes sure the task dataset exists under ``/tmp`` (downloading it with
   ``wget`` if it does not) and counts its samples.
3. Writes the Axolotl YAML config and the reward-function module into
   ``core/config``.
4. Removes any previous container with the same name.
5. Starts a detached GPU container that logs in to Hugging Face and W&B,
   validates the config and runs ``accelerate launch -m axolotl.cli.train``.

All paths under ``core/`` are relative to the current working directory.
"""
import json
import os
import shlex
import subprocess
import sys

# Make the repository root importable before pulling in project modules.
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

from dotenv import load_dotenv

load_dotenv('.1.env')

import docker
from fiber.logging_utils import get_logger

logger = get_logger(__name__)

# TASK PARAMETERS
TASK_ID = "3ddab764-6692-4707-ab16-68dc1980dda7"
EXPECTED_REPO = "5e94aaaf-6210-4fba-b675-2b9158a38c11"
HOURS_TO_COMPLETE = 8
MODEL = "unsloth/llama-3-8b"

# Host directories that are bind-mounted into the container.
CONFIG_DIR = "core/config"
OUTPUT_DIR = "core/outputs"
CONTAINER_NAME = f"grpo_{TASK_ID}"

# Fail fast when credentials are missing: otherwise the config would contain
# "hub_model_id: None/<repo>" and the logins inside the container would fail.
REQUIRED_ENV_VARS = ("HUGGINGFACE_USERNAME", "HUGGINGFACE_TOKEN", "WANDB_TOKEN")
missing_env = [name for name in REQUIRED_ENV_VARS if not os.getenv(name)]
if missing_env:
    logger.error(f"Missing required environment variables: {', '.join(missing_env)}")
    sys.exit(1)

hf_username = os.getenv('HUGGINGFACE_USERNAME')

# Check the dataset, downloading it if it is not on disk yet.
dataset_path = f"/tmp/{TASK_ID}_data.json"
if not os.path.exists(dataset_path):
    logger.warning("Dataset not found! Downloading...")
    # NOTE: this is a pre-signed URL (X-Amz-Date=20250601T125454Z,
    # X-Amz-Expires=604800); it stops working once that window has passed.
    download_url = (
        "https://gradients.s3.eu-north-1.amazonaws.com/b78963b5b5728cf8_train_data.json"
        "?X-Amz-Algorithm=AWS4-HMAC-SHA256"
        "&X-Amz-Credential=AKIAVVZOOA7SA4UOFLPI%2F20250601%2Feu-north-1%2Fs3%2Faws4_request"
        "&X-Amz-Date=20250601T125454Z"
        "&X-Amz-Expires=604800"
        "&X-Amz-SignedHeaders=host"
        "&X-Amz-Signature=41ed3471ee222560fedc10b2116d2ebdc4654fd72684f1dee9b2ae262f921d3f"
    )
    try:
        subprocess.run(['wget', '-O', dataset_path, download_url], check=True)
    except (subprocess.CalledProcessError, OSError) as err:
        # "wget -O" creates the output file even when the download fails;
        # remove it so the next run does not mistake it for a valid dataset.
        if os.path.exists(dataset_path):
            os.remove(dataset_path)
        logger.error(f"Dataset download failed: {err}")
        raise

# Analyse the dataset.
with open(dataset_path, 'r', encoding='utf-8') as f:
    data = json.load(f)

dataset_size = len(data)
logger.info(f"Dataset size: {dataset_size:,} samples")

# FIXED CONFIG - resolves the batch size / generations problem.
config_content = f"""base_model: {MODEL}
model_type: AutoModelForCausalLM
tokenizer_type: AutoTokenizer
load_in_8bit: false
load_in_4bit: false
strict: false

datasets:
  - path: /workspace/input_data/
    ds_type: json
    data_files:
      - {TASK_ID}_data.json
    split: train

val_set_size: 0.01
output_dir: outputs

rl: grpo
trl:
  beta: 0.0
  max_completion_length: 384
  use_vllm: True
  num_generations: 2 # ИСПРАВЛЕНО: уменьшено с 4 до 2
  vllm_batch_size: 256
  reward_funcs:
    - rewards_{TASK_ID}.reward_func_general
  reward_weights:
    - 1.0

sequence_len: 512
sample_packing: false
pad_to_sequence_len: true
trust_remote_code: true

adapter: lora
lora_r: 64
lora_alpha: 128
lora_dropout: 0.05
lora_target_linear: true
lora_modules_to_save: ["embed_tokens", "lm_head"]

gradient_accumulation_steps: 16
micro_batch_size: 2 # ИСПРАВЛЕНО: увеличено с 1 до 2 (должно быть кратно num_generations)
eval_batch_size: 2 # ИСПРАВЛЕНО: добавлено явное значение
num_epochs: 1
optimizer: paged_adamw_8bit
lr_scheduler: cosine
learning_rate: 5e-6

train_on_inputs: false
group_by_length: true
bf16: true
tf32: true

gradient_checkpointing: true
logging_steps: 100

# Только один attention механизм
flash_attention: true
xformers_attention: false # ИСПРАВЛЕНО: отключаем чтобы не конфликтовал

wandb_project: GOD-GRPO
wandb_entity:
wandb_mode: online
wandb_runid: {TASK_ID}
wandb_name: grpo_{TASK_ID}

hub_model_id: {hf_username}/{EXPECTED_REPO}
hub_strategy: checkpoint

save_steps: 1000
save_strategy: steps
warmup_ratio: 0.03
eval_steps: 2000
evals_per_epoch: 2 # ИСПРАВЛЕНО: уменьшено для экономии времени
"""

# The bind-mount sources must exist before files are written / mounted.
os.makedirs(CONFIG_DIR, exist_ok=True)
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Save the config. UTF-8 is explicit because the YAML carries non-ASCII comments.
config_path = f"{CONFIG_DIR}/{TASK_ID}.yml"
with open(config_path, 'w', encoding='utf-8') as f:
    f.write(config_content)
logger.info(f"✅ Config saved to {config_path}")

# REWARD FUNCTION (source text of the module written next to the config).
# '\\n' below is deliberately double-escaped so the generated file contains
# the two-character escape '\n' rather than a raw line break.
reward_func = '''def reward_func_general(completions, prompts=None, **kwargs):
    """General purpose reward function for GRPO"""
    rewards = []

    for completion in completions:
        score = 0.0

        # Длина
        length = len(completion)
        if 100 < length < 1000:
            score += 0.3
        elif 50 < length <= 100:
            score += 0.2
        elif length >= 1000:
            score += 0.1

        # Структура
        if '.' in completion or '!' in completion or '?' in completion:
            score += 0.2

        # Слова
        word_count = len(completion.split())
        if word_count > 20:
            score += 0.3
        elif word_count > 10:
            score += 0.2

        # Параграфы
        if '\\n' in completion:
            score += 0.2

        # Финальная нормализация
        rewards.append(min(1.0, max(0.0, score)))

    return rewards
'''

# Save the reward function (UTF-8 so the container's Python can parse it).
reward_path = f"{CONFIG_DIR}/rewards_{TASK_ID}.py"
with open(reward_path, 'w', encoding='utf-8') as f:
    f.write(reward_func)
logger.info("✅ Reward function saved")

# Clean up an old container with the same name, if there is one.
docker_client = docker.from_env()
try:
    old = docker_client.containers.get(CONTAINER_NAME)
    old.stop()
    old.remove(force=True)
    logger.info("🗑️ Removed old container")
except docker.errors.NotFound:
    # Nothing to clean up.
    pass
except docker.errors.APIError as err:
    # Cleanup stays best-effort, but the failure is no longer hidden.
    logger.warning(f"Could not remove old container {CONTAINER_NAME}: {err}")

# Python snippet executed inside the container to sanity-check the config.
# It is kept separate and shell-quoted below: embedding it directly inside a
# double-quoted `python -c "..."` lets bash strip the inner double quotes,
# which turns config["micro_batch_size"] into config[micro_batch_size].
validation_script = f"""import yaml
with open('configs/{TASK_ID}.yml') as f:
    config = yaml.safe_load(f)
print(f'Micro batch size: {{config["micro_batch_size"]}}')
print(f'Eval batch size: {{config.get("eval_batch_size", "default")}}')
print(f'Num generations: {{config["trl"]["num_generations"]}}')
print(f'Flash attention: {{config["flash_attention"]}}')
print('Config validation passed!')
"""

# LAUNCH COMMAND (every step is chained with && so the first failure aborts).
bash_command = f"""
echo '=== GRPO Training FIXED VERSION ===' &&
echo 'Model: {MODEL}' &&
echo 'Dataset: {dataset_size:,} samples' &&
echo 'Time limit: {HOURS_TO_COMPLETE} hours' &&
echo 'Micro batch size: 2, Generations: 2' &&
echo '' &&
echo 'Setting up Hugging Face...' &&
huggingface-cli login --token $HUGGINGFACE_TOKEN --add-to-git-credential &&
echo 'Setting up W&B...' &&
wandb login $WANDB_TOKEN &&
echo 'Copying dataset...' &&
cp /workspace/input_data/{TASK_ID}_data.json /workspace/axolotl/ &&
echo 'Copying reward function...' &&
mkdir -p /workspace/axolotl/src &&
cp /workspace/axolotl/configs/rewards_{TASK_ID}.py /workspace/axolotl/src/ &&
echo 'Validating config...' &&
python -c {shlex.quote(validation_script)} &&
echo 'Starting training...' &&
cd /workspace/axolotl &&
accelerate launch -m axolotl.cli.train configs/{TASK_ID}.yml
"""

# Launch.
logger.info("🚀 FIXED VERSION - Starting GRPO training!")

container = docker_client.containers.run(
    image="axolotlai/axolotl:main-py3.11-cu124-2.5.1",
    environment={
        "HUGGINGFACE_TOKEN": os.getenv("HUGGINGFACE_TOKEN"),
        "WANDB_TOKEN": os.getenv("WANDB_TOKEN"),
        "PYTORCH_CUDA_ALLOC_CONF": "max_split_size_mb:512",
        "CUDA_LAUNCH_BLOCKING": "0",  # For better performance
    },
    volumes={
        os.path.abspath(CONFIG_DIR): {
            "bind": "/workspace/axolotl/configs",
            "mode": "rw",
        },
        os.path.abspath(OUTPUT_DIR): {
            "bind": "/workspace/axolotl/outputs",
            "mode": "rw",
        },
        # The dataset lives in /tmp on the host; mounted read-only.
        "/tmp": {
            "bind": "/workspace/input_data",
            "mode": "ro",
        },
    },
    runtime="nvidia",
    device_requests=[docker.types.DeviceRequest(count=1, capabilities=[["gpu"]])],
    detach=True,
    tty=True,
    command=["/bin/bash", "-c", bash_command],
    shm_size="64g",
    ulimits=[
        docker.types.Ulimit(name='memlock', soft=-1, hard=-1),
        docker.types.Ulimit(name='stack', soft=67108864, hard=67108864),
    ],
    name=CONTAINER_NAME,
)

logger.info(f"✅ Container started: {container.name}")
logger.info(f"📋 Monitor: docker logs -f grpo_{TASK_ID}")
logger.info("🔧 Key fixes applied:")
logger.info(" - micro_batch_size: 1 → 2")
logger.info(" - eval_batch_size: added explicit value 2")
logger.info(" - num_generations: 4 → 2")
logger.info(" - xformers_attention: disabled to avoid conflicts")
logger.info(" - Added config validation step")