#!/usr/bin/env python3
import os
import sys
import json
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from dotenv import load_dotenv
load_dotenv('.1.env')
import docker
from fiber.logging_utils import get_logger
logger = get_logger(__name__)
# TASK PARAMETERS
TASK_ID = "3ddab764-6692-4707-ab16-68dc1980dda7"
EXPECTED_REPO = "5e94aaaf-6210-4fba-b675-2b9158a38c11"
HOURS_TO_COMPLETE = 8
MODEL = "unsloth/llama-3-8b"
# Check the dataset
dataset_path = f"/tmp/{TASK_ID}_data.json"
if not os.path.exists(dataset_path):
    logger.error("Dataset not found! Downloading...")
    import subprocess
    download_url = "https://gradients.s3.eu-north-1.amazonaws.com/b78963b5b5728cf8_train_data.json?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=AKIAVVZOOA7SA4UOFLPI%2F20250601%2Feu-north-1%2Fs3%2Faws4_request&X-Amz-Date=20250601T125454Z&X-Amz-Expires=604800&X-Amz-SignedHeaders=host&X-Amz-Signature=41ed3471ee222560fedc10b2116d2ebdc4654fd72684f1dee9b2ae262f921d3f"
    subprocess.run(['wget', '-O', dataset_path, download_url], check=True)
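# Minimal sanity check (assumption: a valid dataset file is non-empty).
if not os.path.exists(dataset_path) or os.path.getsize(dataset_path) == 0:
    raise RuntimeError(f"Dataset missing or empty: {dataset_path}")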
# Analyze the dataset
with open(dataset_path, 'r') as f:
    data = json.load(f)
dataset_size = len(data)
logger.info(f"Dataset size: {dataset_size:,} samples")
# FIXED CONFIG - resolves the batch size and generations problem
config_content = f"""base_model: {MODEL}
model_type: AutoModelForCausalLM
tokenizer_type: AutoTokenizer
load_in_8bit: false
load_in_4bit: false
strict: false
datasets:
  - path: /workspace/input_data/
    ds_type: json
    data_files:
      - {TASK_ID}_data.json
    split: train
val_set_size: 0.01
output_dir: outputs
rl: grpo
trl:
  beta: 0.0
  max_completion_length: 384
  use_vllm: true
  num_generations: 2  # FIXED: reduced from 4 to 2
  vllm_batch_size: 256
  reward_funcs:
    - rewards_{TASK_ID}.reward_func_general
  reward_weights:
    - 1.0
sequence_len: 512
sample_packing: false
pad_to_sequence_len: true
trust_remote_code: true
adapter: lora
lora_r: 64
lora_alpha: 128
lora_dropout: 0.05
lora_target_linear: true
lora_modules_to_save: ["embed_tokens", "lm_head"]
gradient_accumulation_steps: 16
micro_batch_size: 2  # FIXED: increased from 1 to 2 (must be a multiple of num_generations)
eval_batch_size: 2  # FIXED: explicit value added
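# NOTE (based on TRL's GRPOTrainer, stated here as an assumption): the global
# generation batch, micro_batch_size x number of processes, must be divisible
# by num_generations, which the 2/2 pairing above satisfies on a single GPU.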
num_epochs: 1
optimizer: paged_adamw_8bit
lr_scheduler: cosine
learning_rate: 5e-6
train_on_inputs: false
group_by_length: true
bf16: true
tf32: true
gradient_checkpointing: true
logging_steps: 100
# Only one attention mechanism
flash_attention: true
xformers_attention: false  # FIXED: disabled so it does not conflict
wandb_project: GOD-GRPO
wandb_entity:
wandb_mode: online
wandb_runid: {TASK_ID}
wandb_name: grpo_{TASK_ID}
hub_model_id: {os.getenv('HUGGINGFACE_USERNAME')}/{EXPECTED_REPO}
hub_strategy: checkpoint
save_steps: 1000
save_strategy: steps
warmup_ratio: 0.03
eval_steps: 2000
evals_per_epoch: 2  # FIXED: reduced to save time
"""
# Save the config
config_path = f"core/config/{TASK_ID}.yml"
with open(config_path, 'w') as f:
    f.write(config_content)
logger.info(f"✅ Config saved to {config_path}")
# REWARD FUNCTION
reward_func = '''def reward_func_general(completions, prompts=None, **kwargs):
    """General purpose reward function for GRPO"""
    rewards = []
    for completion in completions:
        score = 0.0
        # Length
        length = len(completion)
        if 100 < length < 1000:
            score += 0.3
        elif 50 < length <= 100:
            score += 0.2
        elif length > 1000:
            score += 0.1
        # Structure
        if '.' in completion or '!' in completion or '?' in completion:
            score += 0.2
        # Word count
        word_count = len(completion.split())
        if word_count > 20:
            score += 0.3
        elif word_count > 10:
            score += 0.2
        # Paragraph breaks
        if '\\n' in completion:
            score += 0.2
        # Final normalization: clamp the score to [0, 1]
        rewards.append(min(1.0, max(0.0, score)))
    return rewards
'''
# Save the reward function
reward_path = f"core/config/rewards_{TASK_ID}.py"
with open(reward_path, 'w') as f:
    f.write(reward_func)
logger.info("✅ Reward function saved")
# Clean up any old container for this task
docker_client = docker.from_env()
try:
    old = docker_client.containers.get(f"grpo_{TASK_ID}")
    old.stop()
    old.remove(force=True)
    logger.info("🗑️ Removed old container")
except docker.errors.NotFound:
    pass
# LAUNCH COMMAND
bash_command = f"""
echo '=== GRPO Training FIXED VERSION ===' &&
echo 'Model: {MODEL}' &&
echo 'Dataset: {dataset_size:,} samples' &&
echo 'Time limit: {HOURS_TO_COMPLETE} hours' &&
echo 'Micro batch size: 2, Generations: 2' &&
echo '' &&
echo 'Setting up Hugging Face...' &&
huggingface-cli login --token $HUGGINGFACE_TOKEN --add-to-git-credential &&
echo 'Setting up W&B...' &&
wandb login $WANDB_TOKEN &&
echo 'Copying dataset...' &&
cp /workspace/input_data/{TASK_ID}_data.json /workspace/axolotl/ &&
echo 'Copying reward function...' &&
mkdir -p /workspace/axolotl/src &&
cp /workspace/axolotl/configs/rewards_{TASK_ID}.py /workspace/axolotl/src/ &&
echo 'Validating config...' &&
python -c "
import yaml
with open('configs/{TASK_ID}.yml') as f:
config = yaml.safe_load(f)
print(f'Micro batch size: {{config[\"micro_batch_size\"]}}')
print(f'Eval batch size: {{config.get(\"eval_batch_size\", \"default\")}}')
print(f'Num generations: {{config[\"trl\"][\"num_generations\"]}}')
print(f'Flash attention: {{config[\"flash_attention\"]}}')
print('Config validation passed!')
" &&
echo 'Starting training...' &&
cd /workspace/axolotl &&
accelerate launch -m axolotl.cli.train configs/{TASK_ID}.yml
"""
# Launch
logger.info("🚀 FIXED VERSION - Starting GRPO training!")
container = docker_client.containers.run(
    image="axolotlai/axolotl:main-py3.11-cu124-2.5.1",
    environment={
        "HUGGINGFACE_TOKEN": os.getenv("HUGGINGFACE_TOKEN"),
        "WANDB_TOKEN": os.getenv("WANDB_TOKEN"),
        "PYTORCH_CUDA_ALLOC_CONF": "max_split_size_mb:512",
        "CUDA_LAUNCH_BLOCKING": "0",  # async kernel launches for better performance
    },
    volumes={
        os.path.abspath("core/config"): {
            "bind": "/workspace/axolotl/configs",
            "mode": "rw",
        },
        os.path.abspath("core/outputs"): {
            "bind": "/workspace/axolotl/outputs",
            "mode": "rw",
        },
        "/tmp": {
            "bind": "/workspace/input_data",
            "mode": "ro",
        },
    },
    runtime="nvidia",
    device_requests=[docker.types.DeviceRequest(count=1, capabilities=[["gpu"]])],
    detach=True,
    tty=True,
    command=["/bin/bash", "-c", bash_command],
    shm_size="64g",
    ulimits=[
        docker.types.Ulimit(name='memlock', soft=-1, hard=-1),
        docker.types.Ulimit(name='stack', soft=67108864, hard=67108864),
    ],
    name=f"grpo_{TASK_ID}",
)
logger.info(f"✅ Container started: {container.name}")
logger.info(f"📋 Monitor: docker logs -f grpo_{TASK_ID}")
logger.info("🔧 Key fixes applied:")
logger.info(" - micro_batch_size: 1 → 2")
logger.info(" - eval_batch_size: added explicit value 2")
logger.info(" - num_generations: 4 → 2")
logger.info(" - xformers_attention: disabled to avoid conflicts")
logger.info(" - Added config validation step")