|
|
|
|
|
import os


import sys


import json


# Make the project root importable when this script is run directly
# (adds the parent of this file's directory to the module search path).
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
|
|
|
|
from dotenv import load_dotenv


# Load secrets (HUGGINGFACE_TOKEN, WANDB_TOKEN, HUGGINGFACE_USERNAME) from a
# local env file. NOTE(review): '.1.env' is a nonstandard name — confirm it is
# intended over the usual '.env'.
load_dotenv('.1.env')
|
|
|
|
|
import docker


from fiber.logging_utils import get_logger


# Module-level logger configured by the project's fiber logging utilities.
logger = get_logger(__name__)
|
|
|
|
|
|
|
|
# Gradients task identifier — used to name the dataset file, the rendered
# config, the reward module, and the Docker container for this run.
TASK_ID = "3ddab764-6692-4707-ab16-68dc1980dda7"


# Target Hugging Face repo name (combined with HUGGINGFACE_USERNAME in the
# rendered config's hub_model_id).
EXPECTED_REPO = "5e94aaaf-6210-4fba-b675-2b9158a38c11"


# Wall-clock budget; only echoed into the container's startup banner — nothing
# here enforces it.
HOURS_TO_COMPLETE = 8


# Base model to fine-tune with GRPO.
MODEL = "unsloth/llama-3-8b"
|
|
|
|
|
|
|
|
# Local cache path for the training data; also bind-mounted into the
# container as /workspace/input_data/<TASK_ID>_data.json.
dataset_path = f"/tmp/{TASK_ID}_data.json"

if not os.path.exists(dataset_path):
    logger.error("Dataset not found! Downloading...")

    import subprocess

    # NOTE(review): pre-signed S3 URL (X-Amz-Expires=604800, i.e. 7 days from
    # X-Amz-Date=20250601) — it will have expired; refresh before reuse.
    download_url = "https://gradients.s3.eu-north-1.amazonaws.com/b78963b5b5728cf8_train_data.json?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=AKIAVVZOOA7SA4UOFLPI%2F20250601%2Feu-north-1%2Fs3%2Faws4_request&X-Amz-Date=20250601T125454Z&X-Amz-Expires=604800&X-Amz-SignedHeaders=host&X-Amz-Signature=41ed3471ee222560fedc10b2116d2ebdc4654fd72684f1dee9b2ae262f921d3f"

    # check=True: fail fast with CalledProcessError here rather than crashing
    # later with a confusing FileNotFoundError/JSONDecodeError when the
    # download silently failed (the original ignored wget's exit status).
    subprocess.run(['wget', '-O', dataset_path, download_url], check=True)
|
|
|
|
|
|
|
|
# Load the training set into memory and report how many samples it holds.
with open(dataset_path, 'r') as dataset_file:
    raw_json = dataset_file.read()
data = json.loads(raw_json)

dataset_size = len(data)
logger.info(f"Dataset size: {dataset_size:,} samples")
|
|
|
|
|
|
|
|
# Axolotl GRPO training config, rendered to core/config/<TASK_ID>.yml and
# bind-mounted into the container as configs/<TASK_ID>.yml.
# NOTE: this is a runtime string — the YAML comments below (including the
# Russian "ИСПРАВЛЕНО" notes) are written to the config file verbatim.
config_content = f"""base_model: {MODEL}


model_type: AutoModelForCausalLM


tokenizer_type: AutoTokenizer





load_in_8bit: false


load_in_4bit: false


strict: false





datasets:


  - path: /workspace/input_data/


    ds_type: json


    data_files:


      - {TASK_ID}_data.json


    split: train





val_set_size: 0.01


output_dir: outputs





rl: grpo





trl:


  beta: 0.0


  max_completion_length: 384


  use_vllm: True


  num_generations: 2 # ИСПРАВЛЕНО: уменьшено с 4 до 2


  vllm_batch_size: 256


  reward_funcs:


    - rewards_{TASK_ID}.reward_func_general


  reward_weights:


    - 1.0





sequence_len: 512


sample_packing: false


pad_to_sequence_len: true


trust_remote_code: true





adapter: lora


lora_r: 64


lora_alpha: 128


lora_dropout: 0.05


lora_target_linear: true


lora_modules_to_save: ["embed_tokens", "lm_head"]





gradient_accumulation_steps: 16


micro_batch_size: 2 # ИСПРАВЛЕНО: увеличено с 1 до 2 (должно быть кратно num_generations)


eval_batch_size: 2 # ИСПРАВЛЕНО: добавлено явное значение


num_epochs: 1


optimizer: paged_adamw_8bit


lr_scheduler: cosine


learning_rate: 5e-6





train_on_inputs: false


group_by_length: true


bf16: true


tf32: true





gradient_checkpointing: true


logging_steps: 100





# Только один attention механизм


flash_attention: true


xformers_attention: false # ИСПРАВЛЕНО: отключаем чтобы не конфликтовал





wandb_project: GOD-GRPO


wandb_entity:


wandb_mode: online


wandb_runid: {TASK_ID}


wandb_name: grpo_{TASK_ID}





hub_model_id: {os.getenv('HUGGINGFACE_USERNAME')}/{EXPECTED_REPO}


hub_strategy: checkpoint





save_steps: 1000


save_strategy: steps


warmup_ratio: 0.03


eval_steps: 2000


evals_per_epoch: 2 # ИСПРАВЛЕНО: уменьшено для экономии времени


"""
|
|
|
|
|
|
|
|
# Persist the rendered Axolotl config where the container bind-mounts it
# (host core/config -> container /workspace/axolotl/configs).
config_path = f"core/config/{TASK_ID}.yml"
# Create the directory first — on a fresh checkout 'core/config' may not
# exist and open(..., 'w') would raise FileNotFoundError.
os.makedirs(os.path.dirname(config_path), exist_ok=True)
with open(config_path, 'w') as f:
    f.write(config_content)
logger.info(f"✅ Config saved to {config_path}")
|
|
|
|
|
|
|
|
# Source code of the GRPO reward module, written to disk below and imported by
# Axolotl inside the container as rewards_<TASK_ID>.reward_func_general.
# NOTE: runtime string — its contents (including the Russian section comments)
# are emitted to the generated .py file verbatim.
reward_func = '''def reward_func_general(completions, prompts=None, **kwargs):


    """General purpose reward function for GRPO"""


    rewards = []




    for completion in completions:


        score = 0.0




        # Длина


        length = len(completion)


        if 100 < length < 1000:


            score += 0.3


        elif 50 < length <= 100:


            score += 0.2


        elif length > 1000:


            score += 0.1




        # Структура


        if '.' in completion or '!' in completion or '?' in completion:


            score += 0.2




        # Слова


        word_count = len(completion.split())


        if word_count > 20:


            score += 0.3


        elif word_count > 10:


            score += 0.2




        # Параграфы


        if '\\n' in completion:


            score += 0.2




        # Финальная нормализация


        rewards.append(min(1.0, max(0.0, score)))




    return rewards


'''
|
|
|
|
|
|
|
|
# Write the reward module next to the config; the container later copies it
# into /workspace/axolotl/src so Axolotl can import it by module name.
reward_path = f"core/config/rewards_{TASK_ID}.py"
with open(reward_path, 'w') as reward_file:
    reward_file.write(reward_func)
logger.info("✅ Reward function saved")
|
|
|
|
|
|
|
|
docker_client = docker.from_env()

# Remove any leftover container from a previous run so the fixed name
# grpo_<TASK_ID> is free for containers.run() below.
try:
    old = docker_client.containers.get(f"grpo_{TASK_ID}")
    old.stop()
    old.remove(force=True)
    logger.info("🗑️ Removed old container")
except docker.errors.NotFound:
    # No previous container — nothing to clean up.
    pass
except docker.errors.APIError as err:
    # The original bare `except: pass` hid every failure (including
    # KeyboardInterrupt). Surface real Docker errors; the run below will
    # still fail loudly on a name clash if removal genuinely didn't happen.
    logger.warning(f"Could not remove old container: {err}")
|
|
|
|
|
|
|
|
# Shell pipeline executed inside the container: log banner -> HF/W&B login ->
# copy dataset and reward module into place -> sanity-check the YAML with an
# inline python snippet -> launch training via accelerate.
# NOTE: runtime string — every line below is emitted to bash verbatim; the
# `{{...}}` doubles escape braces for the inline python f-strings, and the
# relative 'configs/...' paths rely on the image's working directory being
# /workspace/axolotl (the explicit `cd` only happens before training —
# TODO confirm the image's WORKDIR).
bash_command = f"""


echo '=== GRPO Training FIXED VERSION ===' &&


echo 'Model: {MODEL}' &&


echo 'Dataset: {dataset_size:,} samples' &&


echo 'Time limit: {HOURS_TO_COMPLETE} hours' &&


echo 'Micro batch size: 2, Generations: 2' &&


echo '' &&


echo 'Setting up Hugging Face...' &&


huggingface-cli login --token $HUGGINGFACE_TOKEN --add-to-git-credential &&


echo 'Setting up W&B...' &&


wandb login $WANDB_TOKEN &&


echo 'Copying dataset...' &&


cp /workspace/input_data/{TASK_ID}_data.json /workspace/axolotl/ &&


echo 'Copying reward function...' &&


mkdir -p /workspace/axolotl/src &&


cp /workspace/axolotl/configs/rewards_{TASK_ID}.py /workspace/axolotl/src/ &&


echo 'Validating config...' &&


python -c "


import yaml


with open('configs/{TASK_ID}.yml') as f:


    config = yaml.safe_load(f)


print(f'Micro batch size: {{config[\"micro_batch_size\"]}}')


print(f'Eval batch size: {{config.get(\"eval_batch_size\", \"default\")}}')


print(f'Num generations: {{config[\"trl\"][\"num_generations\"]}}')


print(f'Flash attention: {{config[\"flash_attention\"]}}')


print('Config validation passed!')


" &&


echo 'Starting training...' &&


cd /workspace/axolotl &&


accelerate launch -m axolotl.cli.train configs/{TASK_ID}.yml


"""
|
|
|
|
|
|
|
|
logger.info("🚀 FIXED VERSION - Starting GRPO training!")

# Credentials plus CUDA allocator tuning forwarded into the container.
training_env = {
    "HUGGINGFACE_TOKEN": os.getenv("HUGGINGFACE_TOKEN"),
    "WANDB_TOKEN": os.getenv("WANDB_TOKEN"),
    "PYTORCH_CUDA_ALLOC_CONF": "max_split_size_mb:512",
    "CUDA_LAUNCH_BLOCKING": "0",
}

# Host -> container bind mounts: rendered configs (rw), training outputs (rw),
# and /tmp holding the dataset (ro).
bind_mounts = {
    os.path.abspath("core/config"): {
        "bind": "/workspace/axolotl/configs",
        "mode": "rw",
    },
    os.path.abspath("core/outputs"): {
        "bind": "/workspace/axolotl/outputs",
        "mode": "rw",
    },
    "/tmp": {
        "bind": "/workspace/input_data",
        "mode": "ro",
    },
}

# Request a single GPU via the nvidia runtime.
gpu_request = docker.types.DeviceRequest(count=1, capabilities=[["gpu"]])

# Launch detached; progress is followed via `docker logs`.
container = docker_client.containers.run(
    image="axolotlai/axolotl:main-py3.11-cu124-2.5.1",
    environment=training_env,
    volumes=bind_mounts,
    runtime="nvidia",
    device_requests=[gpu_request],
    detach=True,
    tty=True,
    command=["/bin/bash", "-c", bash_command],
    shm_size="64g",
    ulimits=[
        docker.types.Ulimit(name='memlock', soft=-1, hard=-1),
        docker.types.Ulimit(name='stack', soft=67108864, hard=67108864),
    ],
    name=f"grpo_{TASK_ID}",
)
|
|
|
|
|
# Final status report: how to monitor the detached run, and a summary of the
# configuration changes this "fixed version" applies relative to the previous
# attempt (they mirror the ИСПРАВЛЕНО comments inside the rendered YAML).
logger.info(f"✅ Container started: {container.name}")


logger.info(f"📋 Monitor: docker logs -f grpo_{TASK_ID}")


logger.info("🔧 Key fixes applied:")


logger.info("    - micro_batch_size: 1 → 2")


logger.info("    - eval_batch_size: added explicit value 2")


logger.info("    - num_generations: 4 → 2")


logger.info("    - xformers_attention: disabled to avoid conflicts")


logger.info("    - Added config validation step")
|
|
|