seawolf2357 committed
Commit 23f9fc2 Β· verified Β· 1 Parent(s): c381ead

Update app.py

Files changed (1):
  1. app.py +50 -5

app.py CHANGED
@@ -48,7 +48,8 @@ from huggingface_hub import HfApi, create_repo
 DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
 NUM_GPUS = torch.cuda.device_count()
 
-STORAGE_PATH = "/data"
+# βœ… Ubuntu-compatible: use the home directory or an environment variable
+STORAGE_PATH = os.getenv("PHOENIX_STORAGE_PATH", str(Path.home() / "phoenix_data"))
 DB_PATH = f"{STORAGE_PATH}/phoenix_experiments.db"
 MODELS_PATH = f"{STORAGE_PATH}/phoenix_models"
 DEFAULT_MODEL = "Qwen/Qwen3-0.6B"
@@ -56,8 +57,19 @@ DEFAULT_MODEL = "Qwen/Qwen3-0.6B"
 # HuggingFace Token
 HF_TOKEN = os.getenv("HF_TOKEN")
 
-Path(STORAGE_PATH).mkdir(parents=True, exist_ok=True)
-Path(MODELS_PATH).mkdir(parents=True, exist_ok=True)
+# Create the storage directories (handle permission errors)
+try:
+    Path(STORAGE_PATH).mkdir(parents=True, exist_ok=True)
+    Path(MODELS_PATH).mkdir(parents=True, exist_ok=True)
+    print(f"βœ… Storage initialized: {STORAGE_PATH}")
+except PermissionError:
+    print(f"⚠️ Permission denied for {STORAGE_PATH}")
+    print(f" Using current directory instead")
+    STORAGE_PATH = "./phoenix_data"
+    DB_PATH = f"{STORAGE_PATH}/phoenix_experiments.db"
+    MODELS_PATH = f"{STORAGE_PATH}/phoenix_models"
+    Path(STORAGE_PATH).mkdir(parents=True, exist_ok=True)
+    Path(MODELS_PATH).mkdir(parents=True, exist_ok=True)
 
 print(f"πŸ”₯ PHOENIX Platform v2.0 - Multi-GPU Optimized")
 print(f"πŸ’Ύ Storage: {STORAGE_PATH}")
@@ -486,12 +498,16 @@ def finetune_retention_model(
     num_steps: int = 3000,
     batch_size: int = 4,
     learning_rate: float = 1e-5,
-    output_dir: str = "/data/finetuning_temp",
+    output_dir: str = None,
     use_gradient_checkpointing: bool = True,
 ):
     """
     πŸ†• v2.0: Brumby-style Retraining with Multi-GPU Support
     """
+    # Set the output_dir default
+    if output_dir is None:
+        output_dir = f"{STORAGE_PATH}/finetuning_temp"
+
     print("\n" + "="*80)
     print("πŸ”₯ PHOENIX RETRAINING - Multi-GPU (v2.0)")
     print("="*80)
@@ -1513,5 +1529,34 @@ with gr.Blocks(title="πŸ”₯ PHOENIX v2.0 Multi-GPU", theme=gr.themes.Soft()) as d
 
 
 if __name__ == "__main__":
+    import argparse
+
+    parser = argparse.ArgumentParser(description='PHOENIX v2.0 Multi-GPU')
+    parser.add_argument('--port', type=int, default=None, help='Server port (default: auto find 7860-7960)')
+    parser.add_argument('--share', action='store_true', help='Create public Gradio link')
+    parser.add_argument('--host', type=str, default="0.0.0.0", help='Server host')
+    args = parser.parse_args()
+
     demo.queue(max_size=20)
-    demo.launch(server_name="0.0.0.0", server_port=7860, share=False)
+
+    # Auto-find an open port
+    if args.port is None:
+        # Try ports 7860 through 7960
+        for port in range(7860, 7960):
+            try:
+                demo.launch(
+                    server_name=args.host,
+                    server_port=port,
+                    share=args.share,
+                    show_error=True
+                )
+                break
+            except OSError:
+                continue
+    else:
+        demo.launch(
+            server_name=args.host,
+            server_port=args.port,
+            share=args.share,
+            show_error=True
+        )
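
With the argparse block in place the app can be started as, for example, python app.py --port 7861 --share. The auto-port loop relies on demo.launch() raising OSError when the requested port is busy; if that coupling to Gradio's error behaviour is undesirable, one alternative is to probe for a free port with the standard library first (a sketch, not part of this commit):

import socket

def find_open_port(start: int = 7860, end: int = 7960) -> int:
    # Try to bind each candidate port; the first one that binds is treated as free.
    for port in range(start, end):
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
            try:
                sock.bind(("0.0.0.0", port))
                return port
            except OSError:
                continue
    raise RuntimeError(f"No free port found in {start}-{end}")

# Usage sketch: demo.launch(server_name=args.host, server_port=find_open_port(), share=args.share)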