|
|
--- |
|
|
library_name: transformers |
|
|
tags: |
|
|
- unsloth |
|
|
datasets: |
|
|
- amityco/tau-bench-retail-train-next-action-medium |
|
|
--- |
|
|
``` |
|
|
|
|
|
tau train medium, 100 samples |
|
|
|
|
|
│ 🏆 Average Reward: 0.5175 │ |
|
|
│ │ |
|
|
│ 📈 Pass^k Metrics: │ |
|
|
│ k=1: 0.518 │ |
|
|
│ k=2: 0.421 │ |
|
|
|
|
|
tau train medium, 200 samples |
|
|
|
|
|
│ 🏆 Average Reward: 0.5526 │ |
|
|
│ │ |
|
|
│ 📈 Pass^k Metrics: │ |
|
|
│ k=1: 0.553 │ |
|
|
│ k=2: 0.482 │ |
|
|
|
|
|
tau train medium, 300 samples |
|
|
|
|
|
│ 🏆 Average Reward: 0.5175 │ |
|
|
│ │ |
|
|
│ 📈 Pass^k Metrics: │ |
|
|
│ k=1: 0.518 │ |
|
|
│ k=2: 0.421 │ |
|
|
``` |
|
|
|
|
|
 |
|
|
|
|
|
 |