Lj Miranda committed on
Commit
2a65969
·
unverified ·
2 Parent(s): 6a4841e f70bf3c

Merge pull request #1 from filbench/add/parser

Browse files
.github/parse_results_workflow.yaml ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# NOTE(review): GitHub Actions only discovers workflow files under
# `.github/workflows/`. This file was added as `.github/parse_results_workflow.yaml`
# and must be moved to `.github/workflows/parse_results_workflow.yaml` to run.
name: Daily Parse Results

on:
  schedule:
    - cron: '0 0 * * *' # Runs at 00:00 UTC every day
  workflow_dispatch: # Allows manual triggering of the workflow

jobs:
  parse-results:
    runs-on: ubuntu-latest

    steps:
      - name: Checkout repository
        # v3 runs on the deprecated Node 16 runtime; v4 is the supported line.
        uses: actions/checkout@v4

      - name: Set up Python
        # v4 runs on the deprecated Node 16 runtime; v5 is the supported line.
        uses: actions/setup-python@v5
        with:
          python-version: '3.10' # Specify the Python version you need

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -r requirements.txt

      - name: Run parse_results.py
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }} # Pass the secret as an environment variable
        run: |
          python parse_results.py
parse_results.py ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Parse all results and upload it into a single leaderboard"""

import argparse
import os
import sys
import logging
import json
from typing import Any

import pandas as pd
from datasets import load_dataset, Dataset, DownloadMode
from huggingface_hub import list_datasets

# Root-logger setup: timestamped INFO lines written to stdout so the output is
# captured cleanly by CI logs (this script is driven by a scheduled workflow).
logging.basicConfig(
    format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
    datefmt="%Y-%m-%d %H:%M:%S",
    handlers=[logging.StreamHandler(sys.stdout)],
    level=logging.INFO,
)
20
+
21
+
22
def get_args():
    """Collect the command-line options for the results-aggregation run."""
    arg_parser = argparse.ArgumentParser(
        description="Parse all results from datasets of a given HF org, and upload it into a new dataset."
    )
    arg_parser.add_argument(
        "--hf_org",
        type=str,
        default="UD-Filipino",
        help="HuggingFace org to parse results from.",
    )
    arg_parser.add_argument(
        "--hf_repo_output",
        type=str,
        default="UD-Filipino/filbench-results",
        help="HuggingFace dataset to upload all parsed results.",
    )
    return arg_parser.parse_args()
29
+
30
+
31
def main():
    """Aggregate every 'details' dataset of the target org into one leaderboard.

    Raises:
        ValueError: if the HF_TOKEN environment variable is not set (required
            for the authenticated `push_to_hub` call).
    """
    args = get_args()
    # Fail fast before doing any network work: pushing requires authentication.
    if not os.getenv("HF_TOKEN"):
        raise ValueError("HF_TOKEN environment variable not set!")

    # List datasets with 'details' in their name within a given org
    datasets = [ds.id for ds in list_datasets(search="details", author=args.hf_org)]
    # Lazy %-style args: the logging module formats only if the record is emitted.
    logging.info("Found %d datasets", len(datasets))

    if not datasets:
        # Nothing to aggregate — skip rather than overwrite the output repo
        # with an empty table.
        logging.warning("No 'details' datasets found for org %s; skipping upload", args.hf_org)
        return

    parsed_results = pd.DataFrame([parse_outputs(dataset) for dataset in datasets])
    logging.info("Uploading to %s", args.hf_repo_output)
    Dataset.from_pandas(parsed_results).push_to_hub(
        repo_id=args.hf_repo_output, private=True, split="train"
    )
42
+
43
+
44
def parse_outputs(dataset_id: str) -> dict[str, Any]:
    """Parse a results dataset and return a dict of its relevant fields.

    (The original docstring said "output a dataframe"; the function actually
    returns a plain dict, which `main` then collects into a DataFrame.)

    Based from: https://huggingface.co/docs/lighteval/en/saving-and-reading-results

    Args:
        dataset_id: full Hub id of a per-model "details" results dataset.

    Returns:
        A dict with keys:
        - "config": model_name / model_dtype / model_size from the latest run.
        - "results": per-task metric dicts (the "all" aggregate entry is skipped).
        - "versions": task-version mapping merged across all runs.
    """
    logging.info("Parsing results from dataset %s", dataset_id)
    # Force a fresh download so the daily job never reads a stale local cache.
    ds = load_dataset(
        dataset_id,
        "results",
        trust_remote_code=True,
        download_mode=DownloadMode.FORCE_REDOWNLOAD,
    )

    # Save all metrics and versions for each task. If several runs report the
    # same task, the run iterated last overwrites earlier entries.
    metrics: dict[str, Any] = {}
    versions: dict[str, Any] = {}
    for run in ds.keys():
        df = ds[run].to_pandas()
        for task, result in json.loads(df.results.iloc[0]).items():
            if task != "all":  # skip the cross-task aggregate row
                metrics[task] = result

        versions.update(json.loads(df.versions.iloc[0]))

    logging.info("Found %d tasks!", len(metrics))

    # NOTE(review): assumes a "latest" split exists and holds the most recent
    # run's config, per lighteval's results layout — confirm for older dumps.
    latest_config = json.loads(ds["latest"].to_pandas().config_general.iloc[0])
    model_config = {
        "model_name": latest_config.get("model_name"),
        "model_dtype": latest_config.get("model_dtype"),
        "model_size": latest_config.get("model_size"),
    }

    return {"config": model_config, "results": metrics, "versions": versions}
73
+
74
+
75
# Script entry point: run the aggregation only when executed directly.
if __name__ == "__main__":
    main()