ljvmiranda921 committed on
Commit
a38a5b4
·
1 Parent(s): 6a4841e

Add results parser

Browse files
Files changed (1) hide show
  1. parse_results.py +76 -0
parse_results.py ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Parse all results and upload it into a single leaderboard"""
2
+
3
+ import argparse
4
+ import os
5
+ import sys
6
+ import logging
7
+ import json
8
+ from typing import Any
9
+
10
+ import pandas as pd
11
+ from datasets import load_dataset, Dataset, DownloadMode
12
+ from huggingface_hub import list_datasets
13
+
14
# Log INFO and above to stdout, with a timestamped record format.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
    datefmt="%Y-%m-%d %H:%M:%S",
    handlers=[logging.StreamHandler(sys.stdout)],
)
20
+
21
+
22
def get_args():
    """Build the CLI parser and return the parsed arguments.

    Options:
        --hf_org: HuggingFace org whose result datasets are scanned.
        --hf_repo_output: dataset repo the merged leaderboard is pushed to.
    """
    # fmt: off
    arg_parser = argparse.ArgumentParser(description="Parse all results from datasets of a given HF org, and upload it into a new dataset.")
    arg_parser.add_argument("--hf_org", type=str, default="UD-Filipino", help="HuggingFace org to parse results from.")
    arg_parser.add_argument("--hf_repo_output", type=str, default="UD-Filipino/filbench-results", help="HuggingFace dataset to upload all parsed results.")
    # fmt: on
    parsed = arg_parser.parse_args()
    return parsed
29
+
30
+
31
def main():
    """Collect results from every matching dataset in the org and push one dataset.

    Raises:
        ValueError: if the HF_TOKEN environment variable is not set (required
            by push_to_hub) — fail fast before any network calls.
    """
    args = get_args()
    if not os.getenv("HF_TOKEN"):
        raise ValueError("HF_TOKEN environment variable not set!")

    # List datasets with 'details' in their name within a given org
    detail_repos = [ds.id for ds in list_datasets(search="details", author=args.hf_org)]
    logging.info(f"Found {len(detail_repos)} datasets")

    # One parsed row per source dataset
    rows = []
    for repo_id in detail_repos:
        rows.append(parse_outputs(repo_id))
    parsed_results = pd.DataFrame(rows)

    logging.info(f"Uploading to {args.hf_repo_output}")
    Dataset.from_pandas(parsed_results).push_to_hub(
        repo_id=args.hf_repo_output, private=True, split="train"
    )
42
+
43
+
44
def parse_outputs(dataset_id: str) -> dict[str, Any]:
    """Parse a lighteval results dataset and return its relevant fields as a dict.

    Loads the "results" config of `dataset_id` and walks every run split,
    collecting per-task metrics and task versions. If the same task appears
    in more than one run, the run iterated last overwrites earlier values.

    Based from: https://huggingface.co/docs/lighteval/en/saving-and-reading-results

    Args:
        dataset_id: Fully-qualified HF dataset repo id (e.g. "org/model-details").

    Returns:
        A dict with three keys:
        - "config": model_name / model_dtype / model_size taken from the
          "latest" run's general config (missing keys become None).
        - "results": mapping of task name -> metrics dict; the "all"
          cross-task aggregate row is skipped.
        - "versions": mapping of task name -> task version.
    """
    logging.info(f"Parsing results from dataset {dataset_id}")
    # Force a fresh download so stale cached results are never merged in.
    ds = load_dataset(dataset_id, "results", trust_remote_code=True, download_mode=DownloadMode.FORCE_REDOWNLOAD)

    # Save all metrics and versions for each task
    metrics = {}
    versions = {}
    for run in ds.keys():
        df = ds[run].to_pandas()
        for task, result in json.loads(df.results.iloc[0]).items():
            # "all" is lighteval's aggregate over tasks; keep only real tasks
            if task != "all":
                metrics[task] = result

        versions.update(json.loads(df.versions.iloc[0]))

    logging.info(f"Found {len(metrics)} tasks!")

    # Model metadata comes from the canonical "latest" run split.
    latest_config = json.loads(ds["latest"].to_pandas().config_general.iloc[0])
    model_config = {
        "model_name": latest_config.get("model_name"),
        "model_dtype": latest_config.get("model_dtype"),
        "model_size": latest_config.get("model_size"),
    }

    return {"config": model_config, "results": metrics, "versions": versions}
73
+
74
+
75
# Script entry point: only run when executed directly, not on import.
if __name__ == "__main__":
    main()