ljvmiranda921 committed on
Commit
b5b19aa
·
1 Parent(s): 814a536

Apply pre-commit and all fixes

Browse files
README.md CHANGED
@@ -1,6 +1,6 @@
1
  # HF Leaderboard Backend
2
 
3
- This is a fork of the [leaderboard demo from HuggingFace](https://huggingface.co/demo-leaderboard-backend) with some additional scripts for parsing the results from our evaluation runs.
4
 
5
  ## Set-up and installation
6
 
@@ -21,4 +21,4 @@ If you want to update the HuggingFace space, you should add a remote pointing to
21
  ```sh
22
  git remote add hf https://huggingface.co/spaces/UD-Filipino/filbench-leaderboard
23
  git push hf main
24
- ```
 
1
  # HF Leaderboard Backend
2
 
3
+ This is a fork of the [leaderboard demo from HuggingFace](https://huggingface.co/demo-leaderboard-backend) with some additional scripts for parsing the results from our evaluation runs.
4
 
5
  ## Set-up and installation
6
 
 
21
  ```sh
22
  git remote add hf https://huggingface.co/spaces/UD-Filipino/filbench-leaderboard
23
  git push hf main
24
+ ```
app.py CHANGED
@@ -1,38 +1,16 @@
1
  import gradio as gr
2
- import pandas as pd
3
  from apscheduler.schedulers.background import BackgroundScheduler
4
  from gradio_leaderboard import ColumnFilter, Leaderboard, SelectColumns
5
  from huggingface_hub import snapshot_download
6
 
7
- from src.about import (
8
- CITATION_BUTTON_LABEL,
9
- CITATION_BUTTON_TEXT,
10
- EVALUATION_QUEUE_TEXT,
11
- INTRODUCTION_TEXT,
12
- LLM_BENCHMARKS_TEXT,
13
- TITLE,
14
- )
15
  from src.display.css_html_js import custom_css
16
- from src.display.utils import (
17
- BENCHMARK_COLS,
18
- COLS,
19
- EVAL_COLS,
20
- EVAL_TYPES,
21
- AutoEvalColumn,
22
- ModelType,
23
- Precision,
24
- WeightType,
25
- fields,
26
- )
27
- from src.envs import (
28
- API,
29
- EVAL_REQUESTS_PATH,
30
- EVAL_RESULTS_PATH,
31
- QUEUE_REPO,
32
- REPO_ID,
33
- RESULTS_REPO,
34
- TOKEN,
35
- )
36
  from src.populate import get_evaluation_queue_df, get_leaderboard_df
37
  from src.submission.submit import add_new_eval
38
 
 
1
  import gradio as gr
 
2
  from apscheduler.schedulers.background import BackgroundScheduler
3
  from gradio_leaderboard import ColumnFilter, Leaderboard, SelectColumns
4
  from huggingface_hub import snapshot_download
5
 
6
+ from src.about import CITATION_BUTTON_LABEL, CITATION_BUTTON_TEXT
7
+ from src.about import EVALUATION_QUEUE_TEXT, INTRODUCTION_TEXT, LLM_BENCHMARKS_TEXT
8
+ from src.about import TITLE
 
 
 
 
 
9
  from src.display.css_html_js import custom_css
10
+ from src.display.utils import BENCHMARK_COLS, COLS, EVAL_COLS, EVAL_TYPES
11
+ from src.display.utils import AutoEvalColumn, ModelType, Precision, WeightType, fields
12
+ from src.envs import API, EVAL_REQUESTS_PATH, EVAL_RESULTS_PATH, QUEUE_REPO, REPO_ID
13
+ from src.envs import RESULTS_REPO, TOKEN
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
  from src.populate import get_evaluation_queue_df, get_leaderboard_df
15
  from src.submission.submit import add_new_eval
16
 
requirements.txt CHANGED
@@ -3,14 +3,14 @@ black
3
  datasets
4
  gradio
5
  gradio[oauth]
6
- gradio_leaderboard==0.0.13
7
  gradio_client
 
8
  huggingface-hub>=0.18.0
9
  matplotlib
10
  numpy
11
  pandas
12
  python-dateutil
 
 
13
  tqdm
14
  transformers
15
- tokenizers>=0.15.0
16
- sentencepiece
 
3
  datasets
4
  gradio
5
  gradio[oauth]
 
6
  gradio_client
7
+ gradio_leaderboard==0.0.13
8
  huggingface-hub>=0.18.0
9
  matplotlib
10
  numpy
11
  pandas
12
  python-dateutil
13
+ sentencepiece
14
+ tokenizers>=0.15.0
15
  tqdm
16
  transformers
 
 
src/about.py CHANGED
@@ -30,7 +30,7 @@ Intro text
30
  """
31
 
32
  # Which evaluations are you running? how can people reproduce what you have?
33
- LLM_BENCHMARKS_TEXT = f"""
34
  ## How it works
35
 
36
  ## Reproducibility
 
30
  """
31
 
32
  # Which evaluations are you running? how can people reproduce what you have?
33
+ LLM_BENCHMARKS_TEXT = """
34
  ## How it works
35
 
36
  ## Reproducibility
src/display/css_html_js.py CHANGED
@@ -33,7 +33,7 @@ custom_css = """
33
  background: none;
34
  border: none;
35
  }
36
-
37
  #search-bar {
38
  padding: 0px;
39
  }
@@ -77,7 +77,7 @@ custom_css = """
77
  #filter_type label > .wrap{
78
  width: 103px;
79
  }
80
- #filter_type label > .wrap .wrap-inner{
81
  padding: 2px;
82
  }
83
  #filter_type label > .wrap .wrap-inner input{
 
33
  background: none;
34
  border: none;
35
  }
36
+
37
  #search-bar {
38
  padding: 0px;
39
  }
 
77
  #filter_type label > .wrap{
78
  width: 103px;
79
  }
80
+ #filter_type label > .wrap .wrap-inner{
81
  padding: 2px;
82
  }
83
  #filter_type label > .wrap .wrap-inner input{
src/display/utils.py CHANGED
@@ -1,8 +1,6 @@
1
  from dataclasses import dataclass, make_dataclass
2
  from enum import Enum
3
 
4
- import pandas as pd
5
-
6
  from src.about import Tasks
7
 
8
 
 
1
  from dataclasses import dataclass, make_dataclass
2
  from enum import Enum
3
 
 
 
4
  from src.about import Tasks
5
 
6
 
src/leaderboard/read_evals.py CHANGED
@@ -1,6 +1,5 @@
1
  import glob
2
  import json
3
- import math
4
  import os
5
  from dataclasses import dataclass
6
 
 
1
  import glob
2
  import json
 
3
  import os
4
  from dataclasses import dataclass
5
 
src/submission/check_validity.py CHANGED
@@ -1,8 +1,6 @@
1
  import json
2
  import os
3
- import re
4
  from collections import defaultdict
5
- from datetime import datetime, timedelta, timezone
6
 
7
  import huggingface_hub
8
  from huggingface_hub import ModelCard
@@ -53,7 +51,7 @@ def is_model_on_hub(
53
  )
54
  if test_tokenizer:
55
  try:
56
- tk = AutoTokenizer.from_pretrained(
57
  model_name,
58
  revision=revision,
59
  trust_remote_code=trust_remote_code,
@@ -68,7 +66,7 @@ def is_model_on_hub(
68
  except Exception as e:
69
  return (
70
  False,
71
- "'s tokenizer cannot be loaded. Is your tokenizer class in a stable transformers release, and correctly configured?",
72
  None,
73
  )
74
  return True, None, config
@@ -81,7 +79,7 @@ def is_model_on_hub(
81
  )
82
 
83
  except Exception as e:
84
- return False, "was not found on hub!", None
85
 
86
 
87
  def get_model_size(model_info: ModelInfo, precision: str):
 
1
  import json
2
  import os
 
3
  from collections import defaultdict
 
4
 
5
  import huggingface_hub
6
  from huggingface_hub import ModelCard
 
51
  )
52
  if test_tokenizer:
53
  try:
54
+ AutoTokenizer.from_pretrained(
55
  model_name,
56
  revision=revision,
57
  trust_remote_code=trust_remote_code,
 
66
  except Exception as e:
67
  return (
68
  False,
69
+ f"'s tokenizer cannot be loaded. Is your tokenizer class in a stable transformers release, and correctly configured?: {e}",
70
  None,
71
  )
72
  return True, None, config
 
79
  )
80
 
81
  except Exception as e:
82
+ return False, f"was not found on hub! {e}", None
83
 
84
 
85
  def get_model_size(model_info: ModelInfo, precision: str):
src/submission/submit.py CHANGED
@@ -4,12 +4,8 @@ from datetime import datetime, timezone
4
 
5
  from src.display.formatting import styled_error, styled_message, styled_warning
6
  from src.envs import API, EVAL_REQUESTS_PATH, QUEUE_REPO, TOKEN
7
- from src.submission.check_validity import (
8
- already_submitted_models,
9
- check_model_card,
10
- get_model_size,
11
- is_model_on_hub,
12
- )
13
 
14
  REQUESTED_MODELS = None
15
  USERS_TO_SUBMISSION_DATES = None
 
4
 
5
  from src.display.formatting import styled_error, styled_message, styled_warning
6
  from src.envs import API, EVAL_REQUESTS_PATH, QUEUE_REPO, TOKEN
7
+ from src.submission.check_validity import already_submitted_models, check_model_card
8
+ from src.submission.check_validity import get_model_size, is_model_on_hub
 
 
 
 
9
 
10
  REQUESTED_MODELS = None
11
  USERS_TO_SUBMISSION_DATES = None