Spaces:

filbench
/

filbench-leaderboard

Running

ljvmiranda921 committed on Mar 8

Commit

b5b19aa

1 Parent(s): 814a536

Apply pre-commit and all fixes

Files changed (9) hide show

README.md CHANGED Viewed

@@ -1,6 +1,6 @@
 # HF Leaderboard Backend
-This is a fork of the [leaderboard demo from HuggingFace](https://huggingface.co/demo-leaderboard-backend) with some additional scripts for parsing the results from our evaluation runs.
 ## Set-up and installation
@@ -21,4 +21,4 @@ If you want to update the HuggingFace space, you should add a remote pointing to
 ```sh
 git remote add hf https://huggingface.co/spaces/UD-Filipino/filbench-leaderboard
 git push hf main
-```

 # HF Leaderboard Backend
+This is a fork of the [leaderboard demo from HuggingFace](https://huggingface.co/demo-leaderboard-backend) with some additional scripts for parsing the results from our evaluation runs.
 ## Set-up and installation
 ```sh
 git remote add hf https://huggingface.co/spaces/UD-Filipino/filbench-leaderboard
 git push hf main
+```

app.py CHANGED Viewed

@@ -1,38 +1,16 @@
 import gradio as gr
-import pandas as pd
 from apscheduler.schedulers.background import BackgroundScheduler
 from gradio_leaderboard import ColumnFilter, Leaderboard, SelectColumns
 from huggingface_hub import snapshot_download
-from src.about import (
-    CITATION_BUTTON_LABEL,
-    CITATION_BUTTON_TEXT,
-    EVALUATION_QUEUE_TEXT,
-    INTRODUCTION_TEXT,
-    LLM_BENCHMARKS_TEXT,
-    TITLE,
-)
 from src.display.css_html_js import custom_css
-from src.display.utils import (
-    BENCHMARK_COLS,
-    COLS,
-    EVAL_COLS,
-    EVAL_TYPES,
-    AutoEvalColumn,
-    ModelType,
-    Precision,
-    WeightType,
-    fields,
-)
-from src.envs import (
-    API,
-    EVAL_REQUESTS_PATH,
-    EVAL_RESULTS_PATH,
-    QUEUE_REPO,
-    REPO_ID,
-    RESULTS_REPO,
-    TOKEN,
-)
 from src.populate import get_evaluation_queue_df, get_leaderboard_df
 from src.submission.submit import add_new_eval

 import gradio as gr
 from apscheduler.schedulers.background import BackgroundScheduler
 from gradio_leaderboard import ColumnFilter, Leaderboard, SelectColumns
 from huggingface_hub import snapshot_download
+from src.about import CITATION_BUTTON_LABEL, CITATION_BUTTON_TEXT
+from src.about import EVALUATION_QUEUE_TEXT, INTRODUCTION_TEXT, LLM_BENCHMARKS_TEXT
+from src.about import TITLE
 from src.display.css_html_js import custom_css
+from src.display.utils import BENCHMARK_COLS, COLS, EVAL_COLS, EVAL_TYPES
+from src.display.utils import AutoEvalColumn, ModelType, Precision, WeightType, fields
+from src.envs import API, EVAL_REQUESTS_PATH, EVAL_RESULTS_PATH, QUEUE_REPO, REPO_ID
+from src.envs import RESULTS_REPO, TOKEN
 from src.populate import get_evaluation_queue_df, get_leaderboard_df
 from src.submission.submit import add_new_eval

requirements.txt CHANGED Viewed

@@ -3,14 +3,14 @@ black
 datasets
 gradio
 gradio[oauth]
-gradio_leaderboard==0.0.13
 gradio_client
 huggingface-hub>=0.18.0
 matplotlib
 numpy
 pandas
 python-dateutil
 tqdm
 transformers
-tokenizers>=0.15.0
-sentencepiece

 datasets
 gradio
 gradio[oauth]
 gradio_client
+gradio_leaderboard==0.0.13
 huggingface-hub>=0.18.0
 matplotlib
 numpy
 pandas
 python-dateutil
+sentencepiece
+tokenizers>=0.15.0
 tqdm
 transformers

src/about.py CHANGED Viewed

@@ -30,7 +30,7 @@ Intro text
 """
 # Which evaluations are you running? how can people reproduce what you have?
-LLM_BENCHMARKS_TEXT = f"""
 ## How it works
 ## Reproducibility

 """
 # Which evaluations are you running? how can people reproduce what you have?
+LLM_BENCHMARKS_TEXT = """
 ## How it works
 ## Reproducibility

src/display/css_html_js.py CHANGED Viewed

@@ -33,7 +33,7 @@ custom_css = """
     background: none;
     border: none;
 }
 #search-bar {
     padding: 0px;
 }
@@ -77,7 +77,7 @@ custom_css = """
 #filter_type label > .wrap{
     width: 103px;
 }
-#filter_type label > .wrap .wrap-inner{
     padding: 2px;
 }
 #filter_type label > .wrap .wrap-inner input{

     background: none;
     border: none;
 }
 #search-bar {
     padding: 0px;
 }
 #filter_type label > .wrap{
     width: 103px;
 }
+#filter_type label > .wrap .wrap-inner{
     padding: 2px;
 }
 #filter_type label > .wrap .wrap-inner input{

src/display/utils.py CHANGED Viewed

@@ -1,8 +1,6 @@
 from dataclasses import dataclass, make_dataclass
 from enum import Enum
-import pandas as pd
 from src.about import Tasks

 from dataclasses import dataclass, make_dataclass
 from enum import Enum
 from src.about import Tasks

src/leaderboard/read_evals.py CHANGED Viewed

@@ -1,6 +1,5 @@
 import glob
 import json
-import math
 import os
 from dataclasses import dataclass

 import glob
 import json
 import os
 from dataclasses import dataclass

src/submission/check_validity.py CHANGED Viewed

@@ -1,8 +1,6 @@
 import json
 import os
-import re
 from collections import defaultdict
-from datetime import datetime, timedelta, timezone
 import huggingface_hub
 from huggingface_hub import ModelCard
@@ -53,7 +51,7 @@ def is_model_on_hub(
         )
         if test_tokenizer:
             try:
-                tk = AutoTokenizer.from_pretrained(
                     model_name,
                     revision=revision,
                     trust_remote_code=trust_remote_code,
@@ -68,7 +66,7 @@ def is_model_on_hub(
             except Exception as e:
                 return (
                     False,
-                    "'s tokenizer cannot be loaded. Is your tokenizer class in a stable transformers release, and correctly configured?",
                     None,
                 )
         return True, None, config
@@ -81,7 +79,7 @@ def is_model_on_hub(
         )
     except Exception as e:
-        return False, "was not found on hub!", None
 def get_model_size(model_info: ModelInfo, precision: str):

 import json
 import os
 from collections import defaultdict
 import huggingface_hub
 from huggingface_hub import ModelCard
         )
         if test_tokenizer:
             try:
+                AutoTokenizer.from_pretrained(
                     model_name,
                     revision=revision,
                     trust_remote_code=trust_remote_code,
             except Exception as e:
                 return (
                     False,
+                    f"'s tokenizer cannot be loaded. Is your tokenizer class in a stable transformers release, and correctly configured?: {e}",
                     None,
                 )
         return True, None, config
         )
     except Exception as e:
+        return False, f"was not found on hub! {e}", None
 def get_model_size(model_info: ModelInfo, precision: str):

src/submission/submit.py CHANGED Viewed

@@ -4,12 +4,8 @@ from datetime import datetime, timezone
 from src.display.formatting import styled_error, styled_message, styled_warning
 from src.envs import API, EVAL_REQUESTS_PATH, QUEUE_REPO, TOKEN
-from src.submission.check_validity import (
-    already_submitted_models,
-    check_model_card,
-    get_model_size,
-    is_model_on_hub,
-)
 REQUESTED_MODELS = None
 USERS_TO_SUBMISSION_DATES = None

 from src.display.formatting import styled_error, styled_message, styled_warning
 from src.envs import API, EVAL_REQUESTS_PATH, QUEUE_REPO, TOKEN
+from src.submission.check_validity import already_submitted_models, check_model_card
+from src.submission.check_validity import get_model_size, is_model_on_hub
 REQUESTED_MODELS = None
 USERS_TO_SUBMISSION_DATES = None