ljvmiranda921 committed on
Commit
7ffe204
·
1 Parent(s): d7a42ca

Add indicators for each evals

Browse files
Files changed (1) hide show
  1. src/schema.py +25 -26
src/schema.py CHANGED
@@ -1,6 +1,5 @@
1
  from dataclasses import dataclass, make_dataclass
2
  from enum import Enum
3
- from typing import Literal, Optional, Union
4
 
5
  from src.display.formatting import make_clickable_model
6
 
@@ -66,10 +65,10 @@ model_registry = {
66
 
67
  ### The Task and Tasks classes store information about each benchmark we're scoring. ###
68
  class TaskCategory(Enum):
69
- CULTURAL_KNOWLEDGE = "Cultural Knowledge"
70
- CLASSICAL_NLP = "Classical NLP"
71
- READING_COMPREHENSION = "Reading Comprehension"
72
- TRANSLATION = "Translation"
73
 
74
 
75
  @dataclass
@@ -84,28 +83,28 @@ class Task:
84
 
85
  class Tasks(Enum):
86
  # fmt: off
87
- balita_tgl_mcf = Task("balita_tgl_mcf", "acc_", "BalitaNLP", "tgl", TaskCategory.CULTURAL_KNOWLEDGE, 35_177)
88
- belebele_ceb_mcf = Task("belebele_ceb_mcf", "acc_", "Belebele (ceb)", "ceb", TaskCategory.READING_COMPREHENSION, 900)
89
- belebele_fil_mcf = Task("belebele_fil_mcf", "acc_", "Belebele (fil)", "fil", TaskCategory.READING_COMPREHENSION, 900)
90
- cebuaner_ceb_mcf = Task("cebuaner_ceb_mcf", "acc_", "CebuaNER", "ceb", TaskCategory.CLASSICAL_NLP, 1310)
91
- dengue_filipino_fil = Task("dengue_filipino_fil:_average", "acc_norm", "Dengue", "fil", TaskCategory.CLASSICAL_NLP, 4015)
92
- firecs_fil_mcf = Task("firecs_fil_mcf", "acc_", "FiReCS", "fil", TaskCategory.CLASSICAL_NLP, 7340)
93
- global_mmlu_all_tgl = Task("global_mmlu_all_tgl_mcf:_average", "acc_", "Global-MMLU", "tgl", TaskCategory.CULTURAL_KNOWLEDGE, 14_042)
94
- include_tgl_mcf = Task("include_tgl_mcf:_average", "acc_", "INCLUDE", "tgl", TaskCategory.CULTURAL_KNOWLEDGE, 500)
95
- kalahi_tgl_mcf = Task("kalahi_tgl_mcf", "acc_", "KALAHI", "tgl", TaskCategory.CULTURAL_KNOWLEDGE, 150)
96
- newsphnli_fil_mcf = Task("newsphnli_fil_mcf", "acc_", "NewsPH NLI", "fil", TaskCategory.READING_COMPREHENSION, 90_000)
97
- ntrex128_fil = Task("ntrex128_fil", "bleu", "NTREX-128", "fil", TaskCategory.TRANSLATION, 1997)
98
- readability_ceb_mcf = Task("readability_ceb_mcf", "acc_", "Readability (ceb)", "ceb", TaskCategory.READING_COMPREHENSION, 350)
99
- sib200_ceb_mcf = Task("sib200_ceb_mcf", "acc_", "SIB-200 (ceb)", "ceb", TaskCategory.CLASSICAL_NLP, 99)
100
- sib200_tgl_mcf = Task("sib200_tgl_mcf", "acc_", "SIB-200 (tgl)", "tgl", TaskCategory.CLASSICAL_NLP, 99)
101
  # stingraybench_corr_tgl_mcf = Task("stingraybench_correctness_tgl_mcf", "acc_", "StingrayBench (Correctness)", "tgl", TaskCategory.CULTURAL_KNOWLEDGE, 100)
102
- stingraybench_sem_appropriateness_tgl_mcf = Task("stingraybench_semantic_appropriateness_tgl_mcf", "acc_", "StingrayBench", "tgl", TaskCategory.CULTURAL_KNOWLEDGE, 100)
103
- tatoeba_ceb = Task("tatoeba_ceb", "bleu", "Tatoeba (ceb)", "ceb", TaskCategory.TRANSLATION, 377)
104
- tatoeba_tgl = Task("tatoeba_tgl", "bleu", "Tatoeba (tgl)", "tgl", TaskCategory.TRANSLATION, 2499)
105
- tico19_tgl = Task("tico19_tgl", "bleu", "TICO-19", "tgl", TaskCategory.TRANSLATION, 971)
106
- tlunifiedner_tgl_mcf = Task("tlunifiedner_tgl_mcf", "acc_", "TLUnified NER", "tgl", TaskCategory.CLASSICAL_NLP, 1579)
107
- universalner_ceb_mcf = Task("universalner_ceb_mcf", "acc_", "Universal NER (ceb)", "ceb", TaskCategory.CLASSICAL_NLP, 49)
108
- universalner_tgl_mcf = Task("universalner_tgl_mcf", "acc_", "Universal NER (tgl)", "tgl", TaskCategory.CLASSICAL_NLP, 56)
109
  # fmt: on
110
 
111
 
 
1
  from dataclasses import dataclass, make_dataclass
2
  from enum import Enum
 
3
 
4
  from src.display.formatting import make_clickable_model
5
 
 
65
 
66
  ### The Task and Tasks classes store information about each benchmark we're scoring. ###
67
  class TaskCategory(Enum):
68
+ CULTURAL_KNOWLEDGE = "🌏 Cultural Knowledge"
69
+ CLASSICAL_NLP = "🏛️ Classical NLP"
70
+ READING_COMPREHENSION = "📖 Reading Comprehension"
71
+ TRANSLATION = "🔢 Generation"
72
 
73
 
74
  @dataclass
 
83
 
84
  class Tasks(Enum):
85
  # fmt: off
86
+ balita_tgl_mcf = Task("balita_tgl_mcf", "acc_", "🌏 BalitaNLP", "tgl", TaskCategory.CULTURAL_KNOWLEDGE, 35_177)
87
+ belebele_ceb_mcf = Task("belebele_ceb_mcf", "acc_", "📖 Belebele (ceb)", "ceb", TaskCategory.READING_COMPREHENSION, 900)
88
+ belebele_fil_mcf = Task("belebele_fil_mcf", "acc_", "📖 Belebele (fil)", "fil", TaskCategory.READING_COMPREHENSION, 900)
89
+ cebuaner_ceb_mcf = Task("cebuaner_ceb_mcf", "acc_", "🏛️ CebuaNER", "ceb", TaskCategory.CLASSICAL_NLP, 1310)
90
+ dengue_filipino_fil = Task("dengue_filipino_fil:_average", "acc_norm", "🏛️ Dengue", "fil", TaskCategory.CLASSICAL_NLP, 4015)
91
+ firecs_fil_mcf = Task("firecs_fil_mcf", "acc_", "🏛️ FiReCS", "fil", TaskCategory.CLASSICAL_NLP, 7340)
92
+ global_mmlu_all_tgl = Task("global_mmlu_all_tgl_mcf:_average", "acc_", "🌏 Global-MMLU", "tgl", TaskCategory.CULTURAL_KNOWLEDGE, 14_042)
93
+ include_tgl_mcf = Task("include_tgl_mcf:_average", "acc_", "🌏 INCLUDE", "tgl", TaskCategory.CULTURAL_KNOWLEDGE, 500)
94
+ kalahi_tgl_mcf = Task("kalahi_tgl_mcf", "acc_", "🌏 KALAHI", "tgl", TaskCategory.CULTURAL_KNOWLEDGE, 150)
95
+ newsphnli_fil_mcf = Task("newsphnli_fil_mcf", "acc_", "📖 NewsPH NLI", "fil", TaskCategory.READING_COMPREHENSION, 90_000)
96
+ ntrex128_fil = Task("ntrex128_fil", "bleu", "🔢 NTREX-128", "fil", TaskCategory.TRANSLATION, 1997)
97
+ readability_ceb_mcf = Task("readability_ceb_mcf", "acc_", "📖 Readability (ceb)", "ceb", TaskCategory.READING_COMPREHENSION, 350)
98
+ sib200_ceb_mcf = Task("sib200_ceb_mcf", "acc_", "🏛️ SIB-200 (ceb)", "ceb", TaskCategory.CLASSICAL_NLP, 99)
99
+ sib200_tgl_mcf = Task("sib200_tgl_mcf", "acc_", "🏛️ SIB-200 (tgl)", "tgl", TaskCategory.CLASSICAL_NLP, 99)
100
  # stingraybench_corr_tgl_mcf = Task("stingraybench_correctness_tgl_mcf", "acc_", "StingrayBench (Correctness)", "tgl", TaskCategory.CULTURAL_KNOWLEDGE, 100)
101
+ stingraybench_sem_appropriateness_tgl_mcf = Task("stingraybench_semantic_appropriateness_tgl_mcf", "acc_", "🌏StingrayBench", "tgl", TaskCategory.CULTURAL_KNOWLEDGE, 100)
102
+ tatoeba_ceb = Task("tatoeba_ceb", "bleu", "🔢 Tatoeba (ceb)", "ceb", TaskCategory.TRANSLATION, 377)
103
+ tatoeba_tgl = Task("tatoeba_tgl", "bleu", "🔢 Tatoeba (tgl)", "tgl", TaskCategory.TRANSLATION, 2499)
104
+ tico19_tgl = Task("tico19_tgl", "bleu", "🔢 TICO-19", "tgl", TaskCategory.TRANSLATION, 971)
105
+ tlunifiedner_tgl_mcf = Task("tlunifiedner_tgl_mcf", "acc_", "🏛️ TLUnified NER", "tgl", TaskCategory.CLASSICAL_NLP, 1579)
106
+ universalner_ceb_mcf = Task("universalner_ceb_mcf", "acc_", "🏛️ Universal NER (ceb)", "ceb", TaskCategory.CLASSICAL_NLP, 49)
107
+ universalner_tgl_mcf = Task("universalner_tgl_mcf", "acc_", "🏛️ Universal NER (tgl)", "tgl", TaskCategory.CLASSICAL_NLP, 56)
108
  # fmt: on
109
 
110