DontPlanToEnd committed on
Commit
c199dcb
·
verified ·
1 Parent(s): a152108

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +992 -671
app.py CHANGED
@@ -1,91 +1,187 @@
1
  import dash
2
- from dash import html, dcc, Input, Output, State
3
  import dash_ag_grid as dag
4
  import pandas as pd
5
  import numpy as np
6
  from datetime import datetime, timedelta
7
  import base64
8
  import os
 
 
 
9
 
10
- # Define the columns
11
- MAIN_COLS = ['#P', 'Model', 'UGI πŸ†', 'W/10 πŸ‘', 'NatInt πŸ’‘', 'Coding πŸ’»', 'Unruly', 'Internet', 'Societal/Political', 'Political Lean πŸ“‹', 'Ideology Name']
12
- AXES_COLS_1 = ['govt', 'dipl', 'econ', 'scty']
13
- AXES_COLS_2 = ['Federal-Unitary', 'Democratic-Autocratic', 'Security-Freedom', 'Nationalism-Internationalism',
14
- 'Militarist-Pacifist', 'Assimilationist-Multiculturalist', 'Collectivize-Privatize',
15
- 'Planned-LaissezFaire', 'Isolationism-Globalism', 'Irreligious-Religious',
16
- 'Progressive-Traditional', 'Acceleration-Bioconservative']
17
- UGI_CATEGORY_COLS = ['Unruly', 'Internet', 'Societal/Political']
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
 
19
  def load_leaderboard_data(csv_file_path):
20
  try:
 
21
  df = pd.read_csv(csv_file_path, na_values=['NA'])
22
-
 
 
 
 
 
 
 
23
  # Add type sort value
24
  def get_type_sort_value(row):
25
  if pd.isna(row['Total Parameters']):
26
- return 3 # P
27
  if row['Is Foundation'] and not row['Is Merged']:
28
- return 0 # B
29
  if row['Is Merged']:
30
- return 2 # M
31
  if row['Is Finetuned'] and not row['Is Merged']:
32
- return 1 # F
33
- return 4
34
 
35
  df['model_type_sort'] = df.apply(get_type_sort_value, axis=1)
36
-
 
37
  # Convert date columns to datetime
38
  for col in ['Release Date', 'Test Date']:
39
  df[col] = pd.to_datetime(df[col], format='%m/%d/%Y', errors='coerce')
40
-
41
- # Store original release date for sorting (earliest first for tertiary sort)
42
  df['Release_Date_Sort'] = df['Release Date']
43
-
44
- # Create a multi-tiered sorting key
45
- # 1. UGI score (highest first) - primary sort
46
- # 2. NatInt score (highest first) - secondary sort for same UGI
47
- # 3. Release Date (earliest first) - tertiary sort for same UGI and NatInt
48
-
49
  # Format dates as strings for display
50
  df['Release Date'] = df['Release Date'].dt.strftime('%Y-%m-%d')
51
  df['Test Date'] = df['Test Date'].dt.strftime('%Y-%m-%d')
52
-
53
- # Calculate the date two weeks ago from today
54
- two_weeks_ago = (datetime.now() - timedelta(days=6)).strftime('%Y-%m-%d') #temp 6
55
-
56
- # Store model name and link separately
57
- df['Model_Link'] = df['Model Link'].fillna('')
58
- df['Model_Display'] = df['author/model_name']
59
-
60
- # Check for new models based on Test Date
61
  df['is_new'] = df.apply(
62
  lambda row: 'πŸ†•' if pd.notna(row["Test Date"]) and row["Test Date"] >= two_weeks_ago else '',
63
  axis=1
64
  )
65
-
 
 
 
 
66
  # Add pinned and selected columns
67
  df['pinned'] = False
68
  df['selected'] = False
69
-
70
- # Convert percentage strings to floats for all relevant columns
71
- percentage_columns = ['Political Lean πŸ“‹'] + AXES_COLS_1 + AXES_COLS_2
 
 
 
 
 
 
 
 
 
72
  for col in percentage_columns:
73
- df[col] = pd.to_numeric(df[col].astype(str).str.rstrip('%'), errors='coerce')
74
-
75
- # Round numeric columns and handle NA values
76
- numeric_columns = df.select_dtypes(include=[np.number]).columns
77
- for col in numeric_columns:
78
- df[col] = df[col].apply(lambda x: None if pd.isna(x) else round(x, 3))
 
 
 
 
79
 
80
- # Sort with multiple keys in the required order
81
  df = df.sort_values(
82
- by=['UGI πŸ†', 'NatInt πŸ’‘', 'Release_Date_Sort'],
83
- ascending=[False, False, True] # UGI desc, NatInt desc, Release date asc (earliest first)
84
  )
85
-
86
  return df
87
  except Exception as e:
88
  print(f"Error loading CSV file: {e}")
 
 
 
89
  return pd.DataFrame()
90
 
91
  def load_ideology_descriptions():
@@ -148,6 +244,7 @@ app.index_string = '''
148
  --secondary-text: #666;
149
  --pinned-bg: #f5f5f5;
150
  --border-color: #ccc;
 
151
  }
152
  @media (prefers-color-scheme: dark) {
153
  :root {
@@ -159,6 +256,7 @@ app.index_string = '''
159
  --secondary-text: #8b949e;
160
  --pinned-bg: #1c2128;
161
  --border-color: #30363d;
 
162
  color-scheme: dark;
163
  }
164
  .ag-theme-alpine .ag-menu {
@@ -250,6 +348,11 @@ app.index_string = '''
250
  --ag-foreground-color: var(--text-color);
251
  --ag-row-border-color: var(--grid-border);
252
  }
 
 
 
 
 
253
  .ag-theme-alpine .ag-pinned-left-header,
254
  .ag-theme-alpine .ag-cell-last-left-pinned {
255
  border-right: 2px solid var(--grid-border) !important;
@@ -312,6 +415,41 @@ app.index_string = '''
312
  .border-right {
313
  border-right: 2px solid var(--grid-border) !important;
314
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
315
  /* Link Styles */
316
  .model-link {
317
  color: var(--link-color) !important;
@@ -428,12 +566,112 @@ app.index_string = '''
428
  .split-header-top, .split-header-bottom {
429
  white-space: nowrap;
430
  }
431
- .ag-theme-alpine .new-emoji-cell.ag-cell {
432
- font-size: 18px !important;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
433
  display: flex !important;
434
  align-items: center !important;
435
- justify-content: flex-start !important;
436
- padding-left: 12px !important;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
437
  }
438
  </style>
439
  </head>
@@ -451,8 +689,14 @@ app.index_string = '''
451
  # Load data
452
  df = load_leaderboard_data("ugi-leaderboard-data.csv")
453
 
454
- # Define helper functions
455
- def create_numeric_column(field, width=125, sort=None, sortIndex=None, **kwargs):
 
 
 
 
 
 
456
  column = {
457
  "field": field,
458
  "width": width,
@@ -461,35 +705,27 @@ def create_numeric_column(field, width=125, sort=None, sortIndex=None, **kwargs)
461
  "defaultOption": "inRange",
462
  "filterOptions": ['equals', 'notEqual', 'greaterThan', 'greaterThanOrEqual', 'lessThan', 'lessThanOrEqual', 'inRange']
463
  },
 
464
  "headerClass": "ag-left-aligned-header wrap-text",
465
- "cellClass": "ag-left-aligned-cell",
466
  "wrapHeaderText": True,
467
  "autoHeaderHeight": True,
468
  "suppressSizeToFit": True,
469
  "sortingOrder": ['desc', 'asc'],
470
- "comparator": {
471
- "function": """
472
- function(valueA, valueB, nodeA, nodeB, isInverted) {
473
- const a = nodeA.data.__sortValue;
474
- const b = nodeB.data.__sortValue;
475
- return a - b;
476
- }
477
- """
478
- }
479
  }
480
-
481
- # Update filterParams if provided in kwargs
482
- if 'filterParams' in kwargs:
483
- column['filterParams'].update(kwargs['filterParams'])
484
-
485
- if sort:
486
- column["sort"] = sort
487
- if sortIndex is not None:
488
- column["sortIndex"] = sortIndex
489
  return column
490
-
491
- def create_text_column(field, width=120):
492
- return {
 
 
 
 
 
 
 
493
  "field": field,
494
  "width": width,
495
  "filter": "agTextColumnFilter",
@@ -498,215 +734,13 @@ def create_text_column(field, width=120):
498
  "filterOptions": ['contains', 'notContains', 'startsWith', 'endsWith']
499
  },
500
  "headerClass": "ag-left-aligned-header wrap-text",
501
- "cellClass": "ag-left-aligned-cell",
502
- "wrapHeaderText": True,
503
- "autoHeaderHeight": True
504
- }
505
-
506
- # Define column configurations
507
- columnDefs = [
508
- {
509
- "headerName": "πŸ“Œ",
510
- "field": "pinned",
511
- "width": 55,
512
- "filter": False,
513
- "suppressMenu": True,
514
- "cellRenderer": "PinRenderer",
515
- "pinned": "left"
516
- },
517
- {
518
- "headerName": "",
519
- "field": "is_new",
520
- "width": 55,
521
- "filter": False,
522
- "suppressMenu": True,
523
- "cellClass": "new-emoji-cell",
524
- "pinned": "left"
525
- },
526
- {
527
- "field": "#P",
528
- "width": 115,
529
- "filter": "agNumberColumnFilter",
530
- "filterParams": {
531
- "defaultOption": "equals",
532
- "filterOptions": ['equals', 'notEqual', 'greaterThan', 'greaterThanOrEqual', 'lessThan', 'lessThanOrEqual', 'inRange']
533
- },
534
- "headerClass": "ag-left-aligned-header wrap-text",
535
- "cellClass": "ag-right-aligned-cell",
536
- "wrapHeaderText": True,
537
- "autoHeaderHeight": True,
538
- "suppressSizeToFit": True,
539
- "sortingOrder": ['desc', 'asc'],
540
- "pinned": "left"
541
- },
542
- {
543
- "field": "model_type_sort",
544
- "hide": True
545
- },
546
- {
547
- "headerName": "T",
548
- "field": "model_type_sort", # Changed to use the sort field directly
549
- "width": 45,
550
- "filter": False,
551
- "suppressMenu": True,
552
- "cellRenderer": "TypeRenderer",
553
- "pinned": "left",
554
- "sortable": True,
555
- "sortingOrder": ['asc', 'desc']
556
- },
557
- {
558
- "field": "Model_Display",
559
- "headerName": "Model",
560
- "cellRenderer": "ModelLink",
561
- "filter": "agTextColumnFilter",
562
- "filterParams": {
563
- "defaultOption": "contains",
564
- "filterOptions": ['contains', 'notContains', 'startsWith', 'endsWith']
565
- },
566
- "width": 380,
567
- "suppressMenu": False,
568
- "pinned": "left",
569
- "headerClass": "ag-left-aligned-header wrap-text",
570
- "wrapHeaderText": True,
571
- "autoHeaderHeight": True
572
- },
573
- {
574
- "field": "UGI πŸ†",
575
- "width": 120,
576
- "filter": "agNumberColumnFilter",
577
- "filterParams": {
578
- "defaultOption": "greaterThanOrEqual"
579
- },
580
- "headerClass": "ag-left-aligned-header wrap-text",
581
- "cellClass": ["ag-left-aligned-cell", "border-left"],
582
  "wrapHeaderText": True,
583
  "autoHeaderHeight": True,
584
- "suppressSizeToFit": True,
585
- "sortingOrder": ['desc', 'asc']
586
- },
587
- create_numeric_column("W/10 πŸ‘", width=130, filterParams={
588
- "defaultOption": "greaterThanOrEqual",
589
- "filterOptions": ['equals', 'notEqual', 'greaterThan', 'greaterThanOrEqual', 'lessThan', 'lessThanOrEqual', 'inRange']
590
- }),
591
- {
592
- "field": "NatInt πŸ’‘",
593
- "headerName": "NatInt πŸ’‘",
594
- "width": 140,
595
- "filter": "agNumberColumnFilter",
596
- "filterParams": {
597
- "defaultOption": "greaterThanOrEqual",
598
- "filterOptions": ['equals', 'notEqual', 'greaterThan', 'greaterThanOrEqual', 'lessThan', 'lessThanOrEqual', 'inRange']
599
- },
600
- "headerClass": "ag-left-aligned-header wrap-text",
601
- "cellClass": ["ag-left-aligned-cell", "border-left"],
602
- "wrapHeaderText": True,
603
- "autoHeaderHeight": True,
604
- "suppressSizeToFit": True,
605
- "sortingOrder": ['desc', 'asc']
606
- },
607
- create_numeric_column("Coding πŸ’»", width=140, filterParams={
608
- "defaultOption": "greaterThanOrEqual"
609
- }),
610
- {
611
- "field": "Political Lean πŸ“‹",
612
- "width": 175,
613
- "filter": "agNumberColumnFilter",
614
- "filterParams": {
615
- "defaultOption": "inRange",
616
- "filterOptions": ['equals', 'notEqual', 'greaterThan', 'greaterThanOrEqual', 'lessThan', 'lessThanOrEqual', 'inRange']
617
- },
618
- "valueFormatter": {
619
- "function": "d3.format('.1f')(params.value) + '%'"
620
- },
621
- "sortingOrder": ['desc', 'asc'],
622
- "headerClass": "ag-left-aligned-header wrap-text",
623
- "cellClass": "ag-left-aligned-cell",
624
- "wrapHeaderText": True,
625
- "autoHeaderHeight": True
626
  }
627
- ]
628
-
629
- ugi_category_columns = [
630
- create_numeric_column(col, width=120) for col in UGI_CATEGORY_COLS
631
- ]
632
-
633
- w10_type_columns = [
634
- create_numeric_column("W/10-Direct", width=120, filterParams={
635
- "defaultOption": "greaterThanOrEqual",
636
- "filterOptions": ['equals', 'notEqual', 'greaterThan', 'greaterThanOrEqual', 'lessThan', 'lessThanOrEqual', 'inRange']
637
- }),
638
- create_numeric_column("W/10-Adherence", width=120, filterParams={
639
- "defaultOption": "greaterThanOrEqual",
640
- "filterOptions": ['equals', 'notEqual', 'greaterThan', 'greaterThanOrEqual', 'lessThan', 'lessThanOrEqual', 'inRange']
641
- })
642
- ]
643
-
644
- political_columns = [
645
- {
646
- "headerName": "12axes Ideology",
647
- "field": "Ideology Name",
648
- "width": 160,
649
- "filter": "agTextColumnFilter",
650
- "filterParams": {
651
- "defaultOption": "contains",
652
- "filterOptions": ['contains', 'notContains', 'startsWith', 'endsWith']
653
- },
654
- "headerClass": "ag-left-aligned-header wrap-text",
655
- "cellClass": "ag-left-aligned-cell",
656
- "wrapHeaderText": True,
657
- "autoHeaderHeight": True
658
- }
659
- ]
660
-
661
- template_column = {
662
- "field": "Prompt Template",
663
- "headerName": "Template",
664
- "width": 160,
665
- "filter": "agTextColumnFilter",
666
- "filterParams": {
667
- "defaultOption": "contains",
668
- "filterOptions": ['contains', 'notContains', 'startsWith', 'endsWith']
669
- },
670
- "headerClass": "ag-left-aligned-header wrap-text",
671
- "cellClass": "ag-left-aligned-cell",
672
- "wrapHeaderText": True,
673
- "autoHeaderHeight": True,
674
- "comparator": {
675
- "function": """
676
- function(valueA, valueB) {
677
- if (!valueA && !valueB) return 0;
678
- if (!valueA) return 1;
679
- if (!valueB) return -1;
680
- return valueA.toLowerCase().localeCompare(valueB.toLowerCase());
681
- }
682
- """
683
- }
684
- }
685
-
686
- # Add axes columns with different widths
687
- for i, col in enumerate(AXES_COLS_1):
688
- col_def = {
689
- "field": col,
690
- "width": 105,
691
- "filter": "agNumberColumnFilter",
692
- "filterParams": {
693
- "defaultOption": "inRange",
694
- "filterOptions": ['equals', 'notEqual', 'greaterThan', 'greaterThanOrEqual', 'lessThan', 'lessThanOrEqual', 'inRange']
695
- },
696
- "valueFormatter": {
697
- "function": "d3.format('.1f')(params.value) + '%'"
698
- },
699
- "headerClass": "ag-left-aligned-header wrap-text",
700
- "cellClass": ["ag-left-aligned-cell"],
701
- "sortingOrder": ['desc', 'asc']
702
- }
703
-
704
- if i == 0: # First column (govt)
705
- col_def["cellClass"].append("border-left")
706
- elif i == len(AXES_COLS_1) - 1: # Last column (scty)
707
- col_def["cellClass"].append("border-right")
708
-
709
- columnDefs.append(col_def)
710
 
711
  template_with_split_header = """
712
  <div class="ag-cell-label-container" role="presentation">
@@ -726,94 +760,171 @@ template_with_split_header = """
726
  </div>
727
  """
728
 
729
- for col in AXES_COLS_2:
730
- high, low = col.split('-')
731
- columnDefs.append({
732
- "field": col,
733
- "headerComponentParams": {
734
- "template": template_with_split_header.format(high=high, low=low)
735
- },
736
- "width": 175,
737
- "filter": "agNumberColumnFilter",
738
- "filterParams": {
739
- "defaultOption": "inRange",
740
- "filterOptions": ['equals', 'notEqual', 'greaterThan', 'greaterThanOrEqual', 'lessThan', 'lessThanOrEqual', 'inRange']
741
- },
742
- "valueFormatter": {
743
- "function": "d3.format('.1f')(params.value) + '%'"
744
- },
745
- "sortingOrder": ['desc', 'asc']
746
- })
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
747
 
748
- # Date Columns
749
- columnDefs.extend([
750
- {
751
- "field": "Release Date",
752
- "width": 130,
753
- "filter": "agDateColumnFilter",
754
- "filterParams": {
755
- "defaultOption": "greaterThan",
756
- "browserDatePicker": True,
757
- "inRangeInclusive": True
758
- },
759
- "filterValueGetter": {
760
- "function": """
761
- function(params) {
762
- if (!params.data['Release Date']) return null;
763
- const [year, month, day] = params.data['Release Date'].split('-');
764
- return new Date(year, month - 1, day);
765
- }
766
- """
767
- },
768
- "valueFormatter": {
769
- "function": """
770
- function(params) {
771
- if (!params.value) return '';
772
- const [year, month, day] = params.value.split('-');
773
- return `${month}/${day}/${year}`;
774
- }
775
- """
776
- },
777
- "cellClass": ["ag-left-aligned-cell", "border-left"],
778
- "headerClass": "ag-left-aligned-header wrap-text",
779
- "wrapHeaderText": True,
780
- "autoHeaderHeight": True,
781
- "sortable": True
782
  },
783
- {
784
- "field": "Test Date",
785
- "width": 130,
786
- "filter": "agDateColumnFilter",
787
- "filterParams": {
788
- "defaultOption": "greaterThan",
789
- "browserDatePicker": True,
790
- "inRangeInclusive": True
791
- },
792
- "filterValueGetter": {
793
- "function": """
794
- function(params) {
795
- if (!params.data['Test Date']) return null;
796
- const [year, month, day] = params.data['Test Date'].split('-');
797
- return new Date(year, month - 1, day);
798
- }
799
- """
800
- },
801
- "valueFormatter": {
802
- "function": """
803
- function(params) {
804
- if (!params.value) return '';
805
- const [year, month, day] = params.value.split('-');
806
- return `${month}/${day}/${year}`;
807
- }
808
- """
809
- },
810
- "cellClass": "ag-left-aligned-cell",
811
- "headerClass": "ag-left-aligned-header wrap-text",
812
- "wrapHeaderText": True,
813
- "autoHeaderHeight": True,
814
- "sortable": True
815
- }
816
- ])
817
 
818
  # Define the grid options with postSort
819
  dashGridOptions = {
@@ -827,25 +938,62 @@ dashGridOptions = {
827
  "pinnedTopRowData": [],
828
  "suppressMaintainUnsortedOrder": True,
829
  "suppressMultiSort": True,
 
830
  "rowBuffer": 10,
831
  "maxBlocksInCache": 2,
832
  "icons": {
833
  "menu": '<i class="fas fa-search" style="color: var(--text-color)"></i>'
834
  },
835
- "theme": "ag-theme-alpine-dark" if "prefers-color-scheme: dark" else "ag-theme-alpine",
836
- "columnState": {
837
- "function": """
838
- function() {
839
- return {
840
- columnVisibility: {}
841
- };
842
- }
843
- """
844
- }
845
  }
846
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
847
  # Define the layout
848
  app.layout = html.Div([
 
849
  dcc.Store(id='pinned-models-store', data=[]),
850
 
851
  # Header
@@ -875,127 +1023,117 @@ app.layout = html.Div([
875
  )
876
  ], style={'float': 'right'})
877
  ], style={'overflow': 'hidden', 'marginBottom': '20px', 'padding': '0 20px'}),
878
-
879
- # Temporary Notice
880
- html.Div(
881
- html.P(
882
- "*Currently working on adding new benchmarks to the leaderboard, so I will probably not test additional models until the new leaderboard is released.",
883
- style={'textAlign': 'center', 'color': 'red', 'fontSize': '0.9em'}
884
- )
885
- ),
886
-
887
- # Title
888
- html.Div([
889
- html.H1("πŸ“’ UGI Leaderboard",
890
- className="page-title",
891
- style={'fontSize': '38px'}),
892
- html.H2("Uncensored General Intelligence",
893
- className="page-subtitle"),
894
- ], style={'marginBottom': '30px'}),
895
 
896
  html.Div([
897
- html.Div([
898
- "To filter columns, click the ",
899
- html.I(className="fas fa-search", style={"color": "var(--text-color)"}),
900
- " next to a column's name. On mobile, hold the column name for the menu to appear."
901
- ], style={'marginBottom': '20px', 'color': 'var(--text-color)'})
902
- ], style={'padding': '0 20px'}),
903
-
904
- # Model Type Filter
905
  html.Div([
 
906
  html.Div([
907
- html.Label("Display Models:",
908
- className="model-type-filter"),
909
- dcc.Checklist(
910
- id='model-type-filter',
911
- options=[
912
- {'label': html.Span('Base', style={'color': '#71de5f', 'fontSize': '16.5px'}), 'value': 'Is Foundation'},
913
- {'label': html.Span('Finetune', style={'color': '#f6b10b', 'fontSize': '16.5px'}), 'value': 'Is Finetuned'},
914
- {'label': html.Span('Merge', style={'color': '#f08aff', 'fontSize': '16.5px'}), 'value': 'Is Merged'},
915
- {'label': html.Span('Proprietary', style={'color': '#19cdce', 'fontSize': '16.5px'}), 'value': 'proprietary'}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
916
  ],
917
- value=['Is Foundation', 'Is Finetuned', 'Is Merged', 'proprietary'],
918
- inline=True,
919
- style={'display': 'inline-block'},
920
- labelStyle={'fontWeight': 'normal', 'marginRight': '15px'}
921
  )
922
- ], style={'float': 'left'}),
923
- html.Div([
924
- dcc.Checklist(
925
- id='na-model-filter',
926
- options=[{'label': 'NA Models', 'value': 'show_na'}],
927
- value=[],
928
- inline=True,
929
- style={'display': 'inline-block'},
930
- labelStyle={'fontWeight': 'normal'} # Make sure NA Models isn't bold
931
- )
932
- ], style={'float': 'right'})
933
- ], style={'marginBottom': '10px', 'padding': '0 20px', 'overflow': 'hidden'}),
934
-
935
- # Additional Columns Filter
936
- html.Div([
937
  html.Div([
938
- html.Label("Show Additional Columns:",
939
- className="model-type-filter"),
940
  dcc.Checklist(
941
- id='additional-columns-filter',
942
- options=[
943
- {'label': 'UGI Categories', 'value': 'ugi_categories'},
944
- {'label': 'W/10 Types', 'value': 'w10_types'},
945
- {'label': 'Political Test Axes', 'value': 'political_axes'}
946
- ],
947
  value=[],
948
  inline=True,
949
- style={'display': 'inline-block'},
950
  labelStyle={'fontWeight': 'normal', 'marginRight': '15px'}
951
  )
952
- ], style={'float': 'left'}),
953
- html.Div([
954
- dcc.Checklist(
955
- id='template-filter',
956
- options=[{'label': 'Prompt Template', 'value': 'template'}],
957
- value=[],
958
- inline=True,
959
- style={'display': 'inline-block'},
960
- labelStyle={'fontWeight': 'normal'}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
961
  )
962
- ], style={'float': 'right'})
963
- ], style={'marginBottom': '13px', 'padding': '0 20px', 'overflow': 'hidden'}),
964
-
 
 
 
965
  # Grid
966
  html.Div([
967
  dag.AgGrid(
968
  id='leaderboard-grid',
969
- columnDefs=columnDefs,
970
  rowData=df.to_dict('records'),
 
971
  defaultColDef={
972
- "sortable": True,
973
- "resizable": True,
974
- "filter": "agNumberColumnFilter",
975
- "floatingFilter": False,
976
  "sortingOrder": ['desc', 'asc'],
977
  "filterParams": {
978
  "defaultOption": "between"
979
  },
980
- "comparator": {
981
- "function": """
982
- function(valueA, valueB, nodeA, nodeB, isInverted) {
983
- const isEmptyA = valueA === null || valueA === undefined || valueA === '' || isNaN(valueA);
984
- const isEmptyB = valueB === null || valueB === undefined || valueB === '' || isNaN(valueB);
985
-
986
- // Force empty values to bottom
987
- if (isEmptyA && !isEmptyB) return 1;
988
- if (!isEmptyA && isEmptyB) return -1;
989
- if (isEmptyA && isEmptyB) return 0;
990
-
991
- // Normal comparison for non-empty values
992
- if (typeof valueA === 'number' && typeof valueB === 'number') {
993
- return valueA - valueB;
994
- }
995
- return String(valueA).localeCompare(String(valueB));
996
- }
997
- """
998
- }
999
  },
1000
  dashGridOptions=dashGridOptions,
1001
  dangerously_allow_code=True,
@@ -1006,231 +1144,414 @@ app.layout = html.Div([
1006
  )
1007
  ], style={'marginBottom': '30px'}),
1008
 
 
 
 
 
 
1009
  # Description
1010
  html.Div([
1011
- html.H3("About", style={'fontSize': '22px', 'marginBottom': '0px'}),
1012
-
1013
- html.P([html.Strong("UGI:"), " Uncensored General Intelligence. A benchmark measuring both willingness to answer and accuracy in fact-based contentious questions. The test set is made of roughly 100 questions/tasks, covering topics that are commonly difficult to get LLMs to answer. The leaderboard's questions are kept private in order to avoid the common problem of not knowing if a model is intelligent or if it was just trained on the test questions."],
1014
- style={'marginTop': '7px', 'marginBottom': '4px'}),
1015
-
 
 
 
1016
  html.Details([
1017
- html.Summary("Categories",
1018
- style={
1019
- 'fontWeight': 'normal',
1020
- 'fontSize': '1em',
1021
- 'marginLeft': '20px',
1022
- 'cursor': 'pointer'
1023
- }),
1024
  html.Ul([
1025
- html.Li("Unruly: Taboo underground knowledge and recipes"),
1026
- html.Li("Internet: Knowledge of controversial/explicit web content"),
1027
- html.Li("Societal/Political: Awareness of contentious socio-political topics")
1028
- ], style={'marginTop': '0px', 'marginBottom': '16px', 'marginLeft': '40px'})
1029
- ], style={'marginBottom': '16px'}),
1030
-
1031
- html.P([html.Strong("W/10:"), " Willingness/10. A more narrow subset of the UGI questions, solely focused on measuring how far a model can be pushed before going against its instructions or refusing to answer."], style={'marginBottom': '4px'}),
1032
-
 
 
 
 
 
 
 
 
 
1033
  html.Details([
1034
- html.Summary("Types",
1035
- style={
1036
- 'fontWeight': 'normal',
1037
- 'fontSize': '1em',
1038
- 'marginLeft': '20px',
1039
- 'cursor': 'pointer'
1040
- }),
1041
  html.Ul([
1042
- html.Li("Direct: Measures if the model directly refuses to respond to certain prompts"),
1043
- html.Li("Adherence: Some models might not explicitly refuse to do something, though will still deviate from the instructions as a way of getting out of doing it, or simply due to lack of instruction following capabilities")
1044
- ], style={'marginTop': '0px', 'marginBottom': '16px', 'marginLeft': '40px'})
1045
- ], style={'marginBottom': '16px'}),
1046
-
1047
- html.P([
1048
- "A high UGI but low W/10 could mean for example that the model can provide a lot of accurate sensitive information, but will refuse to form the information into something it sees as offensive or against its rules.",
1049
- html.Br(),
1050
- html.Br()
1051
- ]),
1052
-
1053
- html.P([
1054
- html.Strong("Benchmarks not focused on censorship:"),
1055
- html.Div(style={'margin': '6px 0'}),
1056
- html.Strong("NatInt:"), " Natural Intelligence. A general knowledge quiz covering real-world subjects that llms are not commonly benchmarked on, such as pop culture trivia. This measures if the model understands a diverse range of topics, as opposed to over-training on textbook information and the types of questions commonly tested on benchmarks."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1057
  ]),
1058
-
1059
- html.P([html.Strong("Coding:"), " A simple 50 question quiz measuring how vast a model's programming knowledge is. Each question is worth 2 points."]),
1060
-
1061
- html.P([
1062
- html.Strong("Political Lean:"),
1063
- " Measures a model's tendency to hold left wing vs right wing political beliefs. Ranges between -100% and 100%, where left wing is left of zero (negative) and right wing is right of zero (positive). Uses the axes of the ",
1064
- html.A("12axes",
1065
- href="https://politicaltests.github.io/12axes/",
1066
- target="_blank",
1067
- style={'color': 'var(--link-color)'}
1068
- ),
1069
- " test most aligned with modern left vs right issues: Assimilationist-Multiculturalist, Collectivize-Privatize, and Progressive-Traditional. To see all of the axis scores, select the option above the leaderboard to show all Political Test Axes."
1070
- ], style={'marginBottom': '4px'}),
1071
- html.Ul([
1072
- html.Li("NA if model wasn't capable of answering a sufficient number of questions.")
1073
- ], style={'marginTop': '0px', 'marginBottom': '16px'}),
1074
-
1075
- html.P("Aggregate Political Scores", style={'marginBottom': '4px'}),
1076
- html.Ul([
1077
- html.Li("Govt: Higher = State authority, Lower = Individual liberty"),
1078
- html.Li("Dipl: Higher = Global outlook, Lower = National interests"),
1079
- html.Li("Econ: Higher = Economic equality, Lower = Market freedom"),
1080
- html.Li("Scty: Higher = Progressive values, Lower = Traditional values")
1081
- ], style={'marginTop': '0px', 'marginBottom': '16px'}),
1082
-
1083
- html.Br(),
1084
-
1085
- html.P("All local models are tested using Q6_K.gguf quants.")
1086
  ], style={
1087
  'maxWidth': '1200px',
1088
  'margin': '0 auto',
1089
  'padding': '0 20px',
1090
- 'color': 'var(--text-color)'
 
1091
  }),
1092
 
1093
- # Add 12axes Ideology Descriptions here
1094
- html.Details([
1095
- html.Summary("12axes Ideology Descriptions",
1096
- className="details-summary"),
1097
- html.Div([
1098
- html.I("Only showing ideologies at least one model has.",
1099
- className='ideology-note',
1100
- style={'fontSize': '0.9em'}),
1101
- dcc.Markdown("\n\n".join([
1102
- f"**{ideology}**: {IDEOLOGY_DESCRIPTIONS.get(ideology, 'No description available.')}"
1103
- for ideology in sorted(set(df['Ideology Name'].dropna()))
1104
- if ideology # Skip empty values
1105
- ]), className='markdown-content'),
1106
- html.Div([
1107
- html.A("Source",
1108
- href="https://github.com/politicaltests/politicaltests.github.io/blob/main/12axes/ideologies.js",
1109
- target="_blank",
1110
- className="source-link")
1111
- ], style={'marginTop': '20px'})
1112
- ], style={'paddingTop': '10px'})
1113
- ], style={'marginTop': '30px', 'marginBottom': '50px', 'maxWidth': '1200px', 'margin': '30px auto 80px'})
1114
  ], style={'maxWidth': '100%', 'margin': '0 auto'})
1115
 
 
 
 
 
 
 
 
1116
  @app.callback(
1117
- [Output('leaderboard-grid', 'rowData'),
1118
- Output('model-type-filter', 'value'),
1119
- Output('pinned-models-store', 'data')],
1120
- [Input('model-type-filter', 'value'),
1121
- Input('na-model-filter', 'value'),
1122
- Input('leaderboard-grid', 'pinnedTopRowData')],
1123
  prevent_initial_call=False
1124
  )
1125
- def update_grid(selected_types, show_na, pinned_rows):
1126
- if selected_types is None:
1127
- selected_types = []
1128
-
1129
- if not selected_types:
1130
- return [], selected_types, []
1131
-
1132
- filtered_df = df.copy()
1133
-
1134
- # Get pinned model IDs
1135
- pinned_models = []
1136
- if pinned_rows:
1137
- pinned_models = [row['Model_Display'] for row in pinned_rows]
1138
- # Remove pinned models from the dataframe
1139
- filtered_df = filtered_df[~filtered_df['Model_Display'].isin(pinned_models)]
1140
-
1141
- mask = pd.Series(False, index=filtered_df.index)
1142
 
1143
- # Model type filtering
1144
- if 'Is Finetuned' in selected_types:
1145
- if 'Is Merged' in selected_types:
1146
- mask |= filtered_df['Is Finetuned']
1147
- else:
1148
- mask |= (filtered_df['Is Finetuned'] & ~filtered_df['Is Merged'])
1149
- elif 'Is Merged' in selected_types:
1150
- mask |= filtered_df['Is Merged']
1151
-
1152
- if 'Is Foundation' in selected_types:
1153
- mask |= (filtered_df['Is Foundation'] & ~filtered_df['Total Parameters'].isna())
1154
- if 'proprietary' in selected_types:
1155
- mask |= filtered_df['Total Parameters'].isna()
1156
-
1157
- filtered_df = filtered_df[mask]
1158
-
1159
- # NA filtering
1160
- political_columns = ['Political Lean πŸ“‹', 'govt', 'dipl', 'econ', 'scty'] + AXES_COLS_2
1161
- has_na = filtered_df[political_columns].isna().any(axis=1)
1162
-
1163
- if show_na is None or not show_na:
1164
- filtered_df = filtered_df[~has_na]
1165
-
1166
- # Use the same multi-key sorting as in load_leaderboard_data
1167
- filtered_df = filtered_df.sort_values(
1168
- by=['UGI πŸ†', 'NatInt πŸ’‘', 'Release_Date_Sort'],
1169
- ascending=[False, False, True] # UGI desc, NatInt desc, Release date asc (earliest first)
1170
- )
1171
 
1172
- records = filtered_df.to_dict('records')
 
1173
 
1174
- return records, selected_types, pinned_models
1175
 
1176
  @app.callback(
1177
- Output('leaderboard-grid', 'columnDefs'),
1178
- [Input('additional-columns-filter', 'value'),
1179
- Input('template-filter', 'value')]
 
 
1180
  )
1181
- def update_columns(additional_columns, template_filter):
1182
- # Start with base columns up to UGI column
1183
- current_columns = columnDefs[:6] # Include up to Model column
1184
-
1185
- # Add Template column if selected
1186
- if template_filter and 'template' in template_filter:
1187
- current_columns.append(template_column)
1188
-
1189
- # Rest of the function remains the same...
1190
- current_columns.extend(columnDefs[6:7])
1191
-
1192
- if 'ugi_categories' in additional_columns:
1193
- current_columns.extend(ugi_category_columns)
1194
 
1195
- current_columns.extend(columnDefs[7:8])
1196
-
1197
- if 'w10_types' in additional_columns:
1198
- current_columns.extend(w10_type_columns)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1199
 
1200
- current_columns.extend(columnDefs[8:11])
 
 
 
 
 
 
 
 
 
 
1201
 
1202
- if 'political_axes' in additional_columns:
1203
- current_columns.extend(political_columns)
1204
- current_columns.extend([col for col in columnDefs if col['field'] in AXES_COLS_1])
1205
- current_columns.extend([col for col in columnDefs if col['field'] in AXES_COLS_2])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1206
 
1207
- current_columns.extend([col for col in columnDefs if col['field'] in ['Release Date', 'Test Date']])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1208
 
1209
- return current_columns
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1210
 
1211
 
1212
  if __name__ == '__main__':
1213
- app.run_server(host='0.0.0.0', port=8050)
1214
- app.clientside_callback(
1215
- """
1216
- function(n_clicks) {
1217
- if (!window.gridApi) return;
1218
-
1219
- const pinnedRows = window.gridApi.getGridOption('pinnedTopRowData') || [];
1220
-
1221
- if (pinnedRows.length > 0) {
1222
- const pinnedIds = new Set(pinnedRows.map(row => row.Model_Display));
1223
- const currentRows = [];
1224
- window.gridApi.forEachNode(node => {
1225
- if (!pinnedIds.has(node.data.Model_Display)) {
1226
- currentRows.push(node.data);
1227
- }
1228
- });
1229
- window.gridApi.setGridOption('rowData', currentRows);
1230
- }
1231
- return window.dash_clientside.no_update;
1232
- }
1233
- """,
1234
- Output('leaderboard-grid', 'rowData'),
1235
- Input('model-type-filter', 'value')
1236
- )
 
1
  import dash
2
+ from dash import html, dcc, Input, Output, State, no_update
3
  import dash_ag_grid as dag
4
  import pandas as pd
5
  import numpy as np
6
  from datetime import datetime, timedelta
7
  import base64
8
  import os
9
+ import logging
10
+ import sys
11
+ import json
12
 
13
+ # This setup works with the PYTHONUNBUFFERED=1 environment variable.
14
+ logging.basicConfig(
15
+ level=logging.INFO,
16
+ format="%(asctime)s [%(levelname)s] %(message)s",
17
+ handlers=[
18
+ logging.StreamHandler(sys.stdout)
19
+ ]
20
+ )
21
+
22
+ # Helper function to create a checklist option
23
def create_option(value, label):
    """Return a single checklist option as a ``{'label': ..., 'value': ...}`` dict."""
    option = {'label': label}
    option['value'] = value
    return option
25
+
26
+ # Define groups of columns that will be toggled together
27
+ COLUMN_GROUPS = {
28
+ "uncensored_ugi_cats": ["Hazardous", "Entertainment", "SocPol"],
29
+ "w10_sub_scores": ["W/10-Direct", "W/10-Adherence"],
30
+ "natint_sub_scores": ["Standard", "Pop Culture", "World Model"],
31
+ "writing_repetition_group": [
32
+ "lexical_stuckness", "originality_score", "internal_semantic_redundancy"
33
+ ],
34
+ "writing_style_group": [
35
+ "Readability_Grade_Level", "Verb_to_Noun_Ratio", "Adjective_Adverb_Percentage", "Dialogue_Percentage"
36
+ ],
37
+ "nsfw_dark_group": ["avg_nsfw_score", "avg_dark_score"],
38
+ "length_adherence_group": ["avg_length_error_pct", "creative_writing_wc_exceeded_pct"],
39
+ "politics_agg_group": ["govt", "dipl", "econ", "scty"],
40
+ "politics_axes_group": [
41
+ 'Federal-Unitary', 'Democratic-Autocratic', 'Security-Freedom', 'Nationalism-Internationalism',
42
+ 'Militarist-Pacifist', 'Assimilationist-Multiculturalist', 'Collectivize-Privatize',
43
+ 'Planned-LaissezFaire', 'Isolationism-Globalism', 'Irreligious-Religious',
44
+ 'Progressive-Traditional', 'Acceleration-Bioconservative'
45
+ ],
46
+ "world_model_group": [
47
+ 'wm_recipe_percent_error', 'wm_geoguesser_mae', 'wm_weight_percent_error',
48
+ 'wm_music_mae', 'wm_youtube_view_percent_error', 'Show Rec Score',
49
+ "Show Rec MAE", "Show Rec Pearson", "Show Rec Std Dev Error"
50
+ ],
51
+ }
52
+
53
+ # Define the columns for each preset, using group keys for grouped columns
54
+ PRESET_COLUMNS = {
55
+ "Overview": {
56
+ "UGI πŸ†": "UGI πŸ†", "W/10 πŸ‘": "W/10 πŸ‘", "NatInt πŸ’‘": "NatInt πŸ’‘", "Writing ✍️": "Writing ✍️",
57
+ "Political Lean πŸ“‹": "Political Lean πŸ“‹"
58
+ },
59
+ "Uncensored": {
60
+ "UGI πŸ†": "UGI πŸ†",
61
+ "uncensored_ugi_cats": "UGI Categories",
62
+ "W/10 πŸ‘": "W/10 πŸ‘",
63
+ "w10_sub_scores": "W/10 Categories"
64
+ },
65
+ "Intelligence": {
66
+ "NatInt πŸ’‘": "NatInt πŸ’‘", "natint_sub_scores": "NatInt Categories",
67
+ "world_model_group": "World Model Tests",
68
+ },
69
+ "Writing": {
70
+ "Writing ✍️": "Writing ✍️",
71
+ "nsfw_dark_group": "NSFW / Dark Scores",
72
+ "writing_style_group": "Stylistic Metrics",
73
+ "writing_repetition_group": "Repetition Metrics",
74
+ "length_adherence_group": "Length Adherence",
75
+ "avg_writing_style_score": "Style Adherence",
76
+ },
77
+ "Politics": {
78
+ "Political Lean πŸ“‹": "Political Lean πŸ“‹", "12axes Ideology": "Ideology",
79
+ "politics_agg_group": "Aggregate Scores",
80
+ "politics_axes_group": "12 Axes Scores"
81
+ }
82
+ }
83
+
84
+ # Create the checklist options from the preset definitions
85
+ PRESET_OPTIONS = {
86
+ preset: [create_option(col, label) for col, label in cols.items()]
87
+ for preset, cols in PRESET_COLUMNS.items()
88
+ }
89
+
90
+ # Define other toggleable columns that are not part of presets
91
+ OTHER_TOGGLES = {
92
+ "Prompt Template": "Template",
93
+ "Architecture": "Architecture",
94
+ "Avg Thinking Chars": "Avg Thinking Chars"
95
+ }
96
 
97
def load_leaderboard_data(csv_file_path):
    """Load the leaderboard CSV and derive the helper columns used by the grid.

    Args:
        csv_file_path: Path of the CSV file to read.

    Returns:
        A DataFrame sorted by UGI (descending), NatInt (descending) and
        release date (ascending). If anything goes wrong while loading or
        processing, the error is printed and an empty DataFrame is returned.
    """
    try:
        # Load the CSV without special boolean handling first
        df = pd.read_csv(csv_file_path, na_values=['NA'])

        # Defensive: remove any leading/trailing whitespace from headers
        df.columns = df.columns.str.strip()

        if 'Is Thinking Model' in df.columns:
            # astype(str) turns NaN into the string 'nan', which simply
            # compares unequal to 'TRUE', so no separate fillna is needed.
            df['Is Thinking Model'] = (
                df['Is Thinking Model'].astype(str).str.strip().str.upper() == 'TRUE'
            )
        else:
            df['Is Thinking Model'] = False

        # Add type sort value
        def get_type_sort_value(row):
            if pd.isna(row['Total Parameters']):
                return 3  # P (Proprietary)
            if row['Is Foundation'] and not row['Is Merged']:
                return 0  # B (Base)
            if row['Is Merged']:
                return 2  # M (Merge)
            if row['Is Finetuned'] and not row['Is Merged']:
                return 1  # F (Finetune)
            return 4  # Unknown

        df['model_type_sort'] = df.apply(get_type_sort_value, axis=1)
        df['type'] = df['model_type_sort']

        # Convert date columns to datetime
        for col in ['Release Date', 'Test Date']:
            df[col] = pd.to_datetime(df[col], format='%m/%d/%Y', errors='coerce')

        # Store original release date for sorting
        df['Release_Date_Sort'] = df['Release Date']

        # Format dates as strings for display
        df['Release Date'] = df['Release Date'].dt.strftime('%Y-%m-%d')
        df['Test Date'] = df['Test Date'].dt.strftime('%Y-%m-%d')

        # Cutoff for the 'new' emoji. The window is currently 2 days
        # (marked as a temporary value by the author).
        new_badge_window_days = 2
        new_cutoff = (
            datetime.now() - timedelta(days=new_badge_window_days)
        ).strftime('%Y-%m-%d')
        # ISO-formatted date strings compare chronologically as plain strings.
        df['is_new'] = df.apply(
            lambda row: 'πŸ†•' if pd.notna(row["Test Date"]) and row["Test Date"] >= new_cutoff else '',
            axis=1
        )

        # Store model name and link separately
        df['Model_Link'] = df['Model Link'].fillna('')
        df['Model_Display'] = df['author/model_name']

        # Add pinned and selected columns
        df['pinned'] = False
        df['selected'] = False

        # Flatten the list of political columns, expanding group keys into
        # their actual column names
        all_politics_individual_cols = []
        for key in PRESET_COLUMNS['Politics']:
            if key in COLUMN_GROUPS:
                all_politics_individual_cols.extend(COLUMN_GROUPS[key])
            else:
                all_politics_individual_cols.append(key)

        # Now, process only the real columns that are percentages
        percentage_columns = [
            col for col in all_politics_individual_cols if col != '12axes Ideology'
        ]
        for col in percentage_columns:
            if col in df.columns:  # Check if the column exists before processing
                df[col] = pd.to_numeric(df[col].astype(str).str.rstrip('%'), errors='coerce')

        # Replace NaN with large/small numbers for sorting, which serialize
        # reliably to JSON.
        #   Higher is better -> very small placeholder (sorts last descending)
        #   Lower is better  -> very large placeholder (sorts last ascending)
        # Assign the result back instead of calling fillna(inplace=True) on a
        # column selection: the chained in-place form is deprecated and does
        # not update the frame under pandas Copy-on-Write. Missing columns are
        # skipped so one absent metric does not abort the whole load.
        sort_placeholders = {
            'Show Rec Score': -99999,
            'Show Rec Pearson': -99999,
            'Show Rec MAE': 99999,
            'Show Rec Std Dev Error': 99999,
        }
        for col, placeholder in sort_placeholders.items():
            if col in df.columns:
                df[col] = df[col].fillna(placeholder)

        # Sort with multiple keys
        df = df.sort_values(
            by=['UGI πŸ†', 'NatInt πŸ’‘', 'Release_Date_Sort'],
            ascending=[False, False, True]
        )

        return df
    except Exception as e:
        print(f"Error loading CSV file: {e}")
        # Print the full traceback to help debug future issues
        import traceback
        traceback.print_exc()
        return pd.DataFrame()
186
 
187
  def load_ideology_descriptions():
 
244
  --secondary-text: #666;
245
  --pinned-bg: #f5f5f5;
246
  --border-color: #ccc;
247
+ --preset-bg: #f9f9f9;
248
  }
249
  @media (prefers-color-scheme: dark) {
250
  :root {
 
256
  --secondary-text: #8b949e;
257
  --pinned-bg: #1c2128;
258
  --border-color: #30363d;
259
+ --preset-bg: #1c2128;
260
  color-scheme: dark;
261
  }
262
  .ag-theme-alpine .ag-menu {
 
348
  --ag-foreground-color: var(--text-color);
349
  --ag-row-border-color: var(--grid-border);
350
  }
351
+ body .ag-theme-alpine .ag-header-cell,
352
+ body .ag-theme-alpine .ag-cell {
353
+ padding-left: 10px;
354
+ padding-right: 10px;
355
+ }
356
  .ag-theme-alpine .ag-pinned-left-header,
357
  .ag-theme-alpine .ag-cell-last-left-pinned {
358
  border-right: 2px solid var(--grid-border) !important;
 
415
  .border-right {
416
  border-right: 2px solid var(--grid-border) !important;
417
  }
418
+ .border-left-dashed {
419
+ border-left: 2px dashed var(--grid-border) !important;
420
+ margin-left: -2px !important;
421
+ }
422
+
423
+ .preset-container {
424
+ display: flex;
425
+ flex-direction: row;
426
+ gap: 15px;
427
+ justify-content: space-between;
428
+ }
429
+ .preset-column {
430
+ flex: 1;
431
+ padding: 10px;
432
+ border: 1px solid var(--grid-border);
433
+ border-radius: 8px;
434
+ background-color: var(--preset-bg);
435
+ }
436
+ .preset-selector label {
437
+ font-size: 1.2em !important;
438
+ display: flex;
439
+ align-items: center;
440
+ }
441
+ .preset-selector input[type="radio"] {
442
+ width: 1.1em;
443
+ height: 1.1em;
444
+ }
445
+ .preset-selector .dash-radioitems {
446
+ font-size: 1.2em;
447
+ }
448
+ .preset-checklist {
449
+ margin-top: 10px;
450
+ padding-left: 5px;
451
+ }
452
+
453
  /* Link Styles */
454
  .model-link {
455
  color: var(--link-color) !important;
 
566
  .split-header-top, .split-header-bottom {
567
  white-space: nowrap;
568
  }
569
+
570
+ @media (max-width: 800px) {
571
+ .ag-theme-alpine .ag-pinned-left-cols-container,
572
+ .ag-theme-alpine .ag-pinned-left-header {
573
+ position: static !important;
574
+ box-shadow: none !important;
575
+ border-right: none !important;
576
+ }
577
+ .ag-theme-alpine .ag-cell-last-left-pinned {
578
+ border-right: 1px solid var(--grid-border) !important;
579
+ }
580
+ }
581
+ #model-type-filter .dash-checklist-item {
582
+ font-weight: normal;
583
+ margin-right: 15px;
584
+ }
585
+ /* This rule adds the divider to the container of the last item */
586
+ #model-type-filter .dash-checklist-item:last-of-type {
587
+ border-left: 1px solid var(--border-color);
588
+ margin-left: 10px;
589
+ padding-left: 15px;
590
+ }
591
+
592
+ .center-aligned-header .ag-header-cell-label {
593
+ justify-content: center !important;
594
+ }
595
+ .ag-header-cell[col-id="pinned"],
596
+ .ag-cell[col-id="pinned"],
597
+ .ag-header-cell[col-id="is_new"],
598
+ .ag-cell[col-id="is_new"],
599
+ .ag-header-cell[col-id="R"],
600
+ .ag-cell[col-id="R"],
601
+ .ag-header-cell[col-id="type"],
602
+ .ag-cell[col-id="type"] {
603
+ padding-left: 0px !important;
604
+ padding-right: 0px !important;
605
  display: flex !important;
606
  align-items: center !important;
607
+ justify-content: center !important;
608
+ text-align: center !important;
609
+ }
610
+
611
+ .ag-cell[col-id="is_new"] {
612
+ padding-left: 0px !important;
613
+ padding-right: 0px !important;
614
+ font-size: 18px;
615
+ display: flex;
616
+ align-items: center;
617
+ justify-content: center;
618
+ }
619
+
620
+ .header-optimal-len-err .ag-header-cell-text::after {
621
+ content: ' (0)';
622
+ color: #CC5500 !important;
623
+ font-weight: normal;
624
+ font-family: inherit;
625
+ font-size: inherit;
626
+ }
627
+ .header-optimal-natint .ag-header-cell-text::after {
628
+ content: ' (higher)';
629
+ color: #CC5500 !important;
630
+ font-weight: normal;
631
+ font-family: inherit;
632
+ font-size: inherit;
633
+ }
634
+ .header-optimal-orig .ag-header-cell-text::after {
635
+ content: ' (higher)';
636
+ color: #CC5500 !important;
637
+ font-weight: normal;
638
+ font-family: inherit;
639
+ font-size: inherit;
640
+ }
641
+ .header-optimal-sem-red .ag-header-cell-text::after {
642
+ content: ' (lower)';
643
+ color: #CC5500 !important;
644
+ font-weight: normal;
645
+ font-family: inherit;
646
+ font-size: inherit;
647
+ }
648
+ .header-optimal-lex-stuck .ag-header-cell-text::after {
649
+ content: ' (lower)';
650
+ color: #CC5500 !important;
651
+ font-weight: normal;
652
+ font-family: inherit;
653
+ font-size: inherit;
654
+ }
655
+ .header-optimal-adj-adv .ag-header-cell-text::after {
656
+ content: ' (~13.5)';
657
+ color: #CC5500 !important;
658
+ font-weight: normal;
659
+ font-family: inherit;
660
+ font-size: inherit;
661
+ }
662
+ .header-optimal-read-grade .ag-header-cell-text::after {
663
+ content: ' (~5.5)';
664
+ color: #CC5500 !important;
665
+ font-weight: normal;
666
+ font-family: inherit;
667
+ font-size: inherit;
668
+ }
669
+ .header-optimal-dialogue .ag-header-cell-text::after {
670
+ content: ' (~60)';
671
+ color: #CC5500 !important;
672
+ font-weight: normal;
673
+ font-family: inherit;
674
+ font-size: inherit;
675
  }
676
  </style>
677
  </head>
 
689
  # Load data
690
  df = load_leaderboard_data("ugi-leaderboard-data.csv")
691
 
692
+ def create_numeric_column(field, width=125, **kwargs):
693
+ base_classes = "ag-left-aligned-cell"
694
+ custom_class = kwargs.get("cellClass", "")
695
+ if isinstance(custom_class, list):
696
+ custom_class = " ".join(custom_class)
697
+ final_cell_class = f"{base_classes} {custom_class}".strip()
698
+ incoming_filter_params = kwargs.pop('filterParams', {})
699
+
700
  column = {
701
  "field": field,
702
  "width": width,
 
705
  "defaultOption": "inRange",
706
  "filterOptions": ['equals', 'notEqual', 'greaterThan', 'greaterThanOrEqual', 'lessThan', 'lessThanOrEqual', 'inRange']
707
  },
708
+ "valueFormatter": {"function": "params.value == null ? '' : String(params.value)"},
709
  "headerClass": "ag-left-aligned-header wrap-text",
710
+ "cellClass": final_cell_class,
711
  "wrapHeaderText": True,
712
  "autoHeaderHeight": True,
713
  "suppressSizeToFit": True,
714
  "sortingOrder": ['desc', 'asc'],
 
 
 
 
 
 
 
 
 
715
  }
716
+ column['filterParams'].update(incoming_filter_params)
717
+ column.update(kwargs)
 
 
 
 
 
 
 
718
  return column
719
+
720
+ def create_text_column(field, width=120, **kwargs):
721
+ base_classes = "ag-left-aligned-cell"
722
+ custom_class = kwargs.get("cellClass", "")
723
+ if isinstance(custom_class, list):
724
+ custom_class = " ".join(custom_class)
725
+ final_cell_class = f"{base_classes} {custom_class}".strip()
726
+ incoming_filter_params = kwargs.pop('filterParams', {})
727
+
728
+ column = {
729
  "field": field,
730
  "width": width,
731
  "filter": "agTextColumnFilter",
 
734
  "filterOptions": ['contains', 'notContains', 'startsWith', 'endsWith']
735
  },
736
  "headerClass": "ag-left-aligned-header wrap-text",
737
+ "cellClass": final_cell_class,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
738
  "wrapHeaderText": True,
739
  "autoHeaderHeight": True,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
740
  }
741
+ column['filterParams'].update(incoming_filter_params)
742
+ column.update(kwargs)
743
+ return column
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
744
 
745
  template_with_split_header = """
746
  <div class="ag-cell-label-container" role="presentation">
 
760
  </div>
761
  """
762
 
763
+ template_with_optimal_value = """
764
+ <div class="ag-cell-label-container" role="presentation">
765
+ <span ref="eMenu" class="ag-header-icon ag-header-cell-menu-button"></span>
766
+ <div ref="eLabel" class="ag-header-cell-label" role="presentation">
767
+
768
+ <!-- This new wrapper div takes the place of the original text span. -->
769
+ <!-- It inherits the expanding behavior, but acts as a container. -->
770
+ <div class="ag-header-cell-text" style="display: flex; align-items: center;">
771
+
772
+ <!-- The ref="eText" span is now INSIDE our container. AG Grid will still populate it with the column name. -->
773
+ <span ref="eText"></span>
774
+
775
+ <!-- The optimal value is its sibling, so it will always stay right next to it. -->
776
+ <span style="color: red; font-weight: normal; padding-left: 5px; white-space: nowrap;">{optimal}</span>
777
+ </div>
778
+ <!-- The icons remain outside the wrapper, positioned correctly to the right. -->
779
+ <span ref="eSortOrder" class="ag-header-icon ag-sort-order"></span>
780
+ <span ref="eSortAsc" class="ag-header-icon ag-sort-ascending-icon"></span>
781
+ <span ref="eSortDesc" class="ag-header-icon ag-sort-descending-icon"></span>
782
+ <span ref="eSortNone" class="ag-header-icon ag-sort-none-icon"></span>
783
+ <span ref="eFilter" class="ag-header-icon ag-filter-icon"></span>
784
+ </div>
785
+ </div>
786
+ """
787
+
788
+ # This master list defines the final, non-negotiable order of columns in the grid.
789
+ MASTER_COLUMN_ORDER = [
790
+ "pinned", "is_new", "R", "Avg Thinking Chars", "#P", "type", "Model_Display",
791
+ # Other Toggles
792
+ "Prompt Template", "Architecture",
793
+ # Uncensored
794
+ "UGI πŸ†", "Hazardous", "Entertainment", "SocPol",
795
+ "W/10 πŸ‘", "W/10-Direct", "W/10-Adherence",
796
+ # Intelligence
797
+ "NatInt πŸ’‘",
798
+ "Standard", "Pop Culture", "World Model",
799
+ 'wm_recipe_percent_error', 'wm_geoguesser_mae', 'wm_weight_percent_error',
800
+ 'wm_music_mae', 'wm_youtube_view_percent_error',
801
+ "Show Rec Score", # Main Score
802
+ "Show Rec MAE", "Show Rec Pearson", "Show Rec Std Dev Error",
803
+ # Writing
804
+ "Writing ✍️",
805
+ "avg_nsfw_score", "avg_dark_score",
806
+ "Readability_Grade_Level", "Verb_to_Noun_Ratio", "Adjective_Adverb_Percentage", "Dialogue_Percentage",
807
+ "lexical_stuckness", "originality_score", "internal_semantic_redundancy",
808
+ "avg_length_error_pct", "creative_writing_wc_exceeded_pct",
809
+ "avg_writing_style_score",
810
+ # Politics
811
+ "Political Lean πŸ“‹",
812
+ "12axes Ideology", "govt", "dipl", "econ", "scty",
813
+ 'Federal-Unitary', 'Democratic-Autocratic', 'Security-Freedom', 'Nationalism-Internationalism',
814
+ 'Militarist-Pacifist', 'Assimilationist-Multiculturalist', 'Collectivize-Privatize',
815
+ 'Planned-LaissezFaire', 'Isolationism-Globalism', 'Irreligious-Religious',
816
+ 'Progressive-Traditional', 'Acceleration-Bioconservative',
817
+ # Always at the end
818
+ "Release Date", "Test Date"
819
+ ]
820
+
821
+ # Master dictionary containing definitions for ALL possible columns
822
+ ALL_COLUMN_DEFS = {
823
+ # --- Always Visible ---
824
+ "pinned": {"headerName": "πŸ“Œ", "field": "pinned", "width": 40, "minWidth": 40, "filter": False, "suppressMenu": True, "cellRenderer": "PinRenderer", "suppressSizeToFit": True, "headerClass": "center-aligned-header"},
825
+ "is_new": {"headerName": "", "field": "is_new", "width": 30, "minWidth": 30, "filter": False, "suppressMenu": True, "suppressSizeToFit": True},
826
+ "R": {"headerName": "R", "field": "Is Thinking Model", "cellRenderer": "ReasoningRenderer", "width": 34, "minWidth": 34, "filter": False, "suppressMenu": True, "sortable": True, "suppressSizeToFit": True, "headerClass": "center-aligned-header"},
827
+ "#P": {"field": "#P", "width": 105, "filter": "agNumberColumnFilter", "filterParams": {"defaultOption": "equals"}, "headerClass": "ag-left-aligned-header wrap-text", "cellClass": "ag-right-aligned-cell", "wrapHeaderText": True, "autoHeaderHeight": True, "suppressSizeToFit": True, "sortingOrder": ['desc', 'asc']},
828
+ "type": {"headerName": "T", "field": "type", "width": 32, "minWidth": 32, "filter": False, "suppressMenu": True, "cellRenderer": "TypeRenderer", "sortable": True, "sortingOrder": ['asc', 'desc'], "suppressSizeToFit": True, "headerClass": "center-aligned-header"},
829
+ "Model_Display": {"field": "Model_Display", "headerName": "Model", "cellRenderer": "ModelLink", "filter": "agTextColumnFilter", "filterParams": {"defaultOption": "contains"}, "width": 395, "suppressMenu": False, "headerClass": "ag-left-aligned-header wrap-text", "wrapHeaderText": True, "autoHeaderHeight": True},
830
+ "Release Date": {"field": "Release Date", "width": 105, "filter": "agDateColumnFilter", "filterParams": {"browserDatePicker": True, "inRangeInclusive": True, "defaultOption": "greaterThan"}, "cellClass": ["ag-left-aligned-cell", "border-left"], "headerClass": "ag-left-aligned-header wrap-text", "wrapHeaderText": True, "autoHeaderHeight": True, "sortable": True},
831
+ "Test Date": {"field": "Test Date", "width": 105, "filter": "agDateColumnFilter", "filterParams": {"browserDatePicker": True, "inRangeInclusive": True, "defaultOption": "greaterThan"}, "cellClass": "ag-left-aligned-cell", "headerClass": "ag-left-aligned-header wrap-text", "wrapHeaderText": True, "autoHeaderHeight": True, "sortable": True},
832
+
833
+ # --- Main Scores (Overview Columns) ---
834
+ "UGI πŸ†": create_numeric_column("UGI πŸ†", headerName="UGI πŸ†", width=120, filterParams={"defaultOption": "greaterThanOrEqual"}),
835
+ "W/10 πŸ‘": create_numeric_column("W/10 πŸ‘", headerName="W/10 πŸ‘", width=130, filterParams={"defaultOption": "greaterThanOrEqual"}),
836
+ "NatInt πŸ’‘": create_numeric_column("NatInt πŸ’‘", headerName="NatInt πŸ’‘", width=140, filterParams={"defaultOption": "greaterThanOrEqual"}),
837
+ "Writing ✍️": create_numeric_column("Writing ✍️", headerName="Writing ✍️", width=135, filterParams={"defaultOption": "greaterThanOrEqual"}),
838
+ "Political Lean πŸ“‹": create_numeric_column("Political Lean πŸ“‹", headerName="Political Lean πŸ“‹", width=135, valueFormatter={"function": "params.value == null ? '' : params.value.toFixed(1) + '%'"}, filterParams={"defaultOption": "inRange"}),
839
+
840
+ # --- UGI Categories ---
841
+ "Hazardous": create_numeric_column("Hazardous", width=120, filterParams={"defaultOption": "greaterThanOrEqual"}),
842
+ "Entertainment": create_numeric_column("Entertainment", width=122, filterParams={"defaultOption": "greaterThanOrEqual"}),
843
+ "SocPol": create_numeric_column("SocPol", width=120, filterParams={"defaultOption": "greaterThanOrEqual"}),
844
+
845
+ # --- W/10 Types ---
846
+ "W/10-Direct": create_numeric_column("W/10-Direct", width=120, filterParams={"defaultOption": "greaterThanOrEqual"}),
847
+ "W/10-Adherence": create_numeric_column("W/10-Adherence", width=120, filterParams={"defaultOption": "greaterThanOrEqual"}),
848
+
849
+ # --- NatInt Categories ---
850
+ "Standard": create_numeric_column("Standard", width=120, cellClass="border-left", filterParams={"defaultOption": "greaterThanOrEqual"}),
851
+ "Pop Culture": create_numeric_column("Pop Culture", width=120, filterParams={"defaultOption": "greaterThanOrEqual"}),
852
+ "World Model": create_numeric_column("World Model", width=120, filterParams={"defaultOption": "greaterThanOrEqual"}),
853
+ 'wm_recipe_percent_error': create_numeric_column('wm_recipe_percent_error', headerName="Cooking (% Error)", width=120, cellClass="border-left", filterParams={"defaultOption": "lessThanOrEqual"}, sortingOrder=['asc', 'desc']),
854
+ 'wm_geoguesser_mae': create_numeric_column('wm_geoguesser_mae', headerName="GeoGuesser (km Error)", width=120, filterParams={"defaultOption": "lessThanOrEqual"}, sortingOrder=['asc', 'desc']),
855
+ 'wm_weight_percent_error': create_numeric_column('wm_weight_percent_error', headerName="Weight (% Error)", width=120, filterParams={"defaultOption": "lessThanOrEqual"}, sortingOrder=['asc', 'desc']),
856
+ 'wm_music_mae': create_numeric_column('wm_music_mae', headerName="Music (Error)", width=120, filterParams={"defaultOption": "lessThanOrEqual"}, sortingOrder=['asc', 'desc']),
857
+ 'wm_youtube_view_percent_error': create_numeric_column('wm_youtube_view_percent_error', headerName="YouTube Views (% Error)", width=126, filterParams={"defaultOption": "lessThanOrEqual"}, sortingOrder=['asc', 'desc']),
858
+ "Show Rec Score": create_numeric_column(
859
+ "Show Rec Score",
860
+ headerName="Show Rec Score",
861
+ width=120,
862
+ filterParams={"defaultOption": "greaterThanOrEqual"},
863
+ valueFormatter={"function": "params.value === -99999 ? '' : String(params.value)"}
864
+ ),
865
+ "Show Rec MAE": create_numeric_column(
866
+ "Show Rec MAE",
867
+ headerName="Show Rec MAE",
868
+ width=120,
869
+ filterParams={"defaultOption": "lessThanOrEqual"},
870
+ sortingOrder=['asc', 'desc'],
871
+ valueFormatter={"function": "params.value === 99999 ? '' : String(params.value)"},
872
+ cellClass="border-left-dashed"
873
+ ),
874
+ "Show Rec Pearson": create_numeric_column(
875
+ "Show Rec Pearson",
876
+ headerName="Show Rec Pearson",
877
+ width=120,
878
+ filterParams={"defaultOption": "greaterThanOrEqual"},
879
+ # Add this formatter to hide the placeholder
880
+ valueFormatter={"function": "params.value === -99999 ? '' : String(params.value)"}
881
+ ),
882
+ "Show Rec Std Dev Error": create_numeric_column(
883
+ "Show Rec Std Dev Error",
884
+ headerName="Show Rec Std Dev Error",
885
+ width=120,
886
+ filterParams={"defaultOption": "lessThanOrEqual"},
887
+ sortingOrder=['asc', 'desc'],
888
+ # Add this formatter to hide the placeholder
889
+ valueFormatter={"function": "params.value === 99999 ? '' : String(params.value)"}
890
+ ),
891
 
892
+ # --- Writing Categories ---
893
+ "avg_nsfw_score": create_numeric_column("avg_nsfw_score", headerComponentParams={"template": template_with_split_header.format(high='NSFW', low='SFW')}, width=105, cellClass="border-left", filterParams={"defaultOption": "greaterThanOrEqual"}),
894
+ "avg_dark_score": create_numeric_column("avg_dark_score", headerComponentParams={"template": template_with_split_header.format(high='Dark', low='Tame')}, width=105, filterParams={"defaultOption": "greaterThanOrEqual"}),
895
+ "Dialogue_Percentage": create_numeric_column("Dialogue_Percentage", headerName="Dialogue %", width=110, filterParams={"defaultOption": "greaterThanOrEqual"}),
896
+ "Verb_to_Noun_Ratio": create_numeric_column("Verb_to_Noun_Ratio", headerName="Verb/Noun Ratio", width=123, filterParams={"defaultOption": "inRange"}),
897
+ "Adjective_Adverb_Percentage": create_numeric_column("Adjective_Adverb_Percentage", headerName="Adj&Adv %", width=115, filterParams={"defaultOption": "inRange"}),
898
+ "Readability_Grade_Level": create_numeric_column("Readability_Grade_Level", headerName="Readability Grade", width=124, cellClass="border-left", filterParams={"defaultOption": "inRange"}, sortingOrder=['desc', 'asc']),
899
+ "avg_writing_style_score": create_numeric_column("avg_writing_style_score", headerName="Style Adherence", width=121, cellClass="border-left", filterParams={"defaultOption": "greaterThanOrEqual"}),
900
+ "avg_length_error_pct": create_numeric_column("avg_length_error_pct", headerName="Length Error %", width=113, cellClass="border-left", filterParams={"defaultOption": "lessThanOrEqual"}, sortingOrder=['asc', 'desc']),
901
+ "creative_writing_wc_exceeded_pct": create_numeric_column("creative_writing_wc_exceeded_pct", headerName="Exceeded %", width=118, filterParams={"defaultOption": "inRange"}),
902
+ "originality_score": create_numeric_column("originality_score", headerName="Originality", width=120, filterParams={"defaultOption": "greaterThanOrEqual"}),
903
+ "internal_semantic_redundancy": create_numeric_column("internal_semantic_redundancy", headerName="Semantic Redundancy", width=125, filterParams={"defaultOption": "lessThanOrEqual"}, sortingOrder=['asc', 'desc']),
904
+ "lexical_stuckness": create_numeric_column("lexical_stuckness", headerName="Lexical Stuckness", width=118, cellClass="border-left", filterParams={"defaultOption": "lessThanOrEqual"}, sortingOrder=['asc', 'desc']),
905
+
906
+ # --- Politics ---
907
+ "12axes Ideology": create_text_column("12axes Ideology", width=170, cellClass="border-left", filterParams={"defaultOption": "contains"}),
908
+ "govt": create_numeric_column("govt", width=105, valueFormatter={"function": "params.value == null ? '' : params.value.toFixed(1) + '%'"}, cellClass="border-left", filterParams={"defaultOption": "inRange"}),
909
+ "dipl": create_numeric_column("dipl", width=105, valueFormatter={"function": "params.value == null ? '' : params.value.toFixed(1) + '%'"}, filterParams={"defaultOption": "inRange"}),
910
+ "econ": create_numeric_column("econ", width=105, valueFormatter={"function": "params.value == null ? '' : params.value.toFixed(1) + '%'"}, filterParams={"defaultOption": "inRange"}),
911
+ "scty": create_numeric_column("scty", width=105, valueFormatter={"function": "params.value == null ? '' : params.value.toFixed(1) + '%'"}, filterParams={"defaultOption": "inRange"}),
912
+ **{
913
+ col: create_numeric_column(
914
+ col,
915
+ headerComponentParams={"template": template_with_split_header.format(high=col.split('-')[0], low=col.split('-')[1])},
916
+ width=175,
917
+ valueFormatter={"function": "params.value == null ? '' : params.value.toFixed(1) + '%'"},
918
+ cellClass="border-left" if i == 0 else "",
919
+ filterParams={"defaultOption": "inRange"}
920
+ ) for i, col in enumerate(COLUMN_GROUPS["politics_axes_group"])
 
 
 
 
 
921
  },
922
+
923
+ # --- Other Toggles ---
924
+ "Prompt Template": create_text_column("Prompt Template", width=160, filterParams={"defaultOption": "contains"}),
925
+ "Architecture": create_text_column("Architecture", width=160, filterParams={"defaultOption": "contains"}),
926
+ "Avg Thinking Chars": create_numeric_column("Avg Thinking Chars", width=120, filterParams={"defaultOption": "greaterThanOrEqual"}, valueFormatter={"function": "params.value === 0 ? '' : params.value"}),
927
+ }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
928
 
929
  # Define the grid options with postSort
930
  dashGridOptions = {
 
938
  "pinnedTopRowData": [],
939
  "suppressMaintainUnsortedOrder": True,
940
  "suppressMultiSort": True,
941
+ # "maintainColumnOrder": True,
942
  "rowBuffer": 10,
943
  "maxBlocksInCache": 2,
944
  "icons": {
945
  "menu": '<i class="fas fa-search" style="color: var(--text-color)"></i>'
946
  },
947
+ "theme": "ag-theme-alpine-dark" if "prefers-color-scheme: dark" else "ag-theme-alpine"
 
 
 
 
 
 
 
 
 
948
  }
949
 
950
def get_initial_column_defs():
    """Generates the column definitions for the initial page load.

    Walks MASTER_COLUMN_ORDER, copies each definition found in
    ALL_COLUMN_DEFS, and sets its ``hide``/``pinned`` state. The primary
    score column gets a descending sort, and the main score columns get a
    ``border-left`` cell class.

    Returns:
        A list of column-definition dicts in master order. The dicts are
        shallow copies, so the top-level entries of ALL_COLUMN_DEFS are not
        modified.
    """
    def _with_css_class(existing, css_class):
        # Compare whole class tokens rather than substrings, so that e.g.
        # "border-left-dashed" is not mistaken for "border-left". A list of
        # classes is extended as a new list instead of being formatted into
        # a string.
        if isinstance(existing, (list, tuple)):
            if css_class in existing:
                return existing
            return list(existing) + [css_class]
        current = existing or ''
        if css_class in current.split():
            return existing
        return f"{current} {css_class}".strip()

    visible_cols = {"pinned", "is_new", "R", "#P", "type", "Model_Display", "Release Date", "Test Date"}
    visible_cols.update(PRESET_COLUMNS['Overview'].keys())

    primary_sort_col = "UGI πŸ†"
    pinned_cols = {"pinned", "is_new", "R", "Avg Thinking Chars", "#P", "type", "Model_Display"}
    border_cols = {"UGI πŸ†", "NatInt πŸ’‘", "Writing ✍️", "Political Lean πŸ“‹"}

    initial_defs = []
    for col_name in MASTER_COLUMN_ORDER:
        if col_name not in ALL_COLUMN_DEFS:
            continue

        if col_name == "Writing ✍️":
            # NOTE(review): this column is built by hand instead of being
            # copied from ALL_COLUMN_DEFS (the original comment calls it a
            # "test column"), so it carries no filter or formatter settings.
            # Confirm whether this override is still wanted.
            col_def = {
                "field": "Writing ✍️",
                "headerName": "Writing ✍️",
                "width": 135
            }
        else:
            # Shallow copy so the master definition is left untouched.
            col_def = ALL_COLUMN_DEFS[col_name].copy()

        col_def['hide'] = col_name not in visible_cols
        col_def['pinned'] = 'left' if col_name in pinned_cols else None

        field = col_def.get('field')
        if field == primary_sort_col:
            col_def['sort'] = 'desc'
            col_def['sortIndex'] = 0

        if field in border_cols:
            col_def['cellClass'] = _with_css_class(col_def.get('cellClass', ''), 'border-left')

        initial_defs.append(col_def)

    return initial_defs
993
+
994
  # Define the layout
995
  app.layout = html.Div([
996
+ dcc.Location(id='url', refresh=False),
997
  dcc.Store(id='pinned-models-store', data=[]),
998
 
999
  # Header
 
1023
  )
1024
  ], style={'float': 'right'})
1025
  ], style={'overflow': 'hidden', 'marginBottom': '20px', 'padding': '0 20px'}),
1026
+
1027
+ html.H1("πŸ“’ UGI Leaderboard", className="page-title", style={'fontSize': '38px'}),
1028
+ html.H2("Uncensored General Intelligence", className="page-subtitle"),
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1029
 
1030
  html.Div([
1031
+ "To filter columns, click the ", html.I(className="fas fa-search"), " icon. On mobile, hold the column name for the menu to appear."
1032
+ ], style={'marginTop': '20px', 'marginBottom': '20px', 'padding': '0 20px'}),
1033
+
1034
+ # --- TOP FILTER SECTION ---
 
 
 
 
1035
  html.Div([
1036
+ # Left side: Model Type
1037
  html.Div([
1038
+ # The label is now a direct child, so it will appear on its own line above the checklists.
1039
+ html.Label("Display Models:", className="model-type-filter"),
1040
+
1041
+ # A new sub-container for the interactive elements, using flexbox for horizontal alignment.
1042
+ html.Div(
1043
+ [
1044
+ # Checklist for the main types
1045
+ dcc.Checklist(
1046
+ id='model-type-filter-main',
1047
+ options=[
1048
+ {'label': html.Span('Base', style={'color': '#71de5f'}), 'value': 'Is Foundation'},
1049
+ {'label': html.Span('Finetune', style={'color': '#f6b10b'}), 'value': 'Is Finetuned'},
1050
+ {'label': html.Span('Merge', style={'color': '#f08aff'}), 'value': 'Is Merged'},
1051
+ {'label': html.Span('Proprietary', style={'color': '#19cdce'}), 'value': 'proprietary'},
1052
+ ],
1053
+ value=['Is Foundation', 'Is Finetuned', 'Is Merged', 'proprietary'],
1054
+ inline=True,
1055
+ labelStyle={'fontWeight': 'normal', 'marginRight': '15px'}
1056
+ ),
1057
+
1058
+ # The visual divider with adjusted margins for balanced spacing.
1059
+ # It has less left margin to compensate for the right margin of "Proprietary".
1060
+ html.Span('|', style={
1061
+ 'marginLeft': '-5px',
1062
+ 'marginRight': '10px',
1063
+ 'color': 'var(--secondary-text)'
1064
+ }),
1065
+
1066
+ # Checklist for the reasoning type
1067
+ dcc.Checklist(
1068
+ id='model-type-filter-reasoning',
1069
+ options=[
1070
+ {'label': html.Span('Reasoning'), 'value': 'Is Thinking Model'}
1071
+ ],
1072
+ value=['Is Thinking Model'],
1073
+ inline=True,
1074
+ ),
1075
  ],
1076
+ style={'display': 'flex', 'alignItems': 'center'} # Flexbox applies only to this line
 
 
 
1077
  )
1078
+ ]),
1079
+ # Right side: Other Options
 
 
 
 
 
 
 
 
 
 
 
 
 
1080
  html.Div([
1081
+ html.Label("Other Options:", className="model-type-filter"),
 
1082
  dcc.Checklist(
1083
+ id='other-toggles-checklist',
1084
+ options=[{'label': label, 'value': col} for col, label in OTHER_TOGGLES.items()] +
1085
+ [{'label': 'NA Models', 'value': 'show_na'}],
 
 
 
1086
  value=[],
1087
  inline=True,
 
1088
  labelStyle={'fontWeight': 'normal', 'marginRight': '15px'}
1089
  )
1090
+ ], style={'textAlign': 'left'}), # Corrected alignment
1091
+ ], style={'display': 'flex', 'flexWrap': 'wrap', 'justifyContent': 'space-between', 'alignItems': 'center', 'padding': '0 20px', 'marginBottom': '20px'}),
1092
+
1093
+
1094
+ # --- HORIZONTAL CONTAINER FOR PRESETS AND CHECKLISTS ---
1095
+ html.Div(
1096
+ [
1097
+ # Create a vertical block for each preset
1098
+ html.Div(
1099
+ [
1100
+ dcc.RadioItems(
1101
+ id=f'{preset.lower()}-selector',
1102
+ className='preset-selector',
1103
+ options=[{'label': preset, 'value': preset}],
1104
+ value='Overview' if preset == 'Overview' else None,
1105
+ inputStyle={"marginRight": "8px"}
1106
+ ),
1107
+ dcc.Checklist(
1108
+ id=f'{preset.lower()}-checklist',
1109
+ className='preset-checklist',
1110
+ options=PRESET_OPTIONS[preset],
1111
+ value=[],
1112
+ labelStyle={'display': 'block', 'marginBottom': '8px', 'fontWeight': 'normal'}
1113
+ ) if preset != "Overview" else None
1114
+ ],
1115
+ className='preset-column',
1116
+ id=f'{preset.lower()}-preset-div'
1117
  )
1118
+ for preset in PRESET_COLUMNS.keys()
1119
+ ],
1120
+ className='preset-container',
1121
+ style={'padding': '0 20px', 'marginBottom': '20px'}
1122
+ ),
1123
+
1124
  # Grid
1125
  html.Div([
1126
  dag.AgGrid(
1127
  id='leaderboard-grid',
 
1128
  rowData=df.to_dict('records'),
1129
+ columnDefs=get_initial_column_defs(),
1130
  defaultColDef={
1131
+ "sortable": True, "resizable": True, "filter": True, "floatingFilter": False,
1132
+ "suppressMovable": True, # This disables column dragging for all columns
 
 
1133
  "sortingOrder": ['desc', 'asc'],
1134
  "filterParams": {
1135
  "defaultOption": "between"
1136
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1137
  },
1138
  dashGridOptions=dashGridOptions,
1139
  dangerously_allow_code=True,
 
1144
  )
1145
  ], style={'marginBottom': '30px'}),
1146
 
1147
+ # html.Div([
1148
+ # html.H4("Debug Information"),
1149
+ # html.Pre(id='debug-output', style={'border': '1px solid #ccc', 'padding': '10px', 'whiteSpace': 'pre-wrap', 'maxHeight': '400px', 'overflowY': 'auto'})
1150
+ # ]),
1151
+
1152
  # Description
1153
  html.Div([
1154
+ html.H3("About the Benchmarks", style={'fontSize': '22px', 'marginBottom': '10px'}),
1155
+ html.P(
1156
+ "To ensure a fair evaluation, all test questions are kept private. This prevents models from being specifically trained on the benchmark itself."
1157
+ ),
1158
+
1159
+ # --- Uncensored Section ---
1160
+ html.P([html.Strong("UGI πŸ†"), ": Uncensored General Intelligence"], style={'marginTop': '20px', 'fontSize': '1.2em'}),
1161
+ html.P("Measures a model's knowledge of sensitive topics and its ability to follow instructions when faced with controversial prompts."),
1162
  html.Details([
1163
+ html.Summary("UGI Metrics", style={'fontWeight': 'normal', 'fontSize': '1em', 'marginLeft': '20px', 'cursor': 'pointer'}),
 
 
 
 
 
 
1164
  html.Ul([
1165
+ html.Li([html.Strong("Categories:")]),
1166
+ html.Ul([
1167
+ html.Li([html.Strong("Hazardous:"), " Knowledge of topics that LLMs probably shouldn't assist with."]),
1168
+ html.Li([html.Strong("Entertainment:"), " Knowledge of adult or controversial entertainment and media."]),
1169
+ html.Li([html.Strong("SocPol:"), " Knowledge of sensitive socio-political topics."]),
1170
+ ], style={'listStyleType': 'circle', 'marginLeft': '20px'}),
1171
+ html.Li([html.Strong("W/10 πŸ‘ (Willingness/10):"), " A component of the UGI score that measures how far a model can be pushed before it refuses to answer or deviates from instructions."]),
1172
+ html.Ul([
1173
+ html.Li([html.Strong("W/10-Direct:"), " Measures if the model directly refuses to respond to certain prompts."]),
1174
+ html.Li([html.Strong("W/10-Adherence:"), " Measures if a model deviates from instructions, which can be a form of refusal or a sign of lower intelligence."]),
1175
+ ], style={'listStyleType': 'circle', 'marginLeft': '20px'}),
1176
+ ], style={'marginTop': '5px', 'marginLeft': '40px'})
1177
+ ], open=True),
1178
+
1179
+ # --- Intelligence Section ---
1180
+ html.P([html.Strong("NatInt πŸ’‘"), ": Natural Intelligence"], style={'marginTop': '20px', 'fontSize': '1.2em'}),
1181
+ html.P("Measures a model's general knowledge and reasoning capabilities across a range of standard and specialized domains."),
1182
  html.Details([
1183
+ html.Summary("Intelligence Metrics", style={'fontWeight': 'normal', 'fontSize': '1em', 'marginLeft': '20px', 'cursor': 'pointer'}),
 
 
 
 
 
 
1184
  html.Ul([
1185
+ html.Li([html.Strong("Standard:"), " Measures knowledge of standard, factual information like dates, statistics, math, and logic."]),
1186
+ html.Li([html.Strong("Pop Culture:"), " Knowledge of specific details from things like video games, movies, music, and internet culture."]),
1187
+ html.Li([html.Strong("World Model:"), " Tasks that test a model's understanding of real-world properties and patterns."]),
1188
+ html.Ul([
1189
+ html.Li([html.Strong("Cooking (% Error):"), " Predicts needed ingredient amounts for recipes."]),
1190
+ html.Li([html.Strong("GeoGuesser (km Error):"), " Identifies a location based on a description of its surroundings."]),
1191
+ html.Li([html.Strong("Weight (% Error):"), " Estimates the weight of various objects based on their description."]),
1192
+ html.Li([html.Strong("Music (Error):"), " Predicts a song's musical attributes (like bpm and loudness) based on its lyrics."]),
1193
+ html.Li([html.Strong("YouTube Views (% Error):"), " Estimates a video's view count from its title, given examples from the same channel."]),
1194
+ html.Li([html.Strong("Show Recommendation Score:"), " A model's ability to predict what rating out of ten a person will rate a TV show based on their previous ratings."]),
1195
+ html.Ul([
1196
+ html.Li([html.Strong("Show Rec MAE:"), " The mean absolute error between the model's predicted ratings and the user's true ratings."]),
1197
+ html.Li([html.Strong("Show Rec Pearson:"), " Measures how well the model's predictions trend with the user's true ratings."]),
1198
+ html.Li([html.Strong("Show Rec Std Dev Error:"), " The absolute difference between the spread of the model's predictions and the spread of the true ratings."]),
1199
+ ], style={'listStyleType': 'circle', 'marginLeft': '20px'}),
1200
+ ], style={'listStyleType': 'circle', 'marginLeft': '20px'}),
1201
+ ], style={'marginTop': '5px', 'marginLeft': '40px'})
1202
+ ], open=True),
1203
+
1204
+ # --- Writing Section ---
1205
+ html.P([html.Strong("Writing ✍️")], style={'marginTop': '20px', 'fontSize': '1.2em'}),
1206
+ html.P("A score of a model's writing ability, factoring in intelligence, writing style, amount of repetition, and adherence to requested output length. The score attempts to match the average person's preferences. Optimal values are displayed in parentheses in the column headers for the metrics used in the formula (e.g., 'Readability Grade (~5.5)'). These values were estimated using human feedback through model preference."),
1207
+ html.Details([
1208
+ html.Summary("Writing Metrics", style={'fontWeight': 'normal', 'fontSize': '1em', 'marginLeft': '20px', 'cursor': 'pointer'}),
1209
+ html.Ul([
1210
+ html.Li([html.Strong("NSFW/Dark Scores:"), " Measures the tonal direction a model takes when writing stories, from SFW to explicit (NSFW) and from lighthearted to violent/tragic (Dark)."]),
1211
+ html.Li([html.Strong("Stylistic Metrics:")]),
1212
+ html.Ul([
1213
+ html.Li([html.Strong("Readability Grade:"), " The estimated US school grade level needed to understand the text."]),
1214
+ html.Li([html.Strong("Verb/Noun Ratio:"), " The ratio of action words (verbs) to naming words (nouns)."]),
1215
+ html.Li([html.Strong("Adj&Adv %:"), " The percentage of descriptive words (adjectives and adverbs) out of total words."]),
1216
+ html.Li([html.Strong("Dialogue %:"), " The percentage of sentences in the model's response that is dialogue when writing stories."]),
1217
+ ], style={'listStyleType': 'circle', 'marginLeft': '20px'}),
1218
+ html.Li([html.Strong("Repetition Metrics:")]),
1219
+ html.Ul([
1220
+ html.Li([html.Strong("Lexical Stuckness:"), " Measures if the model gets 'stuck' using a limited vocabulary in parts of its writing."]),
1221
+ html.Li([html.Strong("Originality:"), " Measures how unique each story is by comparing its word usage and themes against all other stories generated by the same model for different prompts."]),
1222
+ html.Li([html.Strong("Semantic Redundancy:"), " Detects when the same concept is expressed multiple times with different wording."]),
1223
+ ], style={'listStyleType': 'circle', 'marginLeft': '20px'}),
1224
+ html.Li([html.Strong("Length Adherence:")]),
1225
+ html.Ul([
1226
+ html.Li([html.Strong("Length Error %:"), " The average percentage difference between a user-requested word count and the generated word count."]),
1227
+ html.Li([html.Strong("Exceeded %:"), " The percentage of times the model responds with more words than requested."]),
1228
+ ], style={'listStyleType': 'circle', 'marginLeft': '20px'}),
1229
+ html.Li([html.Strong("Style Adherence:"), " How closely the model is able to match the writing style of a given example."]),
1230
+ ], style={'marginTop': '5px', 'marginLeft': '40px'})
1231
+ ], open=True),
1232
+
1233
+ # --- Politics Section ---
1234
+ html.P([html.Strong("Political Lean πŸ“‹")], style={'marginTop': '20px', 'fontSize': '1.2em'}),
1235
+ html.Details([
1236
+ html.Summary("Political Metrics", style={'fontWeight': 'normal', 'fontSize': '1em', 'marginLeft': '20px', 'cursor': 'pointer'}),
1237
+ html.Ul([
1238
+ html.Li([html.Strong("Political Lean πŸ“‹:"), " Measures a model's political alignment based on its responses to the ", html.A("12axes", href="https://politicaltests.github.io/12axes/", target="_blank", style={'color': 'var(--link-color)'}), " test. The Political Lean metric uses a simplified version with the Assimilationist-Multiculturalist, Collectivize-Privatize, and Progressive-Traditional axes. The score ranges from -100% (Left) to 100% (Right)."]),
1239
+ html.Li([html.Strong("12axes Ideology:"), " The closest matching political ideology from the 12axes test."]),
1240
+ html.Li([html.Strong("Aggregate Scores:")]),
1241
+ html.Ul([
1242
+ html.Li("Govt: Higher = State authority, Lower = Individual liberty"),
1243
+ html.Li("Dipl: Higher = Global outlook, Lower = National interests"),
1244
+ html.Li("Econ: Higher = Economic equality, Lower = Market freedom"),
1245
+ html.Li("Scty: Higher = Progressive values, Lower = Traditional values")
1246
+ ], style={'listStyleType': 'circle', 'marginLeft': '20px'}),
1247
+ ], style={'marginTop': '5px', 'marginLeft': '40px'})
1248
+ ], open=True),
1249
+ html.Details([
1250
+ html.Summary("12axes Ideology Descriptions", style={'fontWeight': 'normal', 'fontSize': '1em', 'marginLeft': '20px', 'cursor': 'pointer', 'marginTop': '10px'}),
1251
+ html.Div([
1252
+ html.I("Only showing ideologies at least one model has.", className='ideology-note', style={'fontSize': '0.9em'}),
1253
+ dcc.Markdown("\n\n".join([
1254
+ f"**{ideology}**: {IDEOLOGY_DESCRIPTIONS.get(ideology, 'No description available.')}"
1255
+ for ideology in sorted(set(df['12axes Ideology'].dropna()))
1256
+ if ideology
1257
+ ]), className='markdown-content'),
1258
+ html.Div([
1259
+ html.A("Source", href="https://github.com/politicaltests/politicaltests.github.io/blob/main/12axes/ideologies.js", target="_blank", className="source-link")
1260
+ ], style={'marginTop': '20px'})
1261
+ ], style={'paddingTop': '10px', 'marginLeft': '40px'})
1262
  ]),
1263
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1264
  ], style={
1265
  'maxWidth': '1200px',
1266
  'margin': '0 auto',
1267
  'padding': '0 20px',
1268
+ 'color': 'var(--text-color)',
1269
+ 'marginBottom': '80px'
1270
  }),
1271
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1272
  ], style={'maxWidth': '100%', 'margin': '0 auto'})
1273
 
1274
# Checklist values to tick in each preset's checklist while the "Overview"
# preset is selected: keys are preset names, values are the column names
# (checklist option values) shown for that preset in the overview.
OVERVIEW_MAPPING = {
    "Uncensored": ["UGI πŸ†", "W/10 πŸ‘"],
    "Intelligence": ["NatInt πŸ’‘"],
    "Writing": ["Writing ✍️"],
    "Politics": ["Political Lean πŸ“‹"]
}
1280
+
1281
@app.callback(
    [Output(f'{p.lower()}-checklist', 'value') for p in PRESET_COLUMNS.keys() if p != "Overview"] +
    [Output(f'{p.lower()}-selector', 'value') for p in PRESET_COLUMNS.keys()],
    [Input(f'{p.lower()}-selector', 'value') for p in PRESET_COLUMNS.keys()],
    prevent_initial_call=False
)
def sync_presets_and_checklists(*selector_values):
    """Keep the preset radio buttons and the per-preset checklists in sync.

    Exactly one preset selector is left selected (the one that triggered the
    callback, or "Overview" on the initial call) and every checklist is reset
    to the columns that preset implies:

    * "Overview" ticks the headline columns listed in ``OVERVIEW_MAPPING``.
    * Any other preset ticks all of its own columns.
    * "Writing" additionally ticks the NatInt column in the Intelligence
      checklist.

    Args:
        *selector_values: current values of the preset selectors.  They are
            only needed to register the inputs; the triggering component id
            decides which preset is active.

    Returns:
        list: checklist values (one list per non-Overview preset, in
        ``PRESET_COLUMNS`` order) followed by selector values (one per
        preset, ``None`` for all but the active preset).
    """
    # Resolve the preset from the component id with an explicit lookup rather
    # than rebuilding its name via str.capitalize(), which only round-trips
    # for single-word, capitalised preset names.
    preset_by_selector_id = {f'{p.lower()}-selector': p for p in PRESET_COLUMNS.keys()}

    triggered = dash.callback_context.triggered_id
    selected_preset = "Overview"
    if isinstance(triggered, str) and triggered:
        # Tolerate a "component.property" style id as the original code did.
        component_id = triggered.split('.')[0]
        # An unrecognised trigger falls back to Overview so that one preset
        # always stays selected.
        selected_preset = preset_by_selector_id.get(component_id, "Overview")

    checklist_outputs = {p: [] for p in PRESET_COLUMNS.keys() if p != "Overview"}

    if selected_preset == "Overview":
        for preset, cols in OVERVIEW_MAPPING.items():
            # Copy so callers never share the module-level lists.
            checklist_outputs[preset] = list(cols)
    elif selected_preset in checklist_outputs:
        checklist_outputs[selected_preset] = list(PRESET_COLUMNS[selected_preset].keys())
        if selected_preset == "Writing":
            # The Writing view also shows NatInt alongside the writing metrics.
            checklist_outputs["Intelligence"] = ["NatInt πŸ’‘"]

    final_checklist_values = [checklist_outputs[p] for p in PRESET_COLUMNS.keys() if p != "Overview"]
    final_selector_values = [p if p == selected_preset else None for p in PRESET_COLUMNS.keys()]

    return final_checklist_values + final_selector_values
1316
 
1317
@app.callback(
    Output('leaderboard-grid', 'columnDefs', allow_duplicate=True),
    [Input(f'{p.lower()}-checklist', 'value') for p in PRESET_COLUMNS.keys() if p != "Overview"] +
    [Input('other-toggles-checklist', 'value')] +
    [Input(f'{p.lower()}-selector', 'value') for p in PRESET_COLUMNS.keys()],
    prevent_initial_call=True
)
def update_columns_and_sort(uncensored_cols, intelligence_cols, writing_cols, politics_cols, other_toggles, *selector_values):
    """Rebuild the grid's column definitions from the current UI selections.

    Args:
        uncensored_cols, intelligence_cols, writing_cols, politics_cols:
            ticked values of the four preset checklists.  A value may be a
            column name or a key of ``COLUMN_GROUPS`` (expanded to its
            member columns).
        other_toggles: ticked values of the "Other Options" checklist.
        *selector_values: values of the preset selectors, in
            ``PRESET_COLUMNS`` order; the first one equal to its own preset
            name marks the active preset (default "Overview").

    Returns:
        list[dict]: column definitions in ``MASTER_COLUMN_ORDER`` with
        ``hide``/``pinned`` set, default sort applied when a preset selector
        triggered the callback, preset-specific header classes and
        left-border cell classes.
    """
    natint_field = 'NatInt πŸ’‘'
    writing_field = 'Writing ✍️'

    # The default sort is only (re)applied when a preset selector fired, so
    # toggling individual checkboxes does not touch the sort keys.
    trigger = dash.callback_context.triggered_id
    apply_default_sort = bool(trigger and trigger.endswith('-selector'))

    active_preset = next(
        (name for value, name in zip(selector_values, PRESET_COLUMNS.keys()) if value == name),
        'Overview',
    )

    # Visible columns: the always-on set plus every selection, with group
    # keys replaced by the columns they stand for.
    visible_cols = {"pinned", "is_new", "R", "#P", "type", "Model_Display", "Release Date", "Test Date"}
    for choice in set(uncensored_cols + intelligence_cols + writing_cols + politics_cols + other_toggles):
        if choice in COLUMN_GROUPS:
            visible_cols.update(COLUMN_GROUPS[choice])
        else:
            visible_cols.add(choice)

    primary_sort_col = {
        "Overview": "UGI πŸ†",
        "Uncensored": "UGI πŸ†",
        "Intelligence": "NatInt πŸ’‘",
        "Writing": "Writing ✍️",
        "Politics": None
    }.get(active_preset)
    pinned_cols = ("pinned", "is_new", "R", "Avg Thinking Chars", "#P", "type", "Model_Display")

    final_defs = []
    for col_name in MASTER_COLUMN_ORDER:
        if col_name not in ALL_COLUMN_DEFS:
            continue

        # Fresh shallow copy per call; the master definitions stay untouched.
        entry = {
            **ALL_COLUMN_DEFS[col_name],
            'hide': col_name not in visible_cols,
            'pinned': 'left' if col_name in pinned_cols else None,
        }

        if apply_default_sort:
            is_primary = entry.get('field') == primary_sort_col
            entry['sort'] = 'desc' if is_primary else None
            entry['sortIndex'] = 0 if is_primary else None

        final_defs.append(entry)

    # In the Writing preset the NatInt column is moved to sit directly after
    # the Writing score column (left as-is if either column is missing).
    if active_preset == 'Writing':
        natint_def = next((d for d in final_defs if d.get('field') == natint_field), None)
        if natint_def:
            remaining = [d for d in final_defs if d.get('field') != natint_field]
            writing_pos = next(
                (idx for idx, d in enumerate(remaining) if d.get('field') == writing_field),
                None,
            )
            if writing_pos is not None:
                remaining.insert(writing_pos + 1, natint_def)
                final_defs = remaining

    # Header CSS classes that are attached to specific fields while the
    # Writing preset is active.
    WRITING_OPTIMAL_CLASSES = {
        "avg_length_error_pct": "header-optimal-len-err",
        "NatInt πŸ’‘": "header-optimal-natint",
        "originality_score": "header-optimal-orig",
        "internal_semantic_redundancy": "header-optimal-sem-red",
        "lexical_stuckness": "header-optimal-lex-stuck",
        "Adjective_Adverb_Percentage": "header-optimal-adj-adv",
        "Readability_Grade_Level": "header-optimal-read-grade",
        "Dialogue_Percentage": "header-optimal-dialogue"
    }

    for entry in final_defs:
        # Strip any existing "header-optimal-*" class, then re-add the one
        # that applies to this field in the Writing preset.
        header_classes = [
            cls for cls in entry.get('headerClass', '').split()
            if not cls.startswith('header-optimal-')
        ]
        if active_preset == 'Writing':
            optimal_class = WRITING_OPTIMAL_CLASSES.get(entry.get('field'))
            if optimal_class is not None and optimal_class not in header_classes:
                header_classes.append(optimal_class)
        entry['headerClass'] = ' '.join(header_classes)

    # Fields that receive a left border, per preset.
    preset_borders = {
        'Overview': {"UGI πŸ†", "NatInt πŸ’‘", "Writing ✍️", "Political Lean πŸ“‹"},
        'Uncensored': {"UGI πŸ†", "W/10 πŸ‘"},
        'Intelligence': {"NatInt πŸ’‘"},
        'Writing': {"Writing ✍️", "NatInt πŸ’‘"},
    }
    if active_preset in preset_borders:
        border_cols = preset_borders[active_preset]
    else:
        # Any other preset: border only the first visible headline score.
        border_cols = set()
        main_score_columns = ["UGI πŸ†", "W/10 πŸ‘", "NatInt πŸ’‘", "Writing ✍️", "Political Lean πŸ“‹"]
        for entry in final_defs:
            if not entry.get('hide', True) and entry.get('field') in main_score_columns:
                border_cols.add(entry.get('field'))
                break

    for entry in final_defs:
        if entry.get('field') not in border_cols:
            continue
        existing = entry.get('cellClass', '')
        if 'border-left' not in existing:
            entry['cellClass'] = f"{existing} border-left".strip()

    return final_defs
1447
 
1448
@app.callback(
    Output('leaderboard-grid', 'rowData'),
    [Input(f'{p.lower()}-selector', 'value') for p in PRESET_COLUMNS.keys()] +
    [Input(f'{p.lower()}-checklist', 'value') for p in PRESET_COLUMNS.keys() if p != "Overview"] +
    [
        Input('model-type-filter-main', 'value'),
        Input('model-type-filter-reasoning', 'value'),
        Input('other-toggles-checklist', 'value')
    ]
)
def update_grid_rows(*args):
    """Return the leaderboard rows that match the current filters.

    Positional ``args`` follow the Input order of the decorator:
    one value per preset selector, one list per non-Overview preset
    checklist, then the main model-type checklist, the reasoning checklist
    and the "Other Options" checklist.

    Filtering steps:

    1. Model type: a row is removed when it belongs to any category whose
       checkbox is *not* ticked.
    2. Writing preset (without "NA Models"): rows lacking a Writing score
       are dropped.
    3. "NA Models" ticked: only rows with a missing value in at least one of
       the currently relevant categories (Writing, World Model tests,
       Political Lean) are kept; if none of those categories is shown the
       result is empty.

    Returns:
        list[dict]: the remaining rows as records for the grid's ``rowData``.
    """
    # 1. Unpack arguments
    num_presets = len(PRESET_COLUMNS)
    num_checklists = num_presets - 1
    tail = num_presets + num_checklists

    selector_values = args[:num_presets]
    checklist_values = args[num_presets:tail]
    main_types = args[tail]
    reasoning_type = args[tail + 1]
    other_toggles = args[tail + 2]

    # Treat an unset (None) checklist the same as an empty one.
    uncensored_cols, intelligence_cols, writing_cols, politics_cols = (
        values or [] for values in checklist_values
    )
    selected_types = set((main_types or []) + (reasoning_type or []))
    show_na_filter = 'show_na' in (other_toggles or [])

    # 2. Model type filtering. Boolean indexing below produces a new frame,
    # so the module-level df is never modified and needs no up-front copy.
    categories = {
        'Is Foundation': (df['Is Foundation'] & ~df['Is Merged'] & pd.notna(df['Total Parameters'])),
        'Is Finetuned': (df['Is Finetuned'] & ~df['Is Merged']),
        'Is Merged': df['Is Merged'],
        'proprietary': pd.isna(df['Total Parameters']),
        'Is Thinking Model': df['Is Thinking Model']
    }
    final_mask = pd.Series(True, index=df.index)
    for category_value, condition_mask in categories.items():
        if category_value not in selected_types:
            final_mask &= ~condition_mask
    filtered_df = df[final_mask]

    # 3. Determine active preset (first selector holding its own name)
    active_preset = None
    for selector_value, preset_name in zip(selector_values, PRESET_COLUMNS.keys()):
        if selector_value == preset_name:
            active_preset = preset_name
            break

    # 4. Writing preset hides rows without a Writing score. dropna returns a
    # new frame; mutating the filtered slice in place is avoided on purpose.
    if active_preset == 'Writing' and not show_na_filter:
        filtered_df = filtered_df.dropna(subset=['Writing ✍️'])

    # 5. Context-aware "NA Models" filter
    if show_na_filter:
        all_selections = set(uncensored_cols + intelligence_cols + writing_cols + politics_cols)
        if active_preset == 'Overview':
            all_selections.update(PRESET_COLUMNS['Overview'].keys())

        na_conditions = []
        if 'Writing ✍️' in all_selections:
            na_conditions.append(filtered_df['Writing ✍️'].isna())
        if 'world_model_group' in intelligence_cols:
            # NOTE(review): -99999 looks like the placeholder stored for a
            # missing Show Rec Score -- confirm against the data loader.
            na_conditions.append(filtered_df['Show Rec Score'] == -99999)
        if 'Political Lean πŸ“‹' in all_selections:
            na_conditions.append(filtered_df['Political Lean πŸ“‹'].isna())

        if na_conditions:
            final_na_mask = pd.Series(False, index=filtered_df.index)
            for condition in na_conditions:
                final_na_mask |= condition
            filtered_df = filtered_df[final_na_mask]
        else:
            # No NA-capable category is on screen: nothing qualifies.
            filtered_df = filtered_df.iloc[0:0]

    return filtered_df.to_dict('records')
1529
 
1530
# Browser-side callback: when the URL pathname input fires, narrow screens
# (window width below 800px) get a copy of the current column definitions
# with every column un-pinned; wider screens leave the definitions untouched.
# The grid's current columnDefs are read as State, and allow_duplicate=True
# is needed because columnDefs is also written by another callback.
app.clientside_callback(
    """
    function(_, columnDefs) {
        // This function runs once on page load to set the initial pinning based on screen size.
        const isMobile = window.innerWidth < 800;
        if (!isMobile || !columnDefs) {
            // On desktop, or if defs are not ready, do nothing.
            return dash_clientside.no_update;
        }

        // On mobile, create a new set of definitions with all pinning removed.
        const newDefs = columnDefs.map(col => {
            const newCol = Object.assign({}, col);
            newCol.pinned = null; // Un-pin the column
            return newCol;
        });
        return newDefs;
    }
    """,
    Output('leaderboard-grid', 'columnDefs', allow_duplicate=True),
    Input('url', 'pathname'),
    State('leaderboard-grid', 'columnDefs'),
    prevent_initial_call=True
)
1554
 
1555
 
1556
# Start the Dash server only when this file is executed as a script;
# it listens on all interfaces (0.0.0.0) on port 8050.
if __name__ == '__main__':
    # NOTE(review): newer Dash releases favour app.run over run_server --
    # confirm against the Dash version this app is deployed with.
    app.run_server(host='0.0.0.0', port=8050)