| guardset |
general-safety-education-binary |
Collection |
0.9672 |
0.9835 |
0.9806 |
| guardset |
response-safety-cyber-binary |
Collection |
0.9623 |
0.9681 |
0.9607 |
| guardset |
prompt-response-safety-binary |
Collection |
0.9514 |
0.9783 |
0.9595 |
| guardset |
prompt-safety-finance-binary |
Collection |
0.9939 |
1.0000 |
1.0000 |
| guardset |
general-safety-hr-binary |
Collection |
0.9643 |
0.9868 |
0.9865 |
| guardset |
prompt-safety-law-binary |
Collection |
0.9783 |
1.0000 |
0.9890 |
| guardset |
prompt-safety-binary |
Collection |
0.9564 |
0.9731 |
0.9676 |
| guardset |
response-safety-law-binary |
Collection |
0.9344 |
0.9344 |
0.9194 |
| guardset |
response-safety-binary |
Collection |
0.9338 |
0.9484 |
0.9279 |
| guardset |
prompt-safety-cyber-binary |
Collection |
0.9540 |
0.9649 |
0.9558 |
| guardset |
response-safety-finance-binary |
Collection |
0.9350 |
0.9650 |
0.9381 |
| guardset |
general-safety-social-media-binary |
Collection |
0.9484 |
0.9793 |
0.9690 |
| harmfulness-mix |
prompt-harmfulness-binary |
Collection |
0.9533 |
0.9579 |
0.9558 |
| in-the-wild |
prompt-jailbreak-binary |
Collection |
0.9535 |
0.9535 |
0.9240 |
| intel |
general-politeness-binary |
Collection |
0.9843 |
0.9858 |
0.9831 |
| intel |
general-politeness-multiclass |
Collection |
0.9951 |
0.9992 |
0.9992 |
| jigsaw |
prompt-toxicity-binary |
Collection |
0.9531 |
0.9531 |
0.9111 |
| moderation |
prompt-hate-speech-binary |
Collection |
0.9141 |
0.9207 |
0.8826 |
| moderation |
prompt-harmfulness-binary |
Collection |
0.8543 |
0.8627 |
0.8457 |
| moderation |
prompt-sexual-content-binary |
Collection |
0.9256 |
0.9382 |
0.9153 |
| moderation |
prompt-harassment-binary |
Collection |
0.8788 |
0.8895 |
0.8660 |
| moderation |
prompt-self-harm-binary |
Collection |
0.8929 |
0.9667 |
0.9375 |
| moderation |
prompt-violence-binary |
Collection |
0.9017 |
0.9077 |
0.8821 |
| moderation |
prompt-harmfulness-multilabel |
Collection |
0.6185 |
0.6909 |
0.6547 |
| nvidia-aegis |
prompt-response-safety-binary |
Collection |
0.8254 |
0.8300 |
0.7878 |
| nvidia-aegis |
prompt-safety-binary |
Collection |
0.8770 |
0.8945 |
0.8688 |
| nvidia-aegis |
response-safety-binary |
Collection |
0.8631 |
0.8736 |
0.7560 |
| polyguard |
prompt-safety-multilabel |
Collection |
0.6619 |
0.7955 |
0.7955 |
| polyguard |
response-refusal-binary |
Collection |
0.9486 |
0.9567 |
0.9510 |
| polyguard |
prompt-safety-binary |
Collection |
0.9740 |
0.9741 |
0.9609 |
| polyguard |
response-safety-binary |
Collection |
0.9535 |
0.9535 |
0.9447 |
| sok |
prompt-jailbreak-binary |
Collection |
0.9862 |
0.9901 |
0.9901 |
| toxic-chat |
prompt-toxicity-binary |
Collection |
0.8919 |
0.9091 |
0.8527 |