Commit 6239888 · upload
Parent(s): 0
This view is limited to 50 files because the commit contains too many changes.
- .gitattributes +36 -0
- LICENSE +14 -0
- NOTICE +5 -0
- README.md +216 -0
- added_tokens.json +28 -0
- bias.md +4 -0
- chat_template.jinja +148 -0
- config.json +38 -0
- explainability.md +14 -0
- generation_config.json +13 -0
- merges.txt +0 -0
- model-00001-of-00095.safetensors +3 -0
- model-00002-of-00095.safetensors +3 -0
- model-00003-of-00095.safetensors +3 -0
- model-00004-of-00095.safetensors +3 -0
- model-00005-of-00095.safetensors +3 -0
- model-00006-of-00095.safetensors +3 -0
- model-00007-of-00095.safetensors +3 -0
- model-00008-of-00095.safetensors +3 -0
- model-00009-of-00095.safetensors +3 -0
- model-00010-of-00095.safetensors +3 -0
- model-00011-of-00095.safetensors +3 -0
- model-00012-of-00095.safetensors +3 -0
- model-00013-of-00095.safetensors +3 -0
- model-00014-of-00095.safetensors +3 -0
- model-00015-of-00095.safetensors +3 -0
- model-00016-of-00095.safetensors +3 -0
- model-00017-of-00095.safetensors +3 -0
- model-00018-of-00095.safetensors +3 -0
- model-00019-of-00095.safetensors +3 -0
- model-00020-of-00095.safetensors +3 -0
- model-00021-of-00095.safetensors +3 -0
- model-00022-of-00095.safetensors +3 -0
- model-00023-of-00095.safetensors +3 -0
- model-00024-of-00095.safetensors +3 -0
- model-00025-of-00095.safetensors +3 -0
- model-00026-of-00095.safetensors +3 -0
- model-00027-of-00095.safetensors +3 -0
- model-00028-of-00095.safetensors +3 -0
- model-00029-of-00095.safetensors +3 -0
- model-00030-of-00095.safetensors +3 -0
- model-00031-of-00095.safetensors +3 -0
- model-00032-of-00095.safetensors +3 -0
- model-00033-of-00095.safetensors +3 -0
- model-00034-of-00095.safetensors +3 -0
- model-00035-of-00095.safetensors +3 -0
- model-00036-of-00095.safetensors +3 -0
- model-00037-of-00095.safetensors +3 -0
- model-00038-of-00095.safetensors +3 -0
- model-00039-of-00095.safetensors +3 -0
.gitattributes ADDED
@@ -0,0 +1,36 @@
*.7z filter=lfs diff=lfs merge=lfs -text
*.arrow filter=lfs diff=lfs merge=lfs -text
*.bin filter=lfs diff=lfs merge=lfs -text
*.bz2 filter=lfs diff=lfs merge=lfs -text
*.ckpt filter=lfs diff=lfs merge=lfs -text
*.ftz filter=lfs diff=lfs merge=lfs -text
*.gz filter=lfs diff=lfs merge=lfs -text
*.h5 filter=lfs diff=lfs merge=lfs -text
*.joblib filter=lfs diff=lfs merge=lfs -text
*.lfs.* filter=lfs diff=lfs merge=lfs -text
*.mlmodel filter=lfs diff=lfs merge=lfs -text
*.model filter=lfs diff=lfs merge=lfs -text
*.msgpack filter=lfs diff=lfs merge=lfs -text
*.npy filter=lfs diff=lfs merge=lfs -text
*.npz filter=lfs diff=lfs merge=lfs -text
*.onnx filter=lfs diff=lfs merge=lfs -text
*.ot filter=lfs diff=lfs merge=lfs -text
*.parquet filter=lfs diff=lfs merge=lfs -text
*.pb filter=lfs diff=lfs merge=lfs -text
*.pickle filter=lfs diff=lfs merge=lfs -text
*.pkl filter=lfs diff=lfs merge=lfs -text
*.pt filter=lfs diff=lfs merge=lfs -text
*.pth filter=lfs diff=lfs merge=lfs -text
*.rar filter=lfs diff=lfs merge=lfs -text
*.safetensors filter=lfs diff=lfs merge=lfs -text
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
*.tar.* filter=lfs diff=lfs merge=lfs -text
*.tar filter=lfs diff=lfs merge=lfs -text
*.tflite filter=lfs diff=lfs merge=lfs -text
*.tgz filter=lfs diff=lfs merge=lfs -text
*.wasm filter=lfs diff=lfs merge=lfs -text
*.xz filter=lfs diff=lfs merge=lfs -text
*.zip filter=lfs diff=lfs merge=lfs -text
*.zst filter=lfs diff=lfs merge=lfs -text
*tfevents* filter=lfs diff=lfs merge=lfs -text
tokenizer.json filter=lfs diff=lfs merge=lfs -text
LICENSE ADDED
@@ -0,0 +1,14 @@
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
NOTICE ADDED
@@ -0,0 +1,5 @@
This product includes a model derived from:

Qwen3-235B-A22B-Thinking-2507
Copyright 2025 Alibaba Cloud
Licensed under the Apache License, Version 2.0
README.md ADDED
@@ -0,0 +1,216 @@
---
license: apache-2.0
inference: false
fine-tuning: false
language:
- en
tags:
- nvidia
- qwen3
base_model: Qwen/Qwen3-235B-A22B-Thinking-2507
library_name: transformers
---
# Model Overview

## Description:

Qwen3-Nemotron-235B-A22B-GenRM is a Generative Reward Model (GenRM) that uses Qwen3-235B-A22B-Thinking-2507 as its foundation and is fine-tuned to evaluate the quality of assistant responses.

Given a conversation history, a new user request, and two candidate assistant responses, it produces an individual helpfulness score for each response and a ranking score.

This GenRM is used in the Reinforcement Learning from Human Feedback training of the [NVIDIA-Nemotron-3-Nano model](https://huggingface.co/nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16).

The exact training scheme will be detailed in an upcoming technical report.

This model is ready for commercial/non-commercial use.

## License/Terms of Use:

The model is licensed under [Apache 2.0](LICENSE).

### Deployment Geography

Global

## Release Date:

HuggingFace 2025-12-15 via https://huggingface.co/nvidia/Qwen3-Nemotron-235B-A22B-GenRM

## References:

* [HelpSteer3-Preference](https://arxiv.org/abs/2505.11475)
* Technical report (to be released soon)

## RM-Bench

| Chat | Math | Code | Safety | Easy | Normal | Hard | Overall |
|:------|:------|:------|:------|:------|:------|:------|:------|
| 76.5 | 96.9 | 81.4 | 94.4 | 94.0 | 90.5 | 77.4 | 87.3 |

## JudgeBench

| Knowledge | Reasoning | Math | Code | Overall |
|:------|:------|:------|:------|:------|
| 78.6 | 95.9 | 91.1 | 95.2 | 87.4 |

## Model Architecture:
**Architecture Type:** Transformer <br>
**Network Architecture:** Qwen3 <br>

We developed this model using [Qwen/Qwen3-235B-A22B-Thinking-2507](https://huggingface.co/Qwen/Qwen3-235B-A22B-Thinking-2507) as its foundation. This model contains 235 billion parameters.

## Input:
**Input Type(s):** Text <br>
**Input Format:** String <br>
**Input Parameters:** One-Dimensional (1D) <br>
**Other Properties Related to Input:** Max of 128k tokens <br>

## Output:
**Output Type(s):** Text <br>
**Output Format:** String <br>
**Output Parameters:** One-Dimensional (1D) <br>

Our AI models are designed and/or optimized to run on NVIDIA GPU-accelerated systems. By leveraging NVIDIA's hardware (e.g., GPU cores) and software frameworks (e.g., CUDA libraries), the model achieves faster training and inference times compared to CPU-only solutions. <br>

## Software Integration:
**Runtime Engine(s):** <br>
* [NeMo-RL] <br>

**Supported Hardware Microarchitecture Compatibility:** <br>
* NVIDIA Hopper <br>

**Supported Operating System(s):** Linux <br>

## Quick Start

The model shares the same architecture as Qwen3-235B-A22B-Thinking-2507. It can be served with vLLM:
```
python3 -m vllm.entrypoints.openai.api_server \
    --model "nvidia/Qwen3-Nemotron-235B-A22B-GenRM" \
    --trust-remote-code \
    --seed=1 \
    --host="0.0.0.0" \
    --port=5000 \
    --served-model-name "nvidia/Qwen3-Nemotron-235B-A22B-GenRM" \
    --tensor-parallel-size=8 \
    --max-model-len=40000 \
    --gpu-memory-utilization=0.95
```
You can then query the model; here is an example:
```python
from openai import OpenAI

client = OpenAI(base_url="http://127.0.0.1:5000/v1", api_key="dummy")

msg = [
    {"role": "user", "content": "What is 1+1?"},
    {"role": "assistant", "content": "1+1=2"},
    {"role": "user", "content": "What about 1+2?"},
    {"role": "response_1", "content": "1+2=4"},
    {"role": "response_2", "content": "1+2=3"}
]

completion = client.chat.completions.create(
    model="nvidia/Qwen3-Nemotron-235B-A22B-GenRM",
    messages=msg,
    temperature=0.6,
    top_p=0.95,
    max_tokens=16384,
    stream=False
)
output = completion.choices[0].message.content
print(output.split("</think>")[-1].strip())
```
Note that the conversation history should be presented in "user" and "assistant" roles, with the last turn being a user turn. The responses to be judged should be in the "response_1" and "response_2" roles.

### Interpretation of Scores
The individual helpfulness score ranges from 1 to 5, where higher means better.

The ranking score ranges from 1 to 6, where:

* 1 = Response 1 is much better than Response 2
* 2 = Response 1 is better than Response 2
* 3 = Response 1 is slightly better than Response 2
* 4 = Response 2 is slightly better than Response 1
* 5 = Response 2 is better than Response 1
* 6 = Response 2 is much better than Response 1
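Since the judgment arrives as JSON embedded in the model's answer, a small helper can recover the scores and map the ranking to a winner. The following is a minimal sketch, not an official utility; `parse_judgment` is a hypothetical name, and it assumes the judgment object is the outermost JSON object in the post-`</think>` answer:

```python
import json

def parse_judgment(output: str) -> dict:
    # Keep only the final answer after any reasoning trace (hypothetical
    # helper; assumes the judgment is the outermost JSON object).
    answer = output.split("</think>")[-1]
    start, end = answer.find("{"), answer.rfind("}")
    judgment = json.loads(answer[start:end + 1])
    # Ranking 1-3 favors Response 1; 4-6 favors Response 2.
    judgment["preferred"] = 1 if judgment["ranking"] <= 3 else 2
    return judgment

sample = '</think>\n{"score_1": 1, "score_2": 5, "ranking": 6}'
print(parse_judgment(sample)["preferred"])  # → 2
```

For the `msg` example above (where `response_2` gives the correct answer), a ranking of 4-6 would select Response 2 as the preferred candidate.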
## Model Version:
v1.0

# Training, Testing and Evaluation Datasets:

## Training Datasets:

**Dataset Name:** Subset of Nemotron dataset-3 containing samples from HelpSteer3, lmarena-ai/arena-human-preference-140k (commercial-friendly models only), and additional safety preference data.

**Datasets Links:** To be released (Nemotron dataset-3)

**Data Collection Method** <br>
* [Hybrid: Human, Synthetic] <br>

**Labeling Method** <br>
* [Hybrid: Human, Synthetic] <br>

## Evaluation Datasets

**Dataset Name:** RM-Bench <br>
**Dataset Link:** https://huggingface.co/datasets/THU-KEG/RM-Bench

**Data Collection Method by dataset** <br>
* [Hybrid: Human, Synthetic] <br>

**Labeling Method by dataset** <br>
* [Hybrid: Human, Synthetic] <br>

**Properties:** <br>
* 1,327 prompts, each with three pairs of responses and preferences between the responses in each pair.

**Dataset Name:** JudgeBench <br>
**Dataset Link:** https://huggingface.co/datasets/ScalerLab/JudgeBench

**Data Collection Method by dataset** <br>
* [Hybrid: Human, Synthetic] <br>

**Labeling Method by dataset** <br>
* [Hybrid: Human, Synthetic] <br>

**Properties:** <br>
* 350 prompts, each with a pair of responses and a preference between them.

# Inference:
**Engine:** PyTorch <br>
**Test Hardware:** H100 <br>

## Ethical Considerations:
NVIDIA believes Trustworthy AI is a shared responsibility, and we have established policies and practices to enable development for a wide array of AI applications. When downloaded or used in accordance with our terms of service, developers should work with their supporting model team to ensure this model meets requirements for the relevant industry and use case and addresses unforeseen product misuse.
For more detailed information on ethical considerations for this model, please see the Model Card++ [Explainability](explainability.md), [Bias](bias.md), [Safety and Security](safety.md), and [Privacy](privacy.md) Subcards.

Please report security vulnerabilities or NVIDIA AI Concerns [here](https://www.nvidia.com/en-us/support/submit-security-vulnerability/).

## Citation

If you find this model useful, please cite the following work:

```bibtex
@misc{wang2025helpsteer3preferenceopenhumanannotatedpreference,
      title={Help{S}teer3-{P}reference: Open Human-Annotated Preference Data across Diverse Tasks and Languages},
      author={Zhilin Wang and Jiaqi Zeng and Olivier Delalleau and Hoo-Chang Shin and Felipe Soares and Alexander Bukharin and Ellie Evans and Yi Dong and Oleksii Kuchaiev},
      year={2025},
      eprint={2505.11475},
      archivePrefix={arXiv},
      primaryClass={cs.CL},
      url={https://arxiv.org/abs/2505.11475},
}
```
added_tokens.json ADDED
@@ -0,0 +1,28 @@
{
  "</think>": 151668,
  "</tool_call>": 151658,
  "</tool_response>": 151666,
  "<think>": 151667,
  "<tool_call>": 151657,
  "<tool_response>": 151665,
  "<|box_end|>": 151649,
  "<|box_start|>": 151648,
  "<|endoftext|>": 151643,
  "<|file_sep|>": 151664,
  "<|fim_middle|>": 151660,
  "<|fim_pad|>": 151662,
  "<|fim_prefix|>": 151659,
  "<|fim_suffix|>": 151661,
  "<|im_end|>": 151645,
  "<|im_start|>": 151644,
  "<|image_pad|>": 151655,
  "<|object_ref_end|>": 151647,
  "<|object_ref_start|>": 151646,
  "<|quad_end|>": 151651,
  "<|quad_start|>": 151650,
  "<|repo_name|>": 151663,
  "<|video_pad|>": 151656,
  "<|vision_end|>": 151653,
  "<|vision_pad|>": 151654,
  "<|vision_start|>": 151652
}
bias.md ADDED
@@ -0,0 +1,4 @@
| Field | Response |
| ----- | ------ |
| Participation considerations from adversely impacted groups [protected classes](https://www.senate.ca.gov/content/protected-classes) in model design and testing | None |
| Measures taken to mitigate against unwanted bias | None |
chat_template.jinja ADDED
@@ -0,0 +1,148 @@
{%- if tools %}
{{- '<|im_start|>system\n' }}
{%- if messages[0].role == 'system' %}
{{- messages[0].content + '\n\n' }}
{%- endif %}
{{- "# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }}
{%- for tool in tools %}
{{- "\n" }}
{{- tool | tojson }}
{%- endfor %}
{{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }}
{%- else %}
{%- if messages[0].role == 'system' %}
{{- '<|im_start|>system\n' + messages[0].content + '<|im_end|>\n' }}
{%- endif %}
{{- '<|im_start|>user\n' }}
You are an expert evaluation judge specializing in comparative assessment of LLM responses. You are impartial, rigorous, and consistent. Given the conversation context and two assistant responses to the user's latest query, you will follow the evaluation plan and scoring guidelines exactly as written below.

{{- '\n\n#### Conversation Context ####\n' }}
{%- for message in messages %}
{%- if message['role'] == 'user' %}
{{- 'User: ' + message['content']|trim + '\n' }}
{%- elif message['role'] == 'assistant' %}
{%- if '</think>' in message['content'] %}
{%- set content = message['content'].split('</think>')[-1].lstrip() %}
{%- else %}
{%- set content = message['content'] %}
{%- endif %}
{{- 'Assistant: ' + content|trim + '\n'}}
{%- endif %}
{%- endfor %}

{{- '\n#### Responses to be Scored ####\n' }}
{%- for message in messages %}
{%- if message['role'] == 'response_1' %}
{{- '[The Begin of Response 1]\n' + message['content']|trim + '\n[The End of Response 1]\n' }}
{%- elif message['role'] == 'response_2' %}
{{- '\n[The Begin of Response 2]\n' + message['content']|trim + '\n[The End of Response 2]\n' }}
{%- endif %}
{%- endfor %}

#### Evaluation Plan ####
Please act as an impartial judge and evaluate the quality of the responses provided by two AI assistants to the user prompt. Begin your evaluation by generating your own answer to the prompt. You must provide your answer before judging any answers. When evaluating the assistants' answers, compare both assistants' answers with your answer. You must identify and correct any mistakes or inaccurate information. Then consider if the assistant's answers are helpful, relevant, and concise. Helpful means the answer correctly responds to the prompt or follows the instructions. Note when user prompt has any ambiguity or more than one interpretation, it is more helpful and appropriate to ask for clarifications or more information from the user than providing an answer based on assumptions. Relevant means all parts of the response closely connect or are appropriate to what is being asked. Concise means the response is clear and not verbose or excessive. Then consider the creativity and novelty of the assistant's answers when needed. Finally, identify any missing important information in the assistants' answers that would be beneficial to include when responding to the user prompt.

#### Scoring Guidelines ####
Based on the evaluation plan above, assign scores using these scales:

**Individual Helpfulness Scores (1-5):**
- 5: Extremely Helpful - Completely aligned with what the user was asking for
- 4: Mostly Helpful - Generally useful with minor room for improvement
- 3: Partially Helpful - Misses the overall goal in some way
- 2: Borderline Unhelpful - Mostly doesn't capture what the user wanted
- 1: Not Helpful - Completely missed the essence of the request

**Comparative Ranking (1-6):**
- 1: Response 1 is much better than Response 2
- 2: Response 1 is better than Response 2
- 3: Response 1 is slightly better than Response 2
- 4: Response 2 is slightly better than Response 1
- 5: Response 2 is better than Response 1
- 6: Response 2 is much better than Response 1

#### Output Format ####
Analyze step by step following the evaluation plan, then provide your judgment as JSON:
```json
{
  "response_1_analysis": "Your detailed analysis of Response 1 based on the evaluation plan",
  "response_2_analysis": "Your detailed analysis of Response 2 based on the evaluation plan",
  "score_1": <1-5>,
  "score_2": <1-5>,
  "ranking": <1-6>
}
```
{{- '<|im_end|>\n' }}
{%- endif %}
{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}
{%- for message in messages[::-1] %}
{%- set index = (messages|length - 1) - loop.index0 %}
{%- if ns.multi_step_tool and message.role == "user" and message.content is string and not(message.content.startswith('<tool_response>') and message.content.endswith('</tool_response>')) %}
{%- set ns.multi_step_tool = false %}
{%- set ns.last_query_index = index %}
{%- endif %}
{%- endfor %}
{%- for message in messages %}
{%- if message.content is string %}
{%- set content = message.content %}
{%- else %}
{%- set content = '' %}
{%- endif %}
{%- if (message.role == "user") or (message.role == "system" and not loop.first) %}
{{- '<|im_start|>' + message.role + '\n' + content + '<|im_end|>' + '\n' }}
{%- elif message.role == "assistant" %}
{%- set reasoning_content = '' %}
{%- if message.reasoning_content is string %}
{%- set reasoning_content = message.reasoning_content %}
{%- else %}
{%- if '</think>' in content %}
{%- set reasoning_content = content.split('</think>')[0].rstrip('\n').split('<think>')[-1].lstrip('\n') %}
{%- set content = content.split('</think>')[-1].lstrip('\n') %}
{%- endif %}
{%- endif %}
{%- if loop.index0 > ns.last_query_index %}
{%- if loop.last or (not loop.last and reasoning_content) %}
{{- '<|im_start|>' + message.role + '\n<think>\n' + reasoning_content.strip('\n') + '\n</think>\n\n' + content.lstrip('\n') }}
{%- else %}
{{- '<|im_start|>' + message.role + '\n' + content }}
{%- endif %}
{%- else %}
{{- '<|im_start|>' + message.role + '\n' + content }}
{%- endif %}
{%- if message.tool_calls %}
{%- for tool_call in message.tool_calls %}
{%- if (loop.first and content) or (not loop.first) %}
{{- '\n' }}
{%- endif %}
{%- if tool_call.function %}
{%- set tool_call = tool_call.function %}
{%- endif %}
{{- '<tool_call>\n{"name": "' }}
{{- tool_call.name }}
{{- '", "arguments": ' }}
{%- if tool_call.arguments is string %}
{{- tool_call.arguments }}
{%- else %}
{{- tool_call.arguments | tojson }}
{%- endif %}
{{- '}\n</tool_call>' }}
{%- endfor %}
{%- endif %}
{{- '<|im_end|>\n' }}
{%- elif message.role == "tool" %}
{%- if loop.first or (messages[loop.index0 - 1].role != "tool") %}
{{- '<|im_start|>user' }}
{%- endif %}
{{- '\n<tool_response>\n' }}
{{- content }}
{{- '\n</tool_response>' }}
{%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
{{- '<|im_end|>\n' }}
{%- endif %}
{%- endif %}
{%- endfor %}
{%- if add_generation_prompt %}
{{- '<|im_start|>assistant\n' }}
{%- if enable_thinking is defined and enable_thinking is false %}
{{- '<think>\n\n</think>\n\n' }}
{%- endif %}
{%- endif %}
config.json ADDED
@@ -0,0 +1,38 @@
{
  "architectures": [
    "Qwen3MoeForCausalLM"
  ],
  "attention_bias": false,
  "attention_dropout": 0.0,
  "bos_token_id": 151643,
  "decoder_sparse_step": 1,
  "eos_token_id": 151645,
  "head_dim": 128,
  "hidden_act": "silu",
  "hidden_size": 4096,
  "initializer_range": 0.02,
  "intermediate_size": 12288,
  "max_position_embeddings": 40960,
  "max_window_layers": 94,
  "mlp_only_layers": [],
  "model_type": "qwen3_moe",
  "moe_intermediate_size": 1536,
  "norm_topk_prob": true,
  "num_attention_heads": 64,
  "num_experts": 128,
  "num_experts_per_tok": 8,
  "num_hidden_layers": 94,
  "num_key_value_heads": 4,
  "output_router_logits": false,
  "rms_norm_eps": 1e-06,
  "rope_scaling": null,
  "rope_theta": 5000000,
  "router_aux_loss_coef": 0.001,
  "sliding_window": null,
  "tie_word_embeddings": false,
  "torch_dtype": "bfloat16",
  "transformers_version": "4.53.3",
  "use_cache": true,
  "use_sliding_window": false,
  "vocab_size": 151936
}
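The config above explains the "235B total / A22B active" naming: each of the 94 layers holds 128 expert FFNs but routes each token through only 8 of them. A back-of-the-envelope sketch (an illustrative assumption, not an official parameter count; it ignores norm, router, and bias parameters):

```python
# Rough MoE parameter accounting from the config.json values above.
cfg = {
    "hidden_size": 4096, "moe_intermediate_size": 1536,
    "num_experts": 128, "num_experts_per_tok": 8,
    "num_hidden_layers": 94, "num_attention_heads": 64,
    "num_key_value_heads": 4, "head_dim": 128, "vocab_size": 151936,
}

expert = 3 * cfg["hidden_size"] * cfg["moe_intermediate_size"]  # gate/up/down projections
attn = cfg["hidden_size"] * cfg["head_dim"] * (
    2 * cfg["num_attention_heads"] + 2 * cfg["num_key_value_heads"])  # q,o plus GQA k,v
embed = 2 * cfg["vocab_size"] * cfg["hidden_size"]  # untied input/output embeddings

total = cfg["num_hidden_layers"] * (cfg["num_experts"] * expert + attn) + embed
active = cfg["num_hidden_layers"] * (cfg["num_experts_per_tok"] * expert + attn) + embed

print(f"total ~ {total/1e9:.0f}B, active ~ {active/1e9:.0f}B")  # total ~ 235B, active ~ 22B
```

The sparse routing is why a 235B-parameter checkpoint has inference cost closer to a 22B dense model, even though all 95 shards must be loaded.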
explainability.md ADDED
@@ -0,0 +1,14 @@

| Field | Response |
| ----- | ----- |
| Intended Application & Domain: | Generative reward model, suitable for LLM-as-a-Judge and Reinforcement Learning from Human Feedback. |
| Model Type: | Qwen3-235B-A22B MoE |
| Intended User: | Developers designing AI Agent systems, chatbots, RAG systems, and other AI-powered applications. |
| Output: | Text (String, One-Dimensional sequences). |
| Describe how the model works: | Given a conversation history, a user request, and two candidate responses, the model generates a reasoning trace, then an individual helpfulness score for each response and a ranking score. |
| Name the adversely impacted groups this has been tested to deliver comparable outcomes regardless of: | N/A |
| Technical Limitations: | The model's max sequence length is 128K tokens. Longer text inputs should be truncated. The model may show decreased accuracy for harder prompts when configured to skip intermediate reasoning traces. |
| Verified to have met prescribed NVIDIA quality standards: | Yes |
| Performance Metrics: | Accuracy, Throughput, and Latency. |
| Potential Known Risks: | The model was trained on data that contains toxic language and societal biases originally crawled from the internet. Therefore, the model may amplify those biases and return toxic responses, especially when prompted with toxic prompts. The model may also generate answers that are inaccurate, omit key information, or include irrelevant or redundant text, producing socially unacceptable or undesirable output even if the prompt itself does not include anything explicitly offensive. |
| Licensing & Terms of Use: | [Apache 2.0](https://www.apache.org/licenses/LICENSE-2.0.txt) |
generation_config.json ADDED
@@ -0,0 +1,13 @@
{
  "bos_token_id": 151643,
  "do_sample": true,
  "eos_token_id": [
    151645,
    151643
  ],
  "pad_token_id": 151643,
  "temperature": 0.6,
  "top_k": 20,
  "top_p": 0.95,
  "transformers_version": "4.53.3"
}
merges.txt ADDED
(The diff for this file is too large to render.)
model-00001-of-00095.safetensors ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:bebfa6a00983675b8d6db561c27c59bb6327d3efe8e81464143902539d39821a
size 4999647472

model-00002-of-00095.safetensors ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:ffb108d472b92e3ee1b7ab255946b528546223230a8eaa85be1872a33ad1785f
size 4988141728

model-00003-of-00095.safetensors ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:688b397ade6a5a13a7886174716aacf2593aaf2afbfd32cdb37a2c567d7effe7
size 4988141728

model-00004-of-00095.safetensors ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:657781de5e6bc6b04e560ade4a6808e7da7d5ae08552a15b4f57b886e1a3bb20
size 4988141728

model-00005-of-00095.safetensors ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:ee4e5bdc477e2f02d31365c1eb3020a048d0a1e4900fdd6b3ce981319d9cd60b
size 4988141728

model-00006-of-00095.safetensors ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:b2b046192097b429a298a5f9212ad5063644a7f614d4779346960da30c8a80ed
size 4988141728

model-00007-of-00095.safetensors ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:bf2d6d31adcbdb760eae2e23969c70174701baa671846504ac5b014ad3c1602a
size 4988141728

model-00008-of-00095.safetensors ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:fb3c576af6d12f136ea2230f99a31b62f4ed3437bb336292cc0220179eb37117
size 4988141728

model-00009-of-00095.safetensors ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:3e62a09d930c6169a07957d5e0889ca8b07ae91734095d2d76333ad51fb82e01
size 4988141728

model-00010-of-00095.safetensors ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:d6217ef51013249a343addb3bba0222cf4fe1e9c9303a4b853c57ab5616420a8
size 4988141728

model-00011-of-00095.safetensors ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6d2857fc46a3c11df57cf9fb6353a8e08c33ea03d40caef004830d0d15cb9a58
|
| 3 |
+
size 4988142048
|
model-00012-of-00095.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0ddbdd696b59c628100c509a1568bf74413d6b00d03381700c956cfdfc4cb96a
|
| 3 |
+
size 4988142120
|
model-00013-of-00095.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9ac11f186889a5faa0aea922297aedeca92f3e3a9e177968fe36399308170746
|
| 3 |
+
size 4988142120
|
model-00014-of-00095.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fc976d447e4c4ea009fd3c63ceceb17ff0addf13e509fdd24dded9bf23ecf9f4
|
| 3 |
+
size 4988142120
|
model-00015-of-00095.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6c84a4b82c1fb44eeaad9888de86046fd1d0b1e502bea2f554c48525515ea5d6
|
| 3 |
+
size 4988142120
|
model-00016-of-00095.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9845f8983eef03e6ac39b9282ca14a1fe07b026d3950ead90edfaeece18b38c5
|
| 3 |
+
size 4988142120
|
model-00017-of-00095.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:50738dca121d760bca0d95c6f8f2967dffd2e3e5c8c355740aa9b8e8c9655488
|
| 3 |
+
size 4988142120
|
model-00018-of-00095.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a95a791a37a91857f87427453f06a73b5b098f2393c48148a7bcf1cc32340607
|
| 3 |
+
size 4988142120
|
model-00019-of-00095.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d598db7ebcc6694a8b10b06c7844aaae2f2d5d5e5dbdacb6dd3147eb0a17726e
|
| 3 |
+
size 4988142120
|
model-00020-of-00095.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f069d2eb0a87ae873a695c2843323d6ffba2ba90fd844945bf94ec0ff0eaab54
|
| 3 |
+
size 4988142120
|
model-00021-of-00095.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6b401cca518051ae40c51643691b47cd5cdb5738ddd355473ca9e5ad2c649daf
|
| 3 |
+
size 4988142120
|
model-00022-of-00095.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:176246f089dcd7e34282939c293be0bb51af9665d919d3229cbfabb54b6cad3e
|
| 3 |
+
size 4988142120
|
model-00023-of-00095.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0958c5ef85e34e8251517eac3b11158a5a5dfe0b8043e589e1a4808e7cbacfde
|
| 3 |
+
size 4988142120
|
model-00024-of-00095.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ec195768088884816d03a8ec413cc40d395aff17981778d6772c57bbc26a224f
|
| 3 |
+
size 4988142120
|
model-00025-of-00095.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0eab6d85dec15579c4b1bc2a5fb9b506bc927dc7a5e85440efa4bd3c125ecf82
|
| 3 |
+
size 4988142120
|
model-00026-of-00095.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:162feb930d70e96a9078de30c31bcbf30d870d51d3a3326b68ebb40aaf07c6dd
|
| 3 |
+
size 4988142120
|
model-00027-of-00095.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0f46243ad0ad75a96f911d4b90ee3f3d9b1e6d29b1a90b2f110fb97a07b3acac
|
| 3 |
+
size 4988142120
|
model-00028-of-00095.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f92d8c754f8aa19bd65bdce114b1bbd0b1dc1895a89e8b5366ea9ae399882dac
|
| 3 |
+
size 4988142120
|
model-00029-of-00095.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b7d0bccc42fc9f7ecc48508a2a100206f5d768118bf39c3a9db3eefc34a9ec42
|
| 3 |
+
size 4988142120
|
model-00030-of-00095.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0875b3a8e72199983cdd14b187a5d79db2a77aefc6828d87a95844bfbc15328f
|
| 3 |
+
size 4988142120
|
model-00031-of-00095.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:039c9297037365a27e38bd271a6b4c6b7af597b41fc6bcc86271612783338f4c
|
| 3 |
+
size 4988142120
|
model-00032-of-00095.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b0c535ae77de4fb83152ae936dc41d8e41036639a220ec0e7b8cf4c323eaf194
|
| 3 |
+
size 4988142120
|
model-00033-of-00095.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:41f3509532736aa224b7ee46bf9145476e44f4a07aac1b2fb660d64e4b13ff5c
|
| 3 |
+
size 4988142120
|
model-00034-of-00095.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:71252f3f4d870504d0ce3899d8014035a085a31f154676c101d8e8a7aea53b95
|
| 3 |
+
size 4988142120
|
model-00035-of-00095.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b47f30ac3bd315fa2650b70c767a81b9f26e8ce63c6562dc60caa12f20b035f2
|
| 3 |
+
size 4988142120
|
model-00036-of-00095.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:24d064856ac9edab5b7d5a018a35f9312b68e8c5d9b78205e9986af16b225291
|
| 3 |
+
size 4988142120
|
model-00037-of-00095.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:604cb4134e8c3475d5a74f969651d86f323ecec6f1624881b2679748e85d9b57
|
| 3 |
+
size 4988142120
|
model-00038-of-00095.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:501a04a90fe296a7b802f50fffadfabd65171e4d5ef8e520bb9f35e500e7969a
|
| 3 |
+
size 4988142120
|
model-00039-of-00095.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8def596e403ef4c4d14e67cf362643ec5fedda9b9c14fab454dd01743260c1ac
|
| 3 |
+
size 4988142120
|