Hristo-Karagyozov committed
Commit 6ae42f6 · verified · 1 parent: 9434e7d

Upload folder using huggingface_hub

__init__.py ADDED
@@ -0,0 +1 @@
+ from .modeling_custom import CustomDistilBertClassifier
config.json CHANGED
@@ -1,37 +1,37 @@
- {
-   "_name_or_path": "../distilbert-prompt-classifier/checkpoint-24",
-   "activation": "gelu",
-   "architectures": [
-     "FineTunedDistilBertWithStringLabels"
-   ],
-   "attention_dropout": 0.1,
-   "dim": 768,
-   "dropout": 0.1,
-   "hidden_dim": 3072,
-   "id2label": {
-     "0": "LABEL_0",
-     "1": "LABEL_1",
-     "2": "LABEL_2",
-     "3": "LABEL_3"
-   },
-   "initializer_range": 0.02,
-   "label2id": {
-     "LABEL_0": 0,
-     "LABEL_1": 1,
-     "LABEL_2": 2,
-     "LABEL_3": 3
-   },
-   "max_position_embeddings": 512,
-   "model_type": "distilbert",
-   "n_heads": 12,
-   "n_layers": 6,
-   "pad_token_id": 0,
-   "problem_type": "single_label_classification",
-   "qa_dropout": 0.1,
-   "seq_classif_dropout": 0.2,
-   "sinusoidal_pos_embds": false,
-   "tie_weights_": true,
-   "torch_dtype": "float32",
-   "transformers_version": "4.48.1",
-   "vocab_size": 30522
- }
+ {
+   "_name_or_path": "distilbert-base-uncased",
+   "activation": "gelu",
+   "architectures": [
+     "CustomDistilBertClassifier"
+   ],
+   "attention_dropout": 0.1,
+   "dim": 768,
+   "dropout": 0.1,
+   "hidden_dim": 3072,
+   "id2label": {
+     "0": "LABEL_0",
+     "1": "LABEL_1",
+     "2": "LABEL_2",
+     "3": "LABEL_3"
+   },
+   "initializer_range": 0.02,
+   "label2id": {
+     "LABEL_0": 0,
+     "LABEL_1": 1,
+     "LABEL_2": 2,
+     "LABEL_3": 3
+   },
+   "max_position_embeddings": 512,
+   "model_type": "distilbert",
+   "n_heads": 12,
+   "n_layers": 6,
+   "pad_token_id": 0,
+   "problem_type": "single_label_classification",
+   "qa_dropout": 0.1,
+   "seq_classif_dropout": 0.2,
+   "sinusoidal_pos_embds": false,
+   "tie_weights_": true,
+   "torch_dtype": "float32",
+   "transformers_version": "4.46.3",
+   "vocab_size": 30522
+ }
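Note: the committed config keeps the generic `LABEL_0`–`LABEL_3` mappings; the human-readable class names live in `modeling_custom.py` below. A minimal sketch of inspecting (and, if preferred, overriding) these mappings — the local path `./prompt-classifier` is a placeholder, not part of the commit:

```python
from transformers import AutoConfig

# Placeholder path: point this at the downloaded repo or checkpoint folder.
config = AutoConfig.from_pretrained("./prompt-classifier")
print(config.id2label)  # {0: 'LABEL_0', 1: 'LABEL_1', 2: 'LABEL_2', 3: 'LABEL_3'}

# The string names are defined in modeling_custom.py; they could equally
# be baked into the config so generic pipelines report them directly:
config.id2label = {0: "Clarification", 1: "Factual", 2: "Operational", 3: "Summarization"}
config.label2id = {v: k for k, v in config.id2label.items()}
```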
modeling_custom.py ADDED
@@ -0,0 +1,21 @@
+ import torch
+ from transformers import DistilBertForSequenceClassification
+
+ label_dict = {0: "Clarification", 1: "Factual", 2: "Operational", 3: "Summarization"}
+
+
+ class CustomDistilBertClassifier(DistilBertForSequenceClassification):
+     def __init__(self, config):
+         super().__init__(config)
+         self.label_map = label_dict  # predefined id -> string-label mapping
+
+     def forward(self, input_ids, attention_mask):
+         outputs = super().forward(input_ids=input_ids, attention_mask=attention_mask)
+         logits = outputs.logits
+         predicted_class = torch.argmax(logits, dim=-1).item()  # .item() assumes batch size 1
+         return self.label_map[predicted_class]  # return the string label directly
+
+     @classmethod
+     def from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs):
+         model = super().from_pretrained(pretrained_model_name_or_path, *model_args, **kwargs)
+         return model
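For reference, a minimal usage sketch for `CustomDistilBertClassifier`, assuming the model weights (e.g. `model.safetensors`) sit alongside these files in a local folder; the path below is a placeholder, not part of the commit. Because `forward` calls `.item()`, it handles one example at a time and returns the string label rather than logits:

```python
import torch
from transformers import DistilBertTokenizer

# Works as a top-level import when run from inside the repo folder.
from modeling_custom import CustomDistilBertClassifier

MODEL_DIR = "./prompt-classifier"  # placeholder path

tokenizer = DistilBertTokenizer.from_pretrained(MODEL_DIR)
model = CustomDistilBertClassifier.from_pretrained(MODEL_DIR)
model.eval()

enc = tokenizer("Could you summarize the last meeting?", return_tensors="pt")
with torch.no_grad():
    # forward() argmaxes the logits and maps the id through label_dict,
    # so the call returns e.g. "Summarization" directly.
    label = model(input_ids=enc["input_ids"], attention_mask=enc["attention_mask"])
print(label)
```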
optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e323d86ca66e592b2f16e8e3e224a9c2c7e5d8fe354b0ca929c9646b9c6d42c6
+ size 535736698
rng_state.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:1ffb5e793dd475a09e9f2ff870c6fa547ca8d17b32e983b9e3a10a9adc4d7ed1
+ size 13990
scheduler.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c964619ec20c2b1ddf3699d812c3c717630e29566857ae4c8e10cd3cf5f7d7d7
+ size 1064
special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
+ {
+   "cls_token": "[CLS]",
+   "mask_token": "[MASK]",
+   "pad_token": "[PAD]",
+   "sep_token": "[SEP]",
+   "unk_token": "[UNK]"
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,55 @@
+ {
+   "added_tokens_decoder": {
+     "0": {
+       "content": "[PAD]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "100": {
+       "content": "[UNK]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "101": {
+       "content": "[CLS]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "102": {
+       "content": "[SEP]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "103": {
+       "content": "[MASK]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     }
+   },
+   "clean_up_tokenization_spaces": false,
+   "cls_token": "[CLS]",
+   "do_lower_case": true,
+   "mask_token": "[MASK]",
+   "model_max_length": 512,
+   "pad_token": "[PAD]",
+   "sep_token": "[SEP]",
+   "strip_accents": null,
+   "tokenize_chinese_chars": true,
+   "tokenizer_class": "DistilBertTokenizer",
+   "unk_token": "[UNK]"
+ }
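The tokenizer is a standard lower-casing `DistilBertTokenizer` capped at 512 tokens, so it loads with the usual auto classes. A quick sketch (placeholder path again):

```python
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("./prompt-classifier")  # placeholder path
enc = tokenizer(
    "What did we decide yesterday?",
    truncation=True,  # respects model_max_length = 512
    return_tensors="pt",
)
print(enc["input_ids"].shape)
```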
trainer_state.json ADDED
@@ -0,0 +1,74 @@
+ {
+   "best_metric": null,
+   "best_model_checkpoint": null,
+   "epoch": 3.0,
+   "eval_steps": 500,
+   "global_step": 24,
+   "is_hyper_param_search": false,
+   "is_local_process_zero": true,
+   "is_world_process_zero": true,
+   "log_history": [
+     {
+       "epoch": 0.875,
+       "grad_norm": 1.7773577098978421e-07,
+       "learning_rate": 1.3285352466760777e-05,
+       "loss": 0.0,
+       "step": 7
+     },
+     {
+       "epoch": 1.0,
+       "eval_accuracy": 0.970873786407767,
+       "eval_f1": 0.9708202209574825,
+       "eval_loss": 0.3374857008457184,
+       "eval_runtime": 1.5469,
+       "eval_samples_per_second": 66.584,
+       "eval_steps_per_second": 1.293,
+       "step": 8
+     },
+     {
+       "epoch": 1.75,
+       "grad_norm": 1.6692462168066413e-07,
+       "learning_rate": 7.814913215741634e-06,
+       "loss": 0.0,
+       "step": 14
+     },
+     {
+       "epoch": 2.0,
+       "eval_accuracy": 0.970873786407767,
+       "eval_f1": 0.9708202209574825,
+       "eval_loss": 0.33735620975494385,
+       "eval_runtime": 1.5393,
+       "eval_samples_per_second": 66.912,
+       "eval_steps_per_second": 1.299,
+       "step": 16
+     },
+     {
+       "epoch": 2.625,
+       "grad_norm": 1.698020923868171e-07,
+       "learning_rate": 2.34447396472249e-06,
+       "loss": 0.0,
+       "step": 21
+     }
+   ],
+   "logging_steps": 7,
+   "max_steps": 24,
+   "num_input_tokens_seen": 0,
+   "num_train_epochs": 3,
+   "save_steps": 500,
+   "stateful_callbacks": {
+     "TrainerControl": {
+       "args": {
+         "should_epoch_stop": false,
+         "should_evaluate": false,
+         "should_log": false,
+         "should_save": true,
+         "should_training_stop": true
+       },
+       "attributes": {}
+     }
+   },
+   "total_flos": 47491256236032.0,
+   "train_batch_size": 64,
+   "trial_name": null,
+   "trial_params": null
+ }
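The trainer state records three epochs (24 steps at batch size 64), with eval accuracy and F1 both around 0.971 from epoch 1 onward. If the eval curve is wanted programmatically, the metrics can be read straight out of `log_history` — a small sketch:

```python
import json

with open("trainer_state.json") as f:
    state = json.load(f)

# Eval entries are the log_history items that carry eval_* keys.
for entry in state["log_history"]:
    if "eval_accuracy" in entry:
        print(f"epoch {entry['epoch']}: "
              f"acc={entry['eval_accuracy']:.4f}, f1={entry['eval_f1']:.4f}")
```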
training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:7ca6260af351c39ea1e60a29d92923e9b727973a9f67e9d13d7397bca2424ff6
+ size 5304
vocab.txt ADDED
The diff for this file is too large to render. See raw diff