onkarsus13 committed on
Commit 6b488bb · 1 Parent(s): 77d68aa
Files changed (4)
  1. README.md +30 -0
  2. config.json +165 -0
  3. model.safetensors +3 -0
  4. preprocessor_config.json +14 -0
README.md CHANGED
@@ -1,3 +1,33 @@
  ---
  license: apache-2.0
  ---
+
+ These are the official weights of ConFiDeNet.
+
+ ```python
+ from PIL import Image
+ import torch
+ from transformers import ConFiDeNetForDepthEstimation, ConFiDeNetImageProcessor
+
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+ # Load the input image (replace the placeholder with an actual file path).
+ image = Image.open("<Image Path>").convert("RGB")
+ print(image.size)
+
+ # Load the image processor and model from the checkpoint directory.
+ image_processor = ConFiDeNetImageProcessor.from_pretrained("<Weight-Path>")
+ model = ConFiDeNetForDepthEstimation.from_pretrained("<Weight-Path>").to(device)
+
+ inputs = image_processor(images=image, return_tensors="pt").to(device)
+
+ with torch.no_grad():
+     outputs = model(**inputs)
+
+ # Resize the predicted depth back to the original image resolution.
+ post_processed_output = image_processor.post_process_depth_estimation(
+     outputs, target_sizes=[(image.height, image.width)],
+ )
+
+ depth = post_processed_output[0]["predicted_depth_uint16"].detach().cpu().numpy()
+ depth = Image.fromarray(depth, mode="I;16")
+ depth.save("depth.png")
+ ```
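The snippet writes the depth map as a 16-bit PNG, which most image viewers render as nearly black. A quick way to eyeball the result is to rescale the 16-bit values to 8-bit; a minimal sketch, assuming the `depth.png` produced above:

```python
import numpy as np
from PIL import Image

# Load the 16-bit depth PNG written above and rescale it to 8-bit purely for viewing;
# this discards the raw value scale and is only a visualization aid.
depth = np.array(Image.open("depth.png"), dtype=np.float32)
depth_vis = (depth - depth.min()) / max(float(depth.max() - depth.min()), 1e-6) * 255.0
Image.fromarray(depth_vis.astype(np.uint8)).save("depth_vis.png")
```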
config.json ADDED
@@ -0,0 +1,165 @@
+ {
+   "architectures": [
+     "ConFiDeNetForDepthEstimation"
+   ],
+   "fov_model_config": {
+     "hidden_size": 1024,
+     "image_size": 384,
+     "model_type": "dinov2",
+     "num_attention_heads": 16,
+     "num_hidden_layers": 24,
+     "out_features": [
+       "stage24"
+     ],
+     "out_indices": [
+       24
+     ],
+     "patch_size": 16,
+     "stage_names": [
+       "stem",
+       "stage1",
+       "stage2",
+       "stage3",
+       "stage4",
+       "stage5",
+       "stage6",
+       "stage7",
+       "stage8",
+       "stage9",
+       "stage10",
+       "stage11",
+       "stage12",
+       "stage13",
+       "stage14",
+       "stage15",
+       "stage16",
+       "stage17",
+       "stage18",
+       "stage19",
+       "stage20",
+       "stage21",
+       "stage22",
+       "stage23",
+       "stage24"
+     ],
+     "use_mask_token": false
+   },
+   "fusion_hidden_size": 256,
+   "image_model_config": {
+     "hidden_size": 1024,
+     "image_size": 384,
+     "model_type": "dinov2",
+     "num_attention_heads": 16,
+     "num_hidden_layers": 24,
+     "out_features": [
+       "stage24"
+     ],
+     "out_indices": [
+       24
+     ],
+     "patch_size": 16,
+     "stage_names": [
+       "stem",
+       "stage1",
+       "stage2",
+       "stage3",
+       "stage4",
+       "stage5",
+       "stage6",
+       "stage7",
+       "stage8",
+       "stage9",
+       "stage10",
+       "stage11",
+       "stage12",
+       "stage13",
+       "stage14",
+       "stage15",
+       "stage16",
+       "stage17",
+       "stage18",
+       "stage19",
+       "stage20",
+       "stage21",
+       "stage22",
+       "stage23",
+       "stage24"
+     ],
+     "use_mask_token": false
+   },
+   "initializer_range": 0.02,
+   "intermediate_feature_dims": [
+     256,
+     256
+   ],
+   "intermediate_hook_ids": [
+     11,
+     5
+   ],
+   "merge_padding_value": 3,
+   "model_type": "depth_pro",
+   "num_fov_head_layers": 2,
+   "patch_model_config": {
+     "hidden_size": 1024,
+     "image_size": 384,
+     "model_type": "dinov2",
+     "num_attention_heads": 16,
+     "num_hidden_layers": 24,
+     "out_features": [
+       "stage24"
+     ],
+     "out_indices": [
+       24
+     ],
+     "patch_size": 16,
+     "stage_names": [
+       "stem",
+       "stage1",
+       "stage2",
+       "stage3",
+       "stage4",
+       "stage5",
+       "stage6",
+       "stage7",
+       "stage8",
+       "stage9",
+       "stage10",
+       "stage11",
+       "stage12",
+       "stage13",
+       "stage14",
+       "stage15",
+       "stage16",
+       "stage17",
+       "stage18",
+       "stage19",
+       "stage20",
+       "stage21",
+       "stage22",
+       "stage23",
+       "stage24"
+     ],
+     "use_mask_token": false
+   },
+   "patch_size": 384,
+   "scaled_images_feature_dims": [
+     1024,
+     1024,
+     512
+   ],
+   "scaled_images_overlap_ratios": [
+     0.0,
+     0.5,
+     0.25
+   ],
+   "scaled_images_ratios": [
+     0.25,
+     0.5,
+     1
+   ],
+   "torch_dtype": "float16",
+   "transformers_version": "4.49.0.dev0",
+   "use_batch_norm_in_fusion_residual": false,
+   "use_bias_in_fusion_residual": true,
+   "use_fov_model": true
+ }
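The config (declared with `model_type` `depth_pro`) wires three DINOv2-style ViT-L encoders, one each for the patch, image, and field-of-view branches, into a shared fusion head. A minimal sketch for inspecting those fields from the raw JSON, assuming `config.json` has been downloaded locally:

```python
import json

# Read the raw config.json from the checkpoint directory (path is an assumption).
with open("config.json") as f:
    cfg = json.load(f)

# Each branch is a DINOv2-style encoder: hidden size 1024, 24 layers, 16 heads, patch size 16.
for name in ("patch_model_config", "image_model_config", "fov_model_config"):
    sub = cfg[name]
    print(name, sub["model_type"], sub["hidden_size"], sub["num_hidden_layers"], sub["patch_size"])

print("fusion_hidden_size:", cfg["fusion_hidden_size"])
print("use_fov_model:", cfg["use_fov_model"])
```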
model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9c6811e3165485b9a94a204329860cb333a79877e757eb795a179a4ea34bbcf7
+ size 1904185492
preprocessor_config.json ADDED
@@ -0,0 +1,14 @@
+ {
+   "do_normalize": true,
+   "do_rescale": true,
+   "do_resize": true,
+   "image_mean": 0.5,
+   "image_processor_type": "ConFiDeNetImageProcessorFast",
+   "image_std": 0.5,
+   "resample": 2,
+   "rescale_factor": 0.00392156862745098,
+   "size": {
+     "height": 1536,
+     "width": 1536
+   }
+ }
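Per this config, the processor resizes inputs to 1536×1536 with bilinear resampling (`resample: 2`), rescales pixel values by 1/255, and normalizes with mean and std 0.5. A rough manual equivalent, assuming the standard resize → rescale → normalize order:

```python
import numpy as np
import torch
from PIL import Image

# Manual equivalent of the preprocessing described by preprocessor_config.json.
image = Image.open("<Image Path>").convert("RGB").resize((1536, 1536), Image.BILINEAR)
pixels = torch.from_numpy(np.asarray(image, dtype=np.float32)) / 255.0  # rescale_factor = 1/255
pixels = (pixels - 0.5) / 0.5                                           # image_mean = image_std = 0.5
pixel_values = pixels.permute(2, 0, 1).unsqueeze(0)                     # shape (1, 3, 1536, 1536)
```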