New Push

Browse files

Files changed (4) hide show

README.md +30 -0
config.json +165 -0
model.safetensors +3 -0
preprocessor_config.json +14 -0

README.md CHANGED Viewed

@@ -1,3 +1,33 @@
 ---
 license: apache-2.0
 ---

 ---
 license: apache-2.0
 ---
+These are the official weights of ConFiDeNet.
+```python
+from PIL import Image
+import torch
+from transformers import ConFiDeNetForDepthEstimation, ConFiDeNetImageProcessor
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+image = Image.open("<Image Path>").convert("RGB")
+print(image.size)
+# image.save("image.jpg")
+image_processor = ConFiDeNetImageProcessor.from_pretrained("<Weight-Path>")
+model = ConFiDeNetForDepthEstimation.from_pretrained("<Weight-Path>").to(device)
+inputs = image_processor(images=image, return_tensors="pt").to(device)
+with torch.no_grad():
+    outputs = model(**inputs)
+post_processed_output = image_processor.post_process_depth_estimation(
+    outputs, target_sizes=[(image.height, image.width)],
+)
+depth = post_processed_output[0]["predicted_depth_uint16"].detach().cpu().numpy()
+depth = Image.fromarray(depth, mode="I;16")
+depth.save("depth.png")
+```

config.json ADDED Viewed

	@@ -0,0 +1,165 @@

+{
+  "architectures": [
+    "ConFiDeNetForDepthEstimation"
+  ],
+  "fov_model_config": {
+    "hidden_size": 1024,
+    "image_size": 384,
+    "model_type": "dinov2",
+    "num_attention_heads": 16,
+    "num_hidden_layers": 24,
+    "out_features": [
+      "stage24"
+    ],
+    "out_indices": [
+      24
+    ],
+    "patch_size": 16,
+    "stage_names": [
+      "stem",
+      "stage1",
+      "stage2",
+      "stage3",
+      "stage4",
+      "stage5",
+      "stage6",
+      "stage7",
+      "stage8",
+      "stage9",
+      "stage10",
+      "stage11",
+      "stage12",
+      "stage13",
+      "stage14",
+      "stage15",
+      "stage16",
+      "stage17",
+      "stage18",
+      "stage19",
+      "stage20",
+      "stage21",
+      "stage22",
+      "stage23",
+      "stage24"
+    ],
+    "use_mask_token": false
+  },
+  "fusion_hidden_size": 256,
+  "image_model_config": {
+    "hidden_size": 1024,
+    "image_size": 384,
+    "model_type": "dinov2",
+    "num_attention_heads": 16,
+    "num_hidden_layers": 24,
+    "out_features": [
+      "stage24"
+    ],
+    "out_indices": [
+      24
+    ],
+    "patch_size": 16,
+    "stage_names": [
+      "stem",
+      "stage1",
+      "stage2",
+      "stage3",
+      "stage4",
+      "stage5",
+      "stage6",
+      "stage7",
+      "stage8",
+      "stage9",
+      "stage10",
+      "stage11",
+      "stage12",
+      "stage13",
+      "stage14",
+      "stage15",
+      "stage16",
+      "stage17",
+      "stage18",
+      "stage19",
+      "stage20",
+      "stage21",
+      "stage22",
+      "stage23",
+      "stage24"
+    ],
+    "use_mask_token": false
+  },
+  "initializer_range": 0.02,
+  "intermediate_feature_dims": [
+    256,
+    256
+  ],
+  "intermediate_hook_ids": [
+    11,
+    5
+  ],
+  "merge_padding_value": 3,
+  "model_type": "depth_pro",
+  "num_fov_head_layers": 2,
+  "patch_model_config": {
+    "hidden_size": 1024,
+    "image_size": 384,
+    "model_type": "dinov2",
+    "num_attention_heads": 16,
+    "num_hidden_layers": 24,
+    "out_features": [
+      "stage24"
+    ],
+    "out_indices": [
+      24
+    ],
+    "patch_size": 16,
+    "stage_names": [
+      "stem",
+      "stage1",
+      "stage2",
+      "stage3",
+      "stage4",
+      "stage5",
+      "stage6",
+      "stage7",
+      "stage8",
+      "stage9",
+      "stage10",
+      "stage11",
+      "stage12",
+      "stage13",
+      "stage14",
+      "stage15",
+      "stage16",
+      "stage17",
+      "stage18",
+      "stage19",
+      "stage20",
+      "stage21",
+      "stage22",
+      "stage23",
+      "stage24"
+    ],
+    "use_mask_token": false
+  },
+  "patch_size": 384,
+  "scaled_images_feature_dims": [
+    1024,
+    1024,
+    512
+  ],
+  "scaled_images_overlap_ratios": [
+    0.0,
+    0.5,
+    0.25
+  ],
+  "scaled_images_ratios": [
+    0.25,
+    0.5,
+    1
+  ],
+  "torch_dtype": "float16",
+  "transformers_version": "4.49.0.dev0",
+  "use_batch_norm_in_fusion_residual": false,
+  "use_bias_in_fusion_residual": true,
+  "use_fov_model": true
+}

model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9c6811e3165485b9a94a204329860cb333a79877e757eb795a179a4ea34bbcf7
+size 1904185492

preprocessor_config.json ADDED Viewed

	@@ -0,0 +1,14 @@

+{
+  "do_normalize": true,
+  "do_rescale": true,
+  "do_resize": true,
+  "image_mean": 0.5,
+  "image_processor_type": "ConFiDeNetImageProcessorFast",
+  "image_std": 0.5,
+  "resample": 2,
+  "rescale_factor": 0.00392156862745098,
+  "size": {
+    "height": 1536,
+    "width": 1536
+  }
+}