Commit c950b46
1 Parent(s): 3e86d6b
Update README and code to enhance clarity

- README.md +9 -90
- sampling_ddim.py +75 -0
- sampling_ddpm.py +62 -0
README.md CHANGED

@@ -6,9 +6,9 @@ pipeline_tag: unconditional-image-generation
 tags:
 - art
 ---
-#
+# ddpm-anime-faces-64
 
-**
+**ddpm-anime-faces-64** is an educational project introducing the training and sampling processes of DDPM and DDIM models. The model is trained on the [huggan/anime-faces](https://huggingface.co/datasets/huggan/anime-faces) dataset.
 
 ## Training Arguments
 
@@ -25,95 +25,14 @@ tags:
 
 For training code, please refer to [this link](https://github.com/LittleNyima/code-snippets/blob/master/ddpm-tutorial/ddpm_training.py).
 
-
-
-This project aims to implement DDPM from scratch, so `DDPMScheduler` is not used. Instead, I use only `UNet2DModel` and implement a simple scheduler myself.
-
-```python
-import torch
-from tqdm import tqdm
-from diffusers import UNet2DModel
-
-class DDPM:
-    def __init__(
-        self,
-        num_train_timesteps: int = 1000,
-        beta_start: float = 0.0001,
-        beta_end: float = 0.02,
-    ):
-        self.num_train_timesteps = num_train_timesteps
-        self.betas = torch.linspace(beta_start, beta_end, num_train_timesteps, dtype=torch.float32)
-        self.alphas = 1.0 - self.betas
-        self.alphas_cumprod = torch.cumprod(self.alphas, dim=0)
-        self.timesteps = torch.arange(num_train_timesteps - 1, -1, -1)
-
-    def add_noise(
-        self,
-        original_samples: torch.Tensor,
-        noise: torch.Tensor,
-        timesteps: torch.Tensor,
-    ):
-        alphas_cumprod = self.alphas_cumprod.to(device=original_samples.device, dtype=original_samples.dtype)
-        noise = noise.to(original_samples.device)
-        timesteps = timesteps.to(original_samples.device)
-
-        # \sqrt{\bar\alpha_t}
-        sqrt_alpha_prod = alphas_cumprod[timesteps].flatten() ** 0.5
-        while len(sqrt_alpha_prod.shape) < len(original_samples.shape):
-            sqrt_alpha_prod = sqrt_alpha_prod.unsqueeze(-1)
-
-        # \sqrt{1 - \bar\alpha_t}
-        sqrt_one_minus_alpha_prod = (1.0 - alphas_cumprod[timesteps]).flatten() ** 0.5
-        while len(sqrt_one_minus_alpha_prod.shape) < len(original_samples.shape):
-            sqrt_one_minus_alpha_prod = sqrt_one_minus_alpha_prod.unsqueeze(-1)
-
-        return sqrt_alpha_prod * original_samples + sqrt_one_minus_alpha_prod * noise
-
-    @torch.no_grad()
-    def sample(
-        self,
-        unet: UNet2DModel,
-        batch_size: int,
-        in_channels: int,
-        sample_size: int,
-    ):
-        betas = self.betas.to(unet.device)
-        alphas = self.alphas.to(unet.device)
-        alphas_cumprod = self.alphas_cumprod.to(unet.device)
-        timesteps = self.timesteps.to(unet.device)
-        images = torch.randn((batch_size, in_channels, sample_size, sample_size), device=unet.device)
-        for timestep in tqdm(timesteps, desc='Sampling'):
-            pred_noise: torch.Tensor = unet(images, timestep).sample
-
-            # mean of q(x_{t-1}|x_t)
-            alpha_t = alphas[timestep]
-            alpha_cumprod_t = alphas_cumprod[timestep]
-            sqrt_alpha_t = alpha_t ** 0.5
-            one_minus_alpha_t = 1.0 - alpha_t
-            sqrt_one_minus_alpha_cumprod_t = (1 - alpha_cumprod_t) ** 0.5
-            mean = (images - one_minus_alpha_t / sqrt_one_minus_alpha_cumprod_t * pred_noise) / sqrt_alpha_t
-
-            # variance of q(x_{t-1}|x_t)
-            if timestep > 1:
-                beta_t = betas[timestep]
-                one_minus_alpha_cumprod_t_minus_one = 1.0 - alphas_cumprod[timestep - 1]
-                one_divided_by_sigma_square = alpha_t / beta_t + 1.0 / one_minus_alpha_cumprod_t_minus_one
-                variance = (1.0 / one_divided_by_sigma_square) ** 0.5
-            else:
-                variance = torch.zeros_like(timestep)
-
-            epsilon = torch.randn_like(images)
-            images = mean + variance * epsilon
-        images = (images / 2.0 + 0.5).clamp(0, 1).cpu().permute(0, 2, 3, 1).numpy()
-        return images
-
-model = UNet2DModel.from_pretrained('ddpm-animefaces-64').cuda()
-ddpm = DDPM()
-images = ddpm.sample(model, 32, 3, 64)
-
-from diffusers.utils import make_image_grid, numpy_to_pil
-image_grid = make_image_grid(numpy_to_pil(images), rows=4, cols=8)
-image_grid.save('ddpm-sample-results.png')
-```
-
-This can also be found in [this link](https://github.com/LittleNyima/code-snippets/blob/master/ddpm-tutorial/ddpm_sampling.py).
+## Inference
+
+This project aims to implement DDPM from scratch, so `DDPMScheduler` is not used. Instead, I use only `UNet2DModel` and implement a simple scheduler myself.
+
+Please refer to `sampling_ddpm.py` and `sampling_ddim.py` for detailed usage.
+
+## References
+
+1. [DDPM Tutorial (Written in Chinese)](https://littlenyima.github.io/posts/13-denoising-diffusion-probabilistic-models/)
+2. [DDIM Tutorial (Written in Chinese)](https://littlenyima.github.io/posts/14-denoising-diffusion-implicit-models/)
+3. [GitHub Repo](https://github.com/LittleNyima/code-snippets)
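Both new scripts build the same hand-rolled scheduler state in `__init__`: a linear beta schedule over `num_train_timesteps = 1000` steps and its cumulative products. In the usual DDPM notation, that code corresponds to

$$
\beta_t = \beta_{\text{start}} + \frac{t}{T-1}\bigl(\beta_{\text{end}} - \beta_{\text{start}}\bigr),
\qquad
\alpha_t = 1 - \beta_t,
\qquad
\bar\alpha_t = \prod_{s=0}^{t} \alpha_s,
$$

with $\beta_{\text{start}} = 10^{-4}$, $\beta_{\text{end}} = 0.02$ and $T = 1000$; these are exactly `self.betas`, `self.alphas` and `self.alphas_cumprod`. The samplers only ever read these three tensors, which is why no `DDPMScheduler` from `diffusers` is needed.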
sampling_ddim.py ADDED

@@ -0,0 +1,75 @@
# model

from diffusers import UNet2DModel

model = UNet2DModel.from_pretrained('ddpm-anime-faces-64').cuda()


# core

import torch
import math
from tqdm import tqdm

class DDIM:
    def __init__(
        self,
        num_train_timesteps: int = 1000,
        beta_start: float = 0.0001,
        beta_end: float = 0.02,
        sample_steps: int = 20,
    ):
        self.num_train_timesteps = num_train_timesteps
        self.betas = torch.linspace(beta_start, beta_end, num_train_timesteps, dtype=torch.float32)
        self.alphas = 1.0 - self.betas
        self.alphas_cumprod = torch.cumprod(self.alphas, dim=0)
        self.timesteps = torch.linspace(num_train_timesteps - 1, 0, sample_steps).long()

    @torch.no_grad()
    def sample(
        self,
        unet: UNet2DModel,
        batch_size: int,
        in_channels: int,
        sample_size: int,
        eta: float = 0.0,
    ):
        alphas = self.alphas.to(unet.device)
        alphas_cumprod = self.alphas_cumprod.to(unet.device)
        timesteps = self.timesteps.to(unet.device)
        images = torch.randn((batch_size, in_channels, sample_size, sample_size), device=unet.device)
        for t, tau in tqdm(list(zip(timesteps[:-1], timesteps[1:])), desc='Sampling'):
            pred_noise: torch.Tensor = unet(images, t).sample

            # sigma_t
            if not math.isclose(eta, 0.0):
                one_minus_alpha_prod_tau = 1.0 - alphas_cumprod[tau]
                one_minus_alpha_prod_t = 1.0 - alphas_cumprod[t]
                one_minus_alpha_t = 1.0 - alphas[t]
                sigma_t = eta * (one_minus_alpha_prod_tau * one_minus_alpha_t / one_minus_alpha_prod_t) ** 0.5
            else:
                sigma_t = torch.zeros_like(alphas[0])

            # first term of x_tau
            alphas_cumprod_tau = alphas_cumprod[tau]
            sqrt_alphas_cumprod_tau = alphas_cumprod_tau ** 0.5
            alphas_cumprod_t = alphas_cumprod[t]
            sqrt_alphas_cumprod_t = alphas_cumprod_t ** 0.5
            sqrt_one_minus_alphas_cumprod_t = (1.0 - alphas_cumprod_t) ** 0.5
            first_term = sqrt_alphas_cumprod_tau * (images - sqrt_one_minus_alphas_cumprod_t * pred_noise) / sqrt_alphas_cumprod_t

            # second term of x_tau
            coeff = (1.0 - alphas_cumprod_tau - sigma_t ** 2) ** 0.5
            second_term = coeff * pred_noise

            epsilon = torch.randn_like(images)
            images = first_term + second_term + sigma_t * epsilon
        images = (images / 2.0 + 0.5).clamp(0, 1).cpu().permute(0, 2, 3, 1).numpy()
        return images

ddim = DDIM()
images = ddim.sample(model, 32, 3, 64)

from diffusers.utils import make_image_grid, numpy_to_pil
image_grid = make_image_grid(numpy_to_pil(images), rows=4, cols=8)
image_grid.save('ddim-sample-results.png')
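For readers cross-checking the loop above against the DDIM paper, the body computes (in the code's notation, with $\bar\alpha$ = `alphas_cumprod`, $t$ the current timestep and $\tau$ the next, smaller one)

$$
x_\tau \;=\; \underbrace{\sqrt{\bar\alpha_\tau}\,\frac{x_t - \sqrt{1-\bar\alpha_t}\,\epsilon_\theta(x_t, t)}{\sqrt{\bar\alpha_t}}}_{\texttt{first\_term}}
\;+\; \underbrace{\sqrt{1-\bar\alpha_\tau-\sigma_t^2}\,\epsilon_\theta(x_t, t)}_{\texttt{second\_term}}
\;+\; \sigma_t\,\epsilon,
\qquad \epsilon\sim\mathcal{N}(0,\mathbf{I}),
$$

$$
\sigma_t \;=\; \eta\,\sqrt{\frac{(1-\bar\alpha_\tau)\,(1-\alpha_t)}{1-\bar\alpha_t}} .
$$

With the default `eta = 0.0` the update is deterministic ($\sigma_t = 0$), and only the `sample_steps` timesteps in `self.timesteps` are visited instead of all 1000. The $\sigma_t$ above transcribes the code; note the DDIM paper's $\eta$-parameterisation uses $1 - \bar\alpha_t/\bar\alpha_\tau$ where this code uses the single-step $1-\alpha_t$, and the two coincide only for consecutive timesteps.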
sampling_ddpm.py ADDED

@@ -0,0 +1,62 @@
import torch
from tqdm import tqdm
from diffusers import UNet2DModel

class DDPM:
    def __init__(
        self,
        num_train_timesteps: int = 1000,
        beta_start: float = 0.0001,
        beta_end: float = 0.02,
    ):
        self.num_train_timesteps = num_train_timesteps
        self.betas = torch.linspace(beta_start, beta_end, num_train_timesteps, dtype=torch.float32)
        self.alphas = 1.0 - self.betas
        self.alphas_cumprod = torch.cumprod(self.alphas, dim=0)
        self.timesteps = torch.arange(num_train_timesteps - 1, -1, -1)

    @torch.no_grad()
    def sample(
        self,
        unet: UNet2DModel,
        batch_size: int,
        in_channels: int,
        sample_size: int,
    ):
        betas = self.betas.to(unet.device)
        alphas = self.alphas.to(unet.device)
        alphas_cumprod = self.alphas_cumprod.to(unet.device)
        timesteps = self.timesteps.to(unet.device)
        images = torch.randn((batch_size, in_channels, sample_size, sample_size), device=unet.device)
        for timestep in tqdm(timesteps, desc='Sampling'):
            pred_noise: torch.Tensor = unet(images, timestep).sample

            # mean of q(x_{t-1}|x_t)
            alpha_t = alphas[timestep]
            alpha_cumprod_t = alphas_cumprod[timestep]
            sqrt_alpha_t = alpha_t ** 0.5
            one_minus_alpha_t = 1.0 - alpha_t
            sqrt_one_minus_alpha_cumprod_t = (1 - alpha_cumprod_t) ** 0.5
            mean = (images - one_minus_alpha_t / sqrt_one_minus_alpha_cumprod_t * pred_noise) / sqrt_alpha_t

            # variance of q(x_{t-1}|x_t)
            if timestep > 0:
                beta_t = betas[timestep]
                one_minus_alpha_cumprod_t_minus_one = 1.0 - alphas_cumprod[timestep - 1]
                one_divided_by_sigma_square = alpha_t / beta_t + 1.0 / one_minus_alpha_cumprod_t_minus_one
                variance = (1.0 / one_divided_by_sigma_square) ** 0.5
            else:
                variance = torch.zeros_like(timestep)

            epsilon = torch.randn_like(images)
            images = mean + variance * epsilon
        images = (images / 2.0 + 0.5).clamp(0, 1).cpu().permute(0, 2, 3, 1).numpy()
        return images

model = UNet2DModel.from_pretrained('ddpm-animefaces-64').cuda()
ddpm = DDPM()
images = ddpm.sample(model, 32, 3, 64)

from diffusers.utils import make_image_grid, numpy_to_pil
image_grid = make_image_grid(numpy_to_pil(images), rows=4, cols=8)
image_grid.save('ddpm-sample-results.png')
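For reference, the loop above is the standard DDPM ancestral sampling step; in the code's notation ($\bar\alpha$ = `alphas_cumprod`) it computes

$$
x_{t-1} \;=\; \underbrace{\frac{1}{\sqrt{\alpha_t}}\left(x_t - \frac{1-\alpha_t}{\sqrt{1-\bar\alpha_t}}\,\epsilon_\theta(x_t, t)\right)}_{\texttt{mean}}
\;+\; \sigma_t\,\epsilon,
\qquad \epsilon\sim\mathcal{N}(0,\mathbf{I}),
$$

$$
\frac{1}{\sigma_t^2} \;=\; \frac{\alpha_t}{\beta_t} + \frac{1}{1-\bar\alpha_{t-1}}
\;\;\Longleftrightarrow\;\;
\sigma_t^2 \;=\; \frac{1-\bar\alpha_{t-1}}{1-\bar\alpha_t}\,\beta_t ,
$$

with $\sigma_t = 0$ at the final step (`timestep == 0`). Note that the variable named `variance` actually holds the standard deviation $\sigma_t$ (the `** 0.5` is already applied), which is why it is multiplied directly by `epsilon`.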