Source code for simulation.utils.machine_learning.cycle_gan.models.wcycle_gan

from typing import List, Tuple

import torch
from torch import Tensor, nn

from simulation.utils.machine_learning.models.helper import set_requires_grad

from .base_model import BaseModel, CycleGANNetworks
from .cycle_gan_stats import CycleGANStats

[docs]class WassersteinCycleGANModel(BaseModel): """This class implements the CycleGAN model, for learning image-to-image translation without paired data. By default, it uses a '--netg resnet_9blocks' ResNet generator, a '--netd basic' discriminator (PatchGAN introduced by pix2pix), and a least-square GANs objective ('--gan_mode lsgan'). CycleGAN paper: """ def __init__( self, netg_a_to_b: nn.Module, netg_b_to_a: nn.Module, netd_a: nn.Module = None, netd_b: nn.Module = None, is_train: bool = True, beta1: float = 0.5, lr: float = 0.0002, lr_policy: str = "linear", lambda_idt_a: int = 10, lambda_idt_b: int = 10, lambda_cycle: float = 0.5, optimizer_type: str = "rms_prop", is_l1: bool = False, wgan_n_critic: int = 5, wgan_initial_n_critic: int = 5, wgan_clip_lower=-0.01, wgan_clip_upper=0.01, ): """Initialize the CycleGAN class. Args: is_train: enable or disable training mode beta1: momentum term of adam lr: initial learning rate for adam lr_policy: linear #learning rate policy. [linear | step | plateau | cosine] lambda_idt_a: weight for loss of domain A lambda_idt_b: weight for loss of domain B lambda_cycle: weight for loss identity is_l1: Decide whether to use l1 loss or l2 loss as cycle and identity loss functions """ self.wgan_initial_n_critic = wgan_initial_n_critic self.clips = (wgan_clip_lower, wgan_clip_upper) self.wgan_n_critic = wgan_n_critic self.networks = CycleGANNetworks(netg_a_to_b, netg_b_to_a, netd_a, netd_b) super().__init__( netg_a_to_b, netg_b_to_a, netd_a, netd_b, is_train, lambda_cycle, lambda_idt_a, lambda_idt_b, is_l1, optimizer_type, lr_policy, beta1, lr, )
[docs] def update_critic_a( self, batch_a: Tensor, batch_b: Tensor, clip_bounds: Tuple[float, float] = None, ): set_requires_grad([self.networks.d_a], requires_grad=True) return self.networks.d_a.perform_optimization_step( self.networks.g_a_to_b, self.optimizer_d, batch_a, batch_b, clip_bounds, )
[docs] def update_critic_b( self, batch_a: Tensor, batch_b: Tensor, clip_bounds: Tuple[float, float] = None, ): set_requires_grad([self.networks.d_b], requires_grad=True) return self.networks.d_b.perform_optimization_step( self.networks.g_b_to_a, self.optimizer_d, batch_b, batch_a, clip_bounds, )
[docs] def update_generators(self, batch_a: Tensor, batch_b: Tensor): """""" real_a = batch_a real_b = batch_b self.optimizer_g.zero_grad() # set G_A and G_B's gradients to zero # G_A and G_B set_requires_grad( [self.networks.d_a, self.networks.d_b], False ) # Ds require no gradients when optimizing Gs set_requires_grad([self.networks.g_a_to_b, self.networks.g_b_to_a], True) fake_a = self.networks.g_b_to_a(real_b) loss_g_b_to_a = -torch.mean(self.networks.d_a(fake_a)) fake_b = self.networks.g_a_to_b(real_a) loss_g_a_to_b = -torch.mean(self.networks.d_b(fake_b)) # Identity loss idt_a = self.networks.g_b_to_a(real_a) idt_b = self.networks.g_a_to_b(real_b) loss_idt_a = self.criterionIdt(idt_a, real_a) * self.lambda_idt_a loss_idt_b = self.criterionIdt(idt_b, real_b) * self.lambda_idt_b rec_a = self.networks.g_a_to_b(fake_a) rec_b = self.networks.g_b_to_a(fake_b) # Forward cycle loss loss_cycle_a = self.criterionCycle(rec_a, real_a) * self.lambda_cycle # Backward cycle loss loss_cycle_b = self.criterionCycle(rec_b, real_b) * self.lambda_cycle # combined loss and calculate gradients loss_g = ( loss_g_a_to_b + loss_g_b_to_a + loss_cycle_a + loss_cycle_b + loss_idt_a + loss_idt_b ) loss_g.backward() self.optimizer_g.step() # update G_A and G_B's weights self.metric = ( loss_g.item() ) # set the generator loss as metric for plateau lr-policy return CycleGANStats( real_a, real_b, fake_a, fake_b, rec_a, rec_b, idt_a, idt_b, loss_g_a_to_b.item(), loss_g_b_to_a.item(), loss_idt_a.item(), loss_idt_b.item(), loss_cycle_a.item(), loss_cycle_b.item(), )
[docs] def pre_training(self, critic_batches): # Update critic for batch_a, batch_b in critic_batches: self.update_critic_a(batch_a, batch_b, self.clips) self.update_critic_b(batch_a, batch_b, self.clips)
[docs] def do_iteration( self, batch_a: torch.Tensor, batch_b: torch.Tensor, critic_batches: List[Tuple[torch.Tensor, torch.Tensor]], ): # Update critic for batch_a_d, batch_b_d in critic_batches: distance_a = self.update_critic_a(batch_a_d, batch_b_d, self.clips) distance_b = self.update_critic_b(batch_a_d, batch_b_d, self.clips) update_stats: CycleGANStats = self.update_generators(batch_a, batch_b) update_stats.w_distance_a = distance_a update_stats.w_distance_b = distance_b return update_stats