From afd82ac3548100fcc3072a5b11add89ab4b2e331 Mon Sep 17 00:00:00 2001
From: kopytjuk
Date: Tue, 25 Dec 2018 16:03:44 +0100
Subject: [PATCH] Bugfix to run the code on Windows machines.

---
 main.py | 382 ++++++++++++++++++++++++++++----------------------------
 1 file changed, 192 insertions(+), 190 deletions(-)

diff --git a/main.py b/main.py
index 7c3e638..a9cf68f 100644
--- a/main.py
+++ b/main.py
@@ -16,214 +16,216 @@ import os
 import models.dcgan as dcgan
 import models.mlp as mlp
 
-parser = argparse.ArgumentParser()
-parser.add_argument('--dataset', required=True, help='cifar10 | lsun | imagenet | folder | lfw ')
-parser.add_argument('--dataroot', required=True, help='path to dataset')
-parser.add_argument('--workers', type=int, help='number of data loading workers', default=2)
-parser.add_argument('--batchSize', type=int, default=64, help='input batch size')
-parser.add_argument('--imageSize', type=int, default=64, help='the height / width of the input image to network')
-parser.add_argument('--nc', type=int, default=3, help='input image channels')
-parser.add_argument('--nz', type=int, default=100, help='size of the latent z vector')
-parser.add_argument('--ngf', type=int, default=64)
-parser.add_argument('--ndf', type=int, default=64)
-parser.add_argument('--niter', type=int, default=25, help='number of epochs to train for')
-parser.add_argument('--lrD', type=float, default=0.00005, help='learning rate for Critic, default=0.00005')
-parser.add_argument('--lrG', type=float, default=0.00005, help='learning rate for Generator, default=0.00005')
-parser.add_argument('--beta1', type=float, default=0.5, help='beta1 for adam. default=0.5')
-parser.add_argument('--cuda' , action='store_true', help='enables cuda')
-parser.add_argument('--ngpu' , type=int, default=1, help='number of GPUs to use')
-parser.add_argument('--netG', default='', help="path to netG (to continue training)")
-parser.add_argument('--netD', default='', help="path to netD (to continue training)")
-parser.add_argument('--clamp_lower', type=float, default=-0.01)
-parser.add_argument('--clamp_upper', type=float, default=0.01)
-parser.add_argument('--Diters', type=int, default=5, help='number of D iters per each G iter')
-parser.add_argument('--noBN', action='store_true', help='use batchnorm or not (only for DCGAN)')
-parser.add_argument('--mlp_G', action='store_true', help='use MLP for G')
-parser.add_argument('--mlp_D', action='store_true', help='use MLP for D')
-parser.add_argument('--n_extra_layers', type=int, default=0, help='Number of extra layers on gen and disc')
-parser.add_argument('--experiment', default=None, help='Where to store samples and models')
-parser.add_argument('--adam', action='store_true', help='Whether to use adam (default is rmsprop)')
-opt = parser.parse_args()
-print(opt)
+if __name__=="__main__":
 
-if opt.experiment is None:
-    opt.experiment = 'samples'
-os.system('mkdir {0}'.format(opt.experiment))
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--dataset', required=True, help='cifar10 | lsun | imagenet | folder | lfw ')
+    parser.add_argument('--dataroot', required=True, help='path to dataset')
+    parser.add_argument('--workers', type=int, help='number of data loading workers', default=2)
+    parser.add_argument('--batchSize', type=int, default=64, help='input batch size')
+    parser.add_argument('--imageSize', type=int, default=64, help='the height / width of the input image to network')
+    parser.add_argument('--nc', type=int, default=3, help='input image channels')
+    parser.add_argument('--nz', type=int, default=100, help='size of the latent z vector')
+    parser.add_argument('--ngf', type=int, default=64)
+    parser.add_argument('--ndf', type=int, default=64)
+    parser.add_argument('--niter', type=int, default=25, help='number of epochs to train for')
+    parser.add_argument('--lrD', type=float, default=0.00005, help='learning rate for Critic, default=0.00005')
+    parser.add_argument('--lrG', type=float, default=0.00005, help='learning rate for Generator, default=0.00005')
+    parser.add_argument('--beta1', type=float, default=0.5, help='beta1 for adam. default=0.5')
+    parser.add_argument('--cuda' , action='store_true', help='enables cuda')
+    parser.add_argument('--ngpu' , type=int, default=1, help='number of GPUs to use')
+    parser.add_argument('--netG', default='', help="path to netG (to continue training)")
+    parser.add_argument('--netD', default='', help="path to netD (to continue training)")
+    parser.add_argument('--clamp_lower', type=float, default=-0.01)
+    parser.add_argument('--clamp_upper', type=float, default=0.01)
+    parser.add_argument('--Diters', type=int, default=5, help='number of D iters per each G iter')
+    parser.add_argument('--noBN', action='store_true', help='use batchnorm or not (only for DCGAN)')
+    parser.add_argument('--mlp_G', action='store_true', help='use MLP for G')
+    parser.add_argument('--mlp_D', action='store_true', help='use MLP for D')
+    parser.add_argument('--n_extra_layers', type=int, default=0, help='Number of extra layers on gen and disc')
+    parser.add_argument('--experiment', default=None, help='Where to store samples and models')
+    parser.add_argument('--adam', action='store_true', help='Whether to use adam (default is rmsprop)')
+    opt = parser.parse_args()
+    print(opt)
 
-opt.manualSeed = random.randint(1, 10000) # fix seed
-print("Random Seed: ", opt.manualSeed)
-random.seed(opt.manualSeed)
-torch.manual_seed(opt.manualSeed)
+    if opt.experiment is None:
+        opt.experiment = 'samples'
+    os.system('mkdir {0}'.format(opt.experiment))
 
-cudnn.benchmark = True
+    opt.manualSeed = random.randint(1, 10000) # fix seed
+    print("Random Seed: ", opt.manualSeed)
+    random.seed(opt.manualSeed)
+    torch.manual_seed(opt.manualSeed)
 
-if torch.cuda.is_available() and not opt.cuda:
-    print("WARNING: You have a CUDA device, so you should probably run with --cuda")
+    cudnn.benchmark = True
 
-if opt.dataset in ['imagenet', 'folder', 'lfw']:
-    # folder dataset
-    dataset = dset.ImageFolder(root=opt.dataroot,
-                               transform=transforms.Compose([
-                                   transforms.Scale(opt.imageSize),
-                                   transforms.CenterCrop(opt.imageSize),
-                                   transforms.ToTensor(),
-                                   transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
-                               ]))
-elif opt.dataset == 'lsun':
-    dataset = dset.LSUN(db_path=opt.dataroot, classes=['bedroom_train'],
-                        transform=transforms.Compose([
-                            transforms.Scale(opt.imageSize),
-                            transforms.CenterCrop(opt.imageSize),
-                            transforms.ToTensor(),
-                            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
-                        ]))
-elif opt.dataset == 'cifar10':
-    dataset = dset.CIFAR10(root=opt.dataroot, download=True,
-                           transform=transforms.Compose([
-                               transforms.Scale(opt.imageSize),
-                               transforms.ToTensor(),
-                               transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
-                           ])
-    )
-assert dataset
-dataloader = torch.utils.data.DataLoader(dataset, batch_size=opt.batchSize,
-                                         shuffle=True, num_workers=int(opt.workers))
+    if torch.cuda.is_available() and not opt.cuda:
+        print("WARNING: You have a CUDA device, so you should probably run with --cuda")
 
-ngpu = int(opt.ngpu)
-nz = int(opt.nz)
-ngf = int(opt.ngf)
-ndf = int(opt.ndf)
-nc = int(opt.nc)
-n_extra_layers = int(opt.n_extra_layers)
+    if opt.dataset in ['imagenet', 'folder', 'lfw']:
+        # folder dataset
+        dataset = dset.ImageFolder(root=opt.dataroot,
+                                   transform=transforms.Compose([
+                                       transforms.Scale(opt.imageSize),
+                                       transforms.CenterCrop(opt.imageSize),
+                                       transforms.ToTensor(),
+                                       transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
+                                   ]))
+    elif opt.dataset == 'lsun':
+        dataset = dset.LSUN(db_path=opt.dataroot, classes=['bedroom_train'],
+                            transform=transforms.Compose([
+                                transforms.Scale(opt.imageSize),
+                                transforms.CenterCrop(opt.imageSize),
+                                transforms.ToTensor(),
+                                transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
+                            ]))
+    elif opt.dataset == 'cifar10':
+        dataset = dset.CIFAR10(root=opt.dataroot, download=True,
+                               transform=transforms.Compose([
+                                   transforms.Scale(opt.imageSize),
+                                   transforms.ToTensor(),
+                                   transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
+                               ])
+        )
+    assert dataset
+    dataloader = torch.utils.data.DataLoader(dataset, batch_size=opt.batchSize,
+                                             shuffle=True, num_workers=int(opt.workers))
 
-# custom weights initialization called on netG and netD
-def weights_init(m):
-    classname = m.__class__.__name__
-    if classname.find('Conv') != -1:
-        m.weight.data.normal_(0.0, 0.02)
-    elif classname.find('BatchNorm') != -1:
-        m.weight.data.normal_(1.0, 0.02)
-        m.bias.data.fill_(0)
+    ngpu = int(opt.ngpu)
+    nz = int(opt.nz)
+    ngf = int(opt.ngf)
+    ndf = int(opt.ndf)
+    nc = int(opt.nc)
+    n_extra_layers = int(opt.n_extra_layers)
 
-if opt.noBN:
-    netG = dcgan.DCGAN_G_nobn(opt.imageSize, nz, nc, ngf, ngpu, n_extra_layers)
-elif opt.mlp_G:
-    netG = mlp.MLP_G(opt.imageSize, nz, nc, ngf, ngpu)
-else:
-    netG = dcgan.DCGAN_G(opt.imageSize, nz, nc, ngf, ngpu, n_extra_layers)
+    # custom weights initialization called on netG and netD
+    def weights_init(m):
+        classname = m.__class__.__name__
+        if classname.find('Conv') != -1:
+            m.weight.data.normal_(0.0, 0.02)
+        elif classname.find('BatchNorm') != -1:
+            m.weight.data.normal_(1.0, 0.02)
+            m.bias.data.fill_(0)
 
-netG.apply(weights_init)
-if opt.netG != '': # load checkpoint if needed
-    netG.load_state_dict(torch.load(opt.netG))
-print(netG)
+    if opt.noBN:
+        netG = dcgan.DCGAN_G_nobn(opt.imageSize, nz, nc, ngf, ngpu, n_extra_layers)
+    elif opt.mlp_G:
+        netG = mlp.MLP_G(opt.imageSize, nz, nc, ngf, ngpu)
+    else:
+        netG = dcgan.DCGAN_G(opt.imageSize, nz, nc, ngf, ngpu, n_extra_layers)
 
-if opt.mlp_D:
-    netD = mlp.MLP_D(opt.imageSize, nz, nc, ndf, ngpu)
-else:
-    netD = dcgan.DCGAN_D(opt.imageSize, nz, nc, ndf, ngpu, n_extra_layers)
-    netD.apply(weights_init)
+    netG.apply(weights_init)
+    if opt.netG != '': # load checkpoint if needed
+        netG.load_state_dict(torch.load(opt.netG))
+    print(netG)
 
-if opt.netD != '':
-    netD.load_state_dict(torch.load(opt.netD))
-print(netD)
+    if opt.mlp_D:
+        netD = mlp.MLP_D(opt.imageSize, nz, nc, ndf, ngpu)
+    else:
+        netD = dcgan.DCGAN_D(opt.imageSize, nz, nc, ndf, ngpu, n_extra_layers)
+        netD.apply(weights_init)
 
-input = torch.FloatTensor(opt.batchSize, 3, opt.imageSize, opt.imageSize)
-noise = torch.FloatTensor(opt.batchSize, nz, 1, 1)
-fixed_noise = torch.FloatTensor(opt.batchSize, nz, 1, 1).normal_(0, 1)
-one = torch.FloatTensor([1])
-mone = one * -1
+    if opt.netD != '':
+        netD.load_state_dict(torch.load(opt.netD))
+    print(netD)
 
-if opt.cuda:
-    netD.cuda()
-    netG.cuda()
-    input = input.cuda()
-    one, mone = one.cuda(), mone.cuda()
-    noise, fixed_noise = noise.cuda(), fixed_noise.cuda()
+    input = torch.FloatTensor(opt.batchSize, 3, opt.imageSize, opt.imageSize)
+    noise = torch.FloatTensor(opt.batchSize, nz, 1, 1)
+    fixed_noise = torch.FloatTensor(opt.batchSize, nz, 1, 1).normal_(0, 1)
+    one = torch.FloatTensor([1])
+    mone = one * -1
 
-# setup optimizer
-if opt.adam:
-    optimizerD = optim.Adam(netD.parameters(), lr=opt.lrD, betas=(opt.beta1, 0.999))
-    optimizerG = optim.Adam(netG.parameters(), lr=opt.lrG, betas=(opt.beta1, 0.999))
-else:
-    optimizerD = optim.RMSprop(netD.parameters(), lr = opt.lrD)
-    optimizerG = optim.RMSprop(netG.parameters(), lr = opt.lrG)
+    if opt.cuda:
+        netD.cuda()
+        netG.cuda()
+        input = input.cuda()
+        one, mone = one.cuda(), mone.cuda()
+        noise, fixed_noise = noise.cuda(), fixed_noise.cuda()
 
-gen_iterations = 0
-for epoch in range(opt.niter):
-    data_iter = iter(dataloader)
-    i = 0
-    while i < len(dataloader):
-        ############################
-        # (1) Update D network
-        ###########################
-        for p in netD.parameters(): # reset requires_grad
-            p.requires_grad = True # they are set to False below in netG update
+    # setup optimizer
+    if opt.adam:
+        optimizerD = optim.Adam(netD.parameters(), lr=opt.lrD, betas=(opt.beta1, 0.999))
+        optimizerG = optim.Adam(netG.parameters(), lr=opt.lrG, betas=(opt.beta1, 0.999))
+    else:
+        optimizerD = optim.RMSprop(netD.parameters(), lr = opt.lrD)
+        optimizerG = optim.RMSprop(netG.parameters(), lr = opt.lrG)
 
-        # train the discriminator Diters times
-        if gen_iterations < 25 or gen_iterations % 500 == 0:
-            Diters = 100
-        else:
-            Diters = opt.Diters
-        j = 0
-        while j < Diters and i < len(dataloader):
-            j += 1
+    gen_iterations = 0
+    for epoch in range(opt.niter):
+        data_iter = iter(dataloader)
+        i = 0
+        while i < len(dataloader):
+            ############################
+            # (1) Update D network
+            ###########################
+            for p in netD.parameters(): # reset requires_grad
+                p.requires_grad = True # they are set to False below in netG update
 
-            # clamp parameters to a cube
+            # train the discriminator Diters times
+            if gen_iterations < 25 or gen_iterations % 500 == 0:
+                Diters = 100
+            else:
+                Diters = opt.Diters
+            j = 0
+            while j < Diters and i < len(dataloader):
+                j += 1
+
+                # clamp parameters to a cube
+                for p in netD.parameters():
+                    p.data.clamp_(opt.clamp_lower, opt.clamp_upper)
+
+                data = data_iter.next()
+                i += 1
+
+                # train with real
+                real_cpu, _ = data
+                netD.zero_grad()
+                batch_size = real_cpu.size(0)
+
+                if opt.cuda:
+                    real_cpu = real_cpu.cuda()
+                input.resize_as_(real_cpu).copy_(real_cpu)
+                inputv = Variable(input)
+
+                errD_real = netD(inputv)
+                errD_real.backward(one)
+
+                # train with fake
+                noise.resize_(opt.batchSize, nz, 1, 1).normal_(0, 1)
+                noisev = Variable(noise, volatile = True) # totally freeze netG
+                fake = Variable(netG(noisev).data)
+                inputv = fake
+                errD_fake = netD(inputv)
+                errD_fake.backward(mone)
+                errD = errD_real - errD_fake
+                optimizerD.step()
+
+            ############################
+            # (2) Update G network
+            ###########################
             for p in netD.parameters():
-                p.data.clamp_(opt.clamp_lower, opt.clamp_upper)
-
-            data = data_iter.next()
-            i += 1
-
-            # train with real
-            real_cpu, _ = data
-            netD.zero_grad()
-            batch_size = real_cpu.size(0)
-
-            if opt.cuda:
-                real_cpu = real_cpu.cuda()
-            input.resize_as_(real_cpu).copy_(real_cpu)
-            inputv = Variable(input)
-
-            errD_real = netD(inputv)
-            errD_real.backward(one)
-
-            # train with fake
+                p.requires_grad = False # to avoid computation
+            netG.zero_grad()
+            # in case our last batch was the tail batch of the dataloader,
+            # make sure we feed a full batch of noise
             noise.resize_(opt.batchSize, nz, 1, 1).normal_(0, 1)
-            noisev = Variable(noise, volatile = True) # totally freeze netG
-            fake = Variable(netG(noisev).data)
-            inputv = fake
-            errD_fake = netD(inputv)
-            errD_fake.backward(mone)
-            errD = errD_real - errD_fake
-            optimizerD.step()
+            noisev = Variable(noise)
+            fake = netG(noisev)
+            errG = netD(fake)
+            errG.backward(one)
+            optimizerG.step()
+            gen_iterations += 1
 
-        ############################
-        # (2) Update G network
-        ###########################
-        for p in netD.parameters():
-            p.requires_grad = False # to avoid computation
-        netG.zero_grad()
-        # in case our last batch was the tail batch of the dataloader,
-        # make sure we feed a full batch of noise
-        noise.resize_(opt.batchSize, nz, 1, 1).normal_(0, 1)
-        noisev = Variable(noise)
-        fake = netG(noisev)
-        errG = netD(fake)
-        errG.backward(one)
-        optimizerG.step()
-        gen_iterations += 1
+            print('[%d/%d][%d/%d][%d] Loss_D: %f Loss_G: %f Loss_D_real: %f Loss_D_fake %f'
+                % (epoch, opt.niter, i, len(dataloader), gen_iterations,
+                errD.data[0], errG.data[0], errD_real.data[0], errD_fake.data[0]))
+            if gen_iterations % 500 == 0:
+                real_cpu = real_cpu.mul(0.5).add(0.5)
+                vutils.save_image(real_cpu, '{0}/real_samples.png'.format(opt.experiment))
+                fake = netG(Variable(fixed_noise, volatile=True))
+                fake.data = fake.data.mul(0.5).add(0.5)
+                vutils.save_image(fake.data, '{0}/fake_samples_{1}.png'.format(opt.experiment, gen_iterations))
 
-        print('[%d/%d][%d/%d][%d] Loss_D: %f Loss_G: %f Loss_D_real: %f Loss_D_fake %f'
-            % (epoch, opt.niter, i, len(dataloader), gen_iterations,
-            errD.data[0], errG.data[0], errD_real.data[0], errD_fake.data[0]))
-        if gen_iterations % 500 == 0:
-            real_cpu = real_cpu.mul(0.5).add(0.5)
-            vutils.save_image(real_cpu, '{0}/real_samples.png'.format(opt.experiment))
-            fake = netG(Variable(fixed_noise, volatile=True))
-            fake.data = fake.data.mul(0.5).add(0.5)
-            vutils.save_image(fake.data, '{0}/fake_samples_{1}.png'.format(opt.experiment, gen_iterations))
-
-    # do checkpointing
-    torch.save(netG.state_dict(), '{0}/netG_epoch_{1}.pth'.format(opt.experiment, epoch))
-    torch.save(netD.state_dict(), '{0}/netD_epoch_{1}.pth'.format(opt.experiment, epoch))
+        # do checkpointing
+        torch.save(netG.state_dict(), '{0}/netG_epoch_{1}.pth'.format(opt.experiment, epoch))
+        torch.save(netD.state_dict(), '{0}/netD_epoch_{1}.pth'.format(opt.experiment, epoch))
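
Why the guard fixes Windows: Python's multiprocessing uses the "spawn" start method on Windows, so every DataLoader worker process (num_workers > 0) imports main.py afresh instead of forking. Without an `if __name__ == "__main__"` guard, the argument parsing and the entire training loop at module level would re-run inside each worker, and the run crashes with multiprocessing's bootstrapping RuntimeError, whose message suggests exactly this guard. Below is a minimal sketch of the pattern, with a hypothetical toy TensorDataset standing in for the real datasets; the names `train` and `loader` are illustrative and not part of main.py:

    import torch
    from torch.utils.data import DataLoader, TensorDataset

    def train():
        # Toy stand-in for the LSUN/CIFAR10 datasets used by main.py.
        dataset = TensorDataset(torch.randn(256, 3, 64, 64))
        # num_workers > 0 starts worker processes; on Windows they are
        # spawned and therefore re-import this module from the top.
        loader = DataLoader(dataset, batch_size=64, shuffle=True, num_workers=2)
        for (batch,) in loader:
            pass  # a real training step would go here

    if __name__ == "__main__":
        # Only the launching process enters this block; spawned workers
        # import the module but skip it, so nothing re-executes.
        train()

This is also why the patch only indents the whole script body by one level under the guard and changes none of the training logic.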