Python torchvision.transforms.functional.to_pil_image() Examples
The following are 30 code examples of torchvision.transforms.functional.to_pil_image(), collected from open-source projects. The source file, project, and license of each example are listed above it. You may also want to check out the other available functions and classes of the torchvision.transforms.functional module.
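For orientation before the project examples, here is a minimal standalone sketch of what to_pil_image() does (not taken from any project below): it accepts a CHW tensor or an HWC numpy array and returns a PIL.Image.

import torch
from torchvision.transforms.functional import to_pil_image

# A 3xHxW float tensor with values in [0, 1] becomes an RGB PIL image.
tensor = torch.rand(3, 64, 64)
img = to_pil_image(tensor)
print(img.size, img.mode)  # (64, 64) RGB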
Example #1
Source File: demo_segmentation.py From Text_Segmentation_Image_Inpainting with GNU General Public License v3.0
def process(eval_img, device='cpu'):
    (img, origin, unpadder), file_name = eval_img
    with torch.no_grad():
        out = model(img.to(device))
        prob = F.sigmoid(out)
        mask = prob > 0.5
        mask = torch.nn.MaxPool2d(kernel_size=(3, 3), padding=(1, 1), stride=1)(mask.float()).byte()
        mask = unpadder(mask)
        mask = mask.float().cpu()
    save_image(mask, file_name + ' _mask.jpg')
    origin_np = np.array(to_pil_image(origin[0]))
    mask_np = to_pil_image(mask[0]).convert("L")
    mask_np = np.array(mask_np, dtype='uint8')
    mask_np = draw_bounding_box(origin_np, mask_np, 500)
    mask_ = Image.fromarray(mask_np)
    mask_.save(file_name + "_contour.jpg")
    # ret, mask_np = cv2.threshold(mask_np, 127, 255, 0)
    # dst = cv2.inpaint(origin_np, mask_np, 1, cv2.INPAINT_NS)
    # out = Image.fromarray(dst)
    # out.save(file_name + ' _box.jpg')
Example #2
Source File: train.py From Holocron with MIT License
def plot_samples(images, targets):
    # Unnormalize image
    nb_samples = 4
    _, axes = plt.subplots(1, nb_samples, figsize=(20, 5))
    for idx in range(nb_samples):
        img = images[idx]
        img *= torch.tensor([0.229, 0.224, 0.225]).view(-1, 1, 1)
        img += torch.tensor([0.485, 0.456, 0.406]).view(-1, 1, 1)
        img = F.to_pil_image(img)
        axes[idx].imshow(img)
        axes[idx].axis('off')
        for box, label in zip(targets[idx]['boxes'], targets[idx]['labels']):
            xmin = int(box[0] * images[idx].shape[-1])
            ymin = int(box[1] * images[idx].shape[-2])
            xmax = int(box[2] * images[idx].shape[-1])
            ymax = int(box[3] * images[idx].shape[-2])
            rect = Rectangle((xmin, ymin), xmax - xmin, ymax - ymin,
                             linewidth=2, edgecolor='lime', facecolor='none')
            axes[idx].add_patch(rect)
            axes[idx].text(xmin, ymin, classes[label.item()], color='lime', fontsize=12)
    plt.show()
Example #3
Source File: train.py From Holocron with MIT License
def plot_samples(images, targets, ignore_index=None):
    # Unnormalize image
    nb_samples = 4
    _, axes = plt.subplots(2, nb_samples, figsize=(20, 5))
    for idx in range(nb_samples):
        img = images[idx]
        img *= torch.tensor([0.229, 0.224, 0.225]).view(-1, 1, 1)
        img += torch.tensor([0.485, 0.456, 0.406]).view(-1, 1, 1)
        img = F.to_pil_image(img)
        target = targets[idx]
        if isinstance(ignore_index, int):
            target[target == ignore_index] = 0
        axes[0][idx].imshow(img)
        axes[0][idx].axis('off')
        axes[1][idx].imshow(target)
        axes[1][idx].axis('off')
    plt.show()
Example #4
Source File: augmentation.py From nni with MIT License
def test_tta():
    img_f = os.path.join(settings.TEST_IMG_DIR, '0c2637aa9.jpg')
    img = Image.open(img_f)
    img = img.convert('RGB')
    tta_index = 7
    trans1 = TTATransform(tta_index)
    img = trans1(img)
    #img.show()
    img_np = np.array(img)
    img_np = np.expand_dims(img_np, 0)
    print(img_np.shape)
    img_np = tta_back_mask_np(img_np, tta_index)
    img_np = np.reshape(img_np, (768, 768, 3))
    img_back = F.to_pil_image(img_np)
    img_back.show()
Example #5
Source File: inference.py From RCRNet-Pytorch with MIT License
def inference():
    model.eval()
    print("Begin inference on {} {}.".format(args.dataset, args.split))
    for data in tqdm(dataloader):
        images = [frame['image'].to(device) for frame in data]
        with torch.no_grad():
            preds = model(images)
            preds = [torch.sigmoid(pred) for pred in preds]
        # save predicted saliency maps
        for i, pred_ in enumerate(preds):
            for j, pred in enumerate(pred_.detach().cpu()):
                dataset = data[i]['dataset'][j]
                image_id = data[i]['image_id'][j]
                height = data[i]['height'].item()
                width = data[i]['width'].item()
                result_path = os.path.join(args.results_folder, "{}/{}.png".format(dataset, image_id))
                result = TF.to_pil_image(pred)
                result = result.resize((height, width))
                dirname = os.path.dirname(result_path)
                if not os.path.exists(dirname):
                    os.makedirs(dirname)
                result.save(result_path)
Example #6
Source File: test_transform_goturn.py From open-vot with MIT License
def test_transform_goturn(self):
    base_dataset = VOT(self.vot_dir, return_rect=True, download=True)
    transform = TransformGOTURN()
    dataset = Pairwise(
        base_dataset, transform,
        pairs_per_video=1, frame_range=1, causal=True)
    self.assertGreater(len(dataset), 0)
    for crop_z, crop_x, labels in dataset:
        self.assertEqual(crop_z.size(), crop_x.size())
    if self.visualize:
        for t in range(10):
            crop_z, crop_x, labels = random.choice(dataset)
            mean_color = torch.tensor(
                transform.mean_color).float().view(3, 1, 1)
            crop_z = F.to_pil_image((crop_z + mean_color) / 255.0)
            crop_x = F.to_pil_image((crop_x + mean_color) / 255.0)
            labels = labels.cpu().numpy()
            labels *= transform.out_size / transform.label_scale_factor
            bndbox = np.concatenate([
                labels[:2], labels[2:] - labels[:2]])
            show_frame(crop_x, bndbox, fig_n=1, pause=1)
Example #7
Source File: data_specs.py From margipose with Apache License 2.0
def unconvert(self, tensor):
    return tr.to_pil_image(denormalize_pixels(tensor.clone(), self.mean, self.stddev), 'RGB')
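denormalize_pixels is defined elsewhere in margipose; based on the mean/stddev arguments, a plausible sketch of the inverse normalisation it performs (an assumption, not the project's actual code):

def denormalize_pixels(tensor, mean, stddev):
    # Undo channel-wise normalisation: x * std + mean, clamped for display.
    for t, m, s in zip(tensor, mean, stddev):
        t.mul_(s).add_(m)
    return tensor.clamp_(0, 1)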
Example #8
Source File: test.py From Noise2Noise-Cryo-EM-image-denoising with MIT License
def test():
    device = torch.device(conf.cuda if torch.cuda.is_available() else "cpu")
    test_dataset = TestingDataset(conf.data_path_test, conf.crop_img_size)
    test_loader = DataLoader(test_dataset, batch_size=1, shuffle=False)
    print('Loading model from: {}'.format(conf.model_path_test))
    model = UNet(in_channels=conf.img_channel, out_channels=conf.img_channel)
    print('loading model')
    model.load_state_dict(torch.load(conf.model_path_test))
    model.eval()
    model.to(device)
    result_dir = conf.denoised_dir
    if not os.path.exists(result_dir):
        os.mkdir(result_dir)
    for batch_idx, source in enumerate(test_loader):
        source_img = tvF.to_pil_image(source.squeeze(0))
        source = source.to(device)
        denoised_img = model(source).detach().cpu()
        img_name = test_loader.dataset.test_list[batch_idx]
        denoised_result = tvF.to_pil_image(torch.clamp(denoised_img.squeeze(0), 0, 1))
        fname = os.path.splitext(img_name)[0]
        source_img.save(os.path.join(result_dir, f'{fname}-noisy.png'))
        denoised_result.save(os.path.join(result_dir, f'{fname}-denoised.png'))
Example #9
Source File: test.py From Noise2Noise-Cryo-EM-image-denoising with MIT License
def test():
    device = torch.device(conf.cuda if torch.cuda.is_available() else "cpu")
    test_dataset = Testinging_Dataset(conf.data_path_test, conf.test_noise_param, conf.crop_img_size)
    test_loader = DataLoader(test_dataset, batch_size=1, shuffle=False)
    print('Loading model from: {}'.format(conf.model_path_test))
    model = UNet(in_channels=conf.img_channel, out_channels=conf.img_channel)
    print('loading model')
    model.load_state_dict(torch.load(conf.model_path_test))
    model.eval()
    model.to(device)
    result_dir = conf.denoised_dir
    if not os.path.exists(result_dir):
        os.mkdir(result_dir)
    for batch_idx, (source, img_cropped) in enumerate(test_loader):
        source_img = tvF.to_pil_image(source.squeeze(0))
        img_truth = img_cropped.squeeze(0).numpy().astype(np.uint8)
        source = source.to(device)
        denoised_img = model(source).detach().cpu()
        img_name = test_loader.dataset.image_list[batch_idx]
        denoised_result = tvF.to_pil_image(torch.clamp(denoised_img.squeeze(0), 0, 1))
        fname = os.path.splitext(img_name)[0]
        source_img.save(os.path.join(result_dir, f'{fname}-noisy.png'))
        denoised_result.save(os.path.join(result_dir, f'{fname}-denoised.png'))
        io.imsave(os.path.join(result_dir, f'{fname}-ground_truth.png'), img_truth)
Example #10
Source File: transforms.py From ACDRNet with Apache License 2.0
def __call__(self, img, mask):
    return F.to_pil_image(img, self.mode), F.to_pil_image(mask, self.mode)
Example #11
Source File: video_transforms.py From pvse with MIT License
def __call__(self, pic):
    """
    Args:
        pic (Tensor or numpy.ndarray): Image to be converted to PIL Image.

    Returns:
        PIL Image: Image converted to PIL Image.
    """
    return F.to_pil_image(pic, self.mode)
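The enclosing class is not shown above; a minimal sketch of the wrapper this __call__ plausibly belongs to, modeled on torchvision.transforms.ToPILImage (an assumption, not pvse's full code):

class ToPILImage(object):
    def __init__(self, mode=None):
        # mode is an optional PIL mode such as 'RGB' or 'L';
        # None lets to_pil_image infer it from the input.
        self.mode = mode

    def __call__(self, pic):
        return F.to_pil_image(pic, self.mode)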
Example #12
Source File: generate_pseudo_labels.py From RCRNet-Pytorch with MIT License
def generate_pseudo_label():
    pseudo_label_generator.eval()
    for data in tqdm(dataloader):
        images = []
        labels = []
        for frame in data:
            images.append(frame['image'].to(device))
            labels.append(frame['label'].to(device) if 'label' in frame else None)
        with torch.no_grad():
            for i in range(1, args.frame_between_label_num + 1):
                pseudo_label = pseudo_label_generator.generate_pseudo_label(
                    images[i], images[0], images[-1], labels[0], labels[-1])
                labels[i] = torch.sigmoid(pseudo_label).detach()
        # save pseudo-labels
        for i, label_ in enumerate(labels):
            for j, label in enumerate(label_.detach().cpu()):
                dataset = data[i]['dataset'][j]
                image_id = data[i]['image_id'][j]
                pseudo_label_path = os.path.join(pseudo_label_folder, "{}/{}.png".format(dataset, image_id))
                height = data[i]['height'].item()
                width = data[i]['width'].item()
                result = TF.to_pil_image(label)
                result = result.resize((height, width))
                dirname = os.path.dirname(pseudo_label_path)
                if not os.path.exists(dirname):
                    os.makedirs(dirname)
                result.save(pseudo_label_path)
Example #13
Source File: test-jv.py From Jacinle with MIT License
def imwrite(path, tensor):
    TF.to_pil_image(tensor).save(path)
Example #14
Source File: test_transform_siamfc.py From open-vot with MIT License
def test_transform_siamfc(self):
    base_dataset = VOT(self.vot_dir, anno_type='rect', download=True)
    transform = TransformSiamFC(stats_path=self.stats_path)
    dataset = Pairwise(
        base_dataset, transform=transform,
        pairs_per_video=1, subset='train')
    self.assertGreater(len(dataset), 0)
    for crop_z, crop_x, labels, weights in dataset:
        self.assertAlmostEqual(
            weights[labels == 1].sum().item(),
            weights[labels == 0].sum().item())
        self.assertAlmostEqual(
            weights.sum().item(), labels[labels >= 0].numel())
        self.assertEqual(
            weights[labels == transform.ignore_label].sum().item(), 0)
    if self.visualize:
        crop_z, crop_x, labels, weights = random.choice(dataset)
        crop_z = F.to_pil_image(crop_z / 255.0)
        crop_x = F.to_pil_image(crop_x / 255.0)
        labels = self._rescale(labels.squeeze().cpu().numpy())
        weights = self._rescale(weights.squeeze().cpu().numpy())
        bndbox_z = np.array([31, 31, 64, 64])
        bndbox_x = np.array([95, 95, 64, 64])
        show_frame(crop_z, bndbox_z, fig_n=1, pause=1)
        show_frame(crop_x, bndbox_x, fig_n=2, pause=1)
        show_frame(labels, fig_n=3, pause=1, cmap='hot')
        show_frame(weights, fig_n=4, pause=5, cmap='hot')
Example #15
Source File: image_prior.py From Torchelie with MIT License
def superres(img, hourglass, input_dim, scale, iters, lr, noise_std=1 / 30,
             device='cuda'):
    im = TFF.to_tensor(img)[None].to(device)
    z = input_noise((im.shape[2] * scale, im.shape[3] * scale), input_dim)
    z = z.to(device)

    def body(batch):
        recon = hourglass(z + torch.randn_like(z) * noise_std)
        loss = F.mse_loss(
            F.interpolate(recon, size=im.shape[2:], mode='bilinear'), im)
        loss.backward()
        return {"loss": loss}

    def display():
        recon = hourglass(z)
        loss = F.mse_loss(
            F.interpolate(recon, size=im.shape[2:], mode='bilinear'), im)
        return {
            "loss": loss,
            "recon": recon.clamp(0, 1),
            'orig': F.interpolate(im, scale_factor=scale, mode='bicubic').clamp(0, 1)
        }

    loop = make_loop(hourglass, body, display, iters, lr)
    loop.to(device)
    loop.run(1)
    with torch.no_grad():
        hourglass.eval()
        return TFF.to_pil_image(hourglass(z)[0].cpu())
Example #16
Source File: data_loaders.py From ModelFeast with MIT License
def _tansform_(self, x):
    x = np.array(x, dtype='float32') / 255
    x = (x - 0.5) / 0.5
    x = x.transpose((2, 0, 1))  # move channels to dim 0, the input layout PyTorch expects
    x = torch.from_numpy(x)

    # # for inceptionresnetV2
    # x = TF.to_pil_image(x)
    # x = TF.resize(x, (64, 32))
    # x = TF.to_tensor(x)
    return x
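The manual scaling above maps pixels from [0, 255] to [-1, 1]; a sketch of an equivalent pipeline built from torchvision transforms (an alternative formulation, not ModelFeast's code):

from torchvision import transforms

equivalent = transforms.Compose([
    transforms.ToTensor(),                      # HWC uint8 -> CHW float in [0, 1]
    transforms.Normalize(mean=[0.5, 0.5, 0.5],  # (x - 0.5) / 0.5 per channel
                         std=[0.5, 0.5, 0.5]),  # [0, 1] -> [-1, 1]
])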
Example #17
Source File: preprocessing_transforms.py From ViP with MIT License
def _to_pil(self, clip):
    # Must be of type uint8 if images have multiple channels,
    # int16, int32, or float32 if there is only one channel
    if isinstance(clip[0], np.ndarray):
        if 'float' in str(clip[0].dtype):
            clip = np.array(clip).astype('float32')
        if 'int64' == str(clip[0].dtype):
            clip = np.array(clip).astype('int32')
        if clip[0].ndim == 3:
            clip = np.array(clip).astype('uint8')
    output = []
    for frame in clip:
        output.append(F.to_pil_image(frame))
    return output
Example #18
Source File: utils.py From skorch with BSD 3-Clause "New" or "Revised" License
def convert_cell_to_img(t, padding=16):
    """Converts a PyTorch tensor into a Pillow Image. The padding will be
    removed from the resulting image."""
    std = torch.Tensor([0.229, 0.224, 0.225]).reshape(-1, 1, 1)
    mu = torch.Tensor([0.485, 0.456, 0.406]).reshape(-1, 1, 1)
    output = t.mul(std)
    output.add_(mu)
    img = to_pil_image(output)
    w, h = img.size
    return img.crop((padding, padding, w - padding, h - padding))
Example #19
Source File: transforms.py From person-reid-lib with MIT License
def to_pil_image(self, pic):
    if pic.shape[2] == 3:
        return ImageData(F.to_pil_image(pic, self.mode))
    elif pic.shape[2] == 1:
        return ImageData(F.to_pil_image(pic))
    elif pic.shape[2] == 5:
        if self.use_flow:
            pic_rgb = F.to_pil_image(pic[..., :3], self.mode)
            pic_x = F.to_pil_image(pic[..., 3:4])
            pic_y = F.to_pil_image(pic[..., 4:5])
            return ImageData(pic_rgb, pic_x, pic_y)
        else:
            return ImageData(F.to_pil_image(pic[..., :3], self.mode))
    else:
        raise ValueError
Example #20
Source File: transforms.py From person-reid-lib with MIT License
def _instance_process(self, pic_list, params):
    if isinstance(pic_list, np.ndarray):
        if pic_list.ndim == 3:
            return self.to_pil_image(pic_list)
        elif pic_list.ndim == 4:
            # convert each frame of the 4-D array
            return [self.to_pil_image(pic_list[pic_i]) for pic_i in range(pic_list.shape[0])]
        else:
            raise TypeError
    raise TypeError
Example #21
Source File: utils.py From noise2noise-pytorch with MIT License
def create_montage(img_name, noise_type, save_path, source_t, denoised_t, clean_t, show):
    """Creates montage for easy comparison."""

    fig, ax = plt.subplots(1, 3, figsize=(9, 3))
    fig.canvas.set_window_title(img_name.capitalize()[:-4])

    # Bring tensors to CPU
    source_t = source_t.cpu().narrow(0, 0, 3)
    denoised_t = denoised_t.cpu()
    clean_t = clean_t.cpu()

    source = tvF.to_pil_image(source_t)
    denoised = tvF.to_pil_image(torch.clamp(denoised_t, 0, 1))
    clean = tvF.to_pil_image(clean_t)

    # Build image montage
    psnr_vals = [psnr(source_t, clean_t), psnr(denoised_t, clean_t)]
    titles = ['Input: {:.2f} dB'.format(psnr_vals[0]),
              'Denoised: {:.2f} dB'.format(psnr_vals[1]),
              'Ground truth']
    zipped = zip(titles, [source, denoised, clean])
    for j, (title, img) in enumerate(zipped):
        ax[j].imshow(img)
        ax[j].set_title(title)
        ax[j].axis('off')

    # Open pop up window, if requested
    if show > 0:
        plt.show()

    # Save to files
    fname = os.path.splitext(img_name)[0]
    source.save(os.path.join(save_path, f'{fname}-{noise_type}-noisy.png'))
    denoised.save(os.path.join(save_path, f'{fname}-{noise_type}-denoised.png'))
    fig.savefig(os.path.join(save_path, f'{fname}-{noise_type}-montage.png'), bbox_inches='tight')
Example #22
Source File: datasets.py From noise2noise-pytorch with MIT License
def __getitem__(self, index):
    """Retrieves image from folder and corrupts it."""

    # Use converged image, if requested
    if self.clean_targets:
        target = self.reference
    else:
        target_fname = self.imgs[index].replace('render', 'target')
        file_ext = '.exr' if self.hdr_targets else '.png'
        target_fname = os.path.splitext(target_fname)[0] + file_ext
        target_path = os.path.join(self.root_dir, 'target', target_fname)
        if self.hdr_targets:
            target = tvF.to_pil_image(load_hdr_as_tensor(target_path))
        else:
            target = Image.open(target_path).convert('RGB')

    # Get buffers
    render_path = os.path.join(self.root_dir, 'render', self.imgs[index])
    albedo_path = os.path.join(self.root_dir, 'albedo', self.albedos[index])
    normal_path = os.path.join(self.root_dir, 'normal', self.normals[index])

    if self.hdr_buffers:
        render = tvF.to_pil_image(load_hdr_as_tensor(render_path))
        albedo = tvF.to_pil_image(load_hdr_as_tensor(albedo_path))
        normal = tvF.to_pil_image(load_hdr_as_tensor(normal_path))
    else:
        render = Image.open(render_path).convert('RGB')
        albedo = Image.open(albedo_path).convert('RGB')
        normal = Image.open(normal_path).convert('RGB')

    # Crop
    if self.crop_size != 0:
        buffers = [render, albedo, normal, target]
        buffers = [tvF.to_tensor(b) for b in self._random_crop(buffers)]

    # Stack buffers to create input volume
    source = torch.cat(buffers[:3], dim=0)
    target = buffers[3]

    return source, target
Example #23
Source File: test_on_image.py From LCFCN with Apache License 2.0
def apply(image_path, model_name, model_path):
    transformer = ut.ComposeJoint(
        [
            [transforms.ToTensor(), None],
            [transforms.Normalize(*ut.mean_std), None],
            [None, ut.ToLong()]
        ])

    # Load best model
    model = model_dict[model_name](n_classes=2).cuda()
    model.load_state_dict(torch.load(model_path))

    # Read Image
    image_raw = imread(image_path)
    collection = list(map(FT.to_pil_image, [image_raw, image_raw]))
    image, _ = transformer(collection)
    batch = {"images": image[None]}

    # Make predictions
    pred_blobs = model.predict(batch, method="blobs").squeeze()
    pred_counts = int(model.predict(batch, method="counts").ravel()[0])

    # Save Output
    save_path = image_path + "_blobs_count:{}.png".format(pred_counts)
    imsave(save_path, ut.combine_image_blobs(image_raw, pred_blobs))
    print("| Counts: {}\n| Output saved in: {}".format(pred_counts, save_path))
Example #24
Source File: shanghai.py From LCFCN with Apache License 2.0
def __getitem__(self, index):
    name = self.img_names[index]

    # LOAD IMG, POINT, and ROI
    image = imread(os.path.join(self.path, "images", name))
    if image.ndim == 2:
        image = image[:, :, None].repeat(3, 2)
    pointList = hu.load_mat(os.path.join(self.path, "ground-truth",
                                         "GT_" + name.replace(".jpg", "") + ".mat"))
    pointList = pointList["image_info"][0][0][0][0][0]
    points = np.zeros(image.shape[:2], "uint8")[:, :, None]
    H, W = image.shape[:2]
    for x, y in pointList:
        points[min(int(y), H - 1), min(int(x), W - 1)] = 1
    counts = torch.LongTensor(np.array([pointList.shape[0]]))

    collection = list(map(FT.to_pil_image, [image, points]))
    image, points = transformers.apply_transform(
        self.split, image, points,
        transform_name=self.exp_dict['dataset']['transform'])

    return {"images": image,
            "points": points.squeeze(),
            "counts": counts,
            'meta': {"index": index}}
Example #25
Source File: trancos.py From LCFCN with Apache License 2.0
def __getitem__(self, index):
    name = self.img_names[index]

    # LOAD IMG, POINT, and ROI
    image = imread(os.path.join(self.path, name + ".jpg"))
    points = imread(os.path.join(self.path, name + "dots.png"))[:, :, :1].clip(0, 1)
    roi = loadmat(os.path.join(self.path, name + "mask.mat"))["BW"][:, :, np.newaxis]

    # LOAD IMG AND POINT
    image = image * roi
    image = hu.shrink2roi(image, roi)
    points = hu.shrink2roi(points, roi).astype("uint8")

    counts = torch.LongTensor(np.array([int(points.sum())]))

    collection = list(map(FT.to_pil_image, [image, points]))
    image, points = transformers.apply_transform(
        self.split, image, points,
        transform_name=self.exp_dict['dataset']['transform'])

    return {"images": image,
            "points": points.squeeze(),
            "counts": counts,
            'meta': {"index": index}}
Example #26
Source File: dataset.py From pytorch-UNet with MIT License
def __call__(self, image, mask):
    # transforming to PIL image
    image, mask = F.to_pil_image(image), F.to_pil_image(mask)

    # random crop
    if self.crop:
        i, j, h, w = T.RandomCrop.get_params(image, self.crop)
        image, mask = F.crop(image, i, j, h, w), F.crop(mask, i, j, h, w)

    if np.random.rand() < self.p_flip:
        image, mask = F.hflip(image), F.hflip(mask)

    # color transforms || ONLY ON IMAGE
    if self.color_jitter_params:
        image = self.color_tf(image)

    # random affine transform
    if np.random.rand() < self.p_random_affine:
        affine_params = T.RandomAffine(180).get_params((-90, 90), (1, 1), (2, 2), (-45, 45), self.crop)
        image, mask = F.affine(image, *affine_params), F.affine(mask, *affine_params)

    # transforming to tensor
    image = F.to_tensor(image)
    if not self.long_mask:
        mask = F.to_tensor(mask)
    else:
        mask = to_long_tensor(mask)

    return image, mask
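The point of using the functional API here is that image and mask receive identical random crop/flip/affine parameters. A usage sketch, inferring the class and constructor arguments from the attributes used in __call__ above (names are assumptions, not pytorch-UNet's documented API):

tf = JointTransform2D(crop=(256, 256), p_flip=0.5,        # hypothetical instantiation
                      color_jitter_params=None,
                      p_random_affine=0.0, long_mask=True)
image_t, mask_t = tf(image_np, mask_np)  # same crop/flip applied to both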
Example #27
Source File: utils.py From ICDAR-2019-SROIE with MIT License
def transform(image, boxes, labels, split):
    """
    Apply the transformations above.

    :param image: image, a PIL Image
    :param boxes: bounding boxes in boundary coordinates, a tensor of dimensions (n_objects, 4)
    :param labels: labels of objects, a tensor of dimensions (n_objects)
    :param split: one of 'TRAIN' or 'TEST', since different sets of transformations are applied
    :return: transformed image, transformed bounding box coordinates, transformed labels
    """
    assert split in {'TRAIN', 'TEST'}

    # Mean and standard deviation of ImageNet data that our base VGG from torchvision was trained on
    # see: https://pytorch.org/docs/stable/torchvision/models.html
    mean = [0.485, 0.456, 0.406]
    std = [0.229, 0.224, 0.225]

    new_image = image
    new_boxes = boxes
    new_labels = labels

    # Skip the following operations if validation/evaluation
    if split == 'TRAIN':
        # A series of photometric distortions in random order, each with 50% chance of occurrence, as in Caffe repo
        new_image = photometric_distort(new_image)

        # Convert PIL image to Torch tensor
        new_image = FT.to_tensor(new_image)

        # Expand image (zoom out) with a 50% chance - helpful for training detection of small objects
        # Fill surrounding space with the mean of ImageNet data that our base VGG was trained on
        if random.random() < 0.5:
            new_image, new_boxes = expand(new_image, boxes, filler=mean)

        # Randomly crop image (zoom in)
        new_image, new_boxes, new_labels = random_crop(new_image, new_boxes, new_labels)

        # Convert Torch tensor to PIL image
        new_image = FT.to_pil_image(new_image)

        # Flip image with a 50% chance
        if random.random() < 0.5:
            new_image, new_boxes = flip(new_image, new_boxes)

    # Resize image to (300, 300) - this also converts absolute boundary coordinates to their fractional form
    new_image, new_boxes = resize(new_image, new_boxes, dims=(300, 300))

    # Convert PIL image to Torch tensor
    new_image = FT.to_tensor(new_image)

    # Normalize by mean and standard deviation of ImageNet data that our base VGG was trained on
    new_image = FT.normalize(new_image, mean=mean, std=std)

    return new_image, new_boxes, new_labels
Example #28
Source File: image_prior.py From Torchelie with MIT License
def inpainting(img, mask, hourglass, input_dim, iters, lr, noise_std=1 / 30,
               device='cuda'):
    im = TFF.to_tensor(img)[None].to(device)
    mask = TFF.to_tensor(mask)[None].to(device)

    z = input_noise((im.shape[2], im.shape[3]), input_dim)
    z = z.to(device)
    print(hourglass)

    def body(batch):
        recon = hourglass(z + torch.randn_like(z) * noise_std)
        loss = torch.sum(
            F.mse_loss(F.interpolate(recon, size=im.shape[2:], mode='nearest'),
                       im, reduction='none') * mask / mask.sum())
        loss.backward()
        return {"loss": loss}

    def display():
        recon = hourglass(z)
        recon = F.interpolate(recon, size=im.shape[2:], mode='nearest')
        loss = F.mse_loss(recon * mask, im)
        result = recon * (1 - mask) + im * mask
        return {
            "loss": loss,
            "recon": recon.clamp(0, 1),
            'orig': im,
            'result': result.clamp(0, 1)
        }

    loop = make_loop(hourglass, body, display, iters, lr)
    loop.test_loop.callbacks.add_callbacks([tcb.Log('result', 'result')])
    loop.to(device)
    loop.run(1)
    with torch.no_grad():
        hourglass.eval()
        return TFF.to_pil_image(hourglass(z)[0].cpu())
Example #29
Source File: utils.py From a-PyTorch-Tutorial-to-Object-Detection with MIT License
def transform(image, boxes, labels, difficulties, split):
    """
    Apply the transformations above.

    :param image: image, a PIL Image
    :param boxes: bounding boxes in boundary coordinates, a tensor of dimensions (n_objects, 4)
    :param labels: labels of objects, a tensor of dimensions (n_objects)
    :param difficulties: difficulties of detection of these objects, a tensor of dimensions (n_objects)
    :param split: one of 'TRAIN' or 'TEST', since different sets of transformations are applied
    :return: transformed image, transformed bounding box coordinates, transformed labels, transformed difficulties
    """
    assert split in {'TRAIN', 'TEST'}

    # Mean and standard deviation of ImageNet data that our base VGG from torchvision was trained on
    # see: https://pytorch.org/docs/stable/torchvision/models.html
    mean = [0.485, 0.456, 0.406]
    std = [0.229, 0.224, 0.225]

    new_image = image
    new_boxes = boxes
    new_labels = labels
    new_difficulties = difficulties

    # Skip the following operations for evaluation/testing
    if split == 'TRAIN':
        # A series of photometric distortions in random order, each with 50% chance of occurrence, as in Caffe repo
        new_image = photometric_distort(new_image)

        # Convert PIL image to Torch tensor
        new_image = FT.to_tensor(new_image)

        # Expand image (zoom out) with a 50% chance - helpful for training detection of small objects
        # Fill surrounding space with the mean of ImageNet data that our base VGG was trained on
        if random.random() < 0.5:
            new_image, new_boxes = expand(new_image, boxes, filler=mean)

        # Randomly crop image (zoom in)
        new_image, new_boxes, new_labels, new_difficulties = random_crop(new_image, new_boxes,
                                                                         new_labels, new_difficulties)

        # Convert Torch tensor to PIL image
        new_image = FT.to_pil_image(new_image)

        # Flip image with a 50% chance
        if random.random() < 0.5:
            new_image, new_boxes = flip(new_image, new_boxes)

    # Resize image to (300, 300) - this also converts absolute boundary coordinates to their fractional form
    new_image, new_boxes = resize(new_image, new_boxes, dims=(300, 300))

    # Convert PIL image to Torch tensor
    new_image = FT.to_tensor(new_image)

    # Normalize by mean and standard deviation of ImageNet data that our base VGG was trained on
    new_image = FT.normalize(new_image, mean=mean, std=std)

    return new_image, new_boxes, new_labels, new_difficulties
Example #30
Source File: __init__.py From flashtorch with MIT License
def apply_transforms(image, size=224):
    """Transforms a PIL image to torch.Tensor.

    Applies a series of transformations on a PIL image, including a conversion
    to a tensor. The returned tensor has a shape of :math:`(N, C, H, W)` and
    is ready to be used as an input to neural networks.

    First the image is resized to 256, then cropped to 224. The `means` and
    `stds` for normalisation are taken from numbers used in ImageNet, as the
    package is currently being developed for visualizing pre-trained models.
    The plan is to expand this to handle custom size/mean/std.

    Args:
        image (PIL.Image.Image or numpy array)
        size (int, optional, default=224): Desired size (width/height) of the
            output tensor

    Shape:
        Input: :math:`(C, H, W)` for numpy array
        Output: :math:`(N, C, H, W)`

    Returns:
        torch.Tensor (torch.float32): Transformed image tensor

    Note:
        Symbols used to describe dimensions:
            - N: number of images in a batch
            - C: number of channels
            - H: height of the image
            - W: width of the image
    """
    if not isinstance(image, Image.Image):
        image = F.to_pil_image(image)

    means = [0.485, 0.456, 0.406]
    stds = [0.229, 0.224, 0.225]

    transform = transforms.Compose([
        transforms.Resize(size),
        transforms.CenterCrop(size),
        transforms.ToTensor(),
        transforms.Normalize(means, stds)
    ])

    tensor = transform(image).unsqueeze(0)
    tensor.requires_grad = True

    return tensor
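A usage sketch for the example above (the file name is hypothetical):

from PIL import Image

image = Image.open('peacock.jpg')   # hypothetical input image
input_t = apply_transforms(image)
print(input_t.shape)                # torch.Size([1, 3, 224, 224])
print(input_t.requires_grad)        # True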