Python Examples of torchvision.transforms.functional.to_tensor

Source File: swapping_dataset.py From srntt-pytorch with Apache License 2.0

6 votes

def __getitem__(self, index):
        """Load the input/reference pair that share the file name at ``index``.

        Returns a dict with:
          * ``'img_in'``: the input image shrunk to ``self.input_size`` and
            enlarged back to ``self.output_size`` (bicubic both ways);
          * ``'img_ref'``: the reference image resized to ``self.output_size``;
          * ``'img_ref_blur'``: that reference after the same shrink/enlarge
            round trip;
          * ``'filename'``: the file name without its extension.
        """
        name = self.filenames[index]

        def _down_then_up(image):
            # Bicubic round trip: input_size first, then back to output_size.
            small = image.resize(self.input_size, Image.BICUBIC)
            return small.resize(self.output_size, Image.BICUBIC)

        source = Image.open(self.input_dir / name).convert('RGB')
        source_up = _down_then_up(source)

        reference = Image.open(self.ref_dir / name).convert('RGB')
        reference = reference.resize(self.output_size, Image.BICUBIC)
        reference_up = _down_then_up(reference)

        sample = {}
        sample['img_in'] = TF.to_tensor(source_up)
        sample['img_ref'] = TF.to_tensor(reference)
        sample['img_ref_blur'] = TF.to_tensor(reference_up)
        sample['filename'] = Path(name).stem
        return sample

Source File: PILTransform.py From ext_portrait_segmentation with MIT License

6 votes

def __call__(self, rgb_img, label_img=None):
        """Normalize ``rgb_img`` and derive two label maps from ``label_img``.

        Each label map is ``label_img`` divided in size by ``self.scale1`` /
        ``self.scale2`` (nearest-neighbour, integer division of width and
        height); a scale of 1 leaves the label untouched.

        Returns ``(rgb_tensor, label1, label2)`` where the labels are
        ``torch.LongTensor`` objects.

        NOTE(review): ``label_img`` defaults to ``None`` but is dereferenced
        whenever a scale differs from 1 -- callers appear to be expected to
        always pass a label; confirm.
        """
        def _scaled_label(factor):
            if factor == 1:
                return label_img
            width, height = label_img.size
            return label_img.resize((width // factor, height // factor), Image.NEAREST)

        first_label = _scaled_label(self.scale1)
        second_label = _scaled_label(self.scale2)

        # to_tensor first, then per-channel normalization with the stored stats.
        rgb_tensor = F.normalize(F.to_tensor(rgb_img), self.mean, self.std)

        label1 = torch.LongTensor(np.array(first_label).astype(np.int64))
        label2 = torch.LongTensor(np.array(second_label).astype(np.int64))
        return rgb_tensor, label1, label2

Source File: cvfunctional.py From opencv_transforms_torchvision with MIT License

6 votes

def cv_transform(img):
    """Run the currently enabled demo transform on ``img`` and return a tensor.

    Only ``salt_and_pepper`` is active. The commented-out calls are kept as a
    catalogue of alternative operations that can be enabled instead.
    """
    # img = resize(img, size=(100, 300))
    # img = to_tensor(img)
    # img = normalize(img, mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    # img = pad(img, padding=(10, 10, 20, 20), fill=(255, 255, 255), padding_mode='constant')
    # img = pad(img, padding=(100, 100, 100, 100), fill=5, padding_mode='symmetric')
    # img = crop(img, -40, -20, 1000, 1000)
    # img = center_crop(img, (310, 300))
    # img = resized_crop(img, -10.3, -20, 330, 220, (500, 500))
    # img = hflip(img)
    # img = vflip(img)
    # tl, tr, bl, br, center = five_crop(img, 100)
    # img = adjust_brightness(img, 2.1)
    # img = adjust_contrast(img, 1.5)
    # img = adjust_saturation(img, 2.3)
    # img = adjust_hue(img, 0.5)
    # img = adjust_gamma(img, gamma=3, gain=0.1)
    # img = rotate(img, 10, resample='BILINEAR', expand=True, center=None)
    # img = to_grayscale(img, 3)
    # img = affine(img, 10, (0, 0), 1, 0, resample='BICUBIC', fillcolor=(255,255,0))
    # img = gaussion_noise(img)
    # img = poisson_noise(img)
    noisy = salt_and_pepper(img)
    tensor = to_tensor(noisy)
    return tensor

Source File: Dataloader.py From Text_Segmentation_Image_Inpainting with GNU General Public License v3.0

6 votes

def process_images(self, clean, mask):
        """Crop a clean image and its mask identically and build a training triple.

        One random crop window is sampled and applied to both inputs, which
        are resized to ``self.img_size``. The mask is thresholded against the
        module-level ``brightness_difference``, dilated with a 10x10 kernel
        and inverted so that valid positions are 1 and holes are 0.

        Returns ``(corrupted_img, binary_mask, clean_img)``.
        """
        # Sample the crop window once so image and mask stay aligned.
        top, left, height, width = RandomResizedCrop.get_params(
            clean, scale=(0.5, 2.0), ratio=(3. / 4., 4. / 3.))
        clean_img = resized_crop(clean, top, left, height, width,
                                 size=self.img_size, interpolation=Image.BICUBIC)
        cropped_mask = resized_crop(mask, top, left, height, width,
                                    self.img_size, interpolation=Image.BICUBIC)

        if self.add_random_masks:
            cropped_mask = random_masks(cropped_mask.copy(), size=self.img_size[0], offset=10)

        # Binarize to {0, 255}, then grow the holes.
        threshold = brightness_difference * 255
        hole_map = np.where(np.array(cropped_mask) > threshold, np.uint8(255), np.uint8(0))
        hole_map = cv2.dilate(hole_map, np.ones((10, 10), np.uint8), iterations=1)

        # presumably hole_map is single-channel (H, W) here, hence the added
        # trailing axis before tensor conversion -- TODO confirm
        mask_t = to_tensor(np.expand_dims(hole_map, -1))

        # valid positions are 1; holes are 0 -- repeated over 3 channels
        binary_mask = (1 - mask_t).expand(3, -1, -1)

        clean_img = self.transformer(clean_img)
        corrupted_img = clean_img * binary_mask
        return corrupted_img, binary_mask, clean_img

Source File: Dataloader.py From Text_Segmentation_Image_Inpainting with GNU General Public License v3.0

6 votes

def resize_pad_tensor(self, pil_img):
        """Resize ``pil_img`` so its longer side fits ``self.resize``, then pad.

        Both output sides are rounded down to a multiple of 8 before the
        bicubic resize. The transformed image is padded with zeros on a
        single side: the width is extended when it is below ``self.resize``,
        otherwise the height is.

        Returns ``(padded_batch, original_batch, mask_resizer)`` where
        ``original_batch`` is the untouched image as a tensor with a leading
        batch axis and ``mask_resizer`` comes from ``self.resize_mask``.
        """
        origin = to_tensor(pil_img).unsqueeze(0)

        target_len = self.resize
        scale = target_len / max(pil_img.size)
        new_size = tuple(int(side * scale) // 8 * 8 for side in pil_img.size)

        resized = pil_img.resize(new_size, Image.BICUBIC)
        img = self.transformer(resized).unsqueeze(0)

        _, _, h, w = img.size()
        # presumably `pad` takes (left, right, top, bottom) amounts as in
        # torch.nn.functional.pad -- verify against the file's imports
        if target_len > w:
            boarder_pad = (0, target_len - w, 0, 0)
        else:
            boarder_pad = (0, 0, 0, target_len - h)

        img = pad(img, boarder_pad, value=0)
        mask_resizer = self.resize_mask(boarder_pad, pil_img.size)
        return img, origin, mask_resizer

Source File: datasets.py From noise2noise-pytorch with MIT License

6 votes

def __getitem__(self, index):
        """Retrieve an image from the folder and build a (source, target) pair.

        The image is loaded as RGB and, when ``self.crop_size`` is non-zero,
        cropped via ``self._random_crop``. The source is always a corrupted
        copy. The target is the clean image when ``self.clean_targets`` is
        set, otherwise an independently corrupted copy.

        Returns:
            Tuple ``(source, target)`` of tensors.
        """

        # Load PIL image
        img_path = os.path.join(self.root_dir, self.imgs[index])
        img = Image.open(img_path).convert('RGB')

        # Random square crop
        if self.crop_size != 0:
            img = self._random_crop([img])[0]

        # Corrupt source image. The previous version also ran the corruption
        # once into an unused local, so every sample paid for an extra
        # corruption pass whose result was thrown away.
        source = tvF.to_tensor(self._corrupt(img))

        # Corrupt target image, but not when clean targets are requested
        if self.clean_targets:
            target = tvF.to_tensor(img)
        else:
            target = tvF.to_tensor(self._corrupt(img))

        return source, target

Source File: transforms.py From medicaltorch with Apache License 2.0

6 votes

def __call__(self, sample):
        """Convert the ``'input'`` (and optionally ``'gt'``) entries to tensors.

        A list value is converted element by element; any other value is
        converted directly. ``'gt'`` is only touched when ``self.labeled`` is
        truthy and the entry is not ``None``. ``sample`` is updated in place
        and returned.
        """
        def _tensorize(data):
            if isinstance(data, list):
                return [F.to_tensor(item) for item in data]
            return F.to_tensor(data)

        converted = {'input': _tensorize(sample['input'])}

        if self.labeled:
            gt_data = sample['gt']
            if gt_data is not None:
                converted['gt'] = _tensorize(gt_data)

        sample.update(converted)
        return sample

Source File: data_loader.py From real-world-sr with MIT License

6 votes

def __getitem__(self, index):
        """Return ``(resized, cropped, lr)`` tensors for the image at ``index``.

        * ``resized``: the center-cropped HR image scaled by
          ``1 / self.upscale_factor`` through ``utils.imresize``;
        * ``cropped``: a center crop of the HR image of side
          ``crop // upscale_factor``, without rescaling;
        * ``lr``: the matching center crop of the LR file when
          ``self.lr_files`` is set, otherwise ``resized`` again.
        """
        hr_pil = Image.open(self.hr_files[index])
        width, height = hr_pil.size

        # Largest usable crop for the shorter side, optionally capped.
        crop = utils.calculate_valid_crop_size(min(width, height), self.upscale_factor)
        if self.crop_size is not None:
            crop = min(crop, self.crop_size)

        small_side = crop // self.upscale_factor
        cropped_image = TF.to_tensor(T.CenterCrop(small_side)(hr_pil))

        hr_image = TF.to_tensor(T.CenterCrop(crop)(hr_pil))
        resized_image = utils.imresize(hr_image, 1.0 / self.upscale_factor, True)

        if self.lr_files is None:
            return resized_image, cropped_image, resized_image

        lr_pil = Image.open(self.lr_files[index])
        lr_image = TF.to_tensor(T.CenterCrop(small_side)(lr_pil))
        return resized_image, cropped_image, lr_image

Source File: cvfunctional.py From opencv_transforms_torchvision with MIT License

6 votes

def pil_transform(img):
    """Convert ``img`` to a tensor; no other transform is currently enabled.

    The commented-out calls are kept as a catalogue of ``functional``
    operations that can be enabled before the conversion.
    """
    # img = functional.resize(img, size=(100, 300))
    # img = functional.to_tensor(img)
    # img = functional.normalize(img, mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    # img = functional.pad(img, padding=(10, 10, 20, 20), fill=(255, 255, 255), padding_mode='constant')
    # img = functional.pad(img, padding=(100, 100, 100, 100), padding_mode='symmetric')
    # img = functional.crop(img, -40, -20, 1000, 1000)
    # img = functional.center_crop(img, (310, 300))
    # img = functional.resized_crop(img, -10.3, -20, 330, 220, (500, 500))
    # img = functional.hflip(img)
    # img = functional.vflip(img)
    # tl, tr, bl, br, center = functional.five_crop(img, 100)
    # img = functional.adjust_brightness(img, 2.1)
    # img = functional.adjust_contrast(img, 1.5)
    # img = functional.adjust_saturation(img, 2.3)
    # img = functional.adjust_hue(img, 0.5)
    # img = functional.adjust_gamma(img, gamma=3, gain=0.1)
    # img = functional.rotate(img, 10, resample=PIL.Image.BILINEAR, expand=True, center=None)
    # img = functional.to_grayscale(img, 3)
    # img = functional.affine(img, 10, (0, 0), 1, 0, resample=PIL.Image.BICUBIC, fillcolor=(255,255,0))

    tensor = functional.to_tensor(img)
    return tensor

Source File: transform_siamfc.py From open-vot with MIT License

6 votes

def __call__(self, img_z, img_x, bndbox_z, bndbox_x):
        """Build one training sample from an exemplar/search image pair.

        Both images are cropped around their bounding boxes (to
        ``self.exemplar_sz`` and ``self.search_sz``), passed through
        ``self._acquire_augment`` and converted to float tensors scaled by
        255. Labels and weights come from ``self._create_labels``.

        Returns ``(crop_z, crop_x, labels, weights)``, all float tensors.
        """
        exemplar = self._crop(img_z, bndbox_z, self.exemplar_sz)
        search = self._crop(img_x, bndbox_x, self.search_sz)
        labels_np, weights_np = self._create_labels()

        # Exemplar is augmented before the search crop; keep that order in
        # case the augmentation draws random numbers.
        exemplar = self._acquire_augment(
            exemplar, self.exemplar_sz, self.stats.rgb_variance_z)
        search = self._acquire_augment(
            search, self.search_sz, self.stats.rgb_variance_x)

        def _scaled_tensor(crop):
            # to_tensor output multiplied by 255, as float
            return (255.0 * F.to_tensor(crop)).float()

        crop_z = _scaled_tensor(exemplar)
        crop_x = _scaled_tensor(search)
        labels = torch.from_numpy(labels_np).float()
        weights = torch.from_numpy(weights_np).float()

        return crop_z, crop_x, labels, weights

Source File: dataset.py From generative-graph-transformer with MIT License

6 votes

def load_raw_images(ids, images_path):
    r"""
    Load grayscale images from raw PNG files.

    Each id is formatted as a zero-padded 7-digit number and appended,
    together with the ``.png`` suffix, to ``images_path`` (plain string
    concatenation, so ``images_path`` must already end with a separator).

    :param ids: ids of the images in the data order
    :param images_path: path prefix of the raw images
    :return: list of image tensors, one per id, in the order of ``ids``
    """
    def _load_one(image_id):
        image_path = images_path + "{:0>7d}".format(int(image_id)) + ".png"
        tensor = tvf.to_tensor(Image.open(image_path).convert('L'))
        # Images are required to be square with a side of 64 or 128.
        assert tensor.shape[1] == tensor.shape[2]
        assert tensor.shape[1] in {64, 128}
        return tensor

    return [_load_one(image_id) for image_id in ids]

Source File: utils.py From oft with MIT License

6 votes

def collate(batch):
    """Merge a list of ``(idx, image, calib, objects, grid)`` samples.

    Images are cropped from the top-left corner to the smallest width and
    height found in the batch so they can be stacked. ``objects`` is
    returned as the un-stacked tuple of per-sample entries.

    Returns ``(idxs, images, calibs, objects, grids)``.
    """
    idxs, images, calibs, objects, grids = zip(*batch)

    # Common crop box: smallest width/height across the batch.
    common_w = min(img.size[0] for img in images)
    common_h = min(img.size[1] for img in images)
    crop_box = (0, 0, common_w, common_h)
    cropped = [img.crop(crop_box) for img in images]

    index_tensor = torch.LongTensor(idxs)

    # Stack along a new leading batch dimension.
    image_batch = torch.stack([to_tensor(img) for img in cropped])
    calib_batch = torch.stack(calibs)
    grid_batch = torch.stack(grids)

    return index_tensor, image_batch, calib_batch, objects, grid_batch

Source File: ffhq_data_to_torch.py From DeepPrivacy with MIT License

6 votes

def save_image_batch(idx, image_ids):
    """Save one batch of images at several resolutions as ``.torch`` files.

    The images named by ``image_ids`` are read once, then for each size in
    4..128 they are reduced to their first three channels, resized with
    area interpolation and stored in one float32 tensor of shape
    ``(len(image_ids), 3, size, size)``. That tensor is written to
    ``TARGET_IMAGE_DIR/<size>/<idx>.torch``.
    """
    impaths = [os.path.join(SOURCE_IMG_DIR, get_impath(image_id))
               for image_id in image_ids]
    images = [plt.imread(impath) for impath in impaths]

    for imsize in (4, 8, 16, 32, 64, 128):
        batch = torch.zeros((len(impaths), 3, imsize, imsize),
                            dtype=torch.float32)
        for slot, raw in enumerate(images):
            rgb = raw[:, :, :3]  # keep only the first three channels
            small = cv2.resize(rgb, (imsize, imsize),
                               interpolation=cv2.INTER_AREA)
            tensor = to_tensor(small)
            assert tensor.max() <= 1.0
            assert len(tensor.shape) == 3
            assert tensor.dtype == torch.float32
            batch[slot] = tensor

        target_dir = os.path.join(TARGET_IMAGE_DIR, str(imsize))
        target_path = os.path.join(target_dir, "{}.torch".format(str(idx)))
        os.makedirs(target_dir, exist_ok=True)
        torch.save(batch, target_path)
        # Drop the buffer before the next (larger) one is allocated.
        del batch

Source File: transforms.py From ray with Apache License 2.0

5 votes

def __call__(self, image, target):
        """Convert ``image`` with ``to_tensor`` and ``target`` to an int64 tensor.

        Returns ``(image_tensor, target_tensor)``.
        """
        image_tensor = F.to_tensor(image)
        target_array = np.asarray(target)
        target_tensor = torch.as_tensor(target_array, dtype=torch.int64)
        return image_tensor, target_tensor

Source File: box_utils.py From r2c with MIT License

5 votes

def to_tensor_and_normalize(image):
        """Convert ``image`` to a tensor scaled by 255 and subtract channel means.

        The std is ``(1, 1, 1)``, so normalization only subtracts the
        per-channel means.
        """
        channel_means = (102.9801, 115.9465, 122.7717)
        unit_std = (1, 1, 1)
        scaled = functional.to_tensor(image) * 255
        return functional.normalize(scaled, mean=channel_means, std=unit_std)

Source File: box_utils.py From r2c with MIT License

5 votes

def to_tensor_and_normalize(image):
        """Convert ``image`` to a tensor and normalize it per channel.

        Uses mean ``(0.485, 0.456, 0.406)`` and std ``(0.229, 0.224, 0.225)``.
        """
        channel_means = (0.485, 0.456, 0.406)
        channel_stds = (0.229, 0.224, 0.225)
        tensor = functional.to_tensor(image)
        return functional.normalize(tensor, mean=channel_means, std=channel_stds)

Source File: transforms.py From Parsing-R-CNN with MIT License

5 votes

def __call__(self, image, target):
        """Convert ``image`` to a tensor; ``target`` is passed through unchanged."""
        image_tensor = F.to_tensor(image)
        return image_tensor, target

Source File: siamfc.py From open-vot with MIT License

5 votes

def _extract_feature(self, image):
        """Run ``self.branch`` on one image or a list/tuple of images.

        Inputs are converted with ``to_tensor``, multiplied by 255 and batched
        along a new leading dimension. The branch is put in eval mode and
        evaluated with gradients disabled on ``self.device``.

        Raises:
            Exception: if ``image`` is neither a PIL image nor a list/tuple.
        """
        is_single = isinstance(image, Image.Image)
        if not is_single and not isinstance(image, (list, tuple)):
            raise Exception('Incorrect input type: {}'.format(type(image)))

        # A single image is handled as a batch of one.
        frames = [image] if is_single else image
        batch = 255.0 * torch.stack([TF.to_tensor(frame) for frame in frames])

        with torch.no_grad():
            self.branch.eval()
            return self.branch(batch.to(self.device))

Source File: transform.py From seamseg with BSD 3-Clause "New" or "Revised" License

5 votes

def __call__(self, img, msk, cat, iscrowd):
        """Apply flip/scale augmentation and convert a sample to tensors.

        Args:
            img: PIL image.
            msk: iterable of PIL label masks, resized together with ``img``.
            cat: per-instance category ids (converted to an int32 array).
            iscrowd: per-instance crowd flags (converted to a uint8 array).

        Returns:
            dict with keys ``img``, ``msk``, ``cat``, ``iscrowd`` and ``bbx``
            (bounding boxes from ``extract_boxes``).
        """
        # Random flip
        if self.random_flip:
            img, msk = self._random_flip(img, msk)

        # Adjust scale, possibly at random
        if self.random_scale is not None:
            target_size = self._random_target_size()
        else:
            target_size = self.shortest_size
        scale = self._adjusted_scale(img.size[0], img.size[1], target_size)

        # Bilinear for the image, nearest for masks so label ids are not blended
        out_size = tuple(int(dim * scale) for dim in img.size)
        img = img.resize(out_size, resample=Image.BILINEAR)
        msk = [m.resize(out_size, resample=Image.NEAREST) for m in msk]

        # Wrap in np.array
        cat = np.array(cat, dtype=np.int32)
        iscrowd = np.array(iscrowd, dtype=np.uint8)

        # Image transformations
        img = tfn.to_tensor(img)
        img = self._normalize_image(img)

        # Label transformations
        # np.asarray copies only when needed; np.array(..., copy=False) raises
        # on NumPy >= 2.0 whenever a copy cannot be avoided.
        msk = np.stack([np.asarray(m, dtype=np.int32) for m in msk], axis=0)
        msk, cat, iscrowd = self._compact_labels(msk, cat, iscrowd)

        # Convert labels to torch and extract bounding boxes
        # np.long was removed in NumPy 1.24; np.int64 maps to torch.long on
        # every platform.
        msk = torch.from_numpy(msk.astype(np.int64))
        cat = torch.from_numpy(cat.astype(np.int64))
        iscrowd = torch.from_numpy(iscrowd)
        bbx = extract_boxes(msk, cat.numel())

        return dict(img=img, msk=msk, cat=cat, iscrowd=iscrowd, bbx=bbx)

Source File: transform_dcfnet.py From open-vot with MIT License

5 votes

def __call__(self, img_z, img_x, bndbox_z, bndbox_x):
        """Crop, randomly flip and color-jitter an exemplar/search pair.

        Both crops are flipped together with probability 0.5 and converted
        to tensors scaled by 255. When ``self.stats`` is truthy, a random
        per-channel offset derived from ``rgb_variance_z`` /
        ``rgb_variance_x`` is added and the result clamped to [0, 255].

        Returns ``(crop_z, crop_x)``.
        """
        crop_z = self._crop(img_z, bndbox_z)
        crop_x = self._crop(img_x, bndbox_x)

        # data augmentation: both crops share one flip decision
        flip = np.random.rand() > 0.5
        if flip:
            crop_z = F.hflip(crop_z)
            crop_x = F.hflip(crop_x)

        crop_z = 255.0 * F.to_tensor(crop_z)
        crop_x = 255.0 * F.to_tensor(crop_x)

        if not self.stats:
            return crop_z, crop_x

        def _color_offset(rgb_variance):
            # One random (3, 1, 1) offset, broadcast over the spatial dims.
            noise = np.dot(rgb_variance, np.random.randn(3, 1))
            return torch.from_numpy(np.reshape(noise, (3, 1, 1))).float()

        # color augmentation (z is drawn before x)
        offset_z = _color_offset(self.stats.rgb_variance_z)
        offset_x = _color_offset(self.stats.rgb_variance_x)
        crop_z += offset_z
        crop_x += offset_x
        crop_z = torch.clamp(crop_z, 0.0, 255.0)
        crop_x = torch.clamp(crop_x, 0.0, 255.0)

        return crop_z, crop_x

Source File: transform_goturn.py From open-vot with MIT License

5 votes

def __call__(self, *args):
        """Build a ``(crop_z, crop_x, labels)`` training sample.

        Accepts either ``(img_z, img_x, bndbox_z, bndbox_x)`` or just two
        positional arguments. The search box is randomly shifted before
        cropping, and ``self.mean_color`` is subtracted from both crops
        after they are scaled by 255.

        NOTE(review): in the two-argument form the bounding boxes are bound
        to the same two objects as the images -- confirm this is intended.
        """
        assert len(args) in [2, 4]
        if len(args) == 2:
            img_z, img_x = args[0], args[1]
            bndbox_z, bndbox_x = args[0], args[1]
        elif len(args) == 4:
            img_z, img_x, bndbox_z, bndbox_x = args

        # shift search area
        shifted_box = self._rand_shift(bndbox_x, img_x.size)

        # crop image regions
        exemplar = self._crop(img_z, bndbox_z)
        search = self._crop(img_x, shifted_box)
        labels_np = self._create_labels(shifted_box, bndbox_x)

        # convert data to tensors
        crop_z = 255.0 * F.to_tensor(exemplar)
        crop_x = 255.0 * F.to_tensor(search)
        labels = torch.from_numpy(labels_np).float()

        # subtract the per-channel mean color in place
        mean_color = torch.tensor(self.mean_color).float().view(3, 1, 1)
        crop_z -= mean_color
        crop_x -= mean_color

        return crop_z, crop_x, labels

Source File: data.py From multi-person-tracker with MIT License

5 votes

def __getitem__(self, idx):
        """Read the image file at ``idx``, convert BGR to RGB, return a tensor."""
        bgr = cv2.imread(self.image_file_names[idx])
        rgb = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)
        return to_tensor(rgb)

Source File: pose_resnet_inference.py From ehpi_action_recognition with MIT License

5 votes

def get_human(model: PoseResNet, skeleton_type: Type[SkeletonBase], image: np.ndarray, bb: BoundingBox) -> Human:
    """Estimate the pose of the person inside bounding box ``bb``.

    The box is turned into an affine crop of ``image``, normalized and fed
    to ``model`` on CUDA. The predicted heatmaps are decoded into joint
    coordinates and scores and written into a COCO skeleton, which is then
    converted to ``skeleton_type``.

    Returns:
        The resulting ``Human`` with ``bounding_box`` set to ``bb``, or
        ``None`` when the box label is not ``"person"``.
    """
    # Compare by value. The previous `is not "person"` tested object
    # identity, which is not guaranteed for equal strings and raises a
    # SyntaxWarning on Python >= 3.8.
    if bb.label != "person":
        return None
    with torch.no_grad():
        center, scale = bb_to_center_scale(bb)
        r = 0  # no rotation
        image_size = [pose_resnet_config.input_height, pose_resnet_config.input_width]
        trans = get_affine_transform(center, scale, r, image_size)
        net_input = cv2.warpAffine(
            image,
            trans,
            (int(image_size[0]), int(image_size[1])),
            flags=cv2.INTER_LINEAR)
        net_input = to_tensor(net_input)
        net_input = normalize(net_input, mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        net_input = net_input.unsqueeze(0)  # add batch dimension

        output = model(net_input.cuda())
        heatmaps = output.clone().cpu().numpy()
        preds, joint_scores = get_final_preds(heatmaps, [center], [scale])
        # NOTE(review): this binds the SkeletonCoco class itself, not an
        # instance, so the joint updates below go through the class object --
        # confirm that is intended.
        skeleton_coco: SkeletonCoco = SkeletonCoco
        preds = preds[0].tolist()
        assert len(joint_scores) <= 1, "Joints for more than one human .."

        for joint_num, joint_score in enumerate(joint_scores[0]):
            assert len(joint_score) <= 1, "More than one joint score!!"
            skeleton_coco.joints[joint_num].x = preds[joint_num][0]
            skeleton_coco.joints[joint_num].y = preds[joint_num][1]
            skeleton_coco.joints[joint_num].score = joint_score[0].item()
            skeleton_coco.joints[joint_num].visibility = JointVisibility.VISIBLE

        human = get_human_pose_result(skeleton_coco, skeleton_type)
        human.bounding_box = bb
        return human

Source File: data_prepare.py From VisualizingNDF with MIT License

5 votes

def __getitem__(self, idx):
        """Load, optionally augment, and normalize the image at ``idx``.

        When ``self.transform`` is truthy, training images are flipped with
        probability 0.5 and, if ``self.crop_limit > 1``, cropped to
        ``self.crop_size`` at a random offset (fixed offset 15 for other
        splits). The image is then converted to a tensor and normalized
        with ``self.mean`` / ``self.std``.

        Returns a dict with keys ``'image'``, ``'age'`` and ``'index'``.
        """
        # read image from the disk
        image = PIL.Image.open(self.img_path_list[idx])

        if self.transform:
            # The random draw happens for every split; only training images
            # are actually flipped.
            coin = np.random.rand() > 0.5
            if coin and self.split == 'train':
                image = transform_f.hflip(image)

            # only crop if input image size is large enough
            if self.crop_limit > 1:
                if self.split == 'train':
                    # random cropping
                    x_start = int(self.crop_limit*np.random.rand())
                    y_start = int(self.crop_limit*np.random.rand())
                else:
                    # only apply central-crop for evaluation set
                    x_start = y_start = 15
                image = transform_f.crop(image, y_start, x_start,
                                         self.crop_size, self.crop_size)

        tensor = transform_f.to_tensor(image)
        tensor = transform_f.normalize(tensor, mean=self.mean, std=self.std)

        return {'image': tensor,
                'age': self.label[idx],
                'index': idx}

Source File: image_prior.py From Torchelie with MIT License

5 votes

def superres(img,
             hourglass,
             input_dim,
             scale,
             iters,
             lr,
             noise_std=1 / 30,
             device='cuda'):
    """Upscale ``img`` by ``scale`` by fitting ``hourglass`` to it.

    A fixed noise input ``z`` of the target resolution is created. The
    network is trained so that its output, bilinearly resized to the
    original resolution, matches ``img`` under an MSE loss. Gaussian noise
    of std ``noise_std`` is added to ``z`` at every training step.

    Returns the final network output for ``z`` as a PIL image.
    """
    target = TFF.to_tensor(img)[None].to(device)
    low_res_shape = target.shape[2:]
    z = input_noise((target.shape[2] * scale, target.shape[3] * scale), input_dim)
    z = z.to(device)

    def _low_res_mse(recon):
        # Compare the reconstruction with the target at the input resolution.
        shrunk = F.interpolate(recon, size=low_res_shape, mode='bilinear')
        return F.mse_loss(shrunk, target)

    def body(batch):
        # Training step: perturb z, then backpropagate the low-res loss.
        perturbed = z + torch.randn_like(z) * noise_std
        loss = _low_res_mse(hourglass(perturbed))
        loss.backward()
        return {
            "loss": loss,
        }

    def display():
        # Monitoring step: clean z, plus a bicubic upscale of the target.
        recon = hourglass(z)
        loss = _low_res_mse(recon)
        bicubic = F.interpolate(target, scale_factor=scale, mode='bicubic')
        return {
            "loss": loss,
            "recon": recon.clamp(0, 1),
            'orig': bicubic.clamp(0, 1)
        }

    loop = make_loop(hourglass, body, display, iters, lr)
    loop.to(device)
    loop.run(1)

    with torch.no_grad():
        hourglass.eval()
        final = hourglass(z)[0].cpu()
        return TFF.to_pil_image(final)

Source File: data_specs.py From margipose with Apache License 2.0

5 votes

def convert(self, img: PIL.Image.Image):
        """Convert ``img`` to a tensor and normalize it with ``self.mean`` / ``self.stddev``."""
        tensor = tr.to_tensor(img)
        return normalize_pixels(tensor, self.mean, self.stddev)

Source File: data_loaders.py From ModelFeast with MIT License

5 votes

def _tansform_(self, x):
        """Map an image array from [0, 255] to [-1, 1], channels first.

        ``x`` is converted to float32, divided by 255, shifted and scaled by
        0.5, and its axes are reordered from (H, W, C) to (C, H, W) before
        being wrapped in a torch tensor.
        """
        scaled = np.array(x, dtype='float32') / 255
        centered = (scaled - 0.5) / 0.5
        # Move the channel axis to dimension 0, the layout PyTorch expects.
        channels_first = centered.transpose((2, 0, 1))

        # # for inceptionresnetV2
        # x = TF.to_pil_image(x)
        # x = TF.resize(x, (64, 32))
        # x = TF.to_tensor(x)

        return torch.from_numpy(channels_first)

Source File: box_utils.py From HGL-pytorch with MIT License

5 votes

def to_tensor_and_normalize(image):
        """Convert ``image`` to a tensor scaled by 255 and subtract channel means.

        The std is ``(1, 1, 1)``, so normalization only subtracts the
        per-channel means.
        """
        channel_means = (102.9801, 115.9465, 122.7717)
        unit_std = (1, 1, 1)
        scaled = functional.to_tensor(image) * 255
        return functional.normalize(scaled, mean=channel_means, std=unit_std)

Source File: preprocessing_transforms.py From ViP with MIT License

5 votes

def _to_tensor(self, clip):
        """
        Convert every frame of ``clip`` with torchvision's ``to_tensor``.

        torchvision converts PIL images and numpy arrays that are uint8 0 to 255 to float 0 to 1
        Converts numpy arrays that are float to float tensor

        If the first frame is already a ``torch.Tensor`` the clip is returned
        unchanged (same object); otherwise a new list of tensors is returned.
        """
        already_tensors = isinstance(clip[0], torch.Tensor)
        if already_tensors:
            return clip

        return [F.to_tensor(frame) for frame in clip]

Source File: transform.py From inplace_abn with BSD 3-Clause "New" or "Revised" License

5 votes

def __call__(self, img):
        """Resize ``img`` so its longest side equals ``self.longest_max_size``, then normalize.

        The image is resized bilinearly (skipped when the scale is exactly
        1), converted to a tensor, and normalized in place per channel with
        ``self.rgb_mean`` and ``self.rgb_std``.
        """
        # Scaling
        longest_side = float(max(img.size[0], img.size[1]))
        scale = self.longest_max_size / longest_side
        if scale != 1.:
            new_size = tuple(int(dim * scale) for dim in img.size)
            img = img.resize(new_size, resample=Image.BILINEAR)

        # Convert to torch and normalize
        tensor = tfn.to_tensor(img)
        # img.new(...) builds tensors of the same type as the image tensor;
        # the view makes them broadcast over the two trailing dims.
        mean = tensor.new(self.rgb_mean).view(-1, 1, 1)
        std = tensor.new(self.rgb_std).view(-1, 1, 1)
        tensor.sub_(mean)
        tensor.div_(std)

        return tensor

Python torchvision.transforms.functional.to_tensor() Examples