Python Examples of torchvision.transforms.functional.resize

Source File: utils.py From ICDAR-2019-SROIE with MIT License

8 votes

def resize(image, boxes, dims=(300, 300), return_percent_coords=True):
    """
    Resize a PIL image and re-express its bounding boxes for the resized image.

    For the SSD300, resize to (300, 300). The boxes are always divided by the original
    image width/height first, which gives fractional coordinates; these are either returned
    directly or multiplied by the new dimensions.

    :param image: image, a PIL Image
    :param boxes: bounding boxes in boundary coordinates, a tensor of dimensions (n_objects, 4)
    :param dims: target size passed to FT.resize; dims[0] scales the y coordinates and dims[1] the x coordinates
    :param return_percent_coords: if True return fractional coordinates, otherwise absolute coordinates in the resized image
    :return: resized image, updated bounding box coordinates (or fractional coordinates)
    """
    resized = FT.resize(image, dims)

    # Fractional coordinates: (x_min, y_min, x_max, y_max) divided by (w, h, w, h)
    width, height = image.width, image.height
    shrink = torch.FloatTensor([width, height, width, height]).unsqueeze(0)
    fractional = boxes / shrink

    if return_percent_coords:
        return resized, fractional

    # Back to pixels, this time w.r.t. the target dimensions
    target_h, target_w = dims[0], dims[1]
    grow = torch.FloatTensor([target_w, target_h, target_w, target_h]).unsqueeze(0)
    return resized, fractional * grow

Source File: joint_transforms.py From cross-season-segmentation with MIT License

7 votes

def __call__(self, img, mask):
    """Cut the same random window of ``self.size`` out of an image and its mask.

    Both inputs are first padded by ``self.padding`` pixels (fill 0) when that value is
    positive. Inputs that already have the target size are returned as they are; inputs
    smaller than the target in either dimension are resized to it instead of cropped
    (bilinear for the image, nearest for the mask).

    :param img: PIL image
    :param mask: PIL image of the same size as ``img``
    :return: tuple (image, mask)
    """
    border = self.padding
    if border > 0:
        img = ImageOps.expand(img, border=border, fill=0)
        mask = ImageOps.expand(mask, border=border, fill=0)

    assert img.size == mask.size
    width, height = img.size
    # self.size is stored as (height, width)
    target_h, target_w = self.size

    if (width, height) == (target_w, target_h):
        return img, mask

    if width < target_w or height < target_h:
        target = (target_w, target_h)
        return img.resize(target, Image.BILINEAR), mask.resize(target, Image.NEAREST)

    left = random.randint(0, width - target_w)
    top = random.randint(0, height - target_h)
    window = (left, top, left + target_w, top + target_h)
    return img.crop(window), mask.crop(window)

Source File: transforms.py From ChaLearn_liveness_challenge with MIT License

6 votes

def __call__(self, img_dict):
    """With probability ``self.p``, blend the sample with another randomly drawn sample.

    A partner sample is drawn through ``img_dict['meta']['get_item_func']`` until its index
    differs from the current one and, when ``self.same_label`` is set, its label matches.
    The 'rgb', 'depth' and 'ir' images are then blended with one shared random ``alpha``.
    When ``self.same_label`` is not set, the label is mixed with the same weights.

    :param img_dict: sample dict with 'rgb', 'depth', 'ir', 'label' and 'meta' entries
    :return: the same dict, modified in place when the augmentation was applied
    """
    if not (np.random.rand() < self.p):
        return img_dict

    meta = img_dict['meta']
    fetch_sample = meta['get_item_func']
    own_index = meta['idx']
    index_limit = meta['max_idx']

    # Rejection sampling of the partner sample
    while True:
        candidate = np.random.randint(0, index_limit)
        partner = fetch_sample(candidate)
        if candidate == own_index:
            continue
        if self.same_label and partner['label'] != img_dict['label']:
            continue
        break

    alpha = np.random.rand()

    for key in ('rgb', 'depth', 'ir'):
        # The partner image is resized to the current image before blending
        donor = partner[key].resize(img_dict[key].size)
        img_dict[key] = Image.blend(donor, img_dict[key], alpha=alpha)

    if not self.same_label:
        img_dict['label'] = alpha * img_dict['label'] + (1 - alpha) * partner['label']

    return img_dict

Source File: data_loader_stargan.py From adversarial-object-removal with MIT License

6 votes

def __getitem__(self, index):
    """Build one synthetic sample by pasting randomly drawn dataset images into a canvas.

    Every entry of ``self.metadata['images'][index]`` is scaled by ``self.out_img_size`` and
    used as ``[x, y, width, height]`` in pixels. For each entry a random item of
    ``self.dataset`` is resized to the box, normalized with mean/std 0.5 and written into
    the canvas. One box is selected at random; its content, mask and pixel box are
    returned in addition to the canvas.

    :param index: position in ``self.metadata['images']``
    :return: canvas image, constant tensor ``[1]``, content of the selected box resized to
        ``self.bbox_out_size``, constant tensor ``[1]``, mask of the selected box, and the
        selected box as an IntTensor
    """
    # Canvas pre-filled with -1 before any box is written into it.
    image = torch.FloatTensor(self.nc, self.out_img_size, self.out_img_size).fill_(-1.)
    boxes = self.metadata['images'][index]
    mean = (0.5,) * self.nc
    std = (0.5,) * self.nc

    # Index of the box that is additionally returned on its own.
    randbox = random.randrange(len(boxes))
    # ``np.int`` was removed in NumPy 1.24; the builtin ``int`` is the dtype it aliased.
    # Both arrays are only referenced by the disabled label code below.
    imglabel = np.zeros(10, dtype=int)
    boxlabel = np.zeros(10, dtype=int)
    for i, bb in enumerate(boxes):
        imid = random.randrange(self.num_data)
        bbox = [int(bc * self.out_img_size) for bc in bb]
        img, label = self.dataset[imid]
        # FN.resize takes (height, width)
        scImg = FN.resize(img, (bbox[3], bbox[2]))
        image[:, bbox[1]:bbox[1] + bbox[3], bbox[0]:bbox[0] + bbox[2]] = FN.normalize(
            FN.to_tensor(scImg), mean=mean, std=std)
        #imglabel[label] = 1
        if i == randbox:
            outBox = FN.normalize(
                FN.to_tensor(FN.resize(scImg, (self.bbox_out_size, self.bbox_out_size))),
                mean=mean, std=std)
            mask = torch.zeros(1, self.out_img_size, self.out_img_size)
            mask[0, bbox[1]:bbox[1] + bbox[3], bbox[0]:bbox[0] + bbox[2]] = 1.
            outbbox = bbox
            #boxlabel[label]=1

    #return image[[0,0,0],::], torch.FloatTensor([1]), outBox[[0,0,0],::], torch.FloatTensor([1]), mask, torch.IntTensor(outbbox)
    return image, torch.FloatTensor([1]), outBox, torch.FloatTensor([1]), mask, torch.IntTensor(outbbox)

Source File: utils.py From a-PyTorch-Tutorial-to-Object-Detection with MIT License

6 votes

def resize(image, boxes, dims=(300, 300), return_percent_coords=True):
    """
    Resize a PIL image and re-express its bounding boxes for the resized image.

    For the SSD300, resize to (300, 300). The boxes are always divided by the original
    image width/height first, which gives fractional coordinates; these are either returned
    directly or multiplied by the new dimensions.

    :param image: image, a PIL Image
    :param boxes: bounding boxes in boundary coordinates, a tensor of dimensions (n_objects, 4)
    :param dims: target size passed to FT.resize; dims[0] scales the y coordinates and dims[1] the x coordinates
    :param return_percent_coords: if True return fractional coordinates, otherwise absolute coordinates in the resized image
    :return: resized image, updated bounding box coordinates (or fractional coordinates)
    """
    resized = FT.resize(image, dims)

    # Fractional coordinates: (x_min, y_min, x_max, y_max) divided by (w, h, w, h)
    width, height = image.width, image.height
    shrink = torch.FloatTensor([width, height, width, height]).unsqueeze(0)
    fractional = boxes / shrink

    if return_percent_coords:
        return resized, fractional

    # Back to pixels, this time w.r.t. the target dimensions
    target_h, target_w = dims[0], dims[1]
    grow = torch.FloatTensor([target_w, target_h, target_w, target_h]).unsqueeze(0)
    return resized, fractional * grow

Source File: transforms.py From person-reid-lib with MIT License

6 votes

def _instance_process(self, img, params):
    """Resize (and optionally crop) the images held by ``img``.

    With ``params`` set to None every image is resized straight to
    ``(self.width, self.height)``. Otherwise ``params`` is unpacked as
    ``(new_width, new_height, x1, y1)``: images are resized to the new size and a
    ``self.width`` x ``self.height`` window starting at ``(x1, y1)`` is cropped out.
    ``img.img`` is always processed; ``img.x`` and ``img.y`` only when they are not None.

    :param img: container with ``img``, ``x`` and ``y`` image attributes
    :param params: None or a 4-tuple ``(new_width, new_height, x1, y1)``
    :return: the same container, updated in place
    """
    if params is None:
        full_size = (self.width, self.height)
        img.img = img.img.resize(full_size, self.interpolation)
        if img.x is not None:
            img.x = img.x.resize(full_size, self.interpolation)
        if img.y is not None:
            img.y = img.y.resize(full_size, self.interpolation)
        return img

    new_width, new_height, x1, y1 = params
    scaled_size = (new_width, new_height)

    img.img = img.img.resize(scaled_size, self.interpolation)
    img.img = img.img.crop((x1, y1, x1 + self.width, y1 + self.height))

    if img.x is not None:
        img.x = img.x.resize(scaled_size, self.interpolation)
        img.x = img.x.crop((x1, y1, x1 + self.width, y1 + self.height))

    if img.y is not None:
        img.y = img.y.resize(scaled_size, self.interpolation)
        img.y = img.y.crop((x1, y1, x1 + self.width, y1 + self.height))

    return img

Source File: transforms.py From ChaLearn_liveness_challenge with MIT License

6 votes

def __call__(self, img_dict):
    """With probability ``self.p``, blend the sample with another randomly drawn sample.

    A partner sample is drawn through ``img_dict['meta']['get_item_func']`` until its index
    differs from the current one and, when ``self.same_label`` is set, its label matches.
    The 'rgb', 'depth' and 'ir' images are then blended with one shared random ``alpha``.
    When ``self.same_label`` is not set, the label is mixed with the same weights.

    :param img_dict: sample dict with 'rgb', 'depth', 'ir', 'label' and 'meta' entries
    :return: the same dict, modified in place when the augmentation was applied
    """
    if not (np.random.rand() < self.p):
        return img_dict

    meta = img_dict['meta']
    fetch_sample = meta['get_item_func']
    own_index = meta['idx']
    index_limit = meta['max_idx']

    # Rejection sampling of the partner sample
    while True:
        candidate = np.random.randint(0, index_limit)
        partner = fetch_sample(candidate)
        if candidate == own_index:
            continue
        if self.same_label and partner['label'] != img_dict['label']:
            continue
        break

    alpha = np.random.rand()

    for key in ('rgb', 'depth', 'ir'):
        # The partner image is resized to the current image before blending
        donor = partner[key].resize(img_dict[key].size)
        img_dict[key] = Image.blend(donor, img_dict[key], alpha=alpha)

    if not self.same_label:
        img_dict['label'] = alpha * img_dict['label'] + (1 - alpha) * partner['label']

    return img_dict

Source File: datasets.py From noise2noise-pytorch with MIT License

6 votes

def _random_crop(self, img_list):
    """Performs random square crop of fixed size.
    Works with list so that all items get the same cropped window (e.g. for buffers).

    The window position is drawn once, from the size of the first image, and reused
    for every item. The first image must be at least ``self.crop_size`` in both
    dimensions.

    :param img_list: non-empty list of images; the first one must expose ``.size`` as (width, height)
    :return: list with one ``crop_size`` x ``crop_size`` crop per input image
    """

    w, h = img_list[0].size
    assert w >= self.crop_size and h >= self.crop_size, \
        f'Error: Crop size: {self.crop_size}, Image size: ({w}, {h})'

    # Top-left corner (row i, column j) shared by all items
    i = np.random.randint(0, h - self.crop_size + 1)
    j = np.random.randint(0, w - self.crop_size + 1)

    # A former "resize if dimensions are too small" step was dropped here: it could never
    # run, because images smaller than the crop size are rejected by the assertion above
    # (and, with assertions disabled, by np.random.randint raising on an empty range).
    return [tvF.crop(img, i, j, self.crop_size, self.crop_size) for img in img_list]

Source File: cvfunctional.py From opencv_transforms_torchvision with MIT License

6 votes

def resized_crop(img, i, j, h, w, size, interpolation='BILINEAR'):
    """Crop a region out of a CV image, then resize the crop. Notably used in RandomResizedCrop.

    Args:
        img (np.ndarray): Image to be cropped.
        i: Upper pixel coordinate.
        j: Left pixel coordinate.
        h: Height of the cropped image.
        w: Width of the cropped image.
        size (sequence or int): Desired output size, forwarded unchanged to ``resize``.
        interpolation (str, optional): Desired interpolation, forwarded to ``resize``.
            Default is ``BILINEAR``.
    Returns:
        The cropped and resized image, as returned by ``resize``.
    """
    assert _is_numpy_image(img), 'img should be CV Image'
    region = crop(img, i, j, h, w)
    return resize(region, size, interpolation)

Source File: cvfunctional.py From opencv_transforms_torchvision with MIT License

6 votes

def cv_transform(img):
    """Apply the currently enabled demo transform (salt-and-pepper noise) and convert to a tensor.

    The commented lines list the other transforms that can be switched in for a comparison.
    """
    # img = resize(img, size=(100, 300))
    # img = to_tensor(img)
    # img = normalize(img, mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    # img = pad(img, padding=(10, 10, 20, 20), fill=(255, 255, 255), padding_mode='constant')
    # img = pad(img, padding=(100, 100, 100, 100), fill=5, padding_mode='symmetric')
    # img = crop(img, -40, -20, 1000, 1000)
    # img = center_crop(img, (310, 300))
    # img = resized_crop(img, -10.3, -20, 330, 220, (500, 500))
    # img = hflip(img)
    # img = vflip(img)
    # tl, tr, bl, br, center = five_crop(img, 100)
    # img = adjust_brightness(img, 2.1)
    # img = adjust_contrast(img, 1.5)
    # img = adjust_saturation(img, 2.3)
    # img = adjust_hue(img, 0.5)
    # img = adjust_gamma(img, gamma=3, gain=0.1)
    # img = rotate(img, 10, resample='BILINEAR', expand=True, center=None)
    # img = to_grayscale(img, 3)
    # img = affine(img, 10, (0, 0), 1, 0, resample='BICUBIC', fillcolor=(255,255,0))
    # img = gaussion_noise(img)
    # img = poisson_noise(img)
    noisy = salt_and_pepper(img)
    return to_tensor(noisy)

Source File: cvfunctional.py From opencv_transforms_torchvision with MIT License

6 votes

def pil_transform(img):
    """Convert the image to a tensor through ``functional.to_tensor``.

    No other transform is enabled at the moment; the commented lines list the
    ``functional`` calls that can be switched in for a comparison.
    """
    # img = functional.resize(img, size=(100, 300))
    # img = functional.to_tensor(img)
    # img = functional.normalize(img, mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    # img = functional.pad(img, padding=(10, 10, 20, 20), fill=(255, 255, 255), padding_mode='constant')
    # img = functional.pad(img, padding=(100, 100, 100, 100), padding_mode='symmetric')
    # img = functional.crop(img, -40, -20, 1000, 1000)
    # img = functional.center_crop(img, (310, 300))
    # img = functional.resized_crop(img, -10.3, -20, 330, 220, (500, 500))
    # img = functional.hflip(img)
    # img = functional.vflip(img)
    # tl, tr, bl, br, center = functional.five_crop(img, 100)
    # img = functional.adjust_brightness(img, 2.1)
    # img = functional.adjust_contrast(img, 1.5)
    # img = functional.adjust_saturation(img, 2.3)
    # img = functional.adjust_hue(img, 0.5)
    # img = functional.adjust_gamma(img, gamma=3, gain=0.1)
    # img = functional.rotate(img, 10, resample=PIL.Image.BILINEAR, expand=True, center=None)
    # img = functional.to_grayscale(img, 3)
    # img = functional.affine(img, 10, (0, 0), 1, 0, resample=PIL.Image.BICUBIC, fillcolor=(255,255,0))

    tensor = functional.to_tensor(img)
    return tensor

Source File: points.py From aerial_wildlife_detection with MIT License

5 votes

def __call__(self, img, points=None, labels=None):
    """Resize the image and translate its points from the old to the new image size.

    :param img: image to resize with ``F.resize``
    :param points: optional points; passed through ``_pointTranslate`` when non-empty
    :param labels: optional labels, returned untouched
    :return: tuple (resized image, points, labels)
    """
    original_size = img.size
    resized = F.resize(img, self.size, self.interpolation)
    resized_size = resized.size

    has_points = points is not None and len(points) > 0
    if has_points:
        points = _pointTranslate(points, original_size, resized_size)

    return resized, points, labels

Source File: boundingBoxes.py From aerial_wildlife_detection with MIT License

5 votes

def __call__(self, img, bboxes=None, labels=None):
    """Resize the image and rescale its bounding boxes from the old to the new image size.

    :param img: image to resize with ``F.resize``
    :param bboxes: optional bounding boxes; passed through ``_bboxResize`` when non-empty
    :param labels: optional labels, returned untouched
    :return: tuple (resized image, bounding boxes, labels)
    """
    original_size = img.size
    resized = F.resize(img, self.size, self.interpolation)
    resized_size = resized.size

    has_boxes = bboxes is not None and len(bboxes) > 0
    if has_boxes:
        bboxes = _bboxResize(bboxes, original_size, resized_size)

    return resized, bboxes, labels

Source File: video_transforms.py From Sound-of-Pixels with MIT License

5 votes

def __call__(self, frames):
    """
    Resize every frame with ``self.size`` and ``self.interpolation``.

    Args:
        frames: a list of PIL Image
    Returns:
        a list of PIL Image: Rescaled images, in input order.
    """
    return [F.resize(frame, self.size, self.interpolation) for frame in frames]

Source File: transforms.py From ChaLearn_liveness_challenge with MIT License

5 votes

def __call__(self, img_dict):
    """Mask the 'depth' and 'ir' images with the depth foreground shared with another sample.

    A partner sample is drawn through ``img_dict['meta']['get_item_func']`` until its index
    differs from the current one and, when ``self.same_label`` is set, its label matches.
    Foreground masks (non-zero grey-level depth, cleaned by a morphological opening) are
    computed for both samples; pixels are kept only where the current sample's mask is set
    and equals the partner's mask. All other pixels of 'depth' and 'ir' become zero.

    :param img_dict: sample dict with 'depth', 'ir', 'label' and 'meta' entries
    :return: the same dict, modified in place
    """
    meta = img_dict['meta']
    fetch_sample = meta['get_item_func']
    own_index = meta['idx']
    index_limit = meta['max_idx']

    # Rejection sampling of the partner sample
    while True:
        candidate = np.random.randint(0, index_limit)
        partner = fetch_sample(candidate)
        if candidate == own_index:
            continue
        if self.same_label and partner['label'] != img_dict['label']:
            continue
        break

    def opened_foreground(depth_image, kernel):
        # Non-zero grey levels -> uint8 mask, then morphological opening
        foreground = np.array(depth_image.convert('L')) > 0
        return cv2.morphologyEx(foreground.astype(np.uint8), cv2.MORPH_OPEN, kernel)

    # The partner depth map is brought to the current depth map's size first.
    # NOTE(review): kernel_orgl is applied to the partner and kernel_augm to the
    # current sample, as in the original code - confirm this pairing is intended.
    partner_depth = partner['depth'].resize(img_dict['depth'].size)
    partner_mask = opened_foreground(partner_depth, self.kernel_orgl)
    own_mask = opened_foreground(img_dict['depth'], self.kernel_augm)

    keep = (own_mask == partner_mask) & (own_mask)
    # Replicate the 2-D mask over three channels so it multiplies (H, W, 3) arrays
    keep = np.repeat(np.expand_dims(keep, 2), 3, axis=2)

    for key in ('depth', 'ir'):
        img_dict[key] = Image.fromarray(np.array(img_dict[key]) * keep)
    return img_dict

Source File: transforms.py From FreeAnchor with MIT License

5 votes

def __call__(self, image, target):
    """Resize the image to the size chosen by ``self.get_size`` and resize the target to match.

    :param image: input image; its ``.size`` is handed to ``self.get_size``
    :param target: annotation object exposing ``resize(size)``
    :return: tuple (resized image, resized target)
    """
    new_size = self.get_size(image.size)
    resized = F.resize(image, new_size)
    return resized, target.resize(resized.size)

Source File: basic_dataset.py From srntt-pytorch with Apache License 2.0

5 votes

def __getitem__(self, index):
    """Load one image and return it together with a bicubically downscaled copy.

    The image is opened as RGB and passed through ``self.transforms`` to give the
    high-resolution image. The low-resolution image is that result resized to its own
    dimensions floor-divided by ``self.scale_factor``. Both tensors are mapped by ``t * 2 - 1``.

    :param index: position in ``self.filenames``
    :return: dict with 'lr' and 'hr' tensors and 'path' (the file name's stem)
    """
    path = self.filenames[index]
    img_hr = self.transforms(Image.open(path).convert('RGB'))

    # .size is reversed because TF.resize expects the opposite order
    down_size = [side // self.scale_factor for side in reversed(img_hr.size)]
    img_lr = TF.resize(img_hr, down_size, interpolation=Image.BICUBIC)

    sample = {
        'lr': TF.to_tensor(img_lr) * 2 - 1,
        'hr': TF.to_tensor(img_hr) * 2 - 1,
        'path': path.stem,
    }
    return sample

Source File: prepare_data.py From style-based-gan-pytorch with MIT License

5 votes

def resize_and_convert(img, size, quality=100):
    """Resize with Lanczos filtering, center-crop to ``size`` and return the JPEG bytes.

    :param img: input image
    :param size: target size, used for both the resize and the center crop
    :param quality: JPEG quality passed to ``save``
    :return: the encoded JPEG as ``bytes``
    """
    resized = trans_fn.resize(img, size, Image.LANCZOS)
    cropped = trans_fn.center_crop(resized, size)

    with BytesIO() as buffer:
        cropped.save(buffer, format='jpeg', quality=quality)
        return buffer.getvalue()

Source File: torchvision_extension.py From DenseNAS with Apache License 2.0

5 votes

def __call__(self, img):
    """Resize the image to a random integer size drawn between ``self.minSize`` and ``self.maxSize``.

    :param img: image to resize
    :return: result of ``F.resize`` with the sampled size
    """
    sampled = random.uniform(self.minSize, self.maxSize)
    return F.resize(img, int(round(sampled)))

Source File: transforms.py From HRNet-MaskRCNN-Benchmark with MIT License

5 votes

def __call__(self, image, target):
    """Resize the image, pad it, and resize the target to the padded image size.

    The resize size comes from ``self.get_size`` and the padding from
    ``self.get_pad_size`` applied to that size.

    :param image: input image
    :param target: annotation object exposing ``resize(size)``
    :return: tuple (resized and padded image, resized target)
    """
    new_size = self.get_size(image.size)
    padding = self.get_pad_size(new_size)
    padded = F.pad(F.resize(image, new_size), padding)
    return padded, target.resize(padded.size)

Source File: transforms.py From Holocron with MIT License

5 votes

def __call__(self, image, target):
    """Resize the image and rescale ``target['boxes']`` in place by the same factors.

    ``self.size`` follows the ``F.resize`` convention: an int matches the smaller image
    edge to that value, a tuple is read as ``(height, width)``. ``image.size`` follows
    the PIL convention ``(width, height)``. The previous tuple branch paired the two
    by index, which scaled x coordinates with the target height and y coordinates with
    the target width; this was only correct for square targets.

    :param image: PIL image
    :param target: dict whose 'boxes' entry holds x values in columns 0 and 2 and
        y values in columns 1 and 3
    :return: tuple (resized image, target with rescaled boxes)
    """
    width, height = image.size
    if isinstance(self.size, int):
        # One common factor: the smaller edge is mapped onto self.size
        target['boxes'] *= self.size / min(width, height)
    elif isinstance(self.size, tuple):
        # self.size is (new_height, new_width)
        target['boxes'][:, [0, 2]] *= self.size[1] / width
        target['boxes'][:, [1, 3]] *= self.size[0] / height
    return F.resize(image, self.size, self.interpolation), target

Source File: predictor.py From sampling-free with MIT License

5 votes

def compute_prediction(self, original_image):
    """
    Run the model on a single image and return its detections at the original image size.

    Arguments:
        original_image (np.ndarray): an image as returned by OpenCV

    Returns:
        prediction (BoxList): the detected objects. Additional information
            of the detection properties can be found in the fields of
            the BoxList via `prediction.fields()`
    """
    # Pre-process, then wrap into an ImageList padded so that it is divisible by
    # cfg.DATALOADER.SIZE_DIVISIBILITY, and move it to the model's device
    image = self.transforms(original_image)
    image_list = to_image_list(image, self.cfg.DATALOADER.SIZE_DIVISIBILITY)
    image_list = image_list.to(self.device)

    with torch.no_grad():
        raw_predictions = self.model(image_list)

    # Only one image is passed at a time, so only the first entry is used
    prediction = [o.to(self.cpu_device) for o in raw_predictions][0]

    # Bring the prediction (a BoxList) back to the original image size
    height, width = original_image.shape[:-1]
    prediction = prediction.resize((width, height))

    if not prediction.has_field("mask"):
        return prediction

    # Paste the masks in the right position in the image, as defined by the bounding boxes
    masks = prediction.get_field("mask")
    pasted = self.masker([masks], [prediction])[0]
    prediction.add_field("mask", pasted)
    return prediction

Source File: predictor.py From sampling-free with MIT License

5 votes

def __call__(self, image):
    """Resize the image to the size that ``self.get_size`` derives from its current size.

    :param image: input image
    :return: resized image
    """
    return F.resize(image, self.get_size(image.size))

Source File: transforms.py From sampling-free with MIT License

5 votes

def __call__(self, image, target=None):
    """Resize the image and, when a target is given, resize the target to match.

    :param image: input image; its ``.size`` is handed to ``self.get_size``
    :param target: optional annotation object exposing ``resize(size)``
    :return: the resized image alone when ``target`` is None, otherwise
        a tuple (resized image, resized target)
    """
    resized = F.resize(image, self.get_size(image.size))
    if target is not None:
        return resized, target.resize(resized.size)
    return resized

Source File: box_utils.py From r2c with MIT License

5 votes

def resize_image(image, desired_width=768, desired_height=384, random_pad=False):
    """Scale an image to fit inside the desired size and pad the remainder.

    One scale factor (the smaller of the width and height ratios) is used for both
    dimensions. The remaining space is split evenly between the two sides, or at a
    random offset when ``random_pad`` is set.

    Returns:
    image: the resized and padded image
    window: [x1, y1, x2, y2], the coordinates of the image part inside the padded
        image (excluding the padding). The x2, y2 pixels are not included.
    scale: The scale factor used to resize the image
    padding: Padding added to the image (left, top, right, bottom)
    """
    orig_w, orig_h = image.size
    scale = min(desired_width / orig_w, desired_height / orig_h)

    # functional.resize takes (height, width)
    if scale != 1:
        image = functional.resize(image, (round(orig_h * scale), round(orig_w * scale)))

    w, h = image.size
    y_pad = desired_height - h
    x_pad = desired_width - w

    if random_pad:
        top_pad = random.randint(0, y_pad)
        left_pad = random.randint(0, x_pad)
    else:
        top_pad = y_pad // 2
        left_pad = x_pad // 2

    padding = (left_pad, top_pad, x_pad - left_pad, y_pad - top_pad)
    assert all(amount >= 0 for amount in padding)

    image = functional.pad(image, padding)
    window = [left_pad, top_pad, w + left_pad, h + top_pad]
    return image, window, scale, padding

Source File: data_loaders.py From ModelFeast with MIT License

5 votes

def _tansform_(self, x):
    """Convert an image-like array to a float32 tensor with the channel axis first.

    Values are divided by 255, then shifted and scaled by ``(v - 0.5) / 0.5``, and the
    axes are reordered from (0, 1, 2) to (2, 0, 1).

    :param x: three-dimensional array-like
    :return: torch tensor sharing memory with the transposed numpy array
    """
    scaled = np.array(x, dtype='float32') / 255
    centered = (scaled - 0.5) / 0.5
    # Move the channel axis to dimension 0, the input layout PyTorch requires
    channel_first = centered.transpose((2, 0, 1))

    # # for inceptionresnetV2
    # x = TF.to_pil_image(x)
    # x = TF.resize(x, (64, 32))
    # x = TF.to_tensor(x)

    return torch.from_numpy(channel_first)

Source File: resize_video.py From torchvideo with Mozilla Public License 2.0

5 votes

def _transform(self, frames: PILVideo, params: None) -> PILVideoI:
    """Lazily yield each frame resized with ``self.size`` and ``self.interpolation``.

    :param frames: iterable of frames
    :param params: unused
    """
    for image in frames:
        resized = F.resize(image, self.size, self.interpolation)
        yield resized

Source File: transforms.py From ChaLearn_liveness_challenge with MIT License

5 votes

def __call__(self, img_dict):
    """Mask the 'depth' and 'ir' images with the depth foreground shared with another sample.

    A partner sample is drawn through ``img_dict['meta']['get_item_func']`` until its index
    differs from the current one and, when ``self.same_label`` is set, its label matches.
    Foreground masks (non-zero grey-level depth, cleaned by a morphological opening) are
    computed for both samples; pixels are kept only where the current sample's mask is set
    and equals the partner's mask. All other pixels of 'depth' and 'ir' become zero.

    :param img_dict: sample dict with 'depth', 'ir', 'label' and 'meta' entries
    :return: the same dict, modified in place
    """
    meta = img_dict['meta']
    fetch_sample = meta['get_item_func']
    own_index = meta['idx']
    index_limit = meta['max_idx']

    # Rejection sampling of the partner sample
    while True:
        candidate = np.random.randint(0, index_limit)
        partner = fetch_sample(candidate)
        if candidate == own_index:
            continue
        if self.same_label and partner['label'] != img_dict['label']:
            continue
        break

    def opened_foreground(depth_image, kernel):
        # Non-zero grey levels -> uint8 mask, then morphological opening
        foreground = np.array(depth_image.convert('L')) > 0
        return cv2.morphologyEx(foreground.astype(np.uint8), cv2.MORPH_OPEN, kernel)

    # The partner depth map is brought to the current depth map's size first.
    # NOTE(review): kernel_orgl is applied to the partner and kernel_augm to the
    # current sample, as in the original code - confirm this pairing is intended.
    partner_depth = partner['depth'].resize(img_dict['depth'].size)
    partner_mask = opened_foreground(partner_depth, self.kernel_orgl)
    own_mask = opened_foreground(img_dict['depth'], self.kernel_augm)

    keep = (own_mask == partner_mask) & (own_mask)
    # Replicate the 2-D mask over three channels so it multiplies (H, W, 3) arrays
    keep = np.repeat(np.expand_dims(keep, 2), 3, axis=2)

    for key in ('depth', 'ir'):
        img_dict[key] = Image.fromarray(np.array(img_dict[key]) * keep)
    return img_dict

Source File: transforms.py From ChaLearn_liveness_challenge with MIT License

5 votes

def __call__(self, img_dict):
    """Resize the 'rgb', 'ir' and 'depth' images of a sample in place.

    :param img_dict: sample dict holding one image per modality
    :return: the same dict with the three images replaced by their resized versions
    """
    for modality in ('rgb', 'ir', 'depth'):
        original = img_dict[modality]
        img_dict[modality] = F.resize(original, self.size, self.interpolation)
    return img_dict

Source File: RANet_Data_transform.py From RANet with Apache License 2.0

5 votes

def __call__(self, sample, **kwargs):
    """Resize every image of ``sample`` and crop a centered ``self.out_size`` window from it.

    The resize target is derived from the image size and the ratio
    ``out_size[1] / out_size[0]``; the crop box is centered in the resized image
    and spans ``out_size[0]`` by ``out_size[1]``.

    :param sample: mutable sequence of images exposing ``size``, ``resize`` and ``crop``
    :param kwargs: unused
    :return: the same sequence with every entry replaced by its resized and cropped version
    """
    for idx, img in enumerate(sample):
        out_first, out_second = self.out_size[0], self.out_size[1]
        ratio = out_second / out_first

        # Size with the second dimension normalised by the output ratio
        size = np.asarray(img.size, dtype='float32') / [1, ratio]
        longest = max(size)
        t_size = np.floor(size / longest * out_first)
        # Undo the normalisation; the parity term is added before the int32 cast
        t_size = np.asanyarray(t_size * [1, ratio] + t_size % 2, dtype='int32')
        resized = img.resize(t_size)

        # Centered crop window of the output size
        left = (t_size[0] - out_first) / 2
        top = (t_size[1] - out_second) / 2
        sample[idx] = resized.crop((left, top, left + out_first, top + out_second))
    return sample

Python torchvision.transforms.functional.resize() Examples