Python skimage.transform Examples

The following are 28 code examples of the skimage.transform module. Each example is an excerpt from an open-source project; the project, source file, and license are listed above the code. You may also want to check out all available functions and classes of the module skimage.
Example #1
Source File: utils.py    From dataiku-contrib with Apache License 2.0
def resize(image, output_shape, order=1, mode='constant', cval=0, clip=True,
           preserve_range=False, anti_aliasing=False, anti_aliasing_sigma=None):
    """A wrapper for Scikit-Image resize().
    Scikit-Image generates warnings on every call to resize() if it doesn't
    receive the right parameters. The right parameters depend on the version
    of skimage. This solves the problem by using different parameters per
    version. And it provides a central place to control resizing defaults.
    """
    if LooseVersion(skimage.__version__) >= LooseVersion("0.14"):
        # New in 0.14: anti_aliasing. Default it to False for backward
        # compatibility with skimage 0.13.
        return skimage.transform.resize(
            image, output_shape,
            order=order, mode=mode, cval=cval, clip=clip,
            preserve_range=preserve_range, anti_aliasing=anti_aliasing,
            anti_aliasing_sigma=anti_aliasing_sigma)
    else:
        return skimage.transform.resize(
            image, output_shape,
            order=order, mode=mode, cval=cval, clip=clip,
            preserve_range=preserve_range) 
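A quick usage sketch of this wrapper. The imports below are assumptions about the surrounding module (they are not shown in the excerpt):

import numpy as np
import skimage
import skimage.transform
from distutils.version import LooseVersion

image = np.random.rand(256, 256, 3)                    # dummy float RGB image in [0, 1]
small = resize(image, (128, 128), anti_aliasing=True)  # wrapper defined above
print(small.shape)                                     # (128, 128, 3)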
Example #2
Source File: test_embedding.py    From deep-ranking with MIT License
def __init__(self, root_dir, transform=None, loader=pil_loader):
        """
        Args:
            root_dir (string): Directory with all the images.
            transform (callable, optional): Optional transform to be applied
                on a sample.
            loader (callable): Function used to load an image from a path.
        """
        if transform is None:
            transform = torchvision.transforms.Compose([torchvision.transforms.Resize(224),
                                                        torchvision.transforms.RandomHorizontalFlip(p=0.5),
                                                        torchvision.transforms.RandomVerticalFlip(p=0.5),
                                                        torchvision.transforms.ToTensor()])
        self.root_dir = root_dir
        self.transform = transform
        self.loader = loader

        self.images = os.listdir(self.root_dir)

        self.image_class = np.array(pd.read_csv('val_details.txt', sep='\t')[['mage','class']]).astype('str')
        self.class_dic = {}
        for i in self.image_class:
            self.class_dic[i[0]] = i[1]
Example #3
Source File: utils.py    From Mask-RCNN-Pedestrian-Detection with MIT License
def box_refinement_graph(box, gt_box):
    """Compute refinement needed to transform box to gt_box.
    box and gt_box are [N, (y1, x1, y2, x2)]
    """
    box = tf.cast(box, tf.float32)
    gt_box = tf.cast(gt_box, tf.float32)

    height = box[:, 2] - box[:, 0]
    width = box[:, 3] - box[:, 1]
    center_y = box[:, 0] + 0.5 * height
    center_x = box[:, 1] + 0.5 * width

    gt_height = gt_box[:, 2] - gt_box[:, 0]
    gt_width = gt_box[:, 3] - gt_box[:, 1]
    gt_center_y = gt_box[:, 0] + 0.5 * gt_height
    gt_center_x = gt_box[:, 1] + 0.5 * gt_width

    dy = (gt_center_y - center_y) / height
    dx = (gt_center_x - center_x) / width
    dh = tf.log(gt_height / height)
    dw = tf.log(gt_width / width)

    result = tf.stack([dy, dx, dh, dw], axis=1)
    return result 
Example #4
Source File: helpers.py    From tutorials with Apache License 2.0
def rescale(img, input_height, input_width):
    # print("Original image shape:" + str(img.shape) + " --> it should be in H, W, C!")
    # print("Model's input shape is %dx%d" % (input_height, input_width))
    aspect = img.shape[1] / float(img.shape[0])
    # print("Original aspect ratio: " + str(aspect))
    if aspect > 1:
        # landscape orientation - wide image
        res = int(aspect * input_height)
        imgScaled = skimage.transform.resize(
            img,
            (input_width, res),
            preserve_range=False)
    elif aspect < 1:
        # portrait orientation - tall image
        res = int(input_width / aspect)
        imgScaled = skimage.transform.resize(
            img,
            (res, input_height),
            preserve_range=False)
    else:
        # square image
        imgScaled = skimage.transform.resize(
            img,
            (input_width, input_height),
            preserve_range=False)
    return imgScaled
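A hedged usage sketch: for a 240x320 landscape input and a 224x224 model input, aspect = 320/240, so res = int(aspect * 224) = 298:

import numpy as np
import skimage.transform

img = np.random.rand(240, 320, 3)    # H, W, C with aspect > 1
print(rescale(img, 224, 224).shape)  # (224, 298, 3)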
Example #5
Source File: utils.py    From Mask-RCNN-Pedestrian-Detection with MIT License
def minimize_mask(bbox, mask, mini_shape):
    """Resize masks to a smaller version to reduce memory load.
    Mini-masks can be resized back to image scale using expand_masks()

    See inspect_data.ipynb notebook for more details.
    """
    mini_mask = np.zeros(mini_shape + (mask.shape[-1],), dtype=bool)
    for i in range(mask.shape[-1]):
        m = mask[:, :, i]
        y1, x1, y2, x2 = bbox[i][:4]
        m = m[y1:y2, x1:x2]
        if m.size == 0:
            raise Exception("Invalid bounding box with area of zero")
        # Resize with bilinear interpolation
        m = skimage.transform.resize(m, mini_shape, order=1, mode="constant")
        mini_mask[:, :, i] = np.around(m).astype(bool)  # np.bool is removed in NumPy 1.24+
    return mini_mask 
Example #6
Source File: utils.py    From dataiku-contrib with Apache License 2.0
def box_refinement_graph(box, gt_box):
    """Compute refinement needed to transform box to gt_box.
    box and gt_box are [N, (y1, x1, y2, x2)]
    """
    box = tf.cast(box, tf.float32)
    gt_box = tf.cast(gt_box, tf.float32)

    height = box[:, 2] - box[:, 0]
    width = box[:, 3] - box[:, 1]
    center_y = box[:, 0] + 0.5 * height
    center_x = box[:, 1] + 0.5 * width

    gt_height = gt_box[:, 2] - gt_box[:, 0]
    gt_width = gt_box[:, 3] - gt_box[:, 1]
    gt_center_y = gt_box[:, 0] + 0.5 * gt_height
    gt_center_x = gt_box[:, 1] + 0.5 * gt_width

    dy = (gt_center_y - center_y) / height
    dx = (gt_center_x - center_x) / width
    dh = tf.log(gt_height / height)
    dw = tf.log(gt_width / width)

    result = tf.stack([dy, dx, dh, dw], axis=1)
    return result 
Example #7
Source File: query.py    From deep-ranking with MIT License
def __init__(self, root_dir, transform=None, loader=pil_loader):
        """
        Args:
            root_dir (string): Directory with all the images.
            transform (callable, optional): Optional transform to be applied
                on a sample.
            loader (callable): Function used to load an image from a path.
        """
        if transform is None:
            transform = torchvision.transforms.Compose([torchvision.transforms.Resize(224),
                                                        torchvision.transforms.RandomHorizontalFlip(p=0.5),
                                                        torchvision.transforms.RandomVerticalFlip(p=0.5),
                                                        torchvision.transforms.ToTensor()])
        self.root_dir = root_dir
        self.transform = transform
        self.loader = loader

        self.images = os.listdir(self.root_dir)

        self.image_class = np.array(pd.read_csv('val_details.txt', sep='\t')[['mage','class']]).astype('str')
        self.class_dic = {}
        for i in self.image_class:
            self.class_dic[i[0]] = i[1]
Example #8
Source File: utils.py    From Mask-RCNN-Pedestrian-Detection with MIT License
def expand_mask(bbox, mini_mask, image_shape):
    """Resizes mini masks back to image size. Reverses the change
    of minimize_mask().

    See inspect_data.ipynb notebook for more details.
    """
    mask = np.zeros(image_shape[:2] + (mini_mask.shape[-1],), dtype=bool)
    for i in range(mask.shape[-1]):
        m = mini_mask[:, :, i]
        y1, x1, y2, x2 = bbox[i][:4]
        h = y2 - y1
        w = x2 - x1
        # Resize with bilinear interpolation
        m = skimage.transform.resize(m, (h, w), order=1, mode="constant")
        mask[y1:y2, x1:x2, i] = np.around(m).astype(bool)  # np.bool is removed in NumPy 1.24+
    return mask


# TODO: Build and use this function to reduce code duplication 
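A minimal round-trip sketch of minimize_mask() and expand_mask() above, assuming numpy as np and skimage.transform are imported:

import numpy as np

mask = np.zeros((128, 128, 1), dtype=bool)
mask[32:96, 32:96, 0] = True                # one square instance
bbox = np.array([[32, 32, 96, 96]])         # y1, x1, y2, x2
mini = minimize_mask(bbox, mask, (28, 28))  # (28, 28, 1) boolean mini-mask
full = expand_mask(bbox, mini, (128, 128))  # back to (128, 128, 1)
print(mini.shape, full.shape)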
Example #9
Source File: helpers.py    From Feed-Forward-Style-Transfer with MIT License
def load_img(path):
    """Returns a numpy array of an image specified by its path.
    
    Args:
        path: string representing the file path of the image to load
        
    Returns:
        resized_img: numpy array representing the loaded RGB image
        shape: the image shape
    """

    # Load image [height, width, depth]
    img = skimage.io.imread(path) / 255.0
    assert (0 <= img).all() and (img <= 1.0).all()

    # Crop image from center
    short_edge = min(img.shape[:2])
    yy = int((img.shape[0] - short_edge) / 2)
    xx = int((img.shape[1] - short_edge) / 2)
    shape = list(img.shape)

    crop_img = img[yy: yy + short_edge, xx: xx + short_edge]
    resized_img = skimage.transform.resize(crop_img, (shape[0], shape[1]))
    return resized_img, shape 
Example #10
Source File: grid.py    From pysheds with GNU General Public License v3.0
def _convert_grid_indices_crs(self, grid_indices, old_crs, new_crs):
        if _OLD_PYPROJ:
            x2, y2 = pyproj.transform(old_crs, new_crs, grid_indices[:,1],
                                    grid_indices[:,0])
        else:
            x2, y2 = pyproj.transform(old_crs, new_crs, grid_indices[:,1],
                                      grid_indices[:,0], errcheck=True,
                                      always_xy=True)
        yx2 = np.column_stack([y2, x2])
        return yx2

    # def _convert_outer_indices_crs(self, affine, shape, old_crs, new_crs):
    #     y1, x1 = self.grid_indices(affine=affine, shape=shape)
    #     lx, _ = pyproj.transform(old_crs, new_crs,
    #                               x1, np.repeat(y1[0], len(x1)))
    #     rx, _ = pyproj.transform(old_crs, new_crs,
    #                               x1, np.repeat(y1[-1], len(x1)))
    #     __, by = pyproj.transform(old_crs, new_crs,
    #                               np.repeat(x1[0], len(y1)), y1)
    #     __, uy = pyproj.transform(old_crs, new_crs,
    #                               np.repeat(x1[-1], len(y1)), y1)
    #     return by, uy, lx, rx 
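pyproj.transform() is deprecated in pyproj 2.x; a rough modern equivalent of the new-pyproj branch above uses a cached Transformer object (the CRS codes and coordinates here are illustrative):

import numpy as np
from pyproj import Transformer

transformer = Transformer.from_crs("EPSG:4326", "EPSG:3857", always_xy=True)
grid_indices = np.array([[45.0, 7.0], [46.0, 8.0]])    # rows of (y, x)
x2, y2 = transformer.transform(grid_indices[:, 1], grid_indices[:, 0])
yx2 = np.column_stack([y2, x2])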
Example #11
Source File: coco.py    From EfficientDet.Pytorch with MIT License
def __init__(self, root_dir, set_name='train2017', transform=None):
        """
        Args:
            root_dir (string): COCO directory.
            set_name (string): Annotation split to load, e.g. 'train2017'.
            transform (callable, optional): Optional transform to be applied
                on a sample.
        """
        self.root_dir = root_dir
        self.set_name = set_name
        self.transform = transform

        self.coco = COCO(os.path.join(self.root_dir, 'annotations',
                                      'instances_' + self.set_name + '.json'))
        self.image_ids = self.coco.getImgIds()

        self.load_classes() 
Example #12
Source File: image_processing.py    From text-to-image with MIT License
def load_image_array(image_file, image_size):
	img = skimage.io.imread(image_file)
	# GRAYSCALE
	if len(img.shape) == 2:
		img_new = np.ndarray( (img.shape[0], img.shape[1], 3), dtype = 'uint8')
		img_new[:,:,0] = img
		img_new[:,:,1] = img
		img_new[:,:,2] = img
		img = img_new

	img_resized = skimage.transform.resize(img, (image_size, image_size))

	# FLIP HORIZONTALLY WITH A PROBABILITY OF 0.5
	if random.random() > 0.5:
		img_resized = np.fliplr(img_resized)

	return img_resized.astype('float32')
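Hedged usage (the file name is illustrative): for an RGB or grayscale input the function returns a square float32 array, randomly mirrored half the time:

arr = load_image_array('flower.jpg', 64)
print(arr.shape, arr.dtype)   # (64, 64, 3) float32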
Example #13
Source File: utils.py    From PanopticSegmentation with MIT License
def resize(image, output_shape, order=1, mode='constant', cval=0, clip=True,
           preserve_range=False, anti_aliasing=False, anti_aliasing_sigma=None):
    """A wrapper for Scikit-Image resize().

    Scikit-Image generates warnings on every call to resize() if it doesn't
    receive the right parameters. The right parameters depend on the version
    of skimage. This solves the problem by using different parameters per
    version. And it provides a central place to control resizing defaults.
    """
    if LooseVersion(skimage.__version__) >= LooseVersion("0.14"):
        # New in 0.14: anti_aliasing. Default it to False for backward
        # compatibility with skimage 0.13.
        return skimage.transform.resize(
            image, output_shape,
            order=order, mode=mode, cval=cval, clip=clip,
            preserve_range=preserve_range, anti_aliasing=anti_aliasing,
            anti_aliasing_sigma=anti_aliasing_sigma)
    else:
        return skimage.transform.resize(
            image, output_shape,
            order=order, mode=mode, cval=cval, clip=clip,
            preserve_range=preserve_range) 
Example #14
Source File: utils.py    From PanopticSegmentation with MIT License
def box_refinement_graph(box, gt_box):
    """Compute refinement needed to transform box to gt_box.
    box and gt_box are [N, (y1, x1, y2, x2)]
    """
    box = tf.cast(box, tf.float32)
    gt_box = tf.cast(gt_box, tf.float32)

    height = box[:, 2] - box[:, 0]
    width = box[:, 3] - box[:, 1]
    center_y = box[:, 0] + 0.5 * height
    center_x = box[:, 1] + 0.5 * width

    gt_height = gt_box[:, 2] - gt_box[:, 0]
    gt_width = gt_box[:, 3] - gt_box[:, 1]
    gt_center_y = gt_box[:, 0] + 0.5 * gt_height
    gt_center_x = gt_box[:, 1] + 0.5 * gt_width

    dy = (gt_center_y - center_y) / height
    dx = (gt_center_x - center_x) / width
    dh = tf.log(gt_height / height)
    dw = tf.log(gt_width / width)

    result = tf.stack([dy, dx, dh, dw], axis=1)
    return result 
Example #15
Source File: dataloader.py    From EfficientDet-PyTorch with Apache License 2.0
def __call__(self, sample):
        image, annots = sample['img'], sample['annot']

        rows, cols, cns = image.shape

        largest_side = max(rows, cols)

        scale = self.img_size / largest_side

        # resize the image with the computed scale
        image = skimage.transform.resize(image, (int(round(rows * scale)), int(round(cols * scale))))
        rows, cols, cns = image.shape

        new_image = np.zeros((self.img_size, self.img_size, cns)).astype(np.float32)
        new_image[:rows, :cols, :] = image.astype(np.float32)

        annots[:, :4] *= scale

        return {'img': torch.from_numpy(new_image), 'annot': torch.from_numpy(annots), 'scale': scale} 
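The resizer class itself is not shown in this excerpt, but the letterbox arithmetic in __call__ can be checked standalone; for a 300x400 image and img_size = 512:

rows, cols, img_size = 300, 400, 512
scale = img_size / max(rows, cols)                          # 1.28
print(int(round(rows * scale)), int(round(cols * scale)))   # 384 512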
Example #16
Source File: c3d.py    From Recipes with MIT License
def set_weights(net,model_file):
    '''
    Sets the parameters of the model using the weights stored in model_file
    Parameters
    ----------
    net: a Lasagne layer

    model_file: string
        path to the model that contains the weights

    Returns
    -------
    None

    '''
    with open(model_file, 'rb') as f:  # binary mode is required by pickle under Python 3
        print('Load pretrained weights from %s...' % model_file)
        model = pickle.load(f)
    print('Set the weights...')
    lasagne.layers.set_all_param_values(net, model, trainable=True)


######## Below, there are several helper functions to transform (lists of) images into the right format  ###### 
Example #17
Source File: data_utils.py    From deep-ranking with MIT License
def __init__(self, root_dir='tiny-imagenet-200', transform=None, loader=pil_loader):
        """
        Args:
            root_dir (string): Directory with all the images.
            transform (callable, optional): Optional transform to be applied
                on a sample.
            loader (callable): Function used to load an image from a path.
        """
        if transform is None:
            transform = torchvision.transforms.Compose([torchvision.transforms.Resize(224),
                                                        torchvision.transforms.RandomHorizontalFlip(p=0.5),
                                                        torchvision.transforms.RandomVerticalFlip(p=0.5),
                                                        torchvision.transforms.ToTensor()])

        self.root_dir = root_dir
        self.transform = transform
        self.loader = loader
        self.class_dict = {}
        self.rev_dict = {}
        self.image_dict = {}
        self.big_dict = {}
        L = []
        
        for i, j in enumerate(os.listdir(os.path.join(self.root_dir, 'train'))):
            self.class_dict[j] = i
            self.rev_dict[i] = j
            self.image_dict[j] = np.array(os.listdir(os.path.join(self.root_dir, 'train', j, 'images')))
            for l in os.listdir(os.path.join(self.root_dir, 'train', j, 'images')):
                L.append((l, i))

        for i, j in enumerate(L):
            self.big_dict[i] = j

        self.num_classes = 200
Example #18
Source File: bm_comp_perform.py    From BIRL with BSD 3-Clause "New" or "Revised" License
def register_image_pair(idx, path_img_target, path_img_source, path_out):
    """ register two images together

    :param int idx: empty parameter for using the function in parallel
    :param str path_img_target: path to the target image
    :param str path_img_source: path to the source image
    :param str path_out: path for exporting the output
    :return tuple(str,float):
    """
    start = time.time()
    # load and denoise reference image
    img_target = io.imread(path_img_target)
    img_target = denoise_wavelet(img_target, wavelet_levels=7, multichannel=True)
    img_target_gray = rgb2gray(img_target)

    # load and denoise moving image
    img_source = io.imread(path_img_source)
    img_source = denoise_bilateral(img_source, sigma_color=0.05,
                                   sigma_spatial=2, multichannel=True)
    img_source_gray = rgb2gray(img_source)

    # detect ORB features on both images
    detector_target = ORB(n_keypoints=150)
    detector_source = ORB(n_keypoints=150)
    detector_target.detect_and_extract(img_target_gray)
    detector_source.detect_and_extract(img_source_gray)
    matches = match_descriptors(detector_target.descriptors,
                                detector_source.descriptors)
    # robustly estimate affine transform model with RANSAC
    model, _ = ransac((detector_target.keypoints[matches[:, 0]],
                       detector_source.keypoints[matches[:, 1]]),
                      AffineTransform, min_samples=25, max_trials=500,
                      residual_threshold=0.95)

    # warping source image with estimated transformations
    img_warped = warp(img_target, model.inverse, output_shape=img_target.shape[:2])
    path_img_warped = os.path.join(path_out, NAME_IMAGE_WARPED % idx)
    io.imsave(path_img_warped, img_warped)
    # summarise experiment
    execution_time = time.time() - start
    return path_img_warped, execution_time 
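The excerpt relies on module-level imports along these lines (a reconstruction, not part of the original file; NAME_IMAGE_WARPED is a constant defined elsewhere in the module):

import os
import time
from skimage import io
from skimage.color import rgb2gray
from skimage.feature import ORB, match_descriptors
from skimage.measure import ransac
from skimage.restoration import denoise_bilateral, denoise_wavelet
from skimage.transform import AffineTransform, warp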
Example #19
Source File: helpers.py    From Feed-Forward-Style-Transfer with MIT License
def load_img_to(path, height=None, width=None):
    """Returns a resized numpy array of an image specified by its path.
    
    Args:
        path: string representing the file path of the image to load
        height: int representing the height value to scale image
        width: int representing width value to scale image
        
    Returns:
        img: numpy array representing the loaded RGB image
    """

    # Load image
    img = skimage.io.imread(path) / 255.0
    if height is not None and width is not None:
        ny = height
        nx = width
    elif height is not None:
        ny = height
        nx = int(img.shape[1] * ny / img.shape[0])  # cast: resize() expects integer dimensions
    elif width is not None:
        nx = width
        ny = int(img.shape[0] * nx / img.shape[1])  # cast: resize() expects integer dimensions
    else:
        ny = img.shape[0]
        nx = img.shape[1]

    if len(img.shape) < 3:
        img = np.dstack((img, img, img))

    return skimage.transform.resize(img, (ny, nx)), [ny, nx, 3] 
Example #20
Source File: query.py    From deep-ranking with MIT License
def __init__(self, root_dir, transform=None, loader=pil_loader):
        """
        Args:
            root_dir (string): Directory with all the images.
            transform (callable, optional): Optional transform to be applied
                on a sample.
            loader (callable): Function used to load an image from a path.
        """
        if transform is None:
            transform = torchvision.transforms.Compose([torchvision.transforms.Resize(224),
                                                        torchvision.transforms.RandomHorizontalFlip(p=0.5),
                                                        torchvision.transforms.RandomVerticalFlip(p=0.5),
                                                        torchvision.transforms.ToTensor()])
        self.root_dir = root_dir
        self.transform = transform
        self.loader = loader
        # class_dict -> n01443537 : 0 etc
        self.class_dict = {}
        # rev_dict -> 0 : n01443537 etc
        self.rev_dict = {}
        # image_dict -> n01443537 : np.array([n01443537_0.JPEG, n01443537_150.JPEG,
        #                                     n01443537_200.JPEG, n01443537_251.JPEG, ...])
        self.image_dict = {}
        # big_dict -> idx : [img_name, class]
        self.big_dict = {}

        L = []

        for i, j in enumerate(os.listdir(self.root_dir)):
            self.class_dict[j] = i
            self.rev_dict[i] = j
            self.image_dict[j] = np.array(os.listdir(os.path.join(self.root_dir, j, 'images')))
            for l in os.listdir(os.path.join(self.root_dir, j, 'images')):
                L.append((l, i))

        for i, j in enumerate(L):
            self.big_dict[i] = j


        self.num_classes = 200 
Example #21
Source File: test_embedding.py    From deep-ranking with MIT License
def __getitem__(self, idx):
        paths, lab = self._sample(idx)
        temp = self.loader(paths)
        if self.transform:
            temp = self.transform(temp)
        return temp, lab
Example #22
Source File: coco.py    From EfficientDet.Pytorch with MIT License
def __getitem__(self, idx):

        img = self.load_image(idx)
        annot = self.load_annotations(idx)
        sample = {'img': img, 'annot': annot}
        if self.transform:
            sample = self.transform(sample)
        return sample 
Example #23
Source File: geometry_utils.py    From MOTSFusion with MIT License
def transformPointImage(point_img, pose):
    coords = point_img.reshape((point_img.shape[0] * point_img.shape[1], point_img.shape[2]))

    # transform points according to estimated camera motion (pose)
    transformation_matrix = pose
    coords = np.column_stack((coords, np.ones(len(coords))))
    coords = coords @ transformation_matrix.T
    coords = coords[:, 0:3]

    coord_img = np.reshape(coords, point_img.shape)
    return coord_img 
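A quick numeric check with a pure-translation pose (a 4x4 homogeneous matrix), assuming numpy as np and the function above in scope:

import numpy as np

pose = np.eye(4)
pose[:3, 3] = [1.0, 2.0, 3.0]          # translate all points by (1, 2, 3)
point_img = np.zeros((2, 2, 3))        # H x W x XYZ
print(transformPointImage(point_img, pose)[0, 0])   # [1. 2. 3.]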
Example #24
Source File: utils.py    From Mask-RCNN-Pedestrian-Detection with MIT License
def box_refinement(box, gt_box):
    """Compute refinement needed to transform box to gt_box.
    box and gt_box are [N, (y1, x1, y2, x2)]. (y2, x2) is
    assumed to be outside the box.
    """
    box = box.astype(np.float32)
    gt_box = gt_box.astype(np.float32)

    height = box[:, 2] - box[:, 0]
    width = box[:, 3] - box[:, 1]
    center_y = box[:, 0] + 0.5 * height
    center_x = box[:, 1] + 0.5 * width

    gt_height = gt_box[:, 2] - gt_box[:, 0]
    gt_width = gt_box[:, 3] - gt_box[:, 1]
    gt_center_y = gt_box[:, 0] + 0.5 * gt_height
    gt_center_x = gt_box[:, 1] + 0.5 * gt_width

    dy = (gt_center_y - center_y) / height
    dx = (gt_center_x - center_x) / width
    dh = np.log(gt_height / height)
    dw = np.log(gt_width / width)

    return np.stack([dy, dx, dh, dw], axis=1)


############################################################
#  Dataset
############################################################ 
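A small numeric check of box_refinement() above: a 10x10 box against a 20x20 ground-truth box whose center is shifted by (10, 10) gives dy = dx = 1.0 and dh = dw = log(2):

import numpy as np

box = np.array([[0., 0., 10., 10.]])    # y1, x1, y2, x2
gt_box = np.array([[5., 5., 25., 25.]])
print(box_refinement(box, gt_box))      # [[1. 1. 0.6931... 0.6931...]]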
Example #25
Source File: utils.py    From PanopticSegmentation with MIT License
def box_refinement(box, gt_box):
    """Compute refinement needed to transform box to gt_box.
    box and gt_box are [N, (y1, x1, y2, x2)]. (y2, x2) is
    assumed to be outside the box.
    """
    box = box.astype(np.float32)
    gt_box = gt_box.astype(np.float32)

    height = box[:, 2] - box[:, 0]
    width = box[:, 3] - box[:, 1]
    center_y = box[:, 0] + 0.5 * height
    center_x = box[:, 1] + 0.5 * width

    gt_height = gt_box[:, 2] - gt_box[:, 0]
    gt_width = gt_box[:, 3] - gt_box[:, 1]
    gt_center_y = gt_box[:, 0] + 0.5 * gt_height
    gt_center_x = gt_box[:, 1] + 0.5 * gt_width

    dy = (gt_center_y - center_y) / height
    dx = (gt_center_x - center_x) / width
    dh = np.log(gt_height / height)
    dw = np.log(gt_width / width)

    return np.stack([dy, dx, dh, dw], axis=1)


############################################################
#  Dataset
############################################################ 
Example #26
Source File: dataloader.py    From EfficientDet-PyTorch with Apache License 2.0
def __getitem__(self, idx):

        img = self.load_image(idx)
        annot = self.load_annotations(idx)
        sample = {'img': img, 'annot': annot}
        if self.transform:
            sample = self.transform(sample)

        return sample 
Example #27
Source File: dataloader.py    From EfficientDet-PyTorch with Apache License 2.0
def load_annotations(self, image_index):
        # get ground truth annotations
        annotations_ids = self.coco.getAnnIds(imgIds=self.image_ids[image_index], iscrowd=False)
        annotations     = np.zeros((0, 5))

        # some images appear to miss annotations (like image with id 257034)
        if len(annotations_ids) == 0:
            return annotations

        # parse annotations
        coco_annotations = self.coco.loadAnns(annotations_ids)
        for idx, a in enumerate(coco_annotations):

            # some annotations have basically no width / height, skip them
            if a['bbox'][2] < 1 or a['bbox'][3] < 1:
                continue

            annotation        = np.zeros((1, 5))
            annotation[0, :4] = a['bbox']
            annotation[0, 4]  = self.coco_label_to_label(a['category_id'])
            annotations       = np.append(annotations, annotation, axis=0)

        # transform from [x, y, w, h] to [x1, y1, x2, y2]
        annotations[:, 2] = annotations[:, 0] + annotations[:, 2]
        annotations[:, 3] = annotations[:, 1] + annotations[:, 3]

        return annotations 
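The last two assignments convert COCO's [x, y, w, h] boxes to [x1, y1, x2, y2] corners; for example, [10, 20, 30, 40] becomes [10, 20, 40, 60]:

import numpy as np

a = np.array([[10., 20., 30., 40., 0.]])   # x, y, w, h, label
a[:, 2] = a[:, 0] + a[:, 2]
a[:, 3] = a[:, 1] + a[:, 3]
print(a)                                   # [[10. 20. 40. 60. 0.]]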
Example #28
Source File: image_loader.py    From CVTron with Apache License 2.0
def load_image(path, height, width):
    img = skimage.io.imread(path)
    img = img / 255.0
    assert (0 <= img).all() and (img <= 1.0).all()
    short_edge = min(img.shape[:2])
    yy = int((img.shape[0] - short_edge) / 2)
    xx = int((img.shape[1] - short_edge) / 2)
    crop_img = img[yy:yy + short_edge, xx:xx + short_edge]
    resized_img = skimage.transform.resize(crop_img, (height, width))
    return resized_img
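Hedged usage sketch ('photo.jpg' is an illustrative path; skimage.io and skimage.transform must be imported at module level): center-crops the image to a square and resizes it.

resized = load_image('photo.jpg', 224, 224)
print(resized.shape)   # (224, 224, 3) for an RGB input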