Python skimage.transform Examples

The following are 28 code examples of the skimage.transform module. Each example is an excerpt from an open-source project; the project, source file, and license are listed above the code. You may also want to check out all available functions and classes of the module skimage.
Example #1
Source File: utils.py    From dataiku-contrib with Apache License 2.0
def resize(image, output_shape, order=1, mode='constant', cval=0, clip=True,
           preserve_range=False, anti_aliasing=False, anti_aliasing_sigma=None):
    """A wrapper for Scikit-Image resize().
    Scikit-Image generates warnings on every call to resize() if it doesn't
    receive the right parameters. The right parameters depend on the version
    of skimage. This solves the problem by using different parameters per
    version. And it provides a central place to control resizing defaults.
    """
    if LooseVersion(skimage.__version__) >= LooseVersion("0.14"):
        # New in 0.14: anti_aliasing. Default it to False for backward
        # compatibility with skimage 0.13.
        return skimage.transform.resize(
            image, output_shape,
            order=order, mode=mode, cval=cval, clip=clip,
            preserve_range=preserve_range, anti_aliasing=anti_aliasing,
            anti_aliasing_sigma=anti_aliasing_sigma)
    else:
        return skimage.transform.resize(
            image, output_shape,
            order=order, mode=mode, cval=cval, clip=clip,
            preserve_range=preserve_range) 
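A quick usage sketch of this wrapper. The imports below are assumptions about the surrounding module (they are not shown in the excerpt):

import numpy as np
import skimage
import skimage.transform
from distutils.version import LooseVersion

image = np.random.rand(256, 256, 3)                    # dummy float RGB image in [0, 1]
small = resize(image, (128, 128), anti_aliasing=True)  # wrapper defined above
print(small.shape)                                     # (128, 128, 3)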
Example #2
Source File: test_embedding.py    From deep-ranking with MIT License
def __init__(self, root_dir, transform=None, loader=pil_loader):
        """
        Args:
            root_dir (string): Directory with all the images.
            transform (callable, optional): Optional transform to be applied
                on a sample.
            loader (callable): Function used to load an image from a path.
        """
        if transform is None:
            transform = torchvision.transforms.Compose([torchvision.transforms.Resize(224),
                                                        torchvision.transforms.RandomHorizontalFlip(p=0.5),
                                                        torchvision.transforms.RandomVerticalFlip(p=0.5),
                                                        torchvision.transforms.ToTensor()])
        self.root_dir = root_dir
        self.transform = transform
        self.loader = loader

        self.images = os.listdir(self.root_dir)

        self.image_class = np.array(pd.read_csv('val_details.txt', sep='\t')[['mage','class']]).astype('str')
        self.class_dic = {}
        for i in self.image_class:
            self.class_dic[i[0]] = i[1]
Example #3
Source File: utils.py    From Mask-RCNN-Pedestrian-Detection with MIT License
def box_refinement_graph(box, gt_box):
    """Compute refinement needed to transform box to gt_box.
    box and gt_box are [N, (y1, x1, y2, x2)]
    """
    box = tf.cast(box, tf.float32)
    gt_box = tf.cast(gt_box, tf.float32)

    height = box[:, 2] - box[:, 0]
    width = box[:, 3] - box[:, 1]
    center_y = box[:, 0] + 0.5 * height
    center_x = box[:, 1] + 0.5 * width

    gt_height = gt_box[:, 2] - gt_box[:, 0]
    gt_width = gt_box[:, 3] - gt_box[:, 1]
    gt_center_y = gt_box[:, 0] + 0.5 * gt_height
    gt_center_x = gt_box[:, 1] + 0.5 * gt_width

    dy = (gt_center_y - center_y) / height
    dx = (gt_center_x - center_x) / width
    dh = tf.log(gt_height / height)
    dw = tf.log(gt_width / width)

    result = tf.stack([dy, dx, dh, dw], axis=1)
    return result 
Example #4
Source File: helpers.py    From tutorials with Apache License 2.0
def rescale(img, input_height, input_width):
    # print("Original image shape:" + str(img.shape) + " --> it should be in H, W, C!")
    # print("Model's input shape is %dx%d" % (input_height, input_width))
    aspect = img.shape[1] / float(img.shape[0])
    # print("Original aspect ratio: " + str(aspect))
    if aspect > 1:
        # landscape orientation - wide image
        res = int(aspect * input_height)
        imgScaled = skimage.transform.resize(
            img,
            (input_width, res),
            preserve_range=False)
    elif aspect < 1:
        # portrait orientation - tall image
        res = int(input_width / aspect)
        imgScaled = skimage.transform.resize(
            img,
            (res, input_height),
            preserve_range=False)
    else:
        # square image
        imgScaled = skimage.transform.resize(
            img,
            (input_width, input_height),
            preserve_range=False)
    return imgScaled
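A hedged usage sketch: for a 240x320 landscape input and a 224x224 model input, aspect = 320/240, so res = int(aspect * 224) = 298:

import numpy as np
import skimage.transform

img = np.random.rand(240, 320, 3)    # H, W, C with aspect > 1
print(rescale(img, 224, 224).shape)  # (224, 298, 3)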
Example #5
Source File: utils.py    From Mask-RCNN-Pedestrian-Detection with MIT License
def minimize_mask(bbox, mask, mini_shape):
    """Resize masks to a smaller version to reduce memory load.
    Mini-masks can be resized back to image scale using expand_masks()

    See inspect_data.ipynb notebook for more details.
    """
    mini_mask = np.zeros(mini_shape + (mask.shape[-1],), dtype=bool)
    for i in range(mask.shape[-1]):
        m = mask[:, :, i]
        y1, x1, y2, x2 = bbox[i][:4]
        m = m[y1:y2, x1:x2]
        if m.size == 0:
            raise Exception("Invalid bounding box with area of zero")
        # Resize with bilinear interpolation
        m = skimage.transform.resize(m, mini_shape, order=1, mode="constant")
        mini_mask[:, :, i] = np.around(m).astype(bool)  # np.bool is removed in NumPy 1.24+
    return mini_mask 
Example #6
Source File: utils.py    From dataiku-contrib with Apache License 2.0
def box_refinement_graph(box, gt_box):
    """Compute refinement needed to transform box to gt_box.
    box and gt_box are [N, (y1, x1, y2, x2)]
    """
    box = tf.cast(box, tf.float32)
    gt_box = tf.cast(gt_box, tf.float32)

    height = box[:, 2] - box[:, 0]
    width = box[:, 3] - box[:, 1]
    center_y = box[:, 0] + 0.5 * height
    center_x = box[:, 1] + 0.5 * width

    gt_height = gt_box[:, 2] - gt_box[:, 0]
    gt_width = gt_box[:, 3] - gt_box[:, 1]
    gt_center_y = gt_box[:, 0] + 0.5 * gt_height
    gt_center_x = gt_box[:, 1] + 0.5 * gt_width

    dy = (gt_center_y - center_y) / height
    dx = (gt_center_x - center_x) / width
    dh = tf.log(gt_height / height)
    dw = tf.log(gt_width / width)

    result = tf.stack([dy, dx, dh, dw], axis=1)
    return result 
Example #7
Source File: query.py    From deep-ranking with MIT License
def __init__(self, root_dir, transform=None, loader=pil_loader):
        """
        Args:
            root_dir (string): Directory with all the images.
            transform (callable, optional): Optional transform to be applied
                on a sample.
            loader (callable): Function used to load an image from a path.
        """
        if transform is None:
            transform = torchvision.transforms.Compose([torchvision.transforms.Resize(224),
                                                        torchvision.transforms.RandomHorizontalFlip(p=0.5),
                                                        torchvision.transforms.RandomVerticalFlip(p=0.5),
                                                        torchvision.transforms.ToTensor()])
        self.root_dir = root_dir
        self.transform = transform
        self.loader = loader

        self.images = os.listdir(self.root_dir)

        self.image_class = np.array(pd.read_csv('val_details.txt', sep='\t')[['mage','class']]).astype('str')
        self.class_dic = {}
        for i in self.image_class:
            self.class_dic[i[0]] = i[1]
Example #8
Source File: utils.py    From Mask-RCNN-Pedestrian-Detection with MIT License
def expand_mask(bbox, mini_mask, image_shape):
    """Resizes mini masks back to image size. Reverses the change
    of minimize_mask().

    See inspect_data.ipynb notebook for more details.
    """
    mask = np.zeros(image_shape[:2] + (mini_mask.shape[-1],), dtype=bool)
    for i in range(mask.shape[-1]):
        m = mini_mask[:, :, i]
        y1, x1, y2, x2 = bbox[i][:4]
        h = y2 - y1
        w = x2 - x1
        # Resize with bilinear interpolation
        m = skimage.transform.resize(m, (h, w), order=1, mode="constant")
        mask[y1:y2, x1:x2, i] = np.around(m).astype(bool)  # np.bool is removed in NumPy 1.24+
    return mask


# TODO: Build and use this function to reduce code duplication 
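A minimal round-trip sketch of minimize_mask() and expand_mask() above, assuming numpy as np and skimage.transform are imported:

import numpy as np

mask = np.zeros((128, 128, 1), dtype=bool)
mask[32:96, 32:96, 0] = True                # one square instance
bbox = np.array([[32, 32, 96, 96]])         # y1, x1, y2, x2
mini = minimize_mask(bbox, mask, (28, 28))  # (28, 28, 1) boolean mini-mask
full = expand_mask(bbox, mini, (128, 128))  # back to (128, 128, 1)
print(mini.shape, full.shape)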
Example #9
Source File: helpers.py    From Feed-Forward-Style-Transfer with MIT License
def load_img(path):
    """Returns a numpy array of an image specified by its path.
    
    Args:
        path: string representing the file path of the image to load
        
    Returns:
        resized_img: numpy array representing the loaded RGB image
        shape: the image shape
    """

    # Load image [height, width, depth]
    img = skimage.io.imread(path) / 255.0
    assert (0 <= img).all() and (img <= 1.0).all()

    # Crop image from center
    short_edge = min(img.shape[:2])
    yy = int((img.shape[0] - short_edge) / 2)
    xx = int((img.shape[1] - short_edge) / 2)
    shape = list(img.shape)

    crop_img = img[yy: yy + short_edge, xx: xx + short_edge]
    resized_img = skimage.transform.resize(crop_img, (shape[0], shape[1]))
    return resized_img, shape 
Example #10
Source File: grid.py    From pysheds with GNU General Public License v3.0
def _convert_grid_indices_crs(self, grid_indices, old_crs, new_crs):
        if _OLD_PYPROJ:
            x2, y2 = pyproj.transform(old_crs, new_crs, grid_indices[:,1],
                                    grid_indices[:,0])
        else:
            x2, y2 = pyproj.transform(old_crs, new_crs, grid_indices[:,1],
                                      grid_indices[:,0], errcheck=True,
                                      always_xy=True)
        yx2 = np.column_stack([y2, x2])
        return yx2

    # def _convert_outer_indices_crs(self, affine, shape, old_crs, new_crs):
    #     y1, x1 = self.grid_indices(affine=affine, shape=shape)
    #     lx, _ = pyproj.transform(old_crs, new_crs,
    #                               x1, np.repeat(y1[0], len(x1)))
    #     rx, _ = pyproj.transform(old_crs, new_crs,
    #                               x1, np.repeat(y1[-1], len(x1)))
    #     __, by = pyproj.transform(old_crs, new_crs,
    #                               np.repeat(x1[0], len(y1)), y1)
    #     __, uy = pyproj.transform(old_crs, new_crs,
    #                               np.repeat(x1[-1], len(y1)), y1)
    #     return by, uy, lx, rx 
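pyproj.transform() is deprecated in pyproj 2.x; a rough modern equivalent of the new-pyproj branch above uses a cached Transformer object (the CRS codes and coordinates here are illustrative):

import numpy as np
from pyproj import Transformer

transformer = Transformer.from_crs("EPSG:4326", "EPSG:3857", always_xy=True)
grid_indices = np.array([[45.0, 7.0], [46.0, 8.0]])    # rows of (y, x)
x2, y2 = transformer.transform(grid_indices[:, 1], grid_indices[:, 0])
yx2 = np.column_stack([y2, x2])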
Example #11
Source File: coco.py    From EfficientDet.Pytorch with MIT License
def __init__(self, root_dir, set_name='train2017', transform=None):
        """
        Args:
            root_dir (string): COCO directory.
            set_name (string): Annotation split to load, e.g. 'train2017'.
            transform (callable, optional): Optional transform to be applied
                on a sample.
        """
        self.root_dir = root_dir
        self.set_name = set_name
        self.transform = transform

        self.coco = COCO(os.path.join(self.root_dir, 'annotations',
                                      'instances_' + self.set_name + '.json'))
        self.image_ids = self.coco.getImgIds()

        self.load_classes() 
Example #12
Source File: image_processing.py    From text-to-image with MIT License
def load_image_array(image_file, image_size):
	img = skimage.io.imread(image_file)
	# GRAYSCALE
	if len(img.shape) == 2:
		img_new = np.ndarray( (img.shape[0], img.shape[1], 3), dtype = 'uint8')
		img_new[:,:,0] = img
		img_new[:,:,1] = img
		img_new[:,:,2] = img
		img = img_new

	img_resized = skimage.transform.resize(img, (image_size, image_size))

	# FLIP HORIZONTALLY WITH A PROBABILITY OF 0.5
	if random.random() > 0.5:
		img_resized = np.fliplr(img_resized)

	return img_resized.astype('float32')
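Hedged usage (the file name is illustrative): for an RGB or grayscale input the function returns a square float32 array, randomly mirrored half the time:

arr = load_image_array('flower.jpg', 64)
print(arr.shape, arr.dtype)   # (64, 64, 3) float32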
Example #13
Source File: utils.py    From PanopticSegmentation with MIT License
def resize(image, output_shape, order=1, mode='constant', cval=0, clip=True,
           preserve_range=False, anti_aliasing=False, anti_aliasing_sigma=None):
    """A wrapper for Scikit-Image resize().

    Scikit-Image generates warnings on every call to resize() if it doesn't
    receive the right parameters. The right parameters depend on the version
    of skimage. This solves the problem by using different parameters per
    version. And it provides a central place to control resizing defaults.
    """
    if LooseVersion(skimage.__version__) >= LooseVersion("0.14"):
        # New in 0.14: anti_aliasing. Default it to False for backward
        # compatibility with skimage 0.13.
        return skimage.transform.resize(
            image, output_shape,
            order=order, mode=mode, cval=cval, clip=clip,
            preserve_range=preserve_range, anti_aliasing=anti_aliasing,
            anti_aliasing_sigma=anti_aliasing_sigma)
    else:
        return skimage.transform.resize(
            image, output_shape,
            order=order, mode=mode, cval=cval, clip=clip,
            preserve_range=preserve_range) 
Example #14
Source File: utils.py    From PanopticSegmentation with MIT License
def box_refinement_graph(box, gt_box):
    """Compute refinement needed to transform box to gt_box.
    box and gt_box are [N, (y1, x1, y2, x2)]
    """
    box = tf.cast(box, tf.float32)
    gt_box = tf.cast(gt_box, tf.float32)

    height = box[:, 2] - box[:, 0]
    width = box[:, 3] - box[:, 1]
    center_y = box[:, 0] + 0.5 * height
    center_x = box[:, 1] + 0.5 * width

    gt_height = gt_box[:, 2] - gt_box[:, 0]
    gt_width = gt_box[:, 3] - gt_box[:, 1]
    gt_center_y = gt_box[:, 0] + 0.5 * gt_height
    gt_center_x = gt_box[:, 1] + 0.5 * gt_width

    dy = (gt_center_y - center_y) / height
    dx = (gt_center_x - center_x) / width
    dh = tf.log(gt_height / height)
    dw = tf.log(gt_width / width)

    result = tf.stack([dy, dx, dh, dw], axis=1)
    return result 
Example #15
Source File: dataloader.py    From EfficientDet-PyTorch with Apache License 2.0
def __call__(self, sample):
        image, annots = sample['img'], sample['annot']

        rows, cols, cns = image.shape

        largest_side = max(rows, cols)

        scale = self.img_size / largest_side

        # resize the image with the computed scale
        image = skimage.transform.resize(image, (int(round(rows * scale)), int(round(cols * scale))))
        rows, cols, cns = image.shape

        new_image = np.zeros((self.img_size, self.img_size, cns)).astype(np.float32)
        new_image[:rows, :cols, :] = image.astype(np.float32)

        annots[:, :4] *= scale

        return {'img': torch.from_numpy(new_image), 'annot': torch.from_numpy(annots), 'scale': scale} 
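The resizer class itself is not shown in this excerpt, but the letterbox arithmetic in __call__ can be checked standalone; for a 300x400 image and img_size = 512:

rows, cols, img_size = 300, 400, 512
scale = img_size / max(rows, cols)                          # 1.28
print(int(round(rows * scale)), int(round(cols * scale)))   # 384 512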
Example #16
Source File: c3d.py    From Recipes with MIT License
def set_weights(net,model_file):
    '''
    Sets the parameters of the model using the weights stored in model_file
    Parameters
    ----------
    net: a Lasagne layer

    model_file: string
        path to the model that contains the weights

    Returns
    -------
    None

    '''
    with open(model_file, 'rb') as f:  # binary mode is required by pickle under Python 3
        print('Load pretrained weights from %s...' % model_file)
        model = pickle.load(f)
    print('Set the weights...')
    lasagne.layers.set_all_param_values(net, model, trainable=True)


######## Below, there are several helper functions to transform (lists of) images into the right format  ###### 
Example #17
Source File: data_utils.py    From deep-ranking with MIT License
def __init__(self, root_dir='tiny-imagenet-200', transform=None, loader=pil_loader):
        """
        Args:
            root_dir (string): Directory with all the images.
            transform (callable, optional): Optional transform to be applied
                on a sample.
            loader (callable): Function used to load an image from a path.
        """
        if transform is None:
            transform = torchvision.transforms.Compose([torchvision.transforms.Resize(224),
                                                        torchvision.transforms.RandomHorizontalFlip(p=0.5),
                                                        torchvision.transforms.RandomVerticalFlip(p=0.5),
                                                        torchvision.transforms.ToTensor()])

        self.root_dir = root_dir
        self.transform = transform
        self.loader = loader
        self.class_dict = {}
        self.rev_dict = {}
        self.image_dict = {}
        self.big_dict = {}
        L = []
        
        for i, j in enumerate(os.listdir(os.path.join(self.root_dir, 'train'))):
            self.class_dict[j] = i
            self.rev_dict[i] = j
            self.image_dict[j] = np.array(os.listdir(os.path.join(self.root_dir, 'train', j, 'images')))
            for l in os.listdir(os.path.join(self.root_dir, 'train', j, 'images')):
                L.append((l, i))

        for i, j in enumerate(L):
            self.big_dict[i] = j

        self.num_classes = 200
Example #18
Source File: bm_comp_perform.py    From BIRL with BSD 3-Clause "New" or "Revised" License
def register_image_pair(idx, path_img_target, path_img_source, path_out):
    """ register two images together

    :param int idx: empty parameter for using the function in parallel
    :param str path_img_target: path to the target image
    :param str path_img_source: path to the source image
    :param str path_out: path for exporting the output
    :return tuple(str,float):
    """
    start = time.time()
    # load and denoise reference image
    img_target = io.imread(path_img_target)
    img_target = denoise_wavelet(img_target, wavelet_levels=7, multichannel=True)
    img_target_gray = rgb2gray(img_target)

    # load and denoise moving image
    img_source = io.imread(path_img_source)
    img_source = denoise_bilateral(img_source, sigma_color=0.05,
                                   sigma_spatial=2, multichannel=True)
    img_source_gray = rgb2gray(img_source)

    # detect ORB features on both images
    detector_target = ORB(n_keypoints=150)
    detector_source = ORB(n_keypoints=150)
    detector_target.detect_and_extract(img_target_gray)
    detector_source.detect_and_extract(img_source_gray)
    matches = match_descriptors(detector_target.descriptors,
                                detector_source.descriptors)
    # robustly estimate affine transform model with RANSAC
    model, _ = ransac((detector_target.keypoints[matches[:, 0]],
                       detector_source.keypoints[matches[:, 1]]),
                      AffineTransform, min_samples=25, max_trials=500,
                      residual_threshold=0.95)

    # warping source image with estimated transformations
    img_warped = warp(img_target, model.inverse, output_shape=img_target.shape[:2])
    path_img_warped = os.path.join(path_out, NAME_IMAGE_WARPED % idx)
    io.imsave(path_img_warped, img_warped)
    # summarise experiment
    execution_time = time.time() - start
    return path_img_warped, execution_time 
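The excerpt relies on module-level imports along these lines (a reconstruction, not part of the original file; NAME_IMAGE_WARPED is a constant defined elsewhere in the module):

import os
import time
from skimage import io
from skimage.color import rgb2gray
from skimage.feature import ORB, match_descriptors
from skimage.measure import ransac
from skimage.restoration import denoise_bilateral, denoise_wavelet
from skimage.transform import AffineTransform, warp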
Example #19
Source File: helpers.py    From Feed-Forward-Style-Transfer with MIT License
def load_img_to(path, height=None, width=None):
    """Returns a resized numpy array of an image specified by its path.
    
    Args:
        path: string representing the file path of the image to load
        height: int representing the height value to scale image
        width: int representing width value to scale image
        
    Returns:
        img: numpy array representing the loaded RGB image
    """

    # Load image
    img = skimage.io.imread(path) / 255.0
    if height is not None and width is not None:
        ny = height
        nx = width
    elif height is not None:
        ny = height
        nx = int(img.shape[1] * ny / img.shape[0])  # cast: resize() expects integer dimensions
    elif width is not None:
        nx = width
        ny = int(img.shape[0] * nx / img.shape[1])  # cast: resize() expects integer dimensions
    else:
        ny = img.shape[0]
        nx = img.shape[1]

    if len(img.shape) < 3:
        img = np.dstack((img, img, img))

    return skimage.transform.resize(img, (ny, nx)), [ny, nx, 3] 
Example #20
Source File: query.py    From deep-ranking with MIT License
def __init__(self, root_dir, transform=None, loader=pil_loader):
        """
        Args:
            root_dir (string): Directory with all the images.
            transform (callable, optional): Optional transform to be applied
                on a sample.
            loader (callable): Function used to load an image from a path.
        """
        if transform is None:
            transform = torchvision.transforms.Compose([torchvision.transforms.Resize(224),
                                                        torchvision.transforms.RandomHorizontalFlip(p=0.5),
                                                        torchvision.transforms.RandomVerticalFlip(p=0.5),
                                                        torchvision.transforms.ToTensor()])
        self.root_dir = root_dir
        self.transform = transform
        self.loader = loader
        # class_dict -> n01443537 : 0 etc
        self.class_dict = {}
        # rev_dict -> 0 : n01443537 etc
        self.rev_dict = {}
        # image_dict -> n01443537 : np.array([n01443537_0.JPEG, n01443537_150.JPEG,
        #                                     n01443537_200.JPEG, n01443537_251.JPEG, ...])
        self.image_dict = {}
        # big_dict -> idx : [img_name, class]
        self.big_dict = {}

        L = []

        for i, j in enumerate(os.listdir(self.root_dir)):
            self.class_dict[j] = i
            self.rev_dict[i] = j
            self.image_dict[j] = np.array(os.listdir(os.path.join(self.root_dir, j, 'images')))
            for l in os.listdir(os.path.join(self.root_dir, j, 'images')):
                L.append((l, i))

        for i, j in enumerate(L):
            self.big_dict[i] = j


        self.num_classes = 200 
Example #21
Source File: test_embedding.py    From deep-ranking with MIT License
def __getitem__(self, idx):
        paths, lab = self._sample(idx)
        temp = self.loader(paths)
        if self.transform:
            temp = self.transform(temp)
        return temp, lab
Example #22
Source File: coco.py    From EfficientDet.Pytorch with MIT License
def __getitem__(self, idx):

        img = self.load_image(idx)
        annot = self.load_annotations(idx)
        sample = {'img': img, 'annot': annot}
        if self.transform:
            sample = self.transform(sample)
        return sample 
Example #23
Source File: geometry_utils.py    From MOTSFusion with MIT License
def transformPointImage(point_img, pose):
    coords = point_img.reshape((point_img.shape[0] * point_img.shape[1], point_img.shape[2]))

    # transform points according to estimated camera motion (pose)
    transformation_matrix = pose
    coords = np.column_stack((coords, np.ones(len(coords))))
    coords = coords @ transformation_matrix.T
    coords = coords[:, 0:3]

    coord_img = np.reshape(coords, point_img.shape)
    return coord_img 
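A quick numeric check with a pure-translation pose (a 4x4 homogeneous matrix), assuming numpy as np and the function above in scope:

import numpy as np

pose = np.eye(4)
pose[:3, 3] = [1.0, 2.0, 3.0]          # translate all points by (1, 2, 3)
point_img = np.zeros((2, 2, 3))        # H x W x XYZ
print(transformPointImage(point_img, pose)[0, 0])   # [1. 2. 3.]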
Example #24
Source File: utils.py    From Mask-RCNN-Pedestrian-Detection with MIT License
def box_refinement(box, gt_box):
    """Compute refinement needed to transform box to gt_box.
    box and gt_box are [N, (y1, x1, y2, x2)]. (y2, x2) is
    assumed to be outside the box.
    """
    box = box.astype(np.float32)
    gt_box = gt_box.astype(np.float32)

    height = box[:, 2] - box[:, 0]
    width = box[:, 3] - box[:, 1]
    center_y = box[:, 0] + 0.5 * height
    center_x = box[:, 1] + 0.5 * width

    gt_height = gt_box[:, 2] - gt_box[:, 0]
    gt_width = gt_box[:, 3] - gt_box[:, 1]
    gt_center_y = gt_box[:, 0] + 0.5 * gt_height
    gt_center_x = gt_box[:, 1] + 0.5 * gt_width

    dy = (gt_center_y - center_y) / height
    dx = (gt_center_x - center_x) / width
    dh = np.log(gt_height / height)
    dw = np.log(gt_width / width)

    return np.stack([dy, dx, dh, dw], axis=1)


############################################################
#  Dataset
############################################################ 
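A small numeric check of box_refinement() above: a 10x10 box against a 20x20 ground-truth box whose center is shifted by (10, 10) gives dy = dx = 1.0 and dh = dw = log(2):

import numpy as np

box = np.array([[0., 0., 10., 10.]])    # y1, x1, y2, x2
gt_box = np.array([[5., 5., 25., 25.]])
print(box_refinement(box, gt_box))      # [[1. 1. 0.6931... 0.6931...]]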
Example #25
Source File: utils.py    From PanopticSegmentation with MIT License
def box_refinement(box, gt_box):
    """Compute refinement needed to transform box to gt_box.
    box and gt_box are [N, (y1, x1, y2, x2)]. (y2, x2) is
    assumed to be outside the box.
    """
    box = box.astype(np.float32)
    gt_box = gt_box.astype(np.float32)

    height = box[:, 2] - box[:, 0]
    width = box[:, 3] - box[:, 1]
    center_y = box[:, 0] + 0.5 * height
    center_x = box[:, 1] + 0.5 * width

    gt_height = gt_box[:, 2] - gt_box[:, 0]
    gt_width = gt_box[:, 3] - gt_box[:, 1]
    gt_center_y = gt_box[:, 0] + 0.5 * gt_height
    gt_center_x = gt_box[:, 1] + 0.5 * gt_width

    dy = (gt_center_y - center_y) / height
    dx = (gt_center_x - center_x) / width
    dh = np.log(gt_height / height)
    dw = np.log(gt_width / width)

    return np.stack([dy, dx, dh, dw], axis=1)


############################################################
#  Dataset
############################################################ 
Example #26
Source File: dataloader.py    From EfficientDet-PyTorch with Apache License 2.0
def __getitem__(self, idx):

        img = self.load_image(idx)
        annot = self.load_annotations(idx)
        sample = {'img': img, 'annot': annot}
        if self.transform:
            sample = self.transform(sample)

        return sample 
Example #27
Source File: dataloader.py    From EfficientDet-PyTorch with Apache License 2.0
def load_annotations(self, image_index):
        # get ground truth annotations
        annotations_ids = self.coco.getAnnIds(imgIds=self.image_ids[image_index], iscrowd=False)
        annotations     = np.zeros((0, 5))

        # some images appear to miss annotations (like image with id 257034)
        if len(annotations_ids) == 0:
            return annotations

        # parse annotations
        coco_annotations = self.coco.loadAnns(annotations_ids)
        for idx, a in enumerate(coco_annotations):

            # some annotations have basically no width / height, skip them
            if a['bbox'][2] < 1 or a['bbox'][3] < 1:
                continue

            annotation        = np.zeros((1, 5))
            annotation[0, :4] = a['bbox']
            annotation[0, 4]  = self.coco_label_to_label(a['category_id'])
            annotations       = np.append(annotations, annotation, axis=0)

        # transform from [x, y, w, h] to [x1, y1, x2, y2]
        annotations[:, 2] = annotations[:, 0] + annotations[:, 2]
        annotations[:, 3] = annotations[:, 1] + annotations[:, 3]

        return annotations 
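The last two assignments convert COCO's [x, y, w, h] boxes to [x1, y1, x2, y2] corners; for example, [10, 20, 30, 40] becomes [10, 20, 40, 60]:

import numpy as np

a = np.array([[10., 20., 30., 40., 0.]])   # x, y, w, h, label
a[:, 2] = a[:, 0] + a[:, 2]
a[:, 3] = a[:, 1] + a[:, 3]
print(a)                                   # [[10. 20. 40. 60. 0.]]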
Example #28
Source File: image_loader.py    From CVTron with Apache License 2.0
def load_image(path, height, width):
    img = skimage.io.imread(path)
    img = img / 255.0
    assert (0 <= img).all() and (img <= 1.0).all()
    short_edge = min(img.shape[:2])
    yy = int((img.shape[0] - short_edge) / 2)
    xx = int((img.shape[1] - short_edge) / 2)
    crop_img = img[yy:yy + short_edge, xx:xx + short_edge]
    resized_img = skimage.transform.resize(crop_img, (height, width))
    return resized_img
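Hedged usage sketch ('photo.jpg' is an illustrative path; skimage.io and skimage.transform must be imported at module level): center-crops the image to a square and resizes it.

resized = load_image('photo.jpg', 224, 224)
print(resized.shape)   # (224, 224, 3) for an RGB input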