Python albumentations.Compose() Examples

The following are 29 code examples of albumentations.Compose(), drawn from open-source projects. Each example notes its source file, the project it comes from, and that project's license. You may also want to check out all available functions/classes of the module albumentations.
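Before the project examples, here is a minimal, self-contained sketch of how Compose() is typically used; the image and mask below are synthetic and the particular transforms are illustrative choices, not taken from any of the projects listed.

import albumentations as A
import numpy as np

# Compose chains several transforms into a single callable pipeline.
transform = A.Compose(
    [
        A.HorizontalFlip(p=0.5),
        A.RandomBrightnessContrast(p=0.2),
        A.Resize(256, 256),
    ]
)

# Synthetic inputs; real code would load an image (e.g. with cv2.imread).
image = np.random.randint(0, 256, size=(480, 640, 3), dtype=np.uint8)
mask = np.random.randint(0, 2, size=(480, 640), dtype=np.uint8)

# The pipeline is called with named targets; spatial transforms are applied
# consistently to the image and the mask, and results come back in a dict.
augmented = transform(image=image, mask=mask)
print(augmented["image"].shape, augmented["mask"].shape)  # (256, 256, 3) (256, 256)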
Example #1
Source File: cloud_transform.py    From kaggle-understanding-clouds with BSD 2-Clause "Simplified" License
def get_training_augmentation(resize_to=(320,640), crop_size=(288,576)):
    print('[get_training_augmentation] crop_size:', crop_size, ', resize_to:', resize_to) 

    train_transform = [
        albu.HorizontalFlip(p=0.5),
        albu.VerticalFlip(p=0.5),
        albu.ShiftScaleRotate(scale_limit=0.20, rotate_limit=10, shift_limit=0.1, p=0.5, border_mode=cv2.BORDER_CONSTANT, value=0),
        albu.GridDistortion(p=0.5),
        albu.Resize(*resize_to),
        albu.RandomCrop(*crop_size),
        albu.ChannelShuffle(),
        albu.InvertImg(),
        albu.ToGray(),
        albu.Normalize(),
    ]

    return albu.Compose(train_transform) 
Example #2
Source File: test_serialization.py    From albumentations with MIT License
def test_transform_pipeline_serialization_with_keypoints(seed, image, keypoints, keypoint_format, labels):
    aug = A.Compose(
        [
            A.OneOrOther(
                A.Compose([A.RandomRotate90(), A.OneOf([A.HorizontalFlip(p=0.5), A.VerticalFlip(p=0.5)])]),
                A.Compose([A.Rotate(p=0.5), A.OneOf([A.HueSaturationValue(p=0.5), A.RGBShift(p=0.7)], p=1)]),
            ),
            A.HorizontalFlip(p=1),
            A.RandomBrightnessContrast(p=0.5),
        ],
        keypoint_params={"format": keypoint_format, "label_fields": ["labels"]},
    )
    serialized_aug = A.to_dict(aug)
    deserialized_aug = A.from_dict(serialized_aug)
    set_seed(seed)
    aug_data = aug(image=image, keypoints=keypoints, labels=labels)
    set_seed(seed)
    deserialized_aug_data = deserialized_aug(image=image, keypoints=keypoints, labels=labels)
    assert np.array_equal(aug_data["image"], deserialized_aug_data["image"])
    assert np.array_equal(aug_data["keypoints"], deserialized_aug_data["keypoints"]) 
Example #3
Source File: test_serialization.py    From albumentations with MIT License
def test_transform_pipeline_serialization(seed, image, mask):
    aug = A.Compose(
        [
            A.OneOrOther(
                A.Compose(
                    [
                        A.Resize(1024, 1024),
                        A.RandomSizedCrop(min_max_height=(256, 1024), height=512, width=512, p=1),
                        A.OneOf(
                            [
                                A.RandomSizedCrop(min_max_height=(256, 512), height=384, width=384, p=0.5),
                                A.RandomSizedCrop(min_max_height=(256, 512), height=512, width=512, p=0.5),
                            ]
                        ),
                    ]
                ),
                A.Compose(
                    [
                        A.Resize(1024, 1024),
                        A.RandomSizedCrop(min_max_height=(256, 1025), height=256, width=256, p=1),
                        A.OneOf([A.HueSaturationValue(p=0.5), A.RGBShift(p=0.7)], p=1),
                    ]
                ),
            ),
            A.HorizontalFlip(p=1),
            A.RandomBrightnessContrast(p=0.5),
        ]
    )
    serialized_aug = A.to_dict(aug)
    deserialized_aug = A.from_dict(serialized_aug)
    set_seed(seed)
    aug_data = aug(image=image, mask=mask)
    set_seed(seed)
    deserialized_aug_data = deserialized_aug(image=image, mask=mask)
    assert np.array_equal(aug_data["image"], deserialized_aug_data["image"])
    assert np.array_equal(aug_data["mask"], deserialized_aug_data["mask"]) 
Example #4
Source File: captioning.py    From virtex with MIT License
def __init__(
        self,
        data_root: str,
        split: str,
        tokenizer: SentencePieceBPETokenizer,
        image_transform: Callable = T.DEFAULT_IMAGE_TRANSFORM,
        max_caption_length: int = 30,
        use_single_caption: bool = False,
        percentage: float = 100.0,
    ):
        lmdb_path = os.path.join(data_root, f"serialized_{split}.lmdb")
        self.reader = LmdbReader(lmdb_path, percentage=percentage)

        self.image_transform = image_transform
        self.caption_transform = alb.Compose(
            [
                T.NormalizeCaption(),
                T.TokenizeCaption(tokenizer),
                T.TruncateCaptionTokens(max_caption_length),
            ]
        )
        self.use_single_caption = use_single_caption
        self.padding_idx = tokenizer.token_to_id("<unk>") 
Example #5
Source File: experiment.py    From mlcomp with Apache License 2.0
def get_test_transforms():
        return A.Compose([A.Normalize(mean=(0.485, ), std=(0.229, ))]) 
Example #6
Source File: spacenet_binary_dataset.py    From SpaceNet_Off_Nadir_Solutions with Apache License 2.0
def pad_mask_image(self, mask, image, img_id, crop_shape):
        composed = Compose([PadIfNeeded(crop_shape[0], crop_shape[1], p=1),
                            RandomCrop(crop_shape[0], crop_shape[1], p=1)], p=1)

        if np.sum(mask) != 0:

            s = 0
            tries = 0
            while s == 0:
                # crop = composed(crop_shape[0], crop_shape[1])
                croped = composed(image=image, mask=mask)

                image_padded = croped['image']
                mask_padded = croped['mask']
                # print(mask_padded.shape)
                s = np.sum(mask_padded)
                tries += 1
                if tries > 5:
                    break
        else:

            croped = composed(image=image, mask=mask)
            image_padded = croped['image']
            mask_padded = croped['mask']
            
        return mask_padded, image_padded 
Example #7
Source File: test_serialization.py    From albumentations with MIT License
def test_additional_targets_for_image_only_serialization(augmentation_cls, params, image, seed):
    aug = A.Compose([augmentation_cls(always_apply=True, **params)], additional_targets={"image2": "image"})
    image2 = image.copy()

    serialized_aug = A.to_dict(aug)
    deserialized_aug = A.from_dict(serialized_aug)
    set_seed(seed)
    aug_data = aug(image=image, image2=image2)
    set_seed(seed)
    deserialized_aug_data = deserialized_aug(image=image, image2=image2)
    assert np.array_equal(aug_data["image"], deserialized_aug_data["image"])
    assert np.array_equal(aug_data["image2"], deserialized_aug_data["image2"]) 
Example #8
Source File: test_pytorch.py    From albumentations with MIT License
def test_additional_targets_for_totensor():
    with pytest.warns(DeprecationWarning):
        aug = A.Compose([ToTensor(num_classes=4)], additional_targets={"image2": "image", "mask2": "mask"})
    for _i in range(10):
        image1 = np.random.randint(low=0, high=256, size=(100, 100, 3), dtype=np.uint8)
        image2 = image1.copy()
        mask1 = np.random.randint(low=0, high=256, size=(100, 100, 4), dtype=np.uint8)
        mask2 = mask1.copy()
        res = aug(image=image1, image2=image2, mask=mask1, mask2=mask2)
        assert np.array_equal(res["image"], res["image2"])
        assert np.array_equal(res["mask"], res["mask2"]) 
Example #9
Source File: test_pytorch.py    From albumentations with MIT License
def test_additional_targets_for_totensorv2():
    aug = A.Compose([ToTensorV2()], additional_targets={"image2": "image", "mask2": "mask"})
    for _i in range(10):
        image1 = np.random.randint(low=0, high=256, size=(100, 100, 3), dtype=np.uint8)
        image2 = image1.copy()
        mask1 = np.random.randint(low=0, high=256, size=(100, 100, 4), dtype=np.uint8)
        mask2 = mask1.copy()
        res = aug(image=image1, image2=image2, mask=mask1, mask2=mask2)
        assert isinstance(res["image"], torch.Tensor) and res["image"].shape == image1.shape[::-1]
        assert isinstance(res["image2"], torch.Tensor) and res["image2"].shape == image2.shape[::-1]
        assert isinstance(res["mask"], torch.Tensor) and res["mask"].shape == mask1.shape
        assert isinstance(res["mask2"], torch.Tensor) and res["mask2"].shape == mask2.shape
        assert np.array_equal(res["image"], res["image2"])
        assert np.array_equal(res["mask"], res["mask2"]) 
Example #10
Source File: test_transforms.py    From albumentations with MIT License
def test_additional_targets_for_image_only(augmentation_cls, params):
    aug = A.Compose([augmentation_cls(always_apply=True, **params)], additional_targets={"image2": "image"})
    for _i in range(10):
        image1 = np.random.randint(low=0, high=256, size=(100, 100, 3), dtype=np.uint8)
        image2 = image1.copy()
        res = aug(image=image1, image2=image2)
        aug1 = res["image"]
        aug2 = res["image2"]
        assert np.array_equal(aug1, aug2) 
Example #11
Source File: test_transforms.py    From albumentations with MIT License
def test_force_apply():
    """
    Unit test for https://github.com/albumentations-team/albumentations/issues/189
    """
    aug = A.Compose(
        [
            A.OneOrOther(
                A.Compose(
                    [
                        A.RandomSizedCrop(min_max_height=(256, 1025), height=512, width=512, p=1),
                        A.OneOf(
                            [
                                A.RandomSizedCrop(min_max_height=(256, 512), height=384, width=384, p=0.5),
                                A.RandomSizedCrop(min_max_height=(256, 512), height=512, width=512, p=0.5),
                            ]
                        ),
                    ]
                ),
                A.Compose(
                    [
                        A.RandomSizedCrop(min_max_height=(256, 1025), height=256, width=256, p=1),
                        A.OneOf([A.HueSaturationValue(p=0.5), A.RGBShift(p=0.7)], p=1),
                    ]
                ),
            ),
            A.HorizontalFlip(p=1),
            A.RandomBrightnessContrast(p=0.5),
        ]
    )

    res = aug(image=np.zeros((1248, 1248, 3), dtype=np.uint8))
    assert res["image"].shape[0] in (256, 384, 512)
    assert res["image"].shape[1] in (256, 384, 512) 
Example #12
Source File: cityscapes.py    From pytorch-segmentation with MIT License
def __init__(self, base_dir='../data/cityscapes', split='train',
                 affine_augmenter=None, image_augmenter=None, target_size=(1024, 2048),
                 net_type='unet', ignore_index=255, debug=False):
        self.debug = debug
        self.base_dir = Path(base_dir)
        assert net_type in ['unet', 'deeplab']
        self.net_type = net_type
        self.ignore_index = ignore_index
        self.split = 'val' if split == 'valid' else split

        self.img_paths = sorted(self.base_dir.glob(f'leftImg8bit/{self.split}/*/*leftImg8bit.png'))
        self.lbl_paths = sorted(self.base_dir.glob(f'gtFine/{self.split}/*/*gtFine_labelIds.png'))
        assert len(self.img_paths) == len(self.lbl_paths)

        # Resize
        if isinstance(target_size, str):
            target_size = eval(target_size)
        if self.split == 'train':
            if self.net_type == 'deeplab':
                target_size = (target_size[0] + 1, target_size[1] + 1)
            self.resizer = albu.Compose([albu.RandomScale(scale_limit=(-0.5, 0.5), p=1.0),
                                         PadIfNeededRightBottom(min_height=target_size[0], min_width=target_size[1],
                                                                value=0, ignore_index=self.ignore_index, p=1.0),
                                         albu.RandomCrop(height=target_size[0], width=target_size[1], p=1.0)])
        else:
            self.resizer = None

        # Augment
        if self.split == 'train':
            self.affine_augmenter = affine_augmenter
            self.image_augmenter = image_augmenter
        else:
            self.affine_augmenter = None
            self.image_augmenter = None 
Example #13
Source File: config.py    From mlcomp with Apache License 2.0
def parse_albu(configs: List[dict]):
    res = []
    for config in configs:
        assert 'name' in config, f'name is required in {config}'
        config = config.copy()
        name = config.pop('name')
        if name == 'Compose':
            items = config.pop('items')
            aug = A.Compose(parse_albu(items), **config)
        else:
            aug = getattr(A, name)(**config)
        res.append(aug)
    return res 
Example #14
Source File: transforms.py    From mmdetection with Apache License 2.0
def __init__(self,
                 transforms,
                 bbox_params=None,
                 keymap=None,
                 update_pad_shape=False,
                 skip_img_without_anno=False):
        if Compose is None:
            raise RuntimeError('albumentations is not installed')

        self.transforms = transforms
        self.filter_lost_elements = False
        self.update_pad_shape = update_pad_shape
        self.skip_img_without_anno = skip_img_without_anno

        # A simple workaround to remove masks without boxes
        if (isinstance(bbox_params, dict) and 'label_fields' in bbox_params
                and 'filter_lost_elements' in bbox_params):
            self.filter_lost_elements = True
            self.origin_label_fields = bbox_params['label_fields']
            bbox_params['label_fields'] = ['idx_mapper']
            del bbox_params['filter_lost_elements']

        self.bbox_params = (
            self.albu_builder(bbox_params) if bbox_params else None)
        self.aug = Compose([self.albu_builder(t) for t in self.transforms],
                           bbox_params=self.bbox_params)

        if not keymap:
            self.keymap_to_albu = {
                'img': 'image',
                'gt_masks': 'masks',
                'gt_bboxes': 'bboxes'
            }
        else:
            self.keymap_to_albu = keymap
        self.keymap_back = {v: k for k, v in self.keymap_to_albu.items()} 
Example #15
Source File: cloud_transform.py    From kaggle-understanding-clouds with BSD 2-Clause "Simplified" License
def get_test_augmentation(resize_to=(320,640)):
    """Add paddings to make image shape divisible by 32"""
    test_transform = [
        albu.Resize(*resize_to),
        albu.Normalize(),
    ]
    return albu.Compose(test_transform) 
Example #16
Source File: example_bbox_keypoint_rotate.py    From albumentations_examples with MIT License
def main():
    image = cv2.imread("images/image_1.jpg")

    keypoints = cv2.goodFeaturesToTrack(
        cv2.cvtColor(image, cv2.COLOR_RGB2GRAY), maxCorners=100, qualityLevel=0.5, minDistance=5
    ).squeeze(1)

    bboxes = [(kp[0] - 10, kp[1] - 10, kp[0] + 10, kp[1] + 10) for kp in keypoints]

    disp_image = visualize(image, keypoints, bboxes)
    plt.figure(figsize=(10, 10))
    plt.imshow(cv2.cvtColor(disp_image, cv2.COLOR_RGB2BGR))
    plt.tight_layout()
    plt.show()

    aug = A.Compose(
        [A.ShiftScaleRotate(scale_limit=0.1, shift_limit=0.2, rotate_limit=10, always_apply=True)],
        bbox_params=A.BboxParams(format="pascal_voc", label_fields=["bbox_labels"]),
        keypoint_params=A.KeypointParams(format="xy"),
    )

    for _i in range(10):
        data = aug(image=image, keypoints=keypoints, bboxes=bboxes, bbox_labels=np.ones(len(bboxes)))

        aug_image = data["image"]
        aug_image = visualize(aug_image, data["keypoints"], data["bboxes"])

        plt.figure(figsize=(10, 10))
        plt.imshow(cv2.cvtColor(aug_image, cv2.COLOR_RGB2BGR))
        plt.tight_layout()
        plt.show() 
Example #17
Source File: augmentations.py    From YOLOv3-model-pruning with MIT License
def augment(image, boxes):
    h, w, _ = image.shape
    labels, boxes_coord = boxes[:, 0], boxes[:, 1:]
    labels = labels.tolist()
    boxes_coord = boxes_coord * h     # convert to coordinates at the original image scale (un-normalized)
    boxes_coord[:, 0] = np.clip(boxes_coord[:, 0]-boxes_coord[:, 2]/2, a_min=0, a_max=None)   # make sure x_min and y_min are valid (non-negative)
    boxes_coord[:, 1] = np.clip(boxes_coord[:, 1]-boxes_coord[:, 3]/2, a_min=0, a_max=None)
    boxes_coord = boxes_coord.tolist()      # [x_min, y_min, width, height]

    # configure the data augmentation methods here
    aug = A.Compose([
        A.HorizontalFlip(p=0.5),
        # A.HueSaturationValue(hue_shift_limit=10, sat_shift_limit=10, val_shift_limit=10, p=0.5),
        # A.ShiftScaleRotate(shift_limit=0.1, scale_limit=0.1, rotate_limit=5, border_mode=0, p=0.5)
    ], bbox_params={'format':'coco', 'label_fields': ['category_id']})

    augmented = aug(image=image, bboxes=boxes_coord, category_id=labels)

    # after augmentation, if all the boxes were lost, keep the original image and boxes
    if augmented['bboxes']:
        image = augmented['image']

        boxes_coord = np.array(augmented['bboxes'])  # [x_min, y_min, w, h] → [x_center, y_center, w, h]
        boxes_coord[:, 0] = boxes_coord[:, 0] + boxes_coord[:, 2]/2
        boxes_coord[:, 1] = boxes_coord[:, 1] + boxes_coord[:, 3]/2
        boxes_coord = boxes_coord / h
        labels = np.array(augmented['category_id'])[:, None]
        boxes = np.concatenate((labels, boxes_coord), 1)

    return image, boxes 
Example #18
Source File: dataset.py    From kaggle-kuzushiji-2019 with MIT License
def get_transform(train: bool) -> Callable:
    train_initial_size = 2048
    crop_min_max_height = (400, 533)
    crop_width = 512
    crop_height = 384
    if train:
        transforms = [
            A.LongestMaxSize(max_size=train_initial_size),
            A.RandomSizedCrop(
                min_max_height=crop_min_max_height,
                width=crop_width,
                height=crop_height,
                w2h_ratio=crop_width / crop_height,
            ),
            A.HueSaturationValue(
                hue_shift_limit=7,
                sat_shift_limit=10,
                val_shift_limit=10,
            ),
            A.RandomBrightnessContrast(),
            A.RandomGamma(),
        ]
    else:
        test_size = int(train_initial_size *
                        crop_height / np.mean(crop_min_max_height))
        print(f'Test image max size {test_size} px')
        transforms = [
            A.LongestMaxSize(max_size=test_size),
        ]
    transforms.extend([
        ToTensor(),
    ])
    return A.Compose(
        transforms,
        bbox_params={
            'format': 'coco',
            'min_area': 0,
            'min_visibility': 0.5,
            'label_fields': ['labels'],
        },
    ) 
Example #19
Source File: augmentations.py    From yolov3-channel-pruning with MIT License
def augment(image, boxes):
    h, w, _ = image.shape
    labels, boxes_coord = boxes[:, 0], boxes[:, 1:]
    labels = labels.tolist()
    boxes_coord = boxes_coord * h     # convert to coordinates at the original image scale (un-normalized)
    boxes_coord[:, 0] = np.clip(boxes_coord[:, 0]-boxes_coord[:, 2]/2, a_min=0, a_max=None)   # make sure x_min and y_min are valid (non-negative)
    boxes_coord[:, 1] = np.clip(boxes_coord[:, 1]-boxes_coord[:, 3]/2, a_min=0, a_max=None)
    boxes_coord = boxes_coord.tolist()      # [x_min, y_min, width, height]

    # configure the data augmentation methods here
    aug = A.Compose([
        A.HorizontalFlip(p=0.5),
        # A.HueSaturationValue(hue_shift_limit=10, sat_shift_limit=10, val_shift_limit=10, p=0.5),
        # A.ShiftScaleRotate(shift_limit=0.1, scale_limit=0.1, rotate_limit=5, border_mode=0, p=0.5)
    ], bbox_params={'format':'coco', 'label_fields': ['category_id']})

    augmented = aug(image=image, bboxes=boxes_coord, category_id=labels)

    # after augmentation, if all the boxes were lost, keep the original image and boxes
    if augmented['bboxes']:
        image = augmented['image']

        boxes_coord = np.array(augmented['bboxes'])  # [x_min, y_min, w, h] → [x_center, y_center, w, h]
        boxes_coord[:, 0] = boxes_coord[:, 0] + boxes_coord[:, 2]/2
        boxes_coord[:, 1] = boxes_coord[:, 1] + boxes_coord[:, 3]/2
        boxes_coord = boxes_coord / h
        labels = np.array(augmented['category_id'])[:, None]
        boxes = np.concatenate((labels, boxes_coord), 1)

    return image, boxes 
Example #20
Source File: transforms.py    From IoU-Uniform-R-CNN with Apache License 2.0
def __init__(self,
                 transforms,
                 bbox_params=None,
                 keymap=None,
                 update_pad_shape=False,
                 skip_img_without_anno=False):
        """
        Adds custom transformations from the Albumentations library.
        See `https://albumentations.readthedocs.io` for more information.

        transforms (list): list of albu transformations
        bbox_params (dict): bbox_params for albumentations `Compose`
        keymap (dict): contains {'input key':'albumentation-style key'}
        skip_img_without_anno (bool): whether to skip the image
                                      if no annotations remain after augmentation
        """

        self.transforms = transforms
        self.filter_lost_elements = False
        self.update_pad_shape = update_pad_shape
        self.skip_img_without_anno = skip_img_without_anno

        # A simple workaround to remove masks without boxes
        if (isinstance(bbox_params, dict) and 'label_fields' in bbox_params
                and 'filter_lost_elements' in bbox_params):
            self.filter_lost_elements = True
            self.origin_label_fields = bbox_params['label_fields']
            bbox_params['label_fields'] = ['idx_mapper']
            del bbox_params['filter_lost_elements']

        self.bbox_params = (
            self.albu_builder(bbox_params) if bbox_params else None)
        self.aug = Compose([self.albu_builder(t) for t in self.transforms],
                           bbox_params=self.bbox_params)

        if not keymap:
            self.keymap_to_albu = {
                'img': 'image',
                'gt_masks': 'masks',
                'gt_bboxes': 'bboxes'
            }
        else:
            self.keymap_to_albu = keymap
        self.keymap_back = {v: k for k, v in self.keymap_to_albu.items()} 
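The keymap handled by this wrapper is only a renaming step around the albumentations call. Below is a minimal sketch of that idea using plain albumentations; the key names, transform, and box values are made up for illustration and this is not the mmdetection API itself.

import albumentations as A
import numpy as np

# Hypothetical mapping from pipeline-style keys to albumentations target names.
keymap_to_albu = {'img': 'image', 'gt_bboxes': 'bboxes'}
keymap_back = {v: k for k, v in keymap_to_albu.items()}

aug = A.Compose(
    [A.HorizontalFlip(p=1.0)],
    bbox_params=A.BboxParams(format='pascal_voc', label_fields=['labels']),
)

results = {
    'img': np.zeros((100, 100, 3), dtype=np.uint8),
    'gt_bboxes': [(10, 10, 40, 40)],
}

# Rename keys to albumentations names, run the pipeline, then rename back.
albu_input = {keymap_to_albu.get(k, k): v for k, v in results.items()}
albu_output = aug(**albu_input, labels=[1])
results = {keymap_back.get(k, k): v for k, v in albu_output.items() if k != 'labels'}
print(results['gt_bboxes'])  # expected: box flipped horizontally, roughly (60, 10, 90, 40)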
Example #21
Source File: policy_transform.py    From kaggle-hpa with BSD 2-Clause "Simplified" License
def policy_transform(split,
                     policies=None,
                     size=512,
                     per_image_norm=False,
                     mean_std=None,
                     **kwargs):
  means = np.array([127.5, 127.5, 127.5, 127.5])
  stds = np.array([255.0, 255.0, 255.0, 255.0])

  base_aug = Compose([
    RandomRotate90(),
    Flip(),
    Transpose(),
  ])

  if policies is None:
    policies = []

  if isinstance(policies, str):
    with open(policies, 'r') as fid:
      policies = eval(fid.read())
      policies = itertools.chain.from_iterable(policies)

  aug_list = []
  for policy in policies:
    op_1, params_1 = policy[0]
    op_2, params_2 = policy[1]
    aug = Compose([
      globals().get(op_1)(**params_1),
      globals().get(op_2)(**params_2),
    ])
    aug_list.append(aug)

  print('len(aug_list):', len(aug_list))
  resize = Resize(height=size, width=size, always_apply=True)

  def transform(image):
    if split == 'train':
      image = base_aug(image=image)['image']
      if len(aug_list) > 0:
        aug = random.choice(aug_list)
        image = aug(image=image)['image']
      image = resize(image=image)['image']
    else:
      if size != image.shape[0]:
        image = resize(image=image)['image']

    image = image.astype(np.float32)
    if per_image_norm:
        mean = np.mean(image.reshape(-1, 4), axis=0)
        std = np.std(image.reshape(-1, 4), axis=0)
        image -= mean
        image /= (std + 0.0000001)
    else:
        image -= means
        image /= stds
    image = np.transpose(image, (2, 0, 1))

    return image

  return transform 
Example #22
Source File: factories.py    From virtex with MIT License
def from_config(cls, config: Config, split: str = "train"):
        r"""
        Create a dataset directly from config. Names in this factory are paths
        of dataset directories (relative to the project directory), because
        config parameter ``DATA.ROOT`` is used to create objects.

        Parameters
        ----------
        config: virtex.config.Config
            Config object with all the parameters.
        split: str, optional (default = "train")
            Which split to load for the dataset. One of ``{"trainval", "test"}``
            for VOC2007, or one of ``{"train", "val"}`` for ImageNet.
        """

        _C = config
        # Every dataset needs these two args.
        kwargs = {"data_root": _C.DATA.ROOT, "split": split}

        # For VOC2007, `IMAGE_TRANSFORM_TRAIN` is used for "trainval" split and
        # `IMAGE_TRANSFORM_VAL` is used for the "test" split.
        image_transform_names: List[str] = list(
            _C.DATA.IMAGE_TRANSFORM_TRAIN
            if "train" in split
            else _C.DATA.IMAGE_TRANSFORM_VAL
        )
        # Create a list of image transformations based on names.
        image_transform_list: List[Callable] = []

        for name in image_transform_names:
            # Pass dimensions for resize/crop, else rely on the defaults.
            if name in {"random_resized_crop", "center_crop", "global_resize"}:
                transform = ImageTransformsFactory.create(name, 224)
            elif name in {"smallest_resize"}:
                transform = ImageTransformsFactory.create(name, 256)
            else:
                transform = ImageTransformsFactory.create(name)

            image_transform_list.append(transform)

        kwargs["image_transform"] = alb.Compose(image_transform_list)

        return cls.create(_C.DATA.ROOT, **kwargs) 
Example #23
Source File: pascal_voc.py    From pytorch-segmentation with MIT License
def __init__(self, base_dir='../data/pascal_voc_2012/VOCdevkit/VOC2012', split='train_aug',
                 affine_augmenter=None, image_augmenter=None, target_size=(512, 512),
                 net_type='unet', ignore_index=255, debug=False):
        self.debug = debug
        self.base_dir = Path(base_dir)
        assert net_type in ['unet', 'deeplab']
        self.net_type = net_type
        self.ignore_index = ignore_index
        self.split = split

        valid_ids = self.base_dir / 'ImageSets' / 'Segmentation' / 'val.txt'
        with open(valid_ids, 'r') as f:
            valid_ids = f.readlines()
        if self.split == 'valid':
            lbl_dir = 'SegmentationClass'
            img_ids = valid_ids
        else:
            valid_set = set([valid_id.strip() for valid_id in valid_ids])
            lbl_dir = 'SegmentationClassAug' if 'aug' in split else 'SegmentationClass'
            all_set = set([p.name[:-4] for p in self.base_dir.joinpath(lbl_dir).iterdir()])
            img_ids = list(all_set - valid_set)
        self.img_paths = [(self.base_dir / 'JPEGImages' / f'{img_id.strip()}.jpg') for img_id in img_ids]
        self.lbl_paths = [(self.base_dir / lbl_dir / f'{img_id.strip()}.png') for img_id in img_ids]

        # Resize
        if isinstance(target_size, str):
            target_size = eval(target_size)
        if 'train' in self.split:
            if self.net_type == 'deeplab':
                target_size = (target_size[0] + 1, target_size[1] + 1)
            self.resizer = albu.Compose([albu.RandomScale(scale_limit=(-0.5, 0.5), p=1.0),
                                         PadIfNeededRightBottom(min_height=target_size[0], min_width=target_size[1],
                                                                value=0, ignore_index=self.ignore_index, p=1.0),
                                         albu.RandomCrop(height=target_size[0], width=target_size[1], p=1.0)])
        else:
            # self.resizer = None
            self.resizer = albu.Compose([PadIfNeededRightBottom(min_height=target_size[0], min_width=target_size[1],
                                                                value=0, ignore_index=self.ignore_index, p=1.0),
                                         albu.Crop(x_min=0, x_max=target_size[1],
                                                   y_min=0, y_max=target_size[0])])

        # Augment
        if 'train' in self.split:
            self.affine_augmenter = affine_augmenter
            self.image_augmenter = image_augmenter
        else:
            self.affine_augmenter = None
            self.image_augmenter = None 
Example #24
Source File: apolloscape.py    From pytorch-segmentation with MIT License
def __init__(self,
                 base_dir='../../data/apolloscape',
                 road_record_list=[{'road':'road02_seg','record':[22, 23, 24, 25, 26]}, {'road':'road03_seg', 'record':[7, 8, 9, 10, 11, 12]}],
                 split='train',
                 ignore_index=255,
                 debug=False):
        self.debug = debug
        self.base_dir = Path(base_dir)
        self.ignore_index = ignore_index
        self.split = split
        self.img_paths = []
        self.lbl_paths = []

        for road_record in road_record_list:
          self.road_dir = self.base_dir / Path(road_record['road'])
          self.record_list = road_record['record']

          for record in self.record_list:
            img_paths_tmp = self.road_dir.glob(f'ColorImage/Record{record:03}/Camera 5/*.jpg')
            lbl_paths_tmp = self.road_dir.glob(f'Label/Record{record:03}/Camera 5/*.png')

            img_paths_basenames = {Path(img_path.name).stem for img_path in img_paths_tmp}
            lbl_paths_basenames = {Path(lbl_path.name).stem.replace('_bin', '') for lbl_path in lbl_paths_tmp}

            intersection_basenames = img_paths_basenames & lbl_paths_basenames

            img_paths_intersection = [self.road_dir / Path(f'ColorImage/Record{record:03}/Camera 5/{intersection_basename}.jpg')
                                      for intersection_basename in intersection_basenames]
            lbl_paths_intersection = [self.road_dir / Path(f'Label/Record{record:03}/Camera 5/{intersection_basename}_bin.png')
                                      for intersection_basename in intersection_basenames]

            self.img_paths += img_paths_intersection
            self.lbl_paths += lbl_paths_intersection

        self.img_paths.sort()
        self.lbl_paths.sort()
        print(len(self.img_paths), len(self.lbl_paths))
        assert len(self.img_paths) == len(self.lbl_paths)

        self.resizer = albu.Resize(height=512, width=1024)
        self.augmenter = albu.Compose([albu.HorizontalFlip(p=0.5),
                                       # albu.RandomRotate90(p=0.5),
                                       albu.Rotate(limit=10, p=0.5),
                                       # albu.CLAHE(p=0.2),
                                       # albu.RandomContrast(p=0.2),
                                       # albu.RandomBrightness(p=0.2),
                                       # albu.RandomGamma(p=0.2),
                                       # albu.GaussNoise(p=0.2),
                                       # albu.Cutout(p=0.2)
                                       ])
        self.img_transformer = transforms.Compose([transforms.ToTensor(),
                                                   transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                                                        std=[0.229, 0.224, 0.225])])
        self.lbl_transformer = torch.LongTensor 
Example #25
Source File: transforms.py    From ttfnet with Apache License 2.0
def __init__(self,
                 transforms,
                 bbox_params=None,
                 keymap=None,
                 update_pad_shape=False,
                 skip_img_without_anno=False):
        """
        Adds custom transformations from the Albumentations library.
        See `https://albumentations.readthedocs.io` for more information.

        transforms (list): list of albu transformations
        bbox_params (dict): bbox_params for albumentations `Compose`
        keymap (dict): contains {'input key':'albumentation-style key'}
        skip_img_without_anno (bool): whether to skip the image
                                      if no annotations remain after augmentation
        """

        self.transforms = transforms
        self.filter_lost_elements = False
        self.update_pad_shape = update_pad_shape
        self.skip_img_without_anno = skip_img_without_anno

        # A simple workaround to remove masks without boxes
        if (isinstance(bbox_params, dict) and 'label_fields' in bbox_params
                and 'filter_lost_elements' in bbox_params):
            self.filter_lost_elements = True
            self.origin_label_fields = bbox_params['label_fields']
            bbox_params['label_fields'] = ['idx_mapper']
            del bbox_params['filter_lost_elements']

        self.bbox_params = (
            self.albu_builder(bbox_params) if bbox_params else None)
        self.aug = Compose([self.albu_builder(t) for t in self.transforms],
                           bbox_params=self.bbox_params)

        if not keymap:
            self.keymap_to_albu = {
                'img': 'image',
                'gt_masks': 'masks',
                'gt_bboxes': 'bboxes'
            }
        else:
            self.keymap_to_albu = keymap
        self.keymap_back = {v: k for k, v in self.keymap_to_albu.items()} 
Example #26
Source File: factories.py    From virtex with MIT License
def from_config(cls, config: Config, split: str = "train"):
        r"""
        Create a dataset directly from config. Names in this factory match with
        names in :class:`PretrainingModelFactory` because both use same config
        parameter ``MODEL.NAME`` to create objects.

        Parameters
        ----------
        config: virtex.config.Config
            Config object with all the parameters.
        split: str, optional (default = "train")
            Which split to load for the dataset. One of ``{"train", "val"}``.
        """

        _C = config
        # Every dataset needs these two args.
        kwargs = {"data_root": _C.DATA.ROOT, "split": split}

        # Create a list of image transformations based on transform names.
        image_transform_list: List[Callable] = []

        for name in getattr(_C.DATA, f"IMAGE_TRANSFORM_{split.upper()}"):
            # Pass dimensions if cropping / resizing, else rely on the defaults
            # as per `ImageTransformsFactory`.
            if "resize" in name or "crop" in name:
                image_transform_list.append(
                    ImageTransformsFactory.create(name, _C.DATA.IMAGE_CROP_SIZE)
                )
            else:
                image_transform_list.append(ImageTransformsFactory.create(name))

        kwargs["image_transform"] = alb.Compose(image_transform_list)

        # Add dataset specific kwargs.
        if _C.MODEL.NAME != "multilabel_classification":
            tokenizer = TokenizerFactory.from_config(_C)
            kwargs.update(
                tokenizer=tokenizer,
                max_caption_length=_C.DATA.MAX_CAPTION_LENGTH,
                use_single_caption=_C.DATA.USE_SINGLE_CAPTION,
                percentage=_C.DATA.USE_PERCENTAGE if split == "train" else 100.0,
            )

        # Dataset names match with model names (and of course pretext names).
        return cls.create(_C.MODEL.NAME, **kwargs) 
Example #27
Source File: dataset.py    From kaggle-kuzushiji-2019 with MIT License
def get_transform(
        *,
        train: bool,
        test_height: int,
        crop_width: int,
        crop_height: int,
        scale_aug: float,
        color_hue_aug: int,
        color_sat_aug: int,
        color_val_aug: int,
        normalize: bool = True,
        ) -> Callable:
    train_initial_size = 3072  # this value should not matter any more?
    crop_ratio = crop_height / test_height
    crop_min_max_height = tuple(
        int(train_initial_size * crop_ratio * (1 + sign * scale_aug))
        for sign in [-1, 1])
    if train:
        transforms = [
            LongestMaxSizeRandomSizedCrop(
                max_size=train_initial_size,
                min_max_height=crop_min_max_height,
                width=crop_width,
                height=crop_height,
                w2h_ratio=crop_width / crop_height,
            ),
            A.HueSaturationValue(
                hue_shift_limit=color_hue_aug,
                sat_shift_limit=color_sat_aug,
                val_shift_limit=color_val_aug,
            ),
            A.RandomBrightnessContrast(),
            A.RandomGamma(),
        ]
    else:
        transforms = [
            A.LongestMaxSize(max_size=test_height),
        ]
    if normalize:
        transforms.append(A.Normalize())
    transforms.extend([
        ToTensor(),
    ])
    return A.Compose(
        transforms,
        bbox_params={
            'format': 'coco',
            'min_area': 0,
            'min_visibility': 0.99,
            'label_fields': ['labels'],
        },
    ) 
Example #28
Source File: test.py    From seismic-deeplearning with MIT License
def test(*options, cfg=None, debug=False):
    update_config(config, options=options, config_file=cfg)
    n_classes = config.DATASET.NUM_CLASSES

    # Start logging
    load_log_configuration(config.LOG_CONFIG)
    logger = logging.getLogger(__name__)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    log_dir, _ = os.path.split(config.TEST.MODEL_PATH)

    # load model:
    model = getattr(models, config.MODEL.NAME).get_seg_model(config)
    model.load_state_dict(torch.load(config.TEST.MODEL_PATH), strict=False)
    model = model.to(device)  # Send to GPU if available

    running_metrics_overall = runningScore(n_classes)

    # Augmentation
    section_aug = Compose([Normalize(mean=(config.TRAIN.MEAN,), std=(config.TRAIN.STD,), max_pixel_value=1,)])

    splits = ["test1", "test2"] if "Both" in config.TEST.SPLIT else [config.TEST.SPLIT]

    for sdx, split in enumerate(splits):
        labels = np.load(path.join(config.DATASET.ROOT, "test_once", split + "_labels.npy"))
        section_file = path.join(config.DATASET.ROOT, "splits", "section_" + split + ".txt")
        _write_section_file(labels, section_file)
        _evaluate_split(split, section_aug, model, device, running_metrics_overall, config, debug=debug)

    # FINAL TEST RESULTS:
    score, class_iou = running_metrics_overall.get_scores()

    logger.info("--------------- FINAL RESULTS -----------------")
    logger.info(f'Pixel Acc: {score["Pixel Acc: "]:.3f}')
    for cdx, class_name in enumerate(_CLASS_NAMES):
        logger.info(f'     {class_name}_accuracy {score["Class Accuracy: "][cdx]:.3f}')
    logger.info(f'Mean Class Acc: {score["Mean Class Acc: "]:.3f}')
    logger.info(f'Freq Weighted IoU: {score["Freq Weighted IoU: "]:.3f}')
    logger.info(f'Mean IoU: {score["Mean IoU: "]:0.3f}')

    # Save confusion matrix:
    confusion = score["confusion_matrix"]
    np.savetxt(path.join(log_dir, "confusion.csv"), confusion, delimiter=" ") 
Example #29
Source File: augmentation.py    From EfficientDet.Pytorch with MIT License
def get_augumentation(phase, width=512, height=512, min_area=0., min_visibility=0.):
    list_transforms = []
    if phase == 'train':
        list_transforms.extend([
            albu.augmentations.transforms.LongestMaxSize(
                max_size=width, always_apply=True),
            albu.PadIfNeeded(min_height=height, min_width=width,
                             always_apply=True, border_mode=0, value=[0, 0, 0]),
            albu.augmentations.transforms.RandomResizedCrop(
                height=height,
                width=width, p=0.3),
            albu.augmentations.transforms.Flip(),
            albu.augmentations.transforms.Transpose(),
            albu.OneOf([
                albu.RandomBrightnessContrast(brightness_limit=0.5,
                                              contrast_limit=0.4),
                albu.RandomGamma(gamma_limit=(50, 150)),
                albu.NoOp()
            ]),
            albu.OneOf([
                albu.RGBShift(r_shift_limit=20, b_shift_limit=15,
                              g_shift_limit=15),
                albu.HueSaturationValue(hue_shift_limit=5,
                                        sat_shift_limit=5),
                albu.NoOp()
            ]),
            albu.CLAHE(p=0.8),
            albu.HorizontalFlip(p=0.5),
            albu.VerticalFlip(p=0.5),
        ])
    if(phase == 'test' or phase == 'valid'):
        list_transforms.extend([
            albu.Resize(height=height, width=width)
        ])
    list_transforms.extend([
        albu.Normalize(mean=(0.485, 0.456, 0.406),
                       std=(0.229, 0.224, 0.225), p=1),
        ToTensor()
    ])
    if(phase == 'test'):
        return albu.Compose(list_transforms)
    return albu.Compose(list_transforms, bbox_params=albu.BboxParams(format='pascal_voc', min_area=min_area,
                                                                     min_visibility=min_visibility, label_fields=['category_id']))