Python utils.generate_pyramid_anchors() Examples
The following are 16 code examples of utils.generate_pyramid_anchors(). The source file, project, and license are noted above each example.
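All 16 examples trace back to the Matterport Mask R-CNN codebase, where generate_pyramid_anchors(scales, ratios, feature_shapes, feature_strides, anchor_stride) builds one set of anchors per feature pyramid level and stacks them into a single [N, (y1, x1, y2, x2)] array in pixel coordinates. For orientation, below is a minimal sketch of calling it directly; the config values are the Matterport defaults, and the ceil-based backbone_shapes computation mirrors the compute_backbone_shapes() helper the examples rely on. Treat it as illustrative, not as code from any listed project.

import math
import numpy as np
import utils  # mrcnn/utils.py from the Matterport Mask R-CNN codebase

# Matterport default config values (assumed here for illustration)
IMAGE_SHAPE = [1024, 1024, 3]
RPN_ANCHOR_SCALES = (32, 64, 128, 256, 512)  # one scale per pyramid level
RPN_ANCHOR_RATIOS = [0.5, 1, 2]              # anchor width/height ratios
BACKBONE_STRIDES = [4, 8, 16, 32, 64]        # downsampling factor per level
RPN_ANCHOR_STRIDE = 1                        # place anchors at every feature cell

# [height, width] of each pyramid level's feature map
backbone_shapes = np.array(
    [[int(math.ceil(IMAGE_SHAPE[0] / stride)),
      int(math.ceil(IMAGE_SHAPE[1] / stride))]
     for stride in BACKBONE_STRIDES])

anchors = utils.generate_pyramid_anchors(RPN_ANCHOR_SCALES, RPN_ANCHOR_RATIOS,
                                         backbone_shapes, BACKBONE_STRIDES,
                                         RPN_ANCHOR_STRIDE)
print(anchors.shape)  # (261888, 4) for a 1024x1024 image: (y1, x1, y2, x2)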
Example #1
Source File: model.py From raster-deep-learning with Apache License 2.0
def get_anchors(self, image_shape):
    """Returns anchor pyramid for the given image size."""
    backbone_shapes = compute_backbone_shapes(self.config, image_shape)
    # Cache anchors and reuse if image shape is the same
    if not hasattr(self, "_anchor_cache"):
        self._anchor_cache = {}
    if not tuple(image_shape) in self._anchor_cache:
        # Generate Anchors
        a = utils.generate_pyramid_anchors(
            self.config.RPN_ANCHOR_SCALES,
            self.config.RPN_ANCHOR_RATIOS,
            backbone_shapes,
            self.config.BACKBONE_STRIDES,
            self.config.RPN_ANCHOR_STRIDE)
        # Keep a copy of the latest anchors in pixel coordinates because
        # it's used in inspect_model notebooks.
        # TODO: Remove this after the notebooks are refactored to not use it
        self.anchors = a
        # Normalize coordinates
        self._anchor_cache[tuple(image_shape)] = utils.norm_boxes(a, image_shape[:2])
    return self._anchor_cache[tuple(image_shape)]
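Example #1 finishes by converting the cached pixel-coordinate anchors to normalized [0, 1] coordinates with utils.norm_boxes(). For context, here is a sketch of that helper as it appears in the Matterport codebase; the forks above may differ in detail.

import numpy as np

def norm_boxes(boxes, shape):
    """Converts boxes from pixel coordinates to normalized coordinates.
    boxes: [N, (y1, x1, y2, x2)] in pixel coordinates
    shape: [height, width] in pixels
    """
    h, w = shape
    scale = np.array([h - 1, w - 1, h - 1, w - 1])
    shift = np.array([0, 0, 1, 1])
    # In pixel coordinates (y2, x2) lie outside the box, so 1 is
    # subtracted before scaling to place them inside in normalized space.
    return np.divide((boxes - shift), scale).astype(np.float32)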
Example #2
Source File: model.py From i.ann.maskrcnn with GNU General Public License v2.0
def get_anchors(self, image_shape):
    """Returns anchor pyramid for the given image size."""
    backbone_shapes = compute_backbone_shapes(self.config, image_shape)
    # Cache anchors and reuse if image shape is the same
    if not hasattr(self, "_anchor_cache"):
        self._anchor_cache = {}
    if not tuple(image_shape) in self._anchor_cache:
        # Generate Anchors
        a = utils.generate_pyramid_anchors(
            self.config.RPN_ANCHOR_SCALES,
            self.config.RPN_ANCHOR_RATIOS,
            backbone_shapes,
            self.config.BACKBONE_STRIDES,
            self.config.RPN_ANCHOR_STRIDE)
        # Keep a copy of the latest anchors in pixel coordinates because
        # it's used in inspect_model notebooks.
        # TODO: Remove this after the notebooks are refactored to not use it
        self.anchors = a
        # Normalize coordinates
        self._anchor_cache[tuple(image_shape)] = utils.norm_boxes(a, image_shape[:2])
    return self._anchor_cache[tuple(image_shape)]
Example #3
Source File: model.py From CFUN with MIT License
def __init__(self, dataset, config, augmentation=False):
    """A data_generator that returns the image, mask and other ground truth for training."""
    self.image_ids = np.copy(dataset.image_ids)
    self.dataset = dataset
    self.config = config
    self.augmentation = augmentation
    # Anchors
    # [anchor_count, (z1, y1, x1, z2, y2, x2)]
    self.anchors = utils.generate_pyramid_anchors(config.RPN_ANCHOR_SCALES,
                                                  config.RPN_ANCHOR_RATIOS,
                                                  compute_backbone_shapes(config, config.IMAGE_SHAPE),
                                                  config.BACKBONE_STRIDES,
                                                  config.RPN_ANCHOR_STRIDE)
Example #4
Source File: model.py From decentralized_AI with GNU General Public License v2.0
def __init__(self, dataset, config, augment=True):
    """A generator that returns images and corresponding target class ids,
    bounding box deltas, and masks.

    dataset: The Dataset object to pick data from
    config: The model config object
    shuffle: If True, shuffles the samples before every epoch
    augment: If True, applies image augmentation to images (currently only
        horizontal flips are supported)

    Returns a Python generator. Upon calling next() on it, the generator
    returns two lists, inputs and outputs. The contents of the lists differ
    depending on the received arguments:
    inputs list:
    - images: [batch, H, W, C]
    - image_metas: [batch, size of image meta]
    - rpn_match: [batch, N] Integer (1=positive anchor, -1=negative, 0=neutral)
    - rpn_bbox: [batch, N, (dy, dx, log(dh), log(dw))] Anchor bbox deltas.
    - gt_class_ids: [batch, MAX_GT_INSTANCES] Integer class IDs
    - gt_boxes: [batch, MAX_GT_INSTANCES, (y1, x1, y2, x2)]
    - gt_masks: [batch, height, width, MAX_GT_INSTANCES]. The height and width
        are those of the image unless use_mini_mask is True, in which case
        they are defined in MINI_MASK_SHAPE.

    outputs list: Usually empty in regular training. But if detection_targets
        is True then the outputs list contains target class_ids, bbox deltas,
        and masks.
    """
    self.b = 0  # batch item index
    self.image_index = -1
    self.image_ids = np.copy(dataset.image_ids)
    self.error_count = 0
    self.dataset = dataset
    self.config = config
    self.augment = augment
    # Anchors
    # [anchor_count, (y1, x1, y2, x2)]
    self.anchors = utils.generate_pyramid_anchors(config.RPN_ANCHOR_SCALES,
                                                  config.RPN_ANCHOR_RATIOS,
                                                  config.BACKBONE_SHAPES,
                                                  config.BACKBONE_STRIDES,
                                                  config.RPN_ANCHOR_STRIDE)
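In generator examples like this one, self.anchors is computed once in the constructor and reused for every image to build the rpn_match and rpn_bbox targets described in the docstring. A sketch of that per-image step, assuming the Matterport-style build_rpn_targets() helper that these model.py files define alongside the generator (names are illustrative):

# Inside the generator loop, one image at a time (sketch):
rpn_match, rpn_bbox = build_rpn_targets(image.shape, self.anchors,
                                        gt_class_ids, gt_boxes, self.config)
# rpn_match: [anchor_count], 1=positive, -1=negative, 0=neutral
# rpn_bbox: anchor refinement deltas (dy, dx, log(dh), log(dw)) for positives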
Example #5
Source File: model.py From decentralized_AI with GNU General Public License v2.0
def build(self, config):
    """Build Mask R-CNN architecture."""
    # Image size must be divisible by 2 multiple times
    h, w = config.IMAGE_SHAPE[:2]
    if h / 2**6 != int(h / 2**6) or w / 2**6 != int(w / 2**6):
        raise Exception("Image size must be divisible by 2 at least 6 times "
                        "to avoid fractions when downscaling and upscaling. "
                        "For example, use 256, 320, 384, 448, 512, ... etc.")

    # Build the shared convolutional layers.
    # Bottom-up Layers
    # Returns a list of the last layers of each stage, 5 in total.
    # Don't create the head (stage 5), so we pick the 4th item in the list.
    resnet = ResNet("resnet101", stage5=True)
    C1, C2, C3, C4, C5 = resnet.stages()

    # Top-down Layers
    # TODO: add assert to verify feature map sizes match what's in config
    self.fpn = FPN(C1, C2, C3, C4, C5, out_channels=256)

    # Generate Anchors
    self.anchors = Variable(
        torch.from_numpy(utils.generate_pyramid_anchors(config.RPN_ANCHOR_SCALES,
                                                        config.RPN_ANCHOR_RATIOS,
                                                        config.BACKBONE_SHAPES,
                                                        config.BACKBONE_STRIDES,
                                                        config.RPN_ANCHOR_STRIDE)).float(),
        requires_grad=False)
    if self.config.GPU_COUNT:
        self.anchors = self.anchors.cuda()

    # RPN
    self.rpn = RPN(len(config.RPN_ANCHOR_RATIOS), config.RPN_ANCHOR_STRIDE, 256)

    # FPN Classifier
    self.classifier = Classifier(256, config.POOL_SIZE, config.IMAGE_SHAPE, config.NUM_CLASSES)

    # FPN Mask
    self.mask = Mask(256, config.MASK_POOL_SIZE, config.IMAGE_SHAPE, config.NUM_CLASSES)

    # Fix batch norm layers
    def set_bn_fix(m):
        classname = m.__class__.__name__
        if classname.find('BatchNorm') != -1:
            for p in m.parameters():
                p.requires_grad = False

    self.apply(set_bn_fix)
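A side note on these PyTorch ports: wrapping the anchors in torch.autograd.Variable is the pre-0.4 PyTorch API; since PyTorch 0.4, Variable has been merged into Tensor. On current PyTorch the same anchor constant could be created as below (a sketch, not code from any listed project):

import torch

anchors = torch.from_numpy(
    utils.generate_pyramid_anchors(config.RPN_ANCHOR_SCALES,
                                   config.RPN_ANCHOR_RATIOS,
                                   config.BACKBONE_SHAPES,
                                   config.BACKBONE_STRIDES,
                                   config.RPN_ANCHOR_STRIDE)).float()
# Tensors created from NumPy default to requires_grad=False,
# which is what we want: anchors are constants, not parameters.
if config.GPU_COUNT:
    anchors = anchors.cuda()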
Example #6
Source File: model.py From CFUN with MIT License
def build(self, config, test_flag=False):
    """Build 3D-Mask-RCNN architecture."""
    # Image size must be divisible by 2 multiple times
    h, w, d = config.IMAGE_SHAPE[:3]
    if h / 16 != int(h / 16) or w / 16 != int(w / 16) or d / 16 != int(d / 16):
        raise Exception("Image size must be divisible by 16. Use 256, 320, 512, ... etc.")

    # Build the shared convolutional layers.
    # Returns a list of the last layers of each stage, 3 in total.
    if self.config.BACKBONE == "P3D19":
        print("using P3D19 as backbone")
        P3D_Resnet = backbone.P3D19(config=config)
    else:
        print("using P3D35 as backbone")
        P3D_Resnet = backbone.P3D35(config=config)
    C1, C2, C3 = P3D_Resnet.stages()

    # Top-down Layers
    self.fpn = FPN(C1, C2, C3, out_channels=config.TOP_DOWN_PYRAMID_SIZE, config=config)

    # Generate Anchors
    self.anchors = Variable(
        torch.from_numpy(utils.generate_pyramid_anchors(config.RPN_ANCHOR_SCALES,
                                                        config.RPN_ANCHOR_RATIOS,
                                                        compute_backbone_shapes(config, config.IMAGE_SHAPE),
                                                        config.BACKBONE_STRIDES,
                                                        config.RPN_ANCHOR_STRIDE)).float(),
        requires_grad=False)
    if self.config.GPU_COUNT:
        self.anchors = self.anchors.cuda()

    # RPN
    self.rpn = RPN(len(config.RPN_ANCHOR_RATIOS), config.RPN_ANCHOR_STRIDE,
                   config.TOP_DOWN_PYRAMID_SIZE, config.RPN_CONV_CHANNELS)

    if self.config.STAGE != 'beginning':
        for p in self.parameters():
            p.requires_grad = False

    # FPN Classifier
    self.classifier = Classifier(config.TOP_DOWN_PYRAMID_SIZE, config.POOL_SIZE,
                                 config.IMAGE_SHAPE, 2,
                                 config.FPN_CLASSIFY_FC_LAYERS_SIZE, test_flag)

    # FPN Mask
    self.mask = Mask(1, config.MASK_POOL_SIZE, config.NUM_CLASSES,
                     config.UNET_MASK_BRANCH_CHANNEL, self.config.STAGE, test_flag)

    if test_flag:
        for p in self.parameters():
            p.requires_grad = False

    # Fix batch norm layers
    def set_bn_fix(m):
        classname = m.__class__.__name__
        if classname.find('BatchNorm') != -1:
            for p in m.parameters():
                p.requires_grad = False

    if not config.TRAIN_BN:
        self.apply(set_bn_fix)
Example #7
Source File: model.py From CFUN with MIT License
def build(self, config, test_flag=False):
    """Build 3D-Mask-RCNN architecture."""
    # Image size must be divisible by 2 multiple times
    h, w, d = config.IMAGE_SHAPE[:3]
    if h / 16 != int(h / 16) or w / 16 != int(w / 16) or d / 16 != int(d / 16):
        raise Exception("Image size must be divisible by 16. Use 256, 320, 512, ... etc.")

    # Build the shared convolutional layers.
    # Returns a list of the last layers of each stage, 3 in total.
    P3D_Resnet = backbone.P3D19(config=config)
    C1, C2, C3 = P3D_Resnet.stages()

    # Top-down Layers
    self.fpn = FPN(C1, C2, C3, out_channels=config.TOP_DOWN_PYRAMID_SIZE, config=config)

    # Generate Anchors
    self.anchors = Variable(
        torch.from_numpy(utils.generate_pyramid_anchors(config.RPN_ANCHOR_SCALES,
                                                        config.RPN_ANCHOR_RATIOS,
                                                        compute_backbone_shapes(config, config.IMAGE_SHAPE),
                                                        config.BACKBONE_STRIDES,
                                                        config.RPN_ANCHOR_STRIDE)).float(),
        requires_grad=False)
    if self.config.GPU_COUNT:
        self.anchors = self.anchors.cuda()

    # RPN
    self.rpn = RPN(len(config.RPN_ANCHOR_RATIOS), config.RPN_ANCHOR_STRIDE,
                   config.TOP_DOWN_PYRAMID_SIZE, config.RPN_CONV_CHANNELS)

    # FPN Classifier
    self.classifier = Classifier(config.TOP_DOWN_PYRAMID_SIZE, config.POOL_SIZE,
                                 config.IMAGE_SHAPE, 2,
                                 config.FPN_CLASSIFY_FC_LAYERS_SIZE, test_flag)

    # FPN Mask
    self.mask = Mask(1, config.MASK_POOL_SIZE, config.NUM_CLASSES,
                     config.UNET_MASK_BRANCH_CHANNEL, self.config.STAGE, test_flag)

    # Fix batch norm layers
    def set_bn_fix(m):
        classname = m.__class__.__name__
        if classname.find('BatchNorm') != -1:
            for p in m.parameters():
                p.requires_grad = False

    if not config.TRAIN_BN:
        self.apply(set_bn_fix)
Example #8
Source File: model.py From cvpr-2018-autonomous-driving-autopilot-solution with MIT License
def __init__(self, dataset, config, augment=True, augmentation=None):
    """A generator that returns images and corresponding target class ids,
    bounding box deltas, and masks.

    dataset: The Dataset object to pick data from
    config: The model config object
    shuffle: If True, shuffles the samples before every epoch
    augment: If True, applies image augmentation to images (currently only
        horizontal flips are supported)

    Returns a Python generator. Upon calling next() on it, the generator
    returns two lists, inputs and outputs. The contents of the lists differ
    depending on the received arguments:
    inputs list:
    - images: [batch, H, W, C]
    - image_metas: [batch, size of image meta]
    - rpn_match: [batch, N] Integer (1=positive anchor, -1=negative, 0=neutral)
    - rpn_bbox: [batch, N, (dy, dx, log(dh), log(dw))] Anchor bbox deltas.
    - gt_class_ids: [batch, MAX_GT_INSTANCES] Integer class IDs
    - gt_boxes: [batch, MAX_GT_INSTANCES, (y1, x1, y2, x2)]
    - gt_masks: [batch, height, width, MAX_GT_INSTANCES]. The height and width
        are those of the image unless use_mini_mask is True, in which case
        they are defined in MINI_MASK_SHAPE.

    outputs list: Usually empty in regular training. But if detection_targets
        is True then the outputs list contains target class_ids, bbox deltas,
        and masks.
    """
    self.b = 0  # batch item index
    self.image_index = -1
    self.image_ids = np.copy(dataset.image_ids)
    self.error_count = 0
    self.dataset = dataset
    self.config = config
    self.augment = augment
    self.augmentation = augmentation
    # Anchors
    # [anchor_count, (y1, x1, y2, x2)]
    self.anchors = utils.generate_pyramid_anchors(config.RPN_ANCHOR_SCALES,
                                                  config.RPN_ANCHOR_RATIOS,
                                                  config.BACKBONE_SHAPES,
                                                  config.BACKBONE_STRIDES,
                                                  config.RPN_ANCHOR_STRIDE)
Example #9
Source File: model.py From cvpr-2018-autonomous-driving-autopilot-solution with MIT License
def build(self, config):
    """Build Mask R-CNN architecture."""
    # Image size must be divisible by 2 multiple times
    h, w = config.IMAGE_SHAPE[:2]
    if h / 2**6 != int(h / 2**6) or w / 2**6 != int(w / 2**6):
        raise Exception("Image size must be divisible by 2 at least 6 times "
                        "to avoid fractions when downscaling and upscaling. "
                        "For example, use 256, 320, 384, 448, 512, ... etc.")

    # Build the shared convolutional layers.
    # Bottom-up Layers
    # Returns a list of the last layers of each stage, 5 in total.
    # Don't create the head (stage 5), so we pick the 4th item in the list.
    resnet = ResNet("resnet101", stage5=True)
    C1, C2, C3, C4, C5 = resnet.stages()

    # Top-down Layers
    # TODO: add assert to verify feature map sizes match what's in config
    self.fpn = FPN(C1, C2, C3, C4, C5, out_channels=256)

    # Generate Anchors
    self.anchors = Variable(
        torch.from_numpy(utils.generate_pyramid_anchors(config.RPN_ANCHOR_SCALES,
                                                        config.RPN_ANCHOR_RATIOS,
                                                        config.BACKBONE_SHAPES,
                                                        config.BACKBONE_STRIDES,
                                                        config.RPN_ANCHOR_STRIDE)).float(),
        requires_grad=False)
    if self.config.GPU_COUNT:
        self.anchors = self.anchors.cuda()

    # RPN
    self.rpn = RPN(len(config.RPN_ANCHOR_RATIOS), config.RPN_ANCHOR_STRIDE, 256)

    # FPN Classifier
    self.classifier = Classifier(256, config.POOL_SIZE, config.IMAGE_SHAPE, config.NUM_CLASSES)

    # FPN Mask
    self.mask = Mask(256, config.MASK_POOL_SIZE, config.IMAGE_SHAPE, config.NUM_CLASSES)

    # Fix batch norm layers
    def set_bn_fix(m):
        classname = m.__class__.__name__
        if classname.find('BatchNorm') != -1:
            for p in m.parameters():
                p.requires_grad = False

    self.apply(set_bn_fix)
Example #10
Source File: model_seresnext.py From cvpr-2018-autonomous-driving-autopilot-solution with MIT License
def __init__(self, dataset, config, augment=True, augmentation=None):
    """A generator that returns images and corresponding target class ids,
    bounding box deltas, and masks.

    dataset: The Dataset object to pick data from
    config: The model config object
    shuffle: If True, shuffles the samples before every epoch
    augment: If True, applies image augmentation to images (currently only
        horizontal flips are supported)

    Returns a Python generator. Upon calling next() on it, the generator
    returns two lists, inputs and outputs. The contents of the lists differ
    depending on the received arguments:
    inputs list:
    - images: [batch, H, W, C]
    - image_metas: [batch, size of image meta]
    - rpn_match: [batch, N] Integer (1=positive anchor, -1=negative, 0=neutral)
    - rpn_bbox: [batch, N, (dy, dx, log(dh), log(dw))] Anchor bbox deltas.
    - gt_class_ids: [batch, MAX_GT_INSTANCES] Integer class IDs
    - gt_boxes: [batch, MAX_GT_INSTANCES, (y1, x1, y2, x2)]
    - gt_masks: [batch, height, width, MAX_GT_INSTANCES]. The height and width
        are those of the image unless use_mini_mask is True, in which case
        they are defined in MINI_MASK_SHAPE.

    outputs list: Usually empty in regular training. But if detection_targets
        is True then the outputs list contains target class_ids, bbox deltas,
        and masks.
    """
    self.b = 0  # batch item index
    self.image_index = -1
    self.image_ids = np.copy(dataset.image_ids)
    self.error_count = 0
    self.dataset = dataset
    self.config = config
    self.augment = augment
    self.augmentation = augmentation
    # Anchors
    # [anchor_count, (y1, x1, y2, x2)]
    self.anchors = utils.generate_pyramid_anchors(config.RPN_ANCHOR_SCALES,
                                                  config.RPN_ANCHOR_RATIOS,
                                                  config.BACKBONE_SHAPES,
                                                  config.BACKBONE_STRIDES,
                                                  config.RPN_ANCHOR_STRIDE)
Example #11
Source File: model_seresnext.py From cvpr-2018-autonomous-driving-autopilot-solution with MIT License
def build(self, config):
    """Build Mask R-CNN architecture."""
    # Image size must be divisible by 2 multiple times
    h, w = config.IMAGE_SHAPE[:2]
    if h / 2**6 != int(h / 2**6) or w / 2**6 != int(w / 2**6):
        raise Exception("Image size must be divisible by 2 at least 6 times "
                        "to avoid fractions when downscaling and upscaling. "
                        "For example, use 256, 320, 384, 448, 512, ... etc.")

    # Build the shared convolutional layers.
    # Bottom-up Layers
    # Returns a list of the last layers of each stage, 5 in total.
    # Don't create the head (stage 5), so we pick the 4th item in the list.
    resnet = se_resnext101_32x4d(stage5=True)
    C1, C2, C3, C4, C5 = resnet.stages()

    # Top-down Layers
    # TODO: add assert to verify feature map sizes match what's in config
    self.fpn = FPN(C1, C2, C3, C4, C5, out_channels=256)

    # Generate Anchors
    self.anchors = Variable(
        torch.from_numpy(utils.generate_pyramid_anchors(config.RPN_ANCHOR_SCALES,
                                                        config.RPN_ANCHOR_RATIOS,
                                                        config.BACKBONE_SHAPES,
                                                        config.BACKBONE_STRIDES,
                                                        config.RPN_ANCHOR_STRIDE)).float(),
        requires_grad=False)
    if self.config.GPU_COUNT:
        self.anchors = self.anchors.cuda()

    # RPN
    self.rpn = RPN(len(config.RPN_ANCHOR_RATIOS), config.RPN_ANCHOR_STRIDE, 256)

    # FPN Classifier
    self.classifier = Classifier(256, config.POOL_SIZE, config.IMAGE_SHAPE, config.NUM_CLASSES)

    # FPN Mask
    self.mask = Mask(256, config.MASK_POOL_SIZE, config.IMAGE_SHAPE, config.NUM_CLASSES)

    # Fix batch norm layers
    def set_bn_fix(m):
        classname = m.__class__.__name__
        if classname.find('BatchNorm') != -1:
            for p in m.parameters():
                p.requires_grad = False

    self.apply(set_bn_fix)
Example #12
Source File: model_resnext_v2.py From cvpr-2018-autonomous-driving-autopilot-solution with MIT License
def __init__(self, dataset, config, augment=True):
    """A generator that returns images and corresponding target class ids,
    bounding box deltas, and masks.

    dataset: The Dataset object to pick data from
    config: The model config object
    shuffle: If True, shuffles the samples before every epoch
    augment: If True, applies image augmentation to images (currently only
        horizontal flips are supported)

    Returns a Python generator. Upon calling next() on it, the generator
    returns two lists, inputs and outputs. The contents of the lists differ
    depending on the received arguments:
    inputs list:
    - images: [batch, H, W, C]
    - image_metas: [batch, size of image meta]
    - rpn_match: [batch, N] Integer (1=positive anchor, -1=negative, 0=neutral)
    - rpn_bbox: [batch, N, (dy, dx, log(dh), log(dw))] Anchor bbox deltas.
    - gt_class_ids: [batch, MAX_GT_INSTANCES] Integer class IDs
    - gt_boxes: [batch, MAX_GT_INSTANCES, (y1, x1, y2, x2)]
    - gt_masks: [batch, height, width, MAX_GT_INSTANCES]. The height and width
        are those of the image unless use_mini_mask is True, in which case
        they are defined in MINI_MASK_SHAPE.

    outputs list: Usually empty in regular training. But if detection_targets
        is True then the outputs list contains target class_ids, bbox deltas,
        and masks.
    """
    self.b = 0  # batch item index
    self.image_index = -1
    self.image_ids = np.copy(dataset.image_ids)
    self.error_count = 0
    self.dataset = dataset
    self.config = config
    self.augment = augment
    # Anchors
    # [anchor_count, (y1, x1, y2, x2)]
    self.anchors = utils.generate_pyramid_anchors(config.RPN_ANCHOR_SCALES,
                                                  config.RPN_ANCHOR_RATIOS,
                                                  config.BACKBONE_SHAPES,
                                                  config.BACKBONE_STRIDES,
                                                  config.RPN_ANCHOR_STRIDE)
Example #13
Source File: model_resnext.py From cvpr-2018-autonomous-driving-autopilot-solution with MIT License
def __init__(self, dataset, config, augment=True):
    """A generator that returns images and corresponding target class ids,
    bounding box deltas, and masks.

    dataset: The Dataset object to pick data from
    config: The model config object
    shuffle: If True, shuffles the samples before every epoch
    augment: If True, applies image augmentation to images (currently only
        horizontal flips are supported)

    Returns a Python generator. Upon calling next() on it, the generator
    returns two lists, inputs and outputs. The contents of the lists differ
    depending on the received arguments:
    inputs list:
    - images: [batch, H, W, C]
    - image_metas: [batch, size of image meta]
    - rpn_match: [batch, N] Integer (1=positive anchor, -1=negative, 0=neutral)
    - rpn_bbox: [batch, N, (dy, dx, log(dh), log(dw))] Anchor bbox deltas.
    - gt_class_ids: [batch, MAX_GT_INSTANCES] Integer class IDs
    - gt_boxes: [batch, MAX_GT_INSTANCES, (y1, x1, y2, x2)]
    - gt_masks: [batch, height, width, MAX_GT_INSTANCES]. The height and width
        are those of the image unless use_mini_mask is True, in which case
        they are defined in MINI_MASK_SHAPE.

    outputs list: Usually empty in regular training. But if detection_targets
        is True then the outputs list contains target class_ids, bbox deltas,
        and masks.
    """
    self.b = 0  # batch item index
    self.image_index = -1
    self.image_ids = np.copy(dataset.image_ids)
    self.error_count = 0
    self.dataset = dataset
    self.config = config
    self.augment = augment
    # Anchors
    # [anchor_count, (y1, x1, y2, x2)]
    self.anchors = utils.generate_pyramid_anchors(config.RPN_ANCHOR_SCALES,
                                                  config.RPN_ANCHOR_RATIOS,
                                                  config.BACKBONE_SHAPES,
                                                  config.BACKBONE_STRIDES,
                                                  config.RPN_ANCHOR_STRIDE)
Example #14
Source File: model_resnext.py From cvpr-2018-autonomous-driving-autopilot-solution with MIT License
def build(self, config):
    """Build Mask R-CNN architecture."""
    # Image size must be divisible by 2 multiple times
    h, w = config.IMAGE_SHAPE[:2]
    if h / 2**6 != int(h / 2**6) or w / 2**6 != int(w / 2**6):
        raise Exception("Image size must be divisible by 2 at least 6 times "
                        "to avoid fractions when downscaling and upscaling. "
                        "For example, use 256, 320, 384, 448, 512, ... etc.")

    # Build the shared convolutional layers.
    # Bottom-up Layers
    # Returns a list of the last layers of each stage, 5 in total.
    # Don't create the head (stage 5), so we pick the 4th item in the list.
    resnet = ResNext(stage5=True)
    C1, C2, C3, C4, C5 = resnet.stages()

    # Top-down Layers
    # TODO: add assert to verify feature map sizes match what's in config
    self.fpn = FPN(C1, C2, C3, C4, C5, out_channels=256)

    # Generate Anchors
    self.anchors = Variable(
        torch.from_numpy(utils.generate_pyramid_anchors(config.RPN_ANCHOR_SCALES,
                                                        config.RPN_ANCHOR_RATIOS,
                                                        config.BACKBONE_SHAPES,
                                                        config.BACKBONE_STRIDES,
                                                        config.RPN_ANCHOR_STRIDE)).float(),
        requires_grad=False)
    if self.config.GPU_COUNT:
        self.anchors = self.anchors.cuda()

    # RPN
    self.rpn = RPN(len(config.RPN_ANCHOR_RATIOS), config.RPN_ANCHOR_STRIDE, 256)

    # FPN Classifier
    self.classifier = Classifier(256, config.POOL_SIZE, config.IMAGE_SHAPE, config.NUM_CLASSES)

    # FPN Mask
    self.mask = Mask(256, config.MASK_POOL_SIZE, config.IMAGE_SHAPE, config.NUM_CLASSES)

    # Fix batch norm layers
    def set_bn_fix(m):
        classname = m.__class__.__name__
        if classname.find('BatchNorm') != -1:
            for p in m.parameters():
                p.requires_grad = False

    self.apply(set_bn_fix)
Example #15
Source File: model_resnet_v2.py From cvpr-2018-autonomous-driving-autopilot-solution with MIT License
def __init__(self, dataset, config, augment=True, augmentation=None):
    """A generator that returns images and corresponding target class ids,
    bounding box deltas, and masks.

    dataset: The Dataset object to pick data from
    config: The model config object
    shuffle: If True, shuffles the samples before every epoch
    augment: If True, applies image augmentation to images (currently only
        horizontal flips are supported)

    Returns a Python generator. Upon calling next() on it, the generator
    returns two lists, inputs and outputs. The contents of the lists differ
    depending on the received arguments:
    inputs list:
    - images: [batch, H, W, C]
    - image_metas: [batch, size of image meta]
    - rpn_match: [batch, N] Integer (1=positive anchor, -1=negative, 0=neutral)
    - rpn_bbox: [batch, N, (dy, dx, log(dh), log(dw))] Anchor bbox deltas.
    - gt_class_ids: [batch, MAX_GT_INSTANCES] Integer class IDs
    - gt_boxes: [batch, MAX_GT_INSTANCES, (y1, x1, y2, x2)]
    - gt_masks: [batch, height, width, MAX_GT_INSTANCES]. The height and width
        are those of the image unless use_mini_mask is True, in which case
        they are defined in MINI_MASK_SHAPE.

    outputs list: Usually empty in regular training. But if detection_targets
        is True then the outputs list contains target class_ids, bbox deltas,
        and masks.
    """
    self.b = 0  # batch item index
    self.image_index = -1
    self.image_ids = np.copy(dataset.image_ids)
    self.error_count = 0
    self.dataset = dataset
    self.config = config
    self.augment = augment
    self.augmentation = augmentation
    # Anchors
    # [anchor_count, (y1, x1, y2, x2)]
    self.anchors = utils.generate_pyramid_anchors(config.RPN_ANCHOR_SCALES,
                                                  config.RPN_ANCHOR_RATIOS,
                                                  config.BACKBONE_SHAPES,
                                                  config.BACKBONE_STRIDES,
                                                  config.RPN_ANCHOR_STRIDE)
Example #16
Source File: model_resnet_v2.py From cvpr-2018-autonomous-driving-autopilot-solution with MIT License
def build(self, config):
    """Build Mask R-CNN architecture."""
    # Image size must be divisible by 2 multiple times
    h, w = config.IMAGE_SHAPE[:2]
    if h / 2**6 != int(h / 2**6) or w / 2**6 != int(w / 2**6):
        raise Exception("Image size must be divisible by 2 at least 6 times "
                        "to avoid fractions when downscaling and upscaling. "
                        "For example, use 256, 320, 384, 448, 512, ... etc.")

    # Build the shared convolutional layers.
    # Bottom-up Layers
    # Returns a list of the last layers of each stage, 5 in total.
    # Don't create the head (stage 5), so we pick the 4th item in the list.
    resnet = ResNet("resnet101", stage5=True)
    C1, C2, C3, C4, C5 = resnet.stages()

    # Top-down Layers
    # TODO: add assert to verify feature map sizes match what's in config
    self.fpn = FPN(C1, C2, C3, C4, C5, out_channels=256)

    # Generate Anchors
    self.anchors = Variable(
        torch.from_numpy(utils.generate_pyramid_anchors(config.RPN_ANCHOR_SCALES,
                                                        config.RPN_ANCHOR_RATIOS,
                                                        config.BACKBONE_SHAPES,
                                                        config.BACKBONE_STRIDES,
                                                        config.RPN_ANCHOR_STRIDE)).float(),
        requires_grad=False)
    if self.config.GPU_COUNT:
        self.anchors = self.anchors.cuda()

    # RPN
    self.rpn = RPN(len(config.RPN_ANCHOR_RATIOS), config.RPN_ANCHOR_STRIDE, 256)

    # FPN Classifier
    self.classifier = Classifier(256, config.POOL_SIZE, config.IMAGE_SHAPE, config.NUM_CLASSES)

    # FPN Mask
    self.mask = Mask(256, config.MASK_POOL_SIZE, config.IMAGE_SHAPE, config.NUM_CLASSES)

    # Fix batch norm layers
    def set_bn_fix(m):
        classname = m.__class__.__name__
        if classname.find('BatchNorm') != -1:
            for p in m.parameters():
                p.requires_grad = False

    self.apply(set_bn_fix)