Python tensorflow.contrib.layers.python.layers.utils.convert_collection_to_dict() Examples

The following are 30 code examples of tensorflow.contrib.layers.python.layers.utils.convert_collection_to_dict(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module tensorflow.contrib.layers.python.layers.utils, or try the search function.
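Before the examples, here is a minimal, self-contained sketch of the pattern they all share: slim layers write their outputs into a named graph collection, and convert_collection_to_dict() turns that collection into an ordered {alias: tensor} mapping of end points. This assumes TensorFlow 1.x with tf.contrib available; the network and layer names are illustrative and not taken from any of the projects below.

# Minimal sketch (assumes TensorFlow 1.x with tf.contrib; layer names are illustrative).
import tensorflow as tf
import tensorflow.contrib.slim as slim
from tensorflow.contrib.layers.python.layers import utils

def tiny_net(inputs):
  with tf.variable_scope('tiny_net') as sc:
    # Route every slim.conv2d output into a named graph collection.
    end_points_collection = sc.original_name_scope + '_end_points'
    with slim.arg_scope([slim.conv2d],
                        outputs_collections=end_points_collection):
      net = slim.conv2d(inputs, 16, [3, 3], scope='conv1')
      net = slim.conv2d(net, 32, [3, 3], scope='conv2')
    # Convert the collection into an OrderedDict of {alias: tensor}.
    end_points = utils.convert_collection_to_dict(end_points_collection)
    return net, end_points

images = tf.placeholder(tf.float32, [None, 64, 64, 3])
net, end_points = tiny_net(images)
for alias, tensor in end_points.items():
  print(alias, tensor.shape)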
Example #1
Source File: nets.py From DeepMatchVO with MIT License | 5 votes |
def pose_net(tgt_image, src_image_stack, is_training=True):
  inputs = tf.concat([tgt_image, src_image_stack], axis=3)
  num_source = int(src_image_stack.get_shape()[3].value // 3)
  with tf.variable_scope('pose_exp_net') as sc:
    end_points_collection = sc.original_name_scope + '_end_points'
    with slim.arg_scope([slim.conv2d, slim.conv2d_transpose],
                        normalizer_fn=None,
                        weights_regularizer=slim.l2_regularizer(1e-4),
                        activation_fn=tf.nn.relu,
                        outputs_collections=end_points_collection):
      # cnv1 to cnv5b are shared between pose and explainability prediction
      cnv1 = slim.conv2d(inputs, 16, [7, 7], stride=2, scope='cnv1')
      cnv2 = slim.conv2d(cnv1, 32, [5, 5], stride=2, scope='cnv2')
      cnv3 = slim.conv2d(cnv2, 64, [3, 3], stride=2, scope='cnv3')
      cnv4 = slim.conv2d(cnv3, 128, [3, 3], stride=2, scope='cnv4')
      cnv5 = slim.conv2d(cnv4, 256, [3, 3], stride=2, scope='cnv5')
      # Pose specific layers
      with tf.variable_scope('pose'):
        cnv6 = slim.conv2d(cnv5, 256, [3, 3], stride=2, scope='cnv6')
        cnv7 = slim.conv2d(cnv6, 256, [3, 3], stride=2, scope='cnv7')
        pose_pred = slim.conv2d(cnv7, 6 * num_source, [1, 1], scope='pred',
                                stride=1, normalizer_fn=None, activation_fn=None)
        pose_avg = tf.reduce_mean(pose_pred, [1, 2])
        # Empirically we found that scaling by a small constant facilitates training.
        pose_final = 0.01 * tf.reshape(pose_avg, [-1, num_source, 6])
      # Exp mask specific layers
      end_points = utils.convert_collection_to_dict(end_points_collection)
      return pose_final, end_points

# Adapt from https://github.com/yzcjtr/GeoNet/blob/master/geonet_nets.py
Example #2
Source File: slim_resnet_utils.py From X-Detector with Apache License 2.0 | 5 votes |
def resnet_v1_backbone(inputs, blocks, is_training=True, output_stride=None,
                       include_root_block=True, reuse=None, scope=None):
  with variable_scope.variable_scope(
      scope, 'resnet_v1', [inputs], reuse=reuse) as sc:
    end_points_collection = sc.original_name_scope + '_end_points'
    with arg_scope(
        [layers.conv2d, bottleneck, resnet_utils.stack_blocks_dense],
        outputs_collections=end_points_collection):
      with arg_scope([layers.batch_norm], is_training=is_training):
        net = inputs
        if include_root_block:
          if output_stride is not None:
            if output_stride % 4 != 0:
              raise ValueError('The output_stride needs to be a multiple of 4.')
            output_stride /= 4
          net = resnet_utils.conv2d_same(net, 64, 7, stride=2, scope='conv1')
          net = layers_lib.max_pool2d(net, [3, 3], stride=2, scope='pool1')
        net = resnet_utils.stack_blocks_dense(net, blocks, output_stride)
        # Convert end_points_collection into a dictionary of end_points.
        end_points = utils.convert_collection_to_dict(end_points_collection)
        return net, end_points
Example #3
Source File: resnet_v1_test.py From keras-lambda with MIT License | 5 votes |
def _resnet_plain(self, inputs, blocks, output_stride=None, scope=None):
  """A plain ResNet without extra layers before or after the ResNet blocks."""
  with variable_scope.variable_scope(scope, values=[inputs]):
    with arg_scope([layers.conv2d], outputs_collections='end_points'):
      net = resnet_utils.stack_blocks_dense(inputs, blocks, output_stride)
      end_points = utils.convert_collection_to_dict('end_points')
      return net, end_points
Example #4
Source File: resnet_v2_test.py From keras-lambda with MIT License | 5 votes |
def _resnet_plain(self, inputs, blocks, output_stride=None, scope=None):
  """A plain ResNet without extra layers before or after the ResNet blocks."""
  with variable_scope.variable_scope(scope, values=[inputs]):
    with arg_scope([layers.conv2d], outputs_collections='end_points'):
      net = resnet_utils.stack_blocks_dense(inputs, blocks, output_stride)
      end_points = utils.convert_collection_to_dict('end_points')
      return net, end_points
Example #5
Source File: nets.py From DF-Net with MIT License | 5 votes |
def pose_net_fb(tgt_image, src_image_stack, is_training=True, reuse=False):
  inputs = tf.concat([tgt_image, src_image_stack], axis=3)
  H = inputs.get_shape()[1].value
  W = inputs.get_shape()[2].value
  num_source = int(src_image_stack.get_shape()[3].value // 3)
  with tf.variable_scope('pose_net') as sc:
    if reuse:
      sc.reuse_variables()
    end_points_collection = sc.original_name_scope + '_end_points'
    with slim.arg_scope([slim.conv2d, slim.conv2d_transpose],
                        normalizer_fn=None,
                        weights_regularizer=slim.l2_regularizer(0.05),
                        activation_fn=tf.nn.relu,
                        outputs_collections=end_points_collection):
      # cnv1 to cnv5b are shared between pose and explainability prediction
      cnv1 = slim.conv2d(inputs, 16, [7, 7], stride=2, scope='cnv1')
      cnv2 = slim.conv2d(cnv1, 32, [5, 5], stride=2, scope='cnv2')
      cnv3 = slim.conv2d(cnv2, 64, [3, 3], stride=2, scope='cnv3')
      cnv4 = slim.conv2d(cnv3, 128, [3, 3], stride=2, scope='cnv4')
      cnv5 = slim.conv2d(cnv4, 256, [3, 3], stride=2, scope='cnv5')
      cnv6 = slim.conv2d(cnv5, 256, [3, 3], stride=2, scope='cnv6')
      cnv7 = slim.conv2d(cnv6, 256, [3, 3], stride=2, scope='cnv7')
      # Double the number of channels
      pose_pred = slim.conv2d(cnv7, 6 * num_source * 2, [1, 1], scope='pred',
                              stride=1, normalizer_fn=None, activation_fn=None)
      pose_avg = tf.reduce_mean(pose_pred, [1, 2])
      # Empirically we found that scaling by a small constant
      # facilitates training.
      # 1st half: target->source, 2nd half: source->target
      pose_final = 0.01 * tf.reshape(pose_avg, [-1, num_source, 6 * 2])
      end_points = utils.convert_collection_to_dict(end_points_collection)
      return pose_final, end_points

# helper functions
# Credit: https://github.com/mrharicot/monodepth/blob/master/monodepth_model.py
Example #6
Source File: resnet_v1_test.py From auto-alt-text-lambda-api with MIT License | 5 votes |
def _resnet_plain(self, inputs, blocks, output_stride=None, scope=None):
  """A plain ResNet without extra layers before or after the ResNet blocks."""
  with variable_scope.variable_scope(scope, values=[inputs]):
    with arg_scope([layers.conv2d], outputs_collections='end_points'):
      net = resnet_utils.stack_blocks_dense(inputs, blocks, output_stride)
      end_points = utils.convert_collection_to_dict('end_points')
      return net, end_points
Example #7
Source File: resnet_v2_test.py From auto-alt-text-lambda-api with MIT License | 5 votes |
def _resnet_plain(self, inputs, blocks, output_stride=None, scope=None):
  """A plain ResNet without extra layers before or after the ResNet blocks."""
  with variable_scope.variable_scope(scope, values=[inputs]):
    with arg_scope([layers.conv2d], outputs_collections='end_points'):
      net = resnet_utils.stack_blocks_dense(inputs, blocks, output_stride)
      end_points = utils.convert_collection_to_dict('end_points')
      return net, end_points
Example #8
Source File: nets.py From layered-scene-inference with Apache License 2.0 | 4 votes |
def decoder_simple(feat, nconv=7, is_training=True, skip_feat=None, reuse=False):
  """Creates a simple decoder CNN.

  Args:
    feat: Input features with size B X nz or B X H X W X nz
    nconv: number of deconv layers
    is_training: whether batch_norm should be in train mode
    skip_feat: additional skip-features per upconv layer
    reuse: Whether to reuse weights from an already defined net

  Returns:
    A decoder CNN which adds nconv upsampling layers.
  """
  batch_norm_params = {'is_training': is_training}
  n_filters = [32, 64, 128, 256]
  if nconv > 4:
    for _ in range(nconv - 4):
      n_filters.append(512)
  with tf.variable_scope('decoder', reuse=reuse) as sc:
    end_points_collection = sc.original_name_scope + '_end_points'
    with slim.arg_scope(
        [slim.conv2d, slim.conv2d_transpose],
        normalizer_fn=slim.batch_norm,
        normalizer_params=batch_norm_params,
        weights_regularizer=slim.l2_regularizer(0.05),
        activation_fn=tf.nn.relu,
        outputs_collections=end_points_collection):
      if feat.get_shape().ndims == 2:
        feat = tf.expand_dims(tf.expand_dims(feat, 1), 1)
      for nc in range(nconv, 0, -1):
        n_filt = n_filters[nc - 1]
        feat = slim.conv2d_transpose(
            feat, n_filt, [4, 4], stride=2, scope='upcnv' + str(nc))
        if (nc > 1) and (skip_feat is not None):
          feat = tf.concat([feat, skip_feat[-nc + 1]], axis=3)
        feat = slim.conv2d(
            feat, n_filt, [3, 3], stride=1, scope='upcnv' + str(nc) + 'b')
      end_points = utils.convert_collection_to_dict(end_points_collection)
      return feat, end_points
Example #9
Source File: vqa_self_attention.py From training_results_v0.5 with Apache License 2.0 | 4 votes |
def body(self, features):
  hp = self.hparams
  # pylint: disable=eval-used
  if hp.image_input_type == "image":
    image_feat = vqa_layers.image_embedding(
        features["inputs"],
        model_fn=eval(hp.image_model_fn),
        trainable=hp.train_resnet,
        is_training=hp.mode == tf.estimator.ModeKeys.TRAIN)
  else:
    image_feat = features["inputs"]

  image_feat = common_layers.flatten4d3d(image_feat)
  image_hidden_size = hp.hidden_size
  image_feat = common_layers.dense(image_feat, image_hidden_size)
  utils.collect_named_outputs("norms", "image_feat_after_proj",
                              tf.norm(image_feat, axis=-1))

  question = common_layers.flatten4d3d(features["question"])
  utils.collect_named_outputs("norms", "question_embedding",
                              tf.norm(question, axis=-1))

  (encoder_input, encoder_self_attention_bias,
   encoder_decoder_attention_bias) = prepare_image_question_encoder(
       image_feat, question, hp)
  encoder_input = tf.nn.dropout(
      encoder_input, keep_prob=1.-hp.layer_prepostprocess_dropout)

  # scale query by sqrt(hidden_size)
  query = tf.get_variable("query", [hp.hidden_size]) * hp.hidden_size**0.5
  query = tf.expand_dims(tf.expand_dims(query, axis=0), axis=0)
  batch_size = common_layers.shape_list(encoder_input)[0]
  query = tf.tile(query, [batch_size, 1, 1])
  query = tf.nn.dropout(
      query, keep_prob=1.-hp.layer_prepostprocess_dropout)

  decoder_output = iterative_encoder_decoder(
      encoder_input,
      encoder_self_attention_bias,
      encoder_decoder_attention_bias,
      query,
      hp)

  utils.collect_named_outputs("norms", "decoder_output",
                              tf.norm(decoder_output, axis=-1))

  norm_tensors = utils.convert_collection_to_dict("norms")
  vqa_layers.summarize_tensors(norm_tensors, tag="norms/")

  # Expand dimension 1 and 2
  return tf.expand_dims(decoder_output, axis=1)
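The tensor2tensor-style examples (this one and several below) use convert_collection_to_dict() with a plain string collection name ("norms") rather than a scope-derived name: tensors are first registered under an alias with utils.collect_named_outputs and later gathered into a dict for summarization. Below is a minimal sketch of that pattern, assuming TensorFlow 1.x with tf.contrib; the tensor and alias names are made up for illustration.

# Minimal sketch of the "norms" collection pattern
# (assumes TensorFlow 1.x with tf.contrib; names are illustrative).
import tensorflow as tf
from tensorflow.contrib.layers.python.layers import utils

x = tf.random_normal([8, 16])
h = tf.layers.dense(x, 32, name="hidden")

# collect_named_outputs tags a tensor with an alias and adds it to a collection...
utils.collect_named_outputs("norms", "hidden_norm", tf.norm(h, axis=-1))

# ...and convert_collection_to_dict later retrieves an {alias: tensor} mapping.
norm_tensors = utils.convert_collection_to_dict("norms")
for alias, tensor in norm_tensors.items():
  tf.summary.histogram(alias, tensor)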
Example #10
Source File: vqa_recurrent_self_attention.py From training_results_v0.5 with Apache License 2.0 | 4 votes |
def body(self, features):
  hp = self.hparams
  # pylint: disable=eval-used
  if hp.image_input_type == "image":
    image_feat = vqa_layers.image_embedding(
        features["inputs"],
        model_fn=eval(hp.image_model_fn),
        trainable=hp.train_resnet,
        is_training=hp.mode == tf.estimator.ModeKeys.TRAIN)
  else:
    image_feat = features["inputs"]

  image_feat = common_layers.flatten4d3d(image_feat)
  image_feat = common_layers.dense(image_feat, hp.hidden_size)
  utils.collect_named_outputs("norms", "image_feat_after_proj",
                              tf.norm(image_feat, axis=-1))

  question = common_layers.flatten4d3d(features["question"])
  utils.collect_named_outputs("norms", "question_embedding",
                              tf.norm(question, axis=-1))

  (encoder_input, encoder_self_attention_bias,
   encoder_decoder_attention_bias) = prepare_image_question_encoder(
       image_feat, question, hp)
  encoder_input = tf.nn.dropout(
      encoder_input, keep_prob=1.-hp.layer_prepostprocess_dropout)

  encoder_output, _ = recurrent_transformer_decoder(
      encoder_input, None, encoder_self_attention_bias, None,
      hp, name="encoder")
  utils.collect_named_outputs(
      "norms", "encoder_output", tf.norm(encoder_output, axis=-1))

  # scale query by sqrt(hidden_size)
  query = tf.get_variable("query", [hp.hidden_size]) * hp.hidden_size**0.5
  query = tf.expand_dims(tf.expand_dims(query, axis=0), axis=0)
  batch_size = common_layers.shape_list(encoder_input)[0]
  query = tf.tile(query, [batch_size, 1, 1])
  query = tf.nn.dropout(
      query, keep_prob=1.-hp.layer_prepostprocess_dropout)

  decoder_output, _ = recurrent_transformer_decoder(
      query, encoder_output, None, encoder_decoder_attention_bias,
      hp, name="decoder")
  utils.collect_named_outputs("norms", "decoder_output",
                              tf.norm(decoder_output, axis=-1))

  norm_tensors = utils.convert_collection_to_dict("norms")
  vqa_layers.summarize_tensors(norm_tensors, tag="norms/")

  # Expand dimension 1 and 2
  return tf.expand_dims(decoder_output, axis=1)
Example #11
Source File: truncated_vgg.py From Table-Detection-using-Deep-learning with BSD 3-Clause "New" or "Revised" License | 4 votes |
def truncated_vgg_16(inputs, is_training=True, scope='vgg_16'):
  """Oxford Net VGG 16-Layers version D Example.

  For use in SSD object detection network, which has this particular
  truncated version of VGG16 detailed in its paper.

  Args:
    inputs: a tensor of size [batch_size, height, width, channels].
    scope: Optional scope for the variables.

  Returns:
    the last op containing the conv5 tensor and end_points dict.
  """
  with variable_scope.variable_scope(scope, 'vgg_16', [inputs]) as sc:
    end_points_collection = sc.original_name_scope + '_end_points'
    # Collect outputs for conv2d, fully_connected and max_pool2d.
    with arg_scope(
        [layers.conv2d, layers_lib.fully_connected, layers_lib.max_pool2d],
        outputs_collections=end_points_collection):
      net = layers_lib.repeat(
          inputs, 2, layers.conv2d, 64, [3, 3], scope='conv1')
      net = layers_lib.max_pool2d(net, [2, 2], scope='pool1')
      net = layers_lib.repeat(
          net, 2, layers.conv2d, 128, [3, 3], scope='conv2')
      net = layers_lib.max_pool2d(net, [2, 2], scope='pool2')
      net = layers_lib.repeat(
          net, 3, layers.conv2d, 256, [3, 3], scope='conv3')
      net = layers_lib.max_pool2d(net, [2, 2], scope='pool3')
      net = layers_lib.repeat(
          net, 3, layers.conv2d, 512, [3, 3], scope='conv4')
      net = layers_lib.max_pool2d(net, [2, 2], scope='pool4')
      net = layers_lib.repeat(
          net, 3, layers.conv2d, 512, [3, 3], scope='conv5')
      # Convert end_points_collection into a end_point dict.
      end_points = utils.convert_collection_to_dict(end_points_collection)
      return net, end_points
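A hypothetical way to drive the truncated backbone above for SSD-style feature extraction follows; the input size and the exact end-point key are assumptions and depend on the scope names the network actually produces.

# Hypothetical usage sketch (input size and end-point key are illustrative).
import tensorflow as tf

images = tf.placeholder(tf.float32, [None, 300, 300, 3])
conv5, end_points = truncated_vgg_16(images, is_training=False)

# end_points maps scope-qualified aliases (e.g. 'vgg_16/conv4/conv4_3') to tensors,
# so intermediate feature maps can be fed to SSD prediction heads.
for alias in end_points:
  print(alias)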
Example #12
Source File: truncated_vgg.py From Tabulo with BSD 3-Clause "New" or "Revised" License | 4 votes |
def truncated_vgg_16(inputs, is_training=True, scope='vgg_16'):
  """Oxford Net VGG 16-Layers version D Example.

  For use in SSD object detection network, which has this particular
  truncated version of VGG16 detailed in its paper.

  Args:
    inputs: a tensor of size [batch_size, height, width, channels].
    scope: Optional scope for the variables.

  Returns:
    the last op containing the conv5 tensor and end_points dict.
  """
  with variable_scope.variable_scope(scope, 'vgg_16', [inputs]) as sc:
    end_points_collection = sc.original_name_scope + '_end_points'
    # Collect outputs for conv2d, fully_connected and max_pool2d.
    with arg_scope(
        [layers.conv2d, layers_lib.fully_connected, layers_lib.max_pool2d],
        outputs_collections=end_points_collection):
      net = layers_lib.repeat(
          inputs, 2, layers.conv2d, 64, [3, 3], scope='conv1')
      net = layers_lib.max_pool2d(net, [2, 2], scope='pool1')
      net = layers_lib.repeat(
          net, 2, layers.conv2d, 128, [3, 3], scope='conv2')
      net = layers_lib.max_pool2d(net, [2, 2], scope='pool2')
      net = layers_lib.repeat(
          net, 3, layers.conv2d, 256, [3, 3], scope='conv3')
      net = layers_lib.max_pool2d(net, [2, 2], scope='pool3')
      net = layers_lib.repeat(
          net, 3, layers.conv2d, 512, [3, 3], scope='conv4')
      net = layers_lib.max_pool2d(net, [2, 2], scope='pool4')
      net = layers_lib.repeat(
          net, 3, layers.conv2d, 512, [3, 3], scope='conv5')
      # Convert end_points_collection into a end_point dict.
      end_points = utils.convert_collection_to_dict(end_points_collection)
      return net, end_points
Example #13
Source File: truncated_vgg.py From luminoth with BSD 3-Clause "New" or "Revised" License | 4 votes |
def truncated_vgg_16(inputs, is_training=True, scope='vgg_16'):
  """Oxford Net VGG 16-Layers version D Example.

  For use in SSD object detection network, which has this particular
  truncated version of VGG16 detailed in its paper.

  Args:
    inputs: a tensor of size [batch_size, height, width, channels].
    scope: Optional scope for the variables.

  Returns:
    the last op containing the conv5 tensor and end_points dict.
  """
  with variable_scope.variable_scope(scope, 'vgg_16', [inputs]) as sc:
    end_points_collection = sc.original_name_scope + '_end_points'
    # Collect outputs for conv2d, fully_connected and max_pool2d.
    with arg_scope(
        [layers.conv2d, layers_lib.fully_connected, layers_lib.max_pool2d],
        outputs_collections=end_points_collection):
      net = layers_lib.repeat(
          inputs, 2, layers.conv2d, 64, [3, 3], scope='conv1')
      net = layers_lib.max_pool2d(net, [2, 2], scope='pool1')
      net = layers_lib.repeat(
          net, 2, layers.conv2d, 128, [3, 3], scope='conv2')
      net = layers_lib.max_pool2d(net, [2, 2], scope='pool2')
      net = layers_lib.repeat(
          net, 3, layers.conv2d, 256, [3, 3], scope='conv3')
      net = layers_lib.max_pool2d(net, [2, 2], scope='pool3')
      net = layers_lib.repeat(
          net, 3, layers.conv2d, 512, [3, 3], scope='conv4')
      net = layers_lib.max_pool2d(net, [2, 2], scope='pool4')
      net = layers_lib.repeat(
          net, 3, layers.conv2d, 512, [3, 3], scope='conv5')
      # Convert end_points_collection into a end_point dict.
      end_points = utils.convert_collection_to_dict(end_points_collection)
      return net, end_points
Example #14
Source File: vgg.py From lambda-packs with MIT License | 4 votes |
def vgg_19(inputs, num_classes=1000, is_training=True,
           dropout_keep_prob=0.5, spatial_squeeze=True, scope='vgg_19'):
  """Oxford Net VGG 19-Layers version E Example.

  Note: All the fully_connected layers have been transformed to conv2d layers.
        To use in classification mode, resize input to 224x224.

  Args:
    inputs: a tensor of size [batch_size, height, width, channels].
    num_classes: number of predicted classes.
    is_training: whether or not the model is being trained.
    dropout_keep_prob: the probability that activations are kept in the dropout
      layers during training.
    spatial_squeeze: whether or not should squeeze the spatial dimensions of the
      outputs. Useful to remove unnecessary dimensions for classification.
    scope: Optional scope for the variables.

  Returns:
    the last op containing the log predictions and end_points dict.
  """
  with variable_scope.variable_scope(scope, 'vgg_19', [inputs]) as sc:
    end_points_collection = sc.name + '_end_points'
    # Collect outputs for conv2d, fully_connected and max_pool2d.
    with arg_scope(
        [layers.conv2d, layers_lib.fully_connected, layers_lib.max_pool2d],
        outputs_collections=end_points_collection):
      net = layers_lib.repeat(
          inputs, 2, layers.conv2d, 64, [3, 3], scope='conv1')
      net = layers_lib.max_pool2d(net, [2, 2], scope='pool1')
      net = layers_lib.repeat(net, 2, layers.conv2d, 128, [3, 3], scope='conv2')
      net = layers_lib.max_pool2d(net, [2, 2], scope='pool2')
      net = layers_lib.repeat(net, 4, layers.conv2d, 256, [3, 3], scope='conv3')
      net = layers_lib.max_pool2d(net, [2, 2], scope='pool3')
      net = layers_lib.repeat(net, 4, layers.conv2d, 512, [3, 3], scope='conv4')
      net = layers_lib.max_pool2d(net, [2, 2], scope='pool4')
      net = layers_lib.repeat(net, 4, layers.conv2d, 512, [3, 3], scope='conv5')
      net = layers_lib.max_pool2d(net, [2, 2], scope='pool5')
      # Use conv2d instead of fully_connected layers.
      net = layers.conv2d(net, 4096, [7, 7], padding='VALID', scope='fc6')
      net = layers_lib.dropout(
          net, dropout_keep_prob, is_training=is_training, scope='dropout6')
      net = layers.conv2d(net, 4096, [1, 1], scope='fc7')
      net = layers_lib.dropout(
          net, dropout_keep_prob, is_training=is_training, scope='dropout7')
      net = layers.conv2d(
          net, num_classes, [1, 1],
          activation_fn=None, normalizer_fn=None, scope='fc8')
      # Convert end_points_collection into a end_point dict.
      end_points = utils.convert_collection_to_dict(end_points_collection)
      if spatial_squeeze:
        net = array_ops.squeeze(net, [1, 2], name='fc8/squeezed')
        end_points[sc.name + '/fc8'] = net
      return net, end_points
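A hypothetical classification call for the vgg_19 function above; the 224x224 input size follows the docstring note, and preprocessing and checkpoint loading are omitted.

# Hypothetical classification usage (sketch only; no weights are restored here).
import tensorflow as tf

images = tf.placeholder(tf.float32, [None, 224, 224, 3])
logits, end_points = vgg_19(images, num_classes=1000, is_training=False)
probabilities = tf.nn.softmax(logits)  # [batch_size, 1000] after the spatial squeeze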
Example #15
Source File: nets.py From LEGO with MIT License | 4 votes |
def pose_exp_net(tgt_image, src_image_stack, do_exp=True, is_training=True):
  H = tgt_image.get_shape()[1].value
  W = tgt_image.get_shape()[2].value
  tgt_image = tf.image.resize_bilinear(tgt_image, [128, 416])
  src_image_stack = tf.image.resize_bilinear(src_image_stack, [128, 416])
  inputs = tf.concat([tgt_image, src_image_stack], axis=3)
  batch_norm_params = {'is_training': is_training}
  num_source = int(src_image_stack.get_shape()[3].value // 3)
  with tf.variable_scope('pose_exp_net') as sc:
    end_points_collection = sc.original_name_scope + '_end_points'
    with slim.arg_scope([slim.conv2d, slim.conv2d_transpose],
                        # normalizer_fn = None,
                        normalizer_fn=slim.batch_norm,
                        normalizer_params=batch_norm_params,
                        weights_regularizer=slim.l2_regularizer(0.05),
                        activation_fn=tf.nn.relu,
                        outputs_collections=end_points_collection):
      # cnv1 to cnv5b are shared between pose and explainability prediction
      cnv1 = slim.conv2d(inputs, 16, [7, 7], stride=1, scope='cnv1')
      cnv2 = slim.conv2d(cnv1, 32, [5, 5], stride=2, scope='cnv2')
      cnv3 = slim.conv2d(cnv2, 64, [3, 3], stride=2, scope='cnv3')
      cnv4 = slim.conv2d(cnv3, 128, [3, 3], stride=2, scope='cnv4')
      cnv5 = slim.conv2d(cnv4, 256, [3, 3], stride=2, scope='cnv5')
      # Pose specific layers
      with tf.variable_scope('pose'):
        cnv6 = slim.conv2d(cnv5, 256, [3, 3], stride=2, scope='cnv6')
        cnv7 = slim.conv2d(cnv6, 256, [3, 3], stride=2, scope='cnv7')
        pose_pred = slim.conv2d(cnv7, 6 * num_source, [1, 1], scope='pred',
                                stride=1, normalizer_fn=None, activation_fn=None)
        pose_avg = tf.reduce_mean(pose_pred, [1, 2])
        # Empirically we found that scaling by a small constant
        # facilitates training.
        pose_final = 0.01 * tf.reshape(pose_avg, [-1, num_source, 6])
      # Exp mask specific layers
      if do_exp:
        with tf.variable_scope('exp'):
          upcnv5 = slim.conv2d_transpose(cnv5, 256, [3, 3], stride=2, scope='upcnv5')
          upcnv4 = slim.conv2d_transpose(upcnv5, 128, [3, 3], stride=2, scope='upcnv4')
          mask4 = slim.conv2d(upcnv4, num_source * 2, [3, 3], stride=1, scope='mask4',
                              normalizer_fn=None, activation_fn=None)
          upcnv3 = slim.conv2d_transpose(upcnv4, 64, [3, 3], stride=2, scope='upcnv3')
          mask3 = slim.conv2d(upcnv3, num_source * 2, [3, 3], stride=1, scope='mask3',
                              normalizer_fn=None, activation_fn=None)
          upcnv2 = slim.conv2d_transpose(upcnv3, 32, [5, 5], stride=2, scope='upcnv2')
          mask2 = slim.conv2d(upcnv2, num_source * 2, [5, 5], stride=1, scope='mask2',
                              normalizer_fn=None, activation_fn=None)
          upcnv1 = slim.conv2d_transpose(upcnv2, 16, [7, 7], stride=2, scope='upcnv1')
          mask1 = slim.conv2d(upcnv1, num_source * 2, [7, 7], stride=1, scope='mask1',
                              normalizer_fn=None, activation_fn=None)
      else:
        mask1 = None
        mask2 = None
        mask3 = None
        mask4 = None
      end_points = utils.convert_collection_to_dict(end_points_collection)
      return pose_final, [mask1, mask2, mask3, mask4], end_points
Example #16
Source File: nets.py From layered-scene-inference with Apache License 2.0 | 4 votes |
def encoder_simple(inp_img, nz=1000, is_training=True, reuse=False):
  """Creates a simple encoder CNN.

  Args:
    inp_img: TensorFlow node for input with size B X H X W X C
    nz: number of units in last layer, default=1000
    is_training: whether batch_norm should be in train mode
    reuse: Whether to reuse weights from an already defined net

  Returns:
    An encoder CNN which computes a final representation with nz units.
  """
  batch_norm_params = {'is_training': is_training}
  with tf.variable_scope('encoder', reuse=reuse) as sc:
    end_points_collection = sc.original_name_scope + '_end_points'
    with slim.arg_scope(
        [slim.conv2d, slim.fully_connected],
        normalizer_fn=slim.batch_norm,
        normalizer_params=batch_norm_params,
        weights_regularizer=slim.l2_regularizer(0.05),
        activation_fn=tf.nn.relu,
        outputs_collections=end_points_collection):
      cnv1 = slim.conv2d(inp_img, 32, [7, 7], stride=2, scope='cnv1')
      cnv1b = slim.conv2d(cnv1, 32, [7, 7], stride=1, scope='cnv1b')
      cnv2 = slim.conv2d(cnv1b, 64, [5, 5], stride=2, scope='cnv2')
      cnv2b = slim.conv2d(cnv2, 64, [5, 5], stride=1, scope='cnv2b')
      cnv3 = slim.conv2d(cnv2b, 128, [3, 3], stride=2, scope='cnv3')
      cnv3b = slim.conv2d(cnv3, 128, [3, 3], stride=1, scope='cnv3b')
      cnv4 = slim.conv2d(cnv3b, 256, [3, 3], stride=2, scope='cnv4')
      cnv4b = slim.conv2d(cnv4, 256, [3, 3], stride=1, scope='cnv4b')
      cnv5 = slim.conv2d(cnv4b, 512, [3, 3], stride=2, scope='cnv5')
      cnv5b = slim.conv2d(cnv5, 512, [3, 3], stride=1, scope='cnv5b')
      cnv6 = slim.conv2d(cnv5b, 512, [3, 3], stride=2, scope='cnv6')
      cnv6b = slim.conv2d(cnv6, 512, [3, 3], stride=1, scope='cnv6b')
      cnv7 = slim.conv2d(cnv6b, 512, [3, 3], stride=2, scope='cnv7')
      cnv7b = slim.conv2d(cnv7, 512, [3, 3], stride=1, scope='cnv7b')
      cnv7b_flat = slim.flatten(cnv7b, scope='cnv7b_flat')
      enc = slim.stack(
          cnv7b_flat, slim.fully_connected, [2 * nz, nz, nz], scope='fc')
      end_points = utils.convert_collection_to_dict(end_points_collection)
      return enc, end_points
Example #17
Source File: vqa_attention.py From training_results_v0.5 with Apache License 2.0 | 4 votes |
def body(self, features):
  hp = self.hparams
  # pylint: disable=eval-used
  if hp.image_input_type == "image":
    image_feat = vqa_layers.image_embedding(
        features["inputs"],
        model_fn=eval(hp.image_model_fn),
        trainable=hp.train_resnet,
        is_training=hp.mode == tf.estimator.ModeKeys.TRAIN)
  else:
    image_feat = features["inputs"]

  image_feat = common_layers.flatten4d3d(image_feat)
  # image feature self attention
  # image_feat = tf.nn.dropout(
  #     image_feat, keep_prob=1.-hp.layer_prepostprocess_dropout)
  # image_feat = image_feat - tf.reduce_mean(
  #     image_feat, axis=-1, keepdims=True)
  # image_feat = tf.nn.l2_normalize(image_feat, -1)
  # utils.collect_named_outputs("norms", "image_feat_after_l2",
  #                             tf.norm(image_feat, axis=-1))

  image_feat = tf.nn.dropout(image_feat, keep_prob=1.-hp.dropout)
  image_feat = image_encoder(image_feat, hp)
  utils.collect_named_outputs("norms", "image_feat_encoded",
                              tf.norm(image_feat, axis=-1))
  image_feat = common_layers.l2_norm(image_feat)
  utils.collect_named_outputs("norms", "image_feat_encoded_l2",
                              tf.norm(image_feat, axis=-1))

  query = question_encoder(features["question"], hp)
  utils.collect_named_outputs("norms", "query",
                              tf.norm(query, axis=-1))

  image_ave = attn(image_feat, query, hp)
  utils.collect_named_outputs("norms", "image_ave",
                              tf.norm(image_ave, axis=-1))

  image_question = tf.concat([image_ave, query], axis=1)
  utils.collect_named_outputs("norms", "image_question",
                              tf.norm(image_question, axis=-1))

  image_question = tf.nn.dropout(image_question, 1. - hp.dropout)

  output = mlp(image_question, hp)
  utils.collect_named_outputs("norms", "output",
                              tf.norm(output, axis=-1))

  norm_tensors = utils.convert_collection_to_dict("norms")
  vqa_layers.summarize_tensors(norm_tensors, tag="norms/")

  # Expand dimension 1 and 2
  return tf.expand_dims(tf.expand_dims(output, axis=1), axis=2)
Example #18
Source File: nets.py From layered-scene-inference with Apache License 2.0 | 4 votes |
def pixelwise_predictor(feat, nc=3, n_layers=1, n_layerwise_steps=0,
                        skip_feat=None, reuse=False, is_training=True):
  """Predicts texture images and probabilistic masks.

  Args:
    feat: B X H X W X C feature vectors
    nc: number of output channels
    n_layers: number of plane equations to predict (denoted as L)
    n_layerwise_steps: Number of independent per-layer up-conv steps
    skip_feat: List of features useful for skip connections. Used if lws>0.
    reuse: Whether to reuse weights from an already defined net
    is_training: whether batch_norm should be in train mode

  Returns:
    textures : L X B X H X W X nc.
  """
  with tf.variable_scope('pixelwise_pred', reuse=reuse) as sc:
    end_points_collection = sc.original_name_scope + '_end_points'
    with slim.arg_scope(
        [slim.conv2d],
        normalizer_fn=None,
        weights_regularizer=slim.l2_regularizer(0.05),
        activation_fn=tf.nn.sigmoid,
        outputs_collections=end_points_collection):
      preds = []
      for l in range(n_layers):
        with tf.variable_scope('upsample_' + str(l), reuse=reuse):
          feat_l, _ = decoder_simple(
              feat, nconv=n_layerwise_steps, skip_feat=skip_feat,
              reuse=reuse, is_training=is_training)
          pred = slim.conv2d(
              feat_l, nc, [3, 3], stride=1, scope='pred_' + str(l))
          preds.append(pred)
      end_points = utils.convert_collection_to_dict(end_points_collection)
      preds = tf.stack(preds, axis=0)
      return preds, end_points
Example #19
Source File: vgg.py From lambda-packs with MIT License | 4 votes |
def vgg_16(inputs, num_classes=1000, is_training=True,
           dropout_keep_prob=0.5, spatial_squeeze=True, scope='vgg_16'):
  """Oxford Net VGG 16-Layers version D Example.

  Note: All the fully_connected layers have been transformed to conv2d layers.
        To use in classification mode, resize input to 224x224.

  Args:
    inputs: a tensor of size [batch_size, height, width, channels].
    num_classes: number of predicted classes.
    is_training: whether or not the model is being trained.
    dropout_keep_prob: the probability that activations are kept in the dropout
      layers during training.
    spatial_squeeze: whether or not should squeeze the spatial dimensions of the
      outputs. Useful to remove unnecessary dimensions for classification.
    scope: Optional scope for the variables.

  Returns:
    the last op containing the log predictions and end_points dict.
  """
  with variable_scope.variable_scope(scope, 'vgg_16', [inputs]) as sc:
    end_points_collection = sc.original_name_scope + '_end_points'
    # Collect outputs for conv2d, fully_connected and max_pool2d.
    with arg_scope(
        [layers.conv2d, layers_lib.fully_connected, layers_lib.max_pool2d],
        outputs_collections=end_points_collection):
      net = layers_lib.repeat(
          inputs, 2, layers.conv2d, 64, [3, 3], scope='conv1')
      net = layers_lib.max_pool2d(net, [2, 2], scope='pool1')
      net = layers_lib.repeat(net, 2, layers.conv2d, 128, [3, 3], scope='conv2')
      net = layers_lib.max_pool2d(net, [2, 2], scope='pool2')
      net = layers_lib.repeat(net, 3, layers.conv2d, 256, [3, 3], scope='conv3')
      net = layers_lib.max_pool2d(net, [2, 2], scope='pool3')
      net = layers_lib.repeat(net, 3, layers.conv2d, 512, [3, 3], scope='conv4')
      net = layers_lib.max_pool2d(net, [2, 2], scope='pool4')
      net = layers_lib.repeat(net, 3, layers.conv2d, 512, [3, 3], scope='conv5')
      net = layers_lib.max_pool2d(net, [2, 2], scope='pool5')
      # Use conv2d instead of fully_connected layers.
      net = layers.conv2d(net, 4096, [7, 7], padding='VALID', scope='fc6')
      net = layers_lib.dropout(
          net, dropout_keep_prob, is_training=is_training, scope='dropout6')
      net = layers.conv2d(net, 4096, [1, 1], scope='fc7')
      net = layers_lib.dropout(
          net, dropout_keep_prob, is_training=is_training, scope='dropout7')
      net = layers.conv2d(
          net, num_classes, [1, 1],
          activation_fn=None, normalizer_fn=None, scope='fc8')
      # Convert end_points_collection into a end_point dict.
      end_points = utils.convert_collection_to_dict(end_points_collection)
      if spatial_squeeze:
        net = array_ops.squeeze(net, [1, 2], name='fc8/squeezed')
        end_points[sc.name + '/fc8'] = net
      return net, end_points
Example #20
Source File: vgg.py From keras-lambda with MIT License | 4 votes |
def vgg_a(inputs, num_classes=1000, is_training=True,
          dropout_keep_prob=0.5, spatial_squeeze=True, scope='vgg_a'):
  """Oxford Net VGG 11-Layers version A Example.

  Note: All the fully_connected layers have been transformed to conv2d layers.
        To use in classification mode, resize input to 224x224.

  Args:
    inputs: a tensor of size [batch_size, height, width, channels].
    num_classes: number of predicted classes.
    is_training: whether or not the model is being trained.
    dropout_keep_prob: the probability that activations are kept in the dropout
      layers during training.
    spatial_squeeze: whether or not should squeeze the spatial dimensions of the
      outputs. Useful to remove unnecessary dimensions for classification.
    scope: Optional scope for the variables.

  Returns:
    the last op containing the log predictions and end_points dict.
  """
  with variable_scope.variable_scope(scope, 'vgg_a', [inputs]) as sc:
    end_points_collection = sc.original_name_scope + '_end_points'
    # Collect outputs for conv2d, fully_connected and max_pool2d.
    with arg_scope(
        [layers.conv2d, layers_lib.max_pool2d],
        outputs_collections=end_points_collection):
      net = layers_lib.repeat(
          inputs, 1, layers.conv2d, 64, [3, 3], scope='conv1')
      net = layers_lib.max_pool2d(net, [2, 2], scope='pool1')
      net = layers_lib.repeat(net, 1, layers.conv2d, 128, [3, 3], scope='conv2')
      net = layers_lib.max_pool2d(net, [2, 2], scope='pool2')
      net = layers_lib.repeat(net, 2, layers.conv2d, 256, [3, 3], scope='conv3')
      net = layers_lib.max_pool2d(net, [2, 2], scope='pool3')
      net = layers_lib.repeat(net, 2, layers.conv2d, 512, [3, 3], scope='conv4')
      net = layers_lib.max_pool2d(net, [2, 2], scope='pool4')
      net = layers_lib.repeat(net, 2, layers.conv2d, 512, [3, 3], scope='conv5')
      net = layers_lib.max_pool2d(net, [2, 2], scope='pool5')
      # Use conv2d instead of fully_connected layers.
      net = layers.conv2d(net, 4096, [7, 7], padding='VALID', scope='fc6')
      net = layers_lib.dropout(
          net, dropout_keep_prob, is_training=is_training, scope='dropout6')
      net = layers.conv2d(net, 4096, [1, 1], scope='fc7')
      net = layers_lib.dropout(
          net, dropout_keep_prob, is_training=is_training, scope='dropout7')
      net = layers.conv2d(
          net, num_classes, [1, 1],
          activation_fn=None, normalizer_fn=None, scope='fc8')
      # Convert end_points_collection into a end_point dict.
      end_points = utils.convert_collection_to_dict(end_points_collection)
      if spatial_squeeze:
        net = array_ops.squeeze(net, [1, 2], name='fc8/squeezed')
        end_points[sc.name + '/fc8'] = net
      return net, end_points
Example #21
Source File: vgg.py From keras-lambda with MIT License | 4 votes |
def vgg_16(inputs, num_classes=1000, is_training=True,
           dropout_keep_prob=0.5, spatial_squeeze=True, scope='vgg_16'):
  """Oxford Net VGG 16-Layers version D Example.

  Note: All the fully_connected layers have been transformed to conv2d layers.
        To use in classification mode, resize input to 224x224.

  Args:
    inputs: a tensor of size [batch_size, height, width, channels].
    num_classes: number of predicted classes.
    is_training: whether or not the model is being trained.
    dropout_keep_prob: the probability that activations are kept in the dropout
      layers during training.
    spatial_squeeze: whether or not should squeeze the spatial dimensions of the
      outputs. Useful to remove unnecessary dimensions for classification.
    scope: Optional scope for the variables.

  Returns:
    the last op containing the log predictions and end_points dict.
  """
  with variable_scope.variable_scope(scope, 'vgg_16', [inputs]) as sc:
    end_points_collection = sc.original_name_scope + '_end_points'
    # Collect outputs for conv2d, fully_connected and max_pool2d.
    with arg_scope(
        [layers.conv2d, layers_lib.fully_connected, layers_lib.max_pool2d],
        outputs_collections=end_points_collection):
      net = layers_lib.repeat(
          inputs, 2, layers.conv2d, 64, [3, 3], scope='conv1')
      net = layers_lib.max_pool2d(net, [2, 2], scope='pool1')
      net = layers_lib.repeat(net, 2, layers.conv2d, 128, [3, 3], scope='conv2')
      net = layers_lib.max_pool2d(net, [2, 2], scope='pool2')
      net = layers_lib.repeat(net, 3, layers.conv2d, 256, [3, 3], scope='conv3')
      net = layers_lib.max_pool2d(net, [2, 2], scope='pool3')
      net = layers_lib.repeat(net, 3, layers.conv2d, 512, [3, 3], scope='conv4')
      net = layers_lib.max_pool2d(net, [2, 2], scope='pool4')
      net = layers_lib.repeat(net, 3, layers.conv2d, 512, [3, 3], scope='conv5')
      net = layers_lib.max_pool2d(net, [2, 2], scope='pool5')
      # Use conv2d instead of fully_connected layers.
      net = layers.conv2d(net, 4096, [7, 7], padding='VALID', scope='fc6')
      net = layers_lib.dropout(
          net, dropout_keep_prob, is_training=is_training, scope='dropout6')
      net = layers.conv2d(net, 4096, [1, 1], scope='fc7')
      net = layers_lib.dropout(
          net, dropout_keep_prob, is_training=is_training, scope='dropout7')
      net = layers.conv2d(
          net, num_classes, [1, 1],
          activation_fn=None, normalizer_fn=None, scope='fc8')
      # Convert end_points_collection into a end_point dict.
      end_points = utils.convert_collection_to_dict(end_points_collection)
      if spatial_squeeze:
        net = array_ops.squeeze(net, [1, 2], name='fc8/squeezed')
        end_points[sc.name + '/fc8'] = net
      return net, end_points
Example #22
Source File: vgg.py From keras-lambda with MIT License | 4 votes |
def vgg_19(inputs, num_classes=1000, is_training=True,
           dropout_keep_prob=0.5, spatial_squeeze=True, scope='vgg_19'):
  """Oxford Net VGG 19-Layers version E Example.

  Note: All the fully_connected layers have been transformed to conv2d layers.
        To use in classification mode, resize input to 224x224.

  Args:
    inputs: a tensor of size [batch_size, height, width, channels].
    num_classes: number of predicted classes.
    is_training: whether or not the model is being trained.
    dropout_keep_prob: the probability that activations are kept in the dropout
      layers during training.
    spatial_squeeze: whether or not should squeeze the spatial dimensions of the
      outputs. Useful to remove unnecessary dimensions for classification.
    scope: Optional scope for the variables.

  Returns:
    the last op containing the log predictions and end_points dict.
  """
  with variable_scope.variable_scope(scope, 'vgg_19', [inputs]) as sc:
    end_points_collection = sc.name + '_end_points'
    # Collect outputs for conv2d, fully_connected and max_pool2d.
    with arg_scope(
        [layers.conv2d, layers_lib.fully_connected, layers_lib.max_pool2d],
        outputs_collections=end_points_collection):
      net = layers_lib.repeat(
          inputs, 2, layers.conv2d, 64, [3, 3], scope='conv1')
      net = layers_lib.max_pool2d(net, [2, 2], scope='pool1')
      net = layers_lib.repeat(net, 2, layers.conv2d, 128, [3, 3], scope='conv2')
      net = layers_lib.max_pool2d(net, [2, 2], scope='pool2')
      net = layers_lib.repeat(net, 4, layers.conv2d, 256, [3, 3], scope='conv3')
      net = layers_lib.max_pool2d(net, [2, 2], scope='pool3')
      net = layers_lib.repeat(net, 4, layers.conv2d, 512, [3, 3], scope='conv4')
      net = layers_lib.max_pool2d(net, [2, 2], scope='pool4')
      net = layers_lib.repeat(net, 4, layers.conv2d, 512, [3, 3], scope='conv5')
      net = layers_lib.max_pool2d(net, [2, 2], scope='pool5')
      # Use conv2d instead of fully_connected layers.
      net = layers.conv2d(net, 4096, [7, 7], padding='VALID', scope='fc6')
      net = layers_lib.dropout(
          net, dropout_keep_prob, is_training=is_training, scope='dropout6')
      net = layers.conv2d(net, 4096, [1, 1], scope='fc7')
      net = layers_lib.dropout(
          net, dropout_keep_prob, is_training=is_training, scope='dropout7')
      net = layers.conv2d(
          net, num_classes, [1, 1],
          activation_fn=None, normalizer_fn=None, scope='fc8')
      # Convert end_points_collection into a end_point dict.
      end_points = utils.convert_collection_to_dict(end_points_collection)
      if spatial_squeeze:
        net = array_ops.squeeze(net, [1, 2], name='fc8/squeezed')
        end_points[sc.name + '/fc8'] = net
      return net, end_points
Example #23
Source File: vgg.py From lambda-packs with MIT License | 4 votes |
def vgg_a(inputs, num_classes=1000, is_training=True,
          dropout_keep_prob=0.5, spatial_squeeze=True, scope='vgg_a'):
  """Oxford Net VGG 11-Layers version A Example.

  Note: All the fully_connected layers have been transformed to conv2d layers.
        To use in classification mode, resize input to 224x224.

  Args:
    inputs: a tensor of size [batch_size, height, width, channels].
    num_classes: number of predicted classes.
    is_training: whether or not the model is being trained.
    dropout_keep_prob: the probability that activations are kept in the dropout
      layers during training.
    spatial_squeeze: whether or not should squeeze the spatial dimensions of the
      outputs. Useful to remove unnecessary dimensions for classification.
    scope: Optional scope for the variables.

  Returns:
    the last op containing the log predictions and end_points dict.
  """
  with variable_scope.variable_scope(scope, 'vgg_a', [inputs]) as sc:
    end_points_collection = sc.original_name_scope + '_end_points'
    # Collect outputs for conv2d, fully_connected and max_pool2d.
    with arg_scope(
        [layers.conv2d, layers_lib.max_pool2d],
        outputs_collections=end_points_collection):
      net = layers_lib.repeat(
          inputs, 1, layers.conv2d, 64, [3, 3], scope='conv1')
      net = layers_lib.max_pool2d(net, [2, 2], scope='pool1')
      net = layers_lib.repeat(net, 1, layers.conv2d, 128, [3, 3], scope='conv2')
      net = layers_lib.max_pool2d(net, [2, 2], scope='pool2')
      net = layers_lib.repeat(net, 2, layers.conv2d, 256, [3, 3], scope='conv3')
      net = layers_lib.max_pool2d(net, [2, 2], scope='pool3')
      net = layers_lib.repeat(net, 2, layers.conv2d, 512, [3, 3], scope='conv4')
      net = layers_lib.max_pool2d(net, [2, 2], scope='pool4')
      net = layers_lib.repeat(net, 2, layers.conv2d, 512, [3, 3], scope='conv5')
      net = layers_lib.max_pool2d(net, [2, 2], scope='pool5')
      # Use conv2d instead of fully_connected layers.
      net = layers.conv2d(net, 4096, [7, 7], padding='VALID', scope='fc6')
      net = layers_lib.dropout(
          net, dropout_keep_prob, is_training=is_training, scope='dropout6')
      net = layers.conv2d(net, 4096, [1, 1], scope='fc7')
      net = layers_lib.dropout(
          net, dropout_keep_prob, is_training=is_training, scope='dropout7')
      net = layers.conv2d(
          net, num_classes, [1, 1],
          activation_fn=None, normalizer_fn=None, scope='fc8')
      # Convert end_points_collection into a end_point dict.
      end_points = utils.convert_collection_to_dict(end_points_collection)
      if spatial_squeeze:
        net = array_ops.squeeze(net, [1, 2], name='fc8/squeezed')
        end_points[sc.name + '/fc8'] = net
      return net, end_points
Example #24
Source File: vgg16.py From Chinese-Character-and-Calligraphic-Image-Processing with MIT License | 4 votes |
def vgg_19(inputs, num_classes=1000, is_training=True,
           dropout_keep_prob=0.5, spatial_squeeze=True, scope='vgg_19'):
  """Oxford Net VGG 19-Layers version E Example.

  Note: All the fully_connected layers have been transformed to conv2d layers.
        To use in classification mode, resize input to 224x224.

  Args:
    inputs: a tensor of size [batch_size, height, width, channels].
    num_classes: number of predicted classes.
    is_training: whether or not the model is being trained.
    dropout_keep_prob: the probability that activations are kept in the dropout
      layers during training.
    spatial_squeeze: whether or not should squeeze the spatial dimensions of the
      outputs. Useful to remove unnecessary dimensions for classification.
    scope: Optional scope for the variables.

  Returns:
    the last op containing the log predictions and end_points dict.
  """
  with variable_scope.variable_scope(scope, 'vgg_19', [inputs]) as sc:
    end_points_collection = sc.name + '_end_points'
    # Collect outputs for conv2d, fully_connected and max_pool2d.
    with arg_scope(
        [layers.conv2d, layers_lib.fully_connected, layers_lib.max_pool2d],
        outputs_collections=end_points_collection):
      net = layers_lib.repeat(
          inputs, 2, layers.conv2d, 64, [3, 3], scope='conv1')
      net = layers_lib.max_pool2d(net, [2, 2], scope='pool1')
      net = layers_lib.repeat(net, 2, layers.conv2d, 128, [3, 3], scope='conv2')
      net = layers_lib.max_pool2d(net, [2, 2], scope='pool2')
      net = layers_lib.repeat(net, 4, layers.conv2d, 256, [3, 3], scope='conv3')
      net = layers_lib.max_pool2d(net, [2, 2], scope='pool3')
      net = layers_lib.repeat(net, 4, layers.conv2d, 512, [3, 3], scope='conv4')
      net = layers_lib.max_pool2d(net, [2, 2], scope='pool4')
      net = layers_lib.repeat(net, 4, layers.conv2d, 512, [3, 3], scope='conv5')
      net = layers_lib.max_pool2d(net, [2, 2], scope='pool5')
      # Use conv2d instead of fully_connected layers.
      net = layers.conv2d(net, 4096, [7, 7], padding='VALID', scope='fc6')
      net = layers_lib.dropout(
          net, dropout_keep_prob, is_training=is_training, scope='dropout6')
      net = layers.conv2d(net, 4096, [1, 1], scope='fc7')
      net = layers_lib.dropout(
          net, dropout_keep_prob, is_training=is_training, scope='dropout7')
      net = layers.conv2d(
          net, num_classes, [1, 1],
          activation_fn=None, normalizer_fn=None, scope='fc8')
      # Convert end_points_collection into a end_point dict.
      end_points = utils.convert_collection_to_dict(end_points_collection)
      if spatial_squeeze:
        net = array_ops.squeeze(net, [1, 2], name='fc8/squeezed')
        end_points[sc.name + '/fc8'] = net
      return net, end_points
Example #25
Source File: vqa_attention.py From BERT with Apache License 2.0 | 4 votes |
def body(self, features):
  hp = self.hparams
  # pylint: disable=eval-used
  if hp.image_input_type == "image":
    image_feat = vqa_layers.image_embedding(
        features["inputs"],
        model_fn=eval(hp.image_model_fn),
        trainable=hp.train_resnet,
        is_training=hp.mode == tf.estimator.ModeKeys.TRAIN)
  else:
    image_feat = features["inputs"]

  image_feat = common_layers.flatten4d3d(image_feat)
  # image feature self attention
  # image_feat = tf.nn.dropout(
  #     image_feat, keep_prob=1.-hp.layer_prepostprocess_dropout)
  # image_feat = image_feat - tf.reduce_mean(
  #     image_feat, axis=-1, keepdims=True)
  # image_feat = tf.nn.l2_normalize(image_feat, -1)
  # utils.collect_named_outputs("norms", "image_feat_after_l2",
  #                             tf.norm(image_feat, axis=-1))

  image_feat = tf.nn.dropout(image_feat, keep_prob=1.-hp.dropout)
  image_feat = image_encoder(image_feat, hp)
  utils.collect_named_outputs("norms", "image_feat_encoded",
                              tf.norm(image_feat, axis=-1))
  image_feat = common_layers.l2_norm(image_feat)
  utils.collect_named_outputs("norms", "image_feat_encoded_l2",
                              tf.norm(image_feat, axis=-1))

  query = question_encoder(features["question"], hp)
  utils.collect_named_outputs("norms", "query",
                              tf.norm(query, axis=-1))

  image_ave = attn(image_feat, query, hp)
  utils.collect_named_outputs("norms", "image_ave",
                              tf.norm(image_ave, axis=-1))

  image_question = tf.concat([image_ave, query], axis=1)
  utils.collect_named_outputs("norms", "image_question",
                              tf.norm(image_question, axis=-1))

  image_question = tf.nn.dropout(image_question, 1. - hp.dropout)

  output = mlp(image_question, hp)
  utils.collect_named_outputs("norms", "output",
                              tf.norm(output, axis=-1))

  norm_tensors = utils.convert_collection_to_dict("norms")
  vqa_layers.summarize_tensors(norm_tensors, tag="norms/")

  # Expand dimension 1 and 2
  return tf.expand_dims(tf.expand_dims(output, axis=1), axis=2)
Example #26
Source File: vgg.py From auto-alt-text-lambda-api with MIT License | 4 votes |
def vgg_16(inputs, num_classes=1000, is_training=True,
           dropout_keep_prob=0.5, spatial_squeeze=True, scope='vgg_16'):
  """Oxford Net VGG 16-Layers version D Example.

  Note: All the fully_connected layers have been transformed to conv2d layers.
        To use in classification mode, resize input to 224x224.

  Args:
    inputs: a tensor of size [batch_size, height, width, channels].
    num_classes: number of predicted classes.
    is_training: whether or not the model is being trained.
    dropout_keep_prob: the probability that activations are kept in the dropout
      layers during training.
    spatial_squeeze: whether or not should squeeze the spatial dimensions of the
      outputs. Useful to remove unnecessary dimensions for classification.
    scope: Optional scope for the variables.

  Returns:
    the last op containing the log predictions and end_points dict.
  """
  with variable_scope.variable_scope(scope, 'vgg_16', [inputs]) as sc:
    end_points_collection = sc.original_name_scope + '_end_points'
    # Collect outputs for conv2d, fully_connected and max_pool2d.
    with arg_scope(
        [layers.conv2d, layers_lib.fully_connected, layers_lib.max_pool2d],
        outputs_collections=end_points_collection):
      net = layers_lib.repeat(
          inputs, 2, layers.conv2d, 64, [3, 3], scope='conv1')
      net = layers_lib.max_pool2d(net, [2, 2], scope='pool1')
      net = layers_lib.repeat(net, 2, layers.conv2d, 128, [3, 3], scope='conv2')
      net = layers_lib.max_pool2d(net, [2, 2], scope='pool2')
      net = layers_lib.repeat(net, 3, layers.conv2d, 256, [3, 3], scope='conv3')
      net = layers_lib.max_pool2d(net, [2, 2], scope='pool3')
      net = layers_lib.repeat(net, 3, layers.conv2d, 512, [3, 3], scope='conv4')
      net = layers_lib.max_pool2d(net, [2, 2], scope='pool4')
      net = layers_lib.repeat(net, 3, layers.conv2d, 512, [3, 3], scope='conv5')
      net = layers_lib.max_pool2d(net, [2, 2], scope='pool5')
      # Use conv2d instead of fully_connected layers.
      net = layers.conv2d(net, 4096, [7, 7], padding='VALID', scope='fc6')
      net = layers_lib.dropout(
          net, dropout_keep_prob, is_training=is_training, scope='dropout6')
      net = layers.conv2d(net, 4096, [1, 1], scope='fc7')
      net = layers_lib.dropout(
          net, dropout_keep_prob, is_training=is_training, scope='dropout7')
      net = layers.conv2d(
          net, num_classes, [1, 1],
          activation_fn=None, normalizer_fn=None, scope='fc8')
      # Convert end_points_collection into a end_point dict.
      end_points = utils.convert_collection_to_dict(end_points_collection)
      if spatial_squeeze:
        net = array_ops.squeeze(net, [1, 2], name='fc8/squeezed')
        end_points[sc.name + '/fc8'] = net
      return net, end_points
Example #27
Source File: vgg.py From auto-alt-text-lambda-api with MIT License | 4 votes |
def vgg_19(inputs, num_classes=1000, is_training=True,
           dropout_keep_prob=0.5, spatial_squeeze=True, scope='vgg_19'):
  """Oxford Net VGG 19-Layers version E Example.

  Note: All the fully_connected layers have been transformed to conv2d layers.
        To use in classification mode, resize input to 224x224.

  Args:
    inputs: a tensor of size [batch_size, height, width, channels].
    num_classes: number of predicted classes.
    is_training: whether or not the model is being trained.
    dropout_keep_prob: the probability that activations are kept in the dropout
      layers during training.
    spatial_squeeze: whether or not should squeeze the spatial dimensions of the
      outputs. Useful to remove unnecessary dimensions for classification.
    scope: Optional scope for the variables.

  Returns:
    the last op containing the log predictions and end_points dict.
  """
  with variable_scope.variable_scope(scope, 'vgg_19', [inputs]) as sc:
    end_points_collection = sc.name + '_end_points'
    # Collect outputs for conv2d, fully_connected and max_pool2d.
    with arg_scope(
        [layers.conv2d, layers_lib.fully_connected, layers_lib.max_pool2d],
        outputs_collections=end_points_collection):
      net = layers_lib.repeat(
          inputs, 2, layers.conv2d, 64, [3, 3], scope='conv1')
      net = layers_lib.max_pool2d(net, [2, 2], scope='pool1')
      net = layers_lib.repeat(net, 2, layers.conv2d, 128, [3, 3], scope='conv2')
      net = layers_lib.max_pool2d(net, [2, 2], scope='pool2')
      net = layers_lib.repeat(net, 4, layers.conv2d, 256, [3, 3], scope='conv3')
      net = layers_lib.max_pool2d(net, [2, 2], scope='pool3')
      net = layers_lib.repeat(net, 4, layers.conv2d, 512, [3, 3], scope='conv4')
      net = layers_lib.max_pool2d(net, [2, 2], scope='pool4')
      net = layers_lib.repeat(net, 4, layers.conv2d, 512, [3, 3], scope='conv5')
      net = layers_lib.max_pool2d(net, [2, 2], scope='pool5')
      # Use conv2d instead of fully_connected layers.
      net = layers.conv2d(net, 4096, [7, 7], padding='VALID', scope='fc6')
      net = layers_lib.dropout(
          net, dropout_keep_prob, is_training=is_training, scope='dropout6')
      net = layers.conv2d(net, 4096, [1, 1], scope='fc7')
      net = layers_lib.dropout(
          net, dropout_keep_prob, is_training=is_training, scope='dropout7')
      net = layers.conv2d(
          net, num_classes, [1, 1],
          activation_fn=None, normalizer_fn=None, scope='fc8')
      # Convert end_points_collection into a end_point dict.
      end_points = utils.convert_collection_to_dict(end_points_collection)
      if spatial_squeeze:
        net = array_ops.squeeze(net, [1, 2], name='fc8/squeezed')
        end_points[sc.name + '/fc8'] = net
      return net, end_points
Example #28
Source File: vqa_attention.py From tensor2tensor with Apache License 2.0 | 4 votes |
def body(self, features):
  hp = self.hparams
  model_fn = resnet_v1_152
  if hp.image_model_fn != "resnet_v1_152":
    model_fn = eval(hp.image_model_fn)  # pylint: disable=eval-used
  if hp.image_input_type == "image":
    image_feat = vqa_layers.image_embedding(
        features["inputs"],
        model_fn=model_fn,
        trainable=hp.train_resnet,
        is_training=hp.mode == tf.estimator.ModeKeys.TRAIN)
  else:
    image_feat = features["inputs"]

  if hp.image_feat_size:
    image_feat = common_layers.dense(image_feat, hp.image_feat_size)

  # apply layer normalization and dropout on image_feature
  utils.collect_named_outputs("norms", "image_feat_before_l2",
                              tf.norm(image_feat, axis=-1))
  image_feat = common_layers.l2_norm(image_feat)
  utils.collect_named_outputs("norms", "image_feat_after_l2",
                              tf.norm(image_feat, axis=-1))

  image_feat = tf.nn.dropout(image_feat, keep_prob=1.-hp.dropout)

  query = question_encoder(features["question"], hp)
  utils.collect_named_outputs("norms", "query",
                              tf.norm(query, axis=-1))

  image_ave = attn(image_feat, query, hp)
  utils.collect_named_outputs("norms", "image_ave",
                              tf.norm(image_ave, axis=-1))

  image_question = tf.concat([image_ave, query], axis=1)
  utils.collect_named_outputs("norms", "image_question",
                              tf.norm(image_question, axis=-1))

  image_question = tf.nn.dropout(image_question, 1. - hp.dropout)

  output = mlp(image_question, hp)
  utils.collect_named_outputs("norms", "output",
                              tf.norm(output, axis=-1))

  norm_tensors = utils.convert_collection_to_dict("norms")
  vqa_layers.summarize_tensors(norm_tensors, tag="norms/")

  # Expand dimension 1 and 2
  return tf.expand_dims(tf.expand_dims(output, axis=1), axis=2)
Example #29
Source File: vqa_attention.py From tensor2tensor with Apache License 2.0 | 4 votes |
def body(self, features):
  hp = self.hparams
  # pylint: disable=eval-used
  if hp.image_input_type == "image":
    image_feat = vqa_layers.image_embedding(
        features["inputs"],
        model_fn=eval(hp.image_model_fn),
        trainable=hp.train_resnet,
        is_training=hp.mode == tf.estimator.ModeKeys.TRAIN)
  else:
    image_feat = features["inputs"]

  image_feat = common_layers.flatten4d3d(image_feat)
  # image feature self attention
  # image_feat = tf.nn.dropout(
  #     image_feat, keep_prob=1.-hp.layer_prepostprocess_dropout)
  # image_feat = image_feat - tf.reduce_mean(
  #     image_feat, axis=-1, keepdims=True)
  # image_feat = tf.nn.l2_normalize(image_feat, -1)
  # utils.collect_named_outputs("norms", "image_feat_after_l2",
  #                             tf.norm(image_feat, axis=-1))

  image_feat = tf.nn.dropout(image_feat, keep_prob=1.-hp.dropout)
  image_feat = image_encoder(image_feat, hp)
  utils.collect_named_outputs("norms", "image_feat_encoded",
                              tf.norm(image_feat, axis=-1))
  image_feat = common_layers.l2_norm(image_feat)
  utils.collect_named_outputs("norms", "image_feat_encoded_l2",
                              tf.norm(image_feat, axis=-1))

  query = question_encoder(features["question"], hp)
  utils.collect_named_outputs("norms", "query",
                              tf.norm(query, axis=-1))

  image_ave = attn(image_feat, query, hp)
  utils.collect_named_outputs("norms", "image_ave",
                              tf.norm(image_ave, axis=-1))

  image_question = tf.concat([image_ave, query], axis=1)
  utils.collect_named_outputs("norms", "image_question",
                              tf.norm(image_question, axis=-1))

  image_question = tf.nn.dropout(image_question, 1. - hp.dropout)

  output = mlp(image_question, hp)
  utils.collect_named_outputs("norms", "output",
                              tf.norm(output, axis=-1))

  norm_tensors = utils.convert_collection_to_dict("norms")
  vqa_layers.summarize_tensors(norm_tensors, tag="norms/")

  # Expand dimension 1 and 2
  return tf.expand_dims(tf.expand_dims(output, axis=1), axis=2)
Example #30
Source File: vqa_self_attention.py From tensor2tensor with Apache License 2.0 | 4 votes |
def body(self, features):
  hp = self.hparams
  # pylint: disable=eval-used
  if hp.image_input_type == "image":
    image_feat = vqa_layers.image_embedding(
        features["inputs"],
        model_fn=eval(hp.image_model_fn),
        trainable=hp.train_resnet,
        is_training=hp.mode == tf.estimator.ModeKeys.TRAIN)
  else:
    image_feat = features["inputs"]

  image_feat = common_layers.flatten4d3d(image_feat)
  image_hidden_size = hp.hidden_size
  image_feat = common_layers.dense(image_feat, image_hidden_size)
  utils.collect_named_outputs("norms", "image_feat_after_proj",
                              tf.norm(image_feat, axis=-1))

  question = common_layers.flatten4d3d(features["question"])
  utils.collect_named_outputs("norms", "question_embedding",
                              tf.norm(question, axis=-1))

  (encoder_input, encoder_self_attention_bias,
   encoder_decoder_attention_bias) = prepare_image_question_encoder(
       image_feat, question, hp)
  encoder_input = tf.nn.dropout(
      encoder_input, keep_prob=1.-hp.layer_prepostprocess_dropout)

  encoder_output = image_question_encoder(
      encoder_input, encoder_self_attention_bias, hp)
  utils.collect_named_outputs(
      "norms", "encoder_output", tf.norm(encoder_output, axis=-1))

  # scale query by sqrt(hidden_size)
  query = tf.get_variable("query", [hp.hidden_size]) * hp.hidden_size**0.5
  query = tf.expand_dims(tf.expand_dims(query, axis=0), axis=0)
  batch_size = common_layers.shape_list(encoder_input)[0]
  query = tf.tile(query, [batch_size, 1, 1])
  query = tf.nn.dropout(
      query, keep_prob=1.-hp.layer_prepostprocess_dropout)

  decoder_output = decoder(
      query, encoder_output, None, encoder_decoder_attention_bias, hp)
  utils.collect_named_outputs("norms", "decoder_output",
                              tf.norm(decoder_output, axis=-1))

  norm_tensors = utils.convert_collection_to_dict("norms")
  vqa_layers.summarize_tensors(norm_tensors, tag="norms/")

  # Expand dimension 1 and 2
  return tf.expand_dims(decoder_output, axis=1)