Python tensorflow.variance_scaling_initializer() Examples
The following are 30 code examples of tensorflow.variance_scaling_initializer(), drawn from open-source projects. You can go to the original project or source file by following the links above each example, or check out all other available functions and classes of the tensorflow module.
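As a quick orientation before the project examples, here is a minimal sketch (TF 1.x graph-mode API; the shapes and layer sizes are illustrative and not taken from any of the projects below) of the two most common ways the initializer is used:

    import tensorflow as tf  # TF 1.x

    # He-style scaling for ReLU layers: variance roughly 2 / fan_in.
    init = tf.variance_scaling_initializer(scale=2.0, mode='fan_in')

    # 1) As the initializer of a raw variable.
    w = tf.get_variable('w', shape=[784, 256], initializer=init)

    # 2) As the kernel initializer of a layer.
    x = tf.placeholder(tf.float32, [None, 784])
    h = tf.layers.dense(x, 256, activation=tf.nn.relu, kernel_initializer=init)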
Example #1
Source File: mixnet_model.py From tpu_models with Apache License 2.0 | 6 votes |
def dense_kernel_initializer(shape, dtype=None, partition_info=None):
    """Initialization for dense kernels.

    This initialization is equal to
      tf.variance_scaling_initializer(scale=1.0/3.0, mode='fan_out',
                                      distribution='uniform').
    It is written out explicitly here for clarity.

    Args:
      shape: shape of variable
      dtype: dtype of variable
      partition_info: unused

    Returns:
      an initialization for the variable
    """
    del partition_info
    init_range = 1.0 / np.sqrt(shape[1])
    return tf.random_uniform(shape, -init_range, init_range, dtype=dtype)
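A quick sanity check of the equivalence claimed in the docstring (not part of the original source): for a dense kernel of shape [fan_in, fan_out], the uniform variance-scaling limit is sqrt(3 * scale / fan_out), and with scale = 1/3 this collapses to 1 / sqrt(fan_out), i.e. exactly the init_range above.

    import numpy as np

    fan_out = 512  # any fan-out works; 512 is just an example
    limit_variance_scaling = np.sqrt(3.0 * (1.0 / 3.0) / fan_out)
    init_range = 1.0 / np.sqrt(fan_out)
    assert np.isclose(limit_variance_scaling, init_range)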
Example #2
Source File: resnet_model.py From gradient-checkpointing with MIT License | 6 votes |
def conv2d_fixed_padding(inputs, filters, kernel_size, strides, data_format):
    """Strided 2-D convolution with explicit padding.

    The padding is consistent and is based only on `kernel_size`, not on the
    dimensions of `inputs` (as opposed to using `tf.layers.conv2d` alone).
    """
    global conv2d_counter
    if strides > 1:
        inputs = fixed_padding(inputs, kernel_size, data_format)

    result = tf.layers.conv2d(
        inputs=inputs, filters=filters, kernel_size=kernel_size, strides=strides,
        padding=('SAME' if strides == 1 else 'VALID'), use_bias=False,
        kernel_initializer=tf.variance_scaling_initializer(),
        data_format=data_format, name="conv2d_%d" % (conv2d_counter,))
    conv2d_counter += 1
    return result
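The fixed_padding helper called above is not reproduced on this page. A typical implementation (a sketch modelled on the official TensorFlow ResNet code; this project's own version may differ) pads by kernel_size - 1 so that the output size depends only on the kernel size:

    def fixed_padding(inputs, kernel_size, data_format):
        """Pads the input along the spatial dimensions independently of input size."""
        pad_total = kernel_size - 1
        pad_beg = pad_total // 2
        pad_end = pad_total - pad_beg
        if data_format == 'channels_first':
            return tf.pad(inputs, [[0, 0], [0, 0],
                                   [pad_beg, pad_end], [pad_beg, pad_end]])
        return tf.pad(inputs, [[0, 0], [pad_beg, pad_end],
                               [pad_beg, pad_end], [0, 0]])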
Example #3
Source File: attention.py From nlp_research with MIT License | 6 votes |
def concat_attention(a, b, a_lengths, b_lengths, max_seq_len, hidden_units=150,
                     scope='concat-attention', reuse=False):
    with tf.variable_scope(scope, reuse=reuse):
        a = tf.expand_dims(a, 2)
        b = tf.expand_dims(b, 1)
        c = tf.concat([a, b], axis=3)
        W = tf.get_variable(
            name='matmul_weights',
            initializer=tf.contrib.layers.variance_scaling_initializer(),
            shape=[shape(c, -1), hidden_units]
        )
        cW = tf.einsum('ijkl,lm->ijkm', c, W)
        v = tf.get_variable(
            name='dot_weights',
            initializer=tf.ones_initializer(),
            shape=[hidden_units]
        )
        logits = tf.einsum('ijkl,l->ijk', tf.nn.tanh(cW), v)
        logits = logits - tf.expand_dims(tf.reduce_max(logits, axis=2), 2)
        attn = tf.exp(logits)
        attn = mask_attention_weights(attn, a_lengths, b_lengths, max_seq_len)
        return attn / tf.expand_dims(tf.reduce_sum(attn, axis=2) + 1e-10, 2)
Example #4
Source File: resnet_model.py From nsfw with Apache License 2.0 | 6 votes |
def conv2d_fixed_padding(inputs, filters, kernel_size, strides, data_format):
    """Strided 2-D convolution with explicit padding."""
    # The padding is consistent and is based only on `kernel_size`, not on the
    # dimensions of `inputs` (as opposed to using `tf.layers.conv2d` alone).
    if strides > 1:
        inputs = fixed_padding(inputs, kernel_size, data_format)

    return tf.layers.conv2d(
        inputs=inputs, filters=filters, kernel_size=kernel_size, strides=strides,
        padding=('SAME' if strides == 1 else 'VALID'), use_bias=False,
        kernel_initializer=tf.variance_scaling_initializer(),
        data_format=data_format)


################################################################################
# ResNet block definitions.
################################################################################
Example #5
Source File: resnet_model.py From Gun-Detector with Apache License 2.0 | 6 votes |
def conv2d_fixed_padding(inputs, filters, kernel_size, strides, data_format):
    """Strided 2-D convolution with explicit padding."""
    # The padding is consistent and is based only on `kernel_size`, not on the
    # dimensions of `inputs` (as opposed to using `tf.layers.conv2d` alone).
    if strides > 1:
        inputs = fixed_padding(inputs, kernel_size, data_format)

    return tf.layers.conv2d(
        inputs=inputs, filters=filters, kernel_size=kernel_size, strides=strides,
        padding=('SAME' if strides == 1 else 'VALID'), use_bias=False,
        kernel_initializer=tf.variance_scaling_initializer(),
        data_format=data_format)


################################################################################
# ResNet block definitions.
################################################################################
Example #6
Source File: mnasnet_model.py From tpu_models with Apache License 2.0 | 6 votes |
def conv_kernel_initializer(shape, dtype=None, partition_info=None):
    """Initialization for convolutional kernels.

    The main difference with tf.variance_scaling_initializer is that
    tf.variance_scaling_initializer uses a truncated normal with an uncorrected
    standard deviation, whereas here we use a normal distribution. Similarly,
    tf.contrib.layers.variance_scaling_initializer uses a truncated normal with
    a corrected standard deviation.

    Args:
      shape: shape of variable
      dtype: dtype of variable
      partition_info: unused

    Returns:
      an initialization for the variable
    """
    del partition_info
    kernel_height, kernel_width, _, out_filters = shape
    fan_out = int(kernel_height * kernel_width * out_filters)
    return tf.random_normal(
        shape, mean=0.0, stddev=np.sqrt(2.0 / fan_out), dtype=dtype)
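For reference, later TF 1.x releases expose the same behaviour through the built-in initializer. A sketch, assuming your TensorFlow version supports the 'untruncated_normal' distribution option (older releases only offer the truncated variants the docstring warns about):

    # Roughly equivalent built-in form: plain normal with stddev = sqrt(2 / fan_out).
    init = tf.variance_scaling_initializer(
        scale=2.0, mode='fan_out', distribution='untruncated_normal')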
Example #7
Source File: train.py From UNMT-SPR with MIT License | 6 votes |
def get_initializer(params):
    if params.initializer == "uniform":
        max_val = 0.1 * params.initializer_gain
        return tf.random_uniform_initializer(-max_val, max_val)
    elif params.initializer == "normal":
        return tf.random_normal_initializer(0.0, params.initializer_gain)
    elif params.initializer == "orthogonal":
        return tf.orthogonal_initializer(params.initializer_gain)
    elif params.initializer == "normal_unit_scaling":
        return tf.variance_scaling_initializer(params.initializer_gain,
                                               mode="fan_avg",
                                               distribution="normal")
    elif params.initializer == "uniform_unit_scaling":
        return tf.variance_scaling_initializer(params.initializer_gain,
                                               mode="fan_avg",
                                               distribution="uniform")
    else:
        raise ValueError("Unrecognized initializer: %s" % params.initializer)
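A typical way such a factory is consumed is to make its result the default initializer of a variable scope. A sketch (params here is a stand-in namespace with initializer and initializer_gain fields, not this project's actual configuration object):

    import argparse
    import tensorflow as tf

    params = argparse.Namespace(initializer="uniform_unit_scaling",
                                initializer_gain=1.0)
    initializer = get_initializer(params)

    # Every variable created inside the scope now defaults to this initializer.
    with tf.variable_scope("model", initializer=initializer):
        w = tf.get_variable("w", shape=[512, 512])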
Example #8
Source File: optimize.py From fine-lm with MIT License | 6 votes |
def get_variable_initializer(hparams):
    """Get variable initializer from hparams."""
    if not hparams.initializer:
        return None

    if not tf.contrib.eager.in_eager_mode():
        tf.logging.info("Using variable initializer: %s", hparams.initializer)
    if hparams.initializer == "orthogonal":
        return tf.orthogonal_initializer(gain=hparams.initializer_gain)
    elif hparams.initializer == "uniform":
        max_val = 0.1 * hparams.initializer_gain
        return tf.random_uniform_initializer(-max_val, max_val)
    elif hparams.initializer == "normal_unit_scaling":
        return tf.variance_scaling_initializer(
            hparams.initializer_gain, mode="fan_avg", distribution="normal")
    elif hparams.initializer == "uniform_unit_scaling":
        return tf.variance_scaling_initializer(
            hparams.initializer_gain, mode="fan_avg", distribution="uniform")
    elif hparams.initializer == "xavier":
        return tf.contrib.layers.xavier_initializer()
    else:
        raise ValueError("Unrecognized initializer: %s" % hparams.initializer)
Example #9
Source File: utils.py From EfficientUnet with MIT License | 6 votes |
def conv_kernel_initializer(shape, dtype=K.floatx()):
    """Initialization for convolutional kernels.

    The main difference with tf.variance_scaling_initializer is that
    tf.variance_scaling_initializer uses a truncated normal with an uncorrected
    standard deviation, whereas here we use a normal distribution. Similarly,
    tf.contrib.layers.variance_scaling_initializer uses a truncated normal with
    a corrected standard deviation.

    Args:
      shape: shape of variable
      dtype: dtype of variable

    Returns:
      an initialization for the variable
    """
    kernel_height, kernel_width, _, out_filters = shape
    fan_out = int(kernel_height * kernel_width * out_filters)
    return tf.random_normal(
        shape, mean=0.0, stddev=np.sqrt(2.0 / fan_out), dtype=dtype)
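Because the function has the (shape, dtype) signature Keras expects from an initializer callable, it can be passed straight to a layer. A sketch (the layer hyper-parameters are illustrative, not taken from EfficientUnet):

    from tensorflow.keras.layers import Conv2D

    conv = Conv2D(filters=32, kernel_size=3, padding='same', use_bias=False,
                  kernel_initializer=conv_kernel_initializer)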
Example #10
Source File: initializer.py From zero with BSD 3-Clause "New" or "Revised" License | 6 votes |
def get_initializer(initializer, initializer_gain):
    tfdtype = tf.as_dtype(dtype.floatx())

    if initializer == "uniform":
        max_val = initializer_gain
        return tf.random_uniform_initializer(-max_val, max_val, dtype=tfdtype)
    elif initializer == "normal":
        return tf.random_normal_initializer(0.0, initializer_gain, dtype=tfdtype)
    elif initializer == "normal_unit_scaling":
        return tf.variance_scaling_initializer(initializer_gain,
                                               mode="fan_avg",
                                               distribution="normal",
                                               dtype=tfdtype)
    elif initializer == "uniform_unit_scaling":
        return tf.variance_scaling_initializer(initializer_gain,
                                               mode="fan_avg",
                                               distribution="uniform",
                                               dtype=tfdtype)
    else:
        tf.logging.warn("Unrecognized initializer: %s" % initializer)
        tf.logging.warn("Return to default initializer: glorot_uniform_initializer")
        return tf.glorot_uniform_initializer(dtype=tfdtype)
Example #11
Source File: demo.py From R3Det_Tensorflow with MIT License | 6 votes |
def dense_kernel_initializer(shape, dtype=None, partition_info=None):
    """Initialization for dense kernels.

    This initialization is equal to
      tf.variance_scaling_initializer(scale=1.0/3.0, mode='fan_out',
                                      distribution='uniform').
    It is written out explicitly here for clarity.

    Args:
      shape: shape of variable
      dtype: dtype of variable
      partition_info: unused

    Returns:
      an initialization for the variable
    """
    del partition_info
    init_range = 1.0 / np.sqrt(shape[1])
    return tf.random_uniform(shape, -init_range, init_range, dtype=dtype)
Example #12
Source File: demo.py From R3Det_Tensorflow with MIT License | 6 votes |
def conv_kernel_initializer(shape, dtype=None, partition_info=None):
    """Initialization for convolutional kernels.

    The main difference with tf.variance_scaling_initializer is that
    tf.variance_scaling_initializer uses a truncated normal with an uncorrected
    standard deviation, whereas here we use a normal distribution. Similarly,
    tf.contrib.layers.variance_scaling_initializer uses a truncated normal with
    a corrected standard deviation.

    Args:
      shape: shape of variable
      dtype: dtype of variable
      partition_info: unused

    Returns:
      an initialization for the variable
    """
    del partition_info
    kernel_height, kernel_width, _, out_filters = shape
    fan_out = int(kernel_height * kernel_width * out_filters)
    return tf.random_normal(
        shape, mean=0.0, stddev=np.sqrt(2.0 / fan_out), dtype=dtype)
Example #13
Source File: efficientnet_model.py From R3Det_Tensorflow with MIT License | 6 votes |
def conv_kernel_initializer(shape, dtype=None, partition_info=None):
    """Initialization for convolutional kernels.

    The main difference with tf.variance_scaling_initializer is that
    tf.variance_scaling_initializer uses a truncated normal with an uncorrected
    standard deviation, whereas here we use a normal distribution. Similarly,
    tf.initializers.variance_scaling uses a truncated normal with a corrected
    standard deviation.

    Args:
      shape: shape of variable
      dtype: dtype of variable
      partition_info: unused

    Returns:
      an initialization for the variable
    """
    del partition_info
    kernel_height, kernel_width, _, out_filters = shape
    fan_out = int(kernel_height * kernel_width * out_filters)
    return tf.random_normal(
        shape, mean=0.0, stddev=np.sqrt(2.0 / fan_out), dtype=dtype)
Example #14
Source File: resnet_model.py From benchmarks with The Unlicense | 6 votes |
def resnet_backbone(image, num_blocks, group_func, block_func):
    """
    Sec 5.1: We adopt the initialization of [15] for all convolutional layers.
    TensorFlow does not have the true "MSRA init". We use variance_scaling as an
    approximation.
    """
    with argscope(Conv2D, use_bias=False,
                  kernel_initializer=tf.variance_scaling_initializer(scale=2.0, mode='fan_out')):
        l = Conv2D('conv0', image, 64, 7, strides=2, activation=BNReLU)
        l = MaxPooling('pool0', l, pool_size=3, strides=2, padding='SAME')
        l = group_func('group0', l, block_func, 64, num_blocks[0], 1)
        l = group_func('group1', l, block_func, 128, num_blocks[1], 2)
        l = group_func('group2', l, block_func, 256, num_blocks[2], 2)
        l = group_func('group3', l, block_func, 512, num_blocks[3], 2)
        l = GlobalAvgPooling('gap', l)
        logits = FullyConnected('linear', l, 1000,
                                kernel_initializer=tf.random_normal_initializer(stddev=0.01))
    """
    Sec 5.1: The 1000-way fully-connected layer is initialized by drawing weights
    from a zero-mean Gaussian with standard deviation of 0.01.
    """
    return logits
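The combination used above, scale=2.0 with mode='fan_out', targets Var(W) = 2 / fan_out, i.e. the He et al. recipe the comment refers to as "MSRA init". Outside tensorpack's argscope the same choice can be written directly on a layer; a sketch, not taken from this file:

    image = tf.placeholder(tf.float32, [None, 224, 224, 3])  # illustrative input
    conv = tf.layers.conv2d(
        inputs=image, filters=64, kernel_size=7, strides=2, padding='same',
        use_bias=False,
        kernel_initializer=tf.variance_scaling_initializer(scale=2.0, mode='fan_out'))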
Example #15
Source File: mixnet_model.py From tpu_models with Apache License 2.0 | 6 votes |
def conv_kernel_initializer(shape, dtype=None, partition_info=None):
    """Initialization for convolutional kernels.

    The main difference with tf.variance_scaling_initializer is that
    tf.variance_scaling_initializer uses a truncated normal with an uncorrected
    standard deviation, whereas here we use a normal distribution. Similarly,
    tf.contrib.layers.variance_scaling_initializer uses a truncated normal with
    a corrected standard deviation.

    Args:
      shape: shape of variable
      dtype: dtype of variable
      partition_info: unused

    Returns:
      an initialization for the variable
    """
    del partition_info
    kernel_height, kernel_width, _, out_filters = shape
    fan_out = int(kernel_height * kernel_width * out_filters)
    return tf.random_normal(
        shape, mean=0.0, stddev=np.sqrt(2.0 / fan_out), dtype=dtype)
Example #16
Source File: efficientnet_model.py From tpu_models with Apache License 2.0 | 6 votes |
def dense_kernel_initializer(shape, dtype=None, partition_info=None):
    """Initialization for dense kernels.

    This initialization is equal to
      tf.variance_scaling_initializer(scale=1.0/3.0, mode='fan_out',
                                      distribution='uniform').
    It is written out explicitly here for clarity.

    Args:
      shape: shape of variable
      dtype: dtype of variable
      partition_info: unused

    Returns:
      an initialization for the variable
    """
    del partition_info
    init_range = 1.0 / np.sqrt(shape[1])
    return tf.random_uniform(shape, -init_range, init_range, dtype=dtype)
Example #17
Source File: models.py From Object_Detection_Tracking with Apache License 2.0 | 6 votes |
def small_object_classification_head(self, feature, num_class,
                                     scope="small_object_classification"):
    config = self.config
    dim = config.fpn_frcnn_fc_head_dim  # 1024
    initializer = tf.variance_scaling_initializer()

    with tf.variable_scope(scope):
        hidden = dense(feature, dim, W_init=initializer, activation=tf.nn.relu,
                       scope="fc6")
        hidden = dense(hidden, dim, W_init=initializer, activation=tf.nn.relu,
                       scope="fc7")
        classification = dense(hidden, num_class,
                               W_init=tf.random_normal_initializer(stddev=0.01),
                               scope="class")  # [K, num_class]

    return classification


# feature: [K,C,7,7]  # feature for each roi
Example #18
Source File: efficientnet_model.py From tpu_models with Apache License 2.0 | 6 votes |
def conv_kernel_initializer(shape, dtype=None, partition_info=None):
    """Initialization for convolutional kernels.

    The main difference with tf.variance_scaling_initializer is that
    tf.variance_scaling_initializer uses a truncated normal with an uncorrected
    standard deviation, whereas here we use a normal distribution. Similarly,
    tf.contrib.layers.variance_scaling_initializer uses a truncated normal with
    a corrected standard deviation.

    Args:
      shape: shape of variable
      dtype: dtype of variable
      partition_info: unused

    Returns:
      an initialization for the variable
    """
    del partition_info
    kernel_height, kernel_width, _, out_filters = shape
    fan_out = int(kernel_height * kernel_width * out_filters)
    return tf.random_normal(
        shape, mean=0.0, stddev=np.sqrt(2.0 / fan_out), dtype=dtype)
Example #19
Source File: resnet.py From tpu_models with Apache License 2.0 | 6 votes |
def conv2d_fixed_padding(self, inputs, filters, kernel_size, strides):
    """Strided 2-D convolution with explicit padding.

    The padding is consistent and is based only on `kernel_size`, not on the
    dimensions of `inputs` (as opposed to using `tf.layers.conv2d` alone).

    Args:
      inputs: `Tensor` of size `[batch, channels, height_in, width_in]`.
      filters: `int` number of filters in the convolution.
      kernel_size: `int` size of the kernel to be used in the convolution.
      strides: `int` strides of the convolution.

    Returns:
      A `Tensor` of shape `[batch, filters, height_out, width_out]`.
    """
    if strides > 1:
        inputs = self.fixed_padding(inputs, kernel_size)

    return tf.layers.conv2d(
        inputs=inputs, filters=filters, kernel_size=kernel_size, strides=strides,
        padding=('SAME' if strides == 1 else 'VALID'), use_bias=False,
        kernel_initializer=tf.variance_scaling_initializer(),
        data_format=self._data_format)
Example #20
Source File: efficientnet_model.py From tpu_models with Apache License 2.0 | 6 votes |
def dense_kernel_initializer(shape, dtype=None, partition_info=None):
    """Initialization for dense kernels.

    This initialization is equal to
      tf.variance_scaling_initializer(scale=1.0/3.0, mode='fan_out',
                                      distribution='uniform').
    It is written out explicitly here for clarity.

    Args:
      shape: shape of variable
      dtype: dtype of variable
      partition_info: unused

    Returns:
      an initialization for the variable
    """
    del partition_info
    init_range = 1.0 / np.sqrt(shape[1])
    return tf.random_uniform(shape, -init_range, init_range, dtype=dtype)
Example #21
Source File: efficientnet_model.py From tpu_models with Apache License 2.0 | 6 votes |
def conv_kernel_initializer(shape, dtype=None, partition_info=None):
    """Initialization for convolutional kernels.

    The main difference with tf.variance_scaling_initializer is that
    tf.variance_scaling_initializer uses a truncated normal with an uncorrected
    standard deviation, whereas here we use a normal distribution. Similarly,
    tf.contrib.layers.variance_scaling_initializer uses a truncated normal with
    a corrected standard deviation.

    Args:
      shape: shape of variable
      dtype: dtype of variable
      partition_info: unused

    Returns:
      an initialization for the variable
    """
    del partition_info
    kernel_height, kernel_width, _, out_filters = shape
    fan_out = int(kernel_height * kernel_width * out_filters)
    return tf.random_normal(
        shape, mean=0.0, stddev=np.sqrt(2.0 / fan_out), dtype=dtype)
Example #22
Source File: trainer.py From THUMT with BSD 3-Clause "New" or "Revised" License | 6 votes |
def get_initializer(params):
    if params.initializer == "uniform":
        max_val = params.initializer_gain
        return tf.random_uniform_initializer(-max_val, max_val)
    elif params.initializer == "normal":
        return tf.random_normal_initializer(0.0, params.initializer_gain)
    elif params.initializer == "normal_unit_scaling":
        return tf.variance_scaling_initializer(params.initializer_gain,
                                               mode="fan_avg",
                                               distribution="normal")
    elif params.initializer == "uniform_unit_scaling":
        return tf.variance_scaling_initializer(params.initializer_gain,
                                               mode="fan_avg",
                                               distribution="uniform")
    else:
        raise ValueError("Unrecognized initializer: %s" % params.initializer)
Example #23
Source File: efficientnet_model.py From R3Det_Tensorflow with MIT License | 6 votes |
def dense_kernel_initializer(shape, dtype=None, partition_info=None):
    """Initialization for dense kernels.

    This initialization is equal to
      tf.variance_scaling_initializer(scale=1.0/3.0, mode='fan_out',
                                      distribution='uniform').
    It is written out explicitly here for clarity.

    Args:
      shape: shape of variable
      dtype: dtype of variable
      partition_info: unused

    Returns:
      an initialization for the variable
    """
    del partition_info
    init_range = 1.0 / np.sqrt(shape[1])
    return tf.random_uniform(shape, -init_range, init_range, dtype=dtype)
Example #24
Source File: densenet_model.py From tpu_models with Apache License 2.0 | 5 votes |
def conv(image, filters, strides=1, kernel_size=3):
    """Convolution with default options from the densenet paper."""
    # Use initialization from https://arxiv.org/pdf/1502.01852.pdf
    return tf.layers.conv2d(
        inputs=image,
        filters=filters,
        kernel_size=kernel_size,
        strides=strides,
        activation=tf.identity,
        use_bias=False,
        padding="same",
        kernel_initializer=tf.variance_scaling_initializer(),
    )
Example #25
Source File: resnet.py From tpu_models with Apache License 2.0 | 5 votes |
def conv2d_fixed_padding(inputs, filters, kernel_size, strides,
                         data_format='channels_last'):
    """Strided 2-D convolution with explicit padding.

    The padding is consistent and is based only on `kernel_size`, not on the
    dimensions of `inputs` (as opposed to using `tf.layers.conv2d` alone).

    Args:
      inputs: `Tensor` of size `[batch, channels, height_in, width_in]`.
      filters: `int` number of filters in the convolution.
      kernel_size: `int` size of the kernel to be used in the convolution.
      strides: `int` strides of the convolution.
      data_format: `str` either "channels_first" for `[batch, channels, height,
        width]` or "channels_last" for `[batch, height, width, channels]`.

    Returns:
      A `Tensor` of shape `[batch, filters, height_out, width_out]`.
    """
    if strides > 1:
        inputs = spatial_transform_ops.fixed_padding(inputs, kernel_size,
                                                     data_format=data_format)

    return tf.layers.conv2d(
        inputs=inputs, filters=filters, kernel_size=kernel_size, strides=strides,
        padding=('SAME' if strides == 1 else 'VALID'), use_bias=False,
        kernel_initializer=tf.variance_scaling_initializer(),
        data_format=data_format)
Example #26
Source File: layer.py From bonnet with GNU General Public License v3.0 | 5 votes |
def weight_variable(shape, train):
    print("W: ", shape, "Train:", train)
    return tf.get_variable("w", shape=shape,
                           initializer=tf.variance_scaling_initializer,
                           trainable=train)
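Note that the example passes the initializer class itself rather than an instance; tf.get_variable in TF 1.x accepts either form and, when given the class, instantiates it with its default arguments (scale=1.0, mode='fan_in'). A sketch of the two spellings (variable names are illustrative):

    # Class form: instantiated by tf.get_variable with default arguments.
    w_default = tf.get_variable("w_default", shape=[3, 3, 64, 128],
                                initializer=tf.variance_scaling_initializer)

    # Instance form: arguments chosen explicitly.
    w_he = tf.get_variable("w_he", shape=[3, 3, 64, 128],
                           initializer=tf.variance_scaling_initializer(scale=2.0,
                                                                       mode='fan_out'))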
Example #27
Source File: layer.py From bonnet with GNU General Public License v3.0 | 5 votes |
def upsample_layer(input_tensor, train, upsample_factor=2, kernels=-1,
                   data_format="NCHW"):
    """Builds a full conv layer, with variables and relu

    Args:
      input_tensor: input tensor
      upsample_factor: how much to upsample
      kernels: -1 = same as input, otherwise number of kernels to upsample
      data_format: Self explanatory

    Returns:
      output: Output tensor from the upsampling
    """
    if data_format == "NCHW":
        # depth of previous layer feature map
        prev_depth = input_tensor.get_shape().as_list()[1]
    else:
        # depth of previous layer feature map
        prev_depth = input_tensor.get_shape().as_list()[3]

    if kernels < 0:
        kernel_nr = prev_depth
    else:
        kernel_nr = kernels

    with tf.variable_scope('upconv'):
        output = tf.contrib.layers.conv2d_transpose(
            input_tensor, kernel_nr, (2, 2), stride=2, padding='VALID',
            data_format=data_format, activation_fn=tf.nn.relu,
            weights_initializer=tf.variance_scaling_initializer,
            weights_regularizer=None, trainable=train)
        print("W: ", [2, 2, prev_depth, kernel_nr], "Train:", train)

    return output
Example #28
Source File: attention.py From nlp_research with MIT License | 5 votes |
def additive_attention(a, b, a_lengths, b_lengths, max_seq_len, hidden_units=150,
                       scope='additive-attention', reuse=False):
    with tf.variable_scope(scope, reuse=reuse):
        a = tf.expand_dims(a, 2)
        b = tf.expand_dims(b, 1)
        v = tf.get_variable(
            name='dot_weights',
            initializer=tf.variance_scaling_initializer(),
            shape=[hidden_units]
        )
        logits = tf.einsum('ijkl,l->ijk', tf.nn.tanh(a + b), v)
        logits = logits - tf.expand_dims(tf.reduce_max(logits, axis=2), 2)
        attn = tf.exp(logits)
        attn = mask_attention_weights(attn, a_lengths, b_lengths, max_seq_len)
        return attn / tf.expand_dims(tf.reduce_sum(attn, axis=2) + 1e-10, 2)
Example #29
Source File: sequence_cnn_encoder.py From neuralmonkey with BSD 3-Clause "New" or "Revised" License | 5 votes |
def output(self) -> tf.Tensor:
    pooled_outputs = []
    for filter_size, num_filters in self.filters:
        with tf.variable_scope("conv-maxpool-%s" % filter_size):
            # Convolution Layer
            filter_shape = [filter_size, self.embedding_size, num_filters]
            w_filter = get_variable(
                "conv_W", filter_shape,
                initializer=tf.variance_scaling_initializer(
                    mode="fan_avg", distribution="uniform"))
            b_filter = get_variable(
                "conv_bias", [num_filters],
                initializer=tf.zeros_initializer())
            conv = tf.nn.conv1d(
                self.embedded_inputs, w_filter, stride=1, padding="VALID",
                name="conv")

            # Apply nonlinearity
            conv_relu = tf.nn.relu(tf.nn.bias_add(conv, b_filter))

            # Max-pooling over the outputs
            pooled = tf.reduce_max(conv_relu, 1)
            pooled_outputs.append(pooled)

    # Combine all the pooled features
    return tf.concat(pooled_outputs, axis=1)
Example #30
Source File: transformer.py From neuralmonkey with BSD 3-Clause "New" or "Revised" License | 5 votes |
def modality_matrix(self) -> tf.Tensor:
    """Create an embedding matrix for varying target modalities.

    Used to embed different target space modalities in the tensor2tensor
    models (e.g. during the zero-shot translation).
    """
    emb_size = self.input_sequence.temporal_states.shape.as_list()[-1]
    return get_variable(
        name="target_modality_embedding_matrix",
        shape=[32, emb_size],
        dtype=tf.float32,
        initializer=tf.variance_scaling_initializer(
            mode="fan_avg", distribution="uniform"))