Python model_utils.FramePooling() Examples
The following are 3 code examples of model_utils.FramePooling().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module model_utils, or try the search function.
Example #1
Source File: frame_level_models.py From Youtube-8M-WILLOW with Apache License 2.0 | 4 votes |
def forward(self, reshaped_input):
    """Project frame features onto clusters and pool them over frames.

    Args:
      reshaped_input: Tensor that is matrix-multiplied with the
        `[feature_size, cluster_size]` cluster weights. Presumably shaped
        `[batch_size * max_frames, feature_size]` -- TODO confirm with caller.

    Returns:
      The concatenation along axis 1 of the L2-normalized 'average' and
      'max' frame poolings of the cluster activations.
    """
    feature_size = self.feature_size
    cluster_size = self.cluster_size
    add_batch_norm = self.add_batch_norm
    max_frames = self.max_frames
    is_training = self.is_training

    cluster_weights = tf.get_variable(
        "cluster_weights",
        [feature_size, cluster_size],
        initializer=tf.random_normal_initializer(
            stddev=1 / math.sqrt(feature_size)))
    tf.summary.histogram("cluster_weights", cluster_weights)
    activation = tf.matmul(reshaped_input, cluster_weights)

    if add_batch_norm:
        activation = slim.batch_norm(
            activation,
            center=True,
            scale=True,
            is_training=is_training,
            scope="cluster_bn")
    else:
        # An initializer object is required here: `tf.random_normal` is a
        # sampling op that needs a `shape` argument and cannot be called
        # with `stddev` alone.
        cluster_biases = tf.get_variable(
            "cluster_biases",
            [cluster_size],
            initializer=tf.random_normal_initializer(
                stddev=1 / math.sqrt(feature_size)))
        tf.summary.histogram("cluster_biases", cluster_biases)
        activation += cluster_biases

    # NOTE(review): `activation` is the tensor computed above, not an
    # activation name, so these string comparisons look unintended. The
    # activation kind is presumably meant to come from configuration --
    # confirm before relying on the 'glu' / 'relu' branches.
    if activation == 'glu':
        # Floor division keeps the split point an int under Python 3.
        space_ind = range(cluster_size // 2)
        gate_ind = range(cluster_size // 2, cluster_size)

        gates = tf.sigmoid(activation[:, gate_ind])
        activation = tf.multiply(activation[:, space_ind], gates)
    elif activation == 'relu':
        activation = tf.nn.relu6(activation)

    tf.summary.histogram("cluster_output", activation)

    # Restore the per-video frame axis so pooling runs over frames.
    activation = tf.reshape(activation, [-1, max_frames, cluster_size])

    avg_activation = utils.FramePooling(activation, 'average')
    avg_activation = tf.nn.l2_normalize(avg_activation, 1)

    max_activation = utils.FramePooling(activation, 'max')
    max_activation = tf.nn.l2_normalize(max_activation, 1)

    return tf.concat([avg_activation, max_activation], 1)
Example #2
Source File: xp_frame_level_models.py From Y8M with Apache License 2.0 | 4 votes |
def create_model(self, model_input, vocab_size, num_frames,
                 l2_penalty=1e-4, **unused_params):
    """Average-pool frame features and delegate to a video-level model.

    The frame-level input is pooled with `utils.FramePooling(...,
    'average')` and the result is handed to the video-level model class
    named by `FLAGS.video_level_classifier_model`.

    Args:
      model_input: A 'batch_size' x 'max_frames' x 'num_features' matrix of
        input features.
      vocab_size: The number of classes in the dataset.
      num_frames: A vector of length 'batch' which indicates the number of
        frames for each video (before padding). Not read by this method.
      l2_penalty: Not read by this method.
      **unused_params: Forwarded unchanged to the video-level model's
        `create_model`.

    Returns:
      Whatever the selected video-level model's `create_model` returns;
      presumably a dictionary with a 'predictions' tensor of shape
      'batch_size' x 'num_classes' -- confirm against that model.
    """
    pooled_features = utils.FramePooling(model_input, 'average')
    pooled_shape = pooled_features.get_shape().as_list()
    logging.info('avg_pooled shape: {}'.format(pooled_shape))

    # Resolve the video-level classifier class by name from the flags.
    classifier_cls = getattr(video_level_models,
                             FLAGS.video_level_classifier_model)
    classifier = classifier_cls()
    return classifier.create_model(
        model_input=pooled_features,
        vocab_size=vocab_size,
        num_mixtures=2,
        **unused_params)
Example #3
Source File: xp_frame_level_models.py From Y8M with Apache License 2.0 | 4 votes |
def create_model(self, model_input, vocab_size, num_frames,
                 l2_penalty=1e-4, **unused_params):
    """Average-pool frame features and delegate to a video-level model.

    The frame-level input is pooled with `utils.FramePooling(...,
    'average')` and the result is handed to the video-level model class
    named by `FLAGS.video_level_classifier_model`.

    Args:
      model_input: A 'batch_size' x 'max_frames' x 'num_features' matrix of
        input features.
      vocab_size: The number of classes in the dataset.
      num_frames: A vector of length 'batch' which indicates the number of
        frames for each video (before padding). Not read by this method.
      l2_penalty: Not read by this method.
      **unused_params: Forwarded unchanged to the video-level model's
        `create_model`.

    Returns:
      Whatever the selected video-level model's `create_model` returns;
      presumably a dictionary with a 'predictions' tensor of shape
      'batch_size' x 'num_classes' -- confirm against that model.
    """
    pooled_features = utils.FramePooling(model_input, 'average')
    pooled_shape = pooled_features.get_shape().as_list()
    logging.info('avg_pooled shape: {}'.format(pooled_shape))

    # Resolve the video-level classifier class by name from the flags.
    classifier_cls = getattr(video_level_models,
                             FLAGS.video_level_classifier_model)
    classifier = classifier_cls()
    return classifier.create_model(
        model_input=pooled_features,
        vocab_size=vocab_size,
        num_mixtures=2,
        **unused_params)