Python model_utils.SampleRandomFrames() Examples
The following are 2 code examples of model_utils.SampleRandomFrames().
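The helper itself is not shown on this page. As a rough orientation only, a frame-sampling utility of this kind usually draws a fixed number of frame indices uniformly at random (with replacement) from each video's valid range and gathers those frames. The sketch below is an assumption about that behavior, written against TF1-style APIs; it is not the actual model_utils source.

import tensorflow as tf  # TF1.x-style API, matching the examples below

def sample_random_frames(model_input, num_frames, num_samples):
  """Sketch of a random-frame sampler (assumed behavior, not the real model_utils code).

  Args:
    model_input: 'batch_size' x 'max_frames' x 'num_features' tensor.
    num_frames: 'batch_size' x 1 float tensor with the true frame count per video.
    num_samples: number of frames to draw (with replacement) per video.

  Returns:
    A 'batch_size' x 'num_samples' x 'num_features' tensor of sampled frames.
  """
  batch_size = tf.shape(model_input)[0]
  # Draw uniform values in [0, num_frames) and truncate to integer frame indices.
  frame_index = tf.cast(
      tf.random_uniform([batch_size, num_samples]) *
      tf.tile(num_frames, [1, num_samples]), tf.int32)
  # Pair each sampled frame index with its row in the batch, then gather.
  batch_index = tf.tile(tf.expand_dims(tf.range(batch_size), 1), [1, num_samples])
  index = tf.stack([batch_index, frame_index], axis=2)
  return tf.gather_nd(model_input, index)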
Example #1
Source File: frame_level_models.py From Youtube-8M-WILLOW with Apache License 2.0
def create_model(self, model_input, vocab_size, num_frames, is_training=True, **unused_params):
  """Creates a model which uses a stack of LSTMs to represent the video.

  Args:
    model_input: A 'batch_size' x 'max_frames' x 'num_features' matrix of input features.
    vocab_size: The number of classes in the dataset.
    num_frames: A vector of length 'batch' which indicates the number of frames for each video (before padding).

  Returns:
    A dictionary with a tensor containing the probability predictions of the model in the
    'predictions' key. The dimensions of the tensor are 'batch_size' x 'num_classes'.
  """
  lstm_size = FLAGS.lstm_cells
  number_of_layers = FLAGS.lstm_layers
  random_frames = FLAGS.lstm_random_sequence
  iterations = FLAGS.iterations
  backward = FLAGS.lstm_backward

  if random_frames:
    num_frames_2 = tf.cast(tf.expand_dims(num_frames, 1), tf.float32)
    model_input = utils.SampleRandomFrames(model_input, num_frames_2, iterations)
  if backward:
    model_input = tf.reverse_sequence(model_input, num_frames, seq_axis=1)

  stacked_lstm = tf.contrib.rnn.MultiRNNCell(
      [
          tf.contrib.rnn.BasicLSTMCell(
              lstm_size, forget_bias=1.0, state_is_tuple=False)
          for _ in range(number_of_layers)
      ],
      state_is_tuple=False)

  loss = 0.0
  with tf.variable_scope("RNN"):
    outputs, state = tf.nn.dynamic_rnn(stacked_lstm, model_input,
                                       sequence_length=num_frames,
                                       dtype=tf.float32)

  aggregated_model = getattr(video_level_models,
                             FLAGS.video_level_classifier_model)
  return aggregated_model().create_model(
      model_input=state,
      vocab_size=vocab_size,
      is_training=is_training,
      **unused_params)
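Two details are easy to miss in this example: after sampling, every video contributes exactly iterations frames, and because state_is_tuple=False the final state of the stacked LSTM arrives as one concatenated tensor of size 2 * lstm_size * number_of_layers, which is what gets passed on as model_input to the video-level classifier. The following self-contained sketch (toy sizes, TF1 APIs, not tied to the WILLOW flags) just verifies that state shape.

import tensorflow as tf  # TF1.x

lstm_size, number_of_layers, iterations = 4, 2, 5     # assumed toy sizes
model_input = tf.random_uniform([3, iterations, 8])   # batch x sampled_frames x features

stacked_lstm = tf.contrib.rnn.MultiRNNCell(
    [tf.contrib.rnn.BasicLSTMCell(lstm_size, forget_bias=1.0, state_is_tuple=False)
     for _ in range(number_of_layers)],
    state_is_tuple=False)

with tf.variable_scope("RNN"):
  outputs, state = tf.nn.dynamic_rnn(stacked_lstm, model_input, dtype=tf.float32)

with tf.Session() as sess:
  sess.run(tf.global_variables_initializer())
  # state concatenates (c, h) for every layer: 2 * lstm_size * number_of_layers = 16.
  print(sess.run(tf.shape(state)))  # -> [3, 16]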
Example #2
Source File: frame_level_models.py From Youtube-8M-WILLOW with Apache License 2.0
def create_model(self, model_input, vocab_size, num_frames, is_training=True, **unused_params):
  """Creates a model which uses a stack of GRUs to represent the video.

  Args:
    model_input: A 'batch_size' x 'max_frames' x 'num_features' matrix of input features.
    vocab_size: The number of classes in the dataset.
    num_frames: A vector of length 'batch' which indicates the number of frames for each video (before padding).

  Returns:
    A dictionary with a tensor containing the probability predictions of the model in the
    'predictions' key. The dimensions of the tensor are 'batch_size' x 'num_classes'.
  """
  gru_size = FLAGS.gru_cells
  number_of_layers = FLAGS.gru_layers
  backward = FLAGS.gru_backward
  random_frames = FLAGS.gru_random_sequence
  iterations = FLAGS.iterations

  if random_frames:
    num_frames_2 = tf.cast(tf.expand_dims(num_frames, 1), tf.float32)
    model_input = utils.SampleRandomFrames(model_input, num_frames_2, iterations)
  if backward:
    model_input = tf.reverse_sequence(model_input, num_frames, seq_axis=1)

  stacked_GRU = tf.contrib.rnn.MultiRNNCell(
      [tf.contrib.rnn.GRUCell(gru_size) for _ in range(number_of_layers)],
      state_is_tuple=False)

  loss = 0.0
  with tf.variable_scope("RNN"):
    outputs, state = tf.nn.dynamic_rnn(stacked_GRU, model_input,
                                       sequence_length=num_frames,
                                       dtype=tf.float32)

  aggregated_model = getattr(video_level_models,
                             FLAGS.video_level_classifier_model)
  return aggregated_model().create_model(
      model_input=state,
      vocab_size=vocab_size,
      is_training=is_training,
      **unused_params)
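As in Example #1, the video-level classifier is looked up by name with getattr and receives the final recurrent state rather than the per-frame outputs. video_level_models itself is not included on this page; purely to illustrate the expected interface, here is a hypothetical single-layer classifier with the same create_model signature and 'predictions' return key. The class name and sizes below are made up, not taken from the WILLOW repository.

import tensorflow as tf  # TF1.x

class LogisticVideoModel(object):
  """Hypothetical stand-in for a video_level_models classifier."""

  def create_model(self, model_input, vocab_size, is_training=True, **unused_params):
    # model_input here is the final RNN state: 'batch_size' x 'state_size'.
    logits = tf.layers.dense(model_input, vocab_size, activation=None)
    return {"predictions": tf.nn.sigmoid(logits)}

# Dispatch by name, mirroring the getattr(video_level_models, ...) pattern above.
classifier_name = "LogisticVideoModel"
aggregated_model = globals()[classifier_name]
predictions = aggregated_model().create_model(
    model_input=tf.zeros([3, 16]),  # stands in for the final RNN state
    vocab_size=3862)["predictions"]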