Python tensorflow.flags.FLAGS Examples

The following are 30 code examples of tensorflow.flags.FLAGS, drawn from open-source projects; the project and source file are noted above each example. You may also want to check out all available functions and classes of the module tensorflow.flags, or try the search function.
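Every snippet below reads its configuration from the shared FLAGS object, but none of them shows how that object is populated. The following is a minimal sketch of the usual TensorFlow 1.x pattern, with illustrative flag names that are not taken from the projects below: flags are defined at module import time, tf.app.run() parses the command line, and main() then reads the parsed values through tensorflow.flags.FLAGS.

import tensorflow as tf

FLAGS = tf.flags.FLAGS

# Illustrative flag definitions (assumed names, not from the youtube-8m code base).
tf.flags.DEFINE_integer("batch_size", 256, "How many examples to process per batch.")
tf.flags.DEFINE_string("train_dir", "/tmp/yt8m_model", "Directory to write checkpoints to.")
tf.flags.DEFINE_boolean("frame_features", False, "Whether the input uses frame-level features.")

def main(unused_argv):
  # tf.app.run() has already parsed sys.argv by the time main() is called, so
  # every module that imports FLAGS sees the same parsed values.
  tf.logging.info("batch_size=%d, train_dir=%s", FLAGS.batch_size, FLAGS.train_dir)

if __name__ == "__main__":
  tf.app.run()

Each example below assumes its module defines the flags it dereferences (FLAGS.batch_size, FLAGS.train_dir, and so on) in this same way.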
Example #1
Source File: train.py    From youtube-8m with Apache License 2.0
def get_input_data_tensors(reader,
                           data_pattern,
                           batch_size=256,
                           num_epochs=None):
  logging.info("Using batch size of " + str(batch_size) + " for training.")
  with tf.name_scope("train_input"):
    files = gfile.Glob(data_pattern)
    if not files:
      raise IOError("Unable to find training files. data_pattern='" +
                    data_pattern + "'.")
    logging.info("Number of training files: %s.", str(len(files)))
    files.sort()
    filename_queue = tf.train.string_input_producer(
        files, num_epochs=num_epochs, shuffle=False)
    training_data = reader.prepare_reader(filename_queue)

    return tf.train.batch(
        training_data,
        batch_size=batch_size,
        capacity=FLAGS.batch_size * 4,
        allow_smaller_final_batch=True,
        enqueue_many=True) 
Example #2
Source File: check_video_id_match.py    From youtube-8m with Apache License 2.0
def get_input_evaluation_tensors(reader,
                                 data_pattern,
                                 batch_size=256):
  logging.info("Using batch size of " + str(batch_size) + " for evaluation.")
  with tf.name_scope("eval_input"):
    files = gfile.Glob(data_pattern)
    if not files:
      print(data_pattern, files)
      raise IOError("Unable to find the evaluation files.")
    logging.info("number of evaluation files: " + str(len(files)))
    files.sort()
    filename_queue = tf.train.string_input_producer(
        files, shuffle=False, num_epochs=1)
    eval_data = reader.prepare_reader(filename_queue)
    return tf.train.batch(
        eval_data,
        batch_size=batch_size,
        capacity=3 * FLAGS.batch_size,
        allow_smaller_final_batch=True,
        enqueue_many=True) 
Example #3
Source File: inference-pre-ensemble.py    From youtube-8m with Apache License 2.0
def main(unused_argv):
  logging.set_verbosity(tf.logging.INFO)

  # convert feature_names and feature_sizes to lists of values
  feature_names, feature_sizes = utils.GetListOfFeatureNamesAndSizes(
      FLAGS.feature_names, FLAGS.feature_sizes)

  if FLAGS.frame_features:
    reader = readers.YT8MFrameFeatureReader(feature_names=feature_names,
                                            feature_sizes=feature_sizes)
  else:
    reader = readers.YT8MAggregatedFeatureReader(feature_names=feature_names,
                                                 feature_sizes=feature_sizes)

  if FLAGS.output_dir == "":
    raise ValueError("'output_dir' was not specified. "
                     "Unable to continue with inference.")

  if FLAGS.input_data_pattern == "":
    raise ValueError("'input_data_pattern' was not specified. "
                     "Unable to continue with inference.")

  inference(reader, FLAGS.model_checkpoint_path, FLAGS.input_data_pattern,
      FLAGS.output_dir, FLAGS.batch_size, FLAGS.top_k) 
Example #4
Source File: losses.py    From youtube-8m with Apache License 2.0
def calculate_loss_mix2(self, predictions, predictions_class, predictions_encoder, labels, **unused_params):
    with tf.name_scope("loss_mix2"):
      float_labels = tf.cast(labels, tf.float32)
      float_encoders = float_labels
      for i in range(FLAGS.encoder_layers):
        var_i = np.loadtxt(FLAGS.autoencoder_dir+'autoencoder_layer%d.model' % i)
        weight_i = tf.constant(var_i[:-1,:],dtype=tf.float32)
        bias_i = tf.reshape(tf.constant(var_i[-1,:],dtype=tf.float32),[-1])
        float_encoders = tf.nn.xw_plus_b(float_encoders,weight_i,bias_i)
        if i<FLAGS.encoder_layers-1:
          float_encoders = tf.nn.relu(float_encoders)
        else:
          hidden_mean = tf.reduce_mean(float_encoders,axis=1,keep_dims=True)
          hidden_std = tf.sqrt(tf.reduce_mean(tf.square(float_encoders-hidden_mean),axis=1,keep_dims=True))
          float_encoders = (float_encoders-hidden_mean)/(hidden_std+1e-6)
          #float_encoders = tf.nn.sigmoid(float_encoders)
      cross_entropy_encoder = 0.1*self.calculate_mseloss(predictions_encoder,float_encoders)
      cross_entropy_loss = self.calculate_loss(predictions,labels)
      return cross_entropy_encoder+cross_entropy_loss, float_encoders
      #return cross_entropy_encoder, float_encoders 
Example #5
Source File: losses.py    From youtube-8m with Apache License 2.0
def calculate_loss(self, predictions, labels, **unused_params):
    bound = FLAGS.softmax_bound
    vocab_size_1 = bound
    with tf.name_scope("loss_softmax"):
      epsilon = 10e-8
      float_labels = tf.cast(labels, tf.float32)
      labels_1 = float_labels[:,:vocab_size_1]
      predictions_1 = predictions[:,:vocab_size_1]
      cross_entropy_loss = CrossEntropyLoss().calculate_loss(predictions_1,labels_1)
      labels_2 = float_labels[:,vocab_size_1:]
      predictions_2 = predictions[:,vocab_size_1:]
      # l1 normalization (labels are no less than 0)
      label_rowsum = tf.maximum(
          tf.reduce_sum(labels_2, 1, keep_dims=True),
          epsilon)
      label_append = 1.0-tf.reduce_max(labels_2, 1, keep_dims=True)
      norm_float_labels = tf.concat((tf.div(labels_2, label_rowsum),label_append),axis=1)
      predictions_append = 1.0-tf.reduce_sum(predictions_2, 1, keep_dims=True)
      softmax_outputs = tf.concat((predictions_2,predictions_append),axis=1)
      softmax_loss = norm_float_labels * tf.log(softmax_outputs + epsilon) + (
          1 - norm_float_labels) * tf.log(1 - softmax_outputs + epsilon)
      softmax_loss = tf.negative(tf.reduce_sum(softmax_loss, 1))
    return tf.reduce_mean(softmax_loss) + cross_entropy_loss 
Example #6
Source File: losses.py    From youtube-8m with Apache License 2.0
def calculate_loss(self, predictions, labels, weights=None, **unused_params):
    with tf.name_scope("loss_xent"):
      epsilon = 10e-6
      if FLAGS.label_smoothing:
        float_labels = smoothing(labels)
      else:
        float_labels = tf.cast(labels, tf.float32)
      cross_entropy_loss = float_labels * tf.log(predictions + epsilon) + (
          1 - float_labels) * tf.log(1 - predictions + epsilon)
      cross_entropy_loss = tf.negative(cross_entropy_loss)
      if weights is not None:
        print(cross_entropy_loss, weights)
        weighted_loss = tf.einsum("ij,i->ij", cross_entropy_loss, weights)
        print("create weighted_loss", weighted_loss)
        return tf.reduce_mean(tf.reduce_sum(weighted_loss, 1))
      else:
        return tf.reduce_mean(tf.reduce_sum(cross_entropy_loss, 1)) 
Example #7
Source File: inference.py    From youtube-8m with Apache License 2.0
def main(unused_argv):
  logging.set_verbosity(tf.logging.INFO)

  # convert feature_names and feature_sizes to lists of values
  feature_names, feature_sizes = utils.GetListOfFeatureNamesAndSizes(
      FLAGS.feature_names, FLAGS.feature_sizes)

  if FLAGS.frame_features:
    reader = readers.YT8MFrameFeatureReader(feature_names=feature_names,
                                            feature_sizes=feature_sizes)
  else:
    reader = readers.YT8MAggregatedFeatureReader(feature_names=feature_names,
                                                 feature_sizes=feature_sizes)

  if FLAGS.output_file == "":
    raise ValueError("'output_file' was not specified. "
                     "Unable to continue with inference.")

  if FLAGS.input_data_pattern == "":
    raise ValueError("'input_data_pattern' was not specified. "
                     "Unable to continue with inference.")

  inference(reader, FLAGS.train_dir, FLAGS.input_data_pattern,
    FLAGS.output_file, FLAGS.batch_size, FLAGS.top_k) 
Example #8
Source File: inference_autoencoder.py    From youtube-8m with Apache License 2.0
def main(unused_argv):
  logging.set_verbosity(tf.logging.INFO)

  # convert feature_names and feature_sizes to lists of values
  feature_names, feature_sizes = utils.GetListOfFeatureNamesAndSizes(
      FLAGS.feature_names, FLAGS.feature_sizes)

  if FLAGS.frame_features:
    reader = readers.YT8MFrameFeatureReader(feature_names=feature_names,
                                            feature_sizes=feature_sizes)
  else:
    reader = readers.YT8MAggregatedFeatureReader(feature_names=feature_names,
                                                 feature_sizes=feature_sizes)

  if FLAGS.output_file == "":
    raise ValueError("'output_file' was not specified. "
                     "Unable to continue with inference.")

  if FLAGS.input_data_pattern == "":
    raise ValueError("'input_data_pattern' was not specified. "
                     "Unable to continue with inference.")

  inference(reader, FLAGS.train_dir, FLAGS.input_data_pattern,
    FLAGS.output_file, FLAGS.batch_size, FLAGS.top_k) 
Example #9
Source File: inference_test.py    From youtube-8m with Apache License 2.0
def main(unused_argv):
  logging.set_verbosity(tf.logging.INFO)

  # convert feature_names and feature_sizes to lists of values
  feature_names, feature_sizes = utils.GetListOfFeatureNamesAndSizes(
      FLAGS.feature_names, FLAGS.feature_sizes)

  if FLAGS.frame_features:
    reader = readers.YT8MFrameFeatureReader(feature_names=feature_names,
                                            feature_sizes=feature_sizes)
  else:
    reader = readers.YT8MAggregatedFeatureReader(feature_names=feature_names,
                                                 feature_sizes=feature_sizes)

  if FLAGS.output_file == "":
    raise ValueError("'output_file' was not specified. "
                     "Unable to continue with inference.")

  if FLAGS.input_data_pattern == "":
    raise ValueError("'input_data_pattern' was not specified. "
                     "Unable to continue with inference.")

  inference(reader, FLAGS.train_dir, FLAGS.input_data_pattern,
    FLAGS.output_file, FLAGS.batch_size, FLAGS.top_k) 
Example #10
Source File: labels_rbm.py    From youtube-8m with Apache License 2.0
def create_model(self, model_input, vocab_size, l2_penalty=1e-8, **unused_params):
        """Creates a logistic model.

        Args:
          model_input: 'batch' x 'num_features' matrix of input features.
          vocab_size: The number of classes in the dataset.

        Returns:
          A dictionary with a tensor containing the probability predictions of the
          model in the 'predictions' key. The dimensions of the tensor are
          batch_size x num_classes."""

        input_size = vocab_size
        output_size = FLAGS.hidden_size
        with tf.name_scope("rbm"):
            self.weights = tf.Variable(tf.truncated_normal([input_size, output_size],
                                    stddev=1.0 / math.sqrt(float(input_size))), name="weights")
            self.v_bias = tf.Variable(tf.zeros([input_size]), name="v_bias")
            self.h_bias = tf.Variable(tf.zeros([output_size]), name="h_bias")
            tf.add_to_collection(name=tf.GraphKeys.REGULARIZATION_LOSSES, value=l2_penalty*tf.nn.l2_loss(self.weights))
            tf.add_to_collection(name=tf.GraphKeys.REGULARIZATION_LOSSES, value=l2_penalty*tf.nn.l2_loss(self.v_bias))
            tf.add_to_collection(name=tf.GraphKeys.REGULARIZATION_LOSSES, value=l2_penalty*tf.nn.l2_loss(self.h_bias)) 
Example #11
Source File: train.py    From Youtube-8M-WILLOW with Apache License 2.0
def build_model(self, model, reader):
    """Find the model and build the graph."""

    label_loss_fn = find_class_by_name(FLAGS.label_loss, [losses])()
    optimizer_class = find_class_by_name(FLAGS.optimizer, [tf.train])
  
    build_graph(reader=reader,
                 model=model,
                 optimizer_class=optimizer_class,
                 clip_gradient_norm=FLAGS.clip_gradient_norm,
                 train_data_pattern=FLAGS.train_data_pattern,
                 label_loss_fn=label_loss_fn,
                 base_learning_rate=FLAGS.base_learning_rate,
                 learning_rate_decay=FLAGS.learning_rate_decay,
                 learning_rate_decay_examples=FLAGS.learning_rate_decay_examples,
                 regularization_penalty=FLAGS.regularization_penalty,
                 num_readers=FLAGS.num_readers,
                 batch_size=FLAGS.batch_size,
                 num_epochs=FLAGS.num_epochs)
  
    return tf.train.Saver(max_to_keep=0, keep_checkpoint_every_n_hours=5) 
Example #12
Source File: losses_embedding.py    From youtube-8m with Apache License 2.0 6 votes vote down vote up
def calculate_loss(self, predictions, labels, **unused_params):
        bound = FLAGS.softmax_bound
        vocab_size_1 = bound
        with tf.name_scope("loss_softmax"):
            epsilon = 10e-8
            float_labels = tf.cast(labels, tf.float32)
            labels_1 = float_labels[:,:vocab_size_1]
            predictions_1 = predictions[:,:vocab_size_1]
            cross_entropy_loss = CrossEntropyLoss().calculate_loss(predictions_1,labels_1)
            labels_2 = float_labels[:,vocab_size_1:]
            predictions_2 = predictions[:,vocab_size_1:]
            # l1 normalization (labels are no less than 0)
            label_rowsum = tf.maximum(
                tf.reduce_sum(labels_2, 1, keep_dims=True),
                epsilon)
            label_append = 1.0-tf.reduce_max(labels_2, 1, keep_dims=True)
            norm_float_labels = tf.concat((tf.div(labels_2, label_rowsum),label_append),axis=1)
            predictions_append = 1.0-tf.reduce_sum(predictions_2, 1, keep_dims=True)
            softmax_outputs = tf.concat((predictions_2,predictions_append),axis=1)
            softmax_loss = norm_float_labels * tf.log(softmax_outputs + epsilon) + (
                1 - norm_float_labels) * tf.log(1 - softmax_outputs + epsilon)
            softmax_loss = tf.negative(tf.reduce_sum(softmax_loss, 1))
        return tf.reduce_mean(softmax_loss) + cross_entropy_loss 
Example #13
Source File: train_autoencoder.py    From youtube-8m with Apache License 2.0
def get_forward_parameters(vocab_size=4716):
    t_vars = tf.trainable_variables()
    h1_vars_weight = [var for var in t_vars if 'hidden_1' in var.name and 'weights' in var.name]
    h1_vars_biases = [var for var in t_vars if 'hidden_1' in var.name and 'biases' in var.name]
    h2_vars_weight = [var for var in t_vars if 'hidden_2' in var.name and 'weights' in var.name]
    h2_vars_biases = [var for var in t_vars if 'hidden_2' in var.name and 'biases' in var.name]
    o1_vars_weight = [var for var in t_vars if 'output_1' in var.name and 'weights' in var.name]
    o1_vars_biases = [var for var in t_vars if 'output_1' in var.name and 'biases' in var.name]
    o2_vars_weight = [var for var in t_vars if 'output_2' in var.name and 'weights' in var.name]
    o2_vars_biases = [var for var in t_vars if 'output_2' in var.name and 'biases' in var.name]
    h1_vars_biases = tf.reshape(h1_vars_biases[0],[1,FLAGS.hidden_size_1])
    h2_vars_biases = tf.reshape(h2_vars_biases[0],[1,FLAGS.hidden_size_2])
    o1_vars_biases = tf.reshape(o1_vars_biases[0],[1,FLAGS.hidden_size_1])
    o2_vars_biases = tf.reshape(o2_vars_biases[0],[1,vocab_size])
    vars_1 = tf.concat((h1_vars_weight[0],h1_vars_biases),axis=0)
    vars_2 = tf.concat((h2_vars_weight[0],h2_vars_biases),axis=0)
    vars_3 = tf.concat((o1_vars_weight[0],o1_vars_biases),axis=0)
    vars_4 = tf.concat((o2_vars_weight[0],o2_vars_biases),axis=0)
    return [vars_1,vars_2,vars_3,vars_4] 
Example #14
Source File: inference.py    From Youtube-8M-WILLOW with Apache License 2.0
def main(unused_argv):
  logging.set_verbosity(tf.logging.INFO)

  # convert feature_names and feature_sizes to lists of values
  feature_names, feature_sizes = utils.GetListOfFeatureNamesAndSizes(
      FLAGS.feature_names, FLAGS.feature_sizes)

  if FLAGS.frame_features:
    reader = readers.YT8MFrameFeatureReader(feature_names=feature_names,
                                            feature_sizes=feature_sizes)
  else:
    reader = readers.YT8MAggregatedFeatureReader(feature_names=feature_names,
                                                 feature_sizes=feature_sizes)

  if FLAGS.output_file == "":
    raise ValueError("'output_file' was not specified. "
                     "Unable to continue with inference.")

  if FLAGS.input_data_pattern == "":
    raise ValueError("'input_data_pattern' was not specified. "
                     "Unable to continue with inference.")

  inference(reader, FLAGS.train_dir, FLAGS.input_data_pattern,
    FLAGS.output_file, FLAGS.batch_size, FLAGS.top_k) 
Example #15
Source File: losses.py    From youtube-8m with Apache License 2.0
def calculate_loss_negative(self, predictions_pos, predictions_neg, labels, **unused_params):
    with tf.name_scope("loss_negative"):
      epsilon = 10e-6
      float_labels = tf.cast(labels, tf.float32)
      weight_pos = np.loadtxt(FLAGS.autoencoder_dir+"labels_uni.out")
      weight_pos = tf.reshape(tf.cast(weight_pos,dtype=tf.float32),[1,-1])
      weight_pos = tf.log(tf.reduce_max(weight_pos)/weight_pos)+1
      cross_entropy_loss_1 = float_labels * tf.log(predictions_pos + epsilon)*weight_pos + (
          1 - float_labels) * tf.log(1 - predictions_pos + epsilon)
      cross_entropy_loss_2 = (1-float_labels) * tf.log(predictions_neg + epsilon) + \
                             float_labels * tf.log(1 - predictions_neg + epsilon)
      cross_entropy_loss = tf.negative(cross_entropy_loss_1+cross_entropy_loss_2)
      return tf.reduce_mean(tf.reduce_sum(cross_entropy_loss, 1)) 
Example #16
Source File: inference_test.py    From youtube-8m with Apache License 2.0
def get_input_data_tensors(reader, data_pattern, batch_size, num_readers=1):
  """Creates the section of the graph which reads the input data.

  Args:
    reader: A class which parses the input data.
    data_pattern: A 'glob' style path to the data files.
    batch_size: How many examples to process at a time.
    num_readers: How many I/O threads to use.

  Returns:
    A tuple containing the features tensor, labels tensor, and optionally a
    tensor containing the number of frames per video. The exact dimensions
    depend on the reader being used.

  Raises:
    IOError: If no files matching the given pattern were found.
  """
  with tf.name_scope("input"):
    files = gfile.Glob(data_pattern)
    if FLAGS.set=="ensamble_validate":
        files = [file for file in files if 'validatea' not in file]
    elif FLAGS.set=="ensamble_train":
        files = [file for file in files if 'validatea' in file]
    files.sort()
    if not files:
      raise IOError("Unable to find input files. data_pattern='" +
                    data_pattern + "'")
    logging.info("number of input files: " + str(len(files)))
    filename_queue = tf.train.string_input_producer(
        files, num_epochs=1, shuffle=False)
    examples_and_labels = [reader.prepare_reader(filename_queue)
                           for _ in range(num_readers)]

    video_id_batch, video_batch, unused_labels, num_frames_batch = (
        tf.train.batch_join(examples_and_labels,
                            batch_size=batch_size,
                            allow_smaller_final_batch=True,
                            enqueue_many=True))
    return video_id_batch, video_batch, unused_labels, num_frames_batch 
Example #17
Source File: train-with-rebuild.py    From youtube-8m with Apache License 2.0
def main(unused_argv):
    # Load the environment.
    env = json.loads(os.environ.get("TF_CONFIG", "{}"))

    # Load the cluster data from the environment.
    cluster_data = env.get("cluster", None)
    cluster = tf.train.ClusterSpec(cluster_data) if cluster_data else None

    # Load the task data from the environment.
    task_data = env.get("task", None) or {"type": "master", "index": 0}
    task = type("TaskSpec", (object,), task_data)

    # Logging the version.
    logging.set_verbosity(tf.logging.INFO)
    logging.info("%s: Tensorflow version: %s.",
                 task_as_string(task), tf.__version__)

    # Dispatch to a master, a worker, or a parameter server.
    if not cluster or task.type == "master" or task.type == "worker":
        Trainer(cluster, task, FLAGS.train_dir, FLAGS.log_device_placement).run(
            start_new_model=FLAGS.start_new_model)
    elif task.type == "ps":
        ParameterServer(cluster, task).run()
    else:
        raise ValueError("%s: Invalid task_type: %s." %
                         (task_as_string(task), task.type)) 
Example #18
Source File: inference_with_rebuild.py    From youtube-8m with Apache License 2.0
def write_to_record(id_batch, label_batch, predictions, filenum, num_examples_processed):
    writer = tf.python_io.TFRecordWriter(FLAGS.output_dir + '/' + 'predictions-%03d.tfrecord' % filenum)
    for i in range(num_examples_processed):
        video_id = id_batch[i]
        label = np.nonzero(label_batch[i,:])[0]
        example = get_output_feature(video_id, label, [predictions[i,:]], ['predictions'])
        serialized = example.SerializeToString()
        writer.write(serialized)
    writer.close() 
Example #19
Source File: losses_embedding.py    From youtube-8m with Apache License 2.0
def calculate_loss(self, predictions, labels, **unused_params):
        with tf.name_scope("loss_xent"):
            epsilon = 10e-6
            origin_labels = tf.cast(labels, tf.float32)
            vocab_size = origin_labels.get_shape().as_list()[1]
            float_labels = tf.tile(tf.reshape(origin_labels,[-1, 1, vocab_size]),[1,FLAGS.top_k,1])
            float_labels = tf.reshape(float_labels,[-1,vocab_size])
            cross_entropy_loss = float_labels * tf.log(predictions + epsilon) + (
                1 - float_labels) * tf.log(1 - predictions + epsilon)
            cross_entropy_loss = tf.negative(cross_entropy_loss)
            num_labels = tf.minimum(tf.reduce_sum(origin_labels,axis=1),tf.constant(FLAGS.top_k,dtype=tf.float32))
            mask = tf.reshape(tf.sequence_mask(num_labels,tf.constant(FLAGS.top_k,dtype=tf.float32),dtype=tf.float32),[-1])
            cross_entropy_loss = tf.reduce_sum(tf.reduce_sum(cross_entropy_loss, 1)*mask)/(tf.reduce_sum(mask)+epsilon)

            return cross_entropy_loss 
Example #20
Source File: losses_embedding.py    From youtube-8m with Apache License 2.0
def calculate_loss_mix(self, predictions, predictions_class, labels, **unused_params):
        with tf.name_scope("loss_softmax_mix"):
            vocab_size = labels.get_shape().as_list()[1]
            cross_entropy_class = tf.constant(0.0)
            for i in range(FLAGS.moe_layers):
                predictions_subclass = predictions_class[:,i*vocab_size:(i+1)*vocab_size]
                cross_entropy_class = cross_entropy_class + self.calculate_loss(predictions_subclass,labels)
            cross_entropy_loss = self.calculate_loss(predictions,labels)
            return cross_entropy_loss + 0.1*cross_entropy_class 
Example #21
Source File: train_autoencoder.py    From youtube-8m with Apache License 2.0
def build_model(self):
    """Find the model and build the graph."""

    # Convert feature_names and feature_sizes to lists of values.
    feature_names, feature_sizes = utils.GetListOfFeatureNamesAndSizes(
        FLAGS.feature_names, FLAGS.feature_sizes)

    if FLAGS.frame_features:
      if FLAGS.frame_only:
          reader = readers.YT8MFrameFeatureOnlyReader(
              feature_names=feature_names, feature_sizes=feature_sizes)
      else:
          reader = readers.YT8MFrameFeatureReader(
              feature_names=feature_names, feature_sizes=feature_sizes)
    else:
      reader = readers.YT8MAggregatedFeatureReader(
          feature_names=feature_names, feature_sizes=feature_sizes)

    # Find the model.
    model = find_class_by_name(FLAGS.model,
                               [labels_autoencoder])()
    label_loss_fn = find_class_by_name(FLAGS.label_loss, [losses])()
    optimizer_class = find_class_by_name(FLAGS.optimizer, [tf.train])

    build_graph(reader=reader,
                 model=model,
                 optimizer_class=optimizer_class,
                 clip_gradient_norm=FLAGS.clip_gradient_norm,
                 train_data_pattern=FLAGS.train_data_pattern,
                 label_loss_fn=label_loss_fn,
                 base_learning_rate=FLAGS.base_learning_rate,
                 learning_rate_decay=FLAGS.learning_rate_decay,
                 learning_rate_decay_examples=FLAGS.learning_rate_decay_examples,
                 regularization_penalty=FLAGS.regularization_penalty,
                 num_readers=FLAGS.num_readers,
                 batch_size=FLAGS.batch_size,
                 num_epochs=FLAGS.num_epochs)

    logging.info("%s: Built graph.", task_as_string(self.task))

    return tf.train.Saver(max_to_keep=2, keep_checkpoint_every_n_hours=0.25) 
Example #22
Source File: train_autoencoder.py    From youtube-8m with Apache License 2.0
def main(unused_argv):
  # Load the environment.
  env = json.loads(os.environ.get("TF_CONFIG", "{}"))

  # Load the cluster data from the environment.
  cluster_data = env.get("cluster", None)
  cluster = tf.train.ClusterSpec(cluster_data) if cluster_data else None

  # Load the task data from the environment.
  task_data = env.get("task", None) or {"type": "master", "index": 0}
  task = type("TaskSpec", (object,), task_data)

  # Logging the version.
  logging.set_verbosity(tf.logging.INFO)
  logging.info("%s: Tensorflow version: %s.",
               task_as_string(task), tf.__version__)

  # Dispatch to a master, a worker, or a parameter server.
  if not cluster or task.type == "master" or task.type == "worker":
    Trainer(cluster, task, FLAGS.train_dir, FLAGS.log_device_placement).run(
        start_new_model=FLAGS.start_new_model)
  elif task.type == "ps":
    ParameterServer(cluster, task).run()
  else:
    raise ValueError("%s: Invalid task_type: %s." %
                     (task_as_string(task), task.type)) 
Example #23
Source File: train_embedding.py    From youtube-8m with Apache License 2.0
def main(unused_argv):
  # Load the environment.
  env = json.loads(os.environ.get("TF_CONFIG", "{}"))

  # Load the cluster data from the environment.
  cluster_data = env.get("cluster", None)
  cluster = tf.train.ClusterSpec(cluster_data) if cluster_data else None

  # Load the task data from the environment.
  task_data = env.get("task", None) or {"type": "master", "index": 0}
  task = type("TaskSpec", (object,), task_data)

  # Logging the version.
  logging.set_verbosity(tf.logging.INFO)
  logging.info("%s: Tensorflow version: %s.",
               task_as_string(task), tf.__version__)

  # Dispatch to a master, a worker, or a parameter server.
  if not cluster or task.type == "master" or task.type == "worker":
    Trainer(cluster, task, FLAGS.train_dir, FLAGS.log_device_placement).run(
        start_new_model=FLAGS.start_new_model)
  elif task.type == "ps":
    ParameterServer(cluster, task).run()
  else:
    raise ValueError("%s: Invalid task_type: %s." %
                     (task_as_string(task), task.type)) 
Example #24
Source File: losses.py    From youtube-8m with Apache License 2.0
def calculate_loss_mix(self, predictions, predictions_class, labels, **unused_params):
    with tf.name_scope("loss_softmax_mix"):
      vocab_size = labels.get_shape().as_list()[1]
      cross_entropy_class = tf.constant(0.0)
      for i in range(FLAGS.moe_layers):
        predictions_subclass = predictions_class[:,i*vocab_size:(i+1)*vocab_size]
        cross_entropy_class = cross_entropy_class + self.calculate_loss(predictions_subclass,labels)
      cross_entropy_loss = self.calculate_loss(predictions,labels)
      return cross_entropy_loss + 0.1*cross_entropy_class 
Example #25
Source File: train.py    From youtube-8m with Apache License 2.0
def optional_assign_weights(sess, weights_input, weights_assignment):
  if weights_input is not None:
    weights, length = get_video_weights_array()
    _ = sess.run(weights_assignment, feed_dict={weights_input: weights})
    print "Assigned weights from %s" % FLAGS.sample_freq_file
  else:
    print "Collection weights_input not found" 
Example #26
Source File: train_ensemble.py    From youtube-8m with Apache License 2.0
def main(unused_argv):
  # Load the environment.
  env = json.loads(os.environ.get("TF_CONFIG", "{}"))

  # Load the cluster data from the environment.
  cluster_data = env.get("cluster", None)
  cluster = tf.train.ClusterSpec(cluster_data) if cluster_data else None

  # Load the task data from the environment.
  task_data = env.get("task", None) or {"type": "master", "index": 0}
  task = type("TaskSpec", (object,), task_data)

  # Logging the version.
  logging.set_verbosity(tf.logging.INFO)
  logging.info("%s: Tensorflow version: %s.",
               task_as_string(task), tf.__version__)

  # Dispatch to a master, a worker, or a parameter server.
  if not cluster or task.type == "master" or task.type == "worker":
    Trainer(cluster, task, FLAGS.train_dir, FLAGS.log_device_placement).run(
        start_new_model=FLAGS.start_new_model)
  elif task.type == "ps":
    ParameterServer(cluster, task).run()
  else:
    raise ValueError("%s: Invalid task_type: %s." %
                     (task_as_string(task), task.type)) 
Example #27
Source File: train_ensemble.py    From youtube-8m with Apache License 2.0
def get_input_data_tensors(reader,
                                 data_pattern,
                                 batch_size=1024,
                                 num_epochs=None,
                                 num_readers=1):
    """Creates the section of the graph which reads the evaluation data.

    Args:
      reader: A class which parses the training data.
      data_pattern: A 'glob' style path to the data files.
      batch_size: How many examples to process at a time.
      num_readers: How many I/O threads to use.

    Returns:
      A tuple containing the features tensor, labels tensor, and optionally a
      tensor containing the number of frames per video. The exact dimensions
      depend on the reader being used.

    Raises:
      IOError: If no files matching the given pattern were found.
    """
    logging.info("Using batch size of " + str(batch_size) + " for evaluation.")
    with tf.name_scope("eval_input"):
        files = gfile.Glob(data_pattern)
        if not files:
            raise IOError("Unable to find the evaluation files.")
        logging.info("number of evaluation files: " + str(len(files)))
        files.sort()
        filename_queue = tf.train.string_input_producer(
            files, shuffle=False, num_epochs=num_epochs)
        training_data = reader.prepare_reader(filename_queue)
        return tf.train.batch(
            training_data,
            batch_size=batch_size,
            capacity=5 * FLAGS.batch_size,
            allow_smaller_final_batch=True,
            enqueue_many=True) 
Example #28
Source File: train.py    From youtube-8m with Apache License 2.0
def main(unused_argv):
  # Load the environment.
  env = json.loads(os.environ.get("TF_CONFIG", "{}"))

  # Load the cluster data from the environment.
  cluster_data = env.get("cluster", None)
  cluster = tf.train.ClusterSpec(cluster_data) if cluster_data else None

  # Load the task data from the environment.
  task_data = env.get("task", None) or {"type": "master", "index": 0}
  task = type("TaskSpec", (object,), task_data)

  # Logging the version.
  logging.set_verbosity(tf.logging.INFO)
  logging.info("%s: Tensorflow version: %s.",
               task_as_string(task), tf.__version__)

  # Dispatch to a master, a worker, or a parameter server.
  if not cluster or task.type == "master" or task.type == "worker":
    Trainer(cluster, task, FLAGS.train_dir, FLAGS.log_device_placement).run(
        start_new_model=FLAGS.start_new_model)
  elif task.type == "ps":
    ParameterServer(cluster, task).run()
  else:
    raise ValueError("%s: Invalid task_type: %s." %
                     (task_as_string(task), task.type)) 
Example #29
Source File: inference-pre-ensemble-distill.py    From youtube-8m with Apache License 2.0
def write_to_record(id_batch, label_batch, predictions, filenum, num_examples_processed):
    writer = tf.python_io.TFRecordWriter(FLAGS.output_dir + '/' + 'predictions-%04d.tfrecord' % filenum)
    for i in range(num_examples_processed):
        video_id = id_batch[i]
        label = np.nonzero(label_batch[i,:])[0]
        example = get_output_feature(video_id, label, [predictions[i,:]], ['predictions'])
        serialized = example.SerializeToString()
        writer.write(serialized)
    writer.close() 
Example #30
Source File: labels_embedding.py    From youtube-8m with Apache License 2.0
def create_model(self, model_input, vocab_size, l2_penalty=1e-8, **unused_params):
        """Creates a logistic model.

        Args:
          model_input: 'batch' x 'num_features' matrix of input features.
          vocab_size: The number of classes in the dataset.

        Returns:
          A dictionary with a tensor containing the probability predictions of the
          model in the 'predictions' key. The dimensions of the tensor are
          batch_size x num_classes."""
        model_input = tf.cast(model_input,dtype=tf.float32)
        hidden_size = FLAGS.hidden_size

        model_mask, indices_input = tf.nn.top_k(model_input, k=FLAGS.top_k)
        indices_input = tf.reshape(indices_input, [-1])
        models_mask = tf.reshape(model_mask, [-1,FLAGS.top_k,1])
        with tf.name_scope("embedding"):
            embeddings = tf.Variable(
                tf.random_uniform([vocab_size, hidden_size], -1.0, 1.0))
            embed = tf.nn.embedding_lookup(embeddings, indices_input)
            output = slim.fully_connected(
                embed,
                vocab_size,
                activation_fn=tf.nn.sigmoid,
                weights_regularizer=slim.l2_regularizer(l2_penalty),
                scope="output")
        indices_one_hot = tf.one_hot(indices_input, vocab_size)
        output = output * (1 - indices_one_hot) + indices_one_hot
        output_val = tf.reshape(output,[-1,FLAGS.top_k,vocab_size])
        predictions_val = tf.reduce_sum(output_val*models_mask, axis=1)/tf.reduce_sum(models_mask, axis=1)
        return {"predictions": output, "predictions_val": predictions_val}