Python tensorflow.flags.FLAGS Examples

The following are 30 code examples of tensorflow.flags.FLAGS, drawn from open-source projects; the project and source file are noted above each example. You may also want to check out all available functions and classes of the module tensorflow.flags, or try the search function.
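Every snippet below reads its configuration from the shared FLAGS object, but none of them shows how that object is populated. The following is a minimal sketch of the usual TensorFlow 1.x pattern, with illustrative flag names that are not taken from the projects below: flags are defined at module import time, tf.app.run() parses the command line, and main() then reads the parsed values through tensorflow.flags.FLAGS.

import tensorflow as tf

FLAGS = tf.flags.FLAGS

# Illustrative flag definitions (assumed names, not from the youtube-8m code base).
tf.flags.DEFINE_integer("batch_size", 256, "How many examples to process per batch.")
tf.flags.DEFINE_string("train_dir", "/tmp/yt8m_model", "Directory to write checkpoints to.")
tf.flags.DEFINE_boolean("frame_features", False, "Whether the input uses frame-level features.")

def main(unused_argv):
  # tf.app.run() has already parsed sys.argv by the time main() is called, so
  # every module that imports FLAGS sees the same parsed values.
  tf.logging.info("batch_size=%d, train_dir=%s", FLAGS.batch_size, FLAGS.train_dir)

if __name__ == "__main__":
  tf.app.run()

Each example below assumes its module defines the flags it dereferences (FLAGS.batch_size, FLAGS.train_dir, and so on) in this same way.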
Example #1
Source File: train.py    From youtube-8m with Apache License 2.0
def get_input_data_tensors(reader,
                           data_pattern,
                           batch_size=256,
                           num_epochs=None):
  logging.info("Using batch size of " + str(batch_size) + " for training.")
  with tf.name_scope("train_input"):
    files = gfile.Glob(data_pattern)
    if not files:
      raise IOError("Unable to find training files. data_pattern='" +
                    data_pattern + "'.")
    logging.info("Number of training files: %s.", str(len(files)))
    files.sort()
    filename_queue = tf.train.string_input_producer(
        files, num_epochs=num_epochs, shuffle=False)
    training_data = reader.prepare_reader(filename_queue)

    return tf.train.batch(
        training_data,
        batch_size=batch_size,
        capacity=FLAGS.batch_size * 4,
        allow_smaller_final_batch=True,
        enqueue_many=True) 
Example #2
Source File: check_video_id_match.py    From youtube-8m with Apache License 2.0
def get_input_evaluation_tensors(reader,
                                 data_pattern,
                                 batch_size=256):
  logging.info("Using batch size of " + str(batch_size) + " for evaluation.")
  with tf.name_scope("eval_input"):
    files = gfile.Glob(data_pattern)
    if not files:
      print(data_pattern, files)
      raise IOError("Unable to find the evaluation files.")
    logging.info("number of evaluation files: " + str(len(files)))
    files.sort()
    filename_queue = tf.train.string_input_producer(
        files, shuffle=False, num_epochs=1)
    eval_data = reader.prepare_reader(filename_queue)
    return tf.train.batch(
        eval_data,
        batch_size=batch_size,
        capacity=3 * FLAGS.batch_size,
        allow_smaller_final_batch=True,
        enqueue_many=True) 
Example #3
Source File: inference-pre-ensemble.py    From youtube-8m with Apache License 2.0
def main(unused_argv):
  logging.set_verbosity(tf.logging.INFO)

  # convert feature_names and feature_sizes to lists of values
  feature_names, feature_sizes = utils.GetListOfFeatureNamesAndSizes(
      FLAGS.feature_names, FLAGS.feature_sizes)

  if FLAGS.frame_features:
    reader = readers.YT8MFrameFeatureReader(feature_names=feature_names,
                                            feature_sizes=feature_sizes)
  else:
    reader = readers.YT8MAggregatedFeatureReader(feature_names=feature_names,
                                                 feature_sizes=feature_sizes)

  if FLAGS.output_dir == "":
    raise ValueError("'output_dir' was not specified. "
                     "Unable to continue with inference.")

  if FLAGS.input_data_pattern == "":
    raise ValueError("'input_data_pattern' was not specified. "
                     "Unable to continue with inference.")

  inference(reader, FLAGS.model_checkpoint_path, FLAGS.input_data_pattern,
      FLAGS.output_dir, FLAGS.batch_size, FLAGS.top_k) 
Example #4
Source File: losses.py    From youtube-8m with Apache License 2.0
def calculate_loss_mix2(self, predictions, predictions_class, predictions_encoder, labels, **unused_params):
    with tf.name_scope("loss_mix2"):
      float_labels = tf.cast(labels, tf.float32)
      float_encoders = float_labels
      for i in range(FLAGS.encoder_layers):
        var_i = np.loadtxt(FLAGS.autoencoder_dir+'autoencoder_layer%d.model' % i)
        weight_i = tf.constant(var_i[:-1,:],dtype=tf.float32)
        bias_i = tf.reshape(tf.constant(var_i[-1,:],dtype=tf.float32),[-1])
        float_encoders = tf.nn.xw_plus_b(float_encoders,weight_i,bias_i)
        if i<FLAGS.encoder_layers-1:
          float_encoders = tf.nn.relu(float_encoders)
        else:
          hidden_mean = tf.reduce_mean(float_encoders,axis=1,keep_dims=True)
          hidden_std = tf.sqrt(tf.reduce_mean(tf.square(float_encoders-hidden_mean),axis=1,keep_dims=True))
          float_encoders = (float_encoders-hidden_mean)/(hidden_std+1e-6)
          #float_encoders = tf.nn.sigmoid(float_encoders)
      cross_entropy_encoder = 0.1*self.calculate_mseloss(predictions_encoder,float_encoders)
      cross_entropy_loss = self.calculate_loss(predictions,labels)
      return cross_entropy_encoder+cross_entropy_loss, float_encoders
      #return cross_entropy_encoder, float_encoders 
Example #5
Source File: losses.py    From youtube-8m with Apache License 2.0
def calculate_loss(self, predictions, labels, **unused_params):
    bound = FLAGS.softmax_bound
    vocab_size_1 = bound
    with tf.name_scope("loss_softmax"):
      epsilon = 10e-8
      float_labels = tf.cast(labels, tf.float32)
      labels_1 = float_labels[:,:vocab_size_1]
      predictions_1 = predictions[:,:vocab_size_1]
      cross_entropy_loss = CrossEntropyLoss().calculate_loss(predictions_1,labels_1)
      labels_2 = float_labels[:,vocab_size_1:]
      predictions_2 = predictions[:,vocab_size_1:]
      # l1 normalization (labels are no less than 0)
      label_rowsum = tf.maximum(
          tf.reduce_sum(labels_2, 1, keep_dims=True),
          epsilon)
      label_append = 1.0-tf.reduce_max(labels_2, 1, keep_dims=True)
      norm_float_labels = tf.concat((tf.div(labels_2, label_rowsum),label_append),axis=1)
      predictions_append = 1.0-tf.reduce_sum(predictions_2, 1, keep_dims=True)
      softmax_outputs = tf.concat((predictions_2,predictions_append),axis=1)
      softmax_loss = norm_float_labels * tf.log(softmax_outputs + epsilon) + (
          1 - norm_float_labels) * tf.log(1 - softmax_outputs + epsilon)
      softmax_loss = tf.negative(tf.reduce_sum(softmax_loss, 1))
    return tf.reduce_mean(softmax_loss) + cross_entropy_loss 
Example #6
Source File: losses.py    From youtube-8m with Apache License 2.0
def calculate_loss(self, predictions, labels, weights=None, **unused_params):
    with tf.name_scope("loss_xent"):
      epsilon = 10e-6
      if FLAGS.label_smoothing:
        float_labels = smoothing(labels)
      else:
        float_labels = tf.cast(labels, tf.float32)
      cross_entropy_loss = float_labels * tf.log(predictions + epsilon) + (
          1 - float_labels) * tf.log(1 - predictions + epsilon)
      cross_entropy_loss = tf.negative(cross_entropy_loss)
      if weights is not None:
        print(cross_entropy_loss, weights)
        weighted_loss = tf.einsum("ij,i->ij", cross_entropy_loss, weights)
        print("create weighted_loss", weighted_loss)
        return tf.reduce_mean(tf.reduce_sum(weighted_loss, 1))
      else:
        return tf.reduce_mean(tf.reduce_sum(cross_entropy_loss, 1)) 
Example #7
Source File: inference.py    From youtube-8m with Apache License 2.0
def main(unused_argv):
  logging.set_verbosity(tf.logging.INFO)

  # convert feature_names and feature_sizes to lists of values
  feature_names, feature_sizes = utils.GetListOfFeatureNamesAndSizes(
      FLAGS.feature_names, FLAGS.feature_sizes)

  if FLAGS.frame_features:
    reader = readers.YT8MFrameFeatureReader(feature_names=feature_names,
                                            feature_sizes=feature_sizes)
  else:
    reader = readers.YT8MAggregatedFeatureReader(feature_names=feature_names,
                                                 feature_sizes=feature_sizes)

  if FLAGS.output_file == "":
    raise ValueError("'output_file' was not specified. "
                     "Unable to continue with inference.")

  if FLAGS.input_data_pattern == "":
    raise ValueError("'input_data_pattern' was not specified. "
                     "Unable to continue with inference.")

  inference(reader, FLAGS.train_dir, FLAGS.input_data_pattern,
    FLAGS.output_file, FLAGS.batch_size, FLAGS.top_k) 
Example #8
Source File: inference_autoencoder.py    From youtube-8m with Apache License 2.0
def main(unused_argv):
  logging.set_verbosity(tf.logging.INFO)

  # convert feature_names and feature_sizes to lists of values
  feature_names, feature_sizes = utils.GetListOfFeatureNamesAndSizes(
      FLAGS.feature_names, FLAGS.feature_sizes)

  if FLAGS.frame_features:
    reader = readers.YT8MFrameFeatureReader(feature_names=feature_names,
                                            feature_sizes=feature_sizes)
  else:
    reader = readers.YT8MAggregatedFeatureReader(feature_names=feature_names,
                                                 feature_sizes=feature_sizes)

  if FLAGS.output_file == "":
    raise ValueError("'output_file' was not specified. "
                     "Unable to continue with inference.")

  if FLAGS.input_data_pattern == "":
    raise ValueError("'input_data_pattern' was not specified. "
                     "Unable to continue with inference.")

  inference(reader, FLAGS.train_dir, FLAGS.input_data_pattern,
    FLAGS.output_file, FLAGS.batch_size, FLAGS.top_k) 
Example #9
Source File: inference_test.py    From youtube-8m with Apache License 2.0
def main(unused_argv):
  logging.set_verbosity(tf.logging.INFO)

  # convert feature_names and feature_sizes to lists of values
  feature_names, feature_sizes = utils.GetListOfFeatureNamesAndSizes(
      FLAGS.feature_names, FLAGS.feature_sizes)

  if FLAGS.frame_features:
    reader = readers.YT8MFrameFeatureReader(feature_names=feature_names,
                                            feature_sizes=feature_sizes)
  else:
    reader = readers.YT8MAggregatedFeatureReader(feature_names=feature_names,
                                                 feature_sizes=feature_sizes)

  if FLAGS.output_file == "":
    raise ValueError("'output_file' was not specified. "
                     "Unable to continue with inference.")

  if FLAGS.input_data_pattern == "":
    raise ValueError("'input_data_pattern' was not specified. "
                     "Unable to continue with inference.")

  inference(reader, FLAGS.train_dir, FLAGS.input_data_pattern,
    FLAGS.output_file, FLAGS.batch_size, FLAGS.top_k) 
Example #10
Source File: labels_rbm.py    From youtube-8m with Apache License 2.0
def create_model(self, model_input, vocab_size, l2_penalty=1e-8, **unused_params):
        """Creates a logistic model.

        Args:
          model_input: 'batch' x 'num_features' matrix of input features.
          vocab_size: The number of classes in the dataset.

        Returns:
          A dictionary with a tensor containing the probability predictions of the
          model in the 'predictions' key. The dimensions of the tensor are
          batch_size x num_classes."""

        input_size = vocab_size
        output_size = FLAGS.hidden_size
        with tf.name_scope("rbm"):
            self.weights = tf.Variable(tf.truncated_normal([input_size, output_size],
                                    stddev=1.0 / math.sqrt(float(input_size))), name="weights")
            self.v_bias = tf.Variable(tf.zeros([input_size]), name="v_bias")
            self.h_bias = tf.Variable(tf.zeros([output_size]), name="h_bias")
            tf.add_to_collection(name=tf.GraphKeys.REGULARIZATION_LOSSES, value=l2_penalty*tf.nn.l2_loss(self.weights))
            tf.add_to_collection(name=tf.GraphKeys.REGULARIZATION_LOSSES, value=l2_penalty*tf.nn.l2_loss(self.v_bias))
            tf.add_to_collection(name=tf.GraphKeys.REGULARIZATION_LOSSES, value=l2_penalty*tf.nn.l2_loss(self.h_bias)) 
Example #11
Source File: train.py    From Youtube-8M-WILLOW with Apache License 2.0
def build_model(self, model, reader):
    """Find the model and build the graph."""

    label_loss_fn = find_class_by_name(FLAGS.label_loss, [losses])()
    optimizer_class = find_class_by_name(FLAGS.optimizer, [tf.train])
  
    build_graph(reader=reader,
                 model=model,
                 optimizer_class=optimizer_class,
                 clip_gradient_norm=FLAGS.clip_gradient_norm,
                 train_data_pattern=FLAGS.train_data_pattern,
                 label_loss_fn=label_loss_fn,
                 base_learning_rate=FLAGS.base_learning_rate,
                 learning_rate_decay=FLAGS.learning_rate_decay,
                 learning_rate_decay_examples=FLAGS.learning_rate_decay_examples,
                 regularization_penalty=FLAGS.regularization_penalty,
                 num_readers=FLAGS.num_readers,
                 batch_size=FLAGS.batch_size,
                 num_epochs=FLAGS.num_epochs)
  
    return tf.train.Saver(max_to_keep=0, keep_checkpoint_every_n_hours=5) 
Example #12
Source File: losses_embedding.py    From youtube-8m with Apache License 2.0 6 votes vote down vote up
def calculate_loss(self, predictions, labels, **unused_params):
        bound = FLAGS.softmax_bound
        vocab_size_1 = bound
        with tf.name_scope("loss_softmax"):
            epsilon = 10e-8
            float_labels = tf.cast(labels, tf.float32)
            labels_1 = float_labels[:,:vocab_size_1]
            predictions_1 = predictions[:,:vocab_size_1]
            cross_entropy_loss = CrossEntropyLoss().calculate_loss(predictions_1,labels_1)
            labels_2 = float_labels[:,vocab_size_1:]
            predictions_2 = predictions[:,vocab_size_1:]
            # l1 normalization (labels are no less than 0)
            label_rowsum = tf.maximum(
                tf.reduce_sum(labels_2, 1, keep_dims=True),
                epsilon)
            label_append = 1.0-tf.reduce_max(labels_2, 1, keep_dims=True)
            norm_float_labels = tf.concat((tf.div(labels_2, label_rowsum),label_append),axis=1)
            predictions_append = 1.0-tf.reduce_sum(predictions_2, 1, keep_dims=True)
            softmax_outputs = tf.concat((predictions_2,predictions_append),axis=1)
            softmax_loss = norm_float_labels * tf.log(softmax_outputs + epsilon) + (
                1 - norm_float_labels) * tf.log(1 - softmax_outputs + epsilon)
            softmax_loss = tf.negative(tf.reduce_sum(softmax_loss, 1))
        return tf.reduce_mean(softmax_loss) + cross_entropy_loss 
Example #13
Source File: train_autoencoder.py    From youtube-8m with Apache License 2.0
def get_forward_parameters(vocab_size=4716):
    t_vars = tf.trainable_variables()
    h1_vars_weight = [var for var in t_vars if 'hidden_1' in var.name and 'weights' in var.name]
    h1_vars_biases = [var for var in t_vars if 'hidden_1' in var.name and 'biases' in var.name]
    h2_vars_weight = [var for var in t_vars if 'hidden_2' in var.name and 'weights' in var.name]
    h2_vars_biases = [var for var in t_vars if 'hidden_2' in var.name and 'biases' in var.name]
    o1_vars_weight = [var for var in t_vars if 'output_1' in var.name and 'weights' in var.name]
    o1_vars_biases = [var for var in t_vars if 'output_1' in var.name and 'biases' in var.name]
    o2_vars_weight = [var for var in t_vars if 'output_2' in var.name and 'weights' in var.name]
    o2_vars_biases = [var for var in t_vars if 'output_2' in var.name and 'biases' in var.name]
    h1_vars_biases = tf.reshape(h1_vars_biases[0],[1,FLAGS.hidden_size_1])
    h2_vars_biases = tf.reshape(h2_vars_biases[0],[1,FLAGS.hidden_size_2])
    o1_vars_biases = tf.reshape(o1_vars_biases[0],[1,FLAGS.hidden_size_1])
    o2_vars_biases = tf.reshape(o2_vars_biases[0],[1,vocab_size])
    vars_1 = tf.concat((h1_vars_weight[0],h1_vars_biases),axis=0)
    vars_2 = tf.concat((h2_vars_weight[0],h2_vars_biases),axis=0)
    vars_3 = tf.concat((o1_vars_weight[0],o1_vars_biases),axis=0)
    vars_4 = tf.concat((o2_vars_weight[0],o2_vars_biases),axis=0)
    return [vars_1,vars_2,vars_3,vars_4] 
Example #14
Source File: inference.py    From Youtube-8M-WILLOW with Apache License 2.0
def main(unused_argv):
  logging.set_verbosity(tf.logging.INFO)

  # convert feature_names and feature_sizes to lists of values
  feature_names, feature_sizes = utils.GetListOfFeatureNamesAndSizes(
      FLAGS.feature_names, FLAGS.feature_sizes)

  if FLAGS.frame_features:
    reader = readers.YT8MFrameFeatureReader(feature_names=feature_names,
                                            feature_sizes=feature_sizes)
  else:
    reader = readers.YT8MAggregatedFeatureReader(feature_names=feature_names,
                                                 feature_sizes=feature_sizes)

  if FLAGS.output_file == "":
    raise ValueError("'output_file' was not specified. "
                     "Unable to continue with inference.")

  if FLAGS.input_data_pattern == "":
    raise ValueError("'input_data_pattern' was not specified. "
                     "Unable to continue with inference.")

  inference(reader, FLAGS.train_dir, FLAGS.input_data_pattern,
    FLAGS.output_file, FLAGS.batch_size, FLAGS.top_k) 
Example #15
Source File: losses.py    From youtube-8m with Apache License 2.0
def calculate_loss_negative(self, predictions_pos, predictions_neg, labels, **unused_params):
    with tf.name_scope("loss_negative"):
      epsilon = 10e-6
      float_labels = tf.cast(labels, tf.float32)
      weight_pos = np.loadtxt(FLAGS.autoencoder_dir+"labels_uni.out")
      weight_pos = tf.reshape(tf.cast(weight_pos,dtype=tf.float32),[1,-1])
      weight_pos = tf.log(tf.reduce_max(weight_pos)/weight_pos)+1
      cross_entropy_loss_1 = float_labels * tf.log(predictions_pos + epsilon)*weight_pos + (
          1 - float_labels) * tf.log(1 - predictions_pos + epsilon)
      cross_entropy_loss_2 = (1-float_labels) * tf.log(predictions_neg + epsilon) + \
                             float_labels * tf.log(1 - predictions_neg + epsilon)
      cross_entropy_loss = tf.negative(cross_entropy_loss_1+cross_entropy_loss_2)
      return tf.reduce_mean(tf.reduce_sum(cross_entropy_loss, 1)) 
Example #16
Source File: inference_test.py    From youtube-8m with Apache License 2.0
def get_input_data_tensors(reader, data_pattern, batch_size, num_readers=1):
  """Creates the section of the graph which reads the input data.

  Args:
    reader: A class which parses the input data.
    data_pattern: A 'glob' style path to the data files.
    batch_size: How many examples to process at a time.
    num_readers: How many I/O threads to use.

  Returns:
    A tuple containing the features tensor, labels tensor, and optionally a
    tensor containing the number of frames per video. The exact dimensions
    depend on the reader being used.

  Raises:
    IOError: If no files matching the given pattern were found.
  """
  with tf.name_scope("input"):
    files = gfile.Glob(data_pattern)
    if FLAGS.set=="ensamble_validate":
        files = [file for file in files if 'validatea' not in file]
    elif FLAGS.set=="ensamble_train":
        files = [file for file in files if 'validatea' in file]
    files.sort()
    if not files:
      raise IOError("Unable to find input files. data_pattern='" +
                    data_pattern + "'")
    logging.info("number of input files: " + str(len(files)))
    filename_queue = tf.train.string_input_producer(
        files, num_epochs=1, shuffle=False)
    examples_and_labels = [reader.prepare_reader(filename_queue)
                           for _ in range(num_readers)]

    video_id_batch, video_batch, unused_labels, num_frames_batch = (
        tf.train.batch_join(examples_and_labels,
                            batch_size=batch_size,
                            allow_smaller_final_batch=True,
                            enqueue_many=True))
    return video_id_batch, video_batch, unused_labels, num_frames_batch 
Example #17
Source File: train-with-rebuild.py    From youtube-8m with Apache License 2.0
def main(unused_argv):
    # Load the environment.
    env = json.loads(os.environ.get("TF_CONFIG", "{}"))

    # Load the cluster data from the environment.
    cluster_data = env.get("cluster", None)
    cluster = tf.train.ClusterSpec(cluster_data) if cluster_data else None

    # Load the task data from the environment.
    task_data = env.get("task", None) or {"type": "master", "index": 0}
    task = type("TaskSpec", (object,), task_data)

    # Logging the version.
    logging.set_verbosity(tf.logging.INFO)
    logging.info("%s: Tensorflow version: %s.",
                 task_as_string(task), tf.__version__)

    # Dispatch to a master, a worker, or a parameter server.
    if not cluster or task.type == "master" or task.type == "worker":
        Trainer(cluster, task, FLAGS.train_dir, FLAGS.log_device_placement).run(
            start_new_model=FLAGS.start_new_model)
    elif task.type == "ps":
        ParameterServer(cluster, task).run()
    else:
        raise ValueError("%s: Invalid task_type: %s." %
                         (task_as_string(task), task.type)) 
Example #18
Source File: inference_with_rebuild.py    From youtube-8m with Apache License 2.0
def write_to_record(id_batch, label_batch, predictions, filenum, num_examples_processed):
    writer = tf.python_io.TFRecordWriter(FLAGS.output_dir + '/' + 'predictions-%03d.tfrecord' % filenum)
    for i in range(num_examples_processed):
        video_id = id_batch[i]
        label = np.nonzero(label_batch[i,:])[0]
        example = get_output_feature(video_id, label, [predictions[i,:]], ['predictions'])
        serialized = example.SerializeToString()
        writer.write(serialized)
    writer.close() 
Example #19
Source File: losses_embedding.py    From youtube-8m with Apache License 2.0
def calculate_loss(self, predictions, labels, **unused_params):
        with tf.name_scope("loss_xent"):
            epsilon = 10e-6
            origin_labels = tf.cast(labels, tf.float32)
            vocab_size = origin_labels.get_shape().as_list()[1]
            float_labels = tf.tile(tf.reshape(origin_labels,[-1, 1, vocab_size]),[1,FLAGS.top_k,1])
            float_labels = tf.reshape(float_labels,[-1,vocab_size])
            cross_entropy_loss = float_labels * tf.log(predictions + epsilon) + (
                1 - float_labels) * tf.log(1 - predictions + epsilon)
            cross_entropy_loss = tf.negative(cross_entropy_loss)
            num_labels = tf.minimum(tf.reduce_sum(origin_labels,axis=1),tf.constant(FLAGS.top_k,dtype=tf.float32))
            mask = tf.reshape(tf.sequence_mask(num_labels,tf.constant(FLAGS.top_k,dtype=tf.float32),dtype=tf.float32),[-1])
            cross_entropy_loss = tf.reduce_sum(tf.reduce_sum(cross_entropy_loss, 1)*mask)/(tf.reduce_sum(mask)+epsilon)

            return cross_entropy_loss 
Example #20
Source File: losses_embedding.py    From youtube-8m with Apache License 2.0
def calculate_loss_mix(self, predictions, predictions_class, labels, **unused_params):
        with tf.name_scope("loss_softmax_mix"):
            vocab_size = labels.get_shape().as_list()[1]
            cross_entropy_class = tf.constant(0.0)
            for i in range(FLAGS.moe_layers):
                predictions_subclass = predictions_class[:,i*vocab_size:(i+1)*vocab_size]
                cross_entropy_class = cross_entropy_class + self.calculate_loss(predictions_subclass,labels)
            cross_entropy_loss = self.calculate_loss(predictions,labels)
            return cross_entropy_loss + 0.1*cross_entropy_class 
Example #21
Source File: train_autoencoder.py    From youtube-8m with Apache License 2.0
def build_model(self):
    """Find the model and build the graph."""

    # Convert feature_names and feature_sizes to lists of values.
    feature_names, feature_sizes = utils.GetListOfFeatureNamesAndSizes(
        FLAGS.feature_names, FLAGS.feature_sizes)

    if FLAGS.frame_features:
      if FLAGS.frame_only:
          reader = readers.YT8MFrameFeatureOnlyReader(
              feature_names=feature_names, feature_sizes=feature_sizes)
      else:
          reader = readers.YT8MFrameFeatureReader(
              feature_names=feature_names, feature_sizes=feature_sizes)
    else:
      reader = readers.YT8MAggregatedFeatureReader(
          feature_names=feature_names, feature_sizes=feature_sizes)

    # Find the model.
    model = find_class_by_name(FLAGS.model,
                               [labels_autoencoder])()
    label_loss_fn = find_class_by_name(FLAGS.label_loss, [losses])()
    optimizer_class = find_class_by_name(FLAGS.optimizer, [tf.train])

    build_graph(reader=reader,
                 model=model,
                 optimizer_class=optimizer_class,
                 clip_gradient_norm=FLAGS.clip_gradient_norm,
                 train_data_pattern=FLAGS.train_data_pattern,
                 label_loss_fn=label_loss_fn,
                 base_learning_rate=FLAGS.base_learning_rate,
                 learning_rate_decay=FLAGS.learning_rate_decay,
                 learning_rate_decay_examples=FLAGS.learning_rate_decay_examples,
                 regularization_penalty=FLAGS.regularization_penalty,
                 num_readers=FLAGS.num_readers,
                 batch_size=FLAGS.batch_size,
                 num_epochs=FLAGS.num_epochs)

    logging.info("%s: Built graph.", task_as_string(self.task))

    return tf.train.Saver(max_to_keep=2, keep_checkpoint_every_n_hours=0.25) 
Example #22
Source File: train_autoencoder.py    From youtube-8m with Apache License 2.0
def main(unused_argv):
  # Load the environment.
  env = json.loads(os.environ.get("TF_CONFIG", "{}"))

  # Load the cluster data from the environment.
  cluster_data = env.get("cluster", None)
  cluster = tf.train.ClusterSpec(cluster_data) if cluster_data else None

  # Load the task data from the environment.
  task_data = env.get("task", None) or {"type": "master", "index": 0}
  task = type("TaskSpec", (object,), task_data)

  # Logging the version.
  logging.set_verbosity(tf.logging.INFO)
  logging.info("%s: Tensorflow version: %s.",
               task_as_string(task), tf.__version__)

  # Dispatch to a master, a worker, or a parameter server.
  if not cluster or task.type == "master" or task.type == "worker":
    Trainer(cluster, task, FLAGS.train_dir, FLAGS.log_device_placement).run(
        start_new_model=FLAGS.start_new_model)
  elif task.type == "ps":
    ParameterServer(cluster, task).run()
  else:
    raise ValueError("%s: Invalid task_type: %s." %
                     (task_as_string(task), task.type)) 
Example #23
Source File: train_embedding.py    From youtube-8m with Apache License 2.0
def main(unused_argv):
  # Load the environment.
  env = json.loads(os.environ.get("TF_CONFIG", "{}"))

  # Load the cluster data from the environment.
  cluster_data = env.get("cluster", None)
  cluster = tf.train.ClusterSpec(cluster_data) if cluster_data else None

  # Load the task data from the environment.
  task_data = env.get("task", None) or {"type": "master", "index": 0}
  task = type("TaskSpec", (object,), task_data)

  # Logging the version.
  logging.set_verbosity(tf.logging.INFO)
  logging.info("%s: Tensorflow version: %s.",
               task_as_string(task), tf.__version__)

  # Dispatch to a master, a worker, or a parameter server.
  if not cluster or task.type == "master" or task.type == "worker":
    Trainer(cluster, task, FLAGS.train_dir, FLAGS.log_device_placement).run(
        start_new_model=FLAGS.start_new_model)
  elif task.type == "ps":
    ParameterServer(cluster, task).run()
  else:
    raise ValueError("%s: Invalid task_type: %s." %
                     (task_as_string(task), task.type)) 
Example #24
Source File: losses.py    From youtube-8m with Apache License 2.0
def calculate_loss_mix(self, predictions, predictions_class, labels, **unused_params):
    with tf.name_scope("loss_softmax_mix"):
      vocab_size = labels.get_shape().as_list()[1]
      cross_entropy_class = tf.constant(0.0)
      for i in range(FLAGS.moe_layers):
        predictions_subclass = predictions_class[:,i*vocab_size:(i+1)*vocab_size]
        cross_entropy_class = cross_entropy_class + self.calculate_loss(predictions_subclass,labels)
      cross_entropy_loss = self.calculate_loss(predictions,labels)
      return cross_entropy_loss + 0.1*cross_entropy_class 
Example #25
Source File: train.py    From youtube-8m with Apache License 2.0
def optional_assign_weights(sess, weights_input, weights_assignment):
  if weights_input is not None:
    weights, length = get_video_weights_array()
    _ = sess.run(weights_assignment, feed_dict={weights_input: weights})
    print "Assigned weights from %s" % FLAGS.sample_freq_file
  else:
    print "Collection weights_input not found" 
Example #26
Source File: train_ensemble.py    From youtube-8m with Apache License 2.0
def main(unused_argv):
  # Load the environment.
  env = json.loads(os.environ.get("TF_CONFIG", "{}"))

  # Load the cluster data from the environment.
  cluster_data = env.get("cluster", None)
  cluster = tf.train.ClusterSpec(cluster_data) if cluster_data else None

  # Load the task data from the environment.
  task_data = env.get("task", None) or {"type": "master", "index": 0}
  task = type("TaskSpec", (object,), task_data)

  # Logging the version.
  logging.set_verbosity(tf.logging.INFO)
  logging.info("%s: Tensorflow version: %s.",
               task_as_string(task), tf.__version__)

  # Dispatch to a master, a worker, or a parameter server.
  if not cluster or task.type == "master" or task.type == "worker":
    Trainer(cluster, task, FLAGS.train_dir, FLAGS.log_device_placement).run(
        start_new_model=FLAGS.start_new_model)
  elif task.type == "ps":
    ParameterServer(cluster, task).run()
  else:
    raise ValueError("%s: Invalid task_type: %s." %
                     (task_as_string(task), task.type)) 
Example #27
Source File: train_ensemble.py    From youtube-8m with Apache License 2.0
def get_input_data_tensors(reader,
                                 data_pattern,
                                 batch_size=1024,
                                 num_epochs=None,
                                 num_readers=1):
    """Creates the section of the graph which reads the evaluation data.

    Args:
      reader: A class which parses the training data.
      data_pattern: A 'glob' style path to the data files.
      batch_size: How many examples to process at a time.
      num_readers: How many I/O threads to use.

    Returns:
      A tuple containing the features tensor, labels tensor, and optionally a
      tensor containing the number of frames per video. The exact dimensions
      depend on the reader being used.

    Raises:
      IOError: If no files matching the given pattern were found.
    """
    logging.info("Using batch size of " + str(batch_size) + " for evaluation.")
    with tf.name_scope("eval_input"):
        files = gfile.Glob(data_pattern)
        if not files:
            raise IOError("Unable to find the evaluation files.")
        logging.info("number of evaluation files: " + str(len(files)))
        files.sort()
        filename_queue = tf.train.string_input_producer(
            files, shuffle=False, num_epochs=num_epochs)
        training_data = reader.prepare_reader(filename_queue)
        return tf.train.batch(
            training_data,
            batch_size=batch_size,
            capacity=5 * FLAGS.batch_size,
            allow_smaller_final_batch=True,
            enqueue_many=True) 
Example #28
Source File: train.py    From youtube-8m with Apache License 2.0
def main(unused_argv):
  # Load the environment.
  env = json.loads(os.environ.get("TF_CONFIG", "{}"))

  # Load the cluster data from the environment.
  cluster_data = env.get("cluster", None)
  cluster = tf.train.ClusterSpec(cluster_data) if cluster_data else None

  # Load the task data from the environment.
  task_data = env.get("task", None) or {"type": "master", "index": 0}
  task = type("TaskSpec", (object,), task_data)

  # Logging the version.
  logging.set_verbosity(tf.logging.INFO)
  logging.info("%s: Tensorflow version: %s.",
               task_as_string(task), tf.__version__)

  # Dispatch to a master, a worker, or a parameter server.
  if not cluster or task.type == "master" or task.type == "worker":
    Trainer(cluster, task, FLAGS.train_dir, FLAGS.log_device_placement).run(
        start_new_model=FLAGS.start_new_model)
  elif task.type == "ps":
    ParameterServer(cluster, task).run()
  else:
    raise ValueError("%s: Invalid task_type: %s." %
                     (task_as_string(task), task.type)) 
Example #29
Source File: inference-pre-ensemble-distill.py    From youtube-8m with Apache License 2.0
def write_to_record(id_batch, label_batch, predictions, filenum, num_examples_processed):
    writer = tf.python_io.TFRecordWriter(FLAGS.output_dir + '/' + 'predictions-%04d.tfrecord' % filenum)
    for i in range(num_examples_processed):
        video_id = id_batch[i]
        label = np.nonzero(label_batch[i,:])[0]
        example = get_output_feature(video_id, label, [predictions[i,:]], ['predictions'])
        serialized = example.SerializeToString()
        writer.write(serialized)
    writer.close() 
Example #30
Source File: labels_embedding.py    From youtube-8m with Apache License 2.0
def create_model(self, model_input, vocab_size, l2_penalty=1e-8, **unused_params):
        """Creates a logistic model.

        Args:
          model_input: 'batch' x 'num_features' matrix of input features.
          vocab_size: The number of classes in the dataset.

        Returns:
          A dictionary with a tensor containing the probability predictions of the
          model in the 'predictions' key. The dimensions of the tensor are
          batch_size x num_classes."""
        model_input = tf.cast(model_input,dtype=tf.float32)
        hidden_size = FLAGS.hidden_size

        model_mask, indices_input = tf.nn.top_k(model_input, k=FLAGS.top_k)
        indices_input = tf.reshape(indices_input, [-1])
        models_mask = tf.reshape(model_mask, [-1,FLAGS.top_k,1])
        with tf.name_scope("embedding"):
            embeddings = tf.Variable(
                tf.random_uniform([vocab_size, hidden_size], -1.0, 1.0))
            embed = tf.nn.embedding_lookup(embeddings, indices_input)
            output = slim.fully_connected(
                embed,
                vocab_size,
                activation_fn=tf.nn.sigmoid,
                weights_regularizer=slim.l2_regularizer(l2_penalty),
                scope="output")
        indices_one_hot = tf.one_hot(indices_input, vocab_size)
        output = output * (1 - indices_one_hot) + indices_one_hot
        output_val = tf.reshape(output,[-1,FLAGS.top_k,vocab_size])
        predictions_val = tf.reduce_sum(output_val*models_mask, axis=1)/tf.reduce_sum(models_mask, axis=1)
        return {"predictions": output, "predictions_val": predictions_val}