Python official.recommendation.data_preprocessing.instantiate_pipeline() Examples

The following are 5 code examples of official.recommendation.data_preprocessing.instantiate_pipeline(). You can go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module official.recommendation.data_preprocessing, or try the search function.
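Note that the signature of instantiate_pipeline() differs between revisions of the TensorFlow models repository: newer revisions (Examples #1, #2, and #4) return (num_users, num_items, producer), while older ones (Examples #3 and #5) return an (ncf_dataset, cleanup_fn) pair. The excerpts below omit their module-level imports; for the newer revisions these are roughly the following (TF 1.x is assumed for the ncf_main.py and data_test.py excerpts):

import numpy as np
import tensorflow as tf
from absl import flags

from official.recommendation import constants as rconst
from official.recommendation import data_pipeline
from official.recommendation import data_preprocessing
from official.recommendation import movielens

FLAGS = flags.FLAGS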
Example #1
Source File: ncf_common.py    From models with Apache License 2.0
def get_inputs(params):
  """Returns some parameters used by the model."""
  if FLAGS.download_if_missing and not FLAGS.use_synthetic_data:
    movielens.download(FLAGS.dataset, FLAGS.data_dir)

  if FLAGS.seed is not None:
    np.random.seed(FLAGS.seed)

  if FLAGS.use_synthetic_data:
    producer = data_pipeline.DummyConstructor()
    num_users, num_items = data_preprocessing.DATASET_TO_NUM_USERS_AND_ITEMS[
        FLAGS.dataset]
    num_train_steps = rconst.SYNTHETIC_BATCHES_PER_EPOCH
    num_eval_steps = rconst.SYNTHETIC_BATCHES_PER_EPOCH
  else:
    num_users, num_items, producer = data_preprocessing.instantiate_pipeline(
        dataset=FLAGS.dataset, data_dir=FLAGS.data_dir, params=params,
        constructor_type=FLAGS.constructor_type,
        deterministic=FLAGS.seed is not None)
    num_train_steps = producer.train_batches_per_epoch
    num_eval_steps = producer.eval_batches_per_epoch

  return num_users, num_items, num_train_steps, num_eval_steps, producer 
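The producer returned by the newer pipeline is a data_pipeline constructor that builds the actual tf.data input functions. Below is a minimal sketch of how the values returned by get_inputs() are typically consumed; the wiring is an assumption for illustration, not part of the excerpt above:

num_users, num_items, num_train_steps, num_eval_steps, producer = (
    get_inputs(params))
producer.start()  # Launch asynchronous example generation in the background.
train_input_fn = producer.make_input_fn(is_training=True)
eval_input_fn = producer.make_input_fn(is_training=False)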
Example #2
Source File: ncf_common.py    From models with Apache License 2.0
def get_inputs(params):
  """Returns some parameters used by the model."""
  if FLAGS.download_if_missing and not FLAGS.use_synthetic_data:
    movielens.download(FLAGS.dataset, FLAGS.data_dir)

  if FLAGS.seed is not None:
    np.random.seed(FLAGS.seed)

  if FLAGS.use_synthetic_data:
    producer = data_pipeline.DummyConstructor()
    num_users, num_items = movielens.DATASET_TO_NUM_USERS_AND_ITEMS[
        FLAGS.dataset]
    num_train_steps = rconst.SYNTHETIC_BATCHES_PER_EPOCH
    num_eval_steps = rconst.SYNTHETIC_BATCHES_PER_EPOCH
  else:
    num_users, num_items, producer = data_preprocessing.instantiate_pipeline(
        dataset=FLAGS.dataset, data_dir=FLAGS.data_dir, params=params,
        constructor_type=FLAGS.constructor_type,
        deterministic=FLAGS.seed is not None)
    num_train_steps = producer.train_batches_per_epoch
    num_eval_steps = producer.eval_batches_per_epoch

  return num_users, num_items, num_train_steps, num_eval_steps, producer 
Example #3
Source File: ncf_main.py    From training_results_v0.5 with Apache License 2.0
def main(_):
  """Train NCF model and evaluate its hit rate (HR) metric."""
  tpu_cluster_resolver = tf.contrib.cluster_resolver.TPUClusterResolver(
      FLAGS.tpu,
      zone=FLAGS.tpu_zone,
      project=FLAGS.gcp_project)
  master = tpu_cluster_resolver.master()

  ncf_dataset, cleanup_fn = data_preprocessing.instantiate_pipeline(
      dataset=FLAGS.dataset,
      data_dir=FLAGS.data_dir,
      # TODO(shizhiw): support multihost.
      batch_size=FLAGS.batch_size,
      eval_batch_size=FLAGS.eval_batch_size,
      num_neg=FLAGS.num_neg,
      epochs_per_cycle=1,
      match_mlperf=FLAGS.ml_perf,
      use_subprocess=FLAGS.use_subprocess,
      cache_id=FLAGS.cache_id)

  train_params, eval_params = create_params(ncf_dataset)

  eval_graph_spec = build_graph(
      eval_params, ncf_dataset, tpu_embedding.INFERENCE)

  for epoch in range(_NUM_EPOCHS):
    tf.logging.info("Training {}...".format(epoch))
    # Rebuild the training graph every epoch, because the number of batches
    # per epoch (batch_count) can change by 1 between epochs.
    train_graph_spec = build_graph(
        train_params, ncf_dataset, tpu_embedding.TRAINING)

    run_graph(master, train_graph_spec, epoch)

    tf.logging.info("Evaluating {}...".format(epoch))
    run_graph(master, eval_graph_spec, epoch)

  cleanup_fn()  # Clean up data construction artifacts and the subprocess.
Example #4
Source File: create_ncf_data.py    From models with Apache License 2.0
def prepare_raw_data(flag_obj):
  """Downloads and prepares raw data for data generation."""
  movielens.download(flag_obj.dataset, flag_obj.data_dir)

  data_processing_params = {
      "train_epochs": flag_obj.num_train_epochs,
      "batch_size": flag_obj.train_prebatch_size,
      "eval_batch_size": flag_obj.eval_prebatch_size,
      "batches_per_step": 1,
      "stream_files": True,
      "num_neg": flag_obj.num_negative_samples,
  }

  num_users, num_items, producer = data_preprocessing.instantiate_pipeline(
      dataset=flag_obj.dataset,
      data_dir=flag_obj.data_dir,
      params=data_processing_params,
      constructor_type=flag_obj.constructor_type,
      epoch_dir=flag_obj.data_dir,
      generate_data_offline=True)

  # pylint: disable=protected-access
  input_metadata = {
      "num_users": num_users,
      "num_items": num_items,
      "constructor_type": flag_obj.constructor_type,
      "num_train_elements": producer._elements_in_epoch,
      "num_eval_elements": producer._eval_elements_in_epoch,
      "num_train_epochs": flag_obj.num_train_epochs,
      "train_prebatch_size": flag_obj.train_prebatch_size,
      "eval_prebatch_size": flag_obj.eval_prebatch_size,
      "num_train_steps": producer.train_batches_per_epoch,
      "num_eval_steps": producer.eval_batches_per_epoch,
  }
  # pylint: enable=protected-access

  return producer, input_metadata 
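In the original create_ncf_data.py, the producer is then run synchronously to materialize the prebatched data files, and the metadata dictionary is written out for the training job to read. A hedged sketch (the output path is illustrative):

import json

producer, input_metadata = prepare_raw_data(flag_obj)
producer.run()  # Synchronous: generates and writes the prebatched files.
with tf.io.gfile.GFile("/tmp/ncf_data/metadata.json", "w") as writer:
  writer.write(json.dumps(input_metadata))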
Example #5
Source File: data_test.py    From multilabel-image-classification-tensorflow with MIT License
def test_end_to_end(self):
    ncf_dataset, _ = data_preprocessing.instantiate_pipeline(
        dataset=DATASET, data_dir=self.temp_data_dir,
        batch_size=BATCH_SIZE, eval_batch_size=EVAL_BATCH_SIZE,
        num_cycles=1, num_data_readers=2, num_neg=NUM_NEG)

    g = tf.Graph()
    with g.as_default():
      input_fn, record_dir, batch_count = (
          data_preprocessing.make_input_fn(ncf_dataset, True))
      dataset = input_fn({"batch_size": BATCH_SIZE, "use_tpu": False,
                          "use_xla_for_gpu": False})
    first_epoch = self.drain_dataset(dataset=dataset, g=g)
    user_inv_map = {v: k for k, v in ncf_dataset.user_map.items()}
    item_inv_map = {v: k for k, v in ncf_dataset.item_map.items()}

    train_examples = {
        True: set(),
        False: set(),
    }
    for features, labels in first_epoch:
      for u, i, l in zip(features[movielens.USER_COLUMN],
                         features[movielens.ITEM_COLUMN], labels):

        u_raw = user_inv_map[u]
        i_raw = item_inv_map[i]
        if ((u_raw, i_raw) in self.seen_pairs) != l:
          # The evaluation item is not considered during false negative
          # generation, so it will occasionally appear as a negative example
          # during training.
          assert not l
          assert i_raw == self.holdout[u_raw][1]
        train_examples[l].add((u_raw, i_raw))
    num_positives_seen = len(train_examples[True])

    assert ncf_dataset.num_train_positives == num_positives_seen

    # This check is heuristic because negatives are sampled with replacement;
    # it only verifies that negative generation is reasonably random.
    assert len(train_examples[False]) / NUM_NEG / num_positives_seen > 0.9
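The drain_dataset() helper used above is defined elsewhere in the test; under TF 1.x graph mode it amounts to roughly the following (a sketch, not the verbatim helper):

def drain_dataset(dataset, g):
  # Build a one-shot iterator in the supplied graph and pull batches until
  # the dataset raises OutOfRangeError, collecting every (features, labels).
  with g.as_default():
    batch = dataset.make_one_shot_iterator().get_next()
  output = []
  with tf.Session(graph=g) as sess:
    while True:
      try:
        output.append(sess.run(batch))
      except tf.errors.OutOfRangeError:
        break
  return output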