Python tensorflow.estimator() Examples

The following are 30 code examples of tensorflow.estimator(). You can go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module tensorflow, or try the search function.
Example #1
Source File: models.py    From professional-services with Apache License 2.0
def dnn_classifier(self):
        """Builds the DNN model(classifier)
        with the parameters parsed from the user input
        Returns : tf.estimator object, Canned estimator of DNN Classifier
        """
        return tf.estimator.DNNClassifier(
            config=self.config,
            feature_columns=self.deep_columns,
            hidden_units=self.hidden_units,
            n_classes=self.n_classes,
            weight_column=self.weight_column,
            label_vocabulary=self.label_vocabulary,
            optimizer=self.dnn_optimizer,
            activation_fn=self.activation_fn,
            dropout=self.dropout,
            input_layer_partitioner=self.input_layer_partitioner,
            warm_start_from=self.warm_start_from,
            loss_reduction=self.loss_reduction
        ) 
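For context, a minimal usage sketch of how a canned DNNClassifier like the one above is typically constructed and trained with the TF 1.x Estimator API. The feature names, toy data, and hyperparameters below are illustrative assumptions, not values from the professional-services project:

import tensorflow as tf

# Hypothetical numeric features; a real project would build these from its schema.
feature_columns = [
    tf.feature_column.numeric_column("age"),
    tf.feature_column.numeric_column("hours_per_week"),
]

def train_input_fn():
    # Toy in-memory data purely for illustration.
    features = {"age": [25.0, 42.0, 37.0], "hours_per_week": [40.0, 50.0, 30.0]}
    labels = [0, 1, 0]
    dataset = tf.data.Dataset.from_tensor_slices((features, labels))
    return dataset.shuffle(3).repeat().batch(2)

classifier = tf.estimator.DNNClassifier(
    feature_columns=feature_columns,
    hidden_units=[16, 8],
    n_classes=2,
    model_dir="/tmp/dnn_classifier_demo")

classifier.train(input_fn=train_input_fn, steps=100)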
Example #2
Source File: main.py    From training_results_v0.5 with Apache License 2.0
def validate(
        working_dir: 'tf.estimator working directory',
        *tf_record_dirs: 'Directories where holdout data are',
        checkpoint_name: 'Which checkpoint to evaluate (None=latest)'=None,
        validate_name: 'Name for validation set (i.e., selfplay or human)'=None):
    qmeas.start_time('validate')
    tf_records = []
    with timer("Building lists of holdout files"):
        for record_dir in tf_record_dirs:
            tf_records.extend(gfile.Glob(os.path.join(record_dir, '*.zz')))

    first_record = os.path.basename(tf_records[0])
    last_record = os.path.basename(tf_records[-1])
    with timer("Validating from {} to {}".format(first_record, last_record)):
        dual_net.validate(
            working_dir, tf_records, checkpoint_name=checkpoint_name,
            name=validate_name)
    qmeas.stop_time('validate') 
Example #3
Source File: main.py    From training_results_v0.5 with Apache License 2.0
def bootstrap(
        working_dir: 'tf.estimator working directory. If not set, defaults to a random tmp dir'=None,
        model_save_path: 'Where to export the first bootstrapped generation'=None):
    qmeas.start_time('bootstrap')
    if working_dir is None:
        with tempfile.TemporaryDirectory() as working_dir:
            _ensure_dir_exists(working_dir)
            _ensure_dir_exists(os.path.dirname(model_save_path))
            dual_net.bootstrap(working_dir)
            dual_net.export_model(working_dir, model_save_path)
    else:
        _ensure_dir_exists(working_dir)
        _ensure_dir_exists(os.path.dirname(model_save_path))
        dual_net.bootstrap(working_dir)
        dual_net.export_model(working_dir, model_save_path)
    qmeas.stop_time('bootstrap') 
Example #4
Source File: build_model.py    From g-tensorflow-models with Apache License 2.0
def model_fn(self, features, labels, mode, params):
    """Function fulfilling the tf.estimator model_fn interface.

    Args:
      features: a dict containing the input features for prediction.
      labels: a dict from target name to Tensor-value prediction.
      mode: the ModeKey string.
      params: a dictionary of parameters for building the model; current params
        are params["batch_size"]: the integer batch size.

    Returns:
      A tf.estimator.EstimatorSpec object ready for use in training, inference,
      or evaluation.
    """
    self.build_graph(features, labels, mode, params['batch_size'])

    return tf.estimator.EstimatorSpec(
        mode,
        predictions=self.predictions,
        loss=self.total_loss,
        train_op=self.train_op,
        eval_metric_ops={}) 
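As a rough sketch of how a model_fn such as this plugs into the Estimator API: the toy regression model, the feature name "x", and the learning-rate parameter below are assumptions for illustration, not code from the project above.

import tensorflow as tf

def toy_model_fn(features, labels, mode, params):
    # A single dense layer stands in for the real graph built by build_graph();
    # labels is expected to have shape [batch_size, 1].
    predictions = tf.layers.dense(features["x"], units=1)
    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(mode, predictions={"y": predictions})
    loss = tf.losses.mean_squared_error(labels, predictions)
    train_op = tf.train.AdamOptimizer(params["learning_rate"]).minimize(
        loss, global_step=tf.train.get_global_step())
    return tf.estimator.EstimatorSpec(
        mode, predictions={"y": predictions}, loss=loss, train_op=train_op)

estimator = tf.estimator.Estimator(
    model_fn=toy_model_fn,
    model_dir="/tmp/toy_model",
    params={"learning_rate": 0.01, "batch_size": 8})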
Example #5
Source File: train_higgs.py    From g-tensorflow-models with Apache License 2.0
def _make_csv_serving_input_receiver_fn(column_names, column_defaults):
  """Returns serving_input_receiver_fn for csv.

  The input arguments are relevant to `tf.decode_csv()`.

  Args:
    column_names: a list of column names in the order within input csv.
    column_defaults: a list of default values with the same size as
        column_names. Each entry must be either a list of one scalar, or an
        empty list to denote that the corresponding column is required.
        e.g. [[""], [2.5], []] indicates the third column is required while
            the first column must be a string and the second must be float/double.

  Returns:
    a serving_input_receiver_fn that handles csv for serving.
  """
  def serving_input_receiver_fn():
    csv = tf.placeholder(dtype=tf.string, shape=[None], name="csv")
    features = dict(zip(column_names, tf.decode_csv(csv, column_defaults)))
    receiver_tensors = {"inputs": csv}
    return tf.estimator.export.ServingInputReceiver(features, receiver_tensors)

  return serving_input_receiver_fn 
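A receiver function built this way is typically handed to the estimator when exporting a SavedModel for serving. A hedged usage sketch; the column definitions, the estimator variable, and the export path are placeholders rather than values from train_higgs.py:

# Hypothetical columns: two float features with defaults.
column_names = ["feature_01", "feature_02"]
column_defaults = [[0.0], [0.0]]

serving_input_receiver_fn = _make_csv_serving_input_receiver_fn(
    column_names, column_defaults)

# In TF 1.x this writes a servable SavedModel under the given directory
# (newer releases also expose the same call as export_saved_model).
estimator.export_savedmodel("/tmp/higgs_export", serving_input_receiver_fn)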
Example #6
Source File: dual_net.py    From training_results_v0.5 with Apache License 2.0
def export_model(working_dir, model_path):
    """Take the latest checkpoint and export it to model_path for selfplay.

    Assumes that all relevant model files are prefixed by the same name.
    (For example, foo.index, foo.meta and foo.data-00000-of-00001).

    Args:
        working_dir: The directory where tf.estimator keeps its checkpoints
        model_path: The path (can be a gs:// path) to export model to
    """
    estimator = tf.estimator.Estimator(model_fn, model_dir=working_dir,
                                       params='ignored')
    latest_checkpoint = estimator.latest_checkpoint()
    all_checkpoint_files = tf.gfile.Glob(latest_checkpoint + '*')
    for filename in all_checkpoint_files:
        suffix = filename.partition(latest_checkpoint)[2]
        destination_path = model_path + suffix
        print("Copying {} to {}".format(filename, destination_path))
        tf.gfile.Copy(filename, destination_path) 
Example #7
Source File: build_model.py    From models with Apache License 2.0
def model_fn(self, features, labels, mode, params):
    """Function fulfilling the tf.estimator model_fn interface.

    Args:
      features: a dict containing the input features for prediction.
      labels: a dict from target name to Tensor-value prediction.
      mode: the ModeKey string.
      params: a dictionary of parameters for building the model; current params
        are params["batch_size"]: the integer batch size.

    Returns:
      A tf.estimator.EstimatorSpec object ready for use in training, inference,
      or evaluation.
    """
    self.build_graph(features, labels, mode, params['batch_size'])

    return tf.estimator.EstimatorSpec(
        mode,
        predictions=self.predictions,
        loss=self.total_loss,
        train_op=self.train_op,
        eval_metric_ops={}) 
Example #8
Source File: dual_net.py    From training_results_v0.5 with Apache License 2.0
def initialize_graph(self):
        if not self.inference:
            with self.sess.graph.as_default():
                features, labels = get_inference_input()
                estimator_spec = model_fn(features, labels,
                                        tf.estimator.ModeKeys.PREDICT, self.hparams)
                self.inference_input = features
                self.inference_output = estimator_spec.predictions
                if self.save_file is not None:
                    self.initialize_weights(self.save_file)
                else:
                    self.sess.run(tf.global_variables_initializer())
        else:
            input_name = "pos_tensor"
            input_tensors = self.graph.get_tensor_by_name("import/" + input_name + ":0")
            self.inference_input = input_tensors
            output_names = ["policy_output", "value_output"]
            output_tensors = []
            for name in output_names:
                output_tensors.append(self.graph.get_tensor_by_name("import/" + name + ":0"))
            self.inference_output = output_tensors 
Example #9
Source File: _task_commons.py    From tf-yarn with Apache License 2.0
def _shutdown_container(
    client: skein.ApplicationClient,
    cluster_tasks: List[str],
    run_config: tf.estimator.RunConfig,
    thread: Optional[MonitoredThread]
) -> None:
    # Wait for all tasks connected to this one. The set of tasks to
    # wait for contains all tasks in the cluster, or the ones
    # matching ``device_filters`` if set. The implementation assumes
    # that ``device_filters`` are symmetric.
    exception = thread.exception if thread is not None and isinstance(thread, MonitoredThread) \
        else None
    task = cluster.get_task()
    event.stop_event(client, task, exception)
    wait_for_connected_tasks(
        client,
        cluster_tasks,
        getattr(run_config.session_config, "device_filters", []))

    event.broadcast_container_stop_time(client, task)

    if exception is not None:
        raise exception from None 
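The ``device_filters`` read at the end come from the RunConfig's session_config. A minimal sketch of how such a config might be assembled; the filter strings describe a generic ps/worker setup and are not taken from tf-yarn:

import tensorflow as tf

# Let this worker see only the parameter servers and itself, which is the
# usual reason device_filters are set in distributed training.
session_config = tf.ConfigProto(device_filters=["/job:ps", "/job:worker/task:0"])
run_config = tf.estimator.RunConfig(
    model_dir="/tmp/model",
    save_checkpoints_steps=1000,
    session_config=session_config)

# getattr(run_config.session_config, "device_filters", []) then yields the
# list passed above.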
Example #10
Source File: _task_commons.py    From tf-yarn with Apache License 2.0
def _gen_monitored_train_and_evaluate(client: skein.ApplicationClient):
    task = cluster.get_task()

    def train_and_evaluate(
            estimator: tf.estimator,
            train_spec: tf.estimator.TrainSpec,
            eval_spec: tf.estimator.EvalSpec):
        event.broadcast_train_eval_start_timer(client, task)
        tf.estimator.train_and_evaluate(
            estimator,
            train_spec,
            eval_spec
        )
        event.broadcast_train_eval_stop_timer(client, task)

    return train_and_evaluate 
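The returned wrapper is called exactly like tf.estimator.train_and_evaluate itself. A short usage sketch; the input functions, step counts, and the client/estimator variables are placeholders:

train_spec = tf.estimator.TrainSpec(input_fn=train_input_fn, max_steps=10000)
eval_spec = tf.estimator.EvalSpec(input_fn=eval_input_fn, steps=100)

train_and_evaluate = _gen_monitored_train_and_evaluate(client)
train_and_evaluate(estimator, train_spec, eval_spec)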
Example #11
Source File: dual_net.py    From training_results_v0.5 with Apache License 2.0
def initialize_graph(self):
        if not self.inference:
            with self.sess.graph.as_default():
                features, labels = get_inference_input()
                estimator_spec = model_fn(features, labels,
                                        tf.estimator.ModeKeys.PREDICT, self.hparams)
                self.inference_input = features
                self.inference_output = estimator_spec.predictions
                if self.save_file is not None:
                    self.initialize_weights(self.save_file)
                else:
                    self.sess.run(tf.global_variables_initializer())
        else:
            input_name = "pos_tensor"
            input_tensors = self.graph.get_tensor_by_name("import/" + input_name + ":0")
            self.inference_input = input_tensors
            output_names = ["policy_output", "value_output"]
            output_tensors = []
            for name in output_names:
                output_tensors.append(self.graph.get_tensor_by_name("import/" + name + ":0"))
            self.inference_output = output_tensors 
Example #12
Source File: dual_net.py    From training_results_v0.5 with Apache License 2.0
def export_model(working_dir, model_path):
    """Take the latest checkpoint and export it to model_path for selfplay.

    Assumes that all relevant model files are prefixed by the same name.
    (For example, foo.index, foo.meta and foo.data-00000-of-00001).

    Args:
        working_dir: The directory where tf.estimator keeps its checkpoints
        model_path: The path (can be a gs:// path) to export model to
    """
    estimator = tf.estimator.Estimator(model_fn, model_dir=working_dir,
                                       params='ignored')
    latest_checkpoint = estimator.latest_checkpoint()
    all_checkpoint_files = tf.gfile.Glob(latest_checkpoint + '*')
    for filename in all_checkpoint_files:
        suffix = filename.partition(latest_checkpoint)[2]
        destination_path = model_path + suffix
        print("Copying {} to {}".format(filename, destination_path))
        tf.gfile.Copy(filename, destination_path) 
Example #13
Source File: main.py    From training_results_v0.5 with Apache License 2.0
def bootstrap(
        working_dir: 'tf.estimator working directory. If not set, defaults to a random tmp dir'=None,
        model_save_path: 'Where to export the first bootstrapped generation'=None):
    qmeas.start_time('bootstrap')
    if working_dir is None:
        with tempfile.TemporaryDirectory() as working_dir:
            _ensure_dir_exists(working_dir)
            _ensure_dir_exists(os.path.dirname(model_save_path))
            dual_net.bootstrap(working_dir)
            dual_net.export_model(working_dir, model_save_path)
    else:
        _ensure_dir_exists(working_dir)
        _ensure_dir_exists(os.path.dirname(model_save_path))
        dual_net.bootstrap(working_dir)
        dual_net.export_model(working_dir, model_save_path)
    qmeas.stop_time('bootstrap') 
Example #14
Source File: train_higgs.py    From Live-feed-object-device-identification-using-Tensorflow-and-OpenCV with Apache License 2.0
def _make_csv_serving_input_receiver_fn(column_names, column_defaults):
  """Returns serving_input_receiver_fn for csv.

  The input arguments are relevant to `tf.decode_csv()`.

  Args:
    column_names: a list of column names in the order within input csv.
    column_defaults: a list of default values with the same size as
        column_names. Each entry must be either a list of one scalar, or an
        empty list to denote that the corresponding column is required.
        e.g. [[""], [2.5], []] indicates the third column is required while
            the first column must be a string and the second must be float/double.

  Returns:
    a serving_input_receiver_fn that handles csv for serving.
  """
  def serving_input_receiver_fn():
    csv = tf.placeholder(dtype=tf.string, shape=[None], name="csv")
    features = dict(zip(column_names, tf.decode_csv(csv, column_defaults)))
    receiver_tensors = {"inputs": csv}
    return tf.estimator.export.ServingInputReceiver(features, receiver_tensors)

  return serving_input_receiver_fn 
Example #15
Source File: models.py    From professional-services with Apache License 2.0
def polynomial_classifier(self):
        """Builds the logistic classification model
        with the parameters parsed from the user input
        Returns: A Custom Estimator of Polynomial classifier
        """
        return tf.estimator.Estimator(
            model_fn=self.poly_classification_model_fn,
            model_dir=self.model_dir,
            config=self.config,
            params={
                'degree': self.polynomial_degree,
                'feature_names': self.feature_names,
                'batch_size': self.batch_size,
                'optimizer': self.optimizer
            }
        ) 
Example #16
Source File: models.py    From professional-services with Apache License 2.0
def polynomial_regressor(self):
        """Builds the polynomial regression model
        with the parameters parsed from the user input
        Returns: A Custom Estimator of Polynomial regression
        """
        return tf.estimator.Estimator(
            model_fn=self.poly_regression_model_fn,
            model_dir=self.model_dir, config=self.config,
            params={
                'batch_size': self.batch_size,
                'polynomial_degree': self.polynomial_degree,
                'feature_names': self.feature_names,
                'optimizer': self.optimizer
            },
            warm_start_from=self.warm_start_from
        ) 
Example #17
Source File: losses_impl.py    From ranking with Apache License 2.0
def normalize_weights(self, labels, weights):
    """Normalizes weights needed for tf.estimator (not tf.keras).

    This is needed for `tf.estimator` given that the reduction may be
    `SUM_OVER_NONZERO_WEIGHTS`. This function is not needed after we migrate
    from the deprecated reduction to `SUM` or `SUM_OVER_BATCH_SIZE`.

    Args:
      labels: A `Tensor` of shape [batch_size, list_size] representing graded
        relevance.
      weights: A scalar, a `Tensor` with shape [batch_size, 1] for list-wise
        weights, or a `Tensor` with shape [batch_size, list_size] for item-wise
        weights.

    Returns:
      The normalized weights.
    """
    del labels
    return 1.0 if weights is None else weights 
Example #18
Source File: losses_impl.py    From ranking with Apache License 2.0
def compute(self, labels, logits, weights, reduction):
    """Computes the reduced loss for tf.estimator (not tf.keras).

    Note that this function is not compatible with keras.

    Args:
      labels: A `Tensor` of the same shape as `logits` representing graded
        relevance.
      logits: A `Tensor` with shape [batch_size, list_size]. Each value is the
        ranking score of the corresponding item.
      weights: A scalar, a `Tensor` with shape [batch_size, 1] for list-wise
        weights, or a `Tensor` with shape [batch_size, list_size] for item-wise
        weights.
      reduction: One of `tf.losses.Reduction` except `NONE`. Describes how to
        reduce training loss over batch.

    Returns:
      Reduced loss for training and eval.
    """
    losses, loss_weights = self.compute_unreduced_loss(labels, logits)
    weights = tf.multiply(self.normalize_weights(labels, weights), loss_weights)
    return tf.compat.v1.losses.compute_weighted_loss(
        losses, weights, reduction=reduction) 
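The ``reduction`` argument is one of the tf.compat.v1.losses.Reduction values mentioned in the docstring. A small standalone sketch (with arbitrary numbers) of how the final call behaves:

import tensorflow as tf

losses = tf.constant([[1.0, 2.0], [3.0, 0.0]])
weights = tf.constant([[1.0, 1.0], [1.0, 0.0]])

# SUM_OVER_NONZERO_WEIGHTS divides the weighted sum (6.0) by the number of
# non-zero weights (3), giving 2.0; SUM_OVER_BATCH_SIZE would divide by all
# four elements, giving 1.5.
loss = tf.compat.v1.losses.compute_weighted_loss(
    losses, weights,
    reduction=tf.compat.v1.losses.Reduction.SUM_OVER_NONZERO_WEIGHTS)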
Example #19
Source File: losses_impl.py    From ranking with Apache License 2.0
def eval_metric(self, labels, logits, weights):
    """Computes the eval metric for the loss in tf.estimator (not tf.keras).

    Note that this function is not compatible with keras.

    Args:
      labels: A `Tensor` of the same shape as `logits` representing graded
        relevance.
      logits: A `Tensor` with shape [batch_size, list_size]. Each value is the
        ranking score of the corresponding item.
      weights: A scalar, a `Tensor` with shape [batch_size, 1] for list-wise
        weights, or a `Tensor` with shape [batch_size, list_size] for item-wise
        weights.

    Returns:
      A metric op.
    """
    losses, loss_weights = self.compute_unreduced_loss(labels, logits)
    weights = tf.multiply(self.normalize_weights(labels, weights), loss_weights)
    return tf.compat.v1.metrics.mean(losses, weights) 
Example #20
Source File: models.py    From professional-services with Apache License 2.0
def combined_classifier(self):
        """Builds a combined DNN and linear classifier parsed from user input.
        Returns : tf.estimator object, Canned estimator of Combined Classifier
        """
        return tf.estimator.DNNLinearCombinedClassifier(
            config=self.config,
            linear_feature_columns=self.feature_columns,
            linear_optimizer=self.linear_optimizer,
            dnn_feature_columns=self.deep_columns,
            dnn_hidden_units=self.hidden_units,
            dnn_activation_fn=self.activation_fn,
            dnn_dropout=self.dropout,
            n_classes=self.n_classes,
            weight_column=self.weight_column,
            label_vocabulary=self.label_vocabulary,
            input_layer_partitioner=self.input_layer_partitioner,
            warm_start_from=self.warm_start_from,
            loss_reduction=self.loss_reduction,
            batch_norm=self.batch_norm,
            linear_sparse_combiner=self.linear_sparse_combiner
        ) 
Example #21
Source File: loop.py    From task_adaptation with Apache License 2.0
def run_training_loop(hub_module,
                      hub_module_signature,
                      work_dir,
                      tpu_name,
                      save_checkpoints_steps,
                      optimization_params,
                      data_params):
  """Runs training loop."""
  estimator = setup_estimator(hub_module,
                              hub_module_signature,
                              work_dir,
                              tpu_name,
                              save_checkpoints_steps,
                              optimization_params,
                              data_params)
  input_fn = data_loader.build_data_pipeline(data_params, mode="train")

  # TPUs require the max number of steps to be specified explicitly.
  estimator.train(input_fn, max_steps=optimization_params["max_steps"]) 
Example #22
Source File: model.py    From ranking with Apache License 2.0
def __init__(self, transform_fn=None):
    """Constructor for the common components of all ranking models.

    Args:
      transform_fn: (function) A user-provided function that transforms raw
        features into dense Tensors with the following signature:
        * Args:
          `features`: A dict of Tensors or SparseTensors that contains the raw
            features from an input_fn.
          `mode`: Optional. See estimator `ModeKeys`.
          `params`: Optional. See tf.estimator model_fn. Hyperparameters for the
            model.
        * Returns:
          `context_features`: A dict of `Tensor`s with shape [batch_size, ...]
          `example_features`: A dict of `Tensor`s with shape [batch_size,
            list_size, ...]
    """
    if transform_fn is None:
      self._transform_fn = feature.make_identity_transform_fn({})
    else:
      self._transform_fn = transform_fn 
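A rough sketch of what a user-provided transform_fn matching the documented signature could look like; the feature names and the split into context versus example features are invented for illustration:

import tensorflow as tf

def my_transform_fn(features, mode=None, params=None):
    # Query-level (context) feature with shape [batch_size, ...].
    context_features = {
        "query_length": tf.cast(features["query_length"], tf.float32)}
    # Per-document (example) feature with shape [batch_size, list_size, ...].
    example_features = {
        "document_tokens": tf.sparse.to_dense(
            features["document_tokens"], default_value=0)}
    return context_features, example_features

A function like this would then be passed as the transform_fn argument to the constructor above.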
Example #23
Source File: models.py    From professional-services with Apache License 2.0
def dnn_regressor(self):
        """Builds the DNN model(regressor)
        with the parameters parsed from the user input
        Returns : tf.estimator object, Canned estimator of DNN Regressor
        """
        return tf.estimator.DNNRegressor(
            config=self.config,
            feature_columns=self.deep_columns,
            hidden_units=self.hidden_units,
            label_dimension=self.label_dimension,
            weight_column=self.weight_column,
            optimizer=self.dnn_optimizer,
            activation_fn=self.activation_fn,
            dropout=self.dropout,
            input_layer_partitioner=self.input_layer_partitioner,
            warm_start_from=self.warm_start_from,
            loss_reduction=self.loss_reduction
        ) 
Example #24
Source File: build_model.py    From multilabel-image-classification-tensorflow with MIT License
def model_fn(self, features, labels, mode, params):
    """Function fulfilling the tf.estimator model_fn interface.

    Args:
      features: a dict containing the input features for prediction.
      labels: a dict from target name to Tensor-value prediction.
      mode: the ModeKey string.
      params: a dictionary of parameters for building the model; current params
        are params["batch_size"]: the integer batch size.

    Returns:
      A tf.estimator.EstimatorSpec object ready for use in training, inference,
      or evaluation.
    """
    self.build_graph(features, labels, mode, params['batch_size'])

    return tf.estimator.EstimatorSpec(
        mode,
        predictions=self.predictions,
        loss=self.total_loss,
        train_op=self.train_op,
        eval_metric_ops={}) 
Example #25
Source File: pre_train.py    From BERT_TF with Apache License 2.0
def main():
    # tf.gfile.MakeDirs(FLAGS.output_dir)
    Path(bert_config.model_dir).mkdir(exist_ok=True)

    model_fn = model_fn_builder(
        bert_config=bert_config,
        init_checkpoint=bert_config.init_checkpoint,
        learning_rate=bert_config.learning_rate,
        num_train_steps=bert_config.num_train_steps)
    
    input_fn = functools.partial(train_input_fn, 
                                 path=bert_config.data_path,
                                 batch_size=bert_config.batch_size,
                                 repeat_num=bert_config.num_train_steps,
                                 max_length = bert_config.max_length,
                                 train_type=bert_config.train_type,
                                 reverse=bert_config.reverse)

    run_config = tf.contrib.tpu.RunConfig(
        keep_checkpoint_max=1,
        save_checkpoints_steps=1000,
        model_dir=bert_config.model_dir)
    estimator = tf.estimator.Estimator(model_fn, config=run_config)
    estimator.train(input_fn)

    # train_spec = tf.estimator.TrainSpec(input_fn=input_fn)
    # eval_spec = tf.estimator.EvalSpec(input_fn=input_fn, steps=1000)
    # tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)

    # for evaluation, the repeat_num in input_fn has to be reset
    # estimator.evaluate(input_fn) 
Example #26
Source File: wide_deep.py    From uai-sdk with Apache License 2.0
def serving_input_receiver_fn():
  """An input receiver that expects a serialized tf.Example."""
  receiver_tensors = tf.placeholder(dtype=tf.string,
                                         shape=[None,],
                                         name='input_example_tensor')
  features = parse_csv(receiver_tensors)
  return tf.estimator.export.ServingInputReceiver(features, {"input":receiver_tensors}) 
Example #27
Source File: tutorial_1.py    From BERT_TF with Apache License 2.0
def generator_fn(words, tags):
    with codecs.open(words, 'r', 'utf-8') as file_words,\
         codecs.open(tags, 'r', 'utf-8') as file_tags: 
        for line_words, line_tags in zip(file_words, file_tags):
            yield parse_fn(line_words, line_tags)

## the input_fn which constructs the dataset (needed by tf.estimator later)
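The comment above refers to an input_fn that wraps this generator into a tf.data pipeline. A hedged sketch of what that usually looks like for token/tag files, assuming parse_fn yields ((words, n_words), tags); the padding values and batch size are illustrative, not copied from the tutorial:

import functools
import tensorflow as tf

def input_fn(words, tags, batch_size=32, shuffle_and_repeat=False):
    shapes = (([None], ()), [None])          # ((tokens, n_tokens), tags)
    types = ((tf.string, tf.int32), tf.string)
    defaults = (('<pad>', 0), 'O')
    dataset = tf.data.Dataset.from_generator(
        functools.partial(generator_fn, words, tags),
        output_shapes=shapes, output_types=types)
    if shuffle_and_repeat:
        dataset = dataset.shuffle(1000).repeat()
    return dataset.padded_batch(batch_size, shapes, defaults)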
Example #28
Source File: task.py    From pipelines with Apache License 2.0
def get_estimator(schema, transformed_data_dir, target_name, output_dir, hidden_units,
                  optimizer, learning_rate, feature_columns):
  """Get proper tf.estimator (DNNClassifier or DNNRegressor)."""
  # Map the optimizer name to a tf.train optimizer instance; default to Adagrad.
  if optimizer == 'Adam':
    optimizer = tf.train.AdamOptimizer(learning_rate)
  elif optimizer == 'SGD':
    optimizer = tf.train.GradientDescentOptimizer(learning_rate)
  else:
    optimizer = tf.train.AdagradOptimizer(learning_rate)

  # Set how often to run checkpointing in terms of steps.
  config = tf.contrib.learn.RunConfig(save_checkpoints_steps=1000)
  n_classes = is_classification(transformed_data_dir, target_name)
  if n_classes:
    estimator = tf.estimator.DNNClassifier(
        feature_columns=feature_columns,
        hidden_units=hidden_units,
        n_classes=n_classes,
        config=config,
        model_dir=output_dir)
  else:
    estimator = tf.estimator.DNNRegressor(
        feature_columns=feature_columns,
        hidden_units=hidden_units,
        config=config,
        model_dir=output_dir,
        optimizer=optimizer)

  return estimator 
Example #29
Source File: models.py    From professional-services with Apache License 2.0
def linear_classifier(self):
        """Builds the logistic regression model
        with the parameters parsed from the user input.
        Returns: tf.estimator object, Canned estimator of Linear Classifier
        """
        return tf.estimator.LinearClassifier(
            config=self.config,
            feature_columns=self.feature_columns,
            label_vocabulary=self.label_vocabulary,
            loss_reduction=self.loss_reduction,
            n_classes=self.n_classes,
            optimizer=self.linear_optimizer,
            partitioner=self.partitioner,
            warm_start_from=self.warm_start_from
        ) 
Example #30
Source File: task.py    From pipelines with Apache License 2.0
def build_feature_columns(schema, transformed_data_dir, target):
  """Build feature columns that tf.estimator expects."""

  feature_columns = []
  for entry in schema:
    name = entry['name']
    datatype = entry['type']
    if name == target:
      continue

    if datatype == 'NUMBER':
      feature_columns.append(tf.feature_column.numeric_column(name, shape=()))
    elif datatype == 'IMAGE_URL':
      feature_columns.append(tf.feature_column.numeric_column(name, shape=(2048)))
    elif datatype == 'CATEGORY':
      vocab_size = get_vocab_size(transformed_data_dir, name)
      category_column = tf.feature_column.categorical_column_with_identity(name, num_buckets=vocab_size)
      indicator_column = tf.feature_column.indicator_column(category_column)
      feature_columns.append(indicator_column)
    elif datatype == 'TEXT':
      vocab_size = get_vocab_size(transformed_data_dir, name)
      indices_column = tf.feature_column.categorical_column_with_identity(name + '_indices', num_buckets=vocab_size + 1)
      weighted_column = tf.feature_column.weighted_categorical_column(indices_column, name + '_weights')
      indicator_column = tf.feature_column.indicator_column(weighted_column)
      feature_columns.append(indicator_column)

  return feature_columns
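A small usage sketch with a made-up schema; the field names, the GCS path, and the target are hypothetical, and CATEGORY/TEXT columns would additionally need the vocabulary files that get_vocab_size reads:

schema = [
    {'name': 'trip_distance', 'type': 'NUMBER'},
    {'name': 'passenger_count', 'type': 'NUMBER'},
    {'name': 'fare_amount', 'type': 'NUMBER'},   # target column, skipped below
]
feature_columns = build_feature_columns(
    schema, transformed_data_dir='gs://my-bucket/transformed', target='fare_amount')
# feature_columns now holds numeric columns for the two non-target fields.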