Python tensorflow.feature_column Examples

The following are 12 code examples of the tensorflow.feature_column module. The source file, project, and license for each example are noted above its code.
Example #1
Source File: parsing_utils.py    From estimator with Apache License 2.0
def regressor_parse_example_spec(
    feature_columns,  # pylint: disable=missing-docstring
    label_key,
    label_dtype=tf.dtypes.float32,
    label_default=None,
    label_dimension=1,
    weight_column=None):
  parsing_spec = tf.compat.v1.feature_column.make_parse_example_spec(
      feature_columns)
  label_spec = tf.io.FixedLenFeature((label_dimension,), label_dtype,
                                     label_default)
  return _add_label_and_weight_to_parsing_spec(
      parsing_spec=parsing_spec,
      label_key=label_key,
      label_spec=label_spec,
      weight_column=weight_column) 
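A rough usage sketch (not from the original project): the column names 'age' and 'income', the label key 'price', and the weight key 'sample_weight' are invented, and regressor_parse_example_spec is assumed to be importable as defined above.

import tensorflow as tf

# Hypothetical feature columns; any list of feature columns works here.
feature_columns = [
    tf.feature_column.numeric_column('age'),
    tf.feature_column.numeric_column('income'),
]

# Combined parsing spec covering the features, a float label, and a weight key.
parse_spec = regressor_parse_example_spec(
    feature_columns, label_key='price', weight_column='sample_weight')

# With a string Tensor `serialized_batch` of serialized tf.Example protos:
# parsed = tf.io.parse_example(serialized_batch, parse_spec)
# parsed['price'] then has shape [batch_size, 1]; parsed['sample_weight'] holds the weights.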
Example #2
Source File: task.py    From professional-services with Apache License 2.0
def get_feature_columns(num_hash_buckets, embedding_dimension):
  """Creates sequential input columns to `RNNEstimator`.

  Args:
    num_hash_buckets: `int`, number of embedding vectors to use.
    embedding_dimension: `int`, size of embedding vectors.

  Returns:
    List of `tf.feature_column` objects.
  """

  id_col = feature_column.sequence_categorical_column_with_hash_bucket(
      constants.TOKENS, num_hash_buckets, dtype=tf.string)
  feature_columns = [tf.feature_column.embedding_column(
      id_col, dimension=embedding_dimension)]
  return feature_columns
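The same two-step pattern outside the helper, as a minimal sketch; the feature key 'tokens', the bucket count, and the embedding size are placeholders.

import tensorflow as tf

# Hash each string token into one of 1000 buckets, keeping the sequence dimension.
id_col = tf.feature_column.sequence_categorical_column_with_hash_bucket(
    'tokens', hash_bucket_size=1000, dtype=tf.string)

# Map each bucket id to a trainable 16-dimensional vector.
token_embedding = tf.feature_column.embedding_column(id_col, dimension=16)

The resulting column is intended for sequence-aware consumers such as the `RNNEstimator` named in the docstring (or, depending on TF version, a sequence feature input layer), not for the plain dense input layer.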
Example #3
Source File: boosted_trees.py    From estimator with Apache License 2.0
def _is_numeric_column(feature_column):
  """Returns True if column is a continuous numeric that should be bucketized."""
  # These columns always produce categorical integers and do not require
  # additional bucketization.
  if isinstance(
      feature_column,
      (
          feature_column_lib.CategoricalColumn,
          fc_old._CategoricalColumn,  # pylint:disable=protected-access
          feature_column_lib.BucketizedColumn,
          fc_old._BucketizedColumn,  # pylint:disable=protected-access
          feature_column_lib.IndicatorColumn,
          fc_old._IndicatorColumn)):  # pylint:disable=protected-access
    return False
  # NumericColumns are always interpreted as continuous numerics.
  if isinstance(feature_column,
                (feature_column_lib.NumericColumn, fc_old._NumericColumn)):
    return True
  # For other dense columns, the dtype is used.
  if isinstance(feature_column,
                (feature_column_lib.DenseColumn, fc_old._DenseColumn)):
    # NOTE: GBDT requires that all DenseColumns expose a dtype attribute
    return feature_column.dtype.is_floating
  else:
    raise ValueError('Encountered unexpected column {}'.format(feature_column)) 
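A small sketch of how the check classifies common public column types (column names invented; _is_numeric_column is the module-private helper defined above):

import tensorflow as tf

age = tf.feature_column.numeric_column('age')        # continuous numeric
age_buckets = tf.feature_column.bucketized_column(   # already discretized
    age, boundaries=[18, 30, 50, 65])
country = tf.feature_column.indicator_column(        # categorical one-hot
    tf.feature_column.categorical_column_with_vocabulary_list(
        'country', ['US', 'CA', 'MX']))

# Expected results from _is_numeric_column:
#   age          -> True   (NumericColumn: continuous, to be bucketized)
#   age_buckets  -> False  (BucketizedColumn: already categorical integers)
#   country      -> False  (IndicatorColumn: already categorical integers)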
Example #4
Source File: boosted_trees.py    From estimator with Apache License 2.0
def _get_transformed_features(
    features,
    sorted_feature_columns,
    bucket_boundaries_dict=None,
):
  """Gets the transformed features from features/feature_columns pair.

  Args:
    features: a dictionary of name to Tensor.
    sorted_feature_columns: a list/set of tf.feature_column, sorted by name.
    bucket_boundaries_dict: a dict of name to list of Tensors.

  Returns:
    result_features: a list of the transformed features, sorted by the name.

  Raises:
    ValueError: when unsupported features/columns are tried.
  """
  return _get_transformed_features_and_merge_with_previously_transformed(
      features, sorted_feature_columns, sorted_feature_columns,
      bucket_boundaries_dict) 
Example #5
Source File: model.py    From estimator with Apache License 2.0
def __init__(self,
               num_features,
               exogenous_feature_columns=None,
               dtype=tf.dtypes.float32):
    """Constructor for generative models.

    Args:
      num_features: Number of features for the time series
      exogenous_feature_columns: A list of `tf.feature_column`s (for example
        `tf.feature_column.embedding_column`) corresponding to exogenous
        features which provide extra information to the model but are not part
        of the series to be predicted. Passed to
        `tf.feature_column.input_layer`.
      dtype: The floating point datatype to use.
    """
    if exogenous_feature_columns:
      self._exogenous_feature_columns = exogenous_feature_columns
    else:
      self._exogenous_feature_columns = []
    self.num_features = num_features
    self.dtype = dtype
    self._input_statistics = None
    self._graph_initialized = False
    self._stats_means = None
    self._stats_sigmas = None 
Example #6
Source File: model.py    From estimator with Apache License 2.0
def _get_exogenous_embedding_shape(self):
    """Computes the shape of the vector returned by _process_exogenous_features.

    Returns:
      The shape as a list. Does not include a batch dimension.
    """
    if not self._exogenous_feature_columns:
      return (0,)
    with tf.Graph().as_default():
      parsed_features = (
          tf.compat.v1.feature_column.make_parse_example_spec(
              self._exogenous_feature_columns))
      placeholder_features = tf.compat.v1.io.parse_example(
          serialized=tf.compat.v1.placeholder(
              shape=[None], dtype=tf.dtypes.string),
          features=parsed_features)
      embedded = tf.compat.v1.feature_column.input_layer(
          features=placeholder_features,
          feature_columns=self._exogenous_feature_columns)
      return embedded.get_shape().as_list()[1:] 
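The make_parse_example_spec -> parse_example -> input_layer pattern in isolation, as a sketch with a single made-up numeric column (TF1-compat graph APIs, as in the method above):

import tensorflow as tf

columns = [tf.feature_column.numeric_column('temperature', shape=(3,))]

with tf.Graph().as_default():
  # Describe how serialized tf.Example protos should be parsed for these columns.
  spec = tf.compat.v1.feature_column.make_parse_example_spec(columns)
  serialized = tf.compat.v1.placeholder(shape=[None], dtype=tf.dtypes.string)
  parsed = tf.compat.v1.io.parse_example(serialized=serialized, features=spec)
  # Dense concatenation of all columns; here the per-example shape is [3].
  dense = tf.compat.v1.feature_column.input_layer(
      features=parsed, feature_columns=columns)
  print(dense.get_shape().as_list()[1:])  # -> [3]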
Example #7
Source File: tensor_graph.py    From PADME with MIT License
def create_estimator_inputs(self, feature_columns, weight_column, features,
    labels, mode):
    """This is called by make_estimator() to create tensors for the inputs.

    feature_columns and weight_column are the arguments passed to make_estimator().
    Features, labels and mode are the arguments passed to the estimator's
    model function. This method creates and returns a dict with one entry for
    every Feature, Label, or Weight layer in the graph. The keys are the layers,
    and the values are the tensors that correspond to them.

    Any subclass that overrides default_generator() must also override this
    method.
    """
    if self.__class__.default_generator != TensorGraph.default_generator:
      raise ValueError(
        "Class overrides default_generator() but not create_estimator_inputs()"
      )
    tensors = {}
    for layer, column in zip(self.features, feature_columns):
      tensors[layer] = tf.feature_column.input_layer(features, [column])
    if weight_column is not None:
      tensors[self.task_weights[0]] = tf.feature_column.input_layer(
        features, [weight_column])
    if labels is not None:
      tensors[self.labels[0]] = tf.cast(labels, self.labels[0].dtype)
    return tensors 
Example #8
Source File: models.py    From professional-services with Apache License 2.0
def __init__(
            self,
            model_name,
            batch_size,
            optimizer,
            feature_names,
            model_dir=None,
            config=None,
            warm_start_from=None,
            learning_rate=0.03,
            polynomial_degree=2):
        """Initializes the classifier instance with parameters parsed from the user

        Args:
            model_name : str, name of the model
            batch_size : int, batch size
            optimizer : str, name of the optimizer to be used
            feature_columns : tf.feature_column object, Normal feature columns
            model_dir : str, directory to store model checkpoints
            config : tf.Config object, RunConfig object to configure the runtime settings
            warm_start_from : str, A string filepath to a checkpoint to warm-start from
            polynomial_degree : int, degree to which polynomial model is to be used
        """
        self.model_name = model_name
        self.batch_size = batch_size
        self.model_dir = model_dir
        self.optimizer = optimizer
        self.config = config
        self.warm_start_from = warm_start_from
        self.polynomial_degree = polynomial_degree
        self.learning_rate = learning_rate
        self.feature_names = feature_names 
Example #9
Source File: parsing_utils.py    From estimator with Apache License 2.0
def classifier_parse_example_spec(feature_columns,
                                  label_key,
                                  label_dtype=tf.dtypes.int64,
                                  label_default=None,
                                  weight_column=None):
  parsing_spec = tf.compat.v1.feature_column.make_parse_example_spec(
      feature_columns)
  label_spec = tf.io.FixedLenFeature((1,), label_dtype, label_default)
  return _add_label_and_weight_to_parsing_spec(
      parsing_spec=parsing_spec,
      label_key=label_key,
      label_spec=label_spec,
      weight_column=weight_column) 
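This classifier variant differs from Example #1 only in its defaults (int64 labels, a single label dimension). A sketch of using the spec in a tf.data pipeline; the feature key 'pixels', label key 'digit', and file name are invented, and classifier_parse_example_spec is assumed importable as defined above.

import tensorflow as tf

feature_columns = [tf.feature_column.numeric_column('pixels', shape=(784,))]
parse_spec = classifier_parse_example_spec(feature_columns, label_key='digit')

def _parse(serialized_batch):
  # Maps 'pixels' to a float Tensor and 'digit' to an int64 Tensor.
  return tf.io.parse_example(serialized_batch, parse_spec)

# dataset = (tf.data.TFRecordDataset('train.tfrecord')
#            .batch(32)
#            .map(_parse))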
Example #10
Source File: boosted_trees.py    From estimator with Apache License 2.0
def _get_float_feature_columns(sorted_feature_columns):
  """Get float feature columns.

  Args:
    sorted_feature_columns: a list of feature columns sorted by name.

  Returns:
    float_columns: a list of float feature columns sorted by name.
  """
  float_columns = []
  for feature_column in sorted_feature_columns:
    if _is_numeric_column(feature_column):
      float_columns.append(feature_column)
  return float_columns 
Example #11
Source File: parsing_utils.py    From estimator with Apache License 2.0
def _add_label_and_weight_to_parsing_spec(parsing_spec,
                                          label_key,
                                          label_spec,
                                          weight_column=None):
  """Adds label and weight spec to given parsing spec.

  Args:
    parsing_spec: A dict mapping each feature key to a `FixedLenFeature` or
      `VarLenFeature` to which label and weight spec are added.
    label_key: A string identifying the label. It means tf.Example stores labels
      with this key.
    label_spec: A `FixedLenFeature`.
    weight_column: A string or a `NumericColumn` created by
      `tf.feature_column.numeric_column` defining feature column representing
      weights. It is used to down weight or boost examples during training. It
      will be multiplied by the loss of the example. If it is a string, it is
      used as a key to fetch weight tensor from the `features`. If it is a
      `NumericColumn`, raw tensor is fetched by key `weight_column.key`, then
      weight_column.normalizer_fn is applied on it to get weight tensor.

  Returns:
    A dict mapping each feature key to a `FixedLenFeature` or `VarLenFeature`
      value.
  """
  if label_key in parsing_spec:
    raise ValueError('label should not be used as feature. '
                     'label_key: {}, features: {}'.format(
                         label_key, parsing_spec.keys()))
  parsing_spec[label_key] = label_spec

  if weight_column is None:
    return parsing_spec

  if isinstance(weight_column, six.string_types):
    weight_column = tf.feature_column.numeric_column(weight_column)

  if not isinstance(weight_column, fc.NumericColumn):
    raise ValueError('weight_column should be an instance of '
                     'tf.feature_column.numeric_column. '
                     'Given type: {} value: {}'.format(
                         type(weight_column), weight_column))

  if weight_column.key in parsing_spec:
    raise ValueError('weight_column should not be used as feature. '
                     'weight_column: {}, features: {}'.format(
                         weight_column.key, parsing_spec.keys()))

  parsing_spec.update(weight_column.parse_example_spec)
  return parsing_spec 
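A sketch of the effect on a parsing spec when weight_column is given as a string (keys are invented; the function and its imports are assumed available from parsing_utils.py):

import tensorflow as tf

spec = {'age': tf.io.FixedLenFeature((1,), tf.float32, 0.0)}
label_spec = tf.io.FixedLenFeature((1,), tf.int64)

spec = _add_label_and_weight_to_parsing_spec(
    spec, label_key='clicked', label_spec=label_spec, weight_column='weight')

# spec now additionally maps:
#   'clicked' -> FixedLenFeature((1,), tf.int64)
#   'weight'  -> FixedLenFeature((1,), tf.float32)   # from the generated NumericColumn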
Example #12
Source File: boosted_trees.py    From estimator with Apache License 2.0
def _generate_feature_col_name_mapping(sorted_feature_columns):
  """Return a list of feature column names for feature ids.

    Example:

    ```
    gender_col = indicator_column(
        categorical_column_with_vocabulary_list(
          'gender', ['male', 'female', 'n/a']))
    # Results in 3 binary features for which we store the mapping to the
    # original feature column.
    _generate_feature_col_name_mapping([gender_col])
    ['gender', 'gender', 'gender']
    ```

  Args:
    sorted_feature_columns: a list/set of tf.feature_column sorted by name.

  Returns:
    feature_col_name_mapping: a list of feature column names indexed by the
    feature ids.

  Raises:
    ValueError: when unsupported features/columns are tried.
  """
  # pylint:disable=protected-access
  names = []
  for column in sorted_feature_columns:
    if isinstance(
        column, (feature_column_lib.IndicatorColumn, fc_old._IndicatorColumn)):
      categorical_column = column.categorical_column
      if hasattr(categorical_column, 'num_buckets'):
        one_hot_depth = categorical_column.num_buckets
      else:
        assert hasattr(categorical_column, '_num_buckets')
        one_hot_depth = categorical_column._num_buckets
      for _ in range(one_hot_depth):
        names.append(categorical_column.name)
    elif isinstance(
        column,
        (feature_column_lib.BucketizedColumn, fc_old._BucketizedColumn)):
      names.append(column.name)
    elif isinstance(column,
                    (fc_old._DenseColumn, feature_column_lib.DenseColumn)):
      num_float_features = _get_variable_shape(
          column)[0] if _get_variable_shape(column).as_list() else 1
      for _ in range(num_float_features):
        names.append(column.name)
    elif isinstance(
        column,
        (feature_column_lib.CategoricalColumn, fc_old._CategoricalColumn)):
      names.append(column.name)
    else:
      raise ValueError('Got unexpected feature column type: {}'.format(column))
  return names
  # pylint:enable=protected-access
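Illustrative only: the expected mapping for a small, name-sorted column list, assuming _generate_feature_col_name_mapping is importable from boosted_trees.py. The column definitions below are invented.

import tensorflow as tf

age = tf.feature_column.numeric_column('age')
age_buckets = tf.feature_column.bucketized_column(age, boundaries=[30, 50])
gender = tf.feature_column.indicator_column(
    tf.feature_column.categorical_column_with_vocabulary_list(
        'gender', ['male', 'female', 'n/a']))

# _generate_feature_col_name_mapping([age_buckets, gender])
# -> ['age_bucketized', 'gender', 'gender', 'gender']
# The bucketized column contributes one name; the 3-value indicator column
# repeats its source categorical column's name once per one-hot feature.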