Python Examples of tensorflow.string_to_hash_bucket

Source File: inputs.py From vehicle_counting_tensorflow with MIT License

6 votes

def _get_features_dict(input_dict):
  """Builds the features dict from `input_dict`.

  The source id is passed through `_replace_empty_string_with_random_number`,
  hashed into one of `HASH_BINS` buckets and stored under `HASH_KEY` as int32.
  The image, true image shape and original image spatial shape are copied
  over unchanged; the original image is copied only when it is present.

  Args:
    input_dict: Mapping keyed by `fields.InputDataFields` names.

  Returns:
    A dict holding the selected entries plus the hashed source id.
  """
  input_fields = fields.InputDataFields

  source_id = input_dict[input_fields.source_id]
  source_id = _replace_empty_string_with_random_number(source_id)
  hashed_id = tf.string_to_hash_bucket_fast(source_id, HASH_BINS)

  # Entries are inserted in a fixed order: image, hash, then the shapes.
  features = {input_fields.image: input_dict[input_fields.image]}
  features[HASH_KEY] = tf.cast(hashed_id, tf.int32)
  for key in (input_fields.true_image_shape,
              input_fields.original_image_spatial_shape):
    features[key] = input_dict[key]

  # The original image is optional.
  if input_fields.original_image in input_dict:
    features[input_fields.original_image] = input_dict[
        input_fields.original_image]
  return features

Source File: experiment.py From scalable_agent with Apache License 2.0

6 votes

def _instruction(self, instruction):
    """Encodes a batch of instruction strings with a hashed-token LSTM.

    Returns the LSTM output at the last valid token of each instruction.
    """
    # Tokenise with the default delimiter and densify; '' marks padding.
    tokens = tf.string_split(instruction)
    dense_tokens = tf.sparse_tensor_to_dense(tokens, default_value='')
    is_token = tf.not_equal(dense_tokens, '')
    length = tf.reduce_sum(tf.to_int32(is_token), axis=1)

    # Hash every token into an int64 bucket id. Collisions are possible but
    # unlikely; a real vocabulary would be the alternative.
    num_hash_buckets = 1000
    token_ids = tf.string_to_hash_bucket_fast(dense_tokens, num_hash_buckets)

    # Look up token embeddings. A size of 20 seems to be enough.
    embedding_size = 20
    embed = snt.Embed(num_hash_buckets, embedding_size)
    embedding = embed(token_ids)

    # If there are no tokens at all, pad one step so the RNN has an output.
    has_no_steps = tf.equal(tf.shape(embedding)[1], 0)
    padding = tf.to_int32(has_no_steps)
    embedding = tf.pad(embedding, [[0, 0], [0, padding], [0, 0]])

    core = tf.contrib.rnn.LSTMBlockCell(64, name='language_lstm')
    output, _ = tf.nn.dynamic_rnn(core, embedding, length, dtype=tf.float32)

    # Reversing each sequence over its own length moves the last valid output
    # to position 0.
    reversed_output = tf.reverse_sequence(output, length, seq_axis=1)
    return reversed_output[:, 0]

Source File: inputs.py From MAX-Object-Detector with Apache License 2.0

6 votes

def _get_features_dict(input_dict):
  """Selects the feature entries from `input_dict` and adds a source-id hash.

  Args:
    input_dict: Mapping keyed by `fields.InputDataFields` names.

  Returns:
    A dict with the image, the int32 hash of the source id (under
    `HASH_KEY`), the true image shape, the original image spatial shape and,
    when present in `input_dict`, the original image.
  """
  keys = fields.InputDataFields

  # Empty source ids are substituted by the helper before hashing.
  raw_source_id = input_dict[keys.source_id]
  source_id = _replace_empty_string_with_random_number(raw_source_id)
  bucket = tf.string_to_hash_bucket_fast(source_id, HASH_BINS)

  features = {}
  features[keys.image] = input_dict[keys.image]
  features[HASH_KEY] = tf.cast(bucket, tf.int32)
  features[keys.true_image_shape] = input_dict[keys.true_image_shape]
  features[keys.original_image_spatial_shape] = input_dict[
      keys.original_image_spatial_shape]

  if keys.original_image in input_dict:
    features[keys.original_image] = input_dict[keys.original_image]
  return features

Source File: inputs.py From g-tensorflow-models with Apache License 2.0

6 votes

def _get_features_dict(input_dict):
  """Returns the subset of `input_dict` used as features, plus an id hash.

  The source id (after `_replace_empty_string_with_random_number`) is hashed
  into `HASH_BINS` buckets and stored as int32 under `HASH_KEY`.

  Args:
    input_dict: Mapping keyed by `fields.InputDataFields` names.

  Returns:
    The features dict.
  """
  data_fields = fields.InputDataFields

  source_id = input_dict[data_fields.source_id]
  source_id = _replace_empty_string_with_random_number(source_id)
  source_id_hash = tf.string_to_hash_bucket_fast(source_id, HASH_BINS)

  features = {data_fields.image: input_dict[data_fields.image]}
  features[HASH_KEY] = tf.cast(source_id_hash, tf.int32)
  for required_key in (data_fields.true_image_shape,
                       data_fields.original_image_spatial_shape):
    features[required_key] = input_dict[required_key]

  optional_key = data_fields.original_image
  if optional_key in input_dict:
    features[optional_key] = input_dict[optional_key]
  return features

Source File: inputs.py From multilabel-image-classification-tensorflow with MIT License

6 votes

def _get_features_dict(input_dict):
  """Assembles the features dict from the given input dict.

  Args:
    input_dict: Mapping keyed by `fields.InputDataFields` names.

  Returns:
    A dict containing the image, an int32 hash bucket of the source id stored
    under `HASH_KEY`, the true image shape, the original image spatial shape
    and, if available, the original image.
  """
  names = fields.InputDataFields

  # Hash the (possibly substituted) source id into one of HASH_BINS buckets.
  cleaned_id = _replace_empty_string_with_random_number(
      input_dict[names.source_id])
  id_bucket = tf.string_to_hash_bucket_fast(cleaned_id, HASH_BINS)

  features = {names.image: input_dict[names.image]}
  features[HASH_KEY] = tf.cast(id_bucket, tf.int32)
  features[names.true_image_shape] = input_dict[names.true_image_shape]
  features[names.original_image_spatial_shape] = input_dict[
      names.original_image_spatial_shape]

  if names.original_image in input_dict:
    features[names.original_image] = input_dict[names.original_image]
  return features

Source File: string_to_hash_bucket.py From rlgraph with Apache License 2.0

5 votes

def _graph_fn_call(self, text_inputs):
        """
        Splits each input string into words and hashes every word into a bucket.

        Args:
            text_inputs (SingleDataOp): The text input to generate hash buckets for.

        Returns:
            tuple (only when the backend is "tf"; otherwise None):
                - SingleDataOp: The hash-bucket indices (int64 unless `self.dtype` requests another type),
                    usable as input to embedding-lookups.
                - SingleDataOp: The number of non-empty words in each item of `text_inputs`.
        """
        if get_backend() != "tf":
            return None

        # Split on the configured delimiter and densify; "" fills the padding slots.
        words = tf.string_split(source=text_inputs, delimiter=self.delimiter)
        dense = tf.sparse_tensor_to_dense(sp_input=words, default_value="")

        # Count the non-padding entries along the last axis.
        is_word = tf.not_equal(x=dense, y="")
        length = tf.reduce_sum(input_tensor=tf.cast(x=is_word, dtype=tf.int32), axis=-1)

        if self.hash_function != "fast":
            hash_bucket = tf.string_to_hash_bucket_strong(
                input=dense, num_buckets=self.num_hash_buckets, key=self.hash_keys
            )
        else:
            hash_bucket = tf.string_to_hash_bucket_fast(input=dense, num_buckets=self.num_hash_buckets)

        # The hashing ops emit int64; cast only if another dtype was requested.
        if self.dtype != "int64":
            hash_bucket = tf.cast(x=hash_bucket, dtype=dtype_(self.dtype))

        # Tag the result as batch-major (batch rank 0, time rank 1).
        hash_bucket._batch_rank = 0
        hash_bucket._time_rank = 1

        return hash_bucket, length

Source File: inputs.py From Person-Detection-and-Tracking with MIT License

5 votes

def _get_features_dict(input_dict):
  """Builds the features dict from `input_dict`.

  Args:
    input_dict: Mapping keyed by `fields.InputDataFields` names.

  Returns:
    A dict with the image, the int32 hash bucket of the source id (under
    `HASH_KEY`), the true image shape and, when present, the original image.
  """
  input_fields = fields.InputDataFields

  source_id = input_dict[input_fields.source_id]
  hashed_id = tf.string_to_hash_bucket_fast(source_id, HASH_BINS)

  features = {input_fields.image: input_dict[input_fields.image]}
  features[HASH_KEY] = tf.cast(hashed_id, tf.int32)
  features[input_fields.true_image_shape] = input_dict[
      input_fields.true_image_shape]

  # The original image is optional.
  if input_fields.original_image in input_dict:
    features[input_fields.original_image] = input_dict[
        input_fields.original_image]
  return features

Source File: utils.py From realmix with Apache License 2.0

5 votes

def hash_float(x, big_num=1000 * 1000):
    """Map string tensor 'x' to a float32 in [0, 1) via a hash bucket.

    The bucket index (one of `big_num` buckets) is divided by `big_num`.
    """
    bucket = tf.string_to_hash_bucket_fast(x, big_num)
    numerator = tf.cast(bucket, tf.float32)
    denominator = tf.constant(float(big_num))
    return numerator / denominator

Source File: inputs.py From Gun-Detector with Apache License 2.0

5 votes

def _get_features_dict(input_dict):
  """Selects the feature entries from `input_dict` and adds a source-id hash.

  Args:
    input_dict: Mapping keyed by `fields.InputDataFields` names.

  Returns:
    The features dict: image, `HASH_KEY` (int32 hash bucket of the source
    id), true image shape and, if present in `input_dict`, original image.
  """
  keys = fields.InputDataFields

  bucket = tf.string_to_hash_bucket_fast(input_dict[keys.source_id], HASH_BINS)

  features = {}
  features[keys.image] = input_dict[keys.image]
  features[HASH_KEY] = tf.cast(bucket, tf.int32)
  features[keys.true_image_shape] = input_dict[keys.true_image_shape]

  if keys.original_image in input_dict:
    features[keys.original_image] = input_dict[keys.original_image]
  return features

Source File: string_to_hash_bucket_op_test.py From deep_image_model with Apache License 2.0

5 votes

def testStringToOneHashBucketFast(self):
    # With a single bucket, every string must land in bucket 0.
    with self.test_session():
      strings = tf.placeholder(tf.string)
      buckets = tf.string_to_hash_bucket_fast(strings, 1)
      feed = {strings: ['a', 'b', 'c']}
      result = buckets.eval(feed_dict=feed)

      expected = [0, 0, 0]
      self.assertAllEqual(expected, result)

Source File: string_to_hash_bucket_op_test.py From deep_image_model with Apache License 2.0

5 votes

def testStringToHashBucketsFast(self):
    # Checks known bucket assignments for four one-character strings.
    with self.test_session():
      strings = tf.placeholder(tf.string)
      buckets = tf.string_to_hash_bucket_fast(strings, 10)
      feed = {strings: ['a', 'b', 'c', 'd']}
      result = buckets.eval(feed_dict=feed)

      # Expected buckets are Fingerprint64(s) mod 10:
      #   'a' -> 12917804110809363939 -> 9
      #   'b' -> 11795596070477164822 -> 2
      #   'c' -> 11430444447143000872 -> 2
      #   'd' -> 4470636696479570465 -> 5
      expected = [9, 2, 2, 5]
      self.assertAllEqual(expected, result)

Source File: inputs.py From ros_tensorflow with Apache License 2.0

5 votes

def _get_features_dict(input_dict):
  """Returns the subset of `input_dict` used as features, plus an id hash.

  The source id is hashed into `HASH_BINS` buckets and stored as int32 under
  `HASH_KEY`.

  Args:
    input_dict: Mapping keyed by `fields.InputDataFields` names.

  Returns:
    The features dict.
  """
  data_fields = fields.InputDataFields

  source_id = input_dict[data_fields.source_id]
  source_id_hash = tf.string_to_hash_bucket_fast(source_id, HASH_BINS)

  features = {data_fields.image: input_dict[data_fields.image]}
  features[HASH_KEY] = tf.cast(source_id_hash, tf.int32)
  features[data_fields.true_image_shape] = input_dict[
      data_fields.true_image_shape]

  optional_key = data_fields.original_image
  if optional_key in input_dict:
    features[optional_key] = input_dict[optional_key]
  return features

Source File: tf_utils.py From realistic-ssl-evaluation with Apache License 2.0

5 votes

def hash_float(x, big_num=1000 * 1000):
    """Hash string tensor 'x' to a float32 value in the range [0, 1).

    'x' is assigned to one of `big_num` hash buckets and the bucket index is
    then scaled down by `big_num`.
    """
    bucket_index = tf.string_to_hash_bucket_fast(x, big_num)
    as_float = tf.cast(bucket_index, tf.float32)
    scale = tf.constant(float(big_num))
    return as_float / scale

Source File: inputs.py From BMW-TensorFlow-Training-GUI with Apache License 2.0

5 votes

def _get_features_dict(input_dict):
  """Assembles the features dict from the given input dict.

  Args:
    input_dict: Mapping keyed by `fields.InputDataFields` names.

  Returns:
    A dict containing the image, an int32 hash bucket of the source id stored
    under `HASH_KEY`, the true image shape and, if available, the original
    image.
  """
  names = fields.InputDataFields

  # Hash the source id into one of HASH_BINS buckets.
  id_bucket = tf.string_to_hash_bucket_fast(
      input_dict[names.source_id], HASH_BINS)

  features = {names.image: input_dict[names.image]}
  features[HASH_KEY] = tf.cast(id_bucket, tf.int32)
  features[names.true_image_shape] = input_dict[names.true_image_shape]

  if names.original_image in input_dict:
    features[names.original_image] = input_dict[names.original_image]
  return features

Source File: datasets.py From stereo-magnification with Apache License 2.0

5 votes

def hash_in_range(self, buckets, base, limit):
    """Tell whether the hash bucket of `self.id` lies in [base, limit).

    `self.id` is hashed into one of `buckets` buckets; the result is true
    when that bucket index is at least `base` and strictly below `limit`.
    """
    bucket_index = tf.string_to_hash_bucket_fast(self.id, buckets)
    at_or_above_base = tf.greater_equal(bucket_index, base)
    below_limit = tf.less(bucket_index, limit)
    return tf.logical_and(at_or_above_base, below_limit)

Source File: utils.py From DeepCTR with Apache License 2.0

5 votes

def call(self, x, mask=None, **kwargs):
        """Hash the input tensor into integer bucket ids.

        Non-string inputs are converted with `tf.as_string` first. When
        `self.mask_zero` is set, inputs equal to "0" or "0.0" map to id 0 and
        all other inputs are hashed into `self.num_buckets - 1` buckets and
        shifted up by one, so id 0 stays reserved for them.

        Args:
            x: Input tensor; converted to strings when its dtype is not `tf.string`.
            mask: Unused; accepted for interface compatibility.
            **kwargs: Unused.

        Returns:
            Tensor of hash-bucket ids as produced by the hashing op.
        """
        if x.dtype != tf.string:
            x = tf.as_string(x)

        # One bucket is held back for the masked value when mask_zero is on.
        num_buckets = self.num_buckets - 1 if self.mask_zero else self.num_buckets

        # Only the symbol lookup is guarded: the top-level alias is missing in
        # newer TensorFlow releases. Errors raised by the hashing op itself
        # must propagate instead of being silently retried.
        try:
            hash_fn = tf.string_to_hash_bucket_fast
        except AttributeError:
            hash_fn = tf.strings.to_hash_bucket_fast
        hash_x = hash_fn(x, num_buckets, name=None)  # weak hash

        if self.mask_zero:
            # Zero out positions whose string form is "0" or "0.0".
            not_int_zero = tf.cast(tf.not_equal(x, "0"), 'int64')
            not_float_zero = tf.cast(tf.not_equal(x, "0.0"), 'int64')
            keep = not_int_zero * not_float_zero
            hash_x = (hash_x + 1) * keep
        return hash_x

Python tensorflow.string_to_hash_bucket_fast() Examples