Python tensorflow.int() Examples
The following are 12 code examples of tensorflow.int().
The original project and source file are noted above each example.
Example #1
Source File: analyzers.py From transform with Apache License 2.0
def mean(x, reduce_instance_dims=True, name=None, output_dtype=None):
  """Computes the mean of the values of a `Tensor` over the whole dataset.

  Args:
    x: A `Tensor` or `SparseTensor`. Its type must be floating point
        (float{16|32|64}), or integral ([u]int{8|16|32|64}).
    reduce_instance_dims: By default collapses the batch and instance dimensions
        to arrive at a single scalar output. If False, only collapses the batch
        dimension and outputs a vector of the same shape as the input.
    name: (Optional) A name for this operation.
    output_dtype: (Optional) If not None, casts the output tensor to this type.

  Returns:
    A `Tensor` containing the mean. If `x` is floating point, the mean will have
    the same type as `x`. If `x` is integral, the output is cast to float32.

  Raises:
    TypeError: If the type of `x` is not supported.
  """
  with tf.compat.v1.name_scope(name, 'mean'):
    return _mean_and_var(x, reduce_instance_dims, output_dtype)[0]
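For context, a minimal, hedged sketch of how this analyzer is typically called from a user-defined preprocessing_fn; the 'age' feature name is illustrative and not part of the example above.

import tensorflow as tf
import tensorflow_transform as tft

def preprocessing_fn(inputs):
    # Center a numeric feature by its dataset-wide mean ('age' is hypothetical).
    age = tf.cast(inputs['age'], tf.float32)
    return {'age_centered': age - tft.mean(age)}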
Example #2
Source File: analyzers.py From transform with Apache License 2.0
def _get_top_k_and_frequency_threshold(top_k, frequency_threshold):
  """Validate `top_k` and `frequency_threshold` values and convert to number."""
  if top_k is not None:
    top_k = int(top_k)
    if top_k < 0:
      raise ValueError('top_k must be non-negative, but got: %r' % top_k)

  if frequency_threshold is not None:
    frequency_threshold = float(frequency_threshold)
    if frequency_threshold < 0:
      raise ValueError(
          'frequency_threshold must be non-negative, but got: %r' %
          frequency_threshold)
    elif frequency_threshold <= 1:
      # Note: this warning is misleading in the context where tokens are ranked
      # based on mutual information rather than frequency.
      tf.compat.v1.logging.warn(
          'frequency_threshold %d <= 1 is a no-op, use None instead.',
          frequency_threshold)
  return top_k, frequency_threshold
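These values are normally supplied through tft.vocabulary, which runs them through the validator above; a hedged sketch follows (the 'tokens' feature and the chosen thresholds are illustrative).

import tensorflow_transform as tft

def preprocessing_fn(inputs):
    tokens = inputs['tokens']  # hypothetical tf.string feature
    # Keep at most 10000 tokens that occur at least 5 times; both arguments
    # are validated by _get_top_k_and_frequency_threshold.
    tft.vocabulary(tokens, top_k=10000, frequency_threshold=5,
                   vocab_filename='tokens_vocab')
    return {'tokens': tokens}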
Example #3
Source File: data.py From imagenet with MIT License
def read_image_paths_labels(self):
    """
    Reads the paths of the images (from the folder structure) and the
    indexes of the labels (using an annotation file)
    """
    paths = []
    labels = []

    if self.mode == 'train':
        for i, wnid in enumerate(self.wnids):
            img_names = os.listdir(os.path.join(self.cfg.DATA_PATH, self.mode, wnid))
            for img_name in img_names:
                paths.append(os.path.join(self.cfg.DATA_PATH, self.mode, wnid, img_name))
                labels.append(i)

        # shuffling the image names and relative labels
        # (materialize the zip object so it can be shuffled under Python 3)
        d = list(zip(paths, labels))
        random.shuffle(d)
        paths, labels = zip(*d)
    else:
        with open(os.path.join(self.cfg.DATA_PATH, 'data',
                               'ILSVRC2012_validation_ground_truth.txt')) as f:
            groundtruths = f.readlines()
        groundtruths = [int(x.strip()) for x in groundtruths]

        images_names = sorted(os.listdir(os.path.join(self.cfg.DATA_PATH,
                                                      'ILSVRC2012_img_val')))
        for image_name, gt in zip(images_names, groundtruths):
            paths.append(os.path.join(self.cfg.DATA_PATH, 'ILSVRC2012_img_val', image_name))
            labels.append(gt)

    self.dataset_size = len(paths)

    return tf.constant(paths), tf.constant(labels)
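The shuffle-in-unison idiom used above can be shown in isolation; this standalone sketch uses made-up file names and labels, and notes that under Python 3 the zip object must be materialized before shuffling.

import random

paths = ['n01440764/img_0.JPEG', 'n01440764/img_1.JPEG', 'n01443537/img_0.JPEG']  # made-up
labels = [0, 0, 1]

pairs = list(zip(paths, labels))   # materialize before shuffling (Python 3)
random.shuffle(pairs)
paths, labels = zip(*pairs)        # two tuples, still aligned pairwise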
Example #4
Source File: analyzers.py From transform with Apache License 2.0
def sum(x, reduce_instance_dims=True, name=None):  # pylint: disable=redefined-builtin
  """Computes the sum of the values of a `Tensor` over the whole dataset.

  Args:
    x: A `Tensor` or `SparseTensor`. Its type must be floating point
        (float{16|32|64}), integral (int{8|16|32|64}), or unsigned
        integral (uint{8|16}).
    reduce_instance_dims: By default collapses the batch and instance dimensions
        to arrive at a single scalar output. If False, only collapses the batch
        dimension and outputs a vector of the same shape as the input.
    name: (Optional) A name for this operation.

  Returns:
    A `Tensor` containing the sum. If `x` is float32 or float64, the sum will
    have the same type as `x`. If `x` is float16, the output is cast to float32.
    If `x` is integral, the output is cast to [u]int64. If `x` is sparse and
    reduce_instance_dims is False, the output contains 0 wherever a column has
    no values across batches.

  Raises:
    TypeError: If the type of `x` is not supported.
  """
  with tf.compat.v1.name_scope(name, 'sum'):
    if reduce_instance_dims:
      if isinstance(x, tf.SparseTensor):
        x = x.values
      x = tf.reduce_sum(input_tensor=x)
    elif isinstance(x, tf.SparseTensor):
      if x.dtype == tf.uint8 or x.dtype == tf.uint16:
        x = tf.cast(x, tf.int64)
      elif x.dtype == tf.uint32 or x.dtype == tf.uint64:
        raise TypeError('Data type %r is not supported' % x.dtype)
      x = tf.sparse.reduce_sum(x, axis=0)
    else:
      x = tf.reduce_sum(input_tensor=x, axis=0)
    output_dtype, sum_fn = _sum_combine_fn_and_dtype(x.dtype)
    return _numeric_combine([x], sum_fn, reduce_instance_dims,
                            [output_dtype])[0]
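A hedged sketch of this analyzer inside a preprocessing_fn; the 'clicks' feature name is hypothetical.

import tensorflow as tf
import tensorflow_transform as tft

def preprocessing_fn(inputs):
    clicks = tf.cast(inputs['clicks'], tf.int64)   # hypothetical integer feature
    total = tft.sum(clicks)                        # scalar tf.int64 over the whole dataset
    return {'click_share': tf.cast(clicks, tf.float32) / tf.cast(total, tf.float32)}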
Example #5
Source File: analyzers.py From transform with Apache License 2.0
def count_per_key(key, key_vocabulary_filename=None, name=None):
  """Computes the count of each element of a `Tensor`.

  Args:
    key: A `Tensor` or `SparseTensor` of dtype tf.string or tf.int.
    key_vocabulary_filename: (Optional) The file name for the key-output mapping
        file. If None and key are provided, this combiner assumes the keys fit
        in memory and will not store the result in a file. If empty string, a
        file name will be chosen based on the current scope. If not an empty
        string, should be unique within a given preprocessing function.
    name: (Optional) A name for this operation.

  Returns:
    Either:
    (A) Two `Tensor`s: one the key vocab with dtype of input; the other the
        count for each key, dtype tf.int64. (if key_vocabulary_filename is
        None).
    (B) The filename where the key-value mapping is stored (if
        key_vocabulary_filename is not None).

  Raises:
    TypeError: If the type of `x` is not supported.
  """
  with tf.compat.v1.name_scope(name, 'count_per_key'):
    key_dtype = key.dtype
    batch_keys, batch_counts = tf_utils.reduce_batch_count_or_sum_per_key(
        x=None, key=key, reduce_instance_dims=True)

    output_dtype, sum_fn = _sum_combine_fn_and_dtype(tf.int64)
    numeric_combine_result = _numeric_combine(
        [batch_counts], sum_fn, True, [output_dtype],
        key=batch_keys, key_vocabulary_filename=key_vocabulary_filename)

    if key_vocabulary_filename is not None:
      return numeric_combine_result
    keys, counts = numeric_combine_result
    if key_dtype is not tf.string:
      keys = tf.strings.to_number(keys, key_dtype)
    return keys, counts
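A hedged sketch of calling this analyzer on a string feature; 'country' is a hypothetical feature name, and the returned tensors are only materialized once the tf.Transform analysis phase runs.

import tensorflow_transform as tft

def preprocessing_fn(inputs):
    country = inputs['country']                 # hypothetical tf.string feature
    keys, counts = tft.count_per_key(country)   # per-key counts over the dataset
    return {'country': country}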
Example #6
Source File: analyzers.py From transform with Apache License 2.0
def calculate_recommended_min_diff_from_avg(dataset_size):
  """Calculates a recommended min_diff_from_avg argument to tft.vocabulary.

  Computes a default min_diff_from_average parameter based on the size of the
  dataset. The MI (or AMI) of a token x label will be pushed to zero whenever
  the difference between the observed and the expected (average) cooccurrence
  with the label is < min_diff_from_average. This can be thought of as a
  regularization parameter for mutual information based vocabularies.

  Args:
    dataset_size: The number of records in the dataset. The bigger the dataset,
        the higher the min_diff_from_average will be.

  Returns:
    An integer that is recommended to use as the min_diff_from_avg parameter of
    `vocabulary`.
  """
  # The minimum and maximum min_diff_from_avg parameter to use.
  min_value, max_value = 2, 25
  # Heuristics for a "small" and "large" dataset. The selected parameter will
  # be between min_value and max_value depending on where the dataset_size
  # falls relative to these values.
  small_dataset_size, large_dataset_size = 10000, 1000000
  return int(
      builtin_min(
          max_value,
          builtin_max(min_value,
                      (dataset_size - small_dataset_size) /
                      (large_dataset_size - small_dataset_size) *
                      (max_value - min_value) + min_value)))
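The interpolation can be traced with plain built-ins; the sketch below uses an illustrative dataset size of 250,000 records.

dataset_size = 250000                       # illustrative value
min_value, max_value = 2, 25
small, large = 10000, 1000000

recommended = int(min(max_value,
                      max(min_value,
                          (dataset_size - small) / (large - small)
                          * (max_value - min_value) + min_value)))
# recommended == 7 for this dataset size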
Example #7
Source File: analyzers.py From transform with Apache License 2.0
def __init__(self,
             num_quantiles,
             epsilon,
             bucket_numpy_dtype,
             always_return_num_quantiles=False,
             has_weights=False,
             output_shape=None,
             include_max_and_min=False,
             feature_shape=None):
  self._num_quantiles = num_quantiles
  self._epsilon = epsilon
  self._bucket_numpy_dtype = bucket_numpy_dtype
  self._always_return_num_quantiles = always_return_num_quantiles
  self._has_weights = has_weights
  self._output_shape = output_shape
  self._include_max_and_min = include_max_and_min
  if feature_shape is None:
    self._feature_shape = []
  elif isinstance(feature_shape, int):
    self._feature_shape = [feature_shape]
  else:
    self._feature_shape = feature_shape
  self._num_features = int(np.prod(self._feature_shape, dtype=np.int64))
  if not self._always_return_num_quantiles and self._num_features > 1:
    raise NotImplementedError(
        'Elementwise quantiles requires same boundary count.')
  self._tf_config = None  # Assigned in initialize_local_state().
  self._graph_state = None  # Lazily created in _get_graph_state().
Example #8
Source File: beam_search_decoder_cell.py From im2latex with Apache License 2.0
def __init__(self, embeddings, cell, batch_size, start_token, end_token,
             beam_size=5, div_gamma=1, div_prob=0):
    """Initializes parameters for Beam Search

    Args:
        embeddings: (tf.Variable) shape = (vocab_size, embedding_size)
        cell: instance of Cell that defines a step function, etc.
        batch_size: tf.int extracted with tf.shape or int
        start_token: id of start token
        end_token: int, id of the end token
        beam_size: int, size of the beam
        div_gamma: float, amount of penalty to add to beam hypo for diversity.
            Coefficient of penalty will be log(div_gamma). Use value between 0
            and 1. (1 means no penalty)
        div_prob: only apply div penalty with probability div_prob.
            div_prob = 0. means never apply penalty
    """
    self._embeddings = embeddings
    self._cell = cell
    self._dim_embeddings = embeddings.shape[-1].value
    self._batch_size = batch_size
    self._start_token = start_token
    self._beam_size = beam_size
    self._end_token = end_token
    self._vocab_size = embeddings.shape[0].value
    self._div_gamma = float(div_gamma)
    self._div_prob = float(div_prob)
Example #9
Source File: beam_search_decoder_cell.py From im2latex with Apache License 2.0
def mask_probs(probs, end_token, finished):
    """
    Args:
        probs: tensor of shape [batch_size, beam_size, vocab_size]
        end_token: (int)
        finished: tensor of shape [batch_size, beam_size], dtype = tf.bool
    """
    # one hot of shape [vocab_size]
    vocab_size = probs.shape[-1].value
    one_hot = tf.one_hot(end_token, vocab_size, on_value=0.,
                         off_value=probs.dtype.min, dtype=probs.dtype)
    # expand dims of shape [batch_size, beam_size, 1]
    finished = tf.expand_dims(tf.cast(finished, probs.dtype), axis=-1)

    return (1. - finished) * probs + finished * one_hot
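The masking trick can be checked eagerly with toy shapes; the sketch below uses current TF 2.x ops and invented batch, beam, and vocabulary sizes.

import tensorflow as tf

probs = tf.math.log(tf.fill([1, 2, 4], 0.25))    # toy [batch=1, beam=2, vocab=4] log-probs
finished = tf.constant([[True, False]])          # first beam already emitted end_token
end_token = 3

one_hot = tf.one_hot(end_token, 4, on_value=0.,
                     off_value=probs.dtype.min, dtype=probs.dtype)
mask = tf.expand_dims(tf.cast(finished, probs.dtype), axis=-1)
masked = (1. - mask) * probs + mask * one_hot
# Finished beams keep probability mass only on end_token; others are unchanged.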
Example #10
Source File: data.py From imagenet with MIT License
def input_parser(self, img_path, label):
    """
    Parse a single example.
    Reads the image tensor (and preprocesses it) given its path and produces
    a one-hot label given an integer index

    Args:
        img_path: a TF string tensor representing the path of the image
        label: a TF int tensor representing an index in the one-hot vector

    Returns:
        a preprocessed tf.float32 tensor of shape (height, width, channels)
        a tf.int one-hot tensor
    """
    one_hot = tf.one_hot(label, self.cfg.NUM_CLASSES)

    # image reading
    image = self.read_image(img_path)
    image_shape = tf.shape(image)

    # resize of the image (setting largest border to 256px)
    new_h = tf.cond(image_shape[0] < image_shape[1],
                    lambda: tf.div(tf.multiply(256, image_shape[1]), image_shape[0]),
                    lambda: 256)
    new_w = tf.cond(image_shape[0] < image_shape[1],
                    lambda: 256,
                    lambda: tf.div(tf.multiply(256, image_shape[0]), image_shape[1]))
    image = tf.image.resize_images(image, size=[new_h, new_w])

    if self.mode == 'test':
        # take random crops for testing
        patches = []
        for k in range(self.cfg.K_PATCHES):
            patches.append(tf.random_crop(image, size=[self.cfg.IMG_SHAPE[0],
                                                       self.cfg.IMG_SHAPE[1],
                                                       self.cfg.IMG_SHAPE[2]]))
        image = patches
    else:
        image = tf.random_crop(image, size=[self.cfg.IMG_SHAPE[0],
                                            self.cfg.IMG_SHAPE[1],
                                            self.cfg.IMG_SHAPE[2]])

    if self.mode == 'train':
        # some easy data augmentation
        image = tf.image.random_flip_left_right(image)
        image = tf.image.random_contrast(image, lower=0.8, upper=1.2)

    # normalization
    image = tf.to_float(image)
    image = tf.subtract(image, self.cfg.IMAGENET_MEAN)

    return image, one_hot
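The crop, flip, and normalization steps can be sketched standalone with current TF ops; the image size, crop size, and per-channel mean below are illustrative, and the TF 1.x calls in the original (tf.random_crop, tf.to_float) have TF 2.x counterparts (tf.image.random_crop, tf.cast).

import tensorflow as tf

image = tf.random.uniform([300, 400, 3], maxval=255.0)   # stand-in for a decoded image
label = tf.constant(7)                                    # illustrative class index

one_hot = tf.one_hot(label, depth=1000)
crop = tf.image.random_crop(image, size=[224, 224, 3])
crop = tf.image.random_flip_left_right(crop)
crop = tf.image.random_contrast(crop, lower=0.8, upper=1.2)
crop = crop - tf.constant([123.68, 116.78, 103.94])       # illustrative per-channel mean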
Example #11
Source File: mappers.py From transform with Apache License 2.0
def bucketize_per_key(x, key, num_buckets, epsilon=None, name=None):
  """Returns a bucketized column, with a bucket index assigned to each input.

  Args:
    x: A numeric input `Tensor` or `SparseTensor` with rank 1, whose values
        should be mapped to buckets. `SparseTensor`s will have their non-missing
        values mapped and missing values left as missing.
    key: A Tensor or `SparseTensor` with the same shape as `x` and dtype
        tf.string. If `x` is a `SparseTensor`, `key` must exactly match `x` in
        everything except values, i.e. indices and dense_shape must be
        identical.
    num_buckets: Values in the input `x` are divided into approximately
        equal-sized buckets, where the number of buckets is num_buckets.
    epsilon: (Optional) see `bucketize`.
    name: (Optional) A name for this operation.

  Returns:
    A `Tensor` of the same shape as `x`, with each element in the returned
    tensor representing the bucketized value. Bucketized value is in the range
    [0, actual_num_buckets).

  Raises:
    ValueError: If value of num_buckets is not > 1.
  """
  with tf.compat.v1.name_scope(name, 'bucketize_per_key'):
    if not isinstance(num_buckets, int):
      raise TypeError(
          'num_buckets must be an int, got {}'.format(type(num_buckets)))

    if num_buckets < 1:
      raise ValueError('Invalid num_buckets {}'.format(num_buckets))

    if epsilon is None:
      # See explanation in args documentation for epsilon.
      epsilon = min(1.0 / num_buckets, 0.01)

    (key_vocab, bucket_boundaries, scale_factor_per_key, shift_per_key,
     actual_num_buckets) = (
         analyzers._quantiles_per_key(  # pylint: disable=protected-access
             x.values if isinstance(x, tf.SparseTensor) else x,
             key.values if isinstance(key, tf.SparseTensor) else key,
             num_buckets, epsilon))
    return _apply_buckets_with_keys(x, key, key_vocab, bucket_boundaries,
                                    scale_factor_per_key, shift_per_key,
                                    actual_num_buckets)
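A hedged sketch of calling this mapper from a preprocessing_fn; the 'price' and 'category' feature names are hypothetical.

import tensorflow_transform as tft

def preprocessing_fn(inputs):
    # Bucketize 'price' separately per 'category' key (hypothetical features).
    price_bucket = tft.bucketize_per_key(inputs['price'], inputs['category'],
                                         num_buckets=10)
    return {'price_bucket': price_bucket}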
Example #12
Source File: analyzers.py From transform with Apache License 2.0
def histogram(x, boundaries=None, categorical=False, name=None):
  """Computes a histogram over x, given the bin boundaries or bin count.

  Ex (1):
    counts, boundaries = histogram([0, 1, 0, 1, 0, 3, 0, 1], range(5))
    counts: [4, 3, 0, 1, 0]
    boundaries: [0, 1, 2, 3, 4]

  Ex (2):
    Can be used to compute class weights.
    counts, classes = histogram([0, 1, 0, 1, 0, 3, 0, 1], categorical=True)
    probabilities = counts / tf.reduce_sum(counts)
    class_weights = dict(map(lambda (a, b): (a.numpy(), 1.0 / b.numpy()),
                             zip(classes, probabilities)))

  Args:
    x: A `Tensor` or `SparseTensor`.
    boundaries: (Optional) A `Tensor` or `int` used to build the histogram;
        ignored if `categorical` is True. If possible, provide boundaries as
        multiple sorted values. Defaults to 10 intervals over the 0-1 range, or
        find the min/max if an int is provided (not recommended because
        multi-phase analysis is inefficient).
    categorical: (Optional) A `bool` that treats `x` as discrete values if true.
    name: (Optional) A name for this operation.

  Returns:
    counts: The histogram, as counts per bin.
    boundaries: A `Tensor` used to build the histogram representing boundaries.
  """
  with tf.compat.v1.name_scope(name, 'histogram'):
    # We need to flatten because BoostedTreesBucketize expects a rank-1 input.
    x = x.values if isinstance(x, tf.SparseTensor) else tf.reshape(x, [-1])
    if categorical:
      x_dtype = x.dtype
      x = x if x_dtype == tf.string else tf.strings.as_string(x)
      elements, counts = count_per_key(x)
      if x_dtype != elements.dtype:
        elements = tf.strings.to_number(elements, tf.int64)
      return counts, elements

    if boundaries is None:
      boundaries = tf.range(11, dtype=tf.float32) / 10.0
    elif isinstance(boundaries, int) or tf.rank(boundaries) == 0:
      min_value, max_value = _min_and_max(x, True)
      boundaries = tf.linspace(tf.cast(min_value, tf.float32),
                               tf.cast(max_value, tf.float32),
                               boundaries)

    # Shift the boundaries slightly to account for floating point errors,
    # and due to the fact that the rightmost boundary is essentially ignored.
    boundaries = tf.expand_dims(tf.cast(boundaries, tf.float32), 0) - 0.0001

    bucket_indices = tf_utils.apply_bucketize_op(tf.cast(x, tf.float32),
                                                 boundaries,
                                                 remove_leftmost_boundary=True)

    bucket_vocab, counts = count_per_key(tf.strings.as_string(bucket_indices))
    counts = tf_utils.reorder_histogram(bucket_vocab, counts,
                                        tf.size(boundaries) - 1)
    return counts, boundaries
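The class-weight recipe in Ex (2) uses a Python 2 tuple-unpacking lambda; the arithmetic itself can be reproduced with plain Python once counts and classes have been materialized (the toy values below mirror the docstring example).

counts = [4, 3, 1]    # occurrences of classes 0, 1 and 3 in the docstring example
classes = [0, 1, 3]

total = sum(counts)
class_weights = {c: total / n for c, n in zip(classes, counts)}
# {0: 2.0, 1: 2.666..., 3: 8.0}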