Python tensorflow.compat.v2.string() Examples

The following are 30 code examples of tensorflow.compat.v2.string(), drawn from open-source projects. You can go to the original project or source file by following the link above each example. You may also want to check out all available functions/classes of the module tensorflow.compat.v2.
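Before diving in, here is a minimal orientation sketch (variable names are illustrative, not taken from any project below) of the pattern these examples share: tf.string is a dtype, usable wherever TensorFlow expects one, such as in tf.constant, tf.TensorSpec, or tf.io feature specs.

import tensorflow.compat.v2 as tf

# tf.string denotes variable-length byte strings; it works anywhere a dtype is expected.
labels = tf.constant(["cat", "dog"], dtype=tf.string)       # a string tensor
spec = tf.TensorSpec(shape=[None], dtype=tf.string)         # an input signature
feature = tf.io.FixedLenFeature(shape=[], dtype=tf.string)  # a tf.Example parsing spec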
Example #1
Source File: librispeech_dev_clean_split.py    From armory with MIT License
def _info(self):
        return tfds.core.DatasetInfo(
            builder=self,
            description=_DESCRIPTION,
            features=tfds.features.FeaturesDict(
                {
                    "speech": tfds.features.Audio(),
                    "text": tfds.features.Text(
                        encoder_config=self.builder_config.text_encoder_config
                    ),
                    "speaker_id": tf.int64,
                    "chapter_id": tf.int64,
                    "id": tf.string,
                    "label": tfds.features.ClassLabel(names=_LABELS),
                }
            ),
            supervised_keys=("speech", "label"),
            homepage=_URL,
            citation=_CITATION,
            metadata=tfds.core.MetadataDict(sample_rate=16000,),
        ) 
Example #2
Source File: feature_column_v2_test.py    From hub with Apache License 2.0
def testDenseFeaturesInKeras(self):
    features = {
        "text": np.array(["hello world", "pair-programming"]),
    }
    label = np.int64([0, 1])
    feature_columns = [
        hub.text_embedding_column_v2("text", self.model, trainable=True),
    ]
    input_features = dict(
        text=tf.keras.layers.Input(name="text", shape=[None], dtype=tf.string))
    dense_features = tf.keras.layers.DenseFeatures(feature_columns)
    x = dense_features(input_features)
    x = tf.keras.layers.Dense(16, activation="relu")(x)
    logits = tf.keras.layers.Dense(1, activation="linear")(x)
    model = tf.keras.Model(inputs=input_features, outputs=logits)
    model.compile(
        optimizer="rmsprop", loss="binary_crossentropy", metrics=["accuracy"])
    model.fit(x=features, y=label, epochs=10)
    self.assertAllEqual(model.predict(features["text"]).shape, [2, 1]) 
Example #3
Source File: testing_utils.py    From valan with Apache License 2.0
def __init__(self, unroll_length=1):
    self._env = MockEnv(state_space_size=4, unroll_length=unroll_length)
    self._agent = MockAgent(unroll_length=unroll_length)
    self._actor_output_spec = common.ActorOutput(
        initial_agent_state=tf.TensorSpec(shape=[5], dtype=tf.float32),
        env_output=self._env.env_spec,
        agent_output=self._agent.agent_spec,
        actor_action=common.ActorAction(
            chosen_action_idx=tf.TensorSpec(
                shape=[unroll_length + 1], dtype=tf.int32),
            oracle_next_action_idx=tf.TensorSpec(
                shape=[unroll_length + 1], dtype=tf.int32)),
        loss_type=tf.TensorSpec(shape=[], dtype=tf.int32),
        info=tf.TensorSpec(shape=[], dtype=tf.string),
    ) 
Example #4
Source File: testing_utils.py    From valan with Apache License 2.0
def __init__(self, state_space_size, unroll_length=1):
    self._state_space_size = state_space_size
    # Creates simple dynamics (T stands for transition):
    #   states = [0, 1, ..., state_space_size - 1] + [STOP]
    #   actions = [-1, 1]
    #   T(s, a) = s + a  iff (s + a) is a valid state
    #           = STOP   otherwise
    self._action_space = [-1, 1]
    self._current_state = None
    self._env_spec = common.EnvOutput(
        reward=tf.TensorSpec(shape=[unroll_length + 1], dtype=tf.float32),
        done=tf.TensorSpec(shape=[unroll_length + 1], dtype=tf.bool),
        observation={
            'f1':
                tf.TensorSpec(
                    shape=[unroll_length + 1, 4, 10], dtype=tf.float32),
            'f2':
                tf.TensorSpec(
                    shape=[unroll_length + 1, 7, 10, 2], dtype=tf.float32)
        },
        info=tf.TensorSpec(shape=[unroll_length + 1], dtype=tf.string)) 
Example #5
Source File: eval_actor_test.py    From valan with Apache License 2.0
def test_run_eval_actor_once(self):
    hparams = {}
    hparams['max_iter'] = 1
    hparams['num_episodes_per_iter'] = 5
    hparams['logdir'] = os.path.join(FLAGS.test_tmpdir, 'model')

    mock_problem = testing_utils.MockProblem(unroll_length=FLAGS.unroll_length)
    agent = mock_problem.get_agent()
    ckpt_manager = _get_ckpt_manager(hparams['logdir'], agent=agent)
    ckpt_manager.save(checkpoint_number=0)

    # Create a no-op gRPC server that responds to Aggregator RPCs.
    server_address = 'unix:/tmp/eval_actor_test_grpc'
    server = grpc.Server([server_address])

    @tf.function(input_signature=[tf.TensorSpec(shape=(), dtype=tf.string)])
    def eval_enqueue(_):
      return []

    server.bind(eval_enqueue, batched=False)

    server.start()

    eval_actor.run_with_aggregator(mock_problem, server_address, hparams) 
Example #6
Source File: utils_test.py    From valan with Apache License 2.0
def testReadWriteSpecs(self):
    logdir = FLAGS.test_tmpdir
    specs = {
        'a': tf.TensorSpec(shape=(2, 3), dtype=tf.float32),
        'b': {
            'b_1': tf.TensorSpec(shape=(5,), dtype=tf.string),
            'b_2': tf.TensorSpec(shape=(5, 6), dtype=tf.int32),
        }
    }
    utils.write_specs(logdir, specs)
    # Now read and verify
    specs_read = utils.read_specs(logdir)

    def _check_equal(sp1, sp2):
      self.assertEqual(sp1, sp2)

    tf.nest.map_structure(_check_equal, specs, specs_read) 
Example #7
Source File: dataset.py    From language with Apache License 2.0
def placeholder_for_type(context,
                         type_spec,
                         name = None):
  """Produce a Tensorflow placeholder for this type_spec.

  Args:
    context: a NeuralQueryContext
    type_spec: a single type_spec (see tuple_dataset)
    name: a name to use for the placeholder

  Returns:
    a Tensorflow placeholder

  Raises:
    ValueError: if the type_spec is invalid
  """
  if type_spec == str:
    return tf.compat.v1.placeholder(tf.string, shape=[None], name=name)
  elif isinstance(type_spec, str) and context.is_type(type_spec):
    name = name or ('%s_ph' % type_spec)
    return context.placeholder(name, type_spec).tf
  else:
    raise ValueError('bad type spec %r' % type_spec) 
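A short usage sketch for the str branch (assuming an existing NeuralQueryContext named context; the name 'question' is illustrative):

ph = placeholder_for_type(context, str, name='question')
# Equivalent to tf.compat.v1.placeholder(tf.string, shape=[None], name='question')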
Example #8
Source File: squad.py    From datasets with Apache License 2.0
def _info(self):
    return tfds.core.DatasetInfo(
        builder=self,
        description=_DESCRIPTION,
        features=tfds.features.FeaturesDict({
            "id":
                tf.string,
            "title":
                tfds.features.Text(),
            "context":
                tfds.features.Text(),
            "question":
                tfds.features.Text(),
            "answers":
                tfds.features.Sequence({
                    "text": tfds.features.Text(),
                    "answer_start": tf.int32,
                }),
        }),
        # No default supervised_keys (as we have to pass both question
        # and context as input).
        supervised_keys=None,
        homepage="https://rajpurkar.github.io/SQuAD-explorer/",
        citation=_CITATION,
    ) 
Example #9
Source File: libritts.py    From datasets with Apache License 2.0
def _info(self):
    return tfds.core.DatasetInfo(
        builder=self,
        description=_DESCRIPTION,
        features=tfds.features.FeaturesDict({
            "speech": tfds.features.Audio(
                file_format="wav", sample_rate=24000),
            "text_original": tfds.features.Text(),
            "text_normalized": tfds.features.Text(),
            "speaker_id": tf.int64,
            "chapter_id": tf.int64,
            "id": tf.string,
        }),
        supervised_keys=("text_normalized", "speech"),
        homepage=_URL,
        citation=_CITATION,
        metadata=tfds.core.MetadataDict(sample_rate=24000,),
    ) 
Example #10
Source File: groove.py    From datasets with Apache License 2.0
def _info(self):
    features_dict = {
        "id": tf.string,
        "drummer":
            tfds.features.ClassLabel(
                names=["drummer%d" % i for i in range(1, 11)]),
        "type": tfds.features.ClassLabel(names=["beat", "fill"]),
        "bpm": tf.int32,
        "time_signature": tfds.features.ClassLabel(names=_TIME_SIGNATURES),
        "style": {
            "primary": tfds.features.ClassLabel(names=_PRIMARY_STYLES),
            "secondary": tf.string,
        },
        "midi": tf.string
    }
    if self.builder_config.include_audio:
      features_dict["audio"] = tfds.features.Audio(
          dtype=tf.float32, sample_rate=self.builder_config.audio_rate)
    return tfds.core.DatasetInfo(
        builder=self,
        description=_DESCRIPTION,
        features=tfds.features.FeaturesDict(features_dict),
        homepage="https://g.co/magenta/groove-dataset",
        citation=_CITATION,
    ) 
Example #11
Source File: tedlium.py    From datasets with Apache License 2.0
def _info(self):
    return tfds.core.DatasetInfo(
        builder=self,
        description=self.builder_config.description,
        features=tfds.features.FeaturesDict({
            "speech":
                tfds.features.Audio(sample_rate=16000),
            "text":
                tfds.features.Text(),
            "speaker_id":
                tf.string,
            "gender":
                tfds.features.ClassLabel(names=["unknown", "female", "male"]),
            "id":
                tf.string,
        }),
        supervised_keys=("speech", "text"),
        homepage=self.builder_config.url,
        citation=self.builder_config.citation,
        metadata=tfds.core.MetadataDict(sample_rate=16000,),
    ) 
Example #12
Source File: librispeech.py    From datasets with Apache License 2.0
def _info(self):
    return tfds.core.DatasetInfo(
        builder=self,
        description=_DESCRIPTION,
        features=tfds.features.FeaturesDict({
            "speech":
                tfds.features.Audio(sample_rate=16000),
            "text":
                tfds.features.Text(
                    encoder_config=self.builder_config.text_encoder_config),
            "speaker_id":
                tf.int64,
            "chapter_id":
                tf.int64,
            "id":
                tf.string,
        }),
        supervised_keys=("speech", "text"),
        homepage=_URL,
        citation=_CITATION,
        metadata=tfds.core.MetadataDict(sample_rate=16000,),
    ) 
Example #13
Source File: dmlab.py    From datasets with Apache License 2.0
def _parse_single_image(self, example_proto):
    """Parses single video from the input tfrecords.

    Args:
      example_proto: tfExample proto with a single video.

    Returns:
      dict with all frames, positions and actions.
    """

    feature_map = {
        "image": tf.io.FixedLenFeature(shape=[], dtype=tf.string),
        "filename": tf.io.FixedLenFeature(shape=[], dtype=tf.string),
        "label": tf.io.FixedLenFeature(shape=[], dtype=tf.int64),
    }

    parse_single = tf.io.parse_single_example(example_proto, feature_map)

    return parse_single 
Example #14
Source File: amazon_us_reviews.py    From datasets with Apache License 2.0
def _info(self):
    return tfds.core.DatasetInfo(
        builder=self,
        description=_DESCRIPTION,
        features=tfds.features.FeaturesDict({
            "data":
                collections.OrderedDict([
                    ("marketplace", tf.string), ("customer_id", tf.string),
                    ("review_id", tf.string), ("product_id", tf.string),
                    ("product_parent", tf.string), ("product_title", tf.string),
                    ("product_category", tf.string), ("star_rating", tf.int32),
                    ("helpful_votes", tf.int32), ("total_votes", tf.int32),
                    ("vine", tfds.features.ClassLabel(names=["Y", "N"])),
                    ("verified_purchase",
                     tfds.features.ClassLabel(names=["Y", "N"])),
                    ("review_headline", tf.string), ("review_body", tf.string),
                    ("review_date", tf.string)
                ])
        }),
        supervised_keys=None,
        homepage="https://s3.amazonaws.com/amazon-reviews-pds/readme.html",
        citation=_CITATION,
    ) 
Example #15
Source File: pg19.py    From datasets with Apache License 2.0
def _info(self):

    return tfds.core.DatasetInfo(
        builder=self,
        description=_DESCRIPTION,
        features=tfds.features.FeaturesDict({
            'book_text': tfds.features.Text(),
            'book_id': tf.int32,
            'book_title': tf.string,
            'publication_date': tf.string,
            'book_link': tf.string
        }),
        supervised_keys=None,
        homepage='https://github.com/deepmind/pg19',
        citation=_CITATION,
    ) 
Example #16
Source File: opinion_abstracts.py    From datasets with Apache License 2.0
def _info(self) -> tfds.core.DatasetInfo:
    config = self.builder_config
    return tfds.core.DatasetInfo(
        builder=self,
        description=_DESCRIPTION,
        features=tfds.features.FeaturesDict({
            config.name_key:
                tf.string,
            config.id_key:
                tf.string,
            config.summary_key:
                tf.string,
            config.opinions_key:
                tfds.features.Sequence(
                    tfds.features.FeaturesDict({
                        "key": tf.string,
                        "value": tf.string
                    })),
        }),
        supervised_keys=(config.opinions_key, config.summary_key),
        homepage="http://www.ccs.neu.edu/home/luwang/data.html",
        citation=_CITATION,
    ) 
Example #17
Source File: dementiabank.py    From datasets with Apache License 2.0
def _info(self):
    return tfds.core.DatasetInfo(
        builder=self,
        description=_DESCRIPTION,
        features=tfds.features.FeaturesDict({
            'audio': tfds.features.Audio(file_format='mp3', sample_rate=44100),
            'label': tfds.features.ClassLabel(names=['dementia', 'control']),
            'speaker_id': tf.string,
        }),
        supervised_keys=('audio', 'label'),
        homepage='https://dementia.talkbank.org/',
        citation=_CITATION,
    ) 
Example #18
Source File: continuous_batched_test.py    From compression with Apache License 2.0
def test_information_bounds(self):
    # `bits(training=True)` should be greater than `bits(training=False)`
    # because it is defined as an upper bound (albeit for infinite data). The
    # actual length of the bit string should always be greater than
    # `bits(training=False)` because range coding is only asymptotically
    # optimal, and because it operates on quantized probabilities.
    for scale in 2 ** tf.linspace(-2., 7., 10):
      noisy = uniform_noise.NoisyNormal(loc=0., scale=scale)
      em = ContinuousBatchedEntropyModel(noisy, 1, compression=True)
      x = noisy.base.sample([10000])
      bits_eval = em.bits(x, training=False)
      bits_training = em.bits(x, training=True)
      bits_compressed = 8 * len(em.compress(x).numpy())
      self.assertGreater(bits_training, .9975 * bits_eval)
      self.assertGreater(bits_compressed, bits_eval) 
Example #19
Source File: continuous_batched_test.py    From compression with Apache License 2.0
def test_default_kwargs_throw_error_on_compression(self):
    noisy = uniform_noise.NoisyNormal(loc=.25, scale=10.)
    em = ContinuousBatchedEntropyModel(noisy, 1)
    x = tf.zeros(10)
    with self.assertRaises(RuntimeError):
      em.compress(x)
    s = tf.zeros(10, dtype=tf.string)
    with self.assertRaises(RuntimeError):
      em.decompress(s, [10]) 
Example #20
Source File: movielens.py    From datasets with Apache License 2.0
def __init__(
      self,
      format_version: Optional[str] = None,
      table_option: Optional[str] = None,
      download_url: Optional[str] = None,
      parsing_fn: Optional[Callable[
          [str],
          Iterator[Tuple[int, Dict[str, Any]]],
      ]] = None,
      **kwargs
  ) -> None:
    """Constructs a MovieLensConfig.

    Args:
      format_version: a string to identify the format of the dataset, one of
          '_FORMAT_VERSIONS'.
      table_option: a string to identify the table to expose, one of
          '_TABLE_OPTIONS'.
      download_url: a string url for downloading the dataset.
      parsing_fn: a callable for parsing the data.
      **kwargs: keyword arguments forwarded to super.

    Raises:
      ValueError: if format_version is not one of '_FORMAT_VERSIONS' or if
          table_option is not one of '_TABLE_OPTIONS'.
    """
    if format_version not in _FORMAT_VERSIONS:
      raise ValueError('format_version must be one of %s.' % _FORMAT_VERSIONS)
    if table_option not in _TABLE_OPTIONS:
      raise ValueError('table_option must be one of %s.' % _TABLE_OPTIONS)
    super(MovieLensConfig, self).__init__(**kwargs)
    self._format_version = format_version
    self._table_option = table_option
    self._download_url = download_url
    self._parsing_fn = parsing_fn 
Example #21
Source File: census_example_v2.py    From transform with Apache License 2.0
def export_serving_model(tf_transform_output, model, output_dir):
  """Exports a keras model for serving.

  Args:
    tf_transform_output: Wrapper around output of tf.Transform.
    model: A keras model to export for serving.
    output_dir: A directory where the model will be exported to.
  """
  # The layer has to be saved to the model for keras tracking purposes.
  model.tft_layer = tf_transform_output.transform_features_layer()

  @tf.function
  def serve_tf_examples_fn(serialized_tf_examples):
    """Serving tf.function model wrapper."""
    feature_spec = RAW_DATA_FEATURE_SPEC.copy()
    feature_spec.pop(LABEL_KEY)
    parsed_features = tf.io.parse_example(serialized_tf_examples, feature_spec)
    transformed_features = model.tft_layer(parsed_features)
    outputs = model(transformed_features)
    classes_names = tf.constant([['0', '1']])
    classes = tf.tile(classes_names, [tf.shape(outputs)[0], 1])
    return {'classes': classes, 'scores': outputs}

  concrete_serving_fn = serve_tf_examples_fn.get_concrete_function(
      tf.TensorSpec(shape=[None], dtype=tf.string, name='inputs'))
  signatures = {'serving_default': concrete_serving_fn}

  # This is required in order to make this model servable with model_server.
  versioned_output_dir = os.path.join(output_dir, '1')
  model.save(versioned_output_dir, save_format='tf', signatures=signatures) 
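As a hedged follow-up (the reload path matches the code above, but serialized_example is a hypothetical tf.Example byte string): the exported model can be reloaded and its serving signature called directly.

imported = tf.saved_model.load(os.path.join(output_dir, '1'))
serving_fn = imported.signatures['serving_default']
result = serving_fn(inputs=tf.constant([serialized_example]))
# result is a dict with 'classes' and 'scores' keys, matching serve_tf_examples_fn.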
Example #22
Source File: feature_test.py    From ranking with Apache License 2.0
def test_keras_inputs_dynamic_list_shape(self):
    keras_inputs = feature.create_keras_inputs(
        context_feature_columns=self._context_feature_columns,
        example_feature_columns=self._example_feature_columns,
        size_feature_name=None)

    self.assertEqual(keras_inputs['query_length'].shape.as_list(), [None, 1])
    self.assertEqual(keras_inputs['query_length'].dtype, tf.int64)

    self.assertEqual(keras_inputs['utility'].shape.as_list(), [None, None, 1])
    self.assertEqual(keras_inputs['utility'].dtype, tf.float32)

    self.assertEqual(keras_inputs['unigrams'].dtype, tf.string) 
Example #23
Source File: feature_column_v2_test.py    From hub with Apache License 2.0
def __init__(self, returns_dict=False):
    embeddings = [
        ("", [0, 0, 0, 0]),  # OOV items are mapped to this embedding.
        ("hello world", [1, 2, 3, 4]),
        ("pair-programming", [5, 5, 5, 5]),
    ]
    keys = tf.constant([item[0] for item in embeddings], dtype=tf.string)
    indices = tf.constant(list(range(len(embeddings))), dtype=tf.int64)
    tbl_init = KeyValueTensorInitializer(keys, indices)
    self.table = HashTable(tbl_init, 0)
    self.weights = tf.Variable(
        list([item[1] for item in embeddings]), dtype=tf.float32)
    self.variables = [self.weights]
    self.trainable_variables = self.variables
    self._returns_dict = returns_dict 
Example #24
Source File: feature_column_v2_test.py    From hub with Apache License 2.0
def testMakeParseExampleSpec(self):
    text_column = hub.text_embedding_column_v2(
        "text", self.model, trainable=False)
    parsing_spec = tf.feature_column.make_parse_example_spec([text_column])
    self.assertEqual(parsing_spec,
                     {"text": tf.io.FixedLenFeature([1], dtype=tf.string)}) 
Example #25
Source File: ljspeech.py    From datasets with Apache License 2.0
def _info(self):
    return tfds.core.DatasetInfo(
        builder=self,
        description=_DESCRIPTION,
        features=tfds.features.FeaturesDict({
            "id": tf.string,
            "speech": tfds.features.Audio(sample_rate=22050),
            "text": tfds.features.Text(),
            "text_normalized": tfds.features.Text(),
        }),
        supervised_keys=("text_normalized", "speech"),
        homepage=_URL,
        citation=_CITATION,
        metadata=tfds.core.MetadataDict(sample_rate=22050),
    ) 
Example #26
Source File: dataset.py    From language with Apache License 2.0
def k_hot_array_from_string_list(context,
                                 typename,
                                 entity_names):
  """Create a numpy array encoding a k-hot set.

  Args:
    context: a NeuralExpressionContext
    typename: type of entity_names
    entity_names: list of names of type typename

  Returns:
    A k-hot-array representation of the set of entity_names. For frozen
    dictionaries, unknown entity names are mapped to the unknown_id of their
    type or discarded if the unknown_value of the type is None. Unknown entity
    names will throw an nql.EntityNameException for non-frozen dictionaries.
    It is possible for this method to return an all-zeros array.
  """
  # Empty string is not a valid entity_name.
  ids = [context.get_id(e, typename) for e in entity_names if e]
  # None is not a valid id.
  valid_ids = [x for x in ids if x is not None]
  max_id = context.get_max_id(typename)
  result = np.zeros((max_id,), dtype='float32')
  if valid_ids:
    result[valid_ids] = 1.
  return result 
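An illustrative expectation (the ids here are hypothetical, not from the source): assuming context.get_id maps 'alice' to 2 and 'bob' to 0 for a type whose max_id is 4, then:

k_hot_array_from_string_list(context, 'person_t', ['alice', 'bob', ''])
# -> array([1., 0., 1., 0.], dtype=float32)  (the empty string is skipped)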
Example #27
Source File: dataset.py    From language with Apache License 2.0
def spec_as_tf_type(spec):
  """Convert a type_spec to a tf type.

  Args:
    spec: a single specification for tuple_generator_builder

  Returns:
    type specification required by tf.data.Dataset.from_generator
  """
  if spec == str:
    return tf.string
  elif isinstance(spec, int):
    return tf.int32
  else:
    return tf.float32 
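A hedged usage sketch (the generator and the tuple spec are illustrative): the converted dtypes can be handed straight to tf.data.Dataset.from_generator.

def gen():
  # Yields (string, int, float) tuples matching the spec below.
  yield ('hello', 1, 0.5)

spec = (str, 0, 0.0)  # sample tuple spec: the str type, an int, a float
output_types = tuple(spec_as_tf_type(s) for s in spec)
# output_types == (tf.string, tf.int32, tf.float32)
ds = tf.data.Dataset.from_generator(gen, output_types=output_types)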
Example #28
Source File: imagenet_adversarial.py    From armory with MIT License
def _generate_examples(self, path):
        """Yields examples."""

        clean_key = "clean"
        adversarial_key = "adversarial"

        def _parse(serialized_example):
            ds_features = {
                "height": tf.io.FixedLenFeature([], tf.int64),
                "width": tf.io.FixedLenFeature([], tf.int64),
                "label": tf.io.FixedLenFeature([], tf.int64),
                "adv-image": tf.io.FixedLenFeature([], tf.string),
                "clean-image": tf.io.FixedLenFeature([], tf.string),
            }
            example = tf.io.parse_single_example(serialized_example, ds_features)

            img_clean = tf.io.decode_raw(example["clean-image"], tf.float32)
            img_adv = tf.io.decode_raw(example["adv-image"], tf.float32)
            # Raw float values are integer-valued in [0.0, 255.0] for both clean and adversarial images.
            img_clean = tf.cast(img_clean, tf.uint8)
            img_clean = tf.reshape(img_clean, (example["height"], example["width"], 3))
            img_adv = tf.cast(img_adv, tf.uint8)
            img_adv = tf.reshape(img_adv, (example["height"], example["width"], 3))
            return {clean_key: img_clean, adversarial_key: img_adv}, example["label"]

        ds = tf.data.TFRecordDataset(filenames=[path])
        ds = ds.map(_parse)
        default_graph = tf.compat.v1.keras.backend.get_session().graph
        ds = tfds.as_numpy(ds, graph=default_graph)

        for i, (img, label) in enumerate(ds):
            yield str(i), {
                "images": img,
                "label": label,
            } 
Example #29
Source File: covid19sum.py    From datasets with Apache License 2.0
def _info(self) -> tfds.core.DatasetInfo:
    features = {k: tf.string for k in _ADDITIONAL_FEATURES + [_ABSTRACT]}
    features[_BODY_TEXT] = tfds.features.Sequence(
        tfds.features.FeaturesDict({
            _SECTION: tf.string,
            _TEXT: tf.string
        }))
    return tfds.core.DatasetInfo(
        builder=self,
        description=_DESCRIPTION,
        features=tfds.features.FeaturesDict(features),
        supervised_keys=(_BODY_TEXT, _ABSTRACT),
        homepage=_HOMEPAGE,
        citation=_CITATION,
    ) 
Example #30
Source File: voxforge.py    From datasets with Apache License 2.0
def _info(self):
    return tfds.core.DatasetInfo(
        builder=self,
        description=_DESCRIPTION,
        features=tfds.features.FeaturesDict({
            'audio': tfds.features.Audio(file_format='wav', sample_rate=16000),
            'label': tfds.features.ClassLabel(names=LABELS),
            'speaker_id': tf.string
        }),
        supervised_keys=('audio', 'label'),
        homepage=_HOMEPAGE,
        citation=_CITATION,
    )