Python Examples of tensorflow_hub.load

Source File: train_utils.py From rasa-for-botfront with Apache License 2.0

6 votes

def load_tf_hub_model(model_url: Text) -> Any:
    """Return the TFHub model at `model_url`, retrying once with a new cache.

    The first attempt uses the currently configured TFHub cache. If that
    raises `OSError`, the `TFHUB_CACHE_DIR` environment variable is pointed
    at a freshly created temporary directory and the load is attempted a
    second time. The environment variable is left set afterwards.
    """
    import os

    import tensorflow_hub as tfhub

    # Not referenced directly; needed to load the ConveRT model.
    # noinspection PyUnresolvedReferences
    import tensorflow_text

    try:
        model = tfhub.load(model_url)
    except OSError:
        # Takes care of cases where other files are already stored in the
        # default TFHUB_CACHE_DIR: switch to a temporary directory and retry.
        fallback_cache_dir = io_utils.create_temporary_directory()
        os.environ["TFHUB_CACHE_DIR"] = fallback_cache_dir
        model = tfhub.load(model_url)
    return model

Source File: embedding_generator.py From hub with Apache License 2.0

6 votes

def generate_embeddings(items, module_url, random_projection_matrix=None):
  """Embeds `items` with a TF-Hub module, optionally projecting the result.

  The module is loaded on first use and kept in the module-level global
  `embed_fn`; once that global is set, `module_url` is no longer consulted.

  Args:
    items: The items to generate embeddings for.
    module_url: The TF-Hub module url, used only while `embed_fn` is None.
    random_projection_matrix: Optional numpy array of random projection
      weights; when given, the embeddings are multiplied by it via `.dot`.

  Returns:
    A `(items, embeddings)` tuple.
  """
  global embed_fn
  if embed_fn is None:
    embed_fn = hub.load(module_url)

  vectors = embed_fn(items).numpy()
  if random_projection_matrix is None:
    return items, vectors
  return items, vectors.dot(random_projection_matrix)

Source File: question_answering.py From models with Apache License 2.0

6 votes

def __init__(self, params=cfg.TaskConfig, logging_dir=None):
    """Configures the question answering task.

    Args:
      params: Task config. It is read for `hub_module_url`,
        `init_checkpoint` and `validation_data`.
      logging_dir: Forwarded unchanged to the parent constructor.

    Raises:
      ValueError: If both `hub_module_url` and `init_checkpoint` are set, or
        if the validation tokenization is neither 'WordPiece' nor
        'SentencePiece'.
    """
    super(QuestionAnsweringTask, self).__init__(params, logging_dir)
    if params.hub_module_url and params.init_checkpoint:
      raise ValueError('At most one of `hub_module_url` and '
                       '`init_checkpoint` can be specified.')
    self._hub_module = (
        hub.load(params.hub_module_url) if params.hub_module_url else None)

    # Pick the SQuAD helper library matching the configured tokenizer.
    tokenization = params.validation_data.tokenization
    if tokenization == 'WordPiece':
      self.squad_lib = squad_lib_wp
    elif tokenization == 'SentencePiece':
      self.squad_lib = squad_lib_sp
    else:
      raise ValueError('Unsupported tokenization method: {}'.format(
          tokenization))

    # Evaluation data is preprocessed only when an input path is configured.
    if params.validation_data.input_path:
      preprocessed = self._preprocess_eval_data(params.validation_data)
      (self._tf_record_input_path, self._eval_examples,
       self._eval_features) = preprocessed

Source File: sentence_prediction.py From models with Apache License 2.0

6 votes

def build_inputs(self, params, input_context=None):
    """Returns tf.data.Dataset for sentence_prediction task.

    When `params.input_path` is the literal 'dummy', an endlessly repeating
    dataset of zero-filled features and labels is returned instead of data
    read through the data loader factory.
    """
    if params.input_path != 'dummy':
      return data_loader_factory.get_data_loader(params).load(input_context)

    def dummy_data(_):
      zero_ids = tf.zeros((1, params.seq_length), dtype=tf.int32)
      features = dict(
          input_word_ids=zero_ids,
          input_mask=zero_ids,
          input_type_ids=zero_ids)
      labels = tf.zeros((1, 1), dtype=tf.int32)
      return (features, labels)

    # One element, repeated forever, each mapped to the same dummy example.
    return tf.data.Dataset.range(1).repeat().map(
        dummy_data, num_parallel_calls=tf.data.experimental.AUTOTUNE)

Source File: export_test_v2.py From hub with Apache License 2.0

5 votes

def testEmptyInput(self):
    """Empty-string tokens are expected to embed to all-zero vectors."""
    export_v2.export_module_from_file(
        embedding_file=self._embedding_file_path,
        export_path=self.get_temp_dir(),
        num_oov_buckets=1,
        num_lines_to_ignore=0,
        num_lines_to_use=None)
    exported_module = hub.load(self.get_temp_dir())

    empty_tokens = tf.constant(["", "", ""])
    actual = exported_module(empty_tokens)

    expected = [[0.0, 0.0, 0.0], [0.0, 0.0, 0.0], [0.0, 0.0, 0.0]]
    self.assertAllClose(actual.numpy(), expected, rtol=0.02)

Source File: tagging.py From models with Apache License 2.0

5 votes

def build_inputs(self, params, input_context=None):
    """Returns tf.data.Dataset for the tagging task.

    When `params.input_path` is the literal 'dummy', an endlessly repeating
    dataset of zero-filled features with random labels is returned instead
    of data read through the data loader factory.
    """
    if params.input_path != 'dummy':
      return data_loader_factory.get_data_loader(params).load(input_context)

    def dummy_data(_):
      zero_ids = tf.zeros((1, params.seq_length), dtype=tf.int32)
      features = dict(
          input_word_ids=zero_ids,
          input_mask=zero_ids,
          input_type_ids=zero_ids)

      # Include some label_id as -1, which will be ignored in loss/metrics.
      labels = tf.random.uniform(
          shape=(1, params.seq_length),
          minval=-1,
          maxval=len(self.task_config.class_names),
          dtype=tf.dtypes.int32)
      return (features, labels)

    # One element, repeated forever, each mapped to a dummy example.
    return tf.data.Dataset.range(1).repeat().map(
        dummy_data, num_parallel_calls=tf.data.experimental.AUTOTUNE)

Source File: tagging.py From models with Apache License 2.0

5 votes

def __init__(self, params=cfg.TaskConfig, logging_dir=None):
    """Configures the tagging task.

    Args:
      params: Task config. It is read for `hub_module_url`,
        `init_checkpoint` and `class_names`.
      logging_dir: Forwarded unchanged to the parent constructor.

    Raises:
      ValueError: If both `hub_module_url` and `init_checkpoint` are set, or
        if `class_names` is empty.
    """
    super(TaggingTask, self).__init__(params, logging_dir)
    if params.hub_module_url and params.init_checkpoint:
      raise ValueError('At most one of `hub_module_url` and '
                       '`init_checkpoint` can be specified.')
    if not params.class_names:
      raise ValueError('TaggingConfig.class_names cannot be empty.')

    # The hub module is loaded only when a URL is configured.
    self._hub_module = (
        hub.load(params.hub_module_url) if params.hub_module_url else None)

Source File: question_answering.py From models with Apache License 2.0

5 votes

def build_inputs(self, params, input_context=None):
    """Returns tf.data.Dataset for the question answering task.

    When `params.input_path` is the literal 'dummy', an endlessly repeating
    dataset of dummy examples is returned. Otherwise the data loader reads
    from `params` as given when training, and from a copy of `params` whose
    `input_path` is replaced by `self._tf_record_input_path` when not.
    """
    if params.input_path == 'dummy':
      # Dummy training data for unit test.
      def dummy_data(_):
        zero_ids = tf.zeros((1, params.seq_length), dtype=tf.int32)
        features = dict(
            input_word_ids=zero_ids,
            input_mask=zero_ids,
            input_type_ids=zero_ids)
        labels = dict(
            start_positions=tf.constant(0, dtype=tf.int32),
            end_positions=tf.constant(1, dtype=tf.int32))
        return (features, labels)

      return tf.data.Dataset.range(1).repeat().map(
          dummy_data, num_parallel_calls=tf.data.experimental.AUTOTUNE)

    if params.is_training:
      loader_params = params
    else:
      loader_params = params.replace(input_path=self._tf_record_input_path)

    loader = data_loader_factory.get_data_loader(loader_params)
    return loader.load(input_context)

Source File: sentence_prediction.py From models with Apache License 2.0

5 votes

def __init__(self, params=cfg.TaskConfig, logging_dir=None):
    """Configures the sentence prediction task.

    Args:
      params: Task config. It is read for `hub_module_url`,
        `init_checkpoint` and `metric_type`.
      logging_dir: Forwarded unchanged to the parent constructor.

    Raises:
      ValueError: If both `hub_module_url` and `init_checkpoint` are set.
    """
    super(SentencePredictionTask, self).__init__(params, logging_dir)
    if params.hub_module_url and params.init_checkpoint:
      raise ValueError('At most one of `hub_module_url` and '
                       '`init_checkpoint` can be specified.')

    # The hub module is loaded only when a URL is configured.
    self._hub_module = (
        hub.load(params.hub_module_url) if params.hub_module_url else None)
    self.metric_type = params.metric_type

Source File: tfhub_memory_usage_benchmark.py From models with Apache License 2.0

5 votes

def load_model(self, hub_model_handle):
    """Loads a TF Hub module.

    Args:
      hub_model_handle: Handle passed straight through to `hub.load`.

    The loaded object is neither returned nor stored on `self`; the call is
    made only for the effect of loading the module.
    """
    hub.load(hub_model_handle)

Source File: export_test_v2.py From hub with Apache License 2.0

5 votes

def testNumLinesUse(self):
    """Exports with `num_lines_to_use=2` and checks three token lookups.

    The third token ("mouse") is expected to embed to zeros.
    """
    export_v2.export_module_from_file(
        embedding_file=self._embedding_file_path,
        export_path=self.get_temp_dir(),
        num_oov_buckets=1,
        num_lines_to_ignore=0,
        num_lines_to_use=2)
    exported_module = hub.load(self.get_temp_dir())

    words = tf.constant(["cat", "dog", "mouse"])
    actual = exported_module(words)

    expected = [[1.1, 2.56, 3.45], [1, 2, 3], [0, 0, 0]]
    self.assertAllClose(actual.numpy(), expected, rtol=0.02)

Source File: export_test_v2.py From hub with Apache License 2.0

5 votes

def testEmptyLeading(self):
    """Checks a batch whose first entry is empty and second is two words."""
    export_v2.export_module_from_file(
        embedding_file=self._embedding_file_path,
        export_path=self.get_temp_dir(),
        num_oov_buckets=1,
        num_lines_to_ignore=0,
        num_lines_to_use=None)
    exported_module = hub.load(self.get_temp_dir())

    sentences = tf.constant(["", "cat dog"])
    actual = exported_module(sentences)

    expected = [[0.0, 0.0, 0.0], [1.49, 3.22, 4.56]]
    self.assertAllClose(actual.numpy(), expected, rtol=0.02)

Source File: hub_module_tokenizer.py From text with Apache License 2.0

5 votes

def __init__(self, hub_module_handle):
    """Initializes a new HubModuleTokenizer instance.

    Loads the module with an empty tag set, keeps its 'default' signature
    for later use, and increments the tokenizer-creation counter by one.

    Args:
      hub_module_handle: A string handle accepted by hub.load().  Supported
        cases include (1) a local path to a directory containing a module, and
        (2) a handle to a module uploaded to e.g., https://tfhub.dev
    """
    super(HubModuleTokenizer, self).__init__()
    loaded_module = hub.load(hub_module_handle, tags=set())
    self._hub_module_signature = loaded_module.signatures['default']
    _tf_text_hub_module_tokenizer_create_counter.get_cell().increase_by(1)

Source File: export_test_v2.py From hub with Apache License 2.0

5 votes

def testExportTextEmbeddingModule(self):
    """Exports the full embedding file and checks a mixed batch of inputs.

    The batch covers a single word, a repeated word, punctuation-bearing
    phrases and the empty string.
    """
    export_v2.export_module_from_file(
        embedding_file=self._embedding_file_path,
        export_path=self.get_temp_dir(),
        num_oov_buckets=1,
        num_lines_to_ignore=0,
        num_lines_to_use=None)
    exported_module = hub.load(self.get_temp_dir())

    sentences = tf.constant(
        ["cat", "cat cat", "lizard. dog", "cat? dog", ""])
    actual = exported_module(sentences)

    expected = [
        [1.11, 2.56, 3.45],
        [1.57, 3.62, 4.88],
        [0.70, 1.41, 2.12],
        [1.49, 3.22, 4.56],
        [0.0, 0.0, 0.0],
    ]
    self.assertAllClose(actual.numpy(), expected, rtol=0.02)

Source File: export_test_v2.py From hub with Apache License 2.0

5 votes

def testEmbeddingLoaded(self):
    """Loads the embedding file and checks the shapes of both results."""
    vocab, vectors = export_v2.load(
        self._embedding_file_path,
        export_v2.parse_line,
        num_lines_to_ignore=0,
        num_lines_to_use=None)

    # Expect a vocabulary of shape (3,) and an embedding array of (3, 3).
    self.assertEqual((3,), np.shape(vocab))
    self.assertEqual((3, 3), np.shape(vectors))

Source File: export_test.py From hub with Apache License 2.0

5 votes

def test_empty_input(self):
    """Trains for one epoch, exports, reloads and feeds an all-zero input.

    The reloaded model's output for a [1, 28, 28, 1] uint8 input of zeros is
    expected to have shape [1, 10].
    """
    model_dir_pattern = "%s/model/1"
    export.train_and_export(
        epoch=1,
        dataset=self.mock_dataset,
        export_path=model_dir_pattern % self.get_temp_dir())
    model = hub.load(model_dir_pattern % self.get_temp_dir())

    zero_input = tf.zeros([1, 28, 28, 1], dtype=tf.uint8).numpy()
    output_ = model(zero_input)
    self.assertEqual(output_.shape, [1, 10])

Source File: similarity_finder.py From hub with Apache License 2.0

5 votes

def __init__(
      self,
      module_url,
      index_file_path,
      mapping_file_path,
      dimensions,
      random_projection_matrix_file,
  ):
    """Loads the embedding module, Annoy index and mapping file.

    Args:
      module_url: TF-Hub module handle passed to `hub.load`.
      index_file_path: Path of the Annoy index file to load.
      mapping_file_path: Path of the pickled mapping file.
      dimensions: Accepted but not used; the index dimensionality is
        derived from the module output or the projection matrix instead.
      random_projection_matrix_file: Path of an optional pickled projection
        matrix; it is loaded only if the file exists.
    """
    # Load the TF-Hub module
    print('Loading the TF-Hub module...')
    self.embed_fn = hub.load(module_url)
    print('TF-hub module is loaded.')

    # Probe the module with an empty string to learn its output width.
    index_dim = self.embed_fn(['']).shape[1]

    self.random_projection_matrix = None
    if tf.io.gfile.exists(random_projection_matrix_file):
      # NOTE(review): pickle.load runs arbitrary code from the file; only
      # point this at trusted files.
      with open(random_projection_matrix_file, 'rb') as matrix_file:
        self.random_projection_matrix = pickle.load(matrix_file)
      # A projection matrix, when present, decides the index width.
      index_dim = self.random_projection_matrix.shape[1]

    self.index = annoy.AnnoyIndex(index_dim, metric=_METRIC)
    self.index.load(index_file_path, prefault=True)
    print('Annoy index is loaded.')

    with open(mapping_file_path, 'rb') as mapping_file:
      self.mapping = pickle.load(mapping_file)
    print('Mapping file is loaded.')

Source File: embedding_generator.py From hub with Apache License 2.0

5 votes

def run(args):
  """Runs the embedding generation Beam pipeline.

  The output directory is removed if it exists and then recreated. The
  TF-Hub module is probed once with an empty string to learn its embedding
  width, random projection weights are generated for that width, and the
  pipeline then reads sentences, batches them, embeds them and writes
  TFRecord files under `args.embed_output_dir`.

  Args:
    args: Parsed arguments. Read here for `embed_output_dir`, `module_url`,
      `projected_dim` and `data_file_pattern`; all attributes are also
      forwarded to Beam's `PipelineOptions`.
  """

  if tf.io.gfile.exists(args.embed_output_dir):
    print('Removing embedding output directory...')
    tf.io.gfile.rmtree(args.embed_output_dir)
  print('Creating empty output directory...')
  tf.io.gfile.makedirs(args.embed_output_dir)

  options = beam.options.pipeline_options.PipelineOptions(**vars(args))

  # Probe the module with an empty string to learn its output width.
  original_dim = hub.load(args.module_url)(['']).shape[1]

  random_projection_matrix = generate_random_projection_weights(
      original_dim, args.projected_dim, args.embed_output_dir)

  print('Starting the Beam pipeline...')
  with beam.Pipeline(runner=_RUNNER, options=options) as pipeline:
    _ = (
        pipeline
        | 'Read sentences from files' >>
        beam.io.ReadFromText(file_pattern=args.data_file_pattern)
        # Floor division keeps the minimum batch size an integer; `/` would
        # hand Beam a float under Python 3.
        | 'Batch elements' >> util.BatchElements(
            min_batch_size=_BATCH_SIZE // 2, max_batch_size=_BATCH_SIZE)
        | 'Generate embeddings' >> beam.Map(
            generate_embeddings, args.module_url, random_projection_matrix)
        | 'Encode to tf example' >> beam.FlatMap(to_tf_example)
        | 'Write to TFRecords files' >> beam.io.WriteToTFRecord(
            file_path_prefix='{}/emb'.format(args.embed_output_dir),
            file_name_suffix='.tfrecords')
    )

  print('Beam pipeline completed.')

Source File: e2e_test.py From hub with Apache License 2.0

5 votes

def test_load_v1(self):
    """Loads a V1-format module archive over HTTP via `hub.load`.

    Does nothing unless `load_v2` is available and eager execution is on.
    Note that the working directory is changed and not restored.
    """
    v2_mode = (hasattr(tf_v1.saved_model, "load_v2") and
               tf_v1.executing_eagerly())
    if not v2_mode:
      return  # The test only applies when running V2 mode.

    archive_path = test_utils.get_test_data_path("half_plus_two_v1.tar.gz")
    # Presumably the test HTTP server serves the current working directory;
    # verify against test_utils.start_http_server.
    os.chdir(os.path.dirname(archive_path))
    port = test_utils.start_http_server()
    hub.load("http://localhost:%d/half_plus_two_v1.tar.gz" % port)

Source File: e2e_test.py From hub with Apache License 2.0

5 votes

def test_load(self):
    """Checks `hub.load` both with and without `load_v2` available.

    Without `load_v2`, `hub.load` is expected to raise NotImplementedError.
    With it, and with eager execution on, a small module is saved, packed
    into a tgz, loaded back over HTTP and checked for its `add` attribute.
    In any other case the test does nothing.
    """
    if not hasattr(tf_v1.saved_model, "load_v2"):
      try:
        hub.load("@my/tf2_module/2")
      except NotImplementedError:
        pass
      else:
        self.fail("Failure expected. hub.module() not support in TF 1.x")
      return

    if not tf_v1.executing_eagerly():
      return

    class AdderModule(tf.train.Checkpoint):

      @tf.function(
          input_signature=[tf.TensorSpec(shape=None, dtype=tf.float32)])
      def add(self, x):
        return x + x + 1.

    # Save the module, then pack it into an archive for the HTTP fetch.
    export_dir = os.path.join(self.get_temp_dir(), "saved_model_v2")
    tf.saved_model.save(AdderModule(), export_dir)
    archive_name = "test_module_v2.tgz"
    self._create_tgz(export_dir, archive_name)

    module_url = "http://localhost:%d/%s" % (self.server_port, archive_name)
    reloaded = hub.load(module_url)
    self.assertIsNotNone(reloaded)
    self.assertTrue(hasattr(reloaded, "add"))

Source File: use.py From nboost with Apache License 2.0

4 votes

def __init__(self, **kwargs):
    """Initializes the parent with `kwargs`, then loads the hub module.

    The module is loaded from `self.model_dir` and stored on `self.module`.
    """
    super().__init__(**kwargs)
    self.module = hub.load(self.model_dir)

Python tensorflow_hub.load() Examples