Python tensorflow.python.ops.lookup_ops.index_table_from_file() Examples

The following are 27 code examples of tensorflow.python.ops.lookup_ops.index_table_from_file(), collected from open-source projects. Each example is preceded by its source file, the project it comes from, and that project's license. You may also want to check out the other functions and classes available in the tensorflow.python.ops.lookup_ops module.
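Before the examples, here is a minimal, self-contained sketch of the typical usage pattern (TF 1.x graph mode; the vocabulary file name and tokens below are hypothetical):

import tensorflow as tf
from tensorflow.python.ops import lookup_ops

# vocab.txt is assumed to contain one token per line, e.g. "<unk>", "hello", "world".
table = lookup_ops.index_table_from_file("vocab.txt", default_value=0)
ids = table.lookup(tf.constant(["hello", "world", "never-seen"]))

with tf.Session() as sess:
    sess.run(tf.tables_initializer())  # lookup tables must be initialized before use
    print(sess.run(ids))               # e.g. [1 2 0]; unseen tokens get default_value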
Example #1
Source File: vocab_utils.py    From inference with Apache License 2.0
def create_vocab_tables(src_vocab_file, tgt_vocab_file, share_vocab):
  """Creates vocab tables for src_vocab_file and tgt_vocab_file."""
  src_vocab_table = lookup_ops.index_table_from_file(
      src_vocab_file, default_value=UNK_ID)
  if share_vocab:
    tgt_vocab_table = src_vocab_table
  else:
    tgt_vocab_table = lookup_ops.index_table_from_file(
        tgt_vocab_file, default_value=UNK_ID)
  return src_vocab_table, tgt_vocab_table 
Example #2
Source File: feature_column.py    From Serverless-Deep-Learning-with-TensorFlow-and-AWS-Lambda with MIT License
def _transform_feature(self, inputs):
    input_tensor = _to_sparse_input(inputs.get(self.key))

    if self.dtype.is_integer != input_tensor.dtype.is_integer:
      raise ValueError(
          'Column dtype and SparseTensors dtype must be compatible. '
          'key: {}, column dtype: {}, tensor dtype: {}'.format(
              self.key, self.dtype, input_tensor.dtype))

    _assert_string_or_int(
        input_tensor.dtype,
        prefix='column_name: {} input_tensor'.format(self.key))

    key_dtype = self.dtype
    if input_tensor.dtype.is_integer:
      # `index_table_from_file` requires 64-bit integer keys.
      key_dtype = dtypes.int64
      input_tensor = math_ops.to_int64(input_tensor)

    return lookup_ops.index_table_from_file(
        vocabulary_file=self.vocabulary_file,
        num_oov_buckets=self.num_oov_buckets,
        vocab_size=self.vocabulary_size,
        default_value=self.default_value,
        key_dtype=key_dtype,
        name='{}_lookup'.format(self.key)).lookup(input_tensor) 
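In this column transformation, out-of-vocabulary keys are handled either by num_oov_buckets (extra bucket ids appended after the vocabulary) or by default_value (a single fallback id). A hedged sketch of the two modes, assuming a hypothetical three-token vocabulary file:

from tensorflow.python.ops import lookup_ops

# Mode 1: unknown keys hash into ids [vocab_size, vocab_size + num_oov_buckets).
table_oov = lookup_ops.index_table_from_file(
    vocabulary_file="colors.txt",  # hypothetical file: "red", "green", "blue"
    num_oov_buckets=2)

# Mode 2: every unknown key maps to one fixed id (-1 is the library default).
table_default = lookup_ops.index_table_from_file(
    vocabulary_file="colors.txt",
    default_value=-1)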
Example #3
Source File: vocab_utils.py    From active-qa with Apache License 2.0
def create_vocab_tables(src_vocab_file, tgt_vocab_file, share_vocab):
  """Creates vocab tables for src_vocab_file and tgt_vocab_file."""
  src_vocab_table = lookup_ops.index_table_from_file(
      src_vocab_file, default_value=UNK_ID)
  if share_vocab:
    tgt_vocab_table = src_vocab_table
  else:
    tgt_vocab_table = lookup_ops.index_table_from_file(
        tgt_vocab_file, default_value=UNK_ID)
  return src_vocab_table, tgt_vocab_table 
Example #4
Source File: vocab_utils.py    From nlp-architect with Apache License 2.0
def create_vocab_tables(src_vocab_file, tgt_vocab_file, share_vocab):
    """Creates vocab tables for src_vocab_file and tgt_vocab_file."""
    src_vocab_table = lookup_ops.index_table_from_file(src_vocab_file, default_value=UNK_ID)
    if share_vocab:
        tgt_vocab_table = src_vocab_table
    else:
        tgt_vocab_table = lookup_ops.index_table_from_file(tgt_vocab_file, default_value=UNK_ID)
    return src_vocab_table, tgt_vocab_table 
Example #5
Source File: qe_model.py    From qebrain with BSD 2-Clause "Simplified" License
def create_vocab_tables(src_vocab_file, tgt_vocab_file, share_vocab, vocab_size):
    src_vocab_table = lookup_ops.index_table_from_file(
        src_vocab_file, default_value=vocab_size)
    if share_vocab:
        tgt_vocab_table = src_vocab_table
    else:
        tgt_vocab_table = lookup_ops.index_table_from_file(
            tgt_vocab_file, default_value=vocab_size)
    return src_vocab_table, tgt_vocab_table 
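Unlike the UNK_ID examples above, this project passes the vocabulary size as default_value, so every out-of-vocabulary token is mapped to the index immediately after the vocabulary. A small sketch of the effect, assuming a hypothetical three-entry vocab file:

import tensorflow as tf
from tensorflow.python.ops import lookup_ops

vocab_size = 3  # hypothetical vocab.txt with three entries -> ids 0, 1, 2
table = lookup_ops.index_table_from_file("vocab.txt", default_value=vocab_size)
ids = table.lookup(tf.constant(["known_word", "totally_unknown"]))
# After tf.tables_initializer() runs, unknown tokens map to id 3 (== vocab_size),
# so the embedding matrix needs vocab_size + 1 rows.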
Example #6
Source File: expert_model.py    From qebrain with BSD 2-Clause "Simplified" License
def create_vocab_tables(src_vocab_file, tgt_vocab_file, share_vocab, vocab_size):
    src_vocab_table = lookup_ops.index_table_from_file(
        src_vocab_file, default_value=vocab_size)
    if share_vocab:
        tgt_vocab_table = src_vocab_table
    else:
        tgt_vocab_table = lookup_ops.index_table_from_file(
            tgt_vocab_file, default_value=vocab_size)
    return src_vocab_table, tgt_vocab_table 
Example #7
Source File: decoder_main.py    From NAO with GNU General Public License v3.0
def create_vocab_tables(vocab_file):
  """Creates vocab tables for src_vocab_file and tgt_vocab_file."""
  vocab_table = lookup_ops.index_table_from_file(
      vocab_file, default_value=0)
  return vocab_table 
Example #8
Source File: vocab_utils.py    From nmt with Apache License 2.0
def create_vocab_tables(src_vocab_file, tgt_vocab_file, share_vocab):
  """Creates vocab tables for src_vocab_file and tgt_vocab_file."""
  src_vocab_table = lookup_ops.index_table_from_file(
      src_vocab_file, default_value=UNK_ID)
  if share_vocab:
    tgt_vocab_table = src_vocab_table
  else:
    tgt_vocab_table = lookup_ops.index_table_from_file(
        tgt_vocab_file, default_value=UNK_ID)
  return src_vocab_table, tgt_vocab_table 
Example #9
Source File: vocab.py    From THRED with MIT License
def create_vocab_table(vocab_file):
    """Creates vocab tables for vocab_file."""
    return lookup_ops.index_table_from_file(vocab_file, default_value=UNK_ID) 
Example #10
Source File: feature_column.py    From lambda-packs with MIT License
def _transform_feature(self, inputs):
    input_tensor = _to_sparse_input(inputs.get(self.key))

    if self.dtype.is_integer != input_tensor.dtype.is_integer:
      raise ValueError(
          'Column dtype and SparseTensors dtype must be compatible. '
          'key: {}, column dtype: {}, tensor dtype: {}'.format(
              self.key, self.dtype, input_tensor.dtype))

    _assert_string_or_int(
        input_tensor.dtype,
        prefix='column_name: {} input_tensor'.format(self.key))

    key_dtype = self.dtype
    if input_tensor.dtype.is_integer:
      # `index_table_from_file` requires 64-bit integer keys.
      key_dtype = dtypes.int64
      input_tensor = math_ops.to_int64(input_tensor)

    return lookup_ops.index_table_from_file(
        vocabulary_file=self.vocabulary_file,
        num_oov_buckets=self.num_oov_buckets,
        vocab_size=self.vocabulary_size,
        default_value=self.default_value,
        key_dtype=key_dtype,
        name='{}_lookup'.format(self.key)).lookup(input_tensor) 
Example #11
Source File: tokenizeddata.py    From ChatLearner with Apache License 2.0
def __init__(self, corpus_dir, hparams=None, training=True, buffer_size=8192):
        """
        Args:
            corpus_dir: Name of the folder storing corpus files for training.
            hparams: The object containing the loaded hyperparameters. If None, it will be
                    initialized here.
            training: Whether to use this object for training.
            buffer_size: The buffer size used for the mapping process during data processing.
        """
        if hparams is None:
            self.hparams = HParams(corpus_dir).hparams
        else:
            self.hparams = hparams

        self.src_max_len = self.hparams.src_max_len
        self.tgt_max_len = self.hparams.tgt_max_len

        self.training = training
        self.text_set = None
        self.id_set = None

        vocab_file = os.path.join(corpus_dir, VOCAB_FILE)
        self.vocab_size, _ = check_vocab(vocab_file)
        self.vocab_table = lookup_ops.index_table_from_file(vocab_file,
                                                            default_value=self.hparams.unk_id)
        # print("vocab_size = {}".format(self.vocab_size))

        if training:
            self.case_table = prepare_case_table()
            self.reverse_vocab_table = None
            self._load_corpus(corpus_dir)
            self._convert_to_tokens(buffer_size)
        else:
            self.case_table = None
            self.reverse_vocab_table = \
                lookup_ops.index_to_string_table_from_file(vocab_file,
                                                           default_value=self.hparams.unk_token) 
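Here the same vocabulary file backs two tables: index_table_from_file for token -> id during training, and index_to_string_table_from_file for id -> token at inference time. A hedged round-trip sketch (TF 1.x; the file name and default values are hypothetical):

import tensorflow as tf
from tensorflow.python.ops import lookup_ops

vocab_file = "vocab.txt"  # hypothetical; one token per line
to_ids = lookup_ops.index_table_from_file(vocab_file, default_value=0)
to_tokens = lookup_ops.index_to_string_table_from_file(vocab_file, default_value="<unk>")

ids = to_ids.lookup(tf.constant(["hello", "world"]))
tokens = to_tokens.lookup(ids)

with tf.Session() as sess:
    sess.run(tf.tables_initializer())
    print(sess.run(tokens))  # in-vocab words round-trip, e.g. [b'hello' b'world']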
Example #12
Source File: model_helper.py    From LSTM-CNN-CWS with Apache License 2.0
def create_infer_model(hparams, model_creator):
  """Create inference model."""
  graph = tf.Graph()
  vocab_file = hparams.vocab_file

  with graph.as_default(), tf.container("infer"):
    vocab_table = lookup_ops.index_table_from_file(
      vocab_file, default_value = UNK_ID)
    # for the labels
    '''
    Although the labels are not actually used during inference, this ensures
    the labels are not None when building the model graph.
    (refer to model.BasicModel._decode_layer)
    '''
    mapping_strings = tf.constant(['0'])
    index_table = tf.contrib.lookup.index_table_from_tensor(
        mapping = mapping_strings, default_value = 0)

    txt_placeholder = tf.placeholder(shape=[None], dtype = tf.string)
    batch_size_placeholder = tf.placeholder(shape = [], dtype = tf.int64)

    txt_dataset = tf.data.Dataset.from_tensor_slices(
        txt_placeholder)
    iterator = data_iterator.get_infer_iterator(
        txt_dataset,
        vocab_table,
        index_table,
        batch_size = batch_size_placeholder)

    model = model_creator(
        hparams,
        iterator = iterator,
        mode = tf.contrib.learn.ModeKeys.INFER,
        vocab_table = vocab_table)

  return InferModel(
      graph = graph,
      model = model,
      txt_placeholder = txt_placeholder,
      batch_size_placeholder = batch_size_placeholder,
      iterator = iterator) 
Example #13
Source File: model_helper.py    From LSTM-CNN-CWS with Apache License 2.0
def create_eval_model(hparams, model_creator):
  vocab_file = hparams.vocab_file
  index_file = hparams.index_file
  graph = tf.Graph()

  with graph.as_default(), tf.container("eval"):
    vocab_table = lookup_ops.index_table_from_file(
      vocab_file, default_value = UNK_ID)
    # for the labels
    index_table = lookup_ops.index_table_from_file(
      index_file, default_value = 0)

    # the file's name
    txt_file_placeholder = tf.placeholder(shape = (), dtype = tf.string)
    lb_file_placeholder = tf.placeholder(shape = (), dtype = tf.string)
    txt_dataset = tf.data.TextLineDataset(txt_file_placeholder)
    lb_dataset = tf.data.TextLineDataset(lb_file_placeholder)

    iterator = data_iterator.get_iterator(
        txt_dataset,
        lb_dataset,
        vocab_table,
        index_table,
        batch_size = hparams.batch_size,
        num_buckets = hparams.num_buckets,
        max_len = hparams.max_len)

    model = model_creator(
        hparams,
        iterator = iterator,
        mode = tf.contrib.learn.ModeKeys.EVAL,
        vocab_table = vocab_table)

  return EvalModel(
      graph = graph,
      model = model,
      txt_file_placeholder = txt_file_placeholder,
      lb_file_placeholder = lb_file_placeholder,
      iterator = iterator) 
Example #14
Source File: model_helper.py    From LSTM-CNN-CWS with Apache License 2.0
def create_train_model(hparams, model_creator):
  txt_file = "%s.%s" % (hparams.train_prefix, "txt")
  lb_file = "%s.%s" % (hparams.train_prefix, "lb")
  vocab_file = hparams.vocab_file
  index_file = hparams.index_file

  graph = tf.Graph()

  with graph.as_default(), tf.container("train"):
    vocab_table = lookup_ops.index_table_from_file(
      vocab_file, default_value = UNK_ID)
    # for the labels
    index_table = lookup_ops.index_table_from_file(
      index_file, default_value = 0)

    txt_dataset = tf.data.TextLineDataset(txt_file)
    lb_dataset = tf.data.TextLineDataset(lb_file)

    iterator = data_iterator.get_iterator(
        txt_dataset,
        lb_dataset,
        vocab_table,
        index_table,
        batch_size = hparams.batch_size,
        num_buckets = hparams.num_buckets,
        max_len = hparams.max_len)

    model = model_creator(
        hparams,
        iterator = iterator,
        mode = tf.contrib.learn.ModeKeys.TRAIN,
        vocab_table = vocab_table)

  return TrainModel(
      graph = graph,
      model = model,
      iterator = iterator) 
Example #15
Source File: vocab_utils.py    From parallax with Apache License 2.0
def create_vocab_tables(src_vocab_file, tgt_vocab_file, share_vocab):
  """Creates vocab tables for src_vocab_file and tgt_vocab_file."""
  src_vocab_table = lookup_ops.index_table_from_file(
      src_vocab_file, default_value=UNK_ID)
  if share_vocab:
    tgt_vocab_table = src_vocab_table
  else:
    tgt_vocab_table = lookup_ops.index_table_from_file(
        tgt_vocab_file, default_value=UNK_ID)
  return src_vocab_table, tgt_vocab_table 
Example #16
Source File: vocab_utils.py    From NETransliteration-COLING2018 with MIT License
def create_vocab_tables(src_vocab_file, tgt_vocab_file, share_vocab):
  """Creates vocab tables for src_vocab_file and tgt_vocab_file."""
  src_vocab_table = lookup_ops.index_table_from_file(
      src_vocab_file, default_value=UNK_ID)
  if share_vocab:
    tgt_vocab_table = src_vocab_table
  else:
    tgt_vocab_table = lookup_ops.index_table_from_file(
        tgt_vocab_file, default_value=UNK_ID)
  return src_vocab_table, tgt_vocab_table 
Example #17
Source File: vocab_utils.py    From training_results_v0.5 with Apache License 2.0
def create_vocab_tables(src_vocab_file, tgt_vocab_file, share_vocab):
  """Creates vocab tables for src_vocab_file and tgt_vocab_file."""
  src_vocab_table = lookup_ops.index_table_from_file(
      src_vocab_file, default_value=UNK_ID)
  if share_vocab:
    tgt_vocab_table = src_vocab_table
  else:
    tgt_vocab_table = lookup_ops.index_table_from_file(
        tgt_vocab_file, default_value=UNK_ID)
  return src_vocab_table, tgt_vocab_table 
Example #18
Source File: vocab_utils.py    From training_results_v0.5 with Apache License 2.0
def create_vocab_tables(src_vocab_file, tgt_vocab_file, share_vocab):
  """Creates vocab tables for src_vocab_file and tgt_vocab_file."""
  src_vocab_table = lookup_ops.index_table_from_file(
      src_vocab_file, default_value=UNK_ID)
  if share_vocab:
    tgt_vocab_table = src_vocab_table
  else:
    tgt_vocab_table = lookup_ops.index_table_from_file(
        tgt_vocab_file, default_value=UNK_ID)
  return src_vocab_table, tgt_vocab_table 
Example #19
Source File: vocab_utils.py    From training_results_v0.5 with Apache License 2.0
def create_vocab_tables(src_vocab_file, tgt_vocab_file, share_vocab):
  """Creates vocab tables for src_vocab_file and tgt_vocab_file."""
  src_vocab_table = lookup_ops.index_table_from_file(
      src_vocab_file, default_value=UNK_ID)
  if share_vocab:
    tgt_vocab_table = src_vocab_table
  else:
    tgt_vocab_table = lookup_ops.index_table_from_file(
        tgt_vocab_file, default_value=UNK_ID)
  return src_vocab_table, tgt_vocab_table 
Example #20
Source File: vocab_utils.py    From training_results_v0.5 with Apache License 2.0
def create_vocab_tables(src_vocab_file, tgt_vocab_file, share_vocab):
  """Creates vocab tables for src_vocab_file and tgt_vocab_file."""
  src_vocab_table = lookup_ops.index_table_from_file(
      src_vocab_file, default_value=UNK_ID)
  if share_vocab:
    tgt_vocab_table = src_vocab_table
  else:
    tgt_vocab_table = lookup_ops.index_table_from_file(
        tgt_vocab_file, default_value=UNK_ID)
  return src_vocab_table, tgt_vocab_table 
Example #21
Source File: vocab_utils.py    From training_results_v0.5 with Apache License 2.0
def create_vocab_tables(src_vocab_file, tgt_vocab_file, share_vocab):
  """Creates vocab tables for src_vocab_file and tgt_vocab_file."""
  src_vocab_table = lookup_ops.index_table_from_file(
      src_vocab_file, default_value=UNK_ID)
  if share_vocab:
    tgt_vocab_table = src_vocab_table
  else:
    tgt_vocab_table = lookup_ops.index_table_from_file(
        tgt_vocab_file, default_value=UNK_ID)
  return src_vocab_table, tgt_vocab_table 
Example #22
Source File: vocab_utils.py    From nslt with Apache License 2.0
def create_tgt_vocab_table(tgt_vocab_file):
    """Creates vocab tables for src_vocab_file and tgt_vocab_file."""
    tgt_vocab_table = lookup_ops.index_table_from_file(tgt_vocab_file, default_value=UNK_ID)

    return tgt_vocab_table 
Example #23
Source File: train_bahdanau.py    From NLP with MIT License
def create_input_data(source_data_file, target_data_file,
                      vocab_file,
                      batch_size, sos, eos, unk_id,
                      source_max_length, target_max_length):
  source_dataset = tf.data.TextLineDataset(tf.gfile.Glob(source_data_file))
  target_dataset = tf.data.TextLineDataset(tf.gfile.Glob(target_data_file))
  vocab = lookup_ops.index_table_from_file(vocab_file, default_value=unk_id)

  output_buffer_size = batch_size * 1000

  sos_id = tf.cast(vocab.lookup(tf.constant(sos)), tf.int32)
  eos_id = tf.cast(vocab.lookup(tf.constant(eos)), tf.int32)

  dataset = tf.data.Dataset.zip((source_dataset, target_dataset))
  dataset = dataset.map(
    lambda src, tgt: (tf.string_split([src]).values,
                      tf.string_split([tgt]).values)).prefetch(output_buffer_size)
  dataset = dataset.filter(
    lambda src, tgt: tf.logical_and(tf.size(src) > 0, tf.size(tgt) > 0))
  # dataset = dataset.map(
  #   lambda src, tgt: (src[:source_max_length], tgt[:target_max_length]))
  dataset = dataset.filter(
    lambda src, tgt: tf.logical_and(tf.size(src) <= source_max_length, tf.size(tgt) <= target_max_length))
  dataset = dataset.prefetch(output_buffer_size)

  dataset = dataset.map(
    lambda src, tgt: (tf.cast(vocab.lookup(src), tf.int32),
                      tf.cast(vocab.lookup(tgt), tf.int32)))
  dataset = dataset.prefetch(output_buffer_size)

  dataset = dataset.map(
    lambda src, tgt: (src,
                      tf.concat(([sos_id], tgt), 0),
                      tf.concat((tgt, [eos_id]), 0))).prefetch(output_buffer_size)

  dataset = dataset.map(
    lambda src, tgt_in, tgt_out: (
      src, tgt_in, tgt_out, tf.size(src), tf.size(tgt_in))).prefetch(output_buffer_size)

  dataset = dataset.shuffle(100).repeat().padded_batch(
    batch_size,
    padded_shapes=(tf.TensorShape([None]),
                   tf.TensorShape([None]),
                   tf.TensorShape([None]),
                   tf.TensorShape([]),
                   tf.TensorShape([])),
    padding_values=(eos_id,
                    eos_id,
                    eos_id,
                    0,
                    0))

  iterator = dataset.make_initializable_iterator()

  return iterator.get_next(), iterator.initializer, vocab

# ======================== SEQ2SEQ NETWORK ============================= 
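Because the pipeline above creates both a lookup table and an initializable iterator, both must be initialized before batches can be drawn. A hedged driver sketch (TF 1.x; the file paths and hyperparameter values below are placeholders):

import tensorflow as tf

# create_input_data is the function defined above; all argument values are placeholders.
next_batch, iterator_init, vocab = create_input_data(
    source_data_file="train.src", target_data_file="train.tgt",
    vocab_file="vocab.txt", batch_size=32,
    sos="<s>", eos="</s>", unk_id=0,
    source_max_length=50, target_max_length=50)

with tf.Session() as sess:
    sess.run(tf.tables_initializer())  # initializes the vocab lookup table
    sess.run(iterator_init)            # initializes the dataset iterator
    src, tgt_in, tgt_out, src_len, tgt_len = sess.run(next_batch)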
Example #24
Source File: train_bahdanau.py    From NLP with MIT License
def create_input_data(source_data_file, target_data_file,
                      source_vocab_file, target_vocab_file,
                      batch_size, sos, eos, unk_id,
                      source_max_length, target_max_length):
  source_dataset = tf.data.TextLineDataset(tf.gfile.Glob(source_data_file))
  target_dataset = tf.data.TextLineDataset(tf.gfile.Glob(target_data_file))
  source_vocab = lookup_ops.index_table_from_file(
    source_vocab_file, default_value=unk_id)
  target_vocab = lookup_ops.index_table_from_file(
    target_vocab_file, default_value=unk_id)

  output_buffer_size = batch_size * 1000

  source_eos_id = tf.cast(source_vocab.lookup(tf.constant(eos)), tf.int32)
  target_sos_id = tf.cast(target_vocab.lookup(tf.constant(sos)), tf.int32)
  target_eos_id = tf.cast(target_vocab.lookup(tf.constant(eos)), tf.int32)

  dataset = tf.data.Dataset.zip((source_dataset, target_dataset))
  dataset = dataset.map(
    lambda src, tgt: (tf.string_split([src]).values,
                      tf.string_split([tgt]).values)).prefetch(output_buffer_size)
  dataset = dataset.filter(
    lambda src, tgt: tf.logical_and(tf.size(src) > 0, tf.size(tgt) > 0))
  dataset = dataset.map(
    lambda src, tgt: (src[:source_max_length], tgt[:target_max_length]))
  dataset = dataset.prefetch(output_buffer_size)

  dataset = dataset.map(
    lambda src, tgt: (tf.cast(source_vocab.lookup(src), tf.int32),
                      tf.cast(target_vocab.lookup(tgt), tf.int32)))
  dataset = dataset.prefetch(output_buffer_size)

  dataset = dataset.map(
    lambda src, tgt: (src,
                      tf.concat(([target_sos_id], tgt), 0),
                      tf.concat((tgt, [target_eos_id]), 0))).prefetch(output_buffer_size)

  dataset = dataset.map(
    lambda src, tgt_in, tgt_out: (
      src, tgt_in, tgt_out, tf.size(src), tf.size(tgt_in))).prefetch(output_buffer_size)

  dataset = dataset.shuffle(100).repeat().padded_batch(
    batch_size,
    padded_shapes=(tf.TensorShape([None]),
                   tf.TensorShape([None]),
                   tf.TensorShape([None]),
                   tf.TensorShape([]),
                   tf.TensorShape([])),
    padding_values=(source_eos_id,
                    target_eos_id,
                    target_eos_id,
                    0,
                    0))

  iterator = dataset.make_initializable_iterator()

  return iterator.get_next(), iterator.initializer, source_vocab, target_vocab

# ======================== SEQ2SEQ NETWORK ============================= 
Example #25
Source File: train_bi.py    From NLP with MIT License
def create_input_data(source_data_file, target_data_file,
                      source_vocab_file, target_vocab_file,
                      batch_size, sos, eos,
                      source_max_length, target_max_length):
  source_dataset = tf.data.TextLineDataset(tf.gfile.Glob(source_data_file))
  target_dataset = tf.data.TextLineDataset(tf.gfile.Glob(target_data_file))
  source_vocab = lookup_ops.index_table_from_file(
    source_vocab_file, default_value=FLAGS.unk_id)
  target_vocab = lookup_ops.index_table_from_file(
    target_vocab_file, default_value=FLAGS.unk_id)

  output_buffer_size = batch_size * 1000

  source_eos_id = tf.cast(source_vocab.lookup(tf.constant(eos)), tf.int32)
  target_sos_id = tf.cast(target_vocab.lookup(tf.constant(sos)), tf.int32)
  target_eos_id = tf.cast(target_vocab.lookup(tf.constant(eos)), tf.int32)

  dataset = tf.data.Dataset.zip((source_dataset, target_dataset))
  dataset = dataset.map(
    lambda src, tgt: (tf.string_split([src]).values,
                      tf.string_split([tgt]).values)).prefetch(output_buffer_size)
  dataset = dataset.filter(
    lambda src, tgt: tf.logical_and(tf.size(src) > 0, tf.size(tgt) > 0))
  dataset = dataset.map(
    lambda src, tgt: (src[:source_max_length], tgt[:target_max_length]))
  dataset = dataset.prefetch(output_buffer_size)

  dataset = dataset.map(
    lambda src, tgt: (tf.cast(source_vocab.lookup(src), tf.int32),
                      tf.cast(target_vocab.lookup(tgt), tf.int32)))
  dataset = dataset.prefetch(output_buffer_size)

  dataset = dataset.map(
    lambda src, tgt: (src,
                      tf.concat(([target_sos_id], tgt), 0),
                      tf.concat((tgt, [target_eos_id]), 0))).prefetch(output_buffer_size)

  dataset = dataset.map(
    lambda src, tgt_in, tgt_out: (
      src, tgt_in, tgt_out, tf.size(src), tf.size(tgt_in))).prefetch(output_buffer_size)

  dataset = dataset.shuffle(100).repeat().padded_batch(
    batch_size,
    padded_shapes=(tf.TensorShape([None]),
                   tf.TensorShape([None]),
                   tf.TensorShape([None]),
                   tf.TensorShape([]),
                   tf.TensorShape([])),
    padding_values=(source_eos_id,
                    target_eos_id,
                    target_eos_id,
                    0,
                    0))

  iterator = dataset.make_initializable_iterator()

  return iterator.get_next(), iterator.initializer, source_vocab, target_vocab

# ======================== SEQ2SEQ NETWORK ============================= 
Example #26
Source File: train_luong.py    From NLP with MIT License
def create_input_data(source_data_file, target_data_file,
                      source_vocab_file, target_vocab_file,
                      batch_size, sos, eos, unk_id,
                      source_max_length, target_max_length):
  source_dataset = tf.data.TextLineDataset(tf.gfile.Glob(source_data_file))
  target_dataset = tf.data.TextLineDataset(tf.gfile.Glob(target_data_file))
  source_vocab = lookup_ops.index_table_from_file(
    source_vocab_file, default_value=unk_id)
  target_vocab = lookup_ops.index_table_from_file(
    target_vocab_file, default_value=unk_id)

  output_buffer_size = batch_size * 1000

  source_eos_id = tf.cast(source_vocab.lookup(tf.constant(eos)), tf.int32)
  target_sos_id = tf.cast(target_vocab.lookup(tf.constant(sos)), tf.int32)
  target_eos_id = tf.cast(target_vocab.lookup(tf.constant(eos)), tf.int32)

  dataset = tf.data.Dataset.zip((source_dataset, target_dataset))
  dataset = dataset.map(
    lambda src, tgt: (tf.string_split([src]).values,
                      tf.string_split([tgt]).values)).prefetch(output_buffer_size)
  dataset = dataset.filter(
    lambda src, tgt: tf.logical_and(tf.size(src) > 0, tf.size(tgt) > 0))
  dataset = dataset.map(
    lambda src, tgt: (src[:source_max_length], tgt[:target_max_length]))
  dataset = dataset.prefetch(output_buffer_size)

  dataset = dataset.map(
    lambda src, tgt: (tf.cast(source_vocab.lookup(src), tf.int32),
                      tf.cast(target_vocab.lookup(tgt), tf.int32)))
  dataset = dataset.prefetch(output_buffer_size)

  dataset = dataset.map(
    lambda src, tgt: (tf.reverse(src, axis=[0]),
                      tf.concat(([target_sos_id], tgt), 0),
                      tf.concat((tgt, [target_eos_id]), 0))).prefetch(output_buffer_size)

  dataset = dataset.map(
    lambda src, tgt_in, tgt_out: (
      src, tgt_in, tgt_out, tf.size(src), tf.size(tgt_in))).prefetch(output_buffer_size)

  dataset = dataset.shuffle(100).repeat().padded_batch(
    batch_size,
    padded_shapes=(tf.TensorShape([None]),
                   tf.TensorShape([None]),
                   tf.TensorShape([None]),
                   tf.TensorShape([]),
                   tf.TensorShape([])),
    padding_values=(source_eos_id,
                    target_eos_id,
                    target_eos_id,
                    0,
                    0))

  iterator = dataset.make_initializable_iterator()

  return iterator.get_next(), iterator.initializer, source_vocab, target_vocab

# ======================== SEQ2SEQ NETWORK ============================= 
Example #27
Source File: train.py    From NLP with MIT License
def create_input_data(source_data_file, target_data_file,
                      source_vocab_file, target_vocab_file,
                      batch_size, unk_id, sos, eos,
                      source_max_length, target_max_length):
  source_dataset = tf.data.TextLineDataset(tf.gfile.Glob(source_data_file))
  target_dataset = tf.data.TextLineDataset(tf.gfile.Glob(target_data_file))
  source_vocab = lookup_ops.index_table_from_file(
    source_vocab_file, default_value=unk_id)
  target_vocab = lookup_ops.index_table_from_file(
    target_vocab_file, default_value=unk_id)

  output_buffer_size = batch_size * 1000

  source_eos_id = tf.cast(source_vocab.lookup(tf.constant(eos)), tf.int32)
  target_sos_id = tf.cast(target_vocab.lookup(tf.constant(sos)), tf.int32)
  target_eos_id = tf.cast(target_vocab.lookup(tf.constant(eos)), tf.int32)

  dataset = tf.data.Dataset.zip((source_dataset, target_dataset))
  dataset = dataset.map(
    lambda src, tgt: (tf.string_split([src]).values,
                      tf.string_split([tgt]).values)).prefetch(output_buffer_size)
  dataset = dataset.filter(
    lambda src, tgt: tf.logical_and(tf.size(src) > 0, tf.size(tgt) > 0))
  dataset = dataset.map(
    lambda src, tgt: (src[:source_max_length], tgt[:target_max_length]))
  dataset = dataset.prefetch(output_buffer_size)

  dataset = dataset.map(
    lambda src, tgt: (tf.cast(source_vocab.lookup(src), tf.int32),
                      tf.cast(target_vocab.lookup(tgt), tf.int32)))
  dataset = dataset.prefetch(output_buffer_size)

  dataset = dataset.map(
    lambda src, tgt: (tf.reverse(src, axis=[0]),
                      tf.concat(([target_sos_id], tgt), 0),
                      tf.concat((tgt, [target_eos_id]), 0))).prefetch(output_buffer_size)

  dataset = dataset.map(
    lambda src, tgt_in, tgt_out: (
      src, tgt_in, tgt_out, tf.size(src), tf.size(tgt_in))).prefetch(output_buffer_size)

  dataset = dataset.shuffle(100).repeat().padded_batch(
    batch_size,
    padded_shapes=(tf.TensorShape([None]),
                   tf.TensorShape([None]),
                   tf.TensorShape([None]),
                   tf.TensorShape([]),
                   tf.TensorShape([])),
    padding_values=(source_eos_id,
                    target_eos_id,
                    target_eos_id,
                    0,
                    0))

  iterator = dataset.make_initializable_iterator()

  return iterator.get_next(), iterator.initializer, source_vocab, target_vocab

# ======================== SEQ2SEQ NETWORK =============================