Python tensorflow.compat.v1.Example() Examples

The following are 30 code examples of tensorflow.compat.v1.Example(), drawn from open-source projects; the source file, project, and license are noted above each example. You may also want to check out all available functions and classes of the module tensorflow.compat.v1, or try the search function.
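Before the project examples, here is a minimal, self-contained sketch of the round trip most of the snippets below perform: build a tf.train.Example, serialize it, and parse it back. The feature names here (input_ids, label) are illustrative only.

import tensorflow.compat.v1 as tf

# Build a tf.train.Example holding one int64 feature and one bytes feature.
example = tf.train.Example(features=tf.train.Features(feature={
    "input_ids": tf.train.Feature(
        int64_list=tf.train.Int64List(value=[101, 2023, 102])),
    "label": tf.train.Feature(
        bytes_list=tf.train.BytesList(value=[b"positive"])),
}))

# Serialize to bytes -- this is what gets written to a TFRecord file.
serialized = example.SerializeToString()

# Parse it back with the proto API; no TensorFlow session is required.
parsed = tf.train.Example.FromString(serialized)
print(parsed.features.feature["input_ids"].int64_list.value)  # [101, 2023, 102]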
Example #1
Source File: run_finetune_coherence.py    From language with Apache License 2.0
def convert_single_example(example, rand_example, max_seq_length, tokenizer):
  """Converts a single `InputExample` into a single `InputFeatures`."""
  # Add padding examples here

  example_type = collections.namedtuple(
      "Example", ["input_ids", "input_mask", "segment_ids", "labels"])

  labels = range(8)  # Placeholder labels; the values are inconsequential.
  rand_sents = rand_example[:8]
  target_sents = example[:4] + example[5:] + rand_sents
  bert_input = create_cpc_input_from_text(
      tokenizer,
      example[4],
      target_sents,
      labels,
      group_size=16,
      max_seq_length=max_seq_length)

  feature = example_type(bert_input.tokens, bert_input.mask, bert_input.seg_ids,
                         labels)
  return feature 
Example #2
Source File: generate_detection_data.py    From models with Apache License 2.0
def construct_pipeline(pipeline, input_tfrecord, output_tfrecord, model_dir,
                       confidence_threshold, num_shards):
  """Returns a Beam pipeline to run object detection inference.

  Args:
    pipeline: Initialized beam pipeline.
    input_tfrecord: A TFRecord of tf.train.Example protos containing images.
    output_tfrecord: A TFRecord of tf.train.Example protos that contain images
      in the input TFRecord and the detections from the model.
    model_dir: Path to `saved_model` to use for inference.
    confidence_threshold: Threshold to use when keeping detection results.
    num_shards: The number of output shards.
  """
  input_collection = (
      pipeline | 'ReadInputTFRecord' >> beam.io.tfrecordio.ReadFromTFRecord(
          input_tfrecord,
          coder=beam.coders.BytesCoder()))
  output_collection = input_collection | 'RunInference' >> beam.ParDo(
      GenerateDetectionDataFn(model_dir, confidence_threshold))
  output_collection = output_collection | 'Reshuffle' >> beam.Reshuffle()
  _ = output_collection | 'WritetoDisk' >> beam.io.tfrecordio.WriteToTFRecord(
      output_tfrecord,
      num_shards=num_shards,
      coder=beam.coders.ProtoCoder(tf.train.Example)) 
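The function above only wires the pipeline; running it is left to the caller. A hedged usage sketch, with placeholder paths and an illustrative threshold:

import apache_beam as beam

# Sketch only: all paths and the threshold below are placeholders.
with beam.Pipeline() as p:
  construct_pipeline(
      p,
      input_tfrecord="/tmp/images.tfrecord",
      output_tfrecord="/tmp/detections.tfrecord",
      model_dir="/tmp/exported/saved_model",
      confidence_threshold=0.5,
      num_shards=10)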
Example #3
Source File: run_squad.py    From mesh with Apache License 2.0
def process_feature(self, feature):
    """Write a InputFeature to the TFRecordWriter as a tf.train.Example."""
    self.num_features += 1

    def create_int_feature(values):
      feature = tf.train.Feature(
          int64_list=tf.train.Int64List(value=list(values)))
      return feature

    features = collections.OrderedDict()
    features["unique_ids"] = create_int_feature([feature.unique_id])
    features["input_ids"] = create_int_feature(feature.input_ids)
    features["input_mask"] = create_int_feature(feature.input_mask)
    features["segment_ids"] = create_int_feature(feature.segment_ids)

    if self.is_training:
      features["start_positions"] = create_int_feature([feature.start_position])
      features["end_positions"] = create_int_feature([feature.end_position])
      impossible = 0
      if feature.is_impossible:
        impossible = 1
      features["is_impossible"] = create_int_feature([impossible])

    tf_example = tf.train.Example(features=tf.train.Features(feature=features))
    self._writer.write(tf_example.SerializeToString()) 
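Reading these records back requires a feature spec that mirrors what was written. A minimal sketch, assuming max_seq_length and the file path match the write side (both are project-specific):

import tensorflow.compat.v1 as tf

max_seq_length = 384  # Assumption: must equal the length used at write time.
name_to_features = {
    "unique_ids": tf.FixedLenFeature([], tf.int64),
    "input_ids": tf.FixedLenFeature([max_seq_length], tf.int64),
    "input_mask": tf.FixedLenFeature([max_seq_length], tf.int64),
    "segment_ids": tf.FixedLenFeature([max_seq_length], tf.int64),
}
dataset = tf.data.TFRecordDataset("/tmp/squad.tfrecord")  # placeholder path
dataset = dataset.map(
    lambda record: tf.parse_single_example(record, name_to_features))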
Example #4
Source File: generator_utils.py    From tensor2tensor with Apache License 2.0
def to_example(dictionary):
  """Helper: build tf.Example from (string -> int/float/str list) dictionary."""
  features = {}
  for (k, v) in six.iteritems(dictionary):
    if not v:
      raise ValueError("Empty generated field: %s" % str((k, v)))
    # Subtle difference between PY2 and PY3: `map` is not subscriptable in
    # PY3, so map objects will fail with a TypeError unless converted to a list.
    if six.PY3 and isinstance(v, map):
      v = list(v)
    if (isinstance(v[0], six.integer_types) or
        np.issubdtype(type(v[0]), np.integer)):
      features[k] = tf.train.Feature(int64_list=tf.train.Int64List(value=v))
    elif isinstance(v[0], float):
      features[k] = tf.train.Feature(float_list=tf.train.FloatList(value=v))
    elif isinstance(v[0], six.string_types):
      if not six.PY2:  # Convert in python 3.
        v = [bytes(x, "utf-8") for x in v]
      features[k] = tf.train.Feature(bytes_list=tf.train.BytesList(value=v))
    elif isinstance(v[0], bytes):
      features[k] = tf.train.Feature(bytes_list=tf.train.BytesList(value=v))
    else:
      raise ValueError("Value for %s is not a recognized type; v: %s type: %s" %
                       (k, str(v[0]), str(type(v[0]))))
  return tf.train.Example(features=tf.train.Features(feature=features)) 
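Since to_example is fully self-contained, exercising it takes only a dictionary of lists; the keys below are illustrative:

# Illustrative call: any string -> list-of-int/float/str dictionary works.
example = to_example({
    "inputs": [1, 2, 3],
    "weights": [0.5, 1.0],
    "tags": ["train"],
})
serialized = example.SerializeToString()  # Ready for a TFRecordWriter.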
Example #5
Source File: run_squad.py    From language with Apache License 2.0
def process_feature(self, feature):
    """Write a InputFeature to the TFRecordWriter as a tf.train.Example."""
    self.num_features += 1

    def create_int_feature(values):
      feature = tf.train.Feature(
          int64_list=tf.train.Int64List(value=list(values)))
      return feature

    features = collections.OrderedDict()
    features["unique_ids"] = create_int_feature([feature.unique_id])
    features["input_ids"] = create_int_feature(feature.input_ids)
    features["input_mask"] = create_int_feature(feature.input_mask)
    features["segment_ids"] = create_int_feature(feature.segment_ids)

    if self.is_training:
      features["start_positions"] = create_int_feature([feature.start_position])
      features["end_positions"] = create_int_feature([feature.end_position])
      impossible = 0
      if feature.is_impossible:
        impossible = 1
      features["is_impossible"] = create_int_feature([impossible])

    tf_example = tf.train.Example(features=tf.train.Features(feature=features))
    self._writer.write(tf_example.SerializeToString()) 
Example #6
Source File: run_bert_boolq.py    From language with Apache License 2.0
def file_based_convert_examples_to_features(
    examples, label_list, max_seq_length, tokenizer, output_file):
  """Convert a set of `InputExample`s to a TFRecord file."""

  writer = tf.python_io.TFRecordWriter(output_file)

  for (ex_index, example) in enumerate(examples):
    if ex_index % 10000 == 0:
      tf.logging.info("Writing example %d of %d" % (ex_index, len(examples)))

    feature = convert_single_example(ex_index, example, label_list,
                                     max_seq_length, tokenizer)

    def create_int_feature(values):
      f = tf.train.Feature(int64_list=tf.train.Int64List(value=list(values)))
      return f

    features = collections.OrderedDict()
    features["input_ids"] = create_int_feature(feature.input_ids)
    features["input_mask"] = create_int_feature(feature.input_mask)
    features["segment_ids"] = create_int_feature(feature.segment_ids)
    features["label_ids"] = create_int_feature([feature.label_id])

    tf_example = tf.train.Example(features=tf.train.Features(feature=features))
    writer.write(tf_example.SerializeToString()) 
Example #7
Source File: run_squad_membership.py    From language with Apache License 2.0
def process_feature(self, feature):
    """Write a InputFeature to the TFRecordWriter as a tf.train.Example."""
    self.num_features += 1

    def create_int_feature(values):
      feature = tf.train.Feature(
          int64_list=tf.train.Int64List(value=list(values)))
      return feature

    features = collections.OrderedDict()
    features["unique_ids"] = create_int_feature([feature.unique_id])
    features["input_ids"] = create_int_feature(feature.input_ids)
    features["input_mask"] = create_int_feature(feature.input_mask)
    features["segment_ids"] = create_int_feature(feature.segment_ids)

    if self.is_training:
      features["label_ids"] = create_int_feature([feature.label_id])

    tf_example = tf.train.Example(features=tf.train.Features(feature=features))
    self._writer.write(tf_example.SerializeToString()) 
Example #8
Source File: run_classifier.py    From albert with Apache License 2.0
def serving_input_receiver_fn():
  """Creates an input function for serving."""
  seq_len = FLAGS.max_seq_length
  serialized_example = tf.placeholder(
      dtype=tf.string, shape=[None], name="serialized_example")
  features = {
      "input_ids": tf.FixedLenFeature([seq_len], dtype=tf.int64),
      "input_mask": tf.FixedLenFeature([seq_len], dtype=tf.int64),
      "segment_ids": tf.FixedLenFeature([seq_len], dtype=tf.int64),
  }
  feature_map = tf.parse_example(serialized_example, features=features)
  feature_map["is_real_example"] = tf.constant(1, dtype=tf.int32)
  feature_map["label_ids"] = tf.constant(0, dtype=tf.int32)

  # tf.Example only supports tf.int64, but the TPU only supports tf.int32.
  # So cast all int64 to int32.
  for name in feature_map.keys():
    t = feature_map[name]
    if t.dtype == tf.int64:
      t = tf.to_int32(t)
    feature_map[name] = t

  return tf.estimator.export.ServingInputReceiver(
      features=feature_map, receiver_tensors=serialized_example) 
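The receiver above consumes a batch of serialized tf.Example protos. A sketch of constructing one such request, assuming seq_len equals the FLAGS.max_seq_length used at export time:

import tensorflow.compat.v1 as tf

seq_len = 128  # Assumption: must match FLAGS.max_seq_length at export time.

def _int64_feature(values):
  return tf.train.Feature(int64_list=tf.train.Int64List(value=values))

request = tf.train.Example(features=tf.train.Features(feature={
    "input_ids": _int64_feature([0] * seq_len),
    "input_mask": _int64_feature([0] * seq_len),
    "segment_ids": _int64_feature([0] * seq_len),
})).SerializeToString()
# `request` can be fed to the exported model's "serialized_example" input.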
Example #9
Source File: cache_tasks_main.py    From text-to-text-transfer-transformer with Apache License 2.0
def _info_dict(self, ex):
    if not ex:
      return {}
    assert len(ex) == 1
    ex = ex[0]
    info = {"num_shards": self._num_shards, "features": {}}
    feature_dict = info["features"]
    for k, v in ex.items():
      t = tf.constant(v)
      # Change int32 to int64 since the tf.Example proto will store it this way.
      dtype = "int64" if t.dtype.name == "int32" else t.dtype.name
      shape = [None] * len(t.shape)
      feature_dict[k] = {"shape": shape, "dtype": dtype}
    return info 
Example #10
Source File: test_utils.py    From text-to-text-transfer-transformer with Apache License 2.0
def _dump_examples_to_tfrecord(path, examples):
  """Writes list of example dicts to a TFRecord file of tf.Example protos."""
  logging.info("Writing examples to TFRecord: %s", path)
  with tf.io.TFRecordWriter(path) as writer:
    for ex in examples:
      writer.write(dataset_utils.dict_to_tfexample(ex).SerializeToString()) 
Example #11
Source File: cache_tasks_main.py    From text-to-text-transfer-transformer with Apache License 2.0
def expand(self, pcoll):
    return (
        pcoll
        | beam.Map(t5.data.dict_to_tfexample)
        | beam.Reshuffle()
        | beam.io.tfrecordio.WriteToTFRecord(
            self._output_path,
            num_shards=self._num_shards,
            coder=beam.coders.ProtoCoder(tf.train.Example))) 
Example #12
Source File: run_finetune_coherence.py    From language with Apache License 2.0
def file_based_convert_examples_to_features(input_file, max_seq_length,
                                            tokenizer, output_file):
  """Convert a set of `InputExample`s to a TFRecord file."""

  tmp1, tmp2, _, _ = read_data(input_file, 0)
  examples = tmp1 + tmp2

  dirname = os.path.dirname(output_file)
  if not tf.gfile.Exists(dirname):
    tf.gfile.MakeDirs(dirname)
  writer = tf.python_io.TFRecordWriter(output_file)

  for (ex_index, example) in enumerate(examples):
    if ex_index % 1000 == 0:
      tf.logging.info("Writing example %d of %d" % (ex_index, len(examples)))

    rand_example = random.choice(examples)
    input_feature = convert_single_example(example, rand_example,
                                           max_seq_length, tokenizer)

    features = collections.OrderedDict()
    for i in range(16):
      features["input_ids" + str(i)] = create_int_feature(
          input_feature.input_ids[i])
      features["input_mask" + str(i)] = create_int_feature(
          input_feature.input_mask[i])
      features["segment_ids" + str(i)] = create_int_feature(
          input_feature.segment_ids[i])
    features["labels"] = create_int_feature(input_feature.labels)
    features["label_types"] = create_int_feature(list(range(8)))

    tf_example = tf.train.Example(features=tf.train.Features(feature=features))
    writer.write(tf_example.SerializeToString())
  writer.close() 
Example #13
Source File: run_bert_boolq_distill.py    From language with Apache License 2.0
def file_based_convert_examples_to_features(
    examples, label_list, max_seq_length, tokenizer, output_file):
  """Convert a set of `InputExample`s to a TFRecord file."""

  writer = tf.python_io.TFRecordWriter(output_file)

  for (ex_index, example) in enumerate(examples):
    if ex_index % 10000 == 0:
      tf.logging.info("Writing example %d of %d" % (ex_index, len(examples)))

    feature = convert_single_example(ex_index, example, label_list,
                                     max_seq_length, tokenizer)

    def create_int_feature(values):
      f = tf.train.Feature(int64_list=tf.train.Int64List(value=list(values)))
      return f

    def create_float_feature(values):
      f = tf.train.Feature(float_list=tf.train.FloatList(value=list(values)))
      return f

    features = collections.OrderedDict()
    features["input_ids"] = create_int_feature(feature.input_ids)
    features["input_mask"] = create_int_feature(feature.input_mask)
    features["segment_ids"] = create_int_feature(feature.segment_ids)
    features["label_ids"] = create_int_feature([feature.label_id])
    features["probs"] = create_float_feature(feature.probs)

    tf_example = tf.train.Example(features=tf.train.Features(feature=features))
    writer.write(tf_example.SerializeToString()) 
Example #14
Source File: run_pretraining.py    From training with Apache License 2.0
def _decode_record(record, name_to_features):
  """Decodes a record to a TensorFlow example."""
  example = tf.parse_single_example(record, name_to_features)

  # tf.Example only supports tf.int64, but the TPU only supports tf.int32.
  # So cast all int64 to int32.
  for name in list(example.keys()):
    t = example[name]
    if t.dtype == tf.int64:
      t = tf.to_int32(t)
    example[name] = t

  return example 
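The name_to_features argument is supplied by the caller. A plausible spec for BERT-style pretraining records, following the public run_pretraining.py conventions (an assumption, not taken from this file; lengths are illustrative):

import tensorflow.compat.v1 as tf

max_seq_length = 128          # Illustrative values.
max_predictions_per_seq = 20
name_to_features = {
    "input_ids": tf.FixedLenFeature([max_seq_length], tf.int64),
    "input_mask": tf.FixedLenFeature([max_seq_length], tf.int64),
    "segment_ids": tf.FixedLenFeature([max_seq_length], tf.int64),
    "masked_lm_positions":
        tf.FixedLenFeature([max_predictions_per_seq], tf.int64),
    "masked_lm_ids": tf.FixedLenFeature([max_predictions_per_seq], tf.int64),
    "masked_lm_weights":
        tf.FixedLenFeature([max_predictions_per_seq], tf.float32),
    "next_sentence_labels": tf.FixedLenFeature([1], tf.int64),
}
# After _decode_record, each int64 tensor above is cast to int32.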
Example #15
Source File: abstract_model.py    From tensor2robot with Apache License 2.0
def get_feature_specification(
      self, mode):
    """Required features for the model_fn/model_inference_fn.

    Note, the model_fn might use additional features for debugging/development
    purposes. The create_export_outputs_fn will however only require the
    specified required features. Only this subset of features will be used to
    generate automatic tf.Example extractors and numpy placeholders for the
    serving models.

    Args:
      mode: The mode for feature specifications
    """ 
Example #16
Source File: reader.py    From magenta with Apache License 2.0
def get_example(self, batch_size):
    """Get a single example from the tfrecord file.

    Args:
      batch_size: Int, minibatch size.

    Returns:
      A dict of tensors parsed from one tf.Example in the TFRecord file.
    """
    reader = tf.TFRecordReader()
    num_epochs = None if self.is_training else 1
    capacity = batch_size
    path_queue = tf.train.input_producer(
        [self.record_path],
        num_epochs=num_epochs,
        shuffle=self.is_training,
        capacity=capacity)
    unused_key, serialized_example = reader.read(path_queue)
    features = {
        "note_str": tf.FixedLenFeature([], dtype=tf.string),
        "pitch": tf.FixedLenFeature([1], dtype=tf.int64),
        "velocity": tf.FixedLenFeature([1], dtype=tf.int64),
        "audio": tf.FixedLenFeature([64000], dtype=tf.float32),
        "qualities": tf.FixedLenFeature([10], dtype=tf.int64),
        "instrument_source": tf.FixedLenFeature([1], dtype=tf.int64),
        "instrument_family": tf.FixedLenFeature([1], dtype=tf.int64),
    }
    example = tf.parse_single_example(serialized_example, features)
    return example 
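Because this reader is queue-based, nothing is produced until queue runners are started inside a session. A sketch, assuming `reader` is an instance of the class that defines get_example above:

import tensorflow.compat.v1 as tf

example = reader.get_example(batch_size=8)  # Assumption: `reader` instance.
with tf.Session() as sess:
  sess.run(tf.local_variables_initializer())  # Needed when num_epochs is set.
  coord = tf.train.Coordinator()
  threads = tf.train.start_queue_runners(sess=sess, coord=coord)
  parsed = sess.run(example)  # One dict of parsed feature tensors.
  coord.request_stop()
  coord.join(threads)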
Example #17
Source File: preprocessors.py    From tensor2robot with Apache License 2.0
def create_metaexample_spec(
    model_spec,
    num_samples_per_task,
    prefix):
  """Converts a model feature/label spec into a MetaExample spec.

  Args:
    model_spec: The base model tensor spec.
    num_samples_per_task: Number of episodes in the task.
    prefix: The tf.Example feature column name prefix.
  Returns:
    A TSpecStructure. For each spec in model_spec, the output contains
    num_samples_per_task corresponding specs stored as: "<name>/i".
  """
  model_spec = utils.flatten_spec_structure(model_spec)
  meta_example_spec = TSpecStructure()

  for key in model_spec.keys():
    for i in range(num_samples_per_task):
      spec = model_spec[key]
      name_prefix = '{:s}_ep{:d}'.format(prefix, i)
      new_name = name_prefix + '/' + six.ensure_str(spec.name)
      meta_example_spec[key + '/{:}'.format(i)] = (
          utils.ExtendedTensorSpec.from_spec(
              spec, name=new_name))
  return meta_example_spec 
Example #18
Source File: create_cococameratraps_tfexample_main.py    From models with Apache License 2.0
def create_pipeline(pipeline,
                    image_directory,
                    input_annotations_file,
                    output_tfrecord_prefix=None,
                    num_images_per_shard=200,
                    keep_bboxes=True):
  """Creates a beam pipeline for producing a COCO-CameraTraps Image dataset.

  Args:
    pipeline: Initialized beam pipeline.
    image_directory: Path to image directory
    input_annotations_file: Path to a coco-cameratraps annotation file
    output_tfrecord_prefix: Absolute path for tfrecord outputs. Final files will
      be named {output_tfrecord_prefix}@N.
    num_images_per_shard: The number of images to store in each shard
    keep_bboxes: Whether to keep any bounding boxes that exist in the json file
  """

  logging.info('Reading data from COCO-CameraTraps Dataset.')

  data = load_json_data(input_annotations_file)

  num_shards = int(np.ceil(float(len(data['images']))/num_images_per_shard))

  image_examples = (
      pipeline | ('CreateCollections') >> beam.Create(
          [im['id'] for im in data['images']])
      | ('ParseImage') >> beam.ParDo(ParseImage(
          image_directory, data['images'], data['annotations'],
          data['categories'], keep_bboxes=keep_bboxes)))
  _ = (image_examples
       | ('Reshuffle') >> beam.Reshuffle()
       | ('WriteTfImageExample') >> beam.io.tfrecordio.WriteToTFRecord(
           output_tfrecord_prefix,
           num_shards=num_shards,
           coder=beam.coders.ProtoCoder(tf.train.Example))) 
Example #19
Source File: classifier_utils.py    From language with Apache License 2.0
def file_based_convert_examples_to_features(examples, label_list,
                                            max_seq_length, tokenizer,
                                            output_file, task_name):
  """Convert a set of `InputExample`s to a TFRecord file."""

  writer = tf.python_io.TFRecordWriter(output_file)

  for (ex_index, example) in enumerate(examples):
    if ex_index % 10000 == 0:
      tf.logging.info("Writing example %d of %d" % (ex_index, len(examples)))

    feature = convert_single_example(ex_index, example, label_list,
                                     max_seq_length, tokenizer, task_name)

    def create_int_feature(values):
      f = tf.train.Feature(int64_list=tf.train.Int64List(value=list(values)))
      return f

    def create_float_feature(values):
      f = tf.train.Feature(float_list=tf.train.FloatList(value=list(values)))
      return f

    features = collections.OrderedDict()
    features["input_ids"] = create_int_feature(feature.input_ids)
    features["input_mask"] = create_int_feature(feature.input_mask)
    features["segment_ids"] = create_int_feature(feature.segment_ids)
    features["label_ids"] = create_float_feature([feature.label_id])\
        if task_name == "sts-b" else create_int_feature([feature.label_id])
    features["is_real_example"] = create_int_feature(
        [int(feature.is_real_example)])

    tf_example = tf.train.Example(features=tf.train.Features(feature=features))
    writer.write(tf_example.SerializeToString())
  writer.close() 
Example #20
Source File: run_nq.py    From language with Apache License 2.0
def process_feature(self, feature):
    """Write a InputFeature to the TFRecordWriter as a tf.train.Example."""
    self.num_features += 1

    def create_int_feature(values):
      feature = tf.train.Feature(
          int64_list=tf.train.Int64List(value=list(values)))
      return feature

    features = collections.OrderedDict()
    features["unique_ids"] = create_int_feature([feature.unique_id])
    features["input_ids"] = create_int_feature(feature.input_ids)
    features["input_mask"] = create_int_feature(feature.input_mask)
    features["segment_ids"] = create_int_feature(feature.segment_ids)

    if self.is_training:
      features["start_positions"] = create_int_feature([feature.start_position])
      features["end_positions"] = create_int_feature([feature.end_position])
      features["answer_types"] = create_int_feature([feature.answer_type])
    else:
      token_map = [-1] * len(feature.input_ids)
      for k, v in feature.token_to_orig_map.items():
        token_map[k] = v
      features["token_map"] = create_int_feature(token_map)

    tf_example = tf.train.Example(features=tf.train.Features(feature=features))
    self._writer.write(tf_example.SerializeToString()) 
Example #21
Source File: run_concat_classifier.py    From language with Apache License 2.0
def file_based_convert_examples_to_features(examples, label_list,
                                            max_seq_length, tokenizer,
                                            output_file):
  """Convert a set of `InputExample`s to a TFRecord file."""

  writer = tf.python_io.TFRecordWriter(output_file)

  for (ex_index, example) in enumerate(examples):
    if ex_index % 10000 == 0:
      tf.logging.info("Writing example %d of %d" % (ex_index, len(examples)))

    feature = convert_single_example(ex_index, example, label_list,
                                     max_seq_length, tokenizer)

    def create_int_feature(values):
      f = tf.train.Feature(int64_list=tf.train.Int64List(value=list(values)))
      return f

    features = collections.OrderedDict()
    features["input_ids"] = create_int_feature(feature.input_ids)
    features["input_mask"] = create_int_feature(feature.input_mask)
    features["segment_ids"] = create_int_feature(feature.segment_ids)
    features["label_ids"] = create_int_feature([feature.label_id])
    features["is_real_example"] = create_int_feature(
        [int(feature.is_real_example)])

    tf_example = tf.train.Example(features=tf.train.Features(feature=features))
    writer.write(tf_example.SerializeToString())
  writer.close() 
Example #22
Source File: run_classifier.py    From language with Apache License 2.0
def file_based_convert_examples_to_features(examples, label_list,
                                            max_seq_length, tokenizer,
                                            output_file):
  """Convert a set of `InputExample`s to a TFRecord file."""

  writer = tf.python_io.TFRecordWriter(output_file)

  for (ex_index, example) in enumerate(examples):
    if ex_index % 10000 == 0:
      tf.logging.info("Writing example %d of %d" % (ex_index, len(examples)))

    feature = convert_single_example(ex_index, example, label_list,
                                     max_seq_length, tokenizer)

    def create_int_feature(values):
      f = tf.train.Feature(int64_list=tf.train.Int64List(value=list(values)))
      return f

    features = collections.OrderedDict()
    features["input_ids"] = create_int_feature(feature.input_ids)
    features["input_mask"] = create_int_feature(feature.input_mask)
    features["segment_ids"] = create_int_feature(feature.segment_ids)
    features["label_ids"] = create_int_feature([feature.label_id])
    features["is_real_example"] = create_int_feature(
        [int(feature.is_real_example)])

    tf_example = tf.train.Example(features=tf.train.Features(feature=features))
    writer.write(tf_example.SerializeToString())
  writer.close() 
Example #23
Source File: run_nq.py    From language with Apache License 2.0
def process(self, example):
    """Coverts an NQ example in a list of serialized tf examples."""
    nq_examples = read_nq_entry(example, self.is_training)
    input_features = []
    for nq_example in nq_examples:
      input_features.extend(
          convert_single_example(nq_example, self.tokenizer, self.is_training))

    for input_feature in input_features:
      input_feature.example_index = int(example["id"])
      input_feature.unique_id = (
          input_feature.example_index + input_feature.doc_span_index)

      def create_int_feature(values):
        return tf.train.Feature(
            int64_list=tf.train.Int64List(value=list(values)))

      features = collections.OrderedDict()
      features["unique_ids"] = create_int_feature([input_feature.unique_id])
      features["input_ids"] = create_int_feature(input_feature.input_ids)
      features["input_mask"] = create_int_feature(input_feature.input_mask)
      features["segment_ids"] = create_int_feature(input_feature.segment_ids)

      if self.is_training:
        features["start_positions"] = create_int_feature(
            [input_feature.start_position])
        features["end_positions"] = create_int_feature(
            [input_feature.end_position])
        features["answer_types"] = create_int_feature(
            [input_feature.answer_type])
      else:
        token_map = [-1] * len(input_feature.input_ids)
        for k, v in input_feature.token_to_orig_map.items():
          token_map[k] = v
        features["token_map"] = create_int_feature(token_map)

      yield tf.train.Example(features=tf.train.Features(
          feature=features)).SerializeToString() 
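Each value yielded by process is a serialized proto that can be decoded for inspection without any TensorFlow ops. A sketch, with a stand-in for one yielded value:

import tensorflow.compat.v1 as tf

# Stand-in for one value yielded by process(); in practice it would come
# from the surrounding Beam pipeline's output collection.
serialized = tf.train.Example(features=tf.train.Features(feature={
    "unique_ids": tf.train.Feature(
        int64_list=tf.train.Int64List(value=[42])),
})).SerializeToString()

ex = tf.train.Example.FromString(serialized)
print(sorted(ex.features.feature.keys()))                  # ['unique_ids']
print(ex.features.feature["unique_ids"].int64_list.value)  # [42]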
Example #24
Source File: run_pretraining.py    From language with Apache License 2.0
def _decode_record(record, name_to_features):
  """Decodes a record to a TensorFlow example."""
  example = tf.parse_single_example(record, name_to_features)

  # tf.Example only supports tf.int64, but the TPU only supports tf.int32.
  # So cast all int64 to int32.
  for name in list(example.keys()):
    t = example[name]
    if t.dtype == tf.int64:
      t = tf.to_int32(t)
    example[name] = t

  return example 
Example #25
Source File: input_fns.py    From language with Apache License 2.0
def process_feature(self, feature):
    """Write a InputFeature to the TFRecordWriter as a tf.train.Example."""
    self.num_features += 1

    def create_int_feature(values):
      feature = tf.train.Feature(
          int64_list=tf.train.Int64List(value=list(values)))
      return feature

    def create_bytes_feature(value):
      return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))

    features = collections.OrderedDict()
    features["qas_ids"] = create_bytes_feature(feature.qas_id)
    features["qry_input_ids"] = create_int_feature(feature.qry_input_ids)
    features["qry_input_mask"] = create_int_feature(feature.qry_input_mask)
    features["qry_entity_id"] = create_int_feature(feature.qry_entity_id)

    if feature.relation_input_ids:
      for ii in range(len(feature.relation_input_ids)):
        features["rel_input_ids_%d" % ii] = create_int_feature(
            feature.relation_input_ids[ii])
        features["rel_input_mask_%d" % ii] = create_int_feature(
            feature.relation_input_mask[ii])

    if self.is_training:
      if feature.answer_mention is not None:
        features["answer_mentions"] = create_int_feature(feature.answer_mention)
      features["answer_entities"] = create_int_feature(feature.answer_entity)

    if self.has_bridge:
      if feature.bridge_mention is not None:
        features["bridge_mentions"] = create_int_feature(feature.bridge_mention)
      for ii, bridge_entity in enumerate(feature.bridge_entity):
        features["bridge_entities_%d" % ii] = create_int_feature(bridge_entity)

    tf_example = tf.train.Example(features=tf.train.Features(feature=features))
    self._writer.write(tf_example.SerializeToString()) 
Example #26
Source File: input_fns.py    From language with Apache License 2.0
def input_fn_builder(input_file, is_training, drop_remainder,
                     names_to_features):
  """Creates an `input_fn` closure to be passed to TPUEstimator."""

  def _decode_record(record, name_to_features):
    """Decodes a record to a TensorFlow example."""
    example = tf.parse_single_example(record, name_to_features)

    # tf.Example only supports tf.int64, but the TPU only supports tf.int32.
    # So cast all int64 to int32.
    for name in list(example.keys()):
      t = example[name]
      if t.dtype == tf.int64:
        t = tf.to_int32(t)
      example[name] = t

    return example

  def input_fn(params):
    """The actual input function."""
    batch_size = params["batch_size"]

    # For training, we want a lot of parallel reading and shuffling.
    # For eval, we want no shuffling and parallel reading doesn't matter.
    d = tf.data.TFRecordDataset(input_file)
    if is_training:
      d = d.repeat()
      d = d.shuffle(buffer_size=100)

    d = d.apply(
        contrib_data.map_and_batch(
            lambda record: _decode_record(record, names_to_features),
            batch_size=batch_size,
            drop_remainder=drop_remainder))

    return d

  return input_fn 
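A usage sketch: TPUEstimator normally supplies params, but the returned closure can also be called directly to materialize the dataset. The spec and path here are placeholders:

import tensorflow.compat.v1 as tf

names_to_features = {  # Minimal illustrative spec.
    "input_ids": tf.FixedLenFeature([128], tf.int64),
    "label_ids": tf.FixedLenFeature([], tf.int64),
}
input_fn = input_fn_builder(
    input_file="/tmp/train.tfrecord",  # placeholder path
    is_training=True,
    drop_remainder=True,
    names_to_features=names_to_features)
dataset = input_fn({"batch_size": 32})  # A batched tf.data.Dataset.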
Example #27
Source File: input_fns.py    From language with Apache License 2.0
def read_examples(self, queries_file, p=1.0):
    """Read a json file into a list of Example."""
    with tf.gfile.Open(queries_file, "r") as reader:
      examples = []
      for line in tqdm(reader):
        item = json.loads(line.strip())

        qas_id = item["id"]
        relation = random.choice(item["relation"]["text"])
        if item["subject"]["name"] is None or random.uniform(0., 1.) < p:
          question_text = (
              random.choice(item["subject"]["mentions"])["text"] + " . " +
              relation)
        else:
          question_text = item["subject"]["name"] + " . " + relation
        answer_mention = item["object"]["global_mention"]
        answer_entity = item["object"]["ent_id"]

        example = Example(
            qas_id=qas_id,
            question_text=question_text,
            subject_entity=[item["subject"]["wikidata_id"]],
            relations=[relation],
            answer_mention=[answer_mention],
            answer_entity=[answer_entity])
        examples.append(example)

    return examples 
Example #28
Source File: input_fns.py    From language with Apache License 2.0
def read_examples(self, queries_file, p=1.0):
    """Read a json file into a list of Example."""
    with tf.gfile.Open(queries_file, "r") as reader:
      examples = []
      for line in tqdm(reader):
        item = json.loads(line.strip())

        qas_id = item["id"]
        relation_1 = random.choice(item["relation"][0]["text"])
        relation_2 = random.choice(item["relation"][1]["text"])
        if item["subject"]["name"] is None or random.uniform(0., 1.) < p:
          question_text = (
              random.choice(item["subject"]["mentions"])["text"] + " . " +
              relation_1 + " . " + relation_2)
        else:
          question_text = (
              item["subject"]["name"] + " . " + relation_1 + " . " + relation_2)
        answer_mention = item["object"]["global_mention"]
        answer_entity = item["object"]["ent_id"]
        bridge_mention = item["bridge"]["global_mention_1"]
        bridge_entity = [item["bridge"]["ent_id"]]

        example = Example(
            qas_id=qas_id,
            question_text=question_text,
            subject_entity=[item["subject"]["wikidata_id"]],
            relations=[relation_1, relation_2],
            answer_mention=[answer_mention],
            answer_entity=[answer_entity],
            bridge_mention=[bridge_mention],
            bridge_entity=[bridge_entity])
        examples.append(example)

    return examples 
Example #29
Source File: input_fns.py    From language with Apache License 2.0
def read_examples(self, queries_file, p=1.0):
    """Read a json file into a list of Example."""
    with tf.gfile.Open(queries_file, "r") as reader:
      examples = []
      for line in tqdm(reader):
        item = json.loads(line.strip())

        qas_id = item["id"]
        relation_1 = random.choice(item["relation"][0]["text"])
        relation_2 = random.choice(item["relation"][1]["text"])
        relation_3 = random.choice(item["relation"][2]["text"])
        if item["subject"]["name"] is None or random.uniform(0., 1.) < p:
          question_text = (
              random.choice(item["subject"]["mentions"])["text"] + " . " +
              relation_1 + " . " + relation_2 + " . " + relation_3)
        else:
          question_text = (
              item["subject"]["name"] + " . " + relation_1 + " . " +
              relation_2 + " . " + relation_3)
        answer_mention = item["object"]["global_mention"]
        answer_entity = item["object"]["ent_id"]
        bridge_mention = item["bridge_0"]["global_mention_1"]
        bridge_entity = [item["bridge_%d" % ii]["ent_id"] for ii in range(2)]

        example = Example(
            qas_id=qas_id,
            question_text=question_text,
            subject_entity=[item["subject"]["wikidata_id"]],
            relations=[relation_1, relation_2, relation_3],
            answer_mention=[answer_mention],
            answer_entity=[answer_entity],
            bridge_mention=[bridge_mention],
            bridge_entity=[bridge_entity])
        examples.append(example)

    return examples 
Example #30
Source File: answer_extractor.py    From language with Apache License 2.0
def process_feature(self, feature):
    """Write a InputFeature to the TFRecordWriter as a tf.train.Example."""
    self.num_features += 1

    def create_int_feature(values):
      feature = tf.train.Feature(
          int64_list=tf.train.Int64List(value=list(values)))
      return feature

    features = collections.OrderedDict()
    features["unique_ids"] = create_int_feature([feature.unique_id])
    features["input_ids"] = create_int_feature(feature.input_ids)
    features["input_mask"] = create_int_feature(feature.input_mask)
    features["segment_ids"] = create_int_feature(feature.segment_ids)
    features["supporting_mask"] = create_int_feature(feature.supporting_mask)

    if self.is_training:
      features["start_positions"] = create_int_feature([feature.start_position])
      features["end_positions"] = create_int_feature([feature.end_position])
      impossible = 0
      if feature.is_impossible:
        impossible = 1
      features["is_impossible"] = create_int_feature([impossible])
      features["question_type"] = create_int_feature([feature.question_type])
      features["supporting_labels"] = create_int_feature(
          feature.supporting_labels)

    tf_example = tf.train.Example(features=tf.train.Features(feature=features))
    self._writer.write(tf_example.SerializeToString())