Python tensorflow.compat.v1.Example() Examples
The following are 30 code examples of tensorflow.compat.v1.Example(), collected from open-source projects. You can go to the original project or source file by following the links above each example, or browse the other available functions and classes of the tensorflow.compat.v1 module.
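All of the examples below revolve around the same tf.train.Example workflow: wrap named tf.train.Feature values in a tf.train.Features map, serialize the resulting proto for storage in a TFRecord file, and later parse it back into tensors. Here is a minimal, self-contained sketch of that round trip; the feature names and values are illustrative and not taken from any project below.

import tensorflow.compat.v1 as tf

# Build a tf.train.Example: each feature is an int64, float, or bytes list.
features = {
    "input_ids": tf.train.Feature(
        int64_list=tf.train.Int64List(value=[101, 2054, 102])),
    "score": tf.train.Feature(
        float_list=tf.train.FloatList(value=[0.9])),
    "label": tf.train.Feature(
        bytes_list=tf.train.BytesList(value=[b"entailment"])),
}
example = tf.train.Example(features=tf.train.Features(feature=features))

# Serialize the proto, as the TFRecord writers below do...
serialized = example.SerializeToString()

# ...and parse it back into tensors, as the input functions below do.
parsed = tf.parse_single_example(
    serialized,
    features={
        "input_ids": tf.FixedLenFeature([3], dtype=tf.int64),
        "score": tf.FixedLenFeature([1], dtype=tf.float32),
        "label": tf.FixedLenFeature([], dtype=tf.string),
    })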
Example #1
Source File: run_finetune_coherence.py From language with Apache License 2.0

def convert_single_example(example, rand_example, max_seq_length, tokenizer):
  """Converts a single `InputExample` into a single `InputFeatures`."""
  # Add padding examples here
  example_type = collections.namedtuple(
      "Example", ["input_ids", "input_mask", "segment_ids", "labels"])
  labels = range(8)  # inconsequential
  rand_sents = rand_example[:8]
  target_sents = example[:4] + example[5:] + rand_sents
  bert_input = create_cpc_input_from_text(
      tokenizer,
      example[4],
      target_sents,
      labels,
      group_size=16,
      max_seq_length=max_seq_length)
  feature = example_type(bert_input.tokens, bert_input.mask,
                         bert_input.seg_ids, labels)
  return feature
Example #2
Source File: generate_detection_data.py From models with Apache License 2.0

def construct_pipeline(pipeline, input_tfrecord, output_tfrecord, model_dir,
                       confidence_threshold, num_shards):
  """Returns a Beam pipeline to run object detection inference.

  Args:
    pipeline: Initialized beam pipeline.
    input_tfrecord: A TFRecord of tf.train.Example protos containing images.
    output_tfrecord: A TFRecord of tf.train.Example protos that contain images
      in the input TFRecord and the detections from the model.
    model_dir: Path to `saved_model` to use for inference.
    confidence_threshold: Threshold to use when keeping detection results.
    num_shards: The number of output shards.
  """
  input_collection = (
      pipeline | 'ReadInputTFRecord' >> beam.io.tfrecordio.ReadFromTFRecord(
          input_tfrecord,
          coder=beam.coders.BytesCoder()))
  output_collection = input_collection | 'RunInference' >> beam.ParDo(
      GenerateDetectionDataFn(model_dir, confidence_threshold))
  output_collection = output_collection | 'Reshuffle' >> beam.Reshuffle()
  _ = output_collection | 'WritetoDisk' >> beam.io.tfrecordio.WriteToTFRecord(
      output_tfrecord,
      num_shards=num_shards,
      coder=beam.coders.ProtoCoder(tf.train.Example))
Example #3
Source File: run_squad.py From mesh with Apache License 2.0

def process_feature(self, feature):
  """Write an InputFeature to the TFRecordWriter as a tf.train.Example."""
  self.num_features += 1

  def create_int_feature(values):
    feature = tf.train.Feature(
        int64_list=tf.train.Int64List(value=list(values)))
    return feature

  features = collections.OrderedDict()
  features["unique_ids"] = create_int_feature([feature.unique_id])
  features["input_ids"] = create_int_feature(feature.input_ids)
  features["input_mask"] = create_int_feature(feature.input_mask)
  features["segment_ids"] = create_int_feature(feature.segment_ids)

  if self.is_training:
    features["start_positions"] = create_int_feature([feature.start_position])
    features["end_positions"] = create_int_feature([feature.end_position])
    impossible = 0
    if feature.is_impossible:
      impossible = 1
    features["is_impossible"] = create_int_feature([impossible])

  tf_example = tf.train.Example(features=tf.train.Features(feature=features))
  self._writer.write(tf_example.SerializeToString())
Example #4
Source File: generator_utils.py From tensor2tensor with Apache License 2.0

def to_example(dictionary):
  """Helper: build tf.Example from (string -> int/float/str list) dictionary."""
  features = {}
  for (k, v) in six.iteritems(dictionary):
    if not v:
      raise ValueError("Empty generated field: %s" % str((k, v)))
    # Subtly in PY2 vs PY3, map is not subscriptable in py3. As a result,
    # map objects will fail with TypeError, unless converted to a list.
    if six.PY3 and isinstance(v, map):
      v = list(v)
    if (isinstance(v[0], six.integer_types) or
        np.issubdtype(type(v[0]), np.integer)):
      features[k] = tf.train.Feature(int64_list=tf.train.Int64List(value=v))
    elif isinstance(v[0], float):
      features[k] = tf.train.Feature(float_list=tf.train.FloatList(value=v))
    elif isinstance(v[0], six.string_types):
      if not six.PY2:  # Convert in python 3.
        v = [bytes(x, "utf-8") for x in v]
      features[k] = tf.train.Feature(bytes_list=tf.train.BytesList(value=v))
    elif isinstance(v[0], bytes):
      features[k] = tf.train.Feature(bytes_list=tf.train.BytesList(value=v))
    else:
      raise ValueError("Value for %s is not a recognized type; v: %s type: %s" %
                       (k, str(v[0]), str(type(v[0]))))
  return tf.train.Example(features=tf.train.Features(feature=features))
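A hypothetical call, to show what to_example accepts and returns (the dictionary keys here are made up for illustration): it takes a flat dict whose values are non-empty lists of ints, floats, strings, or bytes, and returns the corresponding proto ready for serialization.

# Illustrative input; any string-keyed dict of int/float/str/bytes lists works.
d = {"inputs": [3, 8, 1], "targets": [5, 2, 1], "lang": ["en"]}
ex = to_example(d)  # tf.train.Example with two int64 features, one bytes feature
serialized = ex.SerializeToString()  # ready to write to a TFRecord file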
Example #5
Source File: run_squad.py From language with Apache License 2.0

def process_feature(self, feature):
  """Write an InputFeature to the TFRecordWriter as a tf.train.Example."""
  self.num_features += 1

  def create_int_feature(values):
    feature = tf.train.Feature(
        int64_list=tf.train.Int64List(value=list(values)))
    return feature

  features = collections.OrderedDict()
  features["unique_ids"] = create_int_feature([feature.unique_id])
  features["input_ids"] = create_int_feature(feature.input_ids)
  features["input_mask"] = create_int_feature(feature.input_mask)
  features["segment_ids"] = create_int_feature(feature.segment_ids)

  if self.is_training:
    features["start_positions"] = create_int_feature([feature.start_position])
    features["end_positions"] = create_int_feature([feature.end_position])
    impossible = 0
    if feature.is_impossible:
      impossible = 1
    features["is_impossible"] = create_int_feature([impossible])

  tf_example = tf.train.Example(features=tf.train.Features(feature=features))
  self._writer.write(tf_example.SerializeToString())
Example #6
Source File: run_bert_boolq.py From language with Apache License 2.0

def file_based_convert_examples_to_features(
    examples, label_list, max_seq_length, tokenizer, output_file):
  """Convert a set of `InputExample`s to a TFRecord file."""
  writer = tf.python_io.TFRecordWriter(output_file)

  for (ex_index, example) in enumerate(examples):
    if ex_index % 10000 == 0:
      tf.logging.info("Writing example %d of %d" % (ex_index, len(examples)))

    feature = convert_single_example(ex_index, example, label_list,
                                     max_seq_length, tokenizer)

    def create_int_feature(values):
      f = tf.train.Feature(int64_list=tf.train.Int64List(value=list(values)))
      return f

    features = collections.OrderedDict()
    features["input_ids"] = create_int_feature(feature.input_ids)
    features["input_mask"] = create_int_feature(feature.input_mask)
    features["segment_ids"] = create_int_feature(feature.segment_ids)
    features["label_ids"] = create_int_feature([feature.label_id])

    tf_example = tf.train.Example(features=tf.train.Features(feature=features))
    writer.write(tf_example.SerializeToString())
Example #7
Source File: run_squad_membership.py From language with Apache License 2.0

def process_feature(self, feature):
  """Write an InputFeature to the TFRecordWriter as a tf.train.Example."""
  self.num_features += 1

  def create_int_feature(values):
    feature = tf.train.Feature(
        int64_list=tf.train.Int64List(value=list(values)))
    return feature

  features = collections.OrderedDict()
  features["unique_ids"] = create_int_feature([feature.unique_id])
  features["input_ids"] = create_int_feature(feature.input_ids)
  features["input_mask"] = create_int_feature(feature.input_mask)
  features["segment_ids"] = create_int_feature(feature.segment_ids)

  if self.is_training:
    features["label_ids"] = create_int_feature([feature.label_id])

  tf_example = tf.train.Example(features=tf.train.Features(feature=features))
  self._writer.write(tf_example.SerializeToString())
Example #8
Source File: run_classifier.py From albert with Apache License 2.0

def serving_input_receiver_fn():
  """Creates an input function for serving."""
  seq_len = FLAGS.max_seq_length
  serialized_example = tf.placeholder(
      dtype=tf.string, shape=[None], name="serialized_example")
  features = {
      "input_ids": tf.FixedLenFeature([seq_len], dtype=tf.int64),
      "input_mask": tf.FixedLenFeature([seq_len], dtype=tf.int64),
      "segment_ids": tf.FixedLenFeature([seq_len], dtype=tf.int64),
  }
  feature_map = tf.parse_example(serialized_example, features=features)
  feature_map["is_real_example"] = tf.constant(1, dtype=tf.int32)
  feature_map["label_ids"] = tf.constant(0, dtype=tf.int32)

  # tf.Example only supports tf.int64, but the TPU only supports tf.int32.
  # So cast all int64 to int32.
  for name in feature_map.keys():
    t = feature_map[name]
    if t.dtype == tf.int64:
      t = tf.to_int32(t)
    feature_map[name] = t

  return tf.estimator.export.ServingInputReceiver(
      features=feature_map, receiver_tensors=serialized_example)
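In a typical Estimator setup, a receiver function like this is handed to the export call. A sketch, assuming an already-trained `estimator` and a FLAGS.export_dir defined elsewhere in the script (both are assumptions, not shown in the excerpt above):

# Hypothetical export call; `estimator` and `FLAGS.export_dir` come from
# the surrounding training script.
estimator.export_saved_model(FLAGS.export_dir, serving_input_receiver_fn)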
Example #9
Source File: cache_tasks_main.py From text-to-text-transfer-transformer with Apache License 2.0

def _info_dict(self, ex):
  """Builds an info dict (num_shards plus per-feature shape/dtype) from one example."""
  if not ex:
    return {}
  assert len(ex) == 1
  ex = ex[0]
  info = {"num_shards": self._num_shards, "features": {}}
  feature_dict = info["features"]
  for k, v in ex.items():
    t = tf.constant(v)
    # Change int32 to int64 since the tf.Example proto will store it this way.
    dtype = "int64" if t.dtype.name == "int32" else t.dtype.name
    shape = [None] * len(t.shape)
    feature_dict[k] = {"shape": shape, "dtype": dtype}
  return info
Example #10
Source File: test_utils.py From text-to-text-transfer-transformer with Apache License 2.0

def _dump_examples_to_tfrecord(path, examples):
  """Writes list of example dicts to a TFRecord file of tf.Example protos."""
  logging.info("Writing examples to TFRecord: %s", path)
  with tf.io.TFRecordWriter(path) as writer:
    for ex in examples:
      writer.write(dataset_utils.dict_to_tfexample(ex).SerializeToString())
Example #11
Source File: cache_tasks_main.py From text-to-text-transfer-transformer with Apache License 2.0

def expand(self, pcoll):
  # Convert each example dict to a tf.train.Example proto, reshuffle, and
  # write the result as sharded TFRecord files.
  return (
      pcoll
      | beam.Map(t5.data.dict_to_tfexample)
      | beam.Reshuffle()
      | beam.io.tfrecordio.WriteToTFRecord(
          self._output_path,
          num_shards=self._num_shards,
          coder=beam.coders.ProtoCoder(tf.train.Example)))
Example #12
Source File: run_finetune_coherence.py From language with Apache License 2.0

def file_based_convert_examples_to_features(input_file, max_seq_length,
                                            tokenizer, output_file):
  """Convert a set of `InputExample`s to a TFRecord file."""
  tmp1, tmp2, _, _ = read_data(input_file, 0)
  examples = tmp1 + tmp2
  dirname = os.path.dirname(output_file)
  if not tf.gfile.Exists(dirname):
    tf.gfile.MakeDirs(dirname)
  writer = tf.python_io.TFRecordWriter(output_file)

  for (ex_index, example) in enumerate(examples):
    if ex_index % 1000 == 0:
      tf.logging.info("Writing example %d of %d" % (ex_index, len(examples)))
    rand_example = random.choice(examples)
    input_feature = convert_single_example(example, rand_example,
                                           max_seq_length, tokenizer)
    features = collections.OrderedDict()
    for i in range(16):
      features["input_ids" + str(i)] = create_int_feature(
          input_feature.input_ids[i])
      features["input_mask" + str(i)] = create_int_feature(
          input_feature.input_mask[i])
      features["segment_ids" + str(i)] = create_int_feature(
          input_feature.segment_ids[i])
    features["labels"] = create_int_feature(input_feature.labels)
    features["label_types"] = create_int_feature(list(range(8)))
    tf_example = tf.train.Example(features=tf.train.Features(feature=features))
    writer.write(tf_example.SerializeToString())
  writer.close()
Example #13
Source File: run_bert_boolq_distill.py From language with Apache License 2.0

def file_based_convert_examples_to_features(
    examples, label_list, max_seq_length, tokenizer, output_file):
  """Convert a set of `InputExample`s to a TFRecord file."""
  writer = tf.python_io.TFRecordWriter(output_file)

  for (ex_index, example) in enumerate(examples):
    if ex_index % 10000 == 0:
      tf.logging.info("Writing example %d of %d" % (ex_index, len(examples)))

    feature = convert_single_example(ex_index, example, label_list,
                                     max_seq_length, tokenizer)

    def create_int_feature(values):
      f = tf.train.Feature(int64_list=tf.train.Int64List(value=list(values)))
      return f

    def create_float_feature(values):
      f = tf.train.Feature(float_list=tf.train.FloatList(value=list(values)))
      return f

    features = collections.OrderedDict()
    features["input_ids"] = create_int_feature(feature.input_ids)
    features["input_mask"] = create_int_feature(feature.input_mask)
    features["segment_ids"] = create_int_feature(feature.segment_ids)
    features["label_ids"] = create_int_feature([feature.label_id])
    features["probs"] = create_float_feature(feature.probs)

    tf_example = tf.train.Example(features=tf.train.Features(feature=features))
    writer.write(tf_example.SerializeToString())
Example #14
Source File: run_pretraining.py From training with Apache License 2.0

def _decode_record(record, name_to_features):
  """Decodes a record to a TensorFlow example."""
  example = tf.parse_single_example(record, name_to_features)

  # tf.Example only supports tf.int64, but the TPU only supports tf.int32.
  # So cast all int64 to int32.
  for name in list(example.keys()):
    t = example[name]
    if t.dtype == tf.int64:
      t = tf.to_int32(t)
    example[name] = t

  return example
Example #15
Source File: abstract_model.py From tensor2robot with Apache License 2.0

def get_feature_specification(self, mode):
  """Required features for the model_fn/model_inference_fn.

  Note, the model_fn might use additional features for debugging/development
  purposes. The create_export_outputs_fn will however only require the
  specified required features. Only this subset of features will be used to
  generate automatic tf.Example extractors and numpy placeholders for the
  serving models.

  Args:
    mode: The mode for feature specifications.
  """
Example #16
Source File: reader.py From magenta with Apache License 2.0

def get_example(self, batch_size):
  """Get a single example from the tfrecord file.

  Args:
    batch_size: Int, minibatch size.

  Returns:
    tf.Example protobuf parsed from tfrecord.
  """
  reader = tf.TFRecordReader()
  num_epochs = None if self.is_training else 1
  capacity = batch_size
  path_queue = tf.train.input_producer(
      [self.record_path],
      num_epochs=num_epochs,
      shuffle=self.is_training,
      capacity=capacity)
  unused_key, serialized_example = reader.read(path_queue)
  features = {
      "note_str": tf.FixedLenFeature([], dtype=tf.string),
      "pitch": tf.FixedLenFeature([1], dtype=tf.int64),
      "velocity": tf.FixedLenFeature([1], dtype=tf.int64),
      "audio": tf.FixedLenFeature([64000], dtype=tf.float32),
      "qualities": tf.FixedLenFeature([10], dtype=tf.int64),
      "instrument_source": tf.FixedLenFeature([1], dtype=tf.int64),
      "instrument_family": tf.FixedLenFeature([1], dtype=tf.int64),
  }
  example = tf.parse_single_example(serialized_example, features)
  return example
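Because this reader uses TF1 queue runners rather than tf.data, the returned example tensors only produce values once the queue threads are running inside a session. A sketch of consuming it (`reader_obj` is a hypothetical instance of the surrounding class):

example = reader_obj.get_example(batch_size=8)
with tf.Session() as sess:
  sess.run(tf.local_variables_initializer())  # initializes the epoch counter
  coord = tf.train.Coordinator()
  threads = tf.train.start_queue_runners(sess=sess, coord=coord)
  parsed = sess.run(example)  # dict of numpy values for one example
  coord.request_stop()
  coord.join(threads)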
Example #17
Source File: preprocessors.py From tensor2robot with Apache License 2.0

def create_metaexample_spec(model_spec, num_samples_per_task, prefix):
  """Converts a model feature/label spec into a MetaExample spec.

  Args:
    model_spec: The base model tensor spec.
    num_samples_per_task: Number of episodes in the task.
    prefix: The tf.Example feature column name prefix.

  Returns:
    A TSpecStructure. For each spec in model_spec, the output contains
    num_samples_per_task corresponding specs stored as: "<name>/i".
  """
  model_spec = utils.flatten_spec_structure(model_spec)
  meta_example_spec = TSpecStructure()
  for key in model_spec.keys():
    for i in range(num_samples_per_task):
      spec = model_spec[key]
      name_prefix = '{:s}_ep{:d}'.format(prefix, i)
      new_name = name_prefix + '/' + six.ensure_str(spec.name)
      meta_example_spec[key + '/{:}'.format(i)] = (
          utils.ExtendedTensorSpec.from_spec(spec, name=new_name))
  return meta_example_spec
Example #18
Source File: create_cococameratraps_tfexample_main.py From models with Apache License 2.0

def create_pipeline(pipeline,
                    image_directory,
                    input_annotations_file,
                    output_tfrecord_prefix=None,
                    num_images_per_shard=200,
                    keep_bboxes=True):
  """Creates a beam pipeline for producing a COCO-CameraTraps Image dataset.

  Args:
    pipeline: Initialized beam pipeline.
    image_directory: Path to image directory.
    input_annotations_file: Path to a coco-cameratraps annotation file.
    output_tfrecord_prefix: Absolute path for tfrecord outputs. Final files
      will be named {output_tfrecord_prefix}@N.
    num_images_per_shard: The number of images to store in each shard.
    keep_bboxes: Whether to keep any bounding boxes that exist in the json file.
  """
  logging.info('Reading data from COCO-CameraTraps Dataset.')
  data = load_json_data(input_annotations_file)
  num_shards = int(np.ceil(float(len(data['images'])) / num_images_per_shard))
  image_examples = (
      pipeline
      | ('CreateCollections') >> beam.Create(
          [im['id'] for im in data['images']])
      | ('ParseImage') >> beam.ParDo(
          ParseImage(image_directory, data['images'], data['annotations'],
                     data['categories'], keep_bboxes=keep_bboxes)))
  _ = (
      image_examples
      | ('Reshuffle') >> beam.Reshuffle()
      | ('WriteTfImageExample') >> beam.io.tfrecordio.WriteToTFRecord(
          output_tfrecord_prefix,
          num_shards=num_shards,
          coder=beam.coders.ProtoCoder(tf.train.Example)))
Example #19
Source File: classifier_utils.py From language with Apache License 2.0

def file_based_convert_examples_to_features(examples, label_list,
                                            max_seq_length, tokenizer,
                                            output_file, task_name):
  """Convert a set of `InputExample`s to a TFRecord file."""
  writer = tf.python_io.TFRecordWriter(output_file)

  for (ex_index, example) in enumerate(examples):
    if ex_index % 10000 == 0:
      tf.logging.info("Writing example %d of %d" % (ex_index, len(examples)))

    feature = convert_single_example(ex_index, example, label_list,
                                     max_seq_length, tokenizer, task_name)

    def create_int_feature(values):
      f = tf.train.Feature(int64_list=tf.train.Int64List(value=list(values)))
      return f

    def create_float_feature(values):
      f = tf.train.Feature(float_list=tf.train.FloatList(value=list(values)))
      return f

    features = collections.OrderedDict()
    features["input_ids"] = create_int_feature(feature.input_ids)
    features["input_mask"] = create_int_feature(feature.input_mask)
    features["segment_ids"] = create_int_feature(feature.segment_ids)
    features["label_ids"] = (
        create_float_feature([feature.label_id])
        if task_name == "sts-b" else create_int_feature([feature.label_id]))
    features["is_real_example"] = create_int_feature(
        [int(feature.is_real_example)])

    tf_example = tf.train.Example(features=tf.train.Features(feature=features))
    writer.write(tf_example.SerializeToString())
  writer.close()
Example #20
Source File: run_nq.py From language with Apache License 2.0

def process_feature(self, feature):
  """Write an InputFeature to the TFRecordWriter as a tf.train.Example."""
  self.num_features += 1

  def create_int_feature(values):
    feature = tf.train.Feature(
        int64_list=tf.train.Int64List(value=list(values)))
    return feature

  features = collections.OrderedDict()
  features["unique_ids"] = create_int_feature([feature.unique_id])
  features["input_ids"] = create_int_feature(feature.input_ids)
  features["input_mask"] = create_int_feature(feature.input_mask)
  features["segment_ids"] = create_int_feature(feature.segment_ids)

  if self.is_training:
    features["start_positions"] = create_int_feature([feature.start_position])
    features["end_positions"] = create_int_feature([feature.end_position])
    features["answer_types"] = create_int_feature([feature.answer_type])
  else:
    token_map = [-1] * len(feature.input_ids)
    for k, v in feature.token_to_orig_map.items():
      token_map[k] = v
    features["token_map"] = create_int_feature(token_map)

  tf_example = tf.train.Example(features=tf.train.Features(feature=features))
  self._writer.write(tf_example.SerializeToString())
Example #21
Source File: run_concat_classifier.py From language with Apache License 2.0

def file_based_convert_examples_to_features(examples, label_list,
                                            max_seq_length, tokenizer,
                                            output_file):
  """Convert a set of `InputExample`s to a TFRecord file."""
  writer = tf.python_io.TFRecordWriter(output_file)

  for (ex_index, example) in enumerate(examples):
    if ex_index % 10000 == 0:
      tf.logging.info("Writing example %d of %d" % (ex_index, len(examples)))

    feature = convert_single_example(ex_index, example, label_list,
                                     max_seq_length, tokenizer)

    def create_int_feature(values):
      f = tf.train.Feature(int64_list=tf.train.Int64List(value=list(values)))
      return f

    features = collections.OrderedDict()
    features["input_ids"] = create_int_feature(feature.input_ids)
    features["input_mask"] = create_int_feature(feature.input_mask)
    features["segment_ids"] = create_int_feature(feature.segment_ids)
    features["label_ids"] = create_int_feature([feature.label_id])
    features["is_real_example"] = create_int_feature(
        [int(feature.is_real_example)])

    tf_example = tf.train.Example(features=tf.train.Features(feature=features))
    writer.write(tf_example.SerializeToString())
  writer.close()
Example #22
Source File: run_classifier.py From language with Apache License 2.0

def file_based_convert_examples_to_features(examples, label_list,
                                            max_seq_length, tokenizer,
                                            output_file):
  """Convert a set of `InputExample`s to a TFRecord file."""
  writer = tf.python_io.TFRecordWriter(output_file)

  for (ex_index, example) in enumerate(examples):
    if ex_index % 10000 == 0:
      tf.logging.info("Writing example %d of %d" % (ex_index, len(examples)))

    feature = convert_single_example(ex_index, example, label_list,
                                     max_seq_length, tokenizer)

    def create_int_feature(values):
      f = tf.train.Feature(int64_list=tf.train.Int64List(value=list(values)))
      return f

    features = collections.OrderedDict()
    features["input_ids"] = create_int_feature(feature.input_ids)
    features["input_mask"] = create_int_feature(feature.input_mask)
    features["segment_ids"] = create_int_feature(feature.segment_ids)
    features["label_ids"] = create_int_feature([feature.label_id])
    features["is_real_example"] = create_int_feature(
        [int(feature.is_real_example)])

    tf_example = tf.train.Example(features=tf.train.Features(feature=features))
    writer.write(tf_example.SerializeToString())
  writer.close()
Example #23
Source File: run_nq.py From language with Apache License 2.0

def process(self, example):
  """Converts an NQ example into a list of serialized tf examples."""
  nq_examples = read_nq_entry(example, self.is_training)
  input_features = []
  for nq_example in nq_examples:
    input_features.extend(
        convert_single_example(nq_example, self.tokenizer, self.is_training))

  for input_feature in input_features:
    input_feature.example_index = int(example["id"])
    input_feature.unique_id = (
        input_feature.example_index + input_feature.doc_span_index)

    def create_int_feature(values):
      return tf.train.Feature(
          int64_list=tf.train.Int64List(value=list(values)))

    features = collections.OrderedDict()
    features["unique_ids"] = create_int_feature([input_feature.unique_id])
    features["input_ids"] = create_int_feature(input_feature.input_ids)
    features["input_mask"] = create_int_feature(input_feature.input_mask)
    features["segment_ids"] = create_int_feature(input_feature.segment_ids)

    if self.is_training:
      features["start_positions"] = create_int_feature(
          [input_feature.start_position])
      features["end_positions"] = create_int_feature(
          [input_feature.end_position])
      features["answer_types"] = create_int_feature(
          [input_feature.answer_type])
    else:
      token_map = [-1] * len(input_feature.input_ids)
      for k, v in input_feature.token_to_orig_map.items():
        token_map[k] = v
      features["token_map"] = create_int_feature(token_map)

    yield tf.train.Example(features=tf.train.Features(
        feature=features)).SerializeToString()
Example #24
Source File: run_pretraining.py From language with Apache License 2.0

def _decode_record(record, name_to_features):
  """Decodes a record to a TensorFlow example."""
  example = tf.parse_single_example(record, name_to_features)

  # tf.Example only supports tf.int64, but the TPU only supports tf.int32.
  # So cast all int64 to int32.
  for name in list(example.keys()):
    t = example[name]
    if t.dtype == tf.int64:
      t = tf.to_int32(t)
    example[name] = t

  return example
Example #25
Source File: input_fns.py From language with Apache License 2.0

def process_feature(self, feature):
  """Write an InputFeature to the TFRecordWriter as a tf.train.Example."""
  self.num_features += 1

  def create_int_feature(values):
    feature = tf.train.Feature(
        int64_list=tf.train.Int64List(value=list(values)))
    return feature

  def create_bytes_feature(value):
    return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))

  features = collections.OrderedDict()
  features["qas_ids"] = create_bytes_feature(feature.qas_id)
  features["qry_input_ids"] = create_int_feature(feature.qry_input_ids)
  features["qry_input_mask"] = create_int_feature(feature.qry_input_mask)
  features["qry_entity_id"] = create_int_feature(feature.qry_entity_id)

  if feature.relation_input_ids:
    for ii in range(len(feature.relation_input_ids)):
      features["rel_input_ids_%d" % ii] = create_int_feature(
          feature.relation_input_ids[ii])
      features["rel_input_mask_%d" % ii] = create_int_feature(
          feature.relation_input_mask[ii])

  if self.is_training:
    if feature.answer_mention is not None:
      features["answer_mentions"] = create_int_feature(feature.answer_mention)
    features["answer_entities"] = create_int_feature(feature.answer_entity)
    if self.has_bridge:
      if feature.bridge_mention is not None:
        features["bridge_mentions"] = create_int_feature(
            feature.bridge_mention)
      for ii, bridge_entity in enumerate(feature.bridge_entity):
        features["bridge_entities_%d" % ii] = create_int_feature(bridge_entity)

  tf_example = tf.train.Example(features=tf.train.Features(feature=features))
  self._writer.write(tf_example.SerializeToString())
Example #26
Source File: input_fns.py From language with Apache License 2.0

def input_fn_builder(input_file, is_training, drop_remainder,
                     names_to_features):
  """Creates an `input_fn` closure to be passed to TPUEstimator."""

  def _decode_record(record, name_to_features):
    """Decodes a record to a TensorFlow example."""
    example = tf.parse_single_example(record, name_to_features)

    # tf.Example only supports tf.int64, but the TPU only supports tf.int32.
    # So cast all int64 to int32.
    for name in list(example.keys()):
      t = example[name]
      if t.dtype == tf.int64:
        t = tf.to_int32(t)
      example[name] = t

    return example

  def input_fn(params):
    """The actual input function."""
    batch_size = params["batch_size"]

    # For training, we want a lot of parallel reading and shuffling.
    # For eval, we want no shuffling and parallel reading doesn't matter.
    d = tf.data.TFRecordDataset(input_file)
    if is_training:
      d = d.repeat()
      d = d.shuffle(buffer_size=100)

    d = d.apply(
        contrib_data.map_and_batch(
            lambda record: _decode_record(record, names_to_features),
            batch_size=batch_size,
            drop_remainder=drop_remainder))

    return d

  return input_fn
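A sketch of how such a builder is typically consumed; the file path, sequence length, and `estimator` here are illustrative assumptions, not part of the excerpt above:

# Hypothetical wiring; feature shapes must match what was written to the file.
name_to_features = {
    "input_ids": tf.FixedLenFeature([128], tf.int64),
    "input_mask": tf.FixedLenFeature([128], tf.int64),
    "segment_ids": tf.FixedLenFeature([128], tf.int64),
}
train_input_fn = input_fn_builder(
    input_file="train.tfrecord",  # hypothetical path
    is_training=True,
    drop_remainder=True,
    names_to_features=name_to_features)
estimator.train(input_fn=train_input_fn, max_steps=100000)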
Example #27
Source File: input_fns.py From language with Apache License 2.0

def read_examples(self, queries_file, p=1.0):
  """Read a json file into a list of Example."""
  with tf.gfile.Open(queries_file, "r") as reader:
    examples = []
    for line in tqdm(reader):
      item = json.loads(line.strip())
      qas_id = item["id"]
      relation = random.choice(item["relation"]["text"])
      if item["subject"]["name"] is None or random.uniform(0., 1.) < p:
        question_text = (
            random.choice(item["subject"]["mentions"])["text"] + " . " +
            relation)
      else:
        question_text = item["subject"]["name"] + " . " + relation
      answer_mention = item["object"]["global_mention"]
      answer_entity = item["object"]["ent_id"]
      example = Example(
          qas_id=qas_id,
          question_text=question_text,
          subject_entity=[item["subject"]["wikidata_id"]],
          relations=[relation],
          answer_mention=[answer_mention],
          answer_entity=[answer_entity])
      examples.append(example)
  return examples
Example #28
Source File: input_fns.py From language with Apache License 2.0

def read_examples(self, queries_file, p=1.0):
  """Read a json file into a list of Example."""
  with tf.gfile.Open(queries_file, "r") as reader:
    examples = []
    for line in tqdm(reader):
      item = json.loads(line.strip())
      qas_id = item["id"]
      relation_1 = random.choice(item["relation"][0]["text"])
      relation_2 = random.choice(item["relation"][1]["text"])
      if item["subject"]["name"] is None or random.uniform(0., 1.) < p:
        question_text = (
            random.choice(item["subject"]["mentions"])["text"] + " . " +
            relation_1 + " . " + relation_2)
      else:
        question_text = (
            item["subject"]["name"] + " . " + relation_1 + " . " + relation_2)
      answer_mention = item["object"]["global_mention"]
      answer_entity = item["object"]["ent_id"]
      bridge_mention = item["bridge"]["global_mention_1"]
      bridge_entity = [item["bridge"]["ent_id"]]
      example = Example(
          qas_id=qas_id,
          question_text=question_text,
          subject_entity=[item["subject"]["wikidata_id"]],
          relations=[relation_1, relation_2],
          answer_mention=[answer_mention],
          answer_entity=[answer_entity],
          bridge_mention=[bridge_mention],
          bridge_entity=[bridge_entity])
      examples.append(example)
  return examples
Example #29
Source File: input_fns.py From language with Apache License 2.0

def read_examples(self, queries_file, p=1.0):
  """Read a json file into a list of Example."""
  with tf.gfile.Open(queries_file, "r") as reader:
    examples = []
    for line in tqdm(reader):
      item = json.loads(line.strip())
      qas_id = item["id"]
      relation_1 = random.choice(item["relation"][0]["text"])
      relation_2 = random.choice(item["relation"][1]["text"])
      relation_3 = random.choice(item["relation"][2]["text"])
      if item["subject"]["name"] is None or random.uniform(0., 1.) < p:
        question_text = (
            random.choice(item["subject"]["mentions"])["text"] + " . " +
            relation_1 + " . " + relation_2 + " . " + relation_3)
      else:
        question_text = (
            item["subject"]["name"] + " . " + relation_1 + " . " +
            relation_2 + " . " + relation_3)
      answer_mention = item["object"]["global_mention"]
      answer_entity = item["object"]["ent_id"]
      bridge_mention = item["bridge_0"]["global_mention_1"]
      bridge_entity = [item["bridge_%d" % ii]["ent_id"] for ii in range(2)]
      example = Example(
          qas_id=qas_id,
          question_text=question_text,
          subject_entity=[item["subject"]["wikidata_id"]],
          relations=[relation_1, relation_2, relation_3],
          answer_mention=[answer_mention],
          answer_entity=[answer_entity],
          bridge_mention=[bridge_mention],
          bridge_entity=[bridge_entity])
      examples.append(example)
  return examples
Example #30
Source File: answer_extractor.py From language with Apache License 2.0

def process_feature(self, feature):
  """Write an InputFeature to the TFRecordWriter as a tf.train.Example."""
  self.num_features += 1

  def create_int_feature(values):
    feature = tf.train.Feature(
        int64_list=tf.train.Int64List(value=list(values)))
    return feature

  features = collections.OrderedDict()
  features["unique_ids"] = create_int_feature([feature.unique_id])
  features["input_ids"] = create_int_feature(feature.input_ids)
  features["input_mask"] = create_int_feature(feature.input_mask)
  features["segment_ids"] = create_int_feature(feature.segment_ids)
  features["supporting_mask"] = create_int_feature(feature.supporting_mask)

  if self.is_training:
    features["start_positions"] = create_int_feature([feature.start_position])
    features["end_positions"] = create_int_feature([feature.end_position])
    impossible = 0
    if feature.is_impossible:
      impossible = 1
    features["is_impossible"] = create_int_feature([impossible])
    features["question_type"] = create_int_feature([feature.question_type])
    features["supporting_labels"] = create_int_feature(
        feature.supporting_labels)

  tf_example = tf.train.Example(features=tf.train.Features(feature=features))
  self._writer.write(tf_example.SerializeToString())