Python preprocessing.make_tf_example() Examples

The following are 19 code examples of preprocessing.make_tf_example(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module preprocessing, or try the search function.
Example #1
Source File: test_preprocessing.py    From training with Apache License 2.0 6 votes vote down vote up
def test_tpu_rotate(self):
        """Compare TPU extraction with and without random rotation.

        Writes 100 random examples to a temporary file, then reads the file
        three times through ``extract_tpu_data``: once with
        ``random_rotation=False`` and twice with ``random_rotation=True``.
        ``reset_random`` is called before every read, and the three results
        are handed to ``assert_rotate_data``.
        """
        sample_count = 100
        samples = self.create_random_data(sample_count)
        columns = zip(*samples)
        examples = list(map(preprocessing.make_tf_example, *columns))

        with tempfile.NamedTemporaryFile() as tmp:
            preprocessing.write_tf_examples(tmp.name, examples)

            # One unrotated pass followed by two rotated passes, each
            # starting from a freshly reset random state.
            runs = []
            for rotate in (False, True, True):
                self.reset_random()
                runs.append(
                    self.extract_tpu_data(tmp.name, random_rotation=rotate))

        self.assert_rotate_data(*runs)
Example #2
Source File: test_preprocessing.py    From training with Apache License 2.0 6 votes vote down vote up
def test_rotate_pyfunc(self):
        """Compare ``extract_data`` output with and without random rotation.

        Writes 20 random examples to a temporary file, then reads the file
        three times: once with ``random_rotation=False`` and twice with
        ``random_rotation=True``. ``reset_random`` is called before every
        read, and the three results are handed to ``assert_rotate_data``.
        """
        sample_count = 20
        samples = self.create_random_data(sample_count)
        columns = zip(*samples)
        examples = list(map(preprocessing.make_tf_example, *columns))

        with tempfile.NamedTemporaryFile() as tmp:
            preprocessing.write_tf_examples(tmp.name, examples)

            # One unrotated pass followed by two rotated passes, each
            # starting from a freshly reset random state.
            runs = []
            for rotate in (False, True, True):
                self.reset_random()
                runs.append(
                    self.extract_data(tmp.name, random_rotation=rotate))

        self.assert_rotate_data(*runs)
Example #3
Source File: test_preprocessing.py    From training with Apache License 2.0 5 votes vote down vote up
def test_serialize_round_trip(self):
        """Write 10 random examples to a temp file and read them back.

        The data returned by ``extract_data`` must compare equal (via
        ``assertEqualData``) to the data that was written.
        """
        np.random.seed(1)
        expected = self.create_random_data(10)
        columns = zip(*expected)
        examples = list(map(preprocessing.make_tf_example, *columns))

        with tempfile.NamedTemporaryFile() as tmp:
            preprocessing.write_tf_examples(tmp.name, examples)
            actual = self.extract_data(tmp.name)

        self.assertEqualData(expected, actual)
Example #4
Source File: preprocessing_test.py    From multilabel-image-classification-tensorflow with MIT License 5 votes vote down vote up
def test_serialize_round_trip_no_parse(self):
    """Round-trip examples through shuffle_tf_examples without re-serializing.

    Writes 10 random examples to a file, reads them back in shuffled batches
    of 4 via ``preprocessing.shuffle_tf_examples``, writes the flattened
    batches to a second file with ``serialize=False``, and checks that both
    files yield the same data once sorted.
    """
    np.random.seed(1)
    raw_data = self.create_random_data(10)
    tfexamples = list(map(preprocessing.make_tf_example, *zip(*raw_data)))

    with tempfile.NamedTemporaryFile() as start_file, \
        tempfile.NamedTemporaryFile() as rewritten_file:
      preprocessing.write_tf_examples(start_file.name, tfexamples)
      # We want to test that the rewritten, shuffled file contains correctly
      # serialized tf.Examples.
      batch_size = 4
      batches = list(preprocessing.shuffle_tf_examples(
          1000, batch_size, [start_file.name]))
      # 10 records in batches of 4: 2 full batches, 1 incomplete batch of 2.
      self.assertEqual(len(batches), 3)

      # Concatenate the list of batches into one flat list of records.
      all_batches = list(itertools.chain.from_iterable(batches))

      # Write the flattened records exactly once. The previous version
      # repeated this identical call once per batch without using the loop
      # variable, rewriting the same file with the same contents each time.
      preprocessing.write_tf_examples(
          rewritten_file.name, all_batches, serialize=False)

      original_data = self.extract_data(start_file.name)
      recovered_data = self.extract_data(rewritten_file.name)

    # The rewritten file is shuffled, so sort both sides before comparing.
    def sort_key(nparray_tuple):
      return nparray_tuple[2]
    original_data = sorted(original_data, key=sort_key)
    recovered_data = sorted(recovered_data, key=sort_key)

    self.assertEqualData(original_data, recovered_data)
Example #5
Source File: preprocessing_test.py    From multilabel-image-classification-tensorflow with MIT License 5 votes vote down vote up
def test_filter(self):
    """Read 100 written examples back with ``filter_amount=.05``.

    Fewer than 50 records must come back from ``extract_data``.
    """
    samples = self.create_random_data(100)
    columns = zip(*samples)
    examples = list(map(preprocessing.make_tf_example, *columns))

    with tempfile.NamedTemporaryFile() as tmp:
      preprocessing.write_tf_examples(tmp.name, examples)
      kept = self.extract_data(tmp.name, filter_amount=.05)

    kept_count = len(kept)
    self.assertLess(kept_count, 50)
Example #6
Source File: preprocessing_test.py    From multilabel-image-classification-tensorflow with MIT License 5 votes vote down vote up
def test_serialize_round_trip(self):
    """Write 10 random examples to a temp file and read them back.

    The data returned by ``extract_data`` must compare equal (via
    ``assertEqualData``) to the data that was written.
    """
    np.random.seed(1)
    expected = self.create_random_data(10)
    columns = zip(*expected)
    examples = list(map(preprocessing.make_tf_example, *columns))

    with tempfile.NamedTemporaryFile() as tmp:
      preprocessing.write_tf_examples(tmp.name, examples)
      actual = self.extract_data(tmp.name)

    self.assertEqualData(expected, actual)
Example #7
Source File: preprocessing_test.py    From g-tensorflow-models with Apache License 2.0 5 votes vote down vote up
def test_serialize_round_trip_no_parse(self):
    """Round-trip examples through shuffle_tf_examples without re-serializing.

    Writes 10 random examples to a file, reads them back in shuffled batches
    of 4 via ``preprocessing.shuffle_tf_examples``, writes the flattened
    batches to a second file with ``serialize=False``, and checks that both
    files yield the same data once sorted.
    """
    np.random.seed(1)
    raw_data = self.create_random_data(10)
    tfexamples = list(map(preprocessing.make_tf_example, *zip(*raw_data)))

    with tempfile.NamedTemporaryFile() as start_file, \
        tempfile.NamedTemporaryFile() as rewritten_file:
      preprocessing.write_tf_examples(start_file.name, tfexamples)
      # We want to test that the rewritten, shuffled file contains correctly
      # serialized tf.Examples.
      batch_size = 4
      batches = list(preprocessing.shuffle_tf_examples(
          1000, batch_size, [start_file.name]))
      # 10 records in batches of 4: 2 full batches, 1 incomplete batch of 2.
      self.assertEqual(len(batches), 3)

      # Concatenate the list of batches into one flat list of records.
      all_batches = list(itertools.chain.from_iterable(batches))

      # Write the flattened records exactly once. The previous version
      # repeated this identical call once per batch without using the loop
      # variable, rewriting the same file with the same contents each time.
      preprocessing.write_tf_examples(
          rewritten_file.name, all_batches, serialize=False)

      original_data = self.extract_data(start_file.name)
      recovered_data = self.extract_data(rewritten_file.name)

    # The rewritten file is shuffled, so sort both sides before comparing.
    def sort_key(nparray_tuple):
      return nparray_tuple[2]
    original_data = sorted(original_data, key=sort_key)
    recovered_data = sorted(recovered_data, key=sort_key)

    self.assertEqualData(original_data, recovered_data)
Example #8
Source File: preprocessing_test.py    From g-tensorflow-models with Apache License 2.0 5 votes vote down vote up
def test_filter(self):
    """Read 100 written examples back with ``filter_amount=.05``.

    Fewer than 50 records must come back from ``extract_data``.
    """
    samples = self.create_random_data(100)
    columns = zip(*samples)
    examples = list(map(preprocessing.make_tf_example, *columns))

    with tempfile.NamedTemporaryFile() as tmp:
      preprocessing.write_tf_examples(tmp.name, examples)
      kept = self.extract_data(tmp.name, filter_amount=.05)

    kept_count = len(kept)
    self.assertLess(kept_count, 50)
Example #9
Source File: preprocessing_test.py    From g-tensorflow-models with Apache License 2.0 5 votes vote down vote up
def test_serialize_round_trip(self):
    """Write 10 random examples to a temp file and read them back.

    The data returned by ``extract_data`` must compare equal (via
    ``assertEqualData``) to the data that was written.
    """
    np.random.seed(1)
    expected = self.create_random_data(10)
    columns = zip(*expected)
    examples = list(map(preprocessing.make_tf_example, *columns))

    with tempfile.NamedTemporaryFile() as tmp:
      preprocessing.write_tf_examples(tmp.name, examples)
      actual = self.extract_data(tmp.name)

    self.assertEqualData(expected, actual)
Example #10
Source File: test_preprocessing.py    From training with Apache License 2.0 5 votes vote down vote up
def test_filter(self):
        """Read 100 written examples back with ``filter_amount=.05``.

        Fewer than 50 records must come back from ``extract_data``.
        """
        samples = self.create_random_data(100)
        columns = zip(*samples)
        examples = list(map(preprocessing.make_tf_example, *columns))

        with tempfile.NamedTemporaryFile() as tmp:
            preprocessing.write_tf_examples(tmp.name, examples)
            kept = self.extract_data(tmp.name, filter_amount=.05)

        # TODO: this will flake out very infrequently.  Use set_random_seed
        kept_count = len(kept)
        self.assertLess(kept_count, 50)
Example #11
Source File: test_preprocessing.py    From training_results_v0.5 with Apache License 2.0 5 votes vote down vote up
def test_serialize_round_trip(self):
        """Write 10 random examples to a temp file and read them back.

        The data returned by ``extract_data`` must compare equal (via
        ``assertEqualData``) to the data that was written.
        """
        np.random.seed(1)
        expected = self.create_random_data(10)
        columns = zip(*expected)
        examples = list(map(preprocessing.make_tf_example, *columns))

        with tempfile.NamedTemporaryFile() as tmp:
            preprocessing.write_tf_examples(tmp.name, examples)
            actual = self.extract_data(tmp.name)

        self.assertEqualData(expected, actual)
Example #12
Source File: preprocessing_test.py    From Gun-Detector with Apache License 2.0 5 votes vote down vote up
def test_serialize_round_trip_no_parse(self):
    """Round-trip examples through shuffle_tf_examples without re-serializing.

    Writes 10 random examples to a file, reads them back in shuffled batches
    of 4 via ``preprocessing.shuffle_tf_examples``, writes the flattened
    batches to a second file with ``serialize=False``, and checks that both
    files yield the same data once sorted.
    """
    np.random.seed(1)
    raw_data = self.create_random_data(10)
    tfexamples = list(map(preprocessing.make_tf_example, *zip(*raw_data)))

    with tempfile.NamedTemporaryFile() as start_file, \
        tempfile.NamedTemporaryFile() as rewritten_file:
      preprocessing.write_tf_examples(start_file.name, tfexamples)
      # We want to test that the rewritten, shuffled file contains correctly
      # serialized tf.Examples.
      batch_size = 4
      batches = list(preprocessing.shuffle_tf_examples(
          1000, batch_size, [start_file.name]))
      # 10 records in batches of 4: 2 full batches, 1 incomplete batch of 2.
      self.assertEqual(len(batches), 3)

      # Concatenate the list of batches into one flat list of records.
      all_batches = list(itertools.chain.from_iterable(batches))

      # Write the flattened records exactly once. The previous version
      # repeated this identical call once per batch without using the loop
      # variable, rewriting the same file with the same contents each time.
      preprocessing.write_tf_examples(
          rewritten_file.name, all_batches, serialize=False)

      original_data = self.extract_data(start_file.name)
      recovered_data = self.extract_data(rewritten_file.name)

    # The rewritten file is shuffled, so sort both sides before comparing.
    def sort_key(nparray_tuple):
      return nparray_tuple[2]
    original_data = sorted(original_data, key=sort_key)
    recovered_data = sorted(recovered_data, key=sort_key)

    self.assertEqualData(original_data, recovered_data)
Example #13
Source File: preprocessing_test.py    From Gun-Detector with Apache License 2.0 5 votes vote down vote up
def test_filter(self):
    """Read 100 written examples back with ``filter_amount=.05``.

    Fewer than 50 records must come back from ``extract_data``.
    """
    samples = self.create_random_data(100)
    columns = zip(*samples)
    examples = list(map(preprocessing.make_tf_example, *columns))

    with tempfile.NamedTemporaryFile() as tmp:
      preprocessing.write_tf_examples(tmp.name, examples)
      kept = self.extract_data(tmp.name, filter_amount=.05)

    kept_count = len(kept)
    self.assertLess(kept_count, 50)
Example #14
Source File: preprocessing_test.py    From Gun-Detector with Apache License 2.0 5 votes vote down vote up
def test_serialize_round_trip(self):
    """Write 10 random examples to a temp file and read them back.

    The data returned by ``extract_data`` must compare equal (via
    ``assertEqualData``) to the data that was written.
    """
    np.random.seed(1)
    expected = self.create_random_data(10)
    columns = zip(*expected)
    examples = list(map(preprocessing.make_tf_example, *columns))

    with tempfile.NamedTemporaryFile() as tmp:
      preprocessing.write_tf_examples(tmp.name, examples)
      actual = self.extract_data(tmp.name)

    self.assertEqualData(expected, actual)
Example #15
Source File: test_preprocessing.py    From training_results_v0.5 with Apache License 2.0 5 votes vote down vote up
def test_filter(self):
        """Read 100 written examples back with ``filter_amount=.05``.

        Fewer than 50 records must come back from ``extract_data``.
        """
        samples = self.create_random_data(100)
        columns = zip(*samples)
        examples = list(map(preprocessing.make_tf_example, *columns))

        with tempfile.NamedTemporaryFile() as tmp:
            preprocessing.write_tf_examples(tmp.name, examples)
            kept = self.extract_data(tmp.name, filter_amount=.05)

        # TODO: this will flake out very infrequently.  Use set_random_seed
        kept_count = len(kept)
        self.assertLess(kept_count, 50)
Example #16
Source File: test_preprocessing.py    From training_results_v0.5 with Apache License 2.0 5 votes vote down vote up
def test_serialize_round_trip(self):
        """Write 10 random examples to a temp file and read them back.

        The data returned by ``extract_data`` must compare equal (via
        ``assertEqualData``) to the data that was written.
        """
        np.random.seed(1)
        expected = self.create_random_data(10)
        columns = zip(*expected)
        examples = list(map(preprocessing.make_tf_example, *columns))

        with tempfile.NamedTemporaryFile() as tmp:
            preprocessing.write_tf_examples(tmp.name, examples)
            actual = self.extract_data(tmp.name)

        self.assertEqualData(expected, actual)
Example #17
Source File: test_preprocessing.py    From training_results_v0.5 with Apache License 2.0 5 votes vote down vote up
def test_serialize_round_trip_no_parse(self):
        """Round-trip examples through shuffle_tf_examples without re-serializing.

        Writes 10 random examples to a file, reads them back in shuffled
        batches of 4 via ``preprocessing.shuffle_tf_examples``, writes the
        flattened batches to a second file with ``serialize=False``, and
        checks that both files yield the same data once sorted.
        """
        np.random.seed(1)
        raw_data = self.create_random_data(10)
        tfexamples = list(map(preprocessing.make_tf_example, *zip(*raw_data)))

        with tempfile.NamedTemporaryFile() as start_file, \
                tempfile.NamedTemporaryFile() as rewritten_file:
            preprocessing.write_tf_examples(start_file.name, tfexamples)
            # We want to test that the rewritten, shuffled file contains
            # correctly serialized tf.Examples.
            batch_size = 4
            batches = list(preprocessing.shuffle_tf_examples(
                batch_size, [start_file.name]))
            # 10 records in batches of 4: 2 full batches, 1 incomplete
            # batch of 2.
            self.assertEqual(len(batches), 3)

            # Concatenate the list of batches into one flat list of records.
            all_batches = list(itertools.chain.from_iterable(batches))

            # Write the flattened records exactly once. The previous version
            # repeated this identical call once per batch without using the
            # loop variable, rewriting the same file with the same contents
            # each time.
            preprocessing.write_tf_examples(
                rewritten_file.name, all_batches, serialize=False)

            original_data = self.extract_data(start_file.name)
            recovered_data = self.extract_data(rewritten_file.name)

        # The rewritten file is shuffled, so sort both sides before comparing.
        def sort_key(nparray_tuple):
            return nparray_tuple[2]

        original_data = sorted(original_data, key=sort_key)
        recovered_data = sorted(recovered_data, key=sort_key)

        self.assertEqualData(original_data, recovered_data)
Example #18
Source File: test_preprocessing.py    From training_results_v0.5 with Apache License 2.0 5 votes vote down vote up
def test_filter(self):
        """Read 100 written examples back with ``filter_amount=.05``.

        Fewer than 50 records must come back from ``extract_data``.
        """
        samples = self.create_random_data(100)
        columns = zip(*samples)
        examples = list(map(preprocessing.make_tf_example, *columns))

        with tempfile.NamedTemporaryFile() as tmp:
            preprocessing.write_tf_examples(tmp.name, examples)
            kept = self.extract_data(tmp.name, filter_amount=.05)

        # TODO: this will flake out very infrequently.  Use set_random_seed
        kept_count = len(kept)
        self.assertLess(kept_count, 50)
Example #19
Source File: rotate_examples.py    From training with Apache License 2.0 4 votes vote down vote up
def convert(paths):
    """Rewrite the records of one input file with a random rotation applied.

    Reads batches of records from ``in_path`` via ``batched_reader``, parses
    them, applies ``preprocessing._random_rotation``, and writes the rebuilt
    examples to ``out_path`` with a ``TFRecordWriter``.

    Args:
        paths: A 3-item sequence ``(position, in_path, out_path)``.
            ``position`` is forwarded to ``tqdm`` as the progress-bar
            position.

    Returns:
        A status string if ``out_path`` already exists (either
        "<out_path> already existed" or an "ERROR on file size ..." message);
        otherwise ``None`` after the conversion has run.

    Raises:
        AssertionError: If ``in_path`` does not exist, if the directory of
            ``out_path`` does not exist, or if a re-serialized example has a
            different length than the record it was built from.
    """
    position, in_path, out_path = paths
    assert tf.gfile.Exists(in_path)
    assert tf.gfile.Exists(os.path.dirname(out_path))

    in_size = get_size(in_path)
    if tf.gfile.Exists(out_path):
        # Make sure out_path is about the size of in_path
        size = get_size(out_path)
        # Relative size difference; the +1 keeps the division defined when
        # in_size is 0.
        error = (size - in_size) / (in_size + 1)
        # 5% smaller to 20% larger
        if -0.05 < error < 0.20:
            return out_path + " already existed"
        # Outside the tolerance: report it as a string instead of raising,
        # and leave the existing output untouched.
        return "ERROR on file size ({:.1f}% diff) {}".format(
            100 * error, out_path)

    # Only used as the progress-bar total below.
    num_batches = dual_net.EXAMPLES_PER_GENERATION // FLAGS.batch_size + 1

    with tf.python_io.TFRecordWriter(out_path, OPTS) as writer:
        record_iter = tqdm(
            batched_reader(in_path),
            desc=os.path.basename(in_path),
            position=position,
            total=num_batches)
        # NOTE(review): each `record` is presumably one batch of serialized
        # examples (it is zipped element-wise with the outputs below) —
        # confirm against batched_reader.
        for record in record_iter:
            xs, rs = preprocessing.batch_parse_tf_example(len(record), record)
            # Undo cast in batch_parse_tf_example.
            xs = tf.cast(xs, tf.uint8)

            # map the rotation function.
            x_rot, r_rot = preprocessing._random_rotation(xs, rs)

            # Evaluate the ops built above for this batch, then reset the
            # default graph before the next iteration builds new ops.
            with tf.Session() as sess:
                x_rot, r_rot = sess.run([x_rot, r_rot])
            tf.reset_default_graph()

            pi_rot = r_rot['pi_tensor']
            val_rot = r_rot['value_tensor']
            for r, x, pi, val in zip(record, x_rot, pi_rot, val_rot):
                record_out = preprocessing.make_tf_example(x, pi, val)
                serialized = record_out.SerializeToString()
                writer.write(serialized)
                # Sanity check: the rebuilt example must serialize to the
                # same number of bytes as the input record. Note this runs
                # after the record has already been written.
                assert len(r) == len(serialized), (len(r), len(serialized))