Python tensorflow.python.lib.io.file_io.write_string_to_file() Examples

The following are 30 code examples of tensorflow.python.lib.io.file_io.write_string_to_file(), drawn from open-source projects. The originating project and source file are noted above each example. You may also want to check out all available functions and classes of the module tensorflow.python.lib.io.file_io.
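Before diving into the examples, a quick orientation: write_string_to_file(filename, file_content) writes a single string to a path on any filesystem TensorFlow's file_io layer supports, including Google Cloud Storage paths. The minimal sketch below (the temporary path and contents are illustrative, not taken from any project here) writes a file and reads it back with the companion read_file_to_string():

import os
import tempfile

from tensorflow.python.lib.io import file_io

# Write a string to a local path; the parent directory must already exist.
path = os.path.join(tempfile.mkdtemp(), 'example.txt')
file_io.write_string_to_file(path, 'hello, world')

# Read the contents back to confirm the round trip.
assert file_io.read_file_to_string(path) == 'hello, world'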
Example #1
Source File: saver.py    From lingvo with Apache License 2.0
def _DoSanityCheck(self, prefix):
    """Sanity-check the content of the checkpoint."""
    if not self._sanity_checks:
      return
    reader = tf.train.NewCheckpointReader(prefix)
    content = {}
    for variables, rule in self._sanity_checks:
      args = []
      for v in variables:
        key = _VarKey(v)
        if key in content:
          args.append(content[key])
        else:
          value = reader.get_tensor(key)
          content[key] = value
          args.append(value)
      if not rule.Check(*args):
        # TODO(zhifengc): Maybe we should return an explicit signal
        # so that the caller (the controller loop) can Restore()
        # the latest checkpoint before raising the error.
        msg = "Checkpoint sanity check failed: {} {} {}\n".format(
            prefix, ",".join([_VarKey(v) for v in variables]), rule)
        # Also save the error message to a file.
        file_io.write_string_to_file("{}.failed".format(prefix), msg)
        raise tf.errors.AbortedError(None, None, msg) 
Example #2
Source File: saved_model_half_plus_two.py    From deep_image_model with Apache License 2.0
def _write_assets(assets_directory, assets_filename):
  """Writes asset files to be used with SavedModel for half plus two.

  Args:
    assets_directory: The directory to which the assets should be written.
    assets_filename: Name of the file to which the asset contents should be
        written.

  Returns:
    The path to which the assets file was written.
  """
  if not file_io.file_exists(assets_directory):
    file_io.recursive_create_dir(assets_directory)

  path = os.path.join(
      compat.as_bytes(assets_directory), compat.as_bytes(assets_filename))
  file_io.write_string_to_file(path, "asset-file-contents")
  return path 
Example #3
Source File: chicago_taxi_client.py    From code-snippets with Apache License 2.0
def _do_mlengine_inference(model, version, serialized_examples):
  """Performs inference on the model:version in CMLE."""
  working_dir = tempfile.mkdtemp()
  instances_file = os.path.join(working_dir, 'test.json')
  json_examples = []
  for serialized_example in serialized_examples:
    # The encoding follows the example in:
    # https://github.com/GoogleCloudPlatform/training-data-analyst/blob/master/quests/tpu/invoke_model.py
    json_examples.append(
        '{ "inputs": { "b64": "%s" } }' % base64.b64encode(serialized_example))
  # print('\n'.join(json_examples))
  file_io.write_string_to_file(instances_file, '\n'.join(json_examples))
  gcloud_command = [
      'gcloud', 'ml-engine', 'predict', '--model', model, '--version', version,
      '--json-instances', instances_file
  ]
  print(subprocess.check_output(gcloud_command)) 
Example #4
Source File: saved_model_half_plus_two.py    From jetson with MIT License
def _write_assets(assets_directory, assets_filename):
  """Writes asset files to be used with SavedModel for half plus two.

  Args:
    assets_directory: The directory to which the assets should be written.
    assets_filename: Name of the file to which the asset contents should be
        written.

  Returns:
    The path to which the assets file was written.
  """
  if not file_io.file_exists(assets_directory):
    file_io.recursive_create_dir(assets_directory)

  path = os.path.join(
      tf.compat.as_bytes(assets_directory), tf.compat.as_bytes(assets_filename))
  file_io.write_string_to_file(path, "asset-file-contents")
  return path 
Example #5
Source File: _job.py    From tensorfx with Apache License 2.0
def start(self):
    """Performs startup logic, including building graphs.
    """
    if self._config.master:
      # Save out job information for later reference alongside all other outputs.
      job_args = ' '.join(self._model_builder.args._args).replace(' --', '\n--').split('\n')
      job_info = {
        'config': self._config._env,
        'args': job_args
      }
      job_spec = yaml.safe_dump(job_info, default_flow_style=False)
      job_file = os.path.join(self._output, 'job.yaml')

      tfio.recursive_create_dir(self._output)
      tfio.write_string_to_file(job_file, job_spec)

      # Create a checkpoints directory. This is needed to ensure checkpoint restoration logic
      # can look up an existing directory.
      tfio.recursive_create_dir(self.checkpoints_path)

    # Build the graphs that will be used during the course of the job.
    self._training, self._evaluation, self._prediction = \
      self._model_builder.build_graph_interfaces(self._inputs, self._config) 
Example #6
Source File: __init__.py    From auto-alt-text-lambda-api with MIT License
def visualize_embeddings(summary_writer, config):
  """Stores a config file used by the embedding projector.

  Args:
    summary_writer: The summary writer used for writing events.
    config: `tf.contrib.tensorboard.plugins.projector.ProjectorConfig`
      proto that holds the configuration for the projector such as paths to
      checkpoint files and metadata files for the embeddings. If
      `config.model_checkpoint_path` is `None`, it defaults to the
      `logdir` used by the summary_writer.

  Raises:
    ValueError: If the summary writer does not have a `logdir`.
  """
  logdir = summary_writer.get_logdir()

  # Sanity checks.
  if logdir is None:
    raise ValueError('Summary writer must have a logdir')

  # Saving the config file in the logdir.
  config_pbtxt = text_format.MessageToString(config)
  file_io.write_string_to_file(
      os.path.join(logdir, PROJECTOR_FILENAME), config_pbtxt) 
Example #7
Source File: __init__.py    From keras-lambda with MIT License
def visualize_embeddings(summary_writer, config):
  """Stores a config file used by the embedding projector.

  Args:
    summary_writer: The summary writer used for writing events.
    config: `tf.contrib.tensorboard.plugins.projector.ProjectorConfig`
      proto that holds the configuration for the projector such as paths to
      checkpoint files and metadata files for the embeddings. If
      `config.model_checkpoint_path` is `None`, it defaults to the
      `logdir` used by the summary_writer.

  Raises:
    ValueError: If the summary writer does not have a `logdir`.
  """
  logdir = summary_writer.get_logdir()

  # Sanity checks.
  if logdir is None:
    raise ValueError('Summary writer must have a logdir')

  # Saving the config file in the logdir.
  config_pbtxt = text_format.MessageToString(config)
  file_io.write_string_to_file(
      os.path.join(logdir, PROJECTOR_FILENAME), config_pbtxt) 
Example #8
Source File: __init__.py    From lambda-packs with MIT License
def visualize_embeddings(summary_writer, config):
  """Stores a config file used by the embedding projector.

  Args:
    summary_writer: The summary writer used for writing events.
    config: `tf.contrib.tensorboard.plugins.projector.ProjectorConfig`
      proto that holds the configuration for the projector such as paths to
      checkpoint files and metadata files for the embeddings. If
      `config.model_checkpoint_path` is `None`, it defaults to the
      `logdir` used by the summary_writer.

  Raises:
    ValueError: If the summary writer does not have a `logdir`.
  """
  logdir = summary_writer.get_logdir()

  # Sanity checks.
  if logdir is None:
    raise ValueError('Summary writer must have a logdir')

  # Saving the config file in the logdir.
  config_pbtxt = text_format.MessageToString(config)
  file_io.write_string_to_file(
      os.path.join(logdir, projector_plugin.PROJECTOR_FILENAME), config_pbtxt) 
Example #9
Source File: saved_model_test.py    From keras-lambda with MIT License
def _build_asset_collection(self, asset_file_name, asset_file_contents,
                              asset_file_tensor_name):
    """Writes an asset file and registers it in the ASSET_FILEPATHS collection."""
    asset_filepath = os.path.join(
        compat.as_bytes(test.get_temp_dir()), compat.as_bytes(asset_file_name))
    file_io.write_string_to_file(asset_filepath, asset_file_contents)
    asset_file_tensor = constant_op.constant(
        asset_filepath, name=asset_file_tensor_name)
    ops.add_to_collection(ops.GraphKeys.ASSET_FILEPATHS, asset_file_tensor)
    asset_collection = ops.get_collection(ops.GraphKeys.ASSET_FILEPATHS)
    return asset_collection 
Example #10
Source File: saved_model_test.py    From keras-lambda with MIT License
def testAssets(self):
    export_dir = os.path.join(test.get_temp_dir(), "test_assets")
    builder = saved_model_builder.SavedModelBuilder(export_dir)

    with self.test_session(graph=ops.Graph()) as sess:
      self._init_and_validate_variable(sess, "v", 42)

      # Build an asset collection.
      ignored_filepath = os.path.join(
          compat.as_bytes(test.get_temp_dir()), compat.as_bytes("ignored.txt"))
      file_io.write_string_to_file(ignored_filepath, "will be ignored")

      asset_collection = self._build_asset_collection("hello42.txt",
                                                      "foo bar baz",
                                                      "asset_file_tensor")

      builder.add_meta_graph_and_variables(
          sess, ["foo"], assets_collection=asset_collection)

    # Save the SavedModel to disk.
    builder.save()

    with self.test_session(graph=ops.Graph()) as sess:
      foo_graph = loader.load(sess, ["foo"], export_dir)
      self._validate_asset_collection(export_dir, foo_graph.collection_def,
                                      "hello42.txt", "foo bar baz",
                                      "asset_file_tensor:0")
      ignored_asset_path = os.path.join(
          compat.as_bytes(export_dir),
          compat.as_bytes(constants.ASSETS_DIRECTORY),
          compat.as_bytes("ignored.txt"))
      self.assertFalse(file_io.file_exists(ignored_asset_path)) 
Example #11
Source File: builder_impl.py    From keras-lambda with MIT License
def save(self, as_text=False):
    """Writes a `SavedModel` protocol buffer to disk.

    The function writes the SavedModel protocol buffer to the export directory
    in serialized format.

    Args:
      as_text: If True, writes the SavedModel protocol buffer in text format to disk.

    Returns:
      The path to which the SavedModel protocol buffer was written.
    """
    if not file_io.file_exists(self._export_dir):
      file_io.recursive_create_dir(self._export_dir)

    if as_text:
      path = os.path.join(
          compat.as_bytes(self._export_dir),
          compat.as_bytes(constants.SAVED_MODEL_FILENAME_PBTXT))
      file_io.write_string_to_file(path, str(self._saved_model))
    else:
      path = os.path.join(
          compat.as_bytes(self._export_dir),
          compat.as_bytes(constants.SAVED_MODEL_FILENAME_PB))
      file_io.write_string_to_file(path, self._saved_model.SerializeToString())
    tf_logging.info("SavedModel written to: %s", path)

    return path 
Example #12
Source File: builder_impl.py    From Serverless-Deep-Learning-with-TensorFlow-and-AWS-Lambda with MIT License
def save(self, as_text=False):
    """Writes a `SavedModel` protocol buffer to disk.

    The function writes the SavedModel protocol buffer to the export directory
    in serialized format.

    Args:
      as_text: If True, writes the SavedModel protocol buffer in text format to disk.

    Returns:
      The path to which the SavedModel protocol buffer was written.
    """
    if not file_io.file_exists(self._export_dir):
      file_io.recursive_create_dir(self._export_dir)

    if as_text:
      path = os.path.join(
          compat.as_bytes(self._export_dir),
          compat.as_bytes(constants.SAVED_MODEL_FILENAME_PBTXT))
      file_io.write_string_to_file(path, str(self._saved_model))
    else:
      path = os.path.join(
          compat.as_bytes(self._export_dir),
          compat.as_bytes(constants.SAVED_MODEL_FILENAME_PB))
      file_io.write_string_to_file(path, self._saved_model.SerializeToString())
    tf_logging.info("SavedModel written to: %s", path)

    return path 
Example #13
Source File: metadata_io_test.py    From transform with Apache License 2.0
def _write_schema_to_disk(self, basedir, schema_string):
    version_basedir = os.path.join(basedir, 'v1-json')

    # Write a proto by hand to disk
    file_io.recursive_create_dir(version_basedir)
    file_io.write_string_to_file(os.path.join(version_basedir, 'schema.json'),
                                 schema_string) 
Example #14
Source File: omr.py    From moonlight with Apache License 2.0
def main(argv):
  if FLAGS.output_type not in VALID_OUTPUT_TYPES:
    raise ValueError('output_type "%s" not in allowed types: %s' %
                     (FLAGS.output_type, VALID_OUTPUT_TYPES))

  # Exclude argv[0], which is the current binary.
  patterns = argv[1:]
  if not patterns:
    raise ValueError('PNG file glob(s) must be specified')
  input_paths = []
  for pattern in patterns:
    pattern_paths = file_io.get_matching_files(pattern)
    if not pattern_paths:
      raise ValueError('Pattern "%s" failed to match any files' % pattern)
    input_paths.extend(pattern_paths)

  start = time.time()
  output = run(
      input_paths,
      FLAGS.glyphs_saved_model,
      output_notesequence=FLAGS.output_type == 'NoteSequence')
  end = time.time()
  sys.stderr.write('OMR elapsed time: %.2f\n' % (end - start))

  if FLAGS.output_type == 'MusicXML':
    output_bytes = conversions.score_to_musicxml(output)
  else:
    if FLAGS.text_format:
      output_bytes = text_format.MessageToString(output).encode('utf-8')
    else:
      output_bytes = output.SerializeToString()
  file_io.write_string_to_file(FLAGS.output, output_bytes) 
Example #15
Source File: gcs_smoke.py    From deep_image_model with Apache License 2.0
def create_object_test():
  """Verifies file_io's object manipulation methods ."""
  starttime = int(round(time.time() * 1000))
  dir_name = "%s/tf_gcs_test_%s" % (FLAGS.gcs_bucket_url, starttime)
  print("Creating dir %s." % dir_name)
  file_io.create_dir(dir_name)

  # Create a file in this directory.
  file_name = "%s/test_file.txt" % dir_name
  print("Creating file %s." % file_name)
  file_io.write_string_to_file(file_name, "test file creation.")

  list_files_pattern = "%s/test_file*.txt" % dir_name
  print("Getting files matching pattern %s." % list_files_pattern)
  files_list = file_io.get_matching_files(list_files_pattern)
  print(files_list)

  assert len(files_list) == 1
  assert files_list[0] == file_name

  # Cleanup test files.
  print("Deleting file %s." % file_name)
  file_io.delete_file(file_name)

  # Delete directory.
  print("Deleting directory %s." % dir_name)
  file_io.delete_recursively(dir_name) 
Example #16
Source File: saved_model_test.py    From deep_image_model with Apache License 2.0
def testAssets(self):
    export_dir = os.path.join(tf.test.get_temp_dir(), "test_assets")
    builder = saved_model_builder.SavedModelBuilder(export_dir)

    with self.test_session(graph=tf.Graph()) as sess:
      self._init_and_validate_variable(sess, "v", 42)

      # Build an asset collection.
      ignored_filepath = os.path.join(
          compat.as_bytes(tf.test.get_temp_dir()),
          compat.as_bytes("ignored.txt"))
      file_io.write_string_to_file(ignored_filepath, "will be ignored")

      asset_collection = self._build_asset_collection("hello42.txt",
                                                      "foo bar baz",
                                                      "asset_file_tensor")

      builder.add_meta_graph_and_variables(
          sess, ["foo"], assets_collection=asset_collection)

    # Save the SavedModel to disk.
    builder.save()

    with self.test_session(graph=tf.Graph()) as sess:
      foo_graph = loader.load(sess, ["foo"], export_dir)
      self._validate_asset_collection(export_dir, foo_graph.collection_def,
                                      "hello42.txt", "foo bar baz",
                                      "asset_file_tensor:0")
      ignored_asset_path = os.path.join(
          compat.as_bytes(export_dir),
          compat.as_bytes(constants.ASSETS_DIRECTORY),
          compat.as_bytes("ignored.txt"))
      self.assertFalse(file_io.file_exists(ignored_asset_path)) 
Example #17
Source File: saved_model_test.py    From deep_image_model with Apache License 2.0
def _build_asset_collection(self, asset_file_name, asset_file_contents,
                              asset_file_tensor_name):
    """Writes an asset file and registers it in the ASSET_FILEPATHS collection."""
    asset_filepath = os.path.join(
        compat.as_bytes(tf.test.get_temp_dir()),
        compat.as_bytes(asset_file_name))
    file_io.write_string_to_file(asset_filepath, asset_file_contents)
    asset_file_tensor = tf.constant(asset_filepath, name=asset_file_tensor_name)
    tf.add_to_collection(tf.GraphKeys.ASSET_FILEPATHS, asset_file_tensor)
    asset_collection = tf.get_collection(tf.GraphKeys.ASSET_FILEPATHS)
    return asset_collection 
Example #18
Source File: builder.py    From deep_image_model with Apache License 2.0
def save(self, as_text=False):
    """Writes a `SavedModel` protocol buffer to disk.

    The function writes the SavedModel protocol buffer to the export directory
    in serialized format.

    Args:
      as_text: If True, writes the SavedModel protocol buffer in text format to disk.

    Returns:
      The path to which the SavedModel protocol buffer was written.
    """
    if not file_io.file_exists(self._export_dir):
      file_io.recursive_create_dir(self._export_dir)

    if as_text:
      path = os.path.join(
          compat.as_bytes(self._export_dir),
          compat.as_bytes(constants.SAVED_MODEL_FILENAME_PBTXT))
      file_io.write_string_to_file(path, str(self._saved_model))
    else:
      path = os.path.join(
          compat.as_bytes(self._export_dir),
          compat.as_bytes(constants.SAVED_MODEL_FILENAME_PB))
      file_io.write_string_to_file(path, self._saved_model.SerializeToString())
    tf_logging.info("SavedModel written to: %s", path)

    return path 
Example #19
Source File: builder_impl.py    From lambda-packs with MIT License
def save(self, as_text=False):
    """Writes a `SavedModel` protocol buffer to disk.

    The function writes the SavedModel protocol buffer to the export directory
    in serialized format.

    Args:
      as_text: If True, writes the SavedModel protocol buffer in text format to disk.

    Returns:
      The path to which the SavedModel protocol buffer was written.
    """
    if not file_io.file_exists(self._export_dir):
      file_io.recursive_create_dir(self._export_dir)

    if as_text:
      path = os.path.join(
          compat.as_bytes(self._export_dir),
          compat.as_bytes(constants.SAVED_MODEL_FILENAME_PBTXT))
      file_io.write_string_to_file(path, str(self._saved_model))
    else:
      path = os.path.join(
          compat.as_bytes(self._export_dir),
          compat.as_bytes(constants.SAVED_MODEL_FILENAME_PB))
      file_io.write_string_to_file(path, self._saved_model.SerializeToString())
    tf_logging.info("SavedModel written to: %s", path)

    return path 
Example #20
Source File: test_feature_transforms.py    From pydatalab with Apache License 2.0
def test_make_transform_graph_images(self):

    print('Testing make_transform_graph with image_to_vec. ' +
          'It may take a few minutes because it needs to download a large inception checkpoint.')

    def _open_and_encode_image(img_url):
      with file_io.FileIO(img_url, 'r') as f:
        img = Image.open(f).convert('RGB')
        output = cStringIO.StringIO()
        img.save(output, 'jpeg')
      return base64.urlsafe_b64encode(output.getvalue())

    try:
      output_folder = tempfile.mkdtemp()
      stats_file_path = os.path.join(output_folder, feature_transforms.STATS_FILE)
      stats = {'column_stats': {}}
      file_io.write_string_to_file(stats_file_path, json.dumps(stats))

      schema = [{'name': 'img', 'type': 'STRING'}]
      features = {'img': {'transform': 'image_to_vec', 'source_column': 'img'}}

      # Test transformation with encoded image content.
      img_string1 = _open_and_encode_image(
          'gs://cloud-ml-data/img/flower_photos/daisy/15207766_fc2f1d692c_n.jpg')
      img_string2 = _open_and_encode_image(
          'gs://cloud-ml-data/img/flower_photos/dandelion/8980164828_04fbf64f79_n.jpg')
      # Test transformation with direct file path.
      img_string3 = 'gs://cloud-ml-data/img/flower_photos/daisy/15207766_fc2f1d692c_n.jpg'
      img_string4 = 'gs://cloud-ml-data/img/flower_photos/dandelion/8980164828_04fbf64f79_n.jpg'
      input_data = [img_string1, img_string2, img_string3, img_string4]
      results = self._run_graph(output_folder, features, schema, stats, input_data)
      embeddings = results['img']
      self.assertEqual(len(embeddings), 4)
      self.assertEqual(len(embeddings[0]), 2048)
      self.assertEqual(embeddings[0].dtype, np.float32)
      self.assertTrue(any(x != 0.0 for x in embeddings[1]))
      self.assertTrue(any(x != 0.0 for x in embeddings[3]))

    finally:
      shutil.rmtree(output_folder) 
Example #21
Source File: test_feature_transforms.py    From pydatalab with Apache License 2.0
def test_make_transform_graph_category(self):
    output_folder = tempfile.mkdtemp()
    try:
      file_io.write_string_to_file(
          os.path.join(output_folder, feature_transforms.VOCAB_ANALYSIS_FILE % 'cat1'),
          '\n'.join(['red,300', 'blue,200', 'green,100']))

      file_io.write_string_to_file(
          os.path.join(output_folder, feature_transforms.VOCAB_ANALYSIS_FILE % 'cat2'),
          '\n'.join(['pizza,300', 'ice_cream,200', 'cookies,100']))

      stats = {'column_stats': {}}  # stats file needed but unused.
      file_io.write_string_to_file(
          os.path.join(output_folder, feature_transforms.STATS_FILE),
          json.dumps(stats))

      schema = [{'name': 'cat1', 'type': 'STRING'}, {'name': 'cat2', 'type': 'STRING'}]
      features = {'cat1': {'transform': 'one_hot', 'source_column': 'cat1'},
                  'cat2': {'transform': 'embedding', 'source_column': 'cat2'}}
      input_data = ['red,pizza',
                    'blue,',
                    'green,extra']

      results = self._run_graph(output_folder, features, schema, stats, input_data)

      for result, expected_result in zip(results['cat1'].flatten().tolist(), [0, 1, 2]):
        self.assertEqual(result, expected_result)

      for result, expected_result in zip(results['cat2'].flatten().tolist(),
                                         [0, 3, 3]):
        self.assertEqual(result, expected_result)
    finally:
      shutil.rmtree(output_folder) 
Example #22
Source File: test_feature_transforms.py    From pydatalab with Apache License 2.0
def test_make_transform_graph_numerics(self):
    output_folder = tempfile.mkdtemp()
    stats_file_path = os.path.join(output_folder, feature_transforms.STATS_FILE)
    try:
      stats = {'column_stats':
                {'num1': {'max': 10.0, 'mean': 9.5, 'min': 0.0},  # noqa
                 'num2': {'max': 1.0, 'mean': 2.0, 'min': -1.0},
                 'num3': {'max': 10.0, 'mean': 2.0, 'min': 5.0}}}
      schema = [{'name': 'num1', 'type': 'FLOAT'},
                {'name': 'num2', 'type': 'FLOAT'},
                {'name': 'num3', 'type': 'INTEGER'}]
      features = {'num1': {'transform': 'identity', 'source_column': 'num1'},
                  'num2': {'transform': 'scale', 'value': 10, 'source_column': 'num2'},
                  'num3': {'transform': 'scale', 'source_column': 'num3'}}
      input_data = ['5.0,-1.0,10',
                    '10.0,1.0,5',
                    '15.0,0.5,7']
      file_io.write_string_to_file(
          stats_file_path,
          json.dumps(stats))

      results = self._run_graph(output_folder, features, schema, stats, input_data)

      for result, expected_result in zip(results['num1'].flatten().tolist(),
                                         [5, 10, 15]):
        self.assertAlmostEqual(result, expected_result)

      for result, expected_result in zip(results['num2'].flatten().tolist(),
                                         [-10, 10, 5]):
        self.assertAlmostEqual(result, expected_result)

      for result, expected_result in zip(results['num3'].flatten().tolist(),
                                         [1, -1, (7.0 - 5) * 2.0 / 5.0 - 1]):
        self.assertAlmostEqual(result, expected_result)
    finally:
      shutil.rmtree(output_folder) 
Example #23
Source File: feature_analysis.py    From pydatalab with Apache License 2.0
def save_schema_features(schema, features, output):
  # Save a copy of the schema and features in the output folder.
  file_io.write_string_to_file(
    os.path.join(output, constant.SCHEMA_FILE),
    json.dumps(schema, indent=2))

  file_io.write_string_to_file(
    os.path.join(output, constant.FEATURES_FILE),
    json.dumps(features, indent=2)) 
Example #24
Source File: builder_impl.py    From auto-alt-text-lambda-api with MIT License
def save(self, as_text=False):
    """Writes a `SavedModel` protocol buffer to disk.

    The function writes the SavedModel protocol buffer to the export directory
    in serialized format.

    Args:
      as_text: If True, writes the SavedModel protocol buffer in text format to disk.

    Returns:
      The path to which the SavedModel protocol buffer was written.
    """
    if not file_io.file_exists(self._export_dir):
      file_io.recursive_create_dir(self._export_dir)

    if as_text:
      path = os.path.join(
          compat.as_bytes(self._export_dir),
          compat.as_bytes(constants.SAVED_MODEL_FILENAME_PBTXT))
      file_io.write_string_to_file(path, str(self._saved_model))
    else:
      path = os.path.join(
          compat.as_bytes(self._export_dir),
          compat.as_bytes(constants.SAVED_MODEL_FILENAME_PB))
      file_io.write_string_to_file(path, self._saved_model.SerializeToString())
    tf_logging.info("SavedModel written to: %s", path)

    return path 
Example #25
Source File: saved_model_test.py    From auto-alt-text-lambda-api with MIT License
def _build_asset_collection(self, asset_file_name, asset_file_contents,
                              asset_file_tensor_name):
    """Writes an asset file and registers it in the ASSET_FILEPATHS collection."""
    asset_filepath = os.path.join(
        compat.as_bytes(test.get_temp_dir()), compat.as_bytes(asset_file_name))
    file_io.write_string_to_file(asset_filepath, asset_file_contents)
    asset_file_tensor = constant_op.constant(
        asset_filepath, name=asset_file_tensor_name)
    ops.add_to_collection(ops.GraphKeys.ASSET_FILEPATHS, asset_file_tensor)
    asset_collection = ops.get_collection(ops.GraphKeys.ASSET_FILEPATHS)
    return asset_collection 
Example #26
Source File: saved_model_test.py    From auto-alt-text-lambda-api with MIT License
def testAssets(self):
    export_dir = os.path.join(test.get_temp_dir(), "test_assets")
    builder = saved_model_builder.SavedModelBuilder(export_dir)

    with self.test_session(graph=ops.Graph()) as sess:
      self._init_and_validate_variable(sess, "v", 42)

      # Build an asset collection.
      ignored_filepath = os.path.join(
          compat.as_bytes(test.get_temp_dir()), compat.as_bytes("ignored.txt"))
      file_io.write_string_to_file(ignored_filepath, "will be ignored")

      asset_collection = self._build_asset_collection("hello42.txt",
                                                      "foo bar baz",
                                                      "asset_file_tensor")

      builder.add_meta_graph_and_variables(
          sess, ["foo"], assets_collection=asset_collection)

    # Save the SavedModel to disk.
    builder.save()

    with self.test_session(graph=ops.Graph()) as sess:
      foo_graph = loader.load(sess, ["foo"], export_dir)
      self._validate_asset_collection(export_dir, foo_graph.collection_def,
                                      "hello42.txt", "foo bar baz",
                                      "asset_file_tensor:0")
      ignored_asset_path = os.path.join(
          compat.as_bytes(export_dir),
          compat.as_bytes(constants.ASSETS_DIRECTORY),
          compat.as_bytes("ignored.txt"))
      self.assertFalse(file_io.file_exists(ignored_asset_path)) 
Example #27
Source File: cloud_preprocess.py    From pydatalab with Apache License 2.0
def run_analysis(args):
  """Builds an analysis file for training.

  Uses BigQuery tables to do the analysis.

  Args:
    args: command line args

  Raises:
    ValueError: If the schema contains an unsupported type.
  """
  import google.datalab.bigquery as bq
  if args.bigquery_table:
    table = bq.Table(args.bigquery_table)
    schema_list = table.schema._bq_schema
  else:
    schema_list = json.loads(
        file_io.read_file_to_string(args.schema_file).decode())
    table = bq.ExternalDataSource(
        source=args.input_file_pattern,
        schema=bq.Schema(schema_list))

  # Check the schema is supported.
  for col_schema in schema_list:
    col_type = col_schema['type'].lower()
    if col_type != 'string' and col_type != 'integer' and col_type != 'float':
      raise ValueError('Schema contains an unsupported type %s.' % col_type)

  run_numerical_analysis(table, schema_list, args)
  run_categorical_analysis(table, schema_list, args)

  # Save a copy of the schema to the output location.
  file_io.write_string_to_file(
      os.path.join(args.output_dir, SCHEMA_FILE),
      json.dumps(schema_list, indent=2, separators=(',', ': '))) 
Example #28
Source File: test_analyze.py    From pydatalab with Apache License 2.0
def test_numerics(self):
    output_folder = tempfile.mkdtemp()
    input_file_path = tempfile.mkstemp(dir=output_folder)[1]
    try:
      file_io.write_string_to_file(
        input_file_path,
        '\n'.join(['%s,%s,%s' % (i, 10 * i + 0.5, i + 0.5) for i in range(100)]))

      schema = [{'name': 'col1', 'type': 'INTEGER'},
                {'name': 'col2', 'type': 'FLOAT'},
                {'name': 'col3', 'type': 'FLOAT'}]
      features = {'col1': {'transform': 'scale', 'source_column': 'col1'},
                  'col2': {'transform': 'identity', 'source_column': 'col2'},
                  'col3': {'transform': 'target'}}
      feature_analysis.run_local_analysis(
          output_folder, [input_file_path], schema, features)

      stats = json.loads(
          file_io.read_file_to_string(
              os.path.join(output_folder, analyze.constant.STATS_FILE)).decode())

      self.assertEqual(stats['num_examples'], 100)
      col = stats['column_stats']['col1']
      self.assertAlmostEqual(col['max'], 99.0)
      self.assertAlmostEqual(col['min'], 0.0)
      self.assertAlmostEqual(col['mean'], 49.5)

      col = stats['column_stats']['col2']
      self.assertAlmostEqual(col['max'], 990.5)
      self.assertAlmostEqual(col['min'], 0.5)
      self.assertAlmostEqual(col['mean'], 495.5)
    finally:
      shutil.rmtree(output_folder) 
Example #29
Source File: test_analyze.py    From pydatalab with Apache License 2.0
def test_categorical(self):
    output_folder = tempfile.mkdtemp()
    input_file_path = tempfile.mkstemp(dir=output_folder)[1]
    try:
      csv_file = ['red,apple', 'red,pepper', 'red,apple', 'blue,grape',
                  'blue,apple', 'green,pepper']
      file_io.write_string_to_file(
        input_file_path,
        '\n'.join(csv_file))

      schema = [{'name': 'color', 'type': 'STRING'},
                {'name': 'type', 'type': 'STRING'}]
      features = {'color': {'transform': 'one_hot', 'source_column': 'color'},
                  'type': {'transform': 'target'}}
      feature_analysis.run_local_analysis(
        output_folder, [input_file_path], schema, features)

      stats = json.loads(
          file_io.read_file_to_string(
              os.path.join(output_folder, analyze.constant.STATS_FILE)).decode())
      self.assertEqual(stats['column_stats']['color']['vocab_size'], 3)

      # Color column.
      vocab_str = file_io.read_file_to_string(
        os.path.join(output_folder, analyze.constant.VOCAB_ANALYSIS_FILE % 'color'))
      vocab = pd.read_csv(six.StringIO(vocab_str),
                          header=None,
                          names=['color', 'count'])
      expected_vocab = pd.DataFrame(
          {'color': ['red', 'blue', 'green'], 'count': [3, 2, 1]},
          columns=['color', 'count'])
      pd.util.testing.assert_frame_equal(vocab, expected_vocab)

    finally:
      shutil.rmtree(output_folder) 
Example #30
Source File: feature_analysis.py    From pydatalab with Apache License 2.0
def save_schema_features(schema, features, output):
  # Save a copy of the schema and features in the output folder.
  file_io.write_string_to_file(
    os.path.join(output, constant.SCHEMA_FILE),
    json.dumps(schema, indent=2))

  file_io.write_string_to_file(
    os.path.join(output, constant.FEATURES_FILE),
    json.dumps(features, indent=2))
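
Taken together, these examples share a common idiom: build the destination path with os.path.join, create the directory with recursive_create_dir if it does not already exist, then call write_string_to_file. A small helper condensing that idiom might look like the sketch below; it is a convenience illustration, not code from any of the projects above:

import os

from tensorflow.python.lib.io import file_io


def write_string(directory, filename, contents):
  # Mirror the pattern from the SavedModel builders: ensure the
  # directory exists before writing, then return the final path.
  if not file_io.file_exists(directory):
    file_io.recursive_create_dir(directory)
  path = os.path.join(directory, filename)
  file_io.write_string_to_file(path, contents)
  return path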