Python tensorflow.python.lib.io.file_io.copy() Examples

The following are 12 code examples of tensorflow.python.lib.io.file_io.copy(), collected from open-source projects. Each example notes its source file, originating project, and license. You may also want to check out the other available functions and classes of the module tensorflow.python.lib.io.file_io.
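
Before the examples, here is a minimal sketch of the call itself; the paths are hypothetical. file_io.copy(oldpath, newpath, overwrite=False) copies a single file and accepts both local paths and gs:// URIs.

from tensorflow.python.lib.io import file_io

# Copy one file. With the default overwrite=False the call fails if the
# destination already exists, so pass overwrite=True to replace it.
# Local paths and gs:// paths both work (hypothetical paths shown).
file_io.copy('gs://my-bucket/schema.json', '/tmp/schema.json', overwrite=True)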
Example #1
Source File: util.py    From pydatalab with Apache License 2.0
def _recursive_copy(src_dir, dest_dir):
  """Copy the contents of src_dir into the folder dest_dir.
  Args:
    src_dir: gcs or local path.
    dest_dir: gcs or local path; created if it does not already exist.
  """
  src_dir = python_portable_string(src_dir)
  dest_dir = python_portable_string(dest_dir)

  file_io.recursive_create_dir(dest_dir)
  for file_name in file_io.list_directory(src_dir):
    old_path = os.path.join(src_dir, file_name)
    new_path = os.path.join(dest_dir, file_name)

    if file_io.is_directory(old_path):
      _recursive_copy(old_path, new_path)
    else:
      file_io.copy(old_path, new_path, overwrite=True) 
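
For illustration, the helper above might be invoked as follows; the bucket and directory names are hypothetical.

# Hypothetical call: mirror a local output folder into GCS. The
# destination is created by file_io.recursive_create_dir as needed.
_recursive_copy('./model_output', 'gs://my-bucket/model_output')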
Example #2
Source File: task.py    From pydatalab with Apache License 2.0
def recursive_copy(src_dir, dest_dir):
  """Copy the contents of src_dir into the folder dest_dir.
  Args:
    src_dir: gcs or local path.
    dest_dir: gcs or local path.
  """

  file_io.recursive_create_dir(dest_dir)
  for file_name in file_io.list_directory(src_dir):
    old_path = os.path.join(src_dir, file_name)
    new_path = os.path.join(dest_dir, file_name)

    if file_io.is_directory(old_path):
      recursive_copy(old_path, new_path)
    else:
      file_io.copy(old_path, new_path, overwrite=True) 
Example #3
Source File: builder_impl.py    From lambda-packs with MIT License
def _save_and_write_assets(self, assets_collection_to_add=None):
    """Saves asset to the meta graph and writes asset files to disk.

    Args:
      assets_collection_to_add: The collection where the asset paths are setup.
    """
    asset_source_filepath_list = _maybe_save_assets(assets_collection_to_add)

    # Return if there are no assets to write.
    if not asset_source_filepath_list:
      tf_logging.info("No assets to write.")
      return

    assets_destination_dir = os.path.join(
        compat.as_bytes(self._export_dir),
        compat.as_bytes(constants.ASSETS_DIRECTORY))

    if not file_io.file_exists(assets_destination_dir):
      file_io.recursive_create_dir(assets_destination_dir)

    # Copy each asset from source path to destination path.
    for asset_source_filepath in asset_source_filepath_list:
      asset_source_filename = os.path.basename(asset_source_filepath)

      asset_destination_filepath = os.path.join(
          compat.as_bytes(assets_destination_dir),
          compat.as_bytes(asset_source_filename))

      # Only copy the asset file to the destination if it does not already
      # exist. This is to ensure that an asset with the same name defined as
      # part of multiple graphs is only copied the first time.
      if not file_io.file_exists(asset_destination_filepath):
        file_io.copy(asset_source_filepath, asset_destination_filepath)

    tf_logging.info("Assets written to: %s", assets_destination_dir) 
Example #4
Source File: builder_impl.py    From auto-alt-text-lambda-api with MIT License
def _save_and_write_assets(self, assets_collection_to_add=None):
    """Saves asset to the meta graph and writes asset files to disk.

    Args:
      assets_collection_to_add: The collection where the asset paths are setup.
    """
    asset_source_filepath_list = self._maybe_save_assets(
        assets_collection_to_add)

    # Return if there are no assets to write.
    if not asset_source_filepath_list:
      tf_logging.info("No assets to write.")
      return

    assets_destination_dir = os.path.join(
        compat.as_bytes(self._export_dir),
        compat.as_bytes(constants.ASSETS_DIRECTORY))

    if not file_io.file_exists(assets_destination_dir):
      file_io.recursive_create_dir(assets_destination_dir)

    # Copy each asset from source path to destination path.
    for asset_source_filepath in asset_source_filepath_list:
      asset_source_filename = os.path.basename(asset_source_filepath)

      asset_destination_filepath = os.path.join(
          compat.as_bytes(assets_destination_dir),
          compat.as_bytes(asset_source_filename))

      # Only copy the asset file to the destination if it does not already
      # exist. This is to ensure that an asset with the same name defined as
      # part of multiple graphs is only copied the first time.
      if not file_io.file_exists(asset_destination_filepath):
        file_io.copy(asset_source_filepath, asset_destination_filepath)

    tf_logging.info("Assets written to: %s", assets_destination_dir) 
Example #5
Source File: local_preprocess.py    From pydatalab with Apache License 2.0
def run_analysis(args):
  """Builds an analysis files for training."""

  # Read the schema and input feature types
  schema_list = json.loads(
      file_io.read_file_to_string(args.schema_file))

  run_numerical_categorical_analysis(args, schema_list)

  # Also save a copy of the schema in the output folder.
  file_io.copy(args.schema_file,
               os.path.join(args.output_dir, SCHEMA_FILE),
               overwrite=True) 
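
The same keep-a-copy-of-the-inputs idiom can be used on its own; a minimal sketch with hypothetical paths (SCHEMA_FILE in the original snippet is a module-level name for the output file):

import os

from tensorflow.python.lib.io import file_io

# Hypothetical paths: store the schema alongside the analysis output so
# downstream steps can read a self-contained output directory.
schema_file = 'schema.json'
output_dir = 'gs://my-bucket/analysis_output'
file_io.recursive_create_dir(output_dir)
file_io.copy(schema_file, os.path.join(output_dir, 'schema.json'), overwrite=True)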
Example #6
Source File: predict.py    From pydatalab with Apache License 2.0
def main(argv=None):
  args = parse_arguments(sys.argv if argv is None else argv)

  if args.cloud:
    tmpdir = tempfile.mkdtemp()
    try:
      local_packages = [os.path.join(tmpdir, os.path.basename(p)) for p in args.extra_package]
      for source, dest in zip(args.extra_package, local_packages):
        file_io.copy(source, dest, overwrite=True)

      options = {
          'staging_location': os.path.join(args.output_dir, 'tmp', 'staging'),
          'temp_location': os.path.join(args.output_dir, 'tmp', 'staging'),
          'job_name': args.job_name,
          'project': args.project_id,
          'no_save_main_session': True,
          'extra_packages': local_packages,
          'teardown_policy': 'TEARDOWN_ALWAYS',
      }
      opts = beam.pipeline.PipelineOptions(flags=[], **options)
      # Or use BlockingDataflowPipelineRunner
      p = beam.Pipeline('DataflowRunner', options=opts)
      make_prediction_pipeline(p, args)
      print(('Dataflow Job submitted, see Job %s at '
             'https://console.developers.google.com/dataflow?project=%s') %
            (options['job_name'], args.project_id))
      sys.stdout.flush()
      runner_results = p.run()
    finally:
      shutil.rmtree(tmpdir)
  else:
    p = beam.Pipeline('DirectRunner')
    make_prediction_pipeline(p, args)
    runner_results = p.run()

  return runner_results 
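
The loop at the top of main() shows a common pattern: extra packages that may live in GCS are first localized with file_io.copy so the Dataflow runner can stage them. A self-contained sketch of that step, with hypothetical names:

import os
import tempfile

from tensorflow.python.lib.io import file_io

def localize_packages(package_uris):
  # Copy each (possibly gs://) package into a fresh temp dir and return
  # the local paths; the caller is responsible for removing the dir.
  tmpdir = tempfile.mkdtemp()
  local_paths = [os.path.join(tmpdir, os.path.basename(p)) for p in package_uris]
  for source, dest in zip(package_uris, local_paths):
    file_io.copy(source, dest, overwrite=True)
  return tmpdir, local_paths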
Example #7
Source File: builder.py    From deep_image_model with Apache License 2.0
def _save_and_write_assets(self, assets_collection_to_add=None):
    """Saves asset to the meta graph and writes asset files to disk.

    Args:
      assets_collection_to_add: The collection where the asset paths are setup.
    """
    asset_source_filepath_list = self._maybe_save_assets(
        assets_collection_to_add)

    # Return if there are no assets to write.
    if not asset_source_filepath_list:
      tf_logging.info("No assets to write.")
      return

    assets_destination_dir = os.path.join(
        compat.as_bytes(self._export_dir),
        compat.as_bytes(constants.ASSETS_DIRECTORY))

    if not file_io.file_exists(assets_destination_dir):
      file_io.recursive_create_dir(assets_destination_dir)

    # Copy each asset from source path to destination path.
    for asset_source_filepath in asset_source_filepath_list:
      asset_source_filename = os.path.basename(asset_source_filepath)

      asset_destination_filepath = os.path.join(
          compat.as_bytes(assets_destination_dir),
          compat.as_bytes(asset_source_filename))

      # Only copy the asset file to the destination if it does not already
      # exist. This is to ensure that an asset with the same name defined as
      # part of multiple graphs is only copied the first time.
      if not file_io.file_exists(asset_destination_filepath):
        file_io.copy(asset_source_filepath, asset_destination_filepath)

    tf_logging.info("Assets written to: %s", assets_destination_dir) 
Example #8
Source File: builder_impl.py    From Serverless-Deep-Learning-with-TensorFlow-and-AWS-Lambda with MIT License
def _save_and_write_assets(self, assets_collection_to_add=None):
    """Saves asset to the meta graph and writes asset files to disk.

    Args:
      assets_collection_to_add: The collection where the asset paths are setup.
    """
    asset_source_filepath_list = _maybe_save_assets(assets_collection_to_add)

    # Return if there are no assets to write.
    if not asset_source_filepath_list:
      tf_logging.info("No assets to write.")
      return

    assets_destination_dir = os.path.join(
        compat.as_bytes(self._export_dir),
        compat.as_bytes(constants.ASSETS_DIRECTORY))

    if not file_io.file_exists(assets_destination_dir):
      file_io.recursive_create_dir(assets_destination_dir)

    # Copy each asset from source path to destination path.
    for asset_source_filepath in asset_source_filepath_list:
      asset_source_filename = os.path.basename(asset_source_filepath)

      asset_destination_filepath = os.path.join(
          compat.as_bytes(assets_destination_dir),
          compat.as_bytes(asset_source_filename))

      # Only copy the asset file to the destination if it does not already
      # exist. This is to ensure that an asset with the same name defined as
      # part of multiple graphs is only copied the first time.
      if not file_io.file_exists(asset_destination_filepath):
        file_io.copy(asset_source_filepath, asset_destination_filepath)

    tf_logging.info("Assets written to: %s", assets_destination_dir) 
Example #9
Source File: builder_impl.py    From keras-lambda with MIT License
def _save_and_write_assets(self, assets_collection_to_add=None):
    """Saves asset to the meta graph and writes asset files to disk.

    Args:
      assets_collection_to_add: The collection where the asset paths are setup.
    """
    asset_source_filepath_list = self._maybe_save_assets(
        assets_collection_to_add)

    # Return if there are no assets to write.
    if not asset_source_filepath_list:
      tf_logging.info("No assets to write.")
      return

    assets_destination_dir = os.path.join(
        compat.as_bytes(self._export_dir),
        compat.as_bytes(constants.ASSETS_DIRECTORY))

    if not file_io.file_exists(assets_destination_dir):
      file_io.recursive_create_dir(assets_destination_dir)

    # Copy each asset from source path to destination path.
    for asset_source_filepath in asset_source_filepath_list:
      asset_source_filename = os.path.basename(asset_source_filepath)

      asset_destination_filepath = os.path.join(
          compat.as_bytes(assets_destination_dir),
          compat.as_bytes(asset_source_filename))

      # Only copy the asset file to the destination if it does not already
      # exist. This is to ensure that an asset with the same name defined as
      # part of multiple graphs is only copied the first time.
      if not file_io.file_exists(asset_destination_filepath):
        file_io.copy(asset_source_filepath, asset_destination_filepath)

    tf_logging.info("Assets written to: %s", assets_destination_dir) 
Example #10
Source File: _cloud.py    From pydatalab with Apache License 2.0
def preprocess(train_dataset, output_dir, eval_dataset, checkpoint, pipeline_option):
    """Preprocess data in Cloud with DataFlow."""

    import apache_beam as beam
    import google.datalab.utils
    from . import _preprocess

    if checkpoint is None:
      checkpoint = _util._DEFAULT_CHECKPOINT_GSURL

    job_name = ('preprocess-image-classification-' +
                datetime.datetime.now().strftime('%y%m%d-%H%M%S'))

    staging_package_url = _util.repackage_to_staging(output_dir)
    tmpdir = tempfile.mkdtemp()
    # suppress DataFlow warnings about wheel package as extra package.
    original_level = logging.getLogger().getEffectiveLevel()
    logging.getLogger().setLevel(logging.ERROR)
    try:
      # Workaround for DataFlow 2.0, which doesn't work well with extra packages in GCS.
      # Remove when the issue is fixed and new version of DataFlow is included in Datalab.
      extra_packages = [staging_package_url, _TF_GS_URL, _PROTOBUF_GS_URL]
      local_packages = [os.path.join(tmpdir, os.path.basename(p))
                        for p in extra_packages]
      for source, dest in zip(extra_packages, local_packages):
        file_io.copy(source, dest, overwrite=True)

      options = {
          'staging_location': os.path.join(output_dir, 'tmp', 'staging'),
          'temp_location': os.path.join(output_dir, 'tmp'),
          'job_name': job_name,
          'project': _util.default_project(),
          'extra_packages': local_packages,
          'teardown_policy': 'TEARDOWN_ALWAYS',
          'no_save_main_session': True
      }
      if pipeline_option is not None:
        options.update(pipeline_option)

      opts = beam.pipeline.PipelineOptions(flags=[], **options)
      p = beam.Pipeline('DataflowRunner', options=opts)
      _preprocess.configure_pipeline(p, train_dataset, eval_dataset,
                                     checkpoint, output_dir, job_name)
      job_results = p.run()
    finally:
      shutil.rmtree(tmpdir)
      logging.getLogger().setLevel(original_level)

    if _util.is_in_IPython():
      import IPython
      dataflow_url = 'https://console.developers.google.com/dataflow?project=%s' % \
                     _util.default_project()
      html = 'Job "%s" submitted.' % job_name
      html += '<p>Click <a href="%s" target="_blank">here</a> to track preprocessing job. <br/>' \
          % dataflow_url
      IPython.display.display_html(html, raw=True)
    return google.datalab.utils.DataflowJob(job_results) 
Example #11
Source File: _cloud.py    From pydatalab with Apache License 2.0
def batch_predict(dataset, model_dir, output_csv, output_bq_table, pipeline_option):
    """Batch predict running in cloud."""

    import apache_beam as beam
    import google.datalab.utils
    from . import _predictor

    if output_csv is None and output_bq_table is None:
      raise ValueError('output_csv and output_bq_table cannot both be None.')
    if 'temp_location' not in pipeline_option:
      raise ValueError('"temp_location" is not set in cloud.')

    job_name = ('batch-predict-image-classification-' +
                datetime.datetime.now().strftime('%y%m%d-%H%M%S'))
    staging_package_url = _util.repackage_to_staging(pipeline_option['temp_location'])
    tmpdir = tempfile.mkdtemp()
    # suppress DataFlow warnings about wheel package as extra package.
    original_level = logging.getLogger().getEffectiveLevel()
    logging.getLogger().setLevel(logging.ERROR)
    try:
      # Workaround for DataFlow 2.0, which doesn't work well with extra packages in GCS.
      # Remove when the issue is fixed and new version of DataFlow is included in Datalab.
      extra_packages = [staging_package_url, _TF_GS_URL, _PROTOBUF_GS_URL]
      local_packages = [os.path.join(tmpdir, os.path.basename(p))
                        for p in extra_packages]
      for source, dest in zip(extra_packages, local_packages):
        file_io.copy(source, dest, overwrite=True)

      options = {
          'staging_location': os.path.join(pipeline_option['temp_location'], 'staging'),
          'job_name': job_name,
          'project': _util.default_project(),
          'extra_packages': local_packages,
          'teardown_policy': 'TEARDOWN_ALWAYS',
          'no_save_main_session': True
      }
      options.update(pipeline_option)

      opts = beam.pipeline.PipelineOptions(flags=[], **options)
      p = beam.Pipeline('DataflowRunner', options=opts)
      _predictor.configure_pipeline(p, dataset, model_dir, output_csv, output_bq_table)
      job_results = p.run()
    finally:
      shutil.rmtree(tmpdir)
      logging.getLogger().setLevel(original_level)

    if _util.is_in_IPython():
      import IPython
      dataflow_url = ('https://console.developers.google.com/dataflow?project=%s' %
                      _util.default_project())
      html = 'Job "%s" submitted.' % job_name
      html += ('<p>Click <a href="%s" target="_blank">here</a> to track batch prediction job. <br/>'
               % dataflow_url)
      IPython.display.display_html(html, raw=True)
    return google.datalab.utils.DataflowJob(job_results) 
Example #12
Source File: visualize.py    From pipelines with Apache License 2.0
def datahtml(
    bucket_name,
    commit_sha,
    train_file_path
):
    import json
    import seaborn as sns
    import matplotlib.pyplot as plt
    import os
    image_path = os.path.join(bucket_name, commit_sha, 'visualization.png')
    # str.lstrip strips a set of characters, not a prefix, so remove the
    # 'gs://' scheme explicitly before building the public URL.
    bucket_path = bucket_name[len('gs://'):] if bucket_name.startswith('gs://') else bucket_name
    image_url = os.path.join('https://storage.googleapis.com', bucket_path, commit_sha, 'visualization.png')
    html_path = os.path.join(bucket_name, 'kaggle.html')
    # Output the visualization to a file.

    import pandas as pd
    df_train = pd.read_csv(train_file_path)
    sns.set()
    cols = ['SalePrice', 'OverallQual', 'GrLivArea', 'GarageCars', 'TotalBsmtSF', 'FullBath', 'YearBuilt']
    sns.pairplot(df_train[cols], height=3)  # newer seaborn versions use 'height' in place of the removed 'size'
    plt.savefig('visualization.png')
    from tensorflow.python.lib.io import file_io
    file_io.copy('visualization.png', image_path)
    rendered_template = """
    <html>
        <head>
            <title>correlation image</title>
        </head>
        <body>
            <img src="{}">
        </body>
    </html>""".format(image_url)
    file_io.write_string_to_file(html_path, rendered_template)

    metadata = {
        'outputs' : [{
        'type': 'web-app',
        'storage': 'gcs',
        'source': html_path,
        }]
    }
    with file_io.FileIO('/mlpipeline-ui-metadata.json', 'w') as f:
        json.dump(metadata, f)
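
Note that file_io.copy is called here without overwrite, so rerunning the step against the same bucket_name/commit_sha prefix would fail once visualization.png already exists in the bucket. A hypothetical variant that tolerates reruns:

# Hypothetical variant: let a rerun replace an existing upload.
file_io.copy('visualization.png', image_path, overwrite=True)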