Python tensorflow.python.lib.io.file_io.copy() Examples

The following are 12 code examples of tensorflow.python.lib.io.file_io.copy(), collected from open-source projects. Each example notes its source file, originating project, and license. You may also want to check out the other available functions and classes of the module tensorflow.python.lib.io.file_io.
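
Before the examples, here is a minimal sketch of the call itself; the paths are hypothetical. file_io.copy(oldpath, newpath, overwrite=False) copies a single file and accepts both local paths and gs:// URIs.

from tensorflow.python.lib.io import file_io

# Copy one file. With the default overwrite=False the call fails if the
# destination already exists, so pass overwrite=True to replace it.
# Local paths and gs:// paths both work (hypothetical paths shown).
file_io.copy('gs://my-bucket/schema.json', '/tmp/schema.json', overwrite=True)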
Example #1
Source File: util.py    From pydatalab with Apache License 2.0
def _recursive_copy(src_dir, dest_dir):
  """Copy the contents of src_dir into the folder dest_dir.
  Args:
    src_dir: gcs or local path.
    dest_dir: gcs or local path; created if it does not already exist.
  """
  src_dir = python_portable_string(src_dir)
  dest_dir = python_portable_string(dest_dir)

  file_io.recursive_create_dir(dest_dir)
  for file_name in file_io.list_directory(src_dir):
    old_path = os.path.join(src_dir, file_name)
    new_path = os.path.join(dest_dir, file_name)

    if file_io.is_directory(old_path):
      _recursive_copy(old_path, new_path)
    else:
      file_io.copy(old_path, new_path, overwrite=True) 
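
For illustration, the helper above might be invoked as follows; the bucket and directory names are hypothetical.

# Hypothetical call: mirror a local output folder into GCS. The
# destination is created by file_io.recursive_create_dir as needed.
_recursive_copy('./model_output', 'gs://my-bucket/model_output')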
Example #2
Source File: task.py    From pydatalab with Apache License 2.0
def recursive_copy(src_dir, dest_dir):
  """Copy the contents of src_dir into the folder dest_dir.
  Args:
    src_dir: gcs or local path.
    dest_dir: gcs or local path.
  """

  file_io.recursive_create_dir(dest_dir)
  for file_name in file_io.list_directory(src_dir):
    old_path = os.path.join(src_dir, file_name)
    new_path = os.path.join(dest_dir, file_name)

    if file_io.is_directory(old_path):
      recursive_copy(old_path, new_path)
    else:
      file_io.copy(old_path, new_path, overwrite=True) 
Example #3
Source File: builder_impl.py    From lambda-packs with MIT License
def _save_and_write_assets(self, assets_collection_to_add=None):
    """Saves asset to the meta graph and writes asset files to disk.

    Args:
      assets_collection_to_add: The collection where the asset paths are setup.
    """
    asset_source_filepath_list = _maybe_save_assets(assets_collection_to_add)

    # Return if there are no assets to write.
    if not asset_source_filepath_list:
      tf_logging.info("No assets to write.")
      return

    assets_destination_dir = os.path.join(
        compat.as_bytes(self._export_dir),
        compat.as_bytes(constants.ASSETS_DIRECTORY))

    if not file_io.file_exists(assets_destination_dir):
      file_io.recursive_create_dir(assets_destination_dir)

    # Copy each asset from source path to destination path.
    for asset_source_filepath in asset_source_filepath_list:
      asset_source_filename = os.path.basename(asset_source_filepath)

      asset_destination_filepath = os.path.join(
          compat.as_bytes(assets_destination_dir),
          compat.as_bytes(asset_source_filename))

      # Only copy the asset file to the destination if it does not already
      # exist. This is to ensure that an asset with the same name defined as
      # part of multiple graphs is only copied the first time.
      if not file_io.file_exists(asset_destination_filepath):
        file_io.copy(asset_source_filepath, asset_destination_filepath)

    tf_logging.info("Assets written to: %s", assets_destination_dir) 
Example #4
Source File: builder_impl.py    From auto-alt-text-lambda-api with MIT License
def _save_and_write_assets(self, assets_collection_to_add=None):
    """Saves asset to the meta graph and writes asset files to disk.

    Args:
      assets_collection_to_add: The collection where the asset paths are setup.
    """
    asset_source_filepath_list = self._maybe_save_assets(
        assets_collection_to_add)

    # Return if there are no assets to write.
    if not asset_source_filepath_list:
      tf_logging.info("No assets to write.")
      return

    assets_destination_dir = os.path.join(
        compat.as_bytes(self._export_dir),
        compat.as_bytes(constants.ASSETS_DIRECTORY))

    if not file_io.file_exists(assets_destination_dir):
      file_io.recursive_create_dir(assets_destination_dir)

    # Copy each asset from source path to destination path.
    for asset_source_filepath in asset_source_filepath_list:
      asset_source_filename = os.path.basename(asset_source_filepath)

      asset_destination_filepath = os.path.join(
          compat.as_bytes(assets_destination_dir),
          compat.as_bytes(asset_source_filename))

      # Only copy the asset file to the destination if it does not already
      # exist. This is to ensure that an asset with the same name defined as
      # part of multiple graphs is only copied the first time.
      if not file_io.file_exists(asset_destination_filepath):
        file_io.copy(asset_source_filepath, asset_destination_filepath)

    tf_logging.info("Assets written to: %s", assets_destination_dir) 
Example #5
Source File: local_preprocess.py    From pydatalab with Apache License 2.0
def run_analysis(args):
  """Builds an analysis files for training."""

  # Read the schema and input feature types
  schema_list = json.loads(
      file_io.read_file_to_string(args.schema_file))

  run_numerical_categorical_analysis(args, schema_list)

  # Also save a copy of the schema in the output folder.
  file_io.copy(args.schema_file,
               os.path.join(args.output_dir, SCHEMA_FILE),
               overwrite=True) 
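
The same keep-a-copy-of-the-inputs idiom can be used on its own; a minimal sketch with hypothetical paths (SCHEMA_FILE in the original snippet is a module-level name for the output file):

import os

from tensorflow.python.lib.io import file_io

# Hypothetical paths: store the schema alongside the analysis output so
# downstream steps can read a self-contained output directory.
schema_file = 'schema.json'
output_dir = 'gs://my-bucket/analysis_output'
file_io.recursive_create_dir(output_dir)
file_io.copy(schema_file, os.path.join(output_dir, 'schema.json'), overwrite=True)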
Example #6
Source File: predict.py    From pydatalab with Apache License 2.0
def main(argv=None):
  args = parse_arguments(sys.argv if argv is None else argv)

  if args.cloud:
    tmpdir = tempfile.mkdtemp()
    try:
      local_packages = [os.path.join(tmpdir, os.path.basename(p)) for p in args.extra_package]
      for source, dest in zip(args.extra_package, local_packages):
        file_io.copy(source, dest, overwrite=True)

      options = {
          'staging_location': os.path.join(args.output_dir, 'tmp', 'staging'),
          'temp_location': os.path.join(args.output_dir, 'tmp', 'staging'),
          'job_name': args.job_name,
          'project': args.project_id,
          'no_save_main_session': True,
          'extra_packages': local_packages,
          'teardown_policy': 'TEARDOWN_ALWAYS',
      }
      opts = beam.pipeline.PipelineOptions(flags=[], **options)
      # Or use BlockingDataflowPipelineRunner
      p = beam.Pipeline('DataflowRunner', options=opts)
      make_prediction_pipeline(p, args)
      print(('Dataflow Job submitted, see Job %s at '
             'https://console.developers.google.com/dataflow?project=%s') %
            (options['job_name'], args.project_id))
      sys.stdout.flush()
      runner_results = p.run()
    finally:
      shutil.rmtree(tmpdir)
  else:
    p = beam.Pipeline('DirectRunner')
    make_prediction_pipeline(p, args)
    runner_results = p.run()

  return runner_results 
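
The loop at the top of main() shows a common pattern: extra packages that may live in GCS are first localized with file_io.copy so the Dataflow runner can stage them. A self-contained sketch of that step, with hypothetical names:

import os
import tempfile

from tensorflow.python.lib.io import file_io

def localize_packages(package_uris):
  # Copy each (possibly gs://) package into a fresh temp dir and return
  # the local paths; the caller is responsible for removing the dir.
  tmpdir = tempfile.mkdtemp()
  local_paths = [os.path.join(tmpdir, os.path.basename(p)) for p in package_uris]
  for source, dest in zip(package_uris, local_paths):
    file_io.copy(source, dest, overwrite=True)
  return tmpdir, local_paths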
Example #7
Source File: builder.py    From deep_image_model with Apache License 2.0
def _save_and_write_assets(self, assets_collection_to_add=None):
    """Saves asset to the meta graph and writes asset files to disk.

    Args:
      assets_collection_to_add: The collection where the asset paths are setup.
    """
    asset_source_filepath_list = self._maybe_save_assets(
        assets_collection_to_add)

    # Return if there are no assets to write.
    if not asset_source_filepath_list:
      tf_logging.info("No assets to write.")
      return

    assets_destination_dir = os.path.join(
        compat.as_bytes(self._export_dir),
        compat.as_bytes(constants.ASSETS_DIRECTORY))

    if not file_io.file_exists(assets_destination_dir):
      file_io.recursive_create_dir(assets_destination_dir)

    # Copy each asset from source path to destination path.
    for asset_source_filepath in asset_source_filepath_list:
      asset_source_filename = os.path.basename(asset_source_filepath)

      asset_destination_filepath = os.path.join(
          compat.as_bytes(assets_destination_dir),
          compat.as_bytes(asset_source_filename))

      # Only copy the asset file to the destination if it does not already
      # exist. This is to ensure that an asset with the same name defined as
      # part of multiple graphs is only copied the first time.
      if not file_io.file_exists(asset_destination_filepath):
        file_io.copy(asset_source_filepath, asset_destination_filepath)

    tf_logging.info("Assets written to: %s", assets_destination_dir) 
Example #8
Source File: builder_impl.py    From Serverless-Deep-Learning-with-TensorFlow-and-AWS-Lambda with MIT License
def _save_and_write_assets(self, assets_collection_to_add=None):
    """Saves asset to the meta graph and writes asset files to disk.

    Args:
      assets_collection_to_add: The collection where the asset paths are setup.
    """
    asset_source_filepath_list = _maybe_save_assets(assets_collection_to_add)

    # Return if there are no assets to write.
    if not asset_source_filepath_list:
      tf_logging.info("No assets to write.")
      return

    assets_destination_dir = os.path.join(
        compat.as_bytes(self._export_dir),
        compat.as_bytes(constants.ASSETS_DIRECTORY))

    if not file_io.file_exists(assets_destination_dir):
      file_io.recursive_create_dir(assets_destination_dir)

    # Copy each asset from source path to destination path.
    for asset_source_filepath in asset_source_filepath_list:
      asset_source_filename = os.path.basename(asset_source_filepath)

      asset_destination_filepath = os.path.join(
          compat.as_bytes(assets_destination_dir),
          compat.as_bytes(asset_source_filename))

      # Only copy the asset file to the destination if it does not already
      # exist. This is to ensure that an asset with the same name defined as
      # part of multiple graphs is only copied the first time.
      if not file_io.file_exists(asset_destination_filepath):
        file_io.copy(asset_source_filepath, asset_destination_filepath)

    tf_logging.info("Assets written to: %s", assets_destination_dir) 
Example #9
Source File: builder_impl.py    From keras-lambda with MIT License
def _save_and_write_assets(self, assets_collection_to_add=None):
    """Saves asset to the meta graph and writes asset files to disk.

    Args:
      assets_collection_to_add: The collection where the asset paths are setup.
    """
    asset_source_filepath_list = self._maybe_save_assets(
        assets_collection_to_add)

    # Return if there are no assets to write.
    if not asset_source_filepath_list:
      tf_logging.info("No assets to write.")
      return

    assets_destination_dir = os.path.join(
        compat.as_bytes(self._export_dir),
        compat.as_bytes(constants.ASSETS_DIRECTORY))

    if not file_io.file_exists(assets_destination_dir):
      file_io.recursive_create_dir(assets_destination_dir)

    # Copy each asset from source path to destination path.
    for asset_source_filepath in asset_source_filepath_list:
      asset_source_filename = os.path.basename(asset_source_filepath)

      asset_destination_filepath = os.path.join(
          compat.as_bytes(assets_destination_dir),
          compat.as_bytes(asset_source_filename))

      # Only copy the asset file to the destination if it does not already
      # exist. This is to ensure that an asset with the same name defined as
      # part of multiple graphs is only copied the first time.
      if not file_io.file_exists(asset_destination_filepath):
        file_io.copy(asset_source_filepath, asset_destination_filepath)

    tf_logging.info("Assets written to: %s", assets_destination_dir) 
Example #10
Source File: _cloud.py    From pydatalab with Apache License 2.0
def preprocess(train_dataset, output_dir, eval_dataset, checkpoint, pipeline_option):
    """Preprocess data in Cloud with DataFlow."""

    import apache_beam as beam
    import google.datalab.utils
    from . import _preprocess

    if checkpoint is None:
      checkpoint = _util._DEFAULT_CHECKPOINT_GSURL

    job_name = ('preprocess-image-classification-' +
                datetime.datetime.now().strftime('%y%m%d-%H%M%S'))

    staging_package_url = _util.repackage_to_staging(output_dir)
    tmpdir = tempfile.mkdtemp()
    # suppress DataFlow warnings about wheel package as extra package.
    original_level = logging.getLogger().getEffectiveLevel()
    logging.getLogger().setLevel(logging.ERROR)
    try:
      # Workaround for DataFlow 2.0, which doesn't work well with extra packages in GCS.
      # Remove when the issue is fixed and new version of DataFlow is included in Datalab.
      extra_packages = [staging_package_url, _TF_GS_URL, _PROTOBUF_GS_URL]
      local_packages = [os.path.join(tmpdir, os.path.basename(p))
                        for p in extra_packages]
      for source, dest in zip(extra_packages, local_packages):
        file_io.copy(source, dest, overwrite=True)

      options = {
          'staging_location': os.path.join(output_dir, 'tmp', 'staging'),
          'temp_location': os.path.join(output_dir, 'tmp'),
          'job_name': job_name,
          'project': _util.default_project(),
          'extra_packages': local_packages,
          'teardown_policy': 'TEARDOWN_ALWAYS',
          'no_save_main_session': True
      }
      if pipeline_option is not None:
        options.update(pipeline_option)

      opts = beam.pipeline.PipelineOptions(flags=[], **options)
      p = beam.Pipeline('DataflowRunner', options=opts)
      _preprocess.configure_pipeline(p, train_dataset, eval_dataset,
                                     checkpoint, output_dir, job_name)
      job_results = p.run()
    finally:
      shutil.rmtree(tmpdir)
      logging.getLogger().setLevel(original_level)

    if _util.is_in_IPython():
      import IPython
      dataflow_url = 'https://console.developers.google.com/dataflow?project=%s' % \
                     _util.default_project()
      html = 'Job "%s" submitted.' % job_name
      html += '<p>Click <a href="%s" target="_blank">here</a> to track preprocessing job. <br/>' \
          % dataflow_url
      IPython.display.display_html(html, raw=True)
    return google.datalab.utils.DataflowJob(job_results) 
Example #11
Source File: _cloud.py    From pydatalab with Apache License 2.0
def batch_predict(dataset, model_dir, output_csv, output_bq_table, pipeline_option):
    """Batch predict running in cloud."""

    import apache_beam as beam
    import google.datalab.utils
    from . import _predictor

    if output_csv is None and output_bq_table is None:
      raise ValueError('output_csv and output_bq_table cannot both be None.')
    if 'temp_location' not in pipeline_option:
      raise ValueError('"temp_location" is not set in cloud.')

    job_name = ('batch-predict-image-classification-' +
                datetime.datetime.now().strftime('%y%m%d-%H%M%S'))
    staging_package_url = _util.repackage_to_staging(pipeline_option['temp_location'])
    tmpdir = tempfile.mkdtemp()
    # suppress DataFlow warnings about wheel package as extra package.
    original_level = logging.getLogger().getEffectiveLevel()
    logging.getLogger().setLevel(logging.ERROR)
    try:
      # Workaround for DataFlow 2.0, which doesn't work well with extra packages in GCS.
      # Remove when the issue is fixed and new version of DataFlow is included in Datalab.
      extra_packages = [staging_package_url, _TF_GS_URL, _PROTOBUF_GS_URL]
      local_packages = [os.path.join(tmpdir, os.path.basename(p))
                        for p in extra_packages]
      for source, dest in zip(extra_packages, local_packages):
        file_io.copy(source, dest, overwrite=True)

      options = {
          'staging_location': os.path.join(pipeline_option['temp_location'], 'staging'),
          'job_name': job_name,
          'project': _util.default_project(),
          'extra_packages': local_packages,
          'teardown_policy': 'TEARDOWN_ALWAYS',
          'no_save_main_session': True
      }
      options.update(pipeline_option)

      opts = beam.pipeline.PipelineOptions(flags=[], **options)
      p = beam.Pipeline('DataflowRunner', options=opts)
      _predictor.configure_pipeline(p, dataset, model_dir, output_csv, output_bq_table)
      job_results = p.run()
    finally:
      shutil.rmtree(tmpdir)
      logging.getLogger().setLevel(original_level)

    if _util.is_in_IPython():
      import IPython
      dataflow_url = ('https://console.developers.google.com/dataflow?project=%s' %
                      _util.default_project())
      html = 'Job "%s" submitted.' % job_name
      html += ('<p>Click <a href="%s" target="_blank">here</a> to track batch prediction job. <br/>'
               % dataflow_url)
      IPython.display.display_html(html, raw=True)
    return google.datalab.utils.DataflowJob(job_results) 
Example #12
Source File: visualize.py    From pipelines with Apache License 2.0
def datahtml(
    bucket_name,
    commit_sha,
    train_file_path
):
    import json
    import seaborn as sns
    import matplotlib.pyplot as plt
    import os
    image_path = os.path.join(bucket_name, commit_sha, 'visualization.png')
    # str.lstrip strips a set of characters, not a prefix, so remove the
    # 'gs://' scheme explicitly before building the public URL.
    bucket_path = bucket_name[len('gs://'):] if bucket_name.startswith('gs://') else bucket_name
    image_url = os.path.join('https://storage.googleapis.com', bucket_path, commit_sha, 'visualization.png')
    html_path = os.path.join(bucket_name, 'kaggle.html')
    # Output the visualization to a file.

    import pandas as pd
    df_train = pd.read_csv(train_file_path)
    sns.set()
    cols = ['SalePrice', 'OverallQual', 'GrLivArea', 'GarageCars', 'TotalBsmtSF', 'FullBath', 'YearBuilt']
    sns.pairplot(df_train[cols], height=3)  # newer seaborn versions use 'height' in place of the removed 'size'
    plt.savefig('visualization.png')
    from tensorflow.python.lib.io import file_io
    file_io.copy('visualization.png', image_path)
    rendered_template = """
    <html>
        <head>
            <title>correlation image</title>
        </head>
        <body>
            <img src="{}">
        </body>
    </html>""".format(image_url)
    file_io.write_string_to_file(html_path, rendered_template)

    metadata = {
        'outputs' : [{
        'type': 'web-app',
        'storage': 'gcs',
        'source': html_path,
        }]
    }
    with file_io.FileIO('/mlpipeline-ui-metadata.json', 'w') as f:
        json.dump(metadata, f)
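
Note that file_io.copy is called here without overwrite, so rerunning the step against the same bucket_name/commit_sha prefix would fail once visualization.png already exists in the bucket. A hypothetical variant that tolerates reruns:

# Hypothetical variant: let a rerun replace an existing upload.
file_io.copy('visualization.png', image_path, overwrite=True)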