Python tensorflow.python.lib.io.file_io.copy() Examples
The following are 12 code examples of tensorflow.python.lib.io.file_io.copy(), drawn from open-source projects; the originating project and source file are noted above each example.
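file_io.copy takes a source path, a destination path, and an optional overwrite flag; it copies a single file and fails if the destination already exists unless overwrite=True is passed. A minimal hedged sketch before the examples (the bucket and file names below are made up for illustration):

from tensorflow.python.lib.io import file_io

# Copy a local file to a (hypothetical) GCS destination. Both arguments are
# plain path strings; gs:// paths are routed through TensorFlow's file systems.
file_io.copy('params.json', 'gs://my-bucket/configs/params.json', overwrite=True)

# Without overwrite=True the copy fails if the destination already exists,
# so callers often guard with file_io.file_exists first.
if not file_io.file_exists('/tmp/params_backup.json'):
  file_io.copy('params.json', '/tmp/params_backup.json')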
Example #1
Source File: util.py From pydatalab with Apache License 2.0
def _recursive_copy(src_dir, dest_dir):
  """Copy the contents of src_dir into the folder dest_dir.

  Args:
    src_dir: gcs or local path.
    dest_dir: gcs or local path. When called, dest_dir should exist.
  """
  src_dir = python_portable_string(src_dir)
  dest_dir = python_portable_string(dest_dir)

  file_io.recursive_create_dir(dest_dir)
  for file_name in file_io.list_directory(src_dir):
    old_path = os.path.join(src_dir, file_name)
    new_path = os.path.join(dest_dir, file_name)

    if file_io.is_directory(old_path):
      _recursive_copy(old_path, new_path)
    else:
      file_io.copy(old_path, new_path, overwrite=True)
Example #2
Source File: task.py From pydatalab with Apache License 2.0
def recursive_copy(src_dir, dest_dir):
  """Copy the contents of src_dir into the folder dest_dir.

  Args:
    src_dir: gcs or local path.
    dest_dir: gcs or local path.
  """
  file_io.recursive_create_dir(dest_dir)
  for file_name in file_io.list_directory(src_dir):
    old_path = os.path.join(src_dir, file_name)
    new_path = os.path.join(dest_dir, file_name)

    if file_io.is_directory(old_path):
      recursive_copy(old_path, new_path)
    else:
      file_io.copy(old_path, new_path, overwrite=True)
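As a hedged usage sketch (the paths are hypothetical), a helper like the one above can mirror a local export directory into GCS, or copy between two GCS prefixes, since file_io dispatches on the path scheme:

# Hypothetical call: mirror a local model export into a GCS bucket.
recursive_copy('/tmp/model_export', 'gs://my-bucket/models/v1')

# Equally valid between two GCS prefixes.
recursive_copy('gs://my-bucket/models/v1', 'gs://my-backup-bucket/models/v1')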
Example #3
Source File: builder_impl.py From lambda-packs with MIT License
def _save_and_write_assets(self, assets_collection_to_add=None):
  """Saves asset to the meta graph and writes asset files to disk.

  Args:
    assets_collection_to_add: The collection where the asset paths are setup.
  """
  asset_source_filepath_list = _maybe_save_assets(assets_collection_to_add)

  # Return if there are no assets to write.
  if len(asset_source_filepath_list) == 0:
    tf_logging.info("No assets to write.")
    return

  assets_destination_dir = os.path.join(
      compat.as_bytes(self._export_dir),
      compat.as_bytes(constants.ASSETS_DIRECTORY))

  if not file_io.file_exists(assets_destination_dir):
    file_io.recursive_create_dir(assets_destination_dir)

  # Copy each asset from source path to destination path.
  for asset_source_filepath in asset_source_filepath_list:
    asset_source_filename = os.path.basename(asset_source_filepath)

    asset_destination_filepath = os.path.join(
        compat.as_bytes(assets_destination_dir),
        compat.as_bytes(asset_source_filename))

    # Only copy the asset file to the destination if it does not already
    # exist. This is to ensure that an asset with the same name defined as
    # part of multiple graphs is only copied the first time.
    if not file_io.file_exists(asset_destination_filepath):
      file_io.copy(asset_source_filepath, asset_destination_filepath)

  tf_logging.info("Assets written to: %s", assets_destination_dir)
Example #4
Source File: builder_impl.py From auto-alt-text-lambda-api with MIT License
def _save_and_write_assets(self, assets_collection_to_add=None):
  """Saves asset to the meta graph and writes asset files to disk.

  Args:
    assets_collection_to_add: The collection where the asset paths are setup.
  """
  asset_source_filepath_list = self._maybe_save_assets(
      assets_collection_to_add)

  # Return if there are no assets to write.
  if len(asset_source_filepath_list) == 0:
    tf_logging.info("No assets to write.")
    return

  assets_destination_dir = os.path.join(
      compat.as_bytes(self._export_dir),
      compat.as_bytes(constants.ASSETS_DIRECTORY))

  if not file_io.file_exists(assets_destination_dir):
    file_io.recursive_create_dir(assets_destination_dir)

  # Copy each asset from source path to destination path.
  for asset_source_filepath in asset_source_filepath_list:
    asset_source_filename = os.path.basename(asset_source_filepath)

    asset_destination_filepath = os.path.join(
        compat.as_bytes(assets_destination_dir),
        compat.as_bytes(asset_source_filename))

    # Only copy the asset file to the destination if it does not already
    # exist. This is to ensure that an asset with the same name defined as
    # part of multiple graphs is only copied the first time.
    if not file_io.file_exists(asset_destination_filepath):
      file_io.copy(asset_source_filepath, asset_destination_filepath)

  tf_logging.info("Assets written to: %s", assets_destination_dir)
Example #5
Source File: local_preprocess.py From pydatalab with Apache License 2.0
def run_analysis(args):
  """Builds analysis files for training."""

  # Read the schema and input feature types
  schema_list = json.loads(
      file_io.read_file_to_string(args.schema_file))

  run_numerical_categorical_analysis(args, schema_list)

  # Also save a copy of the schema in the output folder.
  file_io.copy(args.schema_file,
               os.path.join(args.output_dir, SCHEMA_FILE),
               overwrite=True)
Example #6
Source File: predict.py From pydatalab with Apache License 2.0
def main(argv=None):
  args = parse_arguments(sys.argv if argv is None else argv)

  if args.cloud:
    tmpdir = tempfile.mkdtemp()
    try:
      local_packages = [os.path.join(tmpdir, os.path.basename(p))
                        for p in args.extra_package]
      for source, dest in zip(args.extra_package, local_packages):
        file_io.copy(source, dest, overwrite=True)

      options = {
          'staging_location': os.path.join(args.output_dir, 'tmp', 'staging'),
          'temp_location': os.path.join(args.output_dir, 'tmp', 'staging'),
          'job_name': args.job_name,
          'project': args.project_id,
          'no_save_main_session': True,
          'extra_packages': local_packages,
          'teardown_policy': 'TEARDOWN_ALWAYS',
      }
      opts = beam.pipeline.PipelineOptions(flags=[], **options)
      # Or use BlockingDataflowPipelineRunner
      p = beam.Pipeline('DataflowRunner', options=opts)
      make_prediction_pipeline(p, args)
      print(('Dataflow Job submitted, see Job %s at '
             'https://console.developers.google.com/dataflow?project=%s') %
            (options['job_name'], args.project_id))
      sys.stdout.flush()
      runner_results = p.run()
    finally:
      shutil.rmtree(tmpdir)
  else:
    p = beam.Pipeline('DirectRunner')
    make_prediction_pipeline(p, args)
    runner_results = p.run()

  return runner_results
Example #7
Source File: builder.py From deep_image_model with Apache License 2.0
def _save_and_write_assets(self, assets_collection_to_add=None):
  """Saves asset to the meta graph and writes asset files to disk.

  Args:
    assets_collection_to_add: The collection where the asset paths are setup.
  """
  asset_source_filepath_list = self._maybe_save_assets(
      assets_collection_to_add)

  # Return if there are no assets to write.
  if len(asset_source_filepath_list) == 0:
    tf_logging.info("No assets to write.")
    return

  assets_destination_dir = os.path.join(
      compat.as_bytes(self._export_dir),
      compat.as_bytes(constants.ASSETS_DIRECTORY))

  if not file_io.file_exists(assets_destination_dir):
    file_io.recursive_create_dir(assets_destination_dir)

  # Copy each asset from source path to destination path.
  for asset_source_filepath in asset_source_filepath_list:
    asset_source_filename = os.path.basename(asset_source_filepath)

    asset_destination_filepath = os.path.join(
        compat.as_bytes(assets_destination_dir),
        compat.as_bytes(asset_source_filename))

    # Only copy the asset file to the destination if it does not already
    # exist. This is to ensure that an asset with the same name defined as
    # part of multiple graphs is only copied the first time.
    if not file_io.file_exists(asset_destination_filepath):
      file_io.copy(asset_source_filepath, asset_destination_filepath)

  tf_logging.info("Assets written to: %s", assets_destination_dir)
Example #8
Source File: builder_impl.py From Serverless-Deep-Learning-with-TensorFlow-and-AWS-Lambda with MIT License
def _save_and_write_assets(self, assets_collection_to_add=None):
  """Saves asset to the meta graph and writes asset files to disk.

  Args:
    assets_collection_to_add: The collection where the asset paths are setup.
  """
  asset_source_filepath_list = _maybe_save_assets(assets_collection_to_add)

  # Return if there are no assets to write.
  if len(asset_source_filepath_list) == 0:
    tf_logging.info("No assets to write.")
    return

  assets_destination_dir = os.path.join(
      compat.as_bytes(self._export_dir),
      compat.as_bytes(constants.ASSETS_DIRECTORY))

  if not file_io.file_exists(assets_destination_dir):
    file_io.recursive_create_dir(assets_destination_dir)

  # Copy each asset from source path to destination path.
  for asset_source_filepath in asset_source_filepath_list:
    asset_source_filename = os.path.basename(asset_source_filepath)

    asset_destination_filepath = os.path.join(
        compat.as_bytes(assets_destination_dir),
        compat.as_bytes(asset_source_filename))

    # Only copy the asset file to the destination if it does not already
    # exist. This is to ensure that an asset with the same name defined as
    # part of multiple graphs is only copied the first time.
    if not file_io.file_exists(asset_destination_filepath):
      file_io.copy(asset_source_filepath, asset_destination_filepath)

  tf_logging.info("Assets written to: %s", assets_destination_dir)
Example #9
Source File: builder_impl.py From keras-lambda with MIT License
def _save_and_write_assets(self, assets_collection_to_add=None):
  """Saves asset to the meta graph and writes asset files to disk.

  Args:
    assets_collection_to_add: The collection where the asset paths are setup.
  """
  asset_source_filepath_list = self._maybe_save_assets(
      assets_collection_to_add)

  # Return if there are no assets to write.
  if len(asset_source_filepath_list) == 0:
    tf_logging.info("No assets to write.")
    return

  assets_destination_dir = os.path.join(
      compat.as_bytes(self._export_dir),
      compat.as_bytes(constants.ASSETS_DIRECTORY))

  if not file_io.file_exists(assets_destination_dir):
    file_io.recursive_create_dir(assets_destination_dir)

  # Copy each asset from source path to destination path.
  for asset_source_filepath in asset_source_filepath_list:
    asset_source_filename = os.path.basename(asset_source_filepath)

    asset_destination_filepath = os.path.join(
        compat.as_bytes(assets_destination_dir),
        compat.as_bytes(asset_source_filename))

    # Only copy the asset file to the destination if it does not already
    # exist. This is to ensure that an asset with the same name defined as
    # part of multiple graphs is only copied the first time.
    if not file_io.file_exists(asset_destination_filepath):
      file_io.copy(asset_source_filepath, asset_destination_filepath)

  tf_logging.info("Assets written to: %s", assets_destination_dir)
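Examples #3, #4, #7, #8, and #9 are essentially the same SavedModel asset-writing routine, vendored from different projects' copies of TensorFlow's builder. The shared idiom is: create the assets directory under the export directory if needed, then copy each asset file only when it is not already present. A condensed, hedged sketch of that idiom (the function name, paths, and asset list are hypothetical):

import os
from tensorflow.python.lib.io import file_io

def copy_assets(asset_paths, export_dir):
  # Create <export_dir>/assets if it does not exist yet.
  assets_dir = os.path.join(export_dir, 'assets')
  if not file_io.file_exists(assets_dir):
    file_io.recursive_create_dir(assets_dir)
  for src in asset_paths:
    dst = os.path.join(assets_dir, os.path.basename(src))
    # Copy only once so an asset shared by several graphs is not rewritten.
    if not file_io.file_exists(dst):
      file_io.copy(src, dst)

copy_assets(['vocab.txt', 'labels.txt'], '/tmp/saved_model')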
Example #10
Source File: _cloud.py From pydatalab with Apache License 2.0
def preprocess(train_dataset, output_dir, eval_dataset, checkpoint, pipeline_option):
  """Preprocess data in Cloud with DataFlow."""

  import apache_beam as beam
  import google.datalab.utils
  from . import _preprocess

  if checkpoint is None:
    checkpoint = _util._DEFAULT_CHECKPOINT_GSURL

  job_name = ('preprocess-image-classification-' +
              datetime.datetime.now().strftime('%y%m%d-%H%M%S'))
  staging_package_url = _util.repackage_to_staging(output_dir)
  tmpdir = tempfile.mkdtemp()
  # suppress DataFlow warnings about wheel package as extra package.
  original_level = logging.getLogger().getEffectiveLevel()
  logging.getLogger().setLevel(logging.ERROR)
  try:
    # Workaround for DataFlow 2.0, which doesn't work well with extra packages in GCS.
    # Remove when the issue is fixed and new version of DataFlow is included in Datalab.
    extra_packages = [staging_package_url, _TF_GS_URL, _PROTOBUF_GS_URL]
    local_packages = [os.path.join(tmpdir, os.path.basename(p))
                      for p in extra_packages]
    for source, dest in zip(extra_packages, local_packages):
      file_io.copy(source, dest, overwrite=True)

    options = {
        'staging_location': os.path.join(output_dir, 'tmp', 'staging'),
        'temp_location': os.path.join(output_dir, 'tmp'),
        'job_name': job_name,
        'project': _util.default_project(),
        'extra_packages': local_packages,
        'teardown_policy': 'TEARDOWN_ALWAYS',
        'no_save_main_session': True
    }
    if pipeline_option is not None:
      options.update(pipeline_option)

    opts = beam.pipeline.PipelineOptions(flags=[], **options)
    p = beam.Pipeline('DataflowRunner', options=opts)
    _preprocess.configure_pipeline(p, train_dataset, eval_dataset, checkpoint,
                                   output_dir, job_name)
    job_results = p.run()
  finally:
    shutil.rmtree(tmpdir)
    logging.getLogger().setLevel(original_level)

  if _util.is_in_IPython():
    import IPython
    dataflow_url = ('https://console.developers.google.com/dataflow?project=%s' %
                    _util.default_project())
    html = 'Job "%s" submitted.' % job_name
    html += ('<p>Click <a href="%s" target="_blank">here</a> to track '
             'preprocessing job. <br/>' % dataflow_url)
    IPython.display.display_html(html, raw=True)
  return google.datalab.utils.DataflowJob(job_results)
Example #11
Source File: _cloud.py From pydatalab with Apache License 2.0
def batch_predict(dataset, model_dir, output_csv, output_bq_table, pipeline_option):
  """Batch predict running in cloud."""

  import apache_beam as beam
  import google.datalab.utils
  from . import _predictor

  if output_csv is None and output_bq_table is None:
    raise ValueError('output_csv and output_bq_table cannot both be None.')
  if 'temp_location' not in pipeline_option:
    raise ValueError('"temp_location" is not set in cloud.')

  job_name = ('batch-predict-image-classification-' +
              datetime.datetime.now().strftime('%y%m%d-%H%M%S'))
  staging_package_url = _util.repackage_to_staging(pipeline_option['temp_location'])
  tmpdir = tempfile.mkdtemp()
  # suppress DataFlow warnings about wheel package as extra package.
  original_level = logging.getLogger().getEffectiveLevel()
  logging.getLogger().setLevel(logging.ERROR)
  try:
    # Workaround for DataFlow 2.0, which doesn't work well with extra packages in GCS.
    # Remove when the issue is fixed and new version of DataFlow is included in Datalab.
    extra_packages = [staging_package_url, _TF_GS_URL, _PROTOBUF_GS_URL]
    local_packages = [os.path.join(tmpdir, os.path.basename(p))
                      for p in extra_packages]
    for source, dest in zip(extra_packages, local_packages):
      file_io.copy(source, dest, overwrite=True)

    options = {
        'staging_location': os.path.join(pipeline_option['temp_location'], 'staging'),
        'job_name': job_name,
        'project': _util.default_project(),
        'extra_packages': local_packages,
        'teardown_policy': 'TEARDOWN_ALWAYS',
        'no_save_main_session': True
    }
    options.update(pipeline_option)
    opts = beam.pipeline.PipelineOptions(flags=[], **options)
    p = beam.Pipeline('DataflowRunner', options=opts)
    _predictor.configure_pipeline(p, dataset, model_dir, output_csv, output_bq_table)
    job_results = p.run()
  finally:
    shutil.rmtree(tmpdir)
    logging.getLogger().setLevel(original_level)

  if _util.is_in_IPython():
    import IPython
    dataflow_url = ('https://console.developers.google.com/dataflow?project=%s' %
                    _util.default_project())
    html = 'Job "%s" submitted.' % job_name
    html += ('<p>Click <a href="%s" target="_blank">here</a> to track '
             'batch prediction job. <br/>' % dataflow_url)
    IPython.display.display_html(html, raw=True)
  return google.datalab.utils.DataflowJob(job_results)
Example #12
Source File: visualize.py From pipelines with Apache License 2.0
def datahtml(bucket_name, commit_sha, train_file_path):
    import json
    import os

    import matplotlib.pyplot as plt
    import pandas as pd
    import seaborn as sns

    image_path = os.path.join(bucket_name, commit_sha, 'visualization.png')
    # Caution: lstrip('gs://') strips any leading 'g', 's', ':' or '/' characters,
    # not just the 'gs://' prefix.
    image_url = os.path.join('https://storage.googleapis.com',
                             bucket_name.lstrip('gs://'),
                             commit_sha, 'visualization.png')
    html_path = os.path.join(bucket_name, 'kaggle.html')

    # Output the visualization to a file.
    df_train = pd.read_csv(train_file_path)
    sns.set()
    cols = ['SalePrice', 'OverallQual', 'GrLivArea', 'GarageCars',
            'TotalBsmtSF', 'FullBath', 'YearBuilt']
    sns.pairplot(df_train[cols], size=3)
    plt.savefig('visualization.png')

    from tensorflow.python.lib.io import file_io
    file_io.copy('visualization.png', image_path)

    rendered_template = """
    <html>
        <head>
            <title>correlation image</title>
        </head>
        <body>
            <img src={}>
        </body>
    </html>""".format(image_url)
    file_io.write_string_to_file(html_path, rendered_template)

    metadata = {
        'outputs': [{
            'type': 'web-app',
            'storage': 'gcs',
            'source': html_path,
        }]
    }
    with file_io.FileIO('/mlpipeline-ui-metadata.json', 'w') as f:
        json.dump(metadata, f)
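For completeness, a hedged sketch of checking the artifacts the example above writes, using file_io's read helpers (the GCS paths are assumptions, not values from the example):

from tensorflow.python.lib.io import file_io

image_path = 'gs://my-bucket/abc123/visualization.png'  # hypothetical path
html_path = 'gs://my-bucket/kaggle.html'                # hypothetical path

# Confirm the copied image exists, then read the rendered HTML back.
if file_io.file_exists(image_path):
  print(file_io.read_file_to_string(html_path))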