Python tensorflow.python.lib.io.file_io.get_matching_files() Examples
The following are 30 code examples of tensorflow.python.lib.io.file_io.get_matching_files().
You may also want to check out all available functions and classes of the module tensorflow.python.lib.io.file_io.
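Before working through the project examples below, here is a minimal, self-contained sketch of the call itself; the glob pattern and paths are placeholders, not taken from any of the projects. get_matching_files accepts a glob pattern (local paths or supported remote filesystems such as gs://) and returns a list of matching path strings, typically an empty list when nothing matches.

from tensorflow.python.lib.io import file_io

# Hypothetical glob; adjust to your own data layout.
pattern = '/tmp/data/shard-*.tfrecord'
matches = file_io.get_matching_files(pattern)
if not matches:
  print('No files matched %s' % pattern)
for path in matches:
  print(path)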
Example #1
Source File: saver.py From keras-lambda with MIT License

def checkpoint_exists(checkpoint_prefix):
  """Checks whether a V1 or V2 checkpoint exists with the specified prefix.

  This is the recommended way to check if a checkpoint exists, since it takes
  into account the naming difference between V1 and V2 formats.

  Args:
    checkpoint_prefix: the prefix of a V1 or V2 checkpoint, with V2 taking
      priority.  Typically the result of `Saver.save()` or that of
      `tf.train.latest_checkpoint()`, regardless of sharded/non-sharded or
      V1/V2.
  Returns:
    A bool, true iff a checkpoint referred to by `checkpoint_prefix` exists.
  """
  pathname = _prefix_to_checkpoint_path(checkpoint_prefix,
                                        saver_pb2.SaverDef.V2)
  if file_io.get_matching_files(pathname):
    return True
  elif file_io.get_matching_files(checkpoint_prefix):
    return True
  else:
    return False
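The helper `_prefix_to_checkpoint_path` used above is private to saver.py. Below is a rough, hedged sketch of its behavior, inferred from how the examples on this page use it rather than copied from the upstream implementation: for the V2 format it points at the checkpoint's .index metadata file, while for V1 the prefix itself is the path to glob. A hypothetical call to checkpoint_exists follows; the prefix is a placeholder.

from tensorflow.core.protobuf import saver_pb2

def _prefix_to_checkpoint_path_sketch(prefix, format_version):
  # Approximation of the private helper: V2 checkpoints are detected via
  # their ".index" file, V1 checkpoints via the prefix path itself.
  if format_version == saver_pb2.SaverDef.V2:
    return prefix + '.index'
  return prefix

# Hypothetical prefix, e.g. the return value of Saver.save():
if checkpoint_exists('/tmp/model/model.ckpt-1000'):
  print('checkpoint found')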
Example #2
Source File: saver.py From deep_image_model with Apache License 2.0

def checkpoint_exists(checkpoint_prefix):
  """Checks whether a V1 or V2 checkpoint exists with the specified prefix.

  This is the recommended way to check if a checkpoint exists, since it takes
  into account the naming difference between V1 and V2 formats.

  Args:
    checkpoint_prefix: the prefix of a V1 or V2 checkpoint, with V2 taking
      priority.  Typically the result of `Saver.save()` or that of
      `tf.train.latest_checkpoint()`, regardless of sharded/non-sharded or
      V1/V2.
  Returns:
    A bool, true iff a checkpoint referred to by `checkpoint_prefix` exists.
  """
  pathname = _prefix_to_checkpoint_path(checkpoint_prefix,
                                        saver_pb2.SaverDef.V2)
  if file_io.get_matching_files(pathname):
    return True
  elif file_io.get_matching_files(checkpoint_prefix):
    return True
  else:
    return False
Example #3
Source File: saver.py From Serverless-Deep-Learning-with-TensorFlow-and-AWS-Lambda with MIT License

def checkpoint_exists(checkpoint_prefix):
  """Checks whether a V1 or V2 checkpoint exists with the specified prefix.

  This is the recommended way to check if a checkpoint exists, since it takes
  into account the naming difference between V1 and V2 formats.

  Args:
    checkpoint_prefix: the prefix of a V1 or V2 checkpoint, with V2 taking
      priority.  Typically the result of `Saver.save()` or that of
      `tf.train.latest_checkpoint()`, regardless of sharded/non-sharded or
      V1/V2.
  Returns:
    A bool, true iff a checkpoint referred to by `checkpoint_prefix` exists.
  """
  pathname = _prefix_to_checkpoint_path(checkpoint_prefix,
                                        saver_pb2.SaverDef.V2)
  if file_io.get_matching_files(pathname):
    return True
  elif file_io.get_matching_files(checkpoint_prefix):
    return True
  else:
    return False
Example #4
Source File: evaluation_utils.py From training_results_v0.5 with Apache License 2.0

def get_all_checkpoints(output_dir):
  """docstring."""
  ckpt = cm.get_checkpoint_state(output_dir, None)
  res = []
  if not ckpt:
    return None
  for path in ckpt.all_model_checkpoint_paths:
    # Look for either a V2 path or a V1 path, with priority for V2.
    v2_path = cm._prefix_to_checkpoint_path(path, saver_pb2.SaverDef.V2)
    v1_path = cm._prefix_to_checkpoint_path(path, saver_pb2.SaverDef.V1)
    if file_io.get_matching_files(v2_path) or file_io.get_matching_files(
        v1_path):
      res.append(path)
    else:
      tf.logging.error("Couldn't match files for checkpoint %s", path)
  return res
Example #5
Source File: saver.py From lambda-packs with MIT License

def checkpoint_exists(checkpoint_prefix):
  """Checks whether a V1 or V2 checkpoint exists with the specified prefix.

  This is the recommended way to check if a checkpoint exists, since it takes
  into account the naming difference between V1 and V2 formats.

  Args:
    checkpoint_prefix: the prefix of a V1 or V2 checkpoint, with V2 taking
      priority.  Typically the result of `Saver.save()` or that of
      `tf.train.latest_checkpoint()`, regardless of sharded/non-sharded or
      V1/V2.
  Returns:
    A bool, true iff a checkpoint referred to by `checkpoint_prefix` exists.
  """
  pathname = _prefix_to_checkpoint_path(checkpoint_prefix,
                                        saver_pb2.SaverDef.V2)
  if file_io.get_matching_files(pathname):
    return True
  elif file_io.get_matching_files(checkpoint_prefix):
    return True
  else:
    return False
Example #6
Source File: evaluation_utils.py From training_results_v0.5 with Apache License 2.0

def get_all_checkpoints(output_dir):
  """docstring."""
  ckpt = cm.get_checkpoint_state(output_dir, None)
  res = []
  if not ckpt:
    return None
  for path in ckpt.all_model_checkpoint_paths:
    # Look for either a V2 path or a V1 path, with priority for V2.
    v2_path = cm._prefix_to_checkpoint_path(path, saver_pb2.SaverDef.V2)
    v1_path = cm._prefix_to_checkpoint_path(path, saver_pb2.SaverDef.V1)
    if file_io.get_matching_files(v2_path) or file_io.get_matching_files(
        v1_path):
      res.append(path)
    else:
      tf.logging.error("Couldn't match files for checkpoint %s", path)
  return res
Example #7
Source File: evaluation_utils.py From training_results_v0.5 with Apache License 2.0

def get_all_checkpoints(output_dir):
  """docstring."""
  ckpt = cm.get_checkpoint_state(output_dir, None)
  res = []
  if not ckpt:
    return None
  for path in ckpt.all_model_checkpoint_paths:
    # Look for either a V2 path or a V1 path, with priority for V2.
    v2_path = cm._prefix_to_checkpoint_path(path, saver_pb2.SaverDef.V2)
    v1_path = cm._prefix_to_checkpoint_path(path, saver_pb2.SaverDef.V1)
    if file_io.get_matching_files(v2_path) or file_io.get_matching_files(
        v1_path):
      res.append(path)
    else:
      tf.logging.error("Couldn't match files for checkpoint %s", path)
  return res
Example #8
Source File: glyph_patches.py From moonlight with Apache License 2.0

def read_patch_dimensions():
  """Reads the dimensions of the input patches from disk.

  Parses the first example in the training set, which must have "height" and
  "width" features.

  Returns:
    Tuple of (height, width) read from disk, using the glob passed to
    --train_input_patches.
  """
  for filename in file_io.get_matching_files(FLAGS.train_input_patches):
    # If one matching file is empty, go on to the next file.
    for record in tf_record.tf_record_iterator(filename):
      example = tf.train.Example.FromString(record)
      # Convert long (int64) to int, necessary for use in feature columns in
      # Python 2.
      patch_height = int(example.features.feature['height'].int64_list.value[0])
      patch_width = int(example.features.feature['width'].int64_list.value[0])
      return patch_height, patch_width
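read_patch_dimensions expects each matched TFRecord to hold tf.train.Example protos with int64 'height' and 'width' features. Below is a hedged sketch of writing one such record for local testing, using the same tf_record module the example imports; the output path and dimensions are placeholders.

import tensorflow as tf
from tensorflow.python.lib.io import tf_record

example = tf.train.Example()
example.features.feature['height'].int64_list.value.append(96)
example.features.feature['width'].int64_list.value.append(16)

with tf_record.TFRecordWriter('/tmp/patches-00000-of-00001.tfrecord') as writer:
  writer.write(example.SerializeToString())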
Example #9
Source File: saver.py From auto-alt-text-lambda-api with MIT License

def checkpoint_exists(checkpoint_prefix):
  """Checks whether a V1 or V2 checkpoint exists with the specified prefix.

  This is the recommended way to check if a checkpoint exists, since it takes
  into account the naming difference between V1 and V2 formats.

  Args:
    checkpoint_prefix: the prefix of a V1 or V2 checkpoint, with V2 taking
      priority.  Typically the result of `Saver.save()` or that of
      `tf.train.latest_checkpoint()`, regardless of sharded/non-sharded or
      V1/V2.
  Returns:
    A bool, true iff a checkpoint referred to by `checkpoint_prefix` exists.
  """
  pathname = _prefix_to_checkpoint_path(checkpoint_prefix,
                                        saver_pb2.SaverDef.V2)
  if file_io.get_matching_files(pathname):
    return True
  elif file_io.get_matching_files(checkpoint_prefix):
    return True
  else:
    return False
Example #10
Source File: staffline_patches_kmeans_pipeline.py From moonlight with Apache License 2.0

def main(_):
  tf.logging.info('Building the pipeline...')
  records_dir = tempfile.mkdtemp(prefix='staffline_kmeans')
  try:
    patch_file_prefix = os.path.join(records_dir, 'patches')
    with pipeline_flags.create_pipeline() as pipeline:
      filenames = file_io.get_matching_files(FLAGS.music_pattern)
      assert filenames, 'Must have matched some filenames'
      if 0 < FLAGS.num_pages < len(filenames):
        filenames = random.sample(filenames, FLAGS.num_pages)
      filenames = pipeline | beam.transforms.Create(filenames)
      patches = filenames | beam.ParDo(
          staffline_patches_dofn.StafflinePatchesDoFn(
              patch_height=FLAGS.patch_height,
              patch_width=FLAGS.patch_width,
              num_stafflines=FLAGS.num_stafflines,
              timeout_ms=FLAGS.timeout_ms,
              max_patches_per_page=FLAGS.max_patches_per_page))
      if FLAGS.num_outputs:
        patches |= combiners.Sample.FixedSizeGlobally(FLAGS.num_outputs)
      patches |= beam.io.WriteToTFRecord(
          patch_file_prefix, beam.coders.ProtoCoder(tf.train.Example))
      tf.logging.info('Running the pipeline...')
    tf.logging.info('Running k-means...')
    patch_files = file_io.get_matching_files(patch_file_prefix + '*')
    clusters = train_kmeans(patch_files, FLAGS.kmeans_num_clusters,
                            FLAGS.kmeans_batch_size, FLAGS.kmeans_num_steps)
    tf.logging.info('Writing the centroids...')
    with tf_record.TFRecordWriter(FLAGS.output_path) as writer:
      for cluster in clusters:
        example = tf.train.Example()
        example.features.feature['features'].float_list.value.extend(cluster)
        example.features.feature['height'].int64_list.value.append(
            FLAGS.patch_height)
        example.features.feature['width'].int64_list.value.append(
            FLAGS.patch_width)
        writer.write(example.SerializeToString())
    tf.logging.info('Done!')
  finally:
    shutil.rmtree(records_dir)
Example #11
Source File: _local_predict.py From pydatalab with Apache License 2.0

def local_batch_predict(model_dir, csv_file_pattern, output_dir, output_format, batch_size=100):
  """Batch Predict with a specified model.

  It does batch prediction, saves results to output files and also creates an
  output schema file. The output file names are input file names prepended by
  'predict_results_'.

  Args:
    model_dir: The model directory containing a SavedModel (usually saved_model.pb).
    csv_file_pattern: a pattern of csv files as batch prediction source.
    output_dir: the path of the output directory.
    output_format: csv or json.
    batch_size: Larger batch_size improves performance but may
        cause more memory usage.
  """
  file_io.recursive_create_dir(output_dir)
  csv_files = file_io.get_matching_files(csv_file_pattern)
  if len(csv_files) == 0:
    raise ValueError('No files found given ' + csv_file_pattern)

  with tf.Graph().as_default(), tf.Session() as sess:
    input_alias_map, output_alias_map = _tf_load_model(sess, model_dir)
    csv_tensor_name = list(input_alias_map.values())[0]
    output_schema = _get_output_schema(sess, output_alias_map)
    for csv_file in csv_files:
      output_file = os.path.join(
          output_dir,
          'predict_results_' +
          os.path.splitext(os.path.basename(csv_file))[0] + '.' + output_format)
      with file_io.FileIO(output_file, 'w') as f:
        prediction_source = _batch_csv_reader(csv_file, batch_size)
        for batch in prediction_source:
          batch = [l.rstrip() for l in batch if l]
          predict_results = sess.run(fetches=output_alias_map,
                                     feed_dict={csv_tensor_name: batch})
          formatted_results = _format_results(output_format, output_schema,
                                              predict_results)
          f.write('\n'.join(formatted_results) + '\n')

  file_io.write_string_to_file(
      os.path.join(output_dir, 'predict_results_schema.json'),
      json.dumps(output_schema, indent=2))
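A hypothetical invocation with placeholder paths; the model directory is assumed to contain a SavedModel that takes a single CSV-line input tensor, as the function above expects.

local_batch_predict(
    model_dir='/tmp/trained_model',
    csv_file_pattern='/tmp/input/*.csv',
    output_dir='/tmp/predictions',
    output_format='csv',
    batch_size=200)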
Example #12
Source File: saver.py From deep_image_model with Apache License 2.0

def _delete_file_if_exists(self, filespec):
  for pathname in file_io.get_matching_files(filespec):
    file_io.delete_file(pathname)
Example #13
Source File: saver.py From deep_image_model with Apache License 2.0

def latest_checkpoint(checkpoint_dir, latest_filename=None):
  """Finds the filename of latest saved checkpoint file.

  Args:
    checkpoint_dir: Directory where the variables were saved.
    latest_filename: Optional name for the protocol buffer file that
      contains the list of most recent checkpoint filenames.
      See the corresponding argument to `Saver.save()`.

  Returns:
    The full path to the latest checkpoint or `None` if no checkpoint was
    found.
  """
  # Pick the latest checkpoint based on checkpoint state.
  ckpt = get_checkpoint_state(checkpoint_dir, latest_filename)
  if ckpt and ckpt.model_checkpoint_path:
    # Look for either a V2 path or a V1 path, with priority for V2.
    v2_path = _prefix_to_checkpoint_path(ckpt.model_checkpoint_path,
                                         saver_pb2.SaverDef.V2)
    v1_path = _prefix_to_checkpoint_path(ckpt.model_checkpoint_path,
                                         saver_pb2.SaverDef.V1)
    if file_io.get_matching_files(v2_path) or file_io.get_matching_files(
        v1_path):
      return ckpt.model_checkpoint_path
    else:
      logging.error("Couldn't match files for checkpoint %s",
                    ckpt.model_checkpoint_path)
  return None
Example #14
Source File: saver.py From deep_image_model with Apache License 2.0

def get_checkpoint_mtimes(checkpoint_prefixes):
  """Returns the mtimes (modification timestamps) of the checkpoints.

  Globs for the checkpoints pointed to by `checkpoint_prefixes`.  If the files
  exist, collect their mtime.  Both V2 and V1 checkpoints are considered, in
  that priority.

  This is the recommended way to get the mtimes, since it takes into account
  the naming difference between V1 and V2 formats.

  Args:
    checkpoint_prefixes: a list of checkpoint paths, typically the results of
      `Saver.save()` or those of `tf.train.latest_checkpoint()`, regardless of
      sharded/non-sharded or V1/V2.
  Returns:
    A list of mtimes (in microseconds) of the found checkpoints.
  """
  mtimes = []

  def match_maybe_append(pathname):
    fnames = file_io.get_matching_files(pathname)
    if fnames:
      mtimes.append(file_io.stat(fnames[0]).mtime_nsec / 1e9)
      return True
    return False

  for checkpoint_prefix in checkpoint_prefixes:
    # Tries V2's metadata file first.
    pathname = _prefix_to_checkpoint_path(checkpoint_prefix,
                                          saver_pb2.SaverDef.V2)
    if match_maybe_append(pathname):
      continue
    # Otherwise, tries V1, where the prefix is the complete pathname.
    match_maybe_append(checkpoint_prefix)

  return mtimes
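A hedged usage sketch; the prefixes below are placeholders of the kind Saver.save() returns. Prefixes that match no files are simply skipped, so the result can be shorter than the input. Note that the body computes mtime_nsec / 1e9, which yields values in seconds since the epoch.

mtimes = get_checkpoint_mtimes([
    '/tmp/model/model.ckpt-500',
    '/tmp/model/model.ckpt-1000',
])
print(mtimes)  # one float per prefix that matched existing checkpoint files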
Example #15
Source File: gcs_smoke.py From deep_image_model with Apache License 2.0

def create_object_test():
  """Verifies file_io's object manipulation methods."""
  starttime = int(round(time.time() * 1000))
  dir_name = "%s/tf_gcs_test_%s" % (FLAGS.gcs_bucket_url, starttime)
  print("Creating dir %s." % dir_name)
  file_io.create_dir(dir_name)

  # Create a file in this directory.
  file_name = "%s/test_file.txt" % dir_name
  print("Creating file %s." % file_name)
  file_io.write_string_to_file(file_name, "test file creation.")

  list_files_pattern = "%s/test_file*.txt" % dir_name
  print("Getting files matching pattern %s." % list_files_pattern)
  files_list = file_io.get_matching_files(list_files_pattern)
  print(files_list)

  assert len(files_list) == 1
  assert files_list[0] == file_name

  # Cleanup test files.
  print("Deleting file %s." % file_name)
  file_io.delete_file(file_name)

  # Delete directory.
  print("Deleting directory %s." % dir_name)
  file_io.delete_recursively(dir_name)
Example #16
Source File: omr.py From moonlight with Apache License 2.0

def main(argv):
  if FLAGS.output_type not in VALID_OUTPUT_TYPES:
    raise ValueError('output_type "%s" not in allowed types: %s' %
                     (FLAGS.output_type, VALID_OUTPUT_TYPES))

  # Exclude argv[0], which is the current binary.
  patterns = argv[1:]
  if not patterns:
    raise ValueError('PNG file glob(s) must be specified')
  input_paths = []
  for pattern in patterns:
    pattern_paths = file_io.get_matching_files(pattern)
    if not pattern_paths:
      raise ValueError('Pattern "%s" failed to match any files' % pattern)
    input_paths.extend(pattern_paths)

  start = time.time()
  output = run(
      input_paths,
      FLAGS.glyphs_saved_model,
      output_notesequence=FLAGS.output_type == 'NoteSequence')
  end = time.time()
  sys.stderr.write('OMR elapsed time: %.2f\n' % (end - start))

  if FLAGS.output_type == 'MusicXML':
    output_bytes = conversions.score_to_musicxml(output)
  else:
    if FLAGS.text_format:
      output_bytes = text_format.MessageToString(output).encode('utf-8')
    else:
      output_bytes = output.SerializeToString()

  file_io.write_string_to_file(FLAGS.output, output_bytes)
Example #17
Source File: test_cloud_workflow.py From pydatalab with Apache License 2.0

def _run_transform(self):
  """Runs DataFlow for making tf.example files.

  Only the train file uses DataFlow, the eval file runs beam locally to save
  time.
  """
  cloud = True
  extra_args = []
  if cloud:
    extra_args = ['--cloud',
                  '--job-name=test-mltoolbox-df-%s' % uuid.uuid4().hex,
                  '--project-id=%s' % self._get_default_project_id(),
                  '--num-workers=3']

  cmd = ['python %s' % os.path.join(CODE_PATH, 'transform.py'),
         '--csv=' + self._csv_train_filename,
         '--analysis=' + self._analysis_output,
         '--prefix=features_train',
         '--output=' + self._transform_output,
         '--shuffle'] + extra_args

  self._logger.debug('Running subprocess: %s \n\n' % ' '.join(cmd))
  subprocess.check_call(' '.join(cmd), shell=True)

  # Don't waste time running a 2nd DF job, run it locally.
  cmd = ['python %s' % os.path.join(CODE_PATH, 'transform.py'),
         '--csv=' + self._csv_eval_filename,
         '--analysis=' + self._analysis_output,
         '--prefix=features_eval',
         '--output=' + self._transform_output]

  self._logger.debug('Running subprocess: %s \n\n' % ' '.join(cmd))
  subprocess.check_call(' '.join(cmd), shell=True)

  # Check the files were made.
  train_files = file_io.get_matching_files(
      os.path.join(self._transform_output, 'features_train*'))
  eval_files = file_io.get_matching_files(
      os.path.join(self._transform_output, 'features_eval*'))
  self.assertNotEqual([], train_files)
  self.assertNotEqual([], eval_files)
Example #18
Source File: tf_graph_utils.py From lmdis-rep with Apache License 2.0

def _delete_file_if_exists(self, filespec):
    for pathname in file_io.get_matching_files(filespec):
        s = os.stat(pathname)[stat.ST_MODE]
        if not (s & stat.S_IWUSR):  # skip read-only file
            continue
        file_io.delete_file(pathname)
Example #19
Source File: eval.py From AttentionCluster with Apache License 2.0

def get_latest_checkpoint():
    index_files = file_io.get_matching_files(
        os.path.join(FLAGS.train_dir, 'model.ckpt-*.index'))
    tf.logging.debug("Looking at {}".format(index_files))

    # No files
    if not index_files:
        return None

    # Index file path with the maximum step size.
    latest_index_file = sorted(
        [(int(os.path.basename(f).split("-")[-1].split(".")[0]), f)
         for f in index_files])[-1][1]

    # Chop off .index suffix and return
    return latest_index_file[:-6]
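This relies on every V2 checkpoint producing a <prefix>.index file: it globs for those, picks the largest global step embedded in the filename, and strips the 6-character '.index' suffix. A hedged sketch of the behavior for a hypothetical train_dir:

# Suppose FLAGS.train_dir contains:
#   model.ckpt-1000.index, model.ckpt-1000.data-00000-of-00001,
#   model.ckpt-5000.index, model.ckpt-5000.data-00000-of-00001
# get_latest_checkpoint() would then return <train_dir>/model.ckpt-5000.
latest = get_latest_checkpoint()
if latest is not None:
    print('restoring from', latest)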
Example #20
Source File: saver.py From Serverless-Deep-Learning-with-TensorFlow-and-AWS-Lambda with MIT License

def _delete_file_if_exists(self, filespec):
  for pathname in file_io.get_matching_files(filespec):
    file_io.delete_file(pathname)
Example #21
Source File: saver.py From Serverless-Deep-Learning-with-TensorFlow-and-AWS-Lambda with MIT License

def latest_checkpoint(checkpoint_dir, latest_filename=None):
  """Finds the filename of latest saved checkpoint file.

  Args:
    checkpoint_dir: Directory where the variables were saved.
    latest_filename: Optional name for the protocol buffer file that
      contains the list of most recent checkpoint filenames.
      See the corresponding argument to `Saver.save()`.

  Returns:
    The full path to the latest checkpoint or `None` if no checkpoint was
    found.
  """
  # Pick the latest checkpoint based on checkpoint state.
  ckpt = get_checkpoint_state(checkpoint_dir, latest_filename)
  if ckpt and ckpt.model_checkpoint_path:
    # Look for either a V2 path or a V1 path, with priority for V2.
    v2_path = _prefix_to_checkpoint_path(ckpt.model_checkpoint_path,
                                         saver_pb2.SaverDef.V2)
    v1_path = _prefix_to_checkpoint_path(ckpt.model_checkpoint_path,
                                         saver_pb2.SaverDef.V1)
    if file_io.get_matching_files(v2_path) or file_io.get_matching_files(
        v1_path):
      return ckpt.model_checkpoint_path
    else:
      logging.error("Couldn't match files for checkpoint %s",
                    ckpt.model_checkpoint_path)
  return None
Example #22
Source File: saver.py From Serverless-Deep-Learning-with-TensorFlow-and-AWS-Lambda with MIT License

def get_checkpoint_mtimes(checkpoint_prefixes):
  """Returns the mtimes (modification timestamps) of the checkpoints.

  Globs for the checkpoints pointed to by `checkpoint_prefixes`.  If the files
  exist, collect their mtime.  Both V2 and V1 checkpoints are considered, in
  that priority.

  This is the recommended way to get the mtimes, since it takes into account
  the naming difference between V1 and V2 formats.

  Args:
    checkpoint_prefixes: a list of checkpoint paths, typically the results of
      `Saver.save()` or those of `tf.train.latest_checkpoint()`, regardless of
      sharded/non-sharded or V1/V2.
  Returns:
    A list of mtimes (in microseconds) of the found checkpoints.
  """
  mtimes = []

  def match_maybe_append(pathname):
    fnames = file_io.get_matching_files(pathname)
    if fnames:
      mtimes.append(file_io.stat(fnames[0]).mtime_nsec / 1e9)
      return True
    return False

  for checkpoint_prefix in checkpoint_prefixes:
    # Tries V2's metadata file first.
    pathname = _prefix_to_checkpoint_path(checkpoint_prefix,
                                          saver_pb2.SaverDef.V2)
    if match_maybe_append(pathname):
      continue
    # Otherwise, tries V1, where the prefix is the complete pathname.
    match_maybe_append(checkpoint_prefix)

  return mtimes
Example #23
Source File: saver.py From keras-lambda with MIT License

def _delete_file_if_exists(self, filespec):
  for pathname in file_io.get_matching_files(filespec):
    file_io.delete_file(pathname)
Example #24
Source File: saver.py From keras-lambda with MIT License

def latest_checkpoint(checkpoint_dir, latest_filename=None):
  """Finds the filename of latest saved checkpoint file.

  Args:
    checkpoint_dir: Directory where the variables were saved.
    latest_filename: Optional name for the protocol buffer file that
      contains the list of most recent checkpoint filenames.
      See the corresponding argument to `Saver.save()`.

  Returns:
    The full path to the latest checkpoint or `None` if no checkpoint was
    found.
  """
  # Pick the latest checkpoint based on checkpoint state.
  ckpt = get_checkpoint_state(checkpoint_dir, latest_filename)
  if ckpt and ckpt.model_checkpoint_path:
    # Look for either a V2 path or a V1 path, with priority for V2.
    v2_path = _prefix_to_checkpoint_path(ckpt.model_checkpoint_path,
                                         saver_pb2.SaverDef.V2)
    v1_path = _prefix_to_checkpoint_path(ckpt.model_checkpoint_path,
                                         saver_pb2.SaverDef.V1)
    if file_io.get_matching_files(v2_path) or file_io.get_matching_files(
        v1_path):
      return ckpt.model_checkpoint_path
    else:
      logging.error("Couldn't match files for checkpoint %s",
                    ckpt.model_checkpoint_path)
  return None
Example #25
Source File: saver.py From keras-lambda with MIT License

def get_checkpoint_mtimes(checkpoint_prefixes):
  """Returns the mtimes (modification timestamps) of the checkpoints.

  Globs for the checkpoints pointed to by `checkpoint_prefixes`.  If the files
  exist, collect their mtime.  Both V2 and V1 checkpoints are considered, in
  that priority.

  This is the recommended way to get the mtimes, since it takes into account
  the naming difference between V1 and V2 formats.

  Args:
    checkpoint_prefixes: a list of checkpoint paths, typically the results of
      `Saver.save()` or those of `tf.train.latest_checkpoint()`, regardless of
      sharded/non-sharded or V1/V2.
  Returns:
    A list of mtimes (in microseconds) of the found checkpoints.
  """
  mtimes = []

  def match_maybe_append(pathname):
    fnames = file_io.get_matching_files(pathname)
    if fnames:
      mtimes.append(file_io.stat(fnames[0]).mtime_nsec / 1e9)
      return True
    return False

  for checkpoint_prefix in checkpoint_prefixes:
    # Tries V2's metadata file first.
    pathname = _prefix_to_checkpoint_path(checkpoint_prefix,
                                          saver_pb2.SaverDef.V2)
    if match_maybe_append(pathname):
      continue
    # Otherwise, tries V1, where the prefix is the complete pathname.
    match_maybe_append(checkpoint_prefix)

  return mtimes
Example #26
Source File: tfr_read.py From spotify-tensorflow with Apache License 2.0

def list_tf_records(paths, default_schema):
    for p in paths:
        files = [f for f in file_io.get_matching_files(p) if f.endswith(".tfrecords")]
        if len(files) == 0:
            raise Exception("Couldn't find any .tfrecords file in path or glob [{}]".format(p))
        for f in files:
            yield f, resolve_schema(os.path.dirname(f), default_schema)
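A hedged usage sketch; resolve_schema is defined elsewhere in the project, so its return value is assumed here, and the glob is a placeholder.

# Yields (path, schema) pairs for every .tfrecords file matched by each glob.
for record_file, schema in list_tf_records(['/tmp/dataset/part-*'], default_schema=None):
    print(record_file, schema)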
Example #27
Source File: task.py From cloudml-edge-automation with Apache License 2.0

def copy_data_to_tmp(input_files):
  """Copies data to /tmp/ and returns glob matching the files."""
  files = []
  for e in input_files:
    for path in e.split(','):
      files.extend(file_io.get_matching_files(path))

  for path in files:
    if not path.startswith('gs://'):
      return input_files

  tmp_path = os.path.join('/tmp/', str(uuid.uuid4()))
  os.makedirs(tmp_path)
  subprocess.check_call(['gsutil', '-m', '-q', 'cp', '-r'] + files + [tmp_path])
  return [os.path.join(tmp_path, '*')]
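A hypothetical call with a placeholder bucket; each input element may itself be a comma-separated list of globs. If every matched path is on GCS, the files are copied to a fresh /tmp directory via gsutil and a single local glob is returned; otherwise the inputs are returned unchanged.

local_globs = copy_data_to_tmp(['gs://my-bucket/data/train-*.csv'])
print(local_globs)  # e.g. ['/tmp/<uuid>/*'] when all inputs were on GCS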
Example #28
Source File: task.py From cloudml-samples with Apache License 2.0

def copy_data_to_tmp(input_files):
  """Copies data to /tmp/ and returns glob matching the files."""
  files = []
  for e in input_files:
    for path in e.split(','):
      files.extend(file_io.get_matching_files(path))

  for path in files:
    if not path.startswith('gs://'):
      return input_files

  tmp_path = os.path.join('/tmp/', str(uuid.uuid4()))
  os.makedirs(tmp_path)
  subprocess.check_call(['gsutil', '-m', '-q', 'cp', '-r'] + files + [tmp_path])
  return [os.path.join(tmp_path, '*')]
Example #29
Source File: task.py From cloudml-samples with Apache License 2.0

def copy_data_to_tmp(input_files):
  """Copies data to /tmp/ and returns glob matching the files."""
  files = []
  for e in input_files:
    for path in e.split(','):
      files.extend(file_io.get_matching_files(path))

  for path in files:
    if not path.startswith('gs://'):
      return input_files

  tmp_path = os.path.join('/tmp/', str(uuid.uuid4()))
  os.makedirs(tmp_path)
  subprocess.check_call(['gsutil', '-m', '-q', 'cp', '-r'] + files + [tmp_path])
  return [os.path.join(tmp_path, '*')]
Example #30
Source File: saver.py From lambda-packs with MIT License

def _delete_file_if_exists(self, filespec):
  for pathname in file_io.get_matching_files(filespec):
    file_io.delete_file(pathname)