Python tarfile.open() Examples
The following are 30 code examples of tarfile.open().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions and classes of the tarfile module, or try the search function.
Example #1
Source File: device.py From Paradrop with Apache License 2.0 | 10 votes |
def create(ctx):
    """ Install a chute from the working directory. """
    # Node API endpoint for chute installation; base_url comes from the CLI context.
    url = "{}/chutes/".format(ctx.obj['base_url'])
    headers = {'Content-Type': 'application/x-tar'}

    if not os.path.exists("paradrop.yaml"):
        raise Exception("No paradrop.yaml file found in working directory.")

    with tempfile.TemporaryFile() as temp:
        # Pack the entire working directory into an uncompressed tar stream.
        tar = tarfile.open(fileobj=temp, mode="w")
        for dirName, subdirList, fileList in os.walk('.'):
            for fname in fileList:
                path = os.path.join(dirName, fname)
                # normpath strips the leading "./" so archive members are relative.
                arcname = os.path.normpath(path)
                tar.add(path, arcname=arcname)
        tar.close()

        # Rewind so the upload reads the archive from the beginning.
        temp.seek(0)
        res = router_request("POST", url, headers=headers, data=temp)
        data = res.json()
        # Follow the install progress until the change completes.
        ctx.invoke(watch, change_id=data['change_id'])
Example #2
Source File: notify.py From wechat-alfred-workflow with MIT License | 7 votes |
def convert_image(inpath, outpath, size):
    """Convert an image file using ``sips``.

    Args:
        inpath (str): Path of source file.
        outpath (str): Path to destination file.
        size (int): Width and height of destination image in pixels.

    Raises:
        RuntimeError: Raised if ``sips`` exits with non-zero status.
    """
    # NOTE(review): argv mixes bytes literals (b'sips') with str values
    # (str(size), inpath) — fine on Python 2, but TypeError-prone on
    # Python 3; confirm the target interpreter before reuse.
    cmd = [
        b'sips',
        b'-z', str(size), str(size),
        inpath,
        b'--out', outpath]
    # log().debug(cmd)
    # Discard sips' stdout/stderr; only the exit status matters here.
    with open(os.devnull, 'w') as pipe:
        retcode = subprocess.call(cmd, stdout=pipe, stderr=subprocess.STDOUT)
    if retcode != 0:
        raise RuntimeError('sips exited with %d' % retcode)
Example #3
Source File: dataset_utils.py From DOTA_models with Apache License 2.0 | 6 votes |
def download_and_uncompress_tarball(tarball_url, dataset_dir):
    """Downloads the `tarball_url` and uncompresses it locally.

    Args:
        tarball_url: The URL of a tarball file.
        dataset_dir: The directory where the temporary files are stored.
    """
    filename = tarball_url.split('/')[-1]
    filepath = os.path.join(dataset_dir, filename)

    def _progress(count, block_size, total_size):
        # Single-line, carriage-return progress indicator for the download.
        sys.stdout.write('\r>> Downloading %s %.1f%%' % (
            filename, float(count * block_size) / float(total_size) * 100.0))
        sys.stdout.flush()

    filepath, _ = urllib.request.urlretrieve(tarball_url, filepath, _progress)
    print()
    statinfo = os.stat(filepath)
    print('Successfully downloaded', filename, statinfo.st_size, 'bytes.')
    # Use a context manager so the TarFile handle is closed even on error
    # (the original `tarfile.open(...).extractall(...)` leaked the handle).
    with tarfile.open(filepath, 'r:gz') as tar:
        tar.extractall(dataset_dir)
Example #4
Source File: persistence.py From multibootusb with GNU General Public License v2.0 | 6 votes |
def detect_missing_tools(distro):
    """Return an error message if a tool needed for persistence is missing, else None."""
    tools_dir = os.path.join('data', 'tools')
    if platform.system() == 'Windows':
        # On Windows the bundled 7zip/cygwin binaries are shipped with the app.
        _7zip_exe = gen.resource_path(
            os.path.join(tools_dir, '7zip', '7z.exe'))
        e2fsck_exe = gen.resource_path(os.path.join(tools_dir, 'cygwin', 'e2fsck.exe'))
        resize2fs_exe = gen.resource_path(os.path.join(tools_dir, 'cygwin', 'resize2fs.exe'))
    else:
        # On other platforms the tools are expected on PATH.
        _7zip_exe = '7z'
        e2fsck_exe = 'e2fsck'
        resize2fs_exe = 'resize2fs'
    # Only distros that use resize2fs-based persistence need these tools.
    if distro not in creator_dict or \
            creator_dict[distro][0] is not create_persistence_using_resize2fs:
        return None
    try:
        # Probe each tool by launching it with no args; output is discarded.
        with open(os.devnull) as devnull:
            for tool in [e2fsck_exe, resize2fs_exe]:
                p = subprocess.Popen([tool], stdout=devnull, stderr=devnull)
                p.communicate()
    except FileNotFoundError:  # Windows
        return "'%s.exe' is not installed or not available for use." % tool
    except OSError:  # Linux
        return "'%s' is not installed or not available for use." % tool
    return None
Example #5
Source File: node.py From Paradrop with Apache License 2.0 | 6 votes |
def update_chute(ctx, directory):
    """
    Install a new version of the chute from the working directory.

    Install the files in the current directory as a chute on the node.
    The directory must contain a paradrop.yaml file. The entire directory
    will be copied to the node for installation.
    """
    os.chdir(directory)

    if not os.path.exists("paradrop.yaml"):
        raise Exception("No paradrop.yaml file found in chute directory.")

    with open('paradrop.yaml', 'r') as source:
        config = yaml.safe_load(source)

    if 'name' not in config:
        click.echo('Chute name is not defined in paradrop.yaml.')
        return

    client = ctx.obj['client']
    with tempfile.TemporaryFile() as temp:
        # Pack the chute directory into an uncompressed tar stream.
        tar = tarfile.open(fileobj=temp, mode="w")
        for dirName, subdirList, fileList in os.walk("."):
            for fname in fileList:
                path = os.path.join(dirName, fname)
                # normpath strips the leading "./" so members are relative paths.
                arcname = os.path.normpath(path)
                tar.add(path, arcname=arcname)
        tar.close()

        # Rewind before uploading the archive to the node.
        temp.seek(0)
        result = client.install_tar(temp, name=config['name'])
        # Stream the change logs until the installation finishes.
        ctx.invoke(watch_change_logs, change_id=result['change_id'])
Example #6
Source File: archive.py From CAMISIM with Apache License 2.0 | 6 votes |
def __init__(self, default_compression="gz", logfile=None, verbose=True):
    """
    Constructor

    @attention:

    @param default_compression: default compression used for files
    @type default_compression: str | unicode
    @param logfile: file handler or file path to a log file
    @type logfile: file | io.FileIO | StringIO.StringIO | basestring
    @param verbose: Not verbose means that only warnings and errors will be past to stream
    @type verbose: bool

    @return: None
    @rtype: None
    """
    # NOTE(review): 'basestring' exists only on Python 2; this module
    # appears to target Python 2 — confirm before running on Python 3.
    assert logfile is None or isinstance(logfile, basestring) or self.is_stream(logfile)
    assert isinstance(default_compression, basestring), "separator must be string"
    assert isinstance(verbose, bool), "verbose must be true or false"
    # self._open maps compression names to opener callables (defined on the class).
    assert default_compression.lower() in self._open, "Unknown compression: '{}'".format(default_compression)

    super(Archive, self).__init__(
        label="Archive", default_compression=default_compression,
        logfile=logfile, verbose=verbose)
    # Register plain (uncompressed) tar support alongside the compressed openers.
    self._open['tar'] = tarfile.open
    self._default_compression = default_compression
Example #7
Source File: style_transfer.py From fine-lm with MIT License | 6 votes |
def generate_samples(self, data_dir, tmp_dir, dataset_split):
    """Download and extract the corpus, then yield text pairs for this split."""
    dataset = self.dataset_url(dataset_split)
    # File basenames below are tagged "train" or "dev" depending on the split.
    tag = "train" if dataset_split == problem.DatasetSplit.TRAIN else "dev"

    url = dataset[0][0]
    compressed_filename = os.path.basename(url)
    compressed_filepath = os.path.join(tmp_dir, compressed_filename)
    generator_utils.maybe_download(tmp_dir, compressed_filename, url)

    # Choose gzip vs. plain tar mode from the file extension.
    mode = "r:gz" if compressed_filepath.endswith("gz") else "r"
    with tarfile.open(compressed_filepath, mode) as corpus_tar:
        corpus_tar.extractall(tmp_dir)

    if self.vocab_type == text_problems.VocabType.SUBWORD:
        generator_utils.get_or_generate_vocab(
            data_dir, tmp_dir, self.vocab_filename, self.approx_vocab_size,
            self.vocab_data_files())

    # Source is the "modern" text, target is the "original" (style transfer pair).
    source_file = os.path.join(tmp_dir, tag + ".modern")
    target_file = os.path.join(tmp_dir, tag + ".original")
    return text_problems.text2text_txt_iterator(source_file, target_file)
Example #8
Source File: cnn_dailymail.py From fine-lm with MIT License | 6 votes |
def write_raw_text_to_files(all_files, urls_path, tmp_dir, is_training):
    """Write text to files."""

    def write_to_file(all_files, urls_path, tmp_dir, filename):
        # Writes parallel ".source" (story) and ".target" (summary) files.
        with io.open(os.path.join(tmp_dir, filename + ".source"), "w") as fstory:
            with io.open(os.path.join(tmp_dir, filename + ".target"), "w") as fsummary:
                for example in example_generator(all_files, urls_path, sum_token=True):
                    story, summary = _story_summary_split(example)
                    fstory.write(story + "\n")
                    fsummary.write(summary + "\n")

    filename = "cnndm.train" if is_training else "cnndm.dev"
    tf.logging.info("Writing %s" % filename)
    write_to_file(all_files, urls_path, tmp_dir, filename)

    if not is_training:
        # In eval mode, additionally emit the held-out test split.
        test_urls_path = generator_utils.maybe_download(tmp_dir, "all_test.txt",
                                                        _TEST_URLS)
        filename = "cnndm.test"
        tf.logging.info("Writing %s" % filename)
        write_to_file(all_files, test_urls_path, tmp_dir, filename)
Example #9
Source File: babi_qa.py From fine-lm with MIT License | 6 votes |
def _prepare_babi_data(tmp_dir, data_dir):
    """Downloads and extracts the dataset.

    Args:
        tmp_dir: temp directory to download and extract the dataset
        data_dir: The base directory where data and vocab files are stored.

    Returns:
        tmp_dir: temp directory containing the raw data.
    """
    if not tf.gfile.Exists(data_dir):
        tf.gfile.MakeDirs(data_dir)

    # TODO(dehghani@): find a solution for blocking user-agent (download)
    file_path = generator_utils.maybe_download(tmp_dir, _TAR, _URL)
    # Context manager closes the tar even if extractall raises (the
    # original open/extractall/close sequence leaked on error).
    with tarfile.open(file_path) as tar:
        tar.extractall(tmp_dir)

    return tmp_dir
Example #10
Source File: bair_robot_pushing.py From fine-lm with MIT License | 6 votes |
def generate_samples(self, data_dir, tmp_dir, dataset_split):
    """Download/extract the BAIR pushing data and yield one dict per frame."""
    path = generator_utils.maybe_download(
        tmp_dir, os.path.basename(DATA_URL), DATA_URL)

    tar = tarfile.open(path)
    tar.extractall(tmp_dir)
    tar.close()

    # Glob pattern selects the train or test shard files inside the archive.
    if dataset_split == problem.DatasetSplit.TRAIN:
        base_dir = os.path.join(tmp_dir, "softmotion30_44k/train/*")
    else:
        base_dir = os.path.join(tmp_dir, "softmotion30_44k/test/*")

    filenames = tf.gfile.Glob(base_dir)
    for frame_number, frame, state, action in self.parse_frames(filenames):
        yield {
            # frame_number is wrapped in a list to match the feature schema.
            "frame_number": [frame_number],
            "frame": frame,
            "state": state,
            "action": action,
        }
Example #11
Source File: common_voice.py From fine-lm with MIT License | 6 votes |
def _collect_data(directory): """Traverses directory collecting input and target files. Args: directory: base path to extracted audio and transcripts. Returns: list of (media_base, media_filepath, label) tuples """ # Returns: data_files = [] transcripts = [ filename for filename in os.listdir(directory) if filename.endswith(".csv") ] for transcript in transcripts: transcript_path = os.path.join(directory, transcript) with open(transcript_path, "r") as transcript_file: transcript_reader = csv.reader(transcript_file) _ = transcript_reader.next() # Skip headers. for transcript_line in transcript_reader: media_name, label = transcript_line[0:2] filename = os.path.join(directory, media_name) data_files.append((media_name, filename, label)) return data_files
Example #12
Source File: librispeech.py From fine-lm with MIT License | 6 votes |
def _collect_data(directory, input_ext, transcription_ext): """Traverses directory collecting input and target files.""" # Directory from string to tuple pair of strings # key: the filepath to a datafile including the datafile's basename. Example, # if the datafile was "/path/to/datafile.wav" then the key would be # "/path/to/datafile" # value: a pair of strings (media_filepath, label) data_files = dict() for root, _, filenames in os.walk(directory): transcripts = [filename for filename in filenames if transcription_ext in filename] for transcript in transcripts: transcript_path = os.path.join(root, transcript) with open(transcript_path, "r") as transcript_file: for transcript_line in transcript_file: line_contents = transcript_line.strip().split(" ", 1) media_base, label = line_contents key = os.path.join(root, media_base) assert key not in data_files media_name = "%s.%s"%(media_base, input_ext) media_path = os.path.join(root, media_name) data_files[key] = (media_base, media_path, label) return data_files
Example #13
Source File: download.py From nmp_qc with MIT License | 6 votes |
def download_figshare(file_name, file_ext, dir_path='./', change_name=None):
    """Download a figshare file and unpack it if it is a .zip or .tar.bz2 archive."""
    prepare_data_dir(dir_path)
    # figshare serves files by numeric id appended to this base URL.
    url = 'https://ndownloader.figshare.com/files/' + file_name
    wget.download(url, out=dir_path)

    file_path = os.path.join(dir_path, file_name)

    if file_ext == '.zip':
        zip_ref = zipfile.ZipFile(file_path, 'r')
        if change_name is not None:
            # Extract into a subdirectory named change_name.
            dir_path = os.path.join(dir_path, change_name)
        zip_ref.extractall(dir_path)
        zip_ref.close()
        os.remove(file_path)
    elif file_ext == '.tar.bz2':
        tar_ref = tarfile.open(file_path, 'r:bz2')
        if change_name is not None:
            dir_path = os.path.join(dir_path, change_name)
        tar_ref.extractall(dir_path)
        tar_ref.close()
        os.remove(file_path)
    elif change_name is not None:
        # Not an archive: just rename the downloaded file.
        os.rename(file_path, os.path.join(dir_path, change_name))

# Download QM9 dataset
Example #14
Source File: node.py From Paradrop with Apache License 2.0 | 6 votes |
def import_ssh_key(ctx, path, user):
    """
    Add an authorized key from a public key file.

    PATH must be a path to a public key file, which corresponds to a private
    key that SSH can use for authentication. Typically, ssh-keygen will place
    the public key in "~/.ssh/id_rsa.pub".
    """
    client = ctx.obj['client']
    with open(path, 'r') as source:
        key_string = source.read().strip()

    # Raw string so "\w" is a regex escape, not an invalid string escape
    # (non-raw "\w" is a DeprecationWarning and a SyntaxError in future
    # Python versions). Guards against uploading a private key by mistake.
    match = re.search(r"-----BEGIN \w+ PRIVATE KEY-----", key_string)
    if match is not None:
        print("The path ({}) contains a private key.".format(path))
        print("Please provide the path to your public key.")
        return None

    result = client.add_ssh_key(key_string, user=user)
    if result is not None:
        print("Added public key from {}".format(path))
    return result
Example #15
Source File: device.py From Paradrop with Apache License 2.0 | 6 votes |
def reconfigure(ctx):
    """ Reconfigure the chute without rebuilding. """
    # Chute-specific config endpoint on the node.
    url = ctx.obj['chute_url'] + "/config"

    if not os.path.exists("paradrop.yaml"):
        raise Exception("No paradrop.yaml file found in working directory.")

    with open("paradrop.yaml", "r") as source:
        data = yaml.safe_load(source)
    # Only the 'config' section is pushed; missing section means empty config.
    config = data.get('config', {})

    res = router_request("PUT", url, json=config)
    data = res.json()
    # Follow the reconfiguration progress until the change completes.
    ctx.invoke(watch, change_id=data['change_id'])
Example #16
Source File: rooter.py From ToonRooter with MIT License | 6 votes |
def write_payload(self):
    """Stream the payload tarball, base64-encoded, to the device's shell.

    Waits for the "/ # " prompt, starts `base64 -d | tar zxf -` on the
    device, feeds it the encoded tarball, then sends EOT (Ctrl-D) to end
    the stream.
    """
    port = self._port
    tar_path = self.create_payload_tar()

    log.debug(port.read_until("/ # "))
    port.write("base64 -d | tar zxf -\n")
    port.flush()

    log.info("Transferring payload")
    # Open in binary mode: the tarball is binary data, and text mode would
    # corrupt it (the original used 'r', which breaks on Python 3 and on
    # platforms that translate line endings).
    with open(tar_path, 'rb') as f:
        base64.encode(f, port)
    os.remove(tar_path)

    port.flush()
    port.reset_input_buffer()
    # EOT terminates the base64 stream on the device side.
    port.write("\x04")
    port.flush()
Example #17
Source File: data.py From dynamic-training-with-apache-mxnet-on-aws with Apache License 2.0 | 6 votes |
def get_caltech101_data():
    """Download and extract Caltech-101; return (training_path, testing_path)."""
    url = "https://s3.us-east-2.amazonaws.com/mxnet-public/101_ObjectCategories.tar.gz"
    dataset_name = "101_ObjectCategories"
    data_folder = "data"
    if not os.path.isdir(data_folder):
        os.makedirs(data_folder)
    tar_path = mx.gluon.utils.download(url, path=data_folder)
    # Only extract when either the train or test directory is missing.
    if (not os.path.isdir(os.path.join(data_folder, "101_ObjectCategories")) or
            not os.path.isdir(os.path.join(data_folder, "101_ObjectCategories_test"))):
        tar = tarfile.open(tar_path, "r:gz")
        tar.extractall(data_folder)
        tar.close()
        print('Data extracted')
    training_path = os.path.join(data_folder, dataset_name)
    testing_path = os.path.join(data_folder, "{}_test".format(dataset_name))
    return training_path, testing_path
Example #18
Source File: super_resolution.py From dynamic-training-with-apache-mxnet-on-aws with Apache License 2.0 | 6 votes |
def resolve(ctx):
    """Super-resolve the luminance channel of an image and save 'resolved.png'."""
    from PIL import Image
    # Inference only needs a single device even if several were configured.
    if isinstance(ctx, list):
        ctx = [ctx[0]]
    net.load_parameters('superres.params', ctx=ctx)
    # Work in YCbCr: the network upscales luminance (Y); chroma is resized.
    img = Image.open(opt.resolve_img).convert('YCbCr')
    y, cb, cr = img.split()
    # Add batch and channel axes to form an NCHW input.
    data = mx.nd.expand_dims(mx.nd.expand_dims(mx.nd.array(y), axis=0), axis=0)
    out_img_y = mx.nd.reshape(net(data), shape=(-3, -2)).asnumpy()
    out_img_y = out_img_y.clip(0, 255)
    out_img_y = Image.fromarray(np.uint8(out_img_y[0]), mode='L')

    # Upscale chroma to match the network's output size, then recombine.
    out_img_cb = cb.resize(out_img_y.size, Image.BICUBIC)
    out_img_cr = cr.resize(out_img_y.size, Image.BICUBIC)
    out_img = Image.merge('YCbCr', [out_img_y, out_img_cb, out_img_cr]).convert('RGB')
    out_img.save('resolved.png')
Example #19
Source File: datasets.py From dynamic-training-with-apache-mxnet-on-aws with Apache License 2.0 | 6 votes |
def _get_data(self):
    """Download and parse the MNIST-format idx files for the selected split."""
    if self._train:
        data, label = self._train_data, self._train_label
    else:
        data, label = self._test_data, self._test_label
    namespace = 'gluon/dataset/'+self._namespace
    data_file = download(_get_repo_file_url(namespace, data[0]),
                         path=self._root,
                         sha1_hash=data[1])
    label_file = download(_get_repo_file_url(namespace, label[0]),
                          path=self._root,
                          sha1_hash=label[1])

    with gzip.open(label_file, 'rb') as fin:
        # Skip the 8-byte idx label header (magic number + item count).
        struct.unpack(">II", fin.read(8))
        label = np.frombuffer(fin.read(), dtype=np.uint8).astype(np.int32)

    with gzip.open(data_file, 'rb') as fin:
        # Skip the 16-byte idx image header (magic, count, rows, cols).
        struct.unpack(">IIII", fin.read(16))
        data = np.frombuffer(fin.read(), dtype=np.uint8)
        # Reshape to NHWC with a single channel (28x28 grayscale).
        data = data.reshape(len(label), 28, 28, 1)

    self._data = nd.array(data, dtype=data.dtype)
    self._label = label
Example #20
Source File: datasets.py From dynamic-training-with-apache-mxnet-on-aws with Apache License 2.0 | 6 votes |
def _get_data(self):
    """Download/extract the archive if any batch file is missing or corrupt, then load it."""
    # Re-download when any expected batch file is absent or fails its sha1 check.
    if any(not os.path.exists(path) or not check_sha1(path, sha1)
           for path, sha1 in ((os.path.join(self._root, name), sha1)
                              for name, sha1 in self._train_data + self._test_data)):
        namespace = 'gluon/dataset/'+self._namespace
        filename = download(_get_repo_file_url(namespace, self._archive_file[0]),
                            path=self._root,
                            sha1_hash=self._archive_file[1])

        with tarfile.open(filename) as tar:
            tar.extractall(self._root)

    if self._train:
        data_files = self._train_data
    else:
        data_files = self._test_data
    # Read every batch, then stack them into single data/label arrays.
    data, label = zip(*(self._read_batch(os.path.join(self._root, name))
                        for name, _ in data_files))
    data = np.concatenate(data)
    label = np.concatenate(label)

    self._data = nd.array(data, dtype=data.dtype)
    self._label = label
Example #21
Source File: wmt_utils.py From DOTA_models with Apache License 2.0 | 6 votes |
def get_wmt_enfr_dev_set(directory):
    """Download the WMT en-fr training corpus to directory unless it's there."""
    dev_name = "newstest2013"
    dev_path = os.path.join(directory, dev_name)
    if not (tf.gfile.Exists(dev_path + ".fr") and tf.gfile.Exists(dev_path + ".en")):
        dev_file = maybe_download(directory, "dev-v2.tgz", _WMT_ENFR_DEV_URL)
        # print() as a function runs on both Python 2 and 3; the original
        # Python 2 print statement is a SyntaxError on Python 3.
        print("Extracting tgz file %s" % dev_file)
        with tarfile.open(dev_file, "r:gz") as dev_tar:
            fr_dev_file = dev_tar.getmember("dev/" + dev_name + ".fr")
            en_dev_file = dev_tar.getmember("dev/" + dev_name + ".en")
            fr_dev_file.name = dev_name + ".fr"  # Extract without "dev/" prefix.
            en_dev_file.name = dev_name + ".en"
            dev_tar.extract(fr_dev_file, directory)
            dev_tar.extract(en_dev_file, directory)
    return dev_path
Example #22
Source File: test_image.py From dynamic-training-with-apache-mxnet-on-aws with Apache License 2.0 | 6 votes |
def test_image_detiter(self):
    """Exercise ImageDetIter over an in-memory image list and a .lst file list."""
    im_list = [_generate_objects() + [x] for x in TestImage.IMAGES]
    det_iter = mx.image.ImageDetIter(2, (3, 300, 300), imglist=im_list, path_root='')
    # Iterate the detection iterator several epochs, resetting between them.
    for _ in range(3):
        for batch in det_iter:
            pass
        det_iter.reset()
    val_iter = mx.image.ImageDetIter(2, (3, 300, 300), imglist=im_list, path_root='')
    det_iter = val_iter.sync_label_shape(det_iter)

    # test file list
    fname = './data/test_imagedetiter.lst'
    # Prepend an index column so the .lst format (index\tlabel...\tpath) is valid.
    im_list = [[k] + _generate_objects() + [x] for k, x in enumerate(TestImage.IMAGES)]
    with open(fname, 'w') as f:
        for line in im_list:
            line = '\t'.join([str(k) for k in line])
            f.write(line + '\n')

    det_iter = mx.image.ImageDetIter(2, (3, 400, 400), path_imglist=fname,
                                     path_root='')
    for batch in det_iter:
        pass
Example #23
Source File: input.py From DOTA_models with Apache License 2.0 | 6 votes |
def extract_mnist_data(filename, num_images, image_size, pixel_depth): """ Extract the images into a 4D tensor [image index, y, x, channels]. Values are rescaled from [0, 255] down to [-0.5, 0.5]. """ # if not os.path.exists(file): if not tf.gfile.Exists(filename+".npy"): with gzip.open(filename) as bytestream: bytestream.read(16) buf = bytestream.read(image_size * image_size * num_images) data = np.frombuffer(buf, dtype=np.uint8).astype(np.float32) data = (data - (pixel_depth / 2.0)) / pixel_depth data = data.reshape(num_images, image_size, image_size, 1) np.save(filename, data) return data else: with tf.gfile.Open(filename+".npy", mode='r') as file_obj: return np.load(file_obj)
Example #24
Source File: cifar10.py From DOTA_models with Apache License 2.0 | 6 votes |
def maybe_download_and_extract():
    """Download and extract the tarball from Alex's website."""
    dest_directory = FLAGS.data_dir
    if not os.path.exists(dest_directory):
        os.makedirs(dest_directory)
    filename = DATA_URL.split('/')[-1]
    filepath = os.path.join(dest_directory, filename)
    if not os.path.exists(filepath):
        def _progress(count, block_size, total_size):
            # Single-line, carriage-return progress indicator for the download.
            sys.stdout.write('\r>> Downloading %s %.1f%%' % (
                filename, float(count * block_size) / float(total_size) * 100.0))
            sys.stdout.flush()
        filepath, _ = urllib.request.urlretrieve(DATA_URL, filepath, _progress)
        print()
        statinfo = os.stat(filepath)
        print('Successfully downloaded', filename, statinfo.st_size, 'bytes.')
    extracted_dir_path = os.path.join(dest_directory, 'cifar-10-batches-bin')
    if not os.path.exists(extracted_dir_path):
        # Use a context manager so the TarFile handle is closed even on
        # error (the original chained extractall leaked the handle).
        with tarfile.open(filepath, 'r:gz') as tar:
            tar.extractall(dest_directory)
Example #25
Source File: download_and_convert_cifar10.py From DOTA_models with Apache License 2.0 | 6 votes |
def _download_and_uncompress_dataset(dataset_dir):
    """Downloads cifar10 and uncompresses it locally.

    Args:
        dataset_dir: The directory where the temporary files are stored.
    """
    filename = _DATA_URL.split('/')[-1]
    filepath = os.path.join(dataset_dir, filename)

    if not os.path.exists(filepath):
        def _progress(count, block_size, total_size):
            # Single-line, carriage-return progress indicator for the download.
            sys.stdout.write('\r>> Downloading %s %.1f%%' % (
                filename, float(count * block_size) / float(total_size) * 100.0))
            sys.stdout.flush()
        filepath, _ = urllib.request.urlretrieve(_DATA_URL, filepath, _progress)
        print()
        statinfo = os.stat(filepath)
        print('Successfully downloaded', filename, statinfo.st_size, 'bytes.')
    # Use a context manager so the TarFile handle is closed even on error
    # (the original `tarfile.open(...).extractall(...)` leaked the handle).
    with tarfile.open(filepath, 'r:gz') as tar:
        tar.extractall(dataset_dir)
Example #26
Source File: ptb.py From fine-lm with MIT License | 6 votes |
def _build_vocab(filename, vocab_path, vocab_size):
    """Reads a file to build a vocabulary of `vocab_size` most common words.

    The vocabulary is sorted by occurrence count and has one word per line.
    Originally from:
    https://github.com/tensorflow/models/blob/master/tutorials/rnn/ptb/reader.py

    Args:
        filename: file to read list of words from.
        vocab_path: path where to save the vocabulary.
        vocab_size: size of the vocabulary to generate.
    """
    word_counts = collections.Counter(_read_words(filename))
    # Rank by descending frequency, breaking ties alphabetically.
    ranked = sorted(word_counts.items(), key=lambda pair: (-pair[1], pair[0]))
    top_words = [word for word, _ in ranked[:vocab_size]]
    with open(vocab_path, "w") as f:
        f.write("\n".join(top_words))
Example #27
Source File: input.py From DOTA_models with Apache License 2.0 | 5 votes |
def extract_mnist_labels(filename, num_images): """ Extract the labels into a vector of int64 label IDs. """ # if not os.path.exists(file): if not tf.gfile.Exists(filename+".npy"): with gzip.open(filename) as bytestream: bytestream.read(8) buf = bytestream.read(1 * num_images) labels = np.frombuffer(buf, dtype=np.uint8).astype(np.int32) np.save(filename, labels) return labels else: with tf.gfile.Open(filename+".npy", mode='r') as file_obj: return np.load(file_obj)
Example #28
Source File: wmt_utils.py From DOTA_models with Apache License 2.0 | 5 votes |
def gunzip_file(gz_path, new_path):
    """Unzips from gz_path into new_path."""
    # print() as a function runs on both Python 2 and 3; the original
    # Python 2 print statement is a SyntaxError on Python 3.
    print("Unpacking %s to %s" % (gz_path, new_path))
    with gzip.open(gz_path, "rb") as gz_file:
        with open(new_path, "wb") as new_file:
            for line in gz_file:
                new_file.write(line)
Example #29
Source File: device.py From Paradrop with Apache License 2.0 | 5 votes |
def add(ctx, path):
    """ Add an authorized key from a file. """
    # Per-user ssh-keys endpoint; both parts come from the CLI context.
    url = '{sshkeys_url}/{sshkeys_user}'.format(**ctx.obj)

    with open(path, 'r') as source:
        key_string = source.read().strip()

    data = {
        'key': key_string
    }

    result = router_request("POST", url, json=data, dump=False)
    if result.ok:
        # Echo back the key the node actually stored.
        data = result.json()
        print("Added: " + data.get('key', ''))
Example #30
Source File: cifar.py From fine-lm with MIT License | 5 votes |
def _get_cifar(directory, url):
    """Download and extract CIFAR to directory unless it is there."""
    filename = os.path.basename(url)
    path = generator_utils.maybe_download(directory, filename, url)
    # Use a context manager so the TarFile handle is closed even on error
    # (the original `tarfile.open(...).extractall(...)` leaked the handle).
    with tarfile.open(path, "r:gz") as tar:
        tar.extractall(directory)