Python Examples of shutil.unpack_archive

Source File: model_manager.py From sagemaker-rl-container with Apache License 2.0

6 votes

def _download_and_extract_model_tar_gz(self, model_id):
        """
        Download the model archive recorded for ``model_id`` and load it.

        The S3 location is read from the ``s3_model_output_path`` field of the
        record returned by ``self.model_ddb_wrapper.get_model_record``. The
        archive is downloaded as ``model.tar.gz`` into a freshly named directory
        under ``/opt/ml/downloads``, unpacked into that same directory, and the
        directory is passed to ``self.get_model``.

        Returns whatever ``self.get_model`` returns (described upstream as a
        tuple (str, str) of metadata string and model weights URL on disk --
        TODO confirm against ``get_model``), or ``None`` when the record has no
        S3 location or when any download/extract step raises.
        """
        deployable_model_id_record = self.model_ddb_wrapper.get_model_record(experiment_id=self.experiment_id,
                                                                             model_id=model_id)
        s3_uri = deployable_model_id_record.get("s3_model_output_path", "")
        if not s3_uri:
            # No exception is being handled here, so there is no traceback to
            # attach; logger.exception would append a misleading "NoneType: None".
            logger.error(f"Could not s3 location of {model_id}")
            return None

        try:
            tmp_dir = Path(f"/opt/ml/downloads/{gen_random_string()}")
            tmp_dir.mkdir(parents=True, exist_ok=True)
            tmp_model_tar_gz = os.path.join(tmp_dir.as_posix(), "model.tar.gz")
            bucket, key = parse_s3_url(s3_uri)
            self.s3_resource.Bucket(bucket).download_file(key, tmp_model_tar_gz)
            shutil.unpack_archive(filename=tmp_model_tar_gz, extract_dir=tmp_dir.as_posix())
            return self.get_model(tmp_dir.as_posix())
        except Exception as e:
            # Deliberate best-effort: any failure is logged with its traceback
            # and reported to the caller as None.
            logger.exception(f"Could not parse or download {model_id} from {s3_uri} due to {e}")
            return None

Source File: build_data.py From ParlAI with MIT License

6 votes

def untar(path, fname, deleteTar=True):
    """
    Extract an archive into the folder that holds it.

    :param str path:
        Directory containing the archive; the contents are unpacked here.

    :param str fname:
        Name of the archive file inside ``path``.

    :param bool deleteTar:
        When true, the archive is removed once extraction has finished.
    """
    logging.debug(f'unpacking {fname}')
    archive = os.path.join(path, fname)
    shutil.unpack_archive(archive, extract_dir=path)
    if not deleteTar:
        return
    os.remove(archive)

Source File: node_control_tool.py From indy-node with Apache License 2.0

6 votes

def _restore_from_backup(self, src_ver: str):
        """
        Unpack the backup archive for ``src_ver`` over ``self.backup_target``.

        Every path in ``self.files_to_preserve`` is first copied from
        ``self.backup_target`` into ``self.tmp_dir``, then copied back once the
        archive has been unpacked, so those files keep their pre-restore
        contents. ``self.tmp_dir`` is removed at the end. A failed copy is
        logged as a warning and does not stop the restore.
        """
        def copy_preserved(src_root, dst_root):
            # Best-effort copy of each preserved file between the two roots.
            for rel_path in self.files_to_preserve:
                try:
                    shutil.copy2(os.path.join(src_root, rel_path),
                                 os.path.join(dst_root, rel_path))
                except IOError as err:
                    logger.warning('Copying {} failed due to {}'
                                   .format(rel_path, err))

        logger.info('Restoring from backup for {}'.format(src_ver))
        copy_preserved(self.backup_target, self.tmp_dir)
        archive_name = self._backup_name_ext(src_ver)
        shutil.unpack_archive(archive_name, self.backup_target, self.backup_format)
        copy_preserved(self.tmp_dir, self.backup_target)
        shutil.rmtree(self.tmp_dir, ignore_errors=True)

Source File: test_extract.py From py7zr with GNU Lesser General Public License v2.1

6 votes

def test_register_unpack_archive(tmp_path):
    """
    Check that ``unpack_7zarchive`` works as a ``shutil`` unpack format.

    Registers the callback for the ``.7z`` extension, unpacks ``test_1.7z``
    into ``tmp_path`` through ``shutil.unpack_archive``, and verifies the mode
    and mtime of ``setup.cfg`` plus the SHA-256 digest of three extracted files.
    """
    shutil.register_unpack_format('7zip', ['.7z'], unpack_7zarchive)
    shutil.unpack_archive(str(testdata_path.joinpath('test_1.7z')), str(tmp_path))
    target = tmp_path.joinpath("setup.cfg")
    expected_mode = 33188  # == 0o100644: regular file, rw-r--r--
    expected_mtime = 1552522033
    if os.name == 'posix':
        # The mode is only compared on POSIX systems.
        assert target.stat().st_mode == expected_mode
    assert target.stat().st_mtime == expected_mtime
    expected_digests = (
        ('setup.cfg', 'ff77878e070c4ba52732b0c847b5a055a7c454731939c3217db4a7fb4a1e7240'),
        ('setup.py', 'b916eed2a4ee4e48c51a2b51d07d450de0be4dbb83d20e67f6fd166ff7921e49'),
        ('scripts/py7zr', 'b0385e71d6a07eb692f5fb9798e9d33aaf87be7dfff936fd2473eab2a593d4fd'),
    )
    for rel_path, hexdigest in expected_digests:
        # read_bytes() closes the file itself; open('rb').read() left the
        # handle open until garbage collection.
        data = tmp_path.joinpath(rel_path).read_bytes()
        assert hashlib.sha256(data).digest() == binascii.unhexlify(hexdigest)

Source File: filetools.py From hydpy with GNU Lesser General Public License v3.0

6 votes

def currentdir(self, directory: Optional[str]) -> None:
        """
        Select ``directory`` (relative to ``self.basepath``) as the current one.

        ``None`` clears the selection. Otherwise, if ``<directory>.zip`` exists
        under ``self.basepath`` it is unpacked into the directory and the zip
        file is deleted; if neither the zip file nor the directory exists, the
        directory is created. The name is then stored as a string in
        ``self._currentdir``.
        """
        if directory is None:
            self._currentdir = None
            return

        dirpath = os.path.join(self.basepath, directory)
        zippath = f'{dirpath}.zip'
        if os.path.exists(zippath):
            shutil.unpack_archive(filename=zippath, extract_dir=dirpath, format='zip')
            os.remove(zippath)
        elif not os.path.exists(dirpath):
            os.makedirs(dirpath)
        self._currentdir = str(directory)

Source File: lib.py From mathlib-tools with Apache License 2.0

6 votes

def get_mathlib_olean(self) -> None:
        """Fetch the precompiled mathlib oleans and unpack them into the project.

        The archive is obtained with ``get_mathlib_archive``; if that raises
        ``EOFError`` or ``shutil.ReadError`` it is requested once more with the
        force-download argument set to ``True``. The existing mathlib is then
        cleaned, the archive is unpacked into ``self.mathlib_folder`` and the
        oleans are touched.
        """
        # The user may have bypassed the normal workflow (e.g. cloned mathlib
        # by hand and then run `leanproject get-cache`), so make sure the
        # package is configured first.
        leanpkg_path = self.directory/'leanpkg.path'
        if not leanpkg_path.exists():
            self.run(['leanpkg', 'configure'])

        try:
            olean_archive = get_mathlib_archive(self.mathlib_rev, self.cache_url,
                                                self.force_download, self.repo)
        except (EOFError, shutil.ReadError):
            log.info('Something wrong happened with the olean archive. '
                     'I will now retry downloading.')
            olean_archive = get_mathlib_archive(self.mathlib_rev, self.cache_url,
                                                True, self.repo)

        self.clean_mathlib()
        self.mathlib_folder.mkdir(parents=True, exist_ok=True)
        unpack_archive(olean_archive, self.mathlib_folder)
        # Touch the oleans afterwards, just in case.
        touch_oleans(self.mathlib_folder)

Source File: open_nmt.py From chimera with MIT License

6 votes

def train(self, save_data, opt):
        """
        Run ``train.py`` on ``save_data`` and return the model output location.

        ``save_data`` is written to a temporary archive by ``save_temp_bin`` and
        unpacked (gzip-compressed tar) into a fresh temporary directory. ``opt``
        is updated in place with the ``data`` and ``save_model`` entries, plus
        ``world_size``/``gpu_ranks`` when ``is_cuda`` is truthy, and is then
        passed to ``run_param``.
        """
        archive_path = save_temp_bin(save_data)

        data_dir = temp_dir()
        shutil.unpack_archive(filename=archive_path, extract_dir=data_dir, format="gztar")

        model_dir = temp_dir()

        # Plain string concatenation: presumably temp_dir() returns a path that
        # ends with a separator -- verify against temp_dir.
        opt["data"] = data_dir + "data"
        opt["save_model"] = model_dir
        if is_cuda:
            for key, value in (("world_size", 1), ("gpu_ranks", 0)):
                opt[key] = value

        run_param('train.py', opt)

        return model_dir

Source File: test_net.py From dffml with MIT License

6 votes

async def test_cached_download(self, ts=httptest.NoServer()):
        """
        Download an archive through ``cached_download`` and verify its contents.

        ``func`` is decorated with ``cached_download`` for ``/archive.tar.gz``
        on the test server ``ts``, with a target path inside a temporary
        directory; awaiting it yields the filename to unpack. The archive is
        unpacked into ``<tempdir>/extracted`` and checked with
        ``self.verify_extracted_contents``.

        This must be a coroutine function because the body awaits ``func()``;
        ``await`` inside a plain ``def`` is a SyntaxError.
        """
        with tempfile.TemporaryDirectory() as tempdir:

            @cached_download(
                ts.url() + "/archive.tar.gz",
                pathlib.Path(tempdir) / "archive.tar.gz",
                ARCHIVE_HASH,
                protocol_allowlist=["http://"],
            )
            async def func(filename):
                return filename

            # Directory to extract to
            extracted = pathlib.Path(tempdir, "extracted")

            # Unpack the archive
            shutil.unpack_archive(await func(), extracted)

            self.verify_extracted_contents(extracted)

Source File: build_data.py From neural_chat with MIT License

6 votes

def untar(path, fname, deleteTar=True):
    """
    Extract an archive next to itself, announcing it on stdout.

    :param str path:
        Directory holding the archive; the contents are extracted here.

    :param str fname:
        File name of the archive inside ``path``.

    :param bool deleteTar:
        If true, the archive file is removed after it has been extracted.
    """
    print('unpacking ' + fname)
    archive_path = os.path.join(path, fname)
    shutil.unpack_archive(archive_path, extract_dir=path)
    if not deleteTar:
        return
    os.remove(archive_path)

Source File: download_datasets.py From unify-emotion-datasets with MIT License

6 votes

def download(_, target, droot, __):
    """
    Stream ``target["url"]`` into ``droot`` and unpack it if it is an archive.

    The file is saved as ``target["target"]`` when that key is present,
    otherwise under the last path segment of the URL. A spinner is shown via
    ``arrow`` while chunks arrive. Files ending in ``.zip`` or ``.tar.gz`` are
    then unpacked into ``droot``. The first and last positional arguments are
    ignored.
    """
    url = target["url"]
    fname = target.get("target", url.rsplit("/", 1)[-1])
    destination = f"{droot}/{fname}"

    request_headers = {
        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_4) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/12.1 Safari/605.1.15"
    }
    r = requests.get(url, stream=True, headers=request_headers)

    spinner = "-\\|/"
    with open(destination, "wb") as out:
        for index, chunk in enumerate(r.iter_content(chunk_size=1024)):
            arrow(f"Downloading... {spinner[index % len(spinner)]}", end="\r")
            if not chunk:
                # Empty chunks carry no data to write.
                continue
            out.write(chunk)

    if fname.endswith((".zip", ".tar.gz")):
        arrow(f"Unpacking {fname}...")
        shutil.unpack_archive(destination, droot)

Source File: buildpkg.py From pyodide with Mozilla Public License 2.0

5 votes

def download_and_extract(buildpath, packagedir, pkg, args):
    """
    Make the sources of ``pkg`` available under ``buildpath / packagedir``.

    * No ``"source"`` key: nothing is fetched.
    * ``pkg["source"]["url"]``: the tarball is downloaded with ``wget`` into
      ``buildpath`` (unless already present) and passed to ``check_checksum``,
      then unpacked into ``buildpath`` if the source directory does not exist.
    * ``pkg["source"]["path"]``: that local directory is copied to the source
      directory if the latter does not exist yet.

    ``args`` is accepted for interface compatibility and is not used here.

    Returns the source directory path, ``buildpath / packagedir``.

    Raises ``ValueError`` when ``"path"`` is not a directory or when the source
    has neither ``"url"`` nor ``"path"``. Download and checksum errors are
    re-raised after the downloaded tarball has been removed.
    """
    srcpath = buildpath / packagedir

    if "source" not in pkg:
        return srcpath

    if "url" in pkg["source"]:
        tarballpath = buildpath / Path(pkg["source"]["url"]).name
        if not tarballpath.is_file():
            try:
                subprocess.run(
                    ["wget", "-q", "-O", str(tarballpath), pkg["source"]["url"]],
                    check=True,
                )
                check_checksum(tarballpath, pkg)
            except Exception:
                # Remove a partial or corrupt download so a later run fetches it
                # again. The download may have failed before the file existed
                # (e.g. wget missing); an unconditional unlink() would then
                # raise FileNotFoundError and mask the real error.
                if tarballpath.is_file():
                    tarballpath.unlink()
                raise

        if not srcpath.is_dir():
            shutil.unpack_archive(str(tarballpath), str(buildpath))

    elif "path" in pkg["source"]:
        srcdir = Path(pkg["source"]["path"])

        if not srcdir.is_dir():
            raise ValueError("'path' must point to a path")

        if not srcpath.is_dir():
            shutil.copytree(srcdir, srcpath)
    else:
        raise ValueError("Incorrect source provided")

    return srcpath

Source File: build_data.py From personalized-dialog with MIT License

5 votes

def untar(path, fname):
    """Unpack the archive ``fname`` found in ``path`` into ``path``, then delete it."""
    print('unpacking ' + fname)
    archive_path = os.path.join(path, fname)
    shutil.unpack_archive(archive_path, extract_dir=path)
    # The archive is always removed once its contents are extracted.
    os.remove(archive_path)

Source File: build_experiments.py From personalized-dialog with MIT License

5 votes

def untar(fname):
    """Unpack the archive ``fname`` into the current working directory, then delete it."""
    print('unpacking ' + fname)
    archive_path = os.path.join(fname)
    # No extract_dir is given, so shutil unpacks into the current directory.
    shutil.unpack_archive(filename=archive_path)
    os.remove(archive_path)

Source File: build_experiments.py From personalized-dialog with MIT License

5 votes

def untar(fname):
    """Extract ``fname`` into the current working directory and remove the archive."""
    print('unpacking ' + fname)
    source = os.path.join(fname)
    # Without an explicit target, extraction goes to the current directory.
    shutil.unpack_archive(filename=source)
    os.remove(source)

Source File: han_data.py From cogdl with MIT License

5 votes

def untar(path, fname, deleteTar=True):
    """
    Extract the archive ``fname`` located in ``path`` into ``path`` itself.

    The archive file is deleted afterwards unless ``deleteTar`` is false.
    """
    print('unpacking ' + fname)
    archive = os.path.join(path, fname)
    shutil.unpack_archive(archive, extract_dir=path)
    if not deleteTar:
        return
    os.remove(archive)

Source File: gtn_data.py From cogdl with MIT License

5 votes

def untar(path, fname, deleteTar=True):
    """
    Unpack ``path/fname`` in place.

    The contents land in ``path``; the archive is removed afterwards when
    ``deleteTar`` is true (the default).
    """
    print('unpacking ' + fname)
    tar_path = os.path.join(path, fname)
    shutil.unpack_archive(tar_path, extract_dir=path)
    if not deleteTar:
        return
    os.remove(tar_path)

Source File: build_experiments.py From personalized-dialog with MIT License

5 votes

def untar(fname):
    """Unpack ``fname`` into the current working directory and delete the archive file."""
    print('unpacking ' + fname)
    archive_file = os.path.join(fname)
    # The target defaults to the current directory when none is supplied.
    shutil.unpack_archive(filename=archive_file)
    os.remove(archive_file)

Source File: model.py From ebonite with Apache License 2.0

5 votes

def load(self, path):
        """
        Load a Keras model from the archive stored under ``path``.

        The archive name is ``self.model_dir_name + self.ext``. It is unpacked
        into a temporary directory, which is handed to
        ``tf.keras.models.load_model``; the directory is removed when the
        ``with`` block exits.
        """
        archive_name = self.model_dir_name + self.ext
        archive_path = os.path.join(path, archive_name)

        with tempfile.TemporaryDirectory(prefix='ebonite_tf_v2') as unpacked_dir:
            shutil.unpack_archive(archive_path, unpacked_dir)
            model = tf.keras.models.load_model(unpacked_dir)
            return model

Source File: download_results.py From pytracking with GNU General Public License v3.0

5 votes

def unpack_tracking_results(download_path, output_path=None):
    """
    Unpack zipped benchmark results, one folder per tracker and run.

    'download_path' is expected to be laid out as
    - root
        - tracker1
            - param1.zip
            - param2.zip
            .
            .
        - tracker2
            - param1.zip
            - param2.zip
        .
        .

    Each archive ``<tracker>/<name>.zip`` is extracted to
    ``<output_path>/<tracker>/<name>``.

    args:
        download_path - Directory that holds the zipped results
        output_path - Directory the results are unpacked into. Falls back to
                      env_settings().results_path when None
    """
    results_root = env_settings().results_path if output_path is None else output_path

    if not os.path.exists(results_root):
        os.makedirs(results_root)

    for tracker in os.listdir(download_path):
        tracker_src = os.path.join(download_path, tracker)
        tracker_dst = os.path.join(results_root, tracker)

        for run_file in os.listdir(tracker_src):
            # The tracker's output folder is only created once a run file is found.
            if not os.path.exists(tracker_dst):
                os.makedirs(tracker_dst)
            # Drop the last four characters (the '.zip' suffix) to name the run folder.
            run_name = run_file[:-4]
            shutil.unpack_archive(os.path.join(tracker_src, run_file), os.path.join(tracker_dst, run_name), 'zip')

Source File: fs.py From ClusterRunner with Apache License 2.0

5 votes

def unzip_directory(archive_file: str, target_dir: str=None, delete: bool=False):
    """
    Unpack a zip archive.
    :param archive_file: path of the zip archive to unpack
    :param target_dir: directory to unpack into; when empty or None, the directory containing the archive
    :param delete: remove the zip archive once it has been unpacked
    """
    # Fall back to the directory part of the archive path.
    destination = target_dir or os.path.dirname(archive_file)

    shutil.unpack_archive(archive_file, destination, 'zip')

    if not delete:
        return
    os.remove(archive_file)

Source File: data.py From treasure-boxes with MIT License

5 votes

def upload_dataset(database, train_table, test_table):
    """
    Download the aclImdb sentiment dataset and upload it to Treasure Data.

    Reads the ``TD_API_KEY`` and ``TD_API_SERVER`` environment variables to
    build a ``pytd.Client``. If both tables already exist in ``database``
    nothing is downloaded. Otherwise the archive is saved as ``aclImdb.tar.gz``
    in the current directory, unpacked into ``resources``, the train/test
    folders are loaded with ``load_dataset`` and written to ``train_table`` and
    ``test_table`` (overwriting them); the downloaded files are then removed.

    Returns ``True`` in both cases. Raises ``KeyError`` if either environment
    variable is missing.
    """
    import pytd

    apikey = os.environ["TD_API_KEY"]
    apiserver = os.environ["TD_API_SERVER"]
    client = pytd.Client(database=database, apikey=apikey, endpoint=apiserver)

    if client.exists(database, train_table) and client.exists(database, test_table):
        print("Target database and tables exists. Skip")
        return True

    target_url = "http://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz"
    file_name = "aclImdb.tar.gz"
    print(f"Start downloading: {target_url}")

    # Use the response as a context manager so the HTTP connection is closed
    # as soon as the copy finishes (or fails) rather than when it is collected.
    with urlopen(target_url) as response, open(file_name, "wb") as f:
        shutil.copyfileobj(response, f)
    print(f"Finished donwloading: {target_url} into {file_name}. Unpacking...")
    shutil.unpack_archive(file_name, "resources")

    print("Unpacked. Load as dataframe")
    train_df = load_dataset(os.path.join("resources", "aclImdb", "train"))
    test_df = load_dataset(os.path.join("resources", "aclImdb", "test"))

    print("Loaded. Upload to Treasure Data")
    client.create_database_if_not_exists(database)
    client.load_table_from_dataframe(train_df, train_table, if_exists="overwrite")
    client.load_table_from_dataframe(test_df, test_table, if_exists="overwrite")

    # Remove the unpacked tree and the downloaded archive.
    shutil.rmtree(os.path.join("resources"))
    os.remove(file_name)

    return True

Source File: download_preprocessed_tao.py From KOBE with MIT License

5 votes

def untar(path, fname, deleteTar=True):
    """
    Extract ``fname`` (an archive inside ``path``) into ``path``.

    Unless ``deleteTar`` is false, the archive file is deleted afterwards.
    """
    print('unpacking ' + fname)
    packed = os.path.join(path, fname)
    shutil.unpack_archive(packed, extract_dir=path)
    if not deleteTar:
        return
    os.remove(packed)

Source File: io_utils.py From sagemaker-rl-container with Apache License 2.0

5 votes

def extract_model(tar_gz_folder):
    """
    Unpack ``model.tar.gz`` inside ``tar_gz_folder`` into that same folder and
    return the result of ``get_vw_model`` for it (described upstream as a
    tuple (str, str) of metadata string and model weights URL on disk).
    """
    archive_path = os.path.join(tar_gz_folder, "model.tar.gz")
    shutil.unpack_archive(filename=archive_path, extract_dir=tar_gz_folder)
    model = get_vw_model(tar_gz_folder)
    return model

Source File: downloader.py From echo360 with MIT License

5 votes

def download(self):
        """
        Download the binary for the current OS, extract it and make it executable.

        The bin root directory is wiped and recreated, the archive from
        ``self.get_download_link()`` is fetched into it with ``wget`` and
        extracted in place. On Python 3 ``shutil.unpack_archive`` is used; on
        Python 2 ``zipfile``/``tarfile`` are chosen from the file name. Finally
        the executable bit is added to the file returned by ``self.get_bin()``.
        """
        print('>> Downloading {0} binary file for "{1}"'
              .format(self._name, self.get_os_suffix()))
        # Resolve where to fetch the binary for this OS and where to put it
        link, filename = self.get_download_link()
        bin_path = self.get_bin_root_path()
        # Start from an empty bin directory
        if os.path.exists(bin_path):
            shutil.rmtree(bin_path)
        os.makedirs(bin_path)
        archive_path = '{0}/{1}'.format(bin_path, filename)
        wget.download(link, out=archive_path)
        print('\r\n>> Extracting archive file "{0}"'.format(filename))
        if sys.version_info >= (3, 0):  # compatibility for python 2 & 3
            shutil.unpack_archive(archive_path, extract_dir=bin_path)
        elif '.zip' in filename:
            import zipfile
            with zipfile.ZipFile(archive_path, 'r') as zipped:
                zipped.extractall(bin_path)
        elif '.tar' in filename:
            import tarfile
            with tarfile.open(archive_path) as tarred:
                tarred.extractall(path=bin_path)
        # Add the executable bit to the extracted binary
        current = os.stat(self.get_bin())
        os.chmod(self.get_bin(), current.st_mode | stat.S_IEXEC)

Source File: mkosi.py From mkosi with GNU Lesser General Public License v2.1

5 votes

def install_extra_trees(args: CommandLineArguments, root: str, for_cache: bool) -> None:
    """Copy or unpack every entry of ``args.extra_trees`` into ``root``.

    Directories are copied with ``copy_path``; anything else is treated as an
    archive and unpacked. Nothing happens when there are no extra trees or
    when ``for_cache`` is true.
    """
    if not args.extra_trees or for_cache:
        return

    with complete_step('Copying in extra file trees'):
        for tree in args.extra_trees:
            install = copy_path if os.path.isdir(tree) else shutil.unpack_archive
            install(tree, root)

Source File: mkosi.py From mkosi with GNU Lesser General Public License v2.1

5 votes

def install_skeleton_trees(args: CommandLineArguments, root: str, for_cache: bool) -> None:
    """Copy or unpack every entry of ``args.skeleton_trees`` into ``root``.

    Directories are copied with ``copy_path``; anything else is treated as an
    archive and unpacked. ``for_cache`` is accepted but not consulted here.
    """
    if not args.skeleton_trees:
        return

    with complete_step('Copying in skeleton file trees'):
        for tree in args.skeleton_trees:
            install = copy_path if os.path.isdir(tree) else shutil.unpack_archive
            install(tree, root)

Source File: BlenderUpdater.py From BlenderUpdater with GNU General Public License v3.0

5 votes

def run(self):
        """
        Download, extract and install the archive, signalling after each stage.

        ``self.url`` is retrieved to ``self.filename`` (progress goes to
        ``self.progress``), unpacked into ``./blendertemp/``, and the first
        sub-directory found there is copied into ``dir_`` before the temporary
        folder is removed. ``finishedDL``, ``finishedEX``, ``finishedCP`` and
        ``finishedCL`` are emitted after download, extraction, copy and cleanup
        respectively.
        """
        urllib.request.urlretrieve(self.url, self.filename, reporthook=self.progress)
        self.finishedDL.emit()

        shutil.unpack_archive(self.filename, "./blendertemp/")
        self.finishedEX.emit()

        # Only the top level is inspected: take the directory names from the
        # first tuple that os.walk yields.
        _, subdirs, _ = next(os.walk("./blendertemp/"))
        # dir_ is defined outside this method -- presumably the install
        # directory; verify against the module.
        copy_tree(os.path.join("./blendertemp/", subdirs[0]), dir_)
        self.finishedCP.emit()

        shutil.rmtree("./blendertemp")
        self.finishedCL.emit()

Source File: common.py From open_model_zoo with Apache License 2.0

5 votes

def apply(self, reporter, output_dir):
        """
        Unpack the archive ``self.file`` found in ``output_dir`` and delete it.

        The archive is extracted into ``output_dir`` using ``self.format``; a
        section heading is printed through ``reporter`` before unpacking.
        """
        archive = output_dir / self.file

        reporter.print_section_heading('Unpacking {}', archive)

        shutil.unpack_archive(str(archive), str(output_dir), self.format)
        # The archive is no longer needed once its contents are extracted.
        archive.unlink()

Source File: download_speech_corpus.py From cdvae-vc with MIT License

5 votes

def download(self, dest_root):
        """
        Fetch the corpus archive and move its audio files under ``dest_root``.

        The archive is downloaded and unpacked inside a temporary directory
        that is discarded afterwards.

        Parameters
        ----------
        dest_root : Path
            root path under which the directories
            containing the audio files are placed.
        """
        with TemporaryDirectory() as tmp_name:
            workspace = Path(tmp_name)  # TemporaryDirectory yields a str

            # fetch the archive into the workspace and unpack it there.
            if self.user_option.verbose:
                print("Downloading", self.name, "from", self.src_url, "...")
            archive_path = DataArchive._download_file(self.src_url, workspace)
            if self.user_option.verbose:
                print("Unpack:", archive_path)
            shutil.unpack_archive(str(archive_path), str(workspace))

            # relocate the audio files to their final destination.
            audio_root = workspace / self.audio_root_relative
            self._move_all_audio(audio_root, dest_root)

Source File: update_3rdparty.py From qutebrowser with GNU General Public License v3.0

5 votes

def update_pdfjs(target_version=None):
    """Download a pdf.js release and unpack it into qutebrowser/3rdparty/pdfjs.

    With no target_version the latest release is used; otherwise the given
    one. The working directory is changed to two levels above this file's
    directory, any existing pdfjs folder is replaced, and the temporary
    download is cleaned up afterwards. If the download fails with an HTTP
    error, a message is printed and nothing is replaced.

    Args:
        target_version: None or version string ('x.y.z')
    """
    if target_version is not None:
        # The URL needs the bare x.y.z form, but the user may have passed the
        # version with a leading 'v' on the command line.
        if target_version.startswith('v'):
            target_version = target_version[1:]
        # Keep the 'v' prefix in `version` so it matches what
        # get_latest_pdfjs_url() returns.
        version = 'v' + target_version
        url = ('https://github.com/mozilla/pdf.js/releases/download/'
               'v{0}/pdfjs-{0}-dist.zip').format(target_version)
    else:
        version, url = get_latest_pdfjs_url()

    script_dir = os.path.dirname(os.path.abspath(__file__))
    os.chdir(os.path.join(script_dir, '..', '..'))
    target_path = os.path.join('qutebrowser', '3rdparty', 'pdfjs')
    print("=> Downloading pdf.js {}".format(version))
    try:
        download = urllib.request.urlretrieve(url)
    except urllib.error.HTTPError as error:
        print("Could not retrieve pdfjs {}: {}".format(version, error))
        return
    archive_path = download[0]
    if os.path.isdir(target_path):
        print("Removing old version in {}".format(target_path))
        shutil.rmtree(target_path)
    os.makedirs(target_path)
    print("Extracting new version")
    shutil.unpack_archive(archive_path, target_path, 'zip')
    # Remove the temporary file left behind by urlretrieve().
    urllib.request.urlcleanup()

Python shutil.unpack_archive() Examples