Python tarfile Examples (code referencing the tarfile.html documentation page)

The following are 4 code examples that reference the Python `tarfile` documentation page (tarfile.html); note that `tarfile.html()` is not an actual function of the module. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module tarfile, or try the search function.
Example #1
Source File: utils_ipfs.py    From snet-cli with MIT License 6 votes vote down vote up
def safe_extract_proto_from_ipfs(ipfs_client, ipfs_hash, protodir):
    """
    Fetch a tarball through get_from_ipfs_and_checkhash and unpack it into
    protodir, accepting only a flat archive of regular files.

    Tar files might be dangerous (see https://bugs.python.org/issue21109,
    and https://docs.python.org/3/library/tarfile.html, TarFile.extractall warning),
    so every member is validated before anything is written to disk.

    Raises Exception when a member name has a directory component, when a
    member is not a regular file, or when the target path already exists
    inside protodir.
    """
    archive_bytes = get_from_ipfs_and_checkhash(ipfs_client, ipfs_hash)
    with tarfile.open(fileobj=io.BytesIO(archive_bytes)) as archive:
        for member in archive.getmembers():
            # Any directory part in the name (including a leading "/")
            # disqualifies the whole archive.
            if os.path.dirname(member.name):
                raise Exception(
                    "tarball has directories. We do not support it.")
            # Links, devices, directories etc. are refused.
            if not member.isfile():
                raise Exception(
                    "tarball contains %s which is not a files" % member.name)
            # Never overwrite something that is already in protodir.
            target = os.path.join(protodir, member.name)
            if os.path.exists(target):
                raise Exception("%s already exists." % target)
        # Every member passed the checks above, so extract them all.
        archive.extractall(protodir)
Example #2
Source File: extract.py    From memorious with MIT License 6 votes vote down vote up
def extract_tar(file_path, extract_dir, context):
    """Extract the members of a tar archive that stay inside ``extract_dir``.

    Each member name is checked before extraction. Members with an absolute
    name, or whose resolved destination would fall outside ``extract_dir``
    (for example ``sub/../../evil``), are skipped and reported through
    ``context.log.info``. See the warning at
    https://docs.python.org/3/library/tarfile.html#tarfile.TarFile.extractall

    Parameters:
        file_path: path of the archive; opened with mode ``"r:*"``.
        extract_dir: directory the members are extracted into.
        context: object exposing ``log.info(fmt, *args)``.

    Returns:
        A list with ``os.path.join(extract_dir, name)`` for every extracted
        member that is a regular file on disk after extraction.
    """
    extracted_files = []
    # Resolve the destination once so containment is compared on real paths.
    base_dir = os.path.realpath(extract_dir)
    with tarfile.open(file_path, "r:*") as tar_ref:
        for name in tar_ref.getnames():
            # Use a dedicated name for the target: re-using ``file_path``
            # here would make later log lines report the wrong archive.
            target_path = os.path.join(extract_dir, name)
            unsafe = name.startswith("/") or os.path.isabs(name)
            if not unsafe:
                # realpath collapses ".." components and follows links that
                # earlier members may already have created on disk.
                resolved = os.path.realpath(target_path)
                try:
                    unsafe = os.path.commonpath([base_dir, resolved]) != base_dir
                except ValueError:
                    # e.g. paths on different drives: certainly not inside.
                    unsafe = True
            if unsafe:
                context.log.info(
                    "Bad path %s while extracting archive at %s",
                    name, file_path
                )
                continue
            tar_ref.extract(name, extract_dir)
            if os.path.isfile(target_path):
                extracted_files.append(target_path)
    return extracted_files
Example #3
Source File: common.py    From sciwing with MIT License 5 votes vote down vote up
def extract_tar(filename: str, destination_dir: str, mode="r"):
    """ Extracts tar, targz and other files

    Progress and the outcome are reported through a ``Printer`` instance.
    A ``tarfile.ExtractError`` is caught and reported as a failure message;
    any other exception propagates to the caller.

    Parameters
    ----------
    filename : str
        The tar zipped file
    destination_dir : str
        The destination directory in which the files should be placed
    mode : str
        A valid tar mode. You can refer to https://docs.python.org/3/library/tarfile.html
        for the different modes.

    Returns
    -------
    None

    """
    msg_printer = Printer()
    try:
        with msg_printer.loading(f"Unzipping file {filename} to {destination_dir}"):
            stdout.flush()
            with tarfile.open(filename, mode) as t:
                # NOTE(review): extractall does not validate member paths
                # here; only use this on archives from a trusted source
                # (see the TarFile.extractall warning in the tarfile docs).
                t.extractall(destination_dir)

        msg_printer.good(f"Finished extraction {filename} to {destination_dir}")
    except tarfile.ExtractError:
        # The message must be an f-string and name the real parameter,
        # otherwise the placeholders are printed literally.
        msg_printer.fail(f"Couldnot extract {filename} to {destination_dir}")
Example #4
Source File: genetic_maps.py    From stdpopsim with GNU General Public License v3.0 4 votes vote down vote up
def download(self):
        """
        Downloads this genetic map from the source URL and stores it in the
        cache directory. If the map directory already exists it is first
        removed.

        Raises ValueError if the downloaded tarball contains a member that
        is not a regular file, or whose name is absolute or would resolve
        outside the extraction directory. Emits a warning (instead of
        raising) if moving the extracted directory into place fails with
        OSError.
        """
        if self.is_cached():
            logger.info(f"Clearing cache {self.map_cache_dir}")
            with tempfile.TemporaryDirectory(dir=self.species_cache_dir) as tempdir:
                # Atomically move to a temporary directory, which will be automatically
                # deleted on exit.
                dest = pathlib.Path(tempdir) / "will_be_deleted"
                os.rename(self.map_cache_dir, dest)
        logger.debug(f"Checking species cache directory {self.species_cache_dir}")
        os.makedirs(self.species_cache_dir, exist_ok=True)

        logger.info(f"Downloading genetic map '{self.id}' from {self.url}")
        # os.rename will not work on some Unixes if the source and dest are on
        # different file systems. Keep the tempdir in the same directory as
        # the destination to ensure it's on the same file system.
        with tempfile.TemporaryDirectory(dir=self.species_cache_dir) as tempdir:
            download_file = os.path.join(tempdir, "downloaded")
            extract_dir = os.path.join(tempdir, "extracted")
            urllib.request.urlretrieve(self.url, filename=download_file)
            logger.debug("Extracting genetic map")
            os.makedirs(extract_dir)
            with tarfile.open(download_file, 'r') as tf:
                for info in tf.getmembers():
                    # Validate every member before extracting anything. See the
                    # warning here:
                    # https://docs.python.org/3/library/tarfile.html#tarfile.TarFile.extractall
                    if not info.isfile():
                        raise ValueError(
                            f"Tarball format error: member {info.name} not a file")
                    # Reject names that would be written outside extract_dir:
                    # absolute paths, or paths that still start with ".."
                    # once redundant components have been collapsed.
                    normalised = os.path.normpath(info.name)
                    if (
                        os.path.isabs(normalised)
                        or normalised == os.pardir
                        or normalised.startswith(os.pardir + os.sep)
                    ):
                        raise ValueError(
                            f"Tarball format error: member {info.name} has an "
                            "unsafe path")
                # NOTE(review): cd is presumably a context manager that
                # changes the working directory - confirm against its definition.
                with cd(extract_dir):
                    tf.extractall()
            # If this has all gone OK up to here we can now move the
            # extracted directory into the cache location. This should
            # minimise the chances of having malformed maps in the cache.
            logger.info(f"Storing map in {self.map_cache_dir}")
            # os.rename is atomic, and will raise an OSError if the directory
            # already exists. Therefore, if we see the map exists we assume
            # that some other thread has already downloaded it and raise a
            # warning.
            try:
                os.rename(extract_dir, self.map_cache_dir)
            except OSError:
                # The trailing space matters: adjacent literals are joined
                # without any separator.
                warnings.warn(
                    "Error occured renaming map directory. Are several threads/processes "
                    "downloading this map at the same time?")