Python Examples of bz2.BZ2File

Source File: _datasource.py From lambda-packs with MIT License

6 votes

def _python2_bz2open(fn, mode, encoding, newline):
    """Open a bz2 file on Python 2, downgrading text mode to binary.

    Parameters
    ----------
    fn : str
        File name
    mode : {'r', 'w'}
        File mode. A "t" flag is removed (after a warning) because bz2
        text files are not supported.
    encoding : str
        Only forwarded to ``_check_mode``; otherwise ignored.
    newline : str
        Only forwarded to ``_check_mode``; otherwise ignored.

    Returns
    -------
    bz2.BZ2File
        The file opened with ``mode`` stripped of any "t".
    """
    import bz2

    _check_mode(mode, encoding, newline)

    text_requested = "t" in mode
    if text_requested:
        # BZ2File lacks the methods TextIOWrapper needs, so fall back
        # to a binary handle and tell the caller about it.
        warnings.warn("Assuming latin1 encoding for bz2 text file in Python2",
                      RuntimeWarning, stacklevel=5)
    binary_mode = mode.replace("t", "") if text_requested else mode
    return bz2.BZ2File(fn, binary_mode)

Source File: NRRD.py From ClearMap with GNU General Public License v3.0

6 votes

def _write_data(data, filehandle, options):
    """Write ``data`` to ``filehandle`` using the encoding named in ``options``.

    The axes of ``data`` are reversed with ``transpose([2, 1, 0])`` and the
    result is flattened in C order, so ``data`` must be three-dimensional.

    Parameters
    ----------
    data : array
        Three-dimensional array to serialize.
    filehandle : binary file object
        Destination; it is written to but not closed.
    options : dict
        Must contain the key ``'encoding'`` with one of ``'raw'``,
        ``'gzip'`` or ``'bz2'``.

    Raises
    ------
    NrrdError
        If ``options['encoding']`` is not one of the supported values.
    """
    # ``tobytes`` is the supported spelling of the deprecated ``tostring``.
    rawdata = data.transpose([2, 1, 0]).tobytes(order='C')
    encoding = options['encoding']
    if encoding == 'raw':
        filehandle.write(rawdata)
    elif encoding == 'gzip':
        # The mode is explicit: without it GzipFile takes the mode from the
        # handle and falls back to reading when the handle exposes none.
        gzfileobj = gzip.GzipFile(fileobj=filehandle, mode='wb')
        try:
            gzfileobj.write(rawdata)
        finally:
            # Closing the GzipFile flushes the trailer but leaves
            # ``filehandle`` itself open.
            gzfileobj.close()
    elif encoding == 'bz2':
        # BZ2File has no ``fileobj`` keyword and opens for reading by
        # default, so compress in memory and write the stream directly.
        filehandle.write(bz2.compress(rawdata))
    else:
        raise NrrdError('Unsupported encoding: "%s"' % options['encoding'])

Source File: outputhandler.py From benchexec with Apache License 2.0

6 votes

def _write_pretty_result_xml_to_file(self, xml, filename):
        """Pretty-print ``xml`` with a DOCTYPE into ``filename``.

        With ``self.compress_results`` set, the output goes to
        ``filename + ".bz2"`` and any uncompressed ``filename`` is removed.
        Otherwise the output is written to ``filename + ".tmp"`` and then
        renamed onto ``filename``. ``self.all_created_files`` is updated to
        name the file that now exists. Returns ``filename`` in both cases.
        """
        if self.compress_results:
            target, opener = filename + ".bz2", bz2.BZ2File
        else:
            # Go through a temp file so that a failed write cannot destroy
            # the data of an already existing result file.
            target, opener = filename + ".tmp", open

        with io.TextIOWrapper(opener(target, "wb"), encoding="utf-8") as out:
            document = minidom.parseString(
                ElementTree.tostring(xml, encoding="unicode")
            )
            document.insertBefore(
                minidom.DOMImplementation().createDocumentType(
                    "result", RESULT_XML_PUBLIC_ID, RESULT_XML_SYSTEM_ID
                ),
                document.documentElement,
            )
            document.writexml(
                out, indent="", addindent="  ", newl="\n", encoding="utf-8"
            )

        if not self.compress_results:
            os.rename(target, filename)
            self.all_created_files.add(filename)
            return filename

        # Best effort: drop the uncompressed file, which no-compress mode
        # would simply have overwritten.
        try:
            os.remove(filename)
        except OSError:
            pass
        self.all_created_files.discard(filename)
        self.all_created_files.add(target)
        return filename

Source File: _datasource.py From recruit with Apache License 2.0

6 votes

def _python2_bz2open(fn, mode, encoding, newline):
    """Return a ``bz2.BZ2File`` for `fn`, emulating text mode on Python 2.

    Parameters
    ----------
    fn : str
        File name
    mode : {'r', 'w'}
        File mode. Text mode is not supported for bz2 files; a "t" in the
        mode triggers a warning and is dropped.
    encoding : str
        Passed to ``_check_mode`` only, ignored for opening.
    newline : str
        Passed to ``_check_mode`` only, ignored for opening.
    """
    import bz2

    _check_mode(mode, encoding, newline)

    if "t" not in mode:
        return bz2.BZ2File(fn, mode)

    # BZ2File cannot be wrapped in a TextIOWrapper, so the text flag is
    # stripped and the caller is warned about it.
    warnings.warn("Assuming latin1 encoding for bz2 text file in Python2",
                  RuntimeWarning, stacklevel=5)
    return bz2.BZ2File(fn, mode.replace("t", ""))

Source File: test__datasource.py From recruit with Apache License 2.0

6 votes

def test_Bz2File_text_mode_warning(self):
        """Opening a bz2 file in 'rt' mode through the datasource must warn."""
        try:
            import bz2
        except ImportError:
            # Without bz2 there is nothing to test.
            pytest.skip()

        # Build the fixture file with a known first line.
        bz2_path = os.path.join(self.tmpdir, 'foobar.txt.bz2')
        writer = bz2.BZ2File(bz2_path, 'w')
        writer.write(magic_line)
        writer.close()

        # Read it back through the datasource's internal file_opener.
        with assert_warns(RuntimeWarning):
            reader = self.ds.open(bz2_path, 'rt')
            first_line = reader.readline()
            reader.close()
        assert_equal(magic_line, first_line)

Source File: world.py From psychsim with MIT License

6 votes

def save(self,filename,compressed=True):
        """
        Write the XML document returned by C{self.__xml__()} to a file.

        :param filename: target path; the expected extension (C{.psy} when compressed, C{.xml} otherwise) is appended if missing
        :type filename: str
        :param compressed: if C{True}, then save in compressed XML; otherwise, save in XML (default is C{True})
        :type compressed: bool
        :returns: the filename used (possibly with a .psy or .xml extension added)
        :rtype: str
        """
        extension = '.psy' if compressed else '.xml'
        if filename[-4:] != extension:
            filename = '%s%s' % (filename, extension)
        # The context managers guarantee the handle is closed even when
        # building or writing the XML raises.
        if compressed:
            with bz2.BZ2File(filename,'w') as f:
                f.write(self.__xml__().toprettyxml().encode('utf-8'))
        else:
            with open(filename,'w') as f:
                f.write(self.__xml__().toprettyxml())
        return filename

Source File: tarfile.py From meddle with MIT License

6 votes

def bz2open(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
        """Open bzip2 compressed tar archive name for reading or writing.
           Appending is not allowed.

           ``fileobj``, when given, is wrapped in a ``_BZ2Proxy``; otherwise
           ``name`` is opened with ``bz2.BZ2File``. Raises ValueError for a
           mode other than 'r' or 'w', CompressionError when the bz2 module
           cannot be imported, and ReadError when ``cls.taropen`` fails with
           IOError or EOFError.
        """
        # Compare against the exact modes: a substring test such as
        # ``mode not in "rw"`` lets the empty string through.
        if mode not in ("r", "w"):
            raise ValueError("mode must be 'r' or 'w'.")

        try:
            import bz2
        except ImportError:
            raise CompressionError("bz2 module is not available")

        if fileobj is not None:
            fileobj = _BZ2Proxy(fileobj, mode)
        else:
            fileobj = bz2.BZ2File(name, mode, compresslevel=compresslevel)

        try:
            t = cls.taropen(name, mode, fileobj, **kwargs)
        except (IOError, EOFError):
            # Release the wrapper created above before reporting the error.
            fileobj.close()
            raise ReadError("not a bzip2 file")
        t._extfileobj = False
        return t

    # All *open() methods are registered here.

Source File: BotDigger.py From BotDigger with GNU General Public License v3.0

6 votes

def openFile(filename, modes):
	"""Open `filename` with an opener chosen from `file_type(filename)`.

	`modes` is used for plain files and for the raw handle behind xz
	files; the bz2, gz and zip openers are called with their default mode.
	Raises LookupError when `file_type` returns an unexpected value.
	"""
	filetype = file_type(filename)
	if filetype is None:
		return open(filename, modes)
	elif filetype == "bz2":
		return bz2.BZ2File(filename)
	elif filetype == "gz":
		return gzip.open(filename)
	elif filetype == "xz":
		# The raw handle must stay open for as long as the returned
		# LZMAFile is used; wrapping it in a `with` block would hand the
		# caller a reader over an already closed file.
		raw = open(filename, modes)
		try:
			return xz.LZMAFile(raw)
		except Exception:
			raw.close()
			raise
	elif filetype == "zip":
		return zipfile.ZipFile(filename)
	else:
		# should never get here
		raise LookupError("filetype is invalid")

Source File: test_read_fwf.py From vnpy_crypto with MIT License

6 votes

def test_fwf_compression(self):
        """read_fwf must give the same frame for gzip/bz2 files as for plain text."""
        try:
            import gzip
            import bz2
        except ImportError:
            pytest.skip("Need gzip and bz2 to run this test")

        data = """1111111111
        2222222222
        3333333333""".strip()
        layout = dict(widths=[5, 5], names=['one', 'two'])
        expected = read_fwf(StringIO(data), **layout)
        if compat.PY3:
            # The compressed writers below need bytes on Python 3.
            data = bytes(data, encoding='utf-8')

        for comp_name, compresser in (('gzip', gzip.GzipFile),
                                      ('bz2', bz2.BZ2File)):
            with tm.ensure_clean() as path:
                archive = compresser(path, mode='wb')
                archive.write(data)
                archive.close()
                result = read_fwf(path, compression=comp_name, **layout)
                tm.assert_frame_equal(result, expected)

Source File: _datasource.py From vnpy_crypto with MIT License

6 votes

def _python2_bz2open(fn, mode, encoding, newline):
    """Open a bz2 file on Python 2; text mode is rejected.

    Parameters
    ----------
    fn : str
        File name
    mode : {'r', 'w'}
        File mode. A mode containing "t" raises, because bz2 text files
        are not supported.
    encoding : str
        Only handed to ``_check_mode``; not used for opening.
    newline : str
        Only handed to ``_check_mode``; not used for opening.

    Raises
    ------
    ValueError
        If `mode` contains "t".
    """
    import bz2

    _check_mode(mode, encoding, newline)

    wants_text = "t" in mode
    if wants_text:
        # BZ2File is missing the functions TextIOWrapper relies on.
        raise ValueError("bz2 text files not supported in python2")
    return bz2.BZ2File(fn, mode)

Source File: tarfile.py From jawfish with MIT License

6 votes

def bz2open(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
        """Open bzip2 compressed tar archive name for reading or writing.
           Appending is not allowed.

           The archive is read from ``fileobj`` when it is truthy, otherwise
           from the path ``name``. Raises ValueError for a mode other than
           'r' or 'w', CompressionError when the bz2 module cannot be
           imported, and ReadError when ``cls.taropen`` fails with IOError
           or EOFError.
        """
        # Compare against the exact modes: a substring test such as
        # ``mode not in "rw"`` lets the empty string through.
        if mode not in ("r", "w"):
            raise ValueError("mode must be 'r' or 'w'.")

        try:
            import bz2
        except ImportError:
            raise CompressionError("bz2 module is not available")

        fileobj = bz2.BZ2File(fileobj or name, mode,
                              compresslevel=compresslevel)

        try:
            t = cls.taropen(name, mode, fileobj, **kwargs)
        except (IOError, EOFError):
            fileobj.close()
            raise ReadError("not a bzip2 file")
        t._extfileobj = False
        return t

Source File: compress_files.py From osspolice with GNU General Public License v3.0

5 votes

def open(self):
        """Return a ``bz2.BZ2File`` (default mode) constructed from ``self.f``."""
        source = self.f
        return bz2.BZ2File(source)

Source File: _iotools.py From vnpy_crypto with MIT License

5 votes

def _to_filehandle(fname, flag='r', return_opened=False):
    """
    Returns the filehandle corresponding to a string or a file.
    If the string ends in '.gz' or '.bz2', the file is opened through
    `gzip` or `bz2` respectively.

    Parameters
    ----------
    fname : string, filehandle
        Name of the file whose filehandle must be returned.
    flag : string, optional
        Flag indicating the status of the file ('r' for read, 'w' for write).
        It is not forwarded for '.bz2' names, which are always opened with
        the default mode of ``bz2.BZ2File``.
    return_opened : boolean, optional
        Whether to return the opening status of the file.

    Returns
    -------
    fhd : filehandle
        The opened (or passed-through) file handle.
    opened : bool
        Only returned when `return_opened` is true: True when this function
        opened the file, False when `fname` already was a handle.

    Raises
    ------
    ValueError
        If `fname` is neither string-like nor an object with a ``seek``
        attribute.
    """
    if _is_string_like(fname):
        if fname.endswith('.gz'):
            import gzip
            fhd = gzip.open(fname, flag)
        elif fname.endswith('.bz2'):
            import bz2
            fhd = bz2.BZ2File(fname)
        else:
            # ``open`` exists on Python 2 and 3; the ``file`` builtin does
            # not exist on Python 3.
            fhd = open(fname, flag)
        opened = True
    elif hasattr(fname, 'seek'):
        fhd = fname
        opened = False
    else:
        raise ValueError('fname must be a string or file handle')
    if return_opened:
        return fhd, opened
    return fhd

Source File: compress_files.py From osspolice with GNU General Public License v3.0

5 votes

def get_compressed_file(filename):
    """Return a wrapper object for `filename` chosen by its leading bytes.

    The first 1024 bytes are offered to each candidate class's
    ``is_magic``; the first class that accepts them is instantiated.
    ``GZFile``, ``BZ2File`` and ``TarFile`` are constructed from the path,
    every other class from the already opened binary handle (rewound to
    the start). Returns None when no class matches.
    """
    f = open(filename, 'rb')
    handed_off = False
    try:
        start_of_file = f.read(1024)
        f.seek(0)
        for cls in (ZIPFile, BZ2File, GZFile, SevenZFile, TarFile, XZFile, JARCSFile, MARFile, RARFile, WinZIPFile):
            if not cls.is_magic(start_of_file):
                continue
            if cls in (GZFile, BZ2File, TarFile):
                return cls(filename)
            wrapper = cls(f)
            # From here on the wrapper is responsible for the handle.
            handed_off = True
            return wrapper
        return None
    finally:
        # Close the probe handle unless a wrapper took it over; otherwise
        # it would leak on the path-based, no-match and error paths.
        if not handed_off:
            f.close()

Source File: utils.py From topical_word_embeddings with MIT License

5 votes

def make_closing(base, **attrs):
    """
    Build a subclass of `base` that can be used as `with Base(...) as fout:`.

    Whichever of `__enter__` / `__exit__` the base class lacks is filled in:
    entering yields the instance itself and exiting calls its `close()`
    method. Extra keyword arguments become class attributes of the result.

    This is needed for gzip.GzipFile, bz2.BZ2File etc in older Pythons (<=2.6), which otherwise
    raise "AttributeError: GzipFile instance has no attribute '__exit__'".

    """
    def _enter(self):
        return self

    def _exit(self, exc_type, exc_value, exc_traceback):
        return self.close()

    for dunder, fallback in (('__enter__', _enter), ('__exit__', _exit)):
        if not hasattr(base, dunder):
            attrs[dunder] = fallback
    return type('Closing' + base.__name__, (base, object), attrs)

Source File: wikicorpus.py From topical_word_embeddings with MIT License

5 votes

def get_texts(self):
        """
        Iterate over the dump, returning text version of each article as a list
        of tokens.

        Only articles of sufficient length are returned (short articles & redirects
        etc are ignored).

        When `self.metadata` is set, each item is `(tokens, (pageid, title))`
        instead of just `tokens`. After a complete pass the number of yielded
        articles is stored in `self.length`.

        Note that this iterates over the **texts**; if you want vectors, just use
        the standard corpus interface instead of this function::

        >>> for vec in wiki_corpus:
        >>>     print(vec)
        """
        articles, articles_all = 0, 0
        positions, positions_all = 0, 0
        dump = bz2.BZ2File(self.fname)
        pool = None
        try:
            texts = ((text, self.lemmatize, title, pageid) for title, text, pageid in extract_pages(dump, self.filter_namespaces))
            pool = multiprocessing.Pool(self.processes)
            # process the corpus in smaller chunks of docs, because multiprocessing.Pool
            # is dumb and would load the entire input into RAM at once...
            ignore_namespaces = 'Wikipedia Category File Portal Template MediaWiki User Help Book Draft'.split()
            for group in utils.chunkize(texts, chunksize=10 * self.processes, maxsize=1):
                for tokens, title, pageid in pool.imap(process_article, group): # chunksize=10):
                    articles_all += 1
                    positions_all += len(tokens)
                    # article redirects and short stubs are pruned here
                    if len(tokens) < ARTICLE_MIN_WORDS or any(title.startswith(ignore + ':') for ignore in ignore_namespaces):
                        continue
                    articles += 1
                    positions += len(tokens)
                    if self.metadata:
                        yield (tokens, (pageid, title))
                    else:
                        yield tokens
        finally:
            # Also runs when the consumer abandons the generator early or a
            # worker raises, so the pool and the dump file are always released.
            if pool is not None:
                pool.terminate()
            dump.close()

        logger.info("finished iterating over Wikipedia corpus of %i documents with %i positions"
            " (total %i articles, %i positions before pruning articles shorter than %i words)" %
            (articles, positions, articles_all, positions_all, ARTICLE_MIN_WORDS))
        self.length = articles # cache corpus length
# endclass WikiCorpus

Source File: _fileio.py From pysat with MIT License

5 votes

def open(self, name, mode='r', compression=None):
        """
            Open a file pointer. Note that a file is *always* opened in text
            mode. The method inherits its input parameters from the constructor
            of :class:`FileObject`.

            The opened handle is stored in ``self.fp``. With
            ``compression='use_ext'`` the compression type is determined by
            ``self.get_compression_type(name)`` (presumably this sets
            ``self.ctype`` -- confirm against that method); otherwise
            ``self.ctype`` is set to ``compression``. Any ``self.ctype``
            other than a false value, ``'gzip'`` or ``'bzip2'`` is handled
            as LZMA.
        """

        if compression == 'use_ext':
            self.get_compression_type(name)
        else:
            self.ctype = compression

        if not self.ctype:
            self.fp = open(name, mode)
        elif self.ctype == 'gzip':
            self.fp = gzip.open(name, mode + 't')
        elif self.ctype == 'bzip2':
            try:
                # Python 3 supports opening bzip2 files in text mode
                # therefore, we prefer to open them this way
                self.fp = bz2.open(name, mode + 't')
            except AttributeError:
                # Only a missing bz2.open (Python 2) triggers the fallback;
                # a bare except would also swallow KeyboardInterrupt and
                # hide genuine I/O errors.
                # BZ2File opens a file in binary mode
                # thus, we have to use codecs.getreader()
                # to be able to use it in text mode
                self.fp_extra = bz2.BZ2File(name, mode)

                if mode == 'r':
                    self.fp = codecs.getreader('ascii')(self.fp_extra)
                else:  # mode == 'w'
                    self.fp = codecs.getwriter('ascii')(self.fp_extra)
        else:  # self.ctype == 'lzma'
            # LZMA is available in Python 2 only if backports.lzma is installed
            # Python 3 supports it by default
            assert lzma_present, 'LZMA compression is unavailable.'
            self.fp = lzma.open(name, mode=mode + 't')

Source File: utils.py From topical_word_embeddings with MIT License

5 votes

def smart_open(fname, mode='rb'):
    """Open `fname` with an opener picked from its file extension.

    '.bz2' and '.gz' names are opened through ``make_closing``-wrapped
    ``BZ2File`` / ``GzipFile`` classes; everything else goes through the
    builtin ``open``.
    """
    extension = os.path.splitext(fname)[1]
    if extension == '.bz2':
        from bz2 import BZ2File
        opener = make_closing(BZ2File)
    elif extension == '.gz':
        from gzip import GzipFile
        opener = make_closing(GzipFile)
    else:
        opener = open
    return opener(fname, mode)

Source File: test_tarfile.py From ironpython2 with Apache License 2.0

5 votes

def test_fileobj_with_offset(self):
        """A tar opened from a file object positioned at the second member
        must start with that member and still allow seeking back."""
        # Remember name, offset and payload of the second member of the
        # testtar (the first one is skipped).
        tar = tarfile.open(self.tarname, mode=self.mode)
        try:
            tar.next()
            second = tar.next()
            name = second.name
            offset = second.offset
            data = tar.extractfile(second).read()
        finally:
            tar.close()

        # Pick the opener that matches the compression of the testtar.
        mode = self.mode
        if mode.endswith(":gz"):
            opener = gzip.GzipFile
        elif mode.endswith(":bz2"):
            opener = bz2.BZ2File
        else:
            opener = open

        # Reopen the testtar and position it on the second member.
        fobj = opener(self.tarname, "rb")
        try:
            fobj.seek(offset)

            # The archive read from here must begin with the second member.
            tar = tar.open(self.tarname, mode="r:", fileobj=fobj)
            member = tar.next()
            self.assertEqual(member.name, name)
            # Exhaust fileobj, then check that seeking back to the first
            # visible member still works.
            tar.getmembers()
            self.assertEqual(tar.extractfile(member).read(), data,
                    "seek back did not work")
            tar.close()
        finally:
            fobj.close()

Source File: tarfile.py From kobo-predict with BSD 2-Clause "Simplified" License

5 votes

def bz2open(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
        """Open bzip2 compressed tar archive name for reading or writing.
           Appending is not allowed.

           The bz2 stream wraps ``fileobj`` when it is truthy, else the path
           ``name``. If ``cls.taropen`` fails the stream is closed again; an
           OSError or EOFError in read mode is reported as ReadError.
        """
        if mode not in ("r", "w", "x"):
            raise ValueError("mode must be 'r', 'w' or 'x'")

        try:
            import bz2
        except ImportError:
            raise CompressionError("bz2 module is not available")

        stream = bz2.BZ2File(fileobj or name, mode,
                             compresslevel=compresslevel)

        try:
            archive = cls.taropen(name, mode, stream, **kwargs)
        except BaseException as exc:
            # Whatever went wrong, do not leave the bz2 stream open.
            stream.close()
            if mode == 'r' and isinstance(exc, (OSError, EOFError)):
                raise ReadError("not a bzip2 file")
            raise
        archive._extfileobj = False
        return archive

Source File: tarfile.py From kobo-predict with BSD 2-Clause "Simplified" License

5 votes

def bz2open(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
        """Open bzip2 compressed tar archive name for reading or writing.
           Appending is not allowed.

           ``fileobj``, when given, is wrapped in a ``_BZ2Proxy``; otherwise
           ``name`` is opened with ``bz2.BZ2File``. Raises ValueError for a
           mode other than 'r' or 'w', CompressionError when the bz2 module
           cannot be imported, and ReadError when ``cls.taropen`` fails with
           IOError or EOFError.
        """
        # Compare against the exact modes: a substring test such as
        # ``mode not in "rw"`` lets the empty string through.
        if mode not in ("r", "w"):
            raise ValueError("mode must be 'r' or 'w'.")

        try:
            import bz2
        except ImportError:
            raise CompressionError("bz2 module is not available")

        if fileobj is not None:
            fileobj = _BZ2Proxy(fileobj, mode)
        else:
            fileobj = bz2.BZ2File(name, mode, compresslevel=compresslevel)

        try:
            t = cls.taropen(name, mode, fileobj, **kwargs)
        except (IOError, EOFError):
            fileobj.close()
            raise ReadError("not a bzip2 file")
        t._extfileobj = False
        return t

    # All *open() methods are registered here.

Source File: tarfile.py From kobo-predict with BSD 2-Clause "Simplified" License

5 votes

def seekable(self):
        """Report whether ``self.fileobj`` is seekable.

        Objects without a ``seekable`` method are reported as seekable.
        """
        if hasattr(self.fileobj, "seekable"):
            return self.fileobj.seekable()
        # XXX gzip.GzipFile and bz2.BZ2File
        return True

Source File: tarfile.py From Python24 with MIT License

5 votes

def bz2open(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
        """Open bzip2 compressed tar archive name for reading or writing.
           Appending is not allowed.

           ``fileobj``, when given, is wrapped in a ``_BZ2Proxy``; otherwise
           ``name`` is opened with ``bz2.BZ2File``. Raises ValueError for a
           mode other than 'r' or 'w', CompressionError when the bz2 module
           cannot be imported, and ReadError when ``cls.taropen`` fails with
           IOError or EOFError.
        """
        # Compare against the exact modes: a substring test such as
        # ``mode not in "rw"`` lets the empty string through.
        if mode not in ("r", "w"):
            raise ValueError("mode must be 'r' or 'w'.")

        try:
            import bz2
        except ImportError:
            raise CompressionError("bz2 module is not available")

        if fileobj is not None:
            fileobj = _BZ2Proxy(fileobj, mode)
        else:
            fileobj = bz2.BZ2File(name, mode, compresslevel=compresslevel)

        try:
            t = cls.taropen(name, mode, fileobj, **kwargs)
        except (IOError, EOFError):
            fileobj.close()
            raise ReadError("not a bzip2 file")
        t._extfileobj = False
        return t

    # All *open() methods are registered here.

Source File: tarfile.py From Python24 with MIT License

5 votes

def seekable(self):
        """Return the underlying file object's answer to ``seekable()``.

        When ``self.fileobj`` has no ``seekable`` attribute the answer
        defaults to True.
        """
        target = self.fileobj
        # XXX gzip.GzipFile and bz2.BZ2File
        return target.seekable() if hasattr(target, "seekable") else True

Source File: _datasource.py From auto-alt-text-lambda-api with MIT License

5 votes

def _load(self):
        """Register the bz2 and gzip openers once, skipping missing modules.

        Fills ``self._file_openers`` for the ".bz2" and ".gz" extensions and
        sets ``self._loaded`` so later calls return immediately.
        """
        if self._loaded:
            return

        try:
            import bz2
        except ImportError:
            pass
        else:
            self._file_openers[".bz2"] = bz2.BZ2File

        try:
            import gzip
        except ImportError:
            pass
        else:
            self._file_openers[".gz"] = gzip.open

        self._loaded = True

Source File: test__datasource.py From auto-alt-text-lambda-api with MIT License

5 votes

def test_ValidBz2File(self):
        """The datasource must read back the line written to a bz2 file."""
        try:
            import bz2
        except ImportError:
            # Without bz2 there is nothing to test.
            raise SkipTest

        # Build the fixture file with a known first line.
        bz2_path = os.path.join(self.tmpdir, 'foobar.txt.bz2')
        writer = bz2.BZ2File(bz2_path, 'w')
        writer.write(magic_line)
        writer.close()

        # Read it back through the datasource's internal file_opener.
        reader = self.ds.open(bz2_path)
        first_line = reader.readline()
        reader.close()
        self.assertEqual(magic_line, first_line)

Source File: _iotools.py From auto-alt-text-lambda-api with MIT License

5 votes

def _to_filehandle(fname, flag='r', return_opened=False):
    """
    Returns the filehandle corresponding to a string or a file.
    If the string ends in '.gz' or '.bz2', the file is opened through
    `gzip` or `bz2` respectively.

    Parameters
    ----------
    fname : string, filehandle
        Name of the file whose filehandle must be returned.
    flag : string, optional
        Flag indicating the status of the file ('r' for read, 'w' for write).
        It is not forwarded for '.bz2' names, which are always opened with
        the default mode of ``bz2.BZ2File``.
    return_opened : boolean, optional
        Whether to return the opening status of the file.

    Returns
    -------
    fhd : filehandle
        The opened (or passed-through) file handle.
    opened : bool
        Only returned when `return_opened` is true: True when this function
        opened the file, False when `fname` already was a handle.

    Raises
    ------
    ValueError
        If `fname` is neither string-like nor an object with a ``seek``
        attribute.
    """
    if _is_string_like(fname):
        if fname.endswith('.gz'):
            import gzip
            fhd = gzip.open(fname, flag)
        elif fname.endswith('.bz2'):
            import bz2
            fhd = bz2.BZ2File(fname)
        else:
            # ``open`` exists on Python 2 and 3; the ``file`` builtin does
            # not exist on Python 3.
            fhd = open(fname, flag)
        opened = True
    elif hasattr(fname, 'seek'):
        fhd = fname
        opened = False
    else:
        raise ValueError('fname must be a string or file handle')
    if return_opened:
        return fhd, opened
    return fhd

Source File: WikiExtractor.py From comparable-text-miner with Apache License 2.0

5 votes

def open(self, filename):
        """Open `filename` for writing; with ``self.compress`` set, write a
        bz2 file named ``filename + '.bz2'`` instead."""
        if not self.compress:
            return open(filename, 'w')
        return bz2.BZ2File(filename + '.bz2', 'w')

# ----------------------------------------------------------------------
# READER

Source File: test_tarfile.py From ironpython2 with Apache License 2.0

5 votes

def test_detect_stream_bz2(self):
        """Stream detection must accept bz2 data written with compresslevel=1."""
        # Originally, tarfile's stream detection looked for the string
        # "BZh91" at the start of the file. This is incorrect because
        # the '9' represents the blocksize (900kB). If the file was
        # compressed using another blocksize autodetection fails.
        with open(tarname, "rb") as source:
            payload = source.read()

        # Compress with blocksize 100kB, the file starts with "BZh11".
        with bz2.BZ2File(tmpname, "wb", compresslevel=1) as target:
            target.write(payload)

        self._testfunc_file(tmpname, "r|*")

Source File: _datasource.py From lambda-packs with MIT License

5 votes

def _load(self):
        """Populate ``self._file_openers`` on first use.

        Adds ``bz2.BZ2File`` for ".bz2" and ``gzip.open`` for ".gz" when the
        respective module imports, then marks the instance as loaded.
        Calls made after that do nothing.
        """
        if not self._loaded:
            openers = self._file_openers
            try:
                import bz2
            except ImportError:
                pass
            else:
                openers[".bz2"] = bz2.BZ2File
            try:
                import gzip
            except ImportError:
                pass
            else:
                openers[".gz"] = gzip.open
            self._loaded = True

Python bz2.BZ2File() Examples