Python bz2.BZ2File() Examples

The following are 30 code examples of bz2.BZ2File(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module bz2, or try the search function.
Example #1
Source File: _datasource.py    From lambda-packs with MIT License 6 votes vote down vote up
def _python2_bz2open(fn, mode, encoding, newline):
    """Open a bz2 file on Python 2, falling back to binary for text mode.

    Parameters
    ----------
    fn : str
        File name
    mode : {'r', 'w'}
        File mode. A ``"t"`` flag is stripped (after emitting a
        ``RuntimeWarning``) because bz2 text files are not supported.
    encoding : str
        Only forwarded to ``_check_mode``; not used to open the file.
    newline : str
        Only forwarded to ``_check_mode``; not used to open the file.

    Returns
    -------
    bz2.BZ2File
        The file opened with ``mode`` minus any ``"t"`` flag.
    """
    import bz2

    _check_mode(mode, encoding, newline)

    if "t" not in mode:
        return bz2.BZ2File(fn, mode)

    # BZ2File is missing necessary functions for TextIOWrapper
    warnings.warn("Assuming latin1 encoding for bz2 text file in Python2",
                  RuntimeWarning, stacklevel=5)
    binary_mode = mode.replace("t", "")
    return bz2.BZ2File(fn, binary_mode)
Example #2
Source File: NRRD.py    From ClearMap with GNU General Public License v3.0 6 votes vote down vote up
def _write_data(data, filehandle, options):
    """Serialize ``data`` to ``filehandle`` using the requested encoding.

    :param data: array with exactly three axes; the axes are reversed
        (``transpose([2, 1, 0])``) and flattened in C order before writing.
    :param filehandle: binary file-like object providing ``write``.
    :param options: mapping whose ``'encoding'`` entry is ``'raw'``,
        ``'gzip'`` or ``'bz2'``.
    :raises NrrdError: if ``options['encoding']`` is any other value.
    """
    # ``tobytes`` is the supported spelling of the old ``tostring`` alias.
    rawdata = data.transpose([2, 1, 0]).tobytes(order='C')
    encoding = options['encoding']
    if encoding == 'raw':
        filehandle.write(rawdata)
    elif encoding == 'gzip':
        # Ask for write mode explicitly: otherwise GzipFile takes the mode
        # from ``filehandle.mode`` and assumes reading when it is absent.
        gzfileobj = gzip.GzipFile(fileobj=filehandle, mode='wb')
        try:
            gzfileobj.write(rawdata)
        finally:
            gzfileobj.close()
    elif encoding == 'bz2':
        # BZ2File has no ``fileobj`` keyword, so compress in memory and
        # write the resulting stream to the already-open handle instead.
        filehandle.write(bz2.compress(rawdata))
    else:
        raise NrrdError('Unsupported encoding: "%s"' % options['encoding'])
Example #3
Source File: outputhandler.py    From benchexec with Apache License 2.0 6 votes vote down vote up
def _write_pretty_result_xml_to_file(self, xml, filename):
        """Write ``xml`` as indented UTF-8 XML with a DOCTYPE, compressed if configured.

        With ``self.compress_results`` set, the output goes to
        ``filename + ".bz2"`` and a plain file at ``filename`` is removed if
        possible. Otherwise the content is written to ``filename + ".tmp"``
        and then renamed onto ``filename``. ``self.all_created_files`` is
        updated to contain the file that is kept. Returns ``filename``.
        """
        compress = self.compress_results
        if compress:
            target, opener = filename + ".bz2", bz2.BZ2File
        else:
            # write content to temp file first to prevent losing data
            # in existing file if writing fails
            target, opener = filename + ".tmp", open

        with io.TextIOWrapper(opener(target, "wb"), encoding="utf-8") as out:
            document = minidom.parseString(
                ElementTree.tostring(xml, encoding="unicode")
            )
            document.insertBefore(
                minidom.DOMImplementation().createDocumentType(
                    "result", RESULT_XML_PUBLIC_ID, RESULT_XML_SYSTEM_ID
                ),
                document.documentElement,
            )
            document.writexml(
                out, indent="", addindent="  ", newl="\n", encoding="utf-8"
            )

        if not compress:
            os.rename(target, filename)
            self.all_created_files.add(filename)
            return filename

        # try to delete uncompressed file (would have been overwritten in no-compress-mode)
        try:
            os.remove(filename)
        except OSError:
            pass
        self.all_created_files.discard(filename)
        self.all_created_files.add(target)
        return filename
Example #4
Source File: _datasource.py    From recruit with Apache License 2.0 6 votes vote down vote up
def _python2_bz2open(fn, mode, encoding, newline):
    """Return a ``bz2.BZ2File`` for `fn`, dropping any text-mode flag.

    Parameters
    ----------
    fn : str
        File name
    mode : {'r', 'w'}
        File mode. If it contains ``"t"``, a ``RuntimeWarning`` is issued
        and the file is opened with the ``"t"`` removed.
    encoding : str
        Passed to ``_check_mode`` only; the file is opened without it.
    newline : str
        Passed to ``_check_mode`` only; the file is opened without it.
    """
    import bz2

    _check_mode(mode, encoding, newline)

    wants_text = "t" in mode
    if wants_text:
        # BZ2File is missing necessary functions for TextIOWrapper
        warnings.warn("Assuming latin1 encoding for bz2 text file in Python2",
                      RuntimeWarning, stacklevel=5)
    effective_mode = mode.replace("t", "") if wants_text else mode
    return bz2.BZ2File(fn, effective_mode)
Example #5
Source File: test__datasource.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_Bz2File_text_mode_warning(self):
        # Opening a bz2 file through the datasource with mode 'rt' must emit
        # a RuntimeWarning and still return the line that was written.
        try:
            import bz2
        except ImportError:
            # We don't have the bz2 capabilities to test.
            pytest.skip()
        # Build the compressed fixture inside the test's temporary directory.
        bz2_path = os.path.join(self.tmpdir, 'foobar.txt.bz2')
        writer = bz2.BZ2File(bz2_path, 'w')
        writer.write(magic_line)
        writer.close()
        with assert_warns(RuntimeWarning):
            reader = self.ds.open(bz2_path, 'rt')
            first_line = reader.readline()
            reader.close()
        assert_equal(magic_line, first_line)
Example #6
Source File: world.py    From psychsim with MIT License 6 votes vote down vote up
def save(self,filename,compressed=True):
        """
        Write the pretty-printed XML returned by ``self.__xml__()`` to a file.

        :param filename: target path; ``.psy`` (compressed) or ``.xml`` (uncompressed) is appended when the name does not already end with it
        :type filename: str
        :param compressed: if C{True}, then save in compressed XML; otherwise, save in XML (default is C{True})
        :type compressed: bool
        :returns: the filename used (possibly with a .psy or .xml extension added)
        :rtype: str
        """
        extension = '.psy' if compressed else '.xml'
        if not filename.endswith(extension):
            filename = '%s%s' % (filename, extension)
        # Serialize before opening so that a failure here neither truncates
        # an existing file nor leaves a handle open.
        document = self.__xml__().toprettyxml()
        if compressed:
            # BZ2File takes bytes, hence the explicit UTF-8 encoding.
            with bz2.BZ2File(filename,'w') as f:
                f.write(document.encode('utf-8'))
        else:
            with open(filename,'w') as f:
                f.write(document)
        return filename
Example #7
Source File: tarfile.py    From meddle with MIT License 6 votes vote down vote up
def bz2open(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
        """Open bzip2 compressed tar archive name for reading or writing.
           Appending is not allowed.

           When `fileobj` is given it is wrapped in a _BZ2Proxy; otherwise
           `name` is opened with bz2.BZ2File. Raises ValueError for any
           mode other than 'r' or 'w', CompressionError if the bz2 module
           cannot be imported, and ReadError if cls.taropen() fails with
           IOError or EOFError.
        """
        # Compare against the exact mode strings: the substring test
        # ``mode not in "rw"`` would also accept the empty string.
        if mode not in ("r", "w"):
            raise ValueError("mode must be 'r' or 'w'.")

        try:
            import bz2
        except ImportError:
            raise CompressionError("bz2 module is not available")

        if fileobj is not None:
            fileobj = _BZ2Proxy(fileobj, mode)
        else:
            fileobj = bz2.BZ2File(name, mode, compresslevel=compresslevel)

        try:
            t = cls.taropen(name, mode, fileobj, **kwargs)
        except (IOError, EOFError):
            # Release the bz2 stream created above before reporting failure.
            fileobj.close()
            raise ReadError("not a bzip2 file")
        t._extfileobj = False
        return t

    # All *open() methods are registered here. 
Example #8
Source File: BotDigger.py    From BotDigger with GNU General Public License v3.0 6 votes vote down vote up
def openFile(filename, modes):
	"""Open `filename` with the reader matching the type reported by file_type().

	`modes` is only applied to files whose type is None (plain files); the
	bz2, gz, xz and zip readers are created with their default mode.
	Raises LookupError for any other file type.
	"""
	filetype = file_type(filename)
	if filetype is None:
		return open(filename, modes)
	elif filetype == "bz2":
		return bz2.BZ2File(filename)
	elif filetype == "gz":
		return gzip.open(filename)
	elif filetype == "xz":
		# Let LZMAFile open the path itself. Wrapping a handle obtained in a
		# `with` block returned a reader over an already-closed file.
		return xz.LZMAFile(filename)
	elif filetype == "zip":
		return zipfile.ZipFile(filename)
	else:
		# should never get here
		raise LookupError("filetype is invalid")
Example #9
Source File: test_read_fwf.py    From vnpy_crypto with MIT License 6 votes vote down vote up
def test_fwf_compression(self):
        # read_fwf must return the same frame from gzip- and bz2-compressed
        # files as it does from the uncompressed text.
        try:
            import gzip
            import bz2
        except ImportError:
            pytest.skip("Need gzip and bz2 to run this test")

        data = """1111111111
        2222222222
        3333333333""".strip()
        layout = dict(widths=[5, 5], names=['one', 'two'])
        expected = read_fwf(StringIO(data), **layout)
        if compat.PY3:
            data = bytes(data, encoding='utf-8')
        for method, opener in (('gzip', gzip.GzipFile), ('bz2', bz2.BZ2File)):
            with tm.ensure_clean() as path:
                handle = opener(path, mode='wb')
                handle.write(data)
                handle.close()
                result = read_fwf(path, compression=method, **layout)
                tm.assert_frame_equal(result, expected)
Example #10
Source File: _datasource.py    From vnpy_crypto with MIT License 6 votes vote down vote up
def _python2_bz2open(fn, mode, encoding, newline):
    """Open a bz2 file in binary mode; text mode is rejected.

    Parameters
    ----------
    fn : str
        File name
    mode : {'r', 'w'}
        File mode. Note that bz2 Text files are not supported.
    encoding : str
        Passed to ``_check_mode`` only; not used to open the file.
    newline : str
        Passed to ``_check_mode`` only; not used to open the file.

    Raises
    ------
    ValueError
        If `mode` contains ``"t"``.
    """
    import bz2

    _check_mode(mode, encoding, newline)

    # BZ2File is missing necessary functions for TextIOWrapper
    if "t" in mode:
        raise ValueError("bz2 text files not supported in python2")
    return bz2.BZ2File(fn, mode)
Example #11
Source File: tarfile.py    From jawfish with MIT License 6 votes vote down vote up
def bz2open(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
        """Open bzip2 compressed tar archive name for reading or writing.
           Appending is not allowed.

           The archive is read from `fileobj` when it is truthy, otherwise
           from the file `name`. Raises ValueError for any mode other than
           'r' or 'w', CompressionError if the bz2 module cannot be
           imported, and ReadError if cls.taropen() fails with IOError or
           EOFError.
        """
        # Compare against the exact mode strings: the substring test
        # ``mode not in "rw"`` would also accept the empty string.
        if mode not in ("r", "w"):
            raise ValueError("mode must be 'r' or 'w'.")

        try:
            import bz2
        except ImportError:
            raise CompressionError("bz2 module is not available")

        fileobj = bz2.BZ2File(fileobj or name, mode,
                              compresslevel=compresslevel)

        try:
            t = cls.taropen(name, mode, fileobj, **kwargs)
        except (IOError, EOFError):
            # Release the bz2 stream created above before reporting failure.
            fileobj.close()
            raise ReadError("not a bzip2 file")
        t._extfileobj = False
        return t
Example #12
Source File: compress_files.py    From osspolice with GNU General Public License v3.0 5 votes vote down vote up
def open(self):
        """Return a ``bz2.BZ2File`` created from ``self.f`` with the default mode."""
        reader = bz2.BZ2File(self.f)
        return reader
Example #13
Source File: _iotools.py    From vnpy_crypto with MIT License 5 votes vote down vote up
def _to_filehandle(fname, flag='r', return_opened=False):
    """
    Returns the filehandle corresponding to a string or a file.
    If the string ends in '.gz' or '.bz2', the file is opened with the
    matching gzip / bz2 file class instead of the plain `open`.

    Parameters
    ----------
    fname : string, filehandle
        Name of the file whose filehandle must be returned.
    flag : string, optional
        Flag indicating the status of the file ('r' for read, 'w' for write).
    return_opened : boolean, optional
        Whether to return the opening status of the file.

    Returns
    -------
    fhd : filehandle
        The newly opened file, or `fname` itself if it has a ``seek``
        attribute.
    opened : bool
        Only returned if `return_opened` is true: whether this function
        opened the file.

    Raises
    ------
    ValueError
        If `fname` is neither string-like nor has a ``seek`` attribute.
    """
    if _is_string_like(fname):
        if fname.endswith('.gz'):
            import gzip
            fhd = gzip.open(fname, flag)
        elif fname.endswith('.bz2'):
            import bz2
            # Honour `flag` like the other branches do; previously the file
            # was always opened for reading.
            fhd = bz2.BZ2File(fname, flag)
        else:
            # `file` only exists on Python 2; `open` works on 2 and 3.
            fhd = open(fname, flag)
        opened = True
    elif hasattr(fname, 'seek'):
        fhd = fname
        opened = False
    else:
        raise ValueError('fname must be a string or file handle')
    if return_opened:
        return fhd, opened
    return fhd
Example #14
Source File: compress_files.py    From osspolice with GNU General Public License v3.0 5 votes vote down vote up
def get_compressed_file(filename):
    """Return a wrapper for the archive format detected in `filename`.

    The first 1024 bytes are handed to each candidate class's ``is_magic``;
    the first class that accepts them is instantiated. GZFile, BZ2File and
    TarFile are constructed from `filename`, every other class from the
    open binary handle (rewound to offset 0). Returns None if no class
    matches.
    """
    f = open(filename, 'rb')
    # The handle only has to outlive this call when a wrapper was built on
    # top of it; in every other case (including errors) it is closed here.
    handed_over = False
    try:
        start_of_file = f.read(1024)
        f.seek(0)
        for cls in (ZIPFile, BZ2File, GZFile, SevenZFile, TarFile, XZFile, JARCSFile, MARFile, RARFile, WinZIPFile):
            if not cls.is_magic(start_of_file):
                continue
            if cls in (GZFile, BZ2File, TarFile):
                return cls(filename)
            wrapper = cls(f)
            handed_over = True
            return wrapper
        return None
    finally:
        if not handed_over:
            f.close()
Example #15
Source File: utils.py    From topical_word_embeddings with MIT License 5 votes vote down vote up
def make_closing(base, **attrs):
    """
    Build a subclass of `base` named ``'Closing' + base.__name__`` that can be
    used in a ``with`` statement.

    `__enter__` (returning the instance) and `__exit__` (calling `close()`)
    are only supplied when `base` lacks them; any extra keyword arguments
    become class attributes of the new type.

    This is needed for gzip.GzipFile, bz2.BZ2File etc in older Pythons (<=2.6), which otherwise
    raise "AttributeError: GzipFile instance has no attribute '__exit__'".

    """
    fallbacks = (
        ('__enter__', lambda self: self),
        ('__exit__', lambda self, type, value, traceback: self.close()),
    )
    for dunder, implementation in fallbacks:
        if not hasattr(base, dunder):
            attrs[dunder] = implementation
    class_name = 'Closing' + base.__name__
    return type(class_name, (base, object), attrs)
Example #16
Source File: wikicorpus.py    From topical_word_embeddings with MIT License 5 votes vote down vote up
def get_texts(self):
        """
        Iterate over the dump, returning text version of each article as a list
        of tokens.

        Only articles of sufficient length are returned (short articles & redirects
        etc are ignored).

        Note that this iterates over the **texts**; if you want vectors, just use
        the standard corpus interface instead of this function::

        >>> for vec in wiki_corpus:
        >>>     print(vec)

        When `self.metadata` is set, each item is `(tokens, (pageid, title))`
        instead of just `tokens`. After a complete pass the number of yielded
        articles is cached in `self.length`.
        """
        articles, articles_all = 0, 0
        positions, positions_all = 0, 0
        texts = ((text, self.lemmatize, title, pageid) for title, text, pageid in extract_pages(bz2.BZ2File(self.fname), self.filter_namespaces))
        pool = multiprocessing.Pool(self.processes)
        # Built once so the per-article check is a single startswith() call.
        ignore_prefixes = tuple(
            namespace + ':' for namespace in
            'Wikipedia Category File Portal Template MediaWiki User Help Book Draft'.split())
        try:
            # process the corpus in smaller chunks of docs, because multiprocessing.Pool
            # is dumb and would load the entire input into RAM at once...
            for group in utils.chunkize(texts, chunksize=10 * self.processes, maxsize=1):
                for tokens, title, pageid in pool.imap(process_article, group): # chunksize=10):
                    articles_all += 1
                    positions_all += len(tokens)
                    # article redirects and short stubs are pruned here
                    if len(tokens) < ARTICLE_MIN_WORDS or title.startswith(ignore_prefixes):
                        continue
                    articles += 1
                    positions += len(tokens)
                    if self.metadata:
                        yield (tokens, (pageid, title))
                    else:
                        yield tokens
        finally:
            # Also stop the workers when the consumer abandons the generator
            # early or an error interrupts the iteration.
            pool.terminate()

        logger.info("finished iterating over Wikipedia corpus of %i documents with %i positions"
            " (total %i articles, %i positions before pruning articles shorter than %i words)",
            articles, positions, articles_all, positions_all, ARTICLE_MIN_WORDS)
        self.length = articles # cache corpus length
# endclass WikiCorpus 
Example #17
Source File: _fileio.py    From pysat with MIT License 5 votes vote down vote up
def open(self, name, mode='r', compression=None):
        """
            Open `name` and store the resulting file object in ``self.fp``.

            Compressed files are always opened in text mode; an uncompressed
            file is opened with `mode` exactly as given.

            :param name: path of the file to open
            :param mode: 'r' or 'w'
            :param compression: ``None`` for no compression, 'gzip', 'bzip2',
                or 'use_ext' to let ``self.get_compression_type(name)``
                decide (presumably by setting ``self.ctype`` -- verify
                against that method); any other truthy value is treated as
                LZMA
            :raises AssertionError: if LZMA is requested while
                ``lzma_present`` is false
        """

        if compression == 'use_ext':
            self.get_compression_type(name)
        else:
            self.ctype = compression

        if not self.ctype:
            self.fp = open(name, mode)
        elif self.ctype == 'gzip':
            self.fp = gzip.open(name, mode + 't')
        elif self.ctype == 'bzip2':
            # Python 3 supports opening bzip2 files in text mode, therefore
            # we prefer to open them this way. Only the absence of bz2.open
            # selects the fallback, so real I/O errors are not masked.
            bz2_open = getattr(bz2, 'open', None)
            if bz2_open is not None:
                self.fp = bz2_open(name, mode + 't')
            else:
                # BZ2File opens a file in binary mode
                # thus, we have to use codecs.getreader()
                # to be able to use it in text mode
                self.fp_extra = bz2.BZ2File(name, mode)

                if mode == 'r':
                    self.fp = codecs.getreader('ascii')(self.fp_extra)
                else:  # mode == 'w'
                    self.fp = codecs.getwriter('ascii')(self.fp_extra)
        else:  # self.ctype == 'lzma'
            # LZMA is available in Python 2 only if backports.lzma is installed
            # Python 3 supports it by default
            # Raised explicitly so the check survives ``python -O``.
            if not lzma_present:
                raise AssertionError('LZMA compression is unavailable.')
            self.fp = lzma.open(name, mode=mode + 't')
Example #18
Source File: utils.py    From topical_word_embeddings with MIT License 5 votes vote down vote up
def smart_open(fname, mode='rb'):
    """Open `fname` with a reader chosen by its extension.

    '.bz2' and '.gz' files go through BZ2File / GzipFile wrapped by
    `make_closing`; anything else is opened with the builtin `open`.
    """
    extension = os.path.splitext(fname)[1]
    if extension == '.bz2':
        from bz2 import BZ2File
        opener = make_closing(BZ2File)
    elif extension == '.gz':
        from gzip import GzipFile
        opener = make_closing(GzipFile)
    else:
        opener = open
    return opener(fname, mode)
Example #19
Source File: test_tarfile.py    From ironpython2 with Apache License 2.0 5 votes vote down vote up
def test_fileobj_with_offset(self):
        # Skip the first member and store values from the second member
        # of the testtar.
        archive = tarfile.open(self.tarname, mode=self.mode)
        try:
            archive.next()
            member = archive.next()
            name = member.name
            offset = member.offset
            data = archive.extractfile(member).read()
        finally:
            archive.close()

        # Pick the opener that yields the uncompressed byte stream for this
        # test's mode.
        if self.mode.endswith(":gz"):
            opener = gzip.GzipFile
        elif self.mode.endswith(":bz2"):
            opener = bz2.BZ2File
        else:
            opener = open

        # Open the testtar and seek to the offset of the second member.
        fobj = opener(self.tarname, "rb")
        try:
            fobj.seek(offset)

            # Test if the tarfile starts with the second member.
            archive = tarfile.open(self.tarname, mode="r:", fileobj=fobj)
            member = archive.next()
            self.assertEqual(member.name, name)
            # Read to the end of fileobj and test if seeking back to the
            # beginning works.
            archive.getmembers()
            self.assertEqual(archive.extractfile(member).read(), data,
                    "seek back did not work")
            archive.close()
        finally:
            fobj.close()
Example #20
Source File: tarfile.py    From kobo-predict with BSD 2-Clause "Simplified" License 5 votes vote down vote up
def bz2open(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
        """Open bzip2 compressed tar archive name for reading or writing.
           Appending is not allowed.

           The bz2 stream is closed again whenever cls.taropen() raises; in
           read mode an OSError or EOFError is reported as ReadError.
        """
        if mode not in ("r", "w", "x"):
            raise ValueError("mode must be 'r', 'w' or 'x'")

        try:
            import bz2
        except ImportError:
            raise CompressionError("bz2 module is not available")

        stream = bz2.BZ2File(fileobj or name, mode,
                             compresslevel=compresslevel)

        try:
            archive = cls.taropen(name, mode, stream, **kwargs)
        except BaseException as exc:
            # Never leave the stream open behind a failed open.
            stream.close()
            if mode == 'r' and isinstance(exc, (OSError, EOFError)):
                raise ReadError("not a bzip2 file")
            raise
        archive._extfileobj = False
        return archive
Example #21
Source File: tarfile.py    From kobo-predict with BSD 2-Clause "Simplified" License 5 votes vote down vote up
def bz2open(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
        """Open bzip2 compressed tar archive name for reading or writing.
           Appending is not allowed.

           When `fileobj` is given it is wrapped in a _BZ2Proxy; otherwise
           `name` is opened with bz2.BZ2File. Raises ValueError for any
           mode other than 'r' or 'w', CompressionError if the bz2 module
           cannot be imported, and ReadError if cls.taropen() fails with
           IOError or EOFError.
        """
        # Compare against the exact mode strings: the substring test
        # ``mode not in "rw"`` would also accept the empty string.
        if mode not in ("r", "w"):
            raise ValueError("mode must be 'r' or 'w'.")

        try:
            import bz2
        except ImportError:
            raise CompressionError("bz2 module is not available")

        if fileobj is not None:
            fileobj = _BZ2Proxy(fileobj, mode)
        else:
            fileobj = bz2.BZ2File(name, mode, compresslevel=compresslevel)

        try:
            t = cls.taropen(name, mode, fileobj, **kwargs)
        except (IOError, EOFError):
            # Release the bz2 stream created above before reporting failure.
            fileobj.close()
            raise ReadError("not a bzip2 file")
        t._extfileobj = False
        return t

    # All *open() methods are registered here. 
Example #22
Source File: tarfile.py    From kobo-predict with BSD 2-Clause "Simplified" License 5 votes vote down vote up
def seekable(self):
        """Return ``self.fileobj.seekable()``, or True if it has no such method."""
        underlying = self.fileobj
        if hasattr(underlying, "seekable"):
            return underlying.seekable()
        # XXX gzip.GzipFile and bz2.BZ2File
        return True
Example #23
Source File: tarfile.py    From Python24 with MIT License 5 votes vote down vote up
def bz2open(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
        """Open bzip2 compressed tar archive name for reading or writing.
           Appending is not allowed.

           When `fileobj` is given it is wrapped in a _BZ2Proxy; otherwise
           `name` is opened with bz2.BZ2File. Raises ValueError for any
           mode other than 'r' or 'w', CompressionError if the bz2 module
           cannot be imported, and ReadError if cls.taropen() fails with
           IOError or EOFError.
        """
        # Compare against the exact mode strings: the substring test
        # ``mode not in "rw"`` would also accept the empty string.
        if mode not in ("r", "w"):
            raise ValueError("mode must be 'r' or 'w'.")

        try:
            import bz2
        except ImportError:
            raise CompressionError("bz2 module is not available")

        if fileobj is not None:
            fileobj = _BZ2Proxy(fileobj, mode)
        else:
            fileobj = bz2.BZ2File(name, mode, compresslevel=compresslevel)

        try:
            t = cls.taropen(name, mode, fileobj, **kwargs)
        except (IOError, EOFError):
            # Release the bz2 stream created above before reporting failure.
            fileobj.close()
            raise ReadError("not a bzip2 file")
        t._extfileobj = False
        return t

    # All *open() methods are registered here. 
Example #24
Source File: tarfile.py    From Python24 with MIT License 5 votes vote down vote up
def seekable(self):
        """Report whether the wrapped file object can seek.

        Delegates to ``self.fileobj.seekable()`` when that attribute exists
        and answers True otherwise.
        """
        wrapped = self.fileobj
        if hasattr(wrapped, "seekable"):
            return wrapped.seekable()
        # XXX gzip.GzipFile and bz2.BZ2File
        return True
Example #25
Source File: _datasource.py    From auto-alt-text-lambda-api with MIT License 5 votes vote down vote up
def _load(self):
        """Register the available compressed-file openers once.

        Adds ``bz2.BZ2File`` under ".bz2" and ``gzip.open`` under ".gz" to
        ``self._file_openers`` for each module that can be imported, then
        sets ``self._loaded`` so later calls return immediately.
        """
        if not self._loaded:
            try:
                import bz2
            except ImportError:
                pass
            else:
                self._file_openers[".bz2"] = bz2.BZ2File
            try:
                import gzip
            except ImportError:
                pass
            else:
                self._file_openers[".gz"] = gzip.open
            self._loaded = True
Example #26
Source File: test__datasource.py    From auto-alt-text-lambda-api with MIT License 5 votes vote down vote up
def test_ValidBz2File(self):
        # A line written with bz2.BZ2File must come back unchanged when the
        # file is read through the datasource's open().
        try:
            import bz2
        except ImportError:
            # We don't have the bz2 capabilities to test.
            raise SkipTest
        # Test datasource's internal file_opener for BZip2 files.
        bz2_path = os.path.join(self.tmpdir, 'foobar.txt.bz2')
        writer = bz2.BZ2File(bz2_path, 'w')
        writer.write(magic_line)
        writer.close()
        reader = self.ds.open(bz2_path)
        first_line = reader.readline()
        reader.close()
        self.assertEqual(magic_line, first_line)
Example #27
Source File: _iotools.py    From auto-alt-text-lambda-api with MIT License 5 votes vote down vote up
def _to_filehandle(fname, flag='r', return_opened=False):
    """
    Returns the filehandle corresponding to a string or a file.
    If the string ends in '.gz' or '.bz2', the file is opened with the
    matching gzip / bz2 file class instead of the plain `open`.

    Parameters
    ----------
    fname : string, filehandle
        Name of the file whose filehandle must be returned.
    flag : string, optional
        Flag indicating the status of the file ('r' for read, 'w' for write).
    return_opened : boolean, optional
        Whether to return the opening status of the file.

    Returns
    -------
    fhd : filehandle
        The newly opened file, or `fname` itself if it has a ``seek``
        attribute.
    opened : bool
        Only returned if `return_opened` is true: whether this function
        opened the file.

    Raises
    ------
    ValueError
        If `fname` is neither string-like nor has a ``seek`` attribute.
    """
    if _is_string_like(fname):
        if fname.endswith('.gz'):
            import gzip
            fhd = gzip.open(fname, flag)
        elif fname.endswith('.bz2'):
            import bz2
            # Honour `flag` like the other branches do; previously the file
            # was always opened for reading.
            fhd = bz2.BZ2File(fname, flag)
        else:
            # `file` only exists on Python 2; `open` works on 2 and 3.
            fhd = open(fname, flag)
        opened = True
    elif hasattr(fname, 'seek'):
        fhd = fname
        opened = False
    else:
        raise ValueError('fname must be a string or file handle')
    if return_opened:
        return fhd, opened
    return fhd
Example #28
Source File: WikiExtractor.py    From comparable-text-miner with Apache License 2.0 5 votes vote down vote up
def open(self, filename):
        """Open `filename` for writing, bz2-compressed when ``self.compress`` is set.

        With compression the file is created as ``filename + '.bz2'``.
        """
        if not self.compress:
            return open(filename, 'w')
        return bz2.BZ2File(filename + '.bz2', 'w')

# ----------------------------------------------------------------------
# READER 
Example #29
Source File: test_tarfile.py    From ironpython2 with Apache License 2.0 5 votes vote down vote up
def test_detect_stream_bz2(self):
        # Originally, tarfile's stream detection looked for the string
        # "BZh91" at the start of the file. This is incorrect because
        # the '9' represents the blocksize (900kB). If the file was
        # compressed using another blocksize autodetection fails.
        with open(tarname, "rb") as source:
            payload = source.read()

        # Compress with blocksize 100kB, the file starts with "BZh11".
        with bz2.BZ2File(tmpname, "wb", compresslevel=1) as compressed:
            compressed.write(payload)

        # Stream mode with transparent compression detection.
        self._testfunc_file(tmpname, "r|*")
Example #30
Source File: _datasource.py    From lambda-packs with MIT License 5 votes vote down vote up
def _load(self):
        """Populate ``self._file_openers`` on first use.

        ``bz2.BZ2File`` is stored under ".bz2" and ``gzip.open`` under ".gz"
        whenever the corresponding module imports successfully. The
        ``self._loaded`` flag makes every later call a no-op.
        """
        if not self._loaded:
            try:
                import bz2
            except ImportError:
                pass
            else:
                self._file_openers[".bz2"] = bz2.BZ2File
            try:
                import gzip
            except ImportError:
                pass
            else:
                self._file_openers[".gz"] = gzip.open
            self._loaded = True