Python bz2.BZ2File() Examples
The following are 30 code examples of bz2.BZ2File().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module bz2, or try the search function.
Example #1
Source File: _datasource.py From lambda-packs with MIT License | 6 votes |
def _python2_bz2open(fn, mode, encoding, newline):
    """Open a bz2 file on Python 2, degrading text mode to binary mode.

    Parameters
    ----------
    fn : str
        File name.
    mode : {'r', 'w'}
        File mode. A ``'t'`` flag is stripped (with a warning) because
        bz2 text files are not supported here.
    encoding : str
        Only forwarded to ``_check_mode``; never used to decode the file.
    newline : str
        Only forwarded to ``_check_mode``; never used by the returned file.

    Returns
    -------
    bz2.BZ2File
        The file opened with ``mode`` minus any ``'t'`` characters.
    """
    import bz2

    _check_mode(mode, encoding, newline)

    if "t" not in mode:
        return bz2.BZ2File(fn, mode)

    # BZ2File is missing necessary functions for TextIOWrapper, so warn
    # the caller and hand back a binary-mode handle instead.
    warnings.warn("Assuming latin1 encoding for bz2 text file in Python2",
                  RuntimeWarning, stacklevel=5)
    binary_mode = mode.replace("t", "")
    return bz2.BZ2File(fn, binary_mode)
Example #2
Source File: NRRD.py From ClearMap with GNU General Public License v3.0 | 6 votes |
def _write_data(data, filehandle, options):
    """Write ``data`` to ``filehandle`` using ``options['encoding']``.

    The array axes are reordered with ``transpose([2, 1, 0])`` and the
    elements are dumped as C-ordered bytes before being written raw,
    gzip-compressed or bz2-compressed.

    Parameters
    ----------
    data : numpy.ndarray
        Array to serialize; the fixed three-axis transpose requires it to
        be three-dimensional.
    filehandle : file-like
        Binary, writable object. It is written to but never closed here.
    options : dict
        Must contain the key ``'encoding'`` with one of ``'raw'``,
        ``'gzip'`` or ``'bz2'``.

    Raises
    ------
    NrrdError
        If ``options['encoding']`` is not one of the supported values.
    """
    # tobytes() is the supported spelling of the deprecated (and, in newer
    # NumPy releases, removed) tostring().
    rawdata = data.transpose([2, 1, 0]).tobytes(order='C')

    encoding = options['encoding']
    if encoding == 'raw':
        filehandle.write(rawdata)
    elif encoding == 'gzip':
        # Ask for write mode explicitly: without it GzipFile falls back to
        # the handle's own ``mode`` attribute, or to reading if there is none.
        gzfileobj = gzip.GzipFile(fileobj=filehandle, mode='wb')
        try:
            gzfileobj.write(rawdata)
        finally:
            gzfileobj.close()
    elif encoding == 'bz2':
        # BZ2File takes no ``fileobj`` keyword (that call raised TypeError),
        # so compress in memory and write the resulting stream directly.
        filehandle.write(bz2.compress(rawdata))
    else:
        raise NrrdError('Unsupported encoding: "%s"' % options['encoding'])
Example #3
Source File: outputhandler.py From benchexec with Apache License 2.0 | 6 votes |
def _write_pretty_result_xml_to_file(self, xml, filename):
    """Write ``xml`` as indented XML with a DOCTYPE, bz2-compressed if configured.

    With ``self.compress_results`` set, the output goes to
    ``filename + ".bz2"`` and any uncompressed ``filename`` is removed.
    Otherwise the output is written to ``filename + ".tmp"`` and then
    renamed onto ``filename``. ``self.all_created_files`` is updated to
    match, and ``filename`` (without any suffix) is returned in both cases.
    """
    compress = self.compress_results
    if compress:
        target, opener = filename + ".bz2", bz2.BZ2File
    else:
        # write content to temp file first to prevent losing data
        # in existing file if writing fails
        target, opener = filename + ".tmp", open

    with io.TextIOWrapper(opener(target, "wb"), encoding="utf-8") as out:
        # Round-trip through minidom to get pretty-printing and a DOCTYPE.
        serialized = ElementTree.tostring(xml, encoding="unicode")
        document = minidom.parseString(serialized)
        doctype = minidom.DOMImplementation().createDocumentType(
            "result", RESULT_XML_PUBLIC_ID, RESULT_XML_SYSTEM_ID
        )
        document.insertBefore(doctype, document.documentElement)
        document.writexml(
            out, indent="", addindent=" ", newl="\n", encoding="utf-8"
        )

    if not compress:
        os.rename(target, filename)
        self.all_created_files.add(filename)
        return filename

    # try to delete uncompressed file (would have been overwritten in no-compress-mode)
    try:
        os.remove(filename)
    except OSError:
        pass
    self.all_created_files.discard(filename)
    self.all_created_files.add(target)
    return filename
Example #4
Source File: _datasource.py From recruit with Apache License 2.0 | 6 votes |
def _python2_bz2open(fn, mode, encoding, newline):
    """Open a bz2 file on Python 2, degrading text mode to binary mode.

    Parameters
    ----------
    fn : str
        File name.
    mode : {'r', 'w'}
        File mode. A ``'t'`` flag is stripped (with a warning) because
        bz2 text files are not supported here.
    encoding : str
        Only forwarded to ``_check_mode``; never used to decode the file.
    newline : str
        Only forwarded to ``_check_mode``; never used by the returned file.

    Returns
    -------
    bz2.BZ2File
        The file opened with ``mode`` minus any ``'t'`` characters.
    """
    import bz2

    _check_mode(mode, encoding, newline)

    wants_text = "t" in mode
    if wants_text:
        # BZ2File is missing necessary functions for TextIOWrapper, so the
        # best we can do is warn and open the file in binary mode.
        warnings.warn("Assuming latin1 encoding for bz2 text file in Python2",
                      RuntimeWarning, stacklevel=5)
    effective_mode = mode.replace("t", "") if wants_text else mode
    return bz2.BZ2File(fn, effective_mode)
Example #5
Source File: test__datasource.py From recruit with Apache License 2.0 | 6 votes |
def test_Bz2File_text_mode_warning(self):
    # Opening a bz2 file through the datasource in 'rt' mode must emit a
    # RuntimeWarning and still give back the line that was written.
    try:
        import bz2
    except ImportError:
        # We don't have the bz2 capabilities to test.
        pytest.skip()

    # Test datasource's internal file_opener for BZip2 files.
    bz2_path = os.path.join(self.tmpdir, 'foobar.txt.bz2')
    writer = bz2.BZ2File(bz2_path, 'w')
    writer.write(magic_line)
    writer.close()

    with assert_warns(RuntimeWarning):
        reader = self.ds.open(bz2_path, 'rt')
        first_line = reader.readline()
        reader.close()

    assert_equal(magic_line, first_line)
Example #6
Source File: world.py From psychsim with MIT License | 6 votes |
def save(self,filename,compressed=True):
    """
    Write the pretty-printed XML from ``self.__xml__()`` to ``filename``.

    The document is always encoded as UTF-8, matching what XML parsers
    assume when no encoding is declared. Files are closed even if the
    write fails.

    :param filename: target path; ``.psy`` (compressed) or ``.xml``
        (uncompressed) is appended if not already present
    :type filename: str
    :param compressed: if C{True}, then save in compressed XML; otherwise, save in XML (default is C{True})
    :type compressed: bool
    :returns: the filename used (possibly with a .psy or .xml extension added)
    :rtype: str
    """
    extension = '.psy' if compressed else '.xml'
    if not filename.endswith(extension):
        filename = '%s%s' % (filename, extension)
    # Encode once so both output paths write identical bytes; previously the
    # uncompressed path used the platform's default text encoding.
    payload = self.__xml__().toprettyxml().encode('utf-8')
    if compressed:
        f = bz2.BZ2File(filename,'w')
    else:
        f = open(filename,'wb')
    try:
        f.write(payload)
    finally:
        # Close the handle even when the write raises.
        f.close()
    return filename
Example #7
Source File: tarfile.py From meddle with MIT License | 6 votes |
def bz2open(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
    """Open bzip2 compressed tar archive name for reading or writing.

    Appending is not allowed.

    ``fileobj``, when given, is wrapped in ``_BZ2Proxy``; otherwise
    ``name`` is opened with ``bz2.BZ2File`` at ``compresslevel``. Extra
    keyword arguments are passed through to ``cls.taropen``.

    Raises ValueError for any mode other than 'r' or 'w',
    CompressionError if the bz2 module cannot be imported, and ReadError
    if ``cls.taropen`` fails with IOError or EOFError.
    """
    # Compare against the exact allowed values: the previous substring test
    # (``mode not in "rw"``) let the empty string through.
    if mode not in ("r", "w"):
        raise ValueError("mode must be 'r' or 'w'.")

    try:
        import bz2
    except ImportError:
        raise CompressionError("bz2 module is not available")

    if fileobj is not None:
        fileobj = _BZ2Proxy(fileobj, mode)
    else:
        fileobj = bz2.BZ2File(name, mode, compresslevel=compresslevel)

    try:
        t = cls.taropen(name, mode, fileobj, **kwargs)
    except (IOError, EOFError):
        # Release the bz2 stream created above instead of leaking it.
        fileobj.close()
        raise ReadError("not a bzip2 file")
    # NOTE(review): presumably marks fileobj as owned by the archive so it
    # gets closed with it -- confirm against TarFile.close().
    t._extfileobj = False
    return t

# All *open() methods are registered here.
Example #8
Source File: BotDigger.py From BotDigger with GNU General Public License v3.0 | 6 votes |
def openFile(filename, modes):
    """Open ``filename`` with the reader matching its detected file type.

    The type comes from ``file_type(filename)``. ``modes`` is only used
    for files with no detected type; bz2, gz, xz and zip files are opened
    with their library's default mode.

    Returns the opened file object (a ``zipfile.ZipFile`` for zip files).

    Raises LookupError if ``file_type`` returns a value not handled here.
    """
    filetype = file_type(filename)
    if filetype is None:
        return open(filename, modes)
    elif filetype == "bz2":
        return bz2.BZ2File(filename)
    elif filetype == "gz":
        return gzip.open(filename)
    elif filetype == "xz":
        # Let LZMAFile open the path itself. The previous code wrapped a
        # handle from a ``with`` block, which closed it on return and left
        # the caller with an unusable reader.
        return xz.LZMAFile(filename)
    elif filetype == "zip":
        return zipfile.ZipFile(filename)
    else:
        # should never get here
        raise LookupError("filetype is invalid")
Example #9
Source File: test_read_fwf.py From vnpy_crypto with MIT License | 6 votes |
def test_fwf_compression(self):
    # read_fwf must give the same frame for gzip- and bz2-compressed input
    # as it does for the plain in-memory text.
    try:
        import gzip
        import bz2
    except ImportError:
        pytest.skip("Need gzip and bz2 to run this test")

    # NOTE(review): this literal was presumably multi-line before the page
    # flattened it -- confirm against the upstream test.
    data = """1111111111 2222222222 3333333333""".strip()
    col_widths = [5, 5]
    col_names = ['one', 'two']
    expected = read_fwf(StringIO(data), widths=col_widths, names=col_names)

    if compat.PY3:
        data = bytes(data, encoding='utf-8')

    for comp_name, make_writer in [('gzip', gzip.GzipFile),
                                   ('bz2', bz2.BZ2File)]:
        with tm.ensure_clean() as path:
            writer = make_writer(path, mode='wb')
            writer.write(data)
            writer.close()

            result = read_fwf(path, widths=col_widths, names=col_names,
                              compression=comp_name)
            tm.assert_frame_equal(result, expected)
Example #10
Source File: _datasource.py From vnpy_crypto with MIT License | 6 votes |
def _python2_bz2open(fn, mode, encoding, newline):
    """Open a bz2 file on Python 2; text mode is rejected.

    Parameters
    ----------
    fn : str
        File name.
    mode : {'r', 'w'}
        File mode. Must not contain ``'t'``.
    encoding : str
        Only forwarded to ``_check_mode``; never used to decode the file.
    newline : str
        Only forwarded to ``_check_mode``; never used by the returned file.

    Returns
    -------
    bz2.BZ2File

    Raises
    ------
    ValueError
        If ``mode`` requests text mode.
    """
    import bz2

    _check_mode(mode, encoding, newline)

    text_requested = "t" in mode
    if text_requested:
        # BZ2File is missing necessary functions for TextIOWrapper
        raise ValueError("bz2 text files not supported in python2")
    return bz2.BZ2File(fn, mode)
Example #11
Source File: tarfile.py From jawfish with MIT License | 6 votes |
def bz2open(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
    """Open bzip2 compressed tar archive name for reading or writing.

    Appending is not allowed.

    The bz2 stream is opened over ``fileobj`` when it is truthy, otherwise
    over ``name``. Extra keyword arguments go to ``cls.taropen``.

    Raises ValueError for any mode other than 'r' or 'w',
    CompressionError if the bz2 module cannot be imported, and ReadError
    if ``cls.taropen`` fails with IOError or EOFError.
    """
    # Compare against the exact allowed values: the previous substring test
    # (``mode not in "rw"``) let the empty string through.
    if mode not in ("r", "w"):
        raise ValueError("mode must be 'r' or 'w'.")

    try:
        import bz2
    except ImportError:
        raise CompressionError("bz2 module is not available")

    fileobj = bz2.BZ2File(fileobj or name, mode,
                          compresslevel=compresslevel)

    try:
        t = cls.taropen(name, mode, fileobj, **kwargs)
    except (IOError, EOFError):
        fileobj.close()
        raise ReadError("not a bzip2 file")
    except Exception:
        # Any other failure would otherwise leak the bz2 stream opened above.
        fileobj.close()
        raise
    # NOTE(review): presumably marks fileobj as owned by the archive so it
    # gets closed with it -- confirm against TarFile.close().
    t._extfileobj = False
    return t
Example #12
Source File: compress_files.py From osspolice with GNU General Public License v3.0 | 5 votes |
def open(self):
    """Return a ``bz2.BZ2File`` opened on ``self.f`` with the default mode."""
    source = self.f
    return bz2.BZ2File(source)
Example #13
Source File: _iotools.py From vnpy_crypto with MIT License | 5 votes |
def _to_filehandle(fname, flag='r', return_opened=False):
    """
    Returns the filehandle corresponding to a string or a file.
    If the string ends in '.gz' or '.bz2', the file is opened through the
    gzip or bz2 module respectively.

    Parameters
    ----------
    fname : string, filehandle
        Name of the file whose filehandle must be returned. Anything that
        is not string-like must have a ``seek`` attribute and is returned
        unchanged.
    flag : string, optional
        Flag indicating the status of the file ('r' for read, 'w' for write).
    return_opened : boolean, optional
        Whether to return the opening status of the file.

    Returns
    -------
    fhd : filehandle
        The opened (or passed-through) file object.
    opened : bool
        Only when ``return_opened`` is true: True if this function opened
        the file, False if ``fname`` was already a file object.

    Raises
    ------
    ValueError
        If ``fname`` is neither string-like nor seekable.
    """
    if _is_string_like(fname):
        if fname.endswith('.gz'):
            import gzip
            fhd = gzip.open(fname, flag)
        elif fname.endswith('.bz2'):
            import bz2
            # Honour ``flag`` here too; it used to be dropped, so '.bz2'
            # files were always opened for reading.
            fhd = bz2.BZ2File(fname, flag)
        else:
            # ``file`` only exists on Python 2; ``open`` works everywhere.
            fhd = open(fname, flag)
        opened = True
    elif hasattr(fname, 'seek'):
        fhd = fname
        opened = False
    else:
        raise ValueError('fname must be a string or file handle')
    if return_opened:
        return fhd, opened
    return fhd
Example #14
Source File: compress_files.py From osspolice with GNU General Public License v3.0 | 5 votes |
def get_compressed_file(filename):
    """Return a wrapper object for ``filename`` chosen by its leading bytes.

    The first 1024 bytes are offered to each candidate class's
    ``is_magic``; the first class that accepts them is instantiated.
    ``GZFile``, ``BZ2File`` and ``TarFile`` are constructed from the path,
    every other class from the already opened binary handle (rewound to
    offset 0).

    Returns None when no class recognises the file. The probing handle is
    closed unless it has been handed over to the returned wrapper.
    """
    candidates = (ZIPFile, BZ2File, GZFile, SevenZFile, TarFile, XZFile,
                  JARCSFile, MARFile, RARFile, WinZIPFile)
    f = open(filename, 'rb')
    handed_over = False
    try:
        start_of_file = f.read(1024)
        f.seek(0)
        for cls in candidates:
            if cls.is_magic(start_of_file):
                if cls in (GZFile, BZ2File, TarFile):
                    return cls(filename)
                wrapper = cls(f)
                # From here on the wrapper is responsible for the handle.
                handed_over = True
                return wrapper
        return None
    finally:
        # Previously the handle leaked for path-based classes, for
        # unrecognised files, and when a constructor raised.
        if not handed_over:
            f.close()
Example #15
Source File: utils.py From topical_word_embeddings with MIT License | 5 votes |
def make_closing(base, **attrs):
    """
    Build a subclass of `base` that can be used as `with Base(...) as fout:`.

    Whichever of `__enter__` / `__exit__` the base class lacks is added:
    entering returns the object itself, exiting calls its `close()` method.
    Any further `attrs` become attributes of the new class, which is named
    'Closing' + the base class name.

    This is needed for gzip.GzipFile, bz2.BZ2File etc in older Pythons
    (<=2.6), which otherwise raise "AttributeError: GzipFile instance has no
    attribute '__exit__'".
    """
    fallbacks = (
        ('__enter__', lambda self: self),
        ('__exit__', lambda self, exc_type, exc_value, tb: self.close()),
    )
    for dunder, implementation in fallbacks:
        if not hasattr(base, dunder):
            attrs[dunder] = implementation
    class_name = 'Closing' + base.__name__
    return type(class_name, (base, object), attrs)
Example #16
Source File: wikicorpus.py From topical_word_embeddings with MIT License | 5 votes |
def get_texts(self):
    """
    Iterate over the dump, returning text version of each article as a list
    of tokens.

    Only articles of sufficient length are returned (short articles &
    redirects etc are ignored). Each yielded item is `tokens`, or
    `(tokens, (pageid, title))` when `self.metadata` is truthy. After a
    complete pass, `self.length` holds the number of articles yielded.

    Note that this iterates over the **texts**; if you want vectors, just use
    the standard corpus interface instead of this function::

    >>> for vec in wiki_corpus:
    >>>     print(vec)
    """
    # Counters: articles/positions that were kept vs. everything processed.
    articles, articles_all = 0, 0
    positions, positions_all = 0, 0
    # Lazy stream of work items read from the bz2 dump at self.fname.
    texts = ((text, self.lemmatize, title, pageid) for title, text, pageid in extract_pages(bz2.BZ2File(self.fname), self.filter_namespaces))
    pool = multiprocessing.Pool(self.processes)
    # process the corpus in smaller chunks of docs, because multiprocessing.Pool
    # is dumb and would load the entire input into RAM at once...
    ignore_namespaces = 'Wikipedia Category File Portal Template MediaWiki User Help Book Draft'.split()
    for group in utils.chunkize(texts, chunksize=10 * self.processes, maxsize=1):
        for tokens, title, pageid in pool.imap(process_article, group): # chunksize=10):
            articles_all += 1
            positions_all += len(tokens)
            # article redirects and short stubs are pruned here
            if len(tokens) < ARTICLE_MIN_WORDS or any(title.startswith(ignore + ':') for ignore in ignore_namespaces):
                continue
            articles += 1
            positions += len(tokens)
            if self.metadata:
                yield (tokens, (pageid, title))
            else:
                yield tokens
    # NOTE(review): not inside a try/finally, so this line (and the logging
    # and length caching below) only runs when the generator is exhausted.
    pool.terminate()

    logger.info("finished iterating over Wikipedia corpus of %i documents with %i positions"
        " (total %i articles, %i positions before pruning articles shorter than %i words)" %
        (articles, positions, articles_all, positions_all, ARTICLE_MIN_WORDS))
    self.length = articles # cache corpus length
# endclass WikiCorpus
Example #17
Source File: _fileio.py From pysat with MIT License | 5 votes |
def open(self, name, mode='r', compression=None):
    """
    Open a file pointer. Note that a file is *always* opened in text
    mode. The method inherits its input parameters from the constructor
    of :class:`FileObject`.

    ``compression == 'use_ext'`` delegates to
    ``self.get_compression_type(name)``; any other value is stored in
    ``self.ctype`` as given. The opened handle is stored in ``self.fp``.
    When the bzip2 fallback is used, the underlying binary file is kept
    in ``self.fp_extra``.

    Raises AssertionError if LZMA is requested but unavailable.
    """
    if compression == 'use_ext':
        self.get_compression_type(name)
    else:
        self.ctype = compression

    if not self.ctype:
        self.fp = open(name, mode)
    elif self.ctype == 'gzip':
        self.fp = gzip.open(name, mode + 't')
    elif self.ctype == 'bzip2':
        try:
            # Python 3 supports opening bzip2 files in text mode
            # therefore, we prefer to open them this way
            self.fp = bz2.open(name, mode + 't')
        except AttributeError:
            # Only a missing ``bz2.open`` should trigger the fallback; the
            # former bare ``except:`` also swallowed real I/O errors and
            # KeyboardInterrupt.
            # BZ2File opens a file in binary mode
            # thus, we have to use codecs.getreader()
            # to be able to use it in text mode
            self.fp_extra = bz2.BZ2File(name, mode)
            if mode == 'r':
                self.fp = codecs.getreader('ascii')(self.fp_extra)
            else:  # mode == 'w'
                self.fp = codecs.getwriter('ascii')(self.fp_extra)
    else:  # self.ctype == 'lzma'
        # LZMA is available in Python 2 only if backports.lzma is installed
        # Python 3 supports it by default
        # Raised explicitly (same exception type as before) so the check
        # is not stripped when running under ``python -O``.
        if not lzma_present:
            raise AssertionError('LZMA compression is unavailable.')
        self.fp = lzma.open(name, mode=mode + 't')
Example #18
Source File: utils.py From topical_word_embeddings with MIT License | 5 votes |
def smart_open(fname, mode='rb'):
    """Open `fname`, decompressing on the fly based on its extension.

    A '.bz2' or '.gz' extension selects `BZ2File` or `GzipFile`, wrapped
    with `make_closing`; any other file goes to the builtin `open`. The
    compression modules are only imported when they are needed.
    """
    extension = os.path.splitext(fname)[1]
    if extension == '.bz2':
        import bz2
        opener = make_closing(bz2.BZ2File)
    elif extension == '.gz':
        import gzip
        opener = make_closing(gzip.GzipFile)
    else:
        opener = open
    return opener(fname, mode)
Example #19
Source File: test_tarfile.py From ironpython2 with Apache License 2.0 | 5 votes |
def test_fileobj_with_offset(self):
    # Skip the first member and store values from the second member
    # of the testtar.
    archive = tarfile.open(self.tarname, mode=self.mode)
    try:
        archive.next()
        second = archive.next()
        expected_name = second.name
        member_offset = second.offset
        expected_data = archive.extractfile(second).read()
    finally:
        archive.close()

    # Open the testtar and seek to the offset of the second member.
    if self.mode.endswith(":gz"):
        opener = gzip.GzipFile
    elif self.mode.endswith(":bz2"):
        opener = bz2.BZ2File
    else:
        opener = open
    fobj = opener(self.tarname, "rb")
    try:
        fobj.seek(member_offset)

        # Test if the tarfile starts with the second member.
        # (open() is reached through the already closed instance.)
        archive = archive.open(self.tarname, mode="r:", fileobj=fobj)
        member = archive.next()
        self.assertEqual(member.name, expected_name)
        # Read to the end of fileobj and test if seeking back to the
        # beginning works.
        archive.getmembers()
        self.assertEqual(archive.extractfile(member).read(), expected_data,
                         "seek back did not work")
        archive.close()
    finally:
        fobj.close()
Example #20
Source File: tarfile.py From kobo-predict with BSD 2-Clause "Simplified" License | 5 votes |
def bz2open(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
    """Open bzip2 compressed tar archive name for reading or writing.

    Appending is not allowed.

    The bz2 stream is opened over ``fileobj`` when it is truthy, otherwise
    over ``name``, and is closed again if ``cls.taropen`` raises.

    Raises ValueError for a mode other than 'r', 'w' or 'x',
    CompressionError if the bz2 module cannot be imported, and ReadError
    if reading fails with OSError or EOFError.
    """
    if mode not in ("r", "w", "x"):
        raise ValueError("mode must be 'r', 'w' or 'x'")

    try:
        import bz2
    except ImportError:
        raise CompressionError("bz2 module is not available")

    target = fileobj or name
    fileobj = bz2.BZ2File(target, mode, compresslevel=compresslevel)

    try:
        archive = cls.taropen(name, mode, fileobj, **kwargs)
    except BaseException as exc:
        # Always release the stream; only read-mode I/O failures are
        # reported as "not a bzip2 file", everything else propagates as is.
        fileobj.close()
        if mode == 'r' and isinstance(exc, (OSError, EOFError)):
            raise ReadError("not a bzip2 file")
        raise
    archive._extfileobj = False
    return archive
Example #21
Source File: tarfile.py From kobo-predict with BSD 2-Clause "Simplified" License | 5 votes |
def bz2open(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
    """Open bzip2 compressed tar archive name for reading or writing.

    Appending is not allowed.

    ``fileobj``, when given, is wrapped in ``_BZ2Proxy``; otherwise
    ``name`` is opened with ``bz2.BZ2File`` at ``compresslevel``. Extra
    keyword arguments are passed through to ``cls.taropen``.

    Raises ValueError for any mode other than 'r' or 'w',
    CompressionError if the bz2 module cannot be imported, and ReadError
    if ``cls.taropen`` fails with IOError or EOFError.
    """
    # Compare against the exact allowed values: the previous substring test
    # (``mode not in "rw"``) let the empty string through.
    if mode not in ("r", "w"):
        raise ValueError("mode must be 'r' or 'w'.")

    try:
        import bz2
    except ImportError:
        raise CompressionError("bz2 module is not available")

    if fileobj is not None:
        fileobj = _BZ2Proxy(fileobj, mode)
    else:
        fileobj = bz2.BZ2File(name, mode, compresslevel=compresslevel)

    try:
        t = cls.taropen(name, mode, fileobj, **kwargs)
    except (IOError, EOFError):
        # Do not leak the bz2 stream when the archive cannot be read.
        fileobj.close()
        raise ReadError("not a bzip2 file")
    # NOTE(review): presumably marks fileobj as owned by the archive so it
    # gets closed with it -- confirm against TarFile.close().
    t._extfileobj = False
    return t

# All *open() methods are registered here.
Example #22
Source File: tarfile.py From kobo-predict with BSD 2-Clause "Simplified" License | 5 votes |
def seekable(self):
    """Report whether ``self.fileobj`` is seekable.

    Delegates to ``self.fileobj.seekable()`` when that attribute exists;
    file objects without it are reported as seekable.
    """
    stream = self.fileobj
    if hasattr(stream, "seekable"):
        return stream.seekable()
    # XXX gzip.GzipFile and bz2.BZ2File
    return True
Example #23
Source File: tarfile.py From Python24 with MIT License | 5 votes |
def bz2open(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
    """Open bzip2 compressed tar archive name for reading or writing.

    Appending is not allowed.

    ``fileobj``, when given, is wrapped in ``_BZ2Proxy``; otherwise
    ``name`` is opened with ``bz2.BZ2File`` at ``compresslevel``. Extra
    keyword arguments are passed through to ``cls.taropen``.

    Raises ValueError for any mode other than 'r' or 'w',
    CompressionError if the bz2 module cannot be imported, and ReadError
    if ``cls.taropen`` fails with IOError or EOFError.
    """
    # Compare against the exact allowed values: the previous substring test
    # (``mode not in "rw"``) let the empty string through.
    if mode not in ("r", "w"):
        raise ValueError("mode must be 'r' or 'w'.")

    try:
        import bz2
    except ImportError:
        raise CompressionError("bz2 module is not available")

    if fileobj is not None:
        fileobj = _BZ2Proxy(fileobj, mode)
    else:
        fileobj = bz2.BZ2File(name, mode, compresslevel=compresslevel)

    try:
        t = cls.taropen(name, mode, fileobj, **kwargs)
    except (IOError, EOFError):
        # Do not leak the bz2 stream when the archive cannot be read.
        fileobj.close()
        raise ReadError("not a bzip2 file")
    # NOTE(review): presumably marks fileobj as owned by the archive so it
    # gets closed with it -- confirm against TarFile.close().
    t._extfileobj = False
    return t

# All *open() methods are registered here.
Example #24
Source File: tarfile.py From Python24 with MIT License | 5 votes |
def seekable(self):
    """Report whether ``self.fileobj`` is seekable.

    Delegates to ``self.fileobj.seekable()`` when that attribute exists;
    file objects without it are reported as seekable.
    """
    has_probe = hasattr(self.fileobj, "seekable")
    # XXX gzip.GzipFile and bz2.BZ2File lack the method; treat them as seekable.
    return self.fileobj.seekable() if has_probe else True
Example #25
Source File: _datasource.py From auto-alt-text-lambda-api with MIT License | 5 votes |
def _load(self):
    """Register the bz2 and gzip openers once, skipping unavailable modules.

    Fills ``self._file_openers`` for the ".bz2" and ".gz" extensions and
    sets ``self._loaded`` so later calls return immediately.
    """
    if self._loaded:
        return

    try:
        import bz2
    except ImportError:
        # Module not available: leave ".bz2" unregistered.
        pass
    else:
        self._file_openers[".bz2"] = bz2.BZ2File

    try:
        import gzip
    except ImportError:
        # Module not available: leave ".gz" unregistered.
        pass
    else:
        self._file_openers[".gz"] = gzip.open

    self._loaded = True
Example #26
Source File: test__datasource.py From auto-alt-text-lambda-api with MIT License | 5 votes |
def test_ValidBz2File(self):
    # A line written with BZ2File must come back unchanged when the file
    # is read through the datasource.
    try:
        import bz2
    except ImportError:
        # We don't have the bz2 capabilities to test.
        raise SkipTest

    # Test datasource's internal file_opener for BZip2 files.
    bz2_path = os.path.join(self.tmpdir, 'foobar.txt.bz2')
    writer = bz2.BZ2File(bz2_path, 'w')
    writer.write(magic_line)
    writer.close()

    reader = self.ds.open(bz2_path)
    first_line = reader.readline()
    reader.close()

    self.assertEqual(magic_line, first_line)
Example #27
Source File: _iotools.py From auto-alt-text-lambda-api with MIT License | 5 votes |
def _to_filehandle(fname, flag='r', return_opened=False):
    """
    Returns the filehandle corresponding to a string or a file.
    If the string ends in '.gz' or '.bz2', the file is opened through the
    gzip or bz2 module respectively.

    Parameters
    ----------
    fname : string, filehandle
        Name of the file whose filehandle must be returned. Anything that
        is not string-like must have a ``seek`` attribute and is returned
        unchanged.
    flag : string, optional
        Flag indicating the status of the file ('r' for read, 'w' for write).
    return_opened : boolean, optional
        Whether to return the opening status of the file.

    Returns
    -------
    fhd : filehandle
        The opened (or passed-through) file object.
    opened : bool
        Only when ``return_opened`` is true: True if this function opened
        the file, False if ``fname`` was already a file object.

    Raises
    ------
    ValueError
        If ``fname`` is neither string-like nor seekable.
    """
    if _is_string_like(fname):
        if fname.endswith('.gz'):
            import gzip
            fhd = gzip.open(fname, flag)
        elif fname.endswith('.bz2'):
            import bz2
            # Honour ``flag`` here too; it used to be dropped, so '.bz2'
            # files were always opened for reading.
            fhd = bz2.BZ2File(fname, flag)
        else:
            # ``file`` only exists on Python 2; ``open`` works everywhere.
            fhd = open(fname, flag)
        opened = True
    elif hasattr(fname, 'seek'):
        fhd = fname
        opened = False
    else:
        raise ValueError('fname must be a string or file handle')
    if return_opened:
        return fhd, opened
    return fhd
Example #28
Source File: WikiExtractor.py From comparable-text-miner with Apache License 2.0 | 5 votes |
def open(self, filename):
    """Open ``filename`` for writing.

    With ``self.compress`` set, a ``bz2.BZ2File`` on ``filename + '.bz2'``
    is returned; otherwise a plain text-mode file on ``filename``.
    """
    if not self.compress:
        return open(filename, 'w')
    return bz2.BZ2File(filename + '.bz2', 'w')

# ----------------------------------------------------------------------
# READER
Example #29
Source File: test_tarfile.py From ironpython2 with Apache License 2.0 | 5 votes |
def test_detect_stream_bz2(self):
    # Originally, tarfile's stream detection looked for the string
    # "BZh91" at the start of the file. This is incorrect because
    # the '9' represents the blocksize (900kB). If the file was
    # compressed using another blocksize autodetection fails.
    with open(tarname, "rb") as plain:
        payload = plain.read()

    # Compress with blocksize 100kB, the file starts with "BZh11".
    with bz2.BZ2File(tmpname, "wb", compresslevel=1) as packed:
        packed.write(payload)

    self._testfunc_file(tmpname, "r|*")
Example #30
Source File: _datasource.py From lambda-packs with MIT License | 5 votes |
def _load(self):
    """Register the bz2 and gzip openers once, skipping unavailable modules.

    Fills ``self._file_openers`` for the ".bz2" and ".gz" extensions and
    sets ``self._loaded`` so later calls return immediately.
    """
    if not self._loaded:
        try:
            import bz2
        except ImportError:
            # Module not available: leave ".bz2" unregistered.
            pass
        else:
            self._file_openers[".bz2"] = bz2.BZ2File

        try:
            import gzip
        except ImportError:
            # Module not available: leave ".gz" unregistered.
            pass
        else:
            self._file_openers[".gz"] = gzip.open

        self._loaded = True