Python Examples of bz2.decompress

Source File: util_test.py From scalyr-agent-2 with Apache License 2.0

6 votes

def test_bad_compression_lib_no_compression(self):
        """Pretend that the zlib/bz2 library compress() method doesn't perform any compression"""

        def _identity_compress(data, compression_level=9):
            # Simulate module.compress() returning its input unchanged.
            return data

        def _identity_decompress(data):
            return data

        def _fake_get_compress_and_decompress_func(
            compression_type, compression_level=9
        ):
            # Same (compress, decompress) pair regardless of the requested type.
            return _identity_compress, _identity_decompress

        # verify_and_get_compress_func is expected to return None for every
        # compression type when the patched lookup yields no-op functions.
        for compression_type in ("deflate", "bz2", "lz4", "zstandard"):
            with patch(
                "scalyr_agent.util.get_compress_and_decompress_func",
                new=_fake_get_compress_and_decompress_func,
            ):
                self.assertIsNone(verify_and_get_compress_func(compression_type))

Source File: special.py From pygments with BSD 2-Clause "Simplified" License

6 votes

def get_tokens(self, text):
        """Yield ``(tokentype, value)`` pairs for a raw token stream.

        ``text`` may be ``str`` (encoded to ASCII first) or bytes.  When
        ``self.compress`` is ``'gz'`` or ``'bz2'`` the payload is decompressed
        before being handed to ``self.get_tokens_unprocessed``.
        """
        # raw token stream never has any non-ASCII characters
        raw = text.encode('ascii') if isinstance(text, str) else text

        mode = self.compress
        if mode == 'gz':
            import gzip
            raw = gzip.GzipFile('', 'rb', 9, BytesIO(raw)).read()
        elif mode == 'bz2':
            import bz2
            raw = bz2.decompress(raw)

        # do not call Lexer.get_tokens() because we do not want Unicode
        # decoding to occur, and stripping is not optional.
        raw = raw.strip(b'\n') + b'\n'
        for _index, token_type, value in self.get_tokens_unprocessed(raw):
            yield token_type, value

Source File: special.py From pigaios with GNU General Public License v3.0

6 votes

def get_tokens(self, text):
        """Yield ``(tokentype, value)`` pairs for a raw token stream.

        Text of type ``text_type`` is encoded to ASCII first.  When
        ``self.compress`` is ``'gz'`` or ``'bz2'`` the payload is decompressed
        before being handed to ``self.get_tokens_unprocessed``.
        """
        # raw token stream never has any non-ASCII characters
        raw = text.encode('ascii') if isinstance(text, text_type) else text

        mode = self.compress
        if mode == 'gz':
            import gzip
            raw = gzip.GzipFile('', 'rb', 9, BytesIO(raw)).read()
        elif mode == 'bz2':
            import bz2
            raw = bz2.decompress(raw)

        # do not call Lexer.get_tokens() because we do not want Unicode
        # decoding to occur, and stripping is not optional.
        raw = raw.strip(b'\n') + b'\n'
        for _index, token_type, value in self.get_tokens_unprocessed(raw):
            yield token_type, value

Source File: constants.py From PGPy with BSD 3-Clause "New" or "Revised" License

6 votes

def decompress(self, data):
        """Decompress ``data`` according to this compression algorithm member.

        Returns the data unchanged for ``Uncompressed``; raises
        ``NotImplementedError`` for any member without a handler.
        """
        if six.PY2:
            data = bytes(data)

        # (member, handler) pairs, checked by identity in declaration order.
        handlers = (
            (CompressionAlgorithm.Uncompressed, lambda raw: raw),
            # negative wbits: raw deflate stream without zlib header/trailer
            (CompressionAlgorithm.ZIP, lambda raw: zlib.decompress(raw, -15)),
            (CompressionAlgorithm.ZLIB, lambda raw: zlib.decompress(raw)),
            (CompressionAlgorithm.BZ2, lambda raw: bz2.decompress(raw)),
        )
        for algorithm, inflate in handlers:
            if self is algorithm:
                return inflate(data)

        raise NotImplementedError(self)

Source File: special.py From komodo-wakatime with BSD 3-Clause "New" or "Revised" License

6 votes

def get_tokens(self, text):
        """Yield ``(tokentype, value)`` pairs for a raw token stream.

        Text of type ``text_type`` is encoded to ASCII first.  When
        ``self.compress`` is ``'gz'`` or ``'bz2'`` the payload is decompressed
        before being handed to ``self.get_tokens_unprocessed``.
        """
        # raw token stream never has any non-ASCII characters
        raw = text.encode('ascii') if isinstance(text, text_type) else text

        mode = self.compress
        if mode == 'gz':
            import gzip
            raw = gzip.GzipFile('', 'rb', 9, BytesIO(raw)).read()
        elif mode == 'bz2':
            import bz2
            raw = bz2.decompress(raw)

        # do not call Lexer.get_tokens() because we do not want Unicode
        # decoding to occur, and stripping is not optional.
        raw = raw.strip(b'\n') + b'\n'
        for _index, token_type, value in self.get_tokens_unprocessed(raw):
            yield token_type, value

Source File: special.py From pySINDy with MIT License

6 votes

def get_tokens(self, text):
        """Yield ``(tokentype, value)`` pairs for a raw token stream.

        Text of type ``text_type`` is encoded to ASCII first.  When
        ``self.compress`` is ``'gz'`` or ``'bz2'`` the payload is decompressed
        before being handed to ``self.get_tokens_unprocessed``.
        """
        # raw token stream never has any non-ASCII characters
        raw = text.encode('ascii') if isinstance(text, text_type) else text

        mode = self.compress
        if mode == 'gz':
            import gzip
            raw = gzip.GzipFile('', 'rb', 9, BytesIO(raw)).read()
        elif mode == 'bz2':
            import bz2
            raw = bz2.decompress(raw)

        # do not call Lexer.get_tokens() because we do not want Unicode
        # decoding to occur, and stripping is not optional.
        raw = raw.strip(b'\n') + b'\n'
        for _index, token_type, value in self.get_tokens_unprocessed(raw):
            yield token_type, value

Source File: test_barman_cloud_wal_archive.py From barman with GNU General Public License v3.0

6 votes

def test_retrieve_bz2_file_obj(self, boto_mock, tmpdir):
        """
        Check that retrieve_file_obj returns bzip2-compressed WAL content
        """
        payload = 'something'.encode('utf-8')
        # Write the WAL file, creating the parent directory as needed
        wal_file = tmpdir.join('wal_dir/000000080000ABFF000000C1')
        wal_file.write(payload, ensure=True)
        # Build an uploader configured for bzip2 compression
        uploader = S3WalUploader(
            mock.MagicMock(), 'test-server', compression='bzip2'
        )
        compressed_stream = uploader.retrieve_file_obj(wal_file.strpath)
        # A file object must have been returned
        assert compressed_stream
        # Decompressing what it yields must give back the original bytes
        assert bz2.decompress(compressed_stream.read()) == payload

Source File: bless_config.py From bless with Apache License 2.0

6 votes

def _decompress(data, algorithm):
        """
        Decompress a byte string based of the provided algorithm.
        :param data: byte string
        :param algorithm: string  with the name of the compression algorithm used
        :return: decompressed byte string.
        """
        if algorithm is None or algorithm == 'none':
            result = data
        elif algorithm == 'zlib':
            result = zlib.decompress(data)
        elif algorithm == 'bz2':
            result = bz2.decompress(data)
        else:
            raise ValueError("Compression {} is not supported.".format(algorithm))

        return result

Source File: io.py From strax with BSD 3-Clause "New" or "Revised" License

6 votes

def _load_file(f, compressor, dtype):
    """Read ``f``, decompress it and view the result as an array of ``dtype``.

    An empty file yields a zero-length array.  Any exception raised while
    reading, decompressing or interpreting the data is re-raised as
    ``strax.DataCorrupted``.
    """
    try:
        raw = f.read()
        if len(raw) == 0:
            return np.zeros(0, dtype=dtype)

        decompress = COMPRESSORS[compressor]['decompress']
        buffer = decompress(raw)
        try:
            return np.frombuffer(buffer, dtype=dtype)
        except ValueError as e:
            # Attach the dtype to the message before the outer handler wraps it.
            raise ValueError(f"ValueError while loading data with dtype =\n\t{dtype}") from e

    except Exception:
        raise strax.DataCorrupted(
            f"Fatal Error while reading file {f}: "
            + strax.utils.formatted_exception())

Source File: utils.py From toil with Apache License 2.0

6 votes

def attributesToBinary(cls, attributes):
        """
        Reassemble the binary payload stored across chunk attributes.

        :rtype: (str|None,int)
        :return: the binary data and the number of chunks it was composed from
        """
        # Order chunks by their integer name so they concatenate correctly.
        chunks = sorted(
            (int(k), v) for k, v in iteritems(attributes) if cls._isValidChunkName(k)
        )
        numChunks = int(attributes[u'numChunks'])
        if not numChunks:
            return None, numChunks

        if USING_PYTHON2:
            pieces = (v for k, v in chunks)
        else:
            pieces = (v.encode() for k, v in chunks)
        compressed = base64.b64decode(b''.join(pieces))

        # The first byte marks the payload: 'C' = bz2-compressed, 'U' = stored as-is.
        marker = compressed[0]
        if marker == b'C'[0]:
            return bz2.decompress(compressed[1:]), numChunks
        if marker == b'U'[0]:
            return compressed[1:], numChunks
        raise RuntimeError('Unexpected prefix {}'.format(marker))

Source File: bz2.py From numcodecs with MIT License

6 votes

def decode(self, buf, out=None):
        """Decompress ``buf`` with bz2 and return the result via ``ndarray_copy``.

        ``out``, when given, is normalised and passed on as the destination.
        """
        # normalise inputs
        contiguous = ensure_contiguous_ndarray(buf)
        target = None if out is None else ensure_contiguous_ndarray(out)

        # N.B., bz2 cannot handle ndarray directly because of truth testing issues
        view = memoryview(contiguous)

        # Python standard library bz2 module does not support direct
        # decompression into a buffer, so the result is copied into the
        # destination if one was given
        return ndarray_copy(_bz2.decompress(view), target)

Source File: SampleCorpora.py From scattertext with Apache License 2.0

6 votes

def get_full_data():
		'''
		Returns all plots and reviews, not just the ones that appear in movies with both plot descriptions and reviews.

		The bz2-compressed CSV is read from the installed ``scattertext`` package
		data; if that fails or yields nothing, it is downloaded from
		``ROTTEN_TOMATOES_DATA_URL`` instead.

		Returns
		-------
		pd.DataFrame

		I.e.,
		>>> convention_df.iloc[0]
		category                                                             plot
		text                    Vijay Singh Rajput (Amitabh Bachchan) is a qui...
		movie_name                                                        aankhen
		has_plot_and_reviews                                                False
		Name: 0, dtype: object
		'''
		try:
			data_stream = pkgutil.get_data('scattertext', 'data/rotten_tomatoes_corpus_full.csv.bz2')
		except Exception:
			# Catch Exception rather than everything, so KeyboardInterrupt and
			# SystemExit are not turned into a network download.
			data_stream = None
		if data_stream is None:
			# pkgutil.get_data may also return None instead of raising.
			url = ROTTEN_TOMATOES_DATA_URL
			data_stream = urlopen(url).read()
		return pd.read_csv(io.BytesIO(bz2.decompress(data_stream)))

Source File: SampleCorpora.py From scattertext with Apache License 2.0

6 votes

def get_data():
		'''
		Load the Rotten Tomatoes corpus as a data frame.

		The bz2-compressed CSV is read from the installed ``scattertext`` package
		data; if that fails or yields nothing, it is downloaded from
		``ROTTEN_TOMATOES_DATA_URL`` instead.

		Returns
		-------
		pd.DataFrame

		I.e.,
		>>> convention_df.iloc[0]
		category                                                    plot
		filename                 subjectivity_html/obj/2002/Abandon.html
		text           A senior at an elite college (Katie Holmes), a...
		movie_name                                               abandon
		'''
		try:
			data_stream = pkgutil.get_data('scattertext', 'data/rotten_tomatoes_corpus.csv.bz2')
		except Exception:
			# Catch Exception rather than everything, so KeyboardInterrupt and
			# SystemExit are not turned into a network download.
			data_stream = None
		if data_stream is None:
			# pkgutil.get_data may also return None instead of raising.
			url = ROTTEN_TOMATOES_DATA_URL
			data_stream = urlopen(url).read()
		return pd.read_csv(io.BytesIO(bz2.decompress(data_stream)))

Source File: reports_test.py From treadmill with Apache License 2.0

6 votes

def test_serialize_dataframe(self):
        """Test serializing a dataframe."""
        frame = pd.DataFrame([[1, 2, 3], [4, 5, 6]], columns=['a', 'b', 'c'])
        # Expected decompressed output: header, two rows, trailing newline.
        expected_lines = [
            b'a,b,c',
            b'1,2,3',
            b'4,5,6',
            b'',
        ]

        payload = reports.serialize_dataframe(frame)

        self.assertEqual(bz2.decompress(payload), b'\n'.join(expected_lines))

Source File: test_bz2.py From medicare-demo with Apache License 2.0

5 votes

def testDecompressIncomplete(self):
        # "Test decompress() function with incomplete data"
        # Drop the last 10 bytes of the fixture so the stream is truncated.
        truncated = self.DATA[:-10]
        self.assertRaises(ValueError, bz2.decompress, truncated)

Source File: Bzip2.py From peach with Mozilla Public License 2.0

5 votes

def realDecode(self, data):
        """Return ``data`` after bz2 decompression."""
        decompressed = bz2.decompress(data)
        return decompressed

Source File: test_commonast.py From pscript with BSD 2-Clause "Simplified" License

5 votes

def _get_ref_json(filename):
    filename_bz2 = filename[:-2] + 'bz2'
    js_ref = bz2.decompress(open(filename_bz2, 'rb').read()).decode()
    return json.dumps(json.loads(js_ref), indent=2, sort_keys=True)

Source File: test_bz2.py From medicare-demo with Apache License 2.0

5 votes

def testDecompressEmpty(self):
        # "Test decompress() function with empty string"
        # Empty input is expected to decompress to an empty result.
        self.assertEqual(bz2.decompress(""), "")

Source File: test_bz2.py From medicare-demo with Apache License 2.0

5 votes

def decompress(self, data):
            """Decompress ``data`` with the external ``bunzip2`` tool.

            Falls back to ``bz2.decompress`` when ``bunzip2`` cannot be started
            or exits with a non-zero status.
            """
            # popen2 no longer exists in Python 3; subprocess is its replacement.
            import subprocess
            try:
                pop = subprocess.Popen(
                    ["bunzip2"],
                    stdin=subprocess.PIPE,
                    stdout=subprocess.PIPE,
                    stderr=subprocess.PIPE,
                )
            except OSError:
                # bunzip2 is not installed or not executable.
                return bz2.decompress(data)
            # communicate() feeds stdin and drains stdout/stderr concurrently,
            # avoiding the deadlock of writing everything before reading.
            ret, _stderr = pop.communicate(data)
            if pop.returncode != 0:
                ret = bz2.decompress(data)
            return ret

Source File: enterprise.py From virustotal3 with GNU General Public License v3.0

5 votes

def _get_feed(api_key, type_, time, timeout=None):
    """ Get a minute from a feed

    Parameters:
        api_key (str): VT key
        type_ (str): type of feed to get
        time (str): YYYYMMDDhhmm
        timeout (float, optional): The amount of time in seconds the request should wait before timing out.
    
    Returns:
        StringIO: each line is a json string for one report
    """
    if api_key is None:
        raise Exception("You must provide a valid API key")

    try:
        response = requests.get('https://www.virustotal.com/api/v3/feeds/{}/{}'.format(type_, time),
                                headers={'x-apikey': api_key,
                                         'Accept': 'application/json'},
                                timeout=timeout)

        if response.status_code != 200:
            _raise_exception(response)

        return BytesIO(bz2.decompress(response.content))
    except requests.exceptions.RequestException as error:
        print(error)
        exit(1)

Source File: test_bz2.py From medicare-demo with Apache License 2.0

5 votes

def testDecompress(self):
        # "Test decompress() function"
        # Decompressing the DATA fixture must reproduce the TEXT fixture.
        self.assertEqual(bz2.decompress(self.DATA), self.TEXT)

Source File: test_bz2.py From pyodide with Mozilla Public License 2.0

5 votes

def test_bz2(selenium):
    """Run a bz2 compress/decompress round trip via ``selenium.run``."""
    # The first assertion in the snippet compares bytes with bytes; comparing
    # the compressed bytes with the ``str`` input would always be unequal and
    # therefore check nothing.
    selenium.run(
        """
        import bz2

        text = "Hello test test test test this is a test test test"
        some_compressed_bytes = bz2.compress(text.encode('utf-8'))
        assert some_compressed_bytes != text.encode('utf-8')
        decompressed_bytes = bz2.decompress(some_compressed_bytes)
        assert decompressed_bytes.decode('utf-8') == text
    """
    )

Source File: UTscapy.py From isip with MIT License

5 votes

def get_local(self):
        """Return ``self.local`` base64-decoded and then bz2-decompressed."""
        # base64.decodestring was removed in Python 3.9; b64decode is
        # available on both Python 2 and 3.
        return bz2.decompress(base64.b64decode(self.local))

Source File: Bzip2.py From peach with Mozilla Public License 2.0

5 votes

def realEncode(self, data):
        """Return ``data`` after bz2 decompression."""
        decompressed = bz2.decompress(data)
        return decompressed

Source File: test_bz2.py From medicare-demo with Apache License 2.0

5 votes

def testWriteLines(self):
        # "Test BZ2File.writelines()"
        writer = BZ2File(self.filename, "w")
        # writelines() without an argument must be rejected.
        self.assertRaises(TypeError, writer.writelines)
        lines = StringIO(self.TEXT).readlines()
        writer.writelines(lines)
        writer.close()
        # patch #1535500: writelines() on a closed file must raise ValueError
        self.assertRaises(ValueError, writer.writelines, ["a"])
        handle = open(self.filename, 'rb')
        raw = handle.read()
        self.assertEqual(self.decompress(raw), self.TEXT)
        handle.close()

Source File: UTscapy.py From dash-hack with MIT License

5 votes

def get_local(self):
        """Return ``self.local`` base64-decoded and then bz2-decompressed."""
        # base64.decodestring was removed in Python 3.9; b64decode is
        # available on both Python 2 and 3.
        return bz2.decompress(base64.b64decode(self.local))

Source File: UTscapy.py From dash-hack with MIT License

5 votes

def get_local(self):
        """Return ``self.local`` base64-decoded and then bz2-decompressed."""
        # base64.decodestring was removed in Python 3.9; b64decode is
        # available on both Python 2 and 3.
        return bz2.decompress(base64.b64decode(self.local))

Source File: UTscapy.py From dash-hack with MIT License

5 votes

def get_local(self):
        """Return ``self.local`` base64-decoded and then bz2-decompressed."""
        # base64.decodestring was removed in Python 3.9; b64decode is
        # available on both Python 2 and 3.
        return bz2.decompress(base64.b64decode(self.local))

Source File: compression.py From pyminifier with GNU General Public License v3.0

5 votes

def lzma_pack(source):
    """
    Returns 'source' as a lzma-compressed, self-extracting python script.

    The result is an optional shebang line followed by a two-line stub that
    base64-decodes, lzma-decompresses and ``exec``s the embedded source.

    .. note::

        This method uses up more space than the zip_pack method but it has the
        advantage in that the resulting .py file can still be imported into a
        python program.
    """
    import lzma
    import base64
    header = ""
    # Preserve shebangs (don't care about encodings for this)
    first_line = source.split('\n')[0]
    if analyze.shebang.match(first_line):
        if py3 and first_line.rstrip().endswith('python'):
            # Make it python3, e.g. #!/usr/bin/env python3
            first_line = first_line.rstrip() + '3'
        header = first_line + '\n'
    payload = base64.b64encode(lzma.compress(source.encode('utf-8'))).decode('utf-8')
    return "".join([
        header,
        'import lzma, base64\n',
        "exec(lzma.decompress(base64.b64decode('",
        payload,
        "')))\n",
    ])

Source File: compression.py From pyminifier with GNU General Public License v3.0

5 votes

def gz_pack(source):
    """
    Returns 'source' as a zlib-compressed, self-extracting python script.

    The result is an optional shebang line followed by a two-line stub that
    base64-decodes, zlib-decompresses and ``exec``s the embedded source.

    .. note::

        This method uses up more space than the zip_pack method but it has the
        advantage in that the resulting .py file can still be imported into a
        python program.
    """
    import zlib
    import base64
    header = ""
    # Preserve shebangs (don't care about encodings for this)
    first_line = source.split('\n')[0]
    if analyze.shebang.match(first_line):
        if py3 and first_line.rstrip().endswith('python'):
            # Make it python3, e.g. #!/usr/bin/env python3
            first_line = first_line.rstrip() + '3'
        header = first_line + '\n'
    payload = base64.b64encode(zlib.compress(source.encode('utf-8'))).decode('utf-8')
    return "".join([
        header,
        'import zlib, base64\n',
        "exec(zlib.decompress(base64.b64decode('",
        payload,
        "')))\n",
    ])

Python bz2.decompress() Examples