Python gzip.html() Examples

The following are 8 code examples of gzip.html(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module gzip , or try the search function .
Example #1
Source File: file_utils.py    From mlflow with Apache License 2.0 6 votes vote down vote up
def make_tarfile(output_filename, source_dir, archive_name, custom_filter=None):
    # Helper for filtering out modification timestamps
    def _filter_timestamps(tar_info):
        tar_info.mtime = 0
        return tar_info if custom_filter is None else custom_filter(tar_info)

    unzipped_filename = tempfile.mktemp()
    try:
        with tarfile.open(unzipped_filename, "w") as tar:
            tar.add(source_dir, arcname=archive_name, filter=_filter_timestamps)
        # When gzipping the tar, don't include the tar's filename or modification time in the
        # zipped archive (see https://docs.python.org/3/library/gzip.html#gzip.GzipFile)
        with gzip.GzipFile(filename="", fileobj=open(output_filename, 'wb'), mode='wb', mtime=0) \
                as gzipped_tar, open(unzipped_filename, 'rb') as tar:
            gzipped_tar.write(tar.read())
    finally:
        os.remove(unzipped_filename) 
Example #2
Source File: encoding.py    From cherrypy with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def compress(body, compress_level):
    """Compress 'body' at the given compress_level."""
    import zlib

    # See http://www.gzip.org/zlib/rfc-gzip.html
    yield b'\x1f\x8b'       # ID1 and ID2: gzip marker
    yield b'\x08'           # CM: compression method
    yield b'\x00'           # FLG: none set
    # MTIME: 4 bytes
    yield struct.pack('<L', int(time.time()) & int('FFFFFFFF', 16))
    yield b'\x02'           # XFL: max compression, slowest algo
    yield b'\xff'           # OS: unknown

    crc = zlib.crc32(b'')
    size = 0
    zobj = zlib.compressobj(compress_level,
                            zlib.DEFLATED, -zlib.MAX_WBITS,
                            zlib.DEF_MEM_LEVEL, 0)
    for line in body:
        size += len(line)
        crc = zlib.crc32(line, crc)
        yield zobj.compress(line)
    yield zobj.flush()

    # CRC32: 4 bytes
    yield struct.pack('<L', crc & int('FFFFFFFF', 16))
    # ISIZE: 4 bytes
    yield struct.pack('<L', size & int('FFFFFFFF', 16)) 
Example #3
Source File: encoding.py    From opsbro with MIT License 5 votes vote down vote up
def compress(body, compress_level):
    """Compress 'body' at the given compress_level."""
    import zlib

    # See http://www.gzip.org/zlib/rfc-gzip.html
    yield ntob('\x1f\x8b')       # ID1 and ID2: gzip marker
    yield ntob('\x08')           # CM: compression method
    yield ntob('\x00')           # FLG: none set
    # MTIME: 4 bytes
    yield struct.pack("<L", int(time.time()) & int('FFFFFFFF', 16))
    yield ntob('\x02')           # XFL: max compression, slowest algo
    yield ntob('\xff')           # OS: unknown

    crc = zlib.crc32(ntob(""))
    size = 0
    zobj = zlib.compressobj(compress_level,
                            zlib.DEFLATED, -zlib.MAX_WBITS,
                            zlib.DEF_MEM_LEVEL, 0)
    for line in body:
        size += len(line)
        crc = zlib.crc32(line, crc)
        yield zobj.compress(line)
    yield zobj.flush()

    # CRC32: 4 bytes
    yield struct.pack("<L", crc & int('FFFFFFFF', 16))
    # ISIZE: 4 bytes
    yield struct.pack("<L", size & int('FFFFFFFF', 16)) 
Example #4
Source File: encoding.py    From bazarr with GNU General Public License v3.0 5 votes vote down vote up
def compress(body, compress_level):
    """Compress 'body' at the given compress_level."""
    import zlib

    # See http://www.gzip.org/zlib/rfc-gzip.html
    yield ntob('\x1f\x8b')       # ID1 and ID2: gzip marker
    yield ntob('\x08')           # CM: compression method
    yield ntob('\x00')           # FLG: none set
    # MTIME: 4 bytes
    yield struct.pack('<L', int(time.time()) & int('FFFFFFFF', 16))
    yield ntob('\x02')           # XFL: max compression, slowest algo
    yield ntob('\xff')           # OS: unknown

    crc = zlib.crc32(ntob(''))
    size = 0
    zobj = zlib.compressobj(compress_level,
                            zlib.DEFLATED, -zlib.MAX_WBITS,
                            zlib.DEF_MEM_LEVEL, 0)
    for line in body:
        size += len(line)
        crc = zlib.crc32(line, crc)
        yield zobj.compress(line)
    yield zobj.flush()

    # CRC32: 4 bytes
    yield struct.pack('<L', crc & int('FFFFFFFF', 16))
    # ISIZE: 4 bytes
    yield struct.pack('<L', size & int('FFFFFFFF', 16)) 
Example #5
Source File: encoding.py    From Tautulli with GNU General Public License v3.0 5 votes vote down vote up
def compress(body, compress_level):
    """Compress 'body' at the given compress_level."""
    import zlib

    # See http://www.gzip.org/zlib/rfc-gzip.html
    yield b'\x1f\x8b'       # ID1 and ID2: gzip marker
    yield b'\x08'           # CM: compression method
    yield b'\x00'           # FLG: none set
    # MTIME: 4 bytes
    yield struct.pack('<L', int(time.time()) & int('FFFFFFFF', 16))
    yield b'\x02'           # XFL: max compression, slowest algo
    yield b'\xff'           # OS: unknown

    crc = zlib.crc32(b'')
    size = 0
    zobj = zlib.compressobj(compress_level,
                            zlib.DEFLATED, -zlib.MAX_WBITS,
                            zlib.DEF_MEM_LEVEL, 0)
    for line in body:
        size += len(line)
        crc = zlib.crc32(line, crc)
        yield zobj.compress(line)
    yield zobj.flush()

    # CRC32: 4 bytes
    yield struct.pack('<L', crc & int('FFFFFFFF', 16))
    # ISIZE: 4 bytes
    yield struct.pack('<L', size & int('FFFFFFFF', 16)) 
Example #6
Source File: encoding.py    From moviegrabber with GNU General Public License v3.0 5 votes vote down vote up
def compress(body, compress_level):
    """Compress 'body' at the given compress_level."""
    import zlib
    
    # See http://www.gzip.org/zlib/rfc-gzip.html
    yield ntob('\x1f\x8b')       # ID1 and ID2: gzip marker
    yield ntob('\x08')           # CM: compression method
    yield ntob('\x00')           # FLG: none set
    # MTIME: 4 bytes
    yield struct.pack("<L", int(time.time()) & int('FFFFFFFF', 16))
    yield ntob('\x02')           # XFL: max compression, slowest algo
    yield ntob('\xff')           # OS: unknown
    
    crc = zlib.crc32(ntob(""))
    size = 0
    zobj = zlib.compressobj(compress_level,
                            zlib.DEFLATED, -zlib.MAX_WBITS,
                            zlib.DEF_MEM_LEVEL, 0)
    for line in body:
        size += len(line)
        crc = zlib.crc32(line, crc)
        yield zobj.compress(line)
    yield zobj.flush()
    
    # CRC32: 4 bytes
    yield struct.pack("<L", crc & int('FFFFFFFF', 16))
    # ISIZE: 4 bytes
    yield struct.pack("<L", size & int('FFFFFFFF', 16)) 
Example #7
Source File: util.py    From Carnets with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def fileobj_name(f):
    """
    Returns the 'name' of file-like object f, if it has anything that could be
    called its name.  Otherwise f's class or type is returned.  If f is a
    string f itself is returned.
    """

    if isinstance(f, str):
        return f
    elif isinstance(f, gzip.GzipFile):
        # The .name attribute on GzipFiles does not always represent the name
        # of the file being read/written--it can also represent the original
        # name of the file being compressed
        # See the documentation at
        # https://docs.python.org/3/library/gzip.html#gzip.GzipFile
        # As such, for gzip files only return the name of the underlying
        # fileobj, if it exists
        return fileobj_name(f.fileobj)
    elif hasattr(f, 'name'):
        return f.name
    elif hasattr(f, 'filename'):
        return f.filename
    elif hasattr(f, '__class__'):
        return str(f.__class__)
    else:
        return str(type(f)) 
Example #8
Source File: language_model.py    From abkhazia with GNU General Public License v3.0 4 votes vote down vote up
def _compute_lm(self, G_arpa):
        """Generate an ARPA n-gram from an abkhazia corpus

        This method relies on the following Kaldi programs:
        add-start-end.sh, build-lm.sh and compile-lm. It uses the
        IRSTLM library.

        """
        self.log.info(
            'computing %s %s-gram in ARPA format', self.level, self.order)

        # cut -d' ' -f2 lm_text > text_ready. Train need to
        # remove utt-id on first column of text file
        lm_text = os.path.join(self.a2k._local_path(), 'lm_text.txt')
        lm_lines = utils.open_utf8(lm_text, 'r').readlines()

        text_ready = os.path.join(self.a2k._local_path(), 'text_ready.txt')
        with utils.open_utf8(text_ready, 'w') as ready:
            ready.write('\n'.join(
                [' '.join(line.split()[1:]) for line in lm_lines]))

        text_se = os.path.join(self.a2k._local_path(), 'text_se.txt')
        utils.jobs.run(
            'add-start-end.sh',
            stdin=open(text_ready, 'r'),
            stdout=open(text_se, 'w').write,
            env=kaldi_path(), cwd=self.recipe_dir)
        assert os.path.isfile(text_se), 'LM failed on add-start-end'

        # k option is number of split, useful for huge text files
        # build-lm.sh in kaldi/tools/irstlm/bin
        text_lm = os.path.join(self.a2k._local_path(), 'text_lm.gz')
        self._run_command(
            'build-lm.sh -i {0} -n {1} -o {2} -k 1 -s kneser-ney'
            .format(text_se, self.order, text_lm))
        assert os.path.isfile(text_lm), 'LM failed on build-lm'

        text_blm = os.path.join(self.a2k._local_path(), 'text_blm.gz')
        self._run_command(
            # was with the -i option
            'compile-lm {} --text=yes {}'.format(text_lm, text_blm))

        # gzip the compiled lm (from
        # https://docs.python.org/2/library/gzip.html#examples-of-usage)
        with open(text_blm, 'rb') as fin, gzip.open(G_arpa, 'wb') as fout:
            shutil.copyfileobj(fin, fout)