Python Examples of gzip.html

Source File: file_utils.py From mlflow with Apache License 2.0

6 votes

def make_tarfile(output_filename, source_dir, archive_name, custom_filter=None):
    # Helper for filtering out modification timestamps
    def _filter_timestamps(tar_info):
        tar_info.mtime = 0
        return tar_info if custom_filter is None else custom_filter(tar_info)

    unzipped_filename = tempfile.mktemp()
    try:
        with tarfile.open(unzipped_filename, "w") as tar:
            tar.add(source_dir, arcname=archive_name, filter=_filter_timestamps)
        # When gzipping the tar, don't include the tar's filename or modification time in the
        # zipped archive (see https://docs.python.org/3/library/gzip.html#gzip.GzipFile)
        with gzip.GzipFile(filename="", fileobj=open(output_filename, 'wb'), mode='wb', mtime=0) \
                as gzipped_tar, open(unzipped_filename, 'rb') as tar:
            gzipped_tar.write(tar.read())
    finally:
        os.remove(unzipped_filename)

Source File: encoding.py From cherrypy with BSD 3-Clause "New" or "Revised" License

5 votes

def compress(body, compress_level):
    """Compress 'body' at the given compress_level."""
    import zlib

    # See http://www.gzip.org/zlib/rfc-gzip.html
    yield b'\x1f\x8b'       # ID1 and ID2: gzip marker
    yield b'\x08'           # CM: compression method
    yield b'\x00'           # FLG: none set
    # MTIME: 4 bytes
    yield struct.pack('<L', int(time.time()) & int('FFFFFFFF', 16))
    yield b'\x02'           # XFL: max compression, slowest algo
    yield b'\xff'           # OS: unknown

    crc = zlib.crc32(b'')
    size = 0
    zobj = zlib.compressobj(compress_level,
                            zlib.DEFLATED, -zlib.MAX_WBITS,
                            zlib.DEF_MEM_LEVEL, 0)
    for line in body:
        size += len(line)
        crc = zlib.crc32(line, crc)
        yield zobj.compress(line)
    yield zobj.flush()

    # CRC32: 4 bytes
    yield struct.pack('<L', crc & int('FFFFFFFF', 16))
    # ISIZE: 4 bytes
    yield struct.pack('<L', size & int('FFFFFFFF', 16))

Source File: encoding.py From opsbro with MIT License

5 votes

def compress(body, compress_level):
    """Compress 'body' at the given compress_level."""
    import zlib

    # See http://www.gzip.org/zlib/rfc-gzip.html
    yield ntob('\x1f\x8b')       # ID1 and ID2: gzip marker
    yield ntob('\x08')           # CM: compression method
    yield ntob('\x00')           # FLG: none set
    # MTIME: 4 bytes
    yield struct.pack("<L", int(time.time()) & int('FFFFFFFF', 16))
    yield ntob('\x02')           # XFL: max compression, slowest algo
    yield ntob('\xff')           # OS: unknown

    crc = zlib.crc32(ntob(""))
    size = 0
    zobj = zlib.compressobj(compress_level,
                            zlib.DEFLATED, -zlib.MAX_WBITS,
                            zlib.DEF_MEM_LEVEL, 0)
    for line in body:
        size += len(line)
        crc = zlib.crc32(line, crc)
        yield zobj.compress(line)
    yield zobj.flush()

    # CRC32: 4 bytes
    yield struct.pack("<L", crc & int('FFFFFFFF', 16))
    # ISIZE: 4 bytes
    yield struct.pack("<L", size & int('FFFFFFFF', 16))

Source File: encoding.py From bazarr with GNU General Public License v3.0

5 votes

def compress(body, compress_level):
    """Compress 'body' at the given compress_level."""
    import zlib

    # See http://www.gzip.org/zlib/rfc-gzip.html
    yield ntob('\x1f\x8b')       # ID1 and ID2: gzip marker
    yield ntob('\x08')           # CM: compression method
    yield ntob('\x00')           # FLG: none set
    # MTIME: 4 bytes
    yield struct.pack('<L', int(time.time()) & int('FFFFFFFF', 16))
    yield ntob('\x02')           # XFL: max compression, slowest algo
    yield ntob('\xff')           # OS: unknown

    crc = zlib.crc32(ntob(''))
    size = 0
    zobj = zlib.compressobj(compress_level,
                            zlib.DEFLATED, -zlib.MAX_WBITS,
                            zlib.DEF_MEM_LEVEL, 0)
    for line in body:
        size += len(line)
        crc = zlib.crc32(line, crc)
        yield zobj.compress(line)
    yield zobj.flush()

    # CRC32: 4 bytes
    yield struct.pack('<L', crc & int('FFFFFFFF', 16))
    # ISIZE: 4 bytes
    yield struct.pack('<L', size & int('FFFFFFFF', 16))

Source File: encoding.py From Tautulli with GNU General Public License v3.0

5 votes

def compress(body, compress_level):
    """Compress 'body' at the given compress_level."""
    import zlib

    # See http://www.gzip.org/zlib/rfc-gzip.html
    yield b'\x1f\x8b'       # ID1 and ID2: gzip marker
    yield b'\x08'           # CM: compression method
    yield b'\x00'           # FLG: none set
    # MTIME: 4 bytes
    yield struct.pack('<L', int(time.time()) & int('FFFFFFFF', 16))
    yield b'\x02'           # XFL: max compression, slowest algo
    yield b'\xff'           # OS: unknown

    crc = zlib.crc32(b'')
    size = 0
    zobj = zlib.compressobj(compress_level,
                            zlib.DEFLATED, -zlib.MAX_WBITS,
                            zlib.DEF_MEM_LEVEL, 0)
    for line in body:
        size += len(line)
        crc = zlib.crc32(line, crc)
        yield zobj.compress(line)
    yield zobj.flush()

    # CRC32: 4 bytes
    yield struct.pack('<L', crc & int('FFFFFFFF', 16))
    # ISIZE: 4 bytes
    yield struct.pack('<L', size & int('FFFFFFFF', 16))

Source File: encoding.py From moviegrabber with GNU General Public License v3.0

5 votes

def compress(body, compress_level):
    """Compress 'body' at the given compress_level."""
    import zlib
    
    # See http://www.gzip.org/zlib/rfc-gzip.html
    yield ntob('\x1f\x8b')       # ID1 and ID2: gzip marker
    yield ntob('\x08')           # CM: compression method
    yield ntob('\x00')           # FLG: none set
    # MTIME: 4 bytes
    yield struct.pack("<L", int(time.time()) & int('FFFFFFFF', 16))
    yield ntob('\x02')           # XFL: max compression, slowest algo
    yield ntob('\xff')           # OS: unknown
    
    crc = zlib.crc32(ntob(""))
    size = 0
    zobj = zlib.compressobj(compress_level,
                            zlib.DEFLATED, -zlib.MAX_WBITS,
                            zlib.DEF_MEM_LEVEL, 0)
    for line in body:
        size += len(line)
        crc = zlib.crc32(line, crc)
        yield zobj.compress(line)
    yield zobj.flush()
    
    # CRC32: 4 bytes
    yield struct.pack("<L", crc & int('FFFFFFFF', 16))
    # ISIZE: 4 bytes
    yield struct.pack("<L", size & int('FFFFFFFF', 16))

Source File: util.py From Carnets with BSD 3-Clause "New" or "Revised" License

5 votes

def fileobj_name(f):
    """
    Returns the 'name' of file-like object f, if it has anything that could be
    called its name.  Otherwise f's class or type is returned.  If f is a
    string f itself is returned.
    """

    if isinstance(f, str):
        return f
    elif isinstance(f, gzip.GzipFile):
        # The .name attribute on GzipFiles does not always represent the name
        # of the file being read/written--it can also represent the original
        # name of the file being compressed
        # See the documentation at
        # https://docs.python.org/3/library/gzip.html#gzip.GzipFile
        # As such, for gzip files only return the name of the underlying
        # fileobj, if it exists
        return fileobj_name(f.fileobj)
    elif hasattr(f, 'name'):
        return f.name
    elif hasattr(f, 'filename'):
        return f.filename
    elif hasattr(f, '__class__'):
        return str(f.__class__)
    else:
        return str(type(f))

Source File: language_model.py From abkhazia with GNU General Public License v3.0

4 votes

def _compute_lm(self, G_arpa):
        """Generate an ARPA n-gram from an abkhazia corpus

        This method relies on the following Kaldi programs:
        add-start-end.sh, build-lm.sh and compile-lm. It uses the
        IRSTLM library.

        """
        self.log.info(
            'computing %s %s-gram in ARPA format', self.level, self.order)

        # cut -d' ' -f2 lm_text > text_ready. Train need to
        # remove utt-id on first column of text file
        lm_text = os.path.join(self.a2k._local_path(), 'lm_text.txt')
        lm_lines = utils.open_utf8(lm_text, 'r').readlines()

        text_ready = os.path.join(self.a2k._local_path(), 'text_ready.txt')
        with utils.open_utf8(text_ready, 'w') as ready:
            ready.write('\n'.join(
                [' '.join(line.split()[1:]) for line in lm_lines]))

        text_se = os.path.join(self.a2k._local_path(), 'text_se.txt')
        utils.jobs.run(
            'add-start-end.sh',
            stdin=open(text_ready, 'r'),
            stdout=open(text_se, 'w').write,
            env=kaldi_path(), cwd=self.recipe_dir)
        assert os.path.isfile(text_se), 'LM failed on add-start-end'

        # k option is number of split, useful for huge text files
        # build-lm.sh in kaldi/tools/irstlm/bin
        text_lm = os.path.join(self.a2k._local_path(), 'text_lm.gz')
        self._run_command(
            'build-lm.sh -i {0} -n {1} -o {2} -k 1 -s kneser-ney'
            .format(text_se, self.order, text_lm))
        assert os.path.isfile(text_lm), 'LM failed on build-lm'

        text_blm = os.path.join(self.a2k._local_path(), 'text_blm.gz')
        self._run_command(
            # was with the -i option
            'compile-lm {} --text=yes {}'.format(text_lm, text_blm))

        # gzip the compiled lm (from
        # https://docs.python.org/2/library/gzip.html#examples-of-usage)
        with open(text_blm, 'rb') as fin, gzip.open(G_arpa, 'wb') as fout:
            shutil.copyfileobj(fin, fout)

Python gzip.html() Examples