Python fileinput.hook_compressed() Examples

The following are 12 code examples of fileinput.hook_compressed(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module fileinput, or try the search function.
Example #1
Source File: indic_norm_tok.py    From flores with Creative Commons Attribution Share Alike 4.0 International 5 votes vote down vote up
def main():
    """Normalize and tokenize Indic-language text and write it to stdout.

    Command-line arguments:
        --indic-nlp-path  Root of the Indic NLP Library (required). The root
                          and its ``src`` subdirectory are appended to
                          ``sys.path`` before the library is imported.
        --language        Language passed to the normalizer and the tokenizer
                          (required).
        --remove-nuktas   Flag forwarded to the normalizer factory.
        input             Input file; use - for stdin.

    Raises:
        Exception: if the Indic NLP Library cannot be imported.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--indic-nlp-path", required=True,
                        help="path to Indic NLP Library root")
    parser.add_argument("--language", required=True)
    parser.add_argument("--remove-nuktas", default=False, action="store_true")
    parser.add_argument("input", help="input file; use - for stdin")
    args = parser.parse_args()

    # Extending sys.path is not an import failure, so it stays outside the try.
    sys.path.extend([
        args.indic_nlp_path,
        os.path.join(args.indic_nlp_path, "src"),
    ])
    try:
        from indicnlp.tokenize import indic_tokenize
        from indicnlp.normalize.indic_normalize import IndicNormalizerFactory
    except ImportError:
        # Only import failures are translated into the friendly message.
        # A bare ``except`` would also swallow KeyboardInterrupt/SystemExit
        # and hide unrelated errors behind a misleading "check your path" hint.
        raise Exception(
            "Cannot load Indic NLP Library, make sure --indic-nlp-path is correct"
        )

    # create normalizer
    factory = IndicNormalizerFactory()
    normalizer = factory.get_normalizer(
        args.language, remove_nuktas=args.remove_nuktas,
    )

    # normalize and tokenize
    # NOTE(review): the decode()/encode() calls below assume each line is a
    # byte string and that stdout accepts bytes (Python 2 semantics) --
    # confirm before running this under Python 3.
    for line in fileinput.input([args.input], openhook=fileinput.hook_compressed):
        line = normalizer.normalize(line.decode("utf-8"))
        line = " ".join(indic_tokenize.trivial_tokenize(line, args.language))
        sys.stdout.write(line.encode("utf-8"))
Example #2
Source File: detok.py    From fairseq with MIT License 5 votes vote down vote up
def main():
    """Detokenize every line of the given input files and print the result.

    Each line is stripped, split on single spaces, passed through the Moses
    detokenizer, and then cleaned up with a fixed sequence of substring
    replacements before being printed.
    """
    arg_parser = argparse.ArgumentParser(description='')
    arg_parser.add_argument('files', nargs='*', help='input files')
    options = arg_parser.parse_args()

    detokenizer = sacremoses.MosesDetokenizer()

    # (old, new) pairs, applied strictly in this order to each detokenized line.
    cleanups = (
        (' @', ''),
        ('@ ', ''),
        (' =', '='),
        ('= ', '='),
        (' – ', '–'),
    )

    reader = fileinput.input(options.files, openhook=fileinput.hook_compressed)
    for raw_line in reader:
        tokens = raw_line.strip().split(' ')
        text = detokenizer.detokenize(tokens)
        for old, new in cleanups:
            text = text.replace(old, new)
        print(text)
Example #3
Source File: test_fileinput.py    From Fluid-Designer with GNU General Public License v3.0 5 votes vote down vote up
def test_gz_ext_fake(self):
        """Check that a ".gz" filename is opened through ``gzip.open``.

        ``gzip.open`` is temporarily replaced by ``self.fake_open``; the test
        then verifies the replacement was called exactly once with the
        filename and mode passed through unchanged and no keyword arguments.
        """
        recorder = self.fake_open
        saved_gzip_open = gzip.open
        gzip.open = recorder
        try:
            # The returned object is not inspected; only the recorded call is.
            opened = fileinput.hook_compressed("test.gz", 3)
        finally:
            # Always restore the real opener, even if the hook raised.
            gzip.open = saved_gzip_open

        expected_call = (("test.gz", 3), {})
        self.assertEqual(recorder.invocation_count, 1)
        self.assertEqual(recorder.last_invocation, expected_call)
Example #4
Source File: test_fileinput.py    From Fluid-Designer with GNU General Public License v3.0 5 votes vote down vote up
def test_bz2_ext_fake(self):
        """Check that a ".bz2" filename is opened through ``bz2.BZ2File``.

        ``bz2.BZ2File`` is temporarily replaced by ``self.fake_open``; the
        test then verifies the replacement was called exactly once with the
        filename and mode passed through unchanged and no keyword arguments.
        """
        recorder = self.fake_open
        saved_bz2_file = bz2.BZ2File
        bz2.BZ2File = recorder
        try:
            # The returned object is not inspected; only the recorded call is.
            opened = fileinput.hook_compressed("test.bz2", 4)
        finally:
            # Always restore the real class, even if the hook raised.
            bz2.BZ2File = saved_bz2_file

        expected_call = (("test.bz2", 4), {})
        self.assertEqual(recorder.invocation_count, 1)
        self.assertEqual(recorder.last_invocation, expected_call)
Example #5
Source File: test_fileinput.py    From Fluid-Designer with GNU General Public License v3.0 5 votes vote down vote up
def do_test_use_builtin_open(self, filename, mode):
        """Assert that ``hook_compressed(filename, mode)`` uses the swapped-in opener.

        ``self.fake_open`` is installed through ``self.replace_builtin_open``
        (presumably replacing the builtin ``open`` -- the helper is defined
        elsewhere), and the value that helper returns is handed back to it
        afterwards to undo the swap.
        """
        recorder = self.fake_open
        previous_open = self.replace_builtin_open(recorder)
        try:
            # The returned object is not inspected; only the recorded call is.
            opened = fileinput.hook_compressed(filename, mode)
        finally:
            # Undo the swap even if the hook raised.
            self.replace_builtin_open(previous_open)

        expected_call = ((filename, mode), {})
        self.assertEqual(recorder.invocation_count, 1)
        self.assertEqual(recorder.last_invocation, expected_call)
Example #6
Source File: test_fileinput.py    From ironpython3 with Apache License 2.0 5 votes vote down vote up
def test_gz_ext_fake(self):
        """Verify that ``hook_compressed`` routes ".gz" files to ``gzip.open``.

        With ``gzip.open`` swapped for ``self.fake_open``, the recorder must
        show a single call carrying the original positional arguments
        ``("test.gz", 3)`` and an empty keyword dict.
        """
        name, mode = "test.gz", 3
        real_gzip_open = gzip.open
        gzip.open = self.fake_open
        try:
            # Only the side effect on the recorder matters here.
            stream = fileinput.hook_compressed(name, mode)
        finally:
            # Put the genuine gzip.open back regardless of the outcome.
            gzip.open = real_gzip_open

        calls_made = self.fake_open.invocation_count
        self.assertEqual(calls_made, 1)
        self.assertEqual(self.fake_open.last_invocation, ((name, mode), {}))
Example #7
Source File: test_fileinput.py    From ironpython3 with Apache License 2.0 5 votes vote down vote up
def test_bz2_ext_fake(self):
        """Verify that ``hook_compressed`` routes ".bz2" files to ``bz2.BZ2File``.

        With ``bz2.BZ2File`` swapped for ``self.fake_open``, the recorder must
        show a single call carrying the original positional arguments
        ``("test.bz2", 4)`` and an empty keyword dict.
        """
        name, mode = "test.bz2", 4
        real_bz2_file = bz2.BZ2File
        bz2.BZ2File = self.fake_open
        try:
            # Only the side effect on the recorder matters here.
            stream = fileinput.hook_compressed(name, mode)
        finally:
            # Put the genuine BZ2File back regardless of the outcome.
            bz2.BZ2File = real_bz2_file

        calls_made = self.fake_open.invocation_count
        self.assertEqual(calls_made, 1)
        self.assertEqual(self.fake_open.last_invocation, ((name, mode), {}))
Example #8
Source File: test_fileinput.py    From ironpython3 with Apache License 2.0 5 votes vote down vote up
def do_test_use_builtin_open(self, filename, mode):
        """Shared check: ``hook_compressed`` calls the substituted opener once.

        The substitution is done by ``self.replace_builtin_open`` (defined
        outside this snippet; presumably it swaps the builtin ``open``). Its
        return value is passed back to it in the ``finally`` clause to
        reverse the substitution.
        """
        real_open = self.replace_builtin_open(self.fake_open)
        try:
            # Only the side effect on the recorder matters here.
            stream = fileinput.hook_compressed(filename, mode)
        finally:
            # Reverse the substitution regardless of the outcome.
            self.replace_builtin_open(real_open)

        calls_made = self.fake_open.invocation_count
        self.assertEqual(calls_made, 1)
        # Arguments must be forwarded positionally and unchanged.
        self.assertEqual(self.fake_open.last_invocation,
                         ((filename, mode), {}))
Example #9
Source File: test_fileinput.py    From Project-New-Reign---Nemesis-Main with GNU General Public License v3.0 5 votes vote down vote up
def test_gz_ext_fake(self):
        """``hook_compressed("test.gz", 3)`` must call ``gzip.open`` exactly once.

        ``gzip.open`` is patched with ``self.fake_open`` for the duration of
        the call; the recorded invocation is then compared against the
        expected positional arguments and an empty keyword dict.
        """
        fake = self.fake_open
        gzip_open_backup = gzip.open
        gzip.open = fake
        try:
            # Return value intentionally unused; the recorder holds the evidence.
            handle = fileinput.hook_compressed("test.gz", 3)
        finally:
            # Restore the patched attribute no matter what happened above.
            gzip.open = gzip_open_backup

        self.assertEqual(fake.invocation_count, 1)
        expected_args = ("test.gz", 3)
        self.assertEqual(fake.last_invocation, (expected_args, {}))
Example #10
Source File: test_fileinput.py    From Project-New-Reign---Nemesis-Main with GNU General Public License v3.0 5 votes vote down vote up
def test_bz2_ext_fake(self):
        """``hook_compressed("test.bz2", 4)`` must call ``bz2.BZ2File`` exactly once.

        ``bz2.BZ2File`` is patched with ``self.fake_open`` for the duration of
        the call; the recorded invocation is then compared against the
        expected positional arguments and an empty keyword dict.
        """
        fake = self.fake_open
        bz2_file_backup = bz2.BZ2File
        bz2.BZ2File = fake
        try:
            # Return value intentionally unused; the recorder holds the evidence.
            handle = fileinput.hook_compressed("test.bz2", 4)
        finally:
            # Restore the patched attribute no matter what happened above.
            bz2.BZ2File = bz2_file_backup

        self.assertEqual(fake.invocation_count, 1)
        expected_args = ("test.bz2", 4)
        self.assertEqual(fake.last_invocation, (expected_args, {}))
Example #11
Source File: test_fileinput.py    From Project-New-Reign---Nemesis-Main with GNU General Public License v3.0 5 votes vote down vote up
def do_test_use_builtin_open(self, filename, mode):
        """Helper asserting ``hook_compressed`` delegates to the patched opener.

        ``self.replace_builtin_open`` installs ``self.fake_open`` and returns
        a value that is later passed back to it to undo the patch (the helper
        lives outside this snippet; presumably it patches the builtin
        ``open``).
        """
        fake = self.fake_open
        open_backup = self.replace_builtin_open(fake)
        try:
            # Return value intentionally unused; the recorder holds the evidence.
            handle = fileinput.hook_compressed(filename, mode)
        finally:
            # Undo the patch no matter what happened above.
            self.replace_builtin_open(open_backup)

        self.assertEqual(fake.invocation_count, 1)
        expected_args = (filename, mode)
        self.assertEqual(fake.last_invocation, (expected_args, {}))
Example #12
Source File: gtf.py    From seqc with GNU General Public License v2.0 5 votes vote down vote up
def __iter__(self):
        """Yield each record of the gtf input as a list of tab-separated fields.

        The files named in ``self._files`` are read through ``fileinput`` with
        ``fileinput.hook_compressed`` as the open hook. Leading lines that
        start with '#' are treated as header and skipped; once the first
        non-header line has been seen, every later line is yielded, including
        any that start with '#'. Fields are produced by ``split('\\t')``, so
        the last field keeps its trailing newline.

        If the input is empty or holds only header lines, nothing is yielded.
        Calling ``next()`` unguarded here would raise StopIteration inside the
        generator, which Python 3.7+ converts into RuntimeError.
        """
        hook = fileinput.hook_compressed
        with fileinput.input(self._files, openhook=hook, mode='r') as f:
            in_header = True
            for record in f:
                if in_header:
                    # get rid of header lines
                    if record.startswith('#'):
                        continue
                    # first non-comment line: stop filtering from here on
                    in_header = False
                yield record.split('\t')