Python Examples of codecs.getdecoder

Source File: strings.py From recruit with Apache License 2.0

6 votes

def str_decode(arr, encoding, errors="strict"):
    """
    Decode each element of the Series/Index with the requested codec.

    Mirrors :meth:`str.decode` (python2) / :meth:`bytes.decode` (python3)
    applied element by element.

    Parameters
    ----------
    arr : Series/Index
        Values to decode; handed to ``_na_map`` together with the
        per-element decoding function.
    encoding : str
        Name of the codec to decode with.
    errors : str, optional
        Error handling scheme forwarded to the decoder (default "strict").

    Returns
    -------
    decoded : Series/Index of objects
    """
    if encoding not in _cpython_optimized_decoders:
        # Generic path: resolve the codec once, then reuse its decoder for
        # every element.  The decoder returns (text, consumed); keep the text.
        codec_decode = codecs.getdecoder(encoding)

        def decode_one(value):
            return codec_decode(value, errors)[0]
    else:
        # Codecs listed in _cpython_optimized_decoders go through the
        # element's own ``.decode`` method instead.
        def decode_one(value):
            return value.decode(encoding, errors)

    return _na_map(decode_one, arr)

Source File: featuregui.py From CorpusTools with BSD 3-Clause "New" or "Revised" License

6 votes

def accept(self):
        """Validate the dialog input, export the feature matrix, then accept.

        Shows a critical message box and returns early (leaving the dialog
        open) when the path is empty or when the column delimiter, after
        escape sequences are expanded, is not exactly one character long.
        """
        destination = self.pathWidget.value()
        if destination == '':
            QMessageBox.critical(self,
                    "Missing information", "Please specify a path to save the corpus.")
            return

        # The text box holds escape sequences as typed (e.g. backslash + t);
        # unicode_escape turns them into the actual character.
        decode_escapes = codecs.getdecoder("unicode_escape")
        column_delimiter = decode_escapes(self.columnDelimiterEdit.text())[0]
        if len(column_delimiter) != 1:
            QMessageBox.critical(self,
                    "Invalid information", "The column delimiter must be a single character.")
            return

        export_feature_matrix_csv(self.specifier, destination, column_delimiter)
        QDialog.accept(self)

Source File: templates.py From king-phisher with BSD 3-Clause "New" or "Revised" License

6 votes

def _filter_decode(self, data, encoding):
		"""Decode *data* using the named scheme and return the result.

		Accepted scheme names (case-insensitive; a dash before the two digits
		of a base/rot name is tolerated): base16 / hex, base32, base64, rot13.
		When ``its.py_v3`` is true, bytes input and bytes output are decoded
		as UTF-8 so the caller works with text.

		Raises ValueError for any other scheme name.
		"""
		if its.py_v3 and isinstance(data, bytes):
			data = data.decode('utf-8')

		# Normalise spellings such as "Base-64" or "ROT-13" to "base64" / "rot13".
		encoding = re.sub(r'^(base|rot)-(\d\d)$', r'\1\2', encoding.lower())

		handlers = {
			'base16': base64.b16decode,
			'hex': base64.b16decode,
			'base32': base64.b32decode,
			'base64': base64.b64decode,
			# the codec returns (text, consumed); only the text is wanted
			'rot13': lambda text: codecs.getdecoder('rot-13')(text)[0],
		}
		handler = handlers.get(encoding)
		if handler is None:
			raise ValueError('Unknown encoding type: ' + encoding)
		data = handler(data)

		if its.py_v3 and isinstance(data, bytes):
			data = data.decode('utf-8')
		return data

Source File: test_codecs.py From ironpython3 with Apache License 2.0

6 votes

def test_all(self):
        api = (
            "encode", "decode",
            "register", "CodecInfo", "Codec", "IncrementalEncoder",
            "IncrementalDecoder", "StreamReader", "StreamWriter", "lookup",
            "getencoder", "getdecoder", "getincrementalencoder",
            "getincrementaldecoder", "getreader", "getwriter",
            "register_error", "lookup_error",
            "strict_errors", "replace_errors", "ignore_errors",
            "xmlcharrefreplace_errors", "backslashreplace_errors",
            "open", "EncodedFile",
            "iterencode", "iterdecode",
            "BOM", "BOM_BE", "BOM_LE",
            "BOM_UTF8", "BOM_UTF16", "BOM_UTF16_BE", "BOM_UTF16_LE",
            "BOM_UTF32", "BOM_UTF32_BE", "BOM_UTF32_LE",
            "BOM32_BE", "BOM32_LE", "BOM64_BE", "BOM64_LE",  # Undocumented
            "StreamReaderWriter", "StreamRecoder",
        )
        self.assertCountEqual(api, codecs.__all__)
        for api in codecs.__all__:
            getattr(codecs, api)

Source File: test_codecs.py From Project-New-Reign---Nemesis-Main with GNU General Public License v3.0

6 votes

def test_all(self):
        api = (
            "encode", "decode",
            "register", "CodecInfo", "Codec", "IncrementalEncoder",
            "IncrementalDecoder", "StreamReader", "StreamWriter", "lookup",
            "getencoder", "getdecoder", "getincrementalencoder",
            "getincrementaldecoder", "getreader", "getwriter",
            "register_error", "lookup_error",
            "strict_errors", "replace_errors", "ignore_errors",
            "xmlcharrefreplace_errors", "backslashreplace_errors",
            "namereplace_errors",
            "open", "EncodedFile",
            "iterencode", "iterdecode",
            "BOM", "BOM_BE", "BOM_LE",
            "BOM_UTF8", "BOM_UTF16", "BOM_UTF16_BE", "BOM_UTF16_LE",
            "BOM_UTF32", "BOM_UTF32_BE", "BOM_UTF32_LE",
            "BOM32_BE", "BOM32_LE", "BOM64_BE", "BOM64_LE",  # Undocumented
            "StreamReaderWriter", "StreamRecoder",
        )
        self.assertCountEqual(api, codecs.__all__)
        for api in codecs.__all__:
            getattr(codecs, api)

Source File: strings.py From predictive-maintenance-using-machine-learning with Apache License 2.0

6 votes

def str_decode(arr, encoding, errors="strict"):
    """
    Decode each element of the Series/Index with the requested codec.

    Mirrors :meth:`str.decode` (python2) / :meth:`bytes.decode` (python3)
    applied element by element.

    Parameters
    ----------
    arr : Series/Index
        Values to decode; handed to ``_na_map`` together with the
        per-element decoding function.
    encoding : str
        Name of the codec to decode with.
    errors : str, optional
        Error handling scheme forwarded to the decoder (default "strict").

    Returns
    -------
    decoded : Series/Index of objects
    """
    if encoding not in _cpython_optimized_decoders:
        # Generic path: resolve the codec once, then reuse its decoder for
        # every element.  The decoder returns (text, consumed); keep the text.
        codec_decode = codecs.getdecoder(encoding)

        def decode_one(value):
            return codec_decode(value, errors)[0]
    else:
        # Codecs listed in _cpython_optimized_decoders go through the
        # element's own ``.decode`` method instead.
        def decode_one(value):
            return value.decode(encoding, errors)

    return _na_map(decode_one, arr)

Source File: test_codecs.py From ironpython2 with Apache License 2.0

6 votes

def test_all(self):
        api = (
            "encode", "decode",
            "register", "CodecInfo", "Codec", "IncrementalEncoder",
            "IncrementalDecoder", "StreamReader", "StreamWriter", "lookup",
            "getencoder", "getdecoder", "getincrementalencoder",
            "getincrementaldecoder", "getreader", "getwriter",
            "register_error", "lookup_error",
            "strict_errors", "replace_errors", "ignore_errors",
            "xmlcharrefreplace_errors", "backslashreplace_errors",
            "open", "EncodedFile",
            "iterencode", "iterdecode",
            "BOM", "BOM_BE", "BOM_LE",
            "BOM_UTF8", "BOM_UTF16", "BOM_UTF16_BE", "BOM_UTF16_LE",
            "BOM_UTF32", "BOM_UTF32_BE", "BOM_UTF32_LE",
            "BOM32_BE", "BOM32_LE", "BOM64_BE", "BOM64_LE",  # Undocumented
            "StreamReaderWriter", "StreamRecoder",
        )
        self.assertEqual(sorted(api), sorted(codecs.__all__))
        for api in codecs.__all__:
            getattr(codecs, api)

Source File: iogui.py From CorpusTools with BSD 3-Clause "New" or "Revised" License

6 votes

def accept(self):
        """Validate the dialog input, export the corpus as CSV, then accept.

        Shows a critical message box and returns early (leaving the dialog
        open) when the path is empty or when the column delimiter, after
        escape sequences are expanded, is not exactly one character long.
        """
        destination = self.pathWidget.value()
        if destination == '':
            QMessageBox.critical(self,
                                 "Missing information", "Please specify a path to save the corpus.")
            return

        # The text box holds escape sequences as typed (e.g. backslash + t);
        # unicode_escape turns them into the actual character.
        decode_escapes = codecs.getdecoder("unicode_escape")
        column_delimiter = decode_escapes(self.columnDelimiterEdit.text())[0]
        if len(column_delimiter) != 1:
            QMessageBox.critical(self,
                                 "Invalid information", "The column delimiter must be a single character.")
            return

        transcription_delimiter = self.transDelimiterEdit.text()
        # variantOptions entries are indexed by the combo box position; the
        # second item of the chosen entry is what the exporter receives.
        chosen_variant = self.variantOptions[self.variantWidget.currentIndex()]
        export_corpus_csv(self.corpus, destination, column_delimiter,
                          transcription_delimiter, chosen_variant[1])

        QDialog.accept(self)

Source File: strings.py From vnpy_crypto with MIT License

6 votes

def str_decode(arr, encoding, errors="strict"):
    """
    Decode each element of the Series/Index with the requested codec.

    Mirrors :meth:`str.decode` (python2) / :meth:`bytes.decode` (python3)
    applied element by element.

    Parameters
    ----------
    arr : Series/Index
        Values to decode; handed to ``_na_map`` together with the
        per-element decoding function.
    encoding : str
        Name of the codec to decode with.
    errors : str, optional
        Error handling scheme forwarded to the decoder (default "strict").

    Returns
    -------
    decoded : Series/Index of objects
    """
    if encoding not in _cpython_optimized_decoders:
        # Generic path: resolve the codec once, then reuse its decoder for
        # every element.  The decoder returns (text, consumed); keep the text.
        codec_decode = codecs.getdecoder(encoding)

        def decode_one(value):
            return codec_decode(value, errors)[0]
    else:
        # Codecs listed in _cpython_optimized_decoders go through the
        # element's own ``.decode`` method instead.
        def decode_one(value):
            return value.decode(encoding, errors)

    return _na_map(decode_one, arr)

Source File: ch10_ex5.py From Mastering-Object-Oriented-Python-Second-Edition with MIT License

6 votes

def alpha_decode(data: bytes, metadata: 'XMetadata', field_metadata: 'XField') -> str:
    """Turn the raw bytes of an alpha/alphanumeric field into text.

    The conversion is delegated to ``metadata.decode``, a codec-style
    callable returning ``(text, consumed)``; only the text is returned.
    ``field_metadata`` is accepted for signature parity with the other
    decoders and is not consulted here.

    Mock metadata objects
    >>> import types
    >>> meta = types.SimpleNamespace(reclen=6, encoding='ebcdic')
    >>> meta.decode = codecs.getdecoder(meta.encoding)
    >>> field_meta = types.SimpleNamespace()  # Used in other examples...

    >>> data = bytes([0xf9, 0xf8, 0xf7, 0xf6, 0xf5, 0x60])
    >>> alpha_decode(data, meta, field_meta)
    '98765-'

    """
    decoded, _consumed = metadata.decode(data)
    return decoded


# Numeric USAGE DISPLAY trailing sign conversion.
# The COBOL program stored text version of the number.

Source File: ch10_ex5.py From Mastering-Object-Oriented-Python-Second-Edition with MIT License

6 votes

def display_decode(data: bytes, metadata: 'XMetadata', field_metadata: 'XField') -> Decimal:
    """Decode USAGE DISPLAY numeric data (digits followed by a trailing sign).

    ``metadata.decode`` is a codec-style callable returning
    ``(text, consumed)``.  The last decoded character is taken as the sign
    and the remaining characters as digits; the final
    ``field_metadata.precision`` digits become the fractional part.
    A precision of ``None`` or ``0`` yields an integral value.

    NOTE(review): 'ebcdic' in the example below is not a codec name shipped
    with CPython; the example presumably relies on a codec registered
    elsewhere -- confirm.

    Mock metadata objects
    >>> import types
    >>> meta= types.SimpleNamespace(reclen=6, encoding='ebcdic')
    >>> meta.decode = codecs.getdecoder(meta.encoding)
    >>> field_meta = types.SimpleNamespace(precision=2)

    >>> data = bytes([0xf9, 0xf8, 0xf7, 0xf6, 0xf5, 0x60])
    >>> display_decode(data, meta, field_meta)
    Decimal('-987.65')

    """
    text, _ = metadata.decode(data)
    precision = field_metadata.precision or 0  # If None, default is 0.
    digits, sign = text[:-1], text[-1]
    if precision:
        whole, fraction = digits[:-precision], digits[-precision:]
    else:
        # Slicing with -0 would put every digit after the decimal point
        # (digits[:-0] is empty, digits[-0:] is everything), so the
        # no-fraction case is handled explicitly.
        whole, fraction = digits, ""
    return Decimal(sign + whole + "." + fraction)


# Numeric USAGE COMP-3 conversion.
# The COBOL program encoded the number into packed decimal representation.

Source File: strings.py From elasticintel with GNU General Public License v3.0

6 votes

def str_decode(arr, encoding, errors="strict"):
    """
    Decode each element of the Series/Index with the requested codec.

    Mirrors :meth:`str.decode` (python2) / :meth:`bytes.decode` (python3)
    applied element by element.

    Parameters
    ----------
    arr : Series/Index
        Values to decode; handed to ``_na_map`` together with the
        per-element decoding function.
    encoding : str
        Name of the codec to decode with.
    errors : str, optional
        Error handling scheme forwarded to the decoder (default "strict").

    Returns
    -------
    decoded : Series/Index of objects
    """
    if encoding not in _cpython_optimized_decoders:
        # Generic path: resolve the codec once, then reuse its decoder for
        # every element.  The decoder returns (text, consumed); keep the text.
        codec_decode = codecs.getdecoder(encoding)

        def decode_one(value):
            return codec_decode(value, errors)[0]
    else:
        # Codecs listed in _cpython_optimized_decoders go through the
        # element's own ``.decode`` method instead.
        def decode_one(value):
            return value.decode(encoding, errors)

    return _na_map(decode_one, arr)

Source File: test_codecs.py From Fluid-Designer with GNU General Public License v3.0

6 votes

def test_all(self):
        api = (
            "encode", "decode",
            "register", "CodecInfo", "Codec", "IncrementalEncoder",
            "IncrementalDecoder", "StreamReader", "StreamWriter", "lookup",
            "getencoder", "getdecoder", "getincrementalencoder",
            "getincrementaldecoder", "getreader", "getwriter",
            "register_error", "lookup_error",
            "strict_errors", "replace_errors", "ignore_errors",
            "xmlcharrefreplace_errors", "backslashreplace_errors",
            "namereplace_errors",
            "open", "EncodedFile",
            "iterencode", "iterdecode",
            "BOM", "BOM_BE", "BOM_LE",
            "BOM_UTF8", "BOM_UTF16", "BOM_UTF16_BE", "BOM_UTF16_LE",
            "BOM_UTF32", "BOM_UTF32_BE", "BOM_UTF32_LE",
            "BOM32_BE", "BOM32_LE", "BOM64_BE", "BOM64_LE",  # Undocumented
            "StreamReaderWriter", "StreamRecoder",
        )
        self.assertCountEqual(api, codecs.__all__)
        for api in codecs.__all__:
            getattr(codecs, api)

Source File: strings.py From Splunking-Crime with GNU Affero General Public License v3.0

6 votes

def str_decode(arr, encoding, errors="strict"):
    """
    Decode each element of the Series/Index with the requested codec.

    Mirrors :meth:`str.decode` (python2) / :meth:`bytes.decode` (python3)
    applied element by element.

    Parameters
    ----------
    arr : Series/Index
        Values to decode; handed to ``_na_map`` together with the
        per-element decoding function.
    encoding : str
        Name of the codec to decode with.
    errors : str, optional
        Error handling scheme forwarded to the decoder (default "strict").

    Returns
    -------
    decoded : Series/Index of objects
    """
    if encoding not in _cpython_optimized_decoders:
        # Generic path: resolve the codec once, then reuse its decoder for
        # every element.  The decoder returns (text, consumed); keep the text.
        codec_decode = codecs.getdecoder(encoding)

        def decode_one(value):
            return codec_decode(value, errors)[0]
    else:
        # Codecs listed in _cpython_optimized_decoders go through the
        # element's own ``.decode`` method instead.
        def decode_one(value):
            return value.decode(encoding, errors)

    return _na_map(decode_one, arr)

Source File: test_codecs.py From gcblue with BSD 3-Clause "New" or "Revised" License

5 votes

def test_basics(self):
        """Round-trip a short ASCII string through every string encoding."""
        sample = "abc123"
        for codec_name in all_string_encodings:
            # Encoders return (output, length consumed from the input).
            encode = codecs.getencoder(codec_name)
            encoded, consumed = encode(sample)
            self.assertEqual(consumed, len(sample))

            decode = codecs.getdecoder(codec_name)
            decoded, consumed = decode(encoded)
            self.assertEqual(decoded, sample,
                             "%r != %r (encoding=%r)" % (decoded, sample, codec_name))

Source File: test_multibytecodec.py From gcblue with BSD 3-Clause "New" or "Revised" License

5 votes

def test_errorcallback_longindex(self):
        dec = codecs.getdecoder('euc-kr')
        myreplace  = lambda exc: (u'', sys.maxint+1)
        codecs.register_error('test.cjktest', myreplace)
        self.assertRaises(IndexError, dec,
                          'apple\x92ham\x93spam', 'test.cjktest')

Source File: test_multibytecodec.py From Project-New-Reign---Nemesis-Main with GNU General Public License v3.0

5 votes

def test_errorcallback_longindex(self):
        dec = codecs.getdecoder('euc-kr')
        myreplace  = lambda exc: ('', sys.maxsize+1)
        codecs.register_error('test.cjktest', myreplace)
        self.assertRaises(IndexError, dec,
                          b'apple\x92ham\x93spam', 'test.cjktest')

Source File: mrknow_urlparserhelper.py From filmkodi with Apache License 2.0

5 votes

def unicode_escape(s):
    """Expand literal ``\\uXXXX`` sequences in *s* and return UTF-8 bytes.

    Only backslash-u followed by four or more hex digits is touched; every
    other character of *s* is left as it is before the final UTF-8 encode.
    """
    decode_escape = codecs.getdecoder('unicode_escape')

    def _expand(match):
        # The decoder returns (text, consumed); keep just the text.
        return decode_escape(match.group(0))[0]

    expanded = re.sub(r'\\u[0-9a-fA-F]{4,}', _expand, s)
    return expanded.encode('utf-8')

Source File: test_multibytecodec.py From CTFCrackTools-V2 with GNU General Public License v3.0

5 votes

def test_errorcallback_longindex(self):
        dec = codecs.getdecoder('euc-kr')
        myreplace  = lambda exc: (u'', sys.maxint+1)
        codecs.register_error('test.cjktest', myreplace)
        self.assertRaises(IndexError, dec,
                          'apple\x92ham\x93spam', 'test.cjktest')

Source File: mrknow_urlparserhelper.py From filmkodi with Apache License 2.0

5 votes

def unicode_escape(s):
    """Expand literal ``\\uXXXX`` sequences in *s* and return UTF-8 bytes.

    Only backslash-u followed by four or more hex digits is touched; every
    other character of *s* is left as it is before the final UTF-8 encode.
    """
    decode_escape = codecs.getdecoder('unicode_escape')

    def _expand(match):
        # The decoder returns (text, consumed); keep just the text.
        return decode_escape(match.group(0))[0]

    expanded = re.sub(r'\\u[0-9a-fA-F]{4,}', _expand, s)
    return expanded.encode('utf-8')

Source File: test_codecs.py From CTFCrackTools-V2 with GNU General Public License v3.0

5 votes

def test_decode_callback(self):
        if sys.maxunicode > 0xffff:
            codecs.register_error("UnicodeInternalTest", codecs.ignore_errors)
            decoder = codecs.getdecoder("unicode_internal")
            ab = u"ab".encode("unicode_internal")
            ignored = decoder("%s\x22\x22\x22\x22%s" % (ab[:4], ab[4:]),
                "UnicodeInternalTest")
            self.assertEqual((u"ab", 12), ignored)

Source File: test_codecs.py From gcblue with BSD 3-Clause "New" or "Revised" License

5 votes

def test_bad_decode_args(self):
        """Each unicode decoder must reject a call with no arguments, and
        (apart from idna and punycode) a call with an integer argument."""
        skip_integer_check = ("idna", "punycode")
        for codec_name in all_unicode_encodings:
            decode = codecs.getdecoder(codec_name)
            self.assertRaises(TypeError, decode)
            if codec_name in skip_integer_check:
                continue
            self.assertRaises(TypeError, decode, 42)

Source File: test_codecs.py From gcblue with BSD 3-Clause "New" or "Revised" License

5 votes

def test_getdecoder(self):
        self.assertRaises(TypeError, codecs.getdecoder)
        self.assertRaises(LookupError, codecs.getdecoder, "__spam__")

Source File: test_codecs.py From gcblue with BSD 3-Clause "New" or "Revised" License

5 votes

def test_decode_callback(self):
        if sys.maxunicode > 0xffff:
            codecs.register_error("UnicodeInternalTest", codecs.ignore_errors)
            decoder = codecs.getdecoder("unicode_internal")
            ab = u"ab".encode("unicode_internal")
            ignored = decoder("%s\x22\x22\x22\x22%s" % (ab[:4], ab[4:]),
                "UnicodeInternalTest")
            self.assertEqual((u"ab", 12), ignored)

Source File: test_codecs.py From Project-New-Reign---Nemesis-Main with GNU General Public License v3.0

5 votes

def test_basics(self):
        """Round-trip all 256 byte values through every bytes-to-bytes codec."""
        payload = bytes(range(256))
        for codec_name in bytes_transform_encodings:
            with self.subTest(encoding=codec_name):
                # generic codecs interface: each call returns (output, consumed)
                encode = codecs.getencoder(codec_name)
                encoded, consumed = encode(payload)
                self.assertEqual(consumed, len(payload))

                decode = codecs.getdecoder(codec_name)
                decoded, consumed = decode(encoded)
                self.assertEqual(consumed, len(encoded))
                self.assertEqual(decoded, payload)

Source File: objects.py From awkward-array with BSD 3-Clause "New" or "Revised" License

5 votes

def encoding(self, value):
        """Store the encoding name and the matching decoder function.

        ``None`` means no decoding: ``_args`` then holds ``(None,)``.
        Otherwise the decoder is looked up first, so an unknown codec name
        raises LookupError before any attribute is modified.
        """
        decoder = None if value is None else codecs.getdecoder(value)
        self._encoding = value
        self._args = (decoder,)

Source File: objects.py From awkward-array with BSD 3-Clause "New" or "Revised" License

5 votes

def encoding(self, value):
        """Store the encoding name and the matching decoder function.

        ``None`` means no decoding: ``_args`` then holds ``(None,)``.
        Otherwise the decoder is looked up first, so an unknown codec name
        raises LookupError before any attribute is modified.
        """
        decoder = None if value is None else codecs.getdecoder(value)
        self._encoding = value
        self._args = (decoder,)

Source File: iogui.py From CorpusTools with BSD 3-Clause "New" or "Revised" License

5 votes

def delimiters(self):
        """Return ``(word delimiter, column delimiter)`` for this dialog.

        The word delimiter is always ``None``; the column delimiter is the
        text of the column-delimiter box with escape sequences expanded.
        """
        decode_escapes = codecs.getdecoder("unicode_escape")
        # The decoder returns (text, consumed); only the text is needed.
        column_delimiter, _consumed = decode_escapes(self.columnDelimiterEdit.text())
        return None, column_delimiter

Source File: featuregui.py From CorpusTools with BSD 3-Clause "New" or "Revised" License

5 votes

def generateKwargs(self):
        """Collect and validate the dialog's inputs.

        Returns a dict with the keys 'path', 'name' and 'delimiter' when
        every check passes.  On any failed check a message box is shown and
        None is returned; the same happens when the user declines to
        overwrite an existing feature system of the same name.
        """
        csv_path = self.pathWidget.value()
        if csv_path == '':
            QMessageBox.critical(self,
                    "Missing information", "Please specify a path to the csv file.")
            return
        if not os.path.exists(csv_path):
            QMessageBox.critical(self,
                    "Invalid information", "Feature matrix file could not be located. Please verify the path and file name.")
            return

        system_name = self.featureSystemSelect.value()
        if system_name == '':
            QMessageBox.critical(self,
                    "Missing information", "Please specify the transcription and feature system.")
            return

        # A name that is already stored needs explicit confirmation.
        if system_name in get_systems_list(self.settings['storage']):
            confirm = QMessageBox(QMessageBox.Warning, "Duplicate name",
                    "A feature system named '{}' already exists.  Overwrite?".format(system_name), QMessageBox.NoButton, self)
            confirm.addButton("Overwrite", QMessageBox.AcceptRole)
            confirm.addButton("Abort", QMessageBox.RejectRole)
            if confirm.exec_() != QMessageBox.AcceptRole:
                return None

        # Catches falsy names other than the empty string handled above.
        if not system_name:
            QMessageBox.critical(self,
                    "Missing information", "Please specify a name for the transcription and feature systems.")
            return

        # Expand escape sequences typed into the delimiter box (e.g. \t).
        decode_escapes = codecs.getdecoder("unicode_escape")
        column_delimiter = decode_escapes(self.columnDelimiterEdit.text())[0]
        if not column_delimiter:
            QMessageBox.critical(self,
                    "Missing information", "Please specify a column delimiter.")
            return

        return {
            'path': csv_path,
            'name': system_name,
            'delimiter': column_delimiter,
        }

Source File: pct_visualize.py From CorpusTools with BSD 3-Clause "New" or "Revised" License

5 votes

def main():
    """Command-line entry point: read a distance file and visualize it.

    Parses the command line, opens the distance file as delimited text and
    passes a ``csv.DictReader`` over it to ``visualize`` together with the
    chosen method and column headers.

    The file is looked up first exactly as given; if that fails it is looked
    up in the directory containing this script.

    Raises
    ------
    FileNotFoundError
        If the distance file exists in neither location.
    """

    #### Parse command-line arguments
    parser = argparse.ArgumentParser(description = \
             'Phonological CorpusTools: visualization of segment inventory')
    parser.add_argument('distance_file_name', help='Name of input distance file')
    parser.add_argument('-m', '--visualization_method', default='pca', help="Method of visualization: any of principal components analysis ('pca'), hierarchical clustering ('hc'), or a heatmap ('hm')")
    parser.add_argument('-v', '--value_column', default='result', type=str, help='header for column containing distance values')
    parser.add_argument('-s', '--segment_column', default='segment(s)', type=str, help='header for column containing segment pairs')
    parser.add_argument('-d', '--column_delimiter', default='\t', type=str, help='column delimiter used in the distance file (escape sequences such as \\t are expanded)')

    args = parser.parse_args()

    ####

    # The delimiter arrives as typed on the command line, so an escape such
    # as backslash-t has to be expanded into the real character.
    delimiter = codecs.getdecoder("unicode_escape")(args.column_delimiter)[0]

    script_dir = os.path.dirname(os.path.realpath(__file__))
    try:  # Path exactly as given (absolute, or relative to the working directory)
        infile = open(args.distance_file_name, newline='')
    except FileNotFoundError:
        # Fall back to a file sitting next to this script; os.path.join
        # picks the right separator on both Unix and Windows.
        infile = open(os.path.join(script_dir, args.distance_file_name), newline='')

    with infile:
        reader = csv.DictReader(infile, delimiter=delimiter)
        visualize(reader, args.visualization_method, args.value_column, args.segment_column)

Python codecs.getdecoder() Examples