Python codecs.getdecoder() Examples

The following are 30 code examples of codecs.getdecoder(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module codecs, or try the search function.
Example #1
Source File: strings.py    From recruit with Apache License 2.0 6 votes vote down vote up
def str_decode(arr, encoding, errors="strict"):
    """
    Decode every element of a Series/Index with the given encoding.

    Mirrors :meth:`str.decode` on python2 and :meth:`bytes.decode` on
    python3.

    Parameters
    ----------
    arr : values handed on to ``_na_map`` together with the decoder
    encoding : str
        Name of the codec used to decode each element.
    errors : str, optional
        Error-handling scheme passed to the decoder (default "strict").

    Returns
    -------
    decoded : Series/Index of objects
    """
    if encoding not in _cpython_optimized_decoders:
        # Generic path: look the codec up once and reuse it per element.
        decoder = codecs.getdecoder(encoding)

        def _decode_one(value):
            return decoder(value, errors)[0]
    else:
        # CPython has a fast path for these codecs via the bound method.
        def _decode_one(value):
            return value.decode(encoding, errors)

    return _na_map(_decode_one, arr)
Example #2
Source File: featuregui.py    From CorpusTools with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def accept(self):
        """Validate the dialog input, export the feature matrix, then accept.

        Shows an error box and returns without exporting when the path is
        empty or the column delimiter is not exactly one character long.
        """
        destination = self.pathWidget.value()
        if destination == '':
            QMessageBox.critical(
                self,
                "Missing information",
                "Please specify a path to save the corpus.")
            return

        # Backslash escapes typed into the field (such as \t) are decoded
        # before the length check.
        unescape = codecs.getdecoder("unicode_escape")
        delimiter = unescape(self.columnDelimiterEdit.text())[0]
        if len(delimiter) != 1:
            QMessageBox.critical(
                self,
                "Invalid information",
                "The column delimiter must be a single character.")
            return

        export_feature_matrix_csv(self.specifier, destination, delimiter)
        QDialog.accept(self)
Example #3
Source File: templates.py    From king-phisher with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def _filter_decode(self, data, encoding):
		"""Decode *data* using one of the supported encoding names.

		Supported names are base16/hex, base32, base64 and rot13; a dash
		before the two digits (for example "base-64") is tolerated and the
		name is matched case-insensitively.

		:param data: The encoded value; bytes are first decoded as UTF-8 on Python 3.
		:param str encoding: Name of the encoding to undo.
		:return: The decoded value, converted from bytes to text on Python 3.
		:raises ValueError: If the encoding name is not one of the supported ones.
		"""
		if its.py_v3 and isinstance(data, bytes):
			data = data.decode('utf-8')

		# Collapse "base-64" / "rot-13" style names to "base64" / "rot13".
		encoding = re.sub(r'^(base|rot)-(\d\d)$', r'\1\2', encoding.lower())

		if encoding in ('base16', 'hex'):
			decoded = base64.b16decode(data)
		elif encoding == 'base32':
			decoded = base64.b32decode(data)
		elif encoding == 'base64':
			decoded = base64.b64decode(data)
		elif encoding == 'rot13':
			rot13 = codecs.getdecoder('rot-13')
			decoded = rot13(data)[0]
		else:
			raise ValueError('Unknown encoding type: ' + encoding)

		if its.py_v3 and isinstance(decoded, bytes):
			return decoded.decode('utf-8')
		return decoded
Example #4
Source File: test_codecs.py    From ironpython3 with Apache License 2.0 6 votes vote down vote up
def test_all(self):
        """Check that ``codecs.__all__`` lists exactly the expected names.

        Also verifies that every advertised name resolves on the module.
        """
        expected_names = (
            "encode", "decode", "register",
            "CodecInfo", "Codec",
            "IncrementalEncoder", "IncrementalDecoder",
            "StreamReader", "StreamWriter",
            "lookup",
            "getencoder", "getdecoder",
            "getincrementalencoder", "getincrementaldecoder",
            "getreader", "getwriter",
            "register_error", "lookup_error",
            "strict_errors", "replace_errors", "ignore_errors",
            "xmlcharrefreplace_errors", "backslashreplace_errors",
            "open", "EncodedFile",
            "iterencode", "iterdecode",
            "BOM", "BOM_BE", "BOM_LE", "BOM_UTF8",
            "BOM_UTF16", "BOM_UTF16_BE", "BOM_UTF16_LE",
            "BOM_UTF32", "BOM_UTF32_BE", "BOM_UTF32_LE",
            # Undocumented
            "BOM32_BE", "BOM32_LE", "BOM64_BE", "BOM64_LE",
            "StreamReaderWriter", "StreamRecoder",
        )
        self.assertCountEqual(expected_names, codecs.__all__)
        # getattr raises AttributeError if an exported name is missing.
        for exported in codecs.__all__:
            getattr(codecs, exported)
Example #5
Source File: test_codecs.py    From Project-New-Reign---Nemesis-Main with GNU General Public License v3.0 6 votes vote down vote up
def test_all(self):
        """Check that ``codecs.__all__`` lists exactly the expected names.

        Also verifies that every advertised name resolves on the module.
        """
        expected_names = (
            "encode", "decode", "register",
            "CodecInfo", "Codec",
            "IncrementalEncoder", "IncrementalDecoder",
            "StreamReader", "StreamWriter",
            "lookup",
            "getencoder", "getdecoder",
            "getincrementalencoder", "getincrementaldecoder",
            "getreader", "getwriter",
            "register_error", "lookup_error",
            "strict_errors", "replace_errors", "ignore_errors",
            "xmlcharrefreplace_errors", "backslashreplace_errors",
            "namereplace_errors",
            "open", "EncodedFile",
            "iterencode", "iterdecode",
            "BOM", "BOM_BE", "BOM_LE", "BOM_UTF8",
            "BOM_UTF16", "BOM_UTF16_BE", "BOM_UTF16_LE",
            "BOM_UTF32", "BOM_UTF32_BE", "BOM_UTF32_LE",
            # Undocumented
            "BOM32_BE", "BOM32_LE", "BOM64_BE", "BOM64_LE",
            "StreamReaderWriter", "StreamRecoder",
        )
        self.assertCountEqual(expected_names, codecs.__all__)
        # getattr raises AttributeError if an exported name is missing.
        for exported in codecs.__all__:
            getattr(codecs, exported)
Example #6
Source File: strings.py    From predictive-maintenance-using-machine-learning with Apache License 2.0 6 votes vote down vote up
def str_decode(arr, encoding, errors="strict"):
    """
    Decode every element of a Series/Index with the given encoding.

    Mirrors :meth:`str.decode` on python2 and :meth:`bytes.decode` on
    python3.

    Parameters
    ----------
    arr : values handed on to ``_na_map`` together with the decoder
    encoding : str
        Name of the codec used to decode each element.
    errors : str, optional
        Error-handling scheme passed to the decoder (default "strict").

    Returns
    -------
    decoded : Series/Index of objects
    """
    if encoding not in _cpython_optimized_decoders:
        # Generic path: look the codec up once and reuse it per element.
        decoder = codecs.getdecoder(encoding)

        def _decode_one(value):
            return decoder(value, errors)[0]
    else:
        # CPython has a fast path for these codecs via the bound method.
        def _decode_one(value):
            return value.decode(encoding, errors)

    return _na_map(_decode_one, arr)
Example #7
Source File: test_codecs.py    From ironpython2 with Apache License 2.0 6 votes vote down vote up
def test_all(self):
        """Check that ``codecs.__all__`` lists exactly the expected names.

        Also verifies that every advertised name resolves on the module.
        """
        expected_names = (
            "encode", "decode", "register",
            "CodecInfo", "Codec",
            "IncrementalEncoder", "IncrementalDecoder",
            "StreamReader", "StreamWriter",
            "lookup",
            "getencoder", "getdecoder",
            "getincrementalencoder", "getincrementaldecoder",
            "getreader", "getwriter",
            "register_error", "lookup_error",
            "strict_errors", "replace_errors", "ignore_errors",
            "xmlcharrefreplace_errors", "backslashreplace_errors",
            "open", "EncodedFile",
            "iterencode", "iterdecode",
            "BOM", "BOM_BE", "BOM_LE", "BOM_UTF8",
            "BOM_UTF16", "BOM_UTF16_BE", "BOM_UTF16_LE",
            "BOM_UTF32", "BOM_UTF32_BE", "BOM_UTF32_LE",
            # Undocumented
            "BOM32_BE", "BOM32_LE", "BOM64_BE", "BOM64_LE",
            "StreamReaderWriter", "StreamRecoder",
        )
        # Compare as sorted lists so ordering differences are ignored.
        self.assertEqual(sorted(expected_names), sorted(codecs.__all__))
        # getattr raises AttributeError if an exported name is missing.
        for exported in codecs.__all__:
            getattr(codecs, exported)
Example #8
Source File: iogui.py    From CorpusTools with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def accept(self):
        """Validate the dialog input, export the corpus as CSV, then accept.

        Shows an error box and returns without exporting when the path is
        empty or the column delimiter is not exactly one character long.
        """
        destination = self.pathWidget.value()
        if destination == '':
            QMessageBox.critical(
                self,
                "Missing information",
                "Please specify a path to save the corpus.")
            return

        # Backslash escapes typed into the field (such as \t) are decoded
        # before the length check.
        unescape = codecs.getdecoder("unicode_escape")
        column_delimiter = unescape(self.columnDelimiterEdit.text())[0]
        if len(column_delimiter) != 1:
            QMessageBox.critical(
                self,
                "Invalid information",
                "The column delimiter must be a single character.")
            return

        transcription_delimiter = self.transDelimiterEdit.text()
        # Each entry of variantOptions is indexable; element 1 is passed on.
        selected_option = self.variantOptions[self.variantWidget.currentIndex()]
        export_corpus_csv(
            self.corpus,
            destination,
            column_delimiter,
            transcription_delimiter,
            selected_option[1],
        )

        QDialog.accept(self)
Example #9
Source File: strings.py    From vnpy_crypto with MIT License 6 votes vote down vote up
def str_decode(arr, encoding, errors="strict"):
    """
    Decode every element of a Series/Index with the given encoding.

    Mirrors :meth:`str.decode` on python2 and :meth:`bytes.decode` on
    python3.

    Parameters
    ----------
    arr : values handed on to ``_na_map`` together with the decoder
    encoding : str
        Name of the codec used to decode each element.
    errors : str, optional
        Error-handling scheme passed to the decoder (default "strict").

    Returns
    -------
    decoded : Series/Index of objects
    """
    if encoding not in _cpython_optimized_decoders:
        # Generic path: look the codec up once and reuse it per element.
        decoder = codecs.getdecoder(encoding)

        def _decode_one(value):
            return decoder(value, errors)[0]
    else:
        # CPython has a fast path for these codecs via the bound method.
        def _decode_one(value):
            return value.decode(encoding, errors)

    return _na_map(_decode_one, arr)
Example #10
Source File: ch10_ex5.py    From Mastering-Object-Oriented-Python-Second-Edition with MIT License 6 votes vote down vote up
def alpha_decode(data: bytes, metadata: 'XMetadata', field_metadata: 'XField') -> str:
    """Convert alpha or alphanumeric field bytes to text.

    The conversion is delegated to ``metadata.decode``, a codec-style
    decoder returning a ``(text, length)`` pair; only the text is kept.
    ``field_metadata`` is accepted but not (currently) used.

    Mock metadata objects
    >>> import types
    >>> meta = types.SimpleNamespace(reclen=6, encoding='ebcdic')
    >>> meta.decode = codecs.getdecoder(meta.encoding)
    >>> field_meta = types.SimpleNamespace()  # Used in other examples...

    >>> data = bytes([0xf9, 0xf8, 0xf7, 0xf6, 0xf5, 0x60])
    >>> alpha_decode(data, meta, field_meta)
    '98765-'

    """
    decoded_text, _consumed = metadata.decode(data)
    return decoded_text


# Numeric USAGE DISPLAY trailing sign conversion.
# The COBOL program stored a text version of the number.
Example #11
Source File: ch10_ex5.py    From Mastering-Object-Oriented-Python-Second-Edition with MIT License 6 votes vote down vote up
def display_decode(data: bytes, metadata: 'XMetadata', field_metadata: 'XField') -> Decimal:
    """Decode USAGE DISPLAY numeric data with a trailing sign character.

    ``metadata.decode`` is a codec-style decoder returning ``(text, length)``.
    The last decoded character is taken as the sign and moved to the front;
    the remaining digits get a decimal point inserted ``precision`` places
    from the right.

    :param data: Raw field bytes.
    :param metadata: Object whose ``decode`` attribute decodes ``data``.
    :param field_metadata: Object with a ``precision`` attribute (number of
        digits after the decimal point); ``None`` or 0 means a whole number.
    :returns: The decoded value as a :class:`decimal.Decimal`.

    Mock metadata objects
    >>> import types
    >>> meta= types.SimpleNamespace(reclen=6, encoding='ebcdic')
    >>> meta.decode = codecs.getdecoder(meta.encoding)
    >>> field_meta = types.SimpleNamespace(precision=2)

    >>> data = bytes([0xf9, 0xf8, 0xf7, 0xf6, 0xf5, 0x60])
    >>> display_decode(data, meta, field_meta)
    Decimal('-987.65')
    >>> display_decode(data, meta, types.SimpleNamespace(precision=0))
    Decimal('-98765')

    """
    text, _ = metadata.decode(data)
    precision = field_metadata.precision or 0  # If None, default is 0.
    digits, sign = text[:-1], text[-1]
    if precision:
        # Slicing with -0 would select the wrong halves (digits[:-0] is empty
        # and digits[-0:] is everything), so the decimal point is only
        # inserted when there really are fractional digits.
        digits = digits[:-precision] + "." + digits[-precision:]
    return Decimal(sign + digits)


# Numeric USAGE COMP-3 conversion.
# The COBOL program encoded the number into a packed decimal representation.
Example #12
Source File: strings.py    From elasticintel with GNU General Public License v3.0 6 votes vote down vote up
def str_decode(arr, encoding, errors="strict"):
    """
    Decode every element of a Series/Index with the given encoding.

    Mirrors :meth:`str.decode` on python2 and :meth:`bytes.decode` on
    python3.

    Parameters
    ----------
    arr : values handed on to ``_na_map`` together with the decoder
    encoding : str
        Name of the codec used to decode each element.
    errors : str, optional
        Error-handling scheme passed to the decoder (default "strict").

    Returns
    -------
    decoded : Series/Index of objects
    """
    if encoding not in _cpython_optimized_decoders:
        # Generic path: look the codec up once and reuse it per element.
        decoder = codecs.getdecoder(encoding)

        def _decode_one(value):
            return decoder(value, errors)[0]
    else:
        # CPython has a fast path for these codecs via the bound method.
        def _decode_one(value):
            return value.decode(encoding, errors)

    return _na_map(_decode_one, arr)
Example #13
Source File: test_codecs.py    From Fluid-Designer with GNU General Public License v3.0 6 votes vote down vote up
def test_all(self):
        """Check that ``codecs.__all__`` lists exactly the expected names.

        Also verifies that every advertised name resolves on the module.
        """
        expected_names = (
            "encode", "decode", "register",
            "CodecInfo", "Codec",
            "IncrementalEncoder", "IncrementalDecoder",
            "StreamReader", "StreamWriter",
            "lookup",
            "getencoder", "getdecoder",
            "getincrementalencoder", "getincrementaldecoder",
            "getreader", "getwriter",
            "register_error", "lookup_error",
            "strict_errors", "replace_errors", "ignore_errors",
            "xmlcharrefreplace_errors", "backslashreplace_errors",
            "namereplace_errors",
            "open", "EncodedFile",
            "iterencode", "iterdecode",
            "BOM", "BOM_BE", "BOM_LE", "BOM_UTF8",
            "BOM_UTF16", "BOM_UTF16_BE", "BOM_UTF16_LE",
            "BOM_UTF32", "BOM_UTF32_BE", "BOM_UTF32_LE",
            # Undocumented
            "BOM32_BE", "BOM32_LE", "BOM64_BE", "BOM64_LE",
            "StreamReaderWriter", "StreamRecoder",
        )
        self.assertCountEqual(expected_names, codecs.__all__)
        # getattr raises AttributeError if an exported name is missing.
        for exported in codecs.__all__:
            getattr(codecs, exported)
Example #14
Source File: strings.py    From Splunking-Crime with GNU Affero General Public License v3.0 6 votes vote down vote up
def str_decode(arr, encoding, errors="strict"):
    """
    Decode every element of a Series/Index with the given encoding.

    Mirrors :meth:`str.decode` on python2 and :meth:`bytes.decode` on
    python3.

    Parameters
    ----------
    arr : values handed on to ``_na_map`` together with the decoder
    encoding : str
        Name of the codec used to decode each element.
    errors : str, optional
        Error-handling scheme passed to the decoder (default "strict").

    Returns
    -------
    decoded : Series/Index of objects
    """
    if encoding not in _cpython_optimized_decoders:
        # Generic path: look the codec up once and reuse it per element.
        decoder = codecs.getdecoder(encoding)

        def _decode_one(value):
            return decoder(value, errors)[0]
    else:
        # CPython has a fast path for these codecs via the bound method.
        def _decode_one(value):
            return value.decode(encoding, errors)

    return _na_map(_decode_one, arr)
Example #15
Source File: test_codecs.py    From gcblue with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def test_basics(self):
        """Round-trip a short string through every codec in ``all_string_encodings``."""
        original = "abc123"
        for encoding in all_string_encodings:
            encode = codecs.getencoder(encoding)
            encoded, consumed = encode(original)
            # The encoder must report that it consumed the whole input.
            self.assertEqual(consumed, len(original))

            decode = codecs.getdecoder(encoding)
            decoded, consumed = decode(encoded)
            failure_note = "%r != %r (encoding=%r)" % (decoded, original, encoding)
            self.assertEqual(decoded, original, failure_note)
Example #16
Source File: test_multibytecodec.py    From gcblue with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def test_errorcallback_longindex(self):
        """An error handler returning a resume index beyond ``sys.maxint`` must raise IndexError."""
        decode_euc_kr = codecs.getdecoder('euc-kr')

        def oversized_index_handler(exc):
            # Empty replacement plus a resume position one past sys.maxint.
            return (u'', sys.maxint+1)

        codecs.register_error('test.cjktest', oversized_index_handler)
        malformed_input = 'apple\x92ham\x93spam'
        self.assertRaises(IndexError, decode_euc_kr,
                          malformed_input, 'test.cjktest')
Example #17
Source File: test_multibytecodec.py    From Project-New-Reign---Nemesis-Main with GNU General Public License v3.0 5 votes vote down vote up
def test_errorcallback_longindex(self):
        """An error handler returning a resume index beyond ``sys.maxsize`` must raise IndexError."""
        decode_euc_kr = codecs.getdecoder('euc-kr')

        def oversized_index_handler(exc):
            # Empty replacement plus a resume position one past sys.maxsize.
            return ('', sys.maxsize+1)

        codecs.register_error('test.cjktest', oversized_index_handler)
        malformed_input = b'apple\x92ham\x93spam'
        self.assertRaises(IndexError, decode_euc_kr,
                          malformed_input, 'test.cjktest')
Example #18
Source File: mrknow_urlparserhelper.py    From filmkodi with Apache License 2.0 5 votes vote down vote up
def unicode_escape(s):
    """Expand ``\\uXXXX`` escapes found in *s* and return the result as UTF-8 bytes.

    Only backslash-u sequences followed by at least four hex digits are
    touched; everything else in *s* is left as it is before encoding.
    """
    decode_escape = codecs.getdecoder('unicode_escape')

    def _expand(match):
        # The decoder returns (text, length); only the text is substituted.
        return decode_escape(match.group(0))[0]

    expanded = re.sub(r'\\u[0-9a-fA-F]{4,}', _expand, s)
    return expanded.encode('utf-8')
Example #19
Source File: test_multibytecodec.py    From CTFCrackTools-V2 with GNU General Public License v3.0 5 votes vote down vote up
def test_errorcallback_longindex(self):
        """An error handler returning a resume index beyond ``sys.maxint`` must raise IndexError."""
        decode_euc_kr = codecs.getdecoder('euc-kr')

        def oversized_index_handler(exc):
            # Empty replacement plus a resume position one past sys.maxint.
            return (u'', sys.maxint+1)

        codecs.register_error('test.cjktest', oversized_index_handler)
        malformed_input = 'apple\x92ham\x93spam'
        self.assertRaises(IndexError, decode_euc_kr,
                          malformed_input, 'test.cjktest')
Example #20
Source File: mrknow_urlparserhelper.py    From filmkodi with Apache License 2.0 5 votes vote down vote up
def unicode_escape(s):
    """Expand ``\\uXXXX`` escapes found in *s* and return the result as UTF-8 bytes.

    Only backslash-u sequences followed by at least four hex digits are
    touched; everything else in *s* is left as it is before encoding.
    """
    decode_escape = codecs.getdecoder('unicode_escape')

    def _expand(match):
        # The decoder returns (text, length); only the text is substituted.
        return decode_escape(match.group(0))[0]

    expanded = re.sub(r'\\u[0-9a-fA-F]{4,}', _expand, s)
    return expanded.encode('utf-8')
Example #21
Source File: test_codecs.py    From CTFCrackTools-V2 with GNU General Public License v3.0 5 votes vote down vote up
def test_decode_callback(self):
        """Check that a registered "ignore" handler skips bad ``unicode_internal`` input.

        Only runs when ``sys.maxunicode`` exceeds 0xffff.
        """
        if sys.maxunicode <= 0xffff:
            return

        codecs.register_error("UnicodeInternalTest", codecs.ignore_errors)
        decoder = codecs.getdecoder("unicode_internal")
        ab = u"ab".encode("unicode_internal")
        # Splice four 0x22 bytes between the two encoded characters; the
        # handler is expected to drop them while all 12 bytes are consumed.
        corrupted = "%s\x22\x22\x22\x22%s" % (ab[:4], ab[4:])
        ignored = decoder(corrupted, "UnicodeInternalTest")
        self.assertEqual((u"ab", 12), ignored)
Example #22
Source File: test_codecs.py    From gcblue with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def test_bad_decode_args(self):
        """Every decoder must raise TypeError for missing or (mostly) integer input."""
        # These two codecs are exempt from the integer-argument check.
        skip_int_check = ("idna", "punycode")
        for encoding in all_unicode_encodings:
            decoder = codecs.getdecoder(encoding)
            self.assertRaises(TypeError, decoder)
            if encoding in skip_int_check:
                continue
            self.assertRaises(TypeError, decoder, 42)
Example #23
Source File: test_codecs.py    From gcblue with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def test_getdecoder(self):
        """``codecs.getdecoder`` rejects a missing argument and an unknown codec name."""
        failing_calls = (
            (TypeError, ()),               # no encoding given at all
            (LookupError, ("__spam__",)),  # encoding that is not registered
        )
        for expected_error, call_args in failing_calls:
            self.assertRaises(expected_error, codecs.getdecoder, *call_args)
Example #24
Source File: test_codecs.py    From gcblue with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def test_decode_callback(self):
        """Check that a registered "ignore" handler skips bad ``unicode_internal`` input.

        Only runs when ``sys.maxunicode`` exceeds 0xffff.
        """
        if sys.maxunicode <= 0xffff:
            return

        codecs.register_error("UnicodeInternalTest", codecs.ignore_errors)
        decoder = codecs.getdecoder("unicode_internal")
        ab = u"ab".encode("unicode_internal")
        # Splice four 0x22 bytes between the two encoded characters; the
        # handler is expected to drop them while all 12 bytes are consumed.
        corrupted = "%s\x22\x22\x22\x22%s" % (ab[:4], ab[4:])
        ignored = decoder(corrupted, "UnicodeInternalTest")
        self.assertEqual((u"ab", 12), ignored)
Example #25
Source File: test_codecs.py    From Project-New-Reign---Nemesis-Main with GNU General Public License v3.0 5 votes vote down vote up
def test_basics(self):
        """Round-trip all 256 byte values through each bytes-to-bytes codec."""
        binput = bytes(range(256))
        for encoding in bytes_transform_encodings:
            with self.subTest(encoding=encoding):
                # generic codecs interface
                encode = codecs.getencoder(encoding)
                encoded, consumed = encode(binput)
                self.assertEqual(consumed, len(binput))

                decode = codecs.getdecoder(encoding)
                restored, consumed = decode(encoded)
                self.assertEqual(consumed, len(encoded))
                self.assertEqual(restored, binput)
Example #26
Source File: objects.py    From awkward-array with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def encoding(self, value):
        """Store the encoding name and the matching decoder function.

        ``value`` may be ``None`` (no decoder) or a codec name understood by
        ``codecs.getdecoder``. The lookup happens before any attribute is
        written, so an unknown name leaves the object unchanged.
        """
        decodefcn = None if value is None else codecs.getdecoder(value)
        self._encoding = value
        # The decoder is kept as a one-element argument tuple.
        self._args = (decodefcn,)
Example #27
Source File: objects.py    From awkward-array with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def encoding(self, value):
        """Store the encoding name and the matching decoder function.

        ``value`` may be ``None`` (no decoder) or a codec name understood by
        ``codecs.getdecoder``. The lookup happens before any attribute is
        written, so an unknown name leaves the object unchanged.
        """
        decodefcn = None if value is None else codecs.getdecoder(value)
        self._encoding = value
        # The decoder is kept as a one-element argument tuple.
        self._args = (decodefcn,)
Example #28
Source File: iogui.py    From CorpusTools with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def delimiters(self):
        """Return the ``(wordDelim, colDelim)`` pair for this dialog.

        ``wordDelim`` is always ``None``; ``colDelim`` is the text of the
        column-delimiter field with backslash escapes decoded.
        """
        unescape = codecs.getdecoder("unicode_escape")
        field_text = self.columnDelimiterEdit.text()
        return None, unescape(field_text)[0]
Example #29
Source File: featuregui.py    From CorpusTools with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def generateKwargs(self):
        """Collect and validate the dialog input for loading a feature matrix.

        Returns a dict with the keys ``'path'``, ``'name'`` and
        ``'delimiter'`` when every check passes. Returns ``None`` after
        showing an error box when a check fails, or when the user declines
        to overwrite an existing feature system of the same name.
        """
        def report(title, message):
            # All validation failures are shown as a critical message box.
            QMessageBox.critical(self, title, message)

        path = self.pathWidget.value()
        if path == '':
            report("Missing information", "Please specify a path to the csv file.")
            return
        if not os.path.exists(path):
            report("Invalid information", "Feature matrix file could not be located. Please verify the path and file name.")
            return

        name = self.featureSystemSelect.value()
        if name == '':
            report("Missing information", "Please specify the transcription and feature system.")
            return

        # Ask before replacing a feature system that is already stored.
        if name in get_systems_list(self.settings['storage']):
            question = "A feature system named '{}' already exists.  Overwrite?".format(name)
            msgBox = QMessageBox(QMessageBox.Warning, "Duplicate name",
                    question, QMessageBox.NoButton, self)
            msgBox.addButton("Overwrite", QMessageBox.AcceptRole)
            msgBox.addButton("Abort", QMessageBox.RejectRole)
            if msgBox.exec_() != QMessageBox.AcceptRole:
                return None
        if not name:
            report("Missing information", "Please specify a name for the transcription and feature systems.")
            return

        # Backslash escapes typed into the field (such as \t) are decoded.
        unescape = codecs.getdecoder("unicode_escape")
        colDelim = unescape(self.columnDelimiterEdit.text())[0]
        if not colDelim:
            report("Missing information", "Please specify a column delimiter.")
            return

        return {'path': path, 'name': name, 'delimiter': colDelim}
Example #30
Source File: pct_visualize.py    From CorpusTools with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def main():
    """Command-line entry point: read a distance file and visualize it.

    The distance file is looked up first exactly as given on the command
    line and, if that fails, relative to the directory containing this
    script. The rows are read with ``csv.DictReader`` and handed to
    ``visualize`` together with the chosen method and column names.

    Raises FileNotFoundError when the file exists in neither location.
    """
    #### Parse command-line arguments
    parser = argparse.ArgumentParser(
        description='Phonological CorpusTools: visualization of segment inventory')
    parser.add_argument('distance_file_name', help='Name of input distance file')
    parser.add_argument('-m', '--visualization_method', default='pca', help="Method of visualization: any of principal components analysis ('pca'), hierarchical clustering ('hc'), or a heatmap ('hm')")
    parser.add_argument('-v', '--value_column', default='result', type=str, help='header for column containing distance values')
    parser.add_argument('-s', '--segment_column', default='segment(s)', type=str, help='header for column containing segment pairs')
    # The help text used to be a copy of the --segment_column one.
    parser.add_argument('-d', '--column_delimiter', default='\t', type=str, help='column delimiter used in the distance file (escape sequences such as \\t are decoded)')

    args = parser.parse_args()

    ####

    # Decode backslash escapes so that a delimiter typed as \t becomes a tab.
    delimiter = codecs.getdecoder("unicode_escape")(args.column_delimiter)[0]

    # Candidate locations: the path as given, then next to this script.
    # os.path.join picks the right separator, so no separate Unix/Windows
    # branches are needed.
    script_dir = os.path.dirname(os.path.realpath(__file__))
    candidates = (
        args.distance_file_name,
        os.path.join(script_dir, args.distance_file_name),
    )
    last_index = len(candidates) - 1
    for index, path in enumerate(candidates):
        try:
            # newline='' lets the csv module handle line endings itself.
            infile = open(path, newline='')
        except FileNotFoundError:
            if index == last_index:
                raise
            continue
        with infile:
            reader = csv.DictReader(infile, delimiter=delimiter)
            visualize(reader, args.visualization_method, args.value_column, args.segment_column)
        return