Python Examples of numpy.unicode

Source File: test_io.py From Mastering-Elasticsearch-7.0 with MIT License

6 votes

def test_unicode_roundtrip(self):
        utf8 = b'\xcf\x96'.decode('UTF-8')
        a = np.array([utf8], dtype=np.unicode)
        # our gz wrapper support encoding
        suffixes = ['', '.gz']
        # stdlib 2 versions do not support encoding
        if MAJVER > 2:
            if HAS_BZ2:
                suffixes.append('.bz2')
            if HAS_LZMA:
                suffixes.extend(['.xz', '.lzma'])
        with tempdir() as tmpdir:
            for suffix in suffixes:
                np.savetxt(os.path.join(tmpdir, 'test.csv' + suffix), a,
                           fmt=['%s'], encoding='UTF-16-LE')
                b = np.loadtxt(os.path.join(tmpdir, 'test.csv' + suffix),
                               encoding='UTF-16-LE', dtype=np.unicode)
                assert_array_equal(a, b)

Source File: test_nditer.py From auto-alt-text-lambda-api with MIT License

6 votes

def test_iter_buffering_string():
    # Safe casting disallows shrinking strings
    a = np.array(['abc', 'a', 'abcd'], dtype=np.bytes_)
    assert_equal(a.dtype, np.dtype('S4'))
    assert_raises(TypeError, nditer, a, ['buffered'], ['readonly'],
                  op_dtypes='S2')
    i = nditer(a, ['buffered'], ['readonly'], op_dtypes='S6')
    assert_equal(i[0], asbytes('abc'))
    assert_equal(i[0].dtype, np.dtype('S6'))

    a = np.array(['abc', 'a', 'abcd'], dtype=np.unicode)
    assert_equal(a.dtype, np.dtype('U4'))
    assert_raises(TypeError, nditer, a, ['buffered'], ['readonly'],
                    op_dtypes='U2')
    i = nditer(a, ['buffered'], ['readonly'], op_dtypes='U6')
    assert_equal(i[0], sixu('abc'))
    assert_equal(i[0].dtype, np.dtype('U6'))

Source File: test_nditer.py From Mastering-Elasticsearch-7.0 with MIT License

6 votes

def test_iter_buffering_string():
    # Safe casting disallows shrinking strings
    a = np.array(['abc', 'a', 'abcd'], dtype=np.bytes_)
    assert_equal(a.dtype, np.dtype('S4'))
    assert_raises(TypeError, nditer, a, ['buffered'], ['readonly'],
                  op_dtypes='S2')
    i = nditer(a, ['buffered'], ['readonly'], op_dtypes='S6')
    assert_equal(i[0], b'abc')
    assert_equal(i[0].dtype, np.dtype('S6'))

    a = np.array(['abc', 'a', 'abcd'], dtype=np.unicode)
    assert_equal(a.dtype, np.dtype('U4'))
    assert_raises(TypeError, nditer, a, ['buffered'], ['readonly'],
                    op_dtypes='U2')
    i = nditer(a, ['buffered'], ['readonly'], op_dtypes='U6')
    assert_equal(i[0], u'abc')
    assert_equal(i[0].dtype, np.dtype('U6'))

Source File: test_io.py From GraphicDesignPatternByPython with MIT License

6 votes

def test_unicode_roundtrip(self):
        utf8 = b'\xcf\x96'.decode('UTF-8')
        a = np.array([utf8], dtype=np.unicode)
        # our gz wrapper support encoding
        suffixes = ['', '.gz']
        # stdlib 2 versions do not support encoding
        if MAJVER > 2:
            if HAS_BZ2:
                suffixes.append('.bz2')
            if HAS_LZMA:
                suffixes.extend(['.xz', '.lzma'])
        with tempdir() as tmpdir:
            for suffix in suffixes:
                np.savetxt(os.path.join(tmpdir, 'test.csv' + suffix), a,
                           fmt=['%s'], encoding='UTF-16-LE')
                b = np.loadtxt(os.path.join(tmpdir, 'test.csv' + suffix),
                               encoding='UTF-16-LE', dtype=np.unicode)
                assert_array_equal(a, b)

Source File: test_io.py From vnpy_crypto with MIT License

6 votes

def test_utf8_file(self):
        utf8 = b"\xcf\x96"
        latin1 = b"\xf6\xfc\xf6"
        with temppath() as path:
            with open(path, "wb") as f:
                f.write((b"test1,testNonethe" + utf8 + b",test3\n") * 2)
            test = np.genfromtxt(path, dtype=None, comments=None,
                                 delimiter=',', encoding="UTF-8")
            ctl = np.array([
                     ["test1", "testNonethe" + utf8.decode("UTF-8"), "test3"],
                     ["test1", "testNonethe" + utf8.decode("UTF-8"), "test3"]],
                     dtype=np.unicode)
            assert_array_equal(test, ctl)

            # test a mixed dtype
            with open(path, "wb") as f:
                f.write(b"0,testNonethe" + utf8)
            test = np.genfromtxt(path, dtype=None, comments=None,
                                 delimiter=',', encoding="UTF-8")
            assert_equal(test['f0'], 0)
            assert_equal(test['f1'], "testNonethe" + utf8.decode("UTF-8"))

Source File: test_io.py From vnpy_crypto with MIT License

6 votes

def test_unicode_roundtrip(self):
        utf8 = b'\xcf\x96'.decode('UTF-8')
        a = np.array([utf8], dtype=np.unicode)
        # our gz wrapper support encoding
        suffixes = ['', '.gz']
        # stdlib 2 versions do not support encoding
        if MAJVER > 2:
            if HAS_BZ2:
                suffixes.append('.bz2')
            if HAS_LZMA:
                suffixes.extend(['.xz', '.lzma'])
        with tempdir() as tmpdir:
            for suffix in suffixes:
                np.savetxt(os.path.join(tmpdir, 'test.csv' + suffix), a,
                           fmt=['%s'], encoding='UTF-16-LE')
                b = np.loadtxt(os.path.join(tmpdir, 'test.csv' + suffix),
                               encoding='UTF-16-LE', dtype=np.unicode)
                assert_array_equal(a, b)

Source File: test_nditer.py From recruit with Apache License 2.0

6 votes

def test_iter_buffering_string():
    # Safe casting disallows shrinking strings
    a = np.array(['abc', 'a', 'abcd'], dtype=np.bytes_)
    assert_equal(a.dtype, np.dtype('S4'))
    assert_raises(TypeError, nditer, a, ['buffered'], ['readonly'],
                  op_dtypes='S2')
    i = nditer(a, ['buffered'], ['readonly'], op_dtypes='S6')
    assert_equal(i[0], b'abc')
    assert_equal(i[0].dtype, np.dtype('S6'))

    a = np.array(['abc', 'a', 'abcd'], dtype=np.unicode)
    assert_equal(a.dtype, np.dtype('U4'))
    assert_raises(TypeError, nditer, a, ['buffered'], ['readonly'],
                    op_dtypes='U2')
    i = nditer(a, ['buffered'], ['readonly'], op_dtypes='U6')
    assert_equal(i[0], u'abc')
    assert_equal(i[0].dtype, np.dtype('U6'))

Source File: test_io.py From Mastering-Elasticsearch-7.0 with MIT License

6 votes

def test_utf8_file(self):
        utf8 = b"\xcf\x96"
        with temppath() as path:
            with open(path, "wb") as f:
                f.write((b"test1,testNonethe" + utf8 + b",test3\n") * 2)
            test = np.genfromtxt(path, dtype=None, comments=None,
                                 delimiter=',', encoding="UTF-8")
            ctl = np.array([
                     ["test1", "testNonethe" + utf8.decode("UTF-8"), "test3"],
                     ["test1", "testNonethe" + utf8.decode("UTF-8"), "test3"]],
                     dtype=np.unicode)
            assert_array_equal(test, ctl)

            # test a mixed dtype
            with open(path, "wb") as f:
                f.write(b"0,testNonethe" + utf8)
            test = np.genfromtxt(path, dtype=None, comments=None,
                                 delimiter=',', encoding="UTF-8")
            assert_equal(test['f0'], 0)
            assert_equal(test['f1'], "testNonethe" + utf8.decode("UTF-8"))

Source File: test_io.py From recruit with Apache License 2.0

6 votes

def test_utf8_file(self):
        utf8 = b"\xcf\x96"
        with temppath() as path:
            with open(path, "wb") as f:
                f.write((b"test1,testNonethe" + utf8 + b",test3\n") * 2)
            test = np.genfromtxt(path, dtype=None, comments=None,
                                 delimiter=',', encoding="UTF-8")
            ctl = np.array([
                     ["test1", "testNonethe" + utf8.decode("UTF-8"), "test3"],
                     ["test1", "testNonethe" + utf8.decode("UTF-8"), "test3"]],
                     dtype=np.unicode)
            assert_array_equal(test, ctl)

            # test a mixed dtype
            with open(path, "wb") as f:
                f.write(b"0,testNonethe" + utf8)
            test = np.genfromtxt(path, dtype=None, comments=None,
                                 delimiter=',', encoding="UTF-8")
            assert_equal(test['f0'], 0)
            assert_equal(test['f1'], "testNonethe" + utf8.decode("UTF-8"))

Source File: test_nditer.py From vnpy_crypto with MIT License

6 votes

def test_iter_buffering_string():
    # Safe casting disallows shrinking strings
    a = np.array(['abc', 'a', 'abcd'], dtype=np.bytes_)
    assert_equal(a.dtype, np.dtype('S4'))
    assert_raises(TypeError, nditer, a, ['buffered'], ['readonly'],
                  op_dtypes='S2')
    i = nditer(a, ['buffered'], ['readonly'], op_dtypes='S6')
    assert_equal(i[0], b'abc')
    assert_equal(i[0].dtype, np.dtype('S6'))

    a = np.array(['abc', 'a', 'abcd'], dtype=np.unicode)
    assert_equal(a.dtype, np.dtype('U4'))
    assert_raises(TypeError, nditer, a, ['buffered'], ['readonly'],
                    op_dtypes='U2')
    i = nditer(a, ['buffered'], ['readonly'], op_dtypes='U6')
    assert_equal(i[0], u'abc')
    assert_equal(i[0].dtype, np.dtype('U6'))

Source File: test_io.py From GraphicDesignPatternByPython with MIT License

6 votes

def test_utf8_file(self):
        utf8 = b"\xcf\x96"
        latin1 = b"\xf6\xfc\xf6"
        with temppath() as path:
            with open(path, "wb") as f:
                f.write((b"test1,testNonethe" + utf8 + b",test3\n") * 2)
            test = np.genfromtxt(path, dtype=None, comments=None,
                                 delimiter=',', encoding="UTF-8")
            ctl = np.array([
                     ["test1", "testNonethe" + utf8.decode("UTF-8"), "test3"],
                     ["test1", "testNonethe" + utf8.decode("UTF-8"), "test3"]],
                     dtype=np.unicode)
            assert_array_equal(test, ctl)

            # test a mixed dtype
            with open(path, "wb") as f:
                f.write(b"0,testNonethe" + utf8)
            test = np.genfromtxt(path, dtype=None, comments=None,
                                 delimiter=',', encoding="UTF-8")
            assert_equal(test['f0'], 0)
            assert_equal(test['f1'], "testNonethe" + utf8.decode("UTF-8"))

Source File: test_nditer.py From Computable with MIT License

6 votes

def test_iter_buffering_string():
    # Safe casting disallows shrinking strings
    a = np.array(['abc', 'a', 'abcd'], dtype=np.bytes_)
    assert_equal(a.dtype, np.dtype('S4'));
    assert_raises(TypeError, nditer, a, ['buffered'], ['readonly'],
                    op_dtypes='S2')
    i = nditer(a, ['buffered'], ['readonly'], op_dtypes='S6')
    assert_equal(i[0], asbytes('abc'))
    assert_equal(i[0].dtype, np.dtype('S6'))

    a = np.array(['abc', 'a', 'abcd'], dtype=np.unicode)
    assert_equal(a.dtype, np.dtype('U4'));
    assert_raises(TypeError, nditer, a, ['buffered'], ['readonly'],
                    op_dtypes='U2')
    i = nditer(a, ['buffered'], ['readonly'], op_dtypes='U6')
    assert_equal(i[0], sixu('abc'))
    assert_equal(i[0].dtype, np.dtype('U6'))

Source File: test_io.py From recruit with Apache License 2.0

6 votes

def test_unicode_roundtrip(self):
        utf8 = b'\xcf\x96'.decode('UTF-8')
        a = np.array([utf8], dtype=np.unicode)
        # our gz wrapper support encoding
        suffixes = ['', '.gz']
        # stdlib 2 versions do not support encoding
        if MAJVER > 2:
            if HAS_BZ2:
                suffixes.append('.bz2')
            if HAS_LZMA:
                suffixes.extend(['.xz', '.lzma'])
        with tempdir() as tmpdir:
            for suffix in suffixes:
                np.savetxt(os.path.join(tmpdir, 'test.csv' + suffix), a,
                           fmt=['%s'], encoding='UTF-16-LE')
                b = np.loadtxt(os.path.join(tmpdir, 'test.csv' + suffix),
                               encoding='UTF-16-LE', dtype=np.unicode)
                assert_array_equal(a, b)

Source File: test_io.py From GraphicDesignPatternByPython with MIT License

5 votes

def test_converters_nodecode(self):
        # test native string converters enabled by setting an encoding
        utf8 = b'\xcf\x96'.decode('UTF-8')
        with temppath() as path:
            with io.open(path, 'wt', encoding='UTF-8') as f:
                f.write(utf8)
            x = self.loadfunc(path, dtype=np.unicode,
                              converters={0: lambda x: x + 't'},
                              encoding='UTF-8')
            a = np.array([utf8 + 't'])
            assert_array_equal(x, a)

Source File: test_io.py From GraphicDesignPatternByPython with MIT License

5 votes

def test_binary_load(self):
        butf8 = b"5,6,7,\xc3\x95scarscar\n\r15,2,3,hello\n\r"\
                b"20,2,3,\xc3\x95scar\n\r"
        sutf8 = butf8.decode("UTF-8").replace("\r", "").splitlines()
        with temppath() as path:
            with open(path, "wb") as f:
                f.write(butf8)
            with open(path, "rb") as f:
                x = np.loadtxt(f, encoding="UTF-8", dtype=np.unicode)
            assert_array_equal(x, sutf8)
            # test broken latin1 conversion people now rely on
            with open(path, "rb") as f:
                x = np.loadtxt(f, encoding="UTF-8", dtype="S")
            x = [b'5,6,7,\xc3\x95scarscar', b'15,2,3,hello', b'20,2,3,\xc3\x95scar']
            assert_array_equal(x, np.array(x, dtype="S"))

Source File: test_io.py From Mastering-Elasticsearch-7.0 with MIT License

5 votes

def test_utf8_userconverters_with_explicit_dtype(self):
        utf8 = b'\xcf\x96'
        with temppath() as path:
            with open(path, 'wb') as f:
                f.write(b'skip,skip,2001-01-01' + utf8 + b',1.0,skip')
            test = np.genfromtxt(path, delimiter=",", names=None, dtype=float,
                                 usecols=(2, 3), converters={2: np.unicode},
                                 encoding='UTF-8')
        control = np.array([('2001-01-01' + utf8.decode('UTF-8'), 1.)],
                           dtype=[('', '|U11'), ('', float)])
        assert_equal(test, control)

Source File: test_io.py From GraphicDesignPatternByPython with MIT License

5 votes

def test_gft_using_generator(self):
        # gft doesn't work with unicode.
        def count():
            for i in range(10):
                yield asbytes("%d" % i)

        res = np.genfromtxt(count())
        assert_array_equal(res, np.arange(10))

Source File: test_io.py From GraphicDesignPatternByPython with MIT License

5 votes

def test_utf8_userconverters_with_explicit_dtype(self):
        utf8 = b'\xcf\x96'
        with temppath() as path:
            with open(path, 'wb') as f:
                f.write(b'skip,skip,2001-01-01' + utf8 + b',1.0,skip')
            test = np.genfromtxt(path, delimiter=",", names=None, dtype=float,
                                 usecols=(2, 3), converters={2: np.unicode},
                                 encoding='UTF-8')
        control = np.array([('2001-01-01' + utf8.decode('UTF-8'), 1.)],
                           dtype=[('', '|U11'), ('', float)])
        assert_equal(test, control)

Source File: test_dtype.py From GraphicDesignPatternByPython with MIT License

5 votes

def test_run(self):
        """Only test hash runs at all."""
        for t in [int, float, complex, np.int32, str, object,
                np.unicode]:
            dt = np.dtype(t)
            hash(dt)

Source File: test_io.py From Mastering-Elasticsearch-7.0 with MIT License

5 votes

def test_binary_load(self):
        butf8 = b"5,6,7,\xc3\x95scarscar\n\r15,2,3,hello\n\r"\
                b"20,2,3,\xc3\x95scar\n\r"
        sutf8 = butf8.decode("UTF-8").replace("\r", "").splitlines()
        with temppath() as path:
            with open(path, "wb") as f:
                f.write(butf8)
            with open(path, "rb") as f:
                x = np.loadtxt(f, encoding="UTF-8", dtype=np.unicode)
            assert_array_equal(x, sutf8)
            # test broken latin1 conversion people now rely on
            with open(path, "rb") as f:
                x = np.loadtxt(f, encoding="UTF-8", dtype="S")
            x = [b'5,6,7,\xc3\x95scarscar', b'15,2,3,hello', b'20,2,3,\xc3\x95scar']
            assert_array_equal(x, np.array(x, dtype="S"))

Source File: test_regression.py From Computable with MIT License

5 votes

def test_unaligned_unicode_access(self, level=rlevel) :
        """Ticket #825"""
        for i in range(1, 9) :
            msg = 'unicode offset: %d chars'%i
            t = np.dtype([('a', 'S%d'%i), ('b', 'U2')])
            x = np.array([(asbytes('a'), sixu('b'))], dtype=t)
            if sys.version_info[0] >= 3:
                assert_equal(str(x), "[(b'a', 'b')]", err_msg=msg)
            else:
                assert_equal(str(x), "[('a', u'b')]", err_msg=msg)

Source File: test_io.py From Mastering-Elasticsearch-7.0 with MIT License

5 votes

def test_converters_nodecode(self):
        # test native string converters enabled by setting an encoding
        utf8 = b'\xcf\x96'.decode('UTF-8')
        with temppath() as path:
            with io.open(path, 'wt', encoding='UTF-8') as f:
                f.write(utf8)
            x = self.loadfunc(path, dtype=np.unicode,
                              converters={0: lambda x: x + 't'},
                              encoding='UTF-8')
            a = np.array([utf8 + 't'])
            assert_array_equal(x, a)

Source File: test_io.py From Mastering-Elasticsearch-7.0 with MIT License

5 votes

def test_converters_decode(self):
        # test converters that decode strings
        c = TextIO()
        c.write(b'\xcf\x96')
        c.seek(0)
        x = self.loadfunc(c, dtype=np.unicode,
                          converters={0: lambda x: x.decode('UTF-8')})
        a = np.array([b'\xcf\x96'.decode('UTF-8')])
        assert_array_equal(x, a)

Source File: test_io.py From Mastering-Elasticsearch-7.0 with MIT License

5 votes

def test_stringload(self):
        # umlaute
        nonascii = b'\xc3\xb6\xc3\xbc\xc3\xb6'.decode("UTF-8")
        with temppath() as path:
            with open(path, "wb") as f:
                f.write(nonascii.encode("UTF-16"))
            x = self.loadfunc(path, encoding="UTF-16", dtype=np.unicode)
            assert_array_equal(x, nonascii)

Source File: test_io.py From Mastering-Elasticsearch-7.0 with MIT License

5 votes

def test_unicode_stringstream(self):
        utf8 = b'\xcf\x96'.decode('UTF-8')
        a = np.array([utf8], dtype=np.unicode)
        s = StringIO()
        np.savetxt(s, a, fmt=['%s'], encoding='UTF-8')
        s.seek(0)
        assert_equal(s.read(), utf8 + '\n')

Source File: test_io.py From Mastering-Elasticsearch-7.0 with MIT License

5 votes

def test_unicode_bytestream(self):
        utf8 = b'\xcf\x96'.decode('UTF-8')
        a = np.array([utf8], dtype=np.unicode)
        s = BytesIO()
        np.savetxt(s, a, fmt=['%s'], encoding='UTF-8')
        s.seek(0)
        assert_equal(s.read().decode('UTF-8'), utf8 + '\n')

Source File: test_io.py From Mastering-Elasticsearch-7.0 with MIT License

5 votes

def test_unicode(self):
        utf8 = b'\xcf\x96'.decode('UTF-8')
        a = np.array([utf8], dtype=np.unicode)
        with tempdir() as tmpdir:
            # set encoding as on windows it may not be unicode even on py3
            np.savetxt(os.path.join(tmpdir, 'test.csv'), a, fmt=['%s'],
                       encoding='UTF-8')

Source File: test_regression.py From Computable with MIT License

5 votes

def test_unicode_alloc_dealloc_match(self):
        # Ticket #1578, the mismatch only showed up when running
        # python-debug for python versions >= 2.7, and then as
        # a core dump and error message.
        a = np.array(['abc'], dtype=np.unicode)[0]
        del a

Source File: test_regression.py From Computable with MIT License

5 votes

def test_string_truncation_ucs2(self):
        # Ticket #2081. Python compiled with two byte unicode
        # can lead to truncation if itemsize is not properly
        # adjusted for Numpy's four byte unicode.
        if sys.version_info[0] >= 3:
            a = np.array(['abcd'])
        else:
            a = np.array([sixu('abcd')])
        assert_equal(a.dtype.itemsize, 16)

Source File: test_regression.py From GraphicDesignPatternByPython with MIT License

5 votes

def test_string_truncation_ucs2(self):
        # Ticket #2081. Python compiled with two byte unicode
        # can lead to truncation if itemsize is not properly
        # adjusted for NumPy's four byte unicode.
        if sys.version_info[0] >= 3:
            a = np.array(['abcd'])
        else:
            a = np.array([u'abcd'])
        assert_equal(a.dtype.itemsize, 16)

Python numpy.unicode() Examples