Python Examples of unicodedata.east_asian

Source File: statemachine.py From AWS-Transit-Gateway-Demo-MultiAccount with MIT License

6 votes

def pad_double_width(self, pad_char):
        """
        Pad all double-width characters in self by appending `pad_char` to each.
        For East Asian language support.
        """
        if hasattr(unicodedata, 'east_asian_width'):
            east_asian_width = unicodedata.east_asian_width
        else:
            return                      # new in Python 2.4
        for i in range(len(self.data)):
            line = self.data[i]
            if isinstance(line, str):
                new = []
                for char in line:
                    new.append(char)
                    if east_asian_width(char) in 'WF': # 'W'ide & 'F'ull-width
                        new.append(pad_char)
                self.data[i] = ''.join(new)

Source File: terminal.py From TerminalView with MIT License

6 votes

def cursor_right(self, n=1):
        """ESCnC CUF (Cursor Forward)"""
        #logging.debug('cursor_right(%s)' % n)
        if not n:
            n = 1
        n = int(n)
        # This logic takes care of double-width unicode characters
        if self.double_width_right:
            self.double_width_right = False
            return
        self.cursorX += n
        try:
            char = self.screen[self.cursorY][self.cursorX]
        except IndexError: # Cursor is past the right-edge of the screen; ignore
            char = u' ' # This is a safe default/fallback
        if unicodedata.east_asian_width(char) == 'W':
            # This lets us skip the next call (get called 2x for 2x width)
            self.double_width_right = True
        try:
            for callback in self.callbacks[CALLBACK_CURSOR_POS].values():
                callback()
        except TypeError:
            pass

Source File: terminal.py From TerminalView with MIT License

6 votes

def cursor_left(self, n=1):
        """ESCnD CUB (Cursor Back)"""
        # Commented out to save CPU (and the others below too)
        #logging.debug('cursor_left(%s)' % n)
        n = int(n)
        # This logic takes care of double-width unicode characters
        if self.double_width_left:
            self.double_width_left = False
            return
        self.cursorX = max(0, self.cursorX - n) # Ensures positive value
        try:
            char = self.screen[self.cursorY][self.cursorX]
        except IndexError: # Cursor is past the right-edge of the screen; ignore
            char = u' ' # This is a safe default/fallback
        if unicodedata.east_asian_width(char) == 'W':
            # This lets us skip the next call (get called 2x for 2x width)
            self.double_width_left = True
        try:
            for callback in self.callbacks[CALLBACK_CURSOR_POS].values():
                callback()
        except TypeError:
            pass

Source File: roberteldersoftwarediff.py From roberteldersoftwarediff with Apache License 2.0

6 votes

def get_east_asian_width(unicode_str):
    r = unicodedata.east_asian_width(unicode_str)
    if r == "F":    #  Fullwidth
        return 1
    elif r == "H":  #  Half-width
        return 1
    elif r == "W":  #  Wide
        return 2
    elif r == "Na": #  Narrow
        return 1
    elif r == "A":  #  Ambiguous, go with 2
        return 1
    elif r == "N":  #  Neutral
        return 1
    else:
        return 1

Source File: ncm2.py From ncm2 with MIT License

6 votes

def strdisplaywidth(self, s):
        def get_char_display_width(unicode_str):
            r = unicodedata.east_asian_width(unicode_str)
            if r == "F":  # Fullwidth
                return 1
            elif r == "H":  # Half-width
                return 1
            elif r == "W":  # Wide
                return 2
            elif r == "Na":  # Narrow
                return 1
            elif r == "A":  # Ambiguous, go with 2
                return 1
            elif r == "N":  # Neutral
                return 1
            else:
                return 1

        s = unicodedata.normalize('NFC', s)
        w = 0
        for c in s:
            w += get_char_display_width(c)
        return w

Source File: test_regressions.py From ironpython2 with Apache License 2.0

6 votes

def test_ipy2_gh357(self):
        """https://github.com/IronLanguages/ironpython2/issues/357"""

        import unicodedata

        if is_cli:
            self.assertEqual(unicodedata.name(u'\u4e2d'), '<CJK IDEOGRAPH, FIRST>..<CJK IDEOGRAPH, LAST>')
        else:
            self.assertEqual(unicodedata.name(u'\u4e2d'), 'CJK UNIFIED IDEOGRAPH-4E2D')

        self.assertRaises(ValueError, unicodedata.decimal, u'\u4e2d')
        self.assertEqual(unicodedata.decimal(u'\u4e2d', 0), 0)
        self.assertRaises(ValueError, unicodedata.digit, u'\u4e2d')
        self.assertEqual(unicodedata.digit(u'\u4e2d', 0), 0)
        self.assertRaises(ValueError, unicodedata.numeric, u'\u4e2d')
        self.assertEqual(unicodedata.numeric(u'\u4e2d', 0), 0)
        self.assertEqual(unicodedata.category(u'\u4e2d'), 'Lo')
        self.assertEqual(unicodedata.bidirectional(u'\u4e2d'), 'L')
        self.assertEqual(unicodedata.combining(u'\u4e2d'), 0)
        self.assertEqual(unicodedata.east_asian_width(u'\u4e2d'), 'W')
        self.assertEqual(unicodedata.mirrored(u'\u4e2d'), 0)
        self.assertEqual(unicodedata.decomposition(u'\u4e2d'), '')

Source File: terminalui.py From imaginary with MIT License

6 votes

def width(ch):
    """
    Compute the display width of the given character.

    Useful for cursor-repositioning tasks, however this is not entirely
    reliable since different terminal emulators have different behavior in
    this area.

    @see: U{http://unicode.org/reports/tr11/}

    @return: The width in 1/2 ems of the given single-length unicode string.
    @rtype: C{int}
    """
    widthSpecifier = unicodedata.east_asian_width(ch)
    try:
        return _widths[widthSpecifier]
    except KeyError:
        raise KeyError("%r has a width that is not supported: %s"
                       % (ch, widthSpecifier))

Source File: statemachine.py From faces with GNU General Public License v2.0

6 votes

def pad_double_width(self, pad_char):
        """
        Pad all double-width characters in self by appending `pad_char` to each.
        For East Asian language support.
        """
        if hasattr(unicodedata, 'east_asian_width'):
            east_asian_width = unicodedata.east_asian_width
        else:
            return                      # new in Python 2.4
        for i in range(len(self.data)):
            line = self.data[i]
            if isinstance(line, str):
                new = []
                for char in line:
                    new.append(char)
                    if east_asian_width(char) in 'WF': # 'W'ide & 'F'ull-width
                        new.append(pad_char)
                self.data[i] = ''.join(new)

Source File: statemachine.py From faces with GNU General Public License v2.0

6 votes

def pad_double_width(self, pad_char):
        """
        Pad all double-width characters in self by appending `pad_char` to each.
        For East Asian language support.
        """
        if hasattr(unicodedata, 'east_asian_width'):
            east_asian_width = unicodedata.east_asian_width
        else:
            return                      # new in Python 2.4
        for i in range(len(self.data)):
            line = self.data[i]
            if isinstance(line, unicode):
                new = []
                for char in line:
                    new.append(char)
                    if east_asian_width(char) in 'WF': # 'W'ide & 'F'ull-width
                        new.append(pad_char)
                self.data[i] = ''.join(new)

Source File: cpp_lint.py From Deep-Exemplar-based-Colorization with MIT License

6 votes

def GetLineWidth(line):
  """Determines the width of the line in column positions.

  Args:
    line: A string, which may be a Unicode string.

  Returns:
    The width of the line in column positions, accounting for Unicode
    combining characters and wide characters.
  """
  if isinstance(line, unicode):
    width = 0
    for uc in unicodedata.normalize('NFC', line):
      if unicodedata.east_asian_width(uc) in ('W', 'F'):
        width += 2
      elif not unicodedata.combining(uc):
        width += 1
    return width
  else:
    return len(line)

Source File: cpp_lint.py From Deep-Learning-Based-Structural-Damage-Detection with MIT License

6 votes

def GetLineWidth(line):
  """Determines the width of the line in column positions.

  Args:
    line: A string, which may be a Unicode string.

  Returns:
    The width of the line in column positions, accounting for Unicode
    combining characters and wide characters.
  """
  if six.PY2:
    if isinstance(line, unicode):
      width = 0
      for uc in unicodedata.normalize('NFC', line):
        if unicodedata.east_asian_width(uc) in ('W', 'F'):
          width += 2
        elif not unicodedata.combining(uc):
          width += 1
      return width
  return len(line)

Source File: cpplint.py From ecs with MIT License

6 votes

def GetLineWidth(line):
  """Determines the width of the line in column positions.

  Args:
    line: A string, which may be a Unicode string.

  Returns:
    The width of the line in column positions, accounting for Unicode
    combining characters and wide characters.
  """
  if isinstance(line, unicode):
    width = 0
    for uc in unicodedata.normalize('NFC', line):
      if unicodedata.east_asian_width(uc) in ('W', 'F'):
        width += 2
      elif not unicodedata.combining(uc):
        width += 1
    return width
  else:
    return len(line)

Source File: statemachine.py From deepWordBug with Apache License 2.0

6 votes

def pad_double_width(self, pad_char):
        """
        Pad all double-width characters in self by appending `pad_char` to each.
        For East Asian language support.
        """
        if hasattr(unicodedata, 'east_asian_width'):
            east_asian_width = unicodedata.east_asian_width
        else:
            return                      # new in Python 2.4
        for i in range(len(self.data)):
            line = self.data[i]
            if isinstance(line, str):
                new = []
                for char in line:
                    new.append(char)
                    if east_asian_width(char) in 'WF': # 'W'ide & 'F'ull-width
                        new.append(pad_char)
                self.data[i] = ''.join(new)

Source File: statemachine.py From AWS-Transit-Gateway-Demo-MultiAccount with MIT License

6 votes

def pad_double_width(self, pad_char):
        """
        Pad all double-width characters in self by appending `pad_char` to each.
        For East Asian language support.
        """
        if hasattr(unicodedata, 'east_asian_width'):
            east_asian_width = unicodedata.east_asian_width
        else:
            return                      # new in Python 2.4
        for i in range(len(self.data)):
            line = self.data[i]
            if isinstance(line, str):
                new = []
                for char in line:
                    new.append(char)
                    if east_asian_width(char) in 'WF': # 'W'ide & 'F'ull-width
                        new.append(pad_char)
                self.data[i] = ''.join(new)

Source File: statemachine.py From cadquery-freecad-module with GNU Lesser General Public License v3.0

6 votes

def pad_double_width(self, pad_char):
        """
        Pad all double-width characters in self by appending `pad_char` to each.
        For East Asian language support.
        """
        if hasattr(unicodedata, 'east_asian_width'):
            east_asian_width = unicodedata.east_asian_width
        else:
            return                      # new in Python 2.4
        for i in range(len(self.data)):
            line = self.data[i]
            if isinstance(line, unicode):
                new = []
                for char in line:
                    new.append(char)
                    if east_asian_width(char) in 'WF': # 'W'ide & 'F'ull-width
                        new.append(pad_char)
                self.data[i] = ''.join(new)

Source File: test_regressions.py From ironpython3 with Apache License 2.0

6 votes

def test_ipy2_gh357(self):
        """https://github.com/IronLanguages/ironpython2/issues/357"""

        import unicodedata

        if is_cli:
            self.assertEqual(unicodedata.name(u'\u4e2d'), '<CJK IDEOGRAPH, FIRST>..<CJK IDEOGRAPH, LAST>')
        else:
            self.assertEqual(unicodedata.name(u'\u4e2d'), 'CJK UNIFIED IDEOGRAPH-4E2D')

        self.assertRaises(ValueError, unicodedata.decimal, u'\u4e2d')
        self.assertEqual(unicodedata.decimal(u'\u4e2d', 0), 0)
        self.assertRaises(ValueError, unicodedata.digit, u'\u4e2d')
        self.assertEqual(unicodedata.digit(u'\u4e2d', 0), 0)
        self.assertRaises(ValueError, unicodedata.numeric, u'\u4e2d')
        self.assertEqual(unicodedata.numeric(u'\u4e2d', 0), 0)
        self.assertEqual(unicodedata.category(u'\u4e2d'), 'Lo')
        self.assertEqual(unicodedata.bidirectional(u'\u4e2d'), 'L')
        self.assertEqual(unicodedata.combining(u'\u4e2d'), 0)
        self.assertEqual(unicodedata.east_asian_width(u'\u4e2d'), 'W')
        self.assertEqual(unicodedata.mirrored(u'\u4e2d'), 0)
        self.assertEqual(unicodedata.decomposition(u'\u4e2d'), '')

Source File: statemachine.py From blackmamba with MIT License

6 votes

def pad_double_width(self, pad_char):
        """
        Pad all double-width characters in self by appending `pad_char` to each.
        For East Asian language support.
        """
        if hasattr(unicodedata, 'east_asian_width'):
            east_asian_width = unicodedata.east_asian_width
        else:
            return                      # new in Python 2.4
        for i in range(len(self.data)):
            line = self.data[i]
            if isinstance(line, str):
                new = []
                for char in line:
                    new.append(char)
                    if east_asian_width(char) in 'WF': # 'W'ide & 'F'ull-width
                        new.append(pad_char)
                self.data[i] = ''.join(new)

Source File: terminal.py From django-gateone with GNU General Public License v3.0

6 votes

def cursor_right(self, n=1):
        """ESCnC CUF (Cursor Forward)"""
        #logging.debug('cursor_right(%s)' % n)
        if not n:
            n = 1
        n = int(n)
        # This logic takes care of double-width unicode characters
        if self.double_width_right:
            self.double_width_right = False
            return
        self.cursorX += n
        try:
            char = self.screen[self.cursorY][self.cursorX]
        except IndexError: # Cursor is past the right-edge of the screen; ignore
            char = u' ' # This is a safe default/fallback
        if unicodedata.east_asian_width(char) == 'W':
            # This lets us skip the next call (get called 2x for 2x width)
            self.double_width_right = True
        try:
            for callback in self.callbacks[CALLBACK_CURSOR_POS].values():
                callback()
        except TypeError:
            pass

Source File: terminal.py From django-gateone with GNU General Public License v3.0

6 votes

def cursor_left(self, n=1):
        """ESCnD CUB (Cursor Back)"""
        # Commented out to save CPU (and the others below too)
        #logging.debug('cursor_left(%s)' % n)
        n = int(n)
        # This logic takes care of double-width unicode characters
        if self.double_width_left:
            self.double_width_left = False
            return
        self.cursorX = max(0, self.cursorX - n) # Ensures positive value
        try:
            char = self.screen[self.cursorY][self.cursorX]
        except IndexError: # Cursor is past the right-edge of the screen; ignore
            char = u' ' # This is a safe default/fallback
        if unicodedata.east_asian_width(char) == 'W':
            # This lets us skip the next call (get called 2x for 2x width)
            self.double_width_left = True
        try:
            for callback in self.callbacks[CALLBACK_CURSOR_POS].values():
                callback()
        except TypeError:
            pass

Source File: statemachine.py From aws-extender with MIT License

6 votes

def pad_double_width(self, pad_char):
        """
        Pad all double-width characters in self by appending `pad_char` to each.
        For East Asian language support.
        """
        if hasattr(unicodedata, 'east_asian_width'):
            east_asian_width = unicodedata.east_asian_width
        else:
            return                      # new in Python 2.4
        for i in range(len(self.data)):
            line = self.data[i]
            if isinstance(line, unicode):
                new = []
                for char in line:
                    new.append(char)
                    if east_asian_width(char) in 'WF': # 'W'ide & 'F'ull-width
                        new.append(pad_char)
                self.data[i] = ''.join(new)

Source File: statemachine.py From bash-lambda-layer with MIT License

6 votes

def pad_double_width(self, pad_char):
        """
        Pad all double-width characters in self by appending `pad_char` to each.
        For East Asian language support.
        """
        if hasattr(unicodedata, 'east_asian_width'):
            east_asian_width = unicodedata.east_asian_width
        else:
            return                      # new in Python 2.4
        for i in range(len(self.data)):
            line = self.data[i]
            if isinstance(line, str):
                new = []
                for char in line:
                    new.append(char)
                    if east_asian_width(char) in 'WF': # 'W'ide & 'F'ull-width
                        new.append(pad_char)
                self.data[i] = ''.join(new)

Source File: cpp_lint.py From mix-and-match with MIT License

6 votes

def GetLineWidth(line):
  """Determines the width of the line in column positions.

  Args:
    line: A string, which may be a Unicode string.

  Returns:
    The width of the line in column positions, accounting for Unicode
    combining characters and wide characters.
  """
  if isinstance(line, unicode):
    width = 0
    for uc in unicodedata.normalize('NFC', line):
      if unicodedata.east_asian_width(uc) in ('W', 'F'):
        width += 2
      elif not unicodedata.combining(uc):
        width += 1
    return width
  else:
    return len(line)

Source File: __init__.py From coffeegrindsize with MIT License

5 votes

def east_asian_len(data, encoding=None, ambiguous_width=1):
        """
        Calculate display width considering unicode East Asian Width
        """
        if isinstance(data, text_type):
            try:
                data = data.decode(encoding)
            except UnicodeError:
                pass
            return sum(_EAW_MAP.get(east_asian_width(c), ambiguous_width) for c in data)
        else:
            return len(data)

Source File: curses_util.py From ci_edit with Apache License 2.0

5 votes

def charWidth(ch, column, tabWidth=8):
        if ch == u"\t":
            return tabWidth - (column % tabWidth)
        elif ch == u"" or ch < u" ":
            return 0
        elif ch < u"ᄀ":
            # Optimization.
            return 1
        elif unicodedata.east_asian_width(ch) in (u"F", r"W"):
            return 2
        return 1

Source File: curses_util.py From ci_edit with Apache License 2.0

5 votes

def isDoubleWidth(ch):
        if ch == u"" or ch < u"ᄀ":
            # Optimization.
            return False
        width = unicodedata.east_asian_width(ch)
        if width in (u"F", u"W"):
            return True
        return False

Source File: curses_util.py From ci_edit with Apache License 2.0

5 votes

def isZeroWidth(ch):
        return ch == u"" or ch < u" "  #or unicodedata.east_asian_width(ch) == "N"

Source File: curses_util.py From ci_edit with Apache License 2.0

5 votes

def charWidth(ch, column, tabWidth=8):
        if ch == u"\t":
            return tabWidth - (column % tabWidth)
        elif ch == u"" or ch < u" ":
            return 0
        elif ch < u"ᄀ":
            # Optimization.
            return 1
        elif unicodedata.east_asian_width(ch) == u"W":
            return 2
        return 1

Source File: __init__.py From AWS-Transit-Gateway-Demo-MultiAccount with MIT License

5 votes

def column_width(text):
    """Return the column width of text.

    Correct ``len(text)`` for wide East Asian and combining Unicode chars.
    """
    if isinstance(text, str) and sys.version_info < (3,0):
        return len(text)
    try:
        width = sum([east_asian_widths[unicodedata.east_asian_width(c)]
                     for c in text])
    except AttributeError:  # east_asian_width() New in version 2.4.
        width = len(text)
    # correction for combining chars:
    width -= len(find_combining_chars(text))
    return width

Source File: string.py From stig with GNU General Public License v3.0

5 votes

def strwidth(string):
    """Return displayed width of `string`, considering wide characters"""
    return len(string) + sum(1 for char in string
                             if _east_asian_width(char) in 'FW')

Source File: string.py From stig with GNU General Public License v3.0

5 votes

def strcrop(string, width, tail=None):
    """Return `string` cropped to `width`, considering wide characters

    If `tail` is not None, it must be a string that is appended to the cropped
    string.
    """
    def widechar_indexes(s):
        for i,c in enumerate(s):
            if _east_asian_width(c) in 'FW':
                yield i

    if strwidth(string) <= width:
        return string  # string is already short enough

    if tail is not None:
        width -= strwidth(tail)  # Account for tail in final width

    indexes = list(widechar_indexes(string)) + [len(string)]
    if not indexes:
        return string[:width]  # No wide chars, regular cropping is ok

    parts = []
    start = 0
    end = 0
    currwidth = strwidth(''.join(parts))

    while indexes and currwidth < width and end < len(string):
        end = indexes.pop(0)
        if end > 0:
            parts.append(string[start:end])
            currwidth = strwidth(''.join(parts))
            start = end

    if currwidth > width:
        excess = currwidth - width
        parts[-1] = parts[-1][:-excess]

    if tail is not None:
        parts.append(tail)

    return ''.join(parts)

Python unicodedata.east_asian_width() Examples