Python remove accents
13 Python code examples are found related to "remove accents".
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
Example 1
Source File: linkedin2username.py From linkedin2username with MIT License | 10 votes |
def remove_accents(raw_text):
    """Replace common accented lowercase letters with plain ASCII letters.

    The tool's goal is to brute force login mechanisms, mostly against
    English-language systems, where user accounts tend to be created
    without accented characters. This function swaps the accented
    characters listed below for their standard English-alphabet
    counterparts.

    Only the lowercase characters named in the table are rewritten
    (``ß`` becomes ``ss``); every other character, including uppercase
    accented letters, is returned unchanged.
    """
    # (character class, replacement) pairs, applied one after another.
    substitutions = (
        (u"[àáâãäå]", 'a'),
        (u"[èéêë]", 'e'),
        (u"[ìíîï]", 'i'),
        (u"[òóôõö]", 'o'),
        (u"[ùúûü]", 'u'),
        (u"[ýÿ]", 'y'),
        (u"[ß]", 'ss'),
        (u"[ñ]", 'n'),
    )
    cleaned = raw_text
    for pattern, plain in substitutions:
        cleaned = re.sub(pattern, plain, cleaned)
    return cleaned
Example 2
Source File: util.py From urduhack with MIT License | 7 votes |
def remove_accents(text: str) -> str:
    """
    Strip combining marks (accents/diacritics) from ``text``.

    Every character for which ``unicodedata.combining`` reports a non-zero
    combining class is dropped; all remaining characters are kept, in order.
    The text is not normalized first, so only marks that are present as
    separate code points are removed.

    Args:
        text (str): Urdu text

    Returns:
        str: ``text`` without its combining characters

    Examples:
        >>> from urduhack.preprocessing import remove_accents
        >>> text = "دالتِ عظمیٰ درخواست"
        >>> remove_accents(text)
        'دالت عظمی درخواست'
    """
    kept = [char for char in text if unicodedata.combining(char) == 0]
    return ''.join(kept)
Example 3
Source File: base.py From pyfiscal with MIT License | 6 votes |
def remove_accents(self, text):
    """
    Normalise unicode data to remove umlauts, accents and other
    non-ASCII characters.

    Rule 10 - When special characters appear as part of the name, paternal
    surname and maternal surname, they must be excluded for the calculation
    of the homonym and the verification digit.
    The characters will be interpreted, yes and only if, they are
    individually within the name, paternal surname and maternal surname.

    Examples:
        Roberto O’farril Carballo    OACR-661121
        Rubén D’angelo Fargo         DAFR-710108
        Luz Ma. Fernández Juárez     FEJL-830120

    The text is decomposed with NFD and encoded to ASCII with errors
    ignored, so anything that is not ASCII after decomposition is dropped.
    The result is returned via ``str()``.
    """
    try:
        text = unicode(text, 'utf-8')
    except (TypeError, NameError):
        # NameError: unicode is a default on python 3, the builtin is gone.
        # TypeError: presumably the value was already decoded text.
        pass

    decomposed = unicodedata.normalize('NFD', text)
    ascii_bytes = decomposed.encode('ascii', 'ignore')
    return str(ascii_bytes.decode("utf-8"))
Example 4
Source File: text_manipulation.py From text-mining-class with MIT License | 6 votes |
def remove_accents(text):
    """Replace accentuated characters by their non-accentuated counterparts

    The text is decomposed with the NFKD unicode decomposition scheme and
    the resulting sequence is then filtered to drop combining characters
    (also known as diacritical marks).

    Comments: this is a very naive implementation that only uses basic
    operations on the sequence of unicode characters. A more efficient
    approach that works only for languages that use the latin alphabet
    would use batch conversion to ASCII characters as done in:

        sklearn.feature_extraction.text.strip_accents_ascii
    """
    base_chars = []
    for char in unicodedata.normalize('NFKD', text):
        if unicodedata.combining(char):
            # Diacritical mark split off by the decomposition: skip it.
            continue
        base_chars.append(char)
    return "".join(base_chars)
Example 5
Source File: tv_grab_fetch.py From tvgrabpyAPI with GNU General Public License v3.0 | 6 votes |
def remove_accents(self, name):
    """Return ``name`` with selected lowercase accented vowels made plain.

    Each (accented, plain) pair below is applied in turn with ``re.sub``.
    Besides the accented vowels and ``y`` variants, ``@`` is replaced
    with ``a``. Characters not listed are left untouched.
    """
    replacements = (
        # acute
        ('á', 'a'), ('é', 'e'), ('í', 'i'), ('ó', 'o'), ('ú', 'u'), ('ý', 'y'),
        # grave
        ('à', 'a'), ('è', 'e'), ('ì', 'i'), ('ò', 'o'), ('ù', 'u'),
        # diaeresis
        ('ä', 'a'), ('ë', 'e'), ('ï', 'i'), ('ö', 'o'), ('ü', 'u'), ('ÿ', 'y'),
        # circumflex
        ('â', 'a'), ('ê', 'e'), ('î', 'i'), ('ô', 'o'), ('û', 'u'),
        # tilde
        ('ã', 'a'), ('õ', 'o'),
        # at-sign is treated as an 'a' look-alike
        ('@', 'a'),
    )
    for accented, plain in replacements:
        name = re.sub(accented, plain, name)
    return name
# end remove_accents()
Example 6
Source File: compat.py From razzy-spinner with GNU General Public License v3.0 | 5 votes |
def remove_accents(text):
    """Return ``text`` without nonspacing marks after NFKD decomposition.

    ``bytes`` input is first decoded as ASCII. The text is then decomposed
    with NFKD and every character whose Unicode category is ``Mn`` is
    dropped.
    """
    if isinstance(text, bytes):
        text = text.decode('ascii')

    # Binding the lookup to a local name gives a small (~10%) speedup.
    lookup_category = unicodedata.category
    decomposed = unicodedata.normalize('NFKD', text)
    return ''.join(
        [char for char in decomposed if lookup_category(char) != 'Mn']
    )


# Select the best transliteration method:
Example 7
Source File: clean.py From cleanco with MIT License | 5 votes |
def remove_accents(t):
    """based on https://stackoverflow.com/a/51230541

    Casefold ``t``, decompose it with NFKD, drop every character whose
    Unicode category is ``Mn``, and replace any remaining character that
    has an entry in ``NON_NFKD_MAP`` with its mapped value.
    """
    folded = t.casefold()
    pieces = []
    for char in unicodedata.normalize('NFKD', folded):
        if unicodedata.category(char) == 'Mn':
            # Nonspacing mark produced by the decomposition: discard.
            continue
        pieces.append(NON_NFKD_MAP[char] if char in NON_NFKD_MAP else char)
    return ''.join(pieces)
Example 8
Source File: plugin.py From limnoria-plugins with Do What The F*ck You Want To Public License | 5 votes |
def removeAccents(self, text):
    """Return ``str(text)`` with combining characters removed.

    The value is converted with ``str()``, decomposed with NFKD, and every
    character that ``unicodedata.combining`` flags as a combining mark is
    filtered out.
    """
    decomposed = unicodedata.normalize("NFKD", str(text))
    return "".join(
        char for char in decomposed if unicodedata.combining(char) == 0
    )
Example 9
Source File: normalize.py From script.elementum.burst with Do What The F*ck You Want To Public License | 5 votes |
def remove_accents(string):
    """
    Remove any accent in the string

    Input that is not ``unicode`` is first passed through
    ``normalize_string``. The text is then NFKD-decomposed, encoded to
    ASCII with unencodable characters ignored, and stripped. If nothing
    is left after that, the (possibly normalized) input is returned instead.

    :param string: string to remove accents
    :type string: str or unicode
    :return: string without accents
    :rtype: str or unicode
    """
    if not isinstance(string, unicode):
        string = normalize_string(string)

    decomposed = unicodedata.normalize('NFKD', string)
    ascii_only = decomposed.encode('ASCII', 'ignore').strip()

    # Fall back to the input when no ASCII characters survive.
    if ascii_only == u'':
        return string
    return ascii_only
Example 10
Source File: simpletokenizer.py From Poetry-Tools with MIT License | 5 votes |
def remove_accents(string):
    """
    Strip unicode accents from a string, leaving the base characters.

    The string is decomposed with NFKD and the combining characters that
    result are filtered out.
    """
    decomposed = unicodedata.normalize('NFKD', string)
    base_chars = filter(lambda char: not unicodedata.combining(char), decomposed)
    return u"".join(base_chars)
Example 11
Source File: search_util.py From python-compat-runtime with Apache License 2.0 | 5 votes |
def RemoveAccents(text):
    """Return ``text`` with combining characters removed.

    Values that are not ``basestring`` instances are returned unchanged.
    ``str`` input is decoded as UTF-8 first. No normalization is applied,
    so only combining characters already present as separate code points
    are dropped.
    """
    if isinstance(text, basestring):
        decoded = text.decode('utf-8') if isinstance(text, str) else text
        return u''.join(
            char for char in decoded if unicodedata.combining(char) == 0
        )
    return text
Example 12
Source File: search_util.py From python-compat-runtime with Apache License 2.0 | 5 votes |
def RemoveAccentsNfkd(text):
    """Return ``text`` NFKD-decomposed with combining characters removed.

    Values that are not ``basestring`` instances are returned unchanged.
    ``str`` input is decoded as UTF-8 before it is decomposed.
    """
    if isinstance(text, basestring):
        decoded = text.decode('utf-8') if isinstance(text, str) else text
        decomposed = unicodedata.normalize('NFKD', decoded)
        return u''.join(
            char for char in decomposed if unicodedata.combining(char) == 0
        )
    return text
Example 13
Source File: compat.py From V1EngineeringInc-Docs with Creative Commons Attribution Share Alike 4.0 International | 4 votes |
def remove_accents(text):
    """Drop nonspacing marks from ``text`` after NFKD decomposition.

    ``bytes`` input is decoded as ASCII before processing. Characters
    whose Unicode category is ``Mn`` are removed from the decomposed
    text; everything else is kept in order.
    """
    if isinstance(text, bytes):
        text = text.decode('ascii')

    category = unicodedata.category  # this gives a small (~10%) speedup
    kept = []
    for char in unicodedata.normalize('NFKD', text):
        if category(char) != 'Mn':
            kept.append(char)
    return ''.join(kept)


# Select the best transliteration method: