Python emoji.UNICODE_EMOJI Examples
The following are 20 code examples of emoji.UNICODE_EMOJI, a module-level dictionary of the emoji package (it is a mapping, not a callable). Each example is taken from an open-source project; the source file and project are named above each snippet. You may also want to check out the other available functions and classes of the emoji module.
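Because UNICODE_EMOJI is a dictionary, the idiomatic pattern in all of the examples below is a membership test (char in emoji.UNICODE_EMOJI) rather than a call. The attribute's layout has also changed across releases of the emoji package: early versions expose a flat dict keyed by emoji characters (which is what these examples assume), later 1.x releases nest it by language code, and emoji 2.0 removed it in favor of emoji.EMOJI_DATA and emoji.is_emoji(). A minimal version-tolerant membership check, under those assumptions, might look like:

import emoji

def is_emoji_char(ch):
    # emoji >= 2.0: UNICODE_EMOJI is gone; is_emoji() is the supported test
    if hasattr(emoji, "is_emoji"):
        return emoji.is_emoji(ch)
    data = emoji.UNICODE_EMOJI
    # Later 1.x releases key the dict by language code first
    if "en" in data and isinstance(data["en"], dict):
        return ch in data["en"]
    # Early releases: a flat dict keyed by emoji characters
    return ch in data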
Example #1
Source File: emojireact.py From Trusty-cogs-archive with MIT License
async def on_message(self, message):
    channel = message.channel
    if message.server.id not in self.settings:
        return
    if not self.settings[message.server.id]:
        return
    emoji_list = []
    for word in message.content.split(" "):
        # Custom server emoji look like <:name:id>
        if word.startswith("<:") and word.endswith(">"):
            emoji_list.append(word.rpartition(">")[0].partition("<")[2])
        if word in UNICODE_EMOJI:
            emoji_list.append(word)
    if emoji_list == []:
        return
    for emoji in emoji_list:
        try:
            await self.bot.add_reaction(message, emoji)
        except Exception:
            # Ignore reactions the bot is not allowed to add
            pass
Example #2
Source File: filter_utils.py From neural_chat with MIT License
def separate_emojis_and_text(text):
    emoji_chars = []
    non_emoji_chars = []
    for c in text:
        if c in emoji.UNICODE_EMOJI:
            emoji_chars.append(c)
        else:
            non_emoji_chars.append(c)
    return ''.join(emoji_chars), ''.join(non_emoji_chars)
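A possible invocation, assuming an emoji release where UNICODE_EMOJI is a flat dict (and import emoji at the top of the file):

emojis, text = separate_emojis_and_text(u'I love it \U0001F60D\U0001F60D')
print(emojis)  # '😍😍'
print(text)    # 'I love it ' (non-emoji characters, order preserved)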
Example #3
Source File: png.py From wttr.in with Apache License 2.0
def _script_category(char):
    """Returns the script category of a Unicode character.

    Possible values: default, Emoji, Cyrillic, Greek, Han, Hiragana, etc.
    """
    if char in emoji.UNICODE_EMOJI:
        return "Emoji"
    cat = unicodedata2.script_cat(char)[0]
    if char == u':':
        return 'Han'
    if cat in ['Latin', 'Common']:
        return 'default'
    return cat
Example #4
Source File: filter_input.py From neural_chat with MIT License
def read_english(path="english_words.txt", add_emojis=True):
    # read english words for filtering (includes emojis as part of set)
    english = set()
    with codecs.open(path, "r", "utf-8") as f:
        for line in f:
            line = line.strip().lower().replace('\n', '')
            if len(line):
                english.add(line)
    if add_emojis:
        for e in UNICODE_EMOJI:
            english.add(e)
    return english
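For illustration, a hypothetical call; english_words.txt is just the function's default path and is assumed to hold one lowercase word per line:

english = read_english(path="english_words.txt", add_emojis=True)
print('hello' in english)        # True, provided 'hello' is in the word list
print(u'\U0001F602' in english)  # True: every UNICODE_EMOJI key was added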
Example #5
Source File: preprocess.py From project-purifier with Apache License 2.0
def preprocess_string(text):
    """Preprocesses the input text.

    :param text: str
    :return: str
    """
    # Remove emoji first
    no_emoticon = ''
    for char in text:
        if char not in emoji.UNICODE_EMOJI:
            no_emoticon += char

    # Split on punctuation
    no_punctuation = re.split(r'([!,?]+)|([.]+)|([,]+)|(["])|([\'])|([&]+)|([(]+)|([)]+)|([~]+)|([♡]+)|([☆,★]+)',
                              no_emoticon.strip())
    no_punctuation_text = []
    for string in no_punctuation:
        if (string == '') or (string is None):
            continue
        no_punctuation_text.append(string)
    no_punctuation_text = ' '.join(no_punctuation_text)

    # Separate standalone jamo (Korean consonants/vowels) and digits
    split_char = re.split(r'([ㄱ-ㅣ0-9]+)', no_punctuation_text.strip())
    split_char = ' '.join(split_char)

    # Separate jamo clusters that are commonly used on their own in Korean
    split_char = re.split(r'([ㅎ]{2,})|([ㅜ,ㅠ]{2,})|([ㅗ]+)|([ㅋ,ㄱ,ㄲ]{2,})|\s+', split_char.strip())
    final_text = []
    for string in split_char:
        if (string == '') or (string is None):
            continue
        final_text.append(string)
    return ' '.join(final_text)
Example #6
Source File: chatline.py From WhatsApp-Analyzer with MIT License
def extract_emojis(self, string=""):
    emj = []
    for c in string:
        if c in emoji.UNICODE_EMOJI:
            emj.append(c)
    return emj
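The loop is equivalent to a single comprehension; a sketch of the same logic as a standalone function:

def extract_emojis(string=""):
    # Same membership test as above, written as a list comprehension
    return [c for c in string if c in emoji.UNICODE_EMOJI]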
Example #7
Source File: filter_utils.py From ELSA with MIT License
def separate_emojis_and_text(text):
    emoji_chars = []
    non_emoji_chars = []
    for c in text:
        if c in emoji.UNICODE_EMOJI:
            emoji_chars.append(c)
        else:
            non_emoji_chars.append(c)
    return ''.join(emoji_chars), ''.join(non_emoji_chars)
Example #8
Source File: data_processing.py From Sarcasm-Detection with MIT License
def check_if_emoji(word, emoji_dict):
    emojis = list(word)
    for em in emojis:
        if em in emoji_dict.keys() or em in emoji.UNICODE_EMOJI:
            return True
    return False

# A strict clean of the twitter data - removing emojis, hashtags, URLs, user mentions
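A hypothetical call to check_if_emoji with an empty custom dictionary, so only the emoji.UNICODE_EMOJI lookup decides:

print(check_if_emoji(u'\U0001F600', {}))  # True: U+1F600 (😀) is a known emoji
print(check_if_emoji('abc', {}))          # False: no character matches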
Example #9
Source File: data_processing.py From Sarcasm-Detection with MIT License
def clean_tweet(tweet, word_list, split_hashtag_method, replace_user_mentions=True,
                remove_hashtags=False, remove_emojis=False, all_to_lower_case=False):
    # Add white space before every punctuation sign so that we can split around it and keep it
    tweet = re.sub('([!?*&%"~`^+{}])', r' \1 ', tweet)
    tweet = re.sub(r'\s{2,}', ' ', tweet)
    tokens = tweet.split()
    valid_tokens = []
    for word in tokens:
        # Never include #sarca* hashtags
        if word.lower().startswith('#sarca'):
            continue
        # Never include URLs
        if 'http' in word:
            continue
        # Replace specific user mentions with a general user name
        if replace_user_mentions and word.startswith('@'):
            word = '@user'
        # Split or remove hashtags
        if word.startswith('#'):
            if remove_hashtags:
                continue
            splits = split_hashtag_method(word[1:], word_list)
            if all_to_lower_case:
                valid_tokens.extend([split.lower() for split in splits])
            else:
                valid_tokens.extend(splits)
            continue
        if remove_emojis and word in emoji.UNICODE_EMOJI:
            continue
        if all_to_lower_case:
            word = word.lower()
        valid_tokens.append(word)
    return ' '.join(valid_tokens)
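A hypothetical call, using a stand-in hashtag splitter that returns the tag unsplit (the real project supplies its own split method and word list):

no_split = lambda tag, word_list: [tag]
print(clean_tweet('@bob loves #winter \U0001F600 http://t.co/x', [], no_split))
# -> '@user loves winter 😀' (mention generalized, hashtag kept, URL dropped)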
Example #10
Source File: data_processing.py From Sarcasm-Detection with MIT License
def process_emojis(word, emoji_dict, translate_emojis=True):
    processed = []
    chars = list(word)
    remaining = ""
    for c in chars:
        if c in emoji_dict.keys() or c in emoji.UNICODE_EMOJI:
            if remaining != "":
                processed.append(remaining)
                remaining = ""
            if translate_emojis:
                if c in emoji_dict:
                    processed.extend(emoji_dict[c][3].lower().split())
            else:
                processed.extend(c)
        else:
            remaining += c
    if remaining != "":
        processed.append(remaining)
    if processed != []:
        return ' '.join(processed)
    else:
        return word

# TODO: Numerals - sarcasm heavily relies on them so find a way to extract meaning behind numbers
# Attempt to clean each tweet and make it as grammatical as possible
Example #11
Source File: data_processing.py From Sarcasm-Detection with MIT License
def extract_emojis(tweets):
    emojis = []
    for tw in tweets:
        tw_emojis = []
        for word in tw:
            chars = list(word)
            for ch in chars:
                if ch in emoji.UNICODE_EMOJI:
                    tw_emojis.append(ch)
        emojis.append(' '.join(tw_emojis))
    return emojis

# Replace a contraction (coming from possessives, verbs, emphasis or just bad language) by its longer form
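In extract_emojis above, tweets is expected to be a list of tokenized tweets, each a list of word strings; for example:

tweets = [['good', 'morning', u'\U0001F600'], ['no', 'emoji', 'here']]
print(extract_emojis(tweets))  # ['😀', ''] (one space-joined string per tweet)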
Example #12
Source File: extract_ml_features.py From Sarcasm-Detection with MIT License
def get_pragmatic_features(tweet_tokens):
    capitalized_words = user_specific = intensifiers = tweet_len_ch = 0
    for t in tweet_tokens:
        tweet_len_ch += len(t)
        if t.isupper() and len(t) > 1:
            capitalized_words += 1  # count of capitalized words
        if t.startswith("@"):
            user_specific += 1  # count of user mentions
        if t.startswith("#"):
            user_specific += 1  # count-based feature of hashtags used (excluding sarcasm or sarcastic)
        if t.lower().startswith("haha") or re.match('l(o)+l$', t.lower()):
            user_specific += 1  # binary feature marking the presence of laughter
        if t in helper.strong_negations:
            intensifiers += 1  # count-based feature of strong negations
        if t in helper.strong_affirmatives:
            intensifiers += 1  # count-based feature of strong affirmatives
        if t in helper.interjections:
            intensifiers += 1  # count-based feature of relevant interjections
        if t in helper.intensifiers:
            intensifiers += 1  # count-based feature of relevant intensifiers
        if t in helper.punctuation:
            user_specific += 1  # count-based feature of relevant punctuation signs
        if t in emoji.UNICODE_EMOJI:
            user_specific += 1  # count-based feature of emojis
    tweet_len_tokens = len(tweet_tokens)  # get the length of the tweet in tokens
    average_token_length = float(tweet_len_tokens) / max(1.0, float(tweet_len_ch))  # average tweet length
    feature_list = {'tw_len_ch': tweet_len_ch, 'tw_len_tok': tweet_len_tokens,
                    'avg_len': average_token_length, 'capitalized': capitalized_words,
                    'user_specific': user_specific, 'intensifiers': intensifiers}
    return feature_list

# Extract the n-grams (specified as a list n = [1, 2, 3, ...])
# e.g if n = [1,2,3] then n-gram_features is a dictionary of all uni-grams, bi-grams and tri-grams
# This n-gram extractor works for any kind of tokens i.e both words and pos tags
Example #13
Source File: utils.py From fontObfuscator with MIT License
def str_has_emoji(s: str) -> bool:
    for character in s:
        if character in emoji.UNICODE_EMOJI:
            return True
    return False
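Usage is a plain boolean check:

print(str_has_emoji(u'hello \U0001F44B'))  # True
print(str_has_emoji('hello'))              # False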
Example #14
Source File: candidate_data_fetcher.py From BLINK with MIT License
def get_data_for_entity(self, entity_data):
    """Given an entity data dictionary that contains some linking data
    (ex. title or ID), additional information (ex. description, aliases etc.)
    is added to the given entity dictionary"""
    data = self.data
    title = entity_data["wikipedia_title"]
    if "wikidata_info" in data[title]:
        if ("aliases" in data[title]["wikidata_info"]) and (
            data[title]["wikidata_info"]["aliases"]
        ) is not None:
            aliases = [
                alias
                for alias in data[title]["wikidata_info"]["aliases"]
                if alias not in emoji.UNICODE_EMOJI
            ]
        else:
            aliases = None
    else:
        aliases = None
    entity_data["aliases"] = aliases
    sents = []
    for k in range(0, 10):
        key = "sent_desc_{}".format(k + 1)
        sents.append(data[title].get(key, ""))
    entity_data["sentences"] = sents
    return entity_data
Example #15
Source File: structure_tools.py From message-analyser with MIT License
def get_emoji_countered(msgs):
    """Counts all emojis in messages.

    Args:
        msgs (list of MyMessage objects): Messages.

    Returns:
        collections.Counter of emojis.
    """
    cnt = Counter()
    for msg in msgs:
        for character in msg.text:
            if character in emoji.UNICODE_EMOJI:
                cnt[character] += 1
    return cnt
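A sketch of a call, using types.SimpleNamespace as a stand-in for the project's MyMessage objects (only the .text attribute is read here):

from types import SimpleNamespace

msgs = [SimpleNamespace(text=u'hi \U0001F602\U0001F602'),
        SimpleNamespace(text=u'ok \U0001F602')]
print(get_emoji_countered(msgs))  # Counter({'😂': 3})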
Example #16
Source File: bot.py From modmail with GNU Affero General Public License v3.0
async def convert_emoji(self, name: str) -> str:
    ctx = SimpleNamespace(bot=self, guild=self.modmail_guild)
    converter = commands.EmojiConverter()

    if name not in UNICODE_EMOJI:
        try:
            name = await converter.convert(ctx, name.strip(":"))
        except commands.BadArgument as e:
            logger.warning("%s is not a valid emoji. %s.", name, e)
            raise
    return name
Example #17
Source File: filter_input.py From DeepMoji with MIT License
def read_english(path="english_words.txt", add_emojis=True):
    # read english words for filtering (includes emojis as part of set)
    english = set()
    with codecs.open(path, "r", "utf-8") as f:
        for line in f:
            line = line.strip().lower().replace('\n', '')
            if len(line):
                english.add(line)
    if add_emojis:
        for e in UNICODE_EMOJI:
            english.add(e)
    return english
Example #18
Source File: filter_utils.py From DeepMoji with MIT License
def separate_emojis_and_text(text):
    emoji_chars = []
    non_emoji_chars = []
    for c in text:
        if c in emoji.UNICODE_EMOJI:
            emoji_chars.append(c)
        else:
            non_emoji_chars.append(c)
    return ''.join(emoji_chars), ''.join(non_emoji_chars)
Example #19
Source File: get_tweet.py From Dialog with MIT License
def screening(text):
    s = text
    # Drop a leading retweet marker
    if s[0:3] == "RT ":
        s = s.replace(s[0:3], "")
    # Remove @mentions (everything from '@' up to the next space)
    while s.find("@") != -1:
        index_at = s.find("@")
        if s.find(" ") != -1:
            index_sp = s.find(" ", index_at)
            if index_sp != -1:
                s = s.replace(s[index_at:index_sp + 1], "")
            else:
                s = s.replace(s[index_at:], "")
        else:
            s = s.replace(s[index_at:], "")
    # Remove line breaks
    while s.find("\n") != -1:
        index_ret = s.find("\n")
        s = s.replace(s[index_ret], "")
    s = s.replace('\n', '')
    # Remove URLs
    s = re.sub(r'https?://[\w/:%#\$&\?\(\)~\.=\+\-…]+', "", s)
    # Drop characters outside the Basic Multilingual Plane
    non_bmp_map = dict.fromkeys(range(0x10000, sys.maxunicode + 1), '')
    s = s.translate(non_bmp_map)
    # Remove emoji
    s = ''.join(c if c not in emoji.UNICODE_EMOJI else '' for c in s)
    s = re.sub('。+', '。', s)
    # Cut off hashtags and everything after them
    while s.find('#') != -1:
        index_hash = s.find('#')
        s = s[0:index_hash]
    # Normalize, then keep only Japanese text plus basic punctuation
    s = neologdn.normalize(s, repeat=4)
    s = re.sub(r'[^、。!?ー〜1-9a-zA-Zぁ-んァ-ヶ亜-腕纊-黑一-鿕]', '', s)
    return s
Example #20
Source File: data_ingestion.py From BLINK with MIT License
def get_data_for_key(data, title):
    obj = {}
    obj["id"] = data[title]["wikipedia_id"]
    obj["title"] = title

    if ("wikidata_info" in data[title]) and (
        data[title]["wikidata_info"]["wikidata_id"] is not None
    ):
        obj["wikidata_id"] = data[title]["wikidata_info"]["wikidata_id"]
    else:
        obj["wikidata_id"] = data[title]["wikidata_id_from_index"]

    description = data[title]["intro_concatenated"]
    obj["desc"] = description

    if "wikidata_info" in data[title]:
        if "description" in data[title]["wikidata_info"]:
            wikidata_description = data[title]["wikidata_info"]["description"]
        else:
            wikidata_description = ""
        if ("aliases" in data[title]["wikidata_info"]) and (
            data[title]["wikidata_info"]["aliases"]
        ) is not None:
            aliases = " ".join(
                [
                    '"{}"'.format(alias)
                    for alias in data[title]["wikidata_info"]["aliases"]
                    if alias not in emoji.UNICODE_EMOJI
                ]
            )
        else:
            aliases = ""
    else:
        aliases = ""
        wikidata_description = ""

    obj["aliases"] = aliases
    obj["wikidata_desc"] = wikidata_description
    obj["num_tokens"] = data[title]["num_tokens"]
    obj["num_incoming_links"] = data[title].get("num_incoming_links", 0)

    if args.add_sentence_data:
        for k in range(0, 10):
            key = "sent_desc_{}".format(k + 1)
            obj[key] = data[title].get(key, "")
    return obj