Python emoji.UNICODE_EMOJI Examples

The following are 20 code examples of emoji.UNICODE_EMOJI. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module emoji, or try the search function.
Example #1
Source File: emojireact.py    From Trusty-cogs-archive with MIT License 6 votes vote down vote up
async def on_message(self, message):
        """React to a message with every emoji it mentions.

        Fixes vs. the scraped snippet: restored the ``async`` keyword
        (the body uses ``await``, so a plain ``def`` is a SyntaxError),
        removed the unused ``channel`` local, and narrowed the bare
        ``except:`` to ``except Exception`` while keeping the original
        best-effort behavior.
        """
        # Only react on servers that have the feature configured and enabled.
        if message.server.id not in self.settings:
            return
        if not self.settings[message.server.id]:
            return
        emoji_list = []
        for word in message.content.split(" "):
            # Custom Discord emoji look like <:name:id>; keep the inner part.
            if word.startswith("<:") and word.endswith(">"):
                emoji_list.append(word.rpartition(">")[0].partition("<")[2])
            if word in UNICODE_EMOJI:
                emoji_list.append(word)
        if not emoji_list:
            return
        for emoji_char in emoji_list:
            try:
                await self.bot.add_reaction(message, emoji_char)
            except Exception:
                # Best-effort: adding a reaction may fail (missing
                # permissions, invalid emoji); keep going regardless.
                pass
Example #2
Source File: filter_utils.py    From neural_chat with MIT License 6 votes vote down vote up
def separate_emojis_and_text(text):
    """Split *text* into (emoji characters, everything else), order kept."""
    emoji_part = []
    text_part = []
    for ch in text:
        bucket = emoji_part if ch in emoji.UNICODE_EMOJI else text_part
        bucket.append(ch)
    return ''.join(emoji_part), ''.join(text_part)
Example #3
Source File: png.py    From wttr.in with Apache License 2.0 6 votes vote down vote up
def _script_category(char):
    """Return the writing-script category of a Unicode character.

    Possible values: "Emoji", default, Cyrillic, Greek, Han, Hiragana, ...
    """
    # Emoji take precedence over any script classification.
    if char in emoji.UNICODE_EMOJI:
        return "Emoji"

    category = unicodedata2.script_cat(char)[0]
    # The colon is rendered with the Han font in this renderer.
    if char == u':':
        return 'Han'
    return 'default' if category in ('Latin', 'Common') else category
Example #4
Source File: filter_input.py    From neural_chat with MIT License 5 votes vote down vote up
def read_english(path="english_words.txt", add_emojis=True):
    """Load the lowercase English vocabulary used for filtering.

    One word per line; blank lines are skipped. When *add_emojis* is
    true, every known emoji is included in the returned set as well.
    """
    words = set()
    with codecs.open(path, "r", "utf-8") as handle:
        for raw in handle:
            cleaned = raw.strip().lower().replace('\n', '')
            if cleaned:
                words.add(cleaned)
    if add_emojis:
        words.update(UNICODE_EMOJI)
    return words
Example #5
Source File: preprocess.py    From project-purifier with Apache License 2.0 5 votes vote down vote up
def preprocess_string(text):
    """Preprocess the input text.

    Removes emoji, splits around punctuation groups, separates
    stand-alone Korean jamo/digits, and breaks up jamo clusters that
    are commonly used on their own in Korean (e.g. ㅋㅋ, ㅠㅠ).

    :param text: str
    :return: str
    """

    # Remove emoji first.
    without_emoji = ''.join(ch for ch in text if ch not in emoji.UNICODE_EMOJI)

    # Split on punctuation (the captured groups are kept by re.split).
    pieces = re.split(r'([!,?]+)|([.]+)|([,]+)|(["])|([\'])|([&]+)|([(]+)|([)]+)|([~]+)|([♡]+)|([☆,★]+)',
                      without_emoji.strip())
    # Drop empty strings and the Nones produced by unmatched groups.
    depunctuated = ' '.join(piece for piece in pieces if piece)

    # Separate stand-alone jamo and digits.
    jamo_split = re.split(r'([ㄱ-ㅣ0-9]+)', depunctuated.strip())
    jamo_joined = ' '.join(jamo_split)

    # Split off jamo clusters that frequently stand alone in Korean.
    final_pieces = re.split(r'([ㅎ]{2,})|([ㅜ,ㅠ]{2,})|([ㅗ]+)|([ㅋ,ㄱ,ㄲ]{2,})|\s+', jamo_joined.strip())
    return ' '.join(piece for piece in final_pieces if piece)
Example #6
Source File: chatline.py    From WhatsApp-Analyzer with MIT License 5 votes vote down vote up
def extract_emojis(self, string=""):
        """Return the emoji characters found in *string*, in order."""
        return [ch for ch in string if ch in emoji.UNICODE_EMOJI]
Example #7
Source File: filter_utils.py    From ELSA with MIT License 5 votes vote down vote up
def separate_emojis_and_text(text):
    """Partition *text* into an emoji-only string and a non-emoji string."""
    found = [c for c in text if c in emoji.UNICODE_EMOJI]
    rest = [c for c in text if c not in emoji.UNICODE_EMOJI]
    return ''.join(found), ''.join(rest)
Example #8
Source File: data_processing.py    From Sarcasm-Detection with MIT License 5 votes vote down vote up
def check_if_emoji(word, emoji_dict):
    """True if any character of *word* is an emoji (known either to
    *emoji_dict* or to the emoji package)."""
    return any(ch in emoji_dict.keys() or ch in emoji.UNICODE_EMOJI
               for ch in word)


# A strict clean of the twitter data - removing emojis, hashtags, URLs, user mentions 
Example #9
Source File: data_processing.py    From Sarcasm-Detection with MIT License 5 votes vote down vote up
def clean_tweet(tweet, word_list, split_hashtag_method, replace_user_mentions=True,
                remove_hashtags=False, remove_emojis=False, all_to_lower_case=False):
    """Clean a tweet: pad punctuation, drop #sarca* tags and URLs,
    normalize user mentions, split or remove hashtags, and optionally
    remove emojis and lower-case every token.

    :param tweet: raw tweet text
    :param word_list: vocabulary forwarded to *split_hashtag_method*
    :param split_hashtag_method: callable(hashtag_body, word_list) -> list of words
    :return: the cleaned tweet as a single space-joined string
    """
    # Add white space before every punctuation sign so that we can split around it and keep it
    tweet = re.sub('([!?*&%"~`^+{}])', r' \1 ', tweet)
    # Fix: raw string literal — '\s' in a plain literal is an invalid escape
    # sequence (SyntaxWarning on modern Python); the pattern is unchanged.
    tweet = re.sub(r'\s{2,}', ' ', tweet)
    tokens = tweet.split()
    valid_tokens = []
    for word in tokens:
        # Never include #sarca* hashtags
        if word.lower().startswith('#sarca'):
            continue
        # Never include URLs
        if 'http' in word:
            continue
        # Replace specific user mentions with a general user name
        if replace_user_mentions and word.startswith('@'):
            word = '@user'
        # Split or remove hashtags
        if word.startswith('#'):
            if remove_hashtags:
                continue
            splits = split_hashtag_method(word[1:], word_list)
            if all_to_lower_case:
                valid_tokens.extend([split.lower() for split in splits])
            else:
                valid_tokens.extend(splits)
            continue
        if remove_emojis and word in emoji.UNICODE_EMOJI:
            continue
        if all_to_lower_case:
            word = word.lower()
        valid_tokens.append(word)
    return ' '.join(valid_tokens)
Example #10
Source File: data_processing.py    From Sarcasm-Detection with MIT License 5 votes vote down vote up
def process_emojis(word, emoji_dict, translate_emojis=True):
    """Split *word* into plain-text runs and emoji.

    Emoji known to *emoji_dict* are replaced by their lower-cased
    description words when *translate_emojis* is true; otherwise the
    emoji characters themselves are kept. If nothing was collected,
    *word* is returned unchanged.
    """
    tokens = []
    buffer = ""
    for ch in word:
        is_emoji = ch in emoji_dict.keys() or ch in emoji.UNICODE_EMOJI
        if not is_emoji:
            buffer += ch
            continue
        # Flush any accumulated plain text before handling the emoji.
        if buffer:
            tokens.append(buffer)
            buffer = ""
        if translate_emojis:
            # Emoji without a dictionary entry are dropped here.
            if ch in emoji_dict:
                tokens.extend(emoji_dict[ch][3].lower().split())
        else:
            tokens.extend(ch)
    if buffer:
        tokens.append(buffer)
    return ' '.join(tokens) if tokens else word


# TODO: Numerals - sarcasm heavily relies on them so find a way to extract meaning behind numbers
# Attempt to clean each tweet and make it as grammatical as possible 
Example #11
Source File: data_processing.py    From Sarcasm-Detection with MIT License 5 votes vote down vote up
def extract_emojis(tweets):
    """For each tokenized tweet, collect its emoji characters as one
    space-joined string; returns one string per tweet."""
    per_tweet = []
    for tokens in tweets:
        found = [ch for tok in tokens for ch in tok
                 if ch in emoji.UNICODE_EMOJI]
        per_tweet.append(' '.join(found))
    return per_tweet


# Replace a contraction (coming from possessives, verbs, emphasis or just bad language) by its longer form 
Example #12
Source File: extract_ml_features.py    From Sarcasm-Detection with MIT License 5 votes vote down vote up
def get_pragmatic_features(tweet_tokens):
    """Compute count-based pragmatic features over a list of tokens.

    Returns a dict with character/token lengths, average token length,
    capitalized-word count, user-specific counts (mentions, hashtags,
    laughter, punctuation, emojis) and intensifier counts.
    """
    n_caps = 0
    n_user = 0
    n_intense = 0
    n_chars = 0
    for token in tweet_tokens:
        n_chars += len(token)
        lowered = token.lower()
        if token.isupper() and len(token) > 1:
            n_caps += 1                 # fully-capitalized words
        if token.startswith("@") or token.startswith("#"):
            n_user += 1                 # user mentions / hashtags (excluding sarcasm tags)
        if lowered.startswith("haha") or re.match('l(o)+l$', lowered):
            n_user += 1                 # presence of laughter
        if token in helper.strong_negations:
            n_intense += 1              # strong negations
        if token in helper.strong_affirmatives:
            n_intense += 1              # strong affirmatives
        if token in helper.interjections:
            n_intense += 1              # relevant interjections
        if token in helper.intensifiers:
            n_intense += 1              # relevant intensifiers
        if token in helper.punctuation:
            n_user += 1                 # relevant punctuation signs
        if token in emoji.UNICODE_EMOJI:
            n_user += 1                 # emojis
    n_tokens = len(tweet_tokens)
    # NOTE(review): despite the name this is tokens-per-character, as in
    # the original implementation.
    avg_len = float(n_tokens) / max(1.0, float(n_chars))
    return {'tw_len_ch': n_chars, 'tw_len_tok': n_tokens, 'avg_len': avg_len,
            'capitalized': n_caps, 'user_specific': n_user, 'intensifiers': n_intense}


# Extract the n-grams (specified as a list n = [1, 2, 3, ...])
# e.g if n = [1,2,3] then n-gram_features is a dictionary of all uni-grams, bi-grams and tri-grams
# This n-gram extractor works for any kind of tokens i.e both words and pos tags 
Example #13
Source File: utils.py    From fontObfuscator with MIT License 5 votes vote down vote up
def str_has_emoji(s: str) -> bool:
    """Report whether *s* contains at least one emoji character."""
    return any(ch in emoji.UNICODE_EMOJI for ch in s)
Example #14
Source File: candidate_data_fetcher.py    From BLINK with MIT License 5 votes vote down vote up
def get_data_for_entity(self, entity_data):
        """Given an entity data dictionary that contains some linking data
        (e.g. title or ID), add extra information (description, aliases,
        sentences) to it and return it."""
        record = self.data[entity_data["wikipedia_title"]]

        # Aliases: keep only non-emoji aliases; None when unavailable.
        aliases = None
        if "wikidata_info" in record:
            info = record["wikidata_info"]
            if "aliases" in info and info["aliases"] is not None:
                aliases = [
                    alias
                    for alias in info["aliases"]
                    if alias not in emoji.UNICODE_EMOJI
                ]
        entity_data["aliases"] = aliases

        # First ten description sentences; missing keys become "".
        entity_data["sentences"] = [
            record.get("sent_desc_{}".format(k), "") for k in range(1, 11)
        ]

        return entity_data
Example #15
Source File: structure_tools.py    From message-analyser with MIT License 5 votes vote down vote up
def get_emoji_countered(msgs):
    """Counts all emojis in messages.

    Args:
        msgs (list of MyMessage objects): Messages.

    Returns:
        collections.Counter of emojis.
    """
    counts = Counter()
    for message in msgs:
        counts.update(ch for ch in message.text if ch in emoji.UNICODE_EMOJI)
    return counts
Example #16
Source File: bot.py    From modmail with GNU Affero General Public License v3.0 5 votes vote down vote up
async def convert_emoji(self, name: str) -> str:
        """Resolve *name* to either a unicode emoji (returned as-is) or a
        custom guild Emoji via discord.py's EmojiConverter.

        Fixes vs. the scraped snippet: restored the ``async`` keyword
        (the body uses ``await``) and the missing first argument of the
        warning call — the format string has two ``%s`` placeholders but
        only ``e`` was passed, which makes logging raise a formatting
        error instead of emitting the message.

        Raises commands.BadArgument when the name cannot be converted.
        """
        ctx = SimpleNamespace(bot=self, guild=self.modmail_guild)
        converter = commands.EmojiConverter()

        if name not in UNICODE_EMOJI:
            try:
                name = await converter.convert(ctx, name.strip(":"))
            except commands.BadArgument as e:
                logger.warning("%s is not a valid emoji. %s.", name, e)
                raise
        return name
Example #17
Source File: filter_input.py    From DeepMoji with MIT License 5 votes vote down vote up
def read_english(path="english_words.txt", add_emojis=True):
    """Return the English filtering vocabulary as a set.

    Reads one lower-cased word per line (blank lines skipped); when
    *add_emojis* is true, all known emoji are counted as English too.
    """
    with codecs.open(path, "r", "utf-8") as fh:
        english = {line.strip().lower().replace('\n', '') for line in fh}
    # The comprehension may have collected the empty string from blank lines.
    english.discard('')
    if add_emojis:
        english.update(UNICODE_EMOJI)
    return english
Example #18
Source File: filter_utils.py    From DeepMoji with MIT License 5 votes vote down vote up
def separate_emojis_and_text(text):
    """Split the characters of *text* into (emoji, other), preserving order."""
    buckets = ([], [])
    for ch in text:
        # Index 1 collects emoji (True == 1), index 0 everything else.
        buckets[ch in emoji.UNICODE_EMOJI].append(ch)
    return ''.join(buckets[1]), ''.join(buckets[0])
Example #19
Source File: get_tweet.py    From Dialog with MIT License 5 votes vote down vote up
def screening(text):
    """Clean a raw tweet for use as Japanese dialog-corpus text.

    Strips the retweet marker, @-mentions, newlines, URLs, non-BMP
    characters and emoji; collapses repeated '。'; truncates at the
    first '#'; normalizes with neologdn; finally removes anything
    outside a Japanese/alphanumeric whitelist.
    """
    s = text

    # NOTE(review): replace() removes EVERY "RT " occurrence, not just the
    # leading one — presumably acceptable for tweets; verify if reused.
    if s[0:3] == "RT ":
        s = s.replace(s[0:3], "")
    # Remove each @mention up to the following space (or end of string).
    while s.find("@") != -1:
        index_at = s.find("@")
        if s.find(" ") != -1:
            index_sp = s.find(" ", index_at)
            if index_sp != -1:
                s = s.replace(s[index_at:index_sp + 1], "")
            else:
                s = s.replace(s[index_at:], "")
        else:
            s = s.replace(s[index_at:], "")

    # s[index_ret] is a single "\n", so replace() removes all newlines at
    # once — this loop runs at most one full iteration.
    while s.find("\n") != -1:
        index_ret = s.find("\n")
        s = s.replace(s[index_ret], "")
    s = s.replace('\n', '')

    # Strip URLs.
    s = re.sub(r'https?://[\w/:%#\$&\?\(\)~\.=\+\-…]+', "", s)
    # Map every non-BMP character (which includes most emoji) to "".
    non_bmp_map = dict.fromkeys(range(0x10000, sys.maxunicode + 1), '')
    s = s.translate(non_bmp_map)
    # Remove any remaining (BMP) emoji characters.
    s = ''.join(c if c not in emoji.UNICODE_EMOJI else '' for c in s)

    # Collapse runs of the Japanese full stop.
    s = re.sub('。+', '。', s)

    # Truncate at the first hashtag (loop exits after one cut).
    while s.find('#') != -1:
        index_hash = s.find('#')
        s = s[0:index_hash]

    # Normalize Japanese text; repeat=4 presumably caps character
    # repetitions at 4 — confirm against neologdn docs.
    s = neologdn.normalize(s, repeat=4)

    # Whitelist: Japanese punctuation, kana, kanji ranges, digits, ASCII letters.
    s = re.sub(r'[^、。!?ー〜1-9a-zA-Zぁ-んァ-ヶ亜-腕纊-黑一-鿕]', '', s)

    return s
Example #20
Source File: data_ingestion.py    From BLINK with MIT License 4 votes vote down vote up
def get_data_for_key(data, title):
    """Build a flat ingestion record for one Wikipedia *title*.

    Pulls the Wikipedia/Wikidata IDs, description, non-emoji aliases and
    link statistics out of *data*; optionally copies the first ten
    description sentences when the module-level ``args`` requests it.
    """
    record = data[title]
    obj = {}

    obj["id"] = record["wikipedia_id"]
    obj["title"] = title

    # Prefer the Wikidata-provided ID; fall back to the index-derived one.
    has_wikidata_id = (
        "wikidata_info" in record
        and record["wikidata_info"]["wikidata_id"] is not None
    )
    if has_wikidata_id:
        obj["wikidata_id"] = record["wikidata_info"]["wikidata_id"]
    else:
        obj["wikidata_id"] = record["wikidata_id_from_index"]

    obj["desc"] = record["intro_concatenated"]

    wikidata_description = ""
    aliases = ""
    if "wikidata_info" in record:
        info = record["wikidata_info"]
        if "description" in info:
            wikidata_description = info["description"]
        if "aliases" in info and info["aliases"] is not None:
            # Quote each alias; skip aliases that are bare emoji.
            aliases = " ".join(
                '"{}"'.format(alias)
                for alias in info["aliases"]
                if alias not in emoji.UNICODE_EMOJI
            )

    obj["aliases"] = aliases
    obj["wikidata_desc"] = wikidata_description
    obj["num_tokens"] = record["num_tokens"]
    obj["num_incoming_links"] = record.get("num_incoming_links", 0)

    # NOTE(review): relies on a module-level ``args`` namespace.
    if args.add_sentence_data:
        for k in range(1, 11):
            key = "sent_desc_{}".format(k)
            obj[key] = record.get(key, "")

    return obj