Python nltk.corpus.wordnet.synsets() Examples
The following are 30 code examples of nltk.corpus.wordnet.synsets(), collected from open-source projects. The project, source file, and license for each example are listed above it. You may also want to check out the other available functions and classes of the module nltk.corpus.wordnet.
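As a quick orientation before the examples, here is a minimal sketch of what wn.synsets() returns (the word "dog" and the noun filter are just illustrations):

    from nltk.corpus import wordnet as wn

    # Each Synset object represents one sense of the word;
    # pos narrows the results to a part of speech.
    for synset in wn.synsets('dog', pos=wn.NOUN):
        print(synset.name(), '-', synset.definition())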
Example #1
Source File: laplacian_tags.py From jingwei with MIT License
def tag_semantic_similarity(x, y, ic):
    mx = wn.morphy(x)
    my = wn.morphy(y)
    if mx is None or my is None:
        return 0
    synX = wn.synsets(mx, pos=wn.NOUN)
    synY = wn.synsets(my, pos=wn.NOUN)
    if len(synX) > 0 and len(synY) > 0:
        maxSim = synX[0].lin_similarity(synY[0], ic)
    else:
        maxSim = 0
    return maxSim
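Note that lin_similarity requires an information-content dictionary as its ic argument. A hedged usage sketch, assuming the Brown-corpus IC file shipped with NLTK's wordnet_ic corpus (any IC file would do):

    from nltk.corpus import wordnet as wn
    from nltk.corpus import wordnet_ic

    # Information-content counts derived from the Brown corpus.
    brown_ic = wordnet_ic.ic('ic-brown.dat')
    print(tag_semantic_similarity('dog', 'cat', brown_ic))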
Example #2
Source File: featurizer.py From combine-FEVER-NSMN with MIT License
def is_exact_match(token1, token2):
    token1 = token1.lower()
    token2 = token2.lower()
    if token1 == token2:
        return True

    token1_stem = get_stem(token1)

    for synsets in wn.synsets(token2):
        for lemma in synsets.lemma_names():
            if token1_stem == get_stem(lemma):
                return True

    if token1 == "n't" and token2 == "not":
        return True
    elif token1 == "not" and token2 == "n't":
        return True
    elif token1_stem == get_stem(token2):
        return True
    return False
Example #3
Source File: wordnet_app.py From luscan-devel with GNU General Public License v2.0
def get_static_welcome_message():
    """
    Get the static welcome page.
    """
    return \
"""
<h3>Search Help</h3>
<ul><li>The display below the line is an example of the output the browser
shows you when you enter a search word. The search word was <b>green</b>.</li>
<li>The search result shows for different parts of speech the <b>synsets</b>
i.e. different meanings for the word.</li>
<li>All underlined texts are hypertext links. There are two types of links:
word links and others. Clicking a word link carries out a search for the word
in the Wordnet database.</li>
<li>Clicking a link of the other type opens a display section of data attached
to that link. Clicking that link a second time closes the section again.</li>
<li>Clicking <u>S:</u> opens a section showing the relations for that synset.</li>
<li>Clicking on a relation name opens a section that displays the associated
synsets.</li>
<li>Type a search word in the <b>Next Word</b> field and start the search by the
<b>Enter/Return</b> key or click the <b>Search</b> button.</li>
</ul>
"""
Example #4
Source File: nlp_util.py From How-to-use-SyntaxNet with MIT License
def get_synonym(root):
    listofsyns = wordnet.synsets(root[0])
    # Picks the fourth synset; raises IndexError if the word has fewer senses.
    synonym = listofsyns[3].name().split(".")[0]
    if root[1] == 'VBD':
        synonym = verb.verb_past(synonym)
    elif root[1] == 'VBG':
        synonym = verb.verb_present_participle(synonym)
    elif root[1] == 'VBN':
        synonym = verb.verb_past_participle(synonym)
    elif root[1] == 'VBP':
        synonym = verb.verb_present(synonym, person=3, negate=True)
    elif root[1] == 'VBZ':
        synonym = verb.verb_present(synonym, person=3, negate=False)
    return synonym

# retrieve paraphrased sentence
Example #5
Source File: preprocess_lst_test.py From lexsub with Apache License 2.0
def is_atomic_mwe(mwe, verb_lemma, complement_lemma, synsets):
    # Note: written against the older NLTK 2.x API, where synset.definition
    # and lemma.name are attributes rather than methods.
    mwe_count = 0
    for synset in synsets:
        gloss_lemmas = set([WordNetLemmatizer().lemmatize(word)
                            for word in synset.definition.split()])
        if verb_lemma in gloss_lemmas or complement_lemma in gloss_lemmas:
            return False
        for syn_lemma in synset.lemmas:
            if syn_lemma.name != mwe:
                tokens = syn_lemma.name.split('_')
                for token in tokens:
                    if token == verb_lemma:
                        return False
                if len(tokens) == 2 and tokens[1] == complement_lemma:
                    return False
            else:
                mwe_count += syn_lemma.count()
    return True
Example #6
Source File: utilities.py From EMNLP2018_NLI with GNU General Public License v3.0
def has_relation(token1, token2, rel_name=""):
    token1 = token1.lower()
    token2 = token2.lower()
    t1 = stemmer.stem(token1)
    t2 = stemmer.stem(token2)
    _t1 = wn.synsets(t1)
    _t2 = wn.synsets(t2)
    if(len(_t1)==0 or len(_t2)==0):
        return [0, 0]
    token_2_hyponyms = get_hyponyms(_t2[0])
    token_2_hypernyms = get_hypernyms(_t2[0])
    if(_t1[0] in token_2_hyponyms):
        # t1 is a hyponym of t2
        # print("Hyponym {} {}".format(t1, t2))
        return [1, 0]
    elif(_t1[0] in token_2_hypernyms):
        # t1 is a hypernym of t2
        return [0, 1]
    else:
        return [0, 0]
Example #7
Source File: utilities.py From EMNLP2018_NLI with GNU General Public License v3.0
def is_antonyms(token1, token2):
    token1 = token1.lower()
    token2 = token2.lower()
    token1_stem = stemmer.stem(token1)
    antonym_lists_for_token2 = []
    for synsets in wn.synsets(token2):
        for l in synsets.lemmas():
            _ant = l.antonyms()
            if(len(_ant)>0):
                antonym_lists_for_token2.append(_ant[0].name())
    # for lemma_synsets in [wn.synsets(l) for l in synsets.lemma_names()]:
    #     for lemma_syn in lemma_synsets:
    #         for lemma in lemma_syn.lemmas():
    #             for antonym in lemma.antonyms():
    #                 antonym_lists_for_token2.append(antonym.name())
    antonym_lists_for_token2 = list(set(antonym_lists_for_token2))
    for atnm in antonym_lists_for_token2:
        if token1_stem == stemmer.stem(atnm):
            return 1
    return 0
Example #8
Source File: feature_wordnet_similarity.py From kaggle-HomeDepot with MIT License
def transform_one(self, obs, target, id):
    obs_tokens = nlp_utils._tokenize(obs, token_pattern)
    target_tokens = nlp_utils._tokenize(target, token_pattern)
    obs_synset_list = [wn.synsets(obs_token) for obs_token in obs_tokens]
    target_synset_list = [wn.synsets(target_token) for target_token in target_tokens]
    val_list = []
    for obs_synset in obs_synset_list:
        _val_list = []
        for target_synset in target_synset_list:
            _s = self._maximum_similarity_for_two_synset_list(obs_synset, target_synset)
            _val_list.append(_s)
        if len(_val_list) == 0:
            _val_list = [config.MISSING_VALUE_NUMERIC]
        val_list.append(_val_list)
    if len(val_list) == 0:
        val_list = [[config.MISSING_VALUE_NUMERIC]]
    return val_list
Example #9
Source File: short_sentence_similarity.py From Semantic-Texual-Similarity-Toolkits with MIT License
def length_dist(synset_1, synset_2):
    """
    Return a measure of the length of the shortest path in the semantic
    ontology (Wordnet in our case as well as the paper's) between two
    synsets.
    """
    l_dist = six.MAXSIZE
    if synset_1 is None or synset_2 is None:
        return 0.0
    if synset_1 == synset_2:
        # if synset_1 and synset_2 are the same synset return 0
        l_dist = 0.0
    else:
        wset_1 = set([str(x.name()) for x in synset_1.lemmas()])
        wset_2 = set([str(x.name()) for x in synset_2.lemmas()])
        if len(wset_1.intersection(wset_2)) > 0:
            # if synset_1 != synset_2 but there is word overlap, return 1.0
            l_dist = 1.0
        else:
            # just compute the shortest path between the two
            l_dist = synset_1.shortest_path_distance(synset_2)
            if l_dist is None:
                l_dist = 0.0
    # normalize path length to the range [0,1]
    return math.exp(-ALPHA * l_dist)
Example #10
Source File: short_sentence_similarity.py From Semantic-Texual-Similarity-Toolkits with MIT License
def get_best_synset_pair(word_1, word_2):
    """
    Choose the pair with highest path similarity among all pairs.
    Mimics pattern-seeking behavior of humans.
    """
    max_sim = -1.0
    synsets_1 = wn.synsets(word_1)
    synsets_2 = wn.synsets(word_2)
    if len(synsets_1) == 0 or len(synsets_2) == 0:
        return None, None
    else:
        max_sim = -1.0
        best_pair = None, None
        for synset_1 in synsets_1:
            for synset_2 in synsets_2:
                sim = wn.path_similarity(synset_1, synset_2)
                if sim is not None and sim > max_sim:
                    max_sim = sim
                    best_pair = synset_1, synset_2
        return best_pair
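A short usage sketch (assuming wn is nltk.corpus.wordnet, as in the source file):

    pair = get_best_synset_pair('cat', 'dog')
    if pair != (None, None):
        synset_1, synset_2 = pair
        # Report the best-matching senses and their path similarity.
        print(synset_1.name(), synset_2.name(), wn.path_similarity(synset_1, synset_2))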
Example #11
Source File: nltkmgr.py From sia-cog with MIT License
def synset(data):
    result = {}
    syns = wordnet.synsets(data)
    list = []
    for s in syns:
        r = {}
        r["name"] = s.name()
        r["lemma"] = s.lemmas()[0].name()
        r["definition"] = s.definition()
        r["examples"] = s.examples()
        list.append(r)
    result["list"] = list
    synonyms = []
    antonyms = []
    for syn in syns:
        for l in syn.lemmas():
            synonyms.append(l.name())
            if l.antonyms():
                antonyms.append(l.antonyms()[0].name())
    result["synonyms"] = synonyms
    result["antonyms"] = antonyms
    return json.loads(jsonpickle.encode(result, unpicklable=False))
Example #12
Source File: wn_persistent_api.py From combine-FEVER-NSMN with MIT License
def get_hyponym_stems(item):
    word, pos = item
    stems = set([])
    for synset in wn.synsets(word, pos=pos):
        for lemma in synset.lemma_names():
            for syn_lemma in wn.synsets(lemma, pos=pos):
                try:
                    syn_lemma_hypos = get_hyponyms(syn_lemma)
                except RecursionError:
                    # print(syn_lemma)
                    continue
                for nym in syn_lemma_hypos:
                    stems |= set(nym.lemma_names())
                    stems |= set([get_stem(ln) for ln in nym.lemma_names()])
    return (stems - get_stem_set(word))
Example #13
Source File: wn_persistent_api.py From combine-FEVER-NSMN with MIT License
def get_hypernym_stems_p(item, hypernym_stems_dict):
    word, pos = item
    item_key = word + '<(-.-)>' + pos
    if item_key in hypernym_stems_dict:
        return hypernym_stems_dict[item_key]
    stems = set([])
    for synset in wn.synsets(word, pos=pos):
        for lemma in synset.lemma_names():
            for syn_lemma in wn.synsets(lemma, pos=pos):
                try:
                    syn_lemma_hypers = get_hypernyms(syn_lemma)
                except RecursionError:
                    # print(syn_lemma)
                    continue
                for nym in syn_lemma_hypers:
                    stems |= set(nym.lemma_names())
                    stems |= set([get_stem(ln) for ln in nym.lemma_names()])
    result_set = (stems - get_stem_set(word))
    hypernym_stems_dict[item_key] = result_set
    return result_set
Example #14
Source File: wn_persistent_api.py From combine-FEVER-NSMN with MIT License
def get_hypernym_stems(item):
    word, pos = item
    stems = set([])
    for synset in wn.synsets(word, pos=pos):
        for lemma in synset.lemma_names():
            for syn_lemma in wn.synsets(lemma, pos=pos):
                try:
                    syn_lemma_hypers = get_hypernyms(syn_lemma)
                except RecursionError:
                    # print(syn_lemma)
                    continue
                for nym in syn_lemma_hypers:
                    stems |= set(nym.lemma_names())
                    stems |= set([get_stem(ln) for ln in nym.lemma_names()])
    return (stems - get_stem_set(word))
Example #15
Source File: wn_persistent_api.py From combine-FEVER-NSMN with MIT License
def get_hypo_up_to_lv_p(item, level, hypo_lvl_dict):
    word, pos = item
    stems = set([])
    item_key = word + '<(-.-)>' + pos + '#' + str(level)
    if item_key in hypo_lvl_dict:
        return hypo_lvl_dict[item_key]
    for synset in wn.synsets(word, pos=pos):
        for lemma in synset.lemma_names():
            for syn_lemma in wn.synsets(lemma, pos=pos):
                for h in syn_lemma.closure(hypo, level):
                    for h_lemma in h.lemma_names():
                        stems.add(h_lemma)
                        stems.add(get_stem(h_lemma))
    hypo_lvl_dict[item_key] = stems
    return stems
Example #16
Source File: syngraph.py From atap with Apache License 2.0
def graph_synsets(terms, pos=wn.NOUN, depth=2):
    """
    Create a networkx graph of the given terms to the given depth.
    """
    G = nx.Graph(
        name="WordNet Synsets Graph for {}".format(", ".join(terms)),
        depth=depth,
    )

    def add_term_links(G, term, current_depth):
        for syn in wn.synsets(term):
            for name in syn.lemma_names():
                G.add_edge(term, name)
                if current_depth < depth:
                    add_term_links(G, name, current_depth + 1)

    for term in terms:
        add_term_links(G, term, 0)

    return G
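A small usage sketch, assuming networkx has been imported as nx as the function body implies:

    G = graph_synsets(['tree'], depth=1)
    # Inspect the size of the resulting lemma-name graph.
    print(G.number_of_nodes(), 'nodes /', G.number_of_edges(), 'edges')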
Example #17
Source File: wn_persistent_api.py From combine-FEVER-NSMN with MIT License
def get_hyponym_stems_p(item, hyponym_stems_dict):
    word, pos = item
    item_key = word + '<(-.-)>' + pos
    if item_key in hyponym_stems_dict:
        return hyponym_stems_dict[item_key]
    stems = set([])
    for synset in wn.synsets(word, pos=pos):
        for lemma in synset.lemma_names():
            for syn_lemma in wn.synsets(lemma, pos=pos):
                try:
                    syn_lemma_hypos = get_hyponyms(syn_lemma)
                except RecursionError:
                    # print(syn_lemma)
                    continue
                for nym in syn_lemma_hypos:
                    stems |= set(nym.lemma_names())
                    stems |= set([get_stem(ln) for ln in nym.lemma_names()])
    result_set = (stems - get_stem_set(word))
    hyponym_stems_dict[item_key] = result_set
    return result_set
Example #18
Source File: wn_persistent_api.py From combine-FEVER-NSMN with MIT License
def get_antonym_stems_p(item, ant_dict):
    word, pos = item
    stems = set([])
    item_key = word + '<(-.-)>' + pos
    if item_key in ant_dict:
        return ant_dict[item_key]
    for synset in wn.synsets(word, pos=pos):
        for lemma in synset.lemma_names():
            for syn_lemma in wn.synsets(lemma, pos=pos):
                for l in syn_lemma.lemmas():
                    for antonym in l.antonyms():
                        stems.add(antonym.name())
                        stems.add(get_stem(antonym.name()))
    ant_dict[item_key] = stems
    return stems
Example #19
Source File: featurizer.py From combine-FEVER-NSMN with MIT License
def get_hyponym_stems(item):
    word, pos = item
    stems = set([])
    for synset in wn.synsets(word, pos=pos):
        for lemma in synset.lemma_names():
            for syn_lemma in wn.synsets(lemma, pos=pos):
                try:
                    syn_lemma_hypos = get_hyponyms(syn_lemma)
                except RecursionError:
                    # print(syn_lemma)
                    continue
                for nym in syn_lemma_hypos:
                    stems |= set(nym.lemma_names())
                    stems |= set([get_stem(ln) for ln in nym.lemma_names()])
    return (stems - get_stem_set(word))
Example #20
Source File: featurizer.py From combine-FEVER-NSMN with MIT License
def get_hypernym_stems(item):
    word, pos = item
    stems = set([])
    for synset in wn.synsets(word, pos=pos):
        for lemma in synset.lemma_names():
            for syn_lemma in wn.synsets(lemma, pos=pos):
                try:
                    syn_lemma_hypers = get_hypernyms(syn_lemma)
                except RecursionError:
                    # print(syn_lemma)
                    continue
                for nym in syn_lemma_hypers:
                    stems |= set(nym.lemma_names())
                    stems |= set([get_stem(ln) for ln in nym.lemma_names()])
    return (stems - get_stem_set(word))
Example #21
Source File: noun.py From VSE-C with MIT License
def valid(word, replacement):
    if replacement.lower() != replacement:
        return False
    synset_word = wn.synsets(word)
    synset_replacement = wn.synsets(replacement)
    for item_1 in synset_word:
        for item_2 in synset_replacement:
            if item_1 == item_2:
                return False
    # one-step hypernymy/hyponymy check
    for item_1 in synset_word:
        for subitem in item_1.hypernyms():
            for item_2 in synset_replacement:
                if item_2 == subitem:
                    return False
        for subitem in item_1.hyponyms():
            for item_2 in synset_replacement:
                if item_2 == subitem:
                    return False
    return True
Example #22
Source File: imagenet.py From mmfeat with BSD 3-Clause "New" or "Revised" License
def __init__(self, save_dir, config_path='./miner.yaml'):
    super(ImageNetMiner, self).__init__(save_dir, config_path)
    self.__engine__ = 'imagenet'
    self.format_url = 'http://www.image-net.org/api/text/imagenet.synset.geturls?wnid={}'

    # maximum number of synsets to retrieve - we don't need all images necessarily,
    # otherwise we get enormous amounts of synsets for words like 'entity' or 'animal'
    self.max_synsets = 10000

    self.wnl = WordNetLemmatizer()

    # url cache
    self.imgnet_url_cache = {}

    # whether we "level up" in hierarchy if no images found
    self.level_up_if_no_images = True
Example #23
Source File: context_utils.py From yelp with GNU Lesser General Public License v2.1
def is_noun_in_group(noun, group):
    senses = wordnet.synsets(noun, pos='n')
    return any(i in senses for i in group)
Example #24
Source File: wordnet.py From gobbli with Apache License 2.0
def _get_lemmas(synsets: List[Any]) -> List[str]:
    """
    Return all the lemma names associated with a list of synsets.
    """
    return [lemma_name for synset in synsets for lemma_name in synset.lemma_names()]
Example #25
Source File: auxiliary_word2vec.py From ZeroShotVideoClassification with Apache License 2.0
def verbs2basicform(words):
    ret = []
    for w in words:
        analysis = wn.synsets(w)
        if any([a.pos() == 'v' for a in analysis]):
            w = WordNetLemmatizer().lemmatize(w, 'v')
        ret.append(w)
    return ret
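A usage sketch, assuming the imports from the source file (wn is nltk.corpus.wordnet and WordNetLemmatizer comes from nltk.stem):

    # Words with a verb reading are reduced to their base form;
    # words without one pass through unchanged.
    print(verbs2basicform(['running', 'went', 'happily']))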
Example #26
Source File: featurizer.py From combine-FEVER-NSMN with MIT License
def get_hyper_up_to_lv(item, level):
    word, pos = item
    stems = set([])
    for synset in wn.synsets(word, pos=pos):
        for lemma in synset.lemma_names():
            for syn_lemma in wn.synsets(lemma, pos=pos):
                for h in syn_lemma.closure(hyper, level):
                    for h_lemma in h.lemma_names():
                        stems.add(h_lemma)
                        stems.add(get_stem(h_lemma))
    return stems
Example #27
Source File: sideInfo.py From cesi with Apache License 2.0
def wordnetLinking(self):
    for trp in self.triples:
        sub, rel, obj = trp['triple']
        raw_sub, raw_rel, raw_obj = trp['raw_triple']
        sub_id, rel_id, obj_id = self.ent2id[sub], self.rel2id[rel], self.ent2id[obj]

        for sentence in trp['src_sentences']:
            # sent = [wrd.lower() for wrd in sentence.split()]
            sent = sentence.split()

            ''' 92 is the length of list returned by dir when lesk is successful '''
            self.ent2wnet[sub_id] = self.ent2wnet.get(sub_id, set())
            res = lesk(sent, raw_sub)
            if len(dir(res)) == 92:
                self.ent2wnet[sub_id].add(res.name())

            self.ent2wnet[obj_id] = self.ent2wnet.get(obj_id, set())
            res = lesk(sent, raw_obj)
            if len(dir(res)) == 92:
                self.ent2wnet[obj_id].add(res.name())

            self.rel2wnet[rel_id] = self.rel2wnet.get(rel_id, set())
            res = lesk(sent, raw_rel)
            if len(dir(res)) == 92:
                self.rel2wnet[rel_id].add(res.name())

    self.setHeading('Wordnet Entity Clusters')
    self.printCluster(self.ent2wnet, self.id2ent, 'm2ol')

    # for ent in self.ent_list: self.ent2wnet[self.ent2id[ent]] = [ele.name() for ele in lesk(ent)]
    # for rel in self.rel_list: self.rel2wnet[self.rel2id[rel]] = [ele.name() for ele in wordnet.synsets(rel)]

    self.setHeading('Wordnet Relation Clusters')
    self.printCluster(self.rel2wnet, self.id2rel, 'm2ol')
Example #28
Source File: synset.py From ALaCarte with MIT License
def SemEval2015Task13():
    with open(FILEDIR+'data-SemEval2015_Task13/test/keys/gold_keys/EN/semeval-2015-task-13-en.key', 'r') as f:
        gold = [(split[1], set(key.split('%')[1] for key in split[2:] if key[:3] == 'wn:'))
                for split in (line.split() for line in f if '\twn:' in line)]
    ids = {entry[0] for entry in gold}
    with open(FILEDIR+'data-SemEval2015_Task13/test/data/semeval-2015-task-13-en.xml', 'r') as f:
        soup = BS(f, 'lxml')
    data = [(wf['id'], wf['lemma'], wf['pos'],
             list(split_on_punctuation(' '.join(child.text for child in sent.children
                                                if not child == wf and not child == '\n'))))
            for text in soup('text') for sent in text('sentence') for wf in sent('wf')
            if wf['id'] in ids and wf['pos'][0].lower() in POSMAP
            and wn.synsets(wf['lemma'], POSMAP[wf['pos'][0].lower()])]
    id2keys = defaultdict(lambda: set())
    for entry, keys in gold:
        id2keys[entry] = id2keys[entry].union(keys)
    gold = [(entry, id2keys[entry]) for entry, _, _, _ in data]
    return data, gold
Example #29
Source File: context_utils.py From yelp with GNU Lesser General Public License v2.1
def generate_senses(review):
    review.senses = set()
    for noun in review.nouns:
        review.senses |= set(wordnet.synsets(noun, pos='n'))
Example #30
Source File: word_forms.py From word_forms with MIT License
def get_related_lemmas(word):
    """
    args
        - word : a word e.g. "lovely"

    returns a list of related lemmas e.g.
    [Lemma('cover_girl.n.01.lovely'), Lemma('lovely.s.01.lovely'),
     Lemma('adorable.s.01.lovely'), Lemma('comeliness.n.01.loveliness')]
    returns [] if Wordnet doesn't recognize the word
    """
    all_lemmas_for_this_word = [lemma for ss in wn.synsets(word)
                                for lemma in ss.lemmas()
                                if lemma.name() == word]
    all_related_lemmas = [lemma for lemma in all_lemmas_for_this_word]
    new_lemmas = []
    for lemma in all_lemmas_for_this_word:
        for new_lemma in (lemma.derivationally_related_forms() + lemma.pertainyms()):
            if (not belongs(new_lemma, all_related_lemmas)
                    and not belongs(new_lemma, new_lemmas)):
                new_lemmas.append(new_lemma)
    while len(new_lemmas) > 0:
        all_lemmas_for_new_words = []
        for new_lemma in new_lemmas:
            word = new_lemma.name()
            all_lemmas_for_this_word = [lemma for ss in wn.synsets(word)
                                        for lemma in ss.lemmas()
                                        if lemma.name() == word]
            for lemma in all_lemmas_for_this_word:
                if not belongs(lemma, all_lemmas_for_new_words):
                    all_lemmas_for_new_words.append(lemma)
        all_related_lemmas += all_lemmas_for_new_words
        new_lemmas = []
        for lemma in all_lemmas_for_new_words:
            for new_lemma in (lemma.derivationally_related_forms() + lemma.pertainyms()):
                if (not belongs(new_lemma, all_related_lemmas)
                        and not belongs(new_lemma, new_lemmas)):
                    new_lemmas.append(new_lemma)
    return all_related_lemmas
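A usage sketch; note that belongs() is a helper defined elsewhere in the same source file, so this only runs in that context:

    # Print every lemma WordNet relates to "lovely" via derivation or pertainymy.
    for lemma in get_related_lemmas('lovely'):
        print(lemma.name())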