Python nltk.corpus.wordnet.synsets() Examples

The following are 30 code examples of nltk.corpus.wordnet.synsets(), collected from open-source projects. You can go to the original project or source file by following the link above each example. You may also want to check out all available functions/classes of the module nltk.corpus.wordnet, or try the search function.
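As a quick orientation before the examples: wn.synsets() takes a word and an optional pos argument and returns a list of Synset objects, one per sense WordNet knows for that word. A minimal sketch of the basic call (output comments are illustrative):

from nltk.corpus import wordnet as wn

synsets = wn.synsets('dog')                   # all senses of 'dog'
noun_senses = wn.synsets('dog', pos=wn.NOUN)  # noun senses only

first = synsets[0]
print(first.name())         # 'dog.n.01'
print(first.definition())   # gloss for that sense
print(first.lemma_names())  # synonyms grouped in that synset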
Example #1
Source File: laplacian_tags.py    From jingwei with MIT License
def tag_semantic_similarity(x, y, ic):
    mx = wn.morphy(x)
    my = wn.morphy(y)

    if mx is None or my is None:
        return 0

    synX = wn.synsets(mx, pos=wn.NOUN)
    synY = wn.synsets(my, pos=wn.NOUN)

    if len(synX) > 0 and len(synY) > 0:
        maxSim = synX[0].lin_similarity(synY[0], ic)
    else:
        maxSim = 0

    return maxSim 
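Note that lin_similarity() needs an information-content dictionary; NLTK ships precomputed ones in the wordnet_ic corpus. A usage sketch for the function above (assumes the wordnet_ic data has been downloaded; despite the name maxSim, only the first noun synset of each word is compared):

from nltk.corpus import wordnet as wn
from nltk.corpus import wordnet_ic

brown_ic = wordnet_ic.ic('ic-brown.dat')  # IC counts derived from the Brown corpus
print(tag_semantic_similarity('cat', 'dogs', brown_ic))  # morphy maps 'dogs' -> 'dog'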
Example #2
Source File: featurizer.py    From combine-FEVER-NSMN with MIT License
def is_exact_match(token1, token2):
    token1 = token1.lower()
    token2 = token2.lower()

    if token1 == token2:
        return True

    token1_stem = get_stem(token1)

    for synset in wn.synsets(token2):
        for lemma in synset.lemma_names():
            if token1_stem == get_stem(lemma):
                return True

    if token1 == "n't" and token2 == "not":
        return True
    elif token1 == "not" and token2 == "n't":
        return True
    elif token1_stem == get_stem(token2):
        return True
    return False 
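get_stem() is a project helper that is not shown in this excerpt; a plausible stand-in built on NLTK's PorterStemmer (an assumption, not the project's actual implementation):

from nltk.stem.porter import PorterStemmer

_stemmer = PorterStemmer()

def get_stem(token):
    # hypothetical replacement for the project's get_stem helper
    return _stemmer.stem(token)

With such a helper in place, is_exact_match('films', 'movie') returns True, because 'film' is a lemma name in one of the synsets of 'movie'.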
Example #3
Source File: wordnet_app.py    From luscan-devel with GNU General Public License v2.0
def get_static_welcome_message():
    """
    Get the static welcome page.
    """
    return \
"""
<h3>Search Help</h3>
<ul><li>The display below the line is an example of the output the browser
shows you when you enter a search word. The search word was <b>green</b>.</li>
<li>The search result shows, for each part of speech, the <b>synsets</b>,
i.e. the different meanings of the word.</li>
<li>All underlined text is a hypertext link. There are two types of links:
word links and others. Clicking a word link carries out a search for the word
in the Wordnet database.</li>
<li>Clicking a link of the other type opens a display section of data attached
to that link. Clicking that link a second time closes the section again.</li>
<li>Clicking <u>S:</u> opens a section showing the relations for that synset.</li>
<li>Clicking on a relation name opens a section that displays the associated
synsets.</li>
<li>Type a search word in the <b>Next Word</b> field and start the search by
pressing the <b>Enter/Return</b> key or clicking the <b>Search</b> button.</li>
</ul>
""" 
Example #4
Source File: nlp_util.py    From How-to-use-SyntaxNet with MIT License
def get_synonym(root):
    # root is a (word, POS-tag) pair; 'verb' is the conjugation module
    # (apparently NodeBox Linguistics' verb.py) imported elsewhere in this file.
    listofsyns = wordnet.synsets(root[0])
    # NOTE: taking the fourth synset raises IndexError for words with
    # fewer than four senses; listofsyns[0] would be the safe choice.
    synonym = listofsyns[3].name().split(".")[0]

    if root[1] == 'VBD':
        synonym = verb.verb_past(synonym)
    elif root[1] == 'VBG':
        synonym = verb.verb_present_participle(synonym)
    elif root[1] == 'VBN':
        synonym = verb.verb_past_participle(synonym)
    elif root[1] == 'VBP':
        synonym = verb.verb_present(synonym, person=3, negate=True)
    elif root[1] == 'VBZ':
        synonym = verb.verb_present(synonym, person=3, negate=False)

    return synonym

#retrieve paraphrased sentence 
Example #5
Source File: preprocess_lst_test.py    From lexsub with Apache License 2.0
def is_atomic_mwe(mwe, verb_lemma, complement_lemma, synsets):
    # Originally written against the pre-3.0 NLTK API, where definition/
    # lemmas/name were attributes; in NLTK 3 they are methods, as used below.
    mwe_count = 0
    for synset in synsets:
        gloss_lemmas = set([WordNetLemmatizer().lemmatize(word)
                            for word in synset.definition().split()])
        if verb_lemma in gloss_lemmas or complement_lemma in gloss_lemmas:
            return False
        for syn_lemma in synset.lemmas():
            if syn_lemma.name() != mwe:
                tokens = syn_lemma.name().split('_')
                for token in tokens:
                    if token == verb_lemma:
                        return False
                if len(tokens) == 2 and tokens[1] == complement_lemma:
                    return False
            else:
                # the original 'else' was mis-indented onto the for loop;
                # it belongs to the name check above
                mwe_count += syn_lemma.count()
    return True 
Example #6
Source File: utilities.py    From EMNLP2018_NLI with GNU General Public License v3.0
def has_relation(token1, token2, rel_name=""):
    token1 = token1.lower()
    token2 = token2.lower()
    t1 = stemmer.stem(token1)
    t2 = stemmer.stem(token2)
    _t1 = wn.synsets(t1)
    _t2 = wn.synsets(t2)

    if(len(_t1)==0 or len(_t2)==0):
        return [0,0]

    token_2_hyponyms = get_hyponyms(_t2[0])
    token_2_hypernyms = get_hypernyms(_t2[0])

    if(_t1[0] in token_2_hyponyms):
        # t1 is a hyponym of t2
        # print("Hyponym {} {}".format(t1, t2))
        return [1,0]
    elif(_t1[0] in token_2_hypernyms):
        # t1 is a hypernym of t2
        return [0,1]
    else:
        return [0,0] 
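get_hyponyms() and get_hypernyms() (and the module-level stemmer) are defined elsewhere in this file; a plausible sketch of the two helpers built on Synset.closure (an assumption about their behavior, not the project's code):

def get_hyponyms(synset):
    # hypothetical helper: transitive closure of the hyponym relation
    return set(synset.closure(lambda s: s.hyponyms()))

def get_hypernyms(synset):
    # hypothetical helper: transitive closure of the hypernym relation
    return set(synset.closure(lambda s: s.hypernyms()))

Note that the function only compares the first synset of each stemmed token, so the result depends on both the stemmer's output and WordNet's sense ordering.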
Example #7
Source File: utilities.py    From EMNLP2018_NLI with GNU General Public License v3.0
def is_antonyms(token1, token2):
    token1 = token1.lower()
    token2 = token2.lower()
    token1_stem = stemmer.stem(token1)
    antonym_lists_for_token2 = []
    for synsets in wn.synsets(token2):
        for l in synsets.lemmas():
            _ant = l.antonyms()
            if(len(_ant)>0):
                antonym_lists_for_token2.append(_ant[0].name())

        # for lemma_synsets in [wn.synsets(l) for l in synsets.lemma_names()]:
        #     for lemma_syn in lemma_synsets:
        #         for lemma in lemma_syn.lemmas():
        #             for antonym in lemma.antonyms():
                        # antonym_lists_for_token2.append(antonym.name())
    antonym_lists_for_token2 = list(set(antonym_lists_for_token2))
    for atnm in antonym_lists_for_token2:
        if token1_stem == stemmer.stem(atnm):
            return 1
    return 0 
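Like has_relation() above, this relies on a module-level stemmer, assumed here to be an NLTK PorterStemmer. A usage sketch under that assumption:

from nltk.stem.porter import PorterStemmer
stemmer = PorterStemmer()

print(is_antonyms('happy', 'unhappy'))  # 1: 'happy' is an antonym of a lemma of 'unhappy'
print(is_antonyms('happy', 'glad'))     # 0: no antonym of 'glad' stems to 'happi'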
Example #8
Source File: feature_wordnet_similarity.py    From kaggle-HomeDepot with MIT License
def transform_one(self, obs, target, id):
        obs_tokens = nlp_utils._tokenize(obs, token_pattern)
        target_tokens = nlp_utils._tokenize(target, token_pattern)
        obs_synset_list = [wn.synsets(obs_token) for obs_token in obs_tokens]
        target_synset_list = [wn.synsets(target_token) for target_token in target_tokens]
        val_list = []
        for obs_synset in obs_synset_list:
            _val_list = []
            for target_synset in target_synset_list:
                _s = self._maximum_similarity_for_two_synset_list(obs_synset, target_synset)
                _val_list.append(_s)
            if len(_val_list) == 0:
                _val_list = [config.MISSING_VALUE_NUMERIC]
            val_list.append( _val_list )
        if len(val_list) == 0:
            val_list = [[config.MISSING_VALUE_NUMERIC]]
        return val_list 
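_maximum_similarity_for_two_synset_list() is defined elsewhere in the class; a plausible reconstruction based on its name and how it is called (an assumption, not the project's code):

def _maximum_similarity_for_two_synset_list(self, synset_list1, synset_list2):
    # hypothetical: best path similarity over all cross-list synset pairs
    max_sim = config.MISSING_VALUE_NUMERIC
    for s1 in synset_list1:
        for s2 in synset_list2:
            sim = s1.path_similarity(s2)
            if sim is not None and sim > max_sim:
                max_sim = sim
    return max_sim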
Example #9
Source File: short_sentence_similarity.py    From Semantic-Texual-Similarity-Toolkits with MIT License
def length_dist(synset_1, synset_2):
    """
    Return a measure of the length of the shortest path in the semantic
    ontology (Wordnet in our case as well as the paper's) between two
    synsets.
    """
    l_dist = six.MAXSIZE
    if synset_1 is None or synset_2 is None:
        return 0.0
    if synset_1 == synset_2:
        # if synset_1 and synset_2 are the same synset, the distance is 0
        l_dist = 0.0
    else:
        wset_1 = set([str(x.name()) for x in synset_1.lemmas()])
        wset_2 = set([str(x.name()) for x in synset_2.lemmas()])
        if len(wset_1.intersection(wset_2)) > 0:
            # if synset_1 != synset_2 but there is word overlap, the distance is 1.0
            l_dist = 1.0
        else:
            # just compute the shortest path between the two
            l_dist = synset_1.shortest_path_distance(synset_2)
            if l_dist is None:
                l_dist = 0.0
    # normalize path length to the range [0,1]
    return math.exp(-ALPHA * l_dist) 
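ALPHA is a module-level constant; implementations of the Li et al. (2006) sentence-similarity measure that this code follows commonly use ALPHA = 0.2. A usage sketch under that assumption:

import math
from nltk.corpus import wordnet as wn

ALPHA = 0.2  # common choice in the literature; check the project's own setting

s1 = wn.synsets('dog')[0]
s2 = wn.synsets('cat')[0]
print(length_dist(s1, s2))  # exp(-ALPHA * shortest_path_distance)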
Example #10
Source File: short_sentence_similarity.py    From Semantic-Texual-Similarity-Toolkits with MIT License
def get_best_synset_pair(word_1, word_2):
    """
    Choose the pair with highest path similarity among all pairs.
    Mimics pattern-seeking behavior of humans.
    """
    synsets_1 = wn.synsets(word_1)
    synsets_2 = wn.synsets(word_2)
    if len(synsets_1) == 0 or len(synsets_2) == 0:
        return None, None
    else:
        max_sim = -1.0
        best_pair = None, None
        for synset_1 in synsets_1:
            for synset_2 in synsets_2:
                sim = wn.path_similarity(synset_1, synset_2)
                if sim is not None and sim > max_sim:
                    max_sim = sim
                    best_pair = synset_1, synset_2
        return best_pair 
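A quick usage sketch:

best_1, best_2 = get_best_synset_pair('dog', 'cat')
print(best_1, best_2)  # the synset pair with the highest path similarity

print(get_best_synset_pair('qzxwv', 'cat'))  # (None, None): no synsets for the first word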
Example #11
Source File: nltkmgr.py    From sia-cog with MIT License
def synset(data):
    result = {}
    syns = wordnet.synsets(data)
    entries = []  # avoid shadowing the built-in 'list'
    for s in syns:
        r = {}
        r["name"] = s.name()
        r["lemma"] = s.lemmas()[0].name()
        r["definition"] = s.definition()
        r["examples"] = s.examples()
        entries.append(r)

    result["list"] = entries
    synonyms = []
    antonyms = []
    for syn in syns:
        for l in syn.lemmas():
            synonyms.append(l.name())
            if l.antonyms():
                antonyms.append(l.antonyms()[0].name())

    result["synonyms"] = synonyms
    result["antonyms"] = antonyms
    return json.loads(jsonpickle.encode(result, unpicklable=False)) 
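A usage sketch (json and jsonpickle are imported at module level in the original file):

result = synset("good")
print(result["list"][0]["definition"])  # gloss of the first sense
print(result["synonyms"][:5])
print(result["antonyms"][:3])           # e.g. lemmas such as 'evil' or 'bad'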
Example #12
Source File: wn_persistent_api.py    From combine-FEVER-NSMN with MIT License
def get_hyponym_stems(item):
    word, pos = item
    stems = set([])

    for synset in wn.synsets(word, pos=pos):
        for lemma in synset.lemma_names():
            for syn_lemma in wn.synsets(lemma, pos=pos):
                try:
                    syn_lemma_hypos = get_hyponyms(syn_lemma)
                except RecursionError:
                    # print(syn_lemma)
                    continue

                for nym in syn_lemma_hypos:
                    stems |= set(nym.lemma_names())
                    stems |= set([get_stem(ln) for ln in nym.lemma_names()])

    return (stems - get_stem_set(word)) 
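get_stem(), get_stem_set(), and get_hyponyms()/get_hypernyms() are project helpers shared by the wn_persistent_api.py and featurizer.py examples that follow. get_hyponyms()/get_hypernyms() can be sketched with Synset.closure as under Example #6; plausible stand-ins for the other two (assumptions, not the project's code):

from nltk.stem.porter import PorterStemmer

_stemmer = PorterStemmer()

def get_stem(word):
    # hypothetical stemming helper
    return _stemmer.stem(word)

def get_stem_set(word):
    # hypothetical: the word together with its stem, used to
    # exclude the query word itself from the returned stems
    return {word, get_stem(word)}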
Example #13
Source File: wn_persistent_api.py    From combine-FEVER-NSMN with MIT License
def get_hypernym_stems_p(item, hypernym_stems_dict):
    word, pos = item
    item_key = word + '<(-.-)>' + pos
    if item_key in hypernym_stems_dict:
        return hypernym_stems_dict[item_key]

    stems = set([])

    for synset in wn.synsets(word, pos=pos):
        for lemma in synset.lemma_names():
            for syn_lemma in wn.synsets(lemma, pos=pos):
                try:
                    syn_lemma_hypers = get_hypernyms(syn_lemma)
                except RecursionError:
                    # print(syn_lemma)
                    continue

                for nym in syn_lemma_hypers:
                    stems |= set(nym.lemma_names())
                    stems |= set([get_stem(ln) for ln in nym.lemma_names()])

    result_set = (stems - get_stem_set(word))
    hypernym_stems_dict[item_key] = result_set
    return result_set 
Example #14
Source File: wn_persistent_api.py    From combine-FEVER-NSMN with MIT License
def get_hypernym_stems(item):
    word, pos = item
    stems = set([])

    for synset in wn.synsets(word, pos=pos):
        for lemma in synset.lemma_names():
            for syn_lemma in wn.synsets(lemma, pos=pos):
                try:
                    syn_lemma_hypers = get_hypernyms(syn_lemma)
                except RecursionError:
                    # print(syn_lemma)
                    continue

                for nym in syn_lemma_hypers:
                    stems |= set(nym.lemma_names())
                    stems |= set([get_stem(ln) for ln in nym.lemma_names()])

    return (stems - get_stem_set(word)) 
Example #15
Source File: wn_persistent_api.py    From combine-FEVER-NSMN with MIT License
def get_hypo_up_to_lv_p(item, level, hypo_lvl_dict):
    word, pos = item
    stems = set([])

    item_key = word + '<(-.-)>' + pos + '#' + str(level)
    if item_key in hypo_lvl_dict:
        return hypo_lvl_dict[item_key]

    for synset in wn.synsets(word, pos=pos):
        for lemma in synset.lemma_names():
            for syn_lemma in wn.synsets(lemma, pos=pos):
                for h in syn_lemma.closure(hypo, level):
                    for h_lemma in h.lemma_names():
                        stems.add(h_lemma)
                        stems.add(get_stem(h_lemma))

    hypo_lvl_dict[item_key] = stems

    return stems 
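hypo here (and hyper in Example #26) is a module-level relation function passed to Synset.closure; the assumed definitions:

hypo = lambda s: s.hyponyms()    # assumed, not shown in the source
hyper = lambda s: s.hypernyms()  # assumed, not shown in the source

syn_lemma.closure(hypo, level) then yields hyponyms up to 'level' steps below the synset, which is what bounds the expansion by the 'level' argument.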
Example #16
Source File: syngraph.py    From atap with Apache License 2.0
def graph_synsets(terms, pos=wn.NOUN, depth=2):
    """
    Create a networkx graph of the given terms to the given depth.
    """

    G = nx.Graph(
        name="WordNet Synsets Graph for {}".format(", ".join(terms)), depth=depth,
    )

    def add_term_links(G, term, current_depth):
        for syn in wn.synsets(term):
            for name in syn.lemma_names():
                G.add_edge(term, name)
                if current_depth < depth:
                    add_term_links(G, name, current_depth+1)

    for term in terms:
        add_term_links(G, term, 0)

    return G 
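A usage sketch; with depth=1 the graph stays small because each lemma is expanded only once:

import networkx as nx
from nltk.corpus import wordnet as wn

G = graph_synsets(['tree'], depth=1)
print(G.number_of_nodes(), G.number_of_edges())

Note that the pos argument is accepted but never used in the body; wn.synsets(term) is called without it.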
Example #17
Source File: wn_persistent_api.py    From combine-FEVER-NSMN with MIT License
def get_hyponym_stems_p(item, hyponym_stems_dict):
    word, pos = item
    item_key = word + '<(-.-)>' + pos

    if item_key in hyponym_stems_dict:
        return hyponym_stems_dict[item_key]

    stems = set([])
    for synset in wn.synsets(word, pos=pos):
        for lemma in synset.lemma_names():
            for syn_lemma in wn.synsets(lemma, pos=pos):
                try:
                    syn_lemma_hypos = get_hyponyms(syn_lemma)
                except RecursionError:
                    # print(syn_lemma)
                    continue

                for nym in syn_lemma_hypos:
                    stems |= set(nym.lemma_names())
                    stems |= set([get_stem(ln) for ln in nym.lemma_names()])

    result_set = (stems - get_stem_set(word))
    hyponym_stems_dict[item_key] = result_set
    return result_set 
Example #18
Source File: wn_persistent_api.py    From combine-FEVER-NSMN with MIT License
def get_antonym_stems_p(item, ant_dict):
    word, pos = item
    stems = set([])
    item_key = word + '<(-.-)>' + pos
    if item_key in ant_dict:
        return ant_dict[item_key]

    for synset in wn.synsets(word, pos=pos):
        for lemma in synset.lemma_names():
            for syn_lemma in wn.synsets(lemma, pos=pos):
                for l in syn_lemma.lemmas():
                    for antonym in l.antonyms():
                        stems.add(antonym.name())
                        stems.add(get_stem(antonym.name()))

    ant_dict[item_key] = stems

    return stems 
Example #19
Source File: featurizer.py    From combine-FEVER-NSMN with MIT License
def get_hyponym_stems(item):
    word, pos = item
    stems = set([])

    for synset in wn.synsets(word, pos=pos):
        for lemma in synset.lemma_names():
            for syn_lemma in wn.synsets(lemma, pos=pos):
                try:
                    syn_lemma_hypos = get_hyponyms(syn_lemma)
                except RecursionError:
                    # print(syn_lemma)
                    continue

                for nym in syn_lemma_hypos:
                    stems |= set(nym.lemma_names())
                    stems |= set([get_stem(ln) for ln in nym.lemma_names()])

    return (stems - get_stem_set(word)) 
Example #20
Source File: featurizer.py    From combine-FEVER-NSMN with MIT License
def get_hypernym_stems(item):
    word, pos = item
    stems = set([])

    for synset in wn.synsets(word, pos=pos):
        for lemma in synset.lemma_names():
            for syn_lemma in wn.synsets(lemma, pos=pos):
                try:
                    syn_lemma_hypers = get_hypernyms(syn_lemma)
                except RecursionError:
                    # print(syn_lemma)
                    continue

                for nym in syn_lemma_hypers:
                    stems |= set(nym.lemma_names())
                    stems |= set([get_stem(ln) for ln in nym.lemma_names()])

    return (stems - get_stem_set(word)) 
Example #21
Source File: noun.py    From VSE-C with MIT License
def valid(word, replacement):
    if replacement.lower() != replacement:
        return False
    synset_word = wn.synsets(word)
    synset_replacement = wn.synsets(replacement)
    for item_1 in synset_word:
        for item_2 in synset_replacement:
            if item_1 == item_2:
                return False
    # one-step hypernymy/hyponymy check
    for item_1 in synset_word:
        for subitem in item_1.hypernyms():
            for item_2 in synset_replacement:
                if item_2 == subitem:
                    return False
        for subitem in item_1.hyponyms():
            for item_2 in synset_replacement:
                if item_2 == subitem:
                    return False
    return True 
Example #22
Source File: imagenet.py    From mmfeat with BSD 3-Clause "New" or "Revised" License
def __init__(self, save_dir, config_path='./miner.yaml'):
        super(ImageNetMiner, self).__init__(save_dir, config_path)
        self.__engine__ = 'imagenet'
        self.format_url = 'http://www.image-net.org/api/text/imagenet.synset.geturls?wnid={}'

        # maximum number of synsets to retrieve - we don't need all images
        # necessarily; otherwise we get enormous numbers of synsets for
        # words like 'entity' or 'animal'
        self.max_synsets = 10000

        self.wnl = WordNetLemmatizer()

        # url cache
        self.imgnet_url_cache = {}

        # whether we "level up" in hierarchy if no images found
        self.level_up_if_no_images = True 
Example #23
Source File: context_utils.py    From yelp with GNU Lesser General Public License v2.1
def is_noun_in_group(noun, group):
    senses = wordnet.synsets(noun, pos='n')
    return any(i in senses for i in group) 
Example #24
Source File: wordnet.py    From gobbli with Apache License 2.0
def _get_lemmas(synsets: List[Any]) -> List[str]:
    """
    Return all the lemma names associated with a list of synsets.
    """
    return [lemma_name for synset in synsets for lemma_name in synset.lemma_names()] 
Example #25
Source File: auxiliary_word2vec.py    From ZeroShotVideoClassification with Apache License 2.0
def verbs2basicform(words):
    ret = []
    for w in words:
        analysis = wn.synsets(w)
        if any([a.pos() == 'v' for a in analysis]):
            w = WordNetLemmatizer().lemmatize(w, 'v')
        ret.append(w)
    return ret 
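A usage sketch:

print(verbs2basicform(['went', 'running', 'apples']))
# ['go', 'run', 'apples'] -- 'apples' has no verb sense, so it is left unchanged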
Example #26
Source File: featurizer.py    From combine-FEVER-NSMN with MIT License
def get_hyper_up_to_lv(item, level):
    word, pos = item
    stems = set([])

    for synset in wn.synsets(word, pos=pos):
        for lemma in synset.lemma_names():
            for syn_lemma in wn.synsets(lemma, pos=pos):
                for h in syn_lemma.closure(hyper, level):
                    for h_lemma in h.lemma_names():
                        stems.add(h_lemma)
                        stems.add(get_stem(h_lemma))

    return stems 
Example #27
Source File: sideInfo.py    From cesi with Apache License 2.0
def wordnetLinking(self):
		
		for trp in self.triples:
			sub, rel, obj 		  = trp['triple']
			raw_sub, raw_rel, raw_obj = trp['raw_triple']
			sub_id, rel_id, obj_id 	  = self.ent2id[sub], self.rel2id[rel], self.ent2id[obj]

			for sentence in trp['src_sentences']:
				# sent = [wrd.lower() for wrd in sentence.split()]
				sent = sentence.split()

				''' lesk returns a Synset on success and None on failure; the
				original code uses len(dir(res)) == 92 as a proxy for success '''
				self.ent2wnet[sub_id] = self.ent2wnet.get(sub_id, set())
				res = lesk(sent, raw_sub) 
				if len(dir(res)) == 92: self.ent2wnet[sub_id].add(res.name())

				self.ent2wnet[obj_id] = self.ent2wnet.get(obj_id, set())
				res = lesk(sent, raw_obj) 
				if len(dir(res)) == 92: self.ent2wnet[obj_id].add(res.name())

				self.rel2wnet[rel_id] = self.rel2wnet.get(rel_id, set())
				res = lesk(sent, raw_rel) 
				if len(dir(res)) == 92: self.rel2wnet[rel_id].add(res.name())

		self.setHeading('Wordnet Entity Clusters')
		self.printCluster(self.ent2wnet, self.id2ent, 'm2ol')

		# for ent in self.ent_list: self.ent2wnet[self.ent2id[ent]] = [ele.name() for ele in lesk(ent)]
		# for rel in self.rel_list: self.rel2wnet[self.rel2id[rel]] = [ele.name() for ele in wordnet.synsets(rel)]

		self.setHeading('Wordnet Relation Clusters')
		self.printCluster(self.rel2wnet, self.id2rel, 'm2ol') 
Example #28
Source File: synset.py    From ALaCarte with MIT License
def SemEval2015Task13():
  with open(FILEDIR+'data-SemEval2015_Task13/test/keys/gold_keys/EN/semeval-2015-task-13-en.key', 'r') as f:
    gold = [(split[1], set(key.split('%')[1] for key in split[2:] if key[:3] == 'wn:'))
            for split in (line.split() for line in f if '\twn:' in line)]
  ids = {entry[0] for entry in gold}
  with open(FILEDIR+'data-SemEval2015_Task13/test/data/semeval-2015-task-13-en.xml', 'r') as f:
    soup = BS(f, 'lxml')
  data = [(wf['id'], wf['lemma'], wf['pos'],
           list(split_on_punctuation(' '.join(child.text for child in sent.children
                                              if not child == wf and not child == '\n'))))
          for text in soup('text') for sent in text('sentence') for wf in sent('wf')
          if wf['id'] in ids and wf['pos'][0].lower() in POSMAP
          and wn.synsets(wf['lemma'], POSMAP[wf['pos'][0].lower()])]
  id2keys = defaultdict(lambda: set())
  for entry, keys in gold:
    id2keys[entry] = id2keys[entry].union(keys)
  gold = [(entry, id2keys[entry]) for entry, _, _, _ in data]
  return data, gold 
Example #29
Source File: context_utils.py    From yelp with GNU Lesser General Public License v2.1
def generate_senses(review):

    review.senses = set()
    for noun in review.nouns:
        review.senses |= set(wordnet.synsets(noun, pos='n')) 
Example #30
Source File: word_forms.py    From word_forms with MIT License
def get_related_lemmas(word):
    """
    args
        - word : a word e.g. "lovely"

    returns a list of related lemmas, e.g. [Lemma('cover_girl.n.01.lovely'),
        Lemma('lovely.s.01.lovely'), Lemma('adorable.s.01.lovely'),
        Lemma('comeliness.n.01.loveliness')]
    returns [] if Wordnet doesn't recognize the word
    """

    all_lemmas_for_this_word = [lemma for ss in wn.synsets(word)
                                for lemma in ss.lemmas()
                                if lemma.name() == word]
    all_related_lemmas = [lemma for lemma in all_lemmas_for_this_word]
    new_lemmas = []
    for lemma in all_lemmas_for_this_word:
        for new_lemma in (lemma.derivationally_related_forms() +
                          lemma.pertainyms()):
            if (not belongs(new_lemma, all_related_lemmas) and
            not belongs(new_lemma, new_lemmas)):
                new_lemmas.append(new_lemma)
    while len(new_lemmas) > 0:
        all_lemmas_for_new_words = []
        for new_lemma in new_lemmas:
            word = new_lemma.name()
            all_lemmas_for_this_word = [lemma for ss in wn.synsets(word)
                                        for lemma in ss.lemmas()
                                        if lemma.name() == word]
            for lemma in all_lemmas_for_this_word:
                if not belongs(lemma, all_lemmas_for_new_words):
                    all_lemmas_for_new_words.append(lemma)
        all_related_lemmas += all_lemmas_for_new_words
        new_lemmas = []
        for lemma in all_lemmas_for_new_words:
            for new_lemma in (lemma.derivationally_related_forms() +
                              lemma.pertainyms()):
                if (not belongs(new_lemma, all_related_lemmas) and
                not belongs(new_lemma, new_lemmas)):
                    new_lemmas.append(new_lemma)
    return all_related_lemmas
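belongs() is a helper defined elsewhere in word_forms; judging from its use, it tests whether a lemma is already present in a list. A plausible stand-in comparing lemmas by their fully qualified names (an assumption, not the project's code):

def belongs(lemma, lemma_list):
    # hypothetical: compare by synset name plus lemma name rather than
    # relying on object identity
    return any(lemma.name() == other.name() and
               lemma.synset().name() == other.synset().name()
               for other in lemma_list)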