Python nltk.corpus.wordnet.synsets() Examples
The following are 30 code examples of nltk.corpus.wordnet.synsets(), collected from open-source projects. The project, source file, and license for each example are listed above it. You may also want to check out the other available functions and classes of the module nltk.corpus.wordnet.
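As a quick orientation before the examples, here is a minimal sketch of what wn.synsets() returns (the word "dog" and the noun filter are just illustrations):

    from nltk.corpus import wordnet as wn

    # Each Synset object represents one sense of the word;
    # pos narrows the results to a part of speech.
    for synset in wn.synsets('dog', pos=wn.NOUN):
        print(synset.name(), '-', synset.definition())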
Example #1
Source File: laplacian_tags.py From jingwei with MIT License
def tag_semantic_similarity(x, y, ic):
    mx = wn.morphy(x)
    my = wn.morphy(y)
    if mx is None or my is None:
        return 0
    synX = wn.synsets(mx, pos=wn.NOUN)
    synY = wn.synsets(my, pos=wn.NOUN)
    if len(synX) > 0 and len(synY) > 0:
        maxSim = synX[0].lin_similarity(synY[0], ic)
    else:
        maxSim = 0
    return maxSim
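Note that lin_similarity requires an information-content dictionary as its ic argument. A hedged usage sketch, assuming the Brown-corpus IC file shipped with NLTK's wordnet_ic corpus (any IC file would do):

    from nltk.corpus import wordnet as wn
    from nltk.corpus import wordnet_ic

    # Information-content counts derived from the Brown corpus.
    brown_ic = wordnet_ic.ic('ic-brown.dat')
    print(tag_semantic_similarity('dog', 'cat', brown_ic))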
Example #2
Source File: featurizer.py From combine-FEVER-NSMN with MIT License
def is_exact_match(token1, token2):
    token1 = token1.lower()
    token2 = token2.lower()
    if token1 == token2:
        return True

    token1_stem = get_stem(token1)

    for synsets in wn.synsets(token2):
        for lemma in synsets.lemma_names():
            if token1_stem == get_stem(lemma):
                return True

    if token1 == "n't" and token2 == "not":
        return True
    elif token1 == "not" and token2 == "n't":
        return True
    elif token1_stem == get_stem(token2):
        return True
    return False
Example #3
Source File: wordnet_app.py From luscan-devel with GNU General Public License v2.0
def get_static_welcome_message():
    """
    Get the static welcome page.
    """
    return \
"""
<h3>Search Help</h3>
<ul><li>The display below the line is an example of the output the browser
shows you when you enter a search word. The search word was <b>green</b>.</li>
<li>The search result shows for different parts of speech the <b>synsets</b>
i.e. different meanings for the word.</li>
<li>All underlined texts are hypertext links. There are two types of links:
word links and others. Clicking a word link carries out a search for the word
in the Wordnet database.</li>
<li>Clicking a link of the other type opens a display section of data attached
to that link. Clicking that link a second time closes the section again.</li>
<li>Clicking <u>S:</u> opens a section showing the relations for that synset.</li>
<li>Clicking on a relation name opens a section that displays the associated
synsets.</li>
<li>Type a search word in the <b>Next Word</b> field and start the search by the
<b>Enter/Return</b> key or click the <b>Search</b> button.</li>
</ul>
"""
Example #4
Source File: nlp_util.py From How-to-use-SyntaxNet with MIT License
def get_synonym(root):
    listofsyns = wordnet.synsets(root[0])
    # Picks the fourth synset; raises IndexError if the word has fewer senses.
    synonym = listofsyns[3].name().split(".")[0]
    if root[1] == 'VBD':
        synonym = verb.verb_past(synonym)
    elif root[1] == 'VBG':
        synonym = verb.verb_present_participle(synonym)
    elif root[1] == 'VBN':
        synonym = verb.verb_past_participle(synonym)
    elif root[1] == 'VBP':
        synonym = verb.verb_present(synonym, person=3, negate=True)
    elif root[1] == 'VBZ':
        synonym = verb.verb_present(synonym, person=3, negate=False)
    return synonym

# retrieve paraphrased sentence
Example #5
Source File: preprocess_lst_test.py From lexsub with Apache License 2.0
def is_atomic_mwe(mwe, verb_lemma, complement_lemma, synsets):
    # Note: written against the older NLTK 2.x API, where synset.definition
    # and lemma.name are attributes rather than methods.
    mwe_count = 0
    for synset in synsets:
        gloss_lemmas = set([WordNetLemmatizer().lemmatize(word)
                            for word in synset.definition.split()])
        if verb_lemma in gloss_lemmas or complement_lemma in gloss_lemmas:
            return False
        for syn_lemma in synset.lemmas:
            if syn_lemma.name != mwe:
                tokens = syn_lemma.name.split('_')
                for token in tokens:
                    if token == verb_lemma:
                        return False
                if len(tokens) == 2 and tokens[1] == complement_lemma:
                    return False
            else:
                mwe_count += syn_lemma.count()
    return True
Example #6
Source File: utilities.py From EMNLP2018_NLI with GNU General Public License v3.0
def has_relation(token1, token2, rel_name=""):
    token1 = token1.lower()
    token2 = token2.lower()
    t1 = stemmer.stem(token1)
    t2 = stemmer.stem(token2)
    _t1 = wn.synsets(t1)
    _t2 = wn.synsets(t2)
    if(len(_t1)==0 or len(_t2)==0):
        return [0, 0]
    token_2_hyponyms = get_hyponyms(_t2[0])
    token_2_hypernyms = get_hypernyms(_t2[0])
    if(_t1[0] in token_2_hyponyms):
        # t1 is a hyponym of t2
        # print("Hyponym {} {}".format(t1, t2))
        return [1, 0]
    elif(_t1[0] in token_2_hypernyms):
        # t1 is a hypernym of t2
        return [0, 1]
    else:
        return [0, 0]
Example #7
Source File: utilities.py From EMNLP2018_NLI with GNU General Public License v3.0
def is_antonyms(token1, token2):
    token1 = token1.lower()
    token2 = token2.lower()
    token1_stem = stemmer.stem(token1)
    antonym_lists_for_token2 = []
    for synsets in wn.synsets(token2):
        for l in synsets.lemmas():
            _ant = l.antonyms()
            if(len(_ant)>0):
                antonym_lists_for_token2.append(_ant[0].name())
    # for lemma_synsets in [wn.synsets(l) for l in synsets.lemma_names()]:
    #     for lemma_syn in lemma_synsets:
    #         for lemma in lemma_syn.lemmas():
    #             for antonym in lemma.antonyms():
    #                 antonym_lists_for_token2.append(antonym.name())
    antonym_lists_for_token2 = list(set(antonym_lists_for_token2))
    for atnm in antonym_lists_for_token2:
        if token1_stem == stemmer.stem(atnm):
            return 1
    return 0
Example #8
Source File: feature_wordnet_similarity.py From kaggle-HomeDepot with MIT License
def transform_one(self, obs, target, id):
    obs_tokens = nlp_utils._tokenize(obs, token_pattern)
    target_tokens = nlp_utils._tokenize(target, token_pattern)
    obs_synset_list = [wn.synsets(obs_token) for obs_token in obs_tokens]
    target_synset_list = [wn.synsets(target_token) for target_token in target_tokens]
    val_list = []
    for obs_synset in obs_synset_list:
        _val_list = []
        for target_synset in target_synset_list:
            _s = self._maximum_similarity_for_two_synset_list(obs_synset, target_synset)
            _val_list.append(_s)
        if len(_val_list) == 0:
            _val_list = [config.MISSING_VALUE_NUMERIC]
        val_list.append(_val_list)
    if len(val_list) == 0:
        val_list = [[config.MISSING_VALUE_NUMERIC]]
    return val_list
Example #9
Source File: short_sentence_similarity.py From Semantic-Texual-Similarity-Toolkits with MIT License
def length_dist(synset_1, synset_2):
    """
    Return a measure of the length of the shortest path in the semantic
    ontology (Wordnet in our case as well as the paper's) between two
    synsets.
    """
    l_dist = six.MAXSIZE
    if synset_1 is None or synset_2 is None:
        return 0.0
    if synset_1 == synset_2:
        # if synset_1 and synset_2 are the same synset return 0
        l_dist = 0.0
    else:
        wset_1 = set([str(x.name()) for x in synset_1.lemmas()])
        wset_2 = set([str(x.name()) for x in synset_2.lemmas()])
        if len(wset_1.intersection(wset_2)) > 0:
            # if synset_1 != synset_2 but there is word overlap, return 1.0
            l_dist = 1.0
        else:
            # just compute the shortest path between the two
            l_dist = synset_1.shortest_path_distance(synset_2)
            if l_dist is None:
                l_dist = 0.0
    # normalize path length to the range [0,1]
    return math.exp(-ALPHA * l_dist)
Example #10
Source File: short_sentence_similarity.py From Semantic-Texual-Similarity-Toolkits with MIT License
def get_best_synset_pair(word_1, word_2):
    """
    Choose the pair with highest path similarity among all pairs.
    Mimics pattern-seeking behavior of humans.
    """
    max_sim = -1.0
    synsets_1 = wn.synsets(word_1)
    synsets_2 = wn.synsets(word_2)
    if len(synsets_1) == 0 or len(synsets_2) == 0:
        return None, None
    else:
        max_sim = -1.0
        best_pair = None, None
        for synset_1 in synsets_1:
            for synset_2 in synsets_2:
                sim = wn.path_similarity(synset_1, synset_2)
                if sim is not None and sim > max_sim:
                    max_sim = sim
                    best_pair = synset_1, synset_2
        return best_pair
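A short usage sketch (assuming wn is nltk.corpus.wordnet, as in the source file):

    pair = get_best_synset_pair('cat', 'dog')
    if pair != (None, None):
        synset_1, synset_2 = pair
        # Report the best-matching senses and their path similarity.
        print(synset_1.name(), synset_2.name(), wn.path_similarity(synset_1, synset_2))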
Example #11
Source File: nltkmgr.py From sia-cog with MIT License
def synset(data):
    result = {}
    syns = wordnet.synsets(data)
    list = []
    for s in syns:
        r = {}
        r["name"] = s.name()
        r["lemma"] = s.lemmas()[0].name()
        r["definition"] = s.definition()
        r["examples"] = s.examples()
        list.append(r)
    result["list"] = list
    synonyms = []
    antonyms = []
    for syn in syns:
        for l in syn.lemmas():
            synonyms.append(l.name())
            if l.antonyms():
                antonyms.append(l.antonyms()[0].name())
    result["synonyms"] = synonyms
    result["antonyms"] = antonyms
    return json.loads(jsonpickle.encode(result, unpicklable=False))
Example #12
Source File: wn_persistent_api.py From combine-FEVER-NSMN with MIT License
def get_hyponym_stems(item):
    word, pos = item
    stems = set([])
    for synset in wn.synsets(word, pos=pos):
        for lemma in synset.lemma_names():
            for syn_lemma in wn.synsets(lemma, pos=pos):
                try:
                    syn_lemma_hypos = get_hyponyms(syn_lemma)
                except RecursionError:
                    # print(syn_lemma)
                    continue
                for nym in syn_lemma_hypos:
                    stems |= set(nym.lemma_names())
                    stems |= set([get_stem(ln) for ln in nym.lemma_names()])
    return (stems - get_stem_set(word))
Example #13
Source File: wn_persistent_api.py From combine-FEVER-NSMN with MIT License
def get_hypernym_stems_p(item, hypernym_stems_dict):
    word, pos = item
    item_key = word + '<(-.-)>' + pos
    if item_key in hypernym_stems_dict:
        return hypernym_stems_dict[item_key]
    stems = set([])
    for synset in wn.synsets(word, pos=pos):
        for lemma in synset.lemma_names():
            for syn_lemma in wn.synsets(lemma, pos=pos):
                try:
                    syn_lemma_hypers = get_hypernyms(syn_lemma)
                except RecursionError:
                    # print(syn_lemma)
                    continue
                for nym in syn_lemma_hypers:
                    stems |= set(nym.lemma_names())
                    stems |= set([get_stem(ln) for ln in nym.lemma_names()])
    result_set = (stems - get_stem_set(word))
    hypernym_stems_dict[item_key] = result_set
    return result_set
Example #14
Source File: wn_persistent_api.py From combine-FEVER-NSMN with MIT License
def get_hypernym_stems(item):
    word, pos = item
    stems = set([])
    for synset in wn.synsets(word, pos=pos):
        for lemma in synset.lemma_names():
            for syn_lemma in wn.synsets(lemma, pos=pos):
                try:
                    syn_lemma_hypers = get_hypernyms(syn_lemma)
                except RecursionError:
                    # print(syn_lemma)
                    continue
                for nym in syn_lemma_hypers:
                    stems |= set(nym.lemma_names())
                    stems |= set([get_stem(ln) for ln in nym.lemma_names()])
    return (stems - get_stem_set(word))
Example #15
Source File: wn_persistent_api.py From combine-FEVER-NSMN with MIT License
def get_hypo_up_to_lv_p(item, level, hypo_lvl_dict):
    word, pos = item
    stems = set([])
    item_key = word + '<(-.-)>' + pos + '#' + str(level)
    if item_key in hypo_lvl_dict:
        return hypo_lvl_dict[item_key]
    for synset in wn.synsets(word, pos=pos):
        for lemma in synset.lemma_names():
            for syn_lemma in wn.synsets(lemma, pos=pos):
                for h in syn_lemma.closure(hypo, level):
                    for h_lemma in h.lemma_names():
                        stems.add(h_lemma)
                        stems.add(get_stem(h_lemma))
    hypo_lvl_dict[item_key] = stems
    return stems
Example #16
Source File: syngraph.py From atap with Apache License 2.0
def graph_synsets(terms, pos=wn.NOUN, depth=2):
    """
    Create a networkx graph of the given terms to the given depth.
    """
    G = nx.Graph(
        name="WordNet Synsets Graph for {}".format(", ".join(terms)),
        depth=depth,
    )

    def add_term_links(G, term, current_depth):
        for syn in wn.synsets(term):
            for name in syn.lemma_names():
                G.add_edge(term, name)
                if current_depth < depth:
                    add_term_links(G, name, current_depth + 1)

    for term in terms:
        add_term_links(G, term, 0)

    return G
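A small usage sketch, assuming networkx has been imported as nx as the function body implies:

    G = graph_synsets(['tree'], depth=1)
    # Inspect the size of the resulting lemma-name graph.
    print(G.number_of_nodes(), 'nodes /', G.number_of_edges(), 'edges')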
Example #17
Source File: wn_persistent_api.py From combine-FEVER-NSMN with MIT License
def get_hyponym_stems_p(item, hyponym_stems_dict):
    word, pos = item
    item_key = word + '<(-.-)>' + pos
    if item_key in hyponym_stems_dict:
        return hyponym_stems_dict[item_key]
    stems = set([])
    for synset in wn.synsets(word, pos=pos):
        for lemma in synset.lemma_names():
            for syn_lemma in wn.synsets(lemma, pos=pos):
                try:
                    syn_lemma_hypos = get_hyponyms(syn_lemma)
                except RecursionError:
                    # print(syn_lemma)
                    continue
                for nym in syn_lemma_hypos:
                    stems |= set(nym.lemma_names())
                    stems |= set([get_stem(ln) for ln in nym.lemma_names()])
    result_set = (stems - get_stem_set(word))
    hyponym_stems_dict[item_key] = result_set
    return result_set
Example #18
Source File: wn_persistent_api.py From combine-FEVER-NSMN with MIT License
def get_antonym_stems_p(item, ant_dict):
    word, pos = item
    stems = set([])
    item_key = word + '<(-.-)>' + pos
    if item_key in ant_dict:
        return ant_dict[item_key]
    for synset in wn.synsets(word, pos=pos):
        for lemma in synset.lemma_names():
            for syn_lemma in wn.synsets(lemma, pos=pos):
                for l in syn_lemma.lemmas():
                    for antonym in l.antonyms():
                        stems.add(antonym.name())
                        stems.add(get_stem(antonym.name()))
    ant_dict[item_key] = stems
    return stems
Example #19
Source File: featurizer.py From combine-FEVER-NSMN with MIT License
def get_hyponym_stems(item):
    word, pos = item
    stems = set([])
    for synset in wn.synsets(word, pos=pos):
        for lemma in synset.lemma_names():
            for syn_lemma in wn.synsets(lemma, pos=pos):
                try:
                    syn_lemma_hypos = get_hyponyms(syn_lemma)
                except RecursionError:
                    # print(syn_lemma)
                    continue
                for nym in syn_lemma_hypos:
                    stems |= set(nym.lemma_names())
                    stems |= set([get_stem(ln) for ln in nym.lemma_names()])
    return (stems - get_stem_set(word))
Example #20
Source File: featurizer.py From combine-FEVER-NSMN with MIT License
def get_hypernym_stems(item):
    word, pos = item
    stems = set([])
    for synset in wn.synsets(word, pos=pos):
        for lemma in synset.lemma_names():
            for syn_lemma in wn.synsets(lemma, pos=pos):
                try:
                    syn_lemma_hypers = get_hypernyms(syn_lemma)
                except RecursionError:
                    # print(syn_lemma)
                    continue
                for nym in syn_lemma_hypers:
                    stems |= set(nym.lemma_names())
                    stems |= set([get_stem(ln) for ln in nym.lemma_names()])
    return (stems - get_stem_set(word))
Example #21
Source File: noun.py From VSE-C with MIT License
def valid(word, replacement):
    if replacement.lower() != replacement:
        return False
    synset_word = wn.synsets(word)
    synset_replacement = wn.synsets(replacement)
    for item_1 in synset_word:
        for item_2 in synset_replacement:
            if item_1 == item_2:
                return False
    # one-step hypernymy/hyponymy check
    for item_1 in synset_word:
        for subitem in item_1.hypernyms():
            for item_2 in synset_replacement:
                if item_2 == subitem:
                    return False
        for subitem in item_1.hyponyms():
            for item_2 in synset_replacement:
                if item_2 == subitem:
                    return False
    return True
Example #22
Source File: imagenet.py From mmfeat with BSD 3-Clause "New" or "Revised" License
def __init__(self, save_dir, config_path='./miner.yaml'):
    super(ImageNetMiner, self).__init__(save_dir, config_path)
    self.__engine__ = 'imagenet'
    self.format_url = 'http://www.image-net.org/api/text/imagenet.synset.geturls?wnid={}'

    # maximum number of synsets to retrieve - we don't need all images necessarily,
    # otherwise we get enormous amounts of synsets for words like 'entity' or 'animal'
    self.max_synsets = 10000

    self.wnl = WordNetLemmatizer()

    # url cache
    self.imgnet_url_cache = {}

    # whether we "level up" in hierarchy if no images found
    self.level_up_if_no_images = True
Example #23
Source File: context_utils.py From yelp with GNU Lesser General Public License v2.1
def is_noun_in_group(noun, group):
    senses = wordnet.synsets(noun, pos='n')
    return any(i in senses for i in group)
Example #24
Source File: wordnet.py From gobbli with Apache License 2.0
def _get_lemmas(synsets: List[Any]) -> List[str]:
    """
    Return all the lemma names associated with a list of synsets.
    """
    return [lemma_name for synset in synsets for lemma_name in synset.lemma_names()]
Example #25
Source File: auxiliary_word2vec.py From ZeroShotVideoClassification with Apache License 2.0
def verbs2basicform(words):
    ret = []
    for w in words:
        analysis = wn.synsets(w)
        if any([a.pos() == 'v' for a in analysis]):
            w = WordNetLemmatizer().lemmatize(w, 'v')
        ret.append(w)
    return ret
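A usage sketch, assuming the imports from the source file (wn is nltk.corpus.wordnet and WordNetLemmatizer comes from nltk.stem):

    # Words with a verb reading are reduced to their base form;
    # words without one pass through unchanged.
    print(verbs2basicform(['running', 'went', 'happily']))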
Example #26
Source File: featurizer.py From combine-FEVER-NSMN with MIT License
def get_hyper_up_to_lv(item, level):
    word, pos = item
    stems = set([])
    for synset in wn.synsets(word, pos=pos):
        for lemma in synset.lemma_names():
            for syn_lemma in wn.synsets(lemma, pos=pos):
                for h in syn_lemma.closure(hyper, level):
                    for h_lemma in h.lemma_names():
                        stems.add(h_lemma)
                        stems.add(get_stem(h_lemma))
    return stems
Example #27
Source File: sideInfo.py From cesi with Apache License 2.0
def wordnetLinking(self):
    for trp in self.triples:
        sub, rel, obj = trp['triple']
        raw_sub, raw_rel, raw_obj = trp['raw_triple']
        sub_id, rel_id, obj_id = self.ent2id[sub], self.rel2id[rel], self.ent2id[obj]

        for sentence in trp['src_sentences']:
            # sent = [wrd.lower() for wrd in sentence.split()]
            sent = sentence.split()

            ''' 92 is the length of list returned by dir when lesk is successful '''
            self.ent2wnet[sub_id] = self.ent2wnet.get(sub_id, set())
            res = lesk(sent, raw_sub)
            if len(dir(res)) == 92:
                self.ent2wnet[sub_id].add(res.name())

            self.ent2wnet[obj_id] = self.ent2wnet.get(obj_id, set())
            res = lesk(sent, raw_obj)
            if len(dir(res)) == 92:
                self.ent2wnet[obj_id].add(res.name())

            self.rel2wnet[rel_id] = self.rel2wnet.get(rel_id, set())
            res = lesk(sent, raw_rel)
            if len(dir(res)) == 92:
                self.rel2wnet[rel_id].add(res.name())

    self.setHeading('Wordnet Entity Clusters')
    self.printCluster(self.ent2wnet, self.id2ent, 'm2ol')

    # for ent in self.ent_list: self.ent2wnet[self.ent2id[ent]] = [ele.name() for ele in lesk(ent)]
    # for rel in self.rel_list: self.rel2wnet[self.rel2id[rel]] = [ele.name() for ele in wordnet.synsets(rel)]

    self.setHeading('Wordnet Relation Clusters')
    self.printCluster(self.rel2wnet, self.id2rel, 'm2ol')
Example #28
Source File: synset.py From ALaCarte with MIT License
def SemEval2015Task13():
    with open(FILEDIR+'data-SemEval2015_Task13/test/keys/gold_keys/EN/semeval-2015-task-13-en.key', 'r') as f:
        gold = [(split[1], set(key.split('%')[1] for key in split[2:] if key[:3] == 'wn:'))
                for split in (line.split() for line in f if '\twn:' in line)]
    ids = {entry[0] for entry in gold}
    with open(FILEDIR+'data-SemEval2015_Task13/test/data/semeval-2015-task-13-en.xml', 'r') as f:
        soup = BS(f, 'lxml')
    data = [(wf['id'], wf['lemma'], wf['pos'],
             list(split_on_punctuation(' '.join(child.text for child in sent.children
                                                if not child == wf and not child == '\n'))))
            for text in soup('text') for sent in text('sentence') for wf in sent('wf')
            if wf['id'] in ids and wf['pos'][0].lower() in POSMAP
            and wn.synsets(wf['lemma'], POSMAP[wf['pos'][0].lower()])]
    id2keys = defaultdict(lambda: set())
    for entry, keys in gold:
        id2keys[entry] = id2keys[entry].union(keys)
    gold = [(entry, id2keys[entry]) for entry, _, _, _ in data]
    return data, gold
Example #29
Source File: context_utils.py From yelp with GNU Lesser General Public License v2.1
def generate_senses(review):
    review.senses = set()
    for noun in review.nouns:
        review.senses |= set(wordnet.synsets(noun, pos='n'))
Example #30
Source File: word_forms.py From word_forms with MIT License
def get_related_lemmas(word):
    """
    args
        - word : a word e.g. "lovely"

    returns a list of related lemmas e.g.
    [Lemma('cover_girl.n.01.lovely'), Lemma('lovely.s.01.lovely'),
     Lemma('adorable.s.01.lovely'), Lemma('comeliness.n.01.loveliness')]
    returns [] if Wordnet doesn't recognize the word
    """
    all_lemmas_for_this_word = [lemma for ss in wn.synsets(word)
                                for lemma in ss.lemmas()
                                if lemma.name() == word]
    all_related_lemmas = [lemma for lemma in all_lemmas_for_this_word]
    new_lemmas = []
    for lemma in all_lemmas_for_this_word:
        for new_lemma in (lemma.derivationally_related_forms() + lemma.pertainyms()):
            if (not belongs(new_lemma, all_related_lemmas)
                    and not belongs(new_lemma, new_lemmas)):
                new_lemmas.append(new_lemma)
    while len(new_lemmas) > 0:
        all_lemmas_for_new_words = []
        for new_lemma in new_lemmas:
            word = new_lemma.name()
            all_lemmas_for_this_word = [lemma for ss in wn.synsets(word)
                                        for lemma in ss.lemmas()
                                        if lemma.name() == word]
            for lemma in all_lemmas_for_this_word:
                if not belongs(lemma, all_lemmas_for_new_words):
                    all_lemmas_for_new_words.append(lemma)
        all_related_lemmas += all_lemmas_for_new_words
        new_lemmas = []
        for lemma in all_lemmas_for_new_words:
            for new_lemma in (lemma.derivationally_related_forms() + lemma.pertainyms()):
                if (not belongs(new_lemma, all_related_lemmas)
                        and not belongs(new_lemma, new_lemmas)):
                    new_lemmas.append(new_lemma)
    return all_related_lemmas
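A usage sketch; note that belongs() is a helper defined elsewhere in the same source file, so this only runs in that context:

    # Print every lemma WordNet relates to "lovely" via derivation or pertainymy.
    for lemma in get_related_lemmas('lovely'):
        print(lemma.name())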