Python nltk.corpus.wordnet.NOUN Examples
The following are 26 code examples of nltk.corpus.wordnet.NOUN, collected from open-source projects. The original project, source file, and license for each example are noted above it. You may also want to check out all available functions and classes of the nltk.corpus.wordnet module.
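For orientation: wn.NOUN is not a function but a module-level constant, the one-character string 'n' that WordNet uses to mark the noun part of speech (its siblings are wn.VERB = 'v', wn.ADJ = 'a', wn.ADJ_SAT = 's', and wn.ADV = 'r'). A minimal sketch of the typical use:

from nltk.corpus import wordnet as wn

print(wn.NOUN)                             # 'n'
print(wn.synsets('dog', pos=wn.NOUN)[:3])  # noun senses only
print(wn.synsets('dog', pos=wn.VERB))      # verb senses instead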
Example #1
Source File: laplacian_tags.py From jingwei with MIT License | 7 votes |
def tag_semantic_similarity(x, y, ic):
    mx = wn.morphy(x)
    my = wn.morphy(y)
    if mx is None or my is None:
        return 0
    synX = wn.synsets(mx, pos=wn.NOUN)
    synY = wn.synsets(my, pos=wn.NOUN)
    if len(synX) > 0 and len(synY) > 0:
        maxSim = synX[0].lin_similarity(synY[0], ic)
    else:
        maxSim = 0
    return maxSim
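Example #1 compares the first noun sense of two tags using Lin similarity, which requires an information-content dictionary as its third argument. A sketch of how such a dictionary is typically loaded and the function called (assuming the corpus has been fetched with nltk.download('wordnet_ic')):

from nltk.corpus import wordnet as wn
from nltk.corpus import wordnet_ic

brown_ic = wordnet_ic.ic('ic-brown.dat')   # information content from the Brown corpus
print(tag_semantic_similarity('cats', 'dog', brown_ic))  # morphy folds 'cats' to 'cat'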
Example #2
Source File: _common.py From tmtoolkit with Apache License 2.0 | 6 votes |
def pos_tag_convert_penn_to_wn(tag):
    """
    Convert POS tag from Penn tagset to WordNet tagset.

    :param tag: a tag from Penn tagset
    :return: a tag from WordNet tagset or None if no corresponding tag could be found
    """
    from nltk.corpus import wordnet as wn

    if tag in ['JJ', 'JJR', 'JJS']:
        return wn.ADJ
    elif tag in ['RB', 'RBR', 'RBS']:
        return wn.ADV
    elif tag in ['NN', 'NNS', 'NNP', 'NNPS']:
        return wn.NOUN
    elif tag in ['VB', 'VBD', 'VBG', 'VBN', 'VBP', 'VBZ']:
        return wn.VERB
    return None
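A short usage sketch: because the function returns None for unmapped tags, callers usually fall back to a default POS before lemmatizing (here wn.NOUN, which is also WordNetLemmatizer's own default):

from nltk.corpus import wordnet as wn
from nltk.stem import WordNetLemmatizer

lemmatizer = WordNetLemmatizer()
wn_tag = pos_tag_convert_penn_to_wn('VBD') or wn.NOUN  # 'VBD' maps to wn.VERB
print(lemmatizer.lemmatize('ran', wn_tag))             # 'run'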
Example #3
Source File: syngraph.py From atap with Apache License 2.0 | 6 votes |
def graph_synsets(terms, pos=wn.NOUN, depth=2):
    """
    Create a networkx graph of the given terms to the given depth.
    """
    G = nx.Graph(
        name="WordNet Synsets Graph for {}".format(", ".join(terms)),
        depth=depth,
    )

    def add_term_links(G, term, current_depth):
        for syn in wn.synsets(term):
            for name in syn.lemma_names():
                G.add_edge(term, name)
                if current_depth < depth:
                    add_term_links(G, name, current_depth + 1)

    for term in terms:
        add_term_links(G, term, 0)

    return G
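A possible way to call Example #3 (a sketch; it assumes networkx is installed and bound to nx in the function's module, as the code implies). The recursion fans out quickly, so small depths are advisable:

import networkx as nx
from nltk.corpus import wordnet as wn

G = graph_synsets(['tree'], depth=1)
print(G.name)               # WordNet Synsets Graph for tree
print(G.number_of_nodes())  # the term plus the lemma names it links to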
Example #4
Source File: deploy.py From Election-Meddling with MIT License | 6 votes |
def get_wordnet_pos(self, treebank_tag):
    """
    Return the WordNet POS (a, n, r, v) corresponding to the given
    Penn Treebank tag, for use in WordNet lemmatization.
    """
    if treebank_tag.startswith('J'):
        return wordnet.ADJ
    elif treebank_tag.startswith('V'):
        return wordnet.VERB
    elif treebank_tag.startswith('N'):
        return wordnet.NOUN
    elif treebank_tag.startswith('R'):
        return wordnet.ADV
    else:
        return wordnet.NOUN
Example #5
Source File: wn_persistent_api.py From combine-FEVER-NSMN with MIT License | 6 votes |
def convert_to_wn_pos(pos):
    if pos.startswith("J"):
        return wn.ADJ
    elif pos.startswith("V"):
        return wn.VERB
    elif pos.startswith("N"):
        return wn.NOUN
    elif pos.startswith("R"):
        return wn.ADV
    else:
        return ""
Example #6
Source File: normalization.py From text-analytics-with-python with Apache License 2.0 | 6 votes |
def pos_tag_text(text):

    def penn_to_wn_tags(pos_tag):
        if pos_tag.startswith('J'):
            return wn.ADJ
        elif pos_tag.startswith('V'):
            return wn.VERB
        elif pos_tag.startswith('N'):
            return wn.NOUN
        elif pos_tag.startswith('R'):
            return wn.ADV
        else:
            return None

    tagged_text = tag(text)
    tagged_lower_text = [(word.lower(), penn_to_wn_tags(pos_tag))
                         for word, pos_tag in tagged_text]
    return tagged_lower_text

# lemmatize text based on POS tags
Example #7
Source File: dcs.py From broca with MIT License | 5 votes |
def _disambiguate_doc(self, tagged_tokens):
    """
    Takes a list of tagged tokens, representing a document, in the form:

        [(token, tag), ...]

    and returns a mapping of terms to their disambiguated concepts (synsets).
    """
    # Group tokens by PoS
    pos_groups = {pos: [] for pos in [wn.NOUN, wn.VERB, wn.ADJ, wn.ADV]}
    for tok, tag in tagged_tokens:
        if tag in pos_groups:
            pos_groups[tag].append(tok)
    #print(pos_groups)

    # Map of final term -> concept mappings
    map = {}
    for tag, toks in pos_groups.items():
        map.update(self._disambiguate_pos(toks, tag))

    #nice_map = {k: map[k].lemma_names() for k in map.keys()}
    #print(json.dumps(nice_map, indent=4, sort_keys=True))
    return map
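Note that this method expects tags already converted to WordNet form: the pos_groups keys are wn.NOUN ('n'), wn.VERB ('v'), wn.ADJ ('a'), and wn.ADV ('r'), so raw Penn Treebank tags would be silently skipped. A sketch of the expected input shape:

tagged_tokens = [('bank', 'n'), ('deposit', 'v'), ('river', 'n')]
# concept_map = dcs._disambiguate_doc(tagged_tokens)  # term -> Synset (dcs: an instance of the class)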
Example #8
Source File: wordnet_app.py From luscan-devel with GNU General Public License v2.0 | 5 votes |
def _pos_tuples():
    return [
        (wn.NOUN, 'N', 'noun'),
        (wn.VERB, 'V', 'verb'),
        (wn.ADJ, 'J', 'adj'),
        (wn.ADV, 'R', 'adv')]
Example #9
Source File: normalize.py From atap with Apache License 2.0 | 5 votes |
def lemmatize(self, token, pos_tag):
    tag = {
        'N': wn.NOUN,
        'V': wn.VERB,
        'R': wn.ADV,
        'J': wn.ADJ
    }.get(pos_tag[0], wn.NOUN)

    return self.lemmatizer.lemmatize(token, tag)
Example #10
Source File: preprocess_lst_test.py From lexsub with Apache License 2.0 | 5 votes |
def lemmatize(pairs):
    triples = []
    for pair in pairs:
        word = pair[0]
        pos = pair[1]
        wordnet_pos = wordnet.NOUN
        if len(pos) >= 2:
            pos_prefix = pos[:2]
            if pos_prefix in to_wordnet_pos:
                wordnet_pos = to_wordnet_pos[pos_prefix]
        lemma = WordNetLemmatizer().lemmatize(word, wordnet_pos).lower()
        triples.append([word, wordnet_pos, lemma])
    return triples
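This function references a to_wordnet_pos dictionary defined elsewhere in its module. A plausible, purely hypothetical reconstruction, keyed by two-character Penn tag prefixes as the pos[:2] lookup implies:

from nltk.corpus import wordnet

# Hypothetical stand-in; the original module defines its own mapping.
to_wordnet_pos = {'NN': wordnet.NOUN, 'JJ': wordnet.ADJ,
                  'VB': wordnet.VERB, 'RB': wordnet.ADV}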
Example #11
Source File: __init__.py From wordai with MIT License | 5 votes |
def _sentence_to_mongo(typ, items):
    import nltk
    from nltk.corpus import wordnet

    def wordnet_pos(tag):
        if tag.startswith('J'):
            return wordnet.ADJ
        elif tag.startswith('V'):
            return wordnet.VERB
        elif tag.startswith('N'):
            return wordnet.NOUN
        elif tag.startswith('R'):
            return wordnet.ADV
        else:
            return wordnet.NOUN

    # nltk.download('punkt')
    nltk.download('averaged_perceptron_tagger')
    nltk.download('stopwords')
    nltk.download('wordnet')
    nltk.download('punkt')

    stop_words = set(nltk.corpus.stopwords.words('english'))
    stemmer = nltk.stem.WordNetLemmatizer()
    sentences = []
    for trans in items:
        eng, chn = trans.getsource(), trans.gettarget()
        tokens = nltk.word_tokenize(eng)
        pos_tag = [pos[1] for pos in nltk.pos_tag(tokens)]
        roots = [stemmer.lemmatize(word, wordnet_pos(pos_tag[idx]))
                 for idx, word in enumerate(tokens)]
        cleanword = [token for token in roots
                     if token.isalpha() and token not in stop_words and len(token) >= 3]
        # remove duplicates
        clean_word = list(dict.fromkeys(cleanword))
        if len(clean_word) > 0:
            score = Word.search_words(*clean_word).sum('star') / len(clean_word)
        else:
            score = -1
        sentence = Sentence(eng=eng, chn=chn, words=tokens, pos_tag=pos_tag,
                            roots=roots, score=score, typ=typ)
        sentences.append(sentence)
        if len(sentences) > 50:
            Sentence.objects.insert(sentences)
            sentences = []
Example #12
Source File: transformer.py From atap with Apache License 2.0 | 5 votes |
def lemmatize(self, token, pos_tag):
    tag = {
        'N': wn.NOUN,
        'V': wn.VERB,
        'R': wn.ADV,
        'J': wn.ADJ
    }.get(pos_tag[0], wn.NOUN)

    return self.lemmatizer.lemmatize(token, tag)
Example #13
Source File: build.py From atap with Apache License 2.0 | 5 votes |
def lemmatize(self, token, pos_tag):
    tag = {
        'N': wn.NOUN,
        'V': wn.VERB,
        'R': wn.ADV,
        'J': wn.ADJ
    }.get(pos_tag[0], wn.NOUN)

    return self.lemmatizer.lemmatize(token, tag)
Example #14
Source File: synset_analysis.py From Quadflor with BSD 3-Clause "New" or "Revised" License | 5 votes |
def __init__(self):
    NltkNormalizer.install_nltk_corpora('averaged_perceptron_tagger')
    self.normalizer = NltkNormalizer()
    self.lem = nltk.WordNetLemmatizer()
    self.tagger = nltk.PerceptronTagger()
    self.translation_dict = {'J': wn.ADJ, 'N': wn.NOUN, 'R': wn.ADV, 'V': wn.VERB}
Example #15
Source File: agglomerative.py From atap with Apache License 2.0 | 5 votes |
def wnpos(tag):
    # Return the WordNet POS tag from the Penn Treebank tag
    return {
        'N': wn.NOUN,
        'V': wn.VERB,
        'R': wn.ADV,
        'J': wn.ADJ
    }.get(tag[0], wn.NOUN)
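A quick end-to-end sketch pairing this helper with NLTK's tagger and lemmatizer (assumes the punkt and averaged_perceptron_tagger resources are downloaded):

import nltk
from nltk.corpus import wordnet as wn
from nltk.stem import WordNetLemmatizer

lemmatizer = WordNetLemmatizer()
for word, tag in nltk.pos_tag(nltk.word_tokenize("The geese were flying south")):
    print(word, '->', lemmatizer.lemmatize(word.lower(), wnpos(tag)))
# e.g. geese -> goose, were -> be, flying -> fly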
Example #16
Source File: preprocessing.py From TextRank with MIT License | 5 votes |
def __get_wordnet_pos(treebank_tag):
    """Maps the treebank tags to WordNet part of speech names"""
    if treebank_tag.startswith('J'):
        return wordnet.ADJ
    elif treebank_tag.startswith('V'):
        return wordnet.VERB
    elif treebank_tag.startswith('N'):
        return wordnet.NOUN
    elif treebank_tag.startswith('R'):
        return wordnet.ADV
    else:
        return None
Example #17
Source File: kmeans.py From atap with Apache License 2.0 | 5 votes |
def wnpos(tag):
    # Return the WordNet POS tag from the Penn Treebank tag
    return {
        'N': wn.NOUN,
        'V': wn.VERB,
        'R': wn.ADV,
        'J': wn.ADJ
    }.get(tag[0], wn.NOUN)
Example #18
Source File: transformers.py From atap with Apache License 2.0 | 5 votes |
def lemmatize(self, token, pos_tag):
    tag = {
        'N': wn.NOUN,
        'V': wn.VERB,
        'R': wn.ADV,
        'J': wn.ADJ
    }.get(pos_tag[0], wn.NOUN)

    return self.lemmatizer.lemmatize(token, tag)
Example #19
Source File: sentiwordnet.py From yenlp with GNU General Public License v3.0 | 5 votes |
def wordnet_pos_code(tag):
    '''Translation from nltk tags to WordNet codes'''
    if tag.startswith('NN'):
        return wordnet.NOUN
    elif tag.startswith('VB'):
        return wordnet.VERB
    elif tag.startswith('JJ'):
        return wordnet.ADJ
    elif tag.startswith('RB'):
        return wordnet.ADV
    else:
        return ''
Example #20
Source File: test_preprocess_func.py From tmtoolkit with Apache License 2.0 | 5 votes |
def test_pos_tag_convert_penn_to_wn():
    assert pos_tag_convert_penn_to_wn('JJ') == wn.ADJ
    assert pos_tag_convert_penn_to_wn('RB') == wn.ADV
    assert pos_tag_convert_penn_to_wn('NN') == wn.NOUN
    assert pos_tag_convert_penn_to_wn('VB') == wn.VERB

    for tag in ('', 'invalid', None):
        assert pos_tag_convert_penn_to_wn(tag) is None
Example #21
Source File: wordnet_app.py From razzy-spinner with GNU General Public License v3.0 | 5 votes |
def _pos_tuples():
    return [
        (wn.NOUN, 'N', 'noun'),
        (wn.VERB, 'V', 'verb'),
        (wn.ADJ, 'J', 'adj'),
        (wn.ADV, 'R', 'adv')]
Example #22
Source File: main.py From tensorflow-XNN with MIT License | 5 votes |
def get_wordnet_pos(treebank_tag):
    if treebank_tag.startswith('J'):
        return wordnet.ADJ
    elif treebank_tag.startswith('V'):
        return wordnet.VERB
    elif treebank_tag.startswith('N'):
        return wordnet.NOUN
    elif treebank_tag.startswith('R'):
        return wordnet.ADV
    else:
        return None
Example #23
Source File: main.py From tensorflow-XNN with MIT License | 5 votes |
def lemmatize_word(word, pos=wordnet.NOUN):
    return LEMMATIZER.lemmatize(word, pos)
Example #24
Source File: main.py From tensorflow-XNN with MIT License | 5 votes |
def lemmatize_sentence(sentence):
    res = []
    sentence_ = get_valid_words(sentence)
    for word, pos in pos_tag(sentence_):
        wordnet_pos = get_wordnet_pos(pos) or wordnet.NOUN
        res.append(lemmatize_word(word, pos=wordnet_pos))
    return res
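The three main.py functions above form a small pipeline: tag, map the tag, lemmatize. A sketch of how they fit together, with hypothetical stand-ins for the module-level LEMMATIZER and the get_valid_words helper the code assumes:

import nltk
from nltk import pos_tag
from nltk.corpus import wordnet
from nltk.stem import WordNetLemmatizer

LEMMATIZER = WordNetLemmatizer()   # assumed module-level instance

def get_valid_words(sentence):     # hypothetical stand-in for the project's helper
    return [w for w in nltk.word_tokenize(sentence) if w.isalpha()]

print(lemmatize_sentence("The cats were chasing mice"))
# ['The', 'cat', 'be', 'chase', 'mouse']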
Example #25
Source File: check_availability.py From jingwei with MIT License | 5 votes |
def check_robustpca(trainCollection, testCollection, feature):
    ready = True

    # check matlab
    if not check_matlab():
        print_msg('RobustPCA (%s, %s, %s)' % (trainCollection, testCollection, feature),
                  'Matlab is not available or incorrectly configured.')
        ready = False

    # check if knn is available
    if not check_knn(trainCollection, testCollection, feature):
        print_msg('RobustPCA (%s, %s, %s)' % (trainCollection, testCollection, feature),
                  'KNN is not available.')
        ready = False

    # check data files
    datafiles = [os.path.join(ROOT_PATH, trainCollection, 'TextData', 'id.userid.lemmtags.txt'),
                 os.path.join(ROOT_PATH, trainCollection, 'FeatureData', feature)]
    res = find_missing_files(datafiles)
    if res:
        print_msg('RobustPCA (%s, %s, %s)' % (trainCollection, testCollection, feature),
                  'the following files or folders are missing:\n%s' % res)
        return False

    # check external dependencies
    try:
        import h5py
        import numpy
        import scipy.io
        import scipy.sparse
        from nltk.corpus import wordnet as wn
        from nltk.corpus import wordnet_ic
        brown_ic = wordnet_ic.ic('ic-brown.dat')
        wn.morphy('cat')
        wn.synsets('cat', pos=wn.NOUN)
    except Exception:
        try:
            import nltk
            nltk.download('brown')
            nltk.download('wordnet')
            nltk.download('wordnet_ic')
        except Exception as e:
            print(e)
            ready = False
Example #26
Source File: wordnet_similarity.py From jingwei with MIT License | 5 votes |
def wup_similarity(tagx, tagy):
    scores = []
    for pos in [wn.NOUN, wn.VERB, wn.ADJ, wn.ADJ_SAT, wn.ADV]:
        try:
            synsetx = wn.synset('%s.%s.01' % (tagx, pos))
            synsety = wn.synset('%s.%s.01' % (tagy, pos))
            score = synsetx.wup_similarity(synsety)
            if score is None:
                score = 0
        except Exception:
            score = 0
        scores.append(score)