Python nltk.corpus.wordnet.NOUN Examples

The following are 27 code examples of nltk.corpus.wordnet.NOUN, the WordNet part-of-speech constant for nouns. Each example comes from an open source project; the line above each example names its source file, project, and license. You may also want to check out all available functions and classes of the module nltk.corpus.wordnet.
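Before the examples, a minimal sketch of what the constant is, assuming the WordNet corpus has been downloaded via nltk.download('wordnet'): wn.NOUN is simply the string 'n', and it is typically passed as the pos argument to functions such as wn.synsets() or WordNetLemmatizer.lemmatize().

from nltk.corpus import wordnet as wn
from nltk.stem import WordNetLemmatizer

print(wn.NOUN)                                              # 'n'
print(wn.synsets('dog', pos=wn.NOUN)[:3])                   # noun synsets only
print(WordNetLemmatizer().lemmatize('dogs', pos=wn.NOUN))   # 'dog'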
Example #1
Source File: laplacian_tags.py    From jingwei with MIT License
def tag_semantic_similarity(x, y, ic):
    mx = wn.morphy(x)
    my = wn.morphy(y)

    if mx is None or my is None:
        return 0

    synX = wn.synsets(mx, pos=wn.NOUN)
    synY = wn.synsets(my, pos=wn.NOUN)

    if len(synX) > 0 and len(synY) > 0:
        maxSim = synX[0].lin_similarity(synY[0], ic)
    else:
        maxSim = 0

    return maxSim 
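A minimal usage sketch for the function above, assuming the wordnet and wordnet_ic corpora are installed (nltk.download('wordnet'), nltk.download('wordnet_ic')); the ic argument is an information-content dictionary, which lin_similarity() requires. Despite the name maxSim, the excerpt compares only the first (most frequent) synset of each tag.

from nltk.corpus import wordnet as wn
from nltk.corpus import wordnet_ic

brown_ic = wordnet_ic.ic('ic-brown.dat')   # IC counts computed from the Brown corpus
print(tag_semantic_similarity('cats', 'dogs', brown_ic))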
Example #2
Source File: _common.py    From tmtoolkit with Apache License 2.0
def pos_tag_convert_penn_to_wn(tag):
    """
    Convert POS tag from Penn tagset to WordNet tagset.

    :param tag: a tag from Penn tagset
    :return: a tag from WordNet tagset or None if no corresponding tag could be found
    """
    from nltk.corpus import wordnet as wn

    if tag in ['JJ', 'JJR', 'JJS']:
        return wn.ADJ
    elif tag in ['RB', 'RBR', 'RBS']:
        return wn.ADV
    elif tag in ['NN', 'NNS', 'NNP', 'NNPS']:
        return wn.NOUN
    elif tag in ['VB', 'VBD', 'VBG', 'VBN', 'VBP', 'VBZ']:
        return wn.VERB
    return None 
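A short usage sketch for the converter above, assuming the punkt and averaged_perceptron_tagger NLTK models are available:

import nltk

tokens = nltk.word_tokenize("The cats were sleeping")
for word, penn_tag in nltk.pos_tag(tokens):
    print(word, pos_tag_convert_penn_to_wn(penn_tag))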
Example #3
Source File: syngraph.py    From atap with Apache License 2.0
def graph_synsets(terms, pos=wn.NOUN, depth=2):
    """
    Create a networkx graph of the given terms to the given depth.
    """

    G = nx.Graph(
        name="WordNet Synsets Graph for {}".format(", ".join(terms)), depth=depth,
    )

    def add_term_links(G, term, current_depth):
        for syn in wn.synsets(term):
            for name in syn.lemma_names():
                G.add_edge(term, name)
                if current_depth < depth:
                    add_term_links(G, name, current_depth+1)

    for term in terms:
        add_term_links(G, term, 0)

    return G 
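A minimal invocation sketch, assuming networkx is installed and imported as nx (as the excerpt implies). Note that the pos parameter is accepted but never passed to wn.synsets(), so the graph mixes all parts of speech:

import networkx as nx
from nltk.corpus import wordnet as wn

G = graph_synsets(["cat"], depth=1)
print(G.number_of_nodes(), G.number_of_edges())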
Example #4
Source File: deploy.py    From Election-Meddling with MIT License
def get_wordnet_pos(self,treebank_tag):
        """
        Return the WordNet POS tag used for WordNet lemmatization (a, n, r, v).
        """

        if treebank_tag.startswith('J'):
            return wordnet.ADJ

        elif treebank_tag.startswith('V'):
            return wordnet.VERB

        elif treebank_tag.startswith('N'):
            return wordnet.NOUN

        elif treebank_tag.startswith('R'):
            return wordnet.ADV

        else:
            return wordnet.NOUN 
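The else branch falls back to wordnet.NOUN rather than None, which mirrors WordNetLemmatizer: its lemmatize() method defaults to the noun POS anyway. A quick check of that equivalence:

from nltk.corpus import wordnet
from nltk.stem import WordNetLemmatizer

lemmatizer = WordNetLemmatizer()
# both calls lemmatize as a noun and print 'goose'
print(lemmatizer.lemmatize('geese'))
print(lemmatizer.lemmatize('geese', pos=wordnet.NOUN))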
Example #5
Source File: wn_persistent_api.py    From combine-FEVER-NSMN with MIT License
def convert_to_wn_pos(pos):
    if pos.startswith("J"):
        return wn.ADJ
    elif pos.startswith("V"):
        return wn.VERB
    elif pos.startswith("N"):
        return wn.NOUN
    elif pos.startswith("R"):
        return wn.ADV
    else:
        return "" 
Example #6
Source File: normalization.py    From text-analytics-with-python with Apache License 2.0
def pos_tag_text(text):
    
    def penn_to_wn_tags(pos_tag):
        if pos_tag.startswith('J'):
            return wn.ADJ
        elif pos_tag.startswith('V'):
            return wn.VERB
        elif pos_tag.startswith('N'):
            return wn.NOUN
        elif pos_tag.startswith('R'):
            return wn.ADV
        else:
            return None
    
    tagged_text = tag(text)
    tagged_lower_text = [(word.lower(), penn_to_wn_tags(pos_tag))
                         for word, pos_tag in
                         tagged_text]
    return tagged_lower_text
    
# lemmatize text based on POS tags 
Example #7
Source File: dcs.py    From broca with MIT License
def _disambiguate_doc(self, tagged_tokens):
        """
        Takes a list of tagged tokens, representing a document,
        in the form:

            [(token, tag), ...]

        And returns a mapping of terms to their disambiguated concepts (synsets).
        """

        # Group tokens by PoS
        pos_groups = {pos: [] for pos in [wn.NOUN, wn.VERB, wn.ADJ, wn.ADV]}
        for tok, tag in tagged_tokens:
            if tag in pos_groups:
                pos_groups[tag].append(tok)

        #print(pos_groups)

        # Map of final term -> concept mappings
        map = {}
        for tag, toks in pos_groups.items():
            map.update(self._disambiguate_pos(toks, tag))

        #nice_map = {k: map[k].lemma_names() for k in map.keys()}
        #print(json.dumps(nice_map, indent=4, sort_keys=True))

        return map 
Example #8
Source File: wordnet_app.py    From luscan-devel with GNU General Public License v2.0
def _pos_tuples():
    return [
        (wn.NOUN,'N','noun'),
        (wn.VERB,'V','verb'),
        (wn.ADJ,'J','adj'),
        (wn.ADV,'R','adv')] 
Example #9
Source File: normalize.py    From atap with Apache License 2.0
def lemmatize(self, token, pos_tag):
        tag = {
            'N': wn.NOUN,
            'V': wn.VERB,
            'R': wn.ADV,
            'J': wn.ADJ
        }.get(pos_tag[0], wn.NOUN)

        return self.lemmatizer.lemmatize(token, tag) 
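The dict-plus-.get() idiom above keys on the first character of the Penn tag and falls back to wn.NOUN for anything unrecognized; self.lemmatizer is presumably an nltk.stem.WordNetLemmatizer created elsewhere in the class. A standalone equivalent, for context:

from nltk.corpus import wordnet as wn
from nltk.stem import WordNetLemmatizer

lemmatizer = WordNetLemmatizer()
pos_tag = 'VBD'
tag = {'N': wn.NOUN, 'V': wn.VERB, 'R': wn.ADV, 'J': wn.ADJ}.get(pos_tag[0], wn.NOUN)
print(lemmatizer.lemmatize('went', tag))   # 'go'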
Example #10
Source File: preprocess_lst_test.py    From lexsub with Apache License 2.0
def lemmatize(pairs):
    triples = []
    for pair in pairs:
        word = pair[0]
        pos = pair[1]
        wordnet_pos = wordnet.NOUN
        if (len(pos)>=2):
            pos_prefix = pos[:2]
            if (pos_prefix in to_wordnet_pos):
                wordnet_pos = to_wordnet_pos[pos_prefix]
        lemma = WordNetLemmatizer().lemmatize(word, wordnet_pos).lower()
        triples.append([word, wordnet_pos, lemma])
    return triples 
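The excerpt depends on a module-level to_wordnet_pos mapping defined elsewhere in the source file; a plausible reconstruction, offered as an assumption rather than the project's verbatim code:

from nltk.corpus import wordnet

# Hypothetical: two-character Penn tag prefixes mapped to WordNet POS constants.
to_wordnet_pos = {'NN': wordnet.NOUN, 'JJ': wordnet.ADJ,
                  'VB': wordnet.VERB, 'RB': wordnet.ADV}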
Example #11
Source File: __init__.py    From wordai with MIT License
def _sentence_to_mongo(typ, items):
    import nltk
    from nltk.corpus import wordnet

    def wordnet_pos(tag):
        if tag.startswith('J'):
            return wordnet.ADJ
        elif tag.startswith('V'):
            return wordnet.VERB
        elif tag.startswith('N'):
            return wordnet.NOUN
        elif tag.startswith('R'):
            return wordnet.ADV
        else:
            return wordnet.NOUN

    # nltk.download('punkt')
    nltk.download('averaged_perceptron_tagger')
    nltk.download('stopwords')
    nltk.download('wordnet')
    nltk.download('punkt')
    stop_words = set(nltk.corpus.stopwords.words('english'))
    stemmer = nltk.stem.WordNetLemmatizer()
    sentences = []
    for trans in items:
        eng, chn = trans.getsource(), trans.gettarget()
        tokens = nltk.word_tokenize(eng)
        pos_tag = [pos[1] for pos in nltk.pos_tag(tokens)]
        roots = [stemmer.lemmatize(word, wordnet_pos(pos_tag[idx])) for idx, word in enumerate(tokens)]
        cleanword = [token for token in roots if token.isalpha() and token not in stop_words and len(token) >= 3]
        # remove duplicates
        clean_word = list(dict.fromkeys(cleanword))
        if len(clean_word) > 0:
            score = Word.search_words(*clean_word).sum('star') / len(clean_word)
        else:
            score = -1
        sentence = Sentence(eng=eng, chn=chn, words=tokens, pos_tag=pos_tag, roots=roots, score=score, typ=typ)
        sentences.append(sentence)
        if len(sentences) > 50:
            Sentence.objects.insert(sentences)
            sentences = [] 
Example #12
Source File: transformer.py    From atap with Apache License 2.0
def lemmatize(self, token, pos_tag):
        tag = {
            'N': wn.NOUN,
            'V': wn.VERB,
            'R': wn.ADV,
            'J': wn.ADJ
        }.get(pos_tag[0], wn.NOUN)

        return self.lemmatizer.lemmatize(token, tag) 
Example #13
Source File: build.py    From atap with Apache License 2.0
def lemmatize(self, token, pos_tag):
        tag = {
            'N': wn.NOUN,
            'V': wn.VERB,
            'R': wn.ADV,
            'J': wn.ADJ
        }.get(pos_tag[0], wn.NOUN)

        return self.lemmatizer.lemmatize(token, tag) 
Example #14
Source File: synset_analysis.py    From Quadflor with BSD 3-Clause "New" or "Revised" License
def __init__(self):
        NltkNormalizer.install_nltk_corpora('averaged_perceptron_tagger')
        self.normalizer = NltkNormalizer()
        self.lem = nltk.WordNetLemmatizer()
        self.tagger = nltk.PerceptronTagger()
        self.translation_dict = {'J': wn.ADJ, 'N': wn.NOUN, 'R': wn.ADV, 'V': wn.VERB} 
Example #15
Source File: agglomerative.py    From atap with Apache License 2.0
def wnpos(tag):
    # Return the WordNet POS tag from the Penn Treebank tag
    return {
        'N': wn.NOUN,
        'V': wn.VERB,
        'R': wn.ADV,
        'J': wn.ADJ
    }.get(tag[0], wn.NOUN) 
Example #16
Source File: preprocessing.py    From TextRank with MIT License
def __get_wordnet_pos(treebank_tag):
        """Maps the treebank tags to WordNet part of speech names"""
        if treebank_tag.startswith('J'):
            return wordnet.ADJ
        elif treebank_tag.startswith('V'):
            return wordnet.VERB
        elif treebank_tag.startswith('N'):
            return wordnet.NOUN
        elif treebank_tag.startswith('R'):
            return wordnet.ADV
        else:
            return None 
Example #17
Source File: kmeans.py    From atap with Apache License 2.0
def wnpos(tag):
    # Return the WordNet POS tag from the Penn Treebank tag
    return {
        'N': wn.NOUN,
        'V': wn.VERB,
        'R': wn.ADV,
        'J': wn.ADJ
    }.get(tag[0], wn.NOUN) 
Example #18
Source File: transformers.py    From atap with Apache License 2.0
def lemmatize(self, token, pos_tag):
        tag = {
            'N': wn.NOUN,
            'V': wn.VERB,
            'R': wn.ADV,
            'J': wn.ADJ
        }.get(pos_tag[0], wn.NOUN)

        return self.lemmatizer.lemmatize(token, tag) 
Example #19
Source File: transformer.py    From atap with Apache License 2.0
def lemmatize(self, token, pos_tag):
        tag = {
            'N': wn.NOUN,
            'V': wn.VERB,
            'R': wn.ADV,
            'J': wn.ADJ
        }.get(pos_tag[0], wn.NOUN)

        return self.lemmatizer.lemmatize(token, tag) 
Example #20
Source File: sentiwordnet.py    From yenlp with GNU General Public License v3.0
def wordnet_pos_code(tag):
    '''Translate an NLTK (Penn Treebank) tag to the corresponding WordNet POS code.'''
    if tag.startswith('NN'):
        return wordnet.NOUN
    elif tag.startswith('VB'):
        return wordnet.VERB
    elif tag.startswith('JJ'):
        return wordnet.ADJ
    elif tag.startswith('RB'):
        return wordnet.ADV
    else:
        return '' 
Example #21
Source File: test_preprocess_func.py    From tmtoolkit with Apache License 2.0
def test_pos_tag_convert_penn_to_wn():
    assert pos_tag_convert_penn_to_wn('JJ') == wn.ADJ
    assert pos_tag_convert_penn_to_wn('RB') == wn.ADV
    assert pos_tag_convert_penn_to_wn('NN') == wn.NOUN
    assert pos_tag_convert_penn_to_wn('VB') == wn.VERB

    for tag in ('', 'invalid', None):
        assert pos_tag_convert_penn_to_wn(tag) is None 
Example #22
Source File: wordnet_app.py    From razzy-spinner with GNU General Public License v3.0
def _pos_tuples():
    return [
        (wn.NOUN,'N','noun'),
        (wn.VERB,'V','verb'),
        (wn.ADJ,'J','adj'),
        (wn.ADV,'R','adv')] 
Example #23
Source File: main.py    From tensorflow-XNN with MIT License
def get_wordnet_pos(treebank_tag):
    if treebank_tag.startswith('J'):
        return wordnet.ADJ
    elif treebank_tag.startswith('V'):
        return wordnet.VERB
    elif treebank_tag.startswith('N'):
        return wordnet.NOUN
    elif treebank_tag.startswith('R'):
        return wordnet.ADV
    else:
        return None 
Example #24
Source File: main.py    From tensorflow-XNN with MIT License
def lemmatize_word(word, pos=wordnet.NOUN):
    return LEMMATIZER.lemmatize(word, pos) 
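LEMMATIZER is a module-level object defined elsewhere in main.py; presumably something like the following (an assumption, not the project's verbatim code):

from nltk.stem import WordNetLemmatizer

# Hypothetical module-level instance the one-liner above relies on.
LEMMATIZER = WordNetLemmatizer()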
Example #25
Source File: main.py    From tensorflow-XNN with MIT License
def lemmatize_sentence(sentence):
    res = []
    sentence_ = get_valid_words(sentence)
    for word, pos in pos_tag(sentence_):
        wordnet_pos = get_wordnet_pos(pos) or wordnet.NOUN
        res.append(lemmatize_word(word, pos=wordnet_pos))
    return res 
Example #26
Source File: check_availability.py    From jingwei with MIT License
def check_robustpca(trainCollection, testCollection, feature):
    ready = True
    
    # check matlab    
    if not check_matlab():
        print_msg('RobustPCA (%s, %s, %s)' % (trainCollection, testCollection, feature), 'Matlab is not available or incorrectly configured.')
        ready = False
    
    # check if knn is available
    if not check_knn(trainCollection, testCollection, feature):
        print_msg('RobustPCA (%s, %s, %s)' % (trainCollection, testCollection, feature), 'KNN is not available.')        
        ready = False

    # check data files
    datafiles = [ os.path.join(ROOT_PATH, trainCollection, 'TextData', 'id.userid.lemmtags.txt'),
                  os.path.join(ROOT_PATH, trainCollection, 'FeatureData', feature)]
    res = find_missing_files(datafiles)
    if res:
        print_msg('RobustPCA (%s, %s, %s)' % (trainCollection, testCollection, feature), 'the following files or folders are missing:\n%s' % res)
        return False    
              
    # check external dependencies  
    try:
        import h5py
        import numpy
        import scipy.io
        import scipy.sparse
        from nltk.corpus import wordnet as wn
        from nltk.corpus import wordnet_ic
        brown_ic = wordnet_ic.ic('ic-brown.dat')
        wn.morphy('cat')
        wn.synsets('cat', pos=wn.NOUN)
    except Exception as e:
        try:
            import nltk
            nltk.download('brown')
            nltk.download('wordnet')
            nltk.download('wordnet_ic')
        except Exception as e:
            print(e)
            ready = False 
Example #27
Source File: wordnet_similarity.py    From jingwei with MIT License
def wup_similarity(tagx, tagy):
    scores = []
    for pos in [wn.NOUN, wn.VERB, wn.ADJ, wn.ADJ_SAT, wn.ADV]:
        try:
            synsetx = wn.synset('%s.%s.01' % (tagx,pos))
            synsety = wn.synset('%s.%s.01' % (tagy,pos))
            score = synsetx.wup_similarity(synsety)
            if score is None:
                score = 0
        except Exception as e:
            score = 0
        scores.append(score)