Python nltk.corpus.wordnet.synset_from_pos_and_offset() Examples

The following are 7 code examples of nltk.corpus.wordnet.synset_from_pos_and_offset(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module nltk.corpus.wordnet, or try the search function.
Example #1
Source File: wordnet.py    From wordkit with GNU General Public License v3.0 6 votes vote down vote up
def fit(self, X):
        """Fit the transformer on X and build the feature index.

        After the parent class has been fitted, X is unpacked into entries
        of (offset, pos) pairs. Every pair is resolved to a WordNet synset
        and the items yielded by ``self.recursive_related`` for that synset
        are tallied. The tallied items become the feature vocabulary.

        Sets ``self.feature_names``, ``self.features`` (item -> column
        index) and ``self.vec_len``.

        Returns
        -------
        self
        """
        super().fit(X)
        X = self._unpack(X)

        # Tally how often each related item is reached over all synsets.
        counts = Counter()
        for entry in X:
            for offset, pos in entry:
                synset = wn.synset_from_pos_and_offset(pos, int(offset))
                counts.update(self.recursive_related(synset))

        if self.prune:
            # Pruning keeps only items that were reached more than once.
            kept = {}
            for item, count in counts.items():
                if count > 1:
                    kept[item] = count
            counts = kept

        self.feature_names = set(chain.from_iterable(X))
        # Column indices follow the iteration order of the tally.
        self.features = dict(zip(counts, range(len(counts))))
        self.vec_len = len(self.features)
        return self
Example #2
Source File: definition_preprocessor.py    From EWISE with Apache License 2.0 6 votes vote down vote up
def procecss_wn18_definitions(self, wn18_path=""):
        """Load WN18 synset ids and look up their definitions in WordNet.

        Reads ``wordnet-mlj12-definitions.txt`` from ``wn18_path``. Each
        line must hold exactly three tab-separated fields: the synset
        offset, a tag whose second-to-last ``_``-separated component is one
        of NN/VB/JJ/RB, and a third field that is ignored (the definition
        text is fetched from WordNet instead).

        Sets ``self.definition_map`` (offset string -> definition),
        ``self.synset_to_idx``, ``self.idx_to_synset`` and
        ``self.definitions`` (definitions ordered by sorted offset string),
        and prints how many definitions came back empty.
        """
        tag_to_wn_pos = {'NN': 'n', 'VB': 'v', 'JJ': 'a', 'RB': 'r'}

        path = os.path.join(wn18_path, "wordnet-mlj12-definitions.txt")
        with open(path) as handle:
            rows = handle.readlines()

        self.definition_map = {}
        empty_count = 0
        for row in tqdm(rows):
            synset_offset, synset_tag, _ = row.split('\t')
            tag = synset_tag.split('_')[-2]
            synset = wn.synset_from_pos_and_offset(tag_to_wn_pos[tag], int(synset_offset))
            text = synset.definition().strip()
            if not text:
                empty_count += 1
            self.definition_map[synset_offset.strip()] = text

        print("#Empty definitions {}/{}".format(empty_count, len(self.definition_map)))

        # Indices are assigned in sorted order of the offset strings.
        ordered = sorted(self.definition_map.keys())
        self.synset_to_idx = {synset_id: index for index, synset_id in enumerate(ordered)}
        self.idx_to_synset = {index: synset_id for synset_id, index in self.synset_to_idx.items()}
        self.definitions = [self.definition_map[synset_id] for synset_id in ordered]
Example #3
Source File: sentiwordnet.py    From V1EngineeringInc-Docs with Creative Commons Attribution Share Alike 4.0 International 6 votes vote down vote up
def senti_synset(self, *vals):
        """Return the SentiSynset for a (pos, offset) pair or a synset name.

        If the positional arguments, taken together as a tuple, are a key of
        ``self._db``, they are treated as ``(pos, offset)``. Otherwise the
        first argument is passed to ``wn.synset`` and the resulting synset's
        pos and offset are used as the key.

        A pos of 's' is mapped to 'a' before it is used for a WordNet offset
        lookup or as a ``self._db`` key.

        Returns None when the synset resolved from the first argument has no
        entry in ``self._db``.
        """
        from nltk.corpus import wordnet as wn

        key = tuple(vals)
        if key in self._db:
            pos_score, neg_score = self._db[key]
            pos, offset = vals
            wn_pos = 'a' if pos == 's' else pos
            found = wn.synset_from_pos_and_offset(wn_pos, offset)
            return SentiSynset(pos_score, neg_score, found)

        # Not a direct key: resolve the first argument through WordNet.
        synset = wn.synset(vals[0])
        pos = synset.pos()
        if pos == 's':
            pos = 'a'
        lookup = (pos, synset.offset())
        if lookup not in self._db:
            return None

        pos_score, neg_score = self._db[lookup]
        return SentiSynset(pos_score, neg_score, synset)
Example #4
Source File: NLP.py    From Financial-NLP with Apache License 2.0 5 votes vote down vote up
def id2ss(self,ID):
        """Resolve a zh_wordnet word id to the matching WordNet synset.

        The first eight characters of the id are parsed as the integer
        synset offset and the final character is used as the part of speech.

        Parameters
        ----------
        ID : str
            the id of a chinese word found in zh_wordnet.

        Returns
        -------
        nltk.corpus.reader.wordnet.Synset
            an object in en_wordnet.
        """
        pos_tag = str(ID[-1:])
        offset = int(ID[:8])
        return wn.synset_from_pos_and_offset(pos_tag, offset)
Example #5
Source File: wordnet.py    From wordkit with GNU General Public License v3.0 5 votes vote down vote up
def vectorize(self, x):
        """Vectorize a word given as an iterable of (offset, pos) pairs.

        Each pair is resolved to a WordNet synset; the items yielded by
        ``self.recursive_related`` for that synset are mapped to column
        indices through ``self.features`` and those columns are set to 1.

        With ``self.prune`` set, related items that are not in
        ``self.features`` are dropped; without it, such an item raises
        KeyError at the index lookup.

        Returns a NumPy array of length ``self.vec_len``.
        """
        vec = np.zeros(self.vec_len)
        for offset, pos in x:
            synset = wn.synset_from_pos_and_offset(pos, int(offset))
            related = self.recursive_related(synset)
            if self.prune:
                related = [item for item in related if item in self.features]

            columns = []
            for item in related:
                columns.append(self.features[item])
            vec[columns] = 1

        return vec
Example #6
Source File: make_induced_graph.py    From DGP with MIT License 5 votes vote down vote up
def getnode(x):
    """Return the noun synset whose offset is encoded in ``x``.

    The first character of ``x`` is skipped and the remainder is parsed as
    the integer offset; the part of speech is always 'n'.
    """
    offset = int(x[1:])
    return wn.synset_from_pos_and_offset('n', offset)
Example #7
Source File: sentiwordnet.py    From V1EngineeringInc-Docs with Creative Commons Attribution Share Alike 4.0 International 5 votes vote down vote up
def all_senti_synsets(self):
        """Yield a SentiSynset for every entry in ``self._db``.

        Each key is unpacked as ``(pos, offset)`` and each value as
        ``(pos_score, neg_score)``; the synset is looked up in WordNet by
        pos and offset.
        """
        from nltk.corpus import wordnet as wn

        for (pos, offset), (pos_score, neg_score) in self._db.items():
            found = wn.synset_from_pos_and_offset(pos, offset)
            yield SentiSynset(pos_score, neg_score, found)