Python nltk.corpus.wordnet.synset_from_pos_and_offset() Examples
The following are 7 code examples of nltk.corpus.wordnet.synset_from_pos_and_offset().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module nltk.corpus.wordnet, or try the search function.
Example #1
Source File: wordnet.py From wordkit with GNU General Public License v3.0 | 6 votes |
def fit(self, X):
    """Fit the transformer.

    Runs the parent class's ``fit``, unpacks ``X`` with ``self._unpack``,
    and then, for every ``(offset, pos)`` pair of every unpacked item,
    resolves the WordNet synset and counts everything that
    ``self.recursive_related`` returns for it.

    When ``self.prune`` is truthy, entries counted only once are dropped.

    Sets
    ----
    feature_names : set
        All elements found across the unpacked items.
    features : dict
        Maps each retained related entry to its column index.
    vec_len : int
        Number of entries in ``features``.

    Returns
    -------
    self
    """
    super().fit(X)
    unpacked = self._unpack(X)

    counts = Counter()
    for item in unpacked:
        for offset, pos in item:
            synset = wn.synset_from_pos_and_offset(pos, int(offset))
            counts.update(self.recursive_related(synset))

    if self.prune:
        # Keep only entries that were reached more than once.
        counts = {key: n for key, n in counts.items() if n > 1}

    self.feature_names = set(chain.from_iterable(unpacked))
    self.features = {key: column for column, key in enumerate(counts)}
    self.vec_len = len(self.features)
    return self
Example #2
Source File: definition_preprocessor.py From EWISE with Apache License 2.0 | 6 votes |
def procecss_wn18_definitions(self, wn18_path=""):
    """Build definition lookup tables for the synsets listed in a WN18 file.

    Reads ``wordnet-mlj12-definitions.txt`` from ``wn18_path``. Each line
    must contain at least three tab-separated fields: the synset offset,
    a synset tag whose second-to-last ``_``-separated part is a POS code
    (``NN``, ``VB``, ``JJ`` or ``RB``), and a trailing text field. The
    trailing field is ignored; the definition is fetched from WordNet
    instead.

    Sets
    ----
    definition_map : dict
        Stripped offset string -> stripped WordNet definition.
    synset_to_idx : dict
        Offset string -> position in the sorted list of offsets.
    idx_to_synset : dict
        Inverse of ``synset_to_idx``.
    definitions : list
        Definitions ordered by synset index.

    Raises
    ------
    ValueError
        If a line has fewer than three tab-separated fields, or the
        offset is not an integer.
    KeyError
        If the POS code in the synset tag is not one of the four known
        codes.
    """
    definitions_file = os.path.join(wn18_path, "wordnet-mlj12-definitions.txt")
    with open(definitions_file) as f:
        lines = f.readlines()

    pos_conversion_map = {'NN': 'n', 'VB': 'v', 'JJ': 'a', 'RB': 'r'}
    self.definition_map = {}
    n_empty_definitions = 0
    for line in tqdm(lines):
        # maxsplit=2: the third field is free text and is discarded, so a
        # tab inside it must not break the three-way unpacking.
        synset_offset, synset_tag, _ = line.split('\t', 2)
        pos = synset_tag.split('_')[-2]
        wn_ss = wn.synset_from_pos_and_offset(pos_conversion_map[pos], int(synset_offset))
        definition = wn_ss.definition().strip()
        if not definition:
            n_empty_definitions += 1
        self.definition_map[synset_offset.strip()] = definition

    print("#Empty definitions {}/{}".format(n_empty_definitions, len(self.definition_map)))

    synsets = sorted(self.definition_map)
    self.synset_to_idx = {synset: idx for idx, synset in enumerate(synsets)}
    self.idx_to_synset = {idx: synset for synset, idx in self.synset_to_idx.items()}
    self.definitions = [self.definition_map[synset] for synset in synsets]
Example #3
Source File: sentiwordnet.py From V1EngineeringInc-Docs with Creative Commons Attribution Share Alike 4.0 International | 6 votes |
def senti_synset(self, *vals):
    """Return the ``SentiSynset`` for a synset, or ``None`` if it has no scores.

    Two call forms are handled:

    * ``vals`` is a ``(pos, offset)`` pair that is a key of ``self._db``:
      the synset is resolved from that pair (with POS ``'s'`` mapped to
      ``'a'`` for the WordNet lookup).
    * otherwise ``vals[0]`` is passed to ``wn.synset``; the resulting
      synset's POS (``'s'`` mapped to ``'a'``) and offset are used as the
      ``self._db`` key.

    Returns ``None`` only in the second form, when the derived key is not
    in ``self._db``.
    """
    from nltk.corpus import wordnet as wn

    key = tuple(vals)
    if key in self._db:
        pos_score, neg_score = self._db[key]
        pos, offset = vals
        lookup_pos = 'a' if pos == 's' else pos
        synset = wn.synset_from_pos_and_offset(lookup_pos, offset)
        return SentiSynset(pos_score, neg_score, synset)

    synset = wn.synset(vals[0])
    pos = synset.pos()
    if pos == 's':
        pos = 'a'
    key = (pos, synset.offset())
    if key not in self._db:
        return None

    pos_score, neg_score = self._db[key]
    return SentiSynset(pos_score, neg_score, synset)
Example #4
Source File: NLP.py From Financial-NLP with Apache License 2.0 | 5 votes |
def id2ss(self, ID):
    """
    Resolve an identifier string to its English WordNet synset.

    Parameters
    ----------
    ID : str
        the id of a chinese word found in zh_wordnet. Its last character
        is used as the part-of-speech tag and its first eight characters
        are parsed as the integer synset offset.

    Returns
    ----------
    nltk.corpus.reader.wordnet.Synset
        an object in en_wordnet.
    """
    pos = str(ID[-1:])
    offset = int(ID[:8])
    return wn.synset_from_pos_and_offset(pos, offset)
Example #5
Source File: wordnet.py From wordkit with GNU General Public License v3.0 | 5 votes |
def vectorize(self, x):
    """Vectorize a word.

    Starts from a zero vector of length ``self.vec_len``. For every
    ``(offset, pos)`` pair in ``x`` the WordNet synset is resolved and
    passed to ``self.recursive_related``; the column of each returned
    entry (looked up in ``self.features``) is set to 1.

    When ``self.prune`` is truthy, entries missing from ``self.features``
    are skipped; otherwise a missing entry raises ``KeyError``.

    Returns the resulting numpy vector.
    """
    vec = np.zeros(self.vec_len)
    for offset, pos in x:
        synset = wn.synset_from_pos_and_offset(pos, int(offset))
        related = self.recursive_related(synset)
        if self.prune:
            related = [item for item in related if item in self.features]
        columns = [self.features[item] for item in related]
        vec[columns] = 1
    return vec
Example #6
Source File: make_induced_graph.py From DGP with MIT License | 5 votes |
def getnode(x):
    """Return the noun synset identified by ``x``.

    The first character of ``x`` is dropped and the remainder is parsed as
    the integer WordNet offset; the part of speech is always ``'n'``.
    """
    lookup = wn.synset_from_pos_and_offset
    offset = int(x[1:])
    return lookup('n', offset)
Example #7
Source File: sentiwordnet.py From V1EngineeringInc-Docs with Creative Commons Attribution Share Alike 4.0 International | 5 votes |
def all_senti_synsets(self):
    """Yield a ``SentiSynset`` for every entry in ``self._db``.

    Each key of ``self._db`` is unpacked as ``(pos, offset)`` and each
    value as ``(pos_score, neg_score)``; the synset is resolved from the
    key through WordNet.
    """
    from nltk.corpus import wordnet as wn

    for (pos, offset), (pos_score, neg_score) in self._db.items():
        yield SentiSynset(
            pos_score,
            neg_score,
            wn.synset_from_pos_and_offset(pos, offset),
        )