Python nltk.ConditionalFreqDist() Examples
The following are 6 code examples of nltk.ConditionalFreqDist().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module nltk, or try the search function.
Example #1
Source File: categories.py From nltk_teach with Apache License 2.0 | 6 votes |
def build_word_associations():
    """Count which words follow each word in the Brown corpus.

    Every tagged sentence of the Brown corpus is lowercased and stripped of
    English stop words.  For each remaining token, the (up to) four tokens
    that follow it are inspected -- a five-token window counting the token
    itself -- and a following token is counted when the first character of
    its tag equals the first character of the current token's tag.

    Returns:
        An ``nltk.ConditionalFreqDist`` whose conditions are tokens and whose
        samples are the later tokens counted for them.
    """
    cfd = nltk.ConditionalFreqDist()

    # A set makes the per-token stop-word test O(1) instead of a list scan.
    stopwords = set(nltk.corpus.stopwords.words('english'))

    for tagged_sent in nltk.corpus.brown.tagged_sents():
        # Lowercase once, then drop stop words.  Nothing left in ``content``
        # is a stop word, so no further stop-word checks are needed below.
        lowered = ((token.lower(), tag) for (token, tag) in tagged_sent)
        content = [(token, tag) for (token, tag) in lowered
                   if token not in stopwords]

        for index, (token, tag) in enumerate(content):
            for window_token, window_tag in content[index + 1:index + 5]:
                # Compare tag prefixes by value; ``is`` only tests identity.
                if window_tag[0] == tag[0]:
                    # FreqDist.inc() was removed in NLTK 3; increment the
                    # count directly instead.
                    cfd[token][window_token] += 1
    return cfd
Example #2
Source File: sentiwordnet.py From yenlp with GNU General Public License v3.0 | 6 votes |
def word_sense_cdf(word, context, wn_pos):
    '''Word sense disambiguation in terms of matching words frequency
    between the context and each sense's definition.

    For every sense returned by ``wordnet.synsets(word, wn_pos)``, the words
    of its definition that also occur in ``context`` are counted; the sense
    with the highest count is returned.  Ties keep the earliest sense.

    Adapted from www.slideshare.net/faigg/tutotial-of-sentiment-analysis

    Args:
        word: the word to disambiguate.
        context: container of context words, tested with ``in``.
        wn_pos: part-of-speech argument passed through to
            ``wordnet.synsets``.

    Returns:
        The best-matching sense, or ``None`` when the word has no senses.
    '''
    senses = wordnet.synsets(word, wn_pos)
    if not senses:
        return None

    cfd = nltk.ConditionalFreqDist(
        (sense, def_word)
        for sense in senses
        for def_word in sense.definition().split()
        if def_word in context
    )

    # Rank by N(), the total number of overlapping words.  FreqDist.max()
    # returns the most frequent *sample* (a word), not a count, and raises
    # ValueError on an empty distribution, so it cannot be used to compare
    # senses.  max() returns the first maximal item, so ties keep the
    # earliest sense.
    return max(senses, key=lambda sense: cfd[sense].N())
Example #3
Source File: test_cfd_mutation.py From V1EngineeringInc-Docs with Creative Commons Attribution Share Alike 4.0 International | 6 votes |
def test_increment(self):
    """Check that a ConditionalFreqDist can still be mutated normally."""
    sample_text = "cow cat mouse cat tiger"
    dist = ConditionalFreqDist()

    # Build the distribution using each word's length as its condition.
    for token in tokenize.word_tokenize(sample_text):
        dist[len(token)][token] += 1
    self.assertEqual(dist.conditions(), [3, 5])

    # Incrementing a condition/sample pair never seen before must still work.
    dist[2]['hi'] += 1
    # The new condition shows up alongside the existing ones ...
    self.assertEqual(set(dist.conditions()), {3, 5, 2})
    # ... and the sample's frequency went from 0 (unseen) to 1.
    self.assertEqual(dist[2]['hi'], 1)
Example #4
Source File: hmm.py From deep_disfluency with MIT License | 5 votes |
def train_markov_model_from_constraint_matrix(self, csv_path, mm_path, delim="\t"):
    """Train the tag model from a count matrix file and pickle it.

    The file at ``csv_path`` is read as a ``delim``-separated table.  The
    first row, minus its first cell, names the range states.  Each later
    row holds a domain in its first cell followed by one integer count per
    range state; blank cells are skipped.  A count ``n`` contributes ``n``
    ``(domain, range_state)`` pairs to the frequency distribution.

    Side effects: sets ``self.cfd_tags``, ``self.cpd_tags`` and
    ``self.tag_set``, prints the trained tables, pickles ``self.cfd_tags``
    to ``mm_path`` and calls ``self.viterbi_init()``.

    Args:
        csv_path: path of the count matrix to read.
        mm_path: path the pickled frequency distribution is written to.
        delim: cell separator used in the matrix file.
    """
    # Close the file deterministically.  Stripping the newline per line
    # keeps it out of the last header cell, which would otherwise name the
    # final range state "<state>\n".
    with open(csv_path) as matrix_file:
        table = [line.rstrip("\n").split(delim) for line in matrix_file]

    range_states = table.pop(0)[1:]
    tags = []
    for row in table:
        domain = row[0]
        for i, cell in enumerate(row[1:]):
            count = cell.replace(" ", "")
            if count == '':
                continue
            # Repeating by a non-positive count adds nothing, so zero and
            # negative cells are ignored.
            tags.extend([(domain, range_states[i])] * int(count))

    self.cfd_tags = nltk.ConditionalFreqDist(tags)
    print("cfd trained, counts:")
    self.cfd_tags.tabulate()
    print("test:")
    print(tabulate_cfd(self.cfd_tags))

    # Save this new cfd for later use.
    with open(mm_path, "wb") as model_file:
        pickle.dump(self.cfd_tags, model_file)

    # Initialize the cpd.
    self.cpd_tags = nltk.ConditionalProbDist(self.cfd_tags, nltk.MLEProbDist)
    print(tabulate_cfd(self.cpd_tags))

    # The tag set is every condition plus every outcome seen under any
    # condition.  Built with set operations because dict views cannot be
    # concatenated with ``+`` on Python 3.
    self.tag_set = set(self.cfd_tags.keys())
    for outcomes in self.cfd_tags.values():
        self.tag_set.update(outcomes.keys())

    self.viterbi_init()  # initialize viterbi
Example #5
Source File: test_cfd_mutation.py From V1EngineeringInc-Docs with Creative Commons Attribution Share Alike 4.0 International | 5 votes |
def test_tabulate(self):
    """Check that tabulating a nonexistent condition does not add it."""
    empty = ConditionalFreqDist()
    self.assertEqual(empty.conditions(), [])
    try:
        # Nonexistent keys shouldn't be added.
        empty.tabulate(conditions="BUG")
    except Exception:
        # Any failure of tabulate() itself is irrelevant here; only the
        # conditions afterwards matter.  ``Exception`` rather than a bare
        # ``except`` lets KeyboardInterrupt/SystemExit propagate.
        pass
    self.assertEqual(empty.conditions(), [])
Example #6
Source File: test_cfd_mutation.py From V1EngineeringInc-Docs with Creative Commons Attribution Share Alike 4.0 International | 5 votes |
def test_plot(self):
    """Check that plotting a nonexistent condition does not add it."""
    empty = ConditionalFreqDist()
    self.assertEqual(empty.conditions(), [])
    try:
        # Nonexistent keys shouldn't be added.
        empty.plot(conditions=["BUG"])
    except Exception:
        # Any failure of plot() itself is irrelevant here; only the
        # conditions afterwards matter.  ``Exception`` rather than a bare
        # ``except`` lets KeyboardInterrupt/SystemExit propagate.
        pass
    self.assertEqual(empty.conditions(), [])