Python nltk.ConditionalFreqDist() Examples

The following are 6 code examples of nltk.ConditionalFreqDist(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module nltk, or try the search function.
Example #1
Source File: categories.py    From nltk_teach with Apache License 2.0 6 votes vote down vote up
def build_word_associations():
    """Count same-category words that follow each word in the Brown corpus.

    Every tagged sentence is lower-cased and stripped of English stop words.
    Each remaining token is paired with the (up to) four remaining tokens that
    follow it, and a pair is counted only when the first character of both
    part-of-speech tags is equal.

    Returns:
        An ``nltk.ConditionalFreqDist`` whose conditions are tokens and whose
        samples are the following tokens that passed the tag filter.
    """
    cfd = nltk.ConditionalFreqDist()

    # A set gives constant-time membership tests; the list returned by the
    # corpus reader would be scanned linearly for every token.
    stopwords = set(nltk.corpus.stopwords.words('english'))

    for tagged_sent in nltk.corpus.brown.tagged_sents():
        # Stop words are removed before windowing, so a window spans the next
        # four content words rather than the next four raw tokens. Because the
        # filtering happens here, no further stop-word checks are needed below.
        content = [(token.lower(), tag) for (token, tag) in tagged_sent
                   if token.lower() not in stopwords]
        for index, (token, tag) in enumerate(content):
            for window_token, window_tag in content[index + 1:index + 5]:
                # Compare by value: an identity test (``is``) on strings only
                # happens to work when the interpreter caches short strings.
                if window_tag[0] == tag[0]:
                    # FreqDist.inc() no longer exists in NLTK 3; item
                    # assignment works for both old and new FreqDist.
                    cfd[token][window_token] += 1
    return cfd
Example #2
Source File: sentiwordnet.py    From yenlp with GNU General Public License v3.0 6 votes vote down vote up
def word_sense_cdf(word, context, wn_pos):
    '''Word sense disambiguation in terms of matching word frequency
    between the context and each sense's definition. Adapted from
    www.slideshare.net/faigg/tutotial-of-sentiment-analysis

    For every WordNet sense of ``word`` (restricted to ``wn_pos``), the words
    of the sense's definition that also occur in ``context`` are counted. The
    sense whose most frequent overlapping word has the highest count is
    returned; ties keep the earliest sense in WordNet's ordering.

    Args:
        word: the word to disambiguate.
        context: a container supporting ``in``; definition words are looked
            up in it.
        wn_pos: part-of-speech argument passed on to ``wordnet.synsets``.

    Returns:
        The selected sense, or ``None`` when WordNet has no sense for the
        word. When no definition overlaps the context at all, the first
        sense is returned.
    '''
    senses = wordnet.synsets(word, wn_pos)
    if not senses:
        return None

    cfd = nltk.ConditionalFreqDist((sense, def_word)
                                   for sense in senses
                                   for def_word in sense.definition().split()
                                   if def_word in context)

    def top_count(sense):
        # FreqDist.max() returns the most frequent *sample* (a word), not its
        # count, so comparing max() values would compare strings. Read the
        # counts directly, and score a sense without any overlap as 0 instead
        # of relying on an exception being swallowed.
        counts = list(cfd[sense].values())
        return max(counts) if counts else 0

    # max() returns the first maximal element, which matches a strict ">"
    # scan that starts from senses[0].
    return max(senses, key=top_count)
Example #3
Source File: test_cfd_mutation.py    From V1EngineeringInc-Docs with Creative Commons Attribution Share Alike 4.0 International 6 votes vote down vote up
def test_increment(self):
        """A ConditionalFreqDist can still be mutated through item access."""
        cfd = ConditionalFreqDist()

        # Use each token's length as its condition.
        for token in tokenize.word_tokenize("cow cat mouse cat tiger"):
            cfd[len(token)][token] += 1

        self.assertEqual(cfd.conditions(), [3, 5])

        # A condition/sample pair that was never seen can be incremented too.
        cfd[2]['hi'] += 1
        # The new condition shows up alongside the existing ones ...
        self.assertEqual(set(cfd.conditions()), {3, 5, 2})
        # ... and its sample went from 0 (unseen) to 1.
        self.assertEqual(cfd[2]['hi'], 1)
Example #4
Source File: hmm.py    From deep_disfluency with MIT License 5 votes vote down vote up
def train_markov_model_from_constraint_matrix(self, csv_path, mm_path,
                                                  delim="\t"):
        """Train the tag transition model from a matrix of integer counts.

        The file at ``csv_path`` is read as a delimited table. The first row
        is a header whose cells after the first name the range states; in
        every following row the first cell is the domain state and each
        remaining cell is an integer count for the (domain, range state)
        pair. Empty cells are skipped.

        Side effects:
            * sets ``self.cfd_tags`` (``nltk.ConditionalFreqDist``) and
              pickles it to ``mm_path``;
            * sets ``self.cpd_tags`` (``nltk.ConditionalProbDist`` using
              ``nltk.MLEProbDist``);
            * sets ``self.tag_set`` to every condition and outcome seen;
            * prints the trained tables and calls ``self.viterbi_init()``.

        Args:
            csv_path: path of the delimited count matrix to read.
            mm_path: path the pickled frequency distribution is written to.
            delim: cell delimiter, a tab by default.

        Raises:
            IndexError: if the file has no rows at all.
            ValueError: if a non-empty cell is not an integer.
        """
        # Strip the line terminator before splitting; otherwise the last
        # header cell keeps its newline and ends up as a bogus tag name.
        with open(csv_path) as csv_file:
            table = [line.rstrip("\r\n").split(delim) for line in csv_file]

        range_states = table.pop(0)[1:]
        tags = []
        for row in table:
            domain = row[0]
            for i, cell in enumerate(row[1:]):
                count = cell.replace(" ", "").strip()
                if count == '':
                    continue
                # One (domain, range) observation per unit of count; list
                # repetition yields nothing for zero or negative counts.
                tags.extend([(domain, range_states[i])] * int(count))

        self.cfd_tags = nltk.ConditionalFreqDist(tags)
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print("cfd trained, counts:")
        self.cfd_tags.tabulate()
        print("test:")
        print(tabulate_cfd(self.cfd_tags))

        # save this new cfd for later use
        with open(mm_path, "wb") as mm_file:
            pickle.dump(self.cfd_tags, mm_file)

        # initialize the cpd
        self.cpd_tags = nltk.ConditionalProbDist(self.cfd_tags,
                                                 nltk.MLEProbDist)
        print(tabulate_cfd(self.cpd_tags))

        # Collect conditions and outcomes without concatenating keys() to a
        # list, which only works where keys() returns a list (Python 2).
        tag_set = set(self.cfd_tags.keys())
        for outcomes in self.cfd_tags.values():
            tag_set.update(outcomes.keys())
        self.tag_set = tag_set

        self.viterbi_init()  # initialize viterbi
Example #5
Source File: test_cfd_mutation.py    From V1EngineeringInc-Docs with Creative Commons Attribution Share Alike 4.0 International 5 votes vote down vote up
def test_tabulate(self):
        """Tabulating an unknown condition must not add it to an empty CFD."""
        empty = ConditionalFreqDist()
        self.assertEqual(empty.conditions(), [])
        try:
            empty.tabulate(conditions="BUG")  # nonexistent keys shouldn't be added
        except Exception:
            # Whether tabulate() itself fails is not what this test checks;
            # only the absence of mutation matters. Catch Exception rather
            # than everything so Ctrl-C and SystemExit still propagate.
            pass
        self.assertEqual(empty.conditions(), [])
Example #6
Source File: test_cfd_mutation.py    From V1EngineeringInc-Docs with Creative Commons Attribution Share Alike 4.0 International 5 votes vote down vote up
def test_plot(self):
        """Plotting an unknown condition must not add it to an empty CFD."""
        empty = ConditionalFreqDist()
        self.assertEqual(empty.conditions(), [])
        try:
            empty.plot(conditions=["BUG"])  # nonexistent keys shouldn't be added
        except Exception:
            # Whether plot() itself fails is not what this test checks; only
            # the absence of mutation matters. Catch Exception rather than
            # everything so Ctrl-C and SystemExit still propagate.
            pass
        self.assertEqual(empty.conditions(), [])