Python nltk.corpus.brown.sents() Examples
The following are 6 code examples of nltk.corpus.brown.sents(), drawn from open source projects. Each example links back to its original project and source file. You may also want to check out all available functions and classes of the module nltk.corpus.brown.
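As background for the examples below, brown.sents() returns the Brown corpus as a lazily loaded sequence of tokenized sentences, each a list of word strings. A minimal sketch, assuming the corpus data has been downloaded:

import nltk
nltk.download('brown')  # one-time download of the corpus data
from nltk.corpus import brown

sents = brown.sents()
print(len(sents))  # total number of sentences in the corpus
print(sents[0])    # a list of tokens: ['The', 'Fulton', 'County', ...]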
Example #1
Source File: tnt.py From razzy-spinner with GNU General Public License v3.0
def demo():
    from nltk.corpus import brown
    sents = list(brown.tagged_sents())
    test = list(brown.sents())

    # create and train the tagger
    tagger = TnT()
    tagger.train(sents[200:1000])

    # tag some data
    tagged_data = tagger.tagdata(test[100:120])

    # print results
    for j in range(len(tagged_data)):
        s = tagged_data[j]
        t = sents[j + 100]
        for i in range(len(s)):
            print(s[i], '--', t[i])
        print()
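This snippet lives inside NLTK's own tnt.py, where TnT is defined in the same file. Outside the NLTK source tree, roughly the same demo can be run by importing the class explicitly; a minimal sketch, with an illustrative training-slice size:

from nltk.corpus import brown
from nltk.tag import tnt

train_sents = list(brown.tagged_sents())[:1000]  # small slice keeps training fast
tagger = tnt.TnT()
tagger.train(train_sents)

# tag an untagged token list; tokens unseen in training get the 'Unk' tag by default
print(tagger.tag(['The', 'jury', 'said', 'it', 'was', 'fair', '.']))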
Example #2
Source File: short_sentence_similarity.py From Semantic-Texual-Similarity-Toolkits with MIT License
def info_content(lookup_word):
    """
    Uses the Brown corpus available in NLTK to calculate a Laplace
    smoothed frequency distribution of words, then uses this information
    to compute the information content of the lookup_word.
    """
    global N
    if N == 0:
        # poor man's lazy evaluation
        for sent in brown.sents():
            for word in sent:
                word = word.lower()
                if word not in brown_freqs:
                    brown_freqs[word] = 0
                brown_freqs[word] = brown_freqs[word] + 1
                N = N + 1
    lookup_word = lookup_word.lower()
    n = 0 if lookup_word not in brown_freqs else brown_freqs[lookup_word]
    return 1.0 - (math.log(n + 1) / math.log(N + 1))
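The score is 1 - log(n + 1)/log(N + 1), where n is the word's frequency in Brown and N the total token count, so frequent words score near 0 and unseen words score exactly 1. Assuming the file's module-level brown_freqs = {} and N = 0, usage would look like:

print(info_content('the'))      # very frequent in Brown -> close to 0.0
print(info_content('zyzzyva'))  # almost certainly absent -> exactly 1.0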
Example #3
Source File: tnt.py From luscan-devel with GNU General Public License v2.0
def demo():
    # NB: this project targets Python 2 (print is a statement here)
    from nltk.tag import tnt
    from nltk.corpus import brown
    sents = list(brown.tagged_sents())
    test = list(brown.sents())

    # create and train the tagger
    tagger = tnt.TnT()
    tagger.train(sents[200:1000])

    # tag some data
    tagged_data = tagger.tagdata(test[100:120])

    # print results
    for j in range(len(tagged_data)):
        s = tagged_data[j]
        t = sents[j + 100]
        for i in range(len(s)):
            print s[i], '--', t[i]
        print
Example #4
Source File: tnt.py From V1EngineeringInc-Docs with Creative Commons Attribution Share Alike 4.0 International
def demo():
    from nltk.corpus import brown
    sents = list(brown.tagged_sents())
    test = list(brown.sents())

    # create and train the tagger
    tagger = TnT()
    tagger.train(sents[200:1000])

    # tag some data
    tagged_data = tagger.tagdata(test[100:120])

    # print results
    for j in range(len(tagged_data)):
        s = tagged_data[j]
        t = sents[j + 100]
        for i in range(len(s)):
            print(s[i], '--', t[i])
        print()
Example #5
Source File: data_load.py From neural_tokenizer with MIT License
def load_data(mode="train"): word2idx, idx2word = load_vocab() from nltk.corpus import brown sents = [" ".join(words) for words in brown.sents()] xs, ys = [], [] for sent in sents: sent = re.sub(r"[^ A-Za-z']", "", sent) if hp.minlen <= len(sent) <= hp.maxlen: x, y = [], [] for word in sent.split(): for char in word: x.append(word2idx[char]) y.append(0) # 0: no space y[-1] = 1 # space for end of a word y[-1] = 0 # no space for end of sentence xs.append(x + [0] * (hp.maxlen-len(x))) ys.append(y + [0] * (hp.maxlen-len(x))) # Convert to ndarrays X = np.array(xs, np.int32) Y = np.array(ys, np.int32) # mode if mode=="train": X, Y = X[: int(len(X) * .8)], Y[: int(len(Y) * .8)] # X, Y = X[: 128], Y[: 128] elif mode=="val": X, Y = X[int(len(X) * .8): -int(len(X) * .1)], Y[int(len(X) * .8): -int(len(X) * .1)] else: X, Y = X[-int(len(X) * .1):], Y[-int(len(X) * .1):] return X, Y
Example #6
Source File: do_benchmark.py From PyRATA with Apache License 2.0
def test_clause():
    """Benchmark clause recognition on Brown sentences with an NLTK parser and with PyRATA."""
    print('Measuring time performance on # {} sentences over # {} iterations for recognizing Clause'.format(size, iteration_number))

    from nltk.corpus import brown
    brown_sents = brown.sents()[:size]

    import nltk
    global brown_pos_tag_sents
    brown_pos_tag_sents = [nltk.pos_tag(sentence) for sentence in brown_sents]

    # ----------------------------------------------------
    # nltk_parser
    # ----------------------------------------------------
    analyzer_name = 'nltk_parser'
    times, averagetime, mintime = measure_time(nltk_parse_clause_in_the_whole_text, iteration_number)
    grammar = "clause"
    print('{}\t{}\t{}\t{}'.format(analyzer_name, grammar, averagetime, mintime))

    # ----------------------------------------------------
    # pyrata
    # ----------------------------------------------------
    analyzer_name = 'pyrata'
    global sentences_dict_list_list
    sentences_dict_list_list = []
    for s in brown_pos_tag_sents:
        sentences_dict_list_list.append([{'raw': w, 'pos': p} for (w, p) in s])
    times, averagetime, mintime = measure_time(pyrata_recognize_clause_in_the_whole_text, iteration_number)
    grammar = "clause"
    print('{}\t{}\t{}\t{}'.format(analyzer_name, grammar, averagetime, mintime))
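measure_time is defined elsewhere in do_benchmark.py; a plausible stand-in consistent with its (times, averagetime, mintime) return shape — an assumption, not PyRATA's actual implementation — could be:

import time

def measure_time(func, iteration_number):
    """Run func() iteration_number times; return all timings, their mean, and the minimum."""
    times = []
    for _ in range(iteration_number):
        start = time.perf_counter()
        func()
        times.append(time.perf_counter() - start)
    return times, sum(times) / len(times), min(times)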