Python keras.datasets.imdb.get_word_index() Examples
The following are 4 code examples of keras.datasets.imdb.get_word_index(), drawn from open-source projects.
You can go to the original project or source file by following the link above each example.
You may also want to check out all available functions/classes of the module
keras.datasets.imdb.
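Before the examples, here is a minimal sketch of the most common use of get_word_index(): inverting the returned word-to-index dictionary to decode an encoded review back into text. This sketch is not from any of the projects below; the offsets assume the defaults of imdb.load_data() (index_from=3, with indices 0, 1, and 2 reserved), and the <PAD>/<START>/<OOV> token names are illustrative placeholders.

from keras.datasets import imdb

# get_word_index() returns a dict mapping each word to its frequency rank.
word_index = imdb.get_word_index()

# load_data() shifts every rank up by index_from (3 by default) and reserves
# 0 (padding), 1 (start of sequence), and 2 (out-of-vocabulary).
(x_train, y_train), _ = imdb.load_data(num_words=10000)
index_to_word = {rank + 3: word for word, rank in word_index.items()}
index_to_word[0], index_to_word[1], index_to_word[2] = "<PAD>", "<START>", "<OOV>"

# Decode the first training review back into (approximate) text.
decoded = " ".join(index_to_word.get(i, "<OOV>") for i in x_train[0])
print(decoded)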
Example #1
Source File: test_datasets.py From DeepLearning_Wavelet-LSTM with MIT License
import random
import time

from keras.datasets import reuters


def test_reuters():
    # only run data download tests 20% of the time
    # to speed up frequent testing
    random.seed(time.time())
    if random.random() > 0.8:
        (x_train, y_train), (x_test, y_test) = reuters.load_data()
        assert len(x_train) == len(y_train)
        assert len(x_test) == len(y_test)
        assert len(x_train) + len(x_test) == 11228
        (x_train, y_train), (x_test, y_test) = reuters.load_data(maxlen=10)
        assert len(x_train) == len(y_train)
        assert len(x_test) == len(y_test)
        word_index = reuters.get_word_index()
        assert isinstance(word_index, dict)
Example #2
Source File: test_datasets.py From DeepLearning_Wavelet-LSTM with MIT License
import random
import time

from keras.datasets import imdb


def test_imdb():
    # only run data download tests 20% of the time
    # to speed up frequent testing
    random.seed(time.time())
    if random.random() > 0.8:
        (x_train, y_train), (x_test, y_test) = imdb.load_data()
        (x_train, y_train), (x_test, y_test) = imdb.load_data(maxlen=40)
        assert len(x_train) == len(y_train)
        assert len(x_test) == len(y_test)
        word_index = imdb.get_word_index()
        assert isinstance(word_index, dict)
Example #3
Source File: sentiment_cnn.py From CNN-for-Sentence-Classification-in-Keras with MIT License
import numpy as np
from keras.datasets import imdb
from keras.preprocessing import sequence

# `max_words`, `sequence_length`, and the `data_helpers` module are defined
# elsewhere in the original source file.


def load_data(data_source):
    assert data_source in ["keras_data_set", "local_dir"], "Unknown data source"
    if data_source == "keras_data_set":
        (x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=max_words, start_char=None,
                                                              oov_char=None, index_from=None)

        x_train = sequence.pad_sequences(x_train, maxlen=sequence_length, padding="post", truncating="post")
        x_test = sequence.pad_sequences(x_test, maxlen=sequence_length, padding="post", truncating="post")

        vocabulary = imdb.get_word_index()
        vocabulary_inv = dict((v, k) for k, v in vocabulary.items())
        vocabulary_inv[0] = "<PAD/>"
    else:
        x, y, vocabulary, vocabulary_inv_list = data_helpers.load_data()
        vocabulary_inv = {key: value for key, value in enumerate(vocabulary_inv_list)}
        y = y.argmax(axis=1)

        # Shuffle data
        shuffle_indices = np.random.permutation(np.arange(len(y)))
        x = x[shuffle_indices]
        y = y[shuffle_indices]
        train_len = int(len(x) * 0.9)
        x_train = x[:train_len]
        y_train = y[:train_len]
        x_test = x[train_len:]
        y_test = y[train_len:]

    return x_train, y_train, x_test, y_test, vocabulary_inv


# Data Preparation
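A possible invocation, assuming the module-level max_words and sequence_length constants from the original script are in scope (the call below is a hypothetical usage, not part of the source file):

# "keras_data_set" loads the pre-tokenized IMDB data shipped with Keras.
x_train, y_train, x_test, y_test, vocabulary_inv = load_data("keras_data_set")
print(len(vocabulary_inv))  # vocabulary size, including the "<PAD/>" entry at index 0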
Example #4
Source File: SentimentExtraction.py From MachineLearningSamples-SentimentAnalysis with MIT License
from keras.datasets import imdb
from keras.preprocessing import sequence
from keras.preprocessing.text import text_to_word_sequence


# Note: the default argument imdb.get_word_index() is evaluated once, when the
# function is defined, which downloads the index file at import time.
def get_vectors_from_text(dataset_list, word_to_ind=imdb.get_word_index(),
                          start_char=1, index_from=3, maxlen=400,
                          num_words=5000, oov_char=2, skip_top=0):
    '''
    Gets the list of vectors mapped according to the word-to-indices dictionary.

    @param
        dataset_list = list of review texts in unicode format
        word_to_ind = word-to-indices dictionary

    hyperparameters:
        start_char --> each encoded sequence starts with this char.
        index_from --> indices below this will not be encoded.
        maxlen --> maximum length of the sequence to be considered.
        num_words --> number of words to be considered according to the rank.
                      Rank is given according to the frequency of occurrence.
        oov_char --> out-of-vocabulary character.
        skip_top --> number of top-ranked words to be skipped.

    @returns:
        x_train: final list of vectors (as lists) of the review texts
    '''
    x_train = []
    for review_string in dataset_list:
        review_string_list = text_to_word_sequence(review_string)
        # Keep only words present in the index.
        x_predict = [word_to_ind[word] for word in review_string_list
                     if word in word_to_ind]
        x_train.append(x_predict)

    # add the start char and also take care of index_from
    if start_char is not None:
        x_train = [[start_char] + [w + index_from for w in x] for x in x_train]
    elif index_from:
        x_train = [[w + index_from for w in x] for x in x_train]

    # only maxlen is our cutoff criterion
    x_train = [ele[:maxlen] for ele in x_train]

    # if num_words is not given, infer it from the data
    if not num_words:
        num_words = max([max(x) for x in x_train])

    # by convention, use 2 as the OOV word;
    # reserve 'index_from' (=3 by default) characters:
    # 0 (padding), 1 (start), 2 (OOV)
    if oov_char is not None:
        x_train = [[w if (skip_top <= w < num_words) else oov_char for w in x]
                   for x in x_train]
    else:
        x_train = [[w for w in x if (skip_top <= w < num_words)] for x in x_train]

    # pad the sequences
    x_train = sequence.pad_sequences(x_train, maxlen=maxlen)

    # return the vector form of the text
    return x_train
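As a usage sketch (the review strings below are invented for illustration), the function encodes raw text with the same conventions that imdb.load_data() applies by default, so the result can be fed directly to a model trained on the standard encoding:

reviews = [u"this film was brilliant", u"a dull plot and wooden acting"]
vectors = get_vectors_from_text(reviews)
print(vectors.shape)  # (2, 400): two reviews, each padded/truncated to maxlen=400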