Python keras.datasets.imdb.load_data() Examples
The following are 30 code examples of keras.datasets.imdb.load_data(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module keras.datasets.imdb, or try the search function.
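Before the project-specific examples, here is a minimal sketch (not taken from any of the projects below) of what imdb.load_data() returns and how the sequences are usually padded; num_words=10000 and maxlen=200 are illustrative choices, not defaults:

from keras.datasets import imdb
from keras.preprocessing import sequence

# Keep only the 10,000 most frequent words; rarer words map to the out-of-vocabulary index.
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=10000)

# Each review is a list of integer word indices; labels are 0 (negative) or 1 (positive).
print(len(x_train), 'train sequences,', len(x_test), 'test sequences')
print('first review length:', len(x_train[0]), 'label:', y_train[0])

# Pad/truncate every review to a fixed length before feeding it to an Embedding layer.
x_train = sequence.pad_sequences(x_train, maxlen=200)
x_test = sequence.pad_sequences(x_test, maxlen=200)
print('x_train shape:', x_train.shape)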
Example #1
Source File: test_datasets.py From CAPTCHA-breaking with MIT License | 6 votes |
def test_cifar(self):
    print('cifar10')
    (X_train, y_train), (X_test, y_test) = cifar10.load_data()
    print(X_train.shape)
    print(X_test.shape)
    print(y_train.shape)
    print(y_test.shape)

    print('cifar100 fine')
    (X_train, y_train), (X_test, y_test) = cifar100.load_data('fine')
    print(X_train.shape)
    print(X_test.shape)
    print(y_train.shape)
    print(y_test.shape)

    print('cifar100 coarse')
    (X_train, y_train), (X_test, y_test) = cifar100.load_data('coarse')
    print(X_train.shape)
    print(X_test.shape)
    print(y_train.shape)
    print(y_test.shape)
Example #2
Source File: frontend_keras.py From plaidbench with Apache License 2.0 | 6 votes |
def setup_cifar(train, epoch_size):
    # Setup
    if train:
        # Training setup
        from keras.datasets import cifar10
        from keras.utils.np_utils import to_categorical
        click.echo('Loading CIFAR data')
        (x_train, y_train_cats), (_, _) = cifar10.load_data()
        x_train = x_train[:epoch_size]
        y_train_cats = y_train_cats[:epoch_size]
        y_train = to_categorical(y_train_cats, num_classes=1000)
    else:
        # Inference setup
        this_dir = os.path.dirname(os.path.abspath(__file__))
        cifar_path = os.path.join(this_dir, 'cifar16.npy')
        x_train = np.load(cifar_path).repeat(1 + epoch_size // 16, axis=0)[:epoch_size]
        y_train = None
    return x_train, y_train
Example #3
Source File: frontend_keras.py From plaidbench with Apache License 2.0 | 6 votes |
def setup_imdb(train, epoch_size):
    # Setup
    if train:
        # Training setup
        from keras.datasets import imdb
        from keras.preprocessing import sequence
        click.echo('Loading IMDB data')
        (x_train, y_train), (_, _) = imdb.load_data(num_words=imdb_max_features)
        x_train = sequence.pad_sequences(x_train, maxlen=imdb_max_length)
        x_train = x_train[:epoch_size]
        y_train = y_train[:epoch_size]
    else:
        # Inference setup
        this_dir = os.path.dirname(os.path.abspath(__file__))
        imdb_path = os.path.join(this_dir, 'imdb16.npy')
        x_train = np.load(imdb_path).repeat(1 + epoch_size // 16, axis=0)[:epoch_size]
        y_train = None
    return x_train, y_train
Example #4
Source File: lstm.py From hyperas with MIT License | 6 votes |
def data():
    maxlen = 100
    max_features = 20000

    print('Loading data...')
    # nb_words is the Keras 1.x argument name; in Keras 2 it was renamed to num_words.
    (X_train, y_train), (X_test, y_test) = imdb.load_data(nb_words=max_features)
    print(len(X_train), 'train sequences')
    print(len(X_test), 'test sequences')

    print("Pad sequences (samples x time)")
    X_train = sequence.pad_sequences(X_train, maxlen=maxlen)
    X_test = sequence.pad_sequences(X_test, maxlen=maxlen)
    print('X_train shape:', X_train.shape)
    print('X_test shape:', X_test.shape)
    return X_train, X_test, y_train, y_test, max_features, maxlen
Example #5
Source File: data.py From kopt with MIT License | 6 votes |
def data(max_features=5000, maxlen=400):
    print('Loading data...')
    (x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=max_features)

    # subset the data
    x_train = x_train[:1000]
    y_train = y_train[:1000]
    x_test = x_test[:100]
    y_test = y_test[:100]

    print(len(x_train), 'train sequences')
    print(len(x_test), 'test sequences')

    print('Pad sequences (samples x time)')
    x_train = sequence.pad_sequences(x_train, maxlen=maxlen)
    x_test = sequence.pad_sequences(x_test, maxlen=maxlen)
    print('x_train shape:', x_train.shape)
    print('x_test shape:', x_test.shape)
    return (x_train, y_train, [1, 2, 3, "dummy_data"]), (x_test, y_test)
Example #6
Source File: datasets.py From DEC-keras with MIT License | 6 votes |
def load_retures_keras():
    from keras.preprocessing.text import Tokenizer
    from keras.datasets import reuters
    max_words = 1000

    print('Loading data...')
    (x, y), (_, _) = reuters.load_data(num_words=max_words, test_split=0.)
    print(len(x), 'train sequences')

    num_classes = np.max(y) + 1
    print(num_classes, 'classes')

    print('Vectorizing sequence data...')
    tokenizer = Tokenizer(num_words=max_words)
    x = tokenizer.sequences_to_matrix(x, mode='binary')
    print('x_train shape:', x.shape)

    return x.astype(float), y
Example #7
Source File: test_datasets.py From CAPTCHA-breaking with MIT License | 6 votes |
def test_imdb(self):
    print('imdb')
    (X_train, y_train), (X_test, y_test) = imdb.load_data()
Example #8
Source File: imdb_sentiment.py From Deep-Learning-Quick-Reference with MIT License | 6 votes |
def main():
    data = load_data(20000)
    data = pad_sequences(data)
    model = build_network(vocab_size=data["vocab_size"],
                          embedding_dim=100,
                          sequence_length=data["sequence_length"])
    callbacks = create_callbacks("sentiment")
    model.fit(x=data["X_train"], y=data["y_train"],
              batch_size=32,
              epochs=10,
              validation_data=(data["X_test"], data["y_test"]),
              callbacks=callbacks)
    model.save("sentiment.h5")

    score, acc = model.evaluate(data["X_test"], data["y_test"], batch_size=32)
    print('Test loss:', score)
    print('Test accuracy:', acc)
Example #9
Source File: datasets.py From DEC-keras with MIT License | 6 votes |
def load_imdb():
    from keras.preprocessing.text import Tokenizer
    from keras.datasets import imdb
    max_words = 1000

    print('Loading data...')
    (x1, y1), (x2, y2) = imdb.load_data(num_words=max_words)
    x = np.concatenate((x1, x2))
    y = np.concatenate((y1, y2))
    print(len(x), 'train sequences')

    num_classes = np.max(y) + 1
    print(num_classes, 'classes')

    print('Vectorizing sequence data...')
    tokenizer = Tokenizer(num_words=max_words)
    x = tokenizer.sequences_to_matrix(x, mode='binary')
    print('x_train shape:', x.shape)

    return x.astype(float), y
Example #10
Source File: test_datasets.py From DeepLearning_Wavelet-LSTM with MIT License | 5 votes |
def test_imdb():
    # only run data download tests 20% of the time
    # to speed up frequent testing
    random.seed(time.time())
    if random.random() > 0.8:
        (x_train, y_train), (x_test, y_test) = imdb.load_data()
        (x_train, y_train), (x_test, y_test) = imdb.load_data(maxlen=40)
        assert len(x_train) == len(y_train)
        assert len(x_test) == len(y_test)
        word_index = imdb.get_word_index()
        assert isinstance(word_index, dict)
Example #11
Source File: test_datasets.py From DeepLearning_Wavelet-LSTM with MIT License | 5 votes |
def test_mnist():
    # only run data download tests 20% of the time
    # to speed up frequent testing
    random.seed(time.time())
    if random.random() > 0.8:
        (x_train, y_train), (x_test, y_test) = mnist.load_data()
        assert len(x_train) == len(y_train) == 60000
        assert len(x_test) == len(y_test) == 10000
Example #12
Source File: test_datasets.py From DeepLearning_Wavelet-LSTM with MIT License | 5 votes |
def test_imdb():
    # only run data download tests 20% of the time
    # to speed up frequent testing
    random.seed(time.time())
    if random.random() > 0.8:
        (x_train, y_train), (x_test, y_test) = imdb.load_data()
        (x_train, y_train), (x_test, y_test) = imdb.load_data(maxlen=40)
        assert len(x_train) == len(y_train)
        assert len(x_test) == len(y_test)
        word_index = imdb.get_word_index()
        assert isinstance(word_index, dict)
Example #13
Source File: test_datasets.py From DeepLearning_Wavelet-LSTM with MIT License | 5 votes |
def test_boston_housing():
    # only run data download tests 20% of the time
    # to speed up frequent testing
    random.seed(time.time())
    if random.random() > 0.8:
        (x_train, y_train), (x_test, y_test) = boston_housing.load_data()
        assert len(x_train) == len(y_train)
        assert len(x_test) == len(y_test)
Example #14
Source File: test_datasets.py From DeepLearning_Wavelet-LSTM with MIT License | 5 votes |
def test_cifar():
    # only run data download tests 20% of the time
    # to speed up frequent testing
    random.seed(time.time())
    if random.random() > 0.8:
        (x_train, y_train), (x_test, y_test) = cifar10.load_data()
        assert len(x_train) == len(y_train) == 50000
        assert len(x_test) == len(y_test) == 10000

        (x_train, y_train), (x_test, y_test) = cifar100.load_data('fine')
        assert len(x_train) == len(y_train) == 50000
        assert len(x_test) == len(y_test) == 10000

        (x_train, y_train), (x_test, y_test) = cifar100.load_data('coarse')
        assert len(x_train) == len(y_train) == 50000
        assert len(x_test) == len(y_test) == 10000
Example #15
Source File: test_datasets.py From DeepLearning_Wavelet-LSTM with MIT License | 5 votes |
def test_reuters():
    # only run data download tests 20% of the time
    # to speed up frequent testing
    random.seed(time.time())
    if random.random() > 0.8:
        (x_train, y_train), (x_test, y_test) = reuters.load_data()
        assert len(x_train) == len(y_train)
        assert len(x_test) == len(y_test)
        assert len(x_train) + len(x_test) == 11228

        (x_train, y_train), (x_test, y_test) = reuters.load_data(maxlen=10)
        assert len(x_train) == len(y_train)
        assert len(x_test) == len(y_test)

        word_index = reuters.get_word_index()
        assert isinstance(word_index, dict)
Example #16
Source File: test_datasets.py From DeepLearning_Wavelet-LSTM with MIT License | 5 votes |
def test_mnist():
    # only run data download tests 20% of the time
    # to speed up frequent testing
    random.seed(time.time())
    if random.random() > 0.8:
        (x_train, y_train), (x_test, y_test) = mnist.load_data()
        assert len(x_train) == len(y_train) == 60000
        assert len(x_test) == len(y_test) == 10000
Example #17
Source File: test_datasets.py From DeepLearning_Wavelet-LSTM with MIT License | 5 votes |
def test_imdb():
    # only run data download tests 20% of the time
    # to speed up frequent testing
    random.seed(time.time())
    if random.random() > 0.8:
        (x_train, y_train), (x_test, y_test) = imdb.load_data()
        (x_train, y_train), (x_test, y_test) = imdb.load_data(maxlen=40)
        assert len(x_train) == len(y_train)
        assert len(x_test) == len(y_test)
        word_index = imdb.get_word_index()
        assert isinstance(word_index, dict)
Example #18
Source File: test_datasets.py From DeepLearning_Wavelet-LSTM with MIT License | 5 votes |
def test_boston_housing():
    # only run data download tests 20% of the time
    # to speed up frequent testing
    random.seed(time.time())
    if random.random() > 0.8:
        (x_train, y_train), (x_test, y_test) = boston_housing.load_data()
        assert len(x_train) == len(y_train)
        assert len(x_test) == len(y_test)
Example #19
Source File: test_datasets.py From DeepLearning_Wavelet-LSTM with MIT License | 5 votes |
def test_cifar():
    # only run data download tests 20% of the time
    # to speed up frequent testing
    random.seed(time.time())
    if random.random() > 0.8:
        (x_train, y_train), (x_test, y_test) = cifar10.load_data()
        assert len(x_train) == len(y_train) == 50000
        assert len(x_test) == len(y_test) == 10000

        (x_train, y_train), (x_test, y_test) = cifar100.load_data('fine')
        assert len(x_train) == len(y_train) == 50000
        assert len(x_test) == len(y_test) == 10000

        (x_train, y_train), (x_test, y_test) = cifar100.load_data('coarse')
        assert len(x_train) == len(y_train) == 50000
        assert len(x_test) == len(y_test) == 10000
Example #20
Source File: test_datasets.py From DeepLearning_Wavelet-LSTM with MIT License | 5 votes |
def test_reuters():
    # only run data download tests 20% of the time
    # to speed up frequent testing
    random.seed(time.time())
    if random.random() > 0.8:
        (x_train, y_train), (x_test, y_test) = reuters.load_data()
        assert len(x_train) == len(y_train)
        assert len(x_test) == len(y_test)
        assert len(x_train) + len(x_test) == 11228

        (x_train, y_train), (x_test, y_test) = reuters.load_data(maxlen=10)
        assert len(x_train) == len(y_train)
        assert len(x_test) == len(y_test)

        word_index = reuters.get_word_index()
        assert isinstance(word_index, dict)
Example #21
Source File: test_datasets.py From DeepLearning_Wavelet-LSTM with MIT License | 5 votes |
def test_mnist():
    # only run data download tests 20% of the time
    # to speed up frequent testing
    random.seed(time.time())
    if random.random() > 0.8:
        (x_train, y_train), (x_test, y_test) = mnist.load_data()
        assert len(x_train) == len(y_train) == 60000
        assert len(x_test) == len(y_test) == 10000
Example #22
Source File: test_datasets.py From DeepLearning_Wavelet-LSTM with MIT License | 5 votes |
def test_boston_housing():
    # only run data download tests 20% of the time
    # to speed up frequent testing
    random.seed(time.time())
    if random.random() > 0.8:
        (x_train, y_train), (x_test, y_test) = boston_housing.load_data()
        assert len(x_train) == len(y_train)
        assert len(x_test) == len(y_test)
Example #23
Source File: test_datasets.py From CAPTCHA-breaking with MIT License | 5 votes |
def test_reuters(self):
    print('reuters')
    (X_train, y_train), (X_test, y_test) = reuters.load_data()
Example #24
Source File: cnn_lstm.py From hyperas with MIT License | 5 votes |
def data():
    np.random.seed(1337)  # for reproducibility
    max_features = 20000
    maxlen = 100

    # nb_words is the Keras 1.x argument name; in Keras 2 it was renamed to num_words.
    (X_train, y_train), (X_test, y_test) = imdb.load_data(nb_words=max_features)
    X_train = sequence.pad_sequences(X_train, maxlen=maxlen)
    X_test = sequence.pad_sequences(X_test, maxlen=maxlen)
    return X_train, X_test, y_train, y_test, maxlen, max_features
Example #25
Source File: imdb_sentiment.py From Deep-Learning-Quick-Reference with MIT License | 5 votes |
def load_data(vocab_size):
    data = dict()
    data["vocab_size"] = vocab_size
    (data["X_train"], data["y_train"]), (data["X_test"], data["y_test"]) = imdb.load_data(num_words=vocab_size)
    return data
Example #26
Source File: data.py From kopt with MIT License | 5 votes |
def data(max_features=5000, maxlen=80):
    (x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=max_features)
    x_train = sequence.pad_sequences(x_train, maxlen=maxlen)
    x_test = sequence.pad_sequences(x_test, maxlen=maxlen)
    return (x_train[:100], y_train[:100], max_features), (x_test, y_test)
Example #27
Source File: sentiment_cnn.py From CNN-for-Sentence-Classification-in-Keras with MIT License | 5 votes |
def load_data(data_source):
    assert data_source in ["keras_data_set", "local_dir"], "Unknown data source"
    if data_source == "keras_data_set":
        (x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=max_words, start_char=None,
                                                              oov_char=None, index_from=None)

        x_train = sequence.pad_sequences(x_train, maxlen=sequence_length, padding="post", truncating="post")
        x_test = sequence.pad_sequences(x_test, maxlen=sequence_length, padding="post", truncating="post")

        vocabulary = imdb.get_word_index()
        vocabulary_inv = dict((v, k) for k, v in vocabulary.items())
        vocabulary_inv[0] = "<PAD/>"
    else:
        x, y, vocabulary, vocabulary_inv_list = data_helpers.load_data()
        vocabulary_inv = {key: value for key, value in enumerate(vocabulary_inv_list)}
        y = y.argmax(axis=1)

        # Shuffle data
        shuffle_indices = np.random.permutation(np.arange(len(y)))
        x = x[shuffle_indices]
        y = y[shuffle_indices]
        train_len = int(len(x) * 0.9)
        x_train = x[:train_len]
        y_train = y[:train_len]
        x_test = x[train_len:]
        y_test = y[train_len:]

    return x_train, y_train, x_test, y_test, vocabulary_inv


# Data Preparation
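Example #27 passes start_char=None, oov_char=None and index_from=None, presumably so the integer indices line up directly with imdb.get_word_index(). With the default arguments, indices are shifted by index_from=3 and 0, 1 and 2 are reserved for padding, start and out-of-vocabulary markers, so decoding a review back to words looks roughly like this sketch (not part of the original example):

from keras.datasets import imdb

(x_train, y_train), _ = imdb.load_data(num_words=10000)
word_index = imdb.get_word_index()

# Shift the vocabulary by the default index_from=3 and add the reserved tokens.
inv_index = {i + 3: w for w, i in word_index.items()}
inv_index.update({0: '<PAD>', 1: '<START>', 2: '<UNK>'})

decoded = ' '.join(inv_index.get(i, '<UNK>') for i in x_train[0])
print('label:', y_train[0])
print(decoded[:200])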
Example #28
Source File: datasets.py From DEC-keras with MIT License | 5 votes |
def load_mnist():
    # the data, shuffled and split between train and test sets
    from keras.datasets import mnist
    (x_train, y_train), (x_test, y_test) = mnist.load_data()
    x = np.concatenate((x_train, x_test))
    y = np.concatenate((y_train, y_test))
    x = x.reshape((x.shape[0], -1))
    x = np.divide(x, 255.)
    print('MNIST samples', x.shape)
    return x, y
Example #29
Source File: datasets.py From DEC-keras with MIT License | 5 votes |
def load_fashion_mnist():
    from keras.datasets import fashion_mnist  # this requires keras>=2.0.9
    (x_train, y_train), (x_test, y_test) = fashion_mnist.load_data()
    x = np.concatenate((x_train, x_test))
    y = np.concatenate((y_train, y_test))
    x = x.reshape((x.shape[0], -1))
    x = np.divide(x, 255.)
    print('Fashion MNIST samples', x.shape)
    return x, y
Example #30
Source File: datasets.py From DEC-keras with MIT License | 5 votes |
def load_cifar10(data_path='./data/cifar10'):
    from keras.datasets import cifar10
    (train_x, train_y), (test_x, test_y) = cifar10.load_data()
    x = np.concatenate((train_x, test_x))
    y = np.concatenate((train_y, test_y)).reshape((60000,))

    # if features are ready, return them
    import os.path
    if os.path.exists(data_path + '/cifar10_features.npy'):
        return np.load(data_path + '/cifar10_features.npy'), y

    # extract features
    features = np.zeros((60000, 4096))
    for i in range(6):
        idx = range(i * 10000, (i + 1) * 10000)
        print("The %dth 10000 samples" % i)
        features[idx] = extract_vgg16_features(x[idx])

    # scale to [0,1]
    from sklearn.preprocessing import MinMaxScaler
    features = MinMaxScaler().fit_transform(features)

    # save features
    np.save(data_path + '/cifar10_features.npy', features)
    print('features saved to ' + data_path + '/cifar10_features.npy')
    return features, y