Python data_helpers.load_data() Examples
The following are 7 code examples of data_helpers.load_data(), drawn from open-source projects. The source file, project, and license are listed above each example. You may also want to check out all available functions/classes of the module data_helpers, or try the search function.
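As the examples below suggest, data_helpers.load_data() conventionally returns a four-tuple of padded word-index arrays, one-hot labels, and vocabulary mappings. A minimal sketch of inspecting that return value, assuming your local data_helpers module follows the same convention as the examples on this page:

import data_helpers

# Assumed convention (matches the examples below): padded sentence matrix,
# one-hot labels, and vocabulary mappings in both directions.
x, y, vocabulary, vocabulary_inv = data_helpers.load_data()

print('x shape:', x.shape)             # (num_sentences, max_sentence_length)
print('y shape:', y.shape)             # (num_sentences, num_classes), one-hot
print('vocab size:', len(vocabulary))  # word -> index mapping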
Example #1
Source File: text_cnn.py from dynamic-training-with-apache-mxnet-on-aws (Apache License 2.0), 5 votes
def data_iter(batch_size, num_embed, pre_trained_word2vec=False):
    print('Loading data...')
    if pre_trained_word2vec:
        word2vec = data_helpers.load_pretrained_word2vec('data/rt.vec')
        x, y = data_helpers.load_data_with_word2vec(word2vec)
        # reshape for convolution input
        x = np.reshape(x, (x.shape[0], 1, x.shape[1], x.shape[2]))
        embed_size = x.shape[-1]
        sentence_size = x.shape[2]
        vocab_size = -1
    else:
        x, y, vocab, vocab_inv = data_helpers.load_data()
        embed_size = num_embed
        sentence_size = x.shape[1]
        vocab_size = len(vocab)
    # randomly shuffle data
    np.random.seed(10)
    shuffle_indices = np.random.permutation(np.arange(len(y)))
    x_shuffled = x[shuffle_indices]
    y_shuffled = y[shuffle_indices]
    # split train/valid set
    x_train, x_dev = x_shuffled[:-1000], x_shuffled[-1000:]
    y_train, y_dev = y_shuffled[:-1000], y_shuffled[-1000:]
    print('Train/Valid split: %d/%d' % (len(y_train), len(y_dev)))
    print('train shape:', x_train.shape)
    print('valid shape:', x_dev.shape)
    print('sentence max words', sentence_size)
    print('embedding size', embed_size)
    print('vocab size', vocab_size)
    train = mx.io.NDArrayIter(x_train, y_train, batch_size, shuffle=True)
    valid = mx.io.NDArrayIter(x_dev, y_dev, batch_size)
    return (train, valid, sentence_size, embed_size, vocab_size)
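A hypothetical invocation of this helper, assuming the surrounding script's imports (mxnet as mx, numpy as np, data_helpers) and data files are in place; the batch size and embedding width shown here are illustrative, not values from the original project:

# Build train/valid iterators with randomly initialized embeddings.
train_iter, valid_iter, sentence_size, embed_size, vocab_size = data_iter(
    batch_size=50, num_embed=300, pre_trained_word2vec=False)
# train_iter and valid_iter are mx.io.NDArrayIter objects, ready to feed
# into an MXNet training loop.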
Example #2
Source File: text_cnn.py from dynamic-training-with-apache-mxnet-on-aws (Apache License 2.0), 5 votes
def data_iter(batch_size, num_embed, pre_trained_word2vec=False):
    logger.info('Loading data...')
    if pre_trained_word2vec:
        word2vec = data_helpers.load_pretrained_word2vec('data/rt.vec')
        x, y = data_helpers.load_data_with_word2vec(word2vec)
        # reshape for convolution input
        x = np.reshape(x, (x.shape[0], 1, x.shape[1], x.shape[2]))
        embed_size = x.shape[-1]
        sentence_size = x.shape[2]
        vocab_size = -1
    else:
        x, y, vocab, vocab_inv = data_helpers.load_data()
        embed_size = num_embed
        sentence_size = x.shape[1]
        vocab_size = len(vocab)
    # randomly shuffle data
    np.random.seed(10)
    shuffle_indices = np.random.permutation(np.arange(len(y)))
    x_shuffled = x[shuffle_indices]
    y_shuffled = y[shuffle_indices]
    # split train/valid set
    x_train, x_dev = x_shuffled[:-1000], x_shuffled[-1000:]
    y_train, y_dev = y_shuffled[:-1000], y_shuffled[-1000:]
    logger.info('Train/Valid split: %d/%d' % (len(y_train), len(y_dev)))
    logger.info('train shape: %(shape)s', {'shape': x_train.shape})
    logger.info('valid shape: %(shape)s', {'shape': x_dev.shape})
    logger.info('sentence max words: %(shape)s', {'shape': sentence_size})
    logger.info('embedding size: %(msg)s', {'msg': embed_size})
    logger.info('vocab size: %(msg)s', {'msg': vocab_size})
    train = mx.io.NDArrayIter(x_train, y_train, batch_size, shuffle=True)
    valid = mx.io.NDArrayIter(x_dev, y_dev, batch_size)
    return (train, valid, sentence_size, embed_size, vocab_size)
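This variant is identical to Example #1 except that progress is reported through a module-level logger rather than print. The logger setup is not part of the snippet; a minimal standard-library configuration such as the following (an assumption, not code from the original file) would make the info messages visible:

import logging

# Route INFO-level messages to stderr so logger.info() output appears.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)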
Example #3
Source File: text_cnn.py from training_results_v0.6 (Apache License 2.0), 5 votes
def data_iter(batch_size, num_embed, pre_trained_word2vec=False):
    print('Loading data...')
    if pre_trained_word2vec:
        word2vec = data_helpers.load_pretrained_word2vec('data/rt.vec')
        x, y = data_helpers.load_data_with_word2vec(word2vec)
        # reshape for convolution input
        x = np.reshape(x, (x.shape[0], 1, x.shape[1], x.shape[2]))
        embed_size = x.shape[-1]
        sentence_size = x.shape[2]
        vocab_size = -1
    else:
        x, y, vocab, vocab_inv = data_helpers.load_data()
        embed_size = num_embed
        sentence_size = x.shape[1]
        vocab_size = len(vocab)
    # randomly shuffle data
    np.random.seed(10)
    shuffle_indices = np.random.permutation(np.arange(len(y)))
    x_shuffled = x[shuffle_indices]
    y_shuffled = y[shuffle_indices]
    # split train/valid set
    x_train, x_dev = x_shuffled[:-1000], x_shuffled[-1000:]
    y_train, y_dev = y_shuffled[:-1000], y_shuffled[-1000:]
    print('Train/Valid split: %d/%d' % (len(y_train), len(y_dev)))
    print('train shape:', x_train.shape)
    print('valid shape:', x_dev.shape)
    print('sentence max words', sentence_size)
    print('embedding size', embed_size)
    print('vocab size', vocab_size)
    train = mx.io.NDArrayIter(x_train, y_train, batch_size, shuffle=True)
    valid = mx.io.NDArrayIter(x_dev, y_dev, batch_size)
    return (train, valid, sentence_size, embed_size, vocab_size)
Example #4
Source File: text_cnn.py from training_results_v0.6 (Apache License 2.0), 5 votes
def data_iter(batch_size, num_embed, pre_trained_word2vec=False):
    logger.info('Loading data...')
    if pre_trained_word2vec:
        word2vec = data_helpers.load_pretrained_word2vec('data/rt.vec')
        x, y = data_helpers.load_data_with_word2vec(word2vec)
        # reshape for convolution input
        x = np.reshape(x, (x.shape[0], 1, x.shape[1], x.shape[2]))
        embed_size = x.shape[-1]
        sentence_size = x.shape[2]
        vocab_size = -1
    else:
        x, y, vocab, vocab_inv = data_helpers.load_data()
        embed_size = num_embed
        sentence_size = x.shape[1]
        vocab_size = len(vocab)
    # randomly shuffle data
    np.random.seed(10)
    shuffle_indices = np.random.permutation(np.arange(len(y)))
    x_shuffled = x[shuffle_indices]
    y_shuffled = y[shuffle_indices]
    # split train/valid set
    x_train, x_dev = x_shuffled[:-1000], x_shuffled[-1000:]
    y_train, y_dev = y_shuffled[:-1000], y_shuffled[-1000:]
    logger.info('Train/Valid split: %d/%d' % (len(y_train), len(y_dev)))
    logger.info('train shape: %(shape)s', {'shape': x_train.shape})
    logger.info('valid shape: %(shape)s', {'shape': x_dev.shape})
    logger.info('sentence max words: %(shape)s', {'shape': sentence_size})
    logger.info('embedding size: %(msg)s', {'msg': embed_size})
    logger.info('vocab size: %(msg)s', {'msg': vocab_size})
    train = mx.io.NDArrayIter(x_train, y_train, batch_size, shuffle=True)
    valid = mx.io.NDArrayIter(x_dev, y_dev, batch_size)
    return (train, valid, sentence_size, embed_size, vocab_size)
Example #5
Source File: sentiment_cnn.py from CNN-for-Sentence-Classification-in-Keras (MIT License), 5 votes
def load_data(data_source):
    assert data_source in ["keras_data_set", "local_dir"], "Unknown data source"
    if data_source == "keras_data_set":
        (x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=max_words, start_char=None,
                                                              oov_char=None, index_from=None)
        x_train = sequence.pad_sequences(x_train, maxlen=sequence_length, padding="post", truncating="post")
        x_test = sequence.pad_sequences(x_test, maxlen=sequence_length, padding="post", truncating="post")
        vocabulary = imdb.get_word_index()
        vocabulary_inv = dict((v, k) for k, v in vocabulary.items())
        vocabulary_inv[0] = "<PAD/>"
    else:
        x, y, vocabulary, vocabulary_inv_list = data_helpers.load_data()
        vocabulary_inv = {key: value for key, value in enumerate(vocabulary_inv_list)}
        y = y.argmax(axis=1)
        # Shuffle data
        shuffle_indices = np.random.permutation(np.arange(len(y)))
        x = x[shuffle_indices]
        y = y[shuffle_indices]
        train_len = int(len(x) * 0.9)
        x_train = x[:train_len]
        y_train = y[:train_len]
        x_test = x[train_len:]
        y_test = y[train_len:]
    return x_train, y_train, x_test, y_test, vocabulary_inv
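A hypothetical call that exercises the local branch, assuming data_helpers is importable and the module-level max_words and sequence_length settings from the original script are defined:

# Load from the local data_helpers corpus rather than the Keras IMDB set.
x_train, y_train, x_test, y_test, vocabulary_inv = load_data("local_dir")
print('train/test sizes:', len(x_train), len(x_test))  # 90/10 split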
Example #6
Source File: text_cnn.py from SNIPER-mxnet (Apache License 2.0), 5 votes
def data_iter(batch_size, num_embed, pre_trained_word2vec=False):
    print('Loading data...')
    if pre_trained_word2vec:
        word2vec = data_helpers.load_pretrained_word2vec('data/rt.vec')
        x, y = data_helpers.load_data_with_word2vec(word2vec)
        # reshape for convolution input
        x = np.reshape(x, (x.shape[0], 1, x.shape[1], x.shape[2]))
        embed_size = x.shape[-1]
        sentence_size = x.shape[2]
        vocab_size = -1
    else:
        x, y, vocab, vocab_inv = data_helpers.load_data()
        embed_size = num_embed
        sentence_size = x.shape[1]
        vocab_size = len(vocab)
    # randomly shuffle data
    np.random.seed(10)
    shuffle_indices = np.random.permutation(np.arange(len(y)))
    x_shuffled = x[shuffle_indices]
    y_shuffled = y[shuffle_indices]
    # split train/valid set
    x_train, x_dev = x_shuffled[:-1000], x_shuffled[-1000:]
    y_train, y_dev = y_shuffled[:-1000], y_shuffled[-1000:]
    print('Train/Valid split: %d/%d' % (len(y_train), len(y_dev)))
    print('train shape:', x_train.shape)
    print('valid shape:', x_dev.shape)
    print('sentence max words', sentence_size)
    print('embedding size', embed_size)
    print('vocab size', vocab_size)
    train = mx.io.NDArrayIter(x_train, y_train, batch_size, shuffle=True)
    valid = mx.io.NDArrayIter(x_dev, y_dev, batch_size)
    return (train, valid, sentence_size, embed_size, vocab_size)
Example #7
Source File: text_cnn.py from SNIPER-mxnet (Apache License 2.0), 5 votes
def data_iter(batch_size, num_embed, pre_trained_word2vec=False):
    logger.info('Loading data...')
    if pre_trained_word2vec:
        word2vec = data_helpers.load_pretrained_word2vec('data/rt.vec')
        x, y = data_helpers.load_data_with_word2vec(word2vec)
        # reshape for convolution input
        x = np.reshape(x, (x.shape[0], 1, x.shape[1], x.shape[2]))
        embed_size = x.shape[-1]
        sentence_size = x.shape[2]
        vocab_size = -1
    else:
        x, y, vocab, vocab_inv = data_helpers.load_data()
        embed_size = num_embed
        sentence_size = x.shape[1]
        vocab_size = len(vocab)
    # randomly shuffle data
    np.random.seed(10)
    shuffle_indices = np.random.permutation(np.arange(len(y)))
    x_shuffled = x[shuffle_indices]
    y_shuffled = y[shuffle_indices]
    # split train/valid set
    x_train, x_dev = x_shuffled[:-1000], x_shuffled[-1000:]
    y_train, y_dev = y_shuffled[:-1000], y_shuffled[-1000:]
    logger.info('Train/Valid split: %d/%d' % (len(y_train), len(y_dev)))
    logger.info('train shape: %(shape)s', {'shape': x_train.shape})
    logger.info('valid shape: %(shape)s', {'shape': x_dev.shape})
    logger.info('sentence max words: %(shape)s', {'shape': sentence_size})
    logger.info('embedding size: %(msg)s', {'msg': embed_size})
    logger.info('vocab size: %(msg)s', {'msg': vocab_size})
    train = mx.io.NDArrayIter(x_train, y_train, batch_size, shuffle=True)
    valid = mx.io.NDArrayIter(x_dev, y_dev, batch_size)
    return (train, valid, sentence_size, embed_size, vocab_size)