Python utils.get_data() Examples
The following are 11 code examples of utils.get_data(). You can go to the original project or source file by following the links above each example. You may also want to check out the other available functions and classes of the utils module in each project.
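Note that utils is a project-local module in each example, so the signature of get_data() differs between projects: the Keras GAN examples pass an HDF5 path and a split name, the cactus-maml baselines pass a dataset name, an encoding size, and a test-set flag, philo2vec calls it with no arguments, and the Bitfinex importer passes a URL. As a rough orientation only, a loader of the first kind could look something like the sketch below; the HDF5 keys and the returned tuple are assumptions for illustration, not the actual implementation from any of the projects listed here.

# Hypothetical sketch of a project-local utils.get_data() for an HDF5
# text-to-image dataset. The keys and return structure are assumed for
# illustration and are not taken from any repository shown below.
import h5py
import numpy as np

def get_data(filepath, split='train'):
    """Load the images, text embeddings, and captions of one dataset split."""
    with h5py.File(filepath, 'r') as f:
        group = f[split]
        images = np.asarray(group['images'])
        embeddings = np.asarray(group['embeddings'])
        captions = np.asarray(group['captions'])
    return images, embeddings, captions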
Example #1
Source File: interpolation_in_text.py From Hands-On-Generative-Adversarial-Networks-with-Keras with MIT License | 6 votes |
def infer(data_filepath='data/flowers.hdf5', z_dim=128, out_dir='gan', n_steps=10):
    G = load_model(out_dir)
    val_data = get_data(data_filepath, 'train')
    val_data = next(iterate_minibatches(val_data, 2))
    emb_source, emb_target = val_data[1]
    txts = val_data[2]

    z = np.random.uniform(-1, 1, size=(1, z_dim))

    G.trainable = False
    for i in range(n_steps+1):
        p = i/float(n_steps)
        emb = emb_source * (1-p) + emb_target * p
        emb = emb[None, :]
        fake_image = G.predict([z, emb])[0]
        img = ((fake_image + 1)*0.5)
        plt.imsave("{}/fake_text_interpolation_i{}".format(out_dir, i), img)
        print(i, str(txts[int(round(p))]).strip(),
              file=open("{}/fake_text_interpolation.txt".format(out_dir), "a"))
Example #2
Source File: inference.py From Hands-On-Generative-Adversarial-Networks-with-Keras with MIT License | 6 votes |
def infer(data_filepath='data/flowers.hdf5', z_dim=128, out_dir='gan', n_samples=5):
    G = load_model(out_dir)
    val_data = get_data(data_filepath, 'train')
    val_data = next(iterate_minibatches(val_data, n_samples))
    emb, txts = val_data[1], val_data[2]

    # sample z vector for inference
    z = np.random.uniform(-1, 1, size=(n_samples, z_dim))

    G.trainable = False
    fake_images = G.predict([z, emb])
    for i in range(n_samples):
        img = ((fake_images[i] + 1)*0.5)
        plt.imsave("{}/fake_{}".format(out_dir, i), img)
        print(i, str(txts[i]).strip(),
              file=open("{}/fake_text.txt".format(out_dir), "a"))
Example #3
Source File: baselines.py From cactus-maml with MIT License | 5 votes |
def embedding_nearest_neighbour(n_neighbors=FLAGS.n_neighbours, num_classes=FLAGS.way,
                                num_shots=FLAGS.shot, num_tasks=FLAGS.num_tasks,
                                num_encoding_dims=FLAGS.num_encoding_dims,
                                test_set=FLAGS.test_set, dataset=FLAGS.dataset):
    print('{}-way {}-shot embedding nearest neighbour'.format(num_classes, num_shots))
    if dataset != 'celeba':
        _, _, _, X_test, Y_test, Z_test = get_data(dataset, num_encoding_dims, test_set)
        task_generator = TaskGenerator(num_classes=num_classes, num_train_samples_per_class=num_shots,
                                       num_samples_per_class=num_shots+5)
        partition = task_generator.get_partition_from_labels(Y_test)
        partitions = [partition]
    else:
        _, _, _, X_test, attributes_test, Z_test = get_data(dataset, num_encoding_dims, test_set)
        task_generator = TaskGenerator(num_classes=num_classes, num_train_samples_per_class=num_shots,
                                       num_samples_per_class=num_shots+5)
        partitions = task_generator.get_celeba_task_pool(attributes_test)
    tasks = task_generator.get_tasks(num_tasks=num_tasks, partitions=partitions)

    accuracies = []
    for i_task, task in enumerate(tasks):
        if (i_task + 1) % (num_tasks // 10) == 0:
            print('test {}, accuracy {:.5}'.format(i_task + 1, np.mean(accuracies)))
        ind_train_few, Y_train_few, ind_test_few, Y_test_few = task
        Z_train_few, Z_test_few = Z_test[ind_train_few], Z_test[ind_test_few]
        knn = KNeighborsClassifier(n_neighbors=n_neighbors, n_jobs=-1)
        knn.fit(Z_train_few, Y_train_few)
        accuracy = knn.score(Z_test_few, Y_test_few)
        accuracies.append(accuracy)

    print('{}-way {}-shot embedding nearest neighbour: {:.5} with 95% CI {:.5} over {} tests'.format(
        num_classes, num_shots, np.mean(accuracies), 1.96*np.std(accuracies)/np.sqrt(num_tasks), num_tasks))
Example #4
Source File: baselines.py From cactus-maml with MIT License | 5 votes |
def embedding_logistic_regression(C=FLAGS.inverse_reg, penalty='l2', multi_class='multinomial',
                                  num_classes=FLAGS.way, num_shots=FLAGS.shot, num_tasks=FLAGS.num_tasks,
                                  num_encoding_dims=FLAGS.num_encoding_dims,
                                  test_set=FLAGS.test_set, dataset=FLAGS.dataset):
    print('{}-way {}-shot logistic regression'.format(num_classes, num_shots))
    if dataset != 'celeba':
        _, _, _, X_test, Y_test, Z_test = get_data(dataset, num_encoding_dims, test_set)
        task_generator = TaskGenerator(num_classes=num_classes, num_train_samples_per_class=num_shots,
                                       num_samples_per_class=num_shots+5)
        partition = task_generator.get_partition_from_labels(Y_test)
        partitions = [partition]
    else:
        _, _, _, X_test, attributes_test, Z_test = get_data(dataset, num_encoding_dims, test_set)
        task_generator = TaskGenerator(num_classes=num_classes, num_train_samples_per_class=num_shots,
                                       num_samples_per_class=num_shots+5)
        partitions = task_generator.get_celeba_task_pool(attributes_test)
    tasks = task_generator.get_tasks(num_tasks=num_tasks, partitions=partitions)

    train_accuracies, test_accuracies = [], []
    start = time.time()
    for i_task, task in enumerate(tasks):
        if (i_task + 1) % (num_tasks // 10) == 0:
            print('test {}, train accuracy {:.5}, test accuracy {:.5}'.format(
                i_task + 1, np.mean(train_accuracies), np.mean(test_accuracies)))
        ind_train_few, Y_train_few, ind_test_few, Y_test_few = task
        Z_train_few, Z_test_few = Z_test[ind_train_few], Z_test[ind_test_few]
        logistic_regression = LogisticRegression(n_jobs=-1, penalty=penalty, C=C, multi_class=multi_class,
                                                 solver='saga', max_iter=1000)
        logistic_regression.fit(Z_train_few, Y_train_few)
        test_accuracies.append(logistic_regression.score(Z_test_few, Y_test_few))
        train_accuracies.append(logistic_regression.score(Z_train_few, Y_train_few))

    print('penalty={}, C={}, multi_class={}'.format(penalty, C, multi_class))
    print('{}-way {}-shot logistic regression: {:.5} with 95% CI {:.5} over {} tests'.format(
        num_classes, num_shots, np.mean(test_accuracies), 1.96*np.std(test_accuracies)/np.sqrt(num_tasks), num_tasks))
    print('Mean training accuracy: {:.5}; standard deviation: {:.5}'.format(
        np.mean(train_accuracies), np.std(train_accuracies)))
    print('{} few-shot classification tasks: {:.5} seconds.'.format(num_tasks, time.time() - start))
Example #5
Source File: models.py From philo2vec with MIT License | 5 votes |
def cbow():
    params = {
        'model': Philo2Vec.CBOW,
        'loss_fct': Philo2Vec.NCE,
        'context_window': 5,
    }
    x_train = get_data()
    validation_words = ['kant', 'descartes', 'human', 'natural']
    x_validation = [StemmingLookup.stem(w) for w in validation_words]
    vb = VocabBuilder(x_train, min_frequency=5)
    pv = Philo2Vec(vb, **params)
    pv.fit(epochs=30, validation_data=x_validation)
    return pv
Example #6
Source File: models.py From philo2vec with MIT License | 5 votes |
def skip_gram():
    params = {
        'model': Philo2Vec.SKIP_GRAM,
        'loss_fct': Philo2Vec.SOFTMAX,
        'context_window': 2,
        'num_skips': 4,
        'neg_sample_size': 2,
    }
    x_train = get_data()
    validation_words = ['kant', 'descartes', 'human', 'natural']
    x_validation = [StemmingLookup.stem(w) for w in validation_words]
    vb = VocabBuilder(x_train, min_frequency=5)
    pv = Philo2Vec(vb, **params)
    pv.fit(epochs=30, validation_data=x_validation)
    return pv
Example #7
Source File: main.py From bitfinex-ohlc-import with MIT License | 5 votes |
def get_candles(symbol, start_date, end_date, timeframe='1m', limit=1000):
    """
    Return symbol candles between two dates.
    https://docs.bitfinex.com/v2/reference#rest-public-candles
    """
    # timestamps need to include milliseconds
    start_date = start_date.int_timestamp * 1000
    end_date = end_date.int_timestamp * 1000
    url = f'{API_URL}/candles/trade:{timeframe}:t{symbol.upper()}/hist' \
          f'?start={start_date}&end={end_date}&limit={limit}'

    data = get_data(url)
    return data
Example #8
Source File: baselines.py From cactus-maml with MIT License | 4 votes |
def embedding_cluster_matching(num_classes=FLAGS.way, num_shots=FLAGS.shot, num_tasks=FLAGS.num_tasks,
                               num_clusters=FLAGS.num_clusters, num_encoding_dims=FLAGS.num_encoding_dims,
                               dataset=FLAGS.dataset, test_set=FLAGS.test_set):
    if dataset != 'celeba':
        _, _, Z_train, X_test, Y_test, Z_test = get_data(dataset, num_encoding_dims, test_set)
    else:
        _, _, Z_train, X_test, attributes_test, Z_test = get_data(dataset, num_encoding_dims, test_set)

    start = time.time()
    kmeans = KMeans(n_clusters=num_clusters, init='k-means++', random_state=0, precompute_distances=True,
                    n_jobs=10, n_init=10, max_iter=3000).fit(Z_train)
    print("Ran KMeans with n_clusters={} in {:.5} seconds, objective {}.".format(
        num_clusters, time.time() - start, kmeans.score(Z_train)))

    if dataset != 'celeba':
        task_generator = TaskGenerator(num_classes=num_classes, num_train_samples_per_class=num_shots,
                                       num_samples_per_class=num_shots+5)
        partition = task_generator.get_partition_from_labels(Y_test)
        partitions = [partition]
    else:
        task_generator = TaskGenerator(num_classes=num_classes, num_train_samples_per_class=num_shots,
                                       num_samples_per_class=num_shots+5)
        partitions = task_generator.get_celeba_task_pool(attributes_test)
    tasks = task_generator.get_tasks(num_tasks=num_tasks, partitions=partitions)

    for num_shots in [FLAGS.shot]:
        accuracies = []
        start = time.time()
        num_degenerate_tasks = 0
        for i_task, task in enumerate(tasks):
            if (i_task + 1) % (num_tasks // 10) == 0:
                print('test {}, accuracy {:.5}'.format(i_task + 1, np.mean(accuracies)))
            ind_train_few, Y_train_few, ind_test_few, Y_test_few = task
            Z_train_few, Z_test_few = Z_test[ind_train_few], Z_test[ind_test_few]
            clusters_to_labels_few = defaultdict(list)
            examples_to_clusters_few = kmeans.predict(Z_train_few)
            for i in range(len(Y_train_few)):
                clusters_to_labels_few[examples_to_clusters_few[i]].append(Y_train_few[i])
            for (cluster, labels) in list(clusters_to_labels_few.items()):
                uniques, counts = np.unique(labels, return_counts=True)
                clusters_to_labels_few[cluster] = [uniques[np.argmax(counts)]]
                # if len(np.unique(labels)) > 1:      # delete degenerate clusters
                #     del clusters_to_labels_few[cluster]
            if len(clusters_to_labels_few) == 0:
                num_degenerate_tasks += 1
                continue
            centroid_ind_to_cluster = np.array(list(clusters_to_labels_few.keys()))
            centroids = kmeans.cluster_centers_[centroid_ind_to_cluster]
            distances = distance.cdist(Z_test_few, centroids)
            predicted_clusters = centroid_ind_to_cluster[np.argmin(distances, axis=1)]
            predictions = []
            for cluster in predicted_clusters:
                predictions.append(clusters_to_labels_few[cluster][0])
            accuracies.append(accuracy_score(Y_test_few, predictions))

        print('dataset={}, encoder={}, num_encoding_dims={}, num_clusters={}'.format(
            dataset, FLAGS.encoder, num_clusters, num_encoding_dims))
        print('{}-way {}-shot nearest-cluster after clustering embeddings: {:.5} with 95% CI {:.5} over {} tests'.format(
            num_classes, num_shots, np.mean(accuracies), 1.96*np.std(accuracies)/np.sqrt(num_tasks), num_tasks))
        print('{} few-shot classification tasks: {:.5} seconds with {} degenerate tasks.'.format(
            num_tasks, time.time() - start, num_degenerate_tasks))
Example #9
Source File: baselines.py From cactus-maml with MIT License | 4 votes |
def embedding_mlp(num_classes=FLAGS.way, num_shots=FLAGS.shot, num_tasks=FLAGS.num_tasks,
                  num_encoding_dims=FLAGS.num_encoding_dims, test_set=FLAGS.test_set,
                  dataset=FLAGS.dataset, units=FLAGS.units, dropout=FLAGS.dropout):
    import keras
    from keras.layers import Dense, Dropout
    from keras.losses import categorical_crossentropy
    from keras.callbacks import EarlyStopping
    from keras import backend as K

    if dataset != 'celeba':
        _, _, _, X_test, Y_test, Z_test = get_data(dataset, num_encoding_dims, test_set)
        task_generator = TaskGenerator(num_classes=num_classes, num_train_samples_per_class=num_shots,
                                       num_samples_per_class=num_shots+5)
        partition = task_generator.get_partition_from_labels(Y_test)
        partitions = [partition]
    else:
        _, _, _, X_test, attributes_test, Z_test = get_data(dataset, num_encoding_dims, test_set)
        task_generator = TaskGenerator(num_classes=num_classes, num_train_samples_per_class=num_shots,
                                       num_samples_per_class=num_shots+5)
        partitions = task_generator.get_celeba_task_pool(attributes_test)
    tasks = task_generator.get_tasks(num_tasks=num_tasks, partitions=partitions)

    train_accuracies, test_accuracies = [], []
    start = time.time()
    for i_task, task in enumerate(tqdm(tasks)):
        if (i_task + 1) % (num_tasks // 10) == 0:
            tqdm.write('test {}, accuracy {:.5}'.format(i_task + 1, np.mean(test_accuracies)))
        ind_train_few, Y_train_few, ind_test_few, Y_test_few = task
        Z_train_few, Z_test_few = Z_test[ind_train_few], Z_test[ind_test_few]
        Y_train_few, Y_test_few = keras.utils.to_categorical(Y_train_few, num_classes=num_classes), \
                                  keras.utils.to_categorical(Y_test_few, num_classes=num_classes)

        model = keras.Sequential()
        model.add(Dense(units=units, activation='relu', input_dim=Z_train_few.shape[1]))
        model.add(Dropout(rate=dropout))
        model.add(Dense(units=num_classes, activation='softmax'))
        model.compile(loss=categorical_crossentropy, optimizer=keras.optimizers.Adam(), metrics=['accuracy'])

        early_stopping = EarlyStopping(monitor='val_loss', patience=2)
        model.fit(Z_train_few, Y_train_few, batch_size=Z_train_few.shape[0], epochs=500, verbose=0,
                  validation_data=(Z_test_few, Y_test_few), callbacks=[early_stopping])

        train_score = model.evaluate(Z_train_few, Y_train_few, verbose=0)
        train_accuracies.append(train_score[1])
        test_score = model.evaluate(Z_test_few, Y_test_few, verbose=0)
        test_accuracies.append(test_score[1])
        K.clear_session()

    print('units={}, dropout={}'.format(units, dropout))
    print('{}-way {}-shot embedding mlp: {:.5} with 95% CI {:.5} over {} tests'.format(
        num_classes, num_shots, np.mean(test_accuracies), 1.96*np.std(test_accuracies)/np.sqrt(num_tasks), num_tasks))
    print('Mean training accuracy: {:.5}; standard deviation: {:.5}'.format(
        np.mean(train_accuracies), np.std(train_accuracies)))
    print('{} few-shot classification tasks: {:.5} seconds.'.format(num_tasks, time.time() - start))
Example #10
Source File: baselines.py From cactus-maml with MIT License | 4 votes |
def cluster_color_logistic_regression(C=FLAGS.inverse_reg, penalty='l2', multi_class='multinomial',
                                      n_clusters=FLAGS.num_clusters, num_classes=FLAGS.way,
                                      num_shots=FLAGS.shot, num_tasks=FLAGS.num_tasks,
                                      num_encoding_dims=FLAGS.num_encoding_dims,
                                      test_set=FLAGS.test_set, dataset=FLAGS.dataset):
    if dataset != 'celeba':
        _, _, Z_train, X_test, Y_test, Z_test = get_data(dataset, num_encoding_dims, test_set)
    else:
        _, _, Z_train, X_test, attributes_test, Z_test = get_data(dataset, num_encoding_dims, test_set)

    start = time.time()
    kmeans = KMeans(n_clusters=n_clusters, precompute_distances=True, n_jobs=-1, n_init=100).fit(Z_train)
    print("Ran KMeans with n_clusters={} in {:.5} seconds.".format(n_clusters, time.time() - start))
    uniques, counts = np.unique(kmeans.labels_, return_counts=True)

    if dataset != 'celeba':
        task_generator = TaskGenerator(num_classes=num_classes, num_train_samples_per_class=num_shots,
                                       num_samples_per_class=num_shots+5)
        partition = task_generator.get_partition_from_labels(Y_test)
        partitions = [partition]
    else:
        task_generator = TaskGenerator(num_classes=num_classes, num_train_samples_per_class=num_shots,
                                       num_samples_per_class=num_shots+5)
        partitions = task_generator.get_celeba_task_pool(attributes_test)
    tasks = task_generator.get_tasks(num_tasks=num_tasks, partitions=partitions)

    train_accuracies, test_accuracies = [], []
    start = time.time()
    clusters_to_indices = task_generator.get_partition_from_labels(kmeans.labels_)
    for i_task, task in enumerate(tasks):
        if (i_task + 1) % (num_tasks // 10) == 0:
            print('test {}, train accuracy {:.5}, test accuracy {:.5}'.format(
                i_task + 1, np.mean(train_accuracies), np.mean(test_accuracies)))
        ind_train_few, Y_train_few, ind_test_few, Y_test_few = task
        Z_train_few, Z_test_few = Z_test[ind_train_few], Z_test[ind_test_few]
        clusters_to_labels_few = defaultdict(list)
        indices_to_clusters_few = kmeans.predict(Z_train_few)
        for i in range(Z_train_few.shape[0]):
            clusters_to_labels_few[indices_to_clusters_few[i]].append(Y_train_few[i])
        Z_train_fit, Y_train_fit = [], []
        for cluster in list(clusters_to_labels_few.keys()):
            labels = clusters_to_labels_few[cluster]
            if len(np.unique(labels)) == 1:     # skip degenerate clusters
                Z_train_fit.extend(Z_train[clusters_to_indices[cluster]])   # propagate labels to unlabeled datapoints
                Y_train_fit.extend([labels[0] for i in range(len(clusters_to_indices[cluster]))])
        Z_train_fit, Y_train_fit = np.stack(Z_train_fit, axis=0), np.stack(Y_train_fit, axis=0)
        Z_train_fit = np.concatenate((Z_train_fit, Z_train_few), axis=0)
        Y_train_fit = np.concatenate((Y_train_fit, Y_train_few), axis=0)

        logistic_regression = LogisticRegression(n_jobs=-1, penalty=penalty, C=C, multi_class=multi_class,
                                                 solver='saga', max_iter=500)
        logistic_regression.fit(Z_train_fit, Y_train_fit)
        test_accuracies.append(logistic_regression.score(Z_test_few, Y_test_few))
        train_accuracies.append(logistic_regression.score(Z_train_fit, Y_train_fit))

    print('n_clusters={}, penalty={}, C={}, multi_class={}'.format(n_clusters, penalty, C, multi_class))
    print('{}-way {}-shot logistic regression after clustering: {:.5} with 95% CI {:.5} over {} tests'.format(
        num_classes, num_shots, np.mean(test_accuracies), 1.96*np.std(test_accuracies)/np.sqrt(num_tasks), num_tasks))
    print('Mean training accuracy: {:.5}; standard deviation: {:.5}'.format(
        np.mean(train_accuracies), np.std(train_accuracies)))
    print('{} few-shot classification tasks: {:.5} seconds.'.format(num_tasks, time.time() - start))
Example #11
Source File: arithmetic_in_text.py From Hands-On-Generative-Adversarial-Networks-with-Keras with MIT License | 4 votes |
def infer(data_filepath='data/flowers.hdf5', z_dim=128, out_dir='gan', n_steps=10):
    G = load_model(out_dir)
    val_data = get_data(data_filepath, 'train')
    val_data = next(iterate_minibatches(val_data, 2))
    emb_a, emb_b = val_data[1]
    txts = val_data[2]

    # add batch dimension
    emb_a, emb_b = emb_a[None, :], emb_b[None, :]

    # sample z vector for inference
    z = np.random.uniform(-1, 1, size=(1, z_dim))

    G.trainable = False
    # predict using embeddings a and b
    fake_image_a = G.predict([z, emb_a])[0]
    fake_image_b = G.predict([z, emb_b])[0]

    # add and subtract
    emb_add = (emb_a + emb_b)
    emb_a_sub_b = (emb_a - emb_b)
    emb_b_sub_a = (emb_b - emb_a)

    # generate images
    fake_a = G.predict([z, emb_a])[0]
    fake_b = G.predict([z, emb_b])[0]
    fake_add = G.predict([z, emb_add])[0]
    fake_a_sub_b = G.predict([z, emb_a_sub_b])[0]
    fake_b_sub_a = G.predict([z, emb_b_sub_a])[0]

    fake_a = ((fake_a + 1)*0.5)
    fake_b = ((fake_b + 1)*0.5)
    fake_add = ((fake_add + 1)*0.5)
    fake_a_sub_b = ((fake_a_sub_b + 1)*0.5)
    fake_b_sub_a = ((fake_b_sub_a + 1)*0.5)

    plt.imsave("{}/fake_text_arithmetic_a".format(out_dir), fake_a)
    plt.imsave("{}/fake_text_arithmetic_b".format(out_dir), fake_b)
    plt.imsave("{}/fake_text_arithmetic_add".format(out_dir), fake_add)
    plt.imsave("{}/fake_text_arithmetic_a_sub_b".format(out_dir), fake_a_sub_b)
    plt.imsave("{}/fake_text_arithmetic_b_sub_a".format(out_dir), fake_b_sub_a)

    print(str(txts[0]), str(txts[1]),
          file=open("{}/fake_text_arithmetic.txt".format(out_dir), "a"))