Python utils.get_data() Examples

The following are 11 code examples of utils.get_data(). Because utils is a project-local module rather than a published library, the signature and return value of get_data() differ from project to project; each example lists the source file and project it was taken from.
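For orientation, here is a minimal sketch of what the HDF5-backed variant used in the GAN examples below might look like. The body is hypothetical, since every project defines its own get_data():

import h5py

def get_data(data_filepath, split):
    # hypothetical sketch: read one split (e.g. 'train') of an HDF5 dataset into memory
    with h5py.File(data_filepath, 'r') as f:
        return f[split][()]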
Example #1
Source File: interpolation_in_text.py    From Hands-On-Generative-Adversarial-Networks-with-Keras with MIT License
import numpy as np
import matplotlib.pyplot as plt

# load_model, get_data and iterate_minibatches are helpers defined in this project

def infer(data_filepath='data/flowers.hdf5', z_dim=128, out_dir='gan',
          n_steps=10):

    G = load_model(out_dir)
    val_data = get_data(data_filepath, 'train')
    val_data = next(iterate_minibatches(val_data, 2))
    emb_source, emb_target = val_data[1]
    txts = val_data[2]

    # sample a single z vector and reuse it for every interpolation step
    z = np.random.uniform(-1, 1, size=(1, z_dim))

    G.trainable = False
    for i in range(n_steps + 1):
        # interpolate linearly between the source and target text embeddings
        p = i / float(n_steps)
        emb = emb_source * (1 - p) + emb_target * p
        emb = emb[None, :]  # add batch dimension
        fake_image = G.predict([z, emb])[0]
        # rescale generator output from [-1, 1] to [0, 1] before saving
        img = (fake_image + 1) * 0.5
        plt.imsave("{}/fake_text_interpolation_i{}".format(out_dir, i), img)
        with open("{}/fake_text_interpolation.txt".format(out_dir), "a") as f:
            print(i, str(txts[int(round(p))]).strip(), file=f)
Example #2
Source File: inference.py    From Hands-On-Generative-Adversarial-Networks-with-Keras with MIT License
import numpy as np
import matplotlib.pyplot as plt

# load_model, get_data and iterate_minibatches are helpers defined in this project

def infer(data_filepath='data/flowers.hdf5', z_dim=128, out_dir='gan',
          n_samples=5):

    G = load_model(out_dir)
    val_data = get_data(data_filepath, 'train')
    val_data = next(iterate_minibatches(val_data, n_samples))
    emb, txts = val_data[1], val_data[2]

    # sample one z vector per sample for inference
    z = np.random.uniform(-1, 1, size=(n_samples, z_dim))

    G.trainable = False
    fake_images = G.predict([z, emb])
    for i in range(n_samples):
        # rescale generator output from [-1, 1] to [0, 1] before saving
        img = (fake_images[i] + 1) * 0.5
        plt.imsave("{}/fake_{}".format(out_dir, i), img)
        with open("{}/fake_text.txt".format(out_dir), "a") as f:
            print(i, str(txts[i]).strip(), file=f)
Example #3
Source File: baselines.py    From cactus-maml with MIT License
import numpy as np
from sklearn.neighbors import KNeighborsClassifier

# get_data, TaskGenerator and FLAGS are defined elsewhere in this project

def embedding_nearest_neighbour(n_neighbors=FLAGS.n_neighbours, num_classes=FLAGS.way,
                                num_shots=FLAGS.shot, num_tasks=FLAGS.num_tasks,
                                num_encoding_dims=FLAGS.num_encoding_dims, test_set=FLAGS.test_set,
                                dataset=FLAGS.dataset):
    print('{}-way {}-shot embedding nearest neighbour'.format(num_classes, num_shots))
    # celeba tasks are built from attribute combinations rather than class labels
    if dataset != 'celeba':
        _, _, _, X_test, Y_test, Z_test = get_data(dataset, num_encoding_dims, test_set)
        task_generator = TaskGenerator(num_classes=num_classes, num_train_samples_per_class=num_shots,
                                       num_samples_per_class=num_shots + 5)
        partition = task_generator.get_partition_from_labels(Y_test)
        partitions = [partition]
    else:
        _, _, _, X_test, attributes_test, Z_test = get_data(dataset, num_encoding_dims, test_set)
        task_generator = TaskGenerator(num_classes=num_classes, num_train_samples_per_class=num_shots,
                                       num_samples_per_class=num_shots + 5)
        partitions = task_generator.get_celeba_task_pool(attributes_test)
    tasks = task_generator.get_tasks(num_tasks=num_tasks, partitions=partitions)

    accuracies = []

    for i_task, task in enumerate(tasks):
        if (i_task + 1) % (num_tasks // 10) == 0:
            print('test {}, accuracy {:.5}'.format(i_task + 1, np.mean(accuracies)))
        ind_train_few, Y_train_few, ind_test_few, Y_test_few = task
        Z_train_few, Z_test_few = Z_test[ind_train_few], Z_test[ind_test_few]

        # classify each test embedding by a vote among its nearest labeled train embeddings
        knn = KNeighborsClassifier(n_neighbors=n_neighbors, n_jobs=-1)
        knn.fit(Z_train_few, Y_train_few)
        accuracies.append(knn.score(Z_test_few, Y_test_few))

    # 1.96 * std / sqrt(n) is the half-width of a 95% confidence interval
    print('{}-way {}-shot embedding nearest neighbour: {:.5} with 95% CI {:.5} over {} tests'.format(
        num_classes, num_shots, np.mean(accuracies), 1.96 * np.std(accuracies) / np.sqrt(num_tasks), num_tasks))
Example #4
Source File: baselines.py    From cactus-maml with MIT License
import time

import numpy as np
from sklearn.linear_model import LogisticRegression

# get_data, TaskGenerator and FLAGS are defined elsewhere in this project

def embedding_logistic_regression(C=FLAGS.inverse_reg, penalty='l2', multi_class='multinomial',
                                  num_classes=FLAGS.way, num_shots=FLAGS.shot, num_tasks=FLAGS.num_tasks,
                                  num_encoding_dims=FLAGS.num_encoding_dims, test_set=FLAGS.test_set,
                                  dataset=FLAGS.dataset):
    print('{}-way {}-shot logistic regression'.format(num_classes, num_shots))
    if dataset != 'celeba':
        _, _, _, X_test, Y_test, Z_test = get_data(dataset, num_encoding_dims, test_set)
        task_generator = TaskGenerator(num_classes=num_classes, num_train_samples_per_class=num_shots,
                                       num_samples_per_class=num_shots + 5)
        partition = task_generator.get_partition_from_labels(Y_test)
        partitions = [partition]
    else:
        _, _, _, X_test, attributes_test, Z_test = get_data(dataset, num_encoding_dims, test_set)
        task_generator = TaskGenerator(num_classes=num_classes, num_train_samples_per_class=num_shots,
                                       num_samples_per_class=num_shots + 5)
        partitions = task_generator.get_celeba_task_pool(attributes_test)
    tasks = task_generator.get_tasks(num_tasks=num_tasks, partitions=partitions)

    train_accuracies, test_accuracies = [], []

    start = time.time()
    for i_task, task in enumerate(tasks):
        if (i_task + 1) % (num_tasks // 10) == 0:
            print('test {}, train accuracy {:.5}, test accuracy {:.5}'.format(
                i_task + 1, np.mean(train_accuracies), np.mean(test_accuracies)))
        ind_train_few, Y_train_few, ind_test_few, Y_test_few = task
        Z_train_few, Z_test_few = Z_test[ind_train_few], Z_test[ind_test_few]

        # fit a multinomial logistic regression on the few labeled embeddings of this task
        logistic_regression = LogisticRegression(n_jobs=-1, penalty=penalty, C=C, multi_class=multi_class,
                                                 solver='saga', max_iter=1000)
        logistic_regression.fit(Z_train_few, Y_train_few)
        test_accuracies.append(logistic_regression.score(Z_test_few, Y_test_few))
        train_accuracies.append(logistic_regression.score(Z_train_few, Y_train_few))
    print('penalty={}, C={}, multi_class={}'.format(penalty, C, multi_class))
    print('{}-way {}-shot logistic regression: {:.5} with 95% CI {:.5} over {} tests'.format(
        num_classes, num_shots, np.mean(test_accuracies), 1.96 * np.std(test_accuracies) / np.sqrt(num_tasks), num_tasks))
    print('Mean training accuracy: {:.5}; standard deviation: {:.5}'.format(
        np.mean(train_accuracies), np.std(train_accuracies)))
    print('{} few-shot classification tasks: {:.5} seconds.'.format(num_tasks, time.time() - start))
Example #5
Source File: models.py    From philo2vec with MIT License
# get_data, StemmingLookup, VocabBuilder and Philo2Vec are defined in this project

def cbow():
    # continuous-bag-of-words model trained with a noise-contrastive estimation loss
    params = {
        'model': Philo2Vec.CBOW,
        'loss_fct': Philo2Vec.NCE,
        'context_window': 5,
    }
    x_train = get_data()
    # stem the validation words so they match the stemmed training vocabulary
    validation_words = ['kant', 'descartes', 'human', 'natural']
    x_validation = [StemmingLookup.stem(w) for w in validation_words]
    vb = VocabBuilder(x_train, min_frequency=5)
    pv = Philo2Vec(vb, **params)
    pv.fit(epochs=30, validation_data=x_validation)
    return pv
Example #6
Source File: models.py    From philo2vec with MIT License
# get_data, StemmingLookup, VocabBuilder and Philo2Vec are defined in this project

def skip_gram():
    # skip-gram model trained with a softmax loss
    params = {
        'model': Philo2Vec.SKIP_GRAM,
        'loss_fct': Philo2Vec.SOFTMAX,
        'context_window': 2,
        'num_skips': 4,
        'neg_sample_size': 2,
    }
    x_train = get_data()
    # stem the validation words so they match the stemmed training vocabulary
    validation_words = ['kant', 'descartes', 'human', 'natural']
    x_validation = [StemmingLookup.stem(w) for w in validation_words]
    vb = VocabBuilder(x_train, min_frequency=5)
    pv = Philo2Vec(vb, **params)
    pv.fit(epochs=30, validation_data=x_validation)
    return pv
Example #7
Source File: main.py    From bitfinex-ohlc-import with MIT License
# API_URL and get_data are defined elsewhere in this project

def get_candles(symbol, start_date, end_date, timeframe='1m', limit=1000):
    """
    Return symbol candles between two dates.
    https://docs.bitfinex.com/v2/reference#rest-public-candles
    """
    # timestamps need to include milliseconds
    start_date = start_date.int_timestamp * 1000
    end_date = end_date.int_timestamp * 1000

    url = f'{API_URL}/candles/trade:{timeframe}:t{symbol.upper()}/hist' \
          f'?start={start_date}&end={end_date}&limit={limit}'
    return get_data(url)
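A minimal usage sketch, assuming pendulum datetimes (the source of the int_timestamp attribute used above) and the project's API_URL and get_data helpers; the symbol and date range are purely illustrative:

import pendulum

# hypothetical call: fetch one day of 1-minute BTC/USD candles
start = pendulum.datetime(2019, 1, 1)
end = pendulum.datetime(2019, 1, 2)
candles = get_candles('btcusd', start, end, timeframe='1m', limit=1000)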
Example #8
Source File: baselines.py    From cactus-maml with MIT License
import time
from collections import defaultdict

import numpy as np
from scipy.spatial import distance
from sklearn.cluster import KMeans
from sklearn.metrics import accuracy_score

# get_data, TaskGenerator and FLAGS are defined elsewhere in this project

def embedding_cluster_matching(num_classes=FLAGS.way, num_shots=FLAGS.shot, num_tasks=FLAGS.num_tasks,
                               num_clusters=FLAGS.num_clusters, num_encoding_dims=FLAGS.num_encoding_dims,
                               dataset=FLAGS.dataset, test_set=FLAGS.test_set):
    if dataset != 'celeba':
        _, _, Z_train, X_test, Y_test, Z_test = get_data(dataset, num_encoding_dims, test_set)
    else:
        _, _, Z_train, X_test, attributes_test, Z_test = get_data(dataset, num_encoding_dims, test_set)

    # cluster the unlabeled training embeddings
    # (precompute_distances and n_jobs were deprecated and later removed from KMeans in newer scikit-learn)
    start = time.time()
    kmeans = KMeans(n_clusters=num_clusters, init='k-means++', random_state=0, precompute_distances=True,
                    n_jobs=10, n_init=10, max_iter=3000).fit(Z_train)
    print("Ran KMeans with n_clusters={} in {:.5} seconds, objective {}.".format(
        num_clusters, time.time() - start, kmeans.score(Z_train)))

    if dataset != 'celeba':
        task_generator = TaskGenerator(num_classes=num_classes, num_train_samples_per_class=num_shots,
                                       num_samples_per_class=num_shots + 5)
        partition = task_generator.get_partition_from_labels(Y_test)
        partitions = [partition]
    else:
        task_generator = TaskGenerator(num_classes=num_classes, num_train_samples_per_class=num_shots,
                                       num_samples_per_class=num_shots + 5)
        partitions = task_generator.get_celeba_task_pool(attributes_test)
    tasks = task_generator.get_tasks(num_tasks=num_tasks, partitions=partitions)

    # evaluate for each shot count (a single value here)
    for num_shots in [FLAGS.shot]:
        accuracies = []
        start = time.time()
        num_degenerate_tasks = 0

        for i_task, task in enumerate(tasks):
            if (i_task + 1) % (num_tasks // 10) == 0:
                print('test {}, accuracy {:.5}'.format(i_task + 1, np.mean(accuracies)))

            ind_train_few, Y_train_few, ind_test_few, Y_test_few = task
            Z_train_few, Z_test_few = Z_test[ind_train_few], Z_test[ind_test_few]

            # map each cluster to the majority label of the few-shot train examples it receives
            clusters_to_labels_few = defaultdict(list)
            examples_to_clusters_few = kmeans.predict(Z_train_few)
            for i in range(len(Y_train_few)):
                clusters_to_labels_few[examples_to_clusters_few[i]].append(Y_train_few[i])
            for (cluster, labels) in list(clusters_to_labels_few.items()):
                uniques, counts = np.unique(labels, return_counts=True)
                clusters_to_labels_few[cluster] = [uniques[np.argmax(counts)]]
                # if len(np.unique(labels)) > 1:      # delete degenerate clusters
                #     del clusters_to_labels_few[cluster]
            if len(clusters_to_labels_few) == 0:
                num_degenerate_tasks += 1
                continue
            # label each test embedding with the label of its nearest labeled centroid
            centroid_ind_to_cluster = np.array(list(clusters_to_labels_few.keys()))
            centroids = kmeans.cluster_centers_[centroid_ind_to_cluster]
            distances = distance.cdist(Z_test_few, centroids)
            predicted_clusters = centroid_ind_to_cluster[np.argmin(distances, axis=1)]
            predictions = [clusters_to_labels_few[cluster][0] for cluster in predicted_clusters]

            accuracies.append(accuracy_score(Y_test_few, predictions))
        print('dataset={}, encoder={}, num_encoding_dims={}, num_clusters={}'.format(
            dataset, FLAGS.encoder, num_clusters, num_encoding_dims))
        print('{}-way {}-shot nearest-cluster after clustering embeddings: {:.5} with 95% CI {:.5} over {} tests'.format(
            num_classes, num_shots, np.mean(accuracies), 1.96 * np.std(accuracies) / np.sqrt(num_tasks), num_tasks))
        print('{} few-shot classification tasks: {:.5} seconds with {} degenerate tasks.'.format(
            num_tasks, time.time() - start, num_degenerate_tasks))
Example #9
Source File: baselines.py    From cactus-maml with MIT License
import time

import numpy as np
from tqdm import tqdm

# get_data, TaskGenerator and FLAGS are defined elsewhere in this project

def embedding_mlp(num_classes=FLAGS.way, num_shots=FLAGS.shot, num_tasks=FLAGS.num_tasks,
                  num_encoding_dims=FLAGS.num_encoding_dims, test_set=FLAGS.test_set, dataset=FLAGS.dataset,
                  units=FLAGS.units, dropout=FLAGS.dropout):
    import keras
    from keras.layers import Dense, Dropout
    from keras.losses import categorical_crossentropy
    from keras.callbacks import EarlyStopping
    from keras import backend as K

    if dataset != 'celeba':
        _, _, _, X_test, Y_test, Z_test = get_data(dataset, num_encoding_dims, test_set)
        task_generator = TaskGenerator(num_classes=num_classes, num_train_samples_per_class=num_shots,
                                       num_samples_per_class=num_shots + 5)
        partition = task_generator.get_partition_from_labels(Y_test)
        partitions = [partition]
    else:
        _, _, _, X_test, attributes_test, Z_test = get_data(dataset, num_encoding_dims, test_set)
        task_generator = TaskGenerator(num_classes=num_classes, num_train_samples_per_class=num_shots,
                                       num_samples_per_class=num_shots + 5)
        partitions = task_generator.get_celeba_task_pool(attributes_test)
    tasks = task_generator.get_tasks(num_tasks=num_tasks, partitions=partitions)

    train_accuracies, test_accuracies = [], []

    start = time.time()
    for i_task, task in enumerate(tqdm(tasks)):
        if (i_task + 1) % (num_tasks // 10) == 0:
            tqdm.write('test {}, accuracy {:.5}'.format(i_task + 1, np.mean(test_accuracies)))
        ind_train_few, Y_train_few, ind_test_few, Y_test_few = task
        Z_train_few, Z_test_few = Z_test[ind_train_few], Z_test[ind_test_few]
        Y_train_few, Y_test_few = (keras.utils.to_categorical(Y_train_few, num_classes=num_classes),
                                   keras.utils.to_categorical(Y_test_few, num_classes=num_classes))

        # train a small MLP head on the few labeled embeddings for this task;
        # note that early stopping monitors loss on the task's test split
        model = keras.Sequential()
        model.add(Dense(units=units, activation='relu', input_dim=Z_train_few.shape[1]))
        model.add(Dropout(rate=dropout))
        model.add(Dense(units=num_classes, activation='softmax'))
        model.compile(loss=categorical_crossentropy, optimizer=keras.optimizers.Adam(), metrics=['accuracy'])
        early_stopping = EarlyStopping(monitor='val_loss', patience=2)
        model.fit(Z_train_few, Y_train_few, batch_size=Z_train_few.shape[0], epochs=500, verbose=0,
                  validation_data=(Z_test_few, Y_test_few), callbacks=[early_stopping])
        train_accuracies.append(model.evaluate(Z_train_few, Y_train_few, verbose=0)[1])
        test_accuracies.append(model.evaluate(Z_test_few, Y_test_few, verbose=0)[1])
        K.clear_session()  # free the TensorFlow graph between tasks

    print('units={}, dropout={}'.format(units, dropout))
    print('{}-way {}-shot embedding mlp: {:.5} with 95% CI {:.5} over {} tests'.format(
        num_classes, num_shots, np.mean(test_accuracies), 1.96 * np.std(test_accuracies) / np.sqrt(num_tasks), num_tasks))
    print('Mean training accuracy: {:.5}; standard deviation: {:.5}'.format(
        np.mean(train_accuracies), np.std(train_accuracies)))
    print('{} few-shot classification tasks: {:.5} seconds.'.format(num_tasks, time.time() - start))
Example #10
Source File: baselines.py    From cactus-maml with MIT License
import time
from collections import defaultdict

import numpy as np
from sklearn.cluster import KMeans
from sklearn.linear_model import LogisticRegression

# get_data, TaskGenerator and FLAGS are defined elsewhere in this project

def cluster_color_logistic_regression(C=FLAGS.inverse_reg, penalty='l2', multi_class='multinomial',
                                      n_clusters=FLAGS.num_clusters, num_classes=FLAGS.way,
                                      num_shots=FLAGS.shot, num_tasks=FLAGS.num_tasks,
                                      num_encoding_dims=FLAGS.num_encoding_dims, test_set=FLAGS.test_set,
                                      dataset=FLAGS.dataset):
    if dataset != 'celeba':
        _, _, Z_train, X_test, Y_test, Z_test = get_data(dataset, num_encoding_dims, test_set)
    else:
        _, _, Z_train, X_test, attributes_test, Z_test = get_data(dataset, num_encoding_dims, test_set)

    # cluster the unlabeled training embeddings
    # (precompute_distances and n_jobs were deprecated and later removed from KMeans in newer scikit-learn)
    start = time.time()
    kmeans = KMeans(n_clusters=n_clusters, precompute_distances=True, n_jobs=-1, n_init=100).fit(Z_train)
    print("Ran KMeans with n_clusters={} in {:.5} seconds.".format(n_clusters, time.time() - start))
    uniques, counts = np.unique(kmeans.labels_, return_counts=True)

    if dataset != 'celeba':
        task_generator = TaskGenerator(num_classes=num_classes, num_train_samples_per_class=num_shots,
                                       num_samples_per_class=num_shots + 5)
        partition = task_generator.get_partition_from_labels(Y_test)
        partitions = [partition]
    else:
        task_generator = TaskGenerator(num_classes=num_classes, num_train_samples_per_class=num_shots,
                                       num_samples_per_class=num_shots + 5)
        partitions = task_generator.get_celeba_task_pool(attributes_test)
    tasks = task_generator.get_tasks(num_tasks=num_tasks, partitions=partitions)

    train_accuracies, test_accuracies = [], []
    start = time.time()
    clusters_to_indices = task_generator.get_partition_from_labels(kmeans.labels_)
    for i_task, task in enumerate(tasks):
        if (i_task + 1) % (num_tasks // 10) == 0:
            print('test {}, train accuracy {:.5}, test accuracy {:.5}'.format(
                i_task + 1, np.mean(train_accuracies), np.mean(test_accuracies)))

        ind_train_few, Y_train_few, ind_test_few, Y_test_few = task
        Z_train_few, Z_test_few = Z_test[ind_train_few], Z_test[ind_test_few]
        # map each cluster to the labels of the few-shot train examples assigned to it
        clusters_to_labels_few = defaultdict(list)
        indices_to_clusters_few = kmeans.predict(Z_train_few)
        for i in range(Z_train_few.shape[0]):
            clusters_to_labels_few[indices_to_clusters_few[i]].append(Y_train_few[i])
        Z_train_fit, Y_train_fit = [], []
        for cluster in list(clusters_to_labels_few.keys()):
            labels = clusters_to_labels_few[cluster]
            if len(np.unique(labels)) == 1:     # skip degenerate clusters
                Z_train_fit.extend(Z_train[clusters_to_indices[cluster]])   # propagate labels to unlabeled datapoints
                Y_train_fit.extend([labels[0] for i in range(len(clusters_to_indices[cluster]))])
        Z_train_fit, Y_train_fit = np.stack(Z_train_fit, axis=0), np.stack(Y_train_fit, axis=0)
        # train on the propagated examples plus the original few labeled ones
        Z_train_fit = np.concatenate((Z_train_fit, Z_train_few), axis=0)
        Y_train_fit = np.concatenate((Y_train_fit, Y_train_few), axis=0)

        logistic_regression = LogisticRegression(n_jobs=-1, penalty=penalty, C=C, multi_class=multi_class,
                                                 solver='saga', max_iter=500)
        logistic_regression.fit(Z_train_fit, Y_train_fit)
        test_accuracies.append(logistic_regression.score(Z_test_few, Y_test_few))
        train_accuracies.append(logistic_regression.score(Z_train_fit, Y_train_fit))
    print('n_clusters={}, penalty={}, C={}, multi_class={}'.format(n_clusters, penalty, C, multi_class))
    print('{}-way {}-shot logistic regression after clustering: {:.5} with 95% CI {:.5} over {} tests'.format(
        num_classes, num_shots, np.mean(test_accuracies), 1.96 * np.std(test_accuracies) / np.sqrt(num_tasks), num_tasks))
    print('Mean training accuracy: {:.5}; standard deviation: {:.5}'.format(
        np.mean(train_accuracies), np.std(train_accuracies)))
    print('{} few-shot classification tasks: {:.5} seconds.'.format(num_tasks, time.time() - start))
Example #11
Source File: arithmetic_in_text.py    From Hands-On-Generative-Adversarial-Networks-with-Keras with MIT License
import numpy as np
import matplotlib.pyplot as plt

# load_model, get_data and iterate_minibatches are helpers defined in this project

def infer(data_filepath='data/flowers.hdf5', z_dim=128, out_dir='gan',
          n_steps=10):

    G = load_model(out_dir)
    val_data = get_data(data_filepath, 'train')
    val_data = next(iterate_minibatches(val_data, 2))
    emb_a, emb_b = val_data[1]
    txts = val_data[2]

    # add batch dimension
    emb_a, emb_b = emb_a[None, :], emb_b[None, :]

    # sample z vector for inference
    z = np.random.uniform(-1, 1, size=(1, z_dim))

    G.trainable = False

    # add and subtract the two text embeddings
    emb_add = emb_a + emb_b
    emb_a_sub_b = emb_a - emb_b
    emb_b_sub_a = emb_b - emb_a

    # generate an image for each embedding (the original also ran the a and b
    # predictions a second time; the duplicate calls are dropped here)
    fake_a = G.predict([z, emb_a])[0]
    fake_b = G.predict([z, emb_b])[0]
    fake_add = G.predict([z, emb_add])[0]
    fake_a_sub_b = G.predict([z, emb_a_sub_b])[0]
    fake_b_sub_a = G.predict([z, emb_b_sub_a])[0]

    # rescale generator outputs from [-1, 1] to [0, 1] before saving
    fake_a = (fake_a + 1) * 0.5
    fake_b = (fake_b + 1) * 0.5
    fake_add = (fake_add + 1) * 0.5
    fake_a_sub_b = (fake_a_sub_b + 1) * 0.5
    fake_b_sub_a = (fake_b_sub_a + 1) * 0.5

    plt.imsave("{}/fake_text_arithmetic_a".format(out_dir), fake_a)
    plt.imsave("{}/fake_text_arithmetic_b".format(out_dir), fake_b)
    plt.imsave("{}/fake_text_arithmetic_add".format(out_dir), fake_add)
    plt.imsave("{}/fake_text_arithmetic_a_sub_b".format(out_dir), fake_a_sub_b)
    plt.imsave("{}/fake_text_arithmetic_b_sub_a".format(out_dir), fake_b_sub_a)
    with open("{}/fake_text_arithmetic.txt".format(out_dir), "a") as f:
        print(str(txts[0]), str(txts[1]), file=f)