Python dataset.DataSet() Examples

The following are 12 code examples of dataset.DataSet(). You can vote up the examples you find useful or vote down those you don't, and you can go to the original project or source file by following the link above each example. You may also want to check out all available functions/classes of the module dataset, or try the search function.
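The dataset module itself differs from project to project and is not shown on this page. Every example below, however, builds DataSet objects from in-memory NumPy arrays and reads them back out, usually in shuffled minibatches. The sketch below is a minimal, hypothetical reconstruction of the interface the calls appear to assume (a constructor taking images and labels, optional shuffle and seed arguments, and a next_batch method); the real classes in the listed projects differ in detail, e.g. Example #1 also passes file names and class labels to the constructor.

import numpy as np

class DataSet(object):
    """Hypothetical sketch of the interface the examples below appear to assume."""

    def __init__(self, images=None, labels=None, shuffle=True, seed=0):
        self._images = images
        self._labels = labels
        self._shuffle = shuffle
        self._rng = np.random.RandomState(seed)
        self._index = 0

    @property
    def images(self):
        return self._images

    @property
    def labels(self):
        return self._labels

    def next_batch(self, batch_size):
        # Serve the next batch_size examples, reshuffling once an epoch is exhausted.
        n = self._images.shape[0]
        if self._index + batch_size > n:
            if self._shuffle:
                perm = self._rng.permutation(n)
                self._images = self._images[perm]
                self._labels = self._labels[perm]
            self._index = 0
        start = self._index
        self._index += batch_size
        return self._images[start:self._index], self._labels[start:self._index]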
Example #1
Source File: datagenerator.py    From plant-disease-classification with MIT License    6 votes
def read_train_sets(train_path, image_size, classes, validation_size):
    # Container whose .train and .valid attributes will hold the two splits.
    data_set = DataSet()

    images, labels, img_names, class_array = load_train_data(train_path, image_size, classes)
    images, labels, img_names, class_array = shuffle(images, labels, img_names, class_array)

    # A float validation_size is interpreted as a fraction of the dataset.
    if isinstance(validation_size, float):
        validation_size = int(validation_size * images.shape[0])

    validation_images = images[:validation_size]
    validation_labels = labels[:validation_size]
    validation_img_names = img_names[:validation_size]
    validation_cls = class_array[:validation_size]

    train_images = images[validation_size:]
    train_labels = labels[validation_size:]
    train_img_names = img_names[validation_size:]
    train_cls = class_array[validation_size:]

    data_set.train = DataSet(train_images, train_labels, train_img_names, train_cls)
    data_set.valid = DataSet(validation_images, validation_labels, validation_img_names, validation_cls)

    return data_set 
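A typical call might look like the following; the directory, image size, class names, and split fraction are placeholders, and the attribute access assumes the DataSet class exposes an images array as in the sketch near the top of this page.

data = read_train_sets('data/train', image_size=128,
                       classes=['healthy', 'diseased'],
                       validation_size=0.2)
print(data.train.images.shape, data.valid.images.shape)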
Example #2
Source File: compare_variational_mcmc.py    From deep_gp_random_features with Apache License 2.0    6 votes
def generate_toy_data():

    N = 50
    DATA_X = np.random.uniform(-5.0, 5.0, [N, 1])

    true_log_lambda = -2.0
    true_std = np.exp(true_log_lambda) / 2.0  # observation noise std, roughly 0.068
    # f() is the toy target function defined elsewhere in the source file.
    DATA_y = f(DATA_X) + np.random.normal(0.0, true_std, [N, 1])

    Xtest = np.arange(-10.0, 10.0, 0.1)  # np.arange already returns an ndarray
    Xtest = Xtest[:, np.newaxis]
    ytest = f(Xtest)  # noise-free test targets; optionally add np.random.normal(0, true_std, [Xtest.shape[0], 1])

    data = DataSet(DATA_X, DATA_y)
    test = DataSet(Xtest, ytest, shuffle=False)

    return data, test 
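The snippet relies on a toy target function f() that is defined elsewhere in compare_variational_mcmc.py and is not reproduced here. To run it stand-alone you would need to supply some smooth 1-D function in its place; the stand-in below is purely illustrative and is not the function used in the original project.

import numpy as np

def f(X):
    # Hypothetical stand-in for the project's toy target; any smooth function works.
    return np.sin(X)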
Example #3
Source File: mnist.py    From meta-optim-public with MIT License    6 votes
def read_data_sets(train_dir, seed=0):

    one_hot = False

    class DataSets(object):
        pass

    data_sets = DataSets()
    TRAIN_IMAGES = "train-images-idx3-ubyte.gz"
    TRAIN_LABELS = "train-labels-idx1-ubyte.gz"
    TEST_IMAGES = "t10k-images-idx3-ubyte.gz"
    TEST_LABELS = "t10k-labels-idx1-ubyte.gz"

    local_file = maybe_download(TRAIN_IMAGES, train_dir)
    train_images = extract_images(local_file)

    local_file = maybe_download(TRAIN_LABELS, train_dir)
    train_labels = extract_labels(local_file, one_hot=one_hot)

    local_file = maybe_download(TEST_IMAGES, train_dir)
    test_images = extract_images(local_file)

    local_file = maybe_download(TEST_LABELS, train_dir)
    test_labels = extract_labels(local_file, one_hot=one_hot)

    print('Train', train_images.shape)
    print('Test', test_images.shape)
    data_sets.train = DataSet(train_images, train_labels, seed=seed)
    data_sets.test = DataSet(test_images, test_labels, seed=seed)

    return data_sets 
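Once wrapped in DataSet objects, the splits are normally consumed in minibatches. The loop below is a sketch, assuming DataSet provides a next_batch method (as it does in Example #12 and in the sketch near the top of this page); the directory and batch size are arbitrary.

data_sets = read_data_sets('MNIST_data', seed=0)
for step in range(100):
    batch_images, batch_labels = data_sets.train.next_batch(64)
    # feed batch_images / batch_labels into the training step here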
Example #4
Source File: cifar.py    From MachineLearning with Apache License 2.0    5 votes
def __init__(self):
        self.train = dataset.DataSet()
        self.test = dataset.DataSet() 
Example #5
Source File: svhn.py    From MachineLearning with Apache License 2.0    5 votes
def __init__(self):
        self.train = dataset.DataSet()
        self.test = dataset.DataSet() 
Example #6
Source File: dgp_rff_infmnist.py    From deep_gp_random_features with Apache License 2.0    5 votes
def import_mnist():
    """
    This import mnist and saves the data as an object of our DataSet class
    :return:
    """
    VALIDATION_SIZE = 0
    ONE_HOT = True
    TRAIN_DIR = 'INFMNIST_data/'


    train_images = extract_images_2(open(TRAIN_DIR + 'mnist8m-patterns-idx3-ubyte.gz'))

    train_labels = extract_labels(open(TRAIN_DIR + 'mnist8m-labels-idx1-ubyte.gz'), one_hot=ONE_HOT)

    test_images = extract_images(open(TRAIN_DIR + 'test10k-patterns.gz'))

    test_labels = extract_labels(open(TRAIN_DIR + 'test10k-labels.gz'), one_hot=ONE_HOT)

    validation_images = train_images[:VALIDATION_SIZE]
    validation_labels = train_labels[:VALIDATION_SIZE]
    train_images = train_images[VALIDATION_SIZE:]
    train_labels = train_labels[VALIDATION_SIZE:]

    ## Process images
    train_images = process_mnist(train_images)
    validation_images = process_mnist(validation_images)
    test_images = process_mnist(test_images)

    ## Standardize data
    train_mean, train_std = get_data_info(train_images)
#    train_images = standardize_data(train_images, train_mean, train_std)
#    validation_images = standardize_data(validation_images, train_mean, train_std)
#    test_images = standardize_data(test_images, train_mean, train_std)

    data = DataSet(train_images, train_labels)
    test = DataSet(test_images, test_labels)
    val = DataSet(validation_images, validation_labels)

    return data, test, val 
Example #7
Source File: dgp_rff_classification.py    From deep_gp_random_features with Apache License 2.0    5 votes
def import_dataset(dataset, fold):

    train_X = np.loadtxt('FOLDS/' + dataset + '_ARD_Xtrain__FOLD_' + fold, delimiter=' ')
    train_Y = np.loadtxt('FOLDS/' + dataset + '_ARD_ytrain__FOLD_' + fold, delimiter=' ')
    test_X = np.loadtxt('FOLDS/' + dataset + '_ARD_Xtest__FOLD_' + fold, delimiter=' ')
    test_Y = np.loadtxt('FOLDS/' + dataset + '_ARD_ytest__FOLD_' + fold, delimiter=' ')

    data = DataSet(train_X, train_Y)
    test = DataSet(test_X, test_Y)

    return data, test 
Example #8
Source File: dgp_rff_regression.py    From deep_gp_random_features with Apache License 2.0    5 votes
def import_dataset(dataset, fold):

    train_X = np.loadtxt('FOLDS/' + dataset + '_ARD_Xtrain__FOLD_' + fold, delimiter=' ')
    train_Y = np.loadtxt('FOLDS/' + dataset + '_ARD_ytrain__FOLD_' + fold, delimiter=' ')
    train_Y = np.reshape(train_Y, (-1, 1))
    test_X = np.loadtxt('FOLDS/' + dataset + '_ARD_Xtest__FOLD_' + fold, delimiter=' ')
    test_Y = np.loadtxt('FOLDS/' + dataset + '_ARD_ytest__FOLD_' + fold, delimiter=' ')
    test_Y = np.reshape(test_Y, (-1, 1))

    data = DataSet(train_X, train_Y)
    test = DataSet(test_X, test_Y)

    return data, test 
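The only difference from the classification loader in Example #7 is that the regression targets are reshaped into column vectors, so DataSet receives arrays of shape (N, 1) rather than (N,). A quick illustration of that reshape:

import numpy as np

y = np.array([3.2, 1.7, 5.0])    # shape (3,), as np.loadtxt returns for a single column
y_col = np.reshape(y, (-1, 1))   # shape (3, 1); -1 lets NumPy infer the number of rows
print(y.shape, y_col.shape)      # prints: (3,) (3, 1)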
Example #9
Source File: training.py    From reweighted-ws with GNU Affero General Public License v3.0    5 votes
def load_data(self):
        dataset = self.dataset
        assert isinstance(dataset, DataSet)

        n_datapoints = dataset.n_datapoints
        assert n_datapoints == dataset.X.shape[0]

        X, Y = dataset.preproc(dataset.X, dataset.Y)
        self.train_X = theano.shared(X, "train_X")
        self.train_Y = theano.shared(Y, "train_Y")

        self.train_perm = theano.shared(np.random.permutation(n_datapoints)) 
Example #10
Source File: dgp_rff_mnist.py    From deep_gp_random_features with Apache License 2.0    4 votes
def import_mnist():
    """
    This import mnist and saves the data as an object of our DataSet class
    :return:
    """
    SOURCE_URL = 'http://yann.lecun.com/exdb/mnist/'
    TRAIN_IMAGES = 'train-images-idx3-ubyte.gz'
    TRAIN_LABELS = 'train-labels-idx1-ubyte.gz'
    TEST_IMAGES = 't10k-images-idx3-ubyte.gz'
    TEST_LABELS = 't10k-labels-idx1-ubyte.gz'
    VALIDATION_SIZE = 0
    ONE_HOT = True
    TRAIN_DIR = 'MNIST_data'


    local_file = base.maybe_download(TRAIN_IMAGES, TRAIN_DIR,
                                     SOURCE_URL + TRAIN_IMAGES)
    train_images = extract_images(open(local_file, 'rb'))

    local_file = base.maybe_download(TRAIN_LABELS, TRAIN_DIR,
                                     SOURCE_URL + TRAIN_LABELS)
    train_labels = extract_labels(open(local_file, 'rb'), one_hot=ONE_HOT)

    local_file = base.maybe_download(TEST_IMAGES, TRAIN_DIR,
                                     SOURCE_URL + TEST_IMAGES)
    test_images = extract_images(open(local_file, 'rb'))

    local_file = base.maybe_download(TEST_LABELS, TRAIN_DIR,
                                     SOURCE_URL + TEST_LABELS)
    test_labels = extract_labels(open(local_file, 'rb'), one_hot=ONE_HOT)

    validation_images = train_images[:VALIDATION_SIZE]
    validation_labels = train_labels[:VALIDATION_SIZE]
    train_images = train_images[VALIDATION_SIZE:]
    train_labels = train_labels[VALIDATION_SIZE:]

    ## Process images
    train_images = process_mnist(train_images)
    validation_images = process_mnist(validation_images)
    test_images = process_mnist(test_images)

    ## Standardize data
    train_mean, train_std = get_data_info(train_images)
#    train_images = standardize_data(train_images, train_mean, train_std)
#    validation_images = standardize_data(validation_images, train_mean, train_std)
#    test_images = standardize_data(test_images, train_mean, train_std)

    data = DataSet(train_images, train_labels)
    test = DataSet(test_images, test_labels)
    val = DataSet(validation_images, validation_labels)

    return data, test, val 
Example #11
Source File: cifar10.py    From meta-optim-public with MIT License    4 votes
def read_data_sets(data_folder, seed=0):
    train_img = []
    train_label = []
    test_img = []
    test_label = []
    filename = 'cifar-10-python.tar.gz'
    maybe_download(filename, data_folder)

    train_file_list = [
        "data_batch_1", "data_batch_2", "data_batch_3", "data_batch_4", "data_batch_5"
    ]
    test_file_list = ["test_batch"]

    for i in six.moves.xrange(len(train_file_list)):
        # CIFAR-10 batches are pickled dicts; newer NumPy versions require allow_pickle=True here.
        tmp_dict = np.load(
            os.path.join(data_folder, 'cifar-10-batches-py', train_file_list[i]),
            encoding='latin1', allow_pickle=True)
        train_img.append(tmp_dict["data"])
        train_label.append(tmp_dict["labels"])

    tmp_dict = np.load(
        os.path.join(data_folder, 'cifar-10-batches-py', test_file_list[0]),
        encoding='latin1', allow_pickle=True)
    test_img.append(tmp_dict["data"])
    test_label.append(tmp_dict["labels"])

    train_img = np.concatenate(train_img)
    train_label = np.concatenate(train_label)
    test_img = np.concatenate(test_img)
    test_label = np.concatenate(test_label)

    train_img = np.reshape(train_img, [-1, 3, 32, 32])
    test_img = np.reshape(test_img, [-1, 3, 32, 32])

    # change format from [B, C, H, W] to [B, H, W, C] for feeding to Tensorflow
    train_img = np.transpose(train_img, [0, 2, 3, 1])
    test_img = np.transpose(test_img, [0, 2, 3, 1])

    class DataSets(object):
        pass

    data_sets = DataSets()
    data_sets.train = DataSet(train_img, train_label, seed=seed)
    data_sets.test = DataSet(test_img, test_label, seed=seed)

    return data_sets 
Example #12
Source File: model.py    From VietnameseOCR with Apache License 2.0    4 votes
def train(self, learning_rate, training_epochs, batch_size, keep_prob):
        self.dataset = DataSet()

        self.Y = tf.placeholder(tf.float32, [None, NO_LABEL], name='Y')
        self.cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=self.logits, labels=self.Y))
        self.optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(self.cost)

        if self.log:
            tf.summary.scalar('cost', self.cost)
            self.merged = tf.summary.merge_all()
            self.train_writer = tf.summary.FileWriter('./log_train', self.sess.graph)

        self.sess.run(tf.global_variables_initializer())
        self.sess.run(tf.local_variables_initializer())

        print('Training...')
        weights = []

        for epoch in range(training_epochs):
            avg_cost = 0
            total_batch = int(len(self.dataset.train_idx) / batch_size)
            # print('total_batch', total_batch)
            for i in range(total_batch + 1):
                batch_xs, batch_ys = self.dataset.next_batch(batch_size)
                feed_dict = {
                    self.X: batch_xs.reshape([batch_xs.shape[0], 28, 28, 1]),
                    self.Y: batch_ys,
                    self.keep_prob: keep_prob
                }

                # self.merged is only defined when logging is enabled, so fetch it conditionally.
                if self.log:
                    weights, summary, c, _ = self.sess.run(
                        [self.parameters, self.merged, self.cost, self.optimizer],
                        feed_dict=feed_dict)
                else:
                    weights, c, _ = self.sess.run(
                        [self.parameters, self.cost, self.optimizer],
                        feed_dict=feed_dict)
                avg_cost += c / total_batch

            if self.log:
                self.train_writer.add_summary(summary, epoch + 1)

            print('Epoch:', '%02d' % (epoch + 1), 'cost =', '{:.9f}'.format(avg_cost))

        print('Training finished!')

        saver = tf.train.Saver()
        save_path = saver.save(self.sess, "viet_ocr_brain.ckpt")
        print("Trainned model is saved in file: %s" % save_path)