Python keras.datasets() Examples

The following are 11 code examples of the keras.datasets module. You can go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module keras, or try the search function.
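For orientation, a minimal sketch of how a keras.datasets loader is used (every loader in the module exposes load_data(), and the data is downloaded and cached under ~/.keras/datasets on first use):

from keras.datasets import mnist

# load_data() returns ((x_train, y_train), (x_test, y_test)) as NumPy arrays
(x_train, y_train), (x_test, y_test) = mnist.load_data()
print(x_train.shape)  # (60000, 28, 28) for MNIST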
Example #1
Source File: test_shap.py    From AIX360 with Apache License 2.0
def test_ShapLinearExplainer(self):
        corpus, y = shap.datasets.imdb()
        corpus_train, corpus_test, y_train, y_test = train_test_split(corpus, y, test_size=0.2, random_state=7)

        vectorizer = TfidfVectorizer(min_df=10)
        X_train = vectorizer.fit_transform(corpus_train)
        X_test = vectorizer.transform(corpus_test)

        model = sklearn.linear_model.LogisticRegression(penalty="l1", C=0.1, solver='liblinear')
        model.fit(X_train, y_train)

        shapexplainer = LinearExplainer(model, X_train, feature_dependence="independent")
        shap_values = shapexplainer.explain_instance(X_test)
        print("Invoked Shap LinearExplainer")

    # the test below is commented out because Travis runs out of resources
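The test above assumes imports from the surrounding test module; a sketch of what they would look like (the AIX360 import path is an assumption, not taken from the source):

# Imports assumed by test_ShapLinearExplainer (sketch):
import shap
import sklearn.linear_model
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from aix360.algorithms.shap import LinearExplainer  # hypothetical import path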
Example #2
Source File: test_shap.py    From AIX360 with Apache License 2.0
def test_ShapGradientExplainer(self):

    #     model = VGG16(weights='imagenet', include_top=True)
    #     X, y = shap.datasets.imagenet50()
    #     to_explain = X[[39, 41]]
    #
    #     url = "https://s3.amazonaws.com/deep-learning-models/image-models/imagenet_class_index.json"
    #     fname = shap.datasets.cache(url)
    #     with open(fname) as f:
    #         class_names = json.load(f)
    #
    #     def map2layer(x, layer):
    #         feed_dict = dict(zip([model.layers[0].input], [preprocess_input(x.copy())]))
    #         return K.get_session().run(model.layers[layer].input, feed_dict)
    #
    #     e = GradientExplainer((model.layers[7].input, model.layers[-1].output),
    #                           map2layer(preprocess_input(X.copy()), 7))
    #     shap_values, indexes = e.explain_instance(map2layer(to_explain, 7), ranked_outputs=2)
    #
          print("Skipped Shap GradientExplainer") 
Example #3
Source File: 05_nn_mnist.py    From Practical-Computer-Vision with MIT License
def get_dataset():
    """
    Return the processed and reshaped dataset for training;
    in this case, the Fashion-MNIST dataset.
    """
    # load the fashion-mnist dataset
    (x_train, y_train), (x_test, y_test) = fashion_mnist.load_data()
    
    # test and train datasets
    print("Nb Train:", x_train.shape[0], "Nb test:",x_test.shape[0])
    x_train = x_train.reshape(x_train.shape[0], img_h, img_w, 1)
    x_test = x_test.reshape(x_test.shape[0], img_h, img_w, 1)
    in_shape = (img_h, img_w, 1)

    # normalize inputs
    x_train = x_train.astype('float32')
    x_test = x_test.astype('float32')
    x_train /= 255.0
    x_test /= 255.0

    # convert to one hot vectors 
    y_train = keras.utils.to_categorical(y_train, nb_class)
    y_test = keras.utils.to_categorical(y_test, nb_class)
    return x_train, x_test, y_train, y_test 
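get_dataset() relies on module-level names defined elsewhere in the script; a minimal sketch of the assumed context (the values follow the Fashion-MNIST format: 28x28 grayscale images, 10 classes):

# Assumed module-level context for get_dataset() (sketch):
import keras
from keras.datasets import fashion_mnist

img_h, img_w = 28, 28  # Fashion-MNIST images are 28x28 grayscale
nb_class = 10          # ten clothing categories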
Example #4
Source File: 05_nn_vis.py    From Practical-Computer-Vision with MIT License
def get_dataset():
    """
    Return the processed and reshaped dataset for training;
    in this case, the Fashion-MNIST dataset.
    """
    # load the fashion-mnist dataset
    (x_train, y_train), (x_test, y_test) = fashion_mnist.load_data()
    
    # test and train datasets
    print("Nb Train:", x_train.shape[0], "Nb test:",x_test.shape[0])
    x_train = x_train.reshape(x_train.shape[0], img_h, img_w, 1)
    x_test = x_test.reshape(x_test.shape[0], img_h, img_w, 1)
    in_shape = (img_h, img_w, 1)

    # normalize inputs
    x_train = x_train.astype('float32')
    x_test = x_test.astype('float32')
    x_train /= 255.0
    x_test /= 255.0

    # convert to one hot vectors 
    y_train = keras.utils.to_categorical(y_train, nb_class)
    y_test = keras.utils.to_categorical(y_test, nb_class)
    return x_train, x_test, y_train, y_test 
Example #5
Source File: datasets.py    From TensorFlow-MIL with MIT License
def build_data(self):
        """Returns the train and test datasets and their labels"""

        # load original mnist dataset and expand each number with embedded "0"s
        (x_train, y_train), (x_test, y_test) = mnist.load_data()
        embedding_img = x_train[1]
        x_train = np.array([self.expand_img(embedding_img, img) for img, label in zip(x_train, y_train)])
        x_test = np.array([self.expand_img(embedding_img, img) for img, label in zip(x_test, y_test)])

        if K.image_data_format() == 'channels_first':
            x_train = x_train.reshape(x_train.shape[0], 1, self.img_rows, self.img_cols)
            x_test = x_test.reshape(x_test.shape[0], 1, self.img_rows, self.img_cols)
        else:
            x_train = x_train.reshape(x_train.shape[0], self.img_rows, self.img_cols, 1)
            x_test = x_test.reshape(x_test.shape[0], self.img_rows, self.img_cols, 1)

        # normalize and cast
        x_train = x_train.astype('float32')
        x_test = x_test.astype('float32')
        x_train /= 255
        x_test /= 255

        # convert class vectors to binary class matrices
        y_train = keras.utils.to_categorical(y_train, self.num_classes)
        y_test = keras.utils.to_categorical(y_test, self.num_classes)

        return x_train, x_test, y_train, y_test 
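build_data() is a method, so it also assumes attributes set up elsewhere on the instance; a sketch of what the snippet requires (names taken from the code above, meanings inferred):

# Assumed instance attributes for build_data() (sketch):
# self.img_rows, self.img_cols    -- dimensions of the expanded images
# self.num_classes                -- 10 for MNIST digits
# self.expand_img(embedding, img) -- embeds img into a larger canvas; defined elsewhere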
Example #6
Source File: dataset.py    From neural-fingerprinting with BSD 3-Clause "New" or "Revised" License
def data_cifar10(train_start=0, train_end=50000, test_start=0, test_end=10000):
    """
    Preprocess CIFAR10 dataset
    :return:
    """

    global keras
    if keras is None:
        import keras
        from keras.datasets import cifar10
        from keras.utils import np_utils

    # These values are specific to CIFAR10
    img_rows = 32
    img_cols = 32
    nb_classes = 10

    # the data, shuffled and split between train and test sets
    (x_train, y_train), (x_test, y_test) = cifar10.load_data()

    if keras.backend.image_dim_ordering() == 'th':
        x_train = x_train.reshape(x_train.shape[0], 3, img_rows, img_cols)
        x_test = x_test.reshape(x_test.shape[0], 3, img_rows, img_cols)
    else:
        x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, 3)
        x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, 3)
    x_train = x_train.astype('float32')
    x_test = x_test.astype('float32')
    x_train /= 255
    x_test /= 255
    print('x_train shape:', x_train.shape)
    print(x_train.shape[0], 'train samples')
    print(x_test.shape[0], 'test samples')

    # convert class vectors to binary class matrices
    y_train = np_utils.to_categorical(y_train, nb_classes)
    y_test = np_utils.to_categorical(y_test, nb_classes)

    x_train = x_train[train_start:train_end, :, :, :]
    y_train = y_train[train_start:train_end, :]
    x_test = x_test[test_start:test_end, :]
    y_test = y_test[test_start:test_end, :]

    return x_train, y_train, x_test, y_test 
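A hypothetical call, keeping only the first 1,000 training and test examples:

# Example usage (slice sizes are hypothetical):
x_train, y_train, x_test, y_test = data_cifar10(train_end=1000, test_end=1000)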
Example #7
Source File: test_shap.py    From AIX360 with Apache License 2.0
def test_Shap(self):

        np.random.seed(1)
        X_train, X_test, Y_train, Y_test = train_test_split(*shap.datasets.iris(), test_size=0.2, random_state=0)

        # K-nearest neighbors
        knn = sklearn.neighbors.KNeighborsClassifier()
        knn.fit(X_train, Y_train)
        v = 100*np.sum(knn.predict(X_test) == Y_test)/len(Y_test)
        print("Accuracy = {0}%".format(v))

        # Explain a single prediction from the test set
        shapexplainer = KernelExplainer(knn.predict_proba, X_train)
        shap_values = shapexplainer.explain_instance(X_test.iloc[0,:])  # TODO test against original SHAP Lib
        print('knn X_test iloc_0')
        print(shap_values)
        print(shapexplainer.explainer.expected_value[0])
        print(shap_values[0])

        # Explain all the predictions in the test set
        shap_values = shapexplainer.explain_instance(X_test)
        print('knn X_test')
        print(shap_values)
        print(shapexplainer.explainer.expected_value[0])
        print(shap_values[0])

        # Support vector machine with a linear kernel
        svc_linear = sklearn.svm.SVC(kernel='linear', probability=True)
        svc_linear.fit(X_train, Y_train)
        v = 100*np.sum(svc_linear.predict(X_test) == Y_test)/len(Y_test)
        print("Accuracy = {0}%".format(v))

        # Explain all the predictions in the test set
        shapexplainer = KernelExplainer(svc_linear.predict_proba, X_train)
        shap_values = shapexplainer.explain_instance(X_test)
        print('svc X_test')
        print(shap_values)
        print(shapexplainer.explainer.expected_value[0])
        print(shap_values[0])

        np.random.seed(1)
        X, y = shap.datasets.adult()
        X_train, X_valid, y_train, y_valid = sklearn.model_selection.train_test_split(X, y, test_size=0.2, random_state=7)

        knn = sklearn.neighbors.KNeighborsClassifier()
        knn.fit(X_train, y_train)

        f = lambda x: knn.predict_proba(x)[:,1]
        med = X_train.median().values.reshape((1,X_train.shape[1]))
        shapexplainer = KernelExplainer(f, med)
        shap_values_single = shapexplainer.explain_instance(X.iloc[0,:], nsamples=1000)
        print('Shap Tabular Example')
        print(shapexplainer.explainer.expected_value)
        print(shap_values_single)
        print("Invoked Shap KernelExplainer") 
Example #8
Source File: test_shap.py    From AIX360 with Apache License 2.0
def test_ShapTreeExplainer(self):
        X, y = shap.datasets.nhanesi()
        X_display, y_display = shap.datasets.nhanesi(display=True)  # human readable feature values

        xgb_full = xgboost.DMatrix(X, label=y)

        # create a train/test split
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=7)
        xgb_train = xgboost.DMatrix(X_train, label=y_train)
        xgb_test = xgboost.DMatrix(X_test, label=y_test)

        # use validation set to choose # of trees
        params = {
            "eta": 0.002,
            "max_depth": 3,
            "objective": "survival:cox",
            "subsample": 0.5
        }
        model_train = xgboost.train(params, xgb_train, 10000, evals=[(xgb_test, "test")], verbose_eval=1000)

        # train final model on the full data set
        params = {
            "eta": 0.002,
            "max_depth": 3,
            "objective": "survival:cox",
            "subsample": 0.5
        }
        model = xgboost.train(params, xgb_full, 5000, evals=[(xgb_full, "test")], verbose_eval=1000)

        def c_statistic_harrell(pred, labels):
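            # Assumption about the NHANES labels: negative values appear to
            # mark censored observations, so labels[j] > 0 keeps only observed
            # deaths, and abs(labels[i]) > labels[j] selects pairs where
            # subject i survived past subject j's death time.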
            total = 0
            matches = 0
            for i in range(len(labels)):
                for j in range(len(labels)):
                    if labels[j] > 0 and abs(labels[i]) > labels[j]:
                        total += 1
                        if pred[j] > pred[i]:
                            matches += 1
            return matches / total

        # see how well we can order people by survival
        c_statistic_harrell(model_train.predict(xgb_test, ntree_limit=5000), y_test)

        shap_values = TreeExplainer(model).explain_instance(X)
        print("Invoked Shap TreeExplainer") 
Example #9
Source File: utils_mnist.py    From robust_physical_perturbations with MIT License
def data_mnist(datadir='/tmp/', train_start=0, train_end=60000, test_start=0,
               test_end=10000):
    """
    Load and preprocess MNIST dataset
    :param datadir: path to folder where data should be stored
    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :return: tuple of four arrays containing training data, training labels,
             testing data and testing labels.
    """
    assert isinstance(train_start, int)
    assert isinstance(train_end, int)
    assert isinstance(test_start, int)
    assert isinstance(test_end, int)

    if 'tensorflow' in sys.modules:
        from tensorflow.examples.tutorials.mnist import input_data
        mnist = input_data.read_data_sets(datadir, one_hot=True, reshape=False)
        X_train = np.vstack((mnist.train.images, mnist.validation.images))
        Y_train = np.vstack((mnist.train.labels, mnist.validation.labels))
        X_test = mnist.test.images
        Y_test = mnist.test.labels
    else:
        warnings.warn("CleverHans support for Theano is deprecated and "
                      "will be dropped on 2017-11-08.")
        import keras
        from keras.datasets import mnist
        from keras.utils import np_utils

        # These values are specific to MNIST
        img_rows = 28
        img_cols = 28
        nb_classes = 10

        # the data, shuffled and split between train and test sets
        (X_train, y_train), (X_test, y_test) = mnist.load_data()

        if keras.backend.image_dim_ordering() == 'th':
            X_train = X_train.reshape(X_train.shape[0], 1, img_rows, img_cols)
            X_test = X_test.reshape(X_test.shape[0], 1, img_rows, img_cols)

        X_train = X_train.astype('float32')
        X_test = X_test.astype('float32')
        X_train /= 255
        X_test /= 255

        # convert class vectors to binary class matrices
        Y_train = np_utils.to_categorical(y_train, nb_classes)
        Y_test = np_utils.to_categorical(y_test, nb_classes)

    X_train = X_train[train_start:train_end]
    Y_train = Y_train[train_start:train_end]
    X_test = X_test[test_start:test_end]
    Y_test = Y_test[test_start:test_end]

    print('X_train shape:', X_train.shape)
    print('X_test shape:', X_test.shape)

    return X_train, Y_train, X_test, Y_test 
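A hypothetical call, loading the first 10,000 training examples and the full test set:

# Example usage (slice size is hypothetical):
X_train, Y_train, X_test, Y_test = data_mnist(datadir='/tmp/', train_end=10000)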
Example #10
Source File: cifar10_eval.py    From DenseNet-Cifar10 with MIT License
def eval_model():
    model = createDenseNet(nb_classes=nb_classes, img_dim=img_dim, depth=densenet_depth,
                           growth_rate=densenet_growth_rate)
    model.load_weights(check_point_file)
    optimizer = Adam()
    model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])
    
    label_list_path = 'datasets/cifar-10-batches-py/batches.meta'   
    keras_dir = os.path.expanduser(os.path.join('~', '.keras'))
    datadir_base = os.path.expanduser(keras_dir)
    if not os.access(datadir_base, os.W_OK):
        datadir_base = os.path.join('/tmp', '.keras')
    label_list_path = os.path.join(datadir_base, label_list_path)
    with open(label_list_path, mode='rb') as f:
        labels = pickle.load(f)
    
    (x_train, y_train), (x_test, y_test) = cifar10.load_data()
    x_test = x_test.astype('float32')
    x_test /= 255
    y_test = keras.utils.to_categorical(y_test, nb_classes)
    test_datagen = getDataGenerator(train_phase=False)
    test_datagen = test_datagen.flow(x_test, y_test, batch_size=batch_size, shuffle=False)
    
    # Evaluate model with test data set and share sample prediction results
    evaluation = model.evaluate_generator(test_datagen,
                                        steps=x_test.shape[0] // batch_size,
                                        workers=4)
    print('Model Accuracy = %.2f' % (evaluation[1]))
    
    counter = 0
    figure = plt.figure()
    plt.subplots_adjust(left=0.1, bottom=0.1, right=0.9, top=0.9, hspace=0.5, wspace=0.3)
    for x_batch, y_batch in test_datagen:
        predict_res = model.predict_on_batch(x_batch)
        for i in range(batch_size):
            actual_label = labels['label_names'][np.argmax(y_batch[i])]
            predicted_label = labels['label_names'][np.argmax(predict_res[i])]
            if actual_label != predicted_label:
                counter += 1
                pics_raw = x_batch[i]
                pics_raw *= 255
                pics = array_to_img(pics_raw)
                ax = plt.subplot(25//5, 5, counter)
                ax.axis('off')
                ax.set_title(predicted_label)
                plt.imshow(pics)
            if counter >= 25:
                plt.savefig("./wrong_predicted.jpg")
                break
        if counter >= 25:
            break
    print("Everything seems OK...") 
Example #11
Source File: data_input.py    From DenseNet-Cifar10 with MIT License
def testDataGenerator(pics_num):
    """visualize the pics after data augmentation
    Args:
        pics_num:
            the number of pics you want to observe
    return:
        None
    """
    
    print("Now, we are testing data generator......")
    
    (x_train, y_train), (x_test, y_test) = cifar10.load_data()
    x_train = x_train.astype('float32')
    y_train = keras.utils.to_categorical(y_train, 10)
    
    # Load label names to use in prediction results
    label_list_path = 'datasets/cifar-10-batches-py/batches.meta'
    keras_dir = os.path.expanduser(os.path.join('~', '.keras'))
    datadir_base = os.path.expanduser(keras_dir)
    if not os.access(datadir_base, os.W_OK):
        datadir_base = os.path.join('/tmp', '.keras')
    label_list_path = os.path.join(datadir_base, label_list_path)
    with open(label_list_path, mode='rb') as f:
        labels = pickle.load(f)
    
    datagen = getDataGenerator(train_phase=True)
    """
    x_batch is a [-1,row,col,channel] np array
    y_batch is a [-1,labels] np array
    """
    figure = plt.figure()
    plt.subplots_adjust(left=0.1, bottom=0.1, right=0.9, top=0.9, hspace=0.5, wspace=0.3)
    for x_batch, y_batch in datagen.flow(x_train, y_train, batch_size=pics_num):
        for i in range(pics_num):
            pics_raw = x_batch[i]
            pics = array_to_img(pics_raw)
            ax = plt.subplot(pics_num//5, 5, i+1)
            ax.axis('off')
            ax.set_title(labels['label_names'][np.argmax(y_batch[i])])
            plt.imshow(pics)
        plt.savefig("./processed_data.jpg")
        break   
    print("Everything seems OK...")