Python Examples of sklearn.naive_bayes.GaussianNB

Source File: hybrid_nb.py From Jacinle with MIT License

6 votes

def __init__(self, distributions, weights=None, **kwargs):
        """Create one naive Bayes model per requested distribution.

        Args:
            distributions: iterable of distribution specifiers; each one is
                resolved with ``NaiveBayesianDistribution.from_string``.
            weights: stored unchanged on ``self.weights``.
            **kwargs: forwarded to every model constructor.

        Raises:
            ValueError: if a specifier resolves to a distribution that is not
                GAUSSIAN, MULTINOMIAL or BERNOULLI.
        """
        self.models = []
        for spec in distributions:
            kind = NaiveBayesianDistribution.from_string(spec)
            if kind is NaiveBayesianDistribution.GAUSSIAN:
                model_cls = nb.GaussianNB
            elif kind is NaiveBayesianDistribution.MULTINOMIAL:
                model_cls = nb.MultinomialNB
            elif kind is NaiveBayesianDistribution.BERNOULLI:
                model_cls = nb.BernoulliNB
            else:
                raise ValueError('Unknown distribution: {}.'.format(kind))
            self.models.append(model_cls(**kwargs))
            # Every model after the first one is built with fit_prior=False.
            # NOTE(review): this also sends fit_prior to GaussianNB when it is
            # not the first entry -- confirm that constructor accepts it.
            kwargs['fit_prior'] = False

        self.weights = weights

Source File: test_GaussianNB.py From differential-privacy-library with MIT License

6 votes

def test_different_results(self):
        # Fit the module's GaussianNB (epsilon=1.0, explicit bounds) and the
        # scikit-learn GaussianNB on the same iris split, then require that
        # their test-set predictions are not all identical.
        from sklearn.naive_bayes import GaussianNB as sk_nb
        from sklearn import datasets

        global_seed(12345)
        iris = datasets.load_iris()

        x_train, x_test, y_train, y_test = train_test_split(
            iris.data, iris.target, test_size=.2)

        feature_bounds = ([4.3, 2.0, 1.0, 0.1], [7.9, 4.4, 6.9, 2.5])

        private_clf = GaussianNB(epsilon=1.0, bounds=feature_bounds)
        baseline_clf = sk_nb()

        private_clf.fit(x_train, y_train)
        baseline_clf.fit(x_train, y_train)

        agreement = private_clf.predict(x_test) == baseline_clf.predict(x_test)

        self.assertFalse(np.all(agreement))

Source File: testScoreWithAdapaSklearn.py From nyoka with Apache License 2.0

6 votes

def test_24_gaussian_nb(self):
        # Export a StandardScaler + GaussianNB pipeline to PMML, score it on
        # the zserver, and compare the served predictions and probabilities
        # with the pipeline's own output on the test data.
        print("\ntest 24 (GaussianNB with preprocessing) [multi-class]\n")
        dataset = self.data_utility.get_data_for_multi_class_classification()
        X, X_test, y, features, target, test_file = dataset

        classifier = GaussianNB()
        steps = [('scaler', StandardScaler()), ("model", classifier)]
        pipeline_obj = Pipeline(steps)
        pipeline_obj.fit(X, y)

        pmml_path = 'test24sklearn.pmml'
        skl_to_pmml(pipeline_obj, features, target, pmml_path)

        adapa = self.adapa_utility
        model_name = adapa.upload_to_zserver(pmml_path)
        predictions, probabilities = adapa.score_in_zserver(model_name, test_file)

        local_pred = pipeline_obj.predict(X_test)
        local_prob = pipeline_obj.predict_proba(X_test)
        self.assertEqual(self.adapa_utility.compare_predictions(predictions, local_pred), True)
        self.assertEqual(self.adapa_utility.compare_probability(probabilities, local_prob), True)

Source File: test_naive_bayes.py From dask-ml with BSD 3-Clause "New" or "Revised" License

6 votes

def test_smoke():
    """Check that ``nb.GaussianNB`` and ``nb_.GaussianNB`` agree after fitting.

    The ``nb`` model is fitted on ``X``/``y`` directly and its results are
    materialised with ``.compute()``; the ``nb_`` model is fitted on the
    computed copies and its results are compared as-is.
    """
    lazy_model = nb.GaussianNB()
    eager_model = nb_.GaussianNB()
    lazy_model.fit(X, y)
    X_local = X.compute()
    y_local = y.compute()
    eager_model.fit(X_local, y_local)

    # Fitted attributes must match one by one.
    for attr in ("class_prior_", "class_count_", "theta_", "sigma_"):
        assert_eq(getattr(lazy_model, attr).compute(), getattr(eager_model, attr))

    # Prediction outputs must match as well.
    for method in ("predict_proba", "predict", "predict_log_proba"):
        lazy_result = getattr(lazy_model, method)(X).compute()
        assert_eq(lazy_result, getattr(eager_model, method)(X_local))

Source File: testScoreWithAdapaSklearn.py From nyoka with Apache License 2.0

6 votes

def test_22_gaussian_nb(self):
        # Export a GaussianNB-only pipeline to PMML, score it on the zserver,
        # and compare the served predictions and probabilities with the
        # pipeline's own output on the test data.
        print("\ntest 22 (GaussianNB without preprocessing) [binary-class]\n")
        dataset = self.data_utility.get_data_for_binary_classification()
        X, X_test, y, features, target, test_file = dataset

        classifier = GaussianNB()
        pipeline_obj = Pipeline([("model", classifier)])
        pipeline_obj.fit(X, y)

        pmml_path = 'test22sklearn.pmml'
        skl_to_pmml(pipeline_obj, features, target, pmml_path)

        adapa = self.adapa_utility
        model_name = adapa.upload_to_zserver(pmml_path)
        predictions, probabilities = adapa.score_in_zserver(model_name, test_file)

        local_pred = pipeline_obj.predict(X_test)
        local_prob = pipeline_obj.predict_proba(X_test)
        self.assertEqual(self.adapa_utility.compare_predictions(predictions, local_pred), True)
        self.assertEqual(self.adapa_utility.compare_probability(probabilities, local_prob), True)

Source File: testScoreWithAdapaSklearn.py From nyoka with Apache License 2.0

6 votes

def test_23_gaussian_nb(self):
        # Export a GaussianNB-only pipeline to PMML, score it on the zserver,
        # and compare the served predictions and probabilities with the
        # pipeline's own output on the test data.
        print("\ntest 23 (GaussianNB without preprocessing) [multi-class]\n")
        dataset = self.data_utility.get_data_for_multi_class_classification()
        X, X_test, y, features, target, test_file = dataset

        classifier = GaussianNB()
        pipeline_obj = Pipeline([("model", classifier)])
        pipeline_obj.fit(X, y)

        pmml_path = 'test23sklearn.pmml'
        skl_to_pmml(pipeline_obj, features, target, pmml_path)

        adapa = self.adapa_utility
        model_name = adapa.upload_to_zserver(pmml_path)
        predictions, probabilities = adapa.score_in_zserver(model_name, test_file)

        local_pred = pipeline_obj.predict(X_test)
        local_prob = pipeline_obj.predict_proba(X_test)
        self.assertEqual(self.adapa_utility.compare_predictions(predictions, local_pred), True)
        self.assertEqual(self.adapa_utility.compare_probability(probabilities, local_prob), True)

Source File: classifier.py From libfaceid with MIT License

6 votes

def __init__(self, classifier=FaceClassifierModels.DEFAULT):
        """Instantiate the estimator selected by ``classifier``.

        ``self._clf`` stays ``None`` when ``classifier`` matches none of the
        listed models. The selected model is printed at the end in every case.
        """
        # (model id, zero-argument factory) pairs; only the factory of the
        # first matching id is ever called.
        factories = (
            (FaceClassifierModels.LINEAR_SVM,
             lambda: SVC(C=1.0, kernel="linear", probability=True)),
            (FaceClassifierModels.NAIVE_BAYES,
             lambda: GaussianNB()),
            (FaceClassifierModels.RBF_SVM,
             lambda: SVC(C=1, kernel='rbf', probability=True, gamma=2)),
            (FaceClassifierModels.NEAREST_NEIGHBORS,
             lambda: KNeighborsClassifier(1)),
            (FaceClassifierModels.DECISION_TREE,
             lambda: DecisionTreeClassifier(max_depth=5)),
            (FaceClassifierModels.RANDOM_FOREST,
             lambda: RandomForestClassifier(max_depth=5, n_estimators=10, max_features=1)),
            (FaceClassifierModels.NEURAL_NET,
             lambda: MLPClassifier(alpha=1)),
            (FaceClassifierModels.ADABOOST,
             lambda: AdaBoostClassifier()),
            (FaceClassifierModels.QDA,
             lambda: QuadraticDiscriminantAnalysis()),
        )

        self._clf = None
        for model_id, build in factories:
            if classifier == model_id:
                self._clf = build()
                break
        print("classifier={}".format(FaceClassifierModels(classifier)))

Source File: A10.SFA.py From Machine-Learning with MIT License

6 votes

def Faceidentifier(trainDataSimplified, trainLabel, testDataSimplified, testLabel):
    """Train three classifiers on the same data and report each one's accuracy.

    GaussianNB, SVC(C=8.0) and LogisticRegression are each fitted on
    ``trainDataSimplified`` with the flattened ``trainLabel``; their
    predictions for ``testDataSimplified`` are passed to ``show_accuracy``
    together with ``testLabel``. Nothing is returned.
    """
    banner = "====================================="
    # Factories keep each classifier's construction right after its title is printed.
    candidates = (
        ("GaussianNB", lambda: GaussianNB()),
        ("SVC", lambda: SVC(C=8.0)),
        ("LogisticRegression", lambda: LogisticRegression()),
    )

    print(banner)
    for title, make_classifier in candidates:
        print(title)
        model = make_classifier()
        model.fit(trainDataSimplified, np.ravel(trainLabel))
        predicted = model.predict(testDataSimplified)
        show_accuracy(predicted, testLabel)
        print()
    print(banner)

Source File: tests_classification.py From discomll with Apache License 2.0

6 votes

def test_naivebayes_breastcancer_cont(self):
        # Run with:
        # python -m unittest tests_classification.Tests_Classification.test_naivebayes_breastcancer_cont
        #
        # Compares scikit-learn GaussianNB probabilities with the values
        # produced by discomll's naivebayes on the same dataset.
        from sklearn.naive_bayes import GaussianNB
        from discomll.classification import naivebayes

        x_train, y_train, x_test, y_test = datasets.breastcancer_cont(replication=1)
        train_data, test_data = datasets.breastcancer_cont_discomll(replication=1)

        expected_probs = GaussianNB().fit(x_train, y_train).predict_proba(x_test)

        fitmodel_url = naivebayes.fit(train_data)
        prediction_url = naivebayes.predict(test_data, fitmodel_url)
        actual_probs = []
        for _, value in result_iterator(prediction_url):
            # The second element of each result value is what gets compared.
            actual_probs.append(value[1])

        self.assertTrue(np.allclose(expected_probs, actual_probs, atol=1e-8))

Source File: test_partition.py From scikit-multilearn with BSD 2-Clause "Simplified" License

5 votes

def get_labelpowerset_with_nb(self):
        """Return a LabelPowerset built on GaussianNB with ``require_dense=[True, True]``."""
        base = GaussianNB()
        return LabelPowerset(classifier=base, require_dense=[True, True])

Source File: test_rakeld.py From scikit-multilearn with BSD 2-Clause "Simplified" License

5 votes

def get_rakeld_with_nb(self):
        """Return a RakelD built on GaussianNB using the test labelset size."""
        base = GaussianNB()
        return RakelD(base_classifier=base,
                      base_classifier_require_dense=[True, True],
                      labelset_size=TEST_LABELSET_SIZE)

Source File: test_rakelo.py From scikit-multilearn with BSD 2-Clause "Simplified" License

5 votes

def get_rakeld_with_nb(self):
        """Return a RakelO built on GaussianNB using the test labelset size and model count."""
        base = GaussianNB()
        return RakelO(base_classifier=base,
                      base_classifier_require_dense=[True, True],
                      labelset_size=TEST_LABELSET_SIZE,
                      model_count=TEST_MODEL_COUNT)

Source File: test_br.py From scikit-multilearn with BSD 2-Clause "Simplified" License

5 votes

def test_if_works_with_cross_validation(self):
        # Build a BinaryRelevance around GaussianNB and hand it to the shared
        # cross-validation assertion helper.
        base = GaussianNB()
        classifier = BinaryRelevance(classifier=base, require_dense=[True, True])
        self.assertClassifierWorksWithCV(classifier)

Source File: preprocessing_surf.py From Indian-Sign-Language-Recognition with MIT License

5 votes

def predict_nb(X_train, X_test, y_train, y_test):
    """Fit ``nb`` on the training data, report accuracy and write a CSV.

    The CSV ``submission_surf_nb.csv`` has one row per test sample with a
    1-based id, the predicted label and the true label.
    """
    model = nb()
    print("nb started")
    model.fit(X_train, y_train)
    predicted = model.predict(X_test)
    calc_accuracy("Naive Bayes", y_test, predicted)

    image_ids = range(1, len(y_test) + 1)
    table = np.c_[image_ids, predicted, y_test]
    np.savetxt(
        'submission_surf_nb.csv',
        table,
        delimiter=',',
        header='ImageId,Label,TrueLabel',
        comments='',
        fmt='%d'
    )

Source File: preprocessing_orb.py From Indian-Sign-Language-Recognition with MIT License

5 votes

def predict_nb(X_train, X_test, y_train, y_test):
    """Fit ``nb`` on the training data and report accuracy on the test data."""
    model = nb()
    print("nb started")
    model.fit(X_train, y_train)
    predicted = model.predict(X_test)
    calc_accuracy("Naive Bayes", y_test, predicted)

Source File: rscls.py From Remote-Sensing-Image-Classification with MIT License

5 votes

def GNB(trainx, trainy):
    """Fit a new GaussianNB on ``trainx``/``trainy`` and return the result of ``fit``."""
    return GaussianNB().fit(trainx, trainy)

Source File: rscls.py From Remote-Sensing-Image-Classification with MIT License

5 votes

def GNB(trainx, trainy):
    """Fit a new GaussianNB on ``trainx``/``trainy`` and return the result of ``fit``."""
    return GaussianNB().fit(trainx, trainy)

Source File: test_codec.py From Splunking-Crime with GNU Affero General Public License v3.0

5 votes

def test_GaussianNB(self):
        # Register the codecs of GaussianNB_Algo first, then pass the
        # GaussianNB class to the shared classifier_util check.
        GaussianNB_Algo.register_codecs()
        self.classifier_util(GaussianNB)

Source File: GaussianNB.py From Splunking-Crime with GNU Affero General Public License v3.0

5 votes

def __init__(self, options):
        """Process ``options`` with ``handle_options`` and create the ``_GaussianNB`` estimator."""
        self.handle_options(options)
        self.estimator = _GaussianNB()

Source File: rscls.py From Double-Branch-Dual-Attention-Mechanism-Network with GNU Affero General Public License v3.0

5 votes

def GNB(trainx, trainy):
    """Fit a new GaussianNB on ``trainx``/``trainy`` and return the result of ``fit``."""
    return GaussianNB().fit(trainx, trainy)

Source File: rscls.py From Double-Branch-Dual-Attention-Mechanism-Network with GNU Affero General Public License v3.0

5 votes

def GNB(trainx, trainy):
    """Fit a new GaussianNB on ``trainx``/``trainy`` and return the result of ``fit``."""
    return GaussianNB().fit(trainx, trainy)

Source File: rscls.py From Double-Branch-Dual-Attention-Mechanism-Network with GNU Affero General Public License v3.0

5 votes

def GNB(trainx, trainy):
    """Fit a new GaussianNB on ``trainx``/``trainy`` and return the result of ``fit``."""
    return GaussianNB().fit(trainx, trainy)

Source File: sentiment_ensemble.py From textlytics with MIT License

5 votes

def sentiment_ensemble_lexi_ml(self, lexicon_predictions,
                                   ml_predictions,
                                   classifiers=None,
                                   n_folds=2):
        """ Fusion classification for sentiment analysis

        The lexicon and ML predictions are combined with ``features_array``,
        stored on ``self.feature_set`` and, transposed, used as the feature
        matrix for ``Sentiment.sentiment_classification`` against
        ``self.classes``.

        :type lexicon_predictions: dict with lexicon name as keys and lists of
            predicted values as values
        :type ml_predictions: dict with classifiers name as keys and lists of
            predicted values as values
        :type classifiers: dict with name of classifier and classifier object;
            when None, a fresh ``{'GaussianNB': GaussianNB()}`` is created for
            this call
        :type n_folds: forwarded unchanged to ``sentiment_classification``
        :return: the ``results`` attribute of the Sentiment object (described
            by the original documentation as a dict with measures and time
            for the supervised learning process)
        """
        # Build the default per call: a dict/estimator default in the
        # signature would be shared (and possibly already fitted) across calls.
        if classifiers is None:
            classifiers = {'GaussianNB': GaussianNB()}

        ensemble_features = self.features_array(lexicon_predictions.values(),
                                                ml_predictions.values())
        self.feature_set = ensemble_features

        s = Sentiment()
        # The return value of the classification call is not used here; the
        # outcome is read from s.results.
        s.sentiment_classification(
            X=self.feature_set.T,
            y=self.classes,
            n_folds=n_folds,
            classifiers=classifiers)

        # TODO add the predictions to the results

        return s.results

Source File: classifiercobra.py From pycobra with MIT License

5 votes

def load_default(self, machine_list='basic'):
        """
        Fits the requested default scikit-learn classifiers and stores them in ``self.estimators_``.

        Every recognised machine name is fitted on ``self.X_k_`` / ``self.y_k_`` and saved
        under that name. Names without a matching branch below ('sgd' is one of them) are
        skipped silently, as is any machine whose construction or fit raises ``ValueError``.

        Parameters
        ----------
        machine_list: optional, 'basic', 'advanced' or list of strings
            List of default machine names to be loaded. 'basic' expands to
            ['sgd', 'tree', 'knn', 'svm']; 'advanced' expands to
            ['tree', 'knn', 'svm', 'logreg', 'naive_bayes', 'lda', 'neural_network'].
        Returns
        -------
        self : returns an instance of self.
        """
        if machine_list == 'basic':
            machine_list = ['sgd', 'tree', 'knn', 'svm']
        elif machine_list == 'advanced':
            machine_list = ['tree', 'knn', 'svm', 'logreg', 'naive_bayes', 'lda', 'neural_network']

        for name in machine_list:
            try:
                if name == 'svm':
                    estimator = svm.SVC()
                elif name == 'knn':
                    estimator = neighbors.KNeighborsClassifier()
                elif name == 'tree':
                    estimator = tree.DecisionTreeClassifier()
                elif name == 'logreg':
                    estimator = LogisticRegression(random_state=self.random_state)
                elif name == 'naive_bayes':
                    estimator = GaussianNB()
                elif name == 'lda':
                    estimator = LinearDiscriminantAnalysis()
                elif name == 'neural_network':
                    estimator = MLPClassifier(random_state=self.random_state)
                else:
                    # No branch for this name: nothing is fitted or stored.
                    continue
                self.estimators_[name] = estimator.fit(self.X_k_, self.y_k_)
            except ValueError:
                continue

        return self

Source File: export_tests.py From tpot with GNU Lesser General Public License v3.0

5 votes

def test_generate_import_code():
    """Assert that generate_import_code() returns the correct set of dependencies for a given pipeline."""
    individual = creator.Individual.from_string('GaussianNB(RobustScaler(input_matrix))', tpot_obj._pset)
    generated_code = generate_import_code(individual, tpot_obj.operators)

    # Imports expected for a RobustScaler -> GaussianNB pipeline.
    expected_code = """import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import RobustScaler
"""
    assert expected_code == generated_code

Source File: test_cc.py From scikit-multilearn with BSD 2-Clause "Simplified" License

5 votes

def test_if_order_is_set_when_explicitly_given(self):
        # A ClassifierChain given an explicit (reversed) label order must
        # report exactly that order after fitting.
        X, y = self.get_multilabel_data_for_tests(sparsity_indicator='sparse')[0]

        label_count = y.shape[1]
        reversed_chain = list(range(label_count - 1, -1, -1))

        base = GaussianNB()
        classifier = ClassifierChain(classifier=base,
                                     require_dense=[True, True],
                                     order=reversed_chain)
        classifier.fit(X, y)

        self.assertEqual(classifier._order(), reversed_chain)

Source File: export_tests.py From tpot with GNU Lesser General Public License v3.0

5 votes

def test_generate_pipeline_code():
    """Assert that generate_pipeline_code() returns the correct code given a specific pipeline."""

    tpot_obj._fit_init()

    # The nested pipeline is assembled from its two CombineDFs branches.
    gradient_boosting_branch = [
        'GradientBoostingClassifier', 'input_matrix', 38.0, 5, 5, 5, 0.05, 0.5
    ]
    gaussian_nb_branch = ['GaussianNB', ['ZeroCount', 'input_matrix']]
    combined_inputs = ['CombineDFs', gradient_boosting_branch, gaussian_nb_branch]
    pipeline = ['KNeighborsClassifier', combined_inputs, 18, 'uniform', 2]

    expected_code = """make_pipeline(
    make_union(
        StackingEstimator(estimator=GradientBoostingClassifier(learning_rate=38.0, max_depth=5, max_features=5, min_samples_leaf=5, min_samples_split=0.05, n_estimators=0.5)),
        StackingEstimator(estimator=make_pipeline(
            ZeroCount(),
            GaussianNB()
        ))
    ),
    KNeighborsClassifier(n_neighbors=18, p="uniform", weights=2)
)"""
    generated_code = generate_pipeline_code(pipeline, tpot_obj.operators)
    assert expected_code == generated_code

Source File: Pipeline.py From VDiscover with GNU General Public License v3.0

5 votes

def makeTrainPipelineBOW(ftype):
    """Build the bag-of-words training pipeline for the given feature type.

    Args:
        ftype: ``"dynamic"`` or ``"static"``.

    Returns:
        A ``Pipeline``. For ``"dynamic"`` it selects the 'dynamic' item,
        count-vectorizes it with the vocabulary read from
        ``data/dyn_events.dic`` (located next to this file), densifies,
        applies ``CutoffMax(16)`` and ends in a RandomForestClassifier. For
        ``"static"`` it selects the 'static' item, count-vectorizes it,
        densifies and ends in a LogisticRegression.

    Raises:
        AssertionError: if ``ftype`` is neither ``"dynamic"`` nor ``"static"``.
    """
    # Compare by value: identity checks against string literals only work
    # when the interpreter happens to share the string object.
    if ftype == "dynamic":
        realpath = os.path.dirname(os.path.realpath(__file__))

        # One vocabulary entry per line; the context manager closes the file.
        with open(realpath + "/data/dyn_events.dic") as f:
            event_dict = [line.replace("\n", "") for line in f]

        return Pipeline(steps=[
            ('selector', ItemSelector(key='dynamic')),
            ('dvectorizer', CountVectorizer(tokenizer=dynamicTokenizer,
                                            ngram_range=(1, 3), lowercase=False, vocabulary=event_dict)),
            ('todense', DenseTransformer()),
            ('cutfoff', CutoffMax(16)),
            # GaussianNB() was an alternative classifier for this step.
            ('classifier', RandomForestClassifier(
                n_estimators=1000, max_features=None, max_depth=100))
        ])

    if ftype == "static":
        return Pipeline(steps=[
            ('selector', ItemSelector(key='static')),
            ('dvectorizer', CountVectorizer(
                tokenizer=static_tokenizer, ngram_range=(1, 1), lowercase=False)),
            ('todense', DenseTransformer()),
            ('classifier', LogisticRegression(penalty="l2", C=1e-07, tol=1e-06))
        ])

    # Raised explicitly (same exception type as the former assert) so the
    # check is not stripped when Python runs with -O.
    raise AssertionError("unsupported ftype: {!r}".format(ftype))

Source File: gaussian_nb.py From lale with Apache License 2.0

5 votes

def __init__(self, priors=None, var_smoothing=1e-09):
        """Record the hyperparameters and build the wrapped ``Op`` model from them."""
        self._hyperparams = dict(priors=priors, var_smoothing=var_smoothing)
        self._wrapped_model = Op(**self._hyperparams)

Source File: classification.py From Indian-Sign-Language-Recognition with MIT License

5 votes

def run_nb():
	"""Fit ``nb`` on the module-level ``x``/``y``, predict ``x_``, write a CSV and report accuracy.

	The CSV ``submission_nb.csv`` has one row per entry of ``test`` with a
	1-based id, the predicted label and the matching ``label_test`` value.
	"""
	model = nb()
	print("nb started")
	model.fit(x, y)
	predicted = model.predict(x_)

	image_ids = range(1, len(test) + 1)
	table = np.c_[image_ids, predicted, label_test]
	np.savetxt(
		'submission_nb.csv',
		table,
		delimiter=',',
		header='ImageId,Label,TrueLabel',
		comments='',
		fmt='%d'
	)
	calc_accuracy("Naive Bayes", label_test, predicted)

Python sklearn.naive_bayes.GaussianNB() Examples