Python sklearn.naive_bayes.BernoulliNB() Examples
The following are 30 code examples of sklearn.naive_bayes.BernoulliNB(), collected from open-source projects. The project and source file each example was taken from are noted above it. You may also want to check out all available functions and classes of the module sklearn.naive_bayes.
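Before the examples, here is a minimal, self-contained sketch of typical BernoulliNB usage; the toy binary data and parameter choices below are invented for illustration, not taken from any of the projects listed here.

import numpy as np
from sklearn.naive_bayes import BernoulliNB

# Toy binary feature matrix (4 samples, 3 features) and labels -- invented data
X = np.array([[1, 0, 1],
              [1, 1, 0],
              [0, 0, 1],
              [0, 1, 1]])
y = np.array([0, 0, 1, 1])

# alpha is the additive (Laplace) smoothing parameter; binarize is the
# threshold applied to the inputs (a no-op here, since X is already 0/1)
clf = BernoulliNB(alpha=1.0, binarize=0.0)
clf.fit(X, y)

print(clf.predict(X))        # class predictions
print(clf.predict_proba(X))  # per-class probabilities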
Example #1
Source File: test_naive_bayes.py From twitter-stock-recommendation with MIT License
def test_feature_log_prob_bnb():
    # Test for issue #4268.
    # Tests that the feature log prob value computed by BernoulliNB when
    # alpha=1.0 is equal to the expression given in Manning, Raghavan,
    # and Schuetze's "Introduction to Information Retrieval" book:
    # http://nlp.stanford.edu/IR-book/html/htmledition/the-bernoulli-model-1.html

    X = np.array([[0, 0, 0], [1, 1, 0], [0, 1, 0], [1, 0, 1], [0, 1, 0]])
    Y = np.array([0, 0, 1, 2, 2])

    # Fit Bernoulli NB w/ alpha = 1.0
    clf = BernoulliNB(alpha=1.0)
    clf.fit(X, Y)

    # Manually form the (log) numerator and denominator that
    # constitute P(feature presence | class)
    num = np.log(clf.feature_count_ + 1.0)
    denom = np.tile(np.log(clf.class_count_ + 2.0), (X.shape[1], 1)).T

    # Check manual estimate matches
    assert_array_almost_equal(clf.feature_log_prob_, (num - denom))
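The identity this test checks can be restated directly: with smoothing alpha, BernoulliNB estimates P(x_i = 1 | y) = (N_yi + alpha) / (N_y + 2 * alpha). A self-contained sketch of the same check, using NumPy broadcasting in place of np.tile:

import numpy as np
from sklearn.naive_bayes import BernoulliNB

X = np.array([[0, 0, 0], [1, 1, 0], [0, 1, 0], [1, 0, 1], [0, 1, 0]])
Y = np.array([0, 0, 1, 2, 2])

alpha = 1.0
clf = BernoulliNB(alpha=alpha).fit(X, Y)

# P(x_i = 1 | y) = (N_yi + alpha) / (N_y + 2 * alpha)
expected = (clf.feature_count_ + alpha) / (clf.class_count_[:, None] + 2 * alpha)
np.testing.assert_allclose(np.exp(clf.feature_log_prob_), expected)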
Example #2
Source File: test_naive_bayes.py From twitter-stock-recommendation with MIT License
def test_discretenb_provide_prior_with_partial_fit():
    # Test whether discrete NB classes use provided prior
    # when using partial_fit

    iris = load_iris()
    iris_data1, iris_data2, iris_target1, iris_target2 = train_test_split(
        iris.data, iris.target, test_size=0.4, random_state=415)

    for cls in [BernoulliNB, MultinomialNB]:
        for prior in [None, [0.3, 0.3, 0.4]]:
            clf_full = cls(class_prior=prior)
            clf_full.fit(iris.data, iris.target)
            clf_partial = cls(class_prior=prior)
            clf_partial.partial_fit(iris_data1, iris_target1,
                                    classes=[0, 1, 2])
            clf_partial.partial_fit(iris_data2, iris_target2)
            assert_array_almost_equal(clf_full.class_log_prior_,
                                      clf_partial.class_log_prior_)
Example #3
Source File: test_naive_bayes.py From Mastering-Elasticsearch-7.0 with MIT License
def test_feature_log_prob_bnb():
    # Test for issue #4268.
    # Tests that the feature log prob value computed by BernoulliNB when
    # alpha=1.0 is equal to the expression given in Manning, Raghavan,
    # and Schuetze's "Introduction to Information Retrieval" book:
    # https://nlp.stanford.edu/IR-book/html/htmledition/the-bernoulli-model-1.html

    X = np.array([[0, 0, 0], [1, 1, 0], [0, 1, 0], [1, 0, 1], [0, 1, 0]])
    Y = np.array([0, 0, 1, 2, 2])

    # Fit Bernoulli NB w/ alpha = 1.0
    clf = BernoulliNB(alpha=1.0)
    clf.fit(X, Y)

    # Manually form the (log) numerator and denominator that
    # constitute P(feature presence | class)
    num = np.log(clf.feature_count_ + 1.0)
    denom = np.tile(np.log(clf.class_count_ + 2.0), (X.shape[1], 1)).T

    # Check manual estimate matches
    assert_array_almost_equal(clf.feature_log_prob_, (num - denom))
Example #4
Source File: hybrid_nb.py From Jacinle with MIT License
def __init__(self, distributions, weights=None, **kwargs):
    self.models = []
    for dist in distributions:
        dist = NaiveBayesianDistribution.from_string(dist)
        if dist is NaiveBayesianDistribution.GAUSSIAN:
            model = nb.GaussianNB(**kwargs)
        elif dist is NaiveBayesianDistribution.MULTINOMIAL:
            model = nb.MultinomialNB(**kwargs)
        elif dist is NaiveBayesianDistribution.BERNOULLI:
            model = nb.BernoulliNB(**kwargs)
        else:
            raise ValueError('Unknown distribution: {}.'.format(dist))
        kwargs['fit_prior'] = False  # Except the first model.
        self.models.append(model)
    self.weights = weights
Example #5
Source File: test_naive_bayes.py From twitter-stock-recommendation with MIT License
def test_input_check_partial_fit():
    for cls in [BernoulliNB, MultinomialNB]:
        # check shape consistency
        assert_raises(ValueError, cls().partial_fit, X2, y2[:-1],
                      classes=np.unique(y2))

        # classes is required for first call to partial fit
        assert_raises(ValueError, cls().partial_fit, X2, y2)

        # check consistency of consecutive classes values
        clf = cls()
        clf.partial_fit(X2, y2, classes=np.unique(y2))
        assert_raises(ValueError, clf.partial_fit, X2, y2,
                      classes=np.arange(42))

        # check consistency of input shape for partial_fit
        assert_raises(ValueError, clf.partial_fit, X2[:, :-1], y2)

        # check consistency of input shape for predict
        assert_raises(ValueError, clf.predict, X2[:, :-1])
Example #6
Source File: test_naive_bayes.py From twitter-stock-recommendation with MIT License
def test_discretenb_pickle():
    # Test picklability of discrete naive Bayes classifiers

    for cls in [BernoulliNB, MultinomialNB, GaussianNB]:
        clf = cls().fit(X2, y2)
        y_pred = clf.predict(X2)

        store = BytesIO()
        pickle.dump(clf, store)

        clf = pickle.load(BytesIO(store.getvalue()))
        assert_array_equal(y_pred, clf.predict(X2))

        if cls is not GaussianNB:
            # TODO re-enable me when partial_fit is implemented for GaussianNB

            # Test pickling of estimator trained with partial_fit
            clf2 = cls().partial_fit(X2[:3], y2[:3], classes=np.unique(y2))
            clf2.partial_fit(X2[3:], y2[3:])
            store = BytesIO()
            pickle.dump(clf2, store)
            clf2 = pickle.load(BytesIO(store.getvalue()))
            assert_array_equal(y_pred, clf2.predict(X2))
Example #7
Source File: test_naive_bayes.py From Mastering-Elasticsearch-7.0 with MIT License
def test_discrete_prior():
    # Test whether class priors are properly set.
    for cls in [BernoulliNB, MultinomialNB]:
        clf = cls().fit(X2, y2)
        assert_array_almost_equal(np.log(np.array([2, 2, 2]) / 6.0),
                                  clf.class_log_prior_, 8)
Example #8
Source File: test_naive_bayes.py From twitter-stock-recommendation with MIT License
def test_coef_intercept_shape():
    # coef_ and intercept_ should have shapes as in other linear models.
    # Non-regression test for issue #2127.
    X = [[1, 0, 0], [1, 1, 1]]
    y = [1, 2]  # binary classification

    for clf in [MultinomialNB(), BernoulliNB()]:
        clf.fit(X, y)
        assert_equal(clf.coef_.shape, (1, 3))
        assert_equal(clf.intercept_.shape, (1,))
Example #9
Source File: test_naive_bayes.py From twitter-stock-recommendation with MIT License
def test_sample_weight_multiclass():
    for cls in [BernoulliNB, MultinomialNB]:
        # check shape consistency for number of samples at fit time
        yield check_sample_weight_multiclass, cls
Example #10
Source File: test_naive_bayes.py From twitter-stock-recommendation with MIT License
def test_discretenb_provide_prior():
    # Test whether discrete NB classes use provided prior

    for cls in [BernoulliNB, MultinomialNB]:
        clf = cls(class_prior=[0.5, 0.5])
        clf.fit([[0], [0], [1]], [0, 0, 1])
        prior = np.exp(clf.class_log_prior_)
        assert_array_equal(prior, np.array([.5, .5]))

        # Inconsistent number of classes with prior
        assert_raises(ValueError, clf.fit, [[0], [1], [2]], [0, 1, 2])
        assert_raises(ValueError, clf.partial_fit, [[0], [1]], [0, 1],
                      classes=[0, 1, 1])
Example #11
Source File: test_naive_bayes.py From twitter-stock-recommendation with MIT License
def test_discretenb_uniform_prior():
    # Test whether discrete NB classes fit a uniform prior
    # when fit_prior=False and class_prior=None

    for cls in [BernoulliNB, MultinomialNB]:
        clf = cls()
        clf.set_params(fit_prior=False)
        clf.fit([[0], [0], [1]], [0, 0, 1])
        prior = np.exp(clf.class_log_prior_)
        assert_array_equal(prior, np.array([.5, .5]))
Example #12
Source File: test_naive_bayes.py From twitter-stock-recommendation with MIT License
def test_input_check_fit():
    # Test input checks for the fit method
    for cls in [BernoulliNB, MultinomialNB, GaussianNB]:
        # check shape consistency for number of samples at fit time
        assert_raises(ValueError, cls().fit, X2, y2[:-1])

        # check shape consistency for number of input features at predict time
        clf = cls().fit(X2, y2)
        assert_raises(ValueError, clf.predict, X2[:, :-1])
Example #13
Source File: test_naive_bayes.py From twitter-stock-recommendation with MIT License
def test_discretenb_partial_fit():
    for cls in [MultinomialNB, BernoulliNB]:
        yield check_partial_fit, cls
Example #14
Source File: test_naive_bayes.py From twitter-stock-recommendation with MIT License
def test_discrete_prior():
    # Test whether class priors are properly set.
    for cls in [BernoulliNB, MultinomialNB]:
        clf = cls().fit(X2, y2)
        assert_array_almost_equal(np.log(np.array([2, 2, 2]) / 6.0),
                                  clf.class_log_prior_, 8)
Example #15
Source File: test_naive_bayes.py From pandas-ml with BSD 3-Clause "New" or "Revised" License
def test_objectmapper(self):
    df = pdml.ModelFrame([])
    self.assertIs(df.naive_bayes.GaussianNB, nb.GaussianNB)
    self.assertIs(df.naive_bayes.MultinomialNB, nb.MultinomialNB)
    self.assertIs(df.naive_bayes.BernoulliNB, nb.BernoulliNB)
Example #16
Source File: classification_example.py From intro_ds with Apache License 2.0
def trainModel(data):
    """
    Fit the data with a random forest embedding plus a Bernoulli naive Bayes model
    """
    pipe = Pipeline([("embedding", RandomTreesEmbedding(random_state=1024)),
                     ("model", BernoulliNB())])
    pipe.fit(data[["x1", "x2"]], data["y"])
    return pipe
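A hedged usage sketch for the function above, assuming trainModel() and its imports (Pipeline, RandomTreesEmbedding, BernoulliNB) are in scope; the column names x1, x2, y follow the function's own expectations, and the data itself is invented:

import numpy as np
import pandas as pd

# Invented training data with the column layout trainModel() expects
rng = np.random.RandomState(1024)
data = pd.DataFrame({"x1": rng.randn(200),
                     "x2": rng.randn(200),
                     "y": rng.randint(0, 2, 200)})

pipe = trainModel(data)
print(pipe.predict(data[["x1", "x2"]])[:10])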
Example #17
Source File: text_classification.py From intro_ds with Apache License 2.0
def trainBernoulliNB(data):
    """
    Fit the data with a Bernoulli naive Bayes model
    """
    vect = CountVectorizer(token_pattern=r"(?u)\b\w+\b", binary=True)
    X = vect.fit_transform(data["content"])
    le = LabelEncoder()
    Y = le.fit_transform(data["label"])
    model = BernoulliNB()
    model.fit(X, Y)
    return vect, le, model
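A usage sketch for the function above with an invented toy corpus; note that unseen text must go through the already-fitted vectorizer's transform (not fit_transform), and predictions are decoded back to string labels with the LabelEncoder:

import pandas as pd

# Invented toy corpus with the columns trainBernoulliNB() expects
data = pd.DataFrame({
    "content": ["good movie", "bad movie", "good plot", "bad acting"],
    "label":   ["pos", "neg", "pos", "neg"],
})

vect, le, model = trainBernoulliNB(data)

X_new = vect.transform(["good acting"])
print(le.inverse_transform(model.predict(X_new)))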
Example #18
Source File: sentiment_analysis.py From pychennai-sentiment-analysis with Apache License 2.0
def learn_model(data, target):
    # preparing data for split validation. 60% training, 40% test
    data_train, data_test, target_train, target_test = cross_validation.train_test_split(
        data, target, test_size=0.4, random_state=43)
    classifier = BernoulliNB().fit(data_train, target_train)
    predicted = classifier.predict(data_test)
    evaluate_model(target_test, predicted)

    # read more about model evaluation metrics here
    # http://scikit-learn.org/stable/modules/model_evaluation.html
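This snippet depends on sklearn.cross_validation, which was deprecated in scikit-learn 0.18 and removed in 0.20. A sketch of the same split/fit/evaluate flow against the modern sklearn.model_selection API, with the project-specific evaluate_model() replaced here by a plain accuracy report:

from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import BernoulliNB
from sklearn.metrics import accuracy_score

def learn_model_modern(data, target):
    # 60% training, 40% test, as in the original
    data_train, data_test, target_train, target_test = train_test_split(
        data, target, test_size=0.4, random_state=43)
    classifier = BernoulliNB().fit(data_train, target_train)
    predicted = classifier.predict(data_test)
    print("accuracy:", accuracy_score(target_test, predicted))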
Example #19
Source File: test_codec.py From Splunking-Crime with GNU Affero General Public License v3.0
def test_BernoulliNB(self):
    BernoulliNB_Algo.register_codecs()
    self.classifier_util(BernoulliNB)
Example #20
Source File: BernoulliNB.py From Splunking-Crime with GNU Affero General Public License v3.0
def __init__(self, options):
    self.handle_options(options)

    out_params = convert_params(
        options.get('params', {}),
        floats=['alpha', 'binarize'],
        bools=['fit_prior'],
    )

    self.estimator = _BernoulliNB(**out_params)
Example #21
Source File: export_tests.py From tpot with GNU Lesser General Public License v3.0
def test_export_random_ind():
    """Assert that the TPOTClassifier can generate the same pipeline export with random seed of 39."""
    tpot_obj = TPOTClassifier(random_state=39, config_dict="TPOT light")
    tpot_obj._fit_init()
    tpot_obj._pbar = tqdm(total=1, disable=True)
    pipeline = tpot_obj._toolbox.individual()
    expected_code = """import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import BernoulliNB

# NOTE: Make sure that the outcome column is labeled 'target' in the data file
tpot_data = pd.read_csv('PATH/TO/DATA/FILE', sep='COLUMN_SEPARATOR', dtype=np.float64)
features = tpot_data.drop('target', axis=1)
training_features, testing_features, training_target, testing_target = \\
            train_test_split(features, tpot_data['target'], random_state=39)

exported_pipeline = BernoulliNB(alpha=1.0, fit_prior=False)
# Fix random state in exported estimator
if hasattr(exported_pipeline, 'random_state'):
    setattr(exported_pipeline, 'random_state', 39)

exported_pipeline.fit(training_features, training_target)
results = exported_pipeline.predict(testing_features)
"""
    exported_code = export_pipeline(pipeline, tpot_obj.operators, tpot_obj._pset, random_state=tpot_obj.random_state)

    assert expected_code == exported_code
Example #22
Source File: bernoulli_nb.py From lale with Apache License 2.0
def __init__(self, alpha=1.0, binarize=0.0, fit_prior=True, class_prior=None):
    self._hyperparams = {
        'alpha': alpha,
        'binarize': binarize,
        'fit_prior': fit_prior,
        'class_prior': class_prior}
    self._wrapped_model = Op(**self._hyperparams)
Example #23
Source File: models.py From automl-phase-2 with MIT License
def __init__(self, info, verbose=True, debug_mode=False):
    self.label_num = info['label_num']
    self.target_num = info['target_num']
    self.task = info['task']
    self.metric = info['metric']
    self.postprocessor = None
    # self.postprocessor = MultiLabelEnsemble(LogisticRegression(), balance=True)  # To calibrate proba
    self.postprocessor = MultiLabelEnsemble(LogisticRegression(), balance=False)  # To calibrate proba
    if debug_mode >= 2:
        self.name = "RandomPredictor"
        self.model = RandomPredictor(self.target_num)
        self.predict_method = self.model.predict_proba
        return
    if info['task'] == 'regression':
        if info['is_sparse'] == True:
            self.name = "BaggingRidgeRegressor"
            self.model = BaggingRegressor(base_estimator=Ridge(), n_estimators=1, verbose=verbose)  # unfortunately, no warm start...
        else:
            self.name = "GradientBoostingRegressor"
            self.model = GradientBoostingRegressor(n_estimators=1, verbose=verbose, warm_start=True)
        self.predict_method = self.model.predict  # Always predict probabilities
    else:
        if info['has_categorical']:  # Out of laziness, we do not convert categorical variables...
            self.name = "RandomForestClassifier"
            self.model = RandomForestClassifier(n_estimators=1, verbose=verbose)  # unfortunately, no warm start...
        elif info['is_sparse']:
            self.name = "BaggingNBClassifier"
            self.model = BaggingClassifier(base_estimator=BernoulliNB(), n_estimators=1, verbose=verbose)  # unfortunately, no warm start...
        else:
            self.name = "GradientBoostingClassifier"
            self.model = eval(self.name + "(n_estimators=1, verbose=" + str(verbose) + ", min_samples_split=10, random_state=1, warm_start = True)")
        if info['task'] == 'multilabel.classification':
            self.model = MultiLabelEnsemble(self.model)
        self.predict_method = self.model.predict_proba
Example #24
Source File: test_naive_bayes.py From Mastering-Elasticsearch-7.0 with MIT License
def test_check_accuracy_on_digits():
    # Non regression test to make sure that any further refactoring / optim
    # of the NB models do not harm the performance on a slightly non-linearly
    # separable dataset
    digits = load_digits()
    X, y = digits.data, digits.target
    binary_3v8 = np.logical_or(digits.target == 3, digits.target == 8)
    X_3v8, y_3v8 = X[binary_3v8], y[binary_3v8]

    # Multinomial NB
    scores = cross_val_score(MultinomialNB(alpha=10), X, y, cv=10)
    assert_greater(scores.mean(), 0.86)

    scores = cross_val_score(MultinomialNB(alpha=10), X_3v8, y_3v8, cv=10)
    assert_greater(scores.mean(), 0.94)

    # Bernoulli NB
    scores = cross_val_score(BernoulliNB(alpha=10), X > 4, y, cv=10)
    assert_greater(scores.mean(), 0.83)

    scores = cross_val_score(BernoulliNB(alpha=10), X_3v8 > 4, y_3v8, cv=10)
    assert_greater(scores.mean(), 0.92)

    # Gaussian NB
    scores = cross_val_score(GaussianNB(), X, y, cv=10)
    assert_greater(scores.mean(), 0.77)

    scores = cross_val_score(GaussianNB(var_smoothing=0.1), X, y, cv=10)
    assert_greater(scores.mean(), 0.89)

    scores = cross_val_score(GaussianNB(), X_3v8, y_3v8, cv=10)
    assert_greater(scores.mean(), 0.86)
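Note how the Bernoulli runs binarize the 0-16 pixel intensities with X > 4 before fitting. The estimator can apply the same threshold internally via its binarize parameter, which (like X > 4) maps values strictly greater than the threshold to 1; a small sketch:

from sklearn.datasets import load_digits
from sklearn.model_selection import cross_val_score
from sklearn.naive_bayes import BernoulliNB

digits = load_digits()
X, y = digits.data, digits.target

# Equivalent to cross_val_score(BernoulliNB(alpha=10), X > 4, y, cv=10)
scores = cross_val_score(BernoulliNB(alpha=10, binarize=4), X, y, cv=10)
print(scores.mean())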
Example #25
Source File: test_naive_bayes.py From Mastering-Elasticsearch-7.0 with MIT License
def test_coef_intercept_shape():
    # coef_ and intercept_ should have shapes as in other linear models.
    # Non-regression test for issue #2127.
    X = [[1, 0, 0], [1, 1, 1]]
    y = [1, 2]  # binary classification

    for clf in [MultinomialNB(), BernoulliNB()]:
        clf.fit(X, y)
        assert_equal(clf.coef_.shape, (1, 3))
        assert_equal(clf.intercept_.shape, (1,))
Example #26
Source File: test_naive_bayes.py From Mastering-Elasticsearch-7.0 with MIT License
def test_discretenb_predict_proba():
    # Test discrete NB classes' probability scores

    # The 100s below distinguish Bernoulli from multinomial.
    # FIXME: write a test to show this.
    X_bernoulli = [[1, 100, 0], [0, 1, 0], [0, 100, 1]]
    X_multinomial = [[0, 1], [1, 3], [4, 0]]

    # test binary case (1-d output)
    y = [0, 0, 2]  # 2 is regression test for binary case, 02e673
    for cls, X in zip([BernoulliNB, MultinomialNB],
                      [X_bernoulli, X_multinomial]):
        clf = cls().fit(X, y)
        assert_equal(clf.predict(X[-1:]), 2)
        assert_equal(clf.predict_proba([X[0]]).shape, (1, 2))
        assert_array_almost_equal(clf.predict_proba(X[:2]).sum(axis=1),
                                  np.array([1., 1.]), 6)

    # test multiclass case (2-d output, must sum to one)
    y = [0, 1, 2]
    for cls, X in zip([BernoulliNB, MultinomialNB],
                      [X_bernoulli, X_multinomial]):
        clf = cls().fit(X, y)
        assert_equal(clf.predict_proba(X[0:1]).shape, (1, 3))
        assert_equal(clf.predict_proba(X[:2]).shape, (2, 3))
        assert_almost_equal(np.sum(clf.predict_proba([X[1]])), 1)
        assert_almost_equal(np.sum(clf.predict_proba([X[-1]])), 1)
        assert_almost_equal(np.sum(np.exp(clf.class_log_prior_)), 1)
        assert_almost_equal(np.sum(np.exp(clf.intercept_)), 1)
Example #27
Source File: test_naive_bayes.py From Mastering-Elasticsearch-7.0 with MIT License
def test_alpha():
    # Setting alpha=0 should not output nan results when p(x_i|y_j)=0 is a case
    X = np.array([[1, 0], [1, 1]])
    y = np.array([0, 1])
    nb = BernoulliNB(alpha=0.)
    assert_warns(UserWarning, nb.partial_fit, X, y, classes=[0, 1])
    assert_warns(UserWarning, nb.fit, X, y)
    prob = np.array([[1, 0], [0, 1]])
    assert_array_almost_equal(nb.predict_proba(X), prob)

    nb = MultinomialNB(alpha=0.)
    assert_warns(UserWarning, nb.partial_fit, X, y, classes=[0, 1])
    assert_warns(UserWarning, nb.fit, X, y)
    prob = np.array([[2./3, 1./3], [0, 1]])
    assert_array_almost_equal(nb.predict_proba(X), prob)

    # Test sparse X
    X = scipy.sparse.csr_matrix(X)
    nb = BernoulliNB(alpha=0.)
    assert_warns(UserWarning, nb.fit, X, y)
    prob = np.array([[1, 0], [0, 1]])
    assert_array_almost_equal(nb.predict_proba(X), prob)

    nb = MultinomialNB(alpha=0.)
    assert_warns(UserWarning, nb.fit, X, y)
    prob = np.array([[2./3, 1./3], [0, 1]])
    assert_array_almost_equal(nb.predict_proba(X), prob)

    # Test for alpha < 0
    X = np.array([[1, 0], [1, 1]])
    y = np.array([0, 1])
    expected_msg = ('Smoothing parameter alpha = -1.0e-01. '
                    'alpha should be > 0.')
    b_nb = BernoulliNB(alpha=-0.1)
    m_nb = MultinomialNB(alpha=-0.1)
    assert_raise_message(ValueError, expected_msg, b_nb.fit, X, y)
    assert_raise_message(ValueError, expected_msg, m_nb.fit, X, y)

    b_nb = BernoulliNB(alpha=-0.1)
    m_nb = MultinomialNB(alpha=-0.1)
    assert_raise_message(ValueError, expected_msg, b_nb.partial_fit,
                         X, y, classes=[0, 1])
    assert_raise_message(ValueError, expected_msg, m_nb.partial_fit,
                         X, y, classes=[0, 1])
Example #28
Source File: run_al_cl.py From AL with GNU General Public License v2.0
def retrieve_args(self):
    """Adds arguments to the parser for each respective setting of the command line interface"""
    # Classifier
    self.parser.add_argument("-c", "--classifier",
        choices=['KNeighborsClassifier', 'LogisticRegression', 'SVC', 'BernoulliNB',
                 'DecisionTreeClassifier', 'RandomForestClassifier',
                 'AdaBoostClassifier', 'GaussianNB', 'MultinomialNB'],
        default='MultinomialNB',
        help="Represents the classifier that will be used (default: MultinomialNB) .")

    # Classifier's arguments
    self.parser.add_argument("-a", "--arguments", default='',
        help="Represents the arguments that will be passed to the classifier (default: '').")

    # Data: Testing and training already split
    self.parser.add_argument("-d", '--data', nargs=2, metavar=('pool', 'test'),
        default=["data/imdb-binary-pool-mindf5-ng11", "data/imdb-binary-test-mindf5-ng11"],
        help='Files that contain the data, pool and test, and number of features (default: data/imdb-binary-pool-mindf5-ng11 data/imdb-binary-test-mindf5-ng11 27272).')

    # Data: Single File
    self.parser.add_argument("-sd", '--sdata', type=str, default='',
        help='Single file that contains the data. Cross validation will be performed (default: None).')

    # Whether to make the data dense
    self.parser.add_argument('-make_dense', default=False, action='store_true',
        help='Whether to make the sparse data dense. Some classifiers require this.')

    # Number of Folds
    self.parser.add_argument("-cv", type=int, default=10,
        help="Number of folds for cross validation. Works only if a single dataset is loaded (default: 10).")

    # File: Name of file that will be written the results
    self.parser.add_argument("-f", '--file', type=str, default=None,
        help='This feature represents the name that will be written with the result. If it is left blank, the file will not be written (default: None ).')

    # Number of Trials
    self.parser.add_argument("-nt", "--num_trials", type=int, default=10,
        help="Number of trials (default: 10).")

    # Strategies
    self.parser.add_argument("-st", "--strategies",
        choices=['erreduct', 'loggain', 'qbc', 'rand', 'unc'], nargs='*', default=['rand'],
        help="Represent a list of strategies for choosing next samples (default: rand).")

    # Boot Strap
    self.parser.add_argument("-bs", '--bootstrap', default=10, type=int,
        help='Sets the Boot strap (default: 10).')

    # Budget
    self.parser.add_argument("-b", '--budget', default=500, type=int,
        help='Sets the budget (default: 500).')

    # Step size
    self.parser.add_argument("-sz", '--stepsize', default=10, type=int,
        help='Sets the step size (default: 10).')

    # Sub pool size
    self.parser.add_argument("-sp", '--subpool', default=None, type=int,
        help='Sets the sub pool size (default: None).')
Example #29
Source File: test_naive_bayes.py From twitter-stock-recommendation with MIT License
def test_bnb():
    # Tests that BernoulliNB when alpha=1.0 gives the same values as
    # those given for the toy example in Manning, Raghavan, and
    # Schuetze's "Introduction to Information Retrieval" book:
    # http://nlp.stanford.edu/IR-book/html/htmledition/the-bernoulli-model-1.html

    # Training data points are:
    # Chinese Beijing Chinese (class: China)
    # Chinese Chinese Shanghai (class: China)
    # Chinese Macao (class: China)
    # Tokyo Japan Chinese (class: Japan)

    # Features are Beijing, Chinese, Japan, Macao, Shanghai, and Tokyo
    X = np.array([[1, 1, 0, 0, 0, 0],
                  [0, 1, 0, 0, 1, 0],
                  [0, 1, 0, 1, 0, 0],
                  [0, 1, 1, 0, 0, 1]])

    # Classes are China (0), Japan (1)
    Y = np.array([0, 0, 0, 1])

    # Fit BernoulliNB w/ alpha = 1.0
    clf = BernoulliNB(alpha=1.0)
    clf.fit(X, Y)

    # Check the class prior is correct
    class_prior = np.array([0.75, 0.25])
    assert_array_almost_equal(np.exp(clf.class_log_prior_), class_prior)

    # Check the feature probabilities are correct
    feature_prob = np.array([[0.4, 0.8, 0.2, 0.4, 0.4, 0.2],
                             [1/3.0, 2/3.0, 2/3.0, 1/3.0, 1/3.0, 2/3.0]])
    assert_array_almost_equal(np.exp(clf.feature_log_prob_), feature_prob)

    # Testing data point is:
    # Chinese Chinese Chinese Tokyo Japan
    X_test = np.array([[0, 1, 1, 0, 0, 1]])

    # Check the predictive probabilities are correct
    unnorm_predict_proba = np.array([[0.005183999999999999,
                                      0.02194787379972565]])
    predict_proba = unnorm_predict_proba / np.sum(unnorm_predict_proba)
    assert_array_almost_equal(clf.predict_proba(X_test), predict_proba)
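The unnormalized probabilities hard-coded at the end can be reproduced by hand: the Bernoulli model multiplies the class prior by p_i for each present feature and (1 - p_i) for each absent one, over all six features. A quick arithmetic check:

# Test doc x = [0, 1, 1, 0, 0, 1]; per-class probs taken from feature_prob above
p_china = 0.75 * (1 - 0.4) * 0.8 * 0.2 * (1 - 0.4) * (1 - 0.4) * 0.2
p_japan = 0.25 * (1 - 1/3.0) * (2/3.0) * (2/3.0) * (1 - 1/3.0) * (1 - 1/3.0) * (2/3.0)
print(p_china)  # ~0.005184
print(p_japan)  # ~0.0219478  (= 0.25 * (2/3.0)**6)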
Example #30
Source File: test_naive_bayes.py From twitter-stock-recommendation with MIT License
def test_alpha():
    # Setting alpha=0 should not output nan results when p(x_i|y_j)=0 is a case
    X = np.array([[1, 0], [1, 1]])
    y = np.array([0, 1])
    nb = BernoulliNB(alpha=0.)
    assert_warns(UserWarning, nb.partial_fit, X, y, classes=[0, 1])
    assert_warns(UserWarning, nb.fit, X, y)
    prob = np.array([[1, 0], [0, 1]])
    assert_array_almost_equal(nb.predict_proba(X), prob)

    nb = MultinomialNB(alpha=0.)
    assert_warns(UserWarning, nb.partial_fit, X, y, classes=[0, 1])
    assert_warns(UserWarning, nb.fit, X, y)
    prob = np.array([[2./3, 1./3], [0, 1]])
    assert_array_almost_equal(nb.predict_proba(X), prob)

    # Test sparse X
    X = scipy.sparse.csr_matrix(X)
    nb = BernoulliNB(alpha=0.)
    assert_warns(UserWarning, nb.fit, X, y)
    prob = np.array([[1, 0], [0, 1]])
    assert_array_almost_equal(nb.predict_proba(X), prob)

    nb = MultinomialNB(alpha=0.)
    assert_warns(UserWarning, nb.fit, X, y)
    prob = np.array([[2./3, 1./3], [0, 1]])
    assert_array_almost_equal(nb.predict_proba(X), prob)

    # Test for alpha < 0
    X = np.array([[1, 0], [1, 1]])
    y = np.array([0, 1])
    expected_msg = ('Smoothing parameter alpha = -1.0e-01. '
                    'alpha should be > 0.')
    b_nb = BernoulliNB(alpha=-0.1)
    m_nb = MultinomialNB(alpha=-0.1)
    assert_raise_message(ValueError, expected_msg, b_nb.fit, X, y)
    assert_raise_message(ValueError, expected_msg, m_nb.fit, X, y)

    b_nb = BernoulliNB(alpha=-0.1)
    m_nb = MultinomialNB(alpha=-0.1)
    assert_raise_message(ValueError, expected_msg, b_nb.partial_fit,
                         X, y, classes=[0, 1])
    assert_raise_message(ValueError, expected_msg, m_nb.partial_fit,
                         X, y, classes=[0, 1])