Python sklearn.naive_bayes.MultinomialNB() Examples
The following are 30 code examples of sklearn.naive_bayes.MultinomialNB(), drawn from open-source projects. The source file, originating project, and license are noted above each example. You may also want to check out the other functions and classes available in the sklearn.naive_bayes module.
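Before the project-specific examples below, here is a minimal, self-contained sketch of typical MultinomialNB usage on bag-of-words count features. The toy documents, labels, and pipeline step names are illustrative assumptions and are not taken from any of the projects listed.

# Minimal sketch (illustrative data): MultinomialNB on count features.
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import Pipeline

docs = ["spam spam offer", "meeting tomorrow", "cheap offer now", "project meeting notes"]
labels = [1, 0, 1, 0]  # hypothetical labels: 1 = spam, 0 = ham

# CountVectorizer produces the non-negative count features MultinomialNB expects;
# alpha is the additive (Laplace/Lidstone) smoothing parameter.
clf = Pipeline([("vect", CountVectorizer()),
                ("nb", MultinomialNB(alpha=1.0))])
clf.fit(docs, labels)
print(clf.predict(["free offer"]))  # e.g. array([1])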
Example #1
Source File: test_multiclass.py From Mastering-Elasticsearch-7.0 with MIT License | 9 votes |
def test_ovr_multilabel():
    # Toy dataset where features correspond directly to labels.
    X = np.array([[0, 4, 5], [0, 5, 0], [3, 3, 3], [4, 0, 6], [6, 0, 0]])
    y = np.array([[0, 1, 1], [0, 1, 0], [1, 1, 1], [1, 0, 1], [1, 0, 0]])

    for base_clf in (MultinomialNB(), LinearSVC(random_state=0),
                     LinearRegression(), Ridge(),
                     ElasticNet(), Lasso(alpha=0.5)):
        clf = OneVsRestClassifier(base_clf).fit(X, y)
        y_pred = clf.predict([[0, 4, 4]])[0]
        assert_array_equal(y_pred, [0, 1, 1])
        assert clf.multilabel_
Example #2
Source File: test_naive_bayes.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_mnb_prior_unobserved_targets():
    # test smoothing of prior for yet unobserved targets

    # Create toy training data
    X = np.array([[0, 1], [1, 0]])
    y = np.array([0, 1])

    clf = MultinomialNB()

    assert_no_warnings(
        clf.partial_fit, X, y, classes=[0, 1, 2]
    )

    assert clf.predict([[0, 1]]) == 0
    assert clf.predict([[1, 0]]) == 1
    assert clf.predict([[1, 1]]) == 0

    # add a training example with previously unobserved class
    assert_no_warnings(
        clf.partial_fit, [[1, 1]], [2]
    )

    assert clf.predict([[0, 1]]) == 0
    assert clf.predict([[1, 0]]) == 1
    assert clf.predict([[1, 1]]) == 2
Example #3
Source File: test_sklearn_calibrated_classifier_cv_converter.py From sklearn-onnx with MIT License | 6 votes |
def test_model_calibrated_classifier_cv_binary(self):
    data = load_iris()
    X, y = data.data, data.target
    y[y > 1] = 1
    clf = MultinomialNB().fit(X, y)
    model = CalibratedClassifierCV(clf, cv=2, method="sigmoid").fit(X, y)
    model_onnx = convert_sklearn(
        model, "scikit-learn CalibratedClassifierCV",
        [("input", FloatTensorType([None, X.shape[1]]))],
        target_opset=TARGET_OPSET
    )
    self.assertTrue(model_onnx is not None)
    dump_data_and_model(
        X.astype(np.float32), model, model_onnx,
        basename="SklearnCalibratedClassifierCVBinaryMNB",
        allow_failure="StrictVersion(onnxruntime.__version__)"
                      "<= StrictVersion('0.2.1')",
    )
Example #4
Source File: test_multiclass.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_ovr_multiclass():
    # Toy dataset where features correspond directly to labels.
    X = np.array([[0, 0, 5], [0, 5, 0], [3, 0, 0], [0, 0, 6], [6, 0, 0]])
    y = ["eggs", "spam", "ham", "eggs", "ham"]
    Y = np.array([[0, 0, 1], [0, 1, 0], [1, 0, 0], [0, 0, 1], [1, 0, 0]])

    classes = set("ham eggs spam".split())

    for base_clf in (MultinomialNB(), LinearSVC(random_state=0),
                     LinearRegression(), Ridge(), ElasticNet()):
        clf = OneVsRestClassifier(base_clf).fit(X, y)
        assert_equal(set(clf.classes_), classes)
        y_pred = clf.predict(np.array([[0, 0, 4]]))[0]
        assert_array_equal(y_pred, ["eggs"])

        # test input as label indicator matrix
        clf = OneVsRestClassifier(base_clf).fit(X, Y)
        y_pred = clf.predict([[0, 0, 4]])[0]
        assert_array_equal(y_pred, [0, 0, 1])
Example #5
Source File: test_base.py From scikit-multilearn with BSD 2-Clause "Simplified" License | 6 votes |
def test_model_selection_works(self):
    for x, y in self.get_multilabel_data_for_tests('dense'):
        parameters = {
            'classifier': [LabelPowerset(), BinaryRelevance()],
            'clusterer': [RandomLabelSpaceClusterer(None, None, False)],
            'clusterer__cluster_size': list(range(2, 3)),
            'clusterer__cluster_count': [3],
            'clusterer__allow_overlap': [False],
            'classifier__classifier': [MultinomialNB()],
            'classifier__classifier__alpha': [0.7, 1.0],
        }

        clf = GridSearchCV(LabelSpacePartitioningClassifier(), parameters,
                           scoring='f1_macro')
        clf.fit(x, y)

        for p in list(parameters.keys()):
            self.assertIn(p, clf.best_params_)

        self.assertIsNotNone(clf.best_score_)
Example #6
Source File: test_multiclass.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_ovr_fit_predict():
    # A classifier which implements decision_function.
    ovr = OneVsRestClassifier(LinearSVC(random_state=0))
    pred = ovr.fit(iris.data, iris.target).predict(iris.data)
    assert_equal(len(ovr.estimators_), n_classes)

    clf = LinearSVC(random_state=0)
    pred2 = clf.fit(iris.data, iris.target).predict(iris.data)
    assert_equal(np.mean(iris.target == pred), np.mean(iris.target == pred2))

    # A classifier which implements predict_proba.
    ovr = OneVsRestClassifier(MultinomialNB())
    pred = ovr.fit(iris.data, iris.target).predict(iris.data)
    assert_greater(np.mean(iris.target == pred), 0.65)

# 0.23. warning about tol not having its correct default value.
Example #7
Source File: test_sklearn_calibrated_classifier_cv_converter.py From sklearn-onnx with MIT License | 6 votes |
def test_model_calibrated_classifier_cv_int(self):
    data = load_digits()
    X, y = data.data, data.target
    clf = MultinomialNB().fit(X, y)
    model = CalibratedClassifierCV(clf, cv=2, method="sigmoid").fit(X, y)
    model_onnx = convert_sklearn(
        model, "scikit-learn CalibratedClassifierCVMNB",
        [("input", Int64TensorType([None, X.shape[1]]))],
        target_opset=TARGET_OPSET
    )
    self.assertTrue(model_onnx is not None)
    dump_data_and_model(
        X.astype(np.int64), model, model_onnx,
        basename="SklearnCalibratedClassifierCVInt-Dec4",
        allow_failure="StrictVersion(onnxruntime.__version__)"
                      "<= StrictVersion('0.2.1')",
    )
Example #8
Source File: test_naive_bayes.py From sparkit-learn with Apache License 2.0 | 6 votes |
def test_same_prediction(self):
    X, y, Z = self.make_classification(4, 100000, nonnegative=True)

    local = MultinomialNB()
    dist = SparkMultinomialNB()

    y_local = local.fit(X, y).predict(X)
    y_dist = dist.fit(Z, classes=np.unique(y)).predict(Z[:, 'X'])
    y_converted = dist.to_scikit().predict(X)

    assert_true(check_rdd_dtype(y_dist, (np.ndarray,)))
    assert_array_almost_equal(y_local, y_dist.toarray())
    assert_array_almost_equal(y_local, y_converted)

    y_proba_local = local.fit(X, y).predict_proba(X)
    y_proba_dist = dist.fit(Z, classes=np.unique(y)).predict_proba(Z[:, 'X'])
    y_proba_converted = dist.to_scikit().predict_proba(X)

    assert_true(check_rdd_dtype(y_dist, (np.ndarray,)))
    assert_array_almost_equal(y_proba_local, y_proba_dist.toarray(), 5)
    assert_array_almost_equal(y_proba_local, y_proba_converted, 5)
Example #9
Source File: test_grid_search.py From sparkit-learn with Apache License 2.0 | 6 votes |
def test_same_result(self):
    X, y, Z = self.make_classification(2, 40000, nonnegative=True)

    parameters = {'alpha': [0.1, 1, 10]}
    fit_params = {'classes': np.unique(y)}

    local_estimator = MultinomialNB()
    local_grid = GridSearchCV(estimator=local_estimator,
                              param_grid=parameters)

    estimator = SparkMultinomialNB()
    grid = SparkGridSearchCV(estimator=estimator,
                             param_grid=parameters,
                             fit_params=fit_params)

    local_grid.fit(X, y)
    grid.fit(Z)

    locscores = [r.mean_validation_score for r in local_grid.grid_scores_]
    scores = [r.mean_validation_score for r in grid.grid_scores_]

    assert_array_almost_equal(locscores, scores, decimal=2)
Example #10
Source File: meta_des.py From DESlib with BSD 3-Clause "New" or "Revised" License | 6 votes |
def _fit_meta_classifier(self, X_meta, y_meta):
    """Train the meta-classifier :math:`\\lambda`, using
    the meta-training dataset.

    Parameters
    ----------
    X_meta : array of shape = [n_meta_examples, n_meta_features]
        The meta-training examples.

    y_meta : array of shape = [n_meta_examples]
        Class labels of each example in X_test. 1 whether the base
        classifier made the correct prediction, otherwise 0.
    """
    if isinstance(self.meta_classifier_, MultinomialNB):
        # Digitize the data (Same implementation we have on PRTools)
        X_meta = np.digitize(X_meta, np.linspace(0.1, 1, 10))

    self.meta_classifier_.fit(X_meta, y_meta)
Example #11
Source File: ml.py From vector-homomorphic-encryption with MIT License | 6 votes |
def trainNB(trainX, trainY, testX, testY, samples, limit):
    start = time.clock()
    clf = MultinomialNB()
    clf.fit(trainX[:samples], trainY[:samples])
    print time.clock() - start

    start = time.clock()
    predicted = clf.predict(trainX[0:samples])
    print "percent Trained correct: ", percentCorrect(trainY[:samples], predicted)
    print "f-score: ", f1_score(trainY[:samples], predicted)
    metric = precision_recall_fscore_support(trainY[:samples], predicted)
    print "precision: ", metric[0]
    print "recall: ", metric[1]

    predicted = clf.predict(testX[0:limit])
    print "percent Test correct: ", percentCorrect(testY[:limit], predicted)
    print "f-score: ", f1_score(testY[:limit], predicted)
    metric = precision_recall_fscore_support(testY[:limit], predicted)
    print "precision: ", metric[0]
    print "recall: ", metric[1]
    print time.clock() - start

    return clf
Example #12
Source File: test_multiclass.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_ovr_single_label_predict_proba():
    base_clf = MultinomialNB(alpha=1)
    X, Y = iris.data, iris.target
    X_train, Y_train = X[:80], Y[:80]
    X_test = X[80:]
    clf = OneVsRestClassifier(base_clf).fit(X_train, Y_train)

    # Decision function only estimator.
    decision_only = OneVsRestClassifier(svm.SVR(gamma='scale')
                                        ).fit(X_train, Y_train)
    assert not hasattr(decision_only, 'predict_proba')

    Y_pred = clf.predict(X_test)
    Y_proba = clf.predict_proba(X_test)

    assert_almost_equal(Y_proba.sum(axis=1), 1.0)
    # predict assigns a label if the probability that the
    # sample has the label is greater than 0.5.
    pred = np.array([l.argmax() for l in Y_proba])
    assert not (pred - Y_pred).any()
Example #13
Source File: test_naive_bayes.py From twitter-stock-recommendation with MIT License | 6 votes |
def test_discretenb_pickle():
    # Test picklability of discrete naive Bayes classifiers

    for cls in [BernoulliNB, MultinomialNB, GaussianNB]:
        clf = cls().fit(X2, y2)
        y_pred = clf.predict(X2)

        store = BytesIO()
        pickle.dump(clf, store)

        clf = pickle.load(BytesIO(store.getvalue()))
        assert_array_equal(y_pred, clf.predict(X2))

        if cls is not GaussianNB:
            # TODO re-enable me when partial_fit is implemented for GaussianNB

            # Test pickling of estimator trained with partial_fit
            clf2 = cls().partial_fit(X2[:3], y2[:3], classes=np.unique(y2))
            clf2.partial_fit(X2[3:], y2[3:])
            store = BytesIO()
            pickle.dump(clf2, store)
            clf2 = pickle.load(BytesIO(store.getvalue()))
            assert_array_equal(y_pred, clf2.predict(X2))
Example #14
Source File: test_sklearn_calibrated_classifier_cv_converter.py From sklearn-onnx with MIT License | 6 votes |
def test_model_calibrated_classifier_cv_float_nozipmap(self):
    data = load_iris()
    X, y = data.data, data.target
    clf = MultinomialNB().fit(X, y)
    model = CalibratedClassifierCV(clf, cv=2, method="sigmoid").fit(X, y)
    model_onnx = convert_sklearn(
        model, "scikit-learn CalibratedClassifierCVMNB",
        [("input", FloatTensorType([None, X.shape[1]]))],
        target_opset=TARGET_OPSET,
        options={id(model): {'zipmap': False}})
    self.assertTrue(model_onnx is not None)
    dump_data_and_model(
        X.astype(np.float32), model, model_onnx,
        basename="SklearnCalibratedClassifierCVFloatNoZipMap",
        allow_failure="StrictVersion(onnxruntime.__version__)"
                      "<= StrictVersion('0.2.1')")
Example #15
Source File: test_sklearn_calibrated_classifier_cv_converter.py From sklearn-onnx with MIT License | 6 votes |
def test_model_calibrated_classifier_cv_float(self):
    data = load_iris()
    X, y = data.data, data.target
    clf = MultinomialNB().fit(X, y)
    model = CalibratedClassifierCV(clf, cv=2, method="sigmoid").fit(X, y)
    model_onnx = convert_sklearn(
        model, "scikit-learn CalibratedClassifierCVMNB",
        [("input", FloatTensorType([None, X.shape[1]]))],
        target_opset=TARGET_OPSET
    )
    self.assertTrue(model_onnx is not None)
    dump_data_and_model(
        X.astype(np.float32), model, model_onnx,
        basename="SklearnCalibratedClassifierCVFloat",
        allow_failure="StrictVersion(onnxruntime.__version__)"
                      "<= StrictVersion('0.2.1')",
    )
Example #16
Source File: 04_sent.py From Building-Machine-Learning-Systems-With-Python-Second-Edition with MIT License | 6 votes |
def create_union_model(params=None):
    def preprocessor(tweet):
        tweet = tweet.lower()
        for k in emo_repl_order:
            tweet = tweet.replace(k, emo_repl[k])
        for r, repl in re_repl.iteritems():
            tweet = re.sub(r, repl, tweet)
        return tweet.replace("-", " ").replace("_", " ")

    tfidf_ngrams = TfidfVectorizer(preprocessor=preprocessor,
                                   analyzer="word")
    ling_stats = LinguisticVectorizer()
    all_features = FeatureUnion(
        [('ling', ling_stats), ('tfidf', tfidf_ngrams)])
    # all_features = FeatureUnion([('tfidf', tfidf_ngrams)])
    # all_features = FeatureUnion([('ling', ling_stats)])
    clf = MultinomialNB()
    pipeline = Pipeline([('all', all_features), ('clf', clf)])

    if params:
        pipeline.set_params(**params)

    return pipeline
Example #17
Source File: mp_train.py From atap with Apache License 2.0 | 6 votes |
def fit_naive_bayes(path, saveto=None, cv=12):
    model = Pipeline([
        ('norm', TextNormalizer()),
        ('tfidf', TfidfVectorizer(tokenizer=identity, lowercase=False)),
        ('clf', MultinomialNB())
    ])

    if saveto is None:
        saveto = "naive_bayes_{}.pkl".format(time.time())

    scores, delta = train_model(path, model, saveto, cv)
    logger.info((
        "naive bayes training took {:0.2f} seconds "
        "with an average score of {:0.3f}"
    ).format(delta, scores.mean()))
Example #18
Source File: multinomial_nb_clf.py From 2020plus with Apache License 2.0 | 6 votes |
def __init__(self, df, weight=True, min_ct=0, total_iter=5):
    self.logger = logging.getLogger(__name__)
    super(MultinomialNaiveBayes, self).__init__(total_iterations=total_iter)  # call base constructor
    # self.set_min_count(min_ct)
    self.is_weighted_sample = weight

    # process data
    # df = self._filter_rows(df)  # filter out low count rows
    # row_sums = df.sum(axis=1).astype(float)
    # df = df.div(row_sums, axis=0)  # normalize each row
    # df = df.mul(100)
    # df.to_csv('tmp.nbclf.txt', sep='\t')
    df = df.fillna(df.mean())
    total = df['total']
    df = df[['recurrent missense', 'recurrent indel', 'frame shift',
             'nonsense', 'missense', 'synonymous', 'inframe indel',
             'no protein', 'lost stop', 'splicing mutation']]
    df = df.mul(total, axis=0).astype(int)  # get back counts instead of pct
    self.x, self.y = features.randomize(df)

    # setup classifier
    self.clf = MultinomialNB(alpha=1,  # laplacian smooth, i.e. pseudocounts
                             fit_prior=True)  # use data for prior class probs
Example #19
Source File: test_naive_bayes.py From twitter-stock-recommendation with MIT License | 6 votes |
def test_discretenb_provide_prior_with_partial_fit():
    # Test whether discrete NB classes use provided prior
    # when using partial_fit

    iris = load_iris()
    iris_data1, iris_data2, iris_target1, iris_target2 = train_test_split(
        iris.data, iris.target, test_size=0.4, random_state=415)

    for cls in [BernoulliNB, MultinomialNB]:
        for prior in [None, [0.3, 0.3, 0.4]]:
            clf_full = cls(class_prior=prior)
            clf_full.fit(iris.data, iris.target)
            clf_partial = cls(class_prior=prior)
            clf_partial.partial_fit(iris_data1, iris_target1,
                                    classes=[0, 1, 2])
            clf_partial.partial_fit(iris_data2, iris_target2)
            assert_array_almost_equal(clf_full.class_log_prior_,
                                      clf_partial.class_log_prior_)
Example #20
Source File: hybrid_nb.py From Jacinle with MIT License | 6 votes |
def __init__(self, distributions, weights=None, **kwargs):
    self.models = []
    for dist in distributions:
        dist = NaiveBayesianDistribution.from_string(dist)
        if dist is NaiveBayesianDistribution.GAUSSIAN:
            model = nb.GaussianNB(**kwargs)
        elif dist is NaiveBayesianDistribution.MULTINOMIAL:
            model = nb.MultinomialNB(**kwargs)
        elif dist is NaiveBayesianDistribution.BERNOULLI:
            model = nb.BernoulliNB(**kwargs)
        else:
            raise ValueError('Unknown distribution: {}.'.format(dist))
        kwargs['fit_prior'] = False  # Except the first model.
        self.models.append(model)
    self.weights = weights
Example #21
Source File: test_naive_bayes.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_discretenb_uniform_prior():
    # Test whether discrete NB classes fit a uniform prior
    # when fit_prior=False and class_prior=None

    for cls in [BernoulliNB, MultinomialNB]:
        clf = cls()
        clf.set_params(fit_prior=False)
        clf.fit([[0], [0], [1]], [0, 0, 1])
        prior = np.exp(clf.class_log_prior_)
        assert_array_equal(prior, np.array([.5, .5]))
Example #22
Source File: text_classification.py From intro_ds with Apache License 2.0 | 5 votes |
def trainMultinomialNB(data):
    """
    Fit the data with a multinomial Naive Bayes model.
    """
    pipe = Pipeline([("vect", CountVectorizer(token_pattern=r"(?u)\b\w+\b")),
                     ("model", MultinomialNB())])
    le = LabelEncoder()
    Y = le.fit_transform(data["label"])
    pipe.fit(data["content"], Y)
    return le, pipe
Example #23
Source File: multinomial_nb.py From lale with Apache License 2.0 | 5 votes |
def __init__(self, alpha=1.0, fit_prior=True, class_prior=None):
    self._hyperparams = {
        'alpha': alpha,
        'fit_prior': fit_prior,
        'class_prior': class_prior}
    self._wrapped_model = Op(**self._hyperparams)
Example #24
Source File: recipe_classification.py From Flavor-Network with GNU General Public License v3.0 | 5 votes |
def nb_test(X, y):
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)
    model = MultinomialNB()
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    print metrics.accuracy_score(y_test, y_pred)
Example #25
Source File: nb.py From asreview with Apache License 2.0 | 5 votes |
def __init__(self, alpha=3.822):
    """Initialize the SKLearn Naive Bayes model.

    Arguments
    ---------
    alpha: float
        Parameter to set the regularization strength of the model.
    """
    super(NBModel, self).__init__()
    self.alpha = alpha
    self._model = MultinomialNB(alpha=alpha)
    logging.debug(self._model)
Example #26
Source File: naivebayesclassifier.py From SQG with GNU General Public License v3.0 | 5 votes |
def __init__(self, model_file_path=None):
    super(NaiveBayesClassifier, self).__init__(model_file_path)
    self.pipeline = Pipeline(
        [('vect', CountVectorizer()),
         ('tf-idf', TfidfTransformer()),
         ('naive-bayes', MultinomialNB())])
    self.parameters = {'vect__ngram_range': [(1, 1), (1, 2)],
                       'tf-idf__use_idf': (True, False),
                       'naive-bayes__alpha': (1e-2, 1e-3)}
Example #27
Source File: classifier_train.py From pygameweb with BSD 2-Clause "Simplified" License | 5 votes |
def make_classifier():
    pipeline = Pipeline([
        ('count_vectorizer', CountVectorizer(ngram_range=(1, 2))),
        ('classifier', MultinomialNB())
    ])
    return pipeline
Example #28
Source File: bayes.py From opentc with MIT License | 5 votes |
def fit(self, dataset, filename):
    self.logger.debug("fit")
    self.clf = Pipeline([('vect', CountVectorizer()),
                         ('tfidf', TfidfTransformer()),
                         ('clf', MultinomialNB())
                         ])
    self.clf.fit(dataset.get_dataset()['data'], dataset.get_dataset()['target'])
    joblib.dump(self.clf, filename + ".pkl", compress=9)
Example #29
Source File: main.py From Python-DevOps with MIT License | 5 votes |
def train_bayes(corpus, tokenizing=True, cleaning=True, normalizing=True,
                stem=True, vector='tfidf', split=0.2):
    multinomial, labels, vectorize = None, None, None
    if vector.lower().find('tfidf') < 0 and vector.lower().find('bow'):
        raise Exception('Invalid vectorization technique')
    if isinstance(corpus, str):
        trainset = sklearn.datasets.load_files(container_path=corpus, encoding='UTF-8')
        trainset.data, trainset.target = separate_dataset(trainset)
        data, target = trainset.data, trainset.target
        labels = trainset.target_names
    if isinstance(corpus, list) or isinstance(corpus, tuple):
        corpus = np.array(corpus)
        data, target = corpus[:, 0].tolist(), corpus[:, 1].tolist()
        labels = np.unique(target).tolist()
        target = LabelEncoder().fit_transform(target)
    c = list(zip(data, target))
    random.shuffle(c)
    data, target = zip(*c)
    data, target = list(data), list(target)
    if stem:
        for i in range(len(data)):
            data[i] = ' '.join([stemming(k) for k in data[i].split()])
    if cleaning:
        for i in range(len(data)):
            data[i] = clearstring(data[i], tokenizing)
    if vector.lower().find('tfidf') >= 0:
        vectorize = TfidfVectorizer().fit(data)
        vectors = vectorize.transform(data)
    else:
        vectorize = CountVectorizer().fit(data)
        vectors = vectorize.transform(data)
    multinomial = MultinomialNB()
    if split:
        train_X, test_X, train_Y, test_Y = train_test_split(vectors, target, test_size=split)
        multinomial.partial_fit(train_X, train_Y, classes=np.unique(target))
        predicted = multinomial.predict(test_X)
        print(metrics.classification_report(test_Y, predicted, target_names=labels))
    else:
        multinomial.partial_fit(vectors, target, classes=np.unique(target))
        predicted = multinomial.predict(vectors)
        print(metrics.classification_report(target, predicted, target_names=labels))
    return USER_BAYES(multinomial, labels, vectorize)
Example #30
Source File: simulation_exp4p.py From striatum with BSD 2-Clause "Simplified" License | 5 votes |
def train_expert(history_context, history_action):
    n_round = len(history_context)
    history_context = np.array([history_context[t] for t in range(n_round)])
    history_action = np.array([history_action[t] for t in range(n_round)])
    logreg = OneVsRestClassifier(LogisticRegression())
    mnb = OneVsRestClassifier(MultinomialNB())
    logreg.fit(history_context, history_action)
    mnb.fit(history_context, history_action)
    return [logreg, mnb]