Python sklearn.linear_model.SGDClassifier() Examples

The following are code examples of sklearn.linear_model.SGDClassifier(), collected from open-source projects. Each example lists its source file, project, and license; consult the original project if you need the surrounding context. You may also want to check out the other available functions and classes of the sklearn.linear_model module.
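Before the project examples, here is a minimal, self-contained sketch of typical SGDClassifier usage; the synthetic dataset and all parameter values are illustrative and not taken from any of the projects below.

from sklearn.datasets import make_classification
from sklearn.linear_model import SGDClassifier
from sklearn.model_selection import train_test_split

# Synthetic binary classification problem (illustrative only)
X, y = make_classification(n_samples=200, n_features=10, random_state=0)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

# SGDClassifier fits a linear model by stochastic gradient descent;
# the default loss="hinge" corresponds to a linear SVM.
clf = SGDClassifier(max_iter=1000, tol=1e-3, random_state=0)
clf.fit(X_train, y_train)
print(clf.score(X_test, y_test))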
Example #1
Source File: _validateSchema.py    From nyoka with Apache License 2.0
def test_validate_sklearn_sgd_with_text_cv(self):
        categories = ['alt.atheism','talk.religion.misc']
        data = fetch_20newsgroups(subset='train', categories=categories)
        X = data.data[:4]
        Y = data.target[:4]
        features = ['input']
        target = 'output'
        model = SGDClassifier(loss="log")
        file_name = model.__class__.__name__ + '_CountVec_.pmml'
        pipeline = Pipeline([
            ('vect', CountVectorizer()),
            ('clf', model)
        ])
        pipeline.fit(X, Y)
        skl_to_pmml(pipeline, features, target, file_name)
        self.assertEqual(self.schema.is_valid(file_name), True) 
Example #2
Source File: _validateSchema.py    From nyoka with Apache License 2.0
def test_validate_sklearn_sgd_with_text(self):
        categories = ['alt.atheism','talk.religion.misc']
        data = fetch_20newsgroups(subset='train', categories=categories)
        X = data.data[:4]
        Y = data.target[:4]
        features = ['input']
        target = 'output'
        model = SGDClassifier(loss="log")
        file_name = model.__class__.__name__ + '_TfIdfVec_.pmml'
        pipeline = Pipeline([
            ('vect', TfidfVectorizer()),
            ('clf', model)
        ])
        pipeline.fit(X, Y)
        skl_to_pmml(pipeline, features, target, file_name)
        self.assertEqual(self.schema.is_valid(file_name), True) 
Example #3
Source File: test_multioutput.py    From Mastering-Elasticsearch-7.0 with MIT License
def test_multi_output_classification_partial_fit_parallelism():
    sgd_linear_clf = SGDClassifier(loss='log', random_state=1, max_iter=5)
    mor = MultiOutputClassifier(sgd_linear_clf, n_jobs=4)
    mor.partial_fit(X, y, classes)
    est1 = mor.estimators_[0]
    mor.partial_fit(X, y)
    est2 = mor.estimators_[0]
    if cpu_count() > 1:
        # parallelism requires this to be the case for a sane implementation
        assert est1 is not est2


Example #4
Source File: test_sgd.py    From Mastering-Elasticsearch-7.0 with MIT License
def test_sgd_predict_proba_method_access(klass):
    # Checks that SGDClassifier predict_proba and predict_log_proba methods
    # can either be accessed or raise an appropriate error message
    # otherwise. See
    # https://github.com/scikit-learn/scikit-learn/issues/10938 for more
    # details.
    for loss in linear_model.SGDClassifier.loss_functions:
        clf = SGDClassifier(loss=loss)
        if loss in ('log', 'modified_huber'):
            assert hasattr(clf, 'predict_proba')
            assert hasattr(clf, 'predict_log_proba')
        else:
            message = ("probability estimates are not "
                       "available for loss={!r}".format(loss))
            assert not hasattr(clf, 'predict_proba')
            assert not hasattr(clf, 'predict_log_proba')
            with pytest.raises(AttributeError,
                               match=message):
                clf.predict_proba
            with pytest.raises(AttributeError,
                               match=message):
                clf.predict_log_proba 
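To see the gating this test asserts in practice, here is a toy sketch (not part of the test suite; the data is made up). Note that loss="log" was renamed to "log_loss" in scikit-learn 1.1, so adjust the spelling to your installed version.

import numpy as np
from sklearn.linear_model import SGDClassifier

X = np.array([[0.0], [1.0], [2.0], [3.0]])
y = np.array([0, 0, 1, 1])

# 'modified_huber' is a probabilistic loss, so predict_proba is exposed
proba_clf = SGDClassifier(loss="modified_huber", max_iter=100).fit(X, y)
print(proba_clf.predict_proba(X))

# the default 'hinge' loss is not, so the attribute lookup itself raises
hinge_clf = SGDClassifier(loss="hinge", max_iter=100).fit(X, y)
try:
    hinge_clf.predict_proba
except AttributeError as exc:
    print(exc)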
Example #5
Source File: test_sgd.py    From Mastering-Elasticsearch-7.0 with MIT License
def test_l1_ratio():
    # Test if l1 ratio extremes match L1 and L2 penalty settings.
    X, y = datasets.make_classification(n_samples=1000,
                                        n_features=100, n_informative=20,
                                        random_state=1234)

    # test if elasticnet with l1_ratio near 1 gives same result as pure l1
    est_en = SGDClassifier(alpha=0.001, penalty='elasticnet', tol=None,
                           max_iter=6, l1_ratio=0.9999999999,
                           random_state=42).fit(X, y)
    est_l1 = SGDClassifier(alpha=0.001, penalty='l1', max_iter=6,
                           random_state=42, tol=None).fit(X, y)
    assert_array_almost_equal(est_en.coef_, est_l1.coef_)

    # test if elasticnet with l1_ratio near 0 gives same result as pure l2
    est_en = SGDClassifier(alpha=0.001, penalty='elasticnet', tol=None,
                           max_iter=6, l1_ratio=0.0000000001,
                           random_state=42).fit(X, y)
    est_l2 = SGDClassifier(alpha=0.001, penalty='l2', max_iter=6,
                           random_state=42, tol=None).fit(X, y)
    assert_array_almost_equal(est_en.coef_, est_l2.coef_) 
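For reference, the elastic net penalty that SGDClassifier minimizes is alpha * (l1_ratio * ||w||_1 + (1 - l1_ratio) * 0.5 * ||w||_2^2), per the scikit-learn user guide, so l1_ratio near 1 degenerates to pure L1 and l1_ratio near 0 to pure L2, which is exactly what the test checks. The toy helper below (not part of scikit-learn) just evaluates that expression at the two extremes.

import numpy as np

def elasticnet_penalty(w, alpha, l1_ratio):
    # Penalty value for a coefficient vector w
    l1 = np.sum(np.abs(w))
    l2 = 0.5 * np.sum(w ** 2)
    return alpha * (l1_ratio * l1 + (1.0 - l1_ratio) * l2)

w = np.array([0.5, -1.0, 2.0])
print(elasticnet_penalty(w, alpha=0.001, l1_ratio=1.0))  # pure L1 term
print(elasticnet_penalty(w, alpha=0.001, l1_ratio=0.0))  # pure L2 term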
Example #6
Source File: test_from_model.py    From Mastering-Elasticsearch-7.0 with MIT License
def test_prefit():
    # Test all possible combinations of the prefit parameter.

    # Passing a prefit parameter with the selected model
    # and fitting a unfit model with prefit=False should give same results.
    clf = SGDClassifier(alpha=0.1, max_iter=10, shuffle=True,
                        random_state=0, tol=None)
    model = SelectFromModel(clf)
    model.fit(data, y)
    X_transform = model.transform(data)
    clf.fit(data, y)
    model = SelectFromModel(clf, prefit=True)
    assert_array_almost_equal(model.transform(data), X_transform)

    # Check that the model is rewritten if prefit=False and a fitted model is
    # passed
    model = SelectFromModel(clf, prefit=False)
    model.fit(data, y)
    assert_array_almost_equal(model.transform(data), X_transform)

    # Check that prefit=True and calling fit raises a ValueError
    model = SelectFromModel(clf, prefit=True)
    assert_raises(ValueError, model.fit, data, y) 
Example #7
Source File: SGDClassifier.py    From Splunking-Crime with GNU Affero General Public License v3.0
def __init__(self, options):
        self.handle_options(options)

        out_params = convert_params(
            options.get('params', {}),
            bools=['fit_intercept'],
            ints=['random_state', 'n_iter'],
            floats=['l1_ratio', 'alpha', 'eta0', 'power_t'],
            strs=['loss', 'penalty', 'learning_rate'],
        )

        if 'loss' in out_params:
            try:
                assert (out_params['loss'] in ['hinge', 'log', 'modified_huber', 'squared_hinge', 'perceptron'])
            except AssertionError:
                raise RuntimeError(
                    'Value for parameter "loss" has to be one of "hinge", "log", "modified_huber", "squared_hinge", or "perceptron"')

        self.scaler = StandardScaler()
        self.estimator = _SGDClassifier(**out_params) 
Example #8
Source File: linear_trainers.py    From ReAgent with BSD 3-Clause "New" or "Revised" License
def train(self, data: TrainingData, iterations: int = 1, num_samples: int = 0):
        logging.info("SGDClassifierTrainer.train...")
        self._model = None
        best_score = float("-inf")
        for _ in range(iterations):
            x, y, _ = super()._sample(
                data.train_x, data.train_y, data.train_weight, num_samples, True
            )
            sx, sy, ssw = super()._sample(
                data.validation_x, data.validation_y, data.validation_weight
            )
            for alpha in np.logspace(-8, -1, num=8, base=10):
                model = SGDClassifier(
                    loss=self._loss,
                    alpha=alpha,
                    random_state=0,
                    max_iter=self._max_iter,
                )
                model.fit(x, y)
                score = model.score(sx, sy, ssw)
                logging.info(f"  alpha: {alpha}, score: {score}")
                if score > best_score:
                    best_score = score
                    self._model = model 
Example #9
Source File: ensemble_glm.py    From jh-kaggle-util with Apache License 2.0
def fit_ensemble(x,y):
    fit_type = jhkaggle.jhkaggle_config['FIT_TYPE']
    if 1:
        if fit_type == jhkaggle.const.FIT_TYPE_BINARY_CLASSIFICATION:
            blend = SGDClassifier(loss="log", penalty="elasticnet")  # LogisticRegression()
        else:
            # blend = SGDRegressor()
            #blend = LinearRegression()
            #blend = RandomForestRegressor(n_estimators=10, n_jobs=-1, max_depth=5, criterion='mae')
            blend = LassoLarsCV(normalize=True)
            #blend = ElasticNetCV(normalize=True)
            #blend = LinearRegression(normalize=True)
        blend.fit(x, y)
    else:
        blend = LogisticRegression()
        blend.fit(x, y)


    return blend 
Example #10
Source File: test_multioutput.py    From Mastering-Elasticsearch-7.0 with MIT License
def test_multi_output_predict_proba():
    sgd_linear_clf = SGDClassifier(random_state=1, max_iter=5, tol=1e-3)
    param = {'loss': ('hinge', 'log', 'modified_huber')}

    # inner function for custom scoring
    def custom_scorer(estimator, X, y):
        if hasattr(estimator, "predict_proba"):
            return 1.0
        else:
            return 0.0
    grid_clf = GridSearchCV(sgd_linear_clf, param_grid=param,
                            scoring=custom_scorer, cv=3, error_score=np.nan)
    multi_target_linear = MultiOutputClassifier(grid_clf)
    multi_target_linear.fit(X, y)

    multi_target_linear.predict_proba(X)

    # SGDClassifier defaults to loss='hinge' which is not a probabilistic
    # loss function; therefore it does not expose a predict_proba method
    sgd_linear_clf = SGDClassifier(random_state=1, max_iter=5, tol=1e-3)
    multi_target_linear = MultiOutputClassifier(sgd_linear_clf)
    multi_target_linear.fit(X, y)
    err_msg = "The base estimator should implement predict_proba method"
    with pytest.raises(ValueError, match=err_msg):
        multi_target_linear.predict_proba(X)


Example #11
Source File: test_multioutput.py    From Mastering-Elasticsearch-7.0 with MIT License
def test_multi_output_classification_partial_fit_sample_weights():
    # weighted classifier
    Xw = [[1, 2, 3], [4, 5, 6], [1.5, 2.5, 3.5]]
    yw = [[3, 2], [2, 3], [3, 2]]
    w = np.asarray([2., 1., 1.])
    sgd_linear_clf = SGDClassifier(random_state=1, max_iter=20)
    clf_w = MultiOutputClassifier(sgd_linear_clf)
    clf_w.fit(Xw, yw, w)

    # unweighted, but with repeated samples
    X = [[1, 2, 3], [1, 2, 3], [4, 5, 6], [1.5, 2.5, 3.5]]
    y = [[3, 2], [3, 2], [2, 3], [3, 2]]
    sgd_linear_clf = SGDClassifier(random_state=1, max_iter=20)
    clf = MultiOutputClassifier(sgd_linear_clf)
    clf.fit(X, y)
    X_test = [[1.5, 2.5, 3.5]]
    assert_array_almost_equal(clf.predict(X_test), clf_w.predict(X_test)) 
Example #12
Source File: _test_file_stream_multiple_cfier.py    From scikit-multiflow with BSD 3-Clause "New" or "Revised" License
def demo():

    # The classifiers we will compare: HoeffdingTreeClassifier, SAMKNNClassifier, LeveragingBaggingClassifier, SGDClassifier
    h1 = [HoeffdingTreeClassifier(), SAMKNNClassifier(), LeveragingBaggingClassifier(random_state=1), SGDClassifier()]
    h2 = [HoeffdingTreeClassifier(), SAMKNNClassifier(), LeveragingBaggingClassifier(random_state=1), SGDClassifier()]
    h3 = [HoeffdingTreeClassifier(), SAMKNNClassifier(), LeveragingBaggingClassifier(random_state=1), SGDClassifier()]
    model_names = ['HT', 'SAMKNNClassifier', 'LBkNN', 'SGDC']

    # Demo 1 -- plot should not fail
    demo_parameterized(h1, model_names=model_names)

    # Demo 2 -- csv output should look nice
    demo_parameterized(h2, "sea_stream.csv", False, model_names)

    # Demo 3 -- should not give "'NoneType' object is not iterable" error
    demo_parameterized(h3, "covtype.csv", False, model_names) 
Example #13
Source File: test_robust_weighted_estimator.py    From scikit-learn-extra with BSD 3-Clause "New" or "Revised" License
def test_predict_proba(weighting):
    clf = RobustWeightedEstimator(
        SGDClassifier(loss="log"),
        loss="log",
        max_iter=100,
        weighting=weighting,
        k=0,
        c=1e7,
        burn_in=0,
        random_state=rng,
    )
    clf_not_rob = SGDClassifier(loss="log", random_state=rng)
    clf.fit(X_c, y_c)
    clf_not_rob.fit(X_c, y_c)
    pred1 = clf.base_estimator_.predict_proba(X_c)[:, 1]
    pred2 = clf_not_rob.predict_proba(X_c)[:, 1]

    assert (
        np.linalg.norm(pred1 - pred2) / np.linalg.norm(pred2)
        - np.linalg.norm(pred1 - y_c) / np.linalg.norm(y_c)
        < 0.1
    )


Example #14
Source File: test_robust_weighted_estimator.py    From scikit-learn-extra with BSD 3-Clause "New" or "Revised" License
def test_not_robust_classif(loss, weighting):
    clf = RobustWeightedEstimator(
        SGDClassifier(),
        loss=loss,
        max_iter=100,
        weighting=weighting,
        k=0,
        c=1e7,
        burn_in=0,
        random_state=rng,
    )
    clf_not_rob = SGDClassifier(loss=loss, random_state=rng)
    clf.fit(X_c, y_c)
    clf_not_rob.fit(X_c, y_c)
    pred1 = clf.base_estimator_.decision_function(X_c)
    pred2 = clf_not_rob.decision_function(X_c)

    assert (
        np.linalg.norm(pred1 - pred2) / np.linalg.norm(pred2)
        - np.linalg.norm(pred1 - y_c) / np.linalg.norm(y_c)
        < 0.1
    )


# Case "log" loss, test predict_proba 
Example #15
Source File: test_robust_weighted_estimator.py    From scikit-learn-extra with BSD 3-Clause "New" or "Revised" License
def test_corrupted_classif(loss, weighting):
    clf = RobustWeightedEstimator(
        SGDClassifier(),
        loss=loss,
        max_iter=50,
        weighting=weighting,
        k=5,
        c=None,
        random_state=rng,
    )
    clf.fit(X_cc, y_cc)
    score = clf.score(X_cc, y_cc)
    assert score > 0.75


Example #16
Source File: test_logistic.py    From Mastering-Elasticsearch-7.0 with MIT License
def test_elastic_net_versus_sgd(C, l1_ratio):
    # Compare elasticnet penalty in LogisticRegression() and SGD(loss='log')
    n_samples = 500
    X, y = make_classification(n_samples=n_samples, n_classes=2, n_features=5,
                               n_informative=5, n_redundant=0, n_repeated=0,
                               random_state=1)
    X = scale(X)

    sgd = SGDClassifier(
        penalty='elasticnet', random_state=1, fit_intercept=False, tol=-np.inf,
        max_iter=2000, l1_ratio=l1_ratio, alpha=1. / C / n_samples, loss='log')
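    # The choice alpha = 1 / (C * n_samples) above is what makes the two
    # objectives comparable: LogisticRegression weights the data-fit term
    # by C, while SGD weights the penalty term by alpha.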
    log = LogisticRegression(
        penalty='elasticnet', random_state=1, fit_intercept=False, tol=1e-5,
        max_iter=1000, l1_ratio=l1_ratio, C=C, solver='saga')

    sgd.fit(X, y)
    log.fit(X, y)
    assert_array_almost_equal(sgd.coef_, log.coef_, decimal=1) 
Example #17
Source File: text_classifier.py    From textar with MIT License
def make_classifier(self, name, ids, labels):
        """Entrenar un clasificador SVM sobre los textos cargados.

        Crea un clasificador que se guarda en el objeto bajo el nombre `name`.

        Args:
            name (str): Nombre para el clasidicador.
            ids (list): Se espera una lista de N ids de textos ya almacenados
                en el TextClassifier.
            labels (list): Se espera una lista de N etiquetas. Una por cada id
                de texto presente en ids.
        Nota:
            Usa el clasificador de `Scikit-learn <http://scikit-learn.org/>`_
        """
        if not all(np.in1d(ids, self.ids)):
            raise ValueError("Hay ids de textos que no se encuentran \
                              almacenados.")
        setattr(self, name, SGDClassifier())
        classifier = getattr(self, name)
        indices = np.searchsorted(self.ids, ids)
        classifier.fit(self.tfidf_mat[indices, :], labels) 
Example #18
Source File: language_detector.py    From text-mining-class with MIT License
def build_language_classifier(texts, labels, verbose=False, random_state=None):
    """Train a text classifier with scikit-learn

    The text classifier is composed of two elements assembled in a pipeline:

    - A text feature extractor (`TfidfVectorizer`) that extract the relative
      frequencies of unigrams, bigrams and trigrams of characters in the text.

    - An instance of `SGDClassifier` for the classification it-self. To speed
      up training it is recommended to enable early stopping.

    `random_state` is passed to the underlying `SGDClassifier` instance.
    """
    language_classifier = make_pipeline(
        TfidfVectorizer(analyzer="char", ngram_range=(1, 3),
                        min_df=2, max_df=0.9, norm="l2", dtype=np.float32),
        SGDClassifier(early_stopping=True, validation_fraction=0.2,
                      n_iter_no_change=3, max_iter=1000, tol=1e-3,
                      alpha=1e-5, penalty="l2", verbose=verbose,
                      random_state=random_state)
    )
    return language_classifier.fit(texts, labels) 
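A hypothetical usage sketch; the texts and labels below are made up for illustration:

texts = (["the cat sat on the mat", "a dog in the house"] * 10
         + ["le chat est sur le tapis", "un chien dans la maison"] * 10)
labels = ["en"] * 20 + ["fr"] * 20

clf = build_language_classifier(texts, labels, random_state=0)
print(clf.predict(["un petit chien"]))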
Example #19
Source File: testScoreWithAdapaSklearn.py    From nyoka with Apache License 2.0
def test_09_sgd_classifier(self):
        print("\ntest 09 (SGD Classifier with preprocessing) [multi-class]\n")
        X, X_test, y, features, target, test_file = self.data_utility.get_data_for_multi_class_classification()

        model = SGDClassifier(loss="log")
        pipeline_obj = Pipeline([
            ("scaler", StandardScaler()),
            ("model", model)
        ])
        pipeline_obj.fit(X,y)
        file_name = 'test09sklearn.pmml'
        
        skl_to_pmml(pipeline_obj, features, target, file_name)
        model_name  = self.adapa_utility.upload_to_zserver(file_name)
        predictions, probabilities = self.adapa_utility.score_in_zserver(model_name, test_file)
        model_pred = pipeline_obj.predict(X_test)
        model_prob = pipeline_obj.predict_proba(X_test)
        self.assertEqual(self.adapa_utility.compare_predictions(predictions, model_pred), True)
        self.assertEqual(self.adapa_utility.compare_probability(probabilities, model_prob), True) 
Example #20
Source File: test_incremental.py    From dask-ml with BSD 3-Clause "New" or "Revised" License
def test_incremental_text_pipeline(container):
    X = pd.Series(["a list", "of words", "for classification"] * 100)
    X = dd.from_pandas(X, npartitions=3)

    if container == "bag":
        X = X.to_bag()

    y = da.from_array(np.array([0, 0, 1] * 100), chunks=(100,) * 3)

    assert tuple(X.map_partitions(len).compute()) == y.chunks[0]

    sgd = SGDClassifier(max_iter=5, tol=1e-3)
    clf = Incremental(sgd, scoring="accuracy", assume_equal_chunks=True)
    vect = dask_ml.feature_extraction.text.HashingVectorizer()
    pipe = make_pipeline(vect, clf)

    pipe.fit(X, y, incremental__classes=[0, 1])
    X2 = pipe.steps[0][1].transform(X)
    assert hasattr(clf, "coef_")

    X2.compute_chunk_sizes()
    assert X2.shape == (300, vect.n_features) 
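As a rough sketch of the pattern this test exercises, assuming dask.array inputs, Incremental feeds each chunk of the data to the wrapped estimator's partial_fit:

import dask.array as da
import numpy as np
from dask_ml.wrappers import Incremental
from sklearn.linear_model import SGDClassifier

X = da.random.random((300, 5), chunks=(100, 5))
y = da.from_array(np.array([0, 1] * 150), chunks=(100,))

inc = Incremental(SGDClassifier(max_iter=5, tol=1e-3), scoring="accuracy")
inc.fit(X, y, classes=[0, 1])  # one partial_fit call per chunk
print(inc.coef_)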
Example #21
Source File: testScoreWithAdapaSklearn.py    From nyoka with Apache License 2.0
def test_10_sgd_classifier(self):
        print("\ntest 10 (SGD Classifier with preprocessing) [binary-class]\n")
        X, X_test, y, features, target, test_file = self.data_utility.get_data_for_binary_classification()

        model = SGDClassifier(loss="log")
        pipeline_obj = Pipeline([
            ("scaler", StandardScaler()),
            ("model", model)
        ])
        pipeline_obj.fit(X,y)
        file_name = 'test10sklearn.pmml'
        
        skl_to_pmml(pipeline_obj, features, target, file_name)
        model_name  = self.adapa_utility.upload_to_zserver(file_name)
        predictions, probabilities = self.adapa_utility.score_in_zserver(model_name, test_file)
        model_pred = pipeline_obj.predict(X_test)
        model_prob = pipeline_obj.predict_proba(X_test)
        self.assertEqual(self.adapa_utility.compare_predictions(predictions, model_pred), True)
        self.assertEqual(self.adapa_utility.compare_probability(probabilities, model_prob), True) 
Example #22
Source File: svm.py    From opentc with MIT License
def fit(self, dataset, filename):
        self.logger.debug("fit")
        self.clf = Pipeline([('vect', CountVectorizer()),
                             ('tfidf', TfidfTransformer()),
                             ('clf', SGDClassifier(loss='log', penalty='l2', alpha=1e-3, n_iter=5, random_state=42)),
                             ])
        self.clf.fit(dataset.get_dataset()['data'], dataset.get_dataset()['target'])
        joblib.dump(self.clf, filename + ".pkl", compress=9) 
Example #23
Source File: test_incremental.py    From dask-ml with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def test_in_gridsearch(scheduler, xy_classification):
    X, y = xy_classification
    clf = Incremental(SGDClassifier(random_state=0, tol=1e-3))
    param_grid = {"estimator__alpha": [0.1, 10]}
    gs = sklearn.model_selection.GridSearchCV(clf, param_grid, cv=3)

    with scheduler() as (s, [a, b]):
        gs.fit(X, y, classes=[0, 1]) 
Example #24
Source File: Classifier.py    From tatk with Apache License 2.0
def train(self, X, y):
        model = SGDClassifier(loss="log", penalty="l2")
        model.probability=True
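        # Note: SGDClassifier has no 'probability' parameter (SVC does), so
        # the line above only sets an unused attribute; predict_proba is
        # available here because loss="log" is a probabilistic loss.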
        model.fit(X,y)
        self.model = model 
Example #25
Source File: test_incremental.py    From dask-ml with BSD 3-Clause "New" or "Revised" License
def test_fit_ndarrays():
    X = np.ones((10, 5))
    y = np.concatenate([np.zeros(5), np.ones(5)])

    sgd = SGDClassifier(tol=1e-3)
    inc = Incremental(sgd)

    inc.partial_fit(X, y, classes=[0, 1])
    sgd.fit(X, y)

    assert inc.estimator is sgd
    assert_eq(inc.coef_, inc.estimator_.coef_) 
Example #26
Source File: run.py    From LeadQualifier with MIT License
def runSGD(X_train, y_train):
    sgd = SGDClassifier(n_iter=500, loss='modified_huber', penalty='elasticnet', random_state=42)
    sgd.fit(X_train, y_train)
    return sgd 
Example #27
Source File: model.py    From polyaxon-examples with Apache License 2.0
def model(X, y, loss, penalty, l1_ratio, max_iter, tol):
    classifier = SGDClassifier(
        loss=loss,
        penalty=penalty,
        l1_ratio=l1_ratio,
        max_iter=max_iter,
        tol=tol,
    )
    return cross_val_score(classifier, X, y, cv=5) 
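A hypothetical call, assuming X and y hold a feature matrix and label vector; all parameter values are illustrative only:

scores = model(X, y, loss="log", penalty="elasticnet", l1_ratio=0.15,
               max_iter=1000, tol=1e-3)
print(scores.mean(), scores.std())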
Example #28
Source File: sgd_separator.py    From sklearn_pydata2015 with BSD 3-Clause "New" or "Revised" License
def plot_sgd_separator():
    # we create 50 separable points
    X, Y = make_blobs(n_samples=50, centers=2,
                      random_state=0, cluster_std=0.60)

    # fit the model
    clf = SGDClassifier(loss="hinge", alpha=0.01,
                        n_iter=200, fit_intercept=True)
    clf.fit(X, Y)

    # plot the line, the points, and the nearest vectors to the plane
    xx = np.linspace(-1, 5, 10)
    yy = np.linspace(-1, 5, 10)

    X1, X2 = np.meshgrid(xx, yy)
    Z = np.empty(X1.shape)
    for (i, j), val in np.ndenumerate(X1):
        x1 = val
        x2 = X2[i, j]
        p = clf.decision_function([x1, x2])
        Z[i, j] = p[0]
    levels = [-1.0, 0.0, 1.0]
    linestyles = ['dashed', 'solid', 'dashed']
    colors = 'k'

    ax = plt.axes()
    ax.contour(X1, X2, Z, levels, colors=colors, linestyles=linestyles)
    ax.scatter(X[:, 0], X[:, 1], c=Y, cmap=plt.cm.Paired)

    ax.axis('tight') 
Example #29
Source File: sentiment_analysis_ml.py    From Sentiment_Analysis_cnn_lstm_cnnlstm_textcnn_bilstm with Apache License 2.0
def SGD_classifier(train_vecs,y_train,test_vecs,y_test):
    clf = SGDClassifier(alpha=0.001, max_iter=100)
    clf.fit(train_vecs,y_train)
    joblib.dump(clf,storedpaths+'model_sgd.pkl')
    test_scores=clf.score(test_vecs,y_test)
    return test_scores
    