Python sklearn.base.is_classifier() Examples
The following are 19 code examples of sklearn.base.is_classifier(), collected from open-source projects. The original project and source file are noted above each example. You may also want to check out the other available functions and classes of the sklearn.base module.
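For orientation, here is a minimal sketch (not taken from any of the projects below; the estimators are chosen purely for illustration) of what is_classifier() reports for a classifier, a regressor, and meta-estimators that wrap a classifier:

from sklearn.base import is_classifier
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import Pipeline

# is_classifier() inspects the estimator's type (ClassifierMixin /
# _estimator_type), so it works on unfitted estimators as well as on
# meta-estimators such as Pipeline and GridSearchCV that wrap a classifier.
print(is_classifier(LogisticRegression()))                                  # True
print(is_classifier(LinearRegression()))                                    # False
print(is_classifier(Pipeline([('clf', LogisticRegression())])))             # True
print(is_classifier(GridSearchCV(LogisticRegression(), {'C': [0.1, 1]})))   # True

Because no fitting is required, the check is cheap and is typically used (as in the examples below) to decide between stratified and non-stratified cross-validation splitters or between predict_proba() and predict().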
Example #1
Source File: pipline.py From MachineLearning with Apache License 2.0
def transform(self, X, y=None):
    cv = check_cv(self.cv, y, classifier=is_classifier(self.estimator))
    X_prob = np.zeros((X.shape[0], self.n_classes))
    X_pred = np.zeros(X.shape[0])
    for estimator, (_, test) in zip(self.estimators_, cv.split(X)):
        X_prob[test] = estimator.predict_proba(X[test])
        X_pred[test] = estimator.predict(X[test])
    return np.hstack([X_prob, np.array([X_pred]).T])
Example #2
Source File: grid_search_cv.py From sklearn-onnx with MIT License
def convert_sklearn_grid_search_cv(scope, operator, container):
    """Converter for scikit-learn's GridSearchCV."""
    opts = scope.get_options(operator.raw_operator)
    grid_search_op = operator.raw_operator
    best_estimator = grid_search_op.best_estimator_
    op_type = sklearn_operator_name_map[type(best_estimator)]
    grid_search_operator = scope.declare_local_operator(op_type)
    grid_search_operator.raw_operator = best_estimator
    container.add_options(id(best_estimator), opts)
    grid_search_operator.inputs = operator.inputs
    label_name = scope.declare_local_variable('label')
    grid_search_operator.outputs.append(label_name)
    if is_classifier(best_estimator):
        proba_name = scope.declare_local_variable('probability_tensor',
                                                  FloatTensorType())
        grid_search_operator.outputs.append(proba_name)
    apply_identity(scope, label_name.full_name,
                   operator.outputs[0].full_name, container)
    if is_classifier(best_estimator):
        apply_identity(scope, proba_name.full_name,
                       operator.outputs[1].full_name, container)
Example #3
Source File: externals.py From sports-betting with MIT License
def fit(self, X, y, sample_weight=None):
    """Fit a separate classifier for each output variable."""
    for _, clf in self.classifiers:
        if not hasattr(clf, 'fit'):
            raise ValueError('Every base classifier should implement a fit method.')

    X, y = check_X_y(X, y, multi_output=True, accept_sparse=True)

    if is_classifier(self):
        check_classification_targets(y)

    if y.ndim == 1:
        raise ValueError('Output y must have at least two dimensions for '
                         'multi-output classification but has only one.')

    if sample_weight is not None and any([not has_fit_parameter(clf, 'sample_weight')
                                          for _, clf in self.classifiers]):
        raise ValueError('One of base classifiers does not support sample weights.')

    self.classifiers_ = Parallel(n_jobs=self.n_jobs)(
        delayed(_fit_estimator)(clf, X, y[:, i], sample_weight)
        for i, (_, clf) in zip(range(y.shape[1]), self.classifiers))

    return self
Example #4
Source File: cart.py From Hands-on-Supervised-Machine-Learning-with-Python with MIT License
def __init__(self, X, y, criterion, min_samples_split, max_depth,
             n_val_sample, random_state):
    # make sure max_depth > 1
    if max_depth < 2:
        raise ValueError("max depth must be > 1")

    # check the input arrays, and if it's classification validate the
    # target values in y
    X, y = check_X_y(X, y, accept_sparse=False, dtype=None, copy=True)
    if is_classifier(self):
        check_classification_targets(y)

    # hyper parameters so we can later inspect attributes of the model
    self.min_samples_split = min_samples_split
    self.max_depth = max_depth
    self.n_val_sample = n_val_sample
    self.random_state = random_state

    # create the splitting class
    random_state = check_random_state(random_state)
    self.splitter = RandomSplitter(random_state, criterion, n_val_sample)

    # grow the tree depth first
    self.tree = self._find_next_split(X, y, 0)
Example #5
Source File: __init__.py From oddt with BSD 3-Clause "New" or "Revised" License
def __init__(self, models):
    """Proxy class to build an ensemble of models with an API as one

    Parameters
    ----------
    models: array
        An array of models
    """
    self._models = models if len(models) else None
    if self._models is not None:
        if is_classifier(self._models[0]):
            check_type = is_classifier
            self._scoring_fun = accuracy_score
        elif is_regressor(self._models[0]):
            check_type = is_regressor
            self._scoring_fun = r2_score
        else:
            raise ValueError('Expected regressors or classifiers,'
                             ' got %s instead' % type(self._models[0]))
        for model in self._models:
            if not check_type(model):
                raise ValueError('Different types of models found, provide'
                                 ' either regressors or classifiers.')
Example #6
Source File: model.py From gobbli with Apache License 2.0
def _validate_estimator(estimator: BaseEstimator):
    """
    Run some checks on the given object to determine if it's an estimator
    which is valid for our purposes.
    """
    # sklearn has a function that does a lot more intensive checking regarding
    # the interface of a candidate Estimator
    # (sklearn.utils.estimator_checks.check_estimator), but the function
    # doesn't work well for our use case as of version 0.22. It doesn't
    # properly detect Pipeline X_types based on the first pipeline component
    # and won't test anything that doesn't accept a 2-D numpy array as input.
    # We'll settle for lax checks here until sklearn has something that works
    # better for us.
    if not is_classifier(estimator):
        raise ValueError(
            "Estimator must be a classifier according to sklearn.base.is_classifier()"
        )
    if not hasattr(estimator, "predict_proba"):
        raise ValueError(
            "Estimator must support the predict_proba() method to fulfill gobbli's "
            "interface requirements for a prediction model."
        )
Example #7
Source File: sklearn_pipeline.py From ramp-workflow with BSD 3-Clause "New" or "Revised" License
def test_submission(self, estimator_fitted, X):
    """Predict using a fitted estimator.

    Parameters
    ----------
    estimator_fitted : estimator object
        A fitted scikit-learn estimator.
    X : {array-like, sparse matrix, dataframe} of shape \
            (n_samples, n_features)
        The test data set.

    Returns
    -------
    pred : ndarray of shape (n_samples, n_classes) or (n_samples)
    """
    if is_classifier(estimator_fitted):
        return estimator_fitted.predict_proba(X)
    return estimator_fitted.predict(X)
Example #8
Source File: test_base.py From twitter-stock-recommendation with MIT License
def test_is_classifier():
    svc = SVC()
    assert_true(is_classifier(svc))
    assert_true(is_classifier(GridSearchCV(svc, {'C': [0.1, 1]})))
    assert_true(is_classifier(Pipeline([('svc', svc)])))
    assert_true(is_classifier(Pipeline(
        [('svc_cv', GridSearchCV(svc, {'C': [0.1, 1]}))])))
Example #9
Source File: pipline.py From MachineLearning with Apache License 2.0
def fit(self, X, y):
    y_labels = self._get_labels(y)
    cv = check_cv(self.cv, y_labels, classifier=is_classifier(self.estimator))
    self.estimators_ = []

    for train, _ in cv.split(X, y_labels):
        self.estimators_.append(
            clone(self.estimator).fit(X[train], y_labels[train])
        )
    return self
Example #10
Source File: _validation.py From mriqc with BSD 3-Clause "New" or "Revised" License
def cross_val_score(
    estimator,
    X,
    y=None,
    groups=None,
    scoring=None,
    cv=None,
    n_jobs=1,
    verbose=0,
    fit_params=None,
    pre_dispatch="2*n_jobs",
):
    """Evaluate a score by cross-validation."""
    if not isinstance(scoring, (list, tuple)):
        scoring = [scoring]

    X, y, groups = indexable(X, y, groups)

    cv = check_cv(cv, y, classifier=is_classifier(estimator))
    splits = list(cv.split(X, y, groups))
    scorer = [check_scoring(estimator, scoring=s) for s in scoring]
    # We clone the estimator to make sure that all the folds are
    # independent, and that it is pickle-able.
    parallel = Parallel(n_jobs=n_jobs, verbose=verbose, pre_dispatch=pre_dispatch)
    scores = parallel(
        delayed(_fit_and_score)(
            clone(estimator), X, y, scorer, train, test, verbose, None, fit_params
        )
        for train, test in splits
    )

    group_order = []
    if hasattr(cv, "groups"):
        group_order = [np.array(cv.groups)[test].tolist()[0] for _, test in splits]
    return np.squeeze(np.array(scores)), group_order
Example #11
Source File: cross_validation.py From Pyspatialml with GNU General Public License v3.0
def fit(self, X, y=None, groups=None, **fit_params):
    """Run fit method with all sets of parameters

    Args
    ----
    X : array-like, shape = [n_samples, n_features]
        Training vector, where n_samples is the number of samples and
        n_features is the number of features
    y : array-like, shape = [n_samples] or [n_samples, n_output], optional
        Target relative to X for classification or regression;
        None for unsupervised learning
    groups : array-like, shape = [n_samples], optional
        Training vector groups for cross-validation
    **fit_params : dict of string -> object
        Parameters passed to the ``fit`` method of the estimator
    """
    # check estimator and cv methods are valid
    self.cv = check_cv(self.cv, y, classifier=is_classifier(self.estimator))

    # check for binary response
    if len(np.unique(y)) > 2:
        raise ValueError('Only a binary response vector is currently supported')

    # check that scoring metric has been specified
    if self.scoring is None:
        raise ValueError('No score function is defined')

    # perform cross validation prediction
    self.y_pred_ = cross_val_predict(
        estimator=self.estimator, X=X, y=y, groups=groups, cv=self.cv,
        method='predict_proba', n_jobs=self.n_jobs, **fit_params)
    self.y_true = y

    # add fold id to the predictions
    self.test_idx_ = [indexes[1] for indexes in self.cv.split(X, y, groups)]
Example #12
Source File: test_sgd.py From Mastering-Elasticsearch-7.0 with MIT License
def test_validation_set_not_used_for_training(klass):
    X, Y = iris.data, iris.target
    validation_fraction = 0.4
    seed = 42
    shuffle = False
    max_iter = 10
    clf1 = klass(early_stopping=True,
                 random_state=np.random.RandomState(seed),
                 validation_fraction=validation_fraction,
                 learning_rate='constant', eta0=0.01,
                 tol=None, max_iter=max_iter, shuffle=shuffle)
    clf1.fit(X, Y)
    assert clf1.n_iter_ == max_iter

    clf2 = klass(early_stopping=False,
                 random_state=np.random.RandomState(seed),
                 learning_rate='constant', eta0=0.01,
                 tol=None, max_iter=max_iter, shuffle=shuffle)

    if is_classifier(clf2):
        cv = StratifiedShuffleSplit(test_size=validation_fraction,
                                    random_state=seed)
    else:
        cv = ShuffleSplit(test_size=validation_fraction,
                          random_state=seed)
    idx_train, idx_val = next(cv.split(X, Y))
    idx_train = np.sort(idx_train)  # remove shuffling
    clf2.fit(X[idx_train], Y[idx_train])
    assert clf2.n_iter_ == max_iter

    assert_array_equal(clf1.coef_, clf2.coef_)
Example #13
Source File: test_sgd.py From Mastering-Elasticsearch-7.0 with MIT License
def test_late_onset_averaging_not_reached(klass):
    clf1 = klass(average=600)
    clf2 = klass()
    for _ in range(100):
        if is_classifier(clf1):
            clf1.partial_fit(X, Y, classes=np.unique(Y))
            clf2.partial_fit(X, Y, classes=np.unique(Y))
        else:
            clf1.partial_fit(X, Y)
            clf2.partial_fit(X, Y)

    assert_array_almost_equal(clf1.coef_, clf2.coef_, decimal=16)
    assert_almost_equal(clf1.intercept_, clf2.intercept_, decimal=16)
Example #14
Source File: test_base.py From Mastering-Elasticsearch-7.0 with MIT License
def test_is_classifier():
    svc = SVC()
    assert is_classifier(svc)
    assert is_classifier(GridSearchCV(svc, {'C': [0.1, 1]}))
    assert is_classifier(Pipeline([('svc', svc)]))
    assert is_classifier(Pipeline(
        [('svc_cv', GridSearchCV(svc, {'C': [0.1, 1]}))]))
Example #15
Source File: _validation.py From mriqc with BSD 3-Clause "New" or "Revised" License
def permutation_test_score(
    estimator,
    X,
    y,
    groups=None,
    cv=None,
    n_permutations=100,
    n_jobs=1,
    random_state=0,
    verbose=0,
    scoring=None,
):
    """
    Evaluate the significance of a cross-validated score with permutations,
    as in test 1 of [Ojala2010]_.

    A modification of original sklearn's permutation test score function
    to evaluate p-value outside this function, so that the score can be
    reused from outside.

    .. [Ojala2010] Ojala and Garriga. Permutation Tests for Studying Classifier
                   Performance. The Journal of Machine Learning Research (2010)
                   vol. 11
    """
    X, y, groups = indexable(X, y, groups)
    cv = check_cv(cv, y, classifier=is_classifier(estimator))
    scorer = check_scoring(estimator, scoring=scoring)
    random_state = check_random_state(random_state)

    # We clone the estimator to make sure that all the folds are
    # independent, and that it is pickle-able.
    permutation_scores = Parallel(n_jobs=n_jobs, verbose=verbose)(
        delayed(_permutation_test_score)(
            clone(estimator), X, _shuffle(y, groups, random_state),
            groups, cv, scorer
        )
        for _ in range(n_permutations)
    )
    permutation_scores = np.array(permutation_scores)
    return permutation_scores
Example #16
Source File: sklearn_patches.py From tslearn with BSD 2-Clause "Simplified" License
def yield_all_checks(name, estimator):
    tags = estimator._get_tags()
    if "2darray" not in tags["X_types"]:
        warnings.warn("Can't test estimator {} which requires input "
                      " of type {}".format(name, tags["X_types"]),
                      SkipTestWarning)
        return
    if tags["_skip_test"]:
        warnings.warn("Explicit SKIP via _skip_test tag for estimator "
                      "{}.".format(name),
                      SkipTestWarning)
        return

    yield from _yield_checks(name, estimator)
    if is_classifier(estimator):
        yield from _yield_classifier_checks(name, estimator)
    if is_regressor(estimator):
        yield from _yield_regressor_checks(name, estimator)
    if hasattr(estimator, 'transform'):
        if not tags["allow_variable_length"]:
            # Transformer tests ensure that shapes are the same at fit and
            # transform time, hence we need to skip them for estimators that
            # allow variable-length inputs
            yield from _yield_transformer_checks(name, estimator)
    if isinstance(estimator, ClusterMixin):
        yield from _yield_clustering_checks(name, estimator)
    if is_outlier_detector(estimator):
        yield from _yield_outliers_checks(name, estimator)
    # We are not strict on presence/absence of the 3rd dimension
    # yield check_fit2d_predict1d
    if not tags["non_deterministic"]:
        yield check_methods_subset_invariance
    yield check_fit2d_1sample
    yield check_fit2d_1feature
    yield check_fit1d
    yield check_get_params_invariance
    yield check_set_params
    yield check_dict_unchanged
    yield check_dont_overwrite_parameters
    yield check_fit_idempotent

    if (is_classifier(estimator) or is_regressor(estimator)
            or isinstance(estimator, ClusterMixin)):
        if tags["allow_variable_length"]:
            yield check_different_length_fit_predict_transform
Example #17
Source File: sklearn.py From optuna with MIT License
def _cross_validate_with_pruning(
    self,
    trial,  # type: trial_module.Trial
    estimator,  # type: BaseEstimator
):
    # type: (...) -> Dict[str, OneDimArrayLikeType]

    if is_classifier(estimator):
        partial_fit_params = self.fit_params.copy()
        classes = np.unique(self.y)
        partial_fit_params.setdefault("classes", classes)
    else:
        partial_fit_params = self.fit_params

    n_splits = self.cv.get_n_splits(self.X, self.y, groups=self.groups)
    estimators = [clone(estimator) for _ in range(n_splits)]
    scores = {
        "fit_time": np.zeros(n_splits),
        "score_time": np.zeros(n_splits),
        "test_score": np.empty(n_splits),
    }

    if self.return_train_score:
        scores["train_score"] = np.empty(n_splits)

    for step in range(self.max_iter):
        for i, (train, test) in enumerate(
                self.cv.split(self.X, self.y, groups=self.groups)):
            out = self._partial_fit_and_score(
                estimators[i], train, test, partial_fit_params)

            if self.return_train_score:
                scores["train_score"][i] = out.pop(0)

            scores["test_score"][i] = out[0]
            scores["fit_time"][i] += out[1]
            scores["score_time"][i] += out[2]

        intermediate_value = np.nanmean(scores["test_score"])
        trial.report(intermediate_value, step=step)

        if trial.should_prune():
            self._store_scores(trial, scores)
            raise TrialPruned("trial was pruned at iteration {}.".format(step))

    return scores
Example #18
Source File: test_export.py From twitter-stock-recommendation with MIT License
def test_precision():
    rng_reg = RandomState(2)
    rng_clf = RandomState(8)
    for X, y, clf in zip(
            (rng_reg.random_sample((5, 2)),
             rng_clf.random_sample((1000, 4))),
            (rng_reg.random_sample((5, )),
             rng_clf.randint(2, size=(1000, ))),
            (DecisionTreeRegressor(criterion="friedman_mse", random_state=0,
                                   max_depth=1),
             DecisionTreeClassifier(max_depth=1, random_state=0))):

        clf.fit(X, y)
        for precision in (4, 3):
            dot_data = export_graphviz(clf, out_file=None, precision=precision,
                                       proportion=True)

            # With the current random state, the impurity and the threshold
            # will have the number of precision set in the export_graphviz
            # function. We will check the number of precision with a strict
            # equality. The value reported will have only 2 precision and
            # therefore, only a less equal comparison will be done.

            # check value
            for finding in finditer(r"value = \d+\.\d+", dot_data):
                assert_less_equal(
                    len(search(r"\.\d+", finding.group()).group()),
                    precision + 1)
            # check impurity
            if is_classifier(clf):
                pattern = r"gini = \d+\.\d+"
            else:
                pattern = r"friedman_mse = \d+\.\d+"

            # check impurity
            for finding in finditer(pattern, dot_data):
                assert_equal(len(search(r"\.\d+", finding.group()).group()),
                             precision + 1)
            # check threshold
            for finding in finditer(r"<= \d+\.\d+", dot_data):
                assert_equal(len(search(r"\.\d+", finding.group()).group()),
                             precision + 1)
Example #19
Source File: test_export.py From Mastering-Elasticsearch-7.0 with MIT License
def test_precision():
    rng_reg = RandomState(2)
    rng_clf = RandomState(8)
    for X, y, clf in zip(
            (rng_reg.random_sample((5, 2)),
             rng_clf.random_sample((1000, 4))),
            (rng_reg.random_sample((5, )),
             rng_clf.randint(2, size=(1000, ))),
            (DecisionTreeRegressor(criterion="friedman_mse", random_state=0,
                                   max_depth=1),
             DecisionTreeClassifier(max_depth=1, random_state=0))):

        clf.fit(X, y)
        for precision in (4, 3):
            dot_data = export_graphviz(clf, out_file=None, precision=precision,
                                       proportion=True)

            # With the current random state, the impurity and the threshold
            # will have the number of precision set in the export_graphviz
            # function. We will check the number of precision with a strict
            # equality. The value reported will have only 2 precision and
            # therefore, only a less equal comparison will be done.

            # check value
            for finding in finditer(r"value = \d+\.\d+", dot_data):
                assert_less_equal(
                    len(search(r"\.\d+", finding.group()).group()),
                    precision + 1)
            # check impurity
            if is_classifier(clf):
                pattern = r"gini = \d+\.\d+"
            else:
                pattern = r"friedman_mse = \d+\.\d+"

            # check impurity
            for finding in finditer(pattern, dot_data):
                assert_equal(len(search(r"\.\d+", finding.group()).group()),
                             precision + 1)
            # check threshold
            for finding in finditer(r"<= \d+\.\d+", dot_data):
                assert_equal(len(search(r"\.\d+", finding.group()).group()),
                             precision + 1)