Python sklearn.base.BaseEstimator() Examples
The following are 30 code examples of sklearn.base.BaseEstimator(), collected from open-source projects. The originating project, source file, and license are noted above each example.
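Before the examples, here is a minimal sketch (illustrative only, not from any project below) of what subclassing BaseEstimator provides: get_params() and set_params() come for free, as long as __init__ stores its keyword arguments unmodified.

import numpy as np
from sklearn.base import BaseEstimator, ClassifierMixin

class MeanThresholdClassifier(BaseEstimator, ClassifierMixin):
    """Hypothetical toy classifier: predicts 1 when a row's mean exceeds a threshold."""

    def __init__(self, threshold=0.5):
        # store constructor args unmodified so get_params()/set_params() work
        self.threshold = threshold

    def fit(self, X, y=None):
        self.classes_ = np.array([0, 1])
        return self

    def predict(self, X):
        return (np.asarray(X).mean(axis=1) > self.threshold).astype(int)

clf = MeanThresholdClassifier(threshold=0.2)
print(clf.get_params())   # {'threshold': 0.2} -- inherited from BaseEstimator
clf.set_params(threshold=0.8)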
Example #1
Source File: test_builder.py From gordo with GNU Affero General Public License v3.0

def test_get_metadata_helper(model: BaseEstimator, expect_empty_dict: bool):
    """
    Ensure the builder works with various model configs and that each has
    expected/valid metadata results.
    """
    X, y = np.random.random((1000, 4)), np.random.random((1000,))

    model.fit(X, y)

    metadata = ModelBuilder._extract_metadata_from_model(model)

    # All the metadata we've implemented so far is 'history', so we'll check that
    if not expect_empty_dict:
        assert "history" in metadata
        assert all(
            name in metadata["history"] for name in ("params", "loss", "accuracy")
        )
    else:
        assert dict() == metadata
Example #2
Source File: mis_classifier.py From autoimpute with MIT License

def __init__(self, classifier=None, predictors="all"):
    """Create an instance of the MissingnessClassifier.

    The MissingnessClassifier inherits from sklearn BaseEstimator and
    ClassifierMixin. This inheritance and this class' implementation
    ensure that the MissingnessClassifier is a valid classifier that
    will work in an sklearn pipeline.

    Args:
        classifier (classifier, optional): valid classifier from sklearn.
            If None, default is xgboost. Note that classifier must
            conform to sklearn style. This means it must implement the
            `predict_proba` method and act as a proper classifier.
        predictors (str, iter, dict, optional): defaults to all, i.e.
            use all predictors. If all, every column will be used for
            every class prediction. If a list, subset of columns used
            for all predictions. If a dict, specify which columns to
            use as predictors for each imputation. Columns not
            specified in dict will receive `all` by default.
    """
    self.classifier = classifier
    self.predictors = predictors
Example #3
Source File: uncertainty.py From modAL with MIT License

def classifier_uncertainty(classifier: BaseEstimator, X: modALinput,
                           **predict_proba_kwargs) -> np.ndarray:
    """
    Classification uncertainty of the classifier for the provided samples.

    Args:
        classifier: The classifier for which the uncertainty is to be measured.
        X: The samples for which the uncertainty of classification is to be measured.
        **predict_proba_kwargs: Keyword arguments to be passed for the
            :meth:`predict_proba` of the classifier.

    Returns:
        Classifier uncertainty, which is 1 - P(prediction is correct).
    """
    # calculate uncertainty for each point provided
    try:
        classwise_uncertainty = classifier.predict_proba(X, **predict_proba_kwargs)
    except NotFittedError:
        return np.ones(shape=(X.shape[0], ))

    # for each point, select the maximum uncertainty
    uncertainty = 1 - np.max(classwise_uncertainty, axis=1)
    return uncertainty
Example #4
Source File: base.py From modAL with MIT License

def __init__(self,
             estimator: BaseEstimator,
             query_strategy: Callable,
             X_training: Optional[modALinput] = None,
             y_training: Optional[modALinput] = None,
             bootstrap_init: bool = False,
             force_all_finite: bool = True,
             **fit_kwargs
             ) -> None:
    assert callable(query_strategy), 'query_strategy must be callable'

    self.estimator = estimator
    self.query_strategy = query_strategy

    self.X_training = X_training
    self.y_training = y_training
    if X_training is not None:
        self._fit_to_known(bootstrap=bootstrap_init, **fit_kwargs)

    assert isinstance(force_all_finite, bool), 'force_all_finite must be a bool'
    self.force_all_finite = force_all_finite
Example #5
Source File: validation.py From modAL with MIT License

def check_class_labels(*args: BaseEstimator) -> bool:
    """
    Checks the known class labels for each classifier.

    Args:
        *args: Classifier objects to check the known class labels.

    Returns:
        True, if class labels match for all classifiers, False otherwise.
    """
    try:
        classes_ = [estimator.classes_ for estimator in args]
    except AttributeError:
        raise NotFittedError('Not all estimators are fitted. Fit all estimators before using this method.')

    for classifier_idx in range(len(args) - 1):
        if not np.array_equal(classes_[classifier_idx], classes_[classifier_idx + 1]):
            return False

    return True
Example #6
Source File: test_weight_boosting.py From Mastering-Elasticsearch-7.0 with MIT License

def test_sample_weight_adaboost_regressor():
    """
    AdaBoostRegressor should work without sample_weights in the base estimator.
    The random weighted sampling is done internally in the _boost method in
    AdaBoostRegressor.
    """
    class DummyEstimator(BaseEstimator):

        def fit(self, X, y):
            pass

        def predict(self, X):
            return np.zeros(X.shape[0])

    boost = AdaBoostRegressor(DummyEstimator(), n_estimators=3)
    boost.fit(X, y_regr)

    assert_equal(len(boost.estimator_weights_), len(boost.estimator_errors_))
Example #7
Source File: base.py From Neuraxle with Apache License 2.0

def tosklearn(self):
    class NeuraxleToSKLearnPipelineWrapper(BaseEstimator):
        def __init__(self, neuraxle_step):
            self.p: Union[BaseStep, TruncableSteps] = neuraxle_step

        def set_params(self, **params) -> BaseEstimator:
            self.p.set_hyperparams(HyperparameterSpace(params))
            return self

        def get_params(self, deep=True):
            neuraxle_params = HyperparameterSamples(self.p.get_hyperparams()).to_flat_as_dict_primitive()
            return neuraxle_params

        def get_params_space(self, deep=True):
            neuraxle_params = HyperparameterSpace(self.p.get_hyperparams_space()).to_flat_as_dict_primitive()
            return neuraxle_params

        def fit(self, **args) -> BaseEstimator:
            self.p = self.p.fit(**args)
            # return self so the wrapper behaves like an sklearn estimator
            return self

        def transform(self, **args):
            return self.p.transform(**args)

    return NeuraxleToSKLearnPipelineWrapper(self)
Example #8
Source File: run.py From nyaggle with MIT License

def _dispatch_models(algorithm_type: Union[str, Type[BaseEstimator]],
                     target_type: str, custom_eval: Optional[Callable] = None):
    if not isinstance(algorithm_type, str):
        assert issubclass(algorithm_type, BaseEstimator), \
            "algorithm_type should be str or subclass of BaseEstimator"
        return algorithm_type, _dispatch_eval_func(target_type, custom_eval), None

    cat_features = {
        'lgbm': 'categorical_feature',
        'cat': 'cat_features',
        'xgb': None
    }

    gbdt_class = _dispatch_gbdt_class(algorithm_type, target_type)
    eval_func = _dispatch_eval_func(target_type, custom_eval)

    return gbdt_class, eval_func, cat_features[algorithm_type]
Example #9
Source File: model.py From gobbli with Apache License 2.0

def persist_estimator(estimator: BaseEstimator) -> Path:
    """
    Saves the given estimator to a gobbli-managed filepath, where it can be
    loaded from disk by the SKLearnClassifier.  This is useful if you want to
    use an estimator but don't want to bother with saving it to disk on your
    own.

    Args:
        estimator: The estimator to persist.

    Returns:
        The path where the estimator was saved.
    """
    estimator_dir = (
        SKLearnClassifier.model_class_dir() / "user_estimators" / generate_uuid()
    )
    estimator_dir.mkdir(exist_ok=True, parents=True)

    estimator_path = estimator_dir / SKLearnClassifier._TRAIN_OUTPUT_CHECKPOINT
    SKLearnClassifier._dump_estimator(estimator, estimator_path)

    return estimator_path
Example #10
Source File: combination.py From modAL with MIT License

def make_query_strategy(utility_measure: Callable, selector: Callable) -> Callable:
    """
    Takes the given utility measure and selector functions and makes a query
    strategy by combining them.

    Args:
        utility_measure: Utility measure, for instance
            :func:`~modAL.disagreement.vote_entropy`, but it can be a custom
            function as well. Should take a classifier and the unlabelled data
            and should return an array containing the utility scores.
        selector: Function selecting instances for query. Should take an array
            of utility scores and should return an array containing the queried
            items.

    Returns:
        A function which returns queried instances given a classifier and an
        unlabelled pool.
    """
    def query_strategy(classifier: BaseEstimator, X: modALinput) -> Tuple:
        utility = utility_measure(classifier, X)
        query_idx = selector(utility)
        return query_idx, X[query_idx]

    return query_strategy
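A usage sketch, assuming modAL's public import paths for its built-in classifier_uncertainty measure (Example #3 above) and multi_argmax selector; combining them reproduces plain uncertainty sampling:

from modAL.uncertainty import classifier_uncertainty
from modAL.utils.selection import multi_argmax
from modAL.utils.combination import make_query_strategy

# score the whole pool with the utility measure, then pick the argmax
custom_strategy = make_query_strategy(
    utility_measure=classifier_uncertainty,
    selector=multi_argmax,
)
# query_idx, query_instances = custom_strategy(fitted_classifier, X_pool)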
Example #11
Source File: test_calibration.py From Mastering-Elasticsearch-7.0 with MIT License

def test_calibration_accepts_ndarray(X):
    """Test that calibration accepts n-dimensional arrays as input"""
    y = [1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0]

    class MockTensorClassifier(BaseEstimator):
        """A toy estimator that accepts tensor inputs"""

        def fit(self, X, y):
            self.classes_ = np.unique(y)
            return self

        def decision_function(self, X):
            # toy decision function that just needs to have the right shape:
            return X.reshape(X.shape[0], -1).sum(axis=1)

    calibrated_clf = CalibratedClassifierCV(MockTensorClassifier())
    # we should be able to fit this classifier with no error
    calibrated_clf.fit(X, y)
Example #12
Source File: build_model.py From gordo with GNU Affero General Public License v3.0

def _determine_offset(
    model: BaseEstimator, X: Union[np.ndarray, pd.DataFrame]
) -> int:
    """
    Determine the model's offset. How much does the output of the model
    differ from its input?

    Parameters
    ----------
    model: sklearn.base.BaseEstimator
        Trained model with either ``predict`` or ``transform`` method,
        preference given to ``predict``.
    X: Union[np.ndarray, pd.DataFrame]
        Data to pass to the model's ``predict`` or ``transform`` method.

    Returns
    -------
    int
        The difference between X and the model's output lengths.
    """
    out = model.predict(X) if hasattr(model, "predict") else model.transform(X)
    return len(X) - len(out)
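For intuition, a sketch with a hypothetical lag-window model (illustrative only, not gordo code): a model that consumes a 10-step window produces 9 fewer output rows than input rows, so the offset is 9.

import numpy as np

class LagModel:
    """Hypothetical model whose output is 9 rows shorter than its input."""
    def predict(self, X):
        return X[9:]

offset = _determine_offset(LagModel(), np.zeros((1000, 4)))
print(offset)  # 9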
Example #13
Source File: uncertainty.py From modAL with MIT License

def classifier_entropy(classifier: BaseEstimator, X: modALinput,
                       **predict_proba_kwargs) -> np.ndarray:
    """
    Entropy of the classifier's predictions for the provided samples.

    Args:
        classifier: The classifier for which the prediction entropy is to be measured.
        X: The samples for which the prediction entropy is to be measured.
        **predict_proba_kwargs: Keyword arguments to be passed for the
            :meth:`predict_proba` of the classifier.

    Returns:
        Entropy of the class probabilities.
    """
    try:
        classwise_uncertainty = classifier.predict_proba(X, **predict_proba_kwargs)
    except NotFittedError:
        return np.zeros(shape=(X.shape[0], ))

    return np.transpose(entropy(np.transpose(classwise_uncertainty)))
Example #14
Source File: utils.py From gordo with GNU Affero General Public License v3.0

def load_model(directory: str, name: str) -> BaseEstimator:
    """
    Load a given model from the directory by name.

    Parameters
    ----------
    directory: str
        Directory to look for the model
    name: str
        Name of the model to load, this would be the sub directory within the
        directory parameter.

    Returns
    -------
    BaseEstimator
    """
    start_time = timeit.default_timer()
    model = serializer.load(os.path.join(directory, name))
    logger.debug(f"Time to load model: {timeit.default_timer() - start_time}s")
    return model
Example #15
Source File: model.py From gobbli with Apache License 2.0

def _validate_estimator(estimator: BaseEstimator):
    """
    Run some checks on the given object to determine if it's an estimator which
    is valid for our purposes.
    """
    # sklearn has a function that does a lot more intensive checking regarding
    # the interface of a candidate Estimator
    # (sklearn.utils.estimator_checks.check_estimator), but the function
    # doesn't work well for our use case as of version 0.22.  It doesn't
    # properly detect Pipeline X_types based on the first pipeline component
    # and won't test anything that doesn't accept a 2-D numpy array as input.
    # We'll settle for lax checks here until sklearn has something that works
    # better for us.
    if not is_classifier(estimator):
        raise ValueError(
            "Estimator must be a classifier according to sklearn.base.is_classifier()"
        )
    if not hasattr(estimator, "predict_proba"):
        raise ValueError(
            "Estimator must support the predict_proba() method to fulfill gobbli's "
            "interface requirements for a prediction model."
        )
Example #16
Source File: uncertainty.py From modAL with MIT License

def classifier_margin(classifier: BaseEstimator, X: modALinput,
                      **predict_proba_kwargs) -> np.ndarray:
    """
    Classification margin uncertainty of the classifier for the provided samples.
    This uncertainty measure takes the first and second most likely predictions
    and takes the difference of their probabilities, which is the margin.

    Args:
        classifier: The classifier for which the prediction margin is to be measured.
        X: The samples for which the prediction margin of classification is to be measured.
        **predict_proba_kwargs: Keyword arguments to be passed for the
            :meth:`predict_proba` of the classifier.

    Returns:
        Margin uncertainty, which is the difference of the probabilities of first
        and second most likely predictions.
    """
    try:
        classwise_uncertainty = classifier.predict_proba(X, **predict_proba_kwargs)
    except NotFittedError:
        return np.zeros(shape=(X.shape[0], ))

    if classwise_uncertainty.shape[1] == 1:
        return np.zeros(shape=(classwise_uncertainty.shape[0],))

    # np.partition(-p, 1) places the two largest probabilities (negated) in the
    # first two positions of each row; their difference is the margin
    part = np.partition(-classwise_uncertainty, 1, axis=1)
    margin = - part[:, 0] + part[:, 1]

    return margin
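A quick numeric check of the partition trick above (an illustrative sketch, not modAL code):

import numpy as np

proba = np.array([[0.1, 0.7, 0.2],
                  [0.4, 0.4, 0.2]])
part = np.partition(-proba, 1, axis=1)
# part[:, 0] holds the negated largest probability, part[:, 1] the negated runner-up
margin = -part[:, 0] + part[:, 1]
print(margin)  # [0.5 0. ] -- per-row difference between the top two probabilities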
Example #17
Source File: __init__.py From sklearn2pmml with GNU Affero General Public License v3.0

def make_pmml_pipeline(obj, active_fields = None, target_fields = None):
    """Translates a regular Scikit-Learn estimator or pipeline to a PMML pipeline.

    Parameters:
    ----------
    obj: BaseEstimator
        The object.

    active_fields: list of strings, optional
        Feature names. If missing, "x1", "x2", ..., "xn" are assumed.

    target_fields: list of strings, optional
        Label name(s). If missing, "y" is assumed.
    """
    steps = _filter_steps(_get_steps(obj))
    pipeline = PMMLPipeline(steps)
    if active_fields is not None:
        pipeline.active_fields = numpy.asarray(active_fields)
    if target_fields is not None:
        pipeline.target_fields = numpy.asarray(target_fields)
    return pipeline
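A usage sketch with hypothetical column names, assuming scikit-learn's DecisionTreeClassifier:

from sklearn.tree import DecisionTreeClassifier

# wraps a bare estimator as a one-step PMMLPipeline with named fields
pipeline = make_pmml_pipeline(
    DecisionTreeClassifier(),
    active_fields=["sepal_length", "sepal_width", "petal_length", "petal_width"],
    target_fields=["species"],
)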
Example #18
Source File: cli.py From skorch with BSD 3-Clause "New" or "Revised" License

def print_help(model, defaults=None):
    """Print help for the command line arguments of the given model.

    Parameters
    ----------
    model : sklearn.base.BaseEstimator
        The basic model, e.g. a ``NeuralNet`` or sklearn ``Pipeline``.

    defaults : dict or None (default=None)
        Optionally, change the default values to use custom defaults.
        Command line arguments have precedence over defaults.
    """
    defaults = defaults or {}

    print("This is the help for the model-specific parameters.")
    print("To invoke help for the remaining options, run:")
    print("python {} -- --help".format(sys.argv[0]))
    print()

    lines = (_get_help_for_estimator(prefix, estimator, defaults=defaults)
             for prefix, estimator in _yield_estimators(model))
    print('\n'.join(chain(*lines)))
Example #19
Source File: _normalize.py From dask-ml with BSD 3-Clause "New" or "Revised" License

def normalize_estimator(est):
    """Normalize an estimator.

    Note: Since scikit-learn requires duck-typing, but not sub-typing from
    ``BaseEstimator``, we sometimes need to call this function directly."""
    base = [type(est).__name__, normalize_token(est.get_params())]
    # fitted attributes: https://github.com/dask/dask-ml/issues/658
    attrs = [x for x in dir(est) if x.endswith("_") and not x.startswith("_")]
    exclude = {"cv_results_", "model_history_", "history_", "refit_time_"}
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", FutureWarning)
        for attr in attrs:
            if attr in exclude:
                continue
            try:
                val = getattr(est, attr)
            except (sklearn.exceptions.NotFittedError, AttributeError):
                continue
            base.append(val)
    return tuple(base)
Example #20
Source File: test_sklearn_model_io.py From kryptoflow with GNU General Public License v3.0

def test_trainable_model_from_file(sklearn_model, project_manager):
    skl = SklearnModel(artifact=sklearn_model)
    # lr = LogisticRegression()
    # trainable = TrainableModel(artifact=lr)
    skl.store(name='clf')
    trainable = TrainableModel.from_file(run_number=1, name='clf', model_type='sklearn')
    assert isinstance(trainable.model, BaseEstimator)

    for root, dirs, files in os.walk(project_manager.CONFIG['saved-models']):
        for f in files:
            os.unlink(os.path.join(root, f))
        for d in dirs:
            shutil.rmtree(os.path.join(root, d))

    with open(os.path.join(project_manager.CONFIG['saved-models'], '.gitkeep'), 'w') as gitkeep:
        gitkeep.write('empty')
Example #21
Source File: test_sklearn_model_io.py From kryptoflow with GNU General Public License v3.0

def test_loader(sklearn_model, project_manager):
    skl = SklearnModel(artifact=sklearn_model)
    skl.store(name='clf')
    reloaded = skl.load(name='clf')
    assert isinstance(reloaded, BaseEstimator)

    skl2 = SklearnModel(artifact=sklearn_model)
    skl2.store(name='clf')
    reload_first = skl.load(run_number=1, name='clf')
    assert isinstance(reload_first, BaseEstimator)

    for root, dirs, files in os.walk(project_manager.CONFIG['saved-models']):
        for f in files:
            os.unlink(os.path.join(root, f))
        for d in dirs:
            shutil.rmtree(os.path.join(root, d))

    with open(os.path.join(project_manager.CONFIG['saved-models'], '.gitkeep'), 'w') as gitkeep:
        gitkeep.write('empty')
Example #22
Source File: __init__.py From sklearn2pmml with GNU Affero General Public License v3.0

def verify(self, X, predict_params = {}, predict_proba_params = {}, precision = 1e-13, zeroThreshold = 1e-13):
    active_fields = _get_column_names(X)
    if self.active_fields is None or active_fields is None:
        raise ValueError("Cannot perform model validation with anonymous data")
    if self.active_fields.tolist() != active_fields.tolist():
        raise ValueError("The columns between training data {} and verification data {} do not match".format(self.active_fields, active_fields))
    active_values = _get_values(X)
    y = self.predict(X, **predict_params)
    target_values = _get_values(y)
    estimator = self._final_estimator
    if isinstance(estimator, BaseEstimator):
        if isinstance(estimator, RegressorMixin):
            self.verification = _Verification(active_values, target_values, precision, zeroThreshold)
        elif isinstance(estimator, ClassifierMixin):
            self.verification = _Verification(active_values, target_values, precision, zeroThreshold)
            if hasattr(estimator, "predict_proba"):
                try:
                    y_proba = self.predict_proba(X, **predict_proba_params)
                    self.verification.probability_values = _get_values(y_proba)
                except AttributeError:
                    pass
    # elif isinstance(estimator, H2OEstimator):
    elif hasattr(estimator, "_estimator_type") and hasattr(estimator, "download_mojo"):
        if estimator._estimator_type == "regressor":
            self.verification = _Verification(active_values, target_values, precision, zeroThreshold)
        elif estimator._estimator_type == "classifier":
            probability_values = target_values[:, 1:]
            target_values = target_values[:, 0]
            self.verification = _Verification(active_values, target_values, precision, zeroThreshold)
            self.verification.probability_values = probability_values
Example #23
Source File: __init__.py From sklearn2pmml with GNU Affero General Public License v3.0

def _get_steps(obj):
    if isinstance(obj, Pipeline):
        return obj.steps
    elif isinstance(obj, BaseEstimator):
        return [("estimator", obj)]
    else:
        raise ValueError()
Example #24
Source File: test_preprocessing.py From skl-groups with BSD 3-Clause "New" or "Revised" License

def test_basic():
    bags = [np.random.normal(5, 3, size=(np.random.randint(10, 100), 20))
            for _ in xrange(50)]
    feats = Features(bags, stack=True)

    stder = BagStandardizer()
    stdized = stder.fit_transform(bags)
    stdized.make_stacked()

    assert np.allclose(np.mean(stdized.stacked_features), 0)
    assert np.allclose(np.std(stdized.stacked_features), 1)

    first_five = stder.transform(bags[:5])
    assert first_five == stdized[:5]

    minmaxer = BagMinMaxScaler([3, 7])
    minmaxed = minmaxer.fit_transform(feats)
    minmaxed.make_stacked()
    assert np.allclose(np.min(minmaxed.stacked_features, 0), 3)
    assert np.allclose(np.max(minmaxed.stacked_features, 0), 7)

    normer = BagNormalizer('l1')
    normed = normer.fit_transform(Features(bags))
    normed.make_stacked()
    assert np.allclose(np.sum(np.abs(normed.stacked_features), 1), 1)

    class GetMean(BaseEstimator, TransformerMixin):
        def fit(self, X, y=None):
            return self

        def transform(self, X):
            return X.mean(axis=1)[None, :]

    m = BagPreprocesser(GetMean())
    assert_raises(ValueError, lambda: m.transform(bags))
Example #25
Source File: learners.py From modAL with MIT License

def __init__(self,
             estimator: BaseEstimator,
             query_strategy: Callable = uncertainty_sampling,
             X_training: Optional[modALinput] = None,
             y_training: Optional[modALinput] = None,
             bootstrap_init: bool = False,
             **fit_kwargs
             ) -> None:
    super().__init__(estimator, query_strategy,
                     X_training, y_training, bootstrap_init, **fit_kwargs)
Example #26
Source File: disagreement.py From modAL with MIT License

def max_std_sampling(regressor: BaseEstimator, X: modALinput,
                     n_instances: int = 1, random_tie_break=False,
                     **predict_kwargs) -> Tuple[np.ndarray, modALinput]:
    """
    Regressor standard deviation sampling strategy.

    Args:
        regressor: The regressor for which the labels are to be queried.
        X: The pool of samples to query from.
        n_instances: Number of samples to be queried.
        random_tie_break: If True, shuffles utility scores to randomize the order.
            This can be used to break the tie when the highest utility score is
            not unique.
        **predict_kwargs: Keyword arguments to be passed to :meth:`predict` of
            the CommitteeRegressor.

    Returns:
        The indices of the instances from X chosen to be labelled;
        the instances from X chosen to be labelled.
    """
    _, std = regressor.predict(X, return_std=True, **predict_kwargs)
    std = std.reshape(X.shape[0], )

    if not random_tie_break:
        query_idx = multi_argmax(std, n_instances=n_instances)
    else:
        query_idx = shuffled_argmax(std, n_instances=n_instances)

    return query_idx, X[query_idx]
Example #27
Source File: VectorQuantizer.py From stochastic_PMF with GNU General Public License v3.0

def __init__(self, clusterer=None, n_atoms=32, sparse=True, batch_size=1024,
             n_quantizers=1):
    '''Vector quantization by closest centroid:

        A[i] > 0 <=> i in argmin ||X - C_i||

    This implementation also supports soft encoding by mapping to the
    top k closest centroids.

    Arguments:
    ----------
    clusterer : {None, BaseEstimator}
        Instantiation of a clustering object
        (e.g. sklearn.cluster.MiniBatchKMeans)
        default: sklearn.cluster.MiniBatchKMeans

    n_atoms : int
        If no clusterer is provided, the number of dictionary elements
        (atoms) to extract

    sparse : bool
        Represent encoded data as a sparse matrix or ndarray

    batch_size : int
        Number of points to transform in parallel

    n_quantizers : int
        Number of quantizers to use for each point.
        By default, it uses 1 (hard VQ).
        Larger values use multiple codewords to represent each point.
    '''
    if clusterer is None:
        self.clusterer = sklearn.cluster.MiniBatchKMeans(n_clusters=n_atoms)
    else:
        self.clusterer = clusterer

    self.sparse = sparse
    self.batch_size = batch_size
    self.n_quantizers = n_quantizers
Example #28
Source File: fit.py From parfit with MIT License

def fitOne(model, X, y, params):
    """
    Makes one model fit using provided data and parameters
    :param model: The instantiated model you wish to pass, e.g. LogisticRegression()
    :param X: The independent variable data
    :param y: The response variable data
    :param params: The parameters passed through to the model from the parameter grid
    :return: Returns the fitted model
    """
    if isinstance(model, BaseEstimator):
        model.set_params(**params)
    else:
        model = model(**params)
    return model.fit(X, y)
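A usage sketch with hypothetical data, assuming scikit-learn's LogisticRegression; both branches of fitOne are exercised:

import numpy as np
from sklearn.linear_model import LogisticRegression

X = np.random.random((100, 3))
y = (X.sum(axis=1) > 1.5).astype(int)

# an instantiated estimator: params are applied via set_params()
fitted = fitOne(LogisticRegression(), X, y, {'C': 0.1})

# the class itself: params are passed to the constructor instead
fitted = fitOne(LogisticRegression, X, y, {'C': 0.1})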
Example #29
Source File: test_step.py From baikal with BSD 3-Clause "New" or "Revised" License

def test_get_params_without_init(self, teardown):
    """Test edge case where the base class does not define an __init__ method.
    get_params should resolve to object.__init__ which results in an empty dict.
    """

    class TransformerWithoutInit(TransformerMixin, BaseEstimator):
        pass

    class TransformerWithoutInitStep(Step, TransformerWithoutInit):
        pass

    step = TransformerWithoutInitStep()
    assert step.get_params() == {}
Example #30
Source File: test_sklearn_model_io.py From kryptoflow with GNU General Public License v3.0

def test_trainable_model(sklearn_model):
    assert isinstance(sklearn_model, BaseEstimator)
    trainable = TrainableModel(sklearn_model)
    assert isinstance(trainable.model, BaseEstimator)
    assert isinstance(trainable.serializer, SklearnModel)