Python sklearn.base.ClusterMixin() Examples

The following are 4 code examples of sklearn.base.ClusterMixin(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module sklearn.base, or try the search function.

Example #1

Source File: _test.py From ibex with BSD 3-Clause "New" or "Revised" License

5 votes

def _generate_bases_test(est, pd_est):
    """Build a test method comparing the mixin bases of two estimators.

    The returned function takes ``self`` and relies on its ``assertTrue``,
    ``assertFalse`` and ``assertEqual`` methods, so it is meant to be
    attached to a test-case class.

    :param est: the original estimator; the test asserts it is *not* a
        ``FrameMixin``.
    :param pd_est: the counterpart of ``est``; the test asserts it is a
        ``FrameMixin`` and a ``base.BaseEstimator``.
    :return: a function ``test(self)`` that checks ``est`` and ``pd_est``
        agree on membership of every mixin in the list below.
    """
    def test(self):
        self.assertTrue(isinstance(pd_est, FrameMixin), pd_est)
        self.assertFalse(isinstance(est, FrameMixin))
        self.assertTrue(isinstance(pd_est, base.BaseEstimator))

        mixins = [
            base.ClassifierMixin,
            base.ClusterMixin,
            base.BiclusterMixin,
            base.TransformerMixin,
            base.MetaEstimatorMixin,
            base.RegressorMixin,
        ]
        # DensityMixin may be missing from ``base``. Only the attribute
        # lookup is guarded, and only AttributeError is caught, so unrelated
        # failures (or KeyboardInterrupt) are never swallowed here.
        try:
            density_mixin = base.DensityMixin
        except AttributeError:
            # The absence is tolerated only when _sklearn_ver <= 17
            # (presumably old sklearn releases -- confirm against where
            # _sklearn_ver is computed); otherwise it is a real error.
            if _sklearn_ver > 17:
                raise
        else:
            mixins.append(density_mixin)

        # Both estimators must agree on every mixin.
        for mixin in mixins:
            self.assertEqual(
                isinstance(pd_est, mixin),
                isinstance(est, mixin),
                mixin)

    return test

Example #2

Source File: ABuMLExecute.py From abu with GNU General Public License v3.0

5 votes

def run_silhouette_cv_estimator(estimator, x, n_folds=10):
    """
    Repeatedly cluster random subsets of ``x`` and score each clustering.

    Intended for k-means style estimators: after every fit, the resulting
    ``labels_`` are measured with ``metrics.silhouette_score``. The "cv"
    here is just a random row selection through ``np.random.choice``; no
    train/test split is involved.

    :param estimator: k-means or any estimator exposing ``labels_`` after
        ``fit``; the only filter applied is
        ``isinstance(estimator, ClusterMixin)``
    :param x: feature matrix; indexed by an integer index array and read
        through ``len(x)`` and ``x.shape[0]``
    :param n_folds: int, default 10; number of rounds, and each round
        samples ``int(len(x) * (n_folds - 1) / n_folds)`` rows
    :return: list with one silhouette score per round, eg:
        [0.693, 0.652, 0.6845, 0.6696, 0.6732, 0.6874, 0.668,
         0.6743, 0.6748, 0.671];
        ``None`` (after printing a message) when ``estimator`` is not a
        ``ClusterMixin``
    """
    if not isinstance(estimator, ClusterMixin):
        print('estimator must be ClusterMixin')
        return

    # eg: n_folds = 10, len(x) = 150 -> 150 * 0.9 = 135
    keep_fraction = (n_folds - 1) / n_folds
    sample_size = int(len(x) * keep_fraction)
    row_ids = np.arange(0, x.shape[0])

    # Every fit runs on a fresh clone so the caller's estimator is untouched.
    working_estimator = clone(estimator)

    def _score_one_round():
        # NOTE(review): np.random.choice is called without ``replace``, so
        # numpy's default applies -- confirm repeated rows are acceptable.
        picked = np.random.choice(row_ids, sample_size)
        subset = x[picked]
        working_estimator.fit(subset)
        # Silhouette score of the clustering just produced on this subset.
        return metrics.silhouette_score(
            subset, working_estimator.labels_, metric='euclidean')

    return [_score_one_round() for _ in np.arange(0, n_folds)]

Example #3

Source File: des_clustering.py From DESlib with BSD 3-Clause "New" or "Revised" License

5 votes

def _check_parameters(self):
        """Check if the parameters passed as argument are correct.

        Reads ``self.metric_diversity``, ``self.metric_performance``,
        ``self.N_``, ``self.J_`` and ``self.clustering``; nothing is
        modified.

        Raises
        ------
        ValueError
            If ``metric_diversity`` is not one of 'DF', 'Q' or 'ratio';
            if ``metric_performance`` is not the name of an attribute of
            ``metrics``; if ``N_`` or ``J_`` is not positive; if ``N_`` is
            smaller than ``J_``; or if ``clustering`` is given but is not a
            ``ClusterMixin`` instance.
        """
        if self.metric_diversity not in ['DF', 'Q', 'ratio']:
            # The message spells the accepted value exactly as it is
            # checked ('ratio', lower case).
            raise ValueError(
                'Diversity metric must be one of the following values:'
                ' "DF", "Q" or "ratio"')

        # The performance metric is looked up by name on ``metrics``
        # (presumably sklearn.metrics -- confirm against the file imports).
        try:
            getattr(metrics, self.metric_performance)
        except AttributeError:
            raise ValueError(
                "Parameter metric_performance must be a sklearn metrics")

        if self.N_ <= 0 or self.J_ <= 0:
            # ". " keeps the two adjacent literals from running together.
            raise ValueError("The values of N_ and J_ should be higher than 0. "
                             "N_ = {}, J_= {} ".format(self.N_, self.J_))
        if self.N_ < self.J_:
            raise ValueError(
                "The value of N_ should be greater or equals than J_. "
                "N_ = {}, J_= {} ".format(self.N_, self.J_))

        # ``clustering`` is optional; when given it must be a ClusterMixin.
        if self.clustering is not None:
            if not isinstance(self.clustering, ClusterMixin):
                raise ValueError(
                    "Parameter clustering must be a sklearn"
                    " cluster estimator.")

Example #4

Source File: sklearn_patches.py From tslearn with BSD 2-Clause "Simplified" License

4 votes

def yield_all_checks(name, estimator):
    """Yield every estimator check that applies to ``estimator``.

    The estimator's tags (from ``estimator._get_tags()``) decide what is
    produced. Nothing is yielded, and a ``SkipTestWarning`` is emitted,
    when ``"2darray"`` is not among ``tags["X_types"]`` or when
    ``tags["_skip_test"]`` is truthy.

    Parameters
    ----------
    name : passed through to the ``_yield_*_checks`` helpers and used in
        the warning messages.
    estimator : object exposing ``_get_tags()``.

    Yields
    ------
    Check callables, in a fixed order: generic checks, then the
    classifier / regressor / transformer / clustering / outlier groups
    that apply, then the unconditional checks, and finally the
    variable-length check when applicable.
    """
    tags = estimator._get_tags()

    # Work out whether this estimator must be skipped, and why.
    skip_message = None
    if "2darray" not in tags["X_types"]:
        skip_message = ("Can't test estimator {} which requires input "
                        " of type {}".format(name, tags["X_types"]))
    elif tags["_skip_test"]:
        skip_message = ("Explicit SKIP via _skip_test tag for estimator "
                        "{}.".format(name))
    if skip_message is not None:
        warnings.warn(skip_message, SkipTestWarning)
        return

    yield from _yield_checks(name, estimator)
    if is_classifier(estimator):
        yield from _yield_classifier_checks(name, estimator)
    if is_regressor(estimator):
        yield from _yield_regressor_checks(name, estimator)
    # Transformer tests ensure that shapes are the same at fit and
    # transform time, hence they are skipped for estimators that allow
    # variable-length inputs.
    if hasattr(estimator, 'transform') and not tags["allow_variable_length"]:
        yield from _yield_transformer_checks(name, estimator)
    if isinstance(estimator, ClusterMixin):
        yield from _yield_clustering_checks(name, estimator)
    if is_outlier_detector(estimator):
        yield from _yield_outliers_checks(name, estimator)
    # We are not strict on presence/absence of the 3rd dimension, so
    # check_fit2d_predict1d is deliberately not yielded.

    if not tags["non_deterministic"]:
        yield check_methods_subset_invariance

    # Checks that apply to every estimator reaching this point.
    for common_check in (
            check_fit2d_1sample,
            check_fit2d_1feature,
            check_fit1d,
            check_get_params_invariance,
            check_set_params,
            check_dict_unchanged,
            check_dont_overwrite_parameters,
            check_fit_idempotent):
        yield common_check

    is_predictor = (is_classifier(estimator) or
                    is_regressor(estimator) or
                    isinstance(estimator, ClusterMixin))
    if is_predictor and tags["allow_variable_length"]:
        yield check_different_length_fit_predict_transform