Python sklearn.model_selection.ParameterGrid() Examples

The following are 30 code examples of sklearn.model_selection.ParameterGrid(), drawn from open source projects; the original source file and project for each example are listed above it. You may also want to check out all available functions/classes of the module sklearn.model_selection.
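Before the examples, a minimal sketch of what ParameterGrid does: it takes a dict mapping parameter names to lists of candidate values and expands it into the full cross product, yielding one settings dict per combination (the parameter names alpha and fit_intercept below are purely illustrative):

from sklearn.model_selection import ParameterGrid

# Each list of candidate values is crossed with every other list;
# iterating the grid yields one dict per combination.
grid = ParameterGrid({"alpha": [0.1, 1.0], "fit_intercept": [True, False]})
print(len(grid))   # 4
for params in grid:
    print(params)  # e.g. {'alpha': 0.1, 'fit_intercept': True}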
Example #1
Source File: test_bagging.py    From Mastering-Elasticsearch-7.0 with MIT License
def test_regression():
    # Check regression for various parameter settings.
    rng = check_random_state(0)
    X_train, X_test, y_train, y_test = train_test_split(boston.data[:50],
                                                        boston.target[:50],
                                                        random_state=rng)
    grid = ParameterGrid({"max_samples": [0.5, 1.0],
                          "max_features": [0.5, 1.0],
                          "bootstrap": [True, False],
                          "bootstrap_features": [True, False]})

    for base_estimator in [None,
                           DummyRegressor(),
                           DecisionTreeRegressor(),
                           KNeighborsRegressor(),
                           SVR(gamma='scale')]:
        for params in grid:
            BaggingRegressor(base_estimator=base_estimator,
                             random_state=rng,
                             **params).fit(X_train, y_train).predict(X_test) 
Example #2
Source File: test_iforest.py    From Mastering-Elasticsearch-7.0 with MIT License
def test_iforest_sparse():
    """Check IForest for various parameter settings on sparse input."""
    rng = check_random_state(0)
    X_train, X_test, y_train, y_test = train_test_split(boston.data[:50],
                                                        boston.target[:50],
                                                        random_state=rng)
    grid = ParameterGrid({"max_samples": [0.5, 1.0],
                          "bootstrap": [True, False]})

    for sparse_format in [csc_matrix, csr_matrix]:
        X_train_sparse = sparse_format(X_train)
        X_test_sparse = sparse_format(X_test)

        for params in grid:
            # Trained on sparse format
            sparse_classifier = IsolationForest(
                n_estimators=10, random_state=1, **params).fit(X_train_sparse)
            sparse_results = sparse_classifier.predict(X_test_sparse)

            # Trained on dense format
            dense_classifier = IsolationForest(
                n_estimators=10, random_state=1, **params).fit(X_train)
            dense_results = dense_classifier.predict(X_test)

            assert_array_equal(sparse_results, dense_results) 
Example #3
Source File: test_tune.py    From sktime with BSD 3-Clause "New" or "Revised" License
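# Note: this excerpt omits the @pytest.mark.parametrize decorator that
# supplies the forecaster, param_dict, cv and scoring arguments.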
def test_gscv_fit(forecaster, param_dict, cv, scoring):
    param_grid = ParameterGrid(param_dict)

    y = load_airline()
    gscv = ForecastingGridSearchCV(forecaster, param_grid=param_dict, cv=cv,
                                   scoring=scoring)
    gscv.fit(y)

    # check scores
    gscv_scores = gscv.cv_results_[f"mean_test_{scoring.name}"]
    expected_scores = compute_expected_gscv_scores(forecaster, cv, param_grid,
                                                   y, scoring)
    np.testing.assert_array_equal(gscv_scores, expected_scores)

    # check best parameters
    assert gscv.best_params_ == param_grid[gscv_scores.argmin()]

    # check best forecaster is the one with best parameters
    assert {key: value for key, value in
            gscv.best_forecaster_.get_params().items() if
            key in gscv.best_params_.keys()} == gscv.best_params_ 
Example #4
Source File: tune.py    From revscoring with MIT License
def _model_param_grid(params_config):
    for name, config in params_config.items():
        try:
            Model = yamlconf.import_module(config['class'])
        except Exception:
            logger.warn("Could not load model {0}"
                        .format(config['class']))
            logger.warn("Exception:\n" + traceback.format_exc())
            continue

        if not hasattr(Model, "train"):
            logger.warn("Model {0} does not have a train() method."
                        .format(config['class']))
            continue

        param_grid = ParameterGrid(config['params'])

        yield name, Model, param_grid 
Example #5
Source File: fit.py    From parfit with MIT License
def fitModels(model, paramGrid, X, y, n_jobs=-1, verbose=10):
    """
    Parallelizes fitting all models over all combinations of parameters in paramGrid on the provided data.
    :param model: The instantiated model you wish to pass, e.g. LogisticRegression()
    :param paramGrid: The ParameterGrid object created from sklearn.model_selection
    :param X: The independent variable data
    :param y: The response variable data
    :param n_jobs: Number of cores to use in parallelization (defaults to -1: all cores)
    :param verbose: The level of verbosity for reporting updates on the parallel process.
        Default is 10 (send an update at the completion of each job).
    :return: Returns a list of fitted models

    Example usage:
        from sklearn.linear_model import LogisticRegression
        from sklearn.model_selection import ParameterGrid
        model = LogisticRegression()
        grid = {
            'C': [1e-4, 1e-3], # regularization
            'penalty': ['l1','l2'], # penalty type
            'n_jobs': [-1] # parallelize within each fit over all cores
        }
        paramGrid = ParameterGrid(grid)
        myModels = fitModels(model, paramGrid, X_train, y_train)
    """
    return Parallel(n_jobs=n_jobs, verbose=verbose)(delayed(fitOne)(model, X, y, params) for params in paramGrid) 
Example #6
Source File: Estimator.py    From tbats with MIT License
def _prepare_components_grid(self, seasonal_harmonics=None):
        """Provides a grid of all allowed model component combinations.

        Parameters
        ----------
        seasonal_harmonics: array-like or None
            When provided, all component combinations will contain these harmonics
        """
        allowed_combinations = []

        use_box_cox = self.use_box_cox

        base_combination = {
            'use_box_cox': self.__prepare_component_boolean_combinations(use_box_cox),
            'box_cox_bounds': [self.box_cox_bounds],
            'use_arma_errors': [self.use_arma_errors],
            'seasonal_periods': [self.seasonal_periods],
        }
        if seasonal_harmonics is not None:
            base_combination['seasonal_harmonics'] = [seasonal_harmonics]

        if self.use_trend is not True:  # False or None
            allowed_combinations.append({
                **base_combination,
                **{
                    'use_trend': [False],
                    'use_damped_trend': [False],  # Damped trend must be False when trend is False
                }
            })

        if self.use_trend is not False:  # True or None
            allowed_combinations.append({
                **base_combination,
                **{
                    'use_trend': [True],
                    'use_damped_trend': self.__prepare_component_boolean_combinations(self.use_damped_trend),
                }
            })
        return ParameterGrid(allowed_combinations) 
Example #7
Source File: test_optimization.py    From sports-betting with MIT License
def test_apply_backtesting():
    """Test backtesting function."""

    # Input data
    bettor = Bettor(classifier=DummyClassifier(), targets=['D', 'H'])
    param_grid = {'classifier__strategy': ['uniform', 'stratified']}
    risk_factors = [0.0, 0.2, 0.4]
    random_state = 0
    X = np.random.random((100, 2))
    scores = np.repeat([1, 0], 50), np.repeat([0, 1], 50), np.repeat([1, 0], 50), np.repeat([0, 1], 50)
    odds = np.repeat([2.0, 2.0], 100).reshape(-1, 2)
    cv = TimeSeriesSplit(2, 0.3)
    n_runs = 3
    n_jobs = -1

    # Output
    results = apply_backtesting(bettor, param_grid, risk_factors, X, scores, odds, cv, random_state, n_runs, n_jobs)

    assert list(results.columns) == ['parameters', 'risk_factor', 'coverage', 'mean_yield', 'std_yield', 'std_mean_yield']
    assert len(results) == len(risk_factors) * len(ParameterGrid(param_grid)) 
Example #8
Source File: test_bagging.py    From Mastering-Elasticsearch-7.0 with MIT License
def test_classification():
    # Check classification for various parameter settings.
    rng = check_random_state(0)
    X_train, X_test, y_train, y_test = train_test_split(iris.data,
                                                        iris.target,
                                                        random_state=rng)
    grid = ParameterGrid({"max_samples": [0.5, 1.0],
                          "max_features": [1, 2, 4],
                          "bootstrap": [True, False],
                          "bootstrap_features": [True, False]})

    for base_estimator in [None,
                           DummyClassifier(),
                           Perceptron(tol=1e-3),
                           DecisionTreeClassifier(),
                           KNeighborsClassifier(),
                           SVC(gamma="scale")]:
        for params in grid:
            BaggingClassifier(base_estimator=base_estimator,
                              random_state=rng,
                              **params).fit(X_train, y_train).predict(X_test) 
Example #9
Source File: grid_search.py    From spark-sklearn with Apache License 2.0
def fit(self, X, y=None, groups=None):
        """Run fit with all sets of parameters.
        
        Parameters
        ----------
        
        X : array-like, shape = [n_samples, n_features]
            Training vector, where n_samples is the number of samples and
            n_features is the number of features.
        
        y : array-like, shape = [n_samples] or [n_samples, n_output], optional
            Target relative to X for classification or regression;
            None for unsupervised learning.
        
        groups : array-like, with shape (n_samples,), optional
            Group labels for the samples used while splitting the dataset into
            train/test set.
        """
        return self._fit(X, y, groups, ParameterGrid(self.param_grid)) 
Example #10
Source File: time_bench.py    From astroalign with MIT License
def get_parameters(sizes, stars, noises, comb_number, repeats, seed):
    """Create a list of dictionaries with all the combinations of the given
    parameters.

    """
    grid = ParameterGrid({
        "size": sizes, "stars": stars, "noise": noises})
    grid = list(grid) * comb_number

    # set the random state for reproducible parallel runs
    random = np.random.RandomState(seed)
    images_seeds = random.randint(1_000_000, size=len(grid))

    for idx, g in enumerate(grid):
        g["idx"] = idx
        g["seed"] = seed
        g["images_seed"] = images_seeds[idx]
        g["repeats"] = repeats

    return grid 
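For illustration, a hypothetical call to get_parameters as defined above (the argument values are made up): two sizes times two star counts times one noise level gives 4 grid points, replicated comb_number=2 times into 8 dicts, each annotated with idx, seed, images_seed and repeats.

# Hypothetical usage of get_parameters as excerpted above.
params = get_parameters(sizes=[256, 512], stars=[100, 300], noises=[0.01],
                        comb_number=2, repeats=10, seed=42)
assert len(params) == 8  # len(ParameterGrid) * comb_number = 4 * 2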
Example #11
Source File: test_iforest.py    From twitter-stock-recommendation with MIT License
def test_iforest_sparse():
    """Check IForest for various parameter settings on sparse input."""
    rng = check_random_state(0)
    X_train, X_test, y_train, y_test = train_test_split(boston.data[:50],
                                                        boston.target[:50],
                                                        random_state=rng)
    grid = ParameterGrid({"max_samples": [0.5, 1.0],
                          "bootstrap": [True, False]})

    for sparse_format in [csc_matrix, csr_matrix]:
        X_train_sparse = sparse_format(X_train)
        X_test_sparse = sparse_format(X_test)

        for params in grid:
            # Trained on sparse format
            sparse_classifier = IsolationForest(
                n_estimators=10, random_state=1, **params).fit(X_train_sparse)
            sparse_results = sparse_classifier.predict(X_test_sparse)

            # Trained on dense format
            dense_classifier = IsolationForest(
                n_estimators=10, random_state=1, **params).fit(X_train)
            dense_results = dense_classifier.predict(X_test)

            assert_array_equal(sparse_results, dense_results) 
Example #12
Source File: test_bagging.py    From twitter-stock-recommendation with MIT License
def test_classification():
    # Check classification for various parameter settings.
    rng = check_random_state(0)
    X_train, X_test, y_train, y_test = train_test_split(iris.data,
                                                        iris.target,
                                                        random_state=rng)
    grid = ParameterGrid({"max_samples": [0.5, 1.0],
                          "max_features": [1, 2, 4],
                          "bootstrap": [True, False],
                          "bootstrap_features": [True, False]})

    for base_estimator in [None,
                           DummyClassifier(),
                           Perceptron(tol=1e-3),
                           DecisionTreeClassifier(),
                           KNeighborsClassifier(),
                           SVC()]:
        for params in grid:
            BaggingClassifier(base_estimator=base_estimator,
                              random_state=rng,
                              **params).fit(X_train, y_train).predict(X_test) 
Example #13
Source File: test_bagging.py    From twitter-stock-recommendation with MIT License
def test_regression():
    # Check regression for various parameter settings.
    rng = check_random_state(0)
    X_train, X_test, y_train, y_test = train_test_split(boston.data[:50],
                                                        boston.target[:50],
                                                        random_state=rng)
    grid = ParameterGrid({"max_samples": [0.5, 1.0],
                          "max_features": [0.5, 1.0],
                          "bootstrap": [True, False],
                          "bootstrap_features": [True, False]})

    for base_estimator in [None,
                           DummyRegressor(),
                           DecisionTreeRegressor(),
                           KNeighborsRegressor(),
                           SVR()]:
        for params in grid:
            BaggingRegressor(base_estimator=base_estimator,
                             random_state=rng,
                             **params).fit(X_train, y_train).predict(X_test) 
Example #14
Source File: Estimator.py    From tbats with MIT License
def _prepare_non_seasonal_components_grid(self):
        """Provides a grid of all allowed  non-season model component combinations."""
        allowed_combinations = []

        use_box_cox = self.use_box_cox

        base_combination = {
            'use_box_cox': self.__prepare_component_boolean_combinations(use_box_cox),
            'box_cox_bounds': [self.box_cox_bounds],
            'use_arma_errors': [self.use_arma_errors],
            'seasonal_periods': [[]],
        }

        if self.use_trend is not True:  # False or None
            allowed_combinations.append({
                **base_combination,
                **{
                    'use_trend': [False],
                    'use_damped_trend': [False],  # Damped trend must be False when trend is False
                }
            })

        if self.use_trend is not False:  # True or None
            allowed_combinations.append({
                **base_combination,
                **{
                    'use_trend': [True],
                    'use_damped_trend': self.__prepare_component_boolean_combinations(self.use_damped_trend),
                }
            })
        return ParameterGrid(allowed_combinations) 
Example #15
Source File: _incremental.py    From dask-ml with BSD 3-Clause "New" or "Revised" License
def _adapt(self, info):
        # First, have an adaptive algorithm
        if self.n_initial_parameters == "grid":
            start = len(ParameterGrid(self.parameters))
        else:
            start = self.n_initial_parameters

        def inverse(time):
            """ Decrease target number of models inversely with time """
            return int(start / (1 + time) ** self.decay_rate)

        example = toolz.first(info.values())
        time_step = example[-1]["partial_fit_calls"]

        current_time_step = time_step + 1
        next_time_step = current_time_step

        if inverse(current_time_step) == 0:
            # we'll never get out of here
            next_time_step = 1

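        # Keep stepping forward until the target model count actually drops;
        # when patience is set (or there is no decay), the look-ahead is
        # bounded by fits_per_score steps.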
        while inverse(current_time_step) == inverse(next_time_step) and (
            self.decay_rate
            and not self.patience
            or next_time_step - current_time_step < self.fits_per_score
        ):
            next_time_step += 1

        target = max(1, inverse(next_time_step))
        best = toolz.topk(target, info, key=lambda k: info[k][-1]["score"])

        if len(best) == 1:
            [best] = best
            return {best: 0}
        steps = next_time_step - current_time_step
        instructions = {b: steps for b in best}
        return instructions 
Example #16
Source File: optimization.py    From sports-betting with MIT License
def apply_backtesting(bettor, param_grid, risk_factors, X, scores, odds, cv, random_state, n_runs, n_jobs):
    """Apply backtesting to evaluate bettor."""
    
    # Check random states
    random_states = check_random_states(random_state, n_runs)

    # Check arrays
    X = check_array(X, dtype=None, force_all_finite=False)
    normalized_scores = []
    for score in scores:
        normalized_scores.append(check_array(score, dtype=None, ensure_2d=False))
    odds = check_array(odds, dtype=None)

    # Extract parameters
    parameters = ParameterGrid(param_grid)

    # Run backtesting
    data = Parallel(n_jobs=n_jobs)(delayed(fit_bet)(bettor, params, risk_factors, random_state, X, normalized_scores, odds, train_indices, test_indices) 
           for params, random_state, (train_indices, test_indices) in tqdm(list(product(parameters, random_states, cv.split(X))), desc='Tasks'))
    
    # Combine data
    data = pd.concat(data, ignore_index=True)
    data = data.groupby(['parameters', 'risk_factor', 'experiment']).apply(lambda df: np.concatenate(df.yields.values)).reset_index()
    data[['coverage', 'mean_yield', 'std_yield']] = pd.DataFrame(data[0].apply(lambda yields: extract_yields_stats(yields)).values.tolist())
    
    # Calculate results
    results = data.drop(columns=['experiment', 0]).groupby(['parameters', 'risk_factor']).mean().reset_index()
    results['std_mean_yield'] = data.groupby(['parameters', 'risk_factor'])['mean_yield'].std().values
    results = results.sort_values('mean_yield', ascending=False).reset_index(drop=True)

    return results 
Example #17
Source File: _incremental.py    From dask-ml with BSD 3-Clause "New" or "Revised" License
def _get_params(self):
        if self.n_initial_parameters == "grid":
            return ParameterGrid(self.parameters)
        else:
            return ParameterSampler(
                self.parameters,
                self.n_initial_parameters,
                random_state=self.random_state,
            ) 
Example #18
Source File: experiments.py    From AnomalyDetectionTransformations with MIT License
def _raw_ocsvm_experiment(dataset_load_fn, dataset_name, single_class_ind):
    (x_train, y_train), (x_test, y_test) = dataset_load_fn()

    x_train = x_train.reshape((len(x_train), -1))
    x_test = x_test.reshape((len(x_test), -1))

    x_train_task = x_train[y_train.flatten() == single_class_ind]
    if dataset_name in ['cats-vs-dogs']:  # OC-SVM is quadratic in the number of examples, so subsample the training set
        subsample_inds = np.random.choice(len(x_train_task), 5000, replace=False)
        x_train_task = x_train_task[subsample_inds]

    pg = ParameterGrid({'nu': np.linspace(0.1, 0.9, num=9),
                        'gamma': np.logspace(-7, 2, num=10, base=2)})

    results = Parallel(n_jobs=6)(
        delayed(_train_ocsvm_and_score)(d, x_train_task, y_test.flatten() == single_class_ind, x_test)
        for d in pg)

    best_params, best_auc_score = max(zip(pg, results), key=lambda t: t[-1])
    best_ocsvm = OneClassSVM(**best_params).fit(x_train_task)
    scores = best_ocsvm.decision_function(x_test)
    labels = y_test.flatten() == single_class_ind

    res_file_name = '{}_raw-oc-svm_{}_{}.npz'.format(dataset_name,
                                                     get_class_name_from_index(single_class_ind, dataset_name),
                                                     datetime.now().strftime('%Y-%m-%d-%H%M'))
    res_file_path = os.path.join(RESULTS_DIR, dataset_name, res_file_name)
    save_roc_pr_curve_data(scores, labels, res_file_path) 
Example #19
Source File: _incremental.py    From dask-ml with BSD 3-Clause "New" or "Revised" License
def _get_cv_results(self, history, model_hist):
        best_scores = {k: hist[-1]["score"] for k, hist in model_hist.items()}

        cv_results = {}
        for k, hist in model_hist.items():
            pf_times = list(toolz.pluck("partial_fit_time", hist))
            score_times = list(toolz.pluck("score_time", hist))
            cv_results[k] = {
                "mean_partial_fit_time": np.mean(pf_times),
                "mean_score_time": np.mean(score_times),
                "std_partial_fit_time": np.std(pf_times),
                "std_score_time": np.std(score_times),
                "test_score": best_scores[k],
                "model_id": k,
                "params": hist[0]["params"],
                "partial_fit_calls": hist[-1]["partial_fit_calls"],
            }
        cv_results = list(cv_results.values())  # list of dicts
        cv_results = {k: [res[k] for res in cv_results] for k in cv_results[0]}

        # Every model's params dict has the same keys because this class uses
        # either ParameterSampler or ParameterGrid
        params = defaultdict(list)
        for model_params in cv_results["params"]:
            for k, v in model_params.items():
                params[k].append(v)

        for k, v in params.items():
            cv_results["param_" + k] = v

        cv_results = {k: np.array(v) for k, v in cv_results.items()}
        cv_results["rank_test_score"] = scipy.stats.rankdata(
            -cv_results["test_score"], method="min"
        ).astype(int)
        return cv_results 
Example #20
Source File: test_model_selection.py    From pandas-ml with BSD 3-Clause "New" or "Revised" License
def test_objectmapper(self):
        df = pdml.ModelFrame([])

        # Splitter Classes
        self.assertIs(df.model_selection.KFold, ms.KFold)
        self.assertIs(df.model_selection.GroupKFold, ms.GroupKFold)
        self.assertIs(df.model_selection.StratifiedKFold, ms.StratifiedKFold)

        self.assertIs(df.model_selection.LeaveOneGroupOut, ms.LeaveOneGroupOut)
        self.assertIs(df.model_selection.LeavePGroupsOut, ms.LeavePGroupsOut)
        self.assertIs(df.model_selection.LeaveOneOut, ms.LeaveOneOut)
        self.assertIs(df.model_selection.LeavePOut, ms.LeavePOut)

        self.assertIs(df.model_selection.ShuffleSplit, ms.ShuffleSplit)
        self.assertIs(df.model_selection.GroupShuffleSplit,
                      ms.GroupShuffleSplit)
        # self.assertIs(df.model_selection.StratifiedShuffleSplit,
        #               ms.StratifiedShuffleSplit)
        self.assertIs(df.model_selection.PredefinedSplit, ms.PredefinedSplit)
        self.assertIs(df.model_selection.TimeSeriesSplit, ms.TimeSeriesSplit)

        # Splitter Functions

        # Hyper-parameter optimizers
        self.assertIs(df.model_selection.GridSearchCV, ms.GridSearchCV)
        self.assertIs(df.model_selection.RandomizedSearchCV, ms.RandomizedSearchCV)
        self.assertIs(df.model_selection.ParameterGrid, ms.ParameterGrid)
        self.assertIs(df.model_selection.ParameterSampler, ms.ParameterSampler)

        # Model validation 
Example #21
Source File: test_model_selection.py    From pandas-ml with BSD 3-Clause "New" or "Revised" License
def test_objectmapper_abbr(self):
        df = pdml.ModelFrame([])

        # Splitter Classes
        self.assertIs(df.ms.KFold, ms.KFold)
        self.assertIs(df.ms.GroupKFold, ms.GroupKFold)
        self.assertIs(df.ms.StratifiedKFold, ms.StratifiedKFold)

        self.assertIs(df.ms.LeaveOneGroupOut, ms.LeaveOneGroupOut)
        self.assertIs(df.ms.LeavePGroupsOut, ms.LeavePGroupsOut)
        self.assertIs(df.ms.LeaveOneOut, ms.LeaveOneOut)
        self.assertIs(df.ms.LeavePOut, ms.LeavePOut)

        self.assertIs(df.ms.ShuffleSplit, ms.ShuffleSplit)
        self.assertIs(df.ms.GroupShuffleSplit,
                      ms.GroupShuffleSplit)
        # self.assertIs(df.ms.StratifiedShuffleSplit,
        #               ms.StratifiedShuffleSplit)
        self.assertIs(df.ms.PredefinedSplit, ms.PredefinedSplit)
        self.assertIs(df.ms.TimeSeriesSplit, ms.TimeSeriesSplit)

        # Splitter Functions

        # Hyper-parameter optimizers
        self.assertIs(df.ms.GridSearchCV, ms.GridSearchCV)
        self.assertIs(df.ms.RandomizedSearchCV, ms.RandomizedSearchCV)
        self.assertIs(df.ms.ParameterGrid, ms.ParameterGrid)
        self.assertIs(df.ms.ParameterSampler, ms.ParameterSampler)

        # Model validation 
Example #22
Source File: gridsearch.py    From chainladder-python with Mozilla Public License 2.0
def fit(self, X, y=None, **fit_params):
        """Fit the model with X.

        Parameters
        ----------
        X : Triangle-like
            Set of LDFs to which the tail will be applied.
        y : Ignored
        fit_params : (optional) dict of string -> object
            Parameters passed to the ``fit`` method of the estimator

        Returns
        -------
        self : object
            Returns the instance itself.
        """
        if type(self.scoring) is not dict:
            scoring = dict(score=self.scoring)
        else:
            scoring = self.scoring
        grid = list(ParameterGrid(self.param_grid))
        results_ = []
        for num, item in enumerate(grid):
            est = copy.deepcopy(self.estimator).set_params(**item)
            model = est.fit(X, y, **fit_params)
            for score in scoring.keys():
                item[score] = scoring[score](model)
            results_.append(item)
        self.results_ = pd.DataFrame(results_)
        return self 
Example #23
Source File: validation.py    From sk-dist with Apache License 2.0
def _check_n_iter(n_iter, param_distributions):
    """
    Check if n_iter is greater than the total number 
    of possible param sets from the given distribution.
    """
    all_lists = np.all([not hasattr(v, "rvs") 
        for v in param_distributions.values()])
    if all_lists:
        param_grid = ParameterGrid(param_distributions)
        grid_size = len(param_grid)
    else:
        grid_size = n_iter
    return min(grid_size, n_iter) 
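To make the check above concrete, a small sketch (the parameter names a and b are made up): when every value in param_distributions is a plain list, nothing exposes an rvs method, so the number of candidates is capped by the exhaustive grid size.

from sklearn.model_selection import ParameterGrid

# All-list distributions define only 2 * 2 = 4 distinct parameter sets,
# so a requested n_iter of 10 is capped at 4.
param_distributions = {"a": [1, 2], "b": [True, False]}
print(min(len(ParameterGrid(param_distributions)), 10))  # 4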
Example #24
Source File: grid_searcher.py    From autogluon with Apache License 2.0
def __init__(self, configspace, **kwargs):
        super().__init__(
            configspace, reward_attribute=kwargs.get('reward_attribute'))
        param_grid = {}
        hp_ordering = configspace.get_hyperparameter_names()
        for hp in hp_ordering:
            hp_obj = configspace.get_hyperparameter(hp)
            hp_type = str(type(hp_obj)).lower()
            assert 'categorical' in hp_type, \
                'Only Categorical is supported, but {} is {}'.format(hp, hp_type)
            param_grid[hp] = hp_obj.choices

        self._configs = list(ParameterGrid(param_grid))
        print('Number of configurations for grid search is {}'.format(len(self._configs))) 
Example #25
Source File: test_iforest.py    From twitter-stock-recommendation with MIT License
def test_iforest():
    """Check Isolation Forest for various parameter settings."""
    X_train = np.array([[0, 1], [1, 2]])
    X_test = np.array([[2, 1], [1, 1]])

    grid = ParameterGrid({"n_estimators": [3],
                          "max_samples": [0.5, 1.0, 3],
                          "bootstrap": [True, False]})

    with ignore_warnings():
        for params in grid:
            IsolationForest(random_state=rng,
                            **params).fit(X_train).predict(X_test) 
Example #26
Source File: test_search.py    From twitter-stock-recommendation with MIT License
def test_parameter_grid():
    # Test basic properties of ParameterGrid.
    params1 = {"foo": [1, 2, 3]}
    grid1 = ParameterGrid(params1)
    assert_true(isinstance(grid1, Iterable))
    assert_true(isinstance(grid1, Sized))
    assert_equal(len(grid1), 3)
    assert_grid_iter_equals_getitem(grid1)

    params2 = {"foo": [4, 2],
               "bar": ["ham", "spam", "eggs"]}
    grid2 = ParameterGrid(params2)
    assert_equal(len(grid2), 6)

    # loop to assert we can iterate over the grid multiple times
    for i in range(2):
        # tuple + chain transforms {"a": 1, "b": 2} to ("a", 1, "b", 2)
        points = set(tuple(chain(*(sorted(p.items())))) for p in grid2)
        assert_equal(points,
                     set(("bar", x, "foo", y)
                         for x, y in product(params2["bar"], params2["foo"])))
    assert_grid_iter_equals_getitem(grid2)

    # Special case: empty grid (useful to get default estimator settings)
    empty = ParameterGrid({})
    assert_equal(len(empty), 1)
    assert_equal(list(empty), [{}])
    assert_grid_iter_equals_getitem(empty)
    assert_raises(IndexError, lambda: empty[1])

    has_empty = ParameterGrid([{'C': [1, 10]}, {}, {'C': [.5]}])
    assert_equal(len(has_empty), 4)
    assert_equal(list(has_empty), [{'C': 1}, {'C': 10}, {}, {'C': .5}])
    assert_grid_iter_equals_getitem(has_empty) 
Example #27
Source File: test_bicluster.py    From twitter-stock-recommendation with MIT License
def test_spectral_coclustering():
    # Test Dhillon's Spectral CoClustering on a simple problem.
    param_grid = {'svd_method': ['randomized', 'arpack'],
                  'n_svd_vecs': [None, 20],
                  'mini_batch': [False, True],
                  'init': ['k-means++'],
                  'n_init': [10],
                  'n_jobs': [1]}
    random_state = 0
    S, rows, cols = make_biclusters((30, 30), 3, noise=0.5,
                                    random_state=random_state)
    S -= S.min()  # needs to be nonnegative before making it sparse
    S = np.where(S < 1, 0, S)  # threshold some values
    for mat in (S, csr_matrix(S)):
        for kwargs in ParameterGrid(param_grid):
            model = SpectralCoclustering(n_clusters=3,
                                         random_state=random_state,
                                         **kwargs)
            model.fit(mat)

            assert_equal(model.rows_.shape, (3, 30))
            assert_array_equal(model.rows_.sum(axis=0), np.ones(30))
            assert_array_equal(model.columns_.sum(axis=0), np.ones(30))
            assert_equal(consensus_score(model.biclusters_,
                                         (rows, cols)), 1)

            _test_shape_indices(model) 
Example #28
Source File: test_search.py    From Mastering-Elasticsearch-7.0 with MIT License
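# Note: this excerpt omits the @pytest.mark.parametrize decorator that
# supplies the input, error_type and error_message arguments.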
def test_validate_parameter_grid_input(input, error_type, error_message):
    with pytest.raises(error_type, match=error_message):
        ParameterGrid(input) 
Example #29
Source File: time_regression.py    From astroalign with MIT License
def get_parameters(min_size, max_size, step_size, stars,
                   noise, seed, comb_number, repeats):
    """Create a list of dictionaries with all the combinations of the given
    parameters.

    """

    sample_size = int((max_size - min_size) / step_size)
    sizes = np.linspace(min_size, max_size, sample_size, dtype=int)

    grid = ParameterGrid({
        "size": sizes, "stars": [stars],
        "noise": [noise], "repeats": [repeats]})
    grid = list(grid) * comb_number

    # set the random state for reproducible parallel runs
    random = np.random.RandomState(seed)
    images_seeds = random.randint(1_000_000, size=len(grid))

    for idx, g in enumerate(grid):
        g["idx"] = idx
        g["seed"] = seed
        g["min_size"] = min_size
        g["max_size"] = max_size
        g["step_size"] = step_size
        g["images_seed"] = images_seeds[idx]
    return grid 
Example #30
Source File: fixes.py    From skutil with BSD 3-Clause "New" or "Revised" License
def fit(self, X, y=None):
        """Run fit with all sets of parameters.

        Parameters
        ----------
        X : array-like, shape = [n_samples, n_features]
            Training vector, where n_samples is the number of samples and
            n_features is the number of features.
        y : array-like, shape = [n_samples] or [n_samples, n_output], optional
            Target relative to X for classification or regression;
            None for unsupervised learning.
        """
        return self._fit(X, y, ParameterGrid(self.param_grid))