Python hyperopt.space_eval() Examples

The following are 14 code examples of hyperopt.space_eval(). You can go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module hyperopt, or try the search function.
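Before the project examples, here is a minimal, self-contained sketch of what space_eval() is for: fmin() reports hp.choice parameters as indices into their list of options, and space_eval() translates the result back into the actual parameter values. (The search space and objective below are invented purely for illustration.)

from hyperopt import fmin, hp, space_eval, tpe

space = {
    "kernel": hp.choice("kernel", ["linear", "rbf"]),
    "C": hp.uniform("C", 0.1, 10.0),
}

def objective(params):
    # Toy loss: prefer the "rbf" kernel and a small C.
    penalty = 0.0 if params["kernel"] == "rbf" else 1.0
    return params["C"] + penalty

best = fmin(fn=objective, space=space, algo=tpe.suggest, max_evals=10)
print(best)                     # e.g. {'C': 0.31, 'kernel': 1} -- choice index, not value
print(space_eval(space, best))  # e.g. {'C': 0.31, 'kernel': 'rbf'} -- actual value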
Example #1
Source File: automl.py    From kddcup2019-automl with MIT License
# Imports used by this example (data_split, log, and Config are project
# helpers defined elsewhere in the same file):
from typing import Dict

import hyperopt
import lightgbm as lgb
import numpy as np
import pandas as pd
from hyperopt import STATUS_OK, Trials, hp, space_eval, tpe

def hyperopt_lightgbm(X: pd.DataFrame, y: pd.Series, params: Dict, config: Config):
    X_train, X_val, y_train, y_val = data_split(X, y, test_size=0.5)
    train_data = lgb.Dataset(X_train, label=y_train)
    valid_data = lgb.Dataset(X_val, label=y_val)

    space = {
        "max_depth": hp.choice("max_depth", np.arange(2, 10, 1, dtype=int)),
        # smaller than 2^(max_depth)
        "num_leaves": hp.choice("num_leaves", np.arange(4, 200, 4, dtype=int)),
        "feature_fraction": hp.quniform("feature_fraction", 0.2, 0.8, 0.1),
        # "bagging_fraction": hp.quniform("bagging_fraction", 0.2, 0.8, 0.1),
        # "bagging_freq": hp.choice("bagging_freq", np.linspace(0, 10, 2, dtype=int)),
        # "scale_pos_weight":hp.uniform('scale_pos_weight',1.0, 10.0),
        # "colsample_by_tree":hp.uniform("colsample_bytree",0.5,1.0),
        "min_child_weight": hp.quniform('min_child_weight', 2, 50, 2),
        "reg_alpha": hp.uniform("reg_alpha", 2.0, 8.0),
        "reg_lambda": hp.uniform("reg_lambda", 2.0, 8.0),
        "learning_rate": hp.quniform("learning_rate", 0.05, 0.4, 0.01),
        # "learning_rate": hp.loguniform("learning_rate", np.log(0.04), np.log(0.5)),
        #
        "min_data_in_leaf": hp.choice('min_data_in_leaf', np.arange(200, 2000, 100, dtype=int)),
        #"is_unbalance": hp.choice("is_unbalance", [True])
    }

    def objective(hyperparams):
        model = lgb.train({**params, **hyperparams}, train_data, 300,
                          valid_data, early_stopping_rounds=45, verbose_eval=0)

        score = model.best_score["valid_0"][params["metric"]]

        # hyperopt minimizes the loss, so return the negated score (higher AUC is better)
        return {'loss': -score, 'status': STATUS_OK}

    trials = Trials()
    best = hyperopt.fmin(fn=objective, space=space, trials=trials,
                         algo=tpe.suggest, max_evals=150, verbose=1,
                         rstate=np.random.RandomState(1))

    hyperparams = space_eval(space, best)
    log(f"auc = {-trials.best_trial['result']['loss']:0.4f} {hyperparams}")
    return hyperparams 
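Note: this example seeds the search with rstate=np.random.RandomState(1). Recent hyperopt releases (0.2.7 and later) expect a NumPy Generator instead, e.g. rstate=np.random.default_rng(1), so adjust the argument to match your installed version.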
Example #2
Source File: automl.py    From Kaggler with MIT License
def optimize_hyperparam(self, X, y, test_size=.2, n_eval=100):
        X_trn, X_val, y_trn, y_val = train_test_split(X, y, test_size=test_size, shuffle=self.shuffle)

        def objective(hyperparams):
            model = XGBModel(n_estimators=self.n_est, **self.params, **hyperparams)
            model.fit(X=X_trn, y=y_trn,
                      eval_set=[(X_val, y_val)],
                      eval_metric=self.metric,
                      early_stopping_rounds=self.n_stop,
                      verbose=False)
            score = model.evals_result()['validation_0'][self.metric][model.best_iteration] * self.loss_sign

            return {'loss': score, 'status': STATUS_OK, 'model': model}

        trials = Trials()
        best = hyperopt.fmin(fn=objective, space=self.space, trials=trials,
                             algo=tpe.suggest, max_evals=n_eval, verbose=1,
                             rstate=self.random_state)

        hyperparams = space_eval(self.space, best)
        return hyperparams, trials 
Example #3
Source File: automl.py    From Kaggler with MIT License
def optimize_hyperparam(self, X, y, test_size=.2, n_eval=100):
        X_trn, X_val, y_trn, y_val = train_test_split(X, y, test_size=test_size, shuffle=self.shuffle)

        train_data = lgb.Dataset(X_trn, label=y_trn)
        valid_data = lgb.Dataset(X_val, label=y_val)

        def objective(hyperparams):
            model = lgb.train({**self.params, **hyperparams}, train_data, self.n_est,
                              valid_data, early_stopping_rounds=self.n_stop, verbose_eval=0)

            score = model.best_score["valid_0"][self.metric] * self.loss_sign

            return {'loss': score, 'status': STATUS_OK, 'model': model}

        trials = Trials()
        best = hyperopt.fmin(fn=objective, space=self.space, trials=trials,
                             algo=tpe.suggest, max_evals=n_eval, verbose=1,
                             rstate=self.random_state)

        hyperparams = space_eval(self.space, best)
        return hyperparams, trials 
Example #4
Source File: bayesian_optimizer.py    From pykg2vec with MIT License
def optimize(self):
        """Function that performs bayesian optimization"""
        trials = Trials()

        self._best_result = fmin(fn=self._get_loss, space=self.search_space, trials=trials,
                                 algo=tpe.suggest, max_evals=self.max_evals)
        
        columns = list(self.search_space.keys())   
        results = pd.DataFrame(columns=['iteration'] + columns + ['loss'])
        
        for idx, trial in enumerate(trials.trials):
            row = [idx]
            translated_eval = space_eval(self.search_space, {k: v[0] for k, v in trial['misc']['vals'].items()})
            for k in columns:
                row.append(translated_eval[k])
            row.append(trial['result']['loss'])
            results.loc[idx] = row

        path = self.config_local.path_result / self.model_name
        path.mkdir(parents=True, exist_ok=True)
        results.to_csv(str(path / "trials.csv"), index=False)
        
        self._logger.info(results)
        self._logger.info('Found golden setting:')
        self._logger.info(space_eval(self.search_space, self._best_result)) 
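Note the {k: v[0] for k, v in trial['misc']['vals'].items()} step above: Trials stores each sampled value wrapped in a single-element list, so the values have to be unwrapped before space_eval() can translate them. Example #11 below wraps the same idea in a reusable helper.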
Example #5
Source File: task.py    From HRERE with MIT License
def run(self):
        trials = Trials()
        best = fmin(self._obj, self.model_param_space._build_space(),
                tpe.suggest, self.max_evals, trials)
        best_params = space_eval(self.model_param_space._build_space(), best)
        best_params = self.model_param_space._convert_into_param(best_params)
        trial_loss = np.asarray(trials.losses(), dtype=float)
        best_ind = np.argmin(trial_loss)
        best_ap = trial_loss[best_ind]
        best_loss = trials.trial_attachments(trials.trials[best_ind])["loss"]
        best_acc = trials.trial_attachments(trials.trials[best_ind])["acc"]
        self.logger.info("-" * 50)
        self.logger.info("Best Average Precision: %.3f" % best_ap)
        self.logger.info("with Loss %.3f, Accuracy %.3f" % (best_loss, best_acc))
        self.logger.info("Best Param:")
        self.task._print_param_dict(best_params)
        self.logger.info("-" * 50) 
Example #6
Source File: automl.py    From KDDCup2019_admin with MIT License
# Requires the imports from Example #1 (data_split_by_time is a project helper), plus:
#   from hyperopt import STATUS_FAIL
#   from sklearn.metrics import roc_auc_score
def hyperopt_lightgbm(X_train: pd.DataFrame, y_train: pd.Series, params: Dict, config: Config, max_evals=10):
    X_train, X_test, y_train, y_test = data_split_by_time(X_train, y_train, test_size=0.2)
    X_train, X_val, y_train, y_val = data_split_by_time(X_train, y_train, test_size=0.3)
    train_data = lgb.Dataset(X_train, label=y_train)
    valid_data = lgb.Dataset(X_val, label=y_val)

    space = {
        "learning_rate": hp.loguniform("learning_rate", np.log(0.01), np.log(0.5)),
        #"max_depth": hp.choice("max_depth", [-1, 2, 3, 4, 5, 6]),
        "max_depth": hp.choice("max_depth", [1, 2, 3, 4, 5, 6]),
        "num_leaves": hp.choice("num_leaves", np.linspace(10, 200, 50, dtype=int)),
        "feature_fraction": hp.quniform("feature_fraction", 0.5, 1.0, 0.1),
        "bagging_fraction": hp.quniform("bagging_fraction", 0.5, 1.0, 0.1),
        "bagging_freq": hp.choice("bagging_freq", np.linspace(0, 50, 10, dtype=int)),
        "reg_alpha": hp.uniform("reg_alpha", 0, 2),
        "reg_lambda": hp.uniform("reg_lambda", 0, 2),
        "min_child_weight": hp.uniform('min_child_weight', 0.5, 10),
    }

    def objective(hyperparams):
        if config.time_left() < 50:
            return {'status': STATUS_FAIL}
        else:
            model = lgb.train({**params, **hyperparams}, train_data, 100,
                              valid_data, early_stopping_rounds=10, verbose_eval=0)
            pred = model.predict(X_test)
            score = roc_auc_score(y_test, pred)

            #score = model.best_score["valid_0"][params["metric"]]

            # hyperopt minimizes the loss, so return the negated score (higher AUC is better)
            return {'loss': -score, 'status': STATUS_OK}

    trials = Trials()
    best = hyperopt.fmin(fn=objective, space=space, trials=trials,
                         algo=tpe.suggest, max_evals=max_evals, verbose=1,
                         rstate=np.random.RandomState(1))

    hyperparams = space_eval(space, best)
    log(f"auc = {-trials.best_trial['result']['loss']:0.4f} {hyperparams}")
    return hyperparams 
Example #7
Source File: bayesian_optimizer.py    From pykg2vec with MIT License
def return_best(self):
        """Function to return the best hyper-parameters"""
        assert hasattr(self, '_best_result'), 'Cannot find golden setting. Has optimize() been called?'
        return space_eval(self.search_space, self._best_result) 
Example #8
Source File: task.py    From kaggle-HomeDepot with MIT License
def run(self):
        start = time.time()
        trials = Trials()
        best = fmin(self._obj, self.model_param_space._build_space(), tpe.suggest, self.max_evals, trials)
        best_params = space_eval(self.model_param_space._build_space(), best)
        best_params = self.model_param_space._convert_int_param(best_params)
        trial_rmses = np.asarray(trials.losses(), dtype=float)
        best_ind = np.argmin(trial_rmses)
        best_rmse_mean = trial_rmses[best_ind]
        best_rmse_std = trials.trial_attachments(trials.trials[best_ind])["std"]
        self.logger.info("-"*50)
        self.logger.info("Best RMSE")
        self.logger.info("      Mean: %.6f"%best_rmse_mean)
        self.logger.info("      std: %.6f"%best_rmse_std)
        self.logger.info("Best param")
        self.task._print_param_dict(best_params)
        end = time.time()
        _sec = end - start
        _min = int(_sec/60.)
        self.logger.info("Time")
        if _min > 0:
            self.logger.info("      %d mins"%_min)
        else:
            self.logger.info("      %d secs"%_sec)
        self.logger.info("-"*50)


Example #9
Source File: gaussian_process.py    From PES-Learn with BSD 3-Clause "New" or "Revised" License
def optimize_model(self):
        print("Beginning hyperparameter optimization...")
        print("Trying {} combinations of hyperparameters".format(self.hp_maxit))
        print("Training with {} points (Full dataset contains {} points).".format(self.ntrain, self.n_datapoints))
        print("Using {} training set point sampling.".format(self.sampler))
        print("Errors are root-mean-square error in wavenumbers (cm-1)")
        self.hyperopt_trials = Trials()
        self.itercount = 1  # keep track of hyperopt iterations 
        if self.input_obj.keywords['rseed']:
            rstate = np.random.RandomState(self.input_obj.keywords['rseed'])
        else:
            rstate = None
        best = fmin(self.hyperopt_model,
                    space=self.hyperparameter_space,
                    algo=tpe.suggest,
                    max_evals=self.hp_maxit*2,
                    rstate=rstate, 
                    show_progressbar=False,
                    trials=self.hyperopt_trials)
        hyperopt_complete()
        print("Best performing hyperparameters are:")
        final = space_eval(self.hyperparameter_space, best)
        print(str(sorted(final.items())))
        self.optimal_hyperparameters = dict(final)
        # obtain final model from best hyperparameters
        print("Fine-tuning final model architecture...")
        self.build_model(self.optimal_hyperparameters, nrestarts=10, maxit=1000)
        print("Final model performance (cm-1):")
        self.test_error = self.vet_model(self.model)
        self.save_model(self.optimal_hyperparameters) 
Example #10
Source File: task.py    From NFETC with MIT License
def run(self):
        trials = Trials()
        best = fmin(self._obj, self.model_param_space._build_space(), tpe.suggest, self.max_evals, trials)
        best_params = space_eval(self.model_param_space._build_space(), best)
        best_params = self.model_param_space._convert_into_param(best_params)
        trial_loss = np.asarray(trials.losses(), dtype=float)
        best_ind = np.argmin(trial_loss)
        best_loss = -trial_loss[best_ind]
        best_pacc = trials.trial_attachments(trials.trials[best_ind])["pacc"]
        # best_eacc = trials.trial_attachments(trials.trials[best_ind])["eacc"]
        self.logger.info("-" * 50)
        self.logger.info("Best Exact Accuracy %.3f with Parital Accuracy %.3f" % (best_loss, best_pacc))
        self.logger.info("Best Param:")
        self.task._print_param_dict(best_params)
        self.logger.info("-" * 50) 
Example #11
Source File: utils.py    From hyperas with MIT License
def eval_hyperopt_space(space, vals):
    """
    Evaluate a set of parameter values within the hyperopt space.
    Optionally unpacks the values, if they are wrapped in lists.
    :param space: dict
        the hyperopt space dictionary
    :param vals: dict
        the values from a hyperopt trial
    :return: evaluated space
    """
    unpacked_vals = unpack_hyperopt_vals(vals)
    return space_eval(space, unpacked_vals) 
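For reference, a minimal sketch of the unpacking step, assuming vals maps each parameter label to a single-element list (the trial['misc']['vals'] layout seen in Example #4) and that parameters from conditional branches that were not sampled show up as empty lists. This is an illustration, not the actual hyperas implementation:

def unpack_hyperopt_vals(vals):
    # {'x': [3], 'y': []} -> {'x': 3}; empty lists mark parameters
    # that were not sampled in this trial (conditional sub-spaces).
    return {k: v[0] for k, v in vals.items() if len(v) > 0}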
Example #12
Source File: cash.py    From pyodds with MIT License
def model_selector(self, max_evals=50):
        trials = Trials()
        best_clf = fmin(self.f, CUMULATIVE_SEARCH_SPACE, algo=tpe.suggest,
                        max_evals=max_evals, trials=trials)
        config = space_eval(CUMULATIVE_SEARCH_SPACE, best_clf)
        print(config)
        return construct_classifier(config) 
Example #13
Source File: automl.py    From KDDCup2019_admin with MIT License
# Requires the imports from Example #1, plus roc_auc_score from sklearn.metrics.
def hyperopt_lightgbm_basic(X, y, params, config, max_evals=50):
    X_train, X_test, y_train, y_test = data_split_by_time(X, y, test_size=0.2)
    X_train, X_val, y_train, y_val = data_split_by_time(X, y, test_size=0.3)  # note: re-splits the full X, so X_test can overlap the training data
    train_data = lgb.Dataset(X_train, label=y_train)
    val_data = lgb.Dataset(X_val, label=y_val)

    space = {
        "learning_rate": hp.loguniform("learning_rate", np.log(0.01), np.log(0.5)),
        #"forgetting_factor": hp.loguniform("forgetting_factor", 0.01, 0.1)
        #"max_depth": hp.choice("max_depth", [-1, 2, 3, 4, 5, 6]),
        "max_depth": hp.choice("max_depth", [1, 2, 3, 4, 5, 6]),
        "num_leaves": hp.choice("num_leaves", np.linspace(10, 200, 50, dtype=int)),
        "feature_fraction": hp.quniform("feature_fraction", 0.5, 1.0, 0.1),
        "bagging_fraction": hp.quniform("bagging_fraction", 0.5, 1.0, 0.1),
        "bagging_freq": hp.choice("bagging_freq", np.linspace(0, 50, 10, dtype=int)),
        "reg_alpha": hp.uniform("reg_alpha", 0, 2),
        "reg_lambda": hp.uniform("reg_lambda", 0, 2),
        "min_child_weight": hp.uniform('min_child_weight', 0.5, 10),
    }

    def objective(hyperparams):
        model = lgb.train({**params, **hyperparams}, train_data, 100,
                          val_data, early_stopping_rounds=30, verbose_eval=0)
        pred = model.predict(X_test)
        score = roc_auc_score(y_test, pred)
        return {'loss': -score, 'status': STATUS_OK}

    trials = Trials()
    best = hyperopt.fmin(fn=objective, space=space, trials=trials,
                         algo=tpe.suggest, max_evals=max_evals, verbose=1,
                         rstate=np.random.RandomState(1))

    hyperparams = space_eval(space, best)
    log(f"auc = {-trials.best_trial['result']['loss']:0.4f} {hyperparams}")
    return hyperparams 
Example #14
Source File: neural_network.py    From PES-Learn with BSD 3-Clause "New" or "Revised" License
def optimize_model(self):
        if not self.input_obj.keywords['validation_points']:
            print("Number of validation points not specified. Splitting test set in half --> 50% test, 50% validation")
        print("Training with {} points. Validating with {} points. Full dataset contains {} points.".format(self.ntrain, self.nvalid, self.n_datapoints))
        print("Using {} training set point sampling.".format(self.sampler))
        print("Errors are root-mean-square error in wavenumbers (cm-1)")
        print("\nPerforming neural architecture search...\n")
        best_hlayers = self.neural_architecture_search(trial_layers = self.trial_layers)
        print("\nNeural architecture search complete. Best hidden layer structures: {}\n".format(best_hlayers))
        print("Beginning hyperparameter optimization...")
        print("Trying {} combinations of hyperparameters".format(self.hp_maxit))
        self.set_hyperparameter('layers', hp.choice('layers', best_hlayers))
        self.hyperopt_trials = Trials()
        self.itercount = 1
        if self.input_obj.keywords['rseed']:
            rstate = np.random.RandomState(self.input_obj.keywords['rseed'])
        else:
            rstate = None
        best = fmin(self.hyperopt_model,
                    space=self.hyperparameter_space,
                    algo=tpe.suggest,
                    max_evals=self.hp_maxit*2,
                    rstate=rstate, 
                    show_progressbar=False,
                    trials=self.hyperopt_trials)
        hyperopt_complete()
        print("Best performing hyperparameters are:")
        final = space_eval(self.hyperparameter_space, best)
        print(str(sorted(final.items())))
        self.optimal_hyperparameters = dict(final)
        print("Optimizing learning rate...")
        
        # default to 32-bit precision unless 64-bit is explicitly requested
        precision = 64 if self.input_obj.keywords['nn_precision'] == 64 else 32
        learning_rates = [1.0, 0.8, 0.6, 0.5, 0.4, 0.2]
        val_errors = []
        for i in learning_rates:
            self.optimal_hyperparameters['lr'] = i
            test_error, val_error = self.build_model(self.optimal_hyperparameters, maxit=5000, val_freq=10, es_patience=5, opt='lbfgs', tol=0.5,  decay=False, verbose=False, precision=precision)
            val_errors.append(val_error)
        best_lr = learning_rates[np.argmin(val_errors)]
        self.optimal_hyperparameters['lr'] = best_lr
        print("Fine-tuning final model...")
        model, test_error, val_error, full_error = self.build_model(self.optimal_hyperparameters, maxit=5000, val_freq=1, es_patience=100, opt='lbfgs', tol=0.1,  decay=True, verbose=True,precision=precision,return_model=True)
        performance = [test_error, val_error, full_error]
        print("Model optimization complete. Saving final model...")
        self.save_model(self.optimal_hyperparameters, model, performance)