Python lightgbm.Booster() Examples

The following are 29 code examples of lightgbm.Booster(), collected from open-source projects. The source file, project, and license for each example are listed above it.
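Before the examples, here is a minimal, self-contained sketch of the two usual ways a lightgbm.Booster is obtained, which nearly every example below relies on: as the return value of lgb.train, or reloaded from a saved model file (the file name model.txt is arbitrary):

import lightgbm as lgb
import numpy as np

# Train a tiny model; lgb.train returns a lightgbm.Booster.
X = np.random.rand(100, 4)
y = np.random.randint(0, 2, 100)
booster = lgb.train({'objective': 'binary', 'verbose': -1},
                    lgb.Dataset(X, label=y), num_boost_round=5)

# Persist and reload; lgb.Booster(model_file=...) is the pattern most examples use.
booster.save_model('model.txt')
loaded = lgb.Booster(model_file='model.txt')
preds = loaded.predict(X)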
Example #1
Source File: lightgbm.py    From talkingdata-adtracking-fraud-detection with MIT License
def train_and_predict(self, train, valid, weight, categorical_features: List[str], target: str, params: dict) \
            -> Tuple[Booster, dict]:
        if not isinstance(train, pd.DataFrame) or not isinstance(valid, pd.DataFrame):
            raise ValueError('Parameters train and valid must be pandas.DataFrame')

        if list(train.columns) != list(valid.columns):
            raise ValueError('Train and valid must have the same column list')

        predictors = train.columns.drop(target)
        if weight is None:
            d_train = lgb.Dataset(train[predictors], label=train[target].values)
        else:
            print(weight)
            d_train = lgb.Dataset(train[predictors], label=train[target].values, weight=weight)
        d_valid = lgb.Dataset(valid[predictors], label=valid[target].values)

        eval_results = {}
        model: Booster = lgb.train(params['model_params'],
                                   d_train,
                                   categorical_feature=categorical_features,
                                   valid_sets=[d_train, d_valid],
                                   valid_names=['train', 'valid'],
                                   evals_result=eval_results,
                                   **params['train_params'])
        return model, eval_results 
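For context, a self-contained sketch of the lgb.train call pattern used above; evals_result fills a nested dict of per-iteration scores keyed by valid_names entry and then metric name (evals_result is the older-API keyword this page uses; recent LightGBM versions replace it with the record_evaluation callback):

import lightgbm as lgb
import numpy as np

X, y = np.random.rand(200, 4), np.random.randint(0, 2, 200)
d_train = lgb.Dataset(X[:150], label=y[:150])
d_valid = lgb.Dataset(X[150:], label=y[150:], reference=d_train)

eval_results = {}
model = lgb.train({'objective': 'binary', 'metric': 'binary_logloss', 'verbose': -1},
                  d_train,
                  valid_sets=[d_train, d_valid],
                  valid_names=['train', 'valid'],
                  evals_result=eval_results,
                  num_boost_round=10)
# eval_results['valid']['binary_logloss'] is now a list of 10 per-iteration scores.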
Example #2
Source File: EndgameEmber.py    From multiscanner with Mozilla Public License 2.0
def check(conf=DEFAULTCONF):
    if not conf['ENABLED']:
        return False
    if not has_ember:
        return False

    if not Path(conf['path-to-model']).is_file():
        print("'{}' does not exist. Check config.ini for model location.".format(conf['path-to-model']))
        return False

    try:
        global LGBM_MODEL
        LGBM_MODEL = lgb.Booster(model_file=conf['path-to-model'])
    except lgb.LightGBMError as e:
        print("Unable to load model, {}. ({})".format(conf['path-to-model'], e))
        return False

    return True 
Example #3
Source File: PixelClassifier.py    From sentinel2-cloud-detector with Creative Commons Attribution Share Alike 4.0 International
def image_predict_proba(self, X, **kwargs):
        """
        Predicts class probabilities for the entire image.

        :param X: Array of images to be classified.
        :type X: numpy array, shape = [n_images, n_pixels_y, n_pixels_x, n_bands]
        :param kwargs: Any keyword arguments that will be passed to the classifier's prediction method
        :return: classification probability map
        :rtype: numpy array, shape = [n_images, n_pixels_y, n_pixels_x, n_classes]
        """
        pixels = self.extract_pixels(X)

        if isinstance(self.classifier, Booster):
            probabilities = self.classifier.predict(pixels, **kwargs)
            probabilities = np.vstack((1. - probabilities, probabilities)).transpose()
        else:
            probabilities = self.classifier.predict_proba(pixels, **kwargs)

        return probabilities.reshape(X.shape[0], X.shape[1], X.shape[2], probabilities.shape[1]) 
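For a binary model, Booster.predict returns only the positive-class probability, which is why the code above stacks (1 - p, p) to mimic scikit-learn's two-column predict_proba. A minimal sketch of that reshaping:

import numpy as np

p = np.array([0.1, 0.8, 0.4])               # positive-class probabilities from predict
proba = np.vstack((1. - p, p)).transpose()  # shape (3, 2), like predict_proba
# proba[:, 0] is P(class 0), proba[:, 1] is P(class 1)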
Example #4
Source File: PixelClassifier.py    From sentinel2-cloud-detector with Creative Commons Attribution Share Alike 4.0 International
def image_predict(self, X, **kwargs):
        """
        Predicts class labels for the entire image.

        :param X: Array of images to be classified.
        :type X: numpy array, shape = [n_images, n_pixels_y, n_pixels_x, n_bands]
        :param kwargs: Any keyword arguments that will be passed to the classifier's prediction method
        :return: raster classification map
        :rtype: numpy array, shape = [n_images, n_pixels_y, n_pixels_x]
        """
        pixels = self.extract_pixels(X)

        if isinstance(self.classifier, Booster):
            raise NotImplementedError('An instance of lightgbm.Booster can only return prediction probabilities, '
                                      'use PixelClassifier.image_predict_proba instead')

        predictions = self.classifier.predict(pixels, **kwargs)

        return predictions.reshape(X.shape[0], X.shape[1], X.shape[2]) 
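Because of the restriction above, class labels for a raw Booster are typically derived from the probability map returned by image_predict_proba, e.g. with an argmax over the class axis; a minimal sketch:

import numpy as np

proba = np.array([[0.2, 0.8], [0.7, 0.3]])  # per-pixel class probabilities
labels = np.argmax(proba, axis=-1)          # array([1, 0])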
Example #5
Source File: atpe_optimizer.py    From hypermax with BSD 3-Clause "New" or "Revised" License
def __init__(self):
        scalingModelData = json.loads(pkg_resources.resource_string(__name__, "../atpe_models/scaling_model.json"))
        self.featureScalingModels = {}
        for key in self.atpeModelFeatureKeys:
            self.featureScalingModels[key] = sklearn.preprocessing.StandardScaler()
            self.featureScalingModels[key].scale_ = numpy.array(scalingModelData[key]['scales'])
            self.featureScalingModels[key].mean_ = numpy.array(scalingModelData[key]['means'])
            self.featureScalingModels[key].var_ = numpy.array(scalingModelData[key]['variances'])

        self.parameterModels = {}
        self.parameterModelConfigurations = {}
        for param in self.atpeParameters:
            modelData = pkg_resources.resource_string(__name__, "../atpe_models/model-" + param + '.txt')
            with hypermax.file_utils.ClosedNamedTempFile(modelData) as model_file_name:
                self.parameterModels[param] = lightgbm.Booster(model_file=model_file_name)

            configString = pkg_resources.resource_string(__name__, "../atpe_models/model-" + param + '-configuration.json')
            data = json.loads(configString)
            self.parameterModelConfigurations[param] = data

        self.lastATPEParameters = None
        self.lastLockedParameters = []
        self.atpeParamDetails = None 
Example #6
Source File: optimize.py    From optuna with MIT License
def _get_booster_best_score(self, booster: "lgb.Booster") -> float:

        metric = self._get_metric_for_objective()
        valid_sets = self.lgbm_kwargs.get("valid_sets")  # type: Optional[VALID_SET_TYPE]

        if self.lgbm_kwargs.get("valid_names") is not None:
            if type(self.lgbm_kwargs["valid_names"]) is str:
                valid_name = self.lgbm_kwargs["valid_names"]
            elif type(self.lgbm_kwargs["valid_names"]) in [list, tuple]:
                valid_name = self.lgbm_kwargs["valid_names"][-1]
            else:
                raise NotImplementedError

        elif type(valid_sets) is lgb.Dataset:
            valid_name = "valid_0"

        elif isinstance(valid_sets, (list, tuple)) and len(valid_sets) > 0:
            valid_set_idx = len(valid_sets) - 1
            valid_name = "valid_{}".format(valid_set_idx)

        else:
            raise NotImplementedError

        val_score = booster.best_score[valid_name][metric]
        return val_score 
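The lookup above relies on Booster.best_score, a nested dict keyed by validation-set name and then metric name. A self-contained sketch using the same older-API keywords seen elsewhere on this page (an unnamed valid set is recorded as 'valid_0', matching the fallback in the code):

import lightgbm as lgb
import numpy as np

X, y = np.random.rand(200, 4), np.random.randint(0, 2, 200)
d_train = lgb.Dataset(X[:150], label=y[:150])
d_valid = lgb.Dataset(X[150:], label=y[150:], reference=d_train)
booster = lgb.train({'objective': 'binary', 'metric': 'binary_logloss', 'verbose': -1},
                    d_train, valid_sets=[d_valid],
                    num_boost_round=10, early_stopping_rounds=5)
print(booster.best_score['valid_0']['binary_logloss'])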
Example #7
Source File: lightgbm.py    From mlflow with Apache License 2.0
def load_model(model_uri):
    """
    Load a LightGBM model from a local file or a run.

    :param model_uri: The location, in URI format, of the MLflow model. For example:

                      - ``/Users/me/path/to/local/model``
                      - ``relative/path/to/local/model``
                      - ``s3://my_bucket/path/to/model``
                      - ``runs:/<mlflow_run_id>/run-relative/path/to/model``

                      For more information about supported URI schemes, see
                      `Referencing Artifacts <https://www.mlflow.org/docs/latest/tracking.html#
                      artifact-locations>`_.

    :return: A LightGBM model (an instance of `lightgbm.Booster`_).
    """
    local_model_path = _download_artifact_from_uri(artifact_uri=model_uri)
    flavor_conf = _get_flavor_configuration(model_path=local_model_path, flavor_name=FLAVOR_NAME)
    lgb_model_file_path = os.path.join(local_model_path, flavor_conf.get("data", "model.lgb"))
    return _load_model(path=lgb_model_file_path) 
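A hedged usage sketch of this entry point; the run ID and artifact path are placeholders to fill in:

import mlflow.lightgbm

model = mlflow.lightgbm.load_model("runs:/<mlflow_run_id>/model")
# model is a plain lightgbm.Booster, so model.predict(X) works directly.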
Example #8
Source File: lightgbm_model_artifact.py    From BentoML with Apache License 2.0
def __init__(self, spec, model):

        super(_LightGBMModelArtifactWrapper, self).__init__(spec)

        try:
            import lightgbm as lgb
        except ImportError:
            raise MissingDependencyException(
                "lightgbm package is required to use LightGBMModelArtifact"
            )

        if not isinstance(model, lgb.Booster):
            raise InvalidArgument(
                "Expect `model` argument to be a `lightgbm.Booster` instance"
            )

        self._model = model 
Example #9
Source File: models.py    From malware_evasion_competition with GNU Affero General Public License v3.0
def __init__(self, model_path=EMBER_MODEL_PATH, thresh=0.8336, name='ember'):
        # load lightgbm model
        self.model = lgb.Booster(model_file=model_path)
        self.thresh = thresh
        self.__name__ = 'ember' 
Example #10
Source File: train_lightgbm.py    From jh-kaggle-util with Apache License 2.0
def load_model(path,name):
        root = jhkaggle.jhkaggle_config['PATH']
        model_path = os.path.join(root,path)
        meta_filename = os.path.join(model_path,"meta.json")
        with open(meta_filename, 'r') as fp:
            meta = json.load(fp)
        result = TrainLightGBM(meta['data_source'],meta['params'],False)
        result.model = lgb.Booster(model_file=os.path.join(model_path,name+".txt"))
        return result 
Example #11
Source File: convert.py    From onnxmltools with MIT License
def convert(model, name=None, initial_types=None, doc_string='', target_opset=None,
            targeted_onnx=onnx.__version__, custom_conversion_functions=None,
            custom_shape_calculators=None):
    '''
    This function produces an equivalent ONNX model of the given lightgbm model.
    The supported lightgbm modules are listed below.

    * `LGBMClassifiers <https://lightgbm.readthedocs.io/en/latest/pythonapi/lightgbm.LGBMClassifier.html>`_
    * `LGBMRegressor <https://lightgbm.readthedocs.io/en/latest/pythonapi/lightgbm.LGBMRegressor.html>`_
    * `Booster <https://lightgbm.readthedocs.io/en/latest/pythonapi/lightgbm.Booster.html>`_

    :param model: A LightGBM model
    :param initial_types: a python list. Each element is a tuple of a variable name and a type defined in data_types.py
    :param name: The name of the graph (type: GraphProto) in the produced ONNX model (type: ModelProto)
    :param doc_string: A string attached onto the produced ONNX model
    :param target_opset: number, for example, 7 for ONNX 1.2, and 8 for ONNX 1.3.
    :param targeted_onnx: A string (for example, '1.1.2' and '1.2') used to specify the targeted ONNX version of the
        produced model. If ONNXMLTools cannot find a compatible ONNX python package, an error may be thrown.
    :param custom_conversion_functions: a dictionary for specifying the user customized conversion function
    :param custom_shape_calculators: a dictionary for specifying the user customized shape calculator
    :return: An ONNX model (type: ModelProto) which is equivalent to the input lightgbm model
    '''
    if initial_types is None:
        raise ValueError('Initial types are required. See usage of convert(...) in '
                         'onnxmltools.convert.lightgbm.convert for details')
    if isinstance(model, lightgbm.Booster):
        model = WrappedBooster(model)
    if name is None:
        name = str(uuid4().hex)

    target_opset = target_opset if target_opset else get_maximum_opset_supported()
    topology = parse_lightgbm(model, initial_types, target_opset, custom_conversion_functions, custom_shape_calculators)
    topology.compile()
    onnx_model = convert_topology(topology, name, doc_string, target_opset, targeted_onnx)
    return onnx_model 
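A hedged usage sketch of convert(...) via the convert_lightgbm wrapper; initial_types is built from onnxmltools' data types, and None in the shape marks a dynamic batch dimension:

import numpy as np
import lightgbm
from onnxmltools.convert import convert_lightgbm
from onnxmltools.convert.common.data_types import FloatTensorType

X = np.random.rand(100, 4).astype(np.float32)
y = np.random.randint(0, 2, 100)
booster = lightgbm.train({'objective': 'binary', 'verbose': -1},
                         lightgbm.Dataset(X, label=y), num_boost_round=5)
onnx_model = convert_lightgbm(booster, name='lgbm',
                              initial_types=[('input', FloatTensorType([None, 4]))])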
Example #12
Source File: tests_helper.py    From onnxmltools with MIT License
def convert_model(model, name, input_types):
    """
    Runs the appropriate conversion method.

    :param model: model
    :return: *onnx* model
    """
    from sklearn.base import BaseEstimator
    if model.__class__.__name__.startswith("LGBM"):
        from onnxmltools.convert import convert_lightgbm
        model, prefix = convert_lightgbm(model, name, input_types), "LightGbm"
    elif model.__class__.__name__.startswith("XGB"):
        from onnxmltools.convert import convert_xgboost
        model, prefix = convert_xgboost(model, name, input_types), "XGB"
    elif model.__class__.__name__ == 'Booster':
        import lightgbm
        if isinstance(model, lightgbm.Booster):
            from onnxmltools.convert import convert_lightgbm
            model, prefix = convert_lightgbm(model, name, input_types), "LightGbm"
        else:
            raise RuntimeError("Unable to convert model of type '{0}'.".format(type(model)))
    elif model.__class__.__name__.startswith("CatBoost"):
        from onnxmltools.convert import convert_catboost
        model, prefix = convert_catboost(model, name, input_types), "CatBoost"
    elif isinstance(model, BaseEstimator):
        from onnxmltools.convert import convert_sklearn
        model, prefix = convert_sklearn(model, name, input_types), "Sklearn"
    else:
        from onnxmltools.convert import convert_coreml
        model, prefix = convert_coreml(model, name, input_types), "Cml"
    if model is None:
        raise RuntimeError("Unable to convert model of type '{0}'.".format(type(model)))
    return model, prefix 
Example #13
Source File: trainer.py    From autogbt-alt with MIT License
def get_model(self, trial_id):
        model_dir = self.work_dir/str(trial_id)
        models = []
        for model_path in model_dir.glob('*.lgbm'):
            model = lgb.Booster(model_file=str(model_path))
            models.append(model)
        return AveragingLGBMClassifier(models) 
Example #14
Source File: S2PixelCloudDetector.py    From sentinel2-cloud-detector with Creative Commons Attribution Share Alike 4.0 International
def classifier(self):
        """
        Provides a classifier object. It also loads it if it hasn't been loaded yet. This way the classifier is loaded
        only when it is actually required.
        """
        if self._classifier is None:
            self._classifier = PixelClassifier(Booster(model_file=self.model_filename))

        return self._classifier 
Example #15
Source File: lightgbm.py    From mljar-supervised with MIT License
def load(self, model_file_path):
        logger.debug("LightgbmAlgorithm load model from %s" % model_file_path)
        self.model = lgb.Booster(model_file=model_file_path) 
Example #16
Source File: PixelClassifier.py    From sentinel2-cloud-detector with Creative Commons Attribution Share Alike 4.0 International
def _check_classifier(classifier):
        """
        Checks if the classifier is of correct type or if it implements predict and predict_proba methods
        """
        if isinstance(classifier, Booster):
            return

        predict = getattr(classifier, 'predict', None)
        if not callable(predict):
            raise ValueError('Classifier does not have a predict method!')

        predict_proba = getattr(classifier, 'predict_proba', None)
        if not callable(predict_proba):
            raise ValueError('Classifier does not have a predict_proba method!') 
Example #17
Source File: PixelClassifier.py    From sentinel2-cloud-detector with Creative Commons Attribution Share Alike 4.0 International
def __init__(self, classifier):
        """
        :param classifier: An instance of trained classifier that will be executed over an entire image
        :type classifier: Booster or object that implements methods predict and predict_proba
        """
        self._check_classifier(classifier)
        self.classifier = classifier 
Example #18
Source File: model.py    From ebonite with Apache License 2.0
def dump(self, model: lgb.Booster) -> FilesContextManager:
        with tempfile.TemporaryDirectory(prefix='ebonite_lightgbm_dump') as f:
            path = os.path.join(f, self.model_path)
            model.save_model(path)
            yield Blobs({self.model_path: LocalFileBlob(path)}) 
Example #19
Source File: optimize.py    From optuna with MIT License
def get_best_booster(self) -> "lgb.Booster":
        """Return the best booster.

        If the best booster cannot be found, :class:`ValueError` will be raised. To prevent the
        errors, please save boosters by specifying the ``model_dir`` arguments of
        :meth:`~optuna.integration.lightgbm.LightGBMTuner.__init__` when you resume tuning
        or you run tuning in parallel.
        """
        if self._best_booster_with_trial_number is not None:
            if self._best_booster_with_trial_number[1] == self.study.best_trial.number:
                return self._best_booster_with_trial_number[0]
        if len(self.study.trials) == 0:
            raise ValueError("The best booster is not available because no trials completed.")

        # The best booster exists, but this instance does not have it.
        # This may be due to resuming or parallelization.
        if self._model_dir is None:
            raise ValueError(
                "The best booster cannot be found. It may be found in the other processes due to "
                "resuming or distributed computing. Please set the `model_dir` argument of "
                "`LightGBMTuner.__init__` and make sure that boosters are shared with all "
                "processes."
            )

        best_trial = self.study.best_trial
        path = os.path.join(self._model_dir, "{}.pkl".format(best_trial.number))
        if not os.path.exists(path):
            raise ValueError(
                "The best booster cannot be found in {}. If you execute `LightGBMTuner` in "
                "distributed environment, please use network file system (e.g., NFS) to share "
                "models with multiple workers.".format(self._model_dir)
            )

        with open(path, "rb") as fin:
            booster = pickle.load(fin)

        return booster 
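A hedged usage sketch of the tuner workflow this method belongs to (import path as of the optuna version these examples come from); passing model_dir is what makes get_best_booster robust to resuming, as the docstring above explains:

import numpy as np
import lightgbm as lgb
from optuna.integration.lightgbm import LightGBMTuner

X, y = np.random.rand(200, 4), np.random.randint(0, 2, 200)
dtrain = lgb.Dataset(X[:150], label=y[:150])
dval = lgb.Dataset(X[150:], label=y[150:])
tuner = LightGBMTuner({'objective': 'binary', 'metric': 'binary_logloss'},
                      dtrain, valid_sets=[dval], model_dir='boosters')
tuner.run()
best = tuner.get_best_booster()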
Example #20
Source File: optimize.py    From optuna with MIT License
def best_booster(self) -> "lgb.Booster":
        """Return the best booster."""

        return self.get_best_booster() 
Example #21
Source File: optimize.py    From optuna with MIT License
def __init__(
        self,
        target_param_names: List[str],
        lgbm_params: Dict[str, Any],
        train_set: "lgb.Dataset",
        lgbm_kwargs: Dict[str, Any],
        best_score: float,
        step_name: str,
        model_dir: Optional[str],
        pbar: Optional[tqdm.tqdm] = None,
    ):

        self.target_param_names = target_param_names
        self.pbar = pbar
        self.lgbm_params = lgbm_params
        self.lgbm_kwargs = lgbm_kwargs
        self.train_set = train_set

        self.trial_count = 0
        self.best_score = best_score
        self.best_booster_with_trial_number = None  # type: Optional[Tuple["lgb.Booster", int]]
        self.step_name = step_name
        self.model_dir = model_dir

        self._check_target_names_supported()
        self.pbar_fmt = "{}, val_score: {:.6f}" 
Example #22
Source File: model.py    From ebonite with Apache License 2.0
def load(self, path):
        model_file = os.path.join(path, self.model_path)
        return lgb.Booster(model_file=model_file) 
Example #23
Source File: lightgbm_model_artifact.py    From BentoML with Apache License 2.0
def load(self, path):
        try:
            import lightgbm as lgb
        except ImportError:
            raise MissingDependencyException(
                "lightgbm package is required to use LightGBMModelArtifact"
            )
        bst = lgb.Booster(model_file=self._model_file_path(path))

        return self.pack(bst) 
Example #24
Source File: lightgbm.py    From mlflow with Apache License 2.0
def _load_model(path):
    import lightgbm as lgb
    return lgb.Booster(model_file=path) 
Example #25
Source File: simulation.py    From hypermax with BSD 3-Clause "New" or "Revised" License
def executeLightGBMModel(params, model=None):
    global lightGBMModel
    if model == 'textextraction':
        if lightGBMModel is None:
            lightGBMModel = lgb.Booster(model_file='LightGBM_model_text_extraction.txt')

        vectorKeys = [  # They are in this order for a reason - that's what was in our training data file.
            'layer_0.max_depth',
            'layer_0.min_data_in_leaf',
            'layer_0.boosting_rounds',
            'layer_1.input_window',
            'layer_0.num_leaves',
            'layer_1.min_data_in_leaf',
            'layer_1.boosting_rounds',
            'layer_1.learning_rate',
            'layer_1.num_leaves',
            'layer_0.bagging_fraction',
            'layer_1.max_depth',
            'layer_0.learning_rate',
            'layer_0.input_window',
            'layer_0.feature_fraction']

        vector = []
        for param in vectorKeys:
            vector.append(params[param])

        result = lightGBMModel.predict([vector])[0]

        return {"loss": result, "status": "ok"}
    elif model == 'cifar_resnet':
        if lightGBMModel is None:
            lightGBMModel = lgb.Booster(model_file='LightGBM_model_cifar_resnet.txt')

        vectorKeys = [  # They are in this order for a reason - that's what was in our training data file.
            'activation',
            'layer1_layers',
            'layer1_size',
            'layer2_layers',
            'layer2_size',
            'layer3_layers',
            'layer3_size',
            'layer4_layers',
            'layer4_size',
            'learning_rate',
            'weight_decay'
        ]

        vector = []
        for param in vectorKeys:
            if param == 'activation':
                values = ['relu', 'elu', "selu", "rrelu"]
                if isinstance(params[param], str):
                    vector.append(values.index(params[param]))
                else:
                    vector.append(params[param])
            else:
                vector.append(params[param])

        result = lightGBMModel.predict([vector])[0]

        return {"loss": result, "status": "ok"} 
Example #26
Source File: lightgbm_model.py    From interpret-community with MIT License
def _load(properties):
        """Load a LGBMExplainableModel from the given properties.

        :param properties: A serialized dictionary representation of the LGBMExplainableModel.
        :type properties: dict
        :return: The deserialized LGBMExplainableModel.
        :rtype: interpret_community.mimic.models.LGBMExplainableModel
        """
        # create the LGBMExplainableModel without any properties using the __new__ function, similar to pickle
        lightgbm = LGBMExplainableModel.__new__(LGBMExplainableModel)
        # Get _n_features
        _n_features = properties.pop(_N_FEATURES)
        # If classification case get _n_classes
        if json.loads(properties[LightGBMSerializationConstants.MULTICLASS]):
            _n_classes = properties.pop(_N_CLASSES)
        # load all of the properties
        for key, value in properties.items():
            # Regenerate the properties on the fly
            if key in LightGBMSerializationConstants.nonify_properties:
                if key == LightGBMSerializationConstants.LOGGER:
                    parent = logging.getLogger(__name__)
                    lightgbm_identity = json.loads(properties[LightGBMSerializationConstants.IDENTITY])
                    lightgbm.__dict__[key] = parent.getChild(lightgbm_identity)
                elif key == LightGBMSerializationConstants.TREE_EXPLAINER:
                    lightgbm.__dict__[key] = None
                else:
                    raise Exception("Unknown nonify key on deserialize in LightGBMExplainableModel: {}".format(key))
            elif key in LightGBMSerializationConstants.save_properties:
                # Load the booster from file and re-create the LGBMClassifier or LGBMRegressor
                # This is not recommended but can be necessary to get around pickle being not secure
                # See here for more info:
                # https://github.com/Microsoft/LightGBM/issues/1942
                # https://github.com/Microsoft/LightGBM/issues/1217
                booster_args = {LightGBMSerializationConstants.MODEL_STR: value}
                is_multiclass = json.loads(properties[LightGBMSerializationConstants.MULTICLASS])
                if is_multiclass:
                    objective = LightGBMSerializationConstants.MULTICLASS
                else:
                    objective = LightGBMSerializationConstants.REGRESSION
                if LightGBMSerializationConstants.MODEL_STR in inspect.getargspec(Booster).args:
                    extras = {LightGBMSerializationConstants.OBJECTIVE: objective}
                    lgbm_booster = Booster(**booster_args, params=extras)
                else:
                    # For backwards compatibility with older versions of lightgbm
                    booster_args[LightGBMSerializationConstants.OBJECTIVE] = objective
                    lgbm_booster = Booster(params=booster_args)
                if is_multiclass:
                    new_lgbm = LGBMClassifier()
                    new_lgbm._Booster = lgbm_booster
                    new_lgbm._n_classes = _n_classes
                else:
                    new_lgbm = LGBMRegressor()
                    new_lgbm._Booster = lgbm_booster
                new_lgbm._n_features = _n_features
                lightgbm.__dict__[key] = new_lgbm
            elif key in LightGBMSerializationConstants.enum_properties:
                # NOTE: If more enums added in future, will need to handle this differently
                lightgbm.__dict__[key] = ShapValuesOutput(json.loads(value))
            else:
                lightgbm.__dict__[key] = json.loads(value)
        return lightgbm 
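The model_str branch above rebuilds a Booster from its plain-text serialization instead of unpickling it; a minimal sketch of that round trip using LightGBM's own API:

import lightgbm as lgb
import numpy as np

X, y = np.random.rand(100, 4), np.random.randint(0, 2, 100)
booster = lgb.train({'objective': 'binary', 'verbose': -1},
                    lgb.Dataset(X, label=y), num_boost_round=5)
text = booster.model_to_string()        # text serialization, safe to embed in JSON
restored = lgb.Booster(model_str=text)  # no pickle involved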
Example #27
Source File: optimize.py    From optuna with MIT License
def __init__(
        self,
        params: Dict[str, Any],
        train_set: "lgb.Dataset",
        num_boost_round: int = 1000,
        valid_sets: Optional["VALID_SET_TYPE"] = None,
        valid_names: Optional[Any] = None,
        fobj: Optional[Callable[..., Any]] = None,
        feval: Optional[Callable[..., Any]] = None,
        feature_name: str = "auto",
        categorical_feature: str = "auto",
        early_stopping_rounds: Optional[int] = None,
        evals_result: Optional[Dict[Any, Any]] = None,
        verbose_eval: Optional[Union[bool, int]] = True,
        learning_rates: Optional[List[float]] = None,
        keep_training_booster: Optional[bool] = False,
        callbacks: Optional[List[Callable[..., Any]]] = None,
        time_budget: Optional[int] = None,
        sample_size: Optional[int] = None,
        study: Optional[optuna.study.Study] = None,
        optuna_callbacks: Optional[List[Callable[[Study, FrozenTrial], None]]] = None,
        model_dir: Optional[str] = None,
        verbosity: Optional[int] = 1,
    ) -> None:

        super(LightGBMTuner, self).__init__(
            params,
            train_set,
            num_boost_round=num_boost_round,
            fobj=fobj,
            feval=feval,
            feature_name=feature_name,
            categorical_feature=categorical_feature,
            early_stopping_rounds=early_stopping_rounds,
            verbose_eval=verbose_eval,
            callbacks=callbacks,
            time_budget=time_budget,
            sample_size=sample_size,
            study=study,
            optuna_callbacks=optuna_callbacks,
            verbosity=verbosity,
        )

        self.lgbm_kwargs["valid_sets"] = valid_sets
        self.lgbm_kwargs["valid_names"] = valid_names
        self.lgbm_kwargs["evals_result"] = evals_result
        self.lgbm_kwargs["learning_rates"] = learning_rates
        self.lgbm_kwargs["keep_training_booster"] = keep_training_booster

        self._best_booster_with_trial_number = None  # type: Optional[Tuple[lgb.Booster, int]]
        self._model_dir = model_dir

        if self._model_dir is not None and not os.path.exists(self._model_dir):
            os.mkdir(self._model_dir)

        if valid_sets is None:
            raise ValueError("`valid_sets` is required.") 
Example #28
Source File: lightgbm.py    From mlflow with Apache License 2.0
def log_model(lgb_model, artifact_path, conda_env=None, registered_model_name=None,
              signature: ModelSignature=None, input_example: ModelInputExample=None,
              **kwargs):
    """
    Log a LightGBM model as an MLflow artifact for the current run.

    :param lgb_model: LightGBM model (an instance of `lightgbm.Booster`_) to be saved.
                      Note that models that implement the `scikit-learn API`_  are not supported.
    :param artifact_path: Run-relative artifact path.
    :param conda_env: Either a dictionary representation of a Conda environment or the path to a
                      Conda environment yaml file. If provided, this describes the environment
                      this model should be run in. At minimum, it should specify the dependencies
                      contained in :func:`get_default_conda_env()`. If ``None``, the default
                      :func:`get_default_conda_env()` environment is added to the model.
                      The following is an *example* dictionary representation of a Conda
                      environment::

                        {
                            'name': 'mlflow-env',
                            'channels': ['defaults'],
                            'dependencies': [
                                'python=3.7.0',
                                'pip': [
                                    'lightgbm==2.3.0'
                                ]
                            ]
                        }
    :param registered_model_name: (Experimental) If given, create a model version under
                                  ``registered_model_name``, also creating a registered model if one
                                  with the given name does not exist.

    :param signature: (Experimental) :py:class:`ModelSignature <mlflow.models.ModelSignature>`
                      describes model input and output :py:class:`Schema <mlflow.types.Schema>`.
                      The model signature can be :py:func:`inferred <mlflow.models.infer_signature>`
                      from datasets with valid model input (e.g. the training dataset with target
                      column omitted) and valid model output (e.g. model predictions generated on
                      the training dataset), for example:

                      .. code-block:: python

                        from mlflow.models.signature import infer_signature
                        train = df.drop_column("target_label")
                        predictions = ... # compute model predictions
                        signature = infer_signature(train, predictions)
    :param input_example: (Experimental) Input example provides one or several instances of valid
                          model input. The example can be used as a hint of what data to feed the
                          model. The given example will be converted to a Pandas DataFrame and then
                          serialized to json using the Pandas split-oriented format. Bytes are
                          base64-encoded.

    :param kwargs: kwargs to pass to `lightgbm.Booster.save_model`_ method.
    """
    Model.log(artifact_path=artifact_path, flavor=mlflow.lightgbm,
              registered_model_name=registered_model_name,
              lgb_model=lgb_model, conda_env=conda_env,
              signature=signature, input_example=input_example,
              **kwargs) 
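A hedged usage sketch of log_model inside an active run (the artifact path 'model' is a common convention, not a requirement):

import mlflow
import mlflow.lightgbm
import lightgbm as lgb
import numpy as np

X, y = np.random.rand(100, 4), np.random.randint(0, 2, 100)
booster = lgb.train({'objective': 'binary', 'verbose': -1},
                    lgb.Dataset(X, label=y), num_boost_round=5)
with mlflow.start_run():
    mlflow.lightgbm.log_model(booster, artifact_path='model')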
Example #29
Source File: engines.py    From santander-product-recommendation-8th-place with MIT License
def lightgbm(XY_train, XY_validate, test_df, features, XY_all=None, restore=False):
    train = lgbm.Dataset(XY_train[list(features)], label=XY_train["y"], weight=XY_train["weight"], feature_name=features)
    validate = lgbm.Dataset(XY_validate[list(features)], label=XY_validate["y"], weight=XY_validate["weight"], feature_name=features, reference=train)

    params = {
        'task' : 'train',
        'boosting_type' : 'gbdt',
        'objective' : 'multiclass',
        'num_class': 24,
        'metric' : {'multi_logloss'},
        'is_training_metric': True,
        'max_bin': 255,
        'num_leaves' : 64,
        'learning_rate' : 0.1,
        'feature_fraction' : 0.8,
        'min_data_in_leaf': 10,
        'min_sum_hessian_in_leaf': 5,
        # 'num_threads': 16,
    }
    print(params)

    if not restore:
        with Timer("train lightgbm_lib"):
            model = lgbm.train(params, train, num_boost_round=1000, valid_sets=validate, early_stopping_rounds=20)
            best_iteration = model.best_iteration
            model.save_model("tmp/lgbm.model.txt")
            pickle.dump(best_iteration, open("tmp/lgbm.model.meta", "wb"))
    else:
        with Timer("restore lightgbm_lib model"):
            model = lgbm.Booster(model_file="tmp/lgbm.model.txt")
            best_iteration = pickle.load(open("tmp/lgbm.model.meta", "rb"))

    if XY_all is not None:
        best_iteration = int(best_iteration * len(XY_all) / len(XY_train))
        all_train = lgbm.Dataset(XY_all[list(features)], label=XY_all["y"], weight=XY_all["weight"], feature_name=features)
        with Timer("retrain lightgbm_lib with all data"):
            model = lgbm.train(params, all_train, num_boost_round=best_iteration)
        model.save_model("tmp/lgbm.all.model.txt")

    print("Feature importance by split:")
    for kv in sorted([(k,v) for k,v in zip(features, model.feature_importance("split"))], key=lambda kv: kv[1], reverse=True):
        print(kv)
    print("Feature importance by gain:")
    for kv in sorted([(k,v) for k,v in zip(features, model.feature_importance("gain"))], key=lambda kv: kv[1], reverse=True):
        print(kv)

    return model.predict(test_df[list(features)], num_iteration=best_iteration)