Python lightgbm.Booster() Examples

The following are 29 code examples of lightgbm.Booster(), collected from open-source projects. The source file, project, and license for each example are listed above it.
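Before the examples, here is a minimal, self-contained sketch of the two usual ways a lightgbm.Booster is obtained, which nearly every example below relies on: as the return value of lgb.train, or reloaded from a saved model file (the file name model.txt is arbitrary):

import lightgbm as lgb
import numpy as np

# Train a tiny model; lgb.train returns a lightgbm.Booster.
X = np.random.rand(100, 4)
y = np.random.randint(0, 2, 100)
booster = lgb.train({'objective': 'binary', 'verbose': -1},
                    lgb.Dataset(X, label=y), num_boost_round=5)

# Persist and reload; lgb.Booster(model_file=...) is the pattern most examples use.
booster.save_model('model.txt')
loaded = lgb.Booster(model_file='model.txt')
preds = loaded.predict(X)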
Example #1
Source File: lightgbm.py    From talkingdata-adtracking-fraud-detection with MIT License
def train_and_predict(self, train, valid, weight, categorical_features: List[str], target: str, params: dict) \
            -> Tuple[Booster, dict]:
        if not isinstance(train, pd.DataFrame) or not isinstance(valid, pd.DataFrame):
            raise ValueError('Parameters train and valid must be pandas.DataFrame')

        if list(train.columns) != list(valid.columns):
            raise ValueError('Train and valid must have the same column list')

        predictors = train.columns.drop(target)
        if weight is None:
            d_train = lgb.Dataset(train[predictors], label=train[target].values)
        else:
            print(weight)
            d_train = lgb.Dataset(train[predictors], label=train[target].values, weight=weight)
        d_valid = lgb.Dataset(valid[predictors], label=valid[target].values)

        eval_results = {}
        model: Booster = lgb.train(params['model_params'],
                                   d_train,
                                   categorical_feature=categorical_features,
                                   valid_sets=[d_train, d_valid],
                                   valid_names=['train', 'valid'],
                                   evals_result=eval_results,
                                   **params['train_params'])
        return model, eval_results 
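For context, a self-contained sketch of the lgb.train call pattern used above; evals_result fills a nested dict of per-iteration scores keyed by valid_names entry and then metric name (evals_result is the older-API keyword this page uses; recent LightGBM versions replace it with the record_evaluation callback):

import lightgbm as lgb
import numpy as np

X, y = np.random.rand(200, 4), np.random.randint(0, 2, 200)
d_train = lgb.Dataset(X[:150], label=y[:150])
d_valid = lgb.Dataset(X[150:], label=y[150:], reference=d_train)

eval_results = {}
model = lgb.train({'objective': 'binary', 'metric': 'binary_logloss', 'verbose': -1},
                  d_train,
                  valid_sets=[d_train, d_valid],
                  valid_names=['train', 'valid'],
                  evals_result=eval_results,
                  num_boost_round=10)
# eval_results['valid']['binary_logloss'] is now a list of 10 per-iteration scores.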
Example #2
Source File: EndgameEmber.py    From multiscanner with Mozilla Public License 2.0
def check(conf=DEFAULTCONF):
    if not conf['ENABLED']:
        return False
    if not has_ember:
        return False

    if not Path(conf['path-to-model']).is_file():
        print("'{}' does not exist. Check config.ini for model location.".format(conf['path-to-model']))
        return False

    try:
        global LGBM_MODEL
        LGBM_MODEL = lgb.Booster(model_file=conf['path-to-model'])
    except lgb.LightGBMError as e:
        print("Unable to load model, {}. ({})".format(conf['path-to-model'], e))
        return False

    return True 
Example #3
Source File: PixelClassifier.py    From sentinel2-cloud-detector with Creative Commons Attribution Share Alike 4.0 International
def image_predict_proba(self, X, **kwargs):
        """
        Predicts class probabilities for the entire image.

        :param X: Array of images to be classified.
        :type X: numpy array, shape = [n_images, n_pixels_y, n_pixels_x, n_bands]
        :param kwargs: Any keyword arguments that will be passed to the classifier's prediction method
        :return: classification probability map
        :rtype: numpy array, shape = [n_images, n_pixels_y, n_pixels_x, n_classes]
        """
        pixels = self.extract_pixels(X)

        if isinstance(self.classifier, Booster):
            probabilities = self.classifier.predict(pixels, **kwargs)
            probabilities = np.vstack((1. - probabilities, probabilities)).transpose()
        else:
            probabilities = self.classifier.predict_proba(pixels, **kwargs)

        return probabilities.reshape(X.shape[0], X.shape[1], X.shape[2], probabilities.shape[1]) 
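For a binary model, Booster.predict returns only the positive-class probability, which is why the code above stacks (1 - p, p) to mimic scikit-learn's two-column predict_proba. A minimal sketch of that reshaping:

import numpy as np

p = np.array([0.1, 0.8, 0.4])               # positive-class probabilities from predict
proba = np.vstack((1. - p, p)).transpose()  # shape (3, 2), like predict_proba
# proba[:, 0] is P(class 0), proba[:, 1] is P(class 1)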
Example #4
Source File: PixelClassifier.py    From sentinel2-cloud-detector with Creative Commons Attribution Share Alike 4.0 International
def image_predict(self, X, **kwargs):
        """
        Predicts class labels for the entire image.

        :param X: Array of images to be classified.
        :type X: numpy array, shape = [n_images, n_pixels_y, n_pixels_x, n_bands]
        :param kwargs: Any keyword arguments that will be passed to the classifier's prediction method
        :return: raster classification map
        :rtype: numpy array, shape = [n_images, n_pixels_y, n_pixels_x]
        """
        pixels = self.extract_pixels(X)

        if isinstance(self.classifier, Booster):
            raise NotImplementedError('An instance of lightgbm.Booster can only return prediction probabilities, '
                                      'use PixelClassifier.image_predict_proba instead')

        predictions = self.classifier.predict(pixels, **kwargs)

        return predictions.reshape(X.shape[0], X.shape[1], X.shape[2]) 
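Because of the restriction above, class labels for a raw Booster are typically derived from the probability map returned by image_predict_proba, e.g. with an argmax over the class axis; a minimal sketch:

import numpy as np

proba = np.array([[0.2, 0.8], [0.7, 0.3]])  # per-pixel class probabilities
labels = np.argmax(proba, axis=-1)          # array([1, 0])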
Example #5
Source File: atpe_optimizer.py    From hypermax with BSD 3-Clause "New" or "Revised" License
def __init__(self):
        scalingModelData = json.loads(pkg_resources.resource_string(__name__, "../atpe_models/scaling_model.json"))
        self.featureScalingModels = {}
        for key in self.atpeModelFeatureKeys:
            self.featureScalingModels[key] = sklearn.preprocessing.StandardScaler()
            self.featureScalingModels[key].scale_ = numpy.array(scalingModelData[key]['scales'])
            self.featureScalingModels[key].mean_ = numpy.array(scalingModelData[key]['means'])
            self.featureScalingModels[key].var_ = numpy.array(scalingModelData[key]['variances'])

        self.parameterModels = {}
        self.parameterModelConfigurations = {}
        for param in self.atpeParameters:
            modelData = pkg_resources.resource_string(__name__, "../atpe_models/model-" + param + '.txt')
            with hypermax.file_utils.ClosedNamedTempFile(modelData) as model_file_name:
                self.parameterModels[param] = lightgbm.Booster(model_file=model_file_name)

            configString = pkg_resources.resource_string(__name__, "../atpe_models/model-" + param + '-configuration.json')
            data = json.loads(configString)
            self.parameterModelConfigurations[param] = data

        self.lastATPEParameters = None
        self.lastLockedParameters = []
        self.atpeParamDetails = None 
Example #6
Source File: optimize.py    From optuna with MIT License
def _get_booster_best_score(self, booster: "lgb.Booster") -> float:

        metric = self._get_metric_for_objective()
        valid_sets = self.lgbm_kwargs.get("valid_sets")  # type: Optional[VALID_SET_TYPE]

        if self.lgbm_kwargs.get("valid_names") is not None:
            if type(self.lgbm_kwargs["valid_names"]) is str:
                valid_name = self.lgbm_kwargs["valid_names"]
            elif type(self.lgbm_kwargs["valid_names"]) in [list, tuple]:
                valid_name = self.lgbm_kwargs["valid_names"][-1]
            else:
                raise NotImplementedError

        elif type(valid_sets) is lgb.Dataset:
            valid_name = "valid_0"

        elif isinstance(valid_sets, (list, tuple)) and len(valid_sets) > 0:
            valid_set_idx = len(valid_sets) - 1
            valid_name = "valid_{}".format(valid_set_idx)

        else:
            raise NotImplementedError

        val_score = booster.best_score[valid_name][metric]
        return val_score 
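The lookup above relies on Booster.best_score, a nested dict keyed by validation-set name and then metric name. A self-contained sketch using the same older-API keywords seen elsewhere on this page (an unnamed valid set is recorded as 'valid_0', matching the fallback in the code):

import lightgbm as lgb
import numpy as np

X, y = np.random.rand(200, 4), np.random.randint(0, 2, 200)
d_train = lgb.Dataset(X[:150], label=y[:150])
d_valid = lgb.Dataset(X[150:], label=y[150:], reference=d_train)
booster = lgb.train({'objective': 'binary', 'metric': 'binary_logloss', 'verbose': -1},
                    d_train, valid_sets=[d_valid],
                    num_boost_round=10, early_stopping_rounds=5)
print(booster.best_score['valid_0']['binary_logloss'])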
Example #7
Source File: lightgbm.py    From mlflow with Apache License 2.0
def load_model(model_uri):
    """
    Load a LightGBM model from a local file or a run.

    :param model_uri: The location, in URI format, of the MLflow model. For example:

                      - ``/Users/me/path/to/local/model``
                      - ``relative/path/to/local/model``
                      - ``s3://my_bucket/path/to/model``
                      - ``runs:/<mlflow_run_id>/run-relative/path/to/model``

                      For more information about supported URI schemes, see
                      `Referencing Artifacts <https://www.mlflow.org/docs/latest/tracking.html#
                      artifact-locations>`_.

    :return: A LightGBM model (an instance of `lightgbm.Booster`_).
    """
    local_model_path = _download_artifact_from_uri(artifact_uri=model_uri)
    flavor_conf = _get_flavor_configuration(model_path=local_model_path, flavor_name=FLAVOR_NAME)
    lgb_model_file_path = os.path.join(local_model_path, flavor_conf.get("data", "model.lgb"))
    return _load_model(path=lgb_model_file_path) 
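A hedged usage sketch of this entry point; the run ID and artifact path are placeholders to fill in:

import mlflow.lightgbm

model = mlflow.lightgbm.load_model("runs:/<mlflow_run_id>/model")
# model is a plain lightgbm.Booster, so model.predict(X) works directly.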
Example #8
Source File: lightgbm_model_artifact.py    From BentoML with Apache License 2.0
def __init__(self, spec, model):

        super(_LightGBMModelArtifactWrapper, self).__init__(spec)

        try:
            import lightgbm as lgb
        except ImportError:
            raise MissingDependencyException(
                "lightgbm package is required to use LightGBMModelArtifact"
            )

        if not isinstance(model, lgb.Booster):
            raise InvalidArgument(
                "Expect `model` argument to be a `lightgbm.Booster` instance"
            )

        self._model = model 
Example #9
Source File: models.py    From malware_evasion_competition with GNU Affero General Public License v3.0
def __init__(self, model_path=EMBER_MODEL_PATH, thresh=0.8336, name='ember'):
        # load lightgbm model
        self.model = lgb.Booster(model_file=model_path)
        self.thresh = thresh
        self.__name__ = 'ember' 
Example #10
Source File: train_lightgbm.py    From jh-kaggle-util with Apache License 2.0
def load_model(path,name):
        root = jhkaggle.jhkaggle_config['PATH']
        model_path = os.path.join(root,path)
        meta_filename = os.path.join(model_path,"meta.json")
        with open(meta_filename, 'r') as fp:
            meta = json.load(fp)
        result = TrainLightGBM(meta['data_source'],meta['params'],False)
        result.model = lgb.Booster(model_file=os.path.join(model_path,name+".txt"))
        return result 
Example #11
Source File: convert.py    From onnxmltools with MIT License
def convert(model, name=None, initial_types=None, doc_string='', target_opset=None,
            targeted_onnx=onnx.__version__, custom_conversion_functions=None,
            custom_shape_calculators=None):
    '''
    This function produces an equivalent ONNX model of the given lightgbm model.
    The supported lightgbm modules are listed below.

    * `LGBMClassifiers <https://lightgbm.readthedocs.io/en/latest/pythonapi/lightgbm.LGBMClassifier.html>`_
    * `LGBMRegressor <https://lightgbm.readthedocs.io/en/latest/pythonapi/lightgbm.LGBMRegressor.html>`_
    * `Booster <https://lightgbm.readthedocs.io/en/latest/pythonapi/lightgbm.Booster.html>`_

    :param model: A LightGBM model
    :param initial_types: a python list. Each element is a tuple of a variable name and a type defined in data_types.py
    :param name: The name of the graph (type: GraphProto) in the produced ONNX model (type: ModelProto)
    :param doc_string: A string attached onto the produced ONNX model
    :param target_opset: number, for example, 7 for ONNX 1.2, and 8 for ONNX 1.3.
    :param targeted_onnx: A string (for example, '1.1.2' and '1.2') used to specify the targeted ONNX version of the
        produced model. If ONNXMLTools cannot find a compatible ONNX python package, an error may be thrown.
    :param custom_conversion_functions: a dictionary for specifying the user customized conversion function
    :param custom_shape_calculators: a dictionary for specifying the user customized shape calculator
    :return: An ONNX model (type: ModelProto) which is equivalent to the input lightgbm model
    '''
    if initial_types is None:
        raise ValueError('Initial types are required. See usage of convert(...) in '
                         'onnxmltools.convert.lightgbm.convert for details')
    if isinstance(model, lightgbm.Booster):
        model = WrappedBooster(model)
    if name is None:
        name = str(uuid4().hex)

    target_opset = target_opset if target_opset else get_maximum_opset_supported()
    topology = parse_lightgbm(model, initial_types, target_opset, custom_conversion_functions, custom_shape_calculators)
    topology.compile()
    onnx_model = convert_topology(topology, name, doc_string, target_opset, targeted_onnx)
    return onnx_model 
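A hedged usage sketch of convert(...) via the convert_lightgbm wrapper; initial_types is built from onnxmltools' data types, and None in the shape marks a dynamic batch dimension:

import numpy as np
import lightgbm
from onnxmltools.convert import convert_lightgbm
from onnxmltools.convert.common.data_types import FloatTensorType

X = np.random.rand(100, 4).astype(np.float32)
y = np.random.randint(0, 2, 100)
booster = lightgbm.train({'objective': 'binary', 'verbose': -1},
                         lightgbm.Dataset(X, label=y), num_boost_round=5)
onnx_model = convert_lightgbm(booster, name='lgbm',
                              initial_types=[('input', FloatTensorType([None, 4]))])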
Example #12
Source File: tests_helper.py    From onnxmltools with MIT License
def convert_model(model, name, input_types):
    """
    Runs the appropriate conversion method.

    :param model: model
    :return: *onnx* model
    """
    from sklearn.base import BaseEstimator
    if model.__class__.__name__.startswith("LGBM"):
        from onnxmltools.convert import convert_lightgbm
        model, prefix = convert_lightgbm(model, name, input_types), "LightGbm"
    elif model.__class__.__name__.startswith("XGB"):
        from onnxmltools.convert import convert_xgboost
        model, prefix = convert_xgboost(model, name, input_types), "XGB"
    elif model.__class__.__name__ == 'Booster':
        import lightgbm
        if isinstance(model, lightgbm.Booster):
            from onnxmltools.convert import convert_lightgbm
            model, prefix = convert_lightgbm(model, name, input_types), "LightGbm"
        else:
            raise RuntimeError("Unable to convert model of type '{0}'.".format(type(model)))
    elif model.__class__.__name__.startswith("CatBoost"):
        from onnxmltools.convert import convert_catboost
        model, prefix = convert_catboost(model, name, input_types), "CatBoost"
    elif isinstance(model, BaseEstimator):
        from onnxmltools.convert import convert_sklearn
        model, prefix = convert_sklearn(model, name, input_types), "Sklearn"
    else:
        from onnxmltools.convert import convert_coreml
        model, prefix = convert_coreml(model, name, input_types), "Cml"
    if model is None:
        raise RuntimeError("Unable to convert model of type '{0}'.".format(type(model)))
    return model, prefix 
Example #13
Source File: trainer.py    From autogbt-alt with MIT License
def get_model(self, trial_id):
        model_dir = self.work_dir/str(trial_id)
        models = []
        for model_path in model_dir.glob('*.lgbm'):
            model = lgb.Booster(model_file=str(model_path))
            models.append(model)
        return AveragingLGBMClassifier(models) 
Example #14
Source File: S2PixelCloudDetector.py    From sentinel2-cloud-detector with Creative Commons Attribution Share Alike 4.0 International
def classifier(self):
        """
        Provides a classifier object. It also loads it if it hasn't been loaded yet. This way the classifier is loaded
        only when it is actually required.
        """
        if self._classifier is None:
            self._classifier = PixelClassifier(Booster(model_file=self.model_filename))

        return self._classifier 
Example #15
Source File: lightgbm.py    From mljar-supervised with MIT License
def load(self, model_file_path):
        logger.debug("LightgbmAlgorithm load model from %s" % model_file_path)
        self.model = lgb.Booster(model_file=model_file_path) 
Example #16
Source File: PixelClassifier.py    From sentinel2-cloud-detector with Creative Commons Attribution Share Alike 4.0 International
def _check_classifier(classifier):
        """
        Checks if the classifier is of correct type or if it implements predict and predict_proba methods
        """
        if isinstance(classifier, Booster):
            return

        predict = getattr(classifier, 'predict', None)
        if not callable(predict):
            raise ValueError('Classifier does not have a predict method!')

        predict_proba = getattr(classifier, 'predict_proba', None)
        if not callable(predict_proba):
            raise ValueError('Classifier does not have a predict_proba method!') 
Example #17
Source File: PixelClassifier.py    From sentinel2-cloud-detector with Creative Commons Attribution Share Alike 4.0 International
def __init__(self, classifier):
        """
        :param classifier: An instance of trained classifier that will be executed over an entire image
        :type classifier: Booster or object that implements methods predict and predict_proba
        """
        self._check_classifier(classifier)
        self.classifier = classifier 
Example #18
Source File: model.py    From ebonite with Apache License 2.0
def dump(self, model: lgb.Booster) -> FilesContextManager:
        with tempfile.TemporaryDirectory(prefix='ebonite_lightgbm_dump') as f:
            path = os.path.join(f, self.model_path)
            model.save_model(path)
            yield Blobs({self.model_path: LocalFileBlob(path)}) 
Example #19
Source File: optimize.py    From optuna with MIT License
def get_best_booster(self) -> "lgb.Booster":
        """Return the best booster.

        If the best booster cannot be found, :class:`ValueError` will be raised. To prevent the
        errors, please save boosters by specifying the ``model_dir`` arguments of
        :meth:`~optuna.integration.lightgbm.LightGBMTuner.__init__` when you resume tuning
        or you run tuning in parallel.
        """
        if self._best_booster_with_trial_number is not None:
            if self._best_booster_with_trial_number[1] == self.study.best_trial.number:
                return self._best_booster_with_trial_number[0]
        if len(self.study.trials) == 0:
            raise ValueError("The best booster is not available because no trials completed.")

        # The best booster exists, but this instance does not have it.
        # This may be due to resuming or parallelization.
        if self._model_dir is None:
            raise ValueError(
                "The best booster cannot be found. It may be found in the other processes due to "
                "resuming or distributed computing. Please set the `model_dir` argument of "
                "`LightGBMTuner.__init__` and make sure that boosters are shared with all "
                "processes."
            )

        best_trial = self.study.best_trial
        path = os.path.join(self._model_dir, "{}.pkl".format(best_trial.number))
        if not os.path.exists(path):
            raise ValueError(
                "The best booster cannot be found in {}. If you execute `LightGBMTuner` in "
                "distributed environment, please use network file system (e.g., NFS) to share "
                "models with multiple workers.".format(self._model_dir)
            )

        with open(path, "rb") as fin:
            booster = pickle.load(fin)

        return booster 
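A hedged usage sketch of the tuner workflow this method belongs to (import path as of the optuna version these examples come from); passing model_dir is what makes get_best_booster robust to resuming, as the docstring above explains:

import numpy as np
import lightgbm as lgb
from optuna.integration.lightgbm import LightGBMTuner

X, y = np.random.rand(200, 4), np.random.randint(0, 2, 200)
dtrain = lgb.Dataset(X[:150], label=y[:150])
dval = lgb.Dataset(X[150:], label=y[150:])
tuner = LightGBMTuner({'objective': 'binary', 'metric': 'binary_logloss'},
                      dtrain, valid_sets=[dval], model_dir='boosters')
tuner.run()
best = tuner.get_best_booster()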
Example #20
Source File: optimize.py    From optuna with MIT License
def best_booster(self) -> "lgb.Booster":
        """Return the best booster."""

        return self.get_best_booster() 
Example #21
Source File: optimize.py    From optuna with MIT License
def __init__(
        self,
        target_param_names: List[str],
        lgbm_params: Dict[str, Any],
        train_set: "lgb.Dataset",
        lgbm_kwargs: Dict[str, Any],
        best_score: float,
        step_name: str,
        model_dir: Optional[str],
        pbar: Optional[tqdm.tqdm] = None,
    ):

        self.target_param_names = target_param_names
        self.pbar = pbar
        self.lgbm_params = lgbm_params
        self.lgbm_kwargs = lgbm_kwargs
        self.train_set = train_set

        self.trial_count = 0
        self.best_score = best_score
        self.best_booster_with_trial_number = None  # type: Optional[Tuple["lgb.Booster", int]]
        self.step_name = step_name
        self.model_dir = model_dir

        self._check_target_names_supported()
        self.pbar_fmt = "{}, val_score: {:.6f}" 
Example #22
Source File: model.py    From ebonite with Apache License 2.0
def load(self, path):
        model_file = os.path.join(path, self.model_path)
        return lgb.Booster(model_file=model_file) 
Example #23
Source File: lightgbm_model_artifact.py    From BentoML with Apache License 2.0
def load(self, path):
        try:
            import lightgbm as lgb
        except ImportError:
            raise MissingDependencyException(
                "lightgbm package is required to use LightGBMModelArtifact"
            )
        bst = lgb.Booster(model_file=self._model_file_path(path))

        return self.pack(bst) 
Example #24
Source File: lightgbm.py    From mlflow with Apache License 2.0
def _load_model(path):
    import lightgbm as lgb
    return lgb.Booster(model_file=path) 
Example #25
Source File: simulation.py    From hypermax with BSD 3-Clause "New" or "Revised" License
def executeLightGBMModel(params, model=None):
    global lightGBMModel
    if model == 'textextraction':
        if lightGBMModel is None:
            lightGBMModel = lgb.Booster(model_file='LightGBM_model_text_extraction.txt')

        vectorKeys = [  # They are in this order for a reason - that's what was in our training data file.
            'layer_0.max_depth',
            'layer_0.min_data_in_leaf',
            'layer_0.boosting_rounds',
            'layer_1.input_window',
            'layer_0.num_leaves',
            'layer_1.min_data_in_leaf',
            'layer_1.boosting_rounds',
            'layer_1.learning_rate',
            'layer_1.num_leaves',
            'layer_0.bagging_fraction',
            'layer_1.max_depth',
            'layer_0.learning_rate',
            'layer_0.input_window',
            'layer_0.feature_fraction']

        vector = []
        for param in vectorKeys:
            vector.append(params[param])

        result = lightGBMModel.predict([vector])[0]

        return {"loss": result, "status": "ok"}
    elif model == 'cifar_resnet':
        if lightGBMModel is None:
            lightGBMModel = lgb.Booster(model_file='LightGBM_model_cifar_resnet.txt')

        vectorKeys = [  # They are in this order for a reason - that's what was in our training data file.
            'activation',
            'layer1_layers',
            'layer1_size',
            'layer2_layers',
            'layer2_size',
            'layer3_layers',
            'layer3_size',
            'layer4_layers',
            'layer4_size',
            'learning_rate',
            'weight_decay'
        ]

        vector = []
        for param in vectorKeys:
            if param == 'activation':
                values = ['relu', 'elu', "selu", "rrelu"]
                if isinstance(params[param], str):
                    vector.append(values.index(params[param]))
                else:
                    vector.append(params[param])
            else:
                vector.append(params[param])

        result = lightGBMModel.predict([vector])[0]

        return {"loss": result, "status": "ok"} 
Example #26
Source File: lightgbm_model.py    From interpret-community with MIT License
def _load(properties):
        """Load a LGBMExplainableModel from the given properties.

        :param properties: A serialized dictionary representation of the LGBMExplainableModel.
        :type properties: dict
        :return: The deserialized LGBMExplainableModel.
        :rtype: interpret_community.mimic.models.LGBMExplainableModel
        """
        # create the LGBMExplainableModel without any properties using the __new__ function, similar to pickle
        lightgbm = LGBMExplainableModel.__new__(LGBMExplainableModel)
        # Get _n_features
        _n_features = properties.pop(_N_FEATURES)
        # If classification case get _n_classes
        if json.loads(properties[LightGBMSerializationConstants.MULTICLASS]):
            _n_classes = properties.pop(_N_CLASSES)
        # load all of the properties
        for key, value in properties.items():
            # Regenerate the properties on the fly
            if key in LightGBMSerializationConstants.nonify_properties:
                if key == LightGBMSerializationConstants.LOGGER:
                    parent = logging.getLogger(__name__)
                    lightgbm_identity = json.loads(properties[LightGBMSerializationConstants.IDENTITY])
                    lightgbm.__dict__[key] = parent.getChild(lightgbm_identity)
                elif key == LightGBMSerializationConstants.TREE_EXPLAINER:
                    lightgbm.__dict__[key] = None
                else:
                    raise Exception("Unknown nonify key on deserialize in LightGBMExplainableModel: {}".format(key))
            elif key in LightGBMSerializationConstants.save_properties:
                # Load the booster from file and re-create the LGBMClassifier or LGBMRegressor
                # This is not recommended but can be necessary to get around pickle being not secure
                # See here for more info:
                # https://github.com/Microsoft/LightGBM/issues/1942
                # https://github.com/Microsoft/LightGBM/issues/1217
                booster_args = {LightGBMSerializationConstants.MODEL_STR: value}
                is_multiclass = json.loads(properties[LightGBMSerializationConstants.MULTICLASS])
                if is_multiclass:
                    objective = LightGBMSerializationConstants.MULTICLASS
                else:
                    objective = LightGBMSerializationConstants.REGRESSION
                if LightGBMSerializationConstants.MODEL_STR in inspect.getargspec(Booster).args:
                    extras = {LightGBMSerializationConstants.OBJECTIVE: objective}
                    lgbm_booster = Booster(**booster_args, params=extras)
                else:
                    # For backwards compatibility with older versions of lightgbm
                    booster_args[LightGBMSerializationConstants.OBJECTIVE] = objective
                    lgbm_booster = Booster(params=booster_args)
                if is_multiclass:
                    new_lgbm = LGBMClassifier()
                    new_lgbm._Booster = lgbm_booster
                    new_lgbm._n_classes = _n_classes
                else:
                    new_lgbm = LGBMRegressor()
                    new_lgbm._Booster = lgbm_booster
                new_lgbm._n_features = _n_features
                lightgbm.__dict__[key] = new_lgbm
            elif key in LightGBMSerializationConstants.enum_properties:
                # NOTE: If more enums added in future, will need to handle this differently
                lightgbm.__dict__[key] = ShapValuesOutput(json.loads(value))
            else:
                lightgbm.__dict__[key] = json.loads(value)
        return lightgbm 
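The model_str branch above rebuilds a Booster from its plain-text serialization instead of unpickling it; a minimal sketch of that round trip using LightGBM's own API:

import lightgbm as lgb
import numpy as np

X, y = np.random.rand(100, 4), np.random.randint(0, 2, 100)
booster = lgb.train({'objective': 'binary', 'verbose': -1},
                    lgb.Dataset(X, label=y), num_boost_round=5)
text = booster.model_to_string()        # text serialization, safe to embed in JSON
restored = lgb.Booster(model_str=text)  # no pickle involved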
Example #27
Source File: optimize.py    From optuna with MIT License
def __init__(
        self,
        params: Dict[str, Any],
        train_set: "lgb.Dataset",
        num_boost_round: int = 1000,
        valid_sets: Optional["VALID_SET_TYPE"] = None,
        valid_names: Optional[Any] = None,
        fobj: Optional[Callable[..., Any]] = None,
        feval: Optional[Callable[..., Any]] = None,
        feature_name: str = "auto",
        categorical_feature: str = "auto",
        early_stopping_rounds: Optional[int] = None,
        evals_result: Optional[Dict[Any, Any]] = None,
        verbose_eval: Optional[Union[bool, int]] = True,
        learning_rates: Optional[List[float]] = None,
        keep_training_booster: Optional[bool] = False,
        callbacks: Optional[List[Callable[..., Any]]] = None,
        time_budget: Optional[int] = None,
        sample_size: Optional[int] = None,
        study: Optional[optuna.study.Study] = None,
        optuna_callbacks: Optional[List[Callable[[Study, FrozenTrial], None]]] = None,
        model_dir: Optional[str] = None,
        verbosity: Optional[int] = 1,
    ) -> None:

        super(LightGBMTuner, self).__init__(
            params,
            train_set,
            num_boost_round=num_boost_round,
            fobj=fobj,
            feval=feval,
            feature_name=feature_name,
            categorical_feature=categorical_feature,
            early_stopping_rounds=early_stopping_rounds,
            verbose_eval=verbose_eval,
            callbacks=callbacks,
            time_budget=time_budget,
            sample_size=sample_size,
            study=study,
            optuna_callbacks=optuna_callbacks,
            verbosity=verbosity,
        )

        self.lgbm_kwargs["valid_sets"] = valid_sets
        self.lgbm_kwargs["valid_names"] = valid_names
        self.lgbm_kwargs["evals_result"] = evals_result
        self.lgbm_kwargs["learning_rates"] = learning_rates
        self.lgbm_kwargs["keep_training_booster"] = keep_training_booster

        self._best_booster_with_trial_number = None  # type: Optional[Tuple[lgb.Booster, int]]
        self._model_dir = model_dir

        if self._model_dir is not None and not os.path.exists(self._model_dir):
            os.mkdir(self._model_dir)

        if valid_sets is None:
            raise ValueError("`valid_sets` is required.") 
Example #28
Source File: lightgbm.py    From mlflow with Apache License 2.0
def log_model(lgb_model, artifact_path, conda_env=None, registered_model_name=None,
              signature: ModelSignature=None, input_example: ModelInputExample=None,
              **kwargs):
    """
    Log a LightGBM model as an MLflow artifact for the current run.

    :param lgb_model: LightGBM model (an instance of `lightgbm.Booster`_) to be saved.
                      Note that models that implement the `scikit-learn API`_  are not supported.
    :param artifact_path: Run-relative artifact path.
    :param conda_env: Either a dictionary representation of a Conda environment or the path to a
                      Conda environment yaml file. If provided, this describes the environment
                      this model should be run in. At minimum, it should specify the dependencies
                      contained in :func:`get_default_conda_env()`. If ``None``, the default
                      :func:`get_default_conda_env()` environment is added to the model.
                      The following is an *example* dictionary representation of a Conda
                      environment::

                        {
                            'name': 'mlflow-env',
                            'channels': ['defaults'],
                            'dependencies': [
                                'python=3.7.0',
                                'pip': [
                                    'lightgbm==2.3.0'
                                ]
                            ]
                        }
    :param registered_model_name: (Experimental) If given, create a model version under
                                  ``registered_model_name``, also creating a registered model if one
                                  with the given name does not exist.

    :param signature: (Experimental) :py:class:`ModelSignature <mlflow.models.ModelSignature>`
                      describes model input and output :py:class:`Schema <mlflow.types.Schema>`.
                      The model signature can be :py:func:`inferred <mlflow.models.infer_signature>`
                      from datasets with valid model input (e.g. the training dataset with target
                      column omitted) and valid model output (e.g. model predictions generated on
                      the training dataset), for example:

                      .. code-block:: python

                        from mlflow.models.signature import infer_signature
                        train = df.drop_column("target_label")
                        predictions = ... # compute model predictions
                        signature = infer_signature(train, predictions)
    :param input_example: (Experimental) Input example provides one or several instances of valid
                          model input. The example can be used as a hint of what data to feed the
                          model. The given example will be converted to a Pandas DataFrame and then
                          serialized to json using the Pandas split-oriented format. Bytes are
                          base64-encoded.

    :param kwargs: kwargs to pass to `lightgbm.Booster.save_model`_ method.
    """
    Model.log(artifact_path=artifact_path, flavor=mlflow.lightgbm,
              registered_model_name=registered_model_name,
              lgb_model=lgb_model, conda_env=conda_env,
              signature=signature, input_example=input_example,
              **kwargs) 
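A hedged usage sketch of log_model inside an active run (the artifact path 'model' is a common convention, not a requirement):

import mlflow
import mlflow.lightgbm
import lightgbm as lgb
import numpy as np

X, y = np.random.rand(100, 4), np.random.randint(0, 2, 100)
booster = lgb.train({'objective': 'binary', 'verbose': -1},
                    lgb.Dataset(X, label=y), num_boost_round=5)
with mlflow.start_run():
    mlflow.lightgbm.log_model(booster, artifact_path='model')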
Example #29
Source File: engines.py    From santander-product-recommendation-8th-place with MIT License
def lightgbm(XY_train, XY_validate, test_df, features, XY_all=None, restore=False):
    train = lgbm.Dataset(XY_train[list(features)], label=XY_train["y"], weight=XY_train["weight"], feature_name=features)
    validate = lgbm.Dataset(XY_validate[list(features)], label=XY_validate["y"], weight=XY_validate["weight"], feature_name=features, reference=train)

    params = {
        'task' : 'train',
        'boosting_type' : 'gbdt',
        'objective' : 'multiclass',
        'num_class': 24,
        'metric' : {'multi_logloss'},
        'is_training_metric': True,
        'max_bin': 255,
        'num_leaves' : 64,
        'learning_rate' : 0.1,
        'feature_fraction' : 0.8,
        'min_data_in_leaf': 10,
        'min_sum_hessian_in_leaf': 5,
        # 'num_threads': 16,
    }
    print(params)

    if not restore:
        with Timer("train lightgbm_lib"):
            model = lgbm.train(params, train, num_boost_round=1000, valid_sets=validate, early_stopping_rounds=20)
            best_iteration = model.best_iteration
            model.save_model("tmp/lgbm.model.txt")
            pickle.dump(best_iteration, open("tmp/lgbm.model.meta", "wb"))
    else:
        with Timer("restore lightgbm_lib model"):
            model = lgbm.Booster(model_file="tmp/lgbm.model.txt")
            best_iteration = pickle.load(open("tmp/lgbm.model.meta", "rb"))

    if XY_all is not None:
        best_iteration = int(best_iteration * len(XY_all) / len(XY_train))
        all_train = lgbm.Dataset(XY_all[list(features)], label=XY_all["y"], weight=XY_all["weight"], feature_name=features)
        with Timer("retrain lightgbm_lib with all data"):
            model = lgbm.train(params, all_train, num_boost_round=best_iteration)
        model.save_model("tmp/lgbm.all.model.txt")

    print("Feature importance by split:")
    for kv in sorted([(k,v) for k,v in zip(features, model.feature_importance("split"))], key=lambda kv: kv[1], reverse=True):
        print(kv)
    print("Feature importance by gain:")
    for kv in sorted([(k,v) for k,v in zip(features, model.feature_importance("gain"))], key=lambda kv: kv[1], reverse=True):
        print(kv)

    return model.predict(test_df[list(features)], num_iteration=best_iteration)