Python lightgbm.Booster() Examples
The following are 29 code examples of lightgbm.Booster(), drawn from open-source projects. The source file, project, and license are noted above each example. You may also want to check out all available functions and classes of the lightgbm module.
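Before diving into the examples, here is a minimal, self-contained sketch of the two usual ways a lightgbm.Booster comes into being — returned by lgb.train() or reloaded from a saved model file. The data and the file name 'model.txt' below are illustrative, not taken from any of the projects:

import lightgbm as lgb
import numpy as np

# Train on random data; lgb.train() returns a lightgbm.Booster.
X = np.random.rand(100, 4)
y = np.random.randint(0, 2, size=100)
booster = lgb.train({'objective': 'binary', 'verbose': -1}, lgb.Dataset(X, label=y))

# Round-trip through a model file; Booster(model_file=...) is the pattern
# most of the examples below rely on.
booster.save_model('model.txt')
loaded = lgb.Booster(model_file='model.txt')
predictions = loaded.predict(X)  # probabilities of the positive class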
Example #1
Source File: lightgbm.py From talkingdata-adtracking-fraud-detection with MIT License
def train_and_predict(self, train, valid, weight, categorical_features: List[str], target: str, params: dict) \
        -> Tuple[Booster, dict]:
    if type(train) != pd.DataFrame or type(valid) != pd.DataFrame:
        raise ValueError('Parameter train and valid must be pandas.DataFrame')

    if list(train.columns) != list(valid.columns):
        raise ValueError('Train and valid must have a same column list')

    predictors = train.columns.drop(target)
    if weight is None:
        d_train = lgb.Dataset(train[predictors], label=train[target].values)
    else:
        print(weight)
        d_train = lgb.Dataset(train[predictors], label=train[target].values, weight=weight)
    d_valid = lgb.Dataset(valid[predictors], label=valid[target].values)

    eval_results = {}
    model: Booster = lgb.train(params['model_params'],
                               d_train,
                               categorical_feature=categorical_features,
                               valid_sets=[d_train, d_valid],
                               valid_names=['train', 'valid'],
                               evals_result=eval_results,
                               **params['train_params'])
    return model, eval_results
Example #2
Source File: EndgameEmber.py From multiscanner with Mozilla Public License 2.0
def check(conf=DEFAULTCONF):
    if not conf['ENABLED']:
        return False
    if not has_ember:
        return False
    if not Path(conf['path-to-model']).is_file():
        print("'{}' does not exist. Check config.ini for model location.".format(conf['path-to-model']))
        return False
    try:
        global LGBM_MODEL
        LGBM_MODEL = lgb.Booster(model_file=conf['path-to-model'])
    except lgb.LightGBMError as e:
        print("Unable to load model, {}. ({})".format(conf['path-to-model'], e))
        return False
    return True
Example #3
Source File: PixelClassifier.py From sentinel2-cloud-detector with Creative Commons Attribution Share Alike 4.0 International
def image_predict_proba(self, X, **kwargs):
    """
    Predicts class probabilities for the entire image.

    :param X: Array of images to be classified.
    :type X: numpy array, shape = [n_images, n_pixels_y, n_pixels_x, n_bands]
    :param kwargs: Any keyword arguments that will be passed to the classifier's prediction method
    :return: classification probability map
    :rtype: numpy array, [n_samples, n_pixels_y, n_pixels_x, n_classes]
    """
    pixels = self.extract_pixels(X)

    if isinstance(self.classifier, Booster):
        probabilities = self.classifier.predict(pixels, **kwargs)
        probabilities = np.vstack((1. - probabilities, probabilities)).transpose()
    else:
        probabilities = self.classifier.predict_proba(pixels, **kwargs)

    return probabilities.reshape(X.shape[0], X.shape[1], X.shape[2], probabilities.shape[1])
Example #4
Source File: PixelClassifier.py From sentinel2-cloud-detector with Creative Commons Attribution Share Alike 4.0 International
def image_predict(self, X, **kwargs):
    """
    Predicts class labels for the entire image.

    :param X: Array of images to be classified.
    :type X: numpy array, shape = [n_images, n_pixels_y, n_pixels_x, n_bands]
    :param kwargs: Any keyword arguments that will be passed to the classifier's prediction method
    :return: raster classification map
    :rtype: numpy array, [n_samples, n_pixels_y, n_pixels_x]
    """
    pixels = self.extract_pixels(X)

    if isinstance(self.classifier, Booster):
        raise NotImplementedError('An instance of lightgbm.Booster can only return prediction probabilities, '
                                  'use PixelClassifier.image_predict_proba instead')

    predictions = self.classifier.predict(pixels, **kwargs)

    return predictions.reshape(X.shape[0], X.shape[1], X.shape[2])
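Read together with the PixelClassifier constructor in Example #17 below, here is a hedged usage sketch of these two methods — the model file, image array, and threshold are all illustrative placeholders:

import numpy as np
from lightgbm import Booster

# images: [n_images, n_pixels_y, n_pixels_x, n_bands], matching the docstrings above
images = np.random.rand(2, 64, 64, 10)
clf = PixelClassifier(Booster(model_file='pixel_s2_cloud_detector.txt'))  # illustrative path
proba = clf.image_predict_proba(images)  # shape: [2, 64, 64, 2]
cloud_mask = proba[..., 1] > 0.4         # 0.4 is an arbitrary example threshold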
Example #5
Source File: atpe_optimizer.py From hypermax with BSD 3-Clause "New" or "Revised" License
def __init__(self):
    scalingModelData = json.loads(pkg_resources.resource_string(__name__, "../atpe_models/scaling_model.json"))
    self.featureScalingModels = {}
    for key in self.atpeModelFeatureKeys:
        self.featureScalingModels[key] = sklearn.preprocessing.StandardScaler()
        self.featureScalingModels[key].scale_ = numpy.array(scalingModelData[key]['scales'])
        self.featureScalingModels[key].mean_ = numpy.array(scalingModelData[key]['means'])
        self.featureScalingModels[key].var_ = numpy.array(scalingModelData[key]['variances'])

    self.parameterModels = {}
    self.parameterModelConfigurations = {}
    for param in self.atpeParameters:
        modelData = pkg_resources.resource_string(__name__, "../atpe_models/model-" + param + '.txt')
        with hypermax.file_utils.ClosedNamedTempFile(modelData) as model_file_name:
            self.parameterModels[param] = lightgbm.Booster(model_file=model_file_name)

        configString = pkg_resources.resource_string(__name__, "../atpe_models/model-" + param + '-configuration.json')
        data = json.loads(configString)
        self.parameterModelConfigurations[param] = data

    self.lastATPEParameters = None
    self.lastLockedParameters = []
    self.atpeParamDetails = None
Example #6
Source File: optimize.py From optuna with MIT License
def _get_booster_best_score(self, booster: "lgb.Booster") -> float:
    metric = self._get_metric_for_objective()
    valid_sets = self.lgbm_kwargs.get("valid_sets")  # type: Optional[VALID_SET_TYPE]

    if self.lgbm_kwargs.get("valid_names") is not None:
        if type(self.lgbm_kwargs["valid_names"]) is str:
            valid_name = self.lgbm_kwargs["valid_names"]
        elif type(self.lgbm_kwargs["valid_names"]) in [list, tuple]:
            valid_name = self.lgbm_kwargs["valid_names"][-1]
        else:
            raise NotImplementedError
    elif type(valid_sets) is lgb.Dataset:
        valid_name = "valid_0"
    elif isinstance(valid_sets, (list, tuple)) and len(valid_sets) > 0:
        valid_set_idx = len(valid_sets) - 1
        valid_name = "valid_{}".format(valid_set_idx)
    else:
        raise NotImplementedError

    val_score = booster.best_score[valid_name][metric]
    return val_score
Example #7
Source File: lightgbm.py From mlflow with Apache License 2.0
def load_model(model_uri):
    """
    Load a LightGBM model from a local file or a run.

    :param model_uri: The location, in URI format, of the MLflow model. For example:

                      - ``/Users/me/path/to/local/model``
                      - ``relative/path/to/local/model``
                      - ``s3://my_bucket/path/to/model``
                      - ``runs:/<mlflow_run_id>/run-relative/path/to/model``

                      For more information about supported URI schemes, see
                      `Referencing Artifacts <https://www.mlflow.org/docs/latest/tracking.html#
                      artifact-locations>`_.

    :return: A LightGBM model (an instance of `lightgbm.Booster`_).
    """
    local_model_path = _download_artifact_from_uri(artifact_uri=model_uri)
    flavor_conf = _get_flavor_configuration(model_path=local_model_path, flavor_name=FLAVOR_NAME)
    lgb_model_file_path = os.path.join(local_model_path, flavor_conf.get("data", "model.lgb"))
    return _load_model(path=lgb_model_file_path)
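A short usage sketch for this API — the run ID placeholder mirrors the docstring, and X_test is assumed to be a prepared feature matrix:

import mlflow.lightgbm

# Load a Booster that was previously logged with mlflow.lightgbm.log_model().
model = mlflow.lightgbm.load_model("runs:/<mlflow_run_id>/model")
preds = model.predict(X_test)  # X_test: assumed feature matrix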
Example #8
Source File: lightgbm_model_artifact.py From BentoML with Apache License 2.0
def __init__(self, spec, model):
    super(_LightGBMModelArtifactWrapper, self).__init__(spec)

    try:
        import lightgbm as lgb
    except ImportError:
        raise MissingDependencyException(
            "lightgbm package is required to use LightGBMModelArtifact"
        )

    if not isinstance(model, lgb.Booster):
        raise InvalidArgument(
            "Expect `model` argument to be a `lightgbm.Booster` instance"
        )

    self._model = model
Example #9
Source File: models.py From malware_evasion_competition with GNU Affero General Public License v3.0
def __init__(self, model_path=EMBER_MODEL_PATH, thresh=0.8336, name='ember'):
    # load lightgbm model
    self.model = lgb.Booster(model_file=model_path)
    self.thresh = thresh
    self.__name__ = 'ember'
Example #10
Source File: train_lightgbm.py From jh-kaggle-util with Apache License 2.0
def load_model(path, name):
    root = jhkaggle.jhkaggle_config['PATH']
    model_path = os.path.join(root, path)
    meta_filename = os.path.join(model_path, "meta.json")
    with open(meta_filename, 'r') as fp:
        meta = json.load(fp)
    result = TrainLightGBM(meta['data_source'], meta['params'], False)
    result.model = lgb.Booster(model_file=os.path.join(model_path, name + ".txt"))
    return result
Example #11
Source File: convert.py From onnxmltools with MIT License
def convert(model, name=None, initial_types=None, doc_string='', target_opset=None,
            targeted_onnx=onnx.__version__, custom_conversion_functions=None,
            custom_shape_calculators=None):
    '''
    This function produces an equivalent ONNX model of the given lightgbm model.
    The supported lightgbm modules are listed below.

    * `LGBMClassifier <https://lightgbm.readthedocs.io/en/latest/pythonapi/lightgbm.LGBMClassifier.html>`_
    * `LGBMRegressor <https://lightgbm.readthedocs.io/en/latest/pythonapi/lightgbm.LGBMRegressor.html>`_
    * `Booster <https://lightgbm.readthedocs.io/en/latest/pythonapi/lightgbm.Booster.html>`_

    :param model: A LightGBM model
    :param initial_types: a python list. Each element is a tuple of a variable name and a type defined in data_types.py
    :param name: The name of the graph (type: GraphProto) in the produced ONNX model (type: ModelProto)
    :param doc_string: A string attached onto the produced ONNX model
    :param target_opset: number, for example, 7 for ONNX 1.2, and 8 for ONNX 1.3.
    :param targeted_onnx: A string (for example, '1.1.2' and '1.2') used to specify the targeted ONNX version of the
        produced model. If ONNXMLTools cannot find a compatible ONNX python package, an error may be thrown.
    :param custom_conversion_functions: a dictionary for specifying the user customized conversion function
    :param custom_shape_calculators: a dictionary for specifying the user customized shape calculator
    :return: An ONNX model (type: ModelProto) which is equivalent to the input lightgbm model
    '''
    if initial_types is None:
        raise ValueError('Initial types are required. See usage of convert(...) in '
                         'onnxmltools.convert.lightgbm.convert for details')

    if isinstance(model, lightgbm.Booster):
        model = WrappedBooster(model)
    if name is None:
        name = str(uuid4().hex)

    target_opset = target_opset if target_opset else get_maximum_opset_supported()
    topology = parse_lightgbm(model, initial_types, target_opset,
                              custom_conversion_functions, custom_shape_calculators)
    topology.compile()
    onnx_model = convert_topology(topology, name, doc_string, target_opset, targeted_onnx)
    return onnx_model
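A hedged sketch of calling the public wrapper for this converter, convert_lightgbm — the model path and feature count are assumptions:

import lightgbm
from onnxmltools.convert import convert_lightgbm
from onnxmltools.convert.common.data_types import FloatTensorType

booster = lightgbm.Booster(model_file='model.txt')  # illustrative path
# initial_types names the graph input and its shape; None marks a dynamic batch axis.
initial_types = [('input', FloatTensorType([None, 4]))]  # 4 features assumed
onnx_model = convert_lightgbm(booster, initial_types=initial_types)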
Example #12
Source File: tests_helper.py From onnxmltools with MIT License
def convert_model(model, name, input_types):
    """
    Runs the appropriate conversion method.

    :param model: model
    :return: *onnx* model
    """
    from sklearn.base import BaseEstimator
    if model.__class__.__name__.startswith("LGBM"):
        from onnxmltools.convert import convert_lightgbm
        model, prefix = convert_lightgbm(model, name, input_types), "LightGbm"
    elif model.__class__.__name__.startswith("XGB"):
        from onnxmltools.convert import convert_xgboost
        model, prefix = convert_xgboost(model, name, input_types), "XGB"
    elif model.__class__.__name__ == 'Booster':
        import lightgbm
        if isinstance(model, lightgbm.Booster):
            from onnxmltools.convert import convert_lightgbm
            model, prefix = convert_lightgbm(model, name, input_types), "LightGbm"
        else:
            raise RuntimeError("Unable to convert model of type '{0}'.".format(type(model)))
    elif model.__class__.__name__.startswith("CatBoost"):
        from onnxmltools.convert import convert_catboost
        model, prefix = convert_catboost(model, name, input_types), "CatBoost"
    elif isinstance(model, BaseEstimator):
        from onnxmltools.convert import convert_sklearn
        model, prefix = convert_sklearn(model, name, input_types), "Sklearn"
    else:
        from onnxmltools.convert import convert_coreml
        model, prefix = convert_coreml(model, name, input_types), "Cml"
    if model is None:
        raise RuntimeError("Unable to convert model of type '{0}'.".format(type(model)))
    return model, prefix
Example #13
Source File: trainer.py From autogbt-alt with MIT License
def get_model(self, trial_id):
    model_dir = self.work_dir / str(trial_id)
    models = []
    for model_path in model_dir.glob('*.lgbm'):
        model = lgb.Booster(model_file=str(model_path))
        models.append(model)
    return AveragingLGBMClassifier(models)
Example #14
Source File: S2PixelCloudDetector.py From sentinel2-cloud-detector with Creative Commons Attribution Share Alike 4.0 International
def classifier(self):
    """
    Provides a classifier object. It also loads it if it hasn't been loaded yet. This way the
    classifier is loaded only when it is actually required.
    """
    if self._classifier is None:
        self._classifier = PixelClassifier(Booster(model_file=self.model_filename))

    return self._classifier
Example #15
Source File: lightgbm.py From mljar-supervised with MIT License
def load(self, model_file_path):
    logger.debug("LightgbmAlgorithm load model from %s" % model_file_path)
    self.model = lgb.Booster(model_file=model_file_path)
Example #16
Source File: PixelClassifier.py From sentinel2-cloud-detector with Creative Commons Attribution Share Alike 4.0 International
def _check_classifier(classifier):
    """
    Checks if the classifier is of correct type or if it implements predict and predict_proba methods
    """
    if isinstance(classifier, Booster):
        return

    predict = getattr(classifier, 'predict', None)
    if not callable(predict):
        raise ValueError('Classifier does not have a predict method!')

    predict_proba = getattr(classifier, 'predict_proba', None)
    if not callable(predict_proba):
        raise ValueError('Classifier does not have a predict_proba method!')
Example #17
Source File: PixelClassifier.py From sentinel2-cloud-detector with Creative Commons Attribution Share Alike 4.0 International
def __init__(self, classifier):
    """
    :param classifier: An instance of trained classifier that will be executed over an entire image
    :type classifier: Booster or object that implements methods predict and predict_proba
    """
    self._check_classifier(classifier)
    self.classifier = classifier
Example #18
Source File: model.py From ebonite with Apache License 2.0
def dump(self, model: lgb.Booster) -> FilesContextManager:
    with tempfile.TemporaryDirectory(prefix='ebonite_lightgbm_dump') as f:
        path = os.path.join(f, self.model_path)
        model.save_model(path)
        yield Blobs({self.model_path: LocalFileBlob(path)})
Example #19
Source File: optimize.py From optuna with MIT License
def get_best_booster(self) -> "lgb.Booster":
    """Return the best booster.

    If the best booster cannot be found, :class:`ValueError` will be raised. To prevent the
    error, please save boosters by specifying the ``model_dir`` argument of
    :meth:`~optuna.integration.lightgbm.LightGBMTuner.__init__` when you resume tuning or you
    run tuning in parallel.
    """
    if self._best_booster_with_trial_number is not None:
        if self._best_booster_with_trial_number[1] == self.study.best_trial.number:
            return self._best_booster_with_trial_number[0]
    if len(self.study.trials) == 0:
        raise ValueError("The best booster is not available because no trials completed.")

    # The best booster exists, but this instance does not have it.
    # This may be due to resuming or parallelization.
    if self._model_dir is None:
        raise ValueError(
            "The best booster cannot be found. It may be found in the other processes due to "
            "resuming or distributed computing. Please set the `model_dir` argument of "
            "`LightGBMTuner.__init__` and make sure that boosters are shared with all "
            "processes."
        )

    best_trial = self.study.best_trial
    path = os.path.join(self._model_dir, "{}.pkl".format(best_trial.number))
    if not os.path.exists(path):
        raise ValueError(
            "The best booster cannot be found in {}. If you execute `LightGBMTuner` in "
            "distributed environment, please use network file system (e.g., NFS) to share "
            "models with multiple workers.".format(self._model_dir)
        )

    with open(path, "rb") as fin:
        booster = pickle.load(fin)

    return booster
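A sketch of the workflow these checks guard, assuming params, dtrain, and dval are set up as for ordinary LightGBM training; the model_dir path is illustrative:

from optuna.integration.lightgbm import LightGBMTuner

# With model_dir set, each trial's booster is saved to disk, so
# get_best_booster() can recover the winner even after resuming.
tuner = LightGBMTuner(params, dtrain, valid_sets=[dval], model_dir="./boosters")
tuner.run()
best = tuner.get_best_booster()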
Example #20
Source File: optimize.py From optuna with MIT License
def best_booster(self) -> "lgb.Booster":
    """Return the best booster."""
    return self.get_best_booster()
Example #21
Source File: optimize.py From optuna with MIT License
def __init__(
    self,
    target_param_names: List[str],
    lgbm_params: Dict[str, Any],
    train_set: "lgb.Dataset",
    lgbm_kwargs: Dict[str, Any],
    best_score: float,
    step_name: str,
    model_dir: Optional[str],
    pbar: Optional[tqdm.tqdm] = None,
):
    self.target_param_names = target_param_names
    self.pbar = pbar
    self.lgbm_params = lgbm_params
    self.lgbm_kwargs = lgbm_kwargs
    self.train_set = train_set

    self.trial_count = 0
    self.best_score = best_score
    self.best_booster_with_trial_number = None  # type: Optional[Tuple["lgb.Booster", int]]
    self.step_name = step_name
    self.model_dir = model_dir

    self._check_target_names_supported()
    self.pbar_fmt = "{}, val_score: {:.6f}"
Example #22
Source File: model.py From ebonite with Apache License 2.0
def load(self, path):
    model_file = os.path.join(path, self.model_path)
    return lgb.Booster(model_file=model_file)
Example #23
Source File: lightgbm_model_artifact.py From BentoML with Apache License 2.0
def load(self, path):
    try:
        import lightgbm as lgb
    except ImportError:
        raise MissingDependencyException(
            "lightgbm package is required to use LightGBMModelArtifact"
        )
    bst = lgb.Booster(model_file=self._model_file_path(path))

    return self.pack(bst)
Example #24
Source File: lightgbm.py From mlflow with Apache License 2.0
def _load_model(path):
    import lightgbm as lgb
    return lgb.Booster(model_file=path)
Example #25
Source File: simulation.py From hypermax with BSD 3-Clause "New" or "Revised" License
def executeLightGBMModel(params, model=None):
    global lightGBMModel
    if model == 'textextraction':
        if lightGBMModel is None:
            lightGBMModel = lgb.Booster(model_file='LightGBM_model_text_extraction.txt')

        # They are in this order for a reason - that's what was in our training data file.
        vectorKeys = [
            'layer_0.max_depth',
            'layer_0.min_data_in_leaf',
            'layer_0.boosting_rounds',
            'layer_1.input_window',
            'layer_0.num_leaves',
            'layer_1.min_data_in_leaf',
            'layer_1.boosting_rounds',
            'layer_1.learning_rate',
            'layer_1.num_leaves',
            'layer_0.bagging_fraction',
            'layer_1.max_depth',
            'layer_0.learning_rate',
            'layer_0.input_window',
            'layer_0.feature_fraction'
        ]

        vector = []
        for param in vectorKeys:
            vector.append(params[param])

        result = lightGBMModel.predict([vector])[0]
        return {"loss": result, "status": "ok"}
    elif model == 'cifar_resnet':
        if lightGBMModel is None:
            lightGBMModel = lgb.Booster(model_file='LightGBM_model_cifar_resnet.txt')

        # They are in this order for a reason - that's what was in our training data file.
        vectorKeys = [
            'activation',
            'layer1_layers',
            'layer1_size',
            'layer2_layers',
            'layer2_size',
            'layer3_layers',
            'layer3_size',
            'layer4_layers',
            'layer4_size',
            'learning_rate',
            'weight_decay'
        ]

        vector = []
        for param in vectorKeys:
            if param == 'activation':
                values = ['relu', 'elu', 'selu', 'rrelu']
                if isinstance(params[param], str):
                    vector.append(values.index(params[param]))
                else:
                    vector.append(params[param])
            else:
                vector.append(params[param])

        result = lightGBMModel.predict([vector])[0]
        return {"loss": result, "status": "ok"}
Example #26
Source File: lightgbm_model.py From interpret-community with MIT License
def _load(properties):
    """Load a LGBMExplainableModel from the given properties.

    :param properties: A serialized dictionary representation of the LGBMExplainableModel.
    :type properties: dict
    :return: The deserialized LGBMExplainableModel.
    :rtype: interpret_community.mimic.models.LGBMExplainableModel
    """
    # create the LGBMExplainableModel without any properties using the __new__ function, similar to pickle
    lightgbm = LGBMExplainableModel.__new__(LGBMExplainableModel)
    # Get _n_features
    _n_features = properties.pop(_N_FEATURES)
    # If classification case get _n_classes
    if json.loads(properties[LightGBMSerializationConstants.MULTICLASS]):
        _n_classes = properties.pop(_N_CLASSES)
    # load all of the properties
    for key, value in properties.items():
        # Regenerate the properties on the fly
        if key in LightGBMSerializationConstants.nonify_properties:
            if key == LightGBMSerializationConstants.LOGGER:
                parent = logging.getLogger(__name__)
                lightgbm_identity = json.loads(properties[LightGBMSerializationConstants.IDENTITY])
                lightgbm.__dict__[key] = parent.getChild(lightgbm_identity)
            elif key == LightGBMSerializationConstants.TREE_EXPLAINER:
                lightgbm.__dict__[key] = None
            else:
                raise Exception("Unknown nonify key on deserialize in LightGBMExplainableModel: {}".format(key))
        elif key in LightGBMSerializationConstants.save_properties:
            # Load the booster from file and re-create the LGBMClassifier or LGBMRegressor
            # This is not recommended but can be necessary to get around pickle being not secure
            # See here for more info:
            # https://github.com/Microsoft/LightGBM/issues/1942
            # https://github.com/Microsoft/LightGBM/issues/1217
            booster_args = {LightGBMSerializationConstants.MODEL_STR: value}
            is_multiclass = json.loads(properties[LightGBMSerializationConstants.MULTICLASS])
            if is_multiclass:
                objective = LightGBMSerializationConstants.MULTICLASS
            else:
                objective = LightGBMSerializationConstants.REGRESSION
            if LightGBMSerializationConstants.MODEL_STR in inspect.getargspec(Booster).args:
                extras = {LightGBMSerializationConstants.OBJECTIVE: objective}
                lgbm_booster = Booster(**booster_args, params=extras)
            else:
                # For backwards compatibility with older versions of lightgbm
                booster_args[LightGBMSerializationConstants.OBJECTIVE] = objective
                lgbm_booster = Booster(params=booster_args)
            if is_multiclass:
                new_lgbm = LGBMClassifier()
                new_lgbm._Booster = lgbm_booster
                new_lgbm._n_classes = _n_classes
            else:
                new_lgbm = LGBMRegressor()
                new_lgbm._Booster = lgbm_booster
            new_lgbm._n_features = _n_features
            lightgbm.__dict__[key] = new_lgbm
        elif key in LightGBMSerializationConstants.enum_properties:
            # NOTE: If more enums added in future, will need to handle this differently
            lightgbm.__dict__[key] = ShapValuesOutput(json.loads(value))
        else:
            lightgbm.__dict__[key] = json.loads(value)
    return lightgbm
Example #27
Source File: optimize.py From optuna with MIT License
def __init__(
    self,
    params: Dict[str, Any],
    train_set: "lgb.Dataset",
    num_boost_round: int = 1000,
    valid_sets: Optional["VALID_SET_TYPE"] = None,
    valid_names: Optional[Any] = None,
    fobj: Optional[Callable[..., Any]] = None,
    feval: Optional[Callable[..., Any]] = None,
    feature_name: str = "auto",
    categorical_feature: str = "auto",
    early_stopping_rounds: Optional[int] = None,
    evals_result: Optional[Dict[Any, Any]] = None,
    verbose_eval: Optional[Union[bool, int]] = True,
    learning_rates: Optional[List[float]] = None,
    keep_training_booster: Optional[bool] = False,
    callbacks: Optional[List[Callable[..., Any]]] = None,
    time_budget: Optional[int] = None,
    sample_size: Optional[int] = None,
    study: Optional[optuna.study.Study] = None,
    optuna_callbacks: Optional[List[Callable[[Study, FrozenTrial], None]]] = None,
    model_dir: Optional[str] = None,
    verbosity: Optional[int] = 1,
) -> None:
    super(LightGBMTuner, self).__init__(
        params,
        train_set,
        num_boost_round=num_boost_round,
        fobj=fobj,
        feval=feval,
        feature_name=feature_name,
        categorical_feature=categorical_feature,
        early_stopping_rounds=early_stopping_rounds,
        verbose_eval=verbose_eval,
        callbacks=callbacks,
        time_budget=time_budget,
        sample_size=sample_size,
        study=study,
        optuna_callbacks=optuna_callbacks,
        verbosity=verbosity,
    )

    self.lgbm_kwargs["valid_sets"] = valid_sets
    self.lgbm_kwargs["valid_names"] = valid_names
    self.lgbm_kwargs["evals_result"] = evals_result
    self.lgbm_kwargs["learning_rates"] = learning_rates
    self.lgbm_kwargs["keep_training_booster"] = keep_training_booster

    self._best_booster_with_trial_number = None  # type: Optional[Tuple[lgb.Booster, int]]
    self._model_dir = model_dir

    if self._model_dir is not None and not os.path.exists(self._model_dir):
        os.mkdir(self._model_dir)

    if valid_sets is None:
        raise ValueError("`valid_sets` is required.")
Example #28
Source File: lightgbm.py From mlflow with Apache License 2.0
def log_model(lgb_model, artifact_path, conda_env=None, registered_model_name=None,
              signature: ModelSignature=None, input_example: ModelInputExample=None,
              **kwargs):
    """
    Log a LightGBM model as an MLflow artifact for the current run.

    :param lgb_model: LightGBM model (an instance of `lightgbm.Booster`_) to be saved.
                      Note that models that implement the `scikit-learn API`_ are not supported.
    :param artifact_path: Run-relative artifact path.
    :param conda_env: Either a dictionary representation of a Conda environment or the path to a
                      Conda environment yaml file. If provided, this describes the environment
                      this model should be run in. At minimum, it should specify the dependencies
                      contained in :func:`get_default_conda_env()`. If ``None``, the default
                      :func:`get_default_conda_env()` environment is added to the model.
                      The following is an *example* dictionary representation of a Conda
                      environment::

                        {
                            'name': 'mlflow-env',
                            'channels': ['defaults'],
                            'dependencies': [
                                'python=3.7.0',
                                'pip': [
                                    'lightgbm==2.3.0'
                                ]
                            ]
                        }

    :param registered_model_name: (Experimental) If given, create a model version under
                                  ``registered_model_name``, also creating a registered model if
                                  one with the given name does not exist.
    :param signature: (Experimental) :py:class:`ModelSignature <mlflow.models.ModelSignature>`
                      describes model input and output :py:class:`Schema <mlflow.types.Schema>`.
                      The model signature can be :py:func:`inferred <mlflow.models.infer_signature>`
                      from datasets with valid model input (e.g. the training dataset with target
                      column omitted) and valid model output (e.g. model predictions generated on
                      the training dataset), for example:

                      .. code-block:: python

                        from mlflow.models.signature import infer_signature
                        train = df.drop_column("target_label")
                        predictions = ...  # compute model predictions
                        signature = infer_signature(train, predictions)
    :param input_example: (Experimental) Input example provides one or several instances of valid
                          model input. The example can be used as a hint of what data to feed the
                          model. The given example will be converted to a Pandas DataFrame and
                          then serialized to json using the Pandas split-oriented format. Bytes
                          are base64-encoded.
    :param kwargs: kwargs to pass to `lightgbm.Booster.save_model`_ method.
    """
    Model.log(artifact_path=artifact_path, flavor=mlflow.lightgbm,
              registered_model_name=registered_model_name,
              lgb_model=lgb_model, conda_env=conda_env,
              signature=signature, input_example=input_example, **kwargs)
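A minimal usage sketch, assuming booster is a trained lightgbm.Booster; the artifact path is arbitrary:

import mlflow
import mlflow.lightgbm

with mlflow.start_run():
    # Logs the booster as an artifact of the active run under "model".
    mlflow.lightgbm.log_model(booster, artifact_path="model")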
Example #29
Source File: engines.py From santander-product-recommendation-8th-place with MIT License
def lightgbm(XY_train, XY_validate, test_df, features, XY_all=None, restore=False):
    train = lgbm.Dataset(XY_train[list(features)], label=XY_train["y"],
                         weight=XY_train["weight"], feature_name=features)
    validate = lgbm.Dataset(XY_validate[list(features)], label=XY_validate["y"],
                            weight=XY_validate["weight"], feature_name=features, reference=train)

    params = {
        'task': 'train',
        'boosting_type': 'gbdt',
        'objective': 'multiclass',
        'num_class': 24,
        'metric': {'multi_logloss'},
        'is_training_metric': True,
        'max_bin': 255,
        'num_leaves': 64,
        'learning_rate': 0.1,
        'feature_fraction': 0.8,
        'min_data_in_leaf': 10,
        'min_sum_hessian_in_leaf': 5,
        # 'num_threads': 16,
    }
    print(params)

    if not restore:
        with Timer("train lightgbm_lib"):
            model = lgbm.train(params, train, num_boost_round=1000,
                               valid_sets=validate, early_stopping_rounds=20)
            best_iteration = model.best_iteration
            model.save_model("tmp/lgbm.model.txt")
            pickle.dump(best_iteration, open("tmp/lgbm.model.meta", "wb"))
    else:
        with Timer("restore lightgbm_lib model"):
            model = lgbm.Booster(model_file="tmp/lgbm.model.txt")
            best_iteration = pickle.load(open("tmp/lgbm.model.meta", "rb"))

    if XY_all is not None:
        best_iteration = int(best_iteration * len(XY_all) / len(XY_train))
        all_train = lgbm.Dataset(XY_all[list(features)], label=XY_all["y"],
                                 weight=XY_all["weight"], feature_name=features)
        with Timer("retrain lightgbm_lib with all data"):
            model = lgbm.train(params, all_train, num_boost_round=best_iteration)
            model.save_model("tmp/lgbm.all.model.txt")

    print("Feature importance by split:")
    for kv in sorted([(k, v) for k, v in zip(features, model.feature_importance("split"))],
                     key=lambda kv: kv[1], reverse=True):
        print(kv)
    print("Feature importance by gain:")
    for kv in sorted([(k, v) for k, v in zip(features, model.feature_importance("gain"))],
                     key=lambda kv: kv[1], reverse=True):
        print(kv)

    return model.predict(test_df[list(features)], num_iteration=best_iteration)