Python mlflow.log_metric() Examples

The following are 30 code examples of mlflow.log_metric(), drawn from open-source projects; the original project and source file are listed above each example. You may also want to check out all available functions and classes of the mlflow module.
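For orientation, here is a minimal, self-contained sketch of the basic API (assuming MLflow is installed and using its default local tracking store); the metric names and values are placeholders chosen for illustration, not taken from any of the projects below.

import mlflow

# Start a run and log metrics. Repeated calls with the same key and an
# increasing step build up a metric history that the MLflow UI can plot.
with mlflow.start_run():
    mlflow.log_metric("rmse", 0.25)  # single value, recorded at the default step 0
    for epoch in range(3):
        mlflow.log_metric("val_loss", 1.0 / (epoch + 1), step=epoch)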
Example #1
Source File: runner.py    From ai-platform with MIT License
def predict(self, df_test):
        """
        Make predictions of electricity consumption for the next 7 days.
        """
        # load model from file
        loaded_model = mlflow.sklearn.load_model("model")
        # make predictions for test data
        xts, yts = df_test.drop(['Value'], axis=1), df_test['Value'].values
        p = loaded_model.predict(xgb.DMatrix(xts))
        prediction = pd.DataFrame({'Prediction': p})

        mape, rmse, mae, r2 = ForecastRunner.evaluation_metrics(yts, p)
        print('MAPE: {}'.format(mape))
        print('RMSE: {}'.format(rmse))
        print('R2: {}'.format(r2))
        print('MAE: {}'.format(mae))
        mlflow.log_metric("MAPE", mape)
        mlflow.log_metric("RMSE", rmse)
        mlflow.log_metric("R2", r2)
        mlflow.log_metric("MAE", mae)
        ForecastRunner.plot_result(yts, p)
        self.save_output(df_test, prediction) 
Example #2
Source File: test_tracking.py    From mlflow with Apache License 2.0
def test_log_metrics_uses_millisecond_timestamp_resolution_client():
    with start_run() as active_run, mock.patch("time.time") as time_mock:
        time_mock.side_effect = lambda: 123
        mlflow_client = tracking.MlflowClient()
        run_id = active_run.info.run_id

        mlflow_client.log_metric(run_id=run_id, key="name_1", value=25)
        mlflow_client.log_metric(run_id=run_id, key="name_2", value=-3)
        mlflow_client.log_metric(run_id=run_id, key="name_1", value=30)
        mlflow_client.log_metric(run_id=run_id, key="name_1", value=40)

    metric_history_name1 = mlflow_client.get_metric_history(run_id, "name_1")
    assert set([(m.value, m.timestamp) for m in metric_history_name1]) == set([
        (25, 123 * 1000),
        (30, 123 * 1000),
        (40, 123 * 1000),
    ])
    metric_history_name2 = mlflow_client.get_metric_history(run_id, "name_2")
    assert set([(m.value, m.timestamp) for m in metric_history_name2]) == set([
        (-3, 123 * 1000),
    ]) 
Example #3
Source File: train.py    From mlflow with Apache License 2.0
def on_epoch_end(self, epoch, logs=None):
        """
        Log Keras metrics with MLflow. Update the best model if the model improved on the validation
        data.
        """
        if not logs:
            return
        for name, value in logs.items():
            if name.startswith("val_"):
                name = "valid_" + name[4:]
            else:
                name = "train_" + name
            mlflow.log_metric(name, value)
        val_loss = logs["val_loss"]
        if val_loss < self._best_val_loss:
            # Save the "best" weights
            self._best_val_loss = val_loss
            self._best_weights = [x.copy() for x in self._model.get_weights()] 
Example #4
Source File: pipeline_train.py    From models with Apache License 2.0
def on_epoch_end(self, epoch, logs=None):
        """
        Log Keras metrics with MLflow. Update the best model if the model improved on the validation
        data.
        """
        if not logs:
            return
        for name, value in logs.items():
            if name.startswith("val_"):
                name = "valid_" + name[4:]
            else:
                name = "train_" + name
            mlflow.log_metric(name, value)
        val_loss = logs["val_loss"]
        if val_loss < self._best_val_loss:
            # Save the "best" weights
            self._best_val_loss = val_loss
            self._best_weights = [x.copy() for x in self._model.get_weights()] 
Example #5
Source File: pipeline_train.py    From models with Apache License 2.0
def on_epoch_end(self, epoch, logs=None):
        """
        Log Keras metrics with MLflow. Update the best model if the model improved on the validation
        data.
        """
        if not logs:
            return
        for name, value in logs.items():
            if name.startswith("val_"):
                name = "valid_" + name[4:]
            else:
                name = "train_" + name
            mlflow.log_metric(name, value)
        val_loss = logs["val_loss"]
        if val_loss < self._best_val_loss:
            # Save the "best" weights
            self._best_val_loss = val_loss
            self._best_weights = [x.copy() for x in self._model.get_weights()] 
Example #6
Source File: test_run.py    From nyaggle with MIT License
def test_ignore_errors_in_mlflow_params(tmpdir_name):
    mlflow.start_run()
    mlflow.log_param('features', 'ABC')
    mlflow.log_metric('Overall', -99)

    params = {
        'objective': 'binary',
        'max_depth': 8
    }
    X, y = make_classification_df()

    result = run_experiment(params, X, y, with_mlflow=True, logging_directory=tmpdir_name, feature_list=[])

    client = mlflow.tracking.MlflowClient()
    data = client.get_run(mlflow.active_run().info.run_id).data

    assert data.metrics['Overall'] == result.metrics[-1]
    assert data.params['features'] == 'ABC'  # params cannot be overwritten

    mlflow.end_run() 
Example #7
Source File: experiment.py    From nyaggle with MIT License
def log_metric(self, name: str, score: float):
        """
        Log a metric under the logging directory.

        Args:
            name:
                Metric name.
            score:
                Metric value.
        """
        name = _sanitize(name)
        score = _sanitize(score)
        self.metrics[name] = score

        if self.with_mlflow:
            import mlflow
            from mlflow.exceptions import MlflowException

            try:
                mlflow.log_metric(name, score)
            except MlflowException as e:
                warnings.warn('Error in logging metric {} to mlflow. Skipped. {}'.format(name, e)) 
Example #8
Source File: experiment.py    From nyaggle with MIT License
def add_leaderboard_score(logging_directory: str, score: float):
    """
    Record leaderboard score to the existing experiment directory.

    Args:
        logging_directory:
            The existing experiment directory to which the score is added
        score:
            Leaderboard score
    """
    with Experiment.continue_from(logging_directory) as e:
        e.log_metric('LB', score) 
Example #9
Source File: mlflow.py    From optuna with MIT License
def __call__(self, study: optuna.study.Study, trial: optuna.trial.FrozenTrial) -> None:

        # This sets the tracking_uri for MLflow.
        if self._tracking_uri is not None:
            mlflow.set_tracking_uri(self._tracking_uri)

        # This sets the experiment of MLflow.
        mlflow.set_experiment(study.study_name)

        with mlflow.start_run(run_name=str(trial.number)):

            # This sets the metric for MLflow.
            trial_value = trial.value if trial.value is not None else float("nan")
            mlflow.log_metric(self._metric_name, trial_value)

            # This sets the params for MLflow.
            mlflow.log_params(trial.params)

            # This sets the tags for MLflow.
            tags = {}  # type: Dict[str, str]
            tags["number"] = str(trial.number)
            tags["datetime_start"] = str(trial.datetime_start)
            tags["datetime_complete"] = str(trial.datetime_complete)

            # Set state and convert it to str and remove the common prefix.
            trial_state = trial.state
            if isinstance(trial_state, TrialState):
                tags["state"] = str(trial_state).split(".")[-1]

            # Set direction and convert it to str and remove the common prefix.
            study_direction = study.direction
            if isinstance(study_direction, StudyDirection):
                tags["direction"] = str(study_direction).split(".")[-1]

            tags.update(trial.user_attrs)
            distributions = {
                (k + "_distribution"): str(v) for (k, v) in trial.distributions.items()
            }
            tags.update(distributions)
            mlflow.set_tags(tags) 
Example #10
Source File: test_tracking.py    From mlflow with Apache License 2.0
def test_search_runs_multiple_experiments():
    experiment_ids = [mlflow.create_experiment("exp__{}".format(exp_id)) for exp_id in range(1, 4)]
    for eid in experiment_ids:
        with mlflow.start_run(experiment_id=eid):
            mlflow.log_metric("m0", 1)
            mlflow.log_metric("m_{}".format(eid), 2)

    assert len(MlflowClient().search_runs(experiment_ids, "metrics.m0 > 0", ViewType.ALL)) == 3

    assert len(MlflowClient().search_runs(experiment_ids, "metrics.m_1 > 0", ViewType.ALL)) == 1
    assert len(MlflowClient().search_runs(experiment_ids, "metrics.m_2 = 2", ViewType.ALL)) == 1
    assert len(MlflowClient().search_runs(experiment_ids, "metrics.m_3 < 4", ViewType.ALL)) == 1 
Example #11
Source File: test_tracking.py    From mlflow with Apache License 2.0
def test_log_metric_validation():
    with start_run() as active_run:
        run_id = active_run.info.run_id
        with pytest.raises(MlflowException) as e:
            mlflow.log_metric("name_1", "apple")
    assert e.value.error_code == ErrorCode.Name(INVALID_PARAMETER_VALUE)
    finished_run = tracking.MlflowClient().get_run(run_id)
    assert len(finished_run.data.metrics) == 0 
Example #12
Source File: mlflow.py    From tf-yarn with Apache License 2.0
def log_metric(key: str, value: float, step: int = None):
    mlflow.log_metric(key, value, step) 
Example #13
Source File: test_tracking.py    From mlflow with Apache License 2.0
def test_log_metric():
    with start_run() as active_run, mock.patch("time.time") as time_mock:
        time_mock.side_effect = [123 for _ in range(100)]
        run_id = active_run.info.run_id
        mlflow.log_metric("name_1", 25)
        mlflow.log_metric("name_2", -3)
        mlflow.log_metric("name_1", 30, 5)
        mlflow.log_metric("name_1", 40, -2)
        mlflow.log_metric("nested/nested/name", 40)
    finished_run = tracking.MlflowClient().get_run(run_id)
    # Validate metrics
    assert len(finished_run.data.metrics) == 3
    expected_pairs = {"name_1": 30, "name_2": -3, "nested/nested/name": 40}
    for key, value in finished_run.data.metrics.items():
        assert expected_pairs[key] == value
    client = tracking.MlflowClient()
    metric_history_name1 = client.get_metric_history(run_id, "name_1")
    assert set([(m.value, m.timestamp, m.step) for m in metric_history_name1]) == set([
        (25, 123 * 1000, 0),
        (30, 123 * 1000, 5),
        (40, 123 * 1000, -2),
    ])
    metric_history_name2 = client.get_metric_history(run_id, "name_2")
    assert set([(m.value, m.timestamp, m.step) for m in metric_history_name2]) == set([
        (-3, 123 * 1000, 0),
    ]) 
Example #14
Source File: test_tracking.py    From mlflow with Apache License 2.0
def test_metric_timestamp():
    with mlflow.start_run() as active_run:
        mlflow.log_metric("name_1", 25)
        mlflow.log_metric("name_1", 30)
        run_id = active_run.info.run_uuid
    # Check that metric timestamps are between run start and finish
    client = mlflow.tracking.MlflowClient()
    history = client.get_metric_history(run_id, "name_1")
    finished_run = client.get_run(run_id)
    assert len(history) == 2
    assert all([
        m.timestamp >= finished_run.info.start_time and m.timestamp <= finished_run.info.end_time
        for m in history
    ]) 
Example #15
Source File: generate_ui_test_data.py    From mlflow with Apache License 2.0
def log_metrics(metrics):
    for k, values in metrics.items():
        for v in values:
            mlflow.log_metric(k, v) 
Example #16
Source File: docker_tracking_test.py    From mlflow with Apache License 2.0
def call_tracking_apis():
    mlflow.log_metric("some_key", 3)
    with tempfile.NamedTemporaryFile("w") as temp_file:
        temp_file.write("Temporary content.")
        mlflow.log_artifact(temp_file.name) 
Example #17
Source File: check_conda_env.py    From mlflow with Apache License 2.0
def main(expected_env_name):
    actual_conda_env = os.environ.get("CONDA_DEFAULT_ENV", None)
    assert actual_conda_env == expected_env_name,\
        "Script expected to be run from conda env %s but was actually run from env" \
        " %s" % (expected_env_name, actual_conda_env)
    mlflow.log_metric("CPU usage", psutil.cpu_percent()) 
Example #18
Source File: tracking_test.py    From mlflow with Apache License 2.0
def call_tracking_apis():
    mlflow.log_metric("some_key", 3) 
Example #19
Source File: als.py    From mlflow with Apache License 2.0
def train_als(ratings_data, split_prop, max_iter, reg_param, rank, cold_start_strategy):
    seed = 42

    spark = pyspark.sql.SparkSession.builder.getOrCreate()

    ratings_df = spark.read.parquet(ratings_data)
    (training_df, test_df) = ratings_df.randomSplit([split_prop, 1 - split_prop], seed=seed)
    training_df.cache()
    test_df.cache()

    mlflow.log_metric("training_nrows", training_df.count())
    mlflow.log_metric("test_nrows", test_df.count())

    print('Training: {0}, test: {1}'.format(training_df.count(), test_df.count()))

    als = (ALS()
           .setUserCol("userId")
           .setItemCol("movieId")
           .setRatingCol("rating")
           .setPredictionCol("predictions")
           .setMaxIter(max_iter)
           .setSeed(seed)
           .setRegParam(reg_param)
           .setColdStartStrategy(cold_start_strategy)
           .setRank(rank))

    als_model = Pipeline(stages=[als]).fit(training_df)

    reg_eval = RegressionEvaluator(predictionCol="predictions", labelCol="rating", metricName="mse")

    predicted_test_dF = als_model.transform(test_df)

    test_mse = reg_eval.evaluate(predicted_test_dF)
    train_mse = reg_eval.evaluate(als_model.transform(training_df))

    print('The model had a MSE on the test set of {0}'.format(test_mse))
    print('The model had a MSE on the (train) set of {0}'.format(train_mse))
    mlflow.log_metric("test_mse", test_mse)
    mlflow.log_metric("train_mse", train_mse)
    mlflow.spark.log_model(als_model, "als-model") 
Example #20
Source File: train.py    From mlflow with Apache License 2.0
def eval_and_log_metrics(prefix, actual, pred, epoch):
    rmse = np.sqrt(mean_squared_error(actual, pred))
    mlflow.log_metric("{}_rmse".format(prefix), rmse, step=epoch)
    return rmse 
Example #21
Source File: random_forest.py    From mlflow with Apache License 2.0
def train_random_forest(ntrees):
    with mlflow.start_run():
        rf = H2ORandomForestEstimator(ntrees=ntrees)
        train_cols = [n for n in wine.col_names if n != "quality"]
        rf.train(train_cols, "quality", training_frame=train, validation_frame=test)

        mlflow.log_param("ntrees", ntrees)

        mlflow.log_metric("rmse", rf.rmse())
        mlflow.log_metric("r2", rf.r2())
        mlflow.log_metric("mae", rf.mae())

        mlflow.h2o.log_model(rf, "model") 
Example #22
Source File: train.py    From mlflow with Apache License 2.0
def on_train_end(self, *args, **kwargs):
        """
        Log the best model with MLflow and evaluate it on the train and validation data so that the
        metrics stored with MLflow reflect the logged model.
        """
        self._model.set_weights(self._best_weights)
        x, y = self._train
        train_res = self._model.evaluate(x=x, y=y)
        for name, value in zip(self._model.metrics_names, train_res):
            mlflow.log_metric("train_{}".format(name), value)
        x, y = self._valid
        valid_res = self._model.evaluate(x=x, y=y)
        for name, value in zip(self._model.metrics_names, valid_res):
            mlflow.log_metric("valid_{}".format(name), value)
        log_model(keras_model=self._model, **self._pyfunc_params) 
Example #23
Source File: main.py    From ai-platform with MIT License
def log_scalar(name, value, step):
    """Log a scalar value to both MLflow and TensorBoard"""
    writer.add_scalar(name, value, step)
    mlflow.log_metric(name, value) 
Example #24
Source File: mnist_tensorboard_artifact.py    From mlflow with Apache License 2.0
def log_scalar(name, value, step):
    """Log a scalar value to both MLflow and TensorBoard"""
    writer.add_scalar(name, value, step)
    mlflow.log_metric(name, value) 
Example #25
Source File: pipeline_train.py    From models with Apache License 2.0
def on_train_end(self, *args, **kwargs):
        """
        Log the best model with MLflow and evaluate it on the train and validation data so that the
        metrics stored with MLflow reflect the logged model.
        """
        self._model.set_weights(self._best_weights)
        x, y = self._train
        train_res = self._model.evaluate(x=x, y=y)
        for name, value in zip(self._model.metrics_names, train_res):
            mlflow.log_metric("train_{}".format(name), value)
        x, y = self._valid
        valid_res = self._model.evaluate(x=x, y=y)
        for name, value in zip(self._model.metrics_names, valid_res):
            mlflow.log_metric("valid_{}".format(name), value) 
Example #26
Source File: loggers.py    From OpenKiwi with GNU Affero General Public License v3.0
def log_metric(key, value):
        pass 
Example #27
Source File: pipeline_train.py    From models with Apache License 2.0
def on_train_end(self, *args, **kwargs):
        """
        Log the best model with MLflow and evaluate it on the train and validation data so that the
        metrics stored with MLflow reflect the logged model.
        """
        self._model.set_weights(self._best_weights)
        x, y = self._train
        train_res = self._model.evaluate(x=x, y=y)
        for name, value in zip(self._model.metrics_names, train_res):
            mlflow.log_metric("train_{}".format(name), value)
        x, y = self._valid
        valid_res = self._model.evaluate(x=x, y=y)
        for name, value in zip(self._model.metrics_names, valid_res):
            mlflow.log_metric("valid_{}".format(name), value)
        log_model(keras_model=self._model, **self._pyfunc_params) 
Example #28
Source File: loggers.py    From OpenKiwi with GNU Affero General Public License v3.0
def log_metric(key, value):
        mlflow.log_metric(key, value) 
Example #29
Source File: task.py    From ml-on-gcp with Apache License 2.0
def _mlflow_log_metrics(metrics, metric_name):
    """Record metric value during each epoch using the step parameter in
    mlflow.log_metric.

    :param metrics: Dict mapping each metric name to its sequence of per-epoch values.
    :param metric_name: Name of the metric to log.
    :return: None
    """
    for epoch, metric in enumerate(metrics[metric_name], 1):
        mlflow.log_metric(metric_name, metric, step=epoch)
Example #30
Source File: mlflow_logger.py    From LaSO with BSD 3-Clause "New" or "Revised" License
def _update(
        self,
        engine,                 # type: Engine
        attach_id,              # type: int
        prefix,                 # type: str
        update_period,          # type: int
        metric_names=None,      # type: List
        output_transform=None,  # type: Callable
        param_history=False     # type: bool
    ):
        step = self.metrics_step[attach_id]
        self.metrics_step[attach_id] += 1
        if step % update_period != 0:
            return

        #
        # Get all the metrics
        #
        metrics = []
        if metric_names is not None:
            if not all(metric in engine.state.metrics for metric in metric_names):
                raise KeyError("metrics not found in engine.state.metrics")

            metrics.extend([(name, engine.state.metrics[name]) for name in metric_names])

        if output_transform is not None:
            output_dict = output_transform(engine.state.output)

            if not isinstance(output_dict, dict):
                output_dict = {"output": output_dict}

            metrics.extend([(name, value) for name, value in output_dict.items()])

        if param_history:
            metrics.extend([(name, value[-1][0]) for name, value in engine.state.param_history.items()])

        if not metrics:
            return

        for metric_name, new_value in metrics:
            mlflow.log_metric(prefix + metric_name, new_value)