Python Examples of mlflow.set_experiment

Source File: test_mlflow.py From interpret-community with MIT License

6 votes

def test_upload_as_model(self, iris, tabular_explainer, tracking_uri):
    """Log a global explanation to MLflow and check that it round-trips.

    A model is built from the training split of ``iris``, a global
    explanation is computed on the test split, logged inside a new run of
    ``TEST_EXPERIMENT``, fetched back by run id and compared with the
    in-memory explanation.
    """
    mlflow.set_tracking_uri(tracking_uri)

    train_features = iris[DatasetConstants.X_TRAIN]
    test_features = iris[DatasetConstants.X_TEST]
    train_labels = iris[DatasetConstants.Y_TRAIN]

    classifier = create_sklearn_random_forest_classifier(train_features, train_labels)
    expected = tabular_explainer(classifier, train_features).explain_global(test_features)

    mlflow.set_experiment(TEST_EXPERIMENT)
    with mlflow.start_run() as active_run:
        log_explanation(TEST_EXPLANATION, expected)
        os.makedirs(TEST_DOWNLOAD, exist_ok=True)
        logged_run_id = active_run.info.run_id

    # The explanation is looked up by run id after the ``with`` block has exited.
    fetched = get_explanation(logged_run_id, TEST_EXPLANATION)
    _assert_explanation_equivalence(expected, fetched)

Source File: test_tracking.py From mlflow with Apache License 2.0

5 votes

def test_get_artifact_uri_appends_to_uri_path_component_correctly(
        artifact_location, expected_uri_format):
    """Check ``mlflow.get_artifact_uri`` for several artifact paths.

    For every path the fluent API result must equal both the result of
    ``tracking.artifact_utils.get_artifact_uri`` and ``expected_uri_format``
    filled in with the run id and the path stripped of leading slashes.
    """
    experiment_name = "get-artifact-uri-test"
    MlflowClient().create_experiment(experiment_name, artifact_location=artifact_location)
    mlflow.set_experiment(experiment_name)

    with mlflow.start_run():
        current_run_id = mlflow.active_run().info.run_id
        for relative_path in ("path/to/artifact", "/artifact/path", "arty.txt"):
            fluent_uri = mlflow.get_artifact_uri(relative_path)

            util_uri = tracking.artifact_utils.get_artifact_uri(current_run_id, relative_path)
            assert fluent_uri == util_uri

            expected_uri = expected_uri_format.format(
                run_id=current_run_id, path=relative_path.lstrip("/"))
            assert fluent_uri == expected_uri

Source File: test_dnn.py From mlflow-apps with Apache License 2.0

5 votes

def test_dnn():
    """Run the dnn-regression app through ``run`` and check its predictions.

    Everything is written into a temporary directory that also serves as the
    tracking URI; the previous tracking URI is restored afterwards.
    """
    previous_uri = tracking.get_tracking_uri()
    # Keyword arguments shared by both ``run`` invocations below.
    local_run_options = dict(
        mode="local", cluster_spec=None, git_username=None, git_password=None,
        use_conda=True, storage_dir=None)
    try:
        with TempDir(chdr=False, remove_on_exit=True) as tmp:
            diamonds = tmp.path("diamonds")
            estimator = tmp.path("estimator")
            artifacts = tmp.path("artifacts")
            for directory in (diamonds, estimator, artifacts):
                os.mkdir(directory)

            tracking.set_tracking_uri(artifacts)
            mlflow.set_experiment("test-experiment")

            # Download the diamonds dataset via mlflow run
            run(".", entry_point="main", version=None,
                parameters={"dest-dir": diamonds},
                **local_run_options)

            # Run the main dnn app via mlflow
            test_parquet = os.path.join(diamonds, "test_diamonds.parquet")
            app_parameters = {
                "model-dir": estimator,
                "train": os.path.join(diamonds, "train_diamonds.parquet"),
                "test": test_parquet,
                "hidden-units": "30,30",
                "label-col": "price",
                "steps": 5000,
                "batch-size": 128,
            }
            submitted_run = run(
                "apps/dnn-regression", entry_point="main", version=None,
                parameters=app_parameters,
                **local_run_options)

            # Load the saved model as a pyfunc and predict on the test data.
            model = load_pyfunc("model", submitted_run.run_id)
            test_frame = pandas.read_parquet(test_parquet)
            predictions = model.predict(test_frame)

            assert isinstance(predictions['predictions'][0], numpy.float32)
    finally:
        tracking.set_tracking_uri(previous_uri)

Source File: test_mlflow.py From interpret-community with MIT License

5 votes

def test_upload_two_explanations(self, iris, tabular_explainer, tracking_uri):
    """Log a global and a local explanation in one run, then verify the global one.

    Both explanations are logged under their own names inside a single run of
    ``TEST_EXPERIMENT``. Only the global explanation is fetched back and
    compared with the in-memory object.
    """
    mlflow.set_tracking_uri(tracking_uri)

    train_features = iris[DatasetConstants.X_TRAIN]
    test_features = iris[DatasetConstants.X_TEST]
    train_labels = iris[DatasetConstants.Y_TRAIN]

    classifier = create_sklearn_random_forest_classifier(train_features, train_labels)
    explainer = tabular_explainer(classifier, train_features)

    expected_global = explainer.explain_global(test_features)
    expected_local = explainer.explain_local(test_features)

    mlflow.set_experiment(TEST_EXPERIMENT)
    with mlflow.start_run() as active_run:
        log_explanation('global_explanation', expected_global)
        log_explanation('local_explanation', expected_local)
        os.makedirs(TEST_DOWNLOAD, exist_ok=True)
        logged_run_id = active_run.info.run_id

    fetched_global = get_explanation(logged_run_id, 'global_explanation')
    _assert_explanation_equivalence(expected_global, fetched_global)

Source File: mlflow.py From optuna with MIT License

5 votes

def __call__(self, study: optuna.study.Study, trial: optuna.trial.FrozenTrial) -> None:
    """Log one trial of ``study`` as an MLflow run.

    The run is created in an experiment named after the study and carries the
    trial value as a metric, the trial parameters as params, and a set of
    descriptive tags.
    """
    # Only override the tracking URI when one was configured.
    if self._tracking_uri is not None:
        mlflow.set_tracking_uri(self._tracking_uri)

    # The MLflow experiment is named after the study.
    mlflow.set_experiment(study.study_name)

    with mlflow.start_run(run_name=str(trial.number)):
        # Metric: the trial value, or NaN when the trial has no value.
        metric_value = trial.value
        if metric_value is None:
            metric_value = float("nan")
        mlflow.log_metric(self._metric_name, metric_value)

        # Params: the parameters of the trial.
        mlflow.log_params(trial.params)

        # Tags: start with the basic trial bookkeeping.
        tags = {
            "number": str(trial.number),
            "datetime_start": str(trial.datetime_start),
            "datetime_complete": str(trial.datetime_complete),
        }  # type: Dict[str, str]

        # Enum values are stringified and reduced to the part after the last dot.
        state = trial.state
        if isinstance(state, TrialState):
            tags["state"] = str(state).rsplit(".", 1)[-1]

        direction = study.direction
        if isinstance(direction, StudyDirection):
            tags["direction"] = str(direction).rsplit(".", 1)[-1]

        # User attributes are merged next, then one "<param>_distribution" tag per
        # parameter; later updates win on key clashes.
        tags.update(trial.user_attrs)
        tags.update({
            name + "_distribution": str(distribution)
            for name, distribution in trial.distributions.items()
        })

        mlflow.set_tags(tags)

Source File: test_fluent.py From mlflow with Apache License 2.0

5 votes

def test_start_run_existing_run_from_environment_with_set_environment(empty_active_run_stack):
    """Expect ``MlflowException`` when an experiment is set while a run id comes from the env.

    ``_RUN_ID_ENV_VAR`` is set to a random run id and ``MlflowClient.get_run``
    is patched to return a mock run whose lifecycle stage is active. Calling
    ``set_experiment`` followed by ``start_run`` must then raise.
    """
    env_run_id = uuid.uuid4().hex

    stub_run = mock.Mock()
    stub_run.info.lifecycle_stage = LifecycleStage.ACTIVE

    with mock.patch.dict("os.environ", {_RUN_ID_ENV_VAR: env_run_id}):
        with mock.patch.object(MlflowClient, "get_run", return_value=stub_run):
            with pytest.raises(MlflowException):
                set_experiment("test-run")
                start_run()

Source File: test_fluent.py From mlflow with Apache License 2.0

5 votes

def test_get_experiment_id_in_databricks_with_active_experiment_returns_active_experiment_id():
    """Check that an explicitly set experiment wins over the notebook id.

    An experiment with a random name is created and made active. The notebook
    detection helpers are then patched so that a notebook with a different id
    is reported, and ``_get_experiment_id`` must still return the id of the
    active experiment.
    """
    with TempDir(chdr=True):
        # randint needs integer bounds: a float such as 1e6 is deprecated as a
        # bound since Python 3.10 and raises TypeError from Python 3.12.
        exp_name = "random experiment %d" % random.randint(1, 10 ** 6)
        exp_id = mlflow.create_experiment(exp_name)
        mlflow.set_experiment(exp_name)
        # Any value different from exp_id works as the fake notebook id.
        notebook_id = str(int(exp_id) + 73)

    with mock.patch("mlflow.tracking.fluent.is_in_databricks_notebook") as notebook_detection_mock,\
            mock.patch("mlflow.tracking.fluent.get_notebook_id") as notebook_id_mock:
        notebook_detection_mock.return_value = True
        notebook_id_mock.return_value = notebook_id

        assert _get_experiment_id() != notebook_id
        assert _get_experiment_id() == exp_id

Source File: utils.py From FARM with Apache License 2.0

5 votes

def init_experiment(self, experiment_name, run_name=None, nested=True):
    """Point MLflow at ``self.tracking_uri`` and start a run.

    Args:
        experiment_name: name of the MLflow experiment to activate.
        run_name: optional name passed to ``mlflow.start_run``.
        nested: passed to ``mlflow.start_run`` as ``nested``.

    Raises:
        Exception: if a ``ConnectionError`` occurs while configuring MLflow
            or starting the run. The original error is attached as the cause.
    """
    try:
        mlflow.set_tracking_uri(self.tracking_uri)
        mlflow.set_experiment(experiment_name)
        mlflow.start_run(run_name=run_name, nested=nested)
    except ConnectionError as err:
        # Chain explicitly so the traceback presents the connection failure as
        # the cause rather than as an error raised while handling another one.
        raise Exception(
            f"MLFlow cannot connect to the remote server at {self.tracking_uri}.\n"
            "MLFlow also supports logging runs locally to files. Set the MLFlowLogger "
            "tracking_uri to an empty string to use that."
        ) from err

Source File: test_tracking.py From mlflow with Apache License 2.0

5 votes

def test_start_run_exp_id_0():
    """Check that ``experiment_id=0`` passed to ``start_run`` is honored.

    After activating a named experiment, a run started without arguments must
    belong to that experiment, while a run started with ``experiment_id=0``
    must belong to ``FileStore.DEFAULT_EXPERIMENT_ID``.
    """
    experiment_name = "some-experiment"
    mlflow.set_experiment(experiment_name)

    # No explicit id: the run lands in the experiment that was just set.
    with mlflow.start_run() as named_run:
        named_exp_id = named_run.info.experiment_id
        assert named_exp_id != FileStore.DEFAULT_EXPERIMENT_ID
        experiment = MlflowClient().get_experiment(named_exp_id)
        assert experiment.name == experiment_name

    # Explicit id 0: the specified experiment must be used instead.
    with mlflow.start_run(experiment_id=0) as default_run:
        assert default_run.info.experiment_id == FileStore.DEFAULT_EXPERIMENT_ID

Source File: test_tracking.py From mlflow with Apache License 2.0

5 votes

def test_set_experiment_with_zero_id(reset_mock):
    """Check that an existing experiment with id 0 is not re-created.

    ``get_experiment_by_name`` is replaced by a mock returning an active
    experiment whose id is 0. ``set_experiment`` must look the experiment up
    exactly once and must not call ``create_experiment``.
    """
    existing_experiment = attrdict.AttrDict(
        experiment_id=0,
        lifecycle_stage=LifecycleStage.ACTIVE)
    lookup_mock = mock.Mock(return_value=existing_experiment)
    reset_mock(MlflowClient, "get_experiment_by_name", lookup_mock)
    reset_mock(MlflowClient, "create_experiment", mock.Mock())

    mlflow.set_experiment("my_exp")

    MlflowClient.get_experiment_by_name.assert_called_once()
    MlflowClient.create_experiment.assert_not_called()

Source File: test_tracking.py From mlflow with Apache License 2.0

5 votes

def test_set_experiment_with_deleted_experiment_name():
    """Check that setting an experiment by the name of a deleted one raises.

    The experiment is set once, its id is read from a run started in it, the
    experiment is deleted through the client, and a second ``set_experiment``
    with the same name must raise ``MlflowException``.
    """
    experiment_name = "dead_exp"
    mlflow.set_experiment(experiment_name)

    with start_run() as created_run:
        deleted_id = created_run.info.experiment_id

    client = tracking.MlflowClient()
    client.delete_experiment(deleted_id)

    with pytest.raises(MlflowException):
        mlflow.set_experiment(experiment_name)

Source File: test_tracking.py From mlflow with Apache License 2.0

5 votes

def test_set_experiment():
    """Exercise ``mlflow.set_experiment`` with invalid and valid names.

    A missing argument must raise ``TypeError``; ``None`` and the empty string
    must raise some exception. For valid names, runs started afterwards must
    belong to the experiment that was set, whether it was created beforehand
    or not.
    """
    with pytest.raises(TypeError):
        mlflow.set_experiment()  # pylint: disable=no-value-for-parameter

    for invalid_name in (None, ""):
        with pytest.raises(Exception):
            mlflow.set_experiment(invalid_name)

    # Experiment created explicitly before being set.
    first_name = "random_exp"
    first_id = mlflow.create_experiment(first_name)
    mlflow.set_experiment(first_name)
    with start_run() as first_run:
        assert run_experiment_id(first_run) == first_id if False else \
            first_run.info.experiment_id == first_id

    # Experiment that was never created explicitly; it is looked up by name afterwards.
    second_name = "another_experiment"
    mlflow.set_experiment(second_name)
    second_experiment = mlflow.tracking.MlflowClient().get_experiment_by_name(second_name)
    with start_run() as second_run:
        assert second_run.info.experiment_id == second_experiment.experiment_id

Source File: experiment.py From LaSO with BSD 3-Clause "New" or "Revised" License

5 votes

def start(self):
    """Start the whole thing.

    Sets up logging, optionally writes the configuration, then executes
    ``self.run()`` inside an MLflow run of the experiment named
    ``self.name``. The results path and every configurable trait declared on
    the class are logged as MLflow params before ``self.run()`` is called.
    """

    self._setup_logging()

    if self.generate_config:
        self.write_config()

    #
    # Setup mlflow
    #
    import mlflow
    mlflow.set_tracking_uri(self.mlflow_server)
    # set_experiment makes the experiment active. Its return value is not an
    # experiment id (None on older MLflow releases, an Experiment object on
    # newer ones), so it must not be forwarded to start_run().
    mlflow.set_experiment(self.name)

    #
    # Run the script under mlflow; the run is created in the active experiment.
    #
    with mlflow.start_run():
        #
        # Log the run parameters to mlflow.
        #
        mlflow.log_param("results_path", self.results_path)

        cls = self.__class__
        # Sorted by trait key so params are logged in a stable order.
        for k, trait in sorted(cls.class_own_traits(config=True).items()):
            mlflow.log_param(trait.name, repr(trait.get(self)))

        self.run()

Source File: evaluate.py From orbyter-cookiecutter with MIT License

4 votes

def log_experiment(
    params=None,
    metrics=None,
    artifacts=None,
    experiment_name="my_experiment",
    mlflow_tracking_uri="./experiments",
    mlflow_artifact_location=None,
):
    """
    Evaluate the model and log it with mlflow

    Args:
        params (dict): dictionary of parameters to log. Defaults to empty.
        metrics (dict): dictionary of metrics to log. Defaults to empty.
        artifacts (dict): dictionary of artifacts (path) to log. Only the
            values (paths) are passed to mlflow; the keys are used in log
            messages. Defaults to empty.
        experiment_name (str): experiment name
        mlflow_tracking_uri (str): path or sql url for mlflow logging
        mlflow_artifact_location (str): path or s3bucket url for artifact
            logging. If none, it will default to a standard.

    Returns:
        None
    """
    # None defaults avoid sharing one mutable dict across calls.
    params = {} if params is None else params
    metrics = {} if metrics is None else metrics
    artifacts = {} if artifacts is None else artifacts

    # The tracking URI has to be set before any other mlflow call. Otherwise
    # the experiment is created in whatever store was active before, and the
    # URI is never applied when the experiment already exists.
    mlflow.set_tracking_uri(mlflow_tracking_uri)

    # Try to create an experiment if it doesn't exist
    try:
        exp_0 = mlflow.create_experiment(
            experiment_name, artifact_location=mlflow_artifact_location
        )
        logger.info(f"Created new experiment id: {exp_0}")
    except Exception as E:
        # Best effort: creation fails when the experiment already exists, in
        # which case the existing one is reused below.
        logger.info(f"{E}. Writing to same URI/artifact store")
    # Always set the experiment
    mlflow.set_experiment(experiment_name)
    logger.info(f"Running experiment {experiment_name}")
    with mlflow.start_run():
        # param logging
        for key, val in params.items():
            logger.info(f"Logging param {key}")
            mlflow.log_param(key, val)
        # metric logging
        for key, val in metrics.items():
            logger.info(f"Logging metric {key}")
            mlflow.log_metric(key, val)
        # artifact logging
        for key, val in artifacts.items():
            logger.info(f"Logging artifact {key}")
            mlflow.log_artifact(val)

Source File: test_gbt.py From mlflow-apps with Apache License 2.0

4 votes

def test_gbt():
    """Run the gbt-regression app through ``run`` and check its predictions.

    Data and tracking output are written into a temporary directory; the
    previous tracking URI is restored before the directory is left.
    """
    previous_uri = tracking.get_tracking_uri()
    # Keyword arguments shared by both ``run`` invocations below.
    local_run_options = dict(
        mode="local", cluster_spec=None, git_username=None, git_password=None,
        use_conda=True, storage_dir=None)
    with TempDir(chdr=False, remove_on_exit=True) as tmp:
        try:
            diamonds = tmp.path("diamonds")
            artifacts = tmp.path("artifacts")
            for directory in (diamonds, artifacts):
                os.mkdir(directory)

            tracking.set_tracking_uri(artifacts)
            mlflow.set_experiment("test-experiment")

            # Download the diamonds dataset via mlflow run
            run(".", entry_point="main", version=None,
                parameters={"dest-dir": diamonds},
                **local_run_options)

            # Run the main gbt app via mlflow
            test_parquet = os.path.join(diamonds, "test_diamonds.parquet")
            app_parameters = {
                "train": os.path.join(diamonds, "train_diamonds.parquet"),
                "test": test_parquet,
                "n-trees": 10,
                "m-depth": 3,
                "learning-rate": 0.1,
                "loss": "rmse",
                "label-col": "price",
            }
            submitted_run = run(
                "apps/gbt-regression", entry_point="main", version=None,
                parameters=app_parameters,
                **local_run_options)

            model = load_pyfunc("model", run_id=submitted_run.run_id)

            # The price column is dropped so only the remaining columns are used to predict.
            features = pandas.read_parquet(test_parquet).drop(columns="price")

            # Predict with the saved pyfunc and check the element type.
            predictions = model.predict(features)
            assert isinstance(predictions[0], numpy.float32)
        finally:
            tracking.set_tracking_uri(previous_uri)

Source File: test_linear.py From mlflow-apps with Apache License 2.0

4 votes

def test_linear():
    """Run the linear-regression app through ``run`` and check its predictions.

    Data and tracking output are written into a temporary directory; the
    previous tracking URI is restored before the directory is left.
    """
    previous_uri = tracking.get_tracking_uri()
    # Keyword arguments shared by both ``run`` invocations below.
    local_run_options = dict(
        mode="local", cluster_spec=None, git_username=None, git_password=None,
        use_conda=True, storage_dir=None)
    with TempDir(chdr=False, remove_on_exit=True) as tmp:
        try:
            diamonds = tmp.path("diamonds")
            root_tracking_dir = tmp.path("root_tracking_dir")
            for directory in (diamonds, root_tracking_dir):
                os.mkdir(directory)

            tracking.set_tracking_uri(root_tracking_dir)
            mlflow.set_experiment("test-experiment")

            # Download the diamonds dataset via mlflow run
            run(".", entry_point="main", version=None,
                parameters={"dest-dir": diamonds},
                **local_run_options)

            # Run the main linear app via mlflow
            test_parquet = os.path.join(diamonds, "test_diamonds.parquet")
            app_parameters = {
                "train": os.path.join(diamonds, "train_diamonds.parquet"),
                "test": test_parquet,
                "alpha": 0.001,
                "l1-ratio": 0.5,
                "label-col": "price",
            }
            submitted_run = run(
                "apps/linear-regression", entry_point="main", version=None,
                parameters=app_parameters,
                **local_run_options)

            model = load_pyfunc(path="model", run_id=submitted_run.run_id)

            # The price column is dropped so only the remaining columns are used to predict.
            features = pandas.read_parquet(test_parquet).drop(columns="price")

            # Predict with the saved pyfunc and check the element type.
            predictions = model.predict(features)
            assert isinstance(predictions[0], numpy.float64)
        finally:
            tracking.set_tracking_uri(previous_uri)

Source File: mlflow_utils.py From nucleus7 with Mozilla Public License 2.0

4 votes

def create_new_or_continue_experiment(project_dir: str):
    """
    Activate the mlflow experiment that belongs to a project directory.

    The experiment name is derived from ``project_dir``. When the
    ``MLFLOW_TRACKING_URI`` environment variable is not set, the tracking URI
    becomes the resolved path of an ``mlruns`` directory located next to
    ``project_dir``.

    Parameters
    ----------
    project_dir
        project directory
    """
    # Clear any tracking URI that was set programmatically before.
    mlflow.set_tracking_uri(None)
    experiment_name = project_utils.get_project_name_from_directory(project_dir)

    uri_from_environment = "MLFLOW_TRACKING_URI" in os.environ
    if not uri_from_environment:
        parent_dir = os.path.dirname(project_dir)
        mlflow.set_tracking_uri(os.path.realpath(os.path.join(parent_dir, "mlruns")))

    mlflow.set_experiment(experiment_name)

Python mlflow.set_experiment() Examples