Python Examples of mlflow.log_artifact

Source File: experiment.py From nyaggle with MIT License

6 votes

def stop(self):
        """
        Stop current experiment.

        Writes the metrics and params to ``metrics.json`` / ``params.json``, closes the
        logger handlers unless ``is_custom`` is set, and, when ``with_mlflow`` is enabled,
        logs the log file and both JSON files as mlflow artifacts. The mlflow run is ended
        unless ``inherit_existing_run`` is set.
        """
        self._save_dict(self.metrics, 'metrics.json')
        self._save_dict(self.params, 'params.json')

        if not self.is_custom:
            for handler in self.logger.handlers:
                handler.close()

        if not self.with_mlflow:
            return

        import mlflow
        from mlflow.exceptions import MlflowException

        try:
            mlflow.log_artifact(self.log_path)
            for json_name in ('metrics.json', 'params.json'):
                mlflow.log_artifact(os.path.join(self.logging_directory, json_name))
        except MlflowException as e:
            # A failed upload is reported as a warning rather than raised.
            warnings.warn('Error in saving artifacts to mlflow. The result may not be saved.: {}'.format(e))

        if not self.inherit_existing_run:
            mlflow.end_run()

Source File: experiment.py From nyaggle with MIT License

6 votes

def log_numpy(self, name: str, array: np.ndarray):
        """
        Log a numpy ndarray under the logging directory.

        Args:
            name:
                Name of the file. A .npy extension will be appended to the file name if it does not already have one.
            array:
                Array data to be saved.
        """
        path = os.path.join(self.logging_directory, name)
        # np.save appends '.npy' only when the name lacks it. Normalise the path up
        # front so the file written and the file logged to mlflow are the same one
        # (previously 'x.npy' was saved as 'x.npy' but logged as 'x.npy.npy').
        if not path.endswith('.npy'):
            path += '.npy'
        np.save(path, array)

        if self.with_mlflow:
            import mlflow
            mlflow.log_artifact(path)

Source File: test_cli.py From mlflow with Apache License 2.0

6 votes

def test_download_artifacts_from_uri():
    """Download a logged artifact through the CLI using both a runs:/ URI and the artifact URI."""
    with mlflow.start_run() as run, TempDir() as tmp:
        local_path = tmp.path("test")
        with open(local_path, "w") as f:
            f.write("test")
        mlflow.log_artifact(local_path, "test")

    base_command = ["mlflow", "artifacts", "download", "-u"]
    candidate_uris = (
        # Run-relative URI
        "runs:/{run_id}/test".format(run_id=run.info.run_id),
        # Artifact-store URI of the same directory
        posixpath.join(run.info.artifact_uri, "test"),
    )
    for uri in candidate_uris:
        proc = Popen(base_command + [uri], stdout=PIPE, stderr=STDOUT)
        # The last line of the CLI output is used as the download directory.
        download_dir = proc.stdout.readlines()[-1].strip()
        first_entry = os.listdir(download_dir)[0]
        with open(os.path.join(download_dir, first_entry), "r") as f:
            assert f.read() == "test"

Source File: experiment.py From nyaggle with MIT License

6 votes

def log_artifact(self, src_file_path: str):
        """
        Make a copy of the file under the logging directory.

        Args:
            src_file_path:
                Path of the file. If path is not a child of the logging directory, the file will be copied.
                If ``with_mlflow`` is True, ``mlflow.log_artifact`` will be called (then another copy will be made).
        """
        logging_path = os.path.abspath(self.logging_directory)
        src_file_path = os.path.abspath(src_file_path)

        try:
            is_child = os.path.commonpath([logging_path]) == os.path.commonpath([logging_path, src_file_path])
        except ValueError:
            # commonpath raises when the paths share no common root (e.g. different
            # drives on Windows); such a file is certainly outside the logging directory.
            is_child = False

        if not is_child:
            # Copy from the absolute source path. Using only the basename would
            # resolve against the current working directory and miss the file.
            shutil.copy(src_file_path, self.logging_directory)

        if self.with_mlflow:
            import mlflow
            mlflow.log_artifact(src_file_path)

Source File: tensorflow.py From mlflow with Apache License 2.0

6 votes

def on_train_begin(self, logs=None):  # pylint: disable=unused-argument
        """Log every optimizer config entry as an ``opt_``-prefixed param and the model summary as an artifact."""
        optimizer_config = self.model.optimizer.get_config()
        for key in optimizer_config:
            try_mlflow_log(mlflow.log_param, "opt_" + key, optimizer_config[key])

        # model.summary() emits one line per print_fn call; collect them and join.
        summary_lines = []
        self.model.summary(print_fn=summary_lines.append)
        summary_text = '\n'.join(summary_lines)

        scratch_dir = tempfile.mkdtemp()
        try:
            summary_path = os.path.join(scratch_dir, "model_summary.txt")
            with open(summary_path, 'w') as out:
                out.write(summary_text)
            try_mlflow_log(mlflow.log_artifact, local_path=summary_path)
        finally:
            # Remove the scratch directory whether or not logging succeeded.
            shutil.rmtree(scratch_dir)

Source File: load_raw_data.py From mlflow with Apache License 2.0

6 votes

def load_raw_data(url):
    """Download the zip archive at ``url``, extract it, and log its ``ratings.csv`` to mlflow.

    The archive is streamed to a temporary directory, extracted there, and
    ``ml-20m/ratings.csv`` is logged under the ``ratings-csv-dir`` artifact path of a
    new mlflow run.

    Args:
        url: Location of the zip archive to download.
    """
    with mlflow.start_run():
        # Use a self-cleaning scratch directory so the downloaded archive and its
        # extracted contents do not pile up on disk after the artifact is logged.
        with tempfile.TemporaryDirectory() as local_dir:
            local_filename = os.path.join(local_dir, "ml-20m.zip")
            print("Downloading %s to %s" % (url, local_filename))
            r = requests.get(url, stream=True)
            with open(local_filename, 'wb') as f:
                for chunk in r.iter_content(chunk_size=1024):
                    if chunk:  # filter out keep-alive new chunks
                        f.write(chunk)

            extracted_dir = os.path.join(local_dir, 'ml-20m')
            print("Extracting %s into %s" % (local_filename, extracted_dir))
            with zipfile.ZipFile(local_filename, 'r') as zip_ref:
                zip_ref.extractall(local_dir)

            ratings_file = os.path.join(extracted_dir, 'ratings.csv')

            print("Uploading ratings: %s" % ratings_file)
            # Must run while the scratch directory still exists.
            mlflow.log_artifact(ratings_file, "ratings-csv-dir")

Source File: test_artifact_utils.py From mlflow with Apache License 2.0

6 votes

def test_download_artifact_from_absolute_uri_persists_data_to_specified_output_directory(tmpdir):
    """Log one text file under a sub-directory, download that URI, and check the copy on disk."""
    file_name = "artifact.txt"
    expected_text = "Sample artifact text"
    source_path = tmpdir.join(file_name).strpath
    with open(source_path, "w") as out:
        out.write(expected_text)

    subdir = "logged_artifact"
    with mlflow.start_run():
        mlflow.log_artifact(local_path=source_path, artifact_path=subdir)
        artifact_uri = mlflow.get_artifact_uri(artifact_path=subdir)

    output_dir = tmpdir.join("artifact_output").strpath
    os.makedirs(output_dir)
    _download_artifact_from_uri(artifact_uri=artifact_uri, output_path=output_dir)

    # The sub-directory is recreated inside the output directory, with the file in it.
    downloaded_subdir = os.path.join(output_dir, subdir)
    assert subdir in os.listdir(output_dir)
    assert file_name in os.listdir(downloaded_subdir)
    with open(os.path.join(downloaded_subdir, file_name), "r") as f:
        assert f.read() == expected_text

Source File: exp_tracking.py From ignite with BSD 3-Clause "New" or "Revised" License

5 votes

def _mlflow_log_artifact(fp):
    """Forward ``fp`` to ``mlflow.log_artifact``."""
    mlflow.log_artifact(fp)

Source File: mlflow.py From tf-yarn with Apache License 2.0

5 votes

def log_artifact(local_path: str, artifact_path: str = None) -> None:
    """Forward both arguments unchanged to ``mlflow.log_artifact``.

    Args:
        local_path: Passed as the first positional argument.
        artifact_path: Passed as the second positional argument; defaults to ``None``.
    """
    mlflow.log_artifact(local_path, artifact_path)

Source File: exp_tracking.py From ignite with BSD 3-Clause "New" or "Revised" License

5 votes

def _plx_log_artifact(fp):
    """Log ``fp`` through a newly constructed Polyaxon ``Experiment``."""
    # Function-local import: polyaxon_client is only needed when this is called.
    from polyaxon_client.tracking import Experiment

    Experiment().log_artifact(fp)

Source File: exp_tracking.py From ignite with BSD 3-Clause "New" or "Revised" License

5 votes

def _mlflow_log_artifact(fp):
    """Forward ``fp`` to ``mlflow.log_artifact``."""
    mlflow.log_artifact(fp)

Source File: exp_tracking.py From ignite with BSD 3-Clause "New" or "Revised" License

5 votes

def _plx_log_artifact(fp):
    """Log ``fp`` through a newly constructed Polyaxon ``Experiment``."""
    # Function-local import: polyaxon_client is only needed when this is called.
    from polyaxon_client.tracking import Experiment

    Experiment().log_artifact(fp)

Source File: test_tracking.py From mlflow with Apache License 2.0

5 votes

def test_log_artifact():
    """Check ``log_artifact`` and ``log_artifacts`` with and without a parent artifact directory."""
    artifact_src_dir = tempfile.mkdtemp()
    # Create artifacts
    src_paths = []
    for i in range(2):
        fd, path = tempfile.mkstemp(dir=artifact_src_dir)
        # mkstemp returns an open descriptor; write through it so it gets closed.
        with os.fdopen(fd, "w") as handle:
            handle.write(str(i))
        src_paths.append(path)
    path0 = src_paths[0]
    # Log an artifact, verify it exists in the directory returned by get_artifact_uri
    # after the run finishes
    artifact_parent_dirs = ["some_parent_dir", None]
    for parent_dir in artifact_parent_dirs:
        with start_run():
            artifact_uri = mlflow.get_artifact_uri()
            run_artifact_dir = local_file_uri_to_path(artifact_uri)
            mlflow.log_artifact(path0, parent_dir)
        expected_dir = os.path.join(run_artifact_dir, parent_dir) \
            if parent_dir is not None else run_artifact_dir
        assert os.listdir(expected_dir) == [os.path.basename(path0)]
        # path0 is absolute, so joining it directly would discard expected_dir and
        # compare the source file with itself; join the basename instead.
        logged_artifact_path = os.path.join(expected_dir, os.path.basename(path0))
        assert filecmp.cmp(logged_artifact_path, path0, shallow=False)
    # Log multiple artifacts, verify they exist in the directory returned by get_artifact_uri
    for parent_dir in artifact_parent_dirs:
        with start_run():
            artifact_uri = mlflow.get_artifact_uri()
            run_artifact_dir = local_file_uri_to_path(artifact_uri)

            mlflow.log_artifacts(artifact_src_dir, parent_dir)
        # Check that the logged artifacts match
        expected_artifact_output_dir = os.path.join(run_artifact_dir, parent_dir) \
            if parent_dir is not None else run_artifact_dir
        dir_comparison = filecmp.dircmp(artifact_src_dir, expected_artifact_output_dir)
        assert len(dir_comparison.left_only) == 0
        assert len(dir_comparison.right_only) == 0
        assert len(dir_comparison.diff_files) == 0
        assert len(dir_comparison.funny_files) == 0

Source File: test_tracking.py From mlflow with Apache License 2.0

5 votes

def test_log_artifact_with_dirs(tmpdir):
    """Check that ``log_artifact`` accepts a directory, with and without a parent artifact path."""
    # Case 1: a directory holding two files and one empty sub-directory, no parent path.
    parent = tmpdir.mkdir("parent")
    for file_name in ("file0", "file1"):
        parent.join(file_name).write("something")
    parent.mkdir("child")
    with start_run():
        run_artifact_dir = local_file_uri_to_path(mlflow.get_artifact_uri())
        mlflow.log_artifact(str(parent))
        parent_name = os.path.basename(str(parent))
        logged_parent = os.path.join(run_artifact_dir, parent_name)
        assert os.listdir(run_artifact_dir) == [parent_name]
        assert set(os.listdir(logged_parent)) == {'child', 'file0', 'file1'}
        with open(os.path.join(logged_parent, "file0")) as f:
            assert f.read() == "something"

    # Case 2: an empty directory logged under a single-level parent path.
    art_dir = tmpdir.mkdir("dir")
    art_dir_name = os.path.basename(str(art_dir))
    with start_run():
        run_artifact_dir = local_file_uri_to_path(mlflow.get_artifact_uri())
        mlflow.log_artifact(str(art_dir), "some_parent")
        assert os.listdir(run_artifact_dir) == ["some_parent"]
        assert os.listdir(os.path.join(run_artifact_dir, "some_parent")) == [art_dir_name]

    # Case 3: the same directory, now with a child, logged under a nested parent path.
    sub_dir = art_dir.mkdir("another_dir")
    with start_run():
        run_artifact_dir = local_file_uri_to_path(mlflow.get_artifact_uri())
        mlflow.log_artifact(str(art_dir), "parent/and_child")
        nested_dir = os.path.join(run_artifact_dir, "parent", "and_child")
        assert os.listdir(nested_dir) == [art_dir_name]
        assert os.listdir(os.path.join(nested_dir, art_dir_name)) == \
            [os.path.basename(str(sub_dir))]

Source File: test_projects.py From mlflow with Apache License 2.0

5 votes

def test_run_with_artifact_path(tmpdir):
    """Log a file in a run, then launch the ``test_artifact_path`` entry point with its runs:/ URI."""
    model_file = tmpdir.join("model.pkl")
    model_file.write("Hello world")
    with mlflow.start_run() as run:
        mlflow.log_artifact(model_file)
        # The project receives the artifact as a run-relative URI.
        model_uri = "runs:/%s/model.pkl" % run.info.run_id
        submitted_run = mlflow.projects.run(
            TEST_PROJECT_DIR,
            entry_point="test_artifact_path",
            parameters={"model": model_uri},
            use_conda=False,
            experiment_id=FileStore.DEFAULT_EXPERIMENT_ID,
        )
        validate_exit_status(submitted_run.get_status(), RunStatus.FINISHED)

Source File: docker_tracking_test.py From mlflow with Apache License 2.0

5 votes

def call_tracking_apis():
    """Exercise the tracking API by logging one metric and one temporary file as an artifact."""
    mlflow.log_metric("some_key", 3)
    with tempfile.NamedTemporaryFile("w") as temp_file:
        temp_file.write("Temporary content.")
        # log_artifact reads the file by name, so push the buffered text to disk
        # first; otherwise the logged artifact can be empty.
        temp_file.flush()
        mlflow.log_artifact(temp_file.name)

Source File: tensorflow.py From mlflow with Apache License 2.0

5 votes

def on_train_begin(self, logs=None):  # pylint: disable=unused-argument
        """Log the optimizer name, learning rate and epsilon as params, and the model summary as an artifact."""

        def _log_first_available(optimizer, param_name, attr_names):
            # Log the first attribute in attr_names that the optimizer has; values
            # that are not plain floats are evaluated through the Keras backend.
            for attr_name in attr_names:
                if hasattr(optimizer, attr_name):
                    raw = getattr(optimizer, attr_name)
                    value = raw if type(raw) is float else tensorflow.keras.backend.eval(raw)
                    try_mlflow_log(mlflow.log_param, param_name, value)
                    return

        opt = self.model.optimizer
        if hasattr(opt, '_name'):
            try_mlflow_log(mlflow.log_param, 'optimizer_name', opt._name)
        elif hasattr(opt, 'optimizer'):
            # No '_name' but an inner 'optimizer': treated as a TensorFlow (not Keras)
            # optimizer, whose parameters live on the inner object, so log from that.
            opt = opt.optimizer
            try_mlflow_log(mlflow.log_param, 'optimizer_name', type(opt).__name__)

        _log_first_available(opt, 'learning_rate', ('lr', '_lr'))
        _log_first_available(opt, 'epsilon', ('epsilon', '_epsilon'))

        # model.summary() emits one line per print_fn call; collect them and join.
        summary_lines = []
        self.model.summary(print_fn=summary_lines.append)
        summary_text = '\n'.join(summary_lines)

        scratch_dir = tempfile.mkdtemp()
        try:
            summary_path = os.path.join(scratch_dir, "model_summary.txt")
            with open(summary_path, 'w') as out:
                out.write(summary_text)
            try_mlflow_log(mlflow.log_artifact, local_path=summary_path)
        finally:
            # Remove the scratch directory whether or not logging succeeded.
            shutil.rmtree(scratch_dir)

Source File: experiment.py From nyaggle with MIT License

5 votes

def log_dataframe(self, name: str, df: pd.DataFrame, file_format: str = 'feather'):
        """
        Save a pandas dataframe into the logging directory.

        Args:
            name:
                File name. The extension for the chosen format (``.f`` for feather,
                ``.csv`` for csv) is appended when the name does not already end with it.
            df:
                The dataframe to write.
            file_format:
                Output format, either ``feather`` or ``csv``.

        Raises:
            RuntimeError: If ``file_format`` is neither ``feather`` nor ``csv``.
        """
        suffix_by_format = {'feather': '.f', 'csv': '.csv'}

        path = os.path.join(self.logging_directory, name)
        if file_format not in suffix_by_format:
            raise RuntimeError('format not supported')

        suffix = suffix_by_format[file_format]
        if not path.endswith(suffix):
            path += suffix

        if file_format == 'feather':
            df.to_feather(path)
        else:
            df.to_csv(path, index=False)

        if self.with_mlflow:
            import mlflow
            mlflow.log_artifact(path)

Source File: loggers.py From OpenKiwi with GNU Affero General Public License v3.0

5 votes

def log_artifact(local_path, artifact_path=None):
        """Start a daemon thread that calls ``mlflow.log_artifact`` and return without joining it.

        Args:
            local_path: Passed positionally to ``mlflow.log_artifact``.
            artifact_path: Passed as the ``artifact_path`` keyword argument.
        """
        threading.Thread(
            target=mlflow.log_artifact,
            args=(local_path,),
            kwargs={'artifact_path': artifact_path},
            daemon=True,
        ).start()

Source File: loggers.py From OpenKiwi with GNU Affero General Public License v3.0

5 votes

def log_artifact(local_path, artifact_path=None):
        """Do nothing; both arguments are accepted and ignored."""
        pass

Source File: mlflow.py From tf-yarn with Apache License 2.0

5 votes

def save_text_to_mlflow(content, filename):
    """Write ``content`` to a temporary file named ``filename`` and log it as an mlflow artifact.

    Nothing is written or logged when ``_is_pyarrow_installed()`` is false; a warning
    is emitted instead.
    """
    if _is_pyarrow_installed():
        logger.info(f"save file {filename} to mlflow")
        with tempfile.TemporaryDirectory() as scratch_dir:
            target = os.path.join(scratch_dir, filename)
            with open(target, 'w') as out:
                out.write(content)
            # Log while the temporary directory still exists.
            mlflow.log_artifact(target)
    else:
        logger.warning(f"Pyarrow is not installed. {filename} artifact won't be stored on HDFS")

Source File: evaluate.py From orbyter-cookiecutter with MIT License

4 votes

def log_experiment(
    params=None,
    metrics=None,
    artifacts=None,
    experiment_name="my_experiment",
    mlflow_tracking_uri="./experiments",
    mlflow_artifact_location=None,
):
    """
    Evaluate the model and log it with mlflow

    Args:
        params (dict): dictionary of parameters to log; ``None`` means no params
        metrics (dict): dictionary of metrics to log; ``None`` means no metrics
        artifacts (dict): dictionary of artifacts (path) to log; ``None`` means
            no artifacts
        experiment_name (str): experiment name
        mlflow_tracking_uri (str): path or sql url for mlflow logging
        mlflow_artifact_location (str): path or s3bucket url for artifact
            logging. If none, it will default to a standard.

    Returns:
        None
    """
    # Avoid shared mutable default arguments.
    params = {} if params is None else params
    metrics = {} if metrics is None else metrics
    artifacts = {} if artifacts is None else artifacts

    # Select the tracking store before touching experiments. Previously the URI was
    # set after create_experiment and inside the try block, so the experiment was
    # created in the default store and the URI was never applied when the
    # experiment already existed.
    mlflow.set_tracking_uri(mlflow_tracking_uri)
    # Try to create an experiment if it doesn't exist
    try:
        exp_0 = mlflow.create_experiment(
            experiment_name, artifact_location=mlflow_artifact_location
        )
        logger.info(f"Created new experiment id: {exp_0}")
    except Exception as E:
        # Best effort: creation failure (e.g. the experiment already exists) is
        # logged, and the existing experiment is used below.
        logger.info(f"{E}. Writing to same URI/artifact store")
    # Always set the experiment
    mlflow.set_experiment(experiment_name)
    logger.info(f"Running experiment {experiment_name}")
    with mlflow.start_run():
        # param logging
        for key, val in params.items():
            logger.info(f"Logging param {key}")
            mlflow.log_param(key, val)
        # metric logging
        for key, val in metrics.items():
            logger.info(f"Logging metric {key}")
            mlflow.log_metric(key, val)
        # artifact logging
        for key, val in artifacts.items():
            logger.info(f"Logging artifact {key}")
            mlflow.log_artifact(val)
Source File: yolo_image.py From ai-platform with MIT License

4 votes

def process_image(keras_model_path, size, dataset="coco", photo_name="data/horses.jpg"):
    """Run object detection on one photo and log the parameters, detections and output image to mlflow.

    Args:
        keras_model_path: Path handed to ``load_model``.
        size: Used as both the input width and the input height.
        dataset: Name substituted into ``data/{}.data`` to locate the dataset file.
        photo_name: Path of the photo to process; the annotated copy is written to
            ``outputs/objects_<basename>``.
    """
    dataset_file = "data/{}.data".format(dataset)
    if not os.path.exists("outputs"):
        os.mkdir("outputs")

    warnings.filterwarnings("ignore")
    np.random.seed(40)
    model = load_model(keras_model_path)
    # expected input shape for the model (square)
    input_w = input_h = size
    output_photo_name = 'outputs/objects_' + os.path.basename(photo_name)
    # load and prepare image
    image, image_w, image_h = load_image_pixels(photo_name, (input_w, input_h))
    # make prediction
    yhat = model.predict(image)
    # summarize the shape of the list of arrays
    print("Bounding box coordinates")
    print([a.shape for a in yhat])
    # anchors, one group per network output
    anchors = [[116,90, 156,198, 373,326], [30,61, 62,45, 59,119], [10,13, 16,30, 33,23]]
    # probability threshold for detected objects
    class_threshold = 0.5
    boxes = []
    for idx, layer_output in enumerate(yhat):
        # decode the output of the network
        boxes += decode_netout(layer_output[0], anchors[idx], class_threshold, input_h, input_w)
    # correct the sizes of the bounding boxes for the shape of the image
    correct_yolo_boxes(boxes, image_h, image_w, input_h, input_w)
    # suppress non-maximal boxes
    do_nms(boxes, 0.4)
    # load the labels
    _, labels = dataset_process(dataset_file)
    with mlflow.start_run(nested=True):
        mlflow.log_param("keras_model_path", keras_model_path)
        mlflow.log_param("photo_name", photo_name)

        # get the details of the detected objects
        v_boxes, v_labels, v_scores = get_boxes(boxes, labels, class_threshold)
        # summarize what we found, one tag per detection
        for idx in range(len(v_boxes)):
            label = v_labels[idx]
            score = v_scores[idx]
            print(label, score)
            mlflow.set_tag(str(idx) + '_' + label, score)
        # draw what we found
        draw_boxes(photo_name, v_boxes, v_labels, v_scores, output_photo_name)
        print("Image created after object detection task and saved as:", output_photo_name)

        mlflow.log_artifact(output_photo_name, "output_photo_name")

Python mlflow.log_artifact() Examples