Python dill.dump() Examples

The following are 30 code examples of dill.dump(), drawn from open-source projects. You can go to the original project or source file via the attribution line above each example. You may also want to check out all available functions/classes of the module dill, or try the search function.
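For orientation before the project examples, here is a minimal round trip with dill.dump() and its counterpart dill.load(); the file name is arbitrary. Unlike the standard pickle module, dill can also serialize objects such as lambdas and nested functions.

import dill

# dill extends pickle: it can serialize objects such as lambdas
# that the standard pickle module rejects.
square = lambda x: x * x

with open('square.dill', 'wb') as f:
    dill.dump(square, f)        # write the object to disk

with open('square.dill', 'rb') as f:
    restored = dill.load(f)     # read it back

assert restored(4) == 16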
Example #1
Source File: persister.py    From Quadflor with BSD 3-Clause "New" or "Revised" License
def persist(self, X, y, thesaurus):
        """
        Save the data and the processed thesaurus.

        Parameters
        ----------
        X: sparse matrix
            The training data; will be compressed.
        y: sparse matrix
            The label data; will be compressed.
        thesaurus: ThesaurusReader
            ThesaurusReader object; will be pickled.
        """
        print('Persisting features to disk')
        self._delete_old_files()
        self._save(self._persist_name('X'), X)
        self._save(self._persist_name('y'), y)
        with open(self._persist_name('TR'), mode='wb') as f:
            pickle.dump(thesaurus, f) 
Example #2
Source File: base_preprocessor.py    From MatchZoo with Apache License 2.0
def save(self, dirpath: typing.Union[str, Path]):
        """
        Save the :class:`DSSMPreprocessor` object.

        A saved :class:`DSSMPreprocessor` is represented as a directory with
        the `context` object (fitted parameters on training data), it will
        be saved by `pickle`.

        :param dirpath: directory path of the saved :class:`DSSMPreprocessor`.
        """
        dirpath = Path(dirpath)
        data_file_path = dirpath.joinpath(self.DATA_FILENAME)

        if data_file_path.exists():
            raise FileExistsError(
                f'{data_file_path} already exists, failed to save.')
        elif not dirpath.exists():
            dirpath.mkdir()

        with open(data_file_path, mode='wb') as f:
            dill.dump(self, f)
Example #3
Source File: base_preprocessor.py    From MatchZoo-py with Apache License 2.0
def save(self, dirpath: typing.Union[str, Path]):
        """
        Save the :class:`DSSMPreprocessor` object.

        A saved :class:`DSSMPreprocessor` is represented as a directory with
        the `context` object (fitted parameters on training data), it will
        be saved by `pickle`.

        :param dirpath: directory path of the saved :class:`DSSMPreprocessor`.
        """
        dirpath = Path(dirpath)
        data_file_path = dirpath.joinpath(self.DATA_FILENAME)

        if not dirpath.exists():
            dirpath.mkdir(parents=True)

        with open(data_file_path, mode='wb') as f:
            dill.dump(self, f)
Example #4
Source File: data_pack.py    From MatchZoo-py with Apache License 2.0
def save(self, dirpath: typing.Union[str, Path]):
        """
        Save the :class:`DataPack` object.

        A saved :class:`DataPack` is represented as a directory with a
        :class:`DataPack` object (transformed user input as features and
        context), it will be saved by `pickle`.

        :param dirpath: directory path of the saved :class:`DataPack`.
        """
        dirpath = Path(dirpath)
        data_file_path = dirpath.joinpath(self.DATA_FILENAME)

        if not dirpath.exists():
            dirpath.mkdir(parents=True)

        with open(data_file_path, mode='wb') as f:
            dill.dump(self, f)
Example #5
Source File: AllenCahn_contracting_circle_SDC.py    From pySDC with BSD 2-Clause "Simplified" License
def main(cwd=''):
    """
    Main driver

    Args:
        cwd (str): current working directory (need this for testing)
    """

    # Loop over variants, exact and inexact solves
    results = {}
    for variant in ['multi-implicit', 'semi-implicit', 'fully-implicit', 'semi-implicit_v2', 'multi-implicit_v2']:

        results[(variant, 'exact')] = run_SDC_variant(variant=variant, inexact=False)
        results[(variant, 'inexact')] = run_SDC_variant(variant=variant, inexact=True)

    # dump result
    fname = 'data/results_SDC_variants_AllenCahn_1E-03'
    file = open(cwd + fname + '.pkl', 'wb')
    dill.dump(results, file)
    file.close()
    assert os.path.isfile(cwd + fname + '.pkl'), 'ERROR: dill did not create file'

    # visualize
    # show_results(fname, cwd=cwd) 
Example #6
Source File: data_pack.py    From MatchZoo with Apache License 2.0
def save(self, dirpath: typing.Union[str, Path]):
        """
        Save the :class:`DataPack` object.

        A saved :class:`DataPack` is represented as a directory with a
        :class:`DataPack` object (transformed user input as features and
        context), it will be saved by `pickle`.

        :param dirpath: directory path of the saved :class:`DataPack`.
        """
        dirpath = Path(dirpath)
        data_file_path = dirpath.joinpath(self.DATA_FILENAME)

        if data_file_path.exists():
            raise FileExistsError(
                f'{data_file_path} already exists, failed to save.')
        elif not dirpath.exists():
            dirpath.mkdir()

        with open(data_file_path, mode='wb') as f:
            dill.dump(self, f)
Example #7
Source File: test_cases.py    From ebonite with Apache License 2.0
def test_requirements_analyzer__model_works(tmpdir):
    from proxy_model import model
    reqs = get_object_requirements(model)

    for r in reqs.custom:
        for p, src in r.to_sources_dict().items():
            join = os.path.join(tmpdir, p)
            os.makedirs(os.path.dirname(join), exist_ok=True)
            with open(join, 'w') as f:
                f.write(src)

    with open(os.path.join(tmpdir, 'model.pkl'), 'wb') as f:
        dill.dump(model, f)

    shutil.copy(fs.current_module_path('use_model.py'), tmpdir)

    cp = subprocess.run('python use_model.py', shell=True, cwd=tmpdir)
    assert cp.returncode == 0 
Example #8
Source File: exec_in_new_process.py    From petastorm with Apache License 2.0
def exec_in_new_process(func, *args, **kargs):
    """Launches a function in a separate process. Takes variable number of arguments which are passed to the function.
    The process IS NOT FORKED by 'exec'ed.

    :param func: Function to be executed in a separate process.
    :param args: position arguments passed to the func
    :param kargs: named arguments passed to the func
    :return:
    """

    # Store function handle and arguments into a pickle
    new_process_runnable_handle, new_process_runnable_file = mkstemp(suffix='runnable')
    with os.fdopen(new_process_runnable_handle, 'wb') as f:
        dill.dump((func, args, kargs), f)

    bootstrap_package_name = '{}.{}'.format(__package__, os.path.splitext(os.path.basename(__file__))[0])
    # Popen this script; its __main__ (below) serves as the entry point
    process = subprocess.Popen(args=[sys.executable,
                                     '-m',
                                     bootstrap_package_name,
                                     new_process_runnable_file],
                               executable=sys.executable)
    return process 
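The snippet shows only the writing side; the bootstrap module it spawns would dill.load the (func, args, kargs) triple and call it. A hypothetical sketch of that entry point follows; petastorm's actual bootstrap code is not shown on this page and may differ.

# Hypothetical sketch of the bootstrap __main__; not petastorm's
# actual code. It loads the pickled triple and invokes the function
# with the stored arguments.
import sys
import dill

if __name__ == '__main__':
    with open(sys.argv[1], 'rb') as f:
        func, args, kargs = dill.load(f)
    func(*args, **kargs)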
Example #9
Source File: AllenCahn_contracting_circle_FFT.py    From pySDC with BSD 2-Clause "Simplified" License
def main(cwd=''):
    """
    Main driver

    Args:
        cwd (str): current working directory (need this for testing)
    """

    # Loop over variants, exact and inexact solves
    results = {}
    for variant in ['semi-implicit-stab']:

        results[(variant, 'exact')] = run_SDC_variant(variant=variant)

    # dump result
    fname = 'data/results_SDC_variants_AllenCahn_1E-03'
    file = open(cwd + fname + '.pkl', 'wb')
    dill.dump(results, file)
    file.close()
    assert os.path.isfile(cwd + fname + '.pkl'), 'ERROR: dill did not create file'

    # visualize
    show_results(fname, cwd=cwd) 
Example #10
Source File: abstractFileStore.py    From toil with Apache License 2.0
def write(self, fileName):
            """
            Write the current state into a temporary file then atomically rename it to the main
            state file.

            :param str fileName: Path to the state file.
            """
            with open(fileName + '.tmp', 'wb') as fH:
                # Based on answer by user "Mark" at:
                # http://stackoverflow.com/questions/2709800/how-to-pickle-yourself
                # We can't pickle nested classes. So we have to pickle the variables of the class
                # If we ever change this, we need to ensure it doesn't break FileID
                dill.dump(self.__dict__, fH)
            os.rename(fileName + '.tmp', fileName)

    # Functions related to logging 
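Because write() pickles self.__dict__ rather than the instance itself, the matching read side would restore the attributes onto an existing object. A hypothetical sketch, not toil's actual code:

import dill

def read(self, fileName):
    # Hypothetical counterpart to write(): load the pickled
    # attribute dict and merge it back into the live instance.
    with open(fileName, 'rb') as fH:
        self.__dict__.update(dill.load(fH))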
Example #11
Source File: monitorbot.py    From python-webpage-monitor-slackbot with MIT License
def dill_soup(bs4_obj, url):
    '''Serializes a BeautifulSoup object after converting it to a string.

    Saves the file using the url.'''

    dill_file = os.path.join('webpage_cache', strip_url(url) + '.dill')
    with open(dill_file, 'wb') as f:
        dill.dump(str(bs4_obj), f)
Example #12
Source File: nonCachingFileStore.py    From toil with Apache License 2.0
def _createJobStateFile(self):
        """
        Create the job state file for the current job and fill in the required
        values.

        :return: Path to the job state file
        :rtype: str
        """
        jobStateFile = os.path.join(self.localTempDir, '.jobState')
        jobState = {'jobProcessName': get_process_name(self.workDir),
                    'jobName': self.jobName,
                    'jobDir': self.localTempDir}
        with open(jobStateFile + '.tmp', 'wb') as fH:
            dill.dump(jobState, fH)
        os.rename(jobStateFile + '.tmp', jobStateFile)
        return jobStateFile 
Example #13
Source File: exp_baseline_linear.py    From marseille with BSD 3-Clause "New" or "Revised" License
def saga_cv_cache(*args):

    arghash = sha1(repr(args).encode('utf-8')).hexdigest()
    fn = "res/baseline_linear_{}.dill".format(arghash)

    try:
        with open(fn, 'rb') as f:
            out = dill.load(f)
        logging.info("Loaded cached version.")
    except FileNotFoundError:
        logging.info("Computing...")
        out = saga_cv(*args)
        with open(fn, 'wb') as f:
            dill.dump(out, f)

    return out 
Example #14
Source File: exp_svmstruct.py    From marseille with BSD 3-Clause "New" or "Revised" License
def svmstruct_cv_score(dataset, C, class_weight, constraints,
                       compat_features, second_order_features):

    fn = cache_fname("svmstruct_cv_score", (dataset, C, class_weight,
                                            constraints, compat_features,
                                            second_order_features))

    if os.path.exists(fn):
        logging.info("Cached file already exists.")
        with open(fn, "rb") as f:
            return dill.load(f)

    load, ids = get_dataset_loader(dataset, split="train")

    n_folds = 5 if dataset == 'ukp' else 3

    # boolean flags: enable second-order factors according to the dataset
    grandparents = second_order_features and dataset == 'ukp'
    coparents = second_order_features
    siblings = second_order_features and dataset == 'cdcp'

    scores = []
    all_Y_pred = []

    for k, (tr, val) in enumerate(KFold(n_folds).split(ids)):
        train_docs = list(load(ids[tr]))
        val_docs = list(load(ids[val]))

        clf, Y_val, Y_pred = fit_predict(train_docs, val_docs, dataset, C,
                                         class_weight,
                                         constraints, compat_features,
                                         second_order_features, grandparents,
                                         coparents, siblings)
        all_Y_pred.extend(Y_pred)
        scores.append(clf.model._score(Y_val, Y_pred))

    with open(fn, "wb") as f:
        dill.dump((scores, all_Y_pred), f)

    return scores, all_Y_pred 
Example #15
Source File: train.py    From aivivn-tone with MIT License
def load_data_in_parts(train_src, train_tgt, val_src, val_tgt, batch_size=64, save_path="checkpoint"):
    # prepare dataset
    print("Reading data...")
    val = Seq2SeqDataset.from_file(val_src, val_tgt)

    print("Building vocab...")
    val.build_vocab(max_size=300)

    src_vocab = val.src_field.vocab
    tgt_vocab = val.tgt_field.vocab

    # save vocab
    with open(os.path.join(save_path, "vocab.src"), "wb") as f:
        dill.dump(src_vocab, f)
    with open(os.path.join(save_path, "vocab.tgt"), "wb") as f:
        dill.dump(tgt_vocab, f)

    print("Source vocab size:", len(src_vocab))
    print("Target vocab size:", len(tgt_vocab))

    # data iterator
    # keep sort=False and shuffle=False to speed up training and reduce memory usage
    val_iterator = BucketIterator(dataset=val, batch_size=batch_size, train=False,
                                  sort=False, sort_within_batch=True,
                                  sort_key=lambda x: len(x.src),
                                  shuffle=False, device=device)

    return src_vocab, tgt_vocab, list(zip(train_src, train_tgt)), val, val_iterator, batch_size 
Example #16
Source File: utils.py    From lifetimes with MIT License
def _save_obj_without_attr(obj, attr_list, path, values_to_save=None):
    """
    Save object with attributes from attr_list.

    Parameters
    ----------
    obj: obj
        Object of class with __dict__ attribute.
    attr_list: list
        List with attributes to exclude from saving to dill object. If empty
        list all attributes will be saved.
    path: str
        Where to save dill object.
    values_to_save: list, optional
        Placeholders for original attributes for saving object. If None will be
        extended to attr_list length like [None] * len(attr_list)
    """

    if values_to_save is None:
        values_to_save = [None] * len(attr_list)

    saved_attr_dict = {}
    for attr, val_save in zip(attr_list, values_to_save):
        if attr in obj.__dict__:
            item = obj.__dict__.pop(attr)
            saved_attr_dict[attr] = item
            setattr(obj, attr, val_save)

    with open(path, "wb") as out_file:
        dill.dump(obj, out_file)

    for attr, item in saved_attr_dict.items():
        setattr(obj, attr, item) 
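A hypothetical usage of the helper above, with purely illustrative names: dropping an attribute that cannot be pickled (here a sqlite3 connection) before saving, then getting it back on the live object afterwards.

import sqlite3

# Illustrative class; any object with such an attribute would do.
class Model:
    def __init__(self):
        self.params = {'alpha': 0.5}
        self.conn = sqlite3.connect(':memory:')  # connections cannot be pickled

m = Model()
# The dump on disk carries None in place of `conn`; `m.conn` itself
# is restored as soon as the dump completes.
_save_obj_without_attr(m, ['conn'], 'model.dill')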
Example #17
Source File: deferred.py    From toil with Apache License 2.0
def open(self):
        """
        Yields a single-argument function that allows for deferred functions of
        type :class:`toil.DeferredFunction` to be registered.  We use this
        design so deferred functions can be registered only inside this context
        manager.

        Not thread safe.
        """

        # Clean up other jobs before we run, so our job has a nice clean node
        self._runOrphanedDeferredFunctions()
    
        try:
            def defer(deferredFunction):
                # Just serialize deferred functions one after the other.
                # If serializing later ones fails, earlier ones will still be intact.
                # We trust dill to protect sufficiently against partial reads later.
                logger.debug("Deferring function %s" % repr(deferredFunction))
                dill.dump(deferredFunction, self.stateFileOut)
                # Flush before returning so we can guarantee the write is on disk if we die.
                self.stateFileOut.flush()

            logger.debug("Running job")
            yield defer
        finally:
            self._runOwnDeferredFunctions()
            self._runOrphanedDeferredFunctions() 
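defer() appends one pickle after another to the same state file, so the reading side would call dill.load repeatedly until the stream is exhausted. A hedged sketch of that pattern, not toil's actual code:

import dill

def load_all(path):
    # Each dill.load consumes exactly one appended record;
    # EOFError marks the end of the stream.
    records = []
    with open(path, 'rb') as f:
        while True:
            try:
                records.append(dill.load(f))
            except EOFError:
                break
    return records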
Example #18
Source File: backends.py    From kale with Apache License 2.0
def resource_function_save(obj, path, **kwargs):
    """Save a Python function."""
    log.info("Saving function: %s", _get_obj_name(path))
    with open(path + ".pyfn", "wb") as f:
        dill.dump(obj, f) 
Example #19
Source File: exp_linear.py    From marseille with BSD 3-Clause "New" or "Revised" License
def linear_cv_score(dataset, alpha, l1_ratio, constraints):

    fn = cache_fname("linear_cv_score", (dataset, alpha, l1_ratio,
                                         constraints))
    if os.path.exists(fn):
        logging.info("Loading {}".format(fn))
        with open(fn, "rb") as f:
            return dill.load(f)

    load, ids = get_dataset_loader(dataset, split="train")
    n_folds = 5 if dataset == 'ukp' else 3

    scores = []
    for k, (tr, val) in enumerate(KFold(n_folds).split(ids)):
        Y_marg, bl = saga_decision_function(dataset, k, alpha, alpha, l1_ratio)

        val_docs = list(load(ids[val]))
        Y_true = [doc.label for doc in val_docs]
        Y_pred = bl.fast_decode(Y_marg, val_docs, constraints)

        scores.append(bl._score(Y_true, Y_pred))

    with open(fn, "wb") as f:
        logging.info("Saving {}".format(fn))
        dill.dump(scores, f)
    return scores 
Example #20
Source File: batcher.py    From bootcamp with Apache License 2.0
def update_triplets_history(self):
        model_inputs = []
        speakers = list(self.audio.speakers_to_utterances.keys())
        np.random.shuffle(speakers)
        selected_speakers = speakers[: self.nb_speakers]
        embeddings_utterances = []
        for speaker_id in selected_speakers:
            train_utterances = self.sp_to_utt_train[speaker_id]
            for selected_utterance in np.random.choice(a=train_utterances, size=self.nb_per_speaker, replace=False):
                mfcc = sample_from_mfcc_file(selected_utterance, self.max_length)
                embeddings_utterances.append(selected_utterance)
                model_inputs.append(mfcc)
        embeddings = self.model.m.predict(np.array(model_inputs))
        assert embeddings.shape[-1] == 512
        embeddings = np.reshape(embeddings, (len(selected_speakers), self.nb_per_speaker, 512))
        self.history_embeddings_train.extend(list(embeddings.reshape((-1, 512))))
        self.history_utterances_train.extend(embeddings_utterances)
        self.history_model_inputs_train.extend(model_inputs)

        # reason: can't index a deque with a np.array.
        self.history_embeddings = np.array(self.history_embeddings_train)
        self.history_utterances = np.array(self.history_utterances_train)
        self.history_model_inputs = np.array(self.history_model_inputs_train)

        with open(self.metadata_output_file, 'w') as w:
            json.dump(obj=dict(self.metadata_train_speakers), fp=w, indent=2) 
Example #21
Source File: batcher.py    From bootcamp with Apache License 2.0
def persist_to_disk(self):
        with open(os.path.join(self.output_dir, 'categorical_speakers.pkl'), 'wb') as w:
            dill.dump(self.categorical_speakers, w)
        np.save(os.path.join(self.output_dir, 'kx_train.npy'), self.kx_train)
        np.save(os.path.join(self.output_dir, 'kx_test.npy'), self.kx_test)
        np.save(os.path.join(self.output_dir, 'ky_train.npy'), self.ky_train)
        np.save(os.path.join(self.output_dir, 'ky_test.npy'), self.ky_test) 
Example #22
Source File: procedure_continuous_tasks.py    From action-branching-agents with MIT License
def save(self, path):
        """Save model to a pickle located at `path`"""
        with tempfile.TemporaryDirectory() as td:
            U.save_state(os.path.join(td, "model"))
            arc_name = os.path.join(td, "packed.zip")
            with zipfile.ZipFile(arc_name, 'w') as zipf:
                for root, dirs, files in os.walk(td):
                    for fname in files:
                        file_path = os.path.join(root, fname)
                        if file_path != arc_name:
                            zipf.write(file_path, os.path.relpath(file_path, td))
            with open(arc_name, "rb") as f:
                model_data = f.read()
        with open(path, "wb") as f:
            dill.dump((model_data, self._act_params), f) 
Example #23
Source File: base_model.py    From MatchZoo with Apache License 2.0
def save(self, dirpath: typing.Union[str, Path]):
        """
        Save the model.

        A saved model is represented as a directory with two files. One is a
        model parameters file saved by `pickle`, and the other one is a model
        h5 file saved by `keras`.

        :param dirpath: directory path of the saved model

        Example:

            >>> import matchzoo as mz
            >>> model = mz.models.Naive()
            >>> model.guess_and_fill_missing_params(verbose=0)
            >>> model.build()
            >>> model.save('temp-model')
            >>> import shutil
            >>> shutil.rmtree('temp-model')

        """
        dirpath = Path(dirpath)
        params_path = dirpath.joinpath(self.PARAMS_FILENAME)
        weights_path = dirpath.joinpath(self.BACKEND_WEIGHTS_FILENAME)

        if not dirpath.exists():
            dirpath.mkdir(parents=True)
        else:
            raise FileExistsError(f'{dirpath} already exist, fail to save.')

        self._backend.save_weights(weights_path)
        with open(params_path, mode='wb') as params_file:
            dill.dump(self._params, params_file) 
Example #24
Source File: fileIO.py    From bayesloop with MIT License
def save(filename, study):
    """
    Save an instance of a bayesloop study class to file.

    Args:
        filename(str): Path + filename to store bayesloop study
        study: Instance of study class (Study, HyperStudy, etc.)
    """
    with open(filename, 'wb') as f:
        dill.dump(study, f, protocol=dill.HIGHEST_PROTOCOL)
    print('+ Successfully saved current study.') 
Example #25
Source File: serialization.py    From kubeface with Apache License 2.0
def dump(obj, fd):
    check(obj)
    return dill.dump(obj, fd, protocol=PICKLE_PROTOCOL) 
Example #26
Source File: tv.py    From Kairos with GNU General Public License v3.0
def save_browser_state(browser):
    # Serialize the browser object and save it on disk
    with open(FILENAME, 'wb') as fp:
        dill.dump(browser, fp)
Example #27
Source File: core.py    From jaxnet with Apache License 2.0
def save(parameters, path: Path):
    with path.open('wb') as file:
        dill.dump(parameters, file) 
Example #28
Source File: feature_sampler.py    From VerifAI with BSD 3-Clause "New" or "Revised" License
def saveToFile(self, path):
        with open(path, 'wb') as outfile:
            randState = random.getstate()
            numpyRandState = np.random.get_state()
            allState = (randState, numpyRandState, self)
            dill.dump(allState, outfile) 
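Restoring would reverse the tuple: load it and reinstall both RNG states. A hypothetical counterpart, not VerifAI's actual code:

import random
import numpy as np
import dill

def loadFromFile(path):
    # Hypothetical counterpart to saveToFile().
    with open(path, 'rb') as infile:
        randState, numpyRandState, sampler = dill.load(infile)
    random.setstate(randState)            # restore stdlib RNG
    np.random.set_state(numpyRandState)   # restore NumPy RNG
    return sampler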
Example #29
Source File: resource_save.py    From kale with Apache License 2.0
def resource_all(o, path, *args, **kwargs):
    """Save any type of object in a general way."""
    log.info("Saving general object: %s", path.split('/')[-1])
    with open(path + ".dillpkl", "wb") as f:
        dill.dump(o, f) 
Example #30
Source File: _results.py    From nelpy with MIT License
def save_pkl(fname, res, zip=True, overwrite=False):
    """Write pickled data to disk, possible compressing."""
    if os.path.isfile(fname):
        # file exists
        if overwrite:
            pass
        else:
            print('File "{}" already exists! Aborting...'.format(fname))
            return
    if zip:
        save_large_file_without_zip = False
        with gzip.open(fname, "wb") as fid:
            try:
                pickle.dump(res, fid)
            except OverflowError:
                print('writing to disk using protocol=4, which supports file sizes > 4 GiB, and ignoring zip=True (zip is not supported for large files yet)')
                save_large_file_without_zip = True

        if save_large_file_without_zip:
            with open(fname, "wb") as fid:
                pickle.dump(res, fid, protocol=4)
    else:
        with open(fname, "wb") as fid:
            try:
                pickle.dump(res, fid)
            except OverflowError:
                print('writing to disk using protocol=4, which supports file sizes > 4 GiB')
                pickle.dump(res, fid, protocol=4)
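Files written by save_pkl may therefore be either gzip-compressed or plain protocol-4 pickles, so a matching loader has to try both. A hedged sketch, not nelpy's actual code:

import gzip
import pickle

def load_pkl(fname):
    # Try the gzip path first; fall back to a plain read when the
    # file lacks the gzip magic bytes (raises an OSError subclass).
    try:
        with gzip.open(fname, 'rb') as fid:
            return pickle.load(fid)
    except OSError:
        with open(fname, 'rb') as fid:
            return pickle.load(fid)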