Python dill.dump() Examples

The following are 30 code examples of dill.dump(), drawn from open-source projects. You can go to the original project or source file via the attribution line above each example. You may also want to check out all available functions/classes of the module dill, or try the search function.
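For orientation before the project examples, here is a minimal round trip with dill.dump() and its counterpart dill.load(); the file name is arbitrary. Unlike the standard pickle module, dill can also serialize objects such as lambdas and nested functions.

import dill

# dill extends pickle: it can serialize objects such as lambdas
# that the standard pickle module rejects.
square = lambda x: x * x

with open('square.dill', 'wb') as f:
    dill.dump(square, f)        # write the object to disk

with open('square.dill', 'rb') as f:
    restored = dill.load(f)     # read it back

assert restored(4) == 16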
Example #1
Source File: persister.py    From Quadflor with BSD 3-Clause "New" or "Revised" License
def persist(self, X, y, thesaurus):
        """
        Save the data and the processed thesaurus.

        Parameters
        ----------
        X: sparse matrix
            The training data; will be compressed.
        y: sparse matrix
            The label data; will be compressed.
        thesaurus: ThesaurusReader
            ThesaurusReader object; will be pickled.
        """
        print('Persisting features to disk')
        self._delete_old_files()
        self._save(self._persist_name('X'), X)
        self._save(self._persist_name('y'), y)
        with open(self._persist_name('TR'), mode='wb') as f:
            pickle.dump(thesaurus, f) 
Example #2
Source File: base_preprocessor.py    From MatchZoo with Apache License 2.0
def save(self, dirpath: typing.Union[str, Path]):
        """
        Save the :class:`DSSMPreprocessor` object.

        A saved :class:`DSSMPreprocessor` is represented as a directory with
        the `context` object (fitted parameters on training data), it will
        be saved by `pickle`.

        :param dirpath: directory path of the saved :class:`DSSMPreprocessor`.
        """
        dirpath = Path(dirpath)
        data_file_path = dirpath.joinpath(self.DATA_FILENAME)

        if data_file_path.exists():
            raise FileExistsError(
                f'{data_file_path} already exists, failed to save.')
        elif not dirpath.exists():
            dirpath.mkdir()

        with open(data_file_path, mode='wb') as f:
            dill.dump(self, f)
Example #3
Source File: base_preprocessor.py    From MatchZoo-py with Apache License 2.0
def save(self, dirpath: typing.Union[str, Path]):
        """
        Save the :class:`DSSMPreprocessor` object.

        A saved :class:`DSSMPreprocessor` is represented as a directory with
        the `context` object (fitted parameters on training data), it will
        be saved by `pickle`.

        :param dirpath: directory path of the saved :class:`DSSMPreprocessor`.
        """
        dirpath = Path(dirpath)
        data_file_path = dirpath.joinpath(self.DATA_FILENAME)

        if not dirpath.exists():
            dirpath.mkdir(parents=True)

        with open(data_file_path, mode='wb') as f:
            dill.dump(self, f)
Example #4
Source File: data_pack.py    From MatchZoo-py with Apache License 2.0
def save(self, dirpath: typing.Union[str, Path]):
        """
        Save the :class:`DataPack` object.

        A saved :class:`DataPack` is represented as a directory with a
        :class:`DataPack` object (transformed user input as features and
        context), it will be saved by `pickle`.

        :param dirpath: directory path of the saved :class:`DataPack`.
        """
        dirpath = Path(dirpath)
        data_file_path = dirpath.joinpath(self.DATA_FILENAME)

        if not dirpath.exists():
            dirpath.mkdir(parents=True)

        with open(data_file_path, mode='wb') as f:
            dill.dump(self, f)
Example #5
Source File: AllenCahn_contracting_circle_SDC.py    From pySDC with BSD 2-Clause "Simplified" License
def main(cwd=''):
    """
    Main driver

    Args:
        cwd (str): current working directory (need this for testing)
    """

    # Loop over variants, exact and inexact solves
    results = {}
    for variant in ['multi-implicit', 'semi-implicit', 'fully-implicit', 'semi-implicit_v2', 'multi-implicit_v2']:

        results[(variant, 'exact')] = run_SDC_variant(variant=variant, inexact=False)
        results[(variant, 'inexact')] = run_SDC_variant(variant=variant, inexact=True)

    # dump result
    fname = 'data/results_SDC_variants_AllenCahn_1E-03'
    file = open(cwd + fname + '.pkl', 'wb')
    dill.dump(results, file)
    file.close()
    assert os.path.isfile(cwd + fname + '.pkl'), 'ERROR: dill did not create file'

    # visualize
    # show_results(fname, cwd=cwd) 
Example #6
Source File: data_pack.py    From MatchZoo with Apache License 2.0
def save(self, dirpath: typing.Union[str, Path]):
        """
        Save the :class:`DataPack` object.

        A saved :class:`DataPack` is represented as a directory with a
        :class:`DataPack` object (transformed user input as features and
        context), it will be saved by `pickle`.

        :param dirpath: directory path of the saved :class:`DataPack`.
        """
        dirpath = Path(dirpath)
        data_file_path = dirpath.joinpath(self.DATA_FILENAME)

        if data_file_path.exists():
            raise FileExistsError(
                f'{data_file_path} already exists, failed to save.')
        elif not dirpath.exists():
            dirpath.mkdir()

        with open(data_file_path, mode='wb') as f:
            dill.dump(self, f)
Example #7
Source File: test_cases.py    From ebonite with Apache License 2.0
def test_requirements_analyzer__model_works(tmpdir):
    from proxy_model import model
    reqs = get_object_requirements(model)

    for r in reqs.custom:
        for p, src in r.to_sources_dict().items():
            join = os.path.join(tmpdir, p)
            os.makedirs(os.path.dirname(join), exist_ok=True)
            with open(join, 'w') as f:
                f.write(src)

    with open(os.path.join(tmpdir, 'model.pkl'), 'wb') as f:
        dill.dump(model, f)

    shutil.copy(fs.current_module_path('use_model.py'), tmpdir)

    cp = subprocess.run('python use_model.py', shell=True, cwd=tmpdir)
    assert cp.returncode == 0 
Example #8
Source File: exec_in_new_process.py    From petastorm with Apache License 2.0
def exec_in_new_process(func, *args, **kargs):
    """Launches a function in a separate process. Takes variable number of arguments which are passed to the function.
    The process IS NOT FORKED by 'exec'ed.

    :param func: Function to be executed in a separate process.
    :param args: position arguments passed to the func
    :param kargs: named arguments passed to the func
    :return:
    """

    # Store function handle and arguments into a pickle
    new_process_runnable_handle, new_process_runnable_file = mkstemp(suffix='runnable')
    with os.fdopen(new_process_runnable_handle, 'wb') as f:
        dill.dump((func, args, kargs), f)

    bootstrap_package_name = '{}.{}'.format(__package__, os.path.splitext(os.path.basename(__file__))[0])
    # Popen this script; its __main__ (below) serves as the entry point
    process = subprocess.Popen(args=[sys.executable,
                                     '-m',
                                     bootstrap_package_name,
                                     new_process_runnable_file],
                               executable=sys.executable)
    return process 
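The snippet shows only the writing side; the bootstrap module it spawns would dill.load the (func, args, kargs) triple and call it. A hypothetical sketch of that entry point follows; petastorm's actual bootstrap code is not shown on this page and may differ.

# Hypothetical sketch of the bootstrap __main__; not petastorm's
# actual code. It loads the pickled triple and invokes the function
# with the stored arguments.
import sys
import dill

if __name__ == '__main__':
    with open(sys.argv[1], 'rb') as f:
        func, args, kargs = dill.load(f)
    func(*args, **kargs)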
Example #9
Source File: AllenCahn_contracting_circle_FFT.py    From pySDC with BSD 2-Clause "Simplified" License
def main(cwd=''):
    """
    Main driver

    Args:
        cwd (str): current working directory (need this for testing)
    """

    # Loop over variants, exact and inexact solves
    results = {}
    for variant in ['semi-implicit-stab']:

        results[(variant, 'exact')] = run_SDC_variant(variant=variant)

    # dump result
    fname = 'data/results_SDC_variants_AllenCahn_1E-03'
    file = open(cwd + fname + '.pkl', 'wb')
    dill.dump(results, file)
    file.close()
    assert os.path.isfile(cwd + fname + '.pkl'), 'ERROR: dill did not create file'

    # visualize
    show_results(fname, cwd=cwd) 
Example #10
Source File: abstractFileStore.py    From toil with Apache License 2.0
def write(self, fileName):
            """
            Write the current state into a temporary file then atomically rename it to the main
            state file.

            :param str fileName: Path to the state file.
            """
            with open(fileName + '.tmp', 'wb') as fH:
                # Based on answer by user "Mark" at:
                # http://stackoverflow.com/questions/2709800/how-to-pickle-yourself
                # We can't pickle nested classes. So we have to pickle the variables of the class
                # If we ever change this, we need to ensure it doesn't break FileID
                dill.dump(self.__dict__, fH)
            os.rename(fileName + '.tmp', fileName)

    # Functions related to logging 
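Because write() pickles self.__dict__ rather than the instance itself, the matching read side would restore the attributes onto an existing object. A hypothetical sketch, not toil's actual code:

import dill

def read(self, fileName):
    # Hypothetical counterpart to write(): load the pickled
    # attribute dict and merge it back into the live instance.
    with open(fileName, 'rb') as fH:
        self.__dict__.update(dill.load(fH))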
Example #11
Source File: monitorbot.py    From python-webpage-monitor-slackbot with MIT License
def dill_soup(bs4_obj, url):
    '''Serializes a BeautifulSoup object after converting it to a string.

    Saves the file using the url.'''

    dill_file = os.path.join('webpage_cache', strip_url(url) + '.dill')
    with open(dill_file, 'wb') as f:
        dill.dump(str(bs4_obj), f)
Example #12
Source File: nonCachingFileStore.py    From toil with Apache License 2.0
def _createJobStateFile(self):
        """
        Create the job state file for the current job and fill in the required
        values.

        :return: Path to the job state file
        :rtype: str
        """
        jobStateFile = os.path.join(self.localTempDir, '.jobState')
        jobState = {'jobProcessName': get_process_name(self.workDir),
                    'jobName': self.jobName,
                    'jobDir': self.localTempDir}
        with open(jobStateFile + '.tmp', 'wb') as fH:
            dill.dump(jobState, fH)
        os.rename(jobStateFile + '.tmp', jobStateFile)
        return jobStateFile 
Example #13
Source File: exp_baseline_linear.py    From marseille with BSD 3-Clause "New" or "Revised" License
def saga_cv_cache(*args):

    arghash = sha1(repr(args).encode('utf-8')).hexdigest()
    fn = "res/baseline_linear_{}.dill".format(arghash)

    try:
        with open(fn, 'rb') as f:
            out = dill.load(f)
        logging.info("Loaded cached version.")
    except FileNotFoundError:
        logging.info("Computing...")
        out = saga_cv(*args)
        with open(fn, 'wb') as f:
            dill.dump(out, f)

    return out 
Example #14
Source File: exp_svmstruct.py    From marseille with BSD 3-Clause "New" or "Revised" License
def svmstruct_cv_score(dataset, C, class_weight, constraints,
                       compat_features, second_order_features):

    fn = cache_fname("svmstruct_cv_score", (dataset, C, class_weight,
                                            constraints, compat_features,
                                            second_order_features))

    if os.path.exists(fn):
        logging.info("Cached file already exists.")
        with open(fn, "rb") as f:
            return dill.load(f)

    load, ids = get_dataset_loader(dataset, split="train")

    n_folds = 5 if dataset == 'ukp' else 3

    # boolean flags: enable second-order factors according to the dataset
    grandparents = second_order_features and dataset == 'ukp'
    coparents = second_order_features
    siblings = second_order_features and dataset == 'cdcp'

    scores = []
    all_Y_pred = []

    for k, (tr, val) in enumerate(KFold(n_folds).split(ids)):
        train_docs = list(load(ids[tr]))
        val_docs = list(load(ids[val]))

        clf, Y_val, Y_pred = fit_predict(train_docs, val_docs, dataset, C,
                                         class_weight,
                                         constraints, compat_features,
                                         second_order_features, grandparents,
                                         coparents, siblings)
        all_Y_pred.extend(Y_pred)
        scores.append(clf.model._score(Y_val, Y_pred))

    with open(fn, "wb") as f:
        dill.dump((scores, all_Y_pred), f)

    return scores, all_Y_pred 
Example #15
Source File: train.py    From aivivn-tone with MIT License
def load_data_in_parts(train_src, train_tgt, val_src, val_tgt, batch_size=64, save_path="checkpoint"):
    # prepare dataset
    print("Reading data...")
    val = Seq2SeqDataset.from_file(val_src, val_tgt)

    print("Building vocab...")
    val.build_vocab(max_size=300)

    src_vocab = val.src_field.vocab
    tgt_vocab = val.tgt_field.vocab

    # save vocab
    with open(os.path.join(save_path, "vocab.src"), "wb") as f:
        dill.dump(src_vocab, f)
    with open(os.path.join(save_path, "vocab.tgt"), "wb") as f:
        dill.dump(tgt_vocab, f)

    print("Source vocab size:", len(src_vocab))
    print("Target vocab size:", len(tgt_vocab))

    # data iterator
    # keep sort=False and shuffle=False to speed up training and reduce memory usage
    val_iterator = BucketIterator(dataset=val, batch_size=batch_size, train=False,
                                  sort=False, sort_within_batch=True,
                                  sort_key=lambda x: len(x.src),
                                  shuffle=False, device=device)

    return src_vocab, tgt_vocab, list(zip(train_src, train_tgt)), val, val_iterator, batch_size 
Example #16
Source File: utils.py    From lifetimes with MIT License
def _save_obj_without_attr(obj, attr_list, path, values_to_save=None):
    """
    Save object with attributes from attr_list.

    Parameters
    ----------
    obj: obj
        Object of class with __dict__ attribute.
    attr_list: list
        List with attributes to exclude from saving to dill object. If empty
        list all attributes will be saved.
    path: str
        Where to save dill object.
    values_to_save: list, optional
        Placeholders for original attributes for saving object. If None will be
        extended to attr_list length like [None] * len(attr_list)
    """

    if values_to_save is None:
        values_to_save = [None] * len(attr_list)

    saved_attr_dict = {}
    for attr, val_save in zip(attr_list, values_to_save):
        if attr in obj.__dict__:
            item = obj.__dict__.pop(attr)
            saved_attr_dict[attr] = item
            setattr(obj, attr, val_save)

    with open(path, "wb") as out_file:
        dill.dump(obj, out_file)

    for attr, item in saved_attr_dict.items():
        setattr(obj, attr, item) 
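A hypothetical usage of the helper above, with purely illustrative names: dropping an attribute that cannot be pickled (here a sqlite3 connection) before saving, then getting it back on the live object afterwards.

import sqlite3

# Illustrative class; any object with such an attribute would do.
class Model:
    def __init__(self):
        self.params = {'alpha': 0.5}
        self.conn = sqlite3.connect(':memory:')  # connections cannot be pickled

m = Model()
# The dump on disk carries None in place of `conn`; `m.conn` itself
# is restored as soon as the dump completes.
_save_obj_without_attr(m, ['conn'], 'model.dill')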
Example #17
Source File: deferred.py    From toil with Apache License 2.0
def open(self):
        """
        Yields a single-argument function that allows for deferred functions of
        type :class:`toil.DeferredFunction` to be registered.  We use this
        design so deferred functions can be registered only inside this context
        manager.

        Not thread safe.
        """

        # Clean up other jobs before we run, so our job has a nice clean node
        self._runOrphanedDeferredFunctions()
    
        try:
            def defer(deferredFunction):
                # Just serialize deferred functions one after the other.
                # If serializing later ones fails, earlier ones will still be intact.
                # We trust dill to protect sufficiently against partial reads later.
                logger.debug("Deferring function %s" % repr(deferredFunction))
                dill.dump(deferredFunction, self.stateFileOut)
                # Flush before returning so we can guarantee the write is on disk if we die.
                self.stateFileOut.flush()

            logger.debug("Running job")
            yield defer
        finally:
            self._runOwnDeferredFunctions()
            self._runOrphanedDeferredFunctions() 
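defer() appends one pickle after another to the same state file, so the reading side would call dill.load repeatedly until the stream is exhausted. A hedged sketch of that pattern, not toil's actual code:

import dill

def load_all(path):
    # Each dill.load consumes exactly one appended record;
    # EOFError marks the end of the stream.
    records = []
    with open(path, 'rb') as f:
        while True:
            try:
                records.append(dill.load(f))
            except EOFError:
                break
    return records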
Example #18
Source File: backends.py    From kale with Apache License 2.0
def resource_function_save(obj, path, **kwargs):
    """Save a Python function."""
    log.info("Saving function: %s", _get_obj_name(path))
    with open(path + ".pyfn", "wb") as f:
        dill.dump(obj, f) 
Example #19
Source File: exp_linear.py    From marseille with BSD 3-Clause "New" or "Revised" License
def linear_cv_score(dataset, alpha, l1_ratio, constraints):

    fn = cache_fname("linear_cv_score", (dataset, alpha, l1_ratio,
                                         constraints))
    if os.path.exists(fn):
        logging.info("Loading {}".format(fn))
        with open(fn, "rb") as f:
            return dill.load(f)

    load, ids = get_dataset_loader(dataset, split="train")
    n_folds = 5 if dataset == 'ukp' else 3

    scores = []
    for k, (tr, val) in enumerate(KFold(n_folds).split(ids)):
        Y_marg, bl = saga_decision_function(dataset, k, alpha, alpha, l1_ratio)

        val_docs = list(load(ids[val]))
        Y_true = [doc.label for doc in val_docs]
        Y_pred = bl.fast_decode(Y_marg, val_docs, constraints)

        scores.append(bl._score(Y_true, Y_pred))

    with open(fn, "wb") as f:
        logging.info("Saving {}".format(fn))
        dill.dump(scores, f)
    return scores 
Example #20
Source File: batcher.py    From bootcamp with Apache License 2.0
def update_triplets_history(self):
        model_inputs = []
        speakers = list(self.audio.speakers_to_utterances.keys())
        np.random.shuffle(speakers)
        selected_speakers = speakers[: self.nb_speakers]
        embeddings_utterances = []
        for speaker_id in selected_speakers:
            train_utterances = self.sp_to_utt_train[speaker_id]
            for selected_utterance in np.random.choice(a=train_utterances, size=self.nb_per_speaker, replace=False):
                mfcc = sample_from_mfcc_file(selected_utterance, self.max_length)
                embeddings_utterances.append(selected_utterance)
                model_inputs.append(mfcc)
        embeddings = self.model.m.predict(np.array(model_inputs))
        assert embeddings.shape[-1] == 512
        embeddings = np.reshape(embeddings, (len(selected_speakers), self.nb_per_speaker, 512))
        self.history_embeddings_train.extend(list(embeddings.reshape((-1, 512))))
        self.history_utterances_train.extend(embeddings_utterances)
        self.history_model_inputs_train.extend(model_inputs)

        # reason: can't index a deque with a np.array.
        self.history_embeddings = np.array(self.history_embeddings_train)
        self.history_utterances = np.array(self.history_utterances_train)
        self.history_model_inputs = np.array(self.history_model_inputs_train)

        with open(self.metadata_output_file, 'w') as w:
            json.dump(obj=dict(self.metadata_train_speakers), fp=w, indent=2) 
Example #21
Source File: batcher.py    From bootcamp with Apache License 2.0
def persist_to_disk(self):
        with open(os.path.join(self.output_dir, 'categorical_speakers.pkl'), 'wb') as w:
            dill.dump(self.categorical_speakers, w)
        np.save(os.path.join(self.output_dir, 'kx_train.npy'), self.kx_train)
        np.save(os.path.join(self.output_dir, 'kx_test.npy'), self.kx_test)
        np.save(os.path.join(self.output_dir, 'ky_train.npy'), self.ky_train)
        np.save(os.path.join(self.output_dir, 'ky_test.npy'), self.ky_test) 
Example #22
Source File: procedure_continuous_tasks.py    From action-branching-agents with MIT License
def save(self, path):
        """Save model to a pickle located at `path`"""
        with tempfile.TemporaryDirectory() as td:
            U.save_state(os.path.join(td, "model"))
            arc_name = os.path.join(td, "packed.zip")
            with zipfile.ZipFile(arc_name, 'w') as zipf:
                for root, dirs, files in os.walk(td):
                    for fname in files:
                        file_path = os.path.join(root, fname)
                        if file_path != arc_name:
                            zipf.write(file_path, os.path.relpath(file_path, td))
            with open(arc_name, "rb") as f:
                model_data = f.read()
        with open(path, "wb") as f:
            dill.dump((model_data, self._act_params), f) 
Example #23
Source File: base_model.py    From MatchZoo with Apache License 2.0
def save(self, dirpath: typing.Union[str, Path]):
        """
        Save the model.

        A saved model is represented as a directory with two files. One is a
        model parameters file saved by `pickle`, and the other one is a model
        h5 file saved by `keras`.

        :param dirpath: directory path of the saved model

        Example:

            >>> import matchzoo as mz
            >>> model = mz.models.Naive()
            >>> model.guess_and_fill_missing_params(verbose=0)
            >>> model.build()
            >>> model.save('temp-model')
            >>> import shutil
            >>> shutil.rmtree('temp-model')

        """
        dirpath = Path(dirpath)
        params_path = dirpath.joinpath(self.PARAMS_FILENAME)
        weights_path = dirpath.joinpath(self.BACKEND_WEIGHTS_FILENAME)

        if not dirpath.exists():
            dirpath.mkdir(parents=True)
        else:
            raise FileExistsError(f'{dirpath} already exist, fail to save.')

        self._backend.save_weights(weights_path)
        with open(params_path, mode='wb') as params_file:
            dill.dump(self._params, params_file) 
Example #24
Source File: fileIO.py    From bayesloop with MIT License
def save(filename, study):
    """
    Save an instance of a bayesloop study class to file.

    Args:
        filename(str): Path + filename to store bayesloop study
        study: Instance of study class (Study, HyperStudy, etc.)
    """
    with open(filename, 'wb') as f:
        dill.dump(study, f, protocol=dill.HIGHEST_PROTOCOL)
    print('+ Successfully saved current study.') 
Example #25
Source File: serialization.py    From kubeface with Apache License 2.0
def dump(obj, fd):
    check(obj)
    return dill.dump(obj, fd, protocol=PICKLE_PROTOCOL) 
Example #26
Source File: tv.py    From Kairos with GNU General Public License v3.0
def save_browser_state(browser):
    # Serialize the browser object and save it on disk
    with open(FILENAME, 'wb') as fp:
        dill.dump(browser, fp)
Example #27
Source File: core.py    From jaxnet with Apache License 2.0
def save(parameters, path: Path):
    with path.open('wb') as file:
        dill.dump(parameters, file) 
Example #28
Source File: feature_sampler.py    From VerifAI with BSD 3-Clause "New" or "Revised" License
def saveToFile(self, path):
        with open(path, 'wb') as outfile:
            randState = random.getstate()
            numpyRandState = np.random.get_state()
            allState = (randState, numpyRandState, self)
            dill.dump(allState, outfile) 
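Restoring would reverse the tuple: load it and reinstall both RNG states. A hypothetical counterpart, not VerifAI's actual code:

import random
import numpy as np
import dill

def loadFromFile(path):
    # Hypothetical counterpart to saveToFile().
    with open(path, 'rb') as infile:
        randState, numpyRandState, sampler = dill.load(infile)
    random.setstate(randState)            # restore stdlib RNG
    np.random.set_state(numpyRandState)   # restore NumPy RNG
    return sampler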
Example #29
Source File: resource_save.py    From kale with Apache License 2.0
def resource_all(o, path, *args, **kwargs):
    """Save any type of object in a general way."""
    log.info("Saving general object: %s", path.split('/')[-1])
    with open(path + ".dillpkl", "wb") as f:
        dill.dump(o, f) 
Example #30
Source File: _results.py    From nelpy with MIT License
def save_pkl(fname, res, zip=True, overwrite=False):
    """Write pickled data to disk, possible compressing."""
    if os.path.isfile(fname):
        # file exists
        if overwrite:
            pass
        else:
            print('File "{}" already exists! Aborting...'.format(fname))
            return
    if zip:
        save_large_file_without_zip = False
        with gzip.open(fname, "wb") as fid:
            try:
                pickle.dump(res, fid)
            except OverflowError:
                print('writing to disk using protocol=4, which supports file sizes > 4 GiB, and ignoring zip=True (zip is not supported for large files yet)')
                save_large_file_without_zip = True

        if save_large_file_without_zip:
            with open(fname, "wb") as fid:
                pickle.dump(res, fid, protocol=4)
    else:
        with open(fname, "wb") as fid:
            try:
                pickle.dump(res, fid)
            except OverflowError:
                print('writing to disk using protocol=4, which supports file sizes > 4 GiB')
                pickle.dump(res, fid, protocol=4)
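Files written by save_pkl may therefore be either gzip-compressed or plain protocol-4 pickles, so a matching loader has to try both. A hedged sketch, not nelpy's actual code:

import gzip
import pickle

def load_pkl(fname):
    # Try the gzip path first; fall back to a plain read when the
    # file lacks the gzip magic bytes (raises an OSError subclass).
    try:
        with gzip.open(fname, 'rb') as fid:
            return pickle.load(fid)
    except OSError:
        with open(fname, 'rb') as fid:
            return pickle.load(fid)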