Python lmdb.MapFullError() Examples

The following are 15 code examples of lmdb.MapFullError(), drawn from open-source projects; the source file and project for each are listed above the example. You may also want to check out all available functions and classes of the lmdb module.
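Before the examples, a minimal sketch of the failure and the usual recovery (the path and sizes below are illustrative, not taken from any of the projects): lmdb raises MapFullError when a write transaction would exceed the environment's map_size, and the common remedy is to let the transaction abort, enlarge the map with set_mapsize, and retry.

import lmdb

env = lmdb.open('/tmp/mapfull-demo', map_size=2 ** 20)  # deliberately tiny 1 MiB map

def put_with_grow(env, key, value):
    # Retry the put, doubling map_size each time the map fills up.
    while True:
        try:
            with env.begin(write=True) as txn:
                txn.put(key, value)
            return
        except lmdb.MapFullError:
            # The context manager aborted the transaction on the way out;
            # grow the map and try again.
            env.set_mapsize(env.info()['map_size'] * 2)

put_with_grow(env, b'key', b'\x00' * (2 ** 21))  # a 2 MiB value forces at least one grow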
Example #1
Source File: test_formats.py    From tsinfer with GNU General Public License v3.0
def test_too_small_max_file_size_add(self):
        with tempfile.TemporaryDirectory(prefix="tsinf_format_test") as tempdir:
            base_size = 2 ** 16  # Big enough to allow the initial file to be created
            # Fail during adding a large amount of data
            with self.assertRaises(lmdb.MapFullError):
                filename = os.path.join(tempdir, "samples.tmp")
                with formats.SampleData(
                    path=filename, sequence_length=1, max_file_size=base_size
                ) as small_sample_file:
                    small_sample_file.add_site(
                        0,
                        alleles=["0", "1"],
                        genotypes=np.zeros(base_size, dtype=np.int8),
                    )
            # Work around https://github.com/tskit-dev/tsinfer/issues/201
            small_sample_file.data.store.close() 
Example #2
Source File: lmdbslab.py    From synapse with Apache License 2.0
async def _onCoFini(self):
        assert s_glob.iAmLoop()

        await self.fire('commit')

        while True:
            try:
                self._finiCoXact()
            except lmdb.MapFullError:
                self._handle_mapfull()
                continue
            break

        self.lenv.close()
        _AllSlabs.discard(self.abspath)
        del self.lenv 
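The _handle_mapfull helper that these synapse excerpts rely on is not shown above. Judging from the retry loops and the comment in Example #11 ("no need to re-try self.forcecommit as _growMapSize does it"), it aborts the full transaction, grows the map, and replays the logged operations; in Examples #6 and #7 its return value is propagated, which suggests it also re-runs the failed operation. A simplified, hypothetical sketch of that recovery, assuming (func, args, kwargs) entries in self.xactops; this is not synapse's actual implementation:

def _handle_mapfull(self):
    # Abort the write transaction that hit MDB_MAP_FULL.
    self.xact.abort()
    # Double the memory map, then open a fresh write transaction.
    self.lenv.set_mapsize(self.lenv.info()['map_size'] * 2)
    self.xact = self.lenv.begin(write=True)
    # Replay the operations logged against the aborted transaction
    # (the entry format is an assumption for this sketch).
    for func, args, kwargs in self.xactops:
        func(*args, **kwargs)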
Example #3
Source File: lmdbslab.py    From synapse with Apache License 2.0
def _finiCoXact(self):
        '''
        Note:
            This method may raise a MapFullError
        '''

        assert s_glob.iAmLoop()

        [scan.bump() for scan in self.scans]

        # Readonly or self.xact has already been closed
        if self.xact is None:
            return

        self.xact.commit()

        self.xactops.clear()

        del self.xact
        self.xact = None 
Example #4
Source File: lmdbslab.py    From synapse with Apache License 2.0
def initdb(self, name, dupsort=False, integerkey=False):
        while True:
            try:
                if self.readonly:
                    # In a readonly environment, we can't make our own write transaction, but we
                    # can have the lmdb module create one for us by not specifying the transaction
                    db = self.lenv.open_db(name.encode('utf8'), create=False, dupsort=dupsort, integerkey=integerkey)
                else:
                    db = self.lenv.open_db(name.encode('utf8'), txn=self.xact, dupsort=dupsort, integerkey=integerkey)
                    self.dirty = True
                    self.forcecommit()

                self.dbnames[name] = (db, dupsort)
                return name
            except lmdb.MapFullError:
                self._handle_mapfull() 
Example #5
Source File: lmdbslab.py    From synapse with Apache License 2.0
def dropdb(self, name):
        '''
        Deletes an **entire database** (i.e. a table), losing all data.
        '''
        if self.readonly:
            raise s_exc.IsReadOnly()

        while True:
            try:
                if not self.dbexists(name):
                    return

                self.initdb(name)
                db, dupsort = self.dbnames.pop(name)

                self.dirty = True
                self.xact.drop(db, delete=True)
                self.forcecommit()
                return

            except lmdb.MapFullError:
                self._handle_mapfull() 
Example #6
Source File: lmdbslab.py    From synapse with Apache License 2.0
def _xact_action(self, calling_func, xact_func, lkey, *args, db=None, **kwargs):
        if self.readonly:
            raise s_exc.IsReadOnly()

        realdb, dupsort = self.dbnames[db]

        try:
            self.dirty = True

            if not self.recovering:
                self._logXactOper(calling_func, lkey, *args, db=db, **kwargs)

            return xact_func(self.xact, lkey, *args, db=realdb, **kwargs)

        except lmdb.MapFullError:
            return self._handle_mapfull() 
Example #7
Source File: lmdbslab.py    From synapse with Apache License 2.0
def putmulti(self, kvpairs, dupdata=False, append=False, db=None):
        '''
        Returns:
            Tuple of number of items consumed, number of items added
        '''
        if self.readonly:
            raise s_exc.IsReadOnly()

        # Log playback isn't compatible with generators
        if not isinstance(kvpairs, list):
            kvpairs = list(kvpairs)

        realdb, dupsort = self.dbnames[db]

        try:
            self.dirty = True

            if not self.recovering:
                self._logXactOper(self.putmulti, kvpairs, dupdata=dupdata, append=append, db=db)

            with self.xact.cursor(db=realdb) as curs:
                return curs.putmulti(kvpairs, dupdata=dupdata, append=append)

        except lmdb.MapFullError:
            return self._handle_mapfull() 
Example #8
Source File: test_formats.py    From tsinfer with GNU General Public License v3.0
def test_too_small_max_file_size_init(self):
        with tempfile.TemporaryDirectory(prefix="tsinf_format_test") as tempdir:
            # Fail immediately if the max_size is so small we can't even create a file
            filename = os.path.join(tempdir, "samples.tmp")
            self.assertRaises(
                lmdb.MapFullError,
                formats.SampleData,
                path=filename,
                sequence_length=1,
                max_file_size=1,
            ) 
Example #9
Source File: lmdb_30.py    From hangar-py with Apache License 2.0
def write_data(self, data: str, *, remote_operation: bool = False) -> bytes:
        """verifies correctness of array data and performs write operation.

        Parameters
        ----------
        data: str
            data to write to group.
        remote_operation : optional, kwarg only, bool
            If this is a remote process which is adding data, any necessary
            lmdb database files will be created in the remote data dir instead
            of the stage directory. (default is False, which is for a regular
            access process)

        Returns
        -------
        bytes
            string identifying the collection dataset and collection dim-0 index
            which the array can be accessed at.
        """
        encoded_data = data.encode()
        checksum = xxh64_hexdigest(encoded_data)

        if self.w_uid in self.wFp:
            try:
                row_idx = next(self.row_idx)
            except StopIteration:
                self._create_schema(remote_operation=remote_operation)
                return self.write_data(data, remote_operation=remote_operation)
        else:
            self._create_schema(remote_operation=remote_operation)
            return self.write_data(data, remote_operation=remote_operation)

        encoded_row_idx = row_idx.encode()
        try:
            with self.wFp[self.w_uid].begin(write=True) as txn:
                txn.put(encoded_row_idx, encoded_data, append=True)
        except lmdb.MapFullError:
            self._create_schema(remote_operation=remote_operation)
            return self.write_data(data, remote_operation=remote_operation)

        return lmdb_30_encode(self.w_uid, row_idx, checksum) 
Example #10
Source File: lmdb_31.py    From hangar-py with Apache License 2.0
def write_data(self, data: bytes, *, remote_operation: bool = False) -> bytes:
        """verifies correctness of array data and performs write operation.

        Parameters
        ----------
        data: bytes
            data to write to group.
        remote_operation : optional, kwarg only, bool
            If this is a remote process which is adding data, any necessary
            lmdb database files will be created in the remote data dir instead
            of the stage directory. (default is False, which is for a regular
            access process)

        Returns
        -------
        bytes
            string identifying the collection dataset and collection dim-0 index
            which the array can be accessed at.
        """
        checksum = xxh64_hexdigest(data)
        if self.w_uid in self.wFp:
            try:
                row_idx = next(self.row_idx)
            except StopIteration:
                self._create_schema(remote_operation=remote_operation)
                return self.write_data(data, remote_operation=remote_operation)
        else:
            self._create_schema(remote_operation=remote_operation)
            return self.write_data(data, remote_operation=remote_operation)

        encoded_row_idx = row_idx.encode()
        try:
            with self.wFp[self.w_uid].begin(write=True) as txn:
                txn.put(encoded_row_idx, data, append=True)
        except lmdb.MapFullError:
            self._create_schema(remote_operation=remote_operation)
            return self.write_data(data, remote_operation=remote_operation)

        return lmdb_31_encode(self.w_uid, row_idx, checksum) 
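As these two excerpts show, hangar's lmdb backends take a different approach from the map-resizing seen elsewhere on this page: MapFullError is treated as a signal to roll over to a fresh storage file (_create_schema) and recursively retry the write there, so each individual LMDB file stays at a fixed maximum size.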
Example #11
Source File: lmdbslab.py    From synapse with Apache License 2.0
async def sync(self):
        try:
            # do this from the loop thread only to avoid recursion
            await self.fire('commit')
            self.forcecommit()

        except lmdb.MapFullError:
            self._handle_mapfull()
            # There's no need to re-try self.forcecommit as _growMapSize does it 
Example #12
Source File: lmdbslab.py    From synapse with Apache License 2.0
def forcecommit(self):
        '''
        Note:
            This method may raise a MapFullError
        '''
        if not self.dirty:
            return False

        # ok... lets commit and re-open
        self._finiCoXact()
        self._initCoXact()
        return True 
Example #13
Source File: serialize.py    From dataflow with Apache License 2.0
def save(df, path, write_frequency=5000):
        """
        Args:
            df (DataFlow): the DataFlow to serialize.
            path (str): output path. Either a directory or an lmdb file.
            write_frequency (int): the frequency to write back data to disk.
                A smaller value reduces memory usage.
        """
        assert isinstance(df, DataFlow), type(df)
        isdir = os.path.isdir(path)
        if isdir:
            assert not os.path.isfile(os.path.join(path, 'data.mdb')), "LMDB file exists!"
        else:
            assert not os.path.isfile(path), "LMDB file {} exists!".format(path)
        # It's OK to use super large map_size on Linux, but not on other platforms
        # See: https://github.com/NVIDIA/DIGITS/issues/206
        map_size = 1099511627776 * 2 if platform.system() == 'Linux' else 128 * 10**6
        db = lmdb.open(path, subdir=isdir,
                       map_size=map_size, readonly=False,
                       meminit=False, map_async=True)    # need sync() at the end
        size = _reset_df_and_get_size(df)

        # put data into lmdb, and doubling the size if full.
        # Ref: https://github.com/NVIDIA/DIGITS/pull/209/files
        def put_or_grow(txn, key, value):
            try:
                txn.put(key, value)
                return txn
            except lmdb.MapFullError:
                pass
            txn.abort()
            curr_size = db.info()['map_size']
            new_size = curr_size * 2
            logger.info("Doubling LMDB map_size to {:.2f}GB".format(new_size / 10**9))
            db.set_mapsize(new_size)
            txn = db.begin(write=True)
            txn = put_or_grow(txn, key, value)
            return txn

        with get_tqdm(total=size) as pbar:
            idx = -1

            # LMDB transaction is not exception-safe!
            # although it has a context manager interface
            txn = db.begin(write=True)
            for idx, dp in enumerate(df):
                txn = put_or_grow(txn, u'{:08}'.format(idx).encode('ascii'), dumps(dp))
                pbar.update()
                if (idx + 1) % write_frequency == 0:
                    txn.commit()
                    txn = db.begin(write=True)
            txn.commit()

            keys = [u'{:08}'.format(k).encode('ascii') for k in range(idx + 1)]
            with db.begin(write=True) as txn:
                txn = put_or_grow(txn, b'__keys__', dumps(keys))

            logger.info("Flushing database ...")
            db.sync()
        db.close() 
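Two details in this save function are easy to miss: txn.abort() invalidates the transaction handle, so put_or_grow hands back a fresh transaction that the caller must continue using, and because the environment is opened with map_async=True, the final db.sync() is what actually guarantees the writes reach disk.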
Example #14
Source File: serialize.py    From ADL with MIT License
Identical to the save function in Example #13.
Example #15
Source File: serialize.py    From tensorpack with Apache License 2.0
Identical to the save function in Example #13.