Python lmdb.MapFullError() Examples

The following are 15 code examples of lmdb.MapFullError(), drawn from open-source projects; the source file and project for each are listed above the example. You may also want to check out all available functions and classes of the lmdb module.
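Before the examples, a minimal sketch of the failure and the usual recovery (the path and sizes below are illustrative, not taken from any of the projects): lmdb raises MapFullError when a write transaction would exceed the environment's map_size, and the common remedy is to let the transaction abort, enlarge the map with set_mapsize, and retry.

import lmdb

env = lmdb.open('/tmp/mapfull-demo', map_size=2 ** 20)  # deliberately tiny 1 MiB map

def put_with_grow(env, key, value):
    # Retry the put, doubling map_size each time the map fills up.
    while True:
        try:
            with env.begin(write=True) as txn:
                txn.put(key, value)
            return
        except lmdb.MapFullError:
            # The context manager aborted the transaction on the way out;
            # grow the map and try again.
            env.set_mapsize(env.info()['map_size'] * 2)

put_with_grow(env, b'key', b'\x00' * (2 ** 21))  # a 2 MiB value forces at least one grow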
Example #1
Source File: test_formats.py    From tsinfer with GNU General Public License v3.0
def test_too_small_max_file_size_add(self):
        with tempfile.TemporaryDirectory(prefix="tsinf_format_test") as tempdir:
            base_size = 2 ** 16  # Big enough to allow the initial file to be created
            # Fail during adding a large amount of data
            with self.assertRaises(lmdb.MapFullError):
                filename = os.path.join(tempdir, "samples.tmp")
                with formats.SampleData(
                    path=filename, sequence_length=1, max_file_size=base_size
                ) as small_sample_file:
                    small_sample_file.add_site(
                        0,
                        alleles=["0", "1"],
                        genotypes=np.zeros(base_size, dtype=np.int8),
                    )
            # Work around https://github.com/tskit-dev/tsinfer/issues/201
            small_sample_file.data.store.close() 
Example #2
Source File: lmdbslab.py    From synapse with Apache License 2.0
async def _onCoFini(self):
        assert s_glob.iAmLoop()

        await self.fire('commit')

        while True:
            try:
                self._finiCoXact()
            except lmdb.MapFullError:
                self._handle_mapfull()
                continue
            break

        self.lenv.close()
        _AllSlabs.discard(self.abspath)
        del self.lenv 
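The _handle_mapfull helper that these synapse excerpts rely on is not shown above. Judging from the retry loops and the comment in Example #11 ("no need to re-try self.forcecommit as _growMapSize does it"), it aborts the full transaction, grows the map, and replays the logged operations; in Examples #6 and #7 its return value is propagated, which suggests it also re-runs the failed operation. A simplified, hypothetical sketch of that recovery, assuming (func, args, kwargs) entries in self.xactops; this is not synapse's actual implementation:

def _handle_mapfull(self):
    # Abort the write transaction that hit MDB_MAP_FULL.
    self.xact.abort()
    # Double the memory map, then open a fresh write transaction.
    self.lenv.set_mapsize(self.lenv.info()['map_size'] * 2)
    self.xact = self.lenv.begin(write=True)
    # Replay the operations logged against the aborted transaction
    # (the entry format is an assumption for this sketch).
    for func, args, kwargs in self.xactops:
        func(*args, **kwargs)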
Example #3
Source File: lmdbslab.py    From synapse with Apache License 2.0
def _finiCoXact(self):
        '''
        Note:
            This method may raise a MapFullError
        '''

        assert s_glob.iAmLoop()

        [scan.bump() for scan in self.scans]

        # Readonly or self.xact has already been closed
        if self.xact is None:
            return

        self.xact.commit()

        self.xactops.clear()

        del self.xact
        self.xact = None 
Example #4
Source File: lmdbslab.py    From synapse with Apache License 2.0
def initdb(self, name, dupsort=False, integerkey=False):
        while True:
            try:
                if self.readonly:
                    # In a readonly environment, we can't make our own write transaction, but we
                    # can have the lmdb module create one for us by not specifying the transaction
                    db = self.lenv.open_db(name.encode('utf8'), create=False, dupsort=dupsort, integerkey=integerkey)
                else:
                    db = self.lenv.open_db(name.encode('utf8'), txn=self.xact, dupsort=dupsort, integerkey=integerkey)
                    self.dirty = True
                    self.forcecommit()

                self.dbnames[name] = (db, dupsort)
                return name
            except lmdb.MapFullError:
                self._handle_mapfull() 
Example #5
Source File: lmdbslab.py    From synapse with Apache License 2.0
def dropdb(self, name):
        '''
        Deletes an **entire database** (i.e. a table), losing all data.
        '''
        if self.readonly:
            raise s_exc.IsReadOnly()

        while True:
            try:
                if not self.dbexists(name):
                    return

                self.initdb(name)
                db, dupsort = self.dbnames.pop(name)

                self.dirty = True
                self.xact.drop(db, delete=True)
                self.forcecommit()
                return

            except lmdb.MapFullError:
                self._handle_mapfull() 
Example #6
Source File: lmdbslab.py    From synapse with Apache License 2.0
def _xact_action(self, calling_func, xact_func, lkey, *args, db=None, **kwargs):
        if self.readonly:
            raise s_exc.IsReadOnly()

        realdb, dupsort = self.dbnames[db]

        try:
            self.dirty = True

            if not self.recovering:
                self._logXactOper(calling_func, lkey, *args, db=db, **kwargs)

            return xact_func(self.xact, lkey, *args, db=realdb, **kwargs)

        except lmdb.MapFullError:
            return self._handle_mapfull() 
Example #7
Source File: lmdbslab.py    From synapse with Apache License 2.0
def putmulti(self, kvpairs, dupdata=False, append=False, db=None):
        '''
        Returns:
            Tuple of number of items consumed, number of items added
        '''
        if self.readonly:
            raise s_exc.IsReadOnly()

        # Log playback isn't compatible with generators
        if not isinstance(kvpairs, list):
            kvpairs = list(kvpairs)

        realdb, dupsort = self.dbnames[db]

        try:
            self.dirty = True

            if not self.recovering:
                self._logXactOper(self.putmulti, kvpairs, dupdata=dupdata, append=append, db=db)

            with self.xact.cursor(db=realdb) as curs:
                return curs.putmulti(kvpairs, dupdata=dupdata, append=append)

        except lmdb.MapFullError:
            return self._handle_mapfull() 
Example #8
Source File: test_formats.py    From tsinfer with GNU General Public License v3.0
def test_too_small_max_file_size_init(self):
        with tempfile.TemporaryDirectory(prefix="tsinf_format_test") as tempdir:
            # Fail immediately if the max_size is so small we can't even create a file
            filename = os.path.join(tempdir, "samples.tmp")
            self.assertRaises(
                lmdb.MapFullError,
                formats.SampleData,
                path=filename,
                sequence_length=1,
                max_file_size=1,
            ) 
Example #9
Source File: lmdb_30.py    From hangar-py with Apache License 2.0
def write_data(self, data: str, *, remote_operation: bool = False) -> bytes:
        """verifies correctness of array data and performs write operation.

        Parameters
        ----------
        data: str
            data to write to group.
        remote_operation : optional, kwarg only, bool
            If this is a remote process which is adding data, any necessary
            lmdb database files will be created in the remote data dir instead
            of the stage directory. (default is False, which is for a regular
            access process)

        Returns
        -------
        bytes
            string identifying the collection dataset and collection dim-0 index
            which the array can be accessed at.
        """
        encoded_data = data.encode()
        checksum = xxh64_hexdigest(encoded_data)

        if self.w_uid in self.wFp:
            try:
                row_idx = next(self.row_idx)
            except StopIteration:
                self._create_schema(remote_operation=remote_operation)
                return self.write_data(data, remote_operation=remote_operation)
        else:
            self._create_schema(remote_operation=remote_operation)
            return self.write_data(data, remote_operation=remote_operation)

        encoded_row_idx = row_idx.encode()
        try:
            with self.wFp[self.w_uid].begin(write=True) as txn:
                txn.put(encoded_row_idx, encoded_data, append=True)
        except lmdb.MapFullError:
            self._create_schema(remote_operation=remote_operation)
            return self.write_data(data, remote_operation=remote_operation)

        return lmdb_30_encode(self.w_uid, row_idx, checksum) 
Example #10
Source File: lmdb_31.py    From hangar-py with Apache License 2.0
def write_data(self, data: bytes, *, remote_operation: bool = False) -> bytes:
        """verifies correctness of array data and performs write operation.

        Parameters
        ----------
        data: bytes
            data to write to group.
        remote_operation : optional, kwarg only, bool
            If this is a remote process which is adding data, any necessary
            lmdb database files will be created in the remote data dir instead
            of the stage directory. (default is False, which is for a regular
            access process)

        Returns
        -------
        bytes
            string identifying the collection dataset and collection dim-0 index
            which the array can be accessed at.
        """
        checksum = xxh64_hexdigest(data)
        if self.w_uid in self.wFp:
            try:
                row_idx = next(self.row_idx)
            except StopIteration:
                self._create_schema(remote_operation=remote_operation)
                return self.write_data(data, remote_operation=remote_operation)
        else:
            self._create_schema(remote_operation=remote_operation)
            return self.write_data(data, remote_operation=remote_operation)

        encoded_row_idx = row_idx.encode()
        try:
            with self.wFp[self.w_uid].begin(write=True) as txn:
                txn.put(encoded_row_idx, data, append=True)
        except lmdb.MapFullError:
            self._create_schema(remote_operation=remote_operation)
            return self.write_data(data, remote_operation=remote_operation)

        return lmdb_31_encode(self.w_uid, row_idx, checksum) 
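As these two excerpts show, hangar's lmdb backends take a different approach from the map-resizing seen elsewhere on this page: MapFullError is treated as a signal to roll over to a fresh storage file (_create_schema) and recursively retry the write there, so each individual LMDB file stays at a fixed maximum size.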
Example #11
Source File: lmdbslab.py    From synapse with Apache License 2.0
async def sync(self):
        try:
            # do this from the loop thread only to avoid recursion
            await self.fire('commit')
            self.forcecommit()

        except lmdb.MapFullError:
            self._handle_mapfull()
            # There's no need to re-try self.forcecommit as _growMapSize does it 
Example #12
Source File: lmdbslab.py    From synapse with Apache License 2.0
def forcecommit(self):
        '''
        Note:
            This method may raise a MapFullError
        '''
        if not self.dirty:
            return False

        # ok... lets commit and re-open
        self._finiCoXact()
        self._initCoXact()
        return True 
Example #13
Source File: serialize.py    From dataflow with Apache License 2.0
def save(df, path, write_frequency=5000):
        """
        Args:
            df (DataFlow): the DataFlow to serialize.
            path (str): output path. Either a directory or an lmdb file.
            write_frequency (int): the frequency to write back data to disk.
                A smaller value reduces memory usage.
        """
        assert isinstance(df, DataFlow), type(df)
        isdir = os.path.isdir(path)
        if isdir:
            assert not os.path.isfile(os.path.join(path, 'data.mdb')), "LMDB file exists!"
        else:
            assert not os.path.isfile(path), "LMDB file {} exists!".format(path)
        # It's OK to use super large map_size on Linux, but not on other platforms
        # See: https://github.com/NVIDIA/DIGITS/issues/206
        map_size = 1099511627776 * 2 if platform.system() == 'Linux' else 128 * 10**6
        db = lmdb.open(path, subdir=isdir,
                       map_size=map_size, readonly=False,
                       meminit=False, map_async=True)    # need sync() at the end
        size = _reset_df_and_get_size(df)

        # put data into lmdb, and doubling the size if full.
        # Ref: https://github.com/NVIDIA/DIGITS/pull/209/files
        def put_or_grow(txn, key, value):
            try:
                txn.put(key, value)
                return txn
            except lmdb.MapFullError:
                pass
            txn.abort()
            curr_size = db.info()['map_size']
            new_size = curr_size * 2
            logger.info("Doubling LMDB map_size to {:.2f}GB".format(new_size / 10**9))
            db.set_mapsize(new_size)
            txn = db.begin(write=True)
            txn = put_or_grow(txn, key, value)
            return txn

        with get_tqdm(total=size) as pbar:
            idx = -1

            # LMDB transaction is not exception-safe!
            # although it has a context manager interface
            txn = db.begin(write=True)
            for idx, dp in enumerate(df):
                txn = put_or_grow(txn, u'{:08}'.format(idx).encode('ascii'), dumps(dp))
                pbar.update()
                if (idx + 1) % write_frequency == 0:
                    txn.commit()
                    txn = db.begin(write=True)
            txn.commit()

            keys = [u'{:08}'.format(k).encode('ascii') for k in range(idx + 1)]
            with db.begin(write=True) as txn:
                txn = put_or_grow(txn, b'__keys__', dumps(keys))

            logger.info("Flushing database ...")
            db.sync()
        db.close() 
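Two details in this save function are easy to miss: txn.abort() invalidates the transaction handle, so put_or_grow hands back a fresh transaction that the caller must continue using, and because the environment is opened with map_async=True, the final db.sync() is what actually guarantees the writes reach disk.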
Example #14
Source File: serialize.py    From ADL with MIT License
Identical to the save function in Example #13.
Example #15
Source File: serialize.py    From tensorpack with Apache License 2.0
Identical to the save function in Example #13.