Python Examples of leveldb.LevelDB

Source File: levelhelper.py From python-script with Apache License 2.0

6 votes

def excel_to_db(from_excel, to_db):
    '''Transfer Excel file to leveldb, return total count.

    Rows of the active worksheet are read starting at row 2 (row 1 is
    skipped).  Column A supplies the key and column B the value.  Only
    string cells are used; all whitespace is removed from them and the
    result is stored UTF-8 encoded.  A cell that is not a string
    contributes an empty byte string.

    from_excel: path of the workbook handed to ``load_workbook``.
    to_db: a database path (str), opened and created if missing, or an
        already opened object exposing ``Put(key, value)``.

    Returns the number of rows written.
    '''
    _wb = load_workbook(from_excel, read_only=True)
    try:
        _ws = _wb.active
        _db = leveldb.LevelDB(to_db, create_if_missing=True) if isinstance(to_db, str) else to_db
        total = 0
        # Two columns are needed (key + value), so max_col must be 2.
        for _row in _ws.iter_rows(min_row=2, min_col=1, max_col=2):
            if len(_row) < 2:
                continue
            _key_cell, _value_cell = _row[0], _row[1]
            _key, _value = b'', b''
            # Strip whitespace while the data is still text, then encode once.
            if _key_cell.data_type == cell.Cell.TYPE_STRING:
                _key = ''.join(_key_cell.value.split()).encode('utf-8')
            if _value_cell.data_type == cell.Cell.TYPE_STRING:
                _value = ''.join(_value_cell.value.split()).encode('utf-8')
            _db.Put(_key, _value)
            total += 1
    finally:
        # Release the workbook even when reading or writing fails.
        _wb.close()
    return total

Source File: levelhelper.py From python-script with Apache License 2.0

6 votes

def text_to_db(from_text, to_db, split_char):
    '''Transfer text file to leveldb, return total and invalid count.

    Each line is split once on ``split_char`` (``','`` when it is empty or
    None) into key and value; a line without the separator is stored with
    an empty value.  Line terminators are removed before storing, and
    empty lines are counted as invalid and skipped.  Keys and values are
    written UTF-8 encoded.

    from_text: path of the UTF-8 text file to read.
    to_db: a database path (str), opened and created if missing, or an
        already opened object exposing ``Put(key, value)``.

    Returns ``(total, invalid)``: number of lines read and number of
    lines skipped.
    '''
    total, invalid = 0, 0
    _split = split_char if split_char else ','
    _db = leveldb.LevelDB(to_db, create_if_missing=True) if isinstance(to_db, str) else to_db
    with open(from_text, 'r', encoding='utf-8') as _f:
        for line in _f:
            total += 1
            # Lines read from a file keep their terminator, so strip it
            # before testing for emptiness or storing the value.
            line = line.rstrip('\r\n')
            if not line:
                invalid += 1
                continue
            # partition() yields an empty value when the separator is absent.
            _key, _, _value = line.partition(_split)
            _db.Put(_key.encode('utf-8'), _value.encode('utf-8'))
    return total, invalid

Source File: levelhelper.py From python-script with Apache License 2.0

6 votes

def db_to_text(from_db, to_text):
    '''Write every key/value pair of a leveldb to a text file.

    from_db: a database path (str), opened without creating it, or an
        already opened object exposing ``RangeIter()``.
    to_text: path of the UTF-8 text file to write; one ``key,value`` line
        is emitted per entry.
    '''
    if isinstance(from_db, str):
        source = leveldb.LevelDB(from_db, create_if_missing=False)
    else:
        source = from_db
    with open(to_text, 'w', encoding='utf-8') as out:
        out.writelines(
            '{},{}\n'.format(key.decode(), val.decode())
            for key, val in source.RangeIter()
        )

Source File: base.py From level-tsd with Apache License 2.0

5 votes

def __init__(self, dbpath):
        """Open the LevelDB database at ``dbpath`` and keep it as ``_map_db``."""
        map_db = leveldb.LevelDB(dbpath)
        self._map_db = map_db

Source File: mleveldb.py From mining with MIT License

5 votes

def conn(self):
        """Open a DB handle under /tmp named after the configured "db" value."""
        db_name = self.conf.get("db")
        location = "/tmp/{}.mining".format(db_name)
        return DB(location)

Source File: urlpool.py From xcrawler with MIT License

5 votes

def __init__(self, urlindex_file="", urls=None,
                 load_bad_url=False,
                 span_of_host=30,
                 max_in_mem=1024,
                 is_good_link=None):
        """Set up the URL pool backed by a LevelDB URL index.

        urlindex_file: path of the LevelDB index; defaults to
            'xcrawler.url.idx' when empty.
        urls: optional initial URLs, added through ``addmany``.
        load_bad_url: forwarded to ``_load_from_url_index``.
        span_of_host: stored as ``self.span_of_host``.
        max_in_mem: stored as ``self.max_in_mem``.
        is_good_link: required callable; when it is missing a message is
            printed and the process exits.
        """
        if not urlindex_file:
            urlindex_file = 'xcrawler.url.idx'
        if not is_good_link:
            import sys
            # Single-argument call form prints identically on Python 2 and 3.
            print('no is_good_link function!!!!')
            sys.exit()
        self.is_good_link = is_good_link
        self.span_of_host = span_of_host
        self._urlindex = leveldb.LevelDB(urlindex_file)
        self._pool = {} # host: [urls]
        self._hosts_pop_recently = {}
        self.url_count = 0
        self.max_in_mem = max_in_mem
        self.last_load = time.time()
        if urls:
            self.url_count += len(urls)
            self.addmany(urls, always=True)
        self._load_from_url_index(load_bad_url, is_good_link)
        ## url is _URL_BAD if it has 5 times of 404
        self._404 = {}
        self._404_threshold = 5

Source File: levelhelper.py From python-script with Apache License 2.0

5 votes

def db_to_excel(from_db, to_excel):
    '''Copy every key/value pair of a leveldb into a new Excel workbook.

    from_db: a database path (str), opened without creating it, or an
        already opened object exposing ``RangeIter()``.
    to_excel: path the workbook is saved to.

    Returns the number of rows appended to the active sheet.
    '''
    if isinstance(from_db, str):
        source = leveldb.LevelDB(from_db, create_if_missing=False)
    else:
        source = from_db
    book = Workbook()
    sheet = book.active
    written = 0
    for written, (key, val) in enumerate(source.RangeIter(), 1):
        sheet.append([key.decode(), val.decode()])
    book.save(to_excel)
    return written

Source File: base.py From level-tsd with Apache License 2.0

5 votes

def __init__(self, path):
        """Open the LevelDB database at ``path`` and create an empty write batch."""
        self._path = path
        self._db = leveldb.LevelDB(path)
        self._batch = leveldb.WriteBatch()
        # Runs only after _path, _db and _batch have been assigned.
        self._init_db()
        # NOTE(review): purpose of ``x`` is not visible here - presumably a
        # counter of pending operations; confirm against its users.
        self.x = 0
        # NOTE(review): looks like the timestamp of the last flush - confirm.
        self.lflush = clock()

Source File: base.py From level-tsd with Apache License 2.0

5 votes

def __init__(self, dbpath):
        """Open the LevelDB database at ``dbpath`` and create the lock ``l``."""
        dir_db = leveldb.LevelDB(dbpath)
        self._dir_db = dir_db
        self.l = Lock()

Source File: kv_store_leveldb.py From indy-plenum with Apache License 2.0

5 votes

def open(self):
        """Open the LevelDB database at ``self.db_path`` and store it as ``_db``."""
        location = self.db_path
        self._db = leveldb.LevelDB(location)

Source File: leveldict.py From QMusic with GNU Lesser General Public License v2.1

5 votes

def open(self):
        """Open the LevelDB database at ``self.path`` and store it as ``db``."""
        # Imported here rather than at module level, as in the original.
        from leveldb import LevelDB
        self.db = LevelDB(self.path)

Source File: key_value_store_leveldb.py From loopchain with Apache License 2.0

5 votes

def _new_db(self, path, **kwargs) -> leveldb.LevelDB:
        """Create a LevelDB handle for ``path``, forwarding any keyword options."""
        database = leveldb.LevelDB(path, **kwargs)
        return database

Source File: key_value_store_leveldb.py From loopchain with Apache License 2.0

5 votes

def __init__(self, store: KeyValueStore, db: leveldb.LevelDB, sync: bool):
        """Initialise the base class, then take a snapshot of ``db``.

        ``store`` and ``sync`` are forwarded to the parent initialiser;
        ``_touched_keys`` starts out as an empty set.
        """
        super().__init__(store, sync=sync)
        self._touched_keys = set()
        snapshot = db.CreateSnapshot()
        self._snapshot = snapshot

Source File: leveldb_service.py From pyethapp with BSD 3-Clause "New" or "Revised" License

5 votes

def __init__(self, app):
        """Initialise the service, then open LevelDB under the app's data directory.

        The database lives in the ``leveldb`` entry below
        ``app.config['data_dir']``, which must be set.
        """
        BaseService.__init__(self, app)
        data_dir = self.app.config['data_dir']
        assert data_dir
        self.uncommitted = {}
        self.stop_event = Event()
        LevelDB.__init__(self, os.path.join(data_dir, 'leveldb'))

Source File: leveldb_service.py From pyethapp with BSD 3-Clause "New" or "Revised" License

5 votes

def __init__(self, dbfile):
        """Log the path, then open the LevelDB database at ``dbfile``.

        ``uncommitted`` starts out as an empty dict.
        """
        self.uncommitted = {}
        log.info('opening LevelDB', path=dbfile)
        database = leveldb.LevelDB(dbfile)
        self.db = database

Source File: toolbox.py From QuickUMLS with MIT License

5 votes

def __init__(self, path, database_backend='leveldb'):
        """Open the CUI and semantic-type databases stored under ``path``.

        path: existing directory that holds the database files.
        database_backend: ``'leveldb'`` (default) or ``'unqlite'``.

        For either backend the put/get callables of each database are
        exposed as ``cui_db_put`` / ``cui_db_get`` and
        ``semtypes_db_put`` / ``semtypes_db_get``.

        Raises:
            IOError: ``path`` is not an existing directory.
            AssertionError: unqlite was requested but is not available.
            ValueError: ``database_backend`` is not recognized.
        """
        # isdir() already implies existence; the former
        # ``exists(path) or isdir(path)`` test let regular files through.
        if not os.path.isdir(path):
            err_msg = (
                '"{}" is not a valid directory').format(path)
            raise IOError(err_msg)

        if database_backend == 'unqlite':
            assert UNQLITE_AVAILABLE, (
                'You selected unqlite as database backend, but it is not '
                'installed. Please install it via `pip install unqlite`'
            )
            self.cui_db = unqlite.UnQLite(os.path.join(path, 'cui.unqlite'))
            self.cui_db_put = self.cui_db.store
            self.cui_db_get = self.cui_db.fetch
            self.semtypes_db = unqlite.UnQLite(os.path.join(path, 'semtypes.unqlite'))
            self.semtypes_db_put = self.semtypes_db.store
            self.semtypes_db_get = self.semtypes_db.fetch
        elif database_backend == 'leveldb':
            self.cui_db = leveldb.LevelDB(os.path.join(path, 'cui.leveldb'))
            self.cui_db_put = self.cui_db.Put
            self.cui_db_get = self.cui_db.Get
            self.semtypes_db = leveldb.LevelDB(os.path.join(path, 'semtypes.leveldb'))
            self.semtypes_db_put = self.semtypes_db.Put
            self.semtypes_db_get = self.semtypes_db.Get
        else:
            raise ValueError(f'database_backend {database_backend} not recognized')

Source File: kv_store_leveldb_int_keys.py From indy-plenum with Apache License 2.0

5 votes

def open(self):
        """Open the LevelDB database at ``self.db_path`` with the integer comparator."""
        comparator = ('IntegerComparator', integer_comparator)
        self._db = leveldb.LevelDB(self.db_path, comparator=comparator)

Source File: key_value_store_leveldb.py From loopchain with Apache License 2.0

4 votes

def __init__(self, db: leveldb.LevelDB, sync: bool):
        """Keep the database handle and sync flag and create an initial batch."""
        self._db = db
        # NOTE(review): _new_batch() is defined elsewhere; it may rely on
        # self._db being assigned first - keep this order.
        self._batch = self._new_batch()
        self._sync = sync

Source File: levelhelper.py From python-script with Apache License 2.0

4 votes

def dump(db_src):
    '''Print every key/value pair of a leveldb, one decoded pair per line.

    db_src: a database path (str), opened without creating it, or an
        already opened object exposing ``RangeIter()``.
    '''
    if isinstance(db_src, str):
        store = leveldb.LevelDB(db_src, create_if_missing=False)
    else:
        store = db_src
    for raw_key, raw_value in store.RangeIter():
        key_text = raw_key.decode()
        value_text = raw_value.decode()
        print(key_text, value_text)

Source File: evaluate_matchnet.py From matchnet with BSD 2-Clause "Simplified" License

4 votes

def main():
    """Score the test pairs with the feature and metric networks.

    Patches are read from the LevelDB given by ``args.test_db`` one batch
    at a time; the batch size comes from ``feature_net.GetBatchSize()``.
    The error rate at 95% recall is printed at the end.
    """
    args = ParseArgs()

    # Initialize networks.
    feature_net = FeatureNet(args.feature_net_model, args.feature_net_params)
    metric_net = MetricNet(args.metric_net_model, args.metric_net_params)

    # print() with one argument behaves the same on Python 2 and Python 3.
    if args.use_gpu:
        caffe.set_mode_gpu()
        print("GPU mode")
    else:
        caffe.set_mode_cpu()
        print("CPU mode")

    # Read the test pairs.
    pairs, labels = ReadPairs(args.test_pairs)

    # Open db.
    db = leveldb.LevelDB(args.test_db, create_if_missing=False)
    assert db is not None

    # Compute matching prediction.
    start_idx = 0  # Start index for a batch.
    N = len(labels)  # Total number of pairs.
    # np.float was only an alias of the builtin float and has been removed
    # from NumPy; np.float64 is the same dtype.
    scores = np.zeros(N, dtype=np.float64)
    while start_idx < N:
        # Index after the last item in the batch.
        stop_idx = min(start_idx + feature_net.GetBatchSize(), N)
        print("Block (%d,%d)" % (start_idx, stop_idx))

        # Read features.
        input_patches = ReadPatches(db, pairs[start_idx:stop_idx])

        # Compute features.
        feats = [feature_net.ComputeFeature(input_patches[0]),
                 feature_net.ComputeFeature(input_patches[1])]

        # Compute scores.
        scores[start_idx:stop_idx] = \
            metric_net.ComputeScore(feats[0], feats[1])

        start_idx = stop_idx

    # Compute evaluation metrics.
    error_at_95 = ErrorRateAt95Recall(labels, scores)
    print("Error rate at 95%% recall: %0.2f%%" % (error_at_95 * 100))

Source File: generate_patch_db.py From matchnet with BSD 2-Clause "Simplified" License

4 votes

def main():
    """Build a LevelDB of serialized patch Datums.

    One record is written per line of the interest file, keyed by the
    line index.  Records are flushed in batches of 1000; after each flush
    the last record of the batch is read back and compared.
    """
    # Parse input arguments.
    args = ParseArgs()

    # Read the 3Dpoint IDs from the info file.
    with open(args.info_file) as f:
        point_id = [int(line.split()[0]) for line in f]

    # Read the interest point from the interest file. The fields in each line
    # are: image_id, x, y, orientation, and scale. We parse all of them as float
    # even though image_id is integer.
    with open(args.interest_file) as f:
        interest = [[float(x) for x in line.split()] for line in f]

    # Create the output database, fail if exists.
    db = leveldb.LevelDB(args.output_db,
                         create_if_missing=True,
                         error_if_exists=True)

    # Add patches to the database in batch.
    batch = leveldb.WriteBatch()
    total = len(interest)
    processed = 0
    for i, metadata in enumerate(interest):
        datum = caffe_pb2.Datum()
        datum.channels, datum.height, datum.width = (1, 64, 64)

        # Extract the patch
        # NOTE(review): if GetPatchImage returns a NumPy array, tostring()
        # is a deprecated alias of tobytes() - confirm before switching.
        datum.data = GetPatchImage(i, args.container_dir).tostring()

        # Write 3D point ID into the label field.
        datum.label = point_id[i]

        # Write other metadata into float_data fields.
        datum.float_data.extend(metadata)
        batch.Put(str(i), datum.SerializeToString())
        processed += 1
        if processed % 1000 == 0:
            # Formatting into one string prints the same on Python 2 and 3.
            print('%s / %s' % (processed, total))

            # Write the current batch.
            db.Write(batch, sync=True)

            # Verify the last written record.
            d = caffe_pb2.Datum()
            d.ParseFromString(db.Get(str(processed - 1)))
            assert (d.data == datum.data)

            # Start a new batch
            batch = leveldb.WriteBatch()
    # Flush whatever is left from the final, partial batch.
    db.Write(batch, sync=True)

Source File: run03_s2_read_leveldb.py From DLS with MIT License

4 votes

def readImageDatasetLevelDB(imageDirParser=None, numberOfSamples=1000, isRawBlob=False):
    """Read random samples from the test LevelDB dataset and time the reads.

    ``numberOfSamples`` keys are drawn at random (with replacement) from
    the database ``test-dataset-leveldb-raw<0|1>``.  Each record is parsed
    as a ``tf.train.Example`` and decoded through ``DataType``.  A mean
    image is accumulated for 'path-img2d' features, and a mean and
    standard deviation for 'array-float' features.

    imageDirParser: must not be None; it is only validated here.
    numberOfSamples: number of random reads to perform.
    isRawBlob: selects the dataset directory suffix (``raw0`` / ``raw1``).

    Returns ``(dt, meanImage, meanArray, stdArray, numData)`` where ``dt``
    is the elapsed time in seconds and ``numData`` the number of keys in
    the database.  A statistic is None when no sample contributed to it.

    Raises Exception when ``imageDirParser`` is None or the dataset
    directory is missing.
    """
    # The original body referred to an undefined name ``imgDirParser``.
    if imageDirParser is None:
        raise Exception('Invalid imageDirParser')
    dataTypeBuilder = DataType()
    # (1) check dataset type and locate the database
    tpref = 'raw%d' % isRawBlob
    dbfout = 'test-dataset-leveldb-%s' % tpref
    if not os.path.isdir(dbfout):
        raise Exception('Cant find LMDB dataset [%s]' % dbfout)
    levelDB = leveldb.LevelDB(dbfout)
    t0 = time.time()
    meanImage = None
    meanArray = None
    meanArra2 = None
    schemeOfFeatures = None
    lstKeys = list(levelDB.RangeIter(include_value=False))
    rndIndex = np.random.randint(len(lstKeys), size=numberOfSamples)
    for ridx in rndIndex:
        tkey = lstKeys[ridx]
        texampleStr = levelDB.Get(tkey)
        texample = tf.train.Example()
        texample.ParseFromString(texampleStr)
        # list() keeps this working where dict views are not indexable.
        tfeatures = list(texample.features._fields.values())[0]
        # (1) Prepare scheme for dataset row-sample: group the feature
        #     names '<type>.<field>' by their type prefix.
        if schemeOfFeatures is None:
            schemeOfFeatures = {}
            for kk in tfeatures.keys():
                vv = kk.split('.')
                schemeOfFeatures.setdefault(vv[0], {})[vv[1]] = kk
        # (2) iterate over scheme-data-types
        for ttypeStr, vv in schemeOfFeatures.items():
            tdataTypeObj = dataTypeBuilder.getDataClassByName(ttypeStr)
            cfg = {k2: tfeatures.pop(v2) for k2, v2 in vv.items()}
            tret = tdataTypeObj.blob2Data(cfg)
            if ttypeStr == 'path-img2d':
                # astype() returns a new array, safe to accumulate into.
                timg = tret['img'].astype(np.float64)
                if meanImage is None:
                    meanImage = timg
                else:
                    meanImage += timg
            elif ttypeStr == 'array-float':
                tarr = tret['val'].astype(np.float64)
                if meanArray is None:
                    meanArray = tarr
                    meanArra2 = tarr ** 2
                else:
                    meanArray += tarr
                    meanArra2 += tarr ** 2
    numData = len(lstKeys)
    # The sums cover the drawn samples, so normalise by the sample count,
    # not by the number of keys in the database.
    numSamples = len(rndIndex)
    stdArray = None
    if meanImage is not None:
        meanImage = meanImage / numSamples
    if meanArray is not None:
        meanArray = meanArray / numSamples
        # Var[x] = E[x^2] - E[x]^2; clamp tiny negatives from rounding.
        variance = meanArra2 / numSamples - meanArray ** 2
        stdArray = np.sqrt(np.maximum(variance, 0))
    dt = time.time() - t0
    return (dt, meanImage, meanArray, stdArray, numData)

#################################

Source File: dataloader.py From hapi with Apache License 2.0

4 votes

def prepare_leveldb(self,
                        input_file,
                        leveldb_file,
                        label_list,
                        max_seq_length,
                        tokenizer,
                        line_processor=None,
                        delimiter="\t",
                        quotechar=None):
        """Open the LevelDB cache for ``input_file``, building it if absent.

        When more than one rank is running, the local rank is appended to
        ``leveldb_file``.  If that path does not exist, every CSV row that
        ``line_processor`` accepts (returns non-None for) is stored under a
        consecutive integer key, and the number of stored rows is written
        under ``_example_num_``.  Otherwise the existing database is opened.

        The database, line processor and remaining arguments are then kept
        on the instance.
        """
        def default_line_processor(line_id, line):
            # Default rows are expected to be exactly (text, label).
            assert len(line) == 2
            text_a, label = line[0], line[1]

            return BertInputExample(
                str(line_id), text_a=text_a, text_b=None, label=label)

        if line_processor is None:
            line_processor = default_line_processor

        if ParallelEnv().nranks > 1:
            leveldb_file = leveldb_file + "_" + str(ParallelEnv().local_rank)

        if os.path.exists(leveldb_file):
            store = leveldb.LevelDB(leveldb_file, create_if_missing=False)
        else:
            print("putting data %s into leveldb %s" %
                  (input_file, leveldb_file))
            store = leveldb.LevelDB(leveldb_file, create_if_missing=True)
            # One counter serves as both the next key and the example count.
            kept = 0
            with io.open(input_file, "r", encoding="utf8") as f:
                rows = csv.reader(
                    f, delimiter=delimiter, quotechar=quotechar)
                for raw_id, fields in enumerate(rows):
                    if line_processor(str(raw_id), fields) is None:
                        continue

                    joined = delimiter.join(fields)
                    store.Put(
                        str(kept).encode("utf8"), joined.encode("utf8"))
                    kept += 1
            store.Put("_example_num_".encode("utf8"),
                      str(kept).encode("utf8"))

        self.label_list = label_list
        self.max_seq_length = max_seq_length
        self.tokenizer = tokenizer
        self.delimiter = delimiter
        self._db = store
        self._line_processor = line_processor

Python leveldb.LevelDB() Examples