Python gridfs.GridFS() Examples

The following are 30 code examples of gridfs.GridFS(), drawn from a range of open-source projects. Each example notes its source file, project, and license. You may also want to check out all available functions/classes of the module gridfs.
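
Before diving into the project examples, here is a minimal, self-contained sketch of the core gridfs.GridFS() workflow. The connection URI, database name, and file contents are illustrative assumptions rather than code from any of the projects below.

import gridfs
from pymongo import MongoClient

# Assumed local MongoDB instance; adjust the URI for your deployment.
client = MongoClient('mongodb://localhost:27017')
db = client['example_db']

# By default a GridFS instance wraps the 'fs.files' and 'fs.chunks' collections;
# pass collection='...' to use a different bucket, as several examples below do.
fs = gridfs.GridFS(db)

# Store bytes, optionally with arbitrary metadata keyword arguments.
file_id = fs.put(b'hello gridfs', filename='hello.txt', tag='demo')

# Retrieve the file by its ObjectId and read its contents.
print(fs.get(file_id).read())  # b'hello gridfs'

# Query stored files like a collection, then clean up.
for grid_out in fs.find({'filename': 'hello.txt'}):
    fs.delete(grid_out._id)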
Example #1
Source File: tf_agents.py    From cryptotrader with MIT License
def save_memory_to_db(self, env, name):
        try:
            env.logger.info(ArenaDDPGAgent.save_to_db, "Trying to save memory to database.")

            memory = pickle.dumps(self.memory)

            fs = gd.GridFS(env.db, collection=name+'_memory')

            fs.put(memory)

            del fs

            env.logger.info(ArenaDDPGAgent.save_to_db, "Memory saved to db!")

        except Exception as e:
            env.logger.error(ArenaDDPGAgent.save_to_db, env.parse_error(e)) 
Example #2
Source File: database.py    From VolUtility with GNU General Public License v3.0
def update_plugin(self, plugin_id, new_values):
        plugin_id = ObjectId(plugin_id)
        if len(str(new_values)) > 12000000:
            print "Storing Large Document in GridFS"
            large_document = json.dumps(new_values['plugin_output'])
            large_document_id = self.create_file(large_document, 'sess_id', 'sha256', 'filename', pid=None, file_meta=None)
            new_values['plugin_output'] = large_document_id
            new_values['largedoc'] = 'True'

        self.vol_plugins.update_one({'_id': plugin_id}, {"$set": new_values})
        return True


    ##
    # File System
    ## 
Example #3
Source File: datastore.py    From lightflow with BSD 3-Clause "New" or "Revised" License
def remove(self, workflow_id):
        """ Removes a document specified by its id from the data store.

        All associated GridFS documents are deleted as well.

        Args:
            workflow_id (str): The id of the document that represents a workflow run.

        Raises:
            DataStoreNotConnected: If the data store is not connected to the server.
        """
        try:
            db = self._client[self.database]
            fs = GridFSProxy(GridFS(db.unproxied_object))

            for grid_doc in fs.find({"workflow_id": workflow_id},
                                    no_cursor_timeout=True):
                fs.delete(grid_doc._id)

            col = db[WORKFLOW_DATA_COLLECTION_NAME]
            return col.delete_one({"_id": ObjectId(workflow_id)})

        except ConnectionFailure:
            raise DataStoreNotConnected() 
Example #4
Source File: Launcher.py    From codex-backend with MIT License
def testCode4():
    inicio = 10569000
    client = MongoClient(envget('files.host'), envget('files.port'))
    db = client[envget('db_files_name')]
    fs = gridfs.GridFS(db)
    res = fs.find(timeout=False).skip(inicio)
    lc = Launcher()
    count = inicio
    reset = 0
    for f in res:
        data = f.read()
        # print(f.filename,count)
        lc.launchFileAnalitics((f.filename, data))
        reset += 1
        count += 1
        if(reset >= 1000):
            print(str(count) + " processed")
            reset = 0
    print(str(count) + " processed")

# ---------------------------------------------- 
Example #5
Source File: filesystems.py    From signac with BSD 3-Clause "New" or "Revised" License
def get(self, _id, mode='r'):
            """Open the file with the specified id.

            .. warning::

                To avoid compatibility issues, all files are
                opened in text-mode (`r`) by default; however,
                for higher efficiency, files should generally
                be opened in binary mode (`rb`) whenever possible.

            :param _id: The file identifier.
            :type _id: str
            :param mode: The file mode used for opening.
            :returns: A file-like object to read from."""
            if mode == 'r':
                file = io.StringIO(self.gridfs.get(_id).read().decode())
                if len(file.getvalue()) > GRIDFS_LARGE_FILE_WARNING_THRSHLD:
                    warnings.warn(
                        "Open large GridFS files more efficiently in 'rb' mode.")
                return file
            elif mode == 'rb':
                return self.gridfs.get(file_id=_id)
            else:
                raise ValueError(mode) 
Example #6
Source File: datastore.py    From lightflow with BSD 3-Clause "New" or "Revised" License
def get(self, workflow_id):
        """ Returns the document for the given workflow id.

        Args:
            workflow_id (str): The id of the document that represents a workflow run.

        Raises:
            DataStoreNotConnected: If the data store is not connected to the server.

        Returns:
            DataStoreDocument: The document for the given workflow id.
        """
        try:
            db = self._client[self.database]
            fs = GridFSProxy(GridFS(db.unproxied_object))
            return DataStoreDocument(db[WORKFLOW_DATA_COLLECTION_NAME], fs, workflow_id)

        except ConnectionFailure:
            raise DataStoreNotConnected() 
Example #7
Source File: log_mongodb.py    From tanner with GNU General Public License v3.0
def __init__(self):
        if MONGO:
            # Create the connection
            mongo_uri = config.TannerConfig.get('MONGO', 'URI')

            connection = pymongo.MongoClient(mongo_uri)

            # Connect to Databases.
            tandb = connection['tanner']
            tandbfs = connection['voldbfs']

            # Get Collections
            self.tan_sessions = tandb.sessions
            self.tan_files = GridFS(tandbfs)

            # Indexes
            self.tan_sessions.create_index([('$**', 'text')])
        else:
            print('pymongo not found. pip install pymongo') 
Example #8
Source File: mongodb.py    From CuckooSploit with GNU General Public License v3.0
def connect(self):
        """Connects to Mongo database, loads options and set connectors.
        @raise CuckooReportError: if unable to connect.
        """
        host = self.options.get("host", "127.0.0.1")
        port = self.options.get("port", 27017)
        db = self.options.get("db", "cuckoo")

        try:
            self.conn = MongoClient(host, port)
            self.db = self.conn[db]
            self.fs = GridFS(self.db)
        except TypeError:
            raise CuckooReportError("Mongo connection port must be integer")
        except ConnectionFailure:
            raise CuckooReportError("Cannot connect to MongoDB") 
Example #9
Source File: datastore.py    From lightflow with BSD 3-Clause "New" or "Revised" License
def _encode_value(self, value):
        """ Encodes the value such that it can be stored into MongoDB.

        Any primitive types are stored directly into MongoDB, while non-primitive types
        are pickled and stored as GridFS objects. The id pointing to a GridFS object
        replaces the original value.

        Args:
            value (object): The object that should be encoded for storing in MongoDB.

        Returns:
            object: The encoded value ready to be stored in MongoDB.
        """
        if isinstance(value, (int, float, str, bool, datetime)):
            return value
        elif isinstance(value, list):
            return [self._encode_value(item) for item in value]
        elif isinstance(value, dict):
            result = {}
            for key, item in value.items():
                result[key] = self._encode_value(item)
            return result
        else:
            return self._gridfs.put(Binary(pickle.dumps(value)),
                                    workflow_id=self._workflow_id) 
Example #10
Source File: db.py    From ceph-lcm with Apache License 2.0
def __init__(self, db):
        self.fs = gridfs.GridFS(db, collection=self.COLLECTION) 
Example #11
Source File: datastore.py    From lightflow with BSD 3-Clause "New" or "Revised" License
def _decode_value(self, value):
        """ Decodes the value by turning any binary data back into Python objects.

        The method searches for ObjectId values, loads the associated binary data from
        GridFS and returns the decoded Python object.

        Args:
            value (object): The value that should be decoded.

        Raises:
            DataStoreGridfsIdInvalid: An ObjectId was found but the id is not a valid
                GridFS id.
            DataStoreDecodeUnknownType: The type of the specified value is unknown.

        Returns:
            object: The decoded value as a valid Python object.
        """
        if isinstance(value, (int, float, str, bool, datetime)):
            return value
        elif isinstance(value, list):
            return [self._decode_value(item) for item in value]
        elif isinstance(value, dict):
            result = {}
            for key, item in value.items():
                result[key] = self._decode_value(item)
            return result
        elif isinstance(value, ObjectId):
            if self._gridfs.exists({"_id": value}):
                return pickle.loads(self._gridfs.get(value).read())
            else:
                raise DataStoreGridfsIdInvalid()
        else:
            raise DataStoreDecodeUnknownType() 
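
Examples #9 and #11 form a round trip: non-primitive values are pickled into GridFS on write and rehydrated on read, with the stored ObjectId standing in for the original value. Below is a stripped-down sketch of that pattern outside the DataStoreDocument class; it skips the recursion over lists and dicts, and the database name and sample value are assumptions.

import pickle

import gridfs
from bson import Binary, ObjectId
from pymongo import MongoClient

db = MongoClient()['workflow_store']  # assumed database name
fs = gridfs.GridFS(db)

def encode(value, workflow_id):
    # Primitives pass through unchanged; anything else is pickled into GridFS
    # and replaced by the ObjectId of the stored blob.
    if isinstance(value, (int, float, str, bool)):
        return value
    return fs.put(Binary(pickle.dumps(value)), workflow_id=workflow_id)

def decode(value):
    # ObjectIds are resolved back into the original Python object.
    if isinstance(value, ObjectId) and fs.exists({'_id': value}):
        return pickle.loads(fs.get(value).read())
    return value

token = encode({'nested': [1, 2, 3]}, workflow_id='run-42')
print(decode(token))  # {'nested': [1, 2, 3]}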
Example #12
Source File: file.py    From openslack-crawler with Apache License 2.0
def inti_fs(self, key):
        self.db = self.client[key]
        self.fs = gridfs.GridFS(self.db, self.shard_gridfs_collection) 
Example #13
Source File: QuincyDatabase.py    From quincy with GNU General Public License v3.0
def __init__(self, hostname='localhost', port=27017, db_name='winXP'):
        logging.info("Init database module")
        self._client = MongoClient(hostname, port)
        self._database = self._client.get_database(db_name)
        self._samplesFs = GridFS(self._database, dbconfig['sampleFsCollectionName'])
        self._samples = self._database[dbconfig['sampleCollectionName']]
        self._results = self._database[dbconfig['resultCollectionName']]
        self._dumps = self._database[dbconfig['dumpCollectionName']] 
Example #14
Source File: database.py    From VolUtility with GNU General Public License v3.0
def __init__(self):
        # Create the connection
        if config['valid']:
            mongo_uri = config['database']['mongo_uri']
        else:
            mongo_uri = 'mongodb://localhost'

        connection = pymongo.MongoClient(mongo_uri)

        # Version Check
        server_version = connection.server_info()['version']
        if int(server_version[0]) < 3:
            raise UserWarning('Incompatible MongoDB Version detected. Requires 3 or higher. Found {0}'.format(server_version))

        # Connect to Databases.
        voldb = connection['voldb']
        voldbfs = connection['voldbfs']

        # Get Collections
        self.vol_sessions = voldb.sessions
        self.vol_comments = voldb.comments
        self.vol_plugins = voldb.plugins
        self.vol_datastore = voldb.datastore
        self.vol_files = GridFS(voldbfs)

        # Indexes
        self.vol_comments.create_index([('freetext', 'text')])

        self.vol_plugins.create_index([('$**', 'text')])

    ##
    # Sessions
    ## 
Example #15
Source File: db_interface_view_sync.py    From FACT_core with GNU General Public License v3.0
def __init__(self, config=None):
        super().__init__(config=config)
        self.view_collection = self.client[self.config['data_storage']['view_storage']]
        self.view_storage = gridfs.GridFS(self.view_collection) 
Example #16
Source File: db_interface_common.py    From FACT_core with GNU General Public License v3.0
def _setup_database_mapping(self):
        main_database = self.config['data_storage']['main_database']
        self.main = self.client[main_database]
        self.firmwares = self.main.firmwares
        self.file_objects = self.main.file_objects
        self.search_query_cache = self.main.search_query_cache
        self.locks = self.main.locks
        # sanitize stuff
        self.report_threshold = int(self.config['data_storage']['report_threshold'])
        sanitize_db = self.config['data_storage'].get('sanitize_database', 'faf_sanitize')
        self.sanitize_storage = self.client[sanitize_db]
        self.sanitize_fs = gridfs.GridFS(self.sanitize_storage) 
Example #17
Source File: common_mongo_binding.py    From FACT_core with GNU General Public License v3.0
def _setup_database_mapping(self):
        self.connections = {}
        for item in self.INTERCOM_CONNECTION_TYPES:
            self.connections[item] = {'name': '{}_{}'.format(self.config['data_storage']['intercom_database_prefix'], item)}
            self.connections[item]['collection'] = self.client[self.connections[item]['name']]
            self.connections[item]['fs'] = gridfs.GridFS(self.connections[item]['collection']) 
Example #18
Source File: queue.py    From PyChemia with MIT License
def __init__(self, name='Queue', host='localhost', port=27017, user=None, passwd=None, ssl=False, replicaset=None):
        """
        Creates a MongoDB client to 'host' with 'port' and connect it to the database 'name'.
        Authentication can be used with 'user' and 'password'

        :param name: (str) The name of the database
        :param host: (str) The host as name or IP
        :param port: (int) The number of port to connect with the server (Default is 27017)
        :param user: (str) The user with read or write permissions to the database
        :param passwd: (str/int) Password to authenticate the user into the server

        :return:
        """
        self.db_settings = {'name': name, 'host': host, 'port': port, 'user': user, 'passwd': passwd, 'ssl': ssl}
        self.name = name
        uri = 'mongodb://'
        if user is not None:
            uri += user
            if passwd is not None:
                uri += ':' + str(passwd)
            uri += '@'
        uri += host + ':' + str(port)
        print('URI:', uri)
        if user is not None:
            uri += '/' + name
        if replicaset is not None:
            self._client = pymongo.MongoClient(uri, ssl=ssl, replicaset=replicaset)
        else:
            self._client = pymongo.MongoClient(host=host, port=port, ssl=ssl,
                                               ssl_cert_reqs=pymongo.ssl_support.ssl.CERT_NONE)
        for i in ['version']:
            print('%20s : %s' % (i, self._client.server_info()[i]))
        self.db = self._client[name]
        if user is not None and self.db.authenticate(user, passwd):
            print('Authentication successful')

        self.set_minimal_schema()
        self.fs = gridfs.GridFS(self.db) 
Example #19
Source File: inspector.py    From Rocket.Chat.Audit with Apache License 2.0
def main(rocketchat_host, timestring, arguments):
    client = pymongo.MongoClient(rocketchat_host)
    grid = GridFS(client['rocketchat_audit'], collection='file_uploads')
    inspector = Inspector(client['rocketchat_audit']['messages'], grid)

    if arguments['files']:
        print to_json(imap(Archiver.print_file, inspector.list_files(timestring)))
    elif arguments['logs']:
        logs = Archiver.group_by(inspector.list_logs(timestring), lambda e: e['room_name'])
        print json.dumps({k: map(Archiver.print_msg, v) for k, v in logs.iteritems()}, indent=2)
    elif arguments['email']:
        archiver = Archiver(inspector)
        archiver.send_email(timestring, arguments['--from'], arguments['<address>'],
                            dry_run=arguments['--dry-run']) 
Example #20
Source File: filesystem.py    From allura with Apache License 2.0
def _fs(cls):
        gridfs_args = (session(cls).impl.db, cls._root_collection())
        try:
            # for some pymongo 2.x versions the _connect option is available to avoid index creation on every usage
            # (it'll still create indexes on delete & write)
            gridfs = GridFS(*gridfs_args, _connect=False)
        except TypeError:  # (unexpected keyword argument)
            # pymongo 3.0 removes the _connect arg
            # pymongo 3.1 makes index creation only happen on the very first write
            gridfs = GridFS(*gridfs_args)
        return gridfs 
Example #21
Source File: storage.py    From djongo with GNU Affero General Public License v3.0
def _get_gridfs(self, path):
        """
        Returns a :class:`~gridfs.GridFS` using the sub-collection for
        `path`.
        """
        path, filename = os.path.split(path)
        path = os.path.join(self.collection, self.location, path.strip(os.sep))
        collection_name = path.replace(os.sep, '.').strip('.')

        if not hasattr(self, '_db'):
            from django.db import connections
            self._db = connections[self.database].connection

        return GridFS(self._db, collection_name), filename 
Example #22
Source File: mongoexp.py    From auptimizer with GNU General Public License v3.0
def __init__(self, db, jobs, gfs, conn, tunnel, config_name):
        """
        Parameters
        ----------

        db - Mongo Database (e.g. `Connection()[dbname]`)
            database in which all job-related info is stored

        jobs - Mongo Collection handle
            collection within `db` to use for job arguments, return vals,
            and various bookkeeping stuff and meta-data. Typically this is
            `db['jobs']`

        gfs - Mongo GridFS handle
            GridFS is used to store attachments - binary blobs that don't fit
            or are awkward to store in the `jobs` collection directly.

        conn - Mongo Connection
            Why we need to keep this, I'm not sure.

        tunnel - something for ssh tunneling if you're doing that
            See `connection_with_tunnel` for more info.

        config_name - string
            XXX: No idea what this is for, seems unimportant.

        """
        self.db = db
        self.jobs = jobs
        self.gfs = gfs
        self.conn = conn
        self.tunnel = tunnel
        self.config_name = config_name

    # TODO: rename jobs -> coll throughout 
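
The docstring above describes gfs as the place for attachments: binary blobs that are too large or awkward to embed in the jobs collection directly. The snippet below illustrates that idea in isolation; the database name, field names, and payload are assumptions, not the actual attachment layout used by auptimizer/hyperopt.

import gridfs
from pymongo import MongoClient

db = MongoClient()['hyperopt_demo']  # assumed database name
jobs = db['jobs']
gfs = gridfs.GridFS(db)

# Store a large binary blob in GridFS instead of embedding it in the job document.
blob_id = gfs.put(b'\x00' * 10_000_000, filename='model_weights.bin')

# The job document only carries a reference to the blob.
job_id = jobs.insert_one({'status': 'done', 'attachment': blob_id}).inserted_id

# Later, resolve the reference to read the attachment back.
job = jobs.find_one({'_id': job_id})
weights = gfs.get(job['attachment']).read()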
Example #23
Source File: db.py    From super-resolution-videos with The Unlicense
def __init__(
        self,
        ip = 'localhost',
        port = 27017,
        db_name = 'db_name',
        user_name = None,
        password = 'password',
        studyID=None
    ):
        ## connect mongodb
        client = MongoClient(ip, port)
        self.db = client[db_name]
        if user_name != None:
            self.db.authenticate(user_name, password)


        if studyID is None:
            self.studyID=str(uuid.uuid1())
        else:
            self.studyID=studyID

        ## define file system (Buckets)
        self.datafs = gridfs.GridFS(self.db, collection="datafs")
        self.modelfs = gridfs.GridFS(self.db, collection="modelfs")
        self.paramsfs = gridfs.GridFS(self.db, collection="paramsfs")
        self.archfs = gridfs.GridFS(self.db, collection="ModelArchitecture")
        ##
        print("[TensorDB] Connect SUCCESS {}:{} {} {} {}".format(ip, port, db_name, user_name, studyID))

        self.ip = ip
        self.port = port
        self.db_name = db_name
        self.user_name = user_name 
Example #24
Source File: mongo_proxy.py    From lightflow with BSD 3-Clause "New" or "Revised" License
def __init__(self, obj):
        super().__init__(obj,
                         get_methods(gridfs, GridFS)) 
Example #25
Source File: rocketchat.audit.py    From Rocket.Chat.Audit with Apache License 2.0
def __init__(self, rocketchat_db, audit_db):
        """
        :param rocketchat_db: the mongodb audit log
        :param audit_db: the mongodb database used for writing audit logs
        """
        self.rocketchat_gridfs = GridFS(rocketchat_db, collection='rocketchat_uploads')
        self.audit_gridfs = GridFS(audit_db, collection='file_uploads')
        self.messages = audit_db['messages'] 
Example #26
Source File: datastore.py    From lightflow with BSD 3-Clause "New" or "Revised" License
def set(self, key, value, *, section=DataStoreDocumentSection.Data):
        """ Store a value under the specified key in the given section of the document.

        This method stores a value into the specified section of the workflow data store
        document. Any existing value is overridden. Before storing a value, any linked
        GridFS document under the specified key is deleted.

        Args:
            key (str): The key pointing to the value that should be stored/updated.
                It supports MongoDB's dot notation for nested fields.
            value: The value that should be stored/updated.
            section (DataStoreDocumentSection): The section from which the data should
                be retrieved.

        Returns:
            bool: ``True`` if the value could be set/updated, otherwise ``False``.
        """
        key_notation = '.'.join([section, key])

        try:
            self._delete_gridfs_data(self._data_from_dotnotation(key_notation,
                                                                 default=None))
        except KeyError:
            logger.info('Adding new field {} to the data store'.format(key_notation))

        result = self._collection.update_one(
            {"_id": ObjectId(self._workflow_id)},
            {
                "$set": {
                    key_notation: self._encode_value(value)
                },
                "$currentDate": {"lastModified": True}
            }
        )
        return result.modified_count == 1 
Example #27
Source File: test_gridfs_backend.py    From flask-fs with MIT License
def setup(self):
        self.client = MongoClient()
        self.db = self.client[TEST_DB]
        self.gfs = GridFS(self.db, 'test')

        self.config = Config({
            'mongo_url': 'mongodb://localhost:27017',
            'mongo_db': TEST_DB,
        })
        self.backend = GridFsBackend('test', self.config)
        yield
        self.client.drop_database(TEST_DB) 
Example #28
Source File: gridfs.py    From flask-fs with MIT License
def __init__(self, name, config):
        super(GridFsBackend, self).__init__(name, config)

        self.client = MongoClient(config.mongo_url)
        self.db = self.client[config.mongo_db]
        self.fs = GridFS(self.db, self.name) 
Example #29
Source File: media.py    From microblog.pub with GNU Affero General Public License v3.0
def __init__(self, gridfs_db: str, user_agent: str) -> None:
        self.fs = gridfs.GridFS(gridfs_db)
        self.user_agent = user_agent 
Example #30
Source File: mongolog.py    From streamingbandit with MIT License
def get_simulation_log(self, exp_id, limit):
        """ Return all the logged simulation data
        
        :param int exp_id: The specified experiment.
        :param int limit: The maximum number of simulation runs to return.
        :returns list of dicts logs: All the simulation runs for that experiment.
        """
        self.sim_db = self.mongo_client['simulations']
        self.fs = gridfs.GridFS(self.sim_db)
        self.sim_log_rows = []
        for row in self.fs.find({"filename" : str(exp_id)}).sort("uploadDate", -1).limit(limit):
            self.sim_log_rows.append(json.loads(row.read().decode("UTF-8")))
        return self.sim_log_rows
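
The read loop above expects one JSON document per GridFS file, named after the experiment id and sorted by uploadDate. For context, a minimal sketch of the corresponding write side might look like this; the database, experiment id, and payload are assumptions, not streamingbandit's actual logging code.

import json

import gridfs
from pymongo import MongoClient

sim_db = MongoClient()['simulations']
fs = gridfs.GridFS(sim_db)

exp_id = 7  # assumed experiment id
log_entry = {'run': 1, 'reward': 0.42}  # assumed payload

# Each simulation run is stored as its own GridFS file named after the experiment;
# uploadDate is set automatically by GridFS and drives the sort in get_simulation_log().
fs.put(json.dumps(log_entry).encode('utf-8'), filename=str(exp_id))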