Python pymongo.errors.DocumentTooLarge() Examples

The following are 5 code examples of pymongo.errors.DocumentTooLarge(), taken from open-source projects. The source file and project for each example are noted above its code. You may also want to check out the other available functions and classes of the module pymongo.errors.
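Before the project examples, here is a minimal, self-contained sketch of how this exception typically arises and how it is caught. It is not taken from any of the projects below; the localhost connection and the demo.blobs collection are assumptions used purely for illustration.

from pymongo import MongoClient
from pymongo.errors import DocumentTooLarge

client = MongoClient("localhost", 27017)   # assumes a reachable local mongod
collection = client["demo"]["blobs"]       # hypothetical database and collection

# A single BSON document may not exceed the server's limit (16 MB by default),
# so PyMongo raises DocumentTooLarge before sending an oversized insert.
oversized = {"_id": 1, "payload": "x" * (17 * 1024 * 1024)}

try:
    collection.insert_one(oversized)
except DocumentTooLarge as exc:
    print("Document rejected:", exc)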
Example #1
Source File: mongomodelstore.py    From aurum-datadiscovery with MIT License
from pymongo.errors import DocumentTooLarge


# build_column_key and modeldb are defined elsewhere in this module.
def new_column(f, c, t, sig, n_data, t_data):
    '''
    f -> file name
    c -> column name
    t -> column type
    sig -> column signature
    n_data -> numerical data
    t_data -> textual data
    '''
    key = build_column_key(f, c)
    doc = {
        "key": key,
        "filename": f,
        "column": c,
        "type": t,
        "signature": sig,
        "t_data": t_data,
        "n_data": n_data
    }
    try:
        modeldb.insert_one(doc)
    except DocumentTooLarge:
        print("Trying to load: " + str(f) + " - " + str(c)) 
Example #2
Source File: wp6_output.py    From news-popularity-prediction with Apache License 2.0
import copy

from pymongo import errors as pymongo_errors


# make_w6_json_report is defined elsewhere in this module.
def write_to_mongo(prediction_json,
                   mongo_client,
                   tweet_input_mongo_database_name):
    json_report = make_w6_json_report(prediction_json)

    mongo_database = mongo_client[tweet_input_mongo_database_name]
    mongo_collection = mongo_database["popularity_prediction_output"]

    tweet_id = int(prediction_json["tweet_id"])
    json_report["_id"] = tweet_id
    json_report["tweet_id_string"] = repr(tweet_id)
    # print("wp6", json_report)

    # Retry the upsert, dropping graph snapshots from the end until the
    # document fits under the BSON size limit.
    smaller_json = copy.copy(json_report)
    counter = 0
    while True:
        try:
            mongo_collection.replace_one({"_id": tweet_id}, smaller_json, upsert=True)
            break
        except pymongo_errors.DocumentTooLarge:
            print("It was too large.")
            if counter >= (len(json_report["graph_snapshots"]) - 1):
                # Everything but the first snapshot has been dropped; make a
                # final attempt with a single snapshot and stop either way.
                smaller_json = copy.copy(json_report)
                smaller_json["graph_snapshots"] = [smaller_json["graph_snapshots"][0]]
                try:
                    mongo_collection.replace_one({"_id": tweet_id}, smaller_json, upsert=True)
                except pymongo_errors.DocumentTooLarge:
                    pass
                break
            smaller_json = copy.copy(json_report)
            smaller_json["graph_snapshots"] = smaller_json["graph_snapshots"][0:-(counter + 1)]
            counter += 1 
Example #3
Source File: message.py    From satori with Apache License 2.0
from pymongo.errors import DocumentTooLarge


def _raise_document_too_large(operation, doc_size, max_size):
    """Internal helper for raising DocumentTooLarge."""
    if operation == "insert":
        raise DocumentTooLarge("BSON document too large (%d bytes)"
                               " - the connected server supports"
                               " BSON document sizes up to %d"
                               " bytes." % (doc_size, max_size))
    else:
        # There's nothing intelligent we can say
        # about size for update and remove
        raise DocumentTooLarge("command document too large") 
Example #4
Source File: message.py    From opsbro with MIT License
from pymongo.errors import DocumentTooLarge


def _raise_document_too_large(operation, doc_size, max_size):
    """Internal helper for raising DocumentTooLarge."""
    if operation == "insert":
        raise DocumentTooLarge("BSON document too large (%d bytes)"
                               " - the connected server supports"
                               " BSON document sizes up to %d"
                               " bytes." % (doc_size, max_size))
    else:
        # There's nothing intelligent we can say
        # about size for update and remove
        raise DocumentTooLarge("command document too large") 
Example #5
Source File: jobs.py    From cascade-server with Apache License 2.0
    def run(self):
        self.update_start()
        query_context = self.get_query_context()
        logger.debug('Training on {} with ctx {}'.format(self.analytic, query_context))
        baseline = AnalyticBaseline.objects(analytic=self.analytic).first()
        if baseline is None:
            baseline = AnalyticBaseline(analytic=self.analytic)

        baseline.time_range = self.range
        results = []
        found_keys = set()

        for i, output in enumerate(query_context.query(self.analytic)):
            fields = output['state']
            found_keys.update(fields.keys())
            results.append(fields)
            if i < 512:
                self.update(add_to_set__results=output, inc__count=1)
            else:
                self.update(inc__count=1)

        baseline.keys = [ClusterKey(name=k, status=True) for k in found_keys]
        baseline.cluster_events(results, min_size=1)
        baseline.original_root = baseline.root

        min_size = 1
        max_children = 1024

        # Continue to build a baseline until it works
        while max_children > 0:
            try:
                baseline.save()
                return
            except DocumentTooLarge:
                # try to dynamically adjust this until it fits
                baseline.cluster_events(results, min_size=min_size, max_children=max_children)
                baseline.original_root = baseline.root
                baseline.save()
                min_size += 1
                max_children = int(max_children * 0.9)

        # probably redundant, but useful to re-raise errors if the baseline isn't successful yet
        baseline.save()