Python Examples of ijson.parse

Source File: runner.py From rally with Apache License 2.0

6 votes

def simple_stats(self, bulk_size, response):
        """
        Summarize a bulk response without fully parsing it unless it reports errors.

        :param bulk_size: Number of items that were sent in the bulk request.
        :param response: The raw bulk response. It is handed to ``parse`` and must provide ``getvalue()``
                         (presumably an in-memory buffer - confirm against the caller).
        :return: A dict with the keys ``took``, ``success``, ``success-count`` and ``error-count``. If at least one
                 item failed it also contains ``error-type`` and ``error-description``.
        """
        failed_items = 0
        details = set()
        # fast path: only pull the two top-level properties out of the response
        summary = parse(response, ["errors", "took"])

        if summary.get("errors", False):
            # slow path: the response reports errors, so inspect every item
            full_response = json.loads(response.getvalue())
            for entry in full_response["items"]:
                # every entry is a single-key dict; only its value is of interest
                outcome = next(iter(entry.values()))
                item_failed = outcome["status"] > 299
                if not item_failed:
                    item_failed = "_shards" in outcome and outcome["_shards"]["failed"] > 0
                if item_failed:
                    failed_items += 1
                    self.extract_error_details(details, outcome)

        result = {
            "took": summary.get("took"),
            "success": failed_items == 0,
            "success-count": bulk_size - failed_items,
            "error-count": failed_items
        }
        if failed_items > 0:
            result["error-type"] = "bulk"
            result["error-description"] = self.error_description(details)
        return result

Source File: read_regions.py From densecap-tensorflow with MIT License

5 votes

def read_regions():
    """Split the region description JSON into one JSON file per ``id``.

    Streams ``VG_REGION_PATH`` with ``ijson.parse`` so the whole document
    never has to be held in memory. Every time a truthy value for an ``id``
    key is seen, ``{"regions": [...], "id": <id>}`` is written to
    ``REGION_JSON/<id>.json``, where ``regions`` holds the maps collected
    since the last ``regions`` key. ``REGION_JSON`` is created if missing.

    A running count of written files is echoed to stdout as progress.
    """
    if not os.path.exists(REGION_JSON):
        os.makedirs(REGION_JSON)

    last_key = None
    image_entry = {}
    regions = []
    region = {}
    count = 0
    # Use a context manager so the input handle is closed even when parsing
    # or writing fails part-way through.
    with open(VG_REGION_PATH) as region_file:
        for prefix, event, value in ijson.parse(region_file):
            sys.stdout.write('>>> %d \r' % count)
            sys.stdout.flush()
            if value == 'regions':
                # a new "regions" key starts a fresh group
                image_entry = {}
                regions = []
                last_key = None
            elif last_key == 'id' and value:
                # the value following an "id" key closes the current group
                count += 1
                image_entry['regions'] = regions
                image_entry['id'] = value
                with open(REGION_JSON + '/%s.json' % value, 'w') as f:
                    json.dump(image_entry, f)
            elif event == 'map_key':
                last_key = value
            elif event == 'end_map':
                # a map just ended: store what was collected for it
                regions.append(region)
                region = {}
                last_key = None
            elif last_key:
                region[last_key] = value

Source File: read_regions_test.py From densecap-tensorflow with MIT License

5 votes

def read_regions(vg_path='/home/joe/git/VG_raw_data', vg_version='1.2'):
    """Write the first group of the region description file to ``test_id_<id>.json``.

    Streams ``<vg_path>/<vg_version>/region_descriptions.json`` with
    ``ijson.parse`` and stops as soon as the first value following an ``id``
    key has been seen. At that point ``{"regions": [...], "id": <id>}`` is
    dumped to ``test_id_<id>.json`` in the current working directory.

    :param vg_path: Directory that contains the per-version data folders.
        Defaults to the path that was previously hard-coded.
    :param vg_version: Name of the version sub-directory to read from.
    """
    region_path = '%s/%s/region_descriptions.json' % (vg_path, vg_version)

    last_key = None
    image_entry = {}
    regions = []
    region = {}
    # Use a context manager so the input handle is closed once the loop is
    # left, including via the early ``break`` below.
    with open(region_path) as region_file:
        for prefix, event, value in ijson.parse(region_file):
            if value == 'regions':
                # a new "regions" key starts a fresh group
                image_entry = {}
                regions = []
                last_key = None
            elif last_key == 'id':
                image_entry['regions'] = regions
                image_entry['id'] = value
                with open('test_id_%s.json' % value, 'w') as f:
                    json.dump(image_entry, f)
                # only the first group is wanted
                break
            elif event == 'map_key':
                last_key = value
            elif event == 'end_map':
                # a map just ended: store what was collected for it
                regions.append(region)
                region = {}
                last_key = None
            elif last_key:
                region[last_key] = value

Source File: SwiftKitten.py From SwiftKitten with MIT License

5 votes

def _autocomplete_request(self, view, cache, request,
            text, offset, included=lambda item: True):
        """
        Run the completion command for ``request`` and return its completions.

        The request is tracked in ``self.current_requests`` while it runs and
        is always removed again, even if the command or the parsing fails.

        :param view: View the request belongs to; used for settings lookup
            and to build the completion command.
        :param cache: Not used by this method.
        :param request: Hashable key identifying the request.
        :param text: Passed through to ``get_completion_cmd``.
        :param offset: Passed through to ``get_completion_cmd``.
        :param included: Predicate forwarded to ``_parse_completions``.
        :return: List of the items produced by ``_parse_completions``.
        :raises AutocompleteRequestError: If too many requests are running or
            the same request is already in progress.
        """
        # this should not happen, but just in case, do not
        # overload the system with too many requests
        if len(self.current_requests) > self.get_settings(view, "concurrent_request_limit", 4):
            raise AutocompleteRequestError("Request denied: too many concurrent requests.")

        # prevent duplicate requests
        if request in self.current_requests:
            raise AutocompleteRequestError(
                "Request denied: completion for \"{request}\" "
                "already in progress.".format(request=request)
            )

        # start request
        self.current_requests.add(request)
        try:
            # get completion command
            cmd = self.get_completion_cmd(view, text, offset)

            # run completion command
            # NOTE(review): shell=True executes ``cmd`` through the shell;
            # confirm get_completion_cmd quotes everything it interpolates.
            p = Popen(cmd, shell=True, stdout=PIPE, stderr=STDOUT)
            try:
                parser = ijson.parse(p.stdout)
                completions = list(self._parse_completions(parser, included=included))
            finally:
                # release the pipe and reap the child so that neither a file
                # descriptor nor a zombie process is left behind
                p.stdout.close()
                p.wait()
        finally:
            # finish request - must also happen on failure, otherwise the
            # same request would be rejected as "in progress" forever
            self.current_requests.discard(request)

        return completions

Source File: runner.py From rally with Apache License 2.0

5 votes

def parse(text, props, lists=None):
    """
    Selectively parse the provided text as JSON, extracting only the properties named in ``props``. If ``lists`` is
    specified, this function also determines whether each of these lists is empty (respective value will be ``True``)
    or contains elements (respective value will be ``False``).

    Parsing stops as soon as every requested property and list has been seen. Truncated input is tolerated: whatever
    has been found up to that point is returned.

    :param text: A seekable, file-like object containing the text to parse. It is rewound before parsing.
    :param props: A mandatory list of property paths (separated by a dot character) for which to extract values.
    :param lists: An optional list of property paths to JSON lists in the provided text.
    :return: A dict containing all properties and lists that have been found in the provided text.
    """
    text.seek(0)
    events = ijson.parse(text)
    found_props = {}
    list_is_empty = {}
    # path of the list whose ``start_array`` was the previous event, otherwise ``None``
    pending_list = None
    try:
        for prefix, event, value in events:
            if pending_list is not None:
                # the list is empty exactly if it is closed right after it was opened
                list_is_empty[pending_list] = event == "end_array"
                pending_list = None
            if prefix in props:
                found_props[prefix] = value
            elif lists is not None and prefix in lists and event == "start_array":
                pending_list = prefix
            # stop early once nothing is left to look for
            if len(found_props) == len(props) and (lists is None or len(list_is_empty) == len(lists)):
                break
    except ijson.IncompleteJSONError:
        # input ended before everything was found - return the partial result
        pass

    found_props.update(list_is_empty)
    return found_props

Source File: runner.py From rally with Apache License 2.0

5 votes

async def request_body_query(self, es, params):
        """
        Run a request-body search and report basic result metadata.

        Declared ``async`` because the search call is awaited; a plain ``def`` containing ``await`` does not compile.

        :param es: Client used for the search. It is switched to raw responses before the call.
        :param params: Operation parameters. ``body`` is mandatory; ``index`` (default ``"_all"``), ``type`` and
                       ``detailed-results`` (default ``False``) are optional.
        :return: A dict with ``weight``, ``unit`` and ``success``. With ``detailed-results`` enabled it additionally
                 contains ``hits``, ``hits_relation``, ``timed_out`` and ``took`` extracted from the response.
        """
        request_params = self._default_request_params(params)
        index = params.get("index", "_all")
        body = mandatory(params, "body", self)
        doc_type = params.get("type")
        detailed_results = params.get("detailed-results", False)
        headers = self._headers(params)

        # disable eager response parsing - responses might be huge thus skewing results
        es.return_raw_response()

        r = await self._raw_search(es, doc_type, index, body, request_params, headers)

        if not detailed_results:
            return {
                "weight": 1,
                "unit": "ops",
                "success": True
            }

        props = parse(r, ["hits.total", "hits.total.value", "hits.total.relation", "timed_out", "took"])
        # the hit count is either the scalar ``hits.total`` or nested below it as ``hits.total.value``
        hits_total = props.get("hits.total.value", props.get("hits.total", 0))
        hits_relation = props.get("hits.total.relation", "eq")
        timed_out = props.get("timed_out", False)
        took = props.get("took", 0)

        return {
            "weight": 1,
            "unit": "ops",
            "success": True,
            "hits": hits_total,
            "hits_relation": hits_relation,
            "timed_out": timed_out,
            "took": took
        }

Source File: cuckoo.py From fame_modules with GNU General Public License v3.0

5 votes

def extract_info(self, report):
        """
        Stream the report and collect signatures, antivirus values, classification, score and IOCs.

        Signatures end up in ``self.results['signatures']``; every signature
        name is also added as a tag. Values of the antivirus entries already
        known for the analysed file are updated while the current signature
        is ``antivirus_virustotal``.
        """
        # Prefixes of every antivirus entry that might be of interest
        av_prefixes = [
            'data.signatures.item.data.item.{}'.format(av_name)
            for av_name in self._analysis._file['antivirus']
        ]
        ioc_prefixes = (
            "data.network.domains.item.domain",
            "data.network.hosts.item.ip",
            "data.network.traffic.http.item.uri",
        )

        events = ijson.parse(report)
        self.results['signatures'] = []
        current = {}

        for prefix, event, value in events:
            if prefix == "data.signatures.item" and event == "end_map":
                # signature complete: store it and start a fresh one
                self.results['signatures'].append(current)
                current = {}
                continue
            if prefix == "data.signatures.item.name":
                current['name'] = value
                self.add_tag(value)
                continue
            if prefix == "data.signatures.item.severity":
                current['severity'] = value
                continue
            if prefix == "data.signatures.item.description":
                current['description'] = value
                continue
            if current.get('name') == 'antivirus_virustotal' and prefix in av_prefixes:
                # last path component is the antivirus name
                av_name = prefix.split('.')[-1]
                self._analysis._file.update_value(['antivirus', av_name], value)
                continue
            if prefix == "data.malfamily":
                self.results['classification'] = value
                continue
            if prefix == "data.malscore":
                self.results['score'] = str(value)
                continue
            if prefix in ioc_prefixes:
                self.add_ioc(value)

Source File: cuckoo.py From fame_modules with GNU General Public License v3.0

5 votes

def extract_info(self, report):
        """
        Stream the report and collect signatures, the score and network IOCs.

        Signatures end up in ``self.results['signatures']``; every signature
        name is also added as a tag. The score is stored as a float. The
        addresses ``8.8.8.8`` and ``8.8.4.4`` are never reported as IOCs.
        """
        ioc_prefixes = (
            "network.domains.item.domain",
            "network.hosts.item.ip",
            "network.http.item.uri",
        )
        ignored_iocs = ("8.8.8.8", "8.8.4.4")

        events = ijson.parse(report)
        self.results['signatures'] = []
        current = {}

        for prefix, event, value in events:
            if prefix == "signatures.item" and event == "end_map":
                # signature complete: store it and start a fresh one
                self.results['signatures'].append(current)
                current = {}
                continue
            if prefix == "signatures.item.name":
                current['name'] = value
                self.add_tag(value)
                continue
            if prefix == "signatures.item.severity":
                current['severity'] = value
                continue
            if prefix == "signatures.item.description":
                current['description'] = value
                continue
            if prefix == "info.score":
                self.results['score'] = float(value)
                continue
            if prefix in ioc_prefixes and value not in ignored_iocs:
                self.add_ioc(value)

Source File: joe.py From fame_modules with GNU General Public License v3.0

5 votes

def extract_threatname(self, report):
        """
        Register every threat name found in the report.

        Each value at the threat name path that is neither ``None`` nor
        "unknown" (case-insensitive) is added as a probable name and, in
        lower case, as a tag.
        """
        threatname_prefix = "analysis.signaturedetections.strategy.item.threatname"
        for prefix, event, value in ijson.parse(report):
            if prefix != threatname_prefix or value is None:
                continue
            lowered = str(value).lower()
            if lowered == "unknown":
                continue
            self.add_probable_name(str(value))
            self.add_tag(lowered)

Source File: joe.py From fame_modules with GNU General Public License v3.0

5 votes

def extract_iocs(self, report):
        """
        Collect network IOCs from the report and register each one once.

        TCP/UDP source and destination addresses and DNS names are collected
        unless they start with ``192.168.``. HTTP(S) header lines are
        accumulated per packet and handed to ``extract_url`` after every line
        together with the scheme and the IOC set.
        """
        address_prefixes = (
            "analysis.behavior.network.tcp.packet.item.srcip",
            "analysis.behavior.network.tcp.packet.item.dstip",
            "analysis.behavior.network.udp.packet.item.srcip",
            "analysis.behavior.network.udp.packet.item.dstip",
            "analysis.behavior.network.dns.packet.item.name",
        )
        header_prefixes = (
            "analysis.behavior.network.http.packet.item.header",
            "analysis.behavior.network.https.packet.item.header",
            "analysis.behavior.network.sslhttp.packet.item.header",
        )
        # header line prefix -> scheme passed to extract_url
        line_schemes = {
            "analysis.behavior.network.http.packet.item.header.line.item": "http",
            "analysis.behavior.network.https.packet.item.header.line.item": "https",
            "analysis.behavior.network.sslhttp.packet.item.header.line.item": "https",
        }

        found = set()
        events = ijson.parse(report)
        header_text = ""
        for prefix, event, value in events:
            if prefix in address_prefixes:
                if not value.startswith("192.168."):
                    found.add(value)
                continue
            if prefix in header_prefixes:
                # a header starts or ends: drop the lines gathered so far
                header_text = ""
                continue
            scheme = line_schemes.get(prefix)
            if scheme is not None:
                header_text = header_text + "{}\n".format(value)
                self.extract_url(scheme, found, header_text)

        for ioc in found:
            self.add_ioc(ioc)

Source File: aws_rate_hook_with_cb_configured_rate_per_env.py From cloudbolt-forge with Apache License 2.0

5 votes

def get_sku(location, instance_type, products_file):
    """
    Optimized JSON parsing to find the SKU for the provided location and type.

    The file is streamed with ``ijson`` and parsing stops at the first SKU
    dict that is a "Compute Instance" in ``location`` of type
    ``instance_type``.

    :param location: Value the ``location`` attribute has to match.
    :param instance_type: Value the ``instanceType`` attribute has to match.
    :param products_file: Path of the JSON file holding the SKU dicts.
    :return: The matching SKU, or ``None`` if no SKU dict matches.
    """
    # SKU dicts have prefixes like 76V3SF2FJC3ZR3GH
    sku_dict_prefix = re.compile('^[a-zA-Z0-9]+$')
    sku = ''
    matches = 0
    with open(products_file) as f:
        for prefix, event, value in ijson.parse(f):
            if prefix.endswith('.sku'):
                # Save the SKU of the current SKU dict
                sku = value
            elif prefix.endswith('.productFamily') and value == "Compute Instance":
                matches += 1
            elif prefix.endswith('.location') and value == location:
                matches += 1
            elif prefix.endswith('.instanceType') and value == instance_type:
                matches += 1
            elif event == 'end_map' and sku_dict_prefix.match(prefix):
                # We've reached the end of the SKU dict, is this the right one?
                if matches == 3:
                    # All three values matched, this is our sku
                    logger.debug("SKU: {}".format(sku))
                    return sku
                # This wasn't the right SKU dict, reset our matches
                matches = 0
    # No SKU dict matched all three criteria
    return None

Source File: aws_rate_hook_with_cb_configured_rate_per_env.py From cloudbolt-forge with Apache License 2.0

5 votes

def get_price(sku, terms_file):
    """
    Find the USD price per unit for the given SKU.

    The file is streamed with ``ijson`` and parsing stops at the first value
    whose path ends in ``.pricePerUnit.USD`` and contains ``sku``.

    :param sku: SKU that has to be part of the matching path.
    :param terms_file: Path of the JSON file holding the pricing terms.
    :return: The price as found in the file, or ``None`` if there is no
        matching entry.
    """
    with open(terms_file) as file:
        for prefix, event, value in ijson.parse(file):
            if prefix.endswith('.pricePerUnit.USD') and sku in prefix:
                logger.debug('Hourly price: {}'.format(value))
                return value
    # No price entry found for this SKU
    return None

Source File: dataset_utility.py From minerva with Apache License 2.0

4 votes

def jsonObjectReader(filepath):
    """
    Creates a generator that parses an array of json objects from a valid
    json array file, yielding each top level json object in the array.

    The file is streamed with ``ijson`` and is closed once the generator is
    exhausted or closed. ``Decimal`` values are converted to ``float``
    before they are stored in the yielded objects. An empty array yields
    nothing.

    :param filepath: path to json file.
    """
    top_level_array = False
    array_stack = 0
    top_level_object = False
    object_stack = 0
    # No builder exists until the first object starts; events arriving
    # before that (e.g. the end of an empty array) must not touch it.
    builder = None

    with open(filepath, 'r') as json_file:
        for prefix, event, value in ijson.parse(json_file):
            if event == 'start_array':
                if not top_level_array:
                    top_level_array = True
                    continue
                else:
                    array_stack += 1
            if event == 'start_map':
                if not top_level_object:
                    top_level_object = True
                    builder = ijson.ObjectBuilder()
                else:
                    object_stack += 1
            if event == 'end_map':
                if not top_level_object:
                    raise Exception('end_map without a top level object')
                else:
                    if object_stack == 0:
                        top_level_object = False
                        # All members are already stored at this point; the
                        # closing event is forwarded after resuming.
                        yield builder.value
                    else:
                        object_stack -= 1
            if event == 'end_array':
                if not top_level_array:
                    raise Exception('end_array without a top level array')
                else:
                    if array_stack == 0:
                        top_level_array = False
                    else:
                        array_stack -= 1
            # convert Decimal to float because mongo can't serialize Decimal
            # TODO is this the right place to do this? Should it be done instead
            # upon save?
            if isinstance(value, decimal.Decimal):
                # TODO this has different behavior on python 2.6 vs 2.7 due to
                # different rounding behavior
                value = float(value)
            if builder is not None:
                builder.event(event, value)

Source File: dataset_utility.py From minerva with Apache License 2.0

4 votes

def __init__(self, objConverter=None, mapping=None):
        """
        Set up a mapper that wraps converted objects in a GeoJSON FeatureCollection.

        :param objConverter: Callable turning one source object into a
            feature. If omitted, a converter is built from ``mapping``.
        :param mapping: Dict with the JSONPath expressions
            ``latitudeKeypath`` and ``longitudeKeypath`` used to locate the
            coordinates in a source object. Only read when ``objConverter``
            is not given; the expressions are compiled once, here.
        :raises Exception: If neither ``objConverter`` nor ``mapping`` is
            provided.
        """
        geojson_header = """{
        "type": "FeatureCollection",
        "crs": {
            "type": "name",
            "properties": {
                "name": "urn:ogc:def:crs:OGC:1.3:CRS84"
            }
        },
        "features": [
        """

        geojson_footer = """
        ]
        }
        """

        if objConverter is None:
            if mapping is None:
                raise Exception('Must provide objConverter or geoJsonMapping')

            # Compile the JSONPath expressions once instead of once per
            # converted object; they only depend on the mapping.
            lat_expr = jsonpath_rw.parse(mapping['latitudeKeypath'])
            long_expr = jsonpath_rw.parse(mapping['longitudeKeypath'])

            def convertToGeoJson(obj):
                # The first match of each expression provides the coordinate.
                longitude = float(long_expr.find(obj)[0].value)
                latitude = float(lat_expr.find(obj)[0].value)

                point = geojson.Point((longitude, latitude))
                properties = {"placeholder": 0}
                feature = geojson.Feature(geometry=point,
                                          properties=properties)
                return feature

            objConverter = convertToGeoJson

        super(GeoJsonMapper, self).__init__(objConverter, geojson_header,
                                            geojson_footer, geojson.dumps)

Python ijson.parse() Examples