Python ijson.parse() Examples

The following are 14 code examples of ijson.parse(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module ijson, or try the search function.
Example #1
Source File: runner.py    From rally with Apache License 2.0 6 votes vote down vote up
def simple_stats(self, bulk_size, response):
        """
        Summarize a bulk response, decoding it completely only if it reports errors.

        At first only the top-level ``errors`` and ``took`` properties are extracted via ``parse``.
        When ``errors`` is truthy the whole response is decoded and every entry of ``items`` is checked.

        :param bulk_size: Number of items in the bulk request; used to derive the success count.
        :param response: The raw response. It is handed to ``parse`` and must provide ``getvalue()``.
        :return: A dict with the keys ``took``, ``success``, ``success-count`` and ``error-count``. If at
                 least one item failed it also contains ``error-type`` and ``error-description``.
        """
        failures = 0
        failure_details = set()
        # cheap path first: only look at the two properties needed for the common case
        summary = parse(response, ["errors", "took"])

        if summary.get("errors", False):
            # errors were reported - pay for a complete decode so each item can be inspected
            for item in json.loads(response.getvalue())["items"]:
                # only the first value of each item is looked at
                result = next(iter(item.values()))
                if result["status"] > 299 or ("_shards" in result and result["_shards"]["failed"] > 0):
                    failures += 1
                    self.extract_error_details(failure_details, result)

        stats = {
            "took": summary.get("took"),
            "success": failures == 0,
            "success-count": bulk_size - failures,
            "error-count": failures
        }
        if failures > 0:
            stats.update({
                "error-type": "bulk",
                "error-description": self.error_description(failure_details)
            })
        return stats
Example #2
Source File: read_regions.py    From densecap-tensorflow with MIT License 5 votes vote down vote up
def read_regions():
    """
    Split the region file at ``VG_REGION_PATH`` into one JSON file per ``id`` value.

    The input is streamed with ``ijson``. Maps that end are collected as regions; whenever a
    truthy value follows an ``id`` key, the regions collected so far are written together with
    that id to ``REGION_JSON/<id>.json``. A running count of written files is printed to stdout.
    """
    if not os.path.exists(REGION_JSON):
        os.makedirs(REGION_JSON)
    last_key = None
    record = {}
    regions = []
    region = {}
    count = 0
    # Use a context manager so the input file is closed even if parsing or writing fails.
    with open(VG_REGION_PATH) as region_file:
        for prefix, event, value in ijson.parse(region_file):
            sys.stdout.write('>>> %d \r' % count)
            sys.stdout.flush()
            if value == 'regions':
                # a new "regions" key starts a fresh record
                record = {}
                regions = []
                last_key = None
            elif last_key == 'id' and value:
                # the id follows the regions it belongs to, so the record is complete here
                count += 1
                record['regions'] = regions
                record['id'] = value
                with open(REGION_JSON + '/%s.json' % value, 'w') as f:
                    json.dump(record, f)
            elif event == 'map_key':
                last_key = value
            elif event == 'end_map':
                regions.append(region)
                region = {}
                last_key = None
            elif last_key:
                region[last_key] = value
Example #3
Source File: read_regions_test.py    From densecap-tensorflow with MIT License 5 votes vote down vote up
def read_regions():
    """
    Write the first record of the region description file to ``test_id_<id>.json``.

    The input path is built from the hard-coded ``VG_PATH`` and ``VG_VERSION`` below and is
    streamed with ``ijson``. Parsing stops right after the first record has been written.
    """
    VG_VERSION = '1.2'
    VG_PATH = '/home/joe/git/VG_raw_data'
    VG_REGION_PATH = '%s/%s/region_descriptions.json' % (VG_PATH, VG_VERSION)

    last_key = None
    record = {}
    regions = []
    region = {}
    # Use a context manager so the input file is closed, including after the early ``break``.
    with open(VG_REGION_PATH) as region_file:
        for prefix, event, value in ijson.parse(region_file):
            if value == 'regions':
                # a new "regions" key starts a fresh record
                record = {}
                regions = []
                last_key = None
            elif last_key == 'id':
                record['regions'] = regions
                record['id'] = value
                with open('test_id_%s.json' % value, 'w') as f:
                    json.dump(record, f)
                    # one record is enough for the test
                    break
            elif event == 'map_key':
                last_key = value
            elif event == 'end_map':
                regions.append(region)
                region = {}
                last_key = None
            elif last_key:
                region[last_key] = value
Example #4
Source File: SwiftKitten.py    From SwiftKitten with MIT License 5 votes vote down vote up
def _autocomplete_request(self, view, cache, request,
            text, offset, included=lambda item: True):
        """
        Run the completion command for ``request`` and return the parsed completions.

        ``request`` is tracked in ``self.current_requests`` while the command runs and is always
        removed again, also when building the command or parsing its output fails.

        :param view: Used to look up settings and to build the completion command.
        :param cache: Not used by this method.
        :param request: Identifier of this request in ``self.current_requests``.
        :param text: Passed on to ``get_completion_cmd``.
        :param offset: Passed on to ``get_completion_cmd``.
        :param included: Passed on to ``_parse_completions``; the default returns ``True`` for every item.
        :return: A list with everything ``_parse_completions`` produced.
        :raises AutocompleteRequestError: If the number of running requests exceeds the
            ``concurrent_request_limit`` setting or ``request`` is already in progress.
        """
        # this should not happen, but just in case, do not
        # overload the system with too many requests
        if len(self.current_requests) > self.get_settings(view, "concurrent_request_limit", 4):
            raise AutocompleteRequestError("Request denied: too many concurrent requests.")

        # prevent duplicate requests
        if request in self.current_requests:
            raise AutocompleteRequestError(
                "Request denied: completion for \"{request}\" "
                "already in progress.".format(request=request)
            )

        # start request
        self.current_requests.add(request)
        try:
            # get completion command
            cmd = self.get_completion_cmd(view, text, offset)

            # run completion command
            # NOTE(review): shell=True executes ``cmd`` through the shell - confirm that
            # get_completion_cmd quotes everything that originates from the edited buffer.
            p = Popen(cmd, shell=True, stdout=PIPE, stderr=STDOUT)
            try:
                parser = ijson.parse(p.stdout)
                completions = list(self._parse_completions(parser, included=included))
            finally:
                # close our end of the pipe first so the child cannot block on a
                # full pipe, then reap it to avoid leaving a zombie process behind
                p.stdout.close()
                p.wait()
        finally:
            # finish request - even on failure, otherwise the same request
            # would be rejected as "already in progress" from now on
            self.current_requests.discard(request)

        return completions
Example #5
Source File: runner.py    From rally with Apache License 2.0 5 votes vote down vote up
def parse(text, props, lists=None):
    """
    Selectively parse the provided text as JSON, extracting only the properties given in ``props``. If ``lists``
    is specified, additionally determine for each of these lists whether it is empty (value ``True``) or contains
    elements (value ``False``). Parsing stops as soon as everything that was requested has been found.

    :param text: A seekable stream with the JSON text. It is rewound to its start before parsing.
    :param props: A mandatory list of property paths (separated by a dot character) for which to extract values.
    :param lists: An optional list of property paths to JSON lists in the provided text.
    :return: A dict containing all properties and lists that have been found in the provided text.
    """
    text.seek(0)
    events = ijson.parse(text)
    extracted = {}
    emptiness = {}
    # path of a requested list whose ``start_array`` was the previous event, ``None`` otherwise
    pending_list = None
    try:
        for path, event, value in events:
            if pending_list is not None:
                # the list is empty exactly if it is closed again right away
                emptiness[pending_list] = event == "end_array"
                pending_list = None
            if path in props:
                extracted[path] = value
            elif lists is not None and path in lists and event == "start_array":
                pending_list = path
            # nothing left to look for - skip the remainder of the document
            if len(extracted) == len(props) and (lists is None or len(emptiness) == len(lists)):
                break
    except ijson.IncompleteJSONError:
        # the document ended before everything was found; return what we have
        pass

    extracted.update(emptiness)
    return extracted
Example #6
Source File: runner.py    From rally with Apache License 2.0 5 votes vote down vote up
async def request_body_query(self, es, params):
        """
        Run a search with a request body and report the outcome.

        Declared ``async`` because the search is awaited; ``await`` in a plain ``def`` is a syntax error.

        :param es: The client. It is switched to raw responses and passed on to ``_raw_search``.
        :param params: Parameters of the operation. ``body`` is mandatory; ``index`` (default ``_all``),
                       ``type`` and ``detailed-results`` (default ``False``) are optional.
        :return: A dict with ``weight``, ``unit`` and ``success``. With ``detailed-results`` it also
                 contains ``hits``, ``hits_relation``, ``timed_out`` and ``took``.
        """
        request_params = self._default_request_params(params)
        index = params.get("index", "_all")
        body = mandatory(params, "body", self)
        doc_type = params.get("type")
        detailed_results = params.get("detailed-results", False)
        headers = self._headers(params)

        # disable eager response parsing - responses might be huge thus skewing results
        es.return_raw_response()

        r = await self._raw_search(es, doc_type, index, body, request_params, headers)

        result = {
            "weight": 1,
            "unit": "ops",
            "success": True
        }
        if detailed_results:
            props = parse(r, ["hits.total", "hits.total.value", "hits.total.relation", "timed_out", "took"])
            result.update({
                # prefer the nested total value and fall back to a plain ``hits.total``
                "hits": props.get("hits.total.value", props.get("hits.total", 0)),
                "hits_relation": props.get("hits.total.relation", "eq"),
                "timed_out": props.get("timed_out", False),
                "took": props.get("took", 0)
            })
        return result
Example #7
Source File: cuckoo.py    From fame_modules with GNU General Public License v3.0 5 votes vote down vote up
def extract_info(self, report):
        """
        Stream ``report`` and collect signatures, antivirus values, classification, score and IOCs.

        Signatures are appended to ``self.results['signatures']`` and every signature name is added
        as a tag. While the current signature is named ``antivirus_virustotal``, values found under
        one of the antivirus prefixes are written back to the analyzed file.

        :param report: The JSON report, passed to ``ijson.parse``.
        """
        # One prefix per antivirus already listed for the analyzed file
        av_prefixes = [
            'data.signatures.item.data.item.{}'.format(av)
            for av in self._analysis._file['antivirus']
        ]
        detail_prefixes = ("data.signatures.item.severity", "data.signatures.item.description")
        ioc_prefixes = (
            "data.network.domains.item.domain",
            "data.network.hosts.item.ip",
            "data.network.traffic.http.item.uri",
        )

        events = ijson.parse(report)
        self.results['signatures'] = []
        current = {}

        for prefix, event, value in events:
            if prefix == "data.signatures.item" and event == "end_map":
                # the signature is complete, store it and start the next one
                self.results['signatures'].append(current)
                current = {}
            elif prefix == "data.signatures.item.name":
                current['name'] = value
                self.add_tag(value)
            elif prefix in detail_prefixes:
                # the last path segment doubles as the key in the signature
                current[prefix.rpartition('.')[2]] = value
            elif current.get('name') == 'antivirus_virustotal' and prefix in av_prefixes:
                self._analysis._file.update_value(['antivirus', prefix.rpartition('.')[2]], value)
            elif prefix == "data.malfamily":
                self.results['classification'] = value
            elif prefix == "data.malscore":
                self.results['score'] = str(value)
            elif prefix in ioc_prefixes:
                self.add_ioc(value)
Example #8
Source File: cuckoo.py    From fame_modules with GNU General Public License v3.0 5 votes vote down vote up
def extract_info(self, report):
        """
        Stream ``report`` and collect signatures, the score and IOCs.

        Signatures are appended to ``self.results['signatures']`` and every signature name is added
        as a tag. Domains, host IPs and HTTP URIs are added as IOCs unless the value is
        ``8.8.8.8`` or ``8.8.4.4``.

        :param report: The JSON report, passed to ``ijson.parse``.
        """
        detail_prefixes = ("signatures.item.severity", "signatures.item.description")
        ioc_prefixes = ("network.domains.item.domain", "network.hosts.item.ip", "network.http.item.uri")
        ignored_iocs = ("8.8.8.8", "8.8.4.4")

        events = ijson.parse(report)
        self.results['signatures'] = []
        current = {}

        for prefix, event, value in events:
            if prefix == "signatures.item" and event == "end_map":
                # the signature is complete, store it and start the next one
                self.results['signatures'].append(current)
                current = {}
            elif prefix == "signatures.item.name":
                current['name'] = value
                self.add_tag(value)
            elif prefix in detail_prefixes:
                # the last path segment doubles as the key in the signature
                current[prefix.rpartition('.')[2]] = value
            elif prefix == "info.score":
                self.results['score'] = float(value)
            elif prefix in ioc_prefixes and value not in ignored_iocs:
                self.add_ioc(value)
Example #9
Source File: joe.py    From fame_modules with GNU General Public License v3.0 5 votes vote down vote up
def extract_threatname(self, report):
        """
        Register every known threat name found in ``report``.

        Each value that is neither ``None`` nor "unknown" (ignoring case) is added as a probable
        name and, in lower case, as a tag.

        :param report: The JSON report, passed to ``ijson.parse``.
        """
        threatname_prefix = "analysis.signaturedetections.strategy.item.threatname"
        for prefix, event, value in ijson.parse(report):
            if prefix != threatname_prefix or value is None:
                continue
            name = str(value)
            tag = name.lower()
            if tag == "unknown":
                continue
            self.add_probable_name(name)
            self.add_tag(tag)
Example #10
Source File: joe.py    From fame_modules with GNU General Public License v3.0 5 votes vote down vote up
def extract_iocs(self, report):
        """
        Collect IOCs from the network section of ``report`` and register each one once.

        Source/destination IPs and DNS names are collected unless they start with ``192.168.``.
        Header lines are accumulated per header and handed to ``extract_url`` together with the
        scheme and the set of IOCs collected so far.

        :param report: The JSON report, passed to ``ijson.parse``.
        """
        address_prefixes = (
            "analysis.behavior.network.tcp.packet.item.srcip",
            "analysis.behavior.network.tcp.packet.item.dstip",
            "analysis.behavior.network.udp.packet.item.srcip",
            "analysis.behavior.network.udp.packet.item.dstip",
            "analysis.behavior.network.dns.packet.item.name",
        )
        header_prefixes = (
            "analysis.behavior.network.http.packet.item.header",
            "analysis.behavior.network.https.packet.item.header",
            "analysis.behavior.network.sslhttp.packet.item.header",
        )
        # prefix of a header line -> scheme passed to extract_url
        scheme_by_line_prefix = {
            "analysis.behavior.network.http.packet.item.header.line.item": "http",
            "analysis.behavior.network.https.packet.item.header.line.item": "https",
            "analysis.behavior.network.sslhttp.packet.item.header.line.item": "https",
        }

        found = set()
        events = ijson.parse(report)
        header_text = ""
        for prefix, event, value in events:
            if prefix in address_prefixes:
                if not value.startswith("192.168."):
                    found.add(value)
            elif prefix in header_prefixes:
                # any event on the header itself starts over with an empty buffer
                header_text = ""
            elif prefix in scheme_by_line_prefix:
                header_text += "{}\n".format(value)
                self.extract_url(scheme_by_line_prefix[prefix], found, header_text)

        for ioc in found:
            self.add_ioc(ioc)
Example #11
Source File: aws_rate_hook_with_cb_configured_rate_per_env.py    From cloudbolt-forge with Apache License 2.0 5 votes vote down vote up
def get_sku(location, instance_type, products_file):
    """
    Optimized JSON parsing to find the SKU for the provided location and type.

    The file is streamed instead of being loaded as a whole. Within each SKU dict three
    attributes are checked: the product family must be "Compute Instance", the location must
    equal ``location`` and the instance type must equal ``instance_type``.

    :param location: Location value to look for.
    :param instance_type: Instance type value to look for.
    :param products_file: Path of the JSON file to search.
    :return: The SKU of the first SKU dict with three matches, or ``None`` if there is none.
    """
    # SKU dicts have prefixes like 76V3SF2FJC3ZR3GH
    sku_dict_prefix = re.compile('^[a-zA-Z0-9]+$')
    sku = ''
    matches = 0
    with open(products_file) as f:
        for prefix, event, value in ijson.parse(f):
            if prefix.endswith('.sku'):
                # Save the SKU of the current SKU dict
                sku = value
            elif prefix.endswith('.productFamily') and value == "Compute Instance":
                matches += 1
            elif prefix.endswith('.location') and value == location:
                matches += 1
            elif prefix.endswith('.instanceType') and value == instance_type:
                matches += 1
            elif event == 'end_map' and sku_dict_prefix.match(prefix):
                # We've reached the end of the SKU dict, is this the right one?
                if matches == 3:
                    # All three values matched, this is our sku
                    logger.debug("SKU: %s", sku)
                    return sku
                # This wasn't the right SKU dict, reset our matches
                matches = 0
    # The whole file was read without finding a matching SKU dict
    return None
Example #12
Source File: aws_rate_hook_with_cb_configured_rate_per_env.py    From cloudbolt-forge with Apache License 2.0 5 votes vote down vote up
def get_price(sku, terms_file):
    """
    Stream ``terms_file`` and return the USD price per unit listed for ``sku``.

    :param sku: The SKU to look for; it has to be contained in the prefix of the price entry.
    :param terms_file: Path of the JSON file to search.
    :return: The first value whose prefix ends with ``.pricePerUnit.USD`` and contains ``sku``,
             or ``None`` if there is no such value.
    """
    with open(terms_file) as file:
        for prefix, event, value in ijson.parse(file):
            if prefix.endswith('.pricePerUnit.USD') and sku in prefix:
                logger.debug('Hourly price: %s', value)
                return value
    # The whole file was read without finding a price for this SKU
    return None
Example #13
Source File: dataset_utility.py    From minerva with Apache License 2.0 4 votes vote down vote up
def jsonObjectReader(filepath):
    """
    Creates a generator that parses an array of json objects from a valid
    json array file, yielding each top level json object in the array.

    The file is closed once the generator is exhausted or closed. An empty
    array yields nothing, and array elements that are not objects are skipped.
    ``Decimal`` values are converted to ``float`` before they are stored.

    :param filepath: path to json file.
    """
    top_level_array = False
    array_stack = 0
    top_level_object = False
    object_stack = 0
    # There is no builder before the first top level object starts. Events seen
    # until then (e.g. the end of an empty array) must not be forwarded.
    builder = None

    with open(filepath, 'r') as json_file:
        for prefix, event, value in ijson.parse(json_file):
            if event == 'start_array':
                if not top_level_array:
                    # the enclosing array itself is not part of any object
                    top_level_array = True
                    continue
                array_stack += 1
            elif event == 'start_map':
                if not top_level_object:
                    top_level_object = True
                    builder = ijson.ObjectBuilder()
                else:
                    object_stack += 1
            elif event == 'end_map':
                if not top_level_object:
                    raise Exception('end_map without a top level object')
                if object_stack == 0:
                    top_level_object = False
                    # the builder's value is already complete at this point; the
                    # closing event forwarded below only unwinds its internal stack
                    yield builder.value
                else:
                    object_stack -= 1
            elif event == 'end_array':
                if not top_level_array:
                    raise Exception('end_array without a top level array')
                if array_stack == 0:
                    top_level_array = False
                else:
                    array_stack -= 1
            # convert Decimal to float because mongo can't serialize Decimal
            # TODO is this the right place to do this? Should it be done instead
            # upon save?
            if isinstance(value, decimal.Decimal):
                # TODO this has different behavior on python 2.6 vs 2.7 due to
                # different rounding behavior
                value = float(value)
            if builder is not None:
                builder.event(event, value)
Example #14
Source File: dataset_utility.py    From minerva with Apache License 2.0 4 votes vote down vote up
def __init__(self, objConverter=None, mapping=None):
        """
        Set up the mapper with a GeoJSON FeatureCollection header and footer.

        :param objConverter: Callable turning one object into a GeoJSON feature. If omitted, a
            converter is derived from ``mapping``.
        :param mapping: Dict with the JSONPath expressions ``latitudeKeypath`` and
            ``longitudeKeypath``; only used when ``objConverter`` is omitted.
        :raises Exception: If neither ``objConverter`` nor ``mapping`` is given.
        """
        geojson_header = """{
        "type": "FeatureCollection",
        "crs": {
            "type": "name",
            "properties": {
                "name": "urn:ogc:def:crs:OGC:1.3:CRS84"
            }
        },
        "features": [
        """

        geojson_footer = """
        ]
        }
        """

        if objConverter is None and mapping is None:
            raise Exception('Must provide objConverter or geoJsonMapping')

        if objConverter is None:
            def convertToGeoJson(obj):
                lat_expr = jsonpath_rw.parse(mapping['latitudeKeypath'])
                long_expr = jsonpath_rw.parse(mapping['longitudeKeypath'])

                # the first match of each expression provides the coordinate;
                # the point takes the longitude before the latitude
                longitude = float(long_expr.find(obj)[0].value)
                latitude = float(lat_expr.find(obj)[0].value)

                return geojson.Feature(geometry=geojson.Point((longitude, latitude)),
                                       properties={"placeholder": 0})

            objConverter = convertToGeoJson

        super(GeoJsonMapper, self).__init__(objConverter, geojson_header,
                                            geojson_footer, geojson.dumps)