Python ijson.parse() Examples
The following are 14 code examples of ijson.parse().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module ijson, or try the search function.
Example #1
Source File: runner.py From rally with Apache License 2.0 | 6 votes |
def simple_stats(self, bulk_size, response):
    """
    Build summary statistics for a bulk response.

    The response is first scanned lazily for its ``errors`` and ``took`` properties. Only when
    ``errors`` is truthy is the complete body decoded so that failed items can be counted.

    :param bulk_size: Number of items in the bulk request; used to derive ``success-count``.
    :param response: The raw response. It is handed to ``parse`` and, on the error path, read
                     via ``response.getvalue()``.
    :return: A dict with the keys ``took``, ``success``, ``success-count`` and ``error-count``.
             When at least one item failed it also contains ``error-type`` and
             ``error-description``.
    """
    failed_items = 0
    details = set()
    # parse lazily on the fast path
    props = parse(response, ["errors", "took"])

    if props.get("errors", False):
        # Reparse fully in case of errors - this will be slower
        full_response = json.loads(response.getvalue())
        for item in full_response["items"]:
            # each item is a single-entry mapping; only its value is of interest
            data = next(iter(item.values()))
            status_failed = data["status"] > 299
            if not status_failed and not ('_shards' in data and data["_shards"]["failed"] > 0):
                continue
            failed_items += 1
            self.extract_error_details(details, data)

    stats = {
        "took": props.get("took"),
        "success": failed_items == 0,
        "success-count": bulk_size - failed_items,
        "error-count": failed_items
    }
    if failed_items > 0:
        stats["error-type"] = "bulk"
        stats["error-description"] = self.error_description(details)
    return stats
Example #2
Source File: read_regions.py From densecap-tensorflow with MIT License | 5 votes |
def read_regions():
    """
    Stream the region description file and write one JSON file per record id.

    The file at ``VG_REGION_PATH`` is read event by event with ``ijson`` so it never has to be
    loaded completely. Whenever a truthy value follows an ``id`` key, the regions collected so
    far are written to ``<REGION_JSON>/<id>.json`` as ``{"regions": [...], "id": <id>}``.

    ``REGION_JSON`` is created if it does not exist yet. Progress (the number of files written
    so far) is echoed to stdout on a single, continuously overwritten line.
    """
    if not os.path.exists(REGION_JSON):
        os.makedirs(REGION_JSON)

    last_value = None   # most recent map key seen, i.e. the key the next value belongs to
    Dic = {}            # record that is dumped once its id is known
    regions = []        # regions collected for the current record
    dic = {}            # the map currently being assembled
    count = 0

    # Open the input in a ``with`` block so the handle is released when parsing finishes or
    # fails; previously the file object was never closed.
    with open(VG_REGION_PATH) as region_file:
        for prefix, event, value in ijson.parse(region_file):
            sys.stdout.write('>>> %d \r' % count)
            sys.stdout.flush()
            if value == 'regions':
                # a new record starts: forget everything collected so far
                Dic = {}
                regions = []
                last_value = None
            elif last_value == 'id' and value:
                count += 1
                Dic['regions'] = regions
                Dic['id'] = value
                with open(REGION_JSON + '/%s.json' % value, 'w') as f:
                    json.dump(Dic, f)
            elif event == 'map_key':
                last_value = value
            elif event == 'end_map':
                regions.append(dic)
                dic = {}
                last_value = None
            elif last_value:
                dic[last_value] = value
Example #3
Source File: read_regions_test.py From densecap-tensorflow with MIT License | 5 votes |
def read_regions():
    """
    Write the first record of the region description file to ``test_id_<id>.json``.

    The input is streamed with ``ijson``. Parsing stops as soon as the value following the
    first ``id`` key has been seen; the regions collected up to that point are dumped together
    with that id into a file in the current working directory.
    """
    VG_VERSION = '1.2'
    VG_PATH = '/home/joe/git/VG_raw_data'
    VG_REGION_PATH = '%s/%s/region_descriptions.json' % (VG_PATH, VG_VERSION)
    # NOTE: for a quick local run, open a small sample such as 'test_region.json' instead.

    last_value = None   # most recent map key seen, i.e. the key the next value belongs to
    Dic = {}            # record that is dumped once its id is known
    regions = []        # regions collected for the current record
    dic = {}            # the map currently being assembled

    # Open the input in a ``with`` block so the handle is released on the early ``break`` as
    # well as on errors; previously the file object was never closed.
    with open(VG_REGION_PATH) as region_file:
        for prefix, event, value in ijson.parse(region_file):
            if value == 'regions':
                # a new record starts: forget everything collected so far
                Dic = {}
                regions = []
                last_value = None
            elif last_value == 'id':
                Dic['regions'] = regions
                Dic['id'] = value
                with open('test_id_%s.json' % value, 'w') as f:
                    json.dump(Dic, f)
                # only the first record is needed
                break
            elif event == 'map_key':
                last_value = value
            elif event == 'end_map':
                regions.append(dic)
                dic = {}
                last_value = None
            elif last_value:
                dic[last_value] = value
Example #4
Source File: SwiftKitten.py From SwiftKitten with MIT License | 5 votes |
def _autocomplete_request(self, view, cache, request, text, offset,
                          included=lambda item: True):
    """
    Run the completion command for ``request`` and return the parsed completions.

    The request is registered in ``self.current_requests`` while it is running and is always
    removed again, even when the command or the parsing fails. Without that cleanup a single
    failure would make every later identical request be rejected as "already in progress".

    ``view``, ``text`` and ``offset`` are forwarded to ``self.get_completion_cmd``; ``view`` is
    also used to look up the ``concurrent_request_limit`` setting (default 4). ``cache`` is
    not used by this method. ``included`` is forwarded to ``self._parse_completions``; the
    default accepts every item.

    Returns the list of values produced by ``self._parse_completions``.

    Raises ``AutocompleteRequestError`` when too many requests are running or when the same
    request is already in progress.
    """
    # this should not happen, but just in case, do not
    # overload the system with too many requests
    if len(self.current_requests) > self.get_settings(view, "concurrent_request_limit", 4):
        raise AutocompleteRequestError("Request denied: too many concurrent requests.")

    # prevent duplicate requests
    if request in self.current_requests:
        raise AutocompleteRequestError(
            "Request denied: completion for \"{request}\" "
            "already in progress.".format(request=request)
        )

    # start request
    self.current_requests.add(request)
    try:
        # get completion command
        cmd = self.get_completion_cmd(view, text, offset)

        # run completion command
        # NOTE(review): the command runs through the shell; confirm that everything
        # get_completion_cmd interpolates into it is properly quoted.
        p = Popen(cmd, shell=True, stdout=PIPE, stderr=STDOUT)
        try:
            parser = ijson.parse(p.stdout)
            completions = list(self._parse_completions(parser, included=included))
        finally:
            # release the pipe and reap the child so neither outlives the request
            p.stdout.close()
            p.wait()
    finally:
        # finish request
        self.current_requests.discard(request)

    return completions
Example #5
Source File: runner.py From rally with Apache License 2.0 | 5 votes |
def parse(text, props, lists=None):
    """
    Selectively parses the provided text as JSON, extracting only the properties provided in
    ``props``.

    If ``lists`` is specified, this function also determines whether each of the provided lists
    is empty (respective value will be ``True``) or contains elements (respective value will be
    ``False``). Parsing stops as soon as every requested property and list has been seen; a
    document that ends prematurely is tolerated and yields whatever was found until then.

    :param text: A text to parse. It is rewound with ``seek(0)`` before parsing.
    :param props: A mandatory list of property paths (separated by a dot character) for which
                  to extract values.
    :param lists: An optional list of property paths to JSON lists in the provided text.
    :return: A dict containing all properties and lists that have been found in the provided
             text.
    """
    text.seek(0)
    events = ijson.parse(text)

    found_props = {}
    list_is_empty = {}
    pending_list = None
    awaiting_first_list_event = False

    try:
        for prefix, event, value in events:
            if awaiting_first_list_event:
                # the event right after "start_array" tells us whether the list has content:
                # True if the list is empty, False otherwise
                list_is_empty[pending_list] = event == "end_array"
                awaiting_first_list_event = False

            if prefix in props:
                found_props[prefix] = value
            elif lists is not None and prefix in lists and event == "start_array":
                pending_list = prefix
                awaiting_first_list_event = True

            # stop early once all necessary properties have been found
            props_complete = len(found_props) == len(props)
            if props_complete and (lists is None or len(list_is_empty) == len(lists)):
                break
    except ijson.IncompleteJSONError:
        # the document ended before all properties were found
        pass

    found_props.update(list_is_empty)
    return found_props
Example #6
Source File: runner.py From rally with Apache License 2.0 | 5 votes |
async def request_body_query(self, es, params):
    """
    Run a search with a request body and report its outcome.

    This has to be a coroutine because it awaits ``self._raw_search``; with a plain ``def``
    the ``await`` expression is a syntax error.

    :param es: The client to use. Eager response parsing is switched off on it via
               ``return_raw_response()`` before the search is issued.
    :param params: Request parameters. ``body`` is mandatory; ``index`` (default ``_all``),
                   ``type`` and ``detailed-results`` (default ``False``) are optional.
    :return: A dict with ``weight``, ``unit`` and ``success``. With ``detailed-results``
             enabled it additionally contains ``hits``, ``hits_relation``, ``timed_out`` and
             ``took`` as extracted from the raw response.
    """
    request_params = self._default_request_params(params)
    index = params.get("index", "_all")
    body = mandatory(params, "body", self)
    doc_type = params.get("type")
    detailed_results = params.get("detailed-results", False)
    headers = self._headers(params)

    # disable eager response parsing - responses might be huge thus skewing results
    es.return_raw_response()

    r = await self._raw_search(es, doc_type, index, body, request_params, headers)

    result = {
        "weight": 1,
        "unit": "ops",
        "success": True
    }
    if detailed_results:
        props = parse(r, ["hits.total", "hits.total.value", "hits.total.relation", "timed_out", "took"])
        # prefer "hits.total.value" and fall back to "hits.total" when it is absent
        result["hits"] = props.get("hits.total.value", props.get("hits.total", 0))
        result["hits_relation"] = props.get("hits.total.relation", "eq")
        result["timed_out"] = props.get("timed_out", False)
        result["took"] = props.get("took", 0)
    return result
Example #7
Source File: cuckoo.py From fame_modules with GNU General Public License v3.0 | 5 votes |
def extract_info(self, report):
    """
    Stream ``report`` with ijson and collect signatures, classification, score and IOCs.

    Completed signatures are appended to ``self.results['signatures']``; every signature name
    is also added as a tag. While the current signature is named ``antivirus_virustotal``,
    values found under a known antivirus prefix are stored on the analysed file. The values of
    ``data.malfamily`` and ``data.malscore`` end up in ``self.results['classification']`` and
    ``self.results['score']`` (as a string); network domains, host IPs and HTTP URIs are
    registered as IOCs.
    """
    # First, build the prefixes of every antivirus entry that might be of interest
    av_prefixes = [
        'data.signatures.item.data.item.{}'.format(av)
        for av in self._analysis._file['antivirus']
    ]
    ioc_prefixes = (
        "data.network.domains.item.domain",
        "data.network.hosts.item.ip",
        "data.network.traffic.http.item.uri",
    )
    # signature attributes that are copied as-is into the current signature
    plain_fields = {
        "data.signatures.item.severity": 'severity',
        "data.signatures.item.description": 'description',
    }

    parser = ijson.parse(report)
    self.results['signatures'] = []
    signature = dict()

    for prefix, event, value in parser:
        if event == "end_map" and prefix == "data.signatures.item":
            # the signature is complete: store it and start a fresh one
            self.results['signatures'].append(signature)
            signature = dict()
        elif prefix == "data.signatures.item.name":
            signature['name'] = value
            self.add_tag(value)
        elif prefix in plain_fields:
            signature[plain_fields[prefix]] = value
        elif signature.get('name') == 'antivirus_virustotal' and prefix in av_prefixes:
            # the last path component is the antivirus name
            antivirus = prefix.rsplit('.', 1)[-1]
            self._analysis._file.update_value(['antivirus', antivirus], value)
        elif prefix == "data.malfamily":
            self.results['classification'] = value
        elif prefix == "data.malscore":
            self.results['score'] = str(value)
        elif prefix in ioc_prefixes:
            self.add_ioc(value)
Example #8
Source File: cuckoo.py From fame_modules with GNU General Public License v3.0 | 5 votes |
def extract_info(self, report):
    """
    Stream ``report`` with ijson and collect signatures, the score and IOCs.

    Completed signatures are appended to ``self.results['signatures']``; every signature name
    is also added as a tag. The value of ``info.score`` is stored as a float in
    ``self.results['score']``. Network domains, host IPs and HTTP URIs are registered as IOCs,
    except for the values ``8.8.8.8`` and ``8.8.4.4``.
    """
    ignored_iocs = ("8.8.8.8", "8.8.4.4")
    ioc_prefixes = (
        "network.domains.item.domain",
        "network.hosts.item.ip",
        "network.http.item.uri",
    )
    # signature attributes that are copied as-is into the current signature
    plain_fields = {
        "signatures.item.severity": 'severity',
        "signatures.item.description": 'description',
    }

    parser = ijson.parse(report)
    self.results['signatures'] = []
    signature = dict()

    for prefix, event, value in parser:
        if event == "end_map" and prefix == "signatures.item":
            # the signature is complete: store it and start a fresh one
            self.results['signatures'].append(signature)
            signature = dict()
        elif prefix == "signatures.item.name":
            signature['name'] = value
            self.add_tag(value)
        elif prefix in plain_fields:
            signature[plain_fields[prefix]] = value
        elif prefix == "info.score":
            self.results['score'] = float(value)
        elif prefix in ioc_prefixes:
            if value not in ignored_iocs:
                self.add_ioc(value)
Example #9
Source File: joe.py From fame_modules with GNU General Public License v3.0 | 5 votes |
def extract_threatname(self, report):
    """
    Register every known threat name found in ``report``.

    The report is streamed with ijson. Each non-null threat name other than "unknown"
    (compared case-insensitively) is added as a probable name and, lower-cased, as a tag.
    """
    threatname_prefix = "analysis.signaturedetections.strategy.item.threatname"

    for prefix, event, value in ijson.parse(report):
        if prefix != threatname_prefix or value is None:
            continue

        threatname = str(value)
        lowered = threatname.lower()
        if lowered == "unknown":
            continue

        self.add_probable_name(threatname)
        self.add_tag(lowered)
Example #10
Source File: joe.py From fame_modules with GNU General Public License v3.0 | 5 votes |
def extract_iocs(self, report):
    """
    Collect network IOCs from ``report`` and register each distinct one once.

    The report is streamed with ijson. TCP/UDP source and destination addresses and DNS names
    are collected unless they start with ``192.168.``. Header lines of HTTP, HTTPS and SSL-HTTP
    packets are accumulated per header, and after every line the text gathered so far is passed
    to ``self.extract_url`` together with the scheme and the IOC set. Finally every collected
    IOC is handed to ``self.add_ioc``.
    """
    packets = "analysis.behavior.network.{}.packet.item."
    address_prefixes = (
        packets.format("tcp") + "srcip",
        packets.format("tcp") + "dstip",
        packets.format("udp") + "srcip",
        packets.format("udp") + "dstip",
        packets.format("dns") + "name",
    )
    header_prefixes = tuple(
        packets.format(protocol) + "header" for protocol in ("http", "https", "sslhttp")
    )
    # header line prefix -> URL scheme handed to extract_url
    line_schemes = {
        packets.format("http") + "header.line.item": "http",
        packets.format("https") + "header.line.item": "https",
        packets.format("sslhttp") + "header.line.item": "https",
    }

    iocs = set()
    parser = ijson.parse(report)
    lines = ""

    for prefix, event, value in parser:
        if prefix in address_prefixes:
            if not value.startswith("192.168."):
                iocs.add(value)
        elif prefix in header_prefixes:
            # a header event resets the accumulated lines
            lines = ""
        elif prefix in line_schemes:
            lines += "{}\n".format(value)
            self.extract_url(line_schemes[prefix], iocs, lines)

    for ioc in iocs:
        self.add_ioc(ioc)
Example #11
Source File: aws_rate_hook_with_cb_configured_rate_per_env.py From cloudbolt-forge with Apache License 2.0 | 5 votes |
def get_sku(location, instance_type, products_file):
    """
    Optimized JSON parsing to find the SKU for the provided location and type.

    The products file is streamed with ijson. Within each SKU dict three attributes have to
    match: the product family must be "Compute Instance", the location must equal ``location``
    and the instance type must equal ``instance_type``.

    :param location: Location value to match.
    :param instance_type: Instance type value to match.
    :param products_file: Path of the JSON file to scan.
    :return: The SKU of the first dict in which all three attributes match, or ``None`` when
             the file contains no such dict.
    """
    # SKU dicts have prefixes like 76V3SF2FJC3ZR3GH
    sku_dict_prefix = re.compile(r'^[a-zA-Z0-9]+$')
    sku = ''
    matches = 0

    with open(products_file) as f:
        for prefix, event, value in ijson.parse(f):
            if prefix.endswith('.sku'):
                # Save the SKU of the current SKU dict
                sku = value
            elif prefix.endswith('.productFamily') and value == "Compute Instance":
                matches += 1
            elif prefix.endswith('.location') and value == location:
                matches += 1
            elif prefix.endswith('.instanceType') and value == instance_type:
                matches += 1
            elif event == 'end_map' and sku_dict_prefix.match(prefix):
                # We've reached the end of the SKU dict, is this the right one?
                if matches == 3:
                    # All three values matched, this is our sku
                    logger.debug("SKU: %s", sku)
                    return sku
                # This wasn't the right SKU dict, reset our matches
                matches = 0

    # no SKU dict matched all three criteria
    return None
Example #12
Source File: aws_rate_hook_with_cb_configured_rate_per_env.py From cloudbolt-forge with Apache License 2.0 | 5 votes |
def get_price(sku, terms_file):
    """
    Find the USD price per unit for ``sku`` in the terms file.

    The file is streamed with ijson and scanning stops at the first value whose prefix ends
    with ``.pricePerUnit.USD`` and contains ``sku``.

    :param sku: The SKU to look for; matched as a substring of the event prefix.
    :param terms_file: Path of the JSON file to scan.
    :return: The first matching price value as delivered by ijson, or ``None`` when the file
             contains no price for the SKU.
    """
    with open(terms_file) as file:
        for prefix, event, value in ijson.parse(file):
            if prefix.endswith('.pricePerUnit.USD') and sku in prefix:
                logger.debug('Hourly price: %s', value)
                return value

    # no price entry for this SKU
    return None
Example #13
Source File: dataset_utility.py From minerva with Apache License 2.0 | 4 votes |
def jsonObjectReader(filepath):
    """
    Creates a generator that parses an array of json objects from a valid
    json array file, yielding each top level json object in the array.

    The file is closed once the generator is exhausted or closed. An array that contains no
    objects (for example an empty array) simply yields nothing.

    :param filepath: path to json file.
    :raises Exception: if a map or array is closed while no top level object or array is open.
    """
    top_level_array = False
    array_stack = 0
    top_level_object = False
    object_stack = 0
    # Stays None until the first top level object starts, so that events seen before it
    # (such as the closing bracket of an empty array) are not sent to a missing builder.
    builder = None

    with open(filepath, 'r') as json_file:
        for prefix, event, value in ijson.parse(json_file):
            if event == 'start_array':
                if not top_level_array:
                    # the enclosing array itself is not part of any yielded object
                    top_level_array = True
                    continue
                else:
                    array_stack += 1
            if event == 'start_map':
                if not top_level_object:
                    top_level_object = True
                    builder = ijson.ObjectBuilder()
                else:
                    object_stack += 1
            if event == 'end_map':
                if not top_level_object:
                    raise Exception('end_map without a top level object')
                else:
                    if object_stack == 0:
                        top_level_object = False
                        # all members have been set already; the closing event is forwarded
                        # to the builder below once the consumer resumes the generator
                        yield builder.value
                    else:
                        object_stack -= 1
            if event == 'end_array':
                if not top_level_array:
                    raise Exception('end_array without a top level array')
                else:
                    if array_stack == 0:
                        top_level_array = False
                    else:
                        array_stack -= 1
            # convert Decimal to float because mongo can't serialize Decimal
            # TODO is this the right place to do this? Should it be done instead
            # upon save?
            if isinstance(value, decimal.Decimal):
                # TODO this has different behavior on python 2.6 vs 2.7 due to
                # different rounding behavior
                value = float(value)
            if builder is not None:
                builder.event(event, value)
Example #14
Source File: dataset_utility.py From minerva with Apache License 2.0 | 4 votes |
def __init__(self, objConverter=None, mapping=None):
    """
    Set up a mapper that wraps converted objects in a GeoJSON FeatureCollection.

    :param objConverter: Callable turning an input object into the object to serialize. When
                         omitted, a converter is derived from ``mapping``.
    :param mapping: Dict providing the JSONPath expressions ``latitudeKeypath`` and
                    ``longitudeKeypath``; only consulted when ``objConverter`` is not given.
    :raises Exception: if neither ``objConverter`` nor ``mapping`` is provided.
    """
    # Header and footer literals are reproduced exactly as found, including whitespace.
    geojson_header = """{ "type": "FeatureCollection", "crs": { "type": "name", "properties": { "name": "urn:ogc:def:crs:OGC:1.3:CRS84" } }, "features": [ """
    geojson_footer = """ ] } """

    if objConverter is None:
        if mapping is None:
            raise Exception('Must provide objConverter or geoJsonMapping')

        def convertToGeoJson(obj):
            # Build a point feature from the first match of each configured keypath.
            lat_expr = jsonpath_rw.parse(mapping['latitudeKeypath'])
            long_expr = jsonpath_rw.parse(mapping['longitudeKeypath'])
            # GeoJSON points are (longitude, latitude)
            longitude = float(long_expr.find(obj)[0].value)
            latitude = float(lat_expr.find(obj)[0].value)
            point = geojson.Point((longitude, latitude))
            return geojson.Feature(geometry=point, properties={"placeholder": 0})

        objConverter = convertToGeoJson

    super(GeoJsonMapper, self).__init__(
        objConverter, geojson_header, geojson_footer, geojson.dumps)