Python ijson.parse() Examples
The following are 14 code examples of ijson.parse().
Example #1
Source File: From rally with Apache License 2.0 | 6 votes |
def simple_stats(self, bulk_size, response):
    """
    Summarize a bulk response, decoding it completely only when it reports errors.

    :param bulk_size: Number of items in the bulk request; used to derive the success count.
    :param response: Raw response. It is handed to ``parse`` and, on the error path, read via ``getvalue()``.
    :return: A dict with the keys ``took``, ``success``, ``success-count`` and ``error-count``. If at least one
             item failed it additionally contains ``error-type`` and ``error-description``.
    """
    failed_items = 0
    failure_details = set()
    # fast path: lazily extract just the two properties needed for the summary
    summary = parse(response, ["errors", "took"])

    if summary.get("errors", False):
        # slow path: the response flags errors, so decode the whole body to inspect every item
        full_response = json.loads(response.getvalue())
        for item in full_response["items"]:
            # only the first value of each item dict is inspected
            outcome = next(iter(item.values()))
            if outcome["status"] <= 299 and not ("_shards" in outcome and outcome["_shards"]["failed"] > 0):
                continue
            failed_items += 1
            self.extract_error_details(failure_details, outcome)

    result = {
        "took": summary.get("took"),
        "success": failed_items == 0,
        "success-count": bulk_size - failed_items,
        "error-count": failed_items
    }
    if failed_items > 0:
        result["error-type"] = "bulk"
        result["error-description"] = self.error_description(failure_details)
    return result
Example #2
Source File: From densecap-tensorflow with MIT License | 5 votes |
def read_regions():
    """
    Split the region descriptions in ``VG_REGION_PATH`` into one JSON file per id.

    The input is consumed as a stream of ijson events. Values are collected into
    a region dict keyed by the most recent ``map_key``; each ``end_map`` appends
    that dict to the current region list. When a truthy value follows an ``id``
    key, the list gathered so far is written to ``REGION_JSON/<id>.json``.
    A running count of written files is echoed to stdout on every event.
    """
    if not os.path.exists(REGION_JSON):
        os.makedirs(REGION_JSON)

    last_value = None
    Dic = {}
    regions = []
    dic = {}
    count = 0
    # The context manager closes the input file once parsing ends or fails,
    # instead of leaving an anonymous handle open.
    with open(VG_REGION_PATH) as region_file:
        for prefix, event, value in ijson.parse(region_file):
            sys.stdout.write('>>> %d \r' % count)
            sys.stdout.flush()
            if value == 'regions':
                # a new "regions" key starts a fresh record
                Dic = {}
                regions = []
                last_value = None
            elif last_value == 'id' and value:
                # the id value completes the record: persist it under its id
                count += 1
                Dic['regions'] = regions
                Dic['id'] = value
                with open(REGION_JSON + '/%s.json' % value, 'w') as f:
                    json.dump(Dic, f)
            elif event == 'map_key':
                last_value = value
            elif event == 'end_map':
                regions.append(dic)
                dic = {}
                last_value = None
            elif last_value:
                dic[last_value] = value
Example #3
Source File: From densecap-tensorflow with MIT License | 5 votes |
def read_regions( ):
    """
    Extract the first record of the region descriptions file into ``test_id_<id>.json``.

    The input path is built from the hard-coded ``VG_PATH`` and ``VG_VERSION``
    below. Events are streamed with ijson; values are collected into a region
    dict keyed by the most recent ``map_key`` and appended to the region list on
    each ``end_map``. The first event that follows an ``id`` key triggers the
    dump of the collected regions and ends the function.
    """
    VG_VERSION = '1.2'
    VG_PATH = '/home/joe/git/VG_raw_data'
    VG_REGION_PATH = '%s/%s/region_descriptions.json' % (VG_PATH, VG_VERSION)

    last_value = None
    Dic = {}
    regions = []
    dic = {}
    # The context manager closes the input file as soon as the loop is left,
    # including the early exit via ``break``.
    with open(VG_REGION_PATH) as region_file:
        for prefix, event, value in ijson.parse(region_file):
            if value == 'regions':
                # a new "regions" key starts a fresh record
                Dic = {}
                regions = []
                last_value = None
            elif last_value == 'id':
                Dic['regions'] = regions
                Dic['id'] = value
                with open('test_id_%s.json' % value, 'w') as f:
                    json.dump(Dic, f)
                # only the first record is wanted
                break
            elif event == 'map_key':
                last_value = value
            elif event == 'end_map':
                regions.append(dic)
                dic = {}
                last_value = None
            elif last_value:
                dic[last_value] = value
Example #4
Source File: From SwiftKitten with MIT License | 5 votes |
def _autocomplete_request(self, view, cache, request, text, offset, included=lambda item: True):
    """
    Run the completion command for ``text`` at ``offset`` and return the parsed completions.

    The request is registered in ``self.current_requests`` while it runs and is
    always removed again, even if building, running or parsing the command
    raises, so a failed request cannot block later requests with the same key.

    :param view: passed on to ``get_settings`` and ``get_completion_cmd``.
    :param cache: not used by this method.
    :param request: key identifying the request; used to reject duplicates.
    :param text: passed on to ``get_completion_cmd``.
    :param offset: passed on to ``get_completion_cmd``.
    :param included: predicate forwarded to ``_parse_completions``.
    :return: list of the items yielded by ``_parse_completions``.
    :raises AutocompleteRequestError: if too many requests are in flight or the
        same request is already in progress.
    """
    # this should not happen, but just in case, do not
    # overload the system with too many requests
    if len(self.current_requests) > self.get_settings(view, "concurrent_request_limit", 4):
        raise AutocompleteRequestError("Request denied: too many concurrent requests.")

    # prevent duplicate requests
    if request in self.current_requests:
        raise AutocompleteRequestError(
            "Request denied: completion for \"{request}\" "
            "already in progress.".format(request=request)
        )

    # start request
    self.current_requests.add(request)
    try:
        # get completion command
        cmd = self.get_completion_cmd(view, text, offset)

        # run completion command; the context manager closes the pipe and
        # waits for the child so no process or descriptor is left behind
        # NOTE(review): shell=True runs cmd through the shell - confirm that
        # get_completion_cmd quotes anything derived from user content.
        with Popen(cmd, shell=True, stdout=PIPE, stderr=STDOUT) as p:
            parser = ijson.parse(p.stdout)
            completions = list(self._parse_completions(parser, included=included))
    finally:
        # finish request, whether it succeeded or not
        self.current_requests.discard(request)

    return completions
Example #5
Source File: From rally with Apache License 2.0 | 5 votes |
def parse(text, props, lists=None):
    """
    Selectively parse the provided text as JSON, extracting only the properties named in ``props``.

    If ``lists`` is specified, this function also determines for each of these lists whether it is empty
    (respective value will be ``True``) or contains elements (respective value will be ``False``).
    Parsing stops as soon as everything requested has been found.

    :param text: A text to parse.
    :param props: A mandatory list of property paths (separated by a dot character) for which to extract values.
    :param lists: An optional list of property paths to JSON lists in the provided text.
    :return: A dict containing all properties and lists that have been found in the provided text.
    """
    extracted = {}
    list_is_empty = {}
    # prefix of a requested list whose ``start_array`` was the previous event, otherwise ``None``
    pending_list = None

    try:
        for prefix, event, value in ijson.parse(text):
            if pending_list is not None:
                # the list is empty exactly when its start is immediately followed by its end
                list_is_empty[pending_list] = event == "end_array"
                pending_list = None

            if prefix in props:
                extracted[prefix] = value
            elif lists is not None and prefix in lists and event == "start_array":
                pending_list = prefix

            # keep reading until all requested properties ...
            if len(extracted) != len(props):
                continue
            # ... and all requested lists have been seen
            if lists is None or len(list_is_empty) == len(lists):
                break
    except ijson.IncompleteJSONError:
        # the text ended before everything was found; return what we have
        pass

    extracted.update(list_is_empty)
    return extracted
Example #6
Source File: From rally with Apache License 2.0 | 5 votes |
async def request_body_query(self, es, params):
    """
    Run a search with a request body and return its result metadata.

    Declared ``async`` because the search call is awaited. The response is
    requested raw and, for detailed results, only the needed properties are
    extracted lazily via ``parse`` instead of decoding the whole body.

    :param es: client object; ``return_raw_response()`` is called on it before searching.
    :param params: request parameters. ``body`` is mandatory; ``index`` (default ``_all``), ``type`` and
                   ``detailed-results`` (default ``False``) are optional.
    :return: A dict with ``weight``, ``unit`` and ``success``; with ``detailed-results`` it also contains
             ``hits``, ``hits_relation``, ``timed_out`` and ``took``.
    """
    request_params = self._default_request_params(params)
    index = params.get("index", "_all")
    body = mandatory(params, "body", self)
    doc_type = params.get("type")
    detailed_results = params.get("detailed-results", False)
    headers = self._headers(params)

    # disable eager response parsing - responses might be huge thus skewing results
    es.return_raw_response()

    r = await self._raw_search(es, doc_type, index, body, request_params, headers)

    if not detailed_results:
        return {
            "weight": 1,
            "unit": "ops",
            "success": True
        }

    # The total hit count is reported either as a plain number ("hits.total")
    # or as an object with "value" and "relation", depending on the response
    # format, so both paths are requested and the object form wins.
    props = parse(r, ["hits.total", "hits.total.value", "hits.total.relation", "timed_out", "took"])
    hits_total = props.get("hits.total.value", props.get("hits.total", 0))
    hits_relation = props.get("hits.total.relation", "eq")
    timed_out = props.get("timed_out", False)
    took = props.get("took", 0)

    return {
        "weight": 1,
        "unit": "ops",
        "success": True,
        "hits": hits_total,
        "hits_relation": hits_relation,
        "timed_out": timed_out,
        "took": took
    }
Example #7
Source File: From fame_modules with GNU General Public License v3.0 | 5 votes |
def extract_info(self, report):
    """
    Stream the JSON ``report`` and fill ``self.results`` from it.

    Collects one dict per entry under ``data.signatures`` (name, severity,
    description), tags the analysis with each signature name, and records
    ``data.malfamily`` as classification and ``data.malscore`` as score.

    :param report: JSON input accepted by ``ijson.parse``.
    """
    # First, build an array with every antivirus information that might be
    # of interest
    # NOTE(review): '{}'.format(av) only stringifies the name, yet the lookup
    # below treats a matching prefix as a dotted path (prefix.split('.')[-1]);
    # a path template may have been lost here - confirm against the report layout.
    av_prefixes = ['{}'.format(av) for av in self._analysis._file['antivirus']]

    parser = ijson.parse(report)
    self.results['signatures'] = []
    signature = dict()

    for prefix, event, value in parser:
        if prefix == "data.signatures.item" and event == "end_map":
            # a signature object is complete: store it and start a new one
            self.results['signatures'].append(signature)
            signature = dict()
        elif prefix == "data.signatures.item.name":
            # matches the sibling severity/description keys of the same item;
            # an empty prefix would only ever match root-level events
            signature['name'] = value
            self.add_tag(value)
        elif prefix == "data.signatures.item.severity":
            signature['severity'] = value
        elif prefix == "data.signatures.item.description":
            signature['description'] = value
        elif ('name' in signature
              and signature['name'] == 'antivirus_virustotal'
              and prefix in av_prefixes):
            self._analysis._file.update_value(['antivirus', prefix.split('.')[-1]], value)
        elif prefix == "data.malfamily":
            self.results['classification'] = value
        elif prefix == "data.malscore":
            self.results['score'] = str(value)
        # NOTE(review): these three IOC paths are empty, so as written this
        # branch fires only for root-level events (whose value is None).
        # The intended paths cannot be determined from this code - restore
        # them from the report schema.
        elif prefix in ["", "", ""]:
            self.add_ioc(value)
Example #8
Source File: From fame_modules with GNU General Public License v3.0 | 5 votes |
def extract_info(self, report):
    """
    Stream the JSON ``report`` and fill ``self.results`` from it.

    Collects one dict per entry under ``signatures`` (name, severity,
    description), tags the analysis with each signature name, stores
    ``info.score`` as a float score and registers network values as IOCs.

    :param report: JSON input accepted by ``ijson.parse``.
    """
    parser = ijson.parse(report)
    self.results['signatures'] = []
    signature = dict()

    for prefix, event, value in parser:
        if prefix == "signatures.item" and event == "end_map":
            # a signature object is complete: store it and start a new one
            self.results['signatures'].append(signature)
            signature = dict()
        elif prefix == "signatures.item.name":
            # matches the sibling severity/description keys of the same item;
            # an empty prefix would only ever match root-level events
            signature['name'] = value
            self.add_tag(value)
        elif prefix == "signatures.item.severity":
            signature['severity'] = value
        elif prefix == "signatures.item.description":
            signature['description'] = value
        elif prefix == "info.score":
            self.results['score'] = float(value)
        # NOTE(review): the first IOC path and both entries of the exclusion
        # list below are empty. As written, the empty path matches root-level
        # events (value None) and the exclusion list only filters the empty
        # string. The intended values cannot be determined from this code.
        elif prefix in ["", "network.hosts.item.ip", "network.http.item.uri"]:
            if value not in ["", ""]:
                self.add_ioc(value)
Example #9
Source File: From fame_modules with GNU General Public License v3.0 | 5 votes |
def extract_threatname(self, report):
    """
    Register every usable threat name found in the JSON ``report``.

    Each value at ``analysis.signaturedetections.strategy.item.threatname`` that
    is neither null nor (case-insensitively) ``unknown`` is added as a probable
    name and, lowercased, as a tag.

    :param report: JSON input accepted by ``ijson.parse``.
    """
    threatname_prefix = "analysis.signaturedetections.strategy.item.threatname"
    for prefix, _event, value in ijson.parse(report):
        if prefix != threatname_prefix or value is None:
            continue
        name = str(value)
        lowered = name.lower()
        if lowered == "unknown":
            continue
        self.add_probable_name(name)
        self.add_tag(lowered)
Example #10
Source File: From fame_modules with GNU General Public License v3.0 | 5 votes |
def extract_iocs(self, report):
    """
    Stream the JSON ``report``, gather IOCs into a set and register each once.

    Values at the first group of prefixes are collected directly unless they
    start with ``192.168.``. The remaining groups reset or extend a text buffer
    that is handed to ``self.extract_url`` together with the scheme.

    :param report: JSON input accepted by ``ijson.parse``.
    """
    # NOTE(review): every prefix literal below is an empty string. As written,
    # only the first branch can match (on root-level events) and the later
    # branches are unreachable; the intended paths cannot be determined from
    # this code and must be restored from the report schema.
    found = set()
    buffered = ""

    for prefix, _event, value in ijson.parse(report):
        if prefix in [
            "",
            "",
            "",
            "",
            "",
        ]:
            if value.startswith("192.168."):
                continue
            found.add(value)
        elif prefix in [
            "",
            "",
            "",
        ]:
            # start of a new block of lines
            buffered = ""
        elif prefix == "":
            buffered = buffered + "{}\n".format(value)
            self.extract_url("http", found, buffered)
        elif prefix in [
            "",
            ""
        ]:
            buffered = buffered + "{}\n".format(value)
            self.extract_url("https", found, buffered)

    for entry in found:
        self.add_ioc(entry)
Example #11
Source File: From cloudbolt-forge with Apache License 2.0 | 5 votes |
def get_sku(location, instance_type, products_file):
    """
    Stream ``products_file`` and return the SKU of the matching compute instance.

    A SKU dict matches when its product family is ``Compute Instance`` and its
    location and instance type equal the given arguments.

    :param location: location value the SKU dict must carry.
    :param instance_type: instance type value the SKU dict must carry.
    :param products_file: path to the JSON products file.
    :return: the matching SKU, or ``None`` if the file ends without a match.
    """
    # SKU dicts have prefixes like 76V3SF2FJC3ZR3GH
    sku_dict_prefix = re.compile('^[a-zA-Z0-9]+$')
    # attribute suffix -> value it must have for the SKU dict to qualify
    required = (
        ('.productFamily', "Compute Instance"),
        ('.location', location),
        ('.instanceType', instance_type),
    )
    current_sku = ''
    hits = 0

    with open(products_file) as handle:
        for prefix, event, value in ijson.parse(handle):
            if prefix.endswith('.sku'):
                # remember the SKU of the dict currently being read
                current_sku = value
                continue
            if any(prefix.endswith(suffix) and value == expected for suffix, expected in required):
                hits += 1
                continue
            if event != 'end_map' or not sku_dict_prefix.match(prefix):
                continue
            # end of a SKU dict: either all three attributes matched ...
            if hits == 3:
                logger.debug("SKU: {}".format(current_sku))
                return current_sku
            # ... or this was the wrong one and counting starts over
            hits = 0
Example #12
Source File: From cloudbolt-forge with Apache License 2.0 | 5 votes |
def get_price(sku, terms_file):
    """
    Stream ``terms_file`` and return the first USD unit price listed for ``sku``.

    :param sku: SKU that must occur in the path of the price entry.
    :param terms_file: path to the JSON terms file.
    :return: the value at the first path that ends in ``.pricePerUnit.USD`` and
             contains ``sku``, or ``None`` if there is no such path.
    """
    with open(terms_file) as terms:
        for prefix, _event, value in ijson.parse(terms):
            if not prefix.endswith('.pricePerUnit.USD'):
                continue
            if sku not in prefix:
                continue
            logger.debug('Hourly price: {}'.format(value))
            return value
Example #13
Source File: From minerva with Apache License 2.0 | 4 votes |
def jsonObjectReader(filepath):
    """
    Creates a generator that parses an array of json objects from a valid
    json array file, yielding each top level json object in the array.

    The file is closed when the generator is exhausted or closed. Events that
    arrive before any object has started (for example in an empty array) are
    ignored instead of failing on a missing builder.

    :param filepath: path to json file.
    :raises Exception: if an ``end_map`` or ``end_array`` event arrives without
        a matching top level start.
    """
    top_level_array = False
    array_stack = 0
    top_level_object = False
    object_stack = 0
    # created when the first top level object starts
    builder = None

    with open(filepath, 'r') as json_file:
        for prefix, event, value in ijson.parse(json_file):
            if event == 'start_array':
                if not top_level_array:
                    # the enclosing array itself is not part of any object
                    top_level_array = True
                    continue
                else:
                    array_stack += 1
            if event == 'start_map':
                if not top_level_object:
                    top_level_object = True
                    builder = ijson.ObjectBuilder()
                else:
                    object_stack += 1
            if event == 'end_map':
                if not top_level_object:
                    raise Exception('end_map without a top level object')
                else:
                    if object_stack == 0:
                        top_level_object = False
                        yield builder.value
                    else:
                        object_stack -= 1
            if event == 'end_array':
                if not top_level_array:
                    raise Exception('end_array without a top level array')
                else:
                    if array_stack == 0:
                        top_level_array = False
                    else:
                        array_stack -= 1
            # convert Decimal to float because mongo can't serialize Decimal
            # TODO is this the right place to do this? Should it be done instead
            # upon save?
            if isinstance(value, decimal.Decimal):
                # TODO this has different behavior on python 2.6 vs 2.7 due to
                # different rounding behavior
                value = float(value)
            if builder is not None:
                builder.event(event, value)
Example #14
Source File: From minerva with Apache License 2.0 | 4 votes |
def __init__(self, objConverter=None, mapping=None):
    """
    Set up the mapper with a GeoJSON FeatureCollection header and footer.

    When no ``objConverter`` is given, one is built from ``mapping``: it reads
    the latitude and longitude of an object through the JSONPath expressions in
    ``mapping['latitudeKeypath']`` and ``mapping['longitudeKeypath']`` and
    returns a point feature with a placeholder property.

    :param objConverter: optional callable turning an object into a feature.
    :param mapping: dict with the two keypaths; required if ``objConverter`` is omitted.
    :raises Exception: if neither ``objConverter`` nor ``mapping`` is provided.
    """
    header = """{ "type": "FeatureCollection", "crs": { "type": "name", "properties": { "name": "urn:ogc:def:crs:OGC:1.3:CRS84" } }, "features": [ """
    footer = """ ] } """

    if objConverter is None and mapping is None:
        raise Exception('Must provide objConverter or geoJsonMapping')

    if objConverter is None:
        def convertToGeoJson(obj):
            lat_path = jsonpath_rw.parse(mapping['latitudeKeypath'])
            long_path = jsonpath_rw.parse(mapping['longitudeKeypath'])
            # the point takes longitude first, then latitude
            longitude = float(long_path.find(obj)[0].value)
            latitude = float(lat_path.find(obj)[0].value)
            return geojson.Feature(
                geometry=geojson.Point((longitude, latitude)),
                properties={"placeholder": 0},
            )

        objConverter = convertToGeoJson

    super(GeoJsonMapper, self).__init__(objConverter, header, footer, geojson.dumps)