Python ijson.items() Examples
The following are 30 code examples of ijson.items().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module ijson, or try the search function.
Example #1
Source File: migrate_json.py From examples with Apache License 2.0 | 7 votes |
def load_data_into_grakn(input, session):
    '''
    Insert every record of a JSON data file into Grakn.

    The records are first loaded as a list of dictionaries. Each record is
    then handled in its own write transaction: the Graql insert query is
    built by the callable stored under ``input["template"]``, printed,
    executed and committed before the next record is processed.

    :param input as dictionary: details required to parse the data; the
        "file" entry is the data file path without its ".json" extension
        and the "template" entry builds the insert query for one record
    :param session: session off of which each transaction is created
    '''
    records = parse_data_to_dictionaries(input)

    for record in records:
        with session.transaction().write() as tx:
            query = input["template"](record)
            print("Executing Graql Query: " + query)
            tx.query(query)
            tx.commit()

    print("\nInserted " + str(len(records)) + " items from [ " + input["file"] + ".json] into Grakn.\n")
Example #2
Source File: query.py From mixpanel-jql with MIT License | 6 votes |
def _validate_people_params(self, params):
    """Validate ``people_params`` and return them serialised as JSON.

    The only accepted key is ``user_selectors``; its value must be an
    iterable of dicts whose only allowed key is ``selector`` (a string).

    :param params: The people parameters, or a falsy value for "none".
    :return: ``"{}"`` when ``params`` is falsy, otherwise
        ``json.dumps(params)``.
    :raises JQLSyntaxError: If ``params`` is not a dict, contains an
        unknown key, or a selector entry is malformed.
    """
    # ``collections.Iterable`` was removed in Python 3.10. The ABC lives in
    # ``collections.abc`` on Python 3 and only in ``collections`` on Python 2.
    try:
        from collections.abc import Iterable
    except ImportError:  # Python 2
        from collections import Iterable

    if not params:
        return "{}"
    if not isinstance(params, dict):
        raise JQLSyntaxError("people_params must be a dict")
    for k, v in params.items():
        if k != 'user_selectors':
            raise JQLSyntaxError('"%s" is not a valid key in people_params' % k)
        if not isinstance(v, Iterable):
            raise JQLSyntaxError("people_params['user_selectors'] must be iterable")
        for i, e in enumerate(v):
            # Same guard as the join/event validators: without it a non-dict
            # entry (e.g. a character of a plain string) fails with an
            # AttributeError on ``.items()`` instead of a JQLSyntaxError.
            if not isinstance(e, dict):
                raise JQLSyntaxError("people_params['user_selectors'][x] must be a dict")
            for ek, ev in e.items():
                if ek not in ('selector',):
                    raise JQLSyntaxError(
                        "'%s' is not a valid key in "
                        "people_params['user_selectors'][%s]" % (ek, i))
                elif not isinstance(ev, six.string_types):
                    raise JQLSyntaxError(
                        "people_params['user_selectors'][%s].%s "
                        "must be a string" % (i, ek))
    return json.dumps(params)
Example #3
Source File: client.py From anchore-engine with Apache License 2.0 | 6 votes |
def _extract_response_data(self, response_text):
    """Pull the paging token and the record count out of a raw response.

    The payload is parsed twice with ijson: once for the value found at
    ``FEED_DATA_NEXT_TOKEN_PATH`` and once to count the items found at
    ``FEED_DATA_ITEMS_PATH``.

    :param response_text: raw response body; it is wrapped in ``BytesIO``,
        so it has to be a bytes-like object
    :return: tuple ``(next_token, response_text, count)`` where
        ``next_token`` is ``None`` unless exactly one non-empty token was
        found
    """
    next_token = None

    # The context manager releases the buffer even when ijson raises on a
    # malformed payload (the original only closed it on the success path).
    with BytesIO(response_text) as sio:
        # Get the next token
        tokens = list(ijson.items(sio, FEED_DATA_NEXT_TOKEN_PATH))
        if len(tokens) == 1:
            next_token = tokens[0]

        # Be explicit, no empty strings
        if not next_token:
            next_token = None

        # Get the record count
        # Not using the special parser for handling decimals here because
        # this isn't on the return path, just counting records
        sio.seek(0)
        count = sum(1 for _ in ijson.items(sio, FEED_DATA_ITEMS_PATH))

    logger.debug('Found {} records in data chunk'.format(count))
    return next_token, response_text, count
Example #4
Source File: json.py From tabulator-py with MIT License | 6 votes |
def __iter_extended_rows(self):
    """Yield ``(row_number, keys, values)`` for every streamed JSON item.

    Items are read with ijson from ``self.__bytes`` under the prefix
    ``item``, or ``<property>.item`` when a property is configured. Row
    numbers start at 1.

    * list/tuple item -> ``keys`` is ``None`` and ``values`` is the item
      copied into a list
    * dict item -> ``keys`` are the sorted keys and ``values`` follow the
      same order
    * anything else -> ``exceptions.SourceError`` unless force parsing is
      enabled, in which case an empty row is yielded
    """
    if self.__property is None:
        prefix = 'item'
    else:
        prefix = '%s.item' % self.__property

    for row_number, item in enumerate(ijson.items(self.__bytes, prefix), start=1):
        if isinstance(item, (tuple, list)):
            yield (row_number, None, list(item))
            continue

        if isinstance(item, dict):
            keys = sorted(item.keys())
            yield (row_number, keys, [item[key] for key in keys])
            continue

        if not self.__force_parse:
            message = 'JSON item has to be list or dict'
            raise exceptions.SourceError(message)
        yield (row_number, None, [])
Example #5
Source File: metrics.py From dstc-noesis with MIT License | 6 votes |
def rank(src, tgt):
    """
    Compute, for every test case, the rank of the first correct prediction.

    Args:
        src (dict): predictions by the model, keyed like ``tgt``
        tgt (dict): ground truth / targets

    Returns:
        ranks (list): one entry per key of ``tgt`` in iteration order; the
            1-based position of the first prediction contained in the
            target, or 0 when none matches or the test case has no
            predictions (a warning is logged in the latter case)
    """
    ranks = []
    for dialog_id, gold in tgt.items():
        position = 0
        try:
            for place, candidate in enumerate(src[dialog_id], start=1):
                if candidate in gold:
                    position = place
                    break
        except KeyError:
            logging.warning(
                "No matching entry found for test case with dialog-id {}".format(dialog_id))
        ranks.append(position)
    return ranks
Example #6
Source File: language_modeling.py From prenlp with Apache License 2.0 | 6 votes |
def _get_data(self) -> list:
    """Return the language-modeling samples, building the cache if needed.

    When the cached file ``root/out_filename`` exists it is loaded and
    returned. Otherwise the JSON file ``root/dirname`` is streamed, every
    document's normalized text is split into its non-empty lines, the JSON
    file is deleted and the collected lines are saved to the cache path.
    """
    cache_path = self.root/self.out_filename

    if cache_path.exists():
        return load_language_modeling(cache_path)

    dataset = []
    with open(self.root/self.dirname, 'r', encoding='utf-8') as jfile:
        for item in tqdm(ijson.items(jfile, 'item')):
            text = self._normalize(item['text']).strip()
            # split document into sentences(len > 0)
            # To keep one sample per document instead, join these
            # non-empty lines with a newline and append the result.
            dataset.extend(line for line in text.split('\n') if len(line) > 0)

    # Save dataset
    (self.root/self.dirname).unlink()
    save_language_modeling(dataset, to_path=cache_path)

    return dataset
Example #7
Source File: query.py From mixpanel-jql with MIT License | 6 votes |
def _decode(entity):
    """
    Recursively encode unicode text as UTF-8 byte strings so the ``u``
    prefix does not proliferate in complex data structures.

    Tuples, lists and dicts (keys and values) are rebuilt with their
    contents converted; any other non-text object is returned untouched.
    This is only needed as long as Python 2 support is necessary.

    :param entity: The entity to decode.
    :return: The entity without unicode (unchanged on Python 3).
    """
    # Only necessary for Python 2
    if six.PY3:
        return entity

    if isinstance(entity, tuple):
        return tuple(map(_decode, entity))
    if isinstance(entity, list):
        return [_decode(member) for member in entity]
    if isinstance(entity, dict):
        return {_decode(key): _decode(value) for key, value in entity.items()}
    if isinstance(entity, six.text_type):
        return entity.encode('utf8')
    return entity
Example #8
Source File: convert_dstc8_data.py From NOESIS-II with Apache License 2.0 | 6 votes |
def create_test_answers_file(test_file, test_answers_file):
    """Collect all candidate answers of a test set and write them to a file.

    Every option under ``options-for-next`` of every entry in the JSON
    array ``test_file`` is stored as ``candidate-id -> utterance`` (with the
    " __eou__ " marker appended), followed by the special "NONE" answer.
    The mapping is written as tab-separated lines, with newlines removed
    from the answers.

    :param test_file: path of the JSON test file
    :param test_answers_file: path of the file to write
    :return: dict mapping candidate id to answer text
    """
    answers = {}
    with open(test_file, 'rb') as source:
        for entry in ijson.items(source, 'item'):
            for option in entry['options-for-next']:
                answers[option['candidate-id']] = option['utterance'] + " __eou__ "

    answers["NONE"] = "None __eou__ "

    with open(test_answers_file, "w") as vocabfile:
        vocabfile.writelines(
            str(answer_id) + "\t" + answer.replace("\n", "") + "\n"
            for answer_id, answer in answers.items()
        )

    print("Saved test answers to {}".format(test_answers_file))
    return answers
Example #9
Source File: parsers.py From riko with MIT License | 6 votes |
def etree2dict(element):
    """Convert an element tree into a dict imitating how Yahoo Pipes does it.

    The element's attributes seed the dict, its stripped text is merged in
    through ``_make_content`` and every child is converted recursively and
    merged under its tag. An element that has text and no key other than
    'content' collapses to that content value.
    """
    node = dict(element.items())
    node.update(_make_content(node, element.text, strip=True))

    for child in element:
        tag = child.tag
        converted = etree2dict(child)
        node.update(_make_content(node, converted, tag))

    if element.text and set(node) <= {'content'}:
        # element is leaf node and doesn't have attributes
        return node.get('content')

    return node
Example #10
Source File: geo_heatmap.py From geo-heatmap with MIT License | 5 votes |
def generateMap(self, settings):
    """Generates the heatmap.

    Arguments:
        settings {dict} -- The settings for the heatmap; the keys "tiles",
            "zoom_start", "radius", "blur", "min_opacity" and "max_zoom"
            are required.

    Returns:
        Map -- The folium map with the heatmap layer added.
    """
    tiles, zoom_start, radius, blur, min_opacity, max_zoom = (
        settings[key]
        for key in ("tiles", "zoom_start", "radius", "blur", "min_opacity", "max_zoom")
    )

    # One (first coordinate, second coordinate, magnitude) triple per entry
    heat_points = [
        (position[0], position[1], weight)
        for position, weight in self.coordinates.items()
    ]

    # Generate map
    base_map = folium.Map(location=self.max_coordinates,
                          zoom_start=zoom_start,
                          tiles=tiles)

    # Generate heat map
    layer = HeatMap(heat_points,
                    max_val=self.max_magnitude,
                    min_opacity=min_opacity,
                    radius=radius,
                    blur=blur,
                    max_zoom=max_zoom)
    base_map.add_child(layer)
    return base_map
Example #11
Source File: SwiftKitten.py From SwiftKitten with MIT License | 5 votes |
def _get_structure_info(self, view):
    """Run the structure-info command for ``view`` and return its parsed output.

    The full text of the view is handed to ``get_structure_info_cmd`` to
    build the command line, the command is run through the shell, and the
    JSON document it prints (stderr is merged into stdout) is parsed with
    ijson.

    :param view: the view whose whole buffer is analysed
    :return: the top-level JSON value printed by the command
    :raises IndexError: if the command produced no JSON value
    """
    # get structure info command
    text = view.substr(Region(0, view.size()))
    cmd = self.get_structure_info_cmd(view, text)

    # NOTE(review): the original read the "sourcekitten_timeout" setting
    # here but never applied it; the unused local was dropped. Enforcing a
    # timeout would need a design decision, so it is not added here.

    # run structure info command
    # The context manager closes the pipe and waits for the child, so no
    # open descriptor or zombie process is left behind (the original never
    # waited on the process).
    # NOTE(review): ``cmd`` is executed with shell=True; confirm that
    # get_structure_info_cmd quotes everything it interpolates.
    with Popen(cmd, shell=True, stdout=PIPE, stderr=STDOUT) as p:
        structure_info = list(ijson.items(p.stdout, ''))[0]
    return structure_info
Example #12
Source File: io.py From meza with MIT License | 5 votes |
def read_json(filepath, mode='r', path='item', newline=False):
    """Reads a json file (both regular and newline-delimited)

    Args:
        filepath (str): The json file path or file like object.
        mode (Optional[str]): The file open mode (default: 'r').

        path (Optional[str]): Path to the content you wish to read
            (default: 'item', i.e., the root list). Note: `path` must
            refer to a list.

        newline (Optional[bool]): Interpret file as newline-delimited
            (default: False).

    Returns:
        Iterable: The parsed records

    See also:
        `meza.io.read_any`

    Examples:
        >>> filepath = p.join(DATA_DIR, 'test.json')
        >>> records = read_json(filepath)
        >>> next(records) == {
        ...     'text': 'Chicago Reader',
        ...     'float': 1,
        ...     'datetime': '1971-01-01T04:14:00',
        ...     'boolean': True,
        ...     'time': '04:14:00',
        ...     'date': '1971-01-01',
        ...     'integer': 40}
        True
    """
    def reader(f, **kw):
        # Newline-delimited input holds one JSON document per line; a
        # regular file is streamed from the requested path instead.
        if newline:
            return map(json.loads, f)
        return items(f, path)

    return read_any(filepath, reader, mode)
Example #13
Source File: io.py From meza with MIT License | 5 votes |
def _remove_bom_from_dict(row, bom):
    """Remove a byte order marker (BOM) from a dict

    Keys and values are cleaned independently: each one that is non-empty
    and contains ``bom`` has the BOM characters stripped from its left
    side; everything else is passed through unchanged.

    The original tested key and value together, so a non-string on either
    side (e.g. ``{'\\ufeffid': 1}``) raised ``TypeError`` internally and the
    whole pair was yielded untouched, leaving the BOM in place.

    Args:
        row (dict): The mapping to clean.
        bom (str): The byte order marker to strip.

    Yields:
        tuple: ``(key, value)`` pairs in the iteration order of ``row``.
    """
    def strip_bom(item):
        try:
            return item.lstrip(bom) if item and bom in item else item
        except (TypeError, AttributeError):
            # Not text (or not comparable with ``bom``): leave it alone.
            return item

    for k, v in row.items():
        yield (strip_bom(k), strip_bom(v))
Example #14
Source File: update_dtp.py From dtp-stat with GNU General Public License v2.0 | 5 votes |
def main():
    """Stream the records of data/dtp.json and process each one.

    The file is expected to hold a JSON array; every element is passed to
    ``get_crashes_data`` while a tqdm progress bar tracks the iteration.
    """
    with open("data/dtp.json", 'r') as source:
        for record in tqdm(ijson.items(source, 'item')):
            get_crashes_data(record)
Example #15
Source File: compare_messages.py From zulip with Apache License 2.0 | 5 votes |
def handle(self, *args: Any, **options: Any) -> None:
    """Compare two message dumps pairwise and report changed messages.

    The dumps named by ``options['dump1']`` and ``options['dump2']`` are
    streamed in lockstep. A pair whose ids differ aborts the comparison
    with an error on stderr; a pair whose content differs is reported on
    stdout. The totals are written to stdout at the end.
    """
    total_count = 0
    changed_count = 0

    with open(options['dump1']) as first_dump, open(options['dump2']) as second_dump:
        message_pairs = zip(ijson.items(first_dump, 'item'),
                            ijson.items(second_dump, 'item'))
        for before, after in message_pairs:
            total_count += 1
            if before['id'] != after['id']:
                self.stderr.write('Inconsistent messages dump')
                break
            if before['content'] == after['content']:
                continue
            changed_count += 1
            self.stdout.write('Changed message id: {id}'.format(id=before['id']))

    self.stdout.write(f'Total messages: {total_count}')
    self.stdout.write(f'Changed messages: {changed_count}')
Example #16
Source File: prepare_data.py From dstc-noesis with MIT License | 5 votes |
def create_dialog_iter(filename):
    """
    Returns an iterator over a JSON file.

    The file is opened in binary mode and streamed with ijson; each element
    of the top-level array is passed through ``process_dialog`` before it
    is yielded. The file stays open until the generator is exhausted or
    closed.

    :param filename: path of the JSON file
    :return: generator of processed rows
    """
    with open(filename, 'rb') as handle:
        for dialog in ijson.items(handle, 'item'):
            yield process_dialog(dialog)
Example #17
Source File: utils.py From dstc-noesis with MIT License | 5 votes |
def read_json(input_file):
    """Return every element of the top-level JSON array as a list.

    :param input_file: file-like object handed to ``ijson.items``
    :return: list of the parsed array elements
    """
    return list(ijson.items(input_file, 'item'))
Example #18
Source File: metrics.py From dstc-noesis with MIT License | 5 votes |
def calculate_MAP(src, tgt):
    """
    The function calculates Mean Average Precision (MAP) and logs it.

    For every test case in ``tgt`` the precision of each target is
    ``(i + 1) / (position of the target in the predictions)``, with ``i``
    the target's index; targets missing from the predictions only trigger a
    warning. The per-case average divides by the number of targets, and the
    final mean divides by the number of test cases in ``tgt`` (cases
    without predictions contribute nothing to the sum).

    Args:
        src (dict): predictions by the model; each value must support
            ``.index`` (presumably a list of candidate ids)
        tgt (dict): ground truth/ targets

    Returns:
        None: the result is only reported through ``logging.info``.
    """
    if not tgt:
        # The original divided by len(tgt) and crashed on empty input.
        logging.warning("No targets given; Mean Average Precision (MAP) is undefined.")
        return

    avg_precision = []
    for idx, targets in tgt.items():
        try:
            predictions = src[idx]
        except KeyError:
            msg = "No matching entry found for test case with dialog-id {}".format(idx)
            logging.warning(msg)
            continue

        precision = []
        for i, target in enumerate(targets):
            try:
                precision.append((i + 1) / (predictions.index(target) + 1))
            except ValueError:
                msg = "Answer: {} isn't part of the predictions by the model.".format(target)
                logging.warning(msg)

        # A test case without targets scores 0 instead of dividing by zero.
        avg_precision.append(sum(precision) / len(targets) if targets else 0.0)

    # Named mean_ap rather than ``map`` so the builtin is not shadowed.
    mean_ap = sum(avg_precision) / len(tgt)
    msg = "Mean Average Precision (MAP): {}".format(mean_ap)
    logging.info(msg)
Example #19
Source File: exchangerate.py From riko with MIT License | 5 votes |
def parse_response(json):
    """Extract the exchange rates from a parsed API response.

    Args:
        json (dict): The decoded response.

    Returns:
        dict: Each truthy entry of ``json['rates']`` converted to
            ``Decimal``; an empty dict (after logging two warnings) when
            the response has no 'rates' key.
    """
    if 'rates' not in json:
        logger.warning('invalid json response:')
        logger.warning(json)
        return {}

    return {
        currency: Decimal(value)
        for currency, value in json['rates'].items()
        if value
    }
Example #20
Source File: metrics.py From dstc-noesis with MIT License | 5 votes |
def read_predictions(filename):
    """Load model predictions from a JSON file.

    :param filename: path of a JSON array whose elements carry the keys
        'example-id' and 'candidate-ranking'
    :return: OrderedDict mapping each example id to the list of
        'candidate-id' values of its ranking, in file order
    """
    predictions = OrderedDict()
    with open(filename, 'r') as fp:
        for example in ijson.items(fp, 'item'):
            ranked_ids = [entry['candidate-id'] for entry in example['candidate-ranking']]
            predictions[example['example-id']] = ranked_ids
    return predictions
Example #21
Source File: parsers.py From riko with MIT License | 5 votes |
def any2dict(f, ext='xml', html5=False, path=None):
    """Parse xml, html or json content and return the part found at ``path``.

    Args:
        f: The content handed to the underlying parser.
        ext (str): Content type, one of 'xml', 'html' or 'json'.
        html5 (bool): Forwarded to ``xml2etree`` for markup content.
        path (str): Dot-separated path to the wanted content; for markup
            the dots are turned into '/' to form an xpath. Falsy means the
            document root.

    Returns:
        The converted element for markup, or the first item found at
        ``path`` for json.

    Raises:
        TypeError: If ``ext`` is not a supported type.
    """
    path = path or ''

    if ext in {'html', 'xml'}:
        root = xml2etree(f, ext == 'xml', html5).getroot()
        xpath_expr = path.replace('.', '/')
        subtree = next(xpath(root, xpath_expr)) if xpath_expr else root
        return etree2dict(subtree)

    if ext == 'json':
        return next(items(f, path))

    raise TypeError("Invalid file type: '%s'" % ext)
Example #22
Source File: geo_heatmap.py From geo-heatmap with MIT License | 5 votes |
def streamJSONData(self, json_file, date_range):
    """Stream the Google location data from the given json file.

    The number of locations is first estimated as a thirteenth of the
    file's line count to size the progress bar; the estimate is raised
    whenever the running index exceeds it. Locations lacking a latitude or
    longitude are skipped, and the coordinates of every location whose
    timestamp lies in the date range are recorded via ``updateCoord``.

    Arguments:
        json_file {file} -- An open file-like object with JSON-encoded
            Google location data.
        date_range {tuple} -- A tuple containing the min-date and max-date.
            e.g.: (None, None), (None, '2019-01-01')
    """
    # Estimate location amount
    max_value_est = sum(1 for line in json_file) / 13
    json_file.seek(0)

    locations = ijson.items(json_file, "locations.item")
    widgets = [Bar(), Percentage(), " ", ETA()]

    with ProgressBar(max_value=max_value_est, widgets=widgets) as progress:
        for index, location in enumerate(locations):
            if not ("latitudeE7" in location and "longitudeE7" in location):
                continue

            latitude = round(location["latitudeE7"] / 1e7, 6)
            longitude = round(location["longitudeE7"] / 1e7, 6)
            coords = (latitude, longitude)

            if timestampInRange(location["timestampMs"], date_range):
                self.updateCoord(coords)

            # Grow the bar when there are more locations than estimated
            if index > max_value_est:
                max_value_est = index
                progress.max_value = index
            progress.update(index)
Example #23
Source File: query.py From mixpanel-jql with MIT License | 5 votes |
def send(self):
    """Post the query script to the endpoint and stream back the result rows.

    The script is ``str(self)``; the request authenticates with the API
    secret as the basic-auth user name and an empty password. The response
    is streamed and each element of the returned JSON array is yielded as
    soon as ijson has parsed it. An HTTP error status raises through
    ``raise_for_status``.
    """
    url = self.ENDPOINT % self.VERSION
    credentials = HTTPBasicAuth(self.api_secret, '')
    payload = {'script': str(self)}

    with closing(requests.post(url, auth=credentials, data=payload, stream=True)) as resp:
        resp.raise_for_status()
        rows = ijson.items(RequestsStreamWrapper(resp), 'item')
        for row in rows:
            yield row
Example #24
Source File: query.py From mixpanel-jql with MIT License | 5 votes |
def _validate_join_params(self, params):
    """Validate ``join_params`` and return them serialised as JSON.

    Accepted keys are ``type`` (one of ``self.VALID_JOIN_TYPES``) and
    ``selectors`` (an iterable of dicts whose keys are limited to
    ``event`` and ``selector``, each with a string value).

    :param params: The join parameters, or a falsy value for "none".
    :return: ``"{}"`` when ``params`` is falsy, otherwise
        ``json.dumps(params)``.
    :raises JQLSyntaxError: If ``params`` is not a dict, contains an
        unknown key, an invalid join type or a malformed selector.
    """
    # ``collections.Iterable`` was removed in Python 3.10. The ABC lives in
    # ``collections.abc`` on Python 3 and only in ``collections`` on Python 2.
    try:
        from collections.abc import Iterable
    except ImportError:  # Python 2
        from collections import Iterable

    if not params:
        return "{}"
    if not isinstance(params, dict):
        raise JQLSyntaxError("join_params must be a dict")
    for k, v in params.items():
        if k == 'type':
            if v not in self.VALID_JOIN_TYPES:
                raise JQLSyntaxError(
                    '"%s" is not a valid join type (valid types: %s)'
                    % (v, ', '.join(self.VALID_JOIN_TYPES))
                )
        elif k == 'selectors':
            if not isinstance(v, Iterable):
                raise JQLSyntaxError("join_params['selectors'] must be iterable")
            for i, e in enumerate(v):
                if not isinstance(e, dict):
                    raise JQLSyntaxError("join_params['selectors'][x] must be a dict")
                for ek, ev in e.items():
                    if ek not in ('event', 'selector'):
                        raise JQLSyntaxError(
                            "'%s' is not a valid key in "
                            "join_params['selectors'][%s]" % (ek, i))
                    elif not isinstance(ev, six.string_types):
                        raise JQLSyntaxError(
                            "join_params['selectors'][%s].%s "
                            "must be a string" % (i, ek))
        else:
            raise JQLSyntaxError('"%s" is not a valid key in join_params' % k)
    return json.dumps(params)
Example #25
Source File: query.py From mixpanel-jql with MIT License | 5 votes |
def _validate_event_params(self, params):
    """Validate ``event_params`` and return them serialised as JSON.

    Accepted keys are ``to_date`` / ``from_date`` (a ``datetime``, a
    ``date`` or a string; date objects are rendered as ``YYYY-MM-DD``) and
    ``event_selectors`` (an iterable of dicts whose keys are limited to
    ``event``, ``selector`` and ``label``, each with a string value).
    The caller's dict is not modified; conversions happen on a copy.

    :param params: The event parameters, or a falsy value for "none".
    :return: ``"{}"`` when ``params`` is falsy, otherwise the JSON
        encoding of the normalised copy.
    :raises JQLSyntaxError: If ``params`` is not a dict, contains an
        unknown key, a badly typed date or a malformed selector.
    """
    # ``collections.Iterable`` was removed in Python 3.10. The ABC lives in
    # ``collections.abc`` on Python 3 and only in ``collections`` on Python 2.
    try:
        from collections.abc import Iterable
    except ImportError:  # Python 2
        from collections import Iterable

    if not params:
        return "{}"
    if not isinstance(params, dict):
        raise JQLSyntaxError("event_params must be a dict")
    params = dict(params)
    # Only values of existing keys are replaced below, so the dict does not
    # change size while it is being iterated.
    for k, v in params.items():
        if k in ('to_date', 'from_date'):
            if isinstance(v, (datetime, date,)):
                params[k] = v.strftime('%Y-%m-%d')
            elif not isinstance(v, six.string_types):
                # Name the offending key; the original always said
                # "to_date", even when from_date was the bad one.
                raise JQLSyntaxError('%s must be datetime, datetime.date, or str' % k)
        elif k == 'event_selectors':
            if not isinstance(v, Iterable):
                raise JQLSyntaxError("event_params['event_selectors'] must be iterable")
            for i, e in enumerate(v):
                if not isinstance(e, dict):
                    raise JQLSyntaxError("event_params['event_selectors'][x] must be a dict")
                for ek, ev in e.items():
                    if ek not in ('event', 'selector', 'label'):
                        raise JQLSyntaxError(
                            "'%s' is not a valid key in "
                            "event_params['event_selectors'][%s]" % (ek, i))
                    elif not isinstance(ev, six.string_types):
                        raise JQLSyntaxError(
                            "event_params['event_selectors'][%s].%s "
                            "must be a string" % (i, ek))
        else:
            raise JQLSyntaxError('"%s" is not a valid key in event_params' % k)
    return json.dumps(params)
Example #26
Source File: convert_dstc8_data.py From NOESIS-II with Apache License 2.0 | 5 votes |
def get_dialogs(filename):
    """Build the training rows for every dialog of a JSON file.

    For each element of the top-level array the positive result of
    ``process_dialog`` is appended as a single row, followed by all
    elements of its ``all_negative`` result.

    :param filename: path of the JSON file, opened in binary mode
    :return: list of rows
    """
    rows = []
    with open(filename, 'rb') as handle:
        for dialog in ijson.items(handle, 'item'):
            positive_row = process_dialog(dialog, train=True, positive=True)
            rows.append(positive_row)

            negative_rows = process_dialog(dialog, train=True, positive=False, all_negative=True)
            rows.extend(negative_rows)
    return rows
Example #27
Source File: migrate_json.py From examples with Apache License 2.0 | 5 votes |
def parse_data_to_dictionaries(input):
    '''
    Stream the JSON data file and collect its items in a list.

    :param input.file as string: the path to the data file, minus the format
    :returns items as list of dictionaries: each item representing a data
        item from the file at input.file
    '''
    with open(input["file"] + ".json") as data:
        return list(ijson.items(data, "item"))
Example #28
Source File: exchangerate.py From riko with MIT License | 4 votes |
def parser(base, objconf, skip=False, **kwargs):
    """ Parses the pipe content

    Args:
        base (str): The base currency (exchanging from)
        objconf (obj): The pipe configuration (an Objectify instance)
        skip (bool): Don't parse the content
        kwargs (dict): Keyword arguments

    Kwargs:
        assign (str): Attribute to assign parsed content (default: exchangerate)
        stream (dict): The original item

    Returns:
        dict: The item when ``skip`` is set; otherwise the exchange rate:
            ``Decimal(1)`` when both currencies are equal, ``0`` when the
            rates could not be fetched or parsed, else the computed rate.

    Examples:
        >>> from riko import get_path
        >>> from meza.fntools import Objectify
        >>>
        >>> url = get_path('quote.json')
        >>> conf = {'url': url, 'currency': 'USD', 'delay': 0, 'precision': 6}
        >>> item = {'content': 'GBP'}
        >>> objconf = Objectify(conf)
        >>> kwargs = {'stream': item, 'assign': 'content'}
        >>> parser(item['content'], objconf, **kwargs)
        Decimal('1.275201')
    """
    same_currency = base == objconf.currency

    if skip:
        rate = kwargs['stream']
    elif same_currency:
        rate = Decimal(1)
    else:
        # Only remote content needs decoding
        needs_decoding = objconf.url.startswith('http')

        with fetch(decode=needs_decoding, **objconf) as f:
            try:
                payload = next(items(f, ''))
            except Exception as e:
                # Log the raw content to help diagnose the failure, then
                # fall back to a rate of 0.
                f.seek(0)
                logger.error('Error parsing {url}'.format(**objconf))
                logger.debug(f.read())
                logger.error(e)
                logger.error(traceback.format_exc())
                skip = True
                rate = 0

    if skip or same_currency:
        return rate

    places = Decimal(10) ** -objconf.precision
    rates = parse_response(payload)
    return calc_rate(base, objconf.currency, rates, places=places)
Example #29
Source File: exchangerate.py From riko with MIT License | 4 votes |
def async_pipe(*args, **kwargs):
    """A processor that asynchronously retrieves the current exchange rate
    for a given currency pair.

    All positional and keyword arguments are forwarded unchanged to
    ``async_parser``, whose result is returned.

    Args:
        item (dict): The entry to process
        kwargs (dict): The keyword arguments passed to the wrapper

    Kwargs:
        conf (dict): The pipe configuration. May contain the keys 'url',
            'params', 'currency', 'delay', 'memoize', or 'field'.

            url (str): The exchange rate API url (default:
                http://finance.yahoo.com...)
            params (dict): The API url parameters
                (default: {'format': 'json'})
            currency: The (exchanging to) currency ISO abbreviation
                (default: USD).
            delay (flt): Amount of time to sleep (in secs) before fetching
                the url. Useful for simulating network latency. Default: 0.
            memoize (bool): Cache the exchange rate API response
                (default: False).

        field (str): Item attribute from which to obtain the string to be
            formatted (default: 'content')
        assign (str): Attribute to assign parsed content
            (default: exchangerate)

    Returns:
        Deferred: twisted.internet.defer.Deferred stream of items

    Examples:
        >>> from riko import get_path
        >>> from riko.bado import react
        >>> from riko.bado.mock import FakeReactor
        >>>
        >>> def run(reactor):
        ...     callback = lambda x: print(next(x)['exchangerate'])
        ...     url = get_path('quote.json')
        ...     d = async_pipe({'content': 'GBP'}, conf={'url': url})
        ...     return d.addCallbacks(callback, logger.error)
        >>>
        >>> try:
        ...     react(run, _reactor=FakeReactor())
        ... except SystemExit:
        ...     pass
        ...
        1.275201
    """
    return async_parser(*args, **kwargs)
Example #30
Source File: convert_dstc8_data.py From NOESIS-II with Apache License 2.0 | 4 votes |
def create_dev_file(dev_file, dev_file_out, answers):
    """Convert a JSON dev set into a tab-separated file of answer indices.

    One line is written per dialog: the 1-based dialog number, the dialog
    context from ``get_context``, the index of the correct answer and the
    '|'-joined indices of the negative answers. Each field is followed by a
    tab, and all indices are shifted by one. A dialog without a correct
    answer uses the "None" answer, and dialogs with fewer than 100
    negatives get the "None" answer appended as an extra negative.

    :param dev_file: path of the JSON dev file (a top-level array)
    :param dev_file_out: path of the file to write
    :param answers: mapping from the stripped answer text (utterance plus
        "__eou__") to its integer index; it must contain "None __eou__"
    """
    positive_samples_count = 0
    negative_samples_count = 0

    # Both handles are managed by ``with`` so they are closed even when a
    # record is malformed; the original never closed the input file and
    # leaked the output file on errors. The output is still opened first,
    # as before.
    with open(dev_file_out, "w") as dev_file_op, open(dev_file, 'rb') as dev_data_handle:
        json_data = ijson.items(dev_data_handle, 'item')
        for index, entry in enumerate(json_data):
            row = str(index + 1) + "\t"
            context = get_context(entry)
            row += context + "\t"

            correct_options = entry['options-for-correct-answers']
            if len(correct_options) == 0:
                correct_answer = {'utterance': "None"}
                target_id = "NONE"
            else:
                correct_answer = correct_options[0]
                target_id = correct_answer['candidate-id']
            answer = (correct_answer['utterance'] + " __eou__ ").strip()
            row += str(answers[answer] + 1) + "\t"
            positive_samples_count += 1

            negative_answers = []
            for utterance in entry['options-for-next']:
                # The correct candidate is not a negative sample
                if utterance['candidate-id'] == target_id:
                    continue
                answer = (utterance['utterance'] + " __eou__ ").strip()
                negative_answers.append(str(answers[answer] + 1))
                negative_samples_count += 1

            if len(negative_answers) < 100:
                answer = "None __eou__"
                negative_answers.append(str(answers[answer] + 1))
                negative_samples_count += 1

            row += "|".join(negative_answers) + "\t"
            dev_file_op.write(row.replace("\n", "") + "\n")

    print("Saved dev data to {}".format(dev_file_out))
    print("Dev - Positive samples count - {}".format(positive_samples_count))
    print("Dev - Negative samples count - {}".format(negative_samples_count))