Python pandas.json_normalize() Examples
The following are 11 code examples of pandas.json_normalize().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions and classes of the module pandas, or try the search function.
Example #1
Source File: json_loader.py From dtale with GNU Lesser General Public License v2.1 | 6 votes |
def loader_func(**kwargs):
    """Load JSON data from a local path or a URL into a DataFrame.

    Keyword Args:
        path (str): filesystem path or http(s) URL of the JSON data
        normalize (bool): if True, flatten nested JSON with
            ``json_normalize``; otherwise use ``pd.read_json`` (default False)
        proxy (str, optional): proxy host applied to both http and https
            when fetching a URL
        **kwargs: remaining options forwarded to the normalizer or, filtered
            through ``loader_prop_keys(LOADER_PROPS)``, to ``pd.read_json``

    Returns:
        pandas.DataFrame

    Raises:
        requests.HTTPError: if a URL fetch returns a non-success status
    """
    path = kwargs.pop("path")
    normalize = kwargs.pop("normalize", False)
    if path.startswith(("http://", "https://")):  # add support for URLs
        proxy = kwargs.pop("proxy", None)
        req_kwargs = {}
        if proxy is not None:
            req_kwargs["proxies"] = dict(http=proxy, https=proxy)
        resp = requests.get(path, **req_kwargs)
        # raise an HTTPError on failure instead of `assert`, which is
        # silently stripped when Python runs with -O
        resp.raise_for_status()
        path = resp.json() if normalize else resp.text
    if normalize:
        # pd.json_normalize moved to the top level in pandas 1.0
        normalize_func = (
            pd.json_normalize if is_pandas1() else pd.io.json.json_normalize
        )
        return normalize_func(path, **kwargs)
    # only forward the options pd.read_json understands
    return pd.read_json(
        path,
        **{k: v for k, v in kwargs.items() if k in loader_prop_keys(LOADER_PROPS)}
    )
# IMPORTANT!!! This function is required for building any customized CLI loader.
Example #2
Source File: splunk.py From huntlib with MIT License | 6 votes |
def search_df(self, *args, **kwargs):
    '''
    Search Splunk and return the results as a Pandas DataFrame.
    Accepts all the same arguments as the search() function.
    '''
    # 'normalize' is read (not popped), so it is also forwarded to search()
    flatten = kwargs.get('normalize', True)
    hits = list(self.search(*args, **kwargs))
    if flatten:
        return pd.json_normalize(hits)
    return pd.DataFrame(hits)
Example #3
Source File: io.py From modin with Apache License 2.0 | 6 votes |
def json_normalize(
    data: Union[Dict, List[Dict]],
    record_path: Optional[Union[str, List]] = None,
    meta: Optional[Union[str, List[Union[str, List[str]]]]] = None,
    meta_prefix: Optional[str] = None,
    record_prefix: Optional[str] = None,
    errors: Optional[str] = "raise",
    sep: str = ".",
    max_level: Optional[int] = None,
) -> DataFrame:
    """Normalize semi-structured JSON data into a flat table.

    This operation is not yet distributed: it warns via
    ``ErrorMessage.default_to_pandas`` and delegates to pandas, then wraps
    the result in a Modin DataFrame.
    """
    ErrorMessage.default_to_pandas("json_normalize")
    flat = pandas.json_normalize(
        data, record_path, meta, meta_prefix, record_prefix, errors, sep, max_level
    )
    return DataFrame(flat)
Example #4
Source File: client.py From tdameritrade with Apache License 2.0 | 5 votes |
def accountsDF(self):
    '''get accounts as dataframe

    Fetches all accounts via self.accounts(), flattens each account's
    nested JSON into a one-row frame, strips the 'securitiesAccount.'
    column prefix, and concatenates the per-account frames.
    '''
    data = self.accounts()
    account_dataframes = []
    # the account ids (dict keys) are not used; only the payloads matter
    for value in data.values():
        # pd.json_normalize replaces the deprecated pd.io.json.json_normalize
        # (removed in pandas 2.0) and matches transactionsDF in this module
        frame = pd.json_normalize(value)
        frame.columns = [c.replace('securitiesAccount.', '') for c in frame.columns]
        account_dataframes.append(frame)
    return pd.concat(account_dataframes)
Example #5
Source File: client.py From tdameritrade with Apache License 2.0 | 5 votes |
def transactionsDF(self, accountId=None, type=None, symbol=None, startDate=None, endDate=None):
    '''get transaction information as Dataframe

    All filters are forwarded unchanged to self.transactions(); the
    returned records are flattened with pd.json_normalize.
    '''
    records = self.transactions(
        accountId=accountId,
        type=type,
        symbol=symbol,
        startDate=startDate,
        endDate=endDate,
    )
    return pd.json_normalize(records)
Example #6
Source File: spider.py From advertools with MIT License | 5 votes |
def _json_to_dict(jsonobj, i=None): df = json_normalize(jsonobj) if i: df = df.add_prefix('jsonld_{}_'.format(i)) else: df = df.add_prefix('jsonld_') return dict(zip(df.columns, df.values[0]))
Example #7
Source File: _yt_helpers.py From advertools with MIT License | 5 votes |
def _json_to_df(json_resp, params):
    """Convert an API JSON response into a DataFrame, one row per list item.

    json_resp : an HTTP response object whose ``.json()`` yields the payload
    params : dict of request parameters, echoed back as ``param_<name>`` columns

    NOTE(review): reconstructed from a flattened one-line source; the
    grouping of statements into loop bodies was inferred from semantics —
    confirm against the original file.
    """
    json = json_resp.json()  # NOTE: locally shadows the stdlib `json` module name
    # pair each top-level key with the type name of its value ('list', 'str', 'dict')
    resp_types = [(type(json[key]).__name__, key) for key in json]
    df = pd.DataFrame()
    # a list-typed key becomes the backbone of the frame (one row per element)
    for typ, key in resp_types:
        if typ == 'list':
            df = json_normalize(json[key])
    # no list key (or an empty one): seed a one-row placeholder so the
    # scalar fields below can still attach; the column is dropped later
    if len(df) == 0:
        df = pd.DataFrame([0], columns=['delete_me'])
    # attach scalar and dict-typed top-level fields as columns on every row
    for typ, key in resp_types:
        if typ == 'str':
            df[key] = json[key]
        if typ == 'dict':
            df = df.assign(**json[key])
    # best-effort type coercion based on column-name conventions
    for col in df:
        if 'Count' in col:
            try:
                df[col] = df[col].astype(int)
            except ValueError:
                continue
        if ('published' in col) or ('updated' in col):
            try:
                df[col] = pd.to_datetime(df[col])
            except ValueError:
                continue
    # record the request parameters alongside the results
    df = df.assign(**{'param_' + key: val for key, val in params.items()})
    if 'delete_me' in df:
        df = df.drop(columns=['delete_me'])
    df['queryTime'] = pd.Timestamp.now(tz='UTC')
    return df
Example #8
Source File: twitter.py From advertools with MIT License | 5 votes |
def _expand_entities(df):
    """Expand the 'tweet_entities' column into comma-joined string columns.

    Inserts tweet_entities_mentions / _hashtags / _urls / _symbols (and
    _media when present) immediately after the 'tweet_entities' column and
    returns the same DataFrame (mutated in place via ``insert``).

    NOTE(review): reconstructed from a flattened one-line source;
    indentation was inferred — confirm against the original file.
    """
    if 'tweet_entities' in df:
        colnames = ['tweet_entities_' + x for x in ['mentions', 'hashtags', 'urls', 'symbols', 'media']]
        # one row per tweet, with columns like 'user_mentions', 'hashtags', ...
        entities_df = json_normalize(df['tweet_entities'])
        # collapse each tweet's entity list into a single comma-separated string
        mentions = [', '.join(['@' + x['screen_name'] for x in y]) for y in entities_df['user_mentions']]
        hashtags = [', '.join(['#' + x['text'] for x in y]) for y in entities_df['hashtags']]
        urls = [', '.join([x['expanded_url'] for x in y]) for y in entities_df['urls']]
        symbols = [', '.join(['$' + x['text'] for x in y]) for y in entities_df['symbols']]
        if 'media' in entities_df:
            # media is missing for most tweets; fill with '' so the
            # comprehension below can pass non-list values through unchanged
            entities_df['media'] = entities_df['media'].fillna('')
            media = [', '.join([x['media_url'] for x in y]) if y != '' else y for y in entities_df['media']]
            entity_cols = [mentions, hashtags, urls, symbols, media]
        else:
            entity_cols = [mentions, hashtags, urls, symbols]
        # insert the new columns right after 'tweet_entities', preserving order
        col_idx = df.columns.get_loc('tweet_entities')
        for j, col in enumerate(entity_cols):
            df.insert(col_idx+j+1, colnames[j], col)
    return df
Example #9
Source File: aggregate.py From sigsep-mus-eval with MIT License | 5 votes |
def json2df(json_string, track_name): """converts json scores into pandas dataframe Parameters ---------- json_string : str track_name : str """ df = pd.json_normalize( json_string['targets'], ['frames'], ['name'] ) df.columns = [col.replace('metrics.', '') for col in df.columns] df = pd.melt( df, var_name='metric', value_name='score', id_vars=['time', 'name'], value_vars=['SDR', 'SAR', 'ISR', 'SIR'] ) df['track'] = track_name df = df.rename(index=str, columns={"name": "target"}) return df
Example #10
Source File: reader.py From rsmtool with Apache License 2.0 | 4 votes |
def read_jsonlines(filename, converters=None): """ Read jsonlines from a file. Normalize nested jsons with up to one level of nesting Parameters ---------- filename: str Name of file to read converters : dict or None, optional A dictionary specifying how the types of the columns in the file should be converted. Specified in the same format as for `pd.read_csv() <https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.read_csv.html>`_. Returns ------- df : pandas DataFrame Data frame containing the data in the given file. """ try: df = pd.read_json(filename, orient='records', lines=True, dtype=converters) except ValueError: raise ValueError("The jsonlines file is not formatted correctly. " "Please check that each line ends with a comma, " "there is no comma at the end of the last line, " "and that all quotes match.") # make sure we didn't get a plain json if type(df.columns) == pd.RangeIndex: raise ValueError("It looks like {} is a simple json file. " "Please check documentation (for the expected " "file format".format(filename)) dfs = [] for column in df: try: df_column = pd.json_normalize(df[column]) except AttributeError: df_column = df[column].copy() dfs.append(df_column) df = pd.concat(dfs, axis=1) return df
Example #11
Source File: elastic.py From huntlib with MIT License | 4 votes |
def search_df(self, lucene, index="*", doctype="doc", fields=None, date_field="@timestamp", days=None, start_time=None, end_time=None, normalize=True, limit=None):
    '''
    Search Elastic and return the results as a Pandas DataFrame.

    lucene: A string containing the Elastic search (e.g., 'item:5282 AND color:red')
    index: A string containing the index name to search, or an index name pattern
           if you want to search multiple indices (e.g., 'myindex' or 'myindex-*')
    doctype: The document type you are interested in.
    fields: A string containing a comma-separated list of field names to return.
            The default is to return all fields, but using this list you can
            select only certain fields, which may make things a bit faster.
    date_field: The name of the field used for date/time comparison.
    days: Search the past X days. If provided, this supercedes both start_time
          and end_time.
    start_time: A datetime() object representing the start of the search
                window. If used without end_time, the end of the search
                window is the current time.
    end_time: A datetime() object representing the end of the search window.
              If used without start_time, the search start will be the earliest
              time in the index.
    normalize: If set to True, fields containing structures (i.e. subfields)
               will be flattened such that each field has its own column in
               the dataframe. If False, there will be a single column for the
               structure, with a JSON string encoding all the contents.
    limit: An integer describing the max number of search results to return.
    '''
    hits = list(
        self.search(
            lucene=lucene,
            index=index,
            doctype=doctype,
            fields=fields,
            date_field=date_field,
            days=days,
            start_time=start_time,
            end_time=end_time,
            limit=limit,
        )
    )
    if normalize:
        return pd.json_normalize(hits)
    return pd.DataFrame(hits)