Python pandas.json_normalize() Examples

The following are 11 code examples of pandas.json_normalize(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module pandas, or try the search function.
Example #1
Source File: json_loader.py    From dtale with GNU Lesser General Public License v2.1 6 votes vote down vote up
def loader_func(**kwargs):
    """Load JSON from a local path or an HTTP(S) URL into a DataFrame.

    Keyword arguments consumed here:

    * ``path`` (required): a file path, or a URL starting with ``http://``
      or ``https://``.
    * ``normalize`` (default ``False``): when true, the data is flattened
      with ``json_normalize`` and every remaining keyword argument is
      forwarded to it.  Otherwise ``pd.read_json`` is called with only the
      keyword arguments whose names are in ``loader_prop_keys(LOADER_PROPS)``.
    * ``proxy`` (optional, only read for URLs): used for both the http and
      https proxy of the request.
    """
    source = kwargs.pop("path")
    flatten = kwargs.pop("normalize", False)

    # add support for URLs
    if source.startswith(("http://", "https://")):
        proxy = kwargs.pop("proxy", None)
        if proxy is None:
            request_options = {}
        else:
            request_options = {"proxies": {"http": proxy, "https": proxy}}
        response = requests.get(source, **request_options)
        assert response.status_code == 200
        # json_normalize wants parsed objects, read_json wants the raw text
        source = response.json() if flatten else response.text

    if not flatten:
        read_kwargs = {
            name: value
            for name, value in kwargs.items()
            if name in loader_prop_keys(LOADER_PROPS)
        }
        return pd.read_json(source, **read_kwargs)

    # the top-level pd.json_normalize is only used when is_pandas1() says so
    if is_pandas1():
        flattener = pd.json_normalize
    else:
        flattener = pd.io.json.json_normalize
    return flattener(source, **kwargs)


# IMPORTANT!!! This function is required for building any customized CLI loader. 
Example #2
Source File: splunk.py    From huntlib with MIT License 6 votes vote down vote up
def search_df(self, *args, **kwargs):
        '''
        Run a Splunk search and collect the hits into a Pandas DataFrame.

        Every positional and keyword argument is forwarded unchanged to
        search().  The optional ``normalize`` keyword (default True) is also
        read here: when true the hits are flattened with pd.json_normalize(),
        otherwise they are passed to pd.DataFrame() as-is.
        '''
        flatten = kwargs.get('normalize', True)

        # Drain the search results into a list before building the frame.
        hits = list(self.search(*args, **kwargs))

        if not flatten:
            return pd.DataFrame(hits)
        return pd.json_normalize(hits)
Example #3
Source File: io.py    From modin with Apache License 2.0 6 votes vote down vote up
def json_normalize(
    data: Union[Dict, List[Dict]],
    record_path: Optional[Union[str, List]] = None,
    meta: Optional[Union[str, List[Union[str, List[str]]]]] = None,
    meta_prefix: Optional[str] = None,
    record_prefix: Optional[str] = None,
    errors: Optional[str] = "raise",
    sep: str = ".",
    max_level: Optional[int] = None,
) -> DataFrame:
    """Flatten JSON data by delegating to ``pandas.json_normalize``.

    All parameters are handed to pandas unchanged; the pandas result is then
    wrapped in this module's ``DataFrame``.  ``ErrorMessage.default_to_pandas``
    is called first with the function name.
    """
    ErrorMessage.default_to_pandas("json_normalize")
    pandas_result = pandas.json_normalize(
        data,
        record_path=record_path,
        meta=meta,
        meta_prefix=meta_prefix,
        record_prefix=record_prefix,
        errors=errors,
        sep=sep,
        max_level=max_level,
    )
    return DataFrame(pandas_result)
Example #4
Source File: client.py    From tdameritrade with Apache License 2.0 5 votes vote down vote up
def accountsDF(self):
        '''Return every account from accounts() as a single DataFrame.

        Each account payload is flattened with pd.json_normalize() and the
        'securitiesAccount.' prefix is stripped from the resulting column
        names before the per-account frames are concatenated.

        Returns:
            A DataFrame with the flattened per-account frames stacked
            row-wise, or an empty DataFrame when accounts() yields nothing.
        '''
        data = self.accounts()
        frames = []
        # Only the payloads are needed; the account-id keys are not used.
        for payload in data.values():
            # Use the top-level pd.json_normalize: the pd.io.json alias was
            # deprecated in pandas 1.0.
            frame = pd.json_normalize(payload)
            frame.columns = [c.replace('securitiesAccount.', '') for c in frame.columns]
            frames.append(frame)
        if not frames:
            # pd.concat raises ValueError when given nothing to concatenate.
            return pd.DataFrame()
        return pd.concat(frames)
Example #5
Source File: client.py    From tdameritrade with Apache License 2.0 5 votes vote down vote up
def transactionsDF(self, accountId=None, type=None, symbol=None, startDate=None, endDate=None):
        '''Fetch transactions via transactions() and flatten them into a DataFrame.

        All five filters are forwarded unchanged, by keyword, to
        transactions(); its result is passed to pd.json_normalize().
        '''
        filters = {
            'accountId': accountId,
            'type': type,
            'symbol': symbol,
            'startDate': startDate,
            'endDate': endDate,
        }
        records = self.transactions(**filters)
        return pd.json_normalize(records)
Example #6
Source File: spider.py    From advertools with MIT License 5 votes vote down vote up
def _json_to_dict(jsonobj, i=None):
    """Flatten ``jsonobj`` into a one-level dict with ``jsonld_`` prefixed keys.

    The object is flattened with ``json_normalize`` and only the first row of
    the result is returned.  When ``i`` is truthy the prefix becomes
    ``jsonld_<i>_``; a falsy ``i`` (``None`` or ``0``) gives plain ``jsonld_``.
    """
    prefix = 'jsonld_{}_'.format(i) if i else 'jsonld_'
    flat = json_normalize(jsonobj).add_prefix(prefix)
    first_row = flat.values[0]
    return dict(zip(flat.columns, first_row))
Example #7
Source File: _yt_helpers.py    From advertools with MIT License 5 votes vote down vote up
def _json_to_df(json_resp, params):
    """Turn a JSON API response into a DataFrame annotated with the request.

    ``json_resp.json()`` is decoded and its top-level entries are handled by
    value type: a list is flattened with ``json_normalize`` and becomes the
    rows, a string becomes a constant column, and the items of a dict are
    added as columns.  Columns whose name contains ``Count`` are cast to int
    and those containing ``published`` or ``updated`` are parsed as datetimes;
    a column is left untouched when the conversion raises ``ValueError``.
    Each entry of ``params`` is added as a ``param_<name>`` column and a
    ``queryTime`` column holds the current UTC timestamp.
    """
    payload = json_resp.json()
    typed_keys = [(type(payload[name]).__name__, name) for name in payload]

    frame = pd.DataFrame()
    for kind, name in typed_keys:
        if kind == 'list':
            frame = json_normalize(payload[name])
        # A one-row placeholder lets the scalar columns below be attached
        # even when there are no rows; it is dropped again before returning.
        if not len(frame):
            frame = pd.DataFrame([0], columns=['delete_me'])

    for kind, name in typed_keys:
        if kind == 'str':
            frame[name] = payload[name]
        elif kind == 'dict':
            frame = frame.assign(**payload[name])

    for column in frame:
        if 'Count' in column:
            try:
                frame[column] = frame[column].astype(int)
            except ValueError:
                # a failed cast also skips the datetime check for this column
                continue
        if 'published' in column or 'updated' in column:
            try:
                frame[column] = pd.to_datetime(frame[column])
            except ValueError:
                pass

    param_columns = {'param_' + name: value for name, value in params.items()}
    frame = frame.assign(**param_columns)
    if 'delete_me' in frame:
        frame = frame.drop(columns=['delete_me'])
    frame['queryTime'] = pd.Timestamp.now(tz='UTC')
    return frame
Example #8
Source File: twitter.py    From advertools with MIT License 5 votes vote down vote up
def _expand_entities(df):
    """Add comma-joined summary columns for the ``tweet_entities`` column.

    When ``df`` has a ``tweet_entities`` column, its values are flattened
    with ``json_normalize`` and one string column each is inserted directly
    after it for mentions (``@`` prefix), hashtags (``#``), urls and symbols
    (``$``), plus media when the flattened data has a ``media`` column.
    ``df`` is modified in place and returned; a frame without
    ``tweet_entities`` is returned untouched.
    """
    if 'tweet_entities' not in df:
        return df

    suffixes = ['mentions', 'hashtags', 'urls', 'symbols', 'media']
    colnames = ['tweet_entities_' + suffix for suffix in suffixes]
    entities_df = json_normalize(df['tweet_entities'])

    def _join_each(column, render):
        # One ', '-joined string per row of the given entity column.
        return [', '.join([render(item) for item in items])
                for items in entities_df[column]]

    entity_cols = [
        _join_each('user_mentions', lambda item: '@' + item['screen_name']),
        _join_each('hashtags', lambda item: '#' + item['text']),
        _join_each('urls', lambda item: item['expanded_url']),
        _join_each('symbols', lambda item: '$' + item['text']),
    ]

    if 'media' in entities_df:
        # Rows without media come back as NaN; blank them so they pass through.
        entities_df['media'] = entities_df['media'].fillna('')
        entity_cols.append([
            ', '.join([item['media_url'] for item in items]) if items != '' else items
            for items in entities_df['media']
        ])

    col_idx = df.columns.get_loc('tweet_entities')
    for offset, (name, values) in enumerate(zip(colnames, entity_cols), start=1):
        df.insert(col_idx + offset, name, values)
    return df
Example #9
Source File: aggregate.py    From sigsep-mus-eval with MIT License 5 votes vote down vote up
def json2df(json_string, track_name):
    """Reshape per-frame scores into a long-format pandas dataframe.

    Parameters
    ----------
    json_string : mapping
        Decoded scores; indexed with ``'targets'``, whose entries supply a
        ``'name'`` and a list of ``'frames'``.
    track_name : str
        Value written to the ``track`` column of every row.

    Returns
    -------
    pandas.DataFrame
        Columns ``time``, ``target``, ``metric``, ``score`` and ``track``,
        one row per frame and metric (SDR, SAR, ISR, SIR), with the index
        labels converted to strings.
    """
    frames = pd.json_normalize(
        json_string['targets'],
        record_path=['frames'],
        meta=['name'],
    )

    # Nested metric values arrive as 'metrics.<NAME>'; keep just '<NAME>'.
    frames.columns = [label.replace('metrics.', '') for label in frames.columns]

    long_form = frames.melt(
        id_vars=['time', 'name'],
        value_vars=['SDR', 'SAR', 'ISR', 'SIR'],
        var_name='metric',
        value_name='score',
    )
    long_form['track'] = track_name
    return long_form.rename(index=str, columns={"name": "target"})
Example #10
Source File: reader.py    From rsmtool with Apache License 2.0 4 votes vote down vote up
def read_jsonlines(filename, converters=None):
    """
    Read jsonlines from a file.
    Normalize nested jsons with up to one level of nesting

    Parameters
    ----------
    filename: str
        Name of file to read
    converters : dict or None, optional
        A dictionary specifying how the types of the columns
        in the file should be converted. Specified in the same
        format as for `pd.read_csv() <https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.read_csv.html>`_.

    Returns
    -------
    df : pandas DataFrame
         Data frame containing the data in the given file.

    Raises
    ------
    ValueError
        If the file cannot be parsed as jsonlines, or if the parsed
        frame has a plain ``RangeIndex`` for its columns (taken to mean
        the file is a plain json rather than jsonlines).
    """

    try:
        df = pd.read_json(filename,
                          orient='records',
                          lines=True,
                          dtype=converters)
    except ValueError as err:
        # chain the parser's own error so the underlying cause stays visible
        raise ValueError("The jsonlines file is not formatted correctly. "
                         "Please check that each line ends with a comma, "
                         "there is no comma at the end of the last line, "
                         "and that all quotes match.") from err

    # make sure we didn't get a plain json
    if isinstance(df.columns, pd.RangeIndex):
        raise ValueError("It looks like {} is a simple json file. "
                         "Please check documentation (for the expected "
                         "file format".format(filename))

    dfs = []
    for column in df:
        try:
            df_column = pd.json_normalize(df[column])
        except AttributeError:
            # this pandas version rejects values that are not nested
            df_column = df[column].copy()
        else:
            # Other pandas versions do not raise for non-nested values and
            # hand back a frame without any columns instead; keep the
            # original column so its data is not silently dropped.
            if df_column.empty:
                df_column = df[column].copy()

        dfs.append(df_column)

    df = pd.concat(dfs, axis=1)

    return df
Example #11
Source File: elastic.py    From huntlib with MIT License 4 votes vote down vote up
def search_df(self, lucene, index="*", doctype="doc", fields=None,
                  date_field="@timestamp", days=None, start_time=None,
                  end_time=None, normalize=True, limit=None):
        '''
        Run an Elastic search and collect the hits into a Pandas DataFrame.

        Every argument except normalize is forwarded by keyword to search().

        lucene: A string containing the Elastic search (e.g., 'item:5282 AND color:red')
        index: A string containing the index name to search, or an index name pattern
               if you want to search multiple indices (e.g., 'myindex' or 'myindex-*')
        doctype: The document type you are interested in.
        fields: A string containing a comma-separated list of field names to return.
                The default is to return all fields, but using this list you can
                select only certain fields, which may make things a bit faster.
        date_field: The name of the field used for date/time comparison.
        days: Search the past X days. If provided, this supersedes both start_time
              and end_time.
        start_time: A datetime() object representing the start of the search
                    window. If used without end_time, the end of the search
                    window is the current time.
        end_time: A datetime() object representing the end of the search window.
                  If used without start_time, the search start will be the earliest
                  time in the index.
        normalize: If set to True, the hits are flattened with
                   pd.json_normalize() so that each subfield gets its own
                   column in the dataframe. If False, the hits are passed
                   to pd.DataFrame() unchanged.
        limit: An integer describing the max number of search results to return.
        '''
        query = {
            'lucene': lucene,
            'index': index,
            'doctype': doctype,
            'fields': fields,
            'date_field': date_field,
            'days': days,
            'start_time': start_time,
            'end_time': end_time,
            'limit': limit,
        }
        # Drain the search results into a list before building the frame.
        hits = list(self.search(**query))

        if not normalize:
            return pd.DataFrame(hits)
        return pd.json_normalize(hits)