Python Examples of pandas.json_normalize

Source File: json_loader.py From dtale with GNU Lesser General Public License v2.1

6 votes

def loader_func(**kwargs):
    """
    Load JSON from a local path or an HTTP(S) URL into a DataFrame.

    Keyword arguments consumed here:

    - ``path`` (required): file path or ``http://`` / ``https://`` URL.
    - ``normalize`` (default ``False``): when truthy the data is passed to
      pandas' ``json_normalize`` together with every remaining keyword
      argument; otherwise it goes to ``pd.read_json`` with only the keyword
      arguments whose names are in ``loader_prop_keys(LOADER_PROPS)``.
    - ``proxy`` (URLs only): used for both the ``http`` and ``https`` entries
      of the ``proxies`` mapping handed to ``requests.get``.

    For URLs the response must have status code 200 (checked with ``assert``).
    """
    path = kwargs.pop("path")
    normalize = kwargs.pop("normalize", False)

    if path.startswith(("http://", "https://")):  # add support for URLs
        proxy = kwargs.pop("proxy", None)
        req_kwargs = (
            {} if proxy is None else {"proxies": {"http": proxy, "https": proxy}}
        )
        resp = requests.get(path, **req_kwargs)
        assert resp.status_code == 200
        # json_normalize wants parsed objects, read_json wants the raw text.
        path = resp.json() if normalize else resp.text

    if not normalize:
        read_kwargs = {
            key: value
            for key, value in kwargs.items()
            if key in loader_prop_keys(LOADER_PROPS)
        }
        return pd.read_json(path, **read_kwargs)

    # The top-level pd.json_normalize is only used when is_pandas1() says so.
    if is_pandas1():
        normalize_func = pd.json_normalize
    else:
        normalize_func = pd.io.json.json_normalize
    return normalize_func(path, **kwargs)


# IMPORTANT!!! This function is required for building any customized CLI loader.

Source File: splunk.py From huntlib with MIT License

6 votes

def search_df(self, *args, **kwargs):
        '''
        Search Splunk and return the results as a Pandas DataFrame.

        Accepts all the same arguments as the search() function
        '''

        normalize = kwargs.get('normalize', True)

        results = list()
        for hit in self.search(*args, **kwargs):
            results.append(hit)

        if normalize:
            df = pd.json_normalize(results)
        else:
            df = pd.DataFrame(results)

        return df

Source File: io.py From modin with Apache License 2.0

6 votes

def json_normalize(
    data: Union[Dict, List[Dict]],
    record_path: Optional[Union[str, List]] = None,
    meta: Optional[Union[str, List[Union[str, List[str]]]]] = None,
    meta_prefix: Optional[str] = None,
    record_prefix: Optional[str] = None,
    errors: Optional[str] = "raise",
    sep: str = ".",
    max_level: Optional[int] = None,
) -> DataFrame:
    """
    Normalize semi-structured JSON data by delegating to pandas.

    Reports the fallback through ``ErrorMessage.default_to_pandas``, calls
    ``pandas.json_normalize`` with the arguments unchanged and wraps the
    result in this module's ``DataFrame``.
    """
    ErrorMessage.default_to_pandas("json_normalize")
    pandas_frame = pandas.json_normalize(
        data,
        record_path=record_path,
        meta=meta,
        meta_prefix=meta_prefix,
        record_prefix=record_prefix,
        errors=errors,
        sep=sep,
        max_level=max_level,
    )
    return DataFrame(pandas_frame)

Source File: client.py From tdameritrade with Apache License 2.0

5 votes

def accountsDF(self):
        '''Get accounts as a single dataframe.

        Each value of the mapping returned by self.accounts() is flattened
        into its own frame, the 'securitiesAccount.' prefix is stripped from
        the column names, and the per-account frames are concatenated.
        '''
        account_dataframes = []
        # Only the values are needed; the account-id keys are not used.
        for value in self.accounts().values():
            # Use the top-level pd.json_normalize: pd.io.json.json_normalize
            # is the deprecated spelling and is not available in newer pandas.
            frame = pd.json_normalize(value)
            frame.columns = [c.replace('securitiesAccount.', '') for c in frame.columns]
            account_dataframes.append(frame)
        return pd.concat(account_dataframes)

Source File: client.py From tdameritrade with Apache License 2.0

5 votes

def transactionsDF(self, accountId=None, type=None, symbol=None, startDate=None, endDate=None):
        '''Get transaction information as a Dataframe.

        All five filters are forwarded by keyword to self.transactions() and
        the result is flattened with pd.json_normalize().
        '''
        filters = dict(
            accountId=accountId,
            type=type,
            symbol=symbol,
            startDate=startDate,
            endDate=endDate,
        )
        records = self.transactions(**filters)
        return pd.json_normalize(records)

Source File: spider.py From advertools with MIT License

5 votes

def _json_to_dict(jsonobj, i=None):
    """Flatten ``jsonobj`` and return its first row as a plain dict.

    Keys are the flattened column names prefixed with ``jsonld_{i}_`` when
    ``i`` is truthy, or with ``jsonld_`` when it is falsy (``None`` or ``0``).
    """
    prefix = 'jsonld_{}_'.format(i) if i else 'jsonld_'
    flat = json_normalize(jsonobj).add_prefix(prefix)
    first_row = flat.values[0]
    return dict(zip(flat.columns, first_row))

Source File: _yt_helpers.py From advertools with MIT License

5 votes

def _json_to_df(json_resp, params):
    """Turn a JSON response into a DataFrame annotated with the request params.

    ``json_resp.json()`` is inspected key by key:

    - list values are flattened with ``json_normalize`` and become the frame
      (a later list replaces an earlier one);
    - str values are added as columns;
    - dict values are added as columns through ``DataFrame.assign``.

    Columns whose name contains ``Count`` are cast to int, and columns whose
    name contains ``published`` or ``updated`` are parsed as datetimes; a
    ``ValueError`` during either conversion leaves the column untouched.
    Each entry of ``params`` is added as a ``param_<key>`` column, and a
    ``queryTime`` column holds the current UTC timestamp.
    """
    payload = json_resp.json()
    typed_keys = [(type(payload[name]).__name__, name) for name in payload]

    frame = pd.DataFrame()
    for kind, name in typed_keys:
        if kind == 'list':
            frame = json_normalize(payload[name])
        # Keep one placeholder row so the scalar assignments below have
        # something to broadcast onto; the column is dropped again later.
        if not len(frame):
            frame = pd.DataFrame([0], columns=['delete_me'])

    for kind, name in typed_keys:
        if kind == 'str':
            frame[name] = payload[name]
        if kind == 'dict':
            frame = frame.assign(**payload[name])

    for column in frame:
        if 'Count' in column:
            try:
                frame[column] = frame[column].astype(int)
            except ValueError:
                # A failed cast also skips the datetime check for this column.
                continue
        if 'published' in column or 'updated' in column:
            try:
                frame[column] = pd.to_datetime(frame[column])
            except ValueError:
                continue

    param_columns = {'param_' + name: value for name, value in params.items()}
    frame = frame.assign(**param_columns)
    if 'delete_me' in frame:
        frame = frame.drop(columns=['delete_me'])
    frame['queryTime'] = pd.Timestamp.now(tz='UTC')
    return frame

Source File: twitter.py From advertools with MIT License

5 votes

def _expand_entities(df):
    """Add comma-joined summary columns next to ``tweet_entities``.

    When ``df`` has a ``tweet_entities`` column, its values are flattened and
    one string column each is inserted directly after it for mentions
    (``@name``), hashtags (``#text``), urls (``expanded_url``), symbols
    (``$text``) and, only if present, media (``media_url``).  ``df`` is
    modified in place and also returned; without the column it is returned
    untouched.
    """
    if 'tweet_entities' not in df:
        return df

    def _joined(entity_lists, render):
        # One comma-separated string per row of entity dicts.
        return [', '.join([render(item) for item in items])
                for items in entity_lists]

    entities = json_normalize(df['tweet_entities'])
    expanded = [
        _joined(entities['user_mentions'], lambda m: '@' + m['screen_name']),
        _joined(entities['hashtags'], lambda h: '#' + h['text']),
        _joined(entities['urls'], lambda u: u['expanded_url']),
        _joined(entities['symbols'], lambda s: '$' + s['text']),
    ]

    if 'media' in entities:
        # Rows without media come through as NaN; represent them as ''.
        media_lists = entities['media'].fillna('')
        expanded.append([
            ', '.join([m['media_url'] for m in items]) if items != '' else items
            for items in media_lists
        ])

    suffixes = ['mentions', 'hashtags', 'urls', 'symbols', 'media']
    new_names = ['tweet_entities_' + suffix for suffix in suffixes]
    anchor = df.columns.get_loc('tweet_entities')
    # zip() stops at the shorter list, so 'media' is skipped when absent.
    for offset, (name, values) in enumerate(zip(new_names, expanded), start=1):
        df.insert(anchor + offset, name, values)
    return df

Source File: aggregate.py From sigsep-mus-eval with MIT License

5 votes

def json2df(json_string, track_name):
    """Convert json scores into a long-format pandas dataframe.

    Parameters
    ----------
    json_string : mapping
        Scores object; it is indexed with ``['targets']``, and each target
        contributes its ``frames`` records and its ``name``.
    track_name : str
        Value written to the ``track`` column of every row.

    Returns
    -------
    pandas.DataFrame
        One row per frame and metric (SDR, SAR, ISR, SIR) with the columns
        ``time``, ``target``, ``metric``, ``score`` and ``track``; the index
        labels are converted to strings.
    """
    per_frame = pd.json_normalize(
        json_string['targets'],
        record_path=['frames'],
        meta=['name'],
    )
    # Flattened metric columns arrive as 'metrics.<NAME>'; keep only <NAME>.
    per_frame.columns = [
        label.replace('metrics.', '') for label in per_frame.columns
    ]

    long_form = pd.melt(
        per_frame,
        id_vars=['time', 'name'],
        value_vars=['SDR', 'SAR', 'ISR', 'SIR'],
        var_name='metric',
        value_name='score',
    )
    long_form['track'] = track_name
    return long_form.rename(index=str, columns={"name": "target"})

Source File: reader.py From rsmtool with Apache License 2.0

4 votes

def read_jsonlines(filename, converters=None):
    """
    Read jsonlines from a file.
    Normalize nested jsons with up to one level of nesting

    Parameters
    ----------
    filename: str
        Name of file to read
    converters : dict or None, optional
        A dictionary specifying how the types of the columns
        in the file should be converted. Specified in the same
        format as for `pd.read_csv() <https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.read_csv.html>`_.

    Returns
    -------
    df : pandas DataFrame
         Data frame containing the data in the given file.

    Raises
    ------
    ValueError
        If the file cannot be parsed as jsonlines, or if the parsed
        frame ends up with a plain ``RangeIndex`` for its columns.
    """

    try:
        df = pd.read_json(filename,
                          orient='records',
                          lines=True,
                          dtype=converters)
    except ValueError as err:
        # Keep the parser's own error attached as the cause.
        raise ValueError("The jsonlines file is not formatted correctly. "
                         "Please check that each line ends with a comma, "
                         "there is no comma at the end of the last line, "
                         "and that all quotes match.") from err

    # make sure we didn't get a plain json
    if isinstance(df.columns, pd.RangeIndex):
        raise ValueError("It looks like {} is a simple json file. "
                         "Please check documentation (for the expected "
                         "file format".format(filename))

    dfs = []
    for column in df:
        # Depending on the pandas version, json_normalize() on a column of
        # plain (non-nested) values either raises AttributeError or returns
        # an empty frame. In both cases keep the original column, otherwise
        # it would silently vanish from the result.
        try:
            df_column = pd.json_normalize(df[column])
        except AttributeError:
            df_column = df[column].copy()
        else:
            if df_column.empty:
                df_column = df[column].copy()

        dfs.append(df_column)

    df = pd.concat(dfs, axis=1)

    return df

Source File: elastic.py From huntlib with MIT License

4 votes

def search_df(self, lucene, index="*", doctype="doc", fields=None,
                  date_field="@timestamp", days=None, start_time=None,
                  end_time=None, normalize=True, limit=None):
        '''
        Search Elastic and return the results as a Pandas DataFrame.

        lucene: A string containing the Elastic search (e.g., 'item:5282 AND color:red')
        index: A string containing the index name to search, or an index name pattern
               if you want to search multiple indices (e.g., 'myindex' or 'myindex-*')
        doctype: The document type you are interested in.
        fields: A string containing a comma-separated list of field names to return.
                The default is to return all fields, but using this list you can
                select only certain fields, which may make things a bit faster.
        date_field: The name of the field used for date/time comparison.
        days: Search the past X days. If provided, this supercedes both start_time
              and end_time.
        start_time: A datetime() object representing the start of the search
                    window. If used without end_time, the end of the search
                    window is the current time.
        end_time: A datetime() object representing the end of the search window.
                  If used without start_time, the search start will be the earliest
                  time in the index.
        normalize: If set to True, fields containing structures (i.e. subfields)
                   will be flattened such that each field has it's own column in
                   the dataframe. If False, there will be a single column for the
                   structure, with a JSON string encoding all the contents.
        limit: An integer describing the max number of search results to return.
        '''
        results = list()

        for hit in self.search(lucene=lucene, index=index, doctype=doctype,
                               fields=fields, date_field=date_field, days=days,
                               start_time=start_time, end_time=end_time,
                               limit=limit):
            results.append(hit)

        if normalize:
            df = pd.json_normalize(results)
        else:
            df = pd.DataFrame(results)

        return df

Python pandas.json_normalize() Examples