Python pandas.read_json() Examples
The following are 30 code examples of pandas.read_json(), drawn from open source projects.
The examples are ranked by community votes; the header above each example names the original project, source file, and license.
You may also want to check out all available functions and classes of the pandas module.
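Before the project examples, here is a minimal, self-contained sketch (not taken from any of the projects listed below) of the three read_json() call patterns most of them exercise: the default column-oriented JSON, line-delimited JSON via lines=True, and the Table Schema format via orient="table".

import pandas as pd
from io import StringIO

df = pd.DataFrame({"a": [1, 2], "b": [3, 4]})

# Default column-oriented JSON: {"a":{"0":1,"1":2},"b":{"0":3,"1":4}}.
# Newer pandas versions prefer a file-like object (or path) over a raw
# JSON string, so the output is wrapped in StringIO here.
round_tripped = pd.read_json(StringIO(df.to_json()))

# Line-delimited JSON: one record per line, read back with lines=True.
jsonl = df.to_json(orient="records", lines=True)
from_lines = pd.read_json(StringIO(jsonl), lines=True)

# Table Schema JSON: orient="table" also preserves the index and dtypes.
table_json = df.to_json(orient="table")
from_table = pd.read_json(StringIO(table_json), orient="table")

Compression (compression='zip', 'gzip', ...), chunked reading (chunksize=...), and date handling (date_format, date_unit) are covered by the test-suite examples further down.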
Example #1
Source File: test_json_table_schema.py From recruit with Apache License 2.0 | 6 votes |
def test_comprehensive(self):
    df = DataFrame(
        {'A': [1, 2, 3, 4],
         'B': ['a', 'b', 'c', 'c'],
         'C': pd.date_range('2016-01-01', freq='d', periods=4),
         # 'D': pd.timedelta_range('1H', periods=4, freq='T'),
         'E': pd.Series(pd.Categorical(['a', 'b', 'c', 'c'])),
         'F': pd.Series(pd.Categorical(['a', 'b', 'c', 'c'], ordered=True)),
         'G': [1.1, 2.2, 3.3, 4.4],
         # 'H': pd.date_range('2016-01-01', freq='d', periods=4,
         #                    tz='US/Central'),
         'I': [True, False, False, True],
         },
        index=pd.Index(range(4), name='idx'))

    out = df.to_json(orient="table")
    result = pd.read_json(out, orient="table")
    tm.assert_frame_equal(df, result)
Example #2
Source File: sql_dash_dropdown.py From dash-recipes with MIT License | 6 votes |
def dff_to_table(dff_json, dropdown_x, dropdown_y):
    dff = pd.read_json(dff_json)
    return {
        'data': [{
            'x': dff[dropdown_x],
            'y': dff[dropdown_y],
            'type': 'bar'
        }],
        'layout': {
            'margin': {
                'l': 20, 'r': 10, 'b': 60, 't': 10
            }
        }
    }
Example #3
Source File: test_pandas.py From recruit with Apache License 2.0 | 6 votes |
def test_date_format_frame(self):
    df = self.tsframe.copy()

    def test_w_date(date, date_unit=None):
        df['date'] = Timestamp(date)
        df.iloc[1, df.columns.get_loc('date')] = pd.NaT
        df.iloc[5, df.columns.get_loc('date')] = pd.NaT
        if date_unit:
            json = df.to_json(date_format='iso', date_unit=date_unit)
        else:
            json = df.to_json(date_format='iso')
        result = read_json(json)
        assert_frame_equal(result, df)

    test_w_date('20130101 20:43:42.123')
    test_w_date('20130101 20:43:42', date_unit='s')
    test_w_date('20130101 20:43:42.123', date_unit='ms')
    test_w_date('20130101 20:43:42.123456', date_unit='us')
    test_w_date('20130101 20:43:42.123456789', date_unit='ns')

    msg = "Invalid value 'foo' for option 'date_unit'"
    with pytest.raises(ValueError, match=msg):
        df.to_json(date_format='iso', date_unit='foo')
Example #4
Source File: test_pandas.py From recruit with Apache License 2.0 | 6 votes |
def test_v12_compat(self):
    df = DataFrame(
        [[1.56808523, 0.65727391, 1.81021139, -0.17251653],
         [-0.2550111, -0.08072427, -0.03202878, -0.17581665],
         [1.51493992, 0.11805825, 1.629455, -1.31506612],
         [-0.02765498, 0.44679743, 0.33192641, -0.27885413],
         [0.05951614, -2.69652057, 1.28163262, 0.34703478]],
        columns=['A', 'B', 'C', 'D'],
        index=pd.date_range('2000-01-03', '2000-01-07'))
    df['date'] = pd.Timestamp('19920106 18:21:32.12')
    df.iloc[3, df.columns.get_loc('date')] = pd.Timestamp('20130101')
    df['modified'] = df['date']
    df.iloc[1, df.columns.get_loc('modified')] = pd.NaT

    v12_json = os.path.join(self.dirpath, 'tsframe_v012.json')
    df_unser = pd.read_json(v12_json)
    assert_frame_equal(df, df_unser)

    df_iso = df.drop(['modified'], axis=1)
    v12_iso_json = os.path.join(self.dirpath, 'tsframe_iso_v012.json')
    df_unser_iso = pd.read_json(v12_iso_json)
    assert_frame_equal(df_iso, df_unser_iso)
Example #5
Source File: test_pandas.py From recruit with Apache License 2.0 | 6 votes |
def test_date_format_series(self):
    def test_w_date(date, date_unit=None):
        ts = Series(Timestamp(date), index=self.ts.index)
        ts.iloc[1] = pd.NaT
        ts.iloc[5] = pd.NaT
        if date_unit:
            json = ts.to_json(date_format='iso', date_unit=date_unit)
        else:
            json = ts.to_json(date_format='iso')
        result = read_json(json, typ='series')
        assert_series_equal(result, ts)

    test_w_date('20130101 20:43:42.123')
    test_w_date('20130101 20:43:42', date_unit='s')
    test_w_date('20130101 20:43:42.123', date_unit='ms')
    test_w_date('20130101 20:43:42.123456', date_unit='us')
    test_w_date('20130101 20:43:42.123456789', date_unit='ns')

    ts = Series(Timestamp('20130101 20:43:42.123'), index=self.ts.index)
    msg = "Invalid value 'foo' for option 'date_unit'"
    with pytest.raises(ValueError, match=msg):
        ts.to_json(date_format='iso', date_unit='foo')
Example #6
Source File: test_pandas.py From recruit with Apache License 2.0 | 6 votes |
def test_date_unit(self):
    df = self.tsframe.copy()
    df['date'] = Timestamp('20130101 20:43:42')
    dl = df.columns.get_loc('date')
    df.iloc[1, dl] = Timestamp('19710101 20:43:42')
    df.iloc[2, dl] = Timestamp('21460101 20:43:42')
    df.iloc[4, dl] = pd.NaT

    for unit in ('s', 'ms', 'us', 'ns'):
        json = df.to_json(date_format='epoch', date_unit=unit)

        # force date unit
        result = read_json(json, date_unit=unit)
        assert_frame_equal(result, df)

        # detect date unit
        result = read_json(json, date_unit=None)
        assert_frame_equal(result, df)
Example #7
Source File: test_pandas.py From recruit with Apache License 2.0 | 6 votes |
def test_weird_nested_json(self):
    # this used to core dump the parser
    s = r'''{
        "status": "success",
        "data": {
            "posts": [
                {
                    "id": 1,
                    "title": "A blog post",
                    "body": "Some useful content"
                },
                {
                    "id": 2,
                    "title": "Another blog post",
                    "body": "More content"
                }
            ]
        }
    }'''
    read_json(s)
Example #8
Source File: test_pandas.py From recruit with Apache License 2.0 | 6 votes |
def test_misc_example(self):
    # parsing unordered input fails
    result = read_json('[{"a": 1, "b": 2}, {"b":2, "a" :1}]', numpy=True)
    expected = DataFrame([[1, 2], [1, 2]], columns=['a', 'b'])

    error_msg = """DataFrame\\.index are different

DataFrame\\.index values are different \\(100\\.0 %\\)
\\[left\\]:  Index\\(\\[u?'a', u?'b'\\], dtype='object'\\)
\\[right\\]: RangeIndex\\(start=0, stop=2, step=1\\)"""
    with pytest.raises(AssertionError, match=error_msg):
        assert_frame_equal(result, expected, check_index_type=False)

    result = read_json('[{"a": 1, "b": 2}, {"b":2, "a" :1}]')
    expected = DataFrame([[1, 2], [1, 2]], columns=['a', 'b'])
    assert_frame_equal(result, expected)
Example #9
Source File: test_readlines.py From recruit with Apache License 2.0 | 6 votes |
def test_readjson_chunks_multiple_empty_lines(chunksize):
    j = """

    {"A":1,"B":4}


    {"A":2,"B":5}


    {"A":3,"B":6}
    """
    orig = pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]})
    test = pd.read_json(j, lines=True, chunksize=chunksize)
    if chunksize is not None:
        test = pd.concat(test)
    tm.assert_frame_equal(
        orig, test,
        obj="chunksize: {chunksize}".format(chunksize=chunksize))
Example #10
Source File: utils_mnli.py From interpret-text with MIT License | 6 votes |
def load_mnli_pandas_df(local_cache_path=".", file_split="train"):
    """Loads extracted test_utils into pandas

    Args:
        local_cache_path ([type], optional): [description].
            Defaults to current working directory.
        file_split (str, optional): The subset to load.
            One of: {"train", "dev_matched", "dev_mismatched"}
            Defaults to "train".

    Returns:
        pd.DataFrame: pandas DataFrame containing the specified
            MultiNLI subset.
    """
    try:
        download_file_and_extract(local_cache_path, file_split)
    except Exception as e:
        raise e

    return pd.read_json(
        os.path.join(local_cache_path, DATA_FILES[file_split]), lines=True
    )
Example #11
Source File: run_han.py From DeepResearch with MIT License | 6 votes |
def main():
    """
    A small tutorial to use HAN module
    """
    filename = './News_Category_Dataset/News_Category_Dataset.json'
    df = pd.read_json(filename, lines=True).reset_index()
    df = preprocessing(df)
    han_network = HAN.HAN(text=df.text, labels=df.category,
                          num_categories=30,
                          pretrained_embedded_vector_path='./glove.6B/glove.6B.100d.txt',
                          max_features=200000, max_senten_len=150,
                          max_senten_num=4, embedding_size=100,
                          validation_split=0.2, verbose=1)
    print(han_network.get_model().summary())
    han_network.show_hyperparameters()
    ## How to change hyperparameters
    # Let's add regularizers
    # To replace a hyperparameter change the corresponding key value
    # to the new value in set_hyperparameters
    han_network.set_hyperparameters({'l2_regulizer': 1e-13,
                                     'dropout_regulizer': 0.5})
    han_network.show_hyperparameters()
    print(han_network.get_model().summary())
    han_network.train_model(epochs=3, batch_size=16,
                            best_model_path='./best_model.h5')
Example #12
Source File: test_readlines.py From recruit with Apache License 2.0 | 6 votes |
def test_to_jsonl():
    # GH9180
    df = DataFrame([[1, 2], [1, 2]], columns=['a', 'b'])
    result = df.to_json(orient="records", lines=True)
    expected = '{"a":1,"b":2}\n{"a":1,"b":2}'
    assert result == expected

    df = DataFrame([["foo}", "bar"], ['foo"', "bar"]], columns=['a', 'b'])
    result = df.to_json(orient="records", lines=True)
    expected = '{"a":"foo}","b":"bar"}\n{"a":"foo\\"","b":"bar"}'
    assert result == expected
    assert_frame_equal(read_json(result, lines=True), df)

    # GH15096: escaped characters in columns and data
    df = DataFrame([["foo\\", "bar"], ['foo"', "bar"]],
                   columns=["a\\", 'b'])
    result = df.to_json(orient="records", lines=True)
    expected = ('{"a\\\\":"foo\\\\","b":"bar"}\n'
                '{"a\\\\":"foo\\"","b":"bar"}')
    assert result == expected
    assert_frame_equal(read_json(result, lines=True), df)
Example #13
Source File: test_readlines.py From recruit with Apache License 2.0 | 6 votes |
def test_read_jsonl_unicode_chars():
    # GH15132: non-ascii unicode characters
    # \u201d == RIGHT DOUBLE QUOTATION MARK

    # simulate file handle
    json = '{"a": "foo”", "b": "bar"}\n{"a": "foo", "b": "bar"}\n'
    json = StringIO(json)
    result = read_json(json, lines=True)
    expected = DataFrame([[u"foo\u201d", "bar"], ["foo", "bar"]],
                         columns=['a', 'b'])
    assert_frame_equal(result, expected)

    # simulate string
    json = '{"a": "foo”", "b": "bar"}\n{"a": "foo", "b": "bar"}\n'
    result = read_json(json, lines=True)
    expected = DataFrame([[u"foo\u201d", "bar"], ["foo", "bar"]],
                         columns=['a', 'b'])
    assert_frame_equal(result, expected)
Example #14
Source File: test_readlines.py From vnpy_crypto with MIT License | 6 votes |
def test_read_jsonl_unicode_chars():
    # GH15132: non-ascii unicode characters
    # \u201d == RIGHT DOUBLE QUOTATION MARK

    # simulate file handle
    json = '{"a": "foo”", "b": "bar"}\n{"a": "foo", "b": "bar"}\n'
    json = StringIO(json)
    result = read_json(json, lines=True)
    expected = DataFrame([[u"foo\u201d", "bar"], ["foo", "bar"]],
                         columns=['a', 'b'])
    assert_frame_equal(result, expected)

    # simulate string
    json = '{"a": "foo”", "b": "bar"}\n{"a": "foo", "b": "bar"}\n'
    result = read_json(json, lines=True)
    expected = DataFrame([[u"foo\u201d", "bar"], ["foo", "bar"]],
                         columns=['a', 'b'])
    assert_frame_equal(result, expected)
Example #15
Source File: test_readlines.py From vnpy_crypto with MIT License | 6 votes |
def test_to_jsonl():
    # GH9180
    df = DataFrame([[1, 2], [1, 2]], columns=['a', 'b'])
    result = df.to_json(orient="records", lines=True)
    expected = '{"a":1,"b":2}\n{"a":1,"b":2}'
    assert result == expected

    df = DataFrame([["foo}", "bar"], ['foo"', "bar"]], columns=['a', 'b'])
    result = df.to_json(orient="records", lines=True)
    expected = '{"a":"foo}","b":"bar"}\n{"a":"foo\\"","b":"bar"}'
    assert result == expected
    assert_frame_equal(read_json(result, lines=True), df)

    # GH15096: escaped characters in columns and data
    df = DataFrame([["foo\\", "bar"], ['foo"', "bar"]],
                   columns=["a\\", 'b'])
    result = df.to_json(orient="records", lines=True)
    expected = ('{"a\\\\":"foo\\\\","b":"bar"}\n'
                '{"a\\\\":"foo\\"","b":"bar"}')
    assert result == expected
    assert_frame_equal(read_json(result, lines=True), df)
Example #16
Source File: test_pandas.py From recruit with Apache License 2.0 | 6 votes |
def test_read_jsonl_unicode_chars(self):
    # GH15132: non-ascii unicode characters
    # \u201d == RIGHT DOUBLE QUOTATION MARK

    # simulate file handle
    json = '{"a": "foo”", "b": "bar"}\n{"a": "foo", "b": "bar"}\n'
    json = StringIO(json)
    result = read_json(json, lines=True)
    expected = DataFrame([[u"foo\u201d", "bar"], ["foo", "bar"]],
                         columns=['a', 'b'])
    assert_frame_equal(result, expected)

    # simulate string
    json = '{"a": "foo”", "b": "bar"}\n{"a": "foo", "b": "bar"}\n'
    result = read_json(json, lines=True)
    expected = DataFrame([[u"foo\u201d", "bar"], ["foo", "bar"]],
                         columns=['a', 'b'])
    assert_frame_equal(result, expected)
Example #17
Source File: test_compression.py From recruit with Apache License 2.0 | 5 votes |
def test_read_zipped_json(datapath):
    uncompressed_path = datapath("io", "json", "data", "tsframe_v012.json")
    uncompressed_df = pd.read_json(uncompressed_path)

    compressed_path = datapath("io", "json", "data", "tsframe_v012.json.zip")
    compressed_df = pd.read_json(compressed_path, compression='zip')

    assert_frame_equal(uncompressed_df, compressed_df)
Example #18
Source File: test_compression.py From recruit with Apache License 2.0 | 5 votes |
def test_read_unsupported_compression_type():
    with tm.ensure_clean() as path:
        msg = "Unrecognized compression type: unsupported"
        with pytest.raises(ValueError, match=msg):
            pd.read_json(path, compression="unsupported")
Example #19
Source File: exploredata.py From MSMARCO with MIT License | 5 votes |
def general_stats_data_public(path):
    df = pd.read_json(path)
    query_type_label = {'LOCATION': 0, 'DESCRIPTION': 0, 'NUMERIC': 0,
                        'ENTITY': 0, 'PERSON': 0}
    total_size = len(df)
    for row in df.iterrows():
        category = row[1]['query_type']
        if category in query_type_label:
            query_type_label[category] += 1
    print('Columns:{}'.format(df.columns.values))
    print('{} queries'.format(total_size))
    print('----query distribution by dataset type----')
    for key in query_type_label:
        print(key + ',' + str(query_type_label[key]) + ',' +
              str(query_type_label[key] / total_size))
Example #20
Source File: test_compression.py From recruit with Apache License 2.0 | 5 votes |
def test_write_unsupported_compression_type():
    df = pd.read_json('{"a": [1, 2, 3], "b": [4, 5, 6]}')
    with tm.ensure_clean() as path:
        msg = "Unrecognized compression type: unsupported"
        with pytest.raises(ValueError, match=msg):
            df.to_json(path, compression="unsupported")
Example #21
Source File: test_compression.py From recruit with Apache License 2.0 | 5 votes |
def test_lines_with_compression(compression):
    with tm.ensure_clean() as path:
        df = pd.read_json('{"a": [1, 2, 3], "b": [4, 5, 6]}')
        df.to_json(path, orient='records', lines=True,
                   compression=compression)
        roundtripped_df = pd.read_json(path, lines=True,
                                       compression=compression)
        assert_frame_equal(df, roundtripped_df)
Example #22
Source File: get_stats_about_length.py From MSMARCO with MIT License | 5 votes |
def main():
    file = sys.argv[1]
    df = pd.read_json(file)
    queries = {}
    answers = {}
    well_formed_answers = {}
    passages = {}
    for row in df.iterrows():
        queries[row[1]['query']] = 1
        for v in row[1]['answers']:
            answers[v] = 1
        for v in row[1]['wellFormedAnswers']:
            well_formed_answers[v] = 1
        for p in row[1]['passages']:
            passages[p['passage_text']] = 1
    data = {'queries': queries, 'answers': answers,
            'well_formed_answers': well_formed_answers,
            'passages': passages}
    for value in data:
        histogram = {}
        for v in data[value]:
            l = len(v.split())
            if l in histogram:
                histogram[l] += 1
            else:
                histogram[l] = 1
        compute_stats(histogram, value)
Example #23
Source File: converttowellformed.py From MSMARCO with MIT License | 5 votes |
def makewf(input, output):
    df = pd.read_json(input)
    df = df.drop('answers', 1)
    df = df.rename(columns={'wellFormedAnswers': 'answers'})
    df = df[df.answers != '[]']
    df.to_json(output)
    return
Example #24
Source File: test_compression.py From recruit with Apache License 2.0 | 5 votes |
def test_with_s3_url(compression, s3_resource):
    # Bucket "pandas-test" created in tests/io/conftest.py
    df = pd.read_json('{"a": [1, 2, 3], "b": [4, 5, 6]}')

    with tm.ensure_clean() as path:
        df.to_json(path, compression=compression)
        with open(path, 'rb') as f:
            s3_resource.Bucket("pandas-test").put_object(Key='test-1', Body=f)

    roundtripped_df = pd.read_json('s3://pandas-test/test-1',
                                   compression=compression)
    assert_frame_equal(df, roundtripped_df)
Example #25
Source File: test_pandas.py From recruit with Apache License 2.0 | 5 votes |
def test_index_false_from_json_to_json(self, orient, index):
    # GH25170
    # Test index=False in from_json to_json
    expected = DataFrame({'a': [1, 2], 'b': [3, 4]})
    dfjson = expected.to_json(orient=orient, index=index)
    result = read_json(dfjson, orient=orient)
    assert_frame_equal(result, expected)
Example #26
Source File: test_compression.py From recruit with Apache License 2.0 | 5 votes |
def test_compression_roundtrip(compression):
    df = pd.DataFrame([[0.123456, 0.234567, 0.567567],
                       [12.32112, 123123.2, 321321.2]],
                      index=['A', 'B'], columns=['X', 'Y', 'Z'])

    with tm.ensure_clean() as path:
        df.to_json(path, compression=compression)
        assert_frame_equal(df, pd.read_json(path, compression=compression))

        # explicitly ensure file was compressed.
        with tm.decompress_file(path, compression) as fh:
            result = fh.read().decode('utf8')
        assert_frame_equal(df, pd.read_json(result))
Example #27
Source File: test_json_table_schema.py From recruit with Apache License 2.0 | 5 votes |
def test_empty_frame_roundtrip(self, strict_check):
    # GH 21287
    df = pd.DataFrame([], columns=['a', 'b', 'c'])
    expected = df.copy()
    out = df.to_json(orient='table')

    result = pd.read_json(out, orient='table')
    # TODO: When DF coercion issue (#21345) is resolved tighten type checks
    tm.assert_frame_equal(expected, result,
                          check_dtype=strict_check,
                          check_index_type=strict_check)
Example #28
Source File: test_json_table_schema.py From recruit with Apache License 2.0 | 5 votes |
def test_read_json_table_orient_raises(self, index_nm, vals, recwarn):
    df = DataFrame(vals, index=pd.Index(range(4), name=index_nm))
    out = df.to_json(orient="table")
    with pytest.raises(NotImplementedError, match='can not yet read '):
        pd.read_json(out, orient="table")
Example #29
Source File: test_json_table_schema.py From recruit with Apache License 2.0 | 5 votes |
def test_read_json_table_orient(self, index_nm, vals, recwarn):
    df = DataFrame(vals, index=pd.Index(range(4), name=index_nm))
    out = df.to_json(orient="table")
    result = pd.read_json(out, orient="table")
    tm.assert_frame_equal(df, result)
Example #30
Source File: test_pandas.py From recruit with Apache License 2.0 | 5 votes |
def test_read_inline_jsonl(self):
    # GH9180
    result = read_json('{"a": 1, "b": 2}\n{"b":2, "a" :1}\n', lines=True)
    expected = DataFrame([[1, 2], [1, 2]], columns=['a', 'b'])
    assert_frame_equal(result, expected)