Python pandas.DataFrame.from_dict() Examples
The following are 30 code examples of pandas.DataFrame.from_dict().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module pandas.DataFrame, or try the search function.
Example #1
Source File: test_convert_to.py From recruit with Apache License 2.0 | 13 votes |
def test_to_dict_index_dtypes(self, into, expected): # GH 18580 # When using to_dict(orient='index') on a dataframe with int # and float columns only the int columns were cast to float df = DataFrame({'int_col': [1, 2, 3], 'float_col': [1.0, 2.0, 3.0]}) result = df.to_dict(orient='index', into=into) cols = ['int_col', 'float_col'] result = DataFrame.from_dict(result, orient='index')[cols] expected = DataFrame.from_dict(expected, orient='index')[cols] tm.assert_frame_equal(result, expected)
Example #2
Source File: test_excel.py From vnpy_crypto with MIT License | 11 votes |
def test_reader_converters(self, ext):
    """Read the ``test_converters`` workbook with per-column converters.

    Converters are registered both by column name ('IntCol', 'FloatCol')
    and by column position (2, 3); the parsed sheet must equal the
    hand-built expected frame.
    """
    basename = 'test_converters'

    expected_columns = OrderedDict([
        ("IntCol", [1, 2, -3, -1000, 0]),
        ("FloatCol", [12.5, np.nan, 18.3, 19.2, 0.000000005]),
        ("BoolCol", ['Found', 'Found', 'Found', 'Not found', 'Found']),
        ("StrCol", ['1', np.nan, '3', '4', '5']),
    ])
    expected = DataFrame.from_dict(expected_columns)

    def to_int(x):
        return int(x) if x != '' else -1000

    def times_ten(x):
        return 10 * x if x else np.nan

    def found_flag(x):
        return 'Found' if x != '' else 'Not found'

    def to_text(x):
        return str(x) if x else ''

    converters = {
        'IntCol': to_int,
        'FloatCol': times_ten,
        2: found_flag,
        3: to_text,
    }

    # should read in correctly and set types of single cells (not array
    # dtypes)
    actual = self.get_exceldf(basename, ext, 'Sheet1',
                              converters=converters)
    tm.assert_frame_equal(actual, expected)
Example #3
Source File: test_excel.py From predictive-maintenance-using-machine-learning with Apache License 2.0 | 10 votes |
def test_reader_seconds(self, ext):
    """Test reading times with and without milliseconds. GH5945."""
    times = [
        time(1, 2, 3),
        time(2, 45, 56, 100000),
        time(4, 29, 49, 200000),
        time(6, 13, 42, 300000),
        time(7, 57, 35, 400000),
        time(9, 41, 28, 500000),
        time(11, 25, 21, 600000),
        time(13, 9, 14, 700000),
        time(14, 53, 7, 800000),
        time(16, 37, 0, 900000),
        time(18, 20, 54),
    ]
    expected = DataFrame.from_dict({"Time": times})

    # Both fixture workbooks are expected to yield the same frame.
    for basename in ('times_1900', 'times_1904'):
        actual = self.get_exceldf(basename, ext, 'Sheet1')
        tm.assert_frame_equal(actual, expected)
Example #4
Source File: account.py From ctpbee with MIT License | 7 votes |
def result(self):
    """Assemble the daily records into a DataFrame and compute the result.

    Every value of ``self.daily_life`` is a mapping of field name to
    value.  The values are gathered column-wise, the ``"date"`` column
    becomes the index, and the frame is handed to ``self._cal_result``.
    When matplotlib can be imported, the ``balance`` column is also
    plotted and shown as a side effect.

    Returns
    -------
    Whatever ``self._cal_result`` returns for the assembled DataFrame.
    """
    # Build the final result from the data stored in daily_life.
    columns = defaultdict(list)
    for daily in self.daily_life.values():
        for key, value in daily.items():
            columns[key].append(value)
    df = DataFrame.from_dict(columns).set_index("date")

    try:
        import matplotlib.pyplot as plt
        df['balance'].plot()
        plt.show()
    except ImportError:
        # Plotting is optional: without matplotlib only the chart is skipped.
        pass

    # Deliberately not inside a ``finally`` clause: returning from
    # ``finally`` would silently discard any other exception raised above.
    return self._cal_result(df)
Example #5
Source File: test_excel.py From recruit with Apache License 2.0 | 6 votes |
def test_reader_seconds(self, ext):
    """Test reading times with and without milliseconds. GH5945."""
    times = [
        time(1, 2, 3),
        time(2, 45, 56, 100000),
        time(4, 29, 49, 200000),
        time(6, 13, 42, 300000),
        time(7, 57, 35, 400000),
        time(9, 41, 28, 500000),
        time(11, 25, 21, 600000),
        time(13, 9, 14, 700000),
        time(14, 53, 7, 800000),
        time(16, 37, 0, 900000),
        time(18, 20, 54),
    ]
    expected = DataFrame.from_dict({"Time": times})

    # Both fixture workbooks are expected to yield the same frame.
    for basename in ('times_1900', 'times_1904'):
        actual = self.get_exceldf(basename, ext, 'Sheet1')
        tm.assert_frame_equal(actual, expected)
Example #6
Source File: encoder.py From sk-dist with Apache License 2.0 | 5 votes |
def _process_input(self, X, fit=True): """ Converts flexible intput type into pandas DataFrame. Handles pandas DataFrame, dictionary, pyspark DataFrame, or numpy ndarray. """ if isinstance(X, DataFrame): out = X elif isinstance(X, dict): try: out = DataFrame.from_dict( X, orient="columns") except: raise ValueError("Cannot parse input") elif isinstance(X, np.ndarray) or isinstance(X, list): if fit and self.col_names is None: raise ValueError( "Must supply col_names with numpy array input") elif fit: out = DataFrame(X, columns=self.col_names) else: out = DataFrame(X, columns=self.fields_) else: from pyspark.sql import DataFrame as SparkDataFrame if isinstance(X, SparkDataFrame): out = X.toPandas() else: raise ValueError("Cannot parse input") if fit: self.fields_ = list(out.columns) return out
Example #7
Source File: solver.py From pastas with MIT License | 5 votes |
def get_realizations(self, func, n=None, name=None, **kwargs):
    """Internal method to obtain n number of realizations.

    ``func`` is called once per parameter set returned by
    ``self.get_parameter_sample(n=n, name=name)``; the results become
    the columns of the returned DataFrame, labelled 0, 1, 2, ...
    A truthy ``name`` is also forwarded to ``func`` as a keyword.
    """
    if name:
        kwargs["name"] = name

    samples = self.get_parameter_sample(n=n, name=name)
    realizations = {
        idx: func(parameters=sample, **kwargs)
        for idx, sample in enumerate(samples)
    }
    return DataFrame.from_dict(realizations, orient="columns")
Example #8
Source File: read.py From anndata with BSD 3-Clause "New" or "Revised" License | 5 votes |
def read_umi_tools(filename: PathLike, dtype: str = "float32") -> AnnData:
    """\
    Read a gzipped condensed count matrix from umi_tools.

    Each line after the header is split on tabs: the second field keys
    the rows (``obs_names``), the first field keys the columns
    (``var_names``) and the third field is the integer count. Missing
    combinations are filled with zeros.

    Parameters
    ----------
    filename
        File name to read from.
    dtype
        Passed through unchanged as the ``dtype`` argument of
        :class:`AnnData`.
    """
    # import pandas for conversion of a dict of dicts into a matrix
    # import gzip to read a gzipped file :-)
    import gzip
    from pandas import DataFrame

    dod = {}  # this will contain basically everything
    # The context manager closes the file even when a line fails to parse.
    with gzip.open(fspath(filename)) as fh:
        fh.readline()  # read (and discard) the first line: the header
        for line in fh:
            # gzip read bytes, hence the decoding
            t = line.decode("ascii").split("\t")
            dod.setdefault(t[1], {})[t[0]] = int(t[2])

    df = DataFrame.from_dict(dod, orient="index")  # build the matrix
    df.fillna(value=0.0, inplace=True)  # many NaN, replace with zeros
    return AnnData(
        np.array(df),
        dict(obs_names=df.index),
        dict(var_names=df.columns),
        dtype=dtype,
    )
Example #9
Source File: annotation_database_parser.py From HistomicsTK with Apache License 2.0 | 5 votes |
def _add_item_to_sqlite(dbcon, item):
    """Append one item record as a single row of the ``items`` table.

    ``item`` is deep-copied first, so the caller's mapping is not
    modified. Keys that are not part of the table schema are dropped.
    """
    # column name -> SQL column type of the ``items`` table
    item_info_dtypes = {
        '_id': String(),
        '_modelType': String(),
        'baseParentId': String(),
        'baseParentType': String(),
        'copyOfItem': String(),
        'created': String(),
        'creatorId': String(),
        'description': String(),
        'folderId': String(),
        'largeImage': String(),
        'name': String(),
        'size': Integer(),
        'updated': String(),
    }

    # modify item info to prep for appending to sqlite table
    record = copy.deepcopy(item)
    record['largeImage'] = str(record['largeImage'])

    # in case anything is not in the schema, drop it
    record = {
        key: value for key, value in record.items()
        if key in item_info_dtypes
    }

    # convert to a one-row df and add to items table
    row = DataFrame.from_dict(record, orient='index').T
    row.to_sql(
        name='items',
        con=dbcon,
        if_exists='append',
        dtype=item_info_dtypes,
        index=False,
    )
Example #10
Source File: annotation_database_parser.py From HistomicsTK with Apache License 2.0 | 5 votes |
def _add_folder_to_sqlite(dbcon, folder_info):
    """Append one folder record as a single row of the ``folders`` table.

    Keys of ``folder_info`` that are not part of the table schema are
    dropped; the caller's mapping itself is not modified.
    """
    # column name -> SQL column type of the ``folders`` table
    folder_info_dtypes = {
        '_accessLevel': Integer(),
        '_id': String(),
        '_modelType': String(),
        'baseParentId': String(),
        'baseParentType': String(),
        'created': String(),
        'creatorId': String(),
        'description': String(),
        'name': String(),
        'parentCollection': String(),
        'parentId': String(),
        'public': Boolean(),
        'size': Integer(),
        'updated': String(),
        'folder_path': String(),
    }

    # in case anything is not in the schema, drop it
    record = {
        key: value for key, value in folder_info.items()
        if key in folder_info_dtypes
    }

    # convert to a one-row df and add to folders table
    row = DataFrame.from_dict(record, orient='index').T
    row.to_sql(
        name='folders',
        con=dbcon,
        if_exists='append',
        dtype=folder_info_dtypes,
        index=False,
    )
Example #11
Source File: load.py From bifacial_radiance with BSD 3-Clause "New" or "Revised" License | 5 votes |
def _exportTrackerDict(trackerdict, savefile, reindex): """ Save a TrackerDict output as a ``.csv`` file. Parameters ---------- trackerdict : Dictionary The tracker dictionary to save savefile : str Path to .csv save file location reindex : bool Boolean indicating if trackerdict should be resampled to include all 8760 hours in the year (even those when the sun is not up and irradiance results is empty). """ from pandas import DataFrame as df import numpy as np import pandas as pd # convert trackerdict into dataframe d = df.from_dict(trackerdict,orient='index',columns=['dhi','ghi','Wm2Back','Wm2Front','theta','surf_tilt','surf_azm','ground_clearance']) d['Wm2BackAvg'] = [np.nanmean(i) for i in d['Wm2Back']] d['Wm2FrontAvg'] = [np.nanmean(i) for i in d['Wm2Front']] d['BifiRatio'] = d['Wm2BackAvg'] / d['Wm2FrontAvg'] if reindex is True: # change to proper timestamp and interpolate to get 8760 output d['measdatetime'] = d.index d=d.set_index(pd.to_datetime(d['measdatetime'] , format='%m_%d_%H')) d=d.resample('H').asfreq() d.to_csv(savefile)
Example #12
Source File: test_multilevel.py From elasticintel with GNU General Public License v3.0 | 5 votes |
def test_frame_dict_constructor_empty_series(self): s1 = Series([ 1, 2, 3, 4 ], index=MultiIndex.from_tuples([(1, 2), (1, 3), (2, 2), (2, 4)])) s2 = Series([ 1, 2, 3, 4 ], index=MultiIndex.from_tuples([(1, 2), (1, 3), (3, 2), (3, 4)])) s3 = Series() # it works! DataFrame({'foo': s1, 'bar': s2, 'baz': s3}) DataFrame.from_dict({'foo': s1, 'baz': s3, 'bar': s2})
Example #13
Source File: test_merge.py From elasticintel with GNU General Public License v3.0 | 5 votes |
def test_merge_nosort(self): # #2098, anything to do? from datetime import datetime d = {"var1": np.random.randint(0, 10, size=10), "var2": np.random.randint(0, 10, size=10), "var3": [datetime(2012, 1, 12), datetime(2011, 2, 4), datetime( 2010, 2, 3), datetime(2012, 1, 12), datetime( 2011, 2, 4), datetime(2012, 4, 3), datetime( 2012, 3, 4), datetime(2008, 5, 1), datetime(2010, 2, 3), datetime(2012, 2, 3)]} df = DataFrame.from_dict(d) var3 = df.var3.unique() var3.sort() new = DataFrame.from_dict({"var3": var3, "var8": np.random.random(7)}) result = df.merge(new, on="var3", sort=False) exp = merge(df, new, on='var3', sort=False) assert_frame_equal(result, exp) assert (df.var3.unique() == result.var3.unique()).all()
Example #14
Source File: test_multilevel.py From coffeegrindsize with MIT License | 5 votes |
def test_frame_dict_constructor_empty_series(self): s1 = Series([ 1, 2, 3, 4 ], index=MultiIndex.from_tuples([(1, 2), (1, 3), (2, 2), (2, 4)])) s2 = Series([ 1, 2, 3, 4 ], index=MultiIndex.from_tuples([(1, 2), (1, 3), (3, 2), (3, 4)])) s3 = Series() # it works! DataFrame({'foo': s1, 'bar': s2, 'baz': s3}) DataFrame.from_dict({'foo': s1, 'baz': s3, 'bar': s2})
Example #15
Source File: test_reshape.py From coffeegrindsize with MIT License | 5 votes |
def test_get_dummies_dont_sparsify_all_columns(self, sparse): # GH18914 df = DataFrame.from_dict(OrderedDict([('GDP', [1, 2]), ('Nation', ['AB', 'CD'])])) df = get_dummies(df, columns=['Nation'], sparse=sparse) df2 = df.reindex(columns=['GDP']) tm.assert_frame_equal(df[['GDP']], df2)
Example #16
Source File: test_reshape.py From recruit with Apache License 2.0 | 5 votes |
def test_get_dummies_dont_sparsify_all_columns(self, sparse): # GH18914 df = DataFrame.from_dict(OrderedDict([('GDP', [1, 2]), ('Nation', ['AB', 'CD'])])) df = get_dummies(df, columns=['Nation'], sparse=sparse) df2 = df.reindex(columns=['GDP']) tm.assert_frame_equal(df[['GDP']], df2)
Example #17
Source File: _posthocs.py From scikit-posthocs with BSD 3-Clause "New" or "Revised" License | 5 votes |
def __convert_to_block_df(a, y_col=None, group_col=None, block_col=None, melted=False): if melted and not all([i is not None for i in [block_col, group_col, y_col]]): raise ValueError('`block_col`, `group_col`, `y_col` should be explicitly specified if using melted data') if isinstance(a, DataFrame) and not melted: x = a.copy(deep=True) group_col = 'groups' block_col = 'blocks' y_col = 'y' x.columns.name = group_col x.index.name = block_col x = x.reset_index().melt(id_vars=block_col, var_name=group_col, value_name=y_col) elif isinstance(a, DataFrame) and melted: x = DataFrame.from_dict({'groups': a[group_col], 'blocks': a[block_col], 'y': a[y_col]}) elif not isinstance(a, DataFrame): x = np.array(a) x = DataFrame(x, index=np.arange(x.shape[0]), columns=np.arange(x.shape[1])) if not melted: group_col = 'groups' block_col = 'blocks' y_col = 'y' x.columns.name = group_col x.index.name = block_col x = x.reset_index().melt(id_vars=block_col, var_name=group_col, value_name=y_col) else: x.rename(columns={group_col: 'groups', block_col: 'blocks', y_col: 'y'}, inplace=True) group_col = 'groups' block_col = 'blocks' y_col = 'y' return x, 'y', 'groups', 'blocks'
Example #18
Source File: simpletable.py From pyphot with MIT License | 5 votes |
def to_pandas(self, **kwargs):
    """ Construct a pandas dataframe

    The table is exported with ``self.to_dict(keys=keys)`` and the
    resulting mapping is handed to ``pandas.DataFrame.from_dict``.

    Parameters
    ----------
    keys: sequence, optional
        ordered subset of columns to export

    **kwargs
        every other keyword argument is forwarded unchanged to
        ``DataFrame.from_dict`` (for instance ``orient``, ``dtype`` or
        ``columns``); keywords that method does not accept raise
        ``TypeError``

    Returns
    -------
    df : DataFrame

    Raises
    ------
    ImportError
        if pandas cannot be imported (a short notice is printed first)
    """
    # Only the import is guarded, so an ImportError raised while building
    # the frame is not misreported as a missing pandas.
    try:
        from pandas import DataFrame
    except ImportError:
        print("Pandas import error")
        raise

    keys = kwargs.pop('keys', None)
    return DataFrame.from_dict(self.to_dict(keys=keys), **kwargs)
Example #19
Source File: simpletable.py From pyphot with MIT License | 5 votes |
def to_pandas(self, **kwargs):
    """ Construct a pandas dataframe

    The table is exported with ``self.to_dict(keys=keys)`` and the
    resulting mapping is handed to ``pandas.DataFrame.from_dict``.

    Parameters
    ----------
    keys: sequence, optional
        ordered subset of columns to export

    **kwargs
        every other keyword argument is forwarded unchanged to
        ``DataFrame.from_dict`` (for instance ``orient``, ``dtype`` or
        ``columns``); keywords that method does not accept raise
        ``TypeError``

    Returns
    -------
    df : DataFrame

    Raises
    ------
    ImportError
        if pandas cannot be imported (a short notice is printed first)
    """
    # Only the import is guarded, so an ImportError raised while building
    # the frame is not misreported as a missing pandas.
    try:
        from pandas import DataFrame
    except ImportError:
        print("Pandas import error")
        raise

    keys = kwargs.pop('keys', None)
    return DataFrame.from_dict(self.to_dict(keys=keys), **kwargs)
Example #20
Source File: combine-sqm-tables.py From SqueezeMeta with GNU General Public License v3.0 | 5 votes |
def write_feature_dict(sampleNames, featureDict, outName):
    """Write ``featureDict`` as a tab-separated table to ``outName``.

    The outer keys of ``featureDict`` become columns and the inner keys
    become rows. Missing values are written as 0, rows are sorted by
    label, and columns are selected and ordered by ``sampleNames``.
    """
    table = DataFrame.from_dict(featureDict)
    table = table.fillna(0).sort_index()
    table[sampleNames].to_csv(outName, sep='\t')
Example #21
Source File: test_multilevel.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_frame_dict_constructor_empty_series(self): s1 = Series([ 1, 2, 3, 4 ], index=MultiIndex.from_tuples([(1, 2), (1, 3), (2, 2), (2, 4)])) s2 = Series([ 1, 2, 3, 4 ], index=MultiIndex.from_tuples([(1, 2), (1, 3), (3, 2), (3, 4)])) s3 = Series() # it works! DataFrame({'foo': s1, 'bar': s2, 'baz': s3}) DataFrame.from_dict({'foo': s1, 'baz': s3, 'bar': s2})
Example #22
Source File: test_reshape.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_get_dummies_dont_sparsify_all_columns(self, sparse): # GH18914 df = DataFrame.from_dict(OrderedDict([('GDP', [1, 2]), ('Nation', ['AB', 'CD'])])) df = get_dummies(df, columns=['Nation'], sparse=sparse) df2 = df.reindex(columns=['GDP']) tm.assert_frame_equal(df[['GDP']], df2)
Example #23
Source File: common.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_scientific_no_exponent(self):
    """gh-12215: values like '2e' survive a CSV round trip unchanged.

    The frame is written to CSV once and read back with every entry of
    ``self.float_precision_choices``; each result must equal the
    original frame.
    """
    # see gh-12215
    columns = OrderedDict([('w', ['2e']), ('x', ['3E']),
                           ('y', ['42e']), ('z', ['632E'])])
    df = DataFrame.from_dict(columns)
    data = df.to_csv(index=False)

    for prec in self.float_precision_choices:
        df_roundtrip = self.read_csv(StringIO(data), float_precision=prec)
        tm.assert_frame_equal(df_roundtrip, df)
Example #24
Source File: test_common.py From predictive-maintenance-using-machine-learning with Apache License 2.0 | 5 votes |
def test_scientific_no_exponent(all_parsers):
    """gh-12215: values like "2e" survive a CSV round trip unchanged.

    The frame is written to CSV once and read back with every entry of
    the parser's ``float_precision_choices``; each result must equal the
    original frame.
    """
    # see gh-12215
    parser = all_parsers
    columns = OrderedDict([("w", ["2e"]), ("x", ["3E"]),
                           ("y", ["42e"]), ("z", ["632E"])])
    df = DataFrame.from_dict(columns)
    data = df.to_csv(index=False)

    for precision in parser.float_precision_choices:
        df_roundtrip = parser.read_csv(StringIO(data),
                                       float_precision=precision)
        tm.assert_frame_equal(df_roundtrip, df)
Example #25
Source File: test_multilevel.py From recruit with Apache License 2.0 | 5 votes |
def test_frame_dict_constructor_empty_series(self): s1 = Series([ 1, 2, 3, 4 ], index=MultiIndex.from_tuples([(1, 2), (1, 3), (2, 2), (2, 4)])) s2 = Series([ 1, 2, 3, 4 ], index=MultiIndex.from_tuples([(1, 2), (1, 3), (3, 2), (3, 4)])) s3 = Series() # it works! DataFrame({'foo': s1, 'bar': s2, 'baz': s3}) DataFrame.from_dict({'foo': s1, 'baz': s3, 'bar': s2})
Example #26
Source File: test_common.py From recruit with Apache License 2.0 | 5 votes |
def test_scientific_no_exponent(all_parsers):
    """gh-12215: values like "2e" survive a CSV round trip unchanged.

    The frame is written to CSV once and read back with every entry of
    the parser's ``float_precision_choices``; each result must equal the
    original frame.
    """
    # see gh-12215
    parser = all_parsers
    columns = OrderedDict([("w", ["2e"]), ("x", ["3E"]),
                           ("y", ["42e"]), ("z", ["632E"])])
    df = DataFrame.from_dict(columns)
    data = df.to_csv(index=False)

    for precision in parser.float_precision_choices:
        df_roundtrip = parser.read_csv(StringIO(data),
                                       float_precision=precision)
        tm.assert_frame_equal(df_roundtrip, df)
Example #27
Source File: test_multilevel.py From vnpy_crypto with MIT License | 5 votes |
def test_frame_dict_constructor_empty_series(self): s1 = Series([ 1, 2, 3, 4 ], index=MultiIndex.from_tuples([(1, 2), (1, 3), (2, 2), (2, 4)])) s2 = Series([ 1, 2, 3, 4 ], index=MultiIndex.from_tuples([(1, 2), (1, 3), (3, 2), (3, 4)])) s3 = Series() # it works! DataFrame({'foo': s1, 'bar': s2, 'baz': s3}) DataFrame.from_dict({'foo': s1, 'baz': s3, 'bar': s2})
Example #28
Source File: test_reshape.py From vnpy_crypto with MIT License | 5 votes |
def test_get_dummies_dont_sparsify_all_columns(self, sparse): # GH18914 df = DataFrame.from_dict(OrderedDict([('GDP', [1, 2]), ('Nation', ['AB', 'CD'])])) df = get_dummies(df, columns=['Nation'], sparse=sparse) df2 = df.reindex(columns=['GDP']) tm.assert_frame_equal(df[['GDP']], df2)
Example #29
Source File: common.py From vnpy_crypto with MIT License | 5 votes |
def test_scientific_no_exponent(self):
    """gh-12215: values like '2e' survive a CSV round trip unchanged.

    The frame is written to CSV once and read back with every entry of
    ``self.float_precision_choices``; each result must equal the
    original frame.
    """
    # see gh-12215
    columns = OrderedDict([('w', ['2e']), ('x', ['3E']),
                           ('y', ['42e']), ('z', ['632E'])])
    df = DataFrame.from_dict(columns)
    data = df.to_csv(index=False)

    for prec in self.float_precision_choices:
        df_roundtrip = self.read_csv(StringIO(data), float_precision=prec)
        tm.assert_frame_equal(df_roundtrip, df)
Example #30
Source File: test_merge.py From Computable with MIT License | 5 votes |
def test_merge_nosort(self): # #2098, anything to do? from datetime import datetime d = {"var1": np.random.randint(0, 10, size=10), "var2": np.random.randint(0, 10, size=10), "var3": [datetime(2012, 1, 12), datetime(2011, 2, 4), datetime( 2010, 2, 3), datetime(2012, 1, 12), datetime( 2011, 2, 4), datetime(2012, 4, 3), datetime( 2012, 3, 4), datetime(2008, 5, 1), datetime(2010, 2, 3), datetime(2012, 2, 3)]} df = DataFrame.from_dict(d) var3 = df.var3.unique() var3.sort() new = DataFrame.from_dict({"var3": var3, "var8": np.random.random(7)}) result = df.merge(new, on="var3", sort=False) exp = merge(df, new, on='var3', sort=False) assert_frame_equal(result, exp) self.assert_((df.var3.unique() == result.var3.unique()).all())