Python pandas.core.frame.DataFrame.from_dict() Examples
The following are 12
code examples of pandas.core.frame.DataFrame.from_dict().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module
pandas.core.frame.DataFrame
, or try the search function
.
Example #1
Source File: test_stata.py From recruit with Apache License 2.0 | 5 votes |
def test_categorical_order(self, file): # Directly construct using expected codes # Format is is_cat, col_name, labels (in order), underlying data expected = [(True, 'ordered', ['a', 'b', 'c', 'd', 'e'], np.arange(5)), (True, 'reverse', ['a', 'b', 'c', 'd', 'e'], np.arange(5)[::-1]), (True, 'noorder', ['a', 'b', 'c', 'd', 'e'], np.array([2, 1, 4, 0, 3])), (True, 'floating', [ 'a', 'b', 'c', 'd', 'e'], np.arange(0, 5)), (True, 'float_missing', [ 'a', 'd', 'e'], np.array([0, 1, 2, -1, -1])), (False, 'nolabel', [ 1.0, 2.0, 3.0, 4.0, 5.0], np.arange(5)), (True, 'int32_mixed', ['d', 2, 'e', 'b', 'a'], np.arange(5))] cols = [] for is_cat, col, labels, codes in expected: if is_cat: cols.append((col, pd.Categorical.from_codes(codes, labels))) else: cols.append((col, pd.Series(labels, dtype=np.float32))) expected = DataFrame.from_dict(OrderedDict(cols)) # Read with and with out categoricals, ensure order is identical file = getattr(self, file) parsed = read_stata(file) tm.assert_frame_equal(expected, parsed, check_categorical=False) # Check identity of codes for col in expected: if is_categorical_dtype(expected[col]): tm.assert_series_equal(expected[col].cat.codes, parsed[col].cat.codes) tm.assert_index_equal(expected[col].cat.categories, parsed[col].cat.categories)
Example #2
Source File: stata.py From recruit with Apache License 2.0 | 5 votes |
def _do_convert_categoricals(self, data, value_label_dict, lbllist, order_categoricals): """ Converts categorical columns to Categorical type. """ value_labels = list(compat.iterkeys(value_label_dict)) cat_converted_data = [] for col, label in zip(data, lbllist): if label in value_labels: # Explicit call with ordered=True cat_data = Categorical(data[col], ordered=order_categoricals) categories = [] for category in cat_data.categories: if category in value_label_dict[label]: categories.append(value_label_dict[label][category]) else: categories.append(category) # Partially labeled try: cat_data.categories = categories except ValueError: vc = Series(categories).value_counts() repeats = list(vc.index[vc > 1]) repeats = '\n' + '-' * 80 + '\n'.join(repeats) raise ValueError('Value labels for column {col} are not ' 'unique. The repeated labels are:\n' '{repeats}' .format(col=col, repeats=repeats)) # TODO: is the next line needed above in the data(...) method? cat_data = Series(cat_data, index=data.index) cat_converted_data.append((col, cat_data)) else: cat_converted_data.append((col, data[col])) data = DataFrame.from_dict(OrderedDict(cat_converted_data)) return data
Example #3
Source File: stata.py From recruit with Apache License 2.0 | 5 votes |
def _prepare_categoricals(self, data): """Check for categorical columns, retain categorical information for Stata file and convert categorical data to int""" is_cat = [is_categorical_dtype(data[col]) for col in data] self._is_col_cat = is_cat self._value_labels = [] if not any(is_cat): return data get_base_missing_value = StataMissingValue.get_base_missing_value data_formatted = [] for col, col_is_cat in zip(data, is_cat): if col_is_cat: self._value_labels.append(StataValueLabel(data[col])) dtype = data[col].cat.codes.dtype if dtype == np.int64: raise ValueError('It is not possible to export ' 'int64-based categorical data to Stata.') values = data[col].cat.codes.values.copy() # Upcast if needed so that correct missing values can be set if values.max() >= get_base_missing_value(dtype): if dtype == np.int8: dtype = np.int16 elif dtype == np.int16: dtype = np.int32 else: dtype = np.float64 values = np.array(values, dtype=dtype) # Replace missing values with Stata missing value for type values[values == -1] = get_base_missing_value(dtype) data_formatted.append((col, values)) else: data_formatted.append((col, data[col])) return DataFrame.from_dict(OrderedDict(data_formatted))
Example #4
Source File: test_stata.py From vnpy_crypto with MIT License | 5 votes |
def test_categorical_order(self, file): # Directly construct using expected codes # Format is is_cat, col_name, labels (in order), underlying data expected = [(True, 'ordered', ['a', 'b', 'c', 'd', 'e'], np.arange(5)), (True, 'reverse', ['a', 'b', 'c', 'd', 'e'], np.arange(5)[::-1]), (True, 'noorder', ['a', 'b', 'c', 'd', 'e'], np.array([2, 1, 4, 0, 3])), (True, 'floating', [ 'a', 'b', 'c', 'd', 'e'], np.arange(0, 5)), (True, 'float_missing', [ 'a', 'd', 'e'], np.array([0, 1, 2, -1, -1])), (False, 'nolabel', [ 1.0, 2.0, 3.0, 4.0, 5.0], np.arange(5)), (True, 'int32_mixed', ['d', 2, 'e', 'b', 'a'], np.arange(5))] cols = [] for is_cat, col, labels, codes in expected: if is_cat: cols.append((col, pd.Categorical.from_codes(codes, labels))) else: cols.append((col, pd.Series(labels, dtype=np.float32))) expected = DataFrame.from_dict(OrderedDict(cols)) # Read with and with out categoricals, ensure order is identical file = getattr(self, file) parsed = read_stata(file) tm.assert_frame_equal(expected, parsed, check_categorical=False) # Check identity of codes for col in expected: if is_categorical_dtype(expected[col]): tm.assert_series_equal(expected[col].cat.codes, parsed[col].cat.codes) tm.assert_index_equal(expected[col].cat.categories, parsed[col].cat.categories)
Example #5
Source File: stata.py From vnpy_crypto with MIT License | 5 votes |
def _do_convert_categoricals(self, data, value_label_dict, lbllist, order_categoricals): """ Converts categorical columns to Categorical type. """ value_labels = list(compat.iterkeys(value_label_dict)) cat_converted_data = [] for col, label in zip(data, lbllist): if label in value_labels: # Explicit call with ordered=True cat_data = Categorical(data[col], ordered=order_categoricals) categories = [] for category in cat_data.categories: if category in value_label_dict[label]: categories.append(value_label_dict[label][category]) else: categories.append(category) # Partially labeled try: cat_data.categories = categories except ValueError: vc = Series(categories).value_counts() repeats = list(vc.index[vc > 1]) repeats = '\n' + '-' * 80 + '\n'.join(repeats) msg = 'Value labels for column {0} are not unique. The ' \ 'repeated labels are:\n{1}'.format(col, repeats) raise ValueError(msg) # TODO: is the next line needed above in the data(...) method? cat_data = Series(cat_data, index=data.index) cat_converted_data.append((col, cat_data)) else: cat_converted_data.append((col, data[col])) data = DataFrame.from_dict(OrderedDict(cat_converted_data)) return data
Example #6
Source File: stata.py From vnpy_crypto with MIT License | 5 votes |
def _prepare_categoricals(self, data): """Check for categorical columns, retain categorical information for Stata file and convert categorical data to int""" is_cat = [is_categorical_dtype(data[col]) for col in data] self._is_col_cat = is_cat self._value_labels = [] if not any(is_cat): return data get_base_missing_value = StataMissingValue.get_base_missing_value data_formatted = [] for col, col_is_cat in zip(data, is_cat): if col_is_cat: self._value_labels.append(StataValueLabel(data[col])) dtype = data[col].cat.codes.dtype if dtype == np.int64: raise ValueError('It is not possible to export ' 'int64-based categorical data to Stata.') values = data[col].cat.codes.values.copy() # Upcast if needed so that correct missing values can be set if values.max() >= get_base_missing_value(dtype): if dtype == np.int8: dtype = np.int16 elif dtype == np.int16: dtype = np.int32 else: dtype = np.float64 values = np.array(values, dtype=dtype) # Replace missing values with Stata missing value for type values[values == -1] = get_base_missing_value(dtype) data_formatted.append((col, values)) else: data_formatted.append((col, data[col])) return DataFrame.from_dict(OrderedDict(data_formatted))
Example #7
Source File: categorical.py From Computable with MIT License | 5 votes |
def describe(self): """ Returns a dataframe with frequency and counts by level. """ # Hack? from pandas.core.frame import DataFrame grouped = DataFrame(self.labels).groupby(0) counts = grouped.count().values.squeeze() freqs = counts / float(counts.sum()) return DataFrame.from_dict({ 'counts': counts, 'freqs': freqs, 'levels': self.levels }).set_index('levels')
Example #8
Source File: test_stata.py From predictive-maintenance-using-machine-learning with Apache License 2.0 | 5 votes |
def test_categorical_order(self, file): # Directly construct using expected codes # Format is is_cat, col_name, labels (in order), underlying data expected = [(True, 'ordered', ['a', 'b', 'c', 'd', 'e'], np.arange(5)), (True, 'reverse', ['a', 'b', 'c', 'd', 'e'], np.arange(5)[::-1]), (True, 'noorder', ['a', 'b', 'c', 'd', 'e'], np.array([2, 1, 4, 0, 3])), (True, 'floating', [ 'a', 'b', 'c', 'd', 'e'], np.arange(0, 5)), (True, 'float_missing', [ 'a', 'd', 'e'], np.array([0, 1, 2, -1, -1])), (False, 'nolabel', [ 1.0, 2.0, 3.0, 4.0, 5.0], np.arange(5)), (True, 'int32_mixed', ['d', 2, 'e', 'b', 'a'], np.arange(5))] cols = [] for is_cat, col, labels, codes in expected: if is_cat: cols.append((col, pd.Categorical.from_codes(codes, labels))) else: cols.append((col, pd.Series(labels, dtype=np.float32))) expected = DataFrame.from_dict(OrderedDict(cols)) # Read with and with out categoricals, ensure order is identical file = getattr(self, file) parsed = read_stata(file) tm.assert_frame_equal(expected, parsed, check_categorical=False) # Check identity of codes for col in expected: if is_categorical_dtype(expected[col]): tm.assert_series_equal(expected[col].cat.codes, parsed[col].cat.codes) tm.assert_index_equal(expected[col].cat.categories, parsed[col].cat.categories)
Example #9
Source File: stata.py From predictive-maintenance-using-machine-learning with Apache License 2.0 | 5 votes |
def _do_convert_categoricals(self, data, value_label_dict, lbllist, order_categoricals): """ Converts categorical columns to Categorical type. """ value_labels = list(compat.iterkeys(value_label_dict)) cat_converted_data = [] for col, label in zip(data, lbllist): if label in value_labels: # Explicit call with ordered=True cat_data = Categorical(data[col], ordered=order_categoricals) categories = [] for category in cat_data.categories: if category in value_label_dict[label]: categories.append(value_label_dict[label][category]) else: categories.append(category) # Partially labeled try: cat_data.categories = categories except ValueError: vc = Series(categories).value_counts() repeats = list(vc.index[vc > 1]) repeats = '\n' + '-' * 80 + '\n'.join(repeats) raise ValueError('Value labels for column {col} are not ' 'unique. The repeated labels are:\n' '{repeats}' .format(col=col, repeats=repeats)) # TODO: is the next line needed above in the data(...) method? cat_data = Series(cat_data, index=data.index) cat_converted_data.append((col, cat_data)) else: cat_converted_data.append((col, data[col])) data = DataFrame.from_dict(OrderedDict(cat_converted_data)) return data
Example #10
Source File: stata.py From predictive-maintenance-using-machine-learning with Apache License 2.0 | 5 votes |
def _prepare_categoricals(self, data): """Check for categorical columns, retain categorical information for Stata file and convert categorical data to int""" is_cat = [is_categorical_dtype(data[col]) for col in data] self._is_col_cat = is_cat self._value_labels = [] if not any(is_cat): return data get_base_missing_value = StataMissingValue.get_base_missing_value data_formatted = [] for col, col_is_cat in zip(data, is_cat): if col_is_cat: self._value_labels.append(StataValueLabel(data[col])) dtype = data[col].cat.codes.dtype if dtype == np.int64: raise ValueError('It is not possible to export ' 'int64-based categorical data to Stata.') values = data[col].cat.codes.values.copy() # Upcast if needed so that correct missing values can be set if values.max() >= get_base_missing_value(dtype): if dtype == np.int8: dtype = np.int16 elif dtype == np.int16: dtype = np.int32 else: dtype = np.float64 values = np.array(values, dtype=dtype) # Replace missing values with Stata missing value for type values[values == -1] = get_base_missing_value(dtype) data_formatted.append((col, values)) else: data_formatted.append((col, data[col])) return DataFrame.from_dict(OrderedDict(data_formatted))
Example #11
Source File: pandas2ri.py From rpy2 with GNU General Public License v2.0 | 5 votes |
def rpy2py_dataframe(obj): items = OrderedDict((k, rpy2py(v) if isinstance(v, Sexp) else v) for k, v in obj.items()) res = PandasDataFrame.from_dict(items) res.index = obj.rownames return res
Example #12
Source File: test_stata.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_categorical_order(self, file): # Directly construct using expected codes # Format is is_cat, col_name, labels (in order), underlying data expected = [(True, 'ordered', ['a', 'b', 'c', 'd', 'e'], np.arange(5)), (True, 'reverse', ['a', 'b', 'c', 'd', 'e'], np.arange(5)[::-1]), (True, 'noorder', ['a', 'b', 'c', 'd', 'e'], np.array([2, 1, 4, 0, 3])), (True, 'floating', [ 'a', 'b', 'c', 'd', 'e'], np.arange(0, 5)), (True, 'float_missing', [ 'a', 'd', 'e'], np.array([0, 1, 2, -1, -1])), (False, 'nolabel', [ 1.0, 2.0, 3.0, 4.0, 5.0], np.arange(5)), (True, 'int32_mixed', ['d', 2, 'e', 'b', 'a'], np.arange(5))] cols = [] for is_cat, col, labels, codes in expected: if is_cat: cols.append((col, pd.Categorical.from_codes(codes, labels))) else: cols.append((col, pd.Series(labels, dtype=np.float32))) expected = DataFrame.from_dict(OrderedDict(cols)) # Read with and with out categoricals, ensure order is identical file = getattr(self, file) parsed = read_stata(file) tm.assert_frame_equal(expected, parsed, check_categorical=False) # Check identity of codes for col in expected: if is_categorical_dtype(expected[col]): tm.assert_series_equal(expected[col].cat.codes, parsed[col].cat.codes) tm.assert_index_equal(expected[col].cat.categories, parsed[col].cat.categories)