Python pandas.SparseDtype() Examples
The following are 27 code examples of pandas.SparseDtype().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module pandas, or try the search function.
Example #1
Source File: filtering_fe_autotype.py From dash-docs with MIT License | 6 votes |
def table_type(df_column):
    """Classify a pandas column by its extension dtype.

    Args:
        df_column: Object exposing a ``dtype`` attribute (e.g. a ``pd.Series``).

    Returns:
        One of the strings ``'datetime'``, ``'text'``, ``'numeric'`` or
        ``'any'``. Dtypes not listed below (including plain NumPy dtypes)
        map to ``'any'``.
    """
    # Note - this only works with Pandas >= 1.0.0
    if sys.version_info < (3, 0):  # Pandas 1.0.0 does not support Python 2
        return 'any'

    column_dtype = df_column.dtype

    if isinstance(column_dtype, pd.DatetimeTZDtype):
        # Return a plain string. A trailing comma here would turn the result
        # into the 1-tuple ('datetime',), unlike every other branch.
        return 'datetime'

    text_dtypes = (
        pd.StringDtype,
        pd.BooleanDtype,
        pd.CategoricalDtype,
        pd.PeriodDtype,
    )
    if isinstance(column_dtype, text_dtypes):
        return 'text'

    numeric_dtypes = (
        pd.SparseDtype,
        pd.IntervalDtype,
        pd.Int8Dtype,
        pd.Int16Dtype,
        pd.Int32Dtype,
        pd.Int64Dtype,
    )
    if isinstance(column_dtype, numeric_dtypes):
        return 'numeric'

    return 'any'
Example #2
Source File: test_sparse.py From coffeegrindsize with MIT License | 6 votes |
def test_where_series(self, data, na_value):
    """Check ``Series.where`` on a Series built from ``data``, with and without ``other``."""
    assert data[0] != data[1]
    array_type = type(data)
    first, second = data[:2]

    def as_series(values, dtype):
        # Build the array through the array type's own constructor hook.
        return pd.Series(array_type._from_sequence(values, dtype=dtype))

    series = as_series([first, first, second, second], data.dtype)

    # No replacement supplied: masked-out slots are expected to equal
    # ``na_value`` and the expected dtype is SparseDtype('float', 0.0).
    mask = np.array([True, True, False, False])
    result = series.where(mask)
    float_sparse = SparseDtype('float', 0.0)
    expected = as_series([first, first, na_value, na_value], float_sparse)
    self.assert_series_equal(result, expected)

    # Replacement supplied: masked-out slots are taken from ``replacement``
    # and the original dtype is expected.
    replacement = array_type._from_sequence(
        [first, second, first, second], dtype=data.dtype
    )
    mask = np.array([True, False, True, True])
    result = series.where(mask, replacement)
    expected = as_series([first, second, second, second], data.dtype)
    self.assert_series_equal(result, expected)
Example #3
Source File: test_sparse.py From coffeegrindsize with MIT License | 6 votes |
def test_fillna_frame(self, data_missing):
    """Check ``DataFrame.fillna`` on a frame holding ``data_missing``."""
    # Have to override to specify that fill_value will change.
    fill_value = data_missing[1]

    frame = pd.DataFrame({
        "A": data_missing,
        "B": [1, 2]
    })
    result = frame.fillna(fill_value)

    # When the array's own fill_value is NA, the expected dtype carries the
    # value that was filled in; otherwise the dtype is expected unchanged.
    expected_dtype = data_missing.dtype
    if pd.isna(data_missing.fill_value):
        expected_dtype = SparseDtype(data_missing.dtype, fill_value)

    filled = data_missing._from_sequence(
        [fill_value, fill_value], dtype=expected_dtype
    )
    expected = pd.DataFrame({
        "A": filled,
        "B": [1, 2],
    })

    self.assert_frame_equal(result, expected)
Example #4
Source File: test_sparse.py From coffeegrindsize with MIT License | 6 votes |
def test_isna(self, data_missing):
    """Check ``isna`` on ``data_missing`` as an array, a Series and an emptied Series."""
    # The expected boolean result uses a fill value that says whether the
    # input's own fill value is NA.
    fill_is_na = pd.isna(data_missing.dtype.fill_value)
    bool_sparse = SparseDtype(bool, fill_is_na)
    expected_array = SparseArray([True, False], dtype=bool_sparse)

    # Array level.
    self.assert_equal(pd.isna(data_missing), expected_array)

    # Series level.
    self.assert_series_equal(
        pd.Series(data_missing).isna(),
        pd.Series(expected_array),
    )

    # GH 21189
    emptied = pd.Series(data_missing).drop([0, 1])
    self.assert_series_equal(
        emptied.isna(),
        pd.Series([], dtype=bool_sparse),
    )
Example #5
Source File: test_utils.py From scprep with GNU General Public License v3.0 | 6 votes |
def test_SparseDataFrame():
    """Check ``scprep.utils.SparseDataFrame`` against a sparse-cast reference frame."""
    dense = data.load_10X(sparse=False)
    reference = dense.astype(pd.SparseDtype(float, fill_value=0.0))
    row_labels = dense.index
    column_labels = dense.columns

    def test_fun(X):
        # Convert the candidate input, re-attaching the reference labels,
        # then compare it with the reference frame.
        X = scprep.utils.SparseDataFrame(X, index=row_labels, columns=column_labels)
        utils.assert_matrix_class_equivalent(X, reference)

    matrix.test_all_matrix_types(dense, test_fun)
    matrix.test_pandas_matrix_types(
        dense,
        utils.assert_transform_equivalent,
        Y=reference,
        transform=scprep.utils.SparseDataFrame,
    )
Example #6
Source File: test_utils.py From scprep with GNU General Public License v3.0 | 6 votes |
def test_is_sparse_dataframe():
    """Check ``scprep.utils.is_sparse_dataframe`` on sparse-cast and other inputs."""
    dense = data.load_10X(sparse=False)
    as_sparse = dense.astype(pd.SparseDtype(float, fill_value=0.0))
    assert scprep.utils.is_sparse_dataframe(as_sparse)

    def test_fun(X):
        # Every type exercised below must be reported as not a sparse frame.
        assert not scprep.utils.is_sparse_dataframe(X)

    # NOTE(review): ``.append`` below requires this concatenation to be a
    # list - confirm against the definitions in ``matrix``.
    rejected_types = (
        matrix._scipy_matrix_types
        + matrix._numpy_matrix_types
        + matrix._pandas_dense_matrix_types
    )
    if matrix._pandas_0:
        rejected_types.append(matrix.SparseDataFrame_deprecated)

    matrix.test_matrix_types(
        dense,
        test_fun,
        rejected_types,
    )
Example #7
Source File: test_sparse.py From predictive-maintenance-using-machine-learning with Apache License 2.0 | 6 votes |
def test_where_series(self, data, na_value):
    """Check ``Series.where`` on a Series built from ``data``, with and without ``other``."""
    assert data[0] != data[1]
    array_type = type(data)
    first, second = data[:2]

    def as_series(values, dtype):
        # Build the array through the array type's own constructor hook.
        return pd.Series(array_type._from_sequence(values, dtype=dtype))

    series = as_series([first, first, second, second], data.dtype)

    # No replacement supplied: masked-out slots are expected to equal
    # ``na_value`` and the expected dtype is SparseDtype('float', 0.0).
    mask = np.array([True, True, False, False])
    result = series.where(mask)
    float_sparse = SparseDtype('float', 0.0)
    expected = as_series([first, first, na_value, na_value], float_sparse)
    self.assert_series_equal(result, expected)

    # Replacement supplied: masked-out slots are taken from ``replacement``
    # and the original dtype is expected.
    replacement = array_type._from_sequence(
        [first, second, first, second], dtype=data.dtype
    )
    mask = np.array([True, False, True, True])
    result = series.where(mask, replacement)
    expected = as_series([first, second, second, second], data.dtype)
    self.assert_series_equal(result, expected)
Example #8
Source File: test_sparse.py From predictive-maintenance-using-machine-learning with Apache License 2.0 | 6 votes |
def test_fillna_frame(self, data_missing):
    """Check ``DataFrame.fillna`` on a frame holding ``data_missing``."""
    # Have to override to specify that fill_value will change.
    fill_value = data_missing[1]

    frame = pd.DataFrame({
        "A": data_missing,
        "B": [1, 2]
    })
    result = frame.fillna(fill_value)

    # When the array's own fill_value is NA, the expected dtype carries the
    # value that was filled in; otherwise the dtype is expected unchanged.
    expected_dtype = data_missing.dtype
    if pd.isna(data_missing.fill_value):
        expected_dtype = SparseDtype(data_missing.dtype, fill_value)

    filled = data_missing._from_sequence(
        [fill_value, fill_value], dtype=expected_dtype
    )
    expected = pd.DataFrame({
        "A": filled,
        "B": [1, 2],
    })

    self.assert_frame_equal(result, expected)
Example #9
Source File: test_sparse.py From predictive-maintenance-using-machine-learning with Apache License 2.0 | 6 votes |
def test_isna(self, data_missing):
    """Check ``isna`` on ``data_missing`` as an array, a Series and an emptied Series."""
    # The expected boolean result uses a fill value that says whether the
    # input's own fill value is NA.
    fill_is_na = pd.isna(data_missing.dtype.fill_value)
    bool_sparse = SparseDtype(bool, fill_is_na)
    expected_array = SparseArray([True, False], dtype=bool_sparse)

    # Array level.
    self.assert_equal(pd.isna(data_missing), expected_array)

    # Series level.
    self.assert_series_equal(
        pd.Series(data_missing).isna(),
        pd.Series(expected_array),
    )

    # GH 21189
    emptied = pd.Series(data_missing).drop([0, 1])
    self.assert_series_equal(
        emptied.isna(),
        pd.Series([], dtype=bool_sparse),
    )
Example #10
Source File: test_sparse.py From recruit with Apache License 2.0 | 6 votes |
def test_isna(self, data_missing):
    """Check ``isna`` on ``data_missing`` as an array, a Series and an emptied Series."""
    # The expected boolean result uses a fill value that says whether the
    # input's own fill value is NA.
    fill_is_na = pd.isna(data_missing.dtype.fill_value)
    bool_sparse = SparseDtype(bool, fill_is_na)
    expected_array = SparseArray([True, False], dtype=bool_sparse)

    # Array level.
    self.assert_equal(pd.isna(data_missing), expected_array)

    # Series level.
    self.assert_series_equal(
        pd.Series(data_missing).isna(),
        pd.Series(expected_array),
    )

    # GH 21189
    emptied = pd.Series(data_missing).drop([0, 1])
    self.assert_series_equal(
        emptied.isna(),
        pd.Series([], dtype=bool_sparse),
    )
Example #11
Source File: test_sparse.py From recruit with Apache License 2.0 | 6 votes |
def test_fillna_frame(self, data_missing):
    """Check ``DataFrame.fillna`` on a frame holding ``data_missing``."""
    # Have to override to specify that fill_value will change.
    fill_value = data_missing[1]

    frame = pd.DataFrame({
        "A": data_missing,
        "B": [1, 2]
    })
    result = frame.fillna(fill_value)

    # When the array's own fill_value is NA, the expected dtype carries the
    # value that was filled in; otherwise the dtype is expected unchanged.
    expected_dtype = data_missing.dtype
    if pd.isna(data_missing.fill_value):
        expected_dtype = SparseDtype(data_missing.dtype, fill_value)

    filled = data_missing._from_sequence(
        [fill_value, fill_value], dtype=expected_dtype
    )
    expected = pd.DataFrame({
        "A": filled,
        "B": [1, 2],
    })

    self.assert_frame_equal(result, expected)
Example #12
Source File: test_sparse.py From recruit with Apache License 2.0 | 6 votes |
def test_where_series(self, data, na_value):
    """Check ``Series.where`` on a Series built from ``data``, with and without ``other``."""
    assert data[0] != data[1]
    array_type = type(data)
    first, second = data[:2]

    def as_series(values, dtype):
        # Build the array through the array type's own constructor hook.
        return pd.Series(array_type._from_sequence(values, dtype=dtype))

    series = as_series([first, first, second, second], data.dtype)

    # No replacement supplied: masked-out slots are expected to equal
    # ``na_value`` and the expected dtype is SparseDtype('float', 0.0).
    mask = np.array([True, True, False, False])
    result = series.where(mask)
    float_sparse = SparseDtype('float', 0.0)
    expected = as_series([first, first, na_value, na_value], float_sparse)
    self.assert_series_equal(result, expected)

    # Replacement supplied: masked-out slots are taken from ``replacement``
    # and the original dtype is expected.
    replacement = array_type._from_sequence(
        [first, second, first, second], dtype=data.dtype
    )
    mask = np.array([True, False, True, True])
    result = series.where(mask, replacement)
    expected = as_series([first, second, second, second], data.dtype)
    self.assert_series_equal(result, expected)
Example #13
Source File: test_subclass.py From recruit with Apache License 2.0 | 5 votes |
def test_subclass_sparse_slice(self):
    """Check values and dtype of ``loc``/``iloc``/``[]`` slices of a SubclassedSparseSeries."""
    cases = (
        ([1, 2, 3, 4, 5], np.int64),  # int64
        ([1., 2., 3., 4., 5.], np.float64),  # float64
    )
    for values, numpy_type in cases:
        series = tm.SubclassedSparseSeries(values)

        # ``loc[1:3]`` is expected to include the element labelled 3.
        by_label = tm.SubclassedSparseSeries(values[1:4], index=[1, 2, 3])
        tm.assert_sp_series_equal(series.loc[1:3], by_label)
        assert series.loc[1:3].dtype == SparseDtype(numpy_type)

        # ``iloc[1:3]`` is expected to stop before position 3.
        by_position = tm.SubclassedSparseSeries(values[1:3], index=[1, 2])
        tm.assert_sp_series_equal(series.iloc[1:3], by_position)
        assert series.iloc[1:3].dtype == SparseDtype(numpy_type)

        # Plain ``[1:3]`` is expected to match the positional result.
        by_getitem = tm.SubclassedSparseSeries(values[1:3], index=[1, 2])
        tm.assert_sp_series_equal(series[1:3], by_getitem)
        assert series[1:3].dtype == SparseDtype(numpy_type)
Example #14
Source File: test_sparse.py From recruit with Apache License 2.0 | 5 votes |
def _check_unsupported(self, data): if data.dtype == SparseDtype(int, 0): pytest.skip("Can't store nan in int array.")
Example #15
Source File: test_sparse.py From coffeegrindsize with MIT License | 5 votes |
def _check_unsupported(self, data): if data.dtype == SparseDtype(int, 0): pytest.skip("Can't store nan in int array.")
Example #16
Source File: test_sparse.py From coffeegrindsize with MIT License | 5 votes |
def dtype():
    """Return a ``SparseDtype`` constructed with no arguments."""
    default_sparse = SparseDtype()
    return default_sparse
Example #17
Source File: test_encoders.py From dask-ml with BSD 3-Clause "New" or "Revised" License | 5 votes |
def test_basic_dataframe(sparse, method, dask_data, dtype):
    """Compare dask-ml's ``OneHotEncoder`` with scikit-learn's on dataframe input."""
    reference = sklearn.preprocessing.OneHotEncoder(sparse=sparse, dtype=dtype)
    candidate = dask_ml.preprocessing.OneHotEncoder(sparse=sparse, dtype=dtype)

    # Exercise either the two-step fit/transform path or the fused one.
    if method == "fit":
        reference.fit(df)
        candidate.fit(dask_data)
        expected = reference.transform(df)
        result = candidate.transform(dask_data)
    else:
        expected = reference.fit_transform(df)
        result = candidate.fit_transform(dask_data)

    ignored_attributes = {
        "n_values_",
        "feature_indices_",
        "active_features_",
        "dtypes_",
        "drop_idx_",
    }
    assert_estimator_equal(
        reference,
        candidate,
        exclude=ignored_attributes,
    )

    assert isinstance(result, type(dask_data))
    assert len(result.columns) == expected.shape[1]

    # NOTE(review): the flattened source does not show whether the dtype
    # assertion sits inside the ``if``; it is kept outside here - confirm
    # against the upstream test.
    expected_dtype = dtype
    if sparse and PANDAS_VERSION >= packaging.version.parse("0.24.0"):
        # pandas sparse ExtensionDtype interface
        expected_dtype = pd.SparseDtype(dtype, dtype(0))
    assert (result.dtypes == expected_dtype).all()

    da.utils.assert_eq(result.values, expected)
Example #18
Source File: test_patch.py From scprep with GNU General Public License v3.0 | 5 votes |
def test_fill_value():
    """Check ``CustomBlock.fill_value`` for a UInt16 Series and a sparse float Series."""
    placement = slice(1, 2)

    # UInt16Dtype values: the block's fill value is expected to be NA.
    uint_values = pd.Series(np.arange(3), dtype=pd.UInt16Dtype())
    uint_block = CustomBlock(uint_values, placement=placement)
    assert pd.isna(uint_block.fill_value)

    # SparseDtype(float, 0.0) values: the fill value is expected not to be NA.
    sparse_values = pd.Series(np.arange(3), dtype=pd.SparseDtype(float, 0.0))
    sparse_block = CustomBlock(sparse_values, placement=placement)
    assert not pd.isna(sparse_block.fill_value)
Example #19
Source File: test_sparse.py From predictive-maintenance-using-machine-learning with Apache License 2.0 | 5 votes |
def _check_unsupported(self, data): if data.dtype == SparseDtype(int, 0): pytest.skip("Can't store nan in int array.")
Example #20
Source File: matrix.py From scprep with GNU General Public License v3.0 | 5 votes |
def SparseDataFrame(X, default_fill_value=0.0):
    """Return ``X`` as a DataFrame cast to ``SparseDtype(float, default_fill_value)``.

    Args:
        X: A scipy sparse matrix, a DataFrame, or anything ``pd.DataFrame``
            accepts.
        default_fill_value: Fill value of the resulting sparse dtype.

    Returns:
        The result of ``astype`` with the sparse float dtype.
    """
    target_dtype = pd.SparseDtype(float, fill_value=default_fill_value)

    if sparse.issparse(X):
        # scipy sparse input goes through the dedicated constructor.
        X = pd.DataFrame.sparse.from_spmatrix(X)
        X.sparse.fill_value = default_fill_value
        return X.astype(target_dtype)

    if is_SparseDataFrame(X) or not isinstance(X, pd.DataFrame):
        # Anything that is not already a regular DataFrame gets wrapped.
        X = pd.DataFrame(X)

    return X.astype(target_dtype)
Example #21
Source File: matrix.py From scprep with GNU General Public License v3.0 | 5 votes |
def SparseSeries(X, default_fill_value=0.0):
    """Return ``X`` as a Series cast to ``SparseDtype(float, default_fill_value)``.

    Args:
        X: Anything ``pd.Series`` accepts.
        default_fill_value: Fill value of the resulting sparse dtype.
    """
    target_dtype = pd.SparseDtype(float, fill_value=default_fill_value)
    series = pd.Series(X)
    return series.astype(target_dtype)
Example #22
Source File: utils.py From scprep with GNU General Public License v3.0 | 5 votes |
def dataframe_to_sparse(x, fill_value=0.0):
    """Cast ``x`` to ``SparseDtype(float, fill_value)`` via ``astype``.

    Args:
        x: Object with an ``astype`` method (e.g. a ``pd.DataFrame``).
        fill_value: Fill value of the resulting sparse dtype.
    """
    target_dtype = pd.SparseDtype(float, fill_value=fill_value)
    return x.astype(target_dtype)
Example #23
Source File: utils.py From anndata with BSD 3-Clause "New" or "Revised" License | 5 votes |
def ensure_df_homogeneous(
    df: pd.DataFrame, name: str
) -> Union[np.ndarray, sparse.csr_matrix]:
    """Convert ``df`` to a single array, warning when its columns differ in dtype.

    Args:
        df: Frame to convert.
        name: Label used in the warning message.

    Returns:
        A CSR matrix when every column dtype is a ``pd.SparseDtype``,
        otherwise the result of ``df.to_numpy()``.
    """
    # TODO: rename this function, I would not expect this to return a non-dataframe
    column_dtypes = df.dtypes
    every_column_sparse = all(isinstance(dt, pd.SparseDtype) for dt in column_dtypes)

    arr = df.sparse.to_coo().tocsr() if every_column_sparse else df.to_numpy()

    # More (or fewer) than one distinct column dtype triggers the warning.
    if column_dtypes.nunique() != 1:
        warnings.warn(f"{name} converted to numpy array with dtype {arr.dtype}")
    return arr
Example #24
Source File: test_sparse.py From recruit with Apache License 2.0 | 5 votes |
def dtype():
    """Return a ``SparseDtype`` constructed with no arguments."""
    default_sparse = SparseDtype()
    return default_sparse
Example #25
Source File: test_subclass.py From predictive-maintenance-using-machine-learning with Apache License 2.0 | 5 votes |
def test_subclass_sparse_slice(self):
    """Check values and dtype of ``loc``/``iloc``/``[]`` slices of a SubclassedSparseSeries."""
    cases = (
        ([1, 2, 3, 4, 5], np.int64),  # int64
        ([1., 2., 3., 4., 5.], np.float64),  # float64
    )
    for values, numpy_type in cases:
        series = tm.SubclassedSparseSeries(values)

        # ``loc[1:3]`` is expected to include the element labelled 3.
        by_label = tm.SubclassedSparseSeries(values[1:4], index=[1, 2, 3])
        tm.assert_sp_series_equal(series.loc[1:3], by_label)
        assert series.loc[1:3].dtype == SparseDtype(numpy_type)

        # ``iloc[1:3]`` is expected to stop before position 3.
        by_position = tm.SubclassedSparseSeries(values[1:3], index=[1, 2])
        tm.assert_sp_series_equal(series.iloc[1:3], by_position)
        assert series.iloc[1:3].dtype == SparseDtype(numpy_type)

        # Plain ``[1:3]`` is expected to match the positional result.
        by_getitem = tm.SubclassedSparseSeries(values[1:3], index=[1, 2])
        tm.assert_sp_series_equal(series[1:3], by_getitem)
        assert series[1:3].dtype == SparseDtype(numpy_type)
Example #26
Source File: test_sparse.py From predictive-maintenance-using-machine-learning with Apache License 2.0 | 5 votes |
def dtype():
    """Return a ``SparseDtype`` constructed with no arguments."""
    default_sparse = SparseDtype()
    return default_sparse
Example #27
Source File: test_dtypes.py From pandera with MIT License | 4 votes |
def test_pandas_extension_types():
    """Test pandas extension data type happy path."""
    # pylint: disable=no-member

    # Sparse sample: values >= 5 are replaced by NaN before the sparse cast,
    # so this case is validated with ``nullable`` enabled.
    sparse_sample = pd.Series(range(100)).where(
        lambda s: s < 5, other=np.nan).astype("Sparse[float]")

    # Each case is (dtype, sample data, extra SeriesSchema kwargs or None).
    test_params = [
        (
            pd.CategoricalDtype(),
            pd.Series(["a", "a", "b", "b", "c", "c"], dtype="category"),
            None,
        ),
        (
            pd.DatetimeTZDtype(tz='UTC'),
            pd.Series(
                pd.date_range(start="20200101", end="20200301"),
                dtype="datetime64[ns, utc]",
            ),
            None,
        ),
        (pd.Int64Dtype(), pd.Series(range(10), dtype="Int64"), None),
        (pd.StringDtype(), pd.Series(["foo", "bar", "baz"], dtype="string"), None),
        (
            pd.PeriodDtype(freq='D'),
            pd.Series(pd.period_range('1/1/2019', '1/1/2020', freq='D')),
            None,
        ),
        (
            pd.SparseDtype("float"),
            sparse_sample,
            {"nullable": True},
        ),
        (
            pd.BooleanDtype(),
            pd.Series([1, 0, 0, 1, 1], dtype="boolean"),
            None,
        ),
        (
            pd.IntervalDtype(subtype="int64"),
            pd.Series(pd.IntervalIndex.from_breaks([0, 1, 2, 3, 4])),
            None,
        ),
    ]

    for dtype, data, series_kwargs in test_params:
        extra_kwargs = series_kwargs if series_kwargs is not None else {}
        series_schema = SeriesSchema(pandas_dtype=dtype, **extra_kwargs)
        validated = series_schema.validate(data)
        assert isinstance(validated, pd.Series)