Python pandas.core.algorithms.value_counts() Examples
The following are 30
code examples of pandas.core.algorithms.value_counts().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module
pandas.core.algorithms
, or try the search function
.
Example #1
Source File: test_algos.py From predictive-maintenance-using-machine-learning with Apache License 2.0 | 6 votes |
def test_value_counts_datetime_outofbounds(self): # GH 13663 s = Series([datetime(3000, 1, 1), datetime(5000, 1, 1), datetime(5000, 1, 1), datetime(6000, 1, 1), datetime(3000, 1, 1), datetime(3000, 1, 1)]) res = s.value_counts() exp_index = Index([datetime(3000, 1, 1), datetime(5000, 1, 1), datetime(6000, 1, 1)], dtype=object) exp = Series([3, 2, 1], index=exp_index) tm.assert_series_equal(res, exp) # GH 12424 res = pd.to_datetime(Series(['2362-01-01', np.nan]), errors='ignore') exp = Series(['2362-01-01', np.nan], dtype=object) tm.assert_series_equal(res, exp)
Example #2
Source File: test_algos.py From vnpy_crypto with MIT License | 6 votes |
def test_value_counts_datetime_outofbounds(self): # GH 13663 s = Series([datetime(3000, 1, 1), datetime(5000, 1, 1), datetime(5000, 1, 1), datetime(6000, 1, 1), datetime(3000, 1, 1), datetime(3000, 1, 1)]) res = s.value_counts() exp_index = Index([datetime(3000, 1, 1), datetime(5000, 1, 1), datetime(6000, 1, 1)], dtype=object) exp = Series([3, 2, 1], index=exp_index) tm.assert_series_equal(res, exp) # GH 12424 res = pd.to_datetime(Series(['2362-01-01', np.nan]), errors='ignore') exp = Series(['2362-01-01', np.nan], dtype=object) tm.assert_series_equal(res, exp)
Example #3
Source File: test_algos.py From elasticintel with GNU General Public License v3.0 | 6 votes |
def test_value_counts_datetime_outofbounds(self): # GH 13663 s = Series([datetime(3000, 1, 1), datetime(5000, 1, 1), datetime(5000, 1, 1), datetime(6000, 1, 1), datetime(3000, 1, 1), datetime(3000, 1, 1)]) res = s.value_counts() exp_index = pd.Index([datetime(3000, 1, 1), datetime(5000, 1, 1), datetime(6000, 1, 1)], dtype=object) exp = Series([3, 2, 1], index=exp_index) tm.assert_series_equal(res, exp) # GH 12424 res = pd.to_datetime(Series(['2362-01-01', np.nan]), errors='ignore') exp = Series(['2362-01-01', np.nan], dtype=object) tm.assert_series_equal(res, exp)
Example #4
Source File: series.py From Computable with MIT License | 6 votes |
def mode(self): """Returns the mode(s) of the dataset. Empty if nothing occurs at least 2 times. Always returns Series even if only one value. Parameters ---------- sort : bool, default True If True, will lexicographically sort values, if False skips sorting. Result ordering when ``sort=False`` is not defined. Returns ------- modes : Series (sorted) """ # TODO: Add option for bins like value_counts() from pandas.core.algorithms import mode return mode(self)
Example #5
Source File: test_algos.py From twitter-stock-recommendation with MIT License | 6 votes |
def test_value_counts_datetime_outofbounds(self): # GH 13663 s = Series([datetime(3000, 1, 1), datetime(5000, 1, 1), datetime(5000, 1, 1), datetime(6000, 1, 1), datetime(3000, 1, 1), datetime(3000, 1, 1)]) res = s.value_counts() exp_index = Index([datetime(3000, 1, 1), datetime(5000, 1, 1), datetime(6000, 1, 1)], dtype=object) exp = Series([3, 2, 1], index=exp_index) tm.assert_series_equal(res, exp) # GH 12424 res = pd.to_datetime(Series(['2362-01-01', np.nan]), errors='ignore') exp = Series(['2362-01-01', np.nan], dtype=object) tm.assert_series_equal(res, exp)
Example #6
Source File: test_algos.py From recruit with Apache License 2.0 | 6 votes |
def test_value_counts_datetime_outofbounds(self): # GH 13663 s = Series([datetime(3000, 1, 1), datetime(5000, 1, 1), datetime(5000, 1, 1), datetime(6000, 1, 1), datetime(3000, 1, 1), datetime(3000, 1, 1)]) res = s.value_counts() exp_index = Index([datetime(3000, 1, 1), datetime(5000, 1, 1), datetime(6000, 1, 1)], dtype=object) exp = Series([3, 2, 1], index=exp_index) tm.assert_series_equal(res, exp) # GH 12424 res = pd.to_datetime(Series(['2362-01-01', np.nan]), errors='ignore') exp = Series(['2362-01-01', np.nan], dtype=object) tm.assert_series_equal(res, exp)
Example #7
Source File: test_algos.py From predictive-maintenance-using-machine-learning with Apache License 2.0 | 5 votes |
def test_dropna(self): # https://github.com/pandas-dev/pandas/issues/9443#issuecomment-73719328 tm.assert_series_equal( Series([True, True, False]).value_counts(dropna=True), Series([2, 1], index=[True, False])) tm.assert_series_equal( Series([True, True, False]).value_counts(dropna=False), Series([2, 1], index=[True, False])) tm.assert_series_equal( Series([True, True, False, None]).value_counts(dropna=True), Series([2, 1], index=[True, False])) tm.assert_series_equal( Series([True, True, False, None]).value_counts(dropna=False), Series([2, 1, 1], index=[True, False, np.nan])) tm.assert_series_equal( Series([10.3, 5., 5.]).value_counts(dropna=True), Series([2, 1], index=[5., 10.3])) tm.assert_series_equal( Series([10.3, 5., 5.]).value_counts(dropna=False), Series([2, 1], index=[5., 10.3])) tm.assert_series_equal( Series([10.3, 5., 5., None]).value_counts(dropna=True), Series([2, 1], index=[5., 10.3])) # 32-bit linux has a different ordering if not compat.is_platform_32bit(): result = Series([10.3, 5., 5., None]).value_counts(dropna=False) expected = Series([2, 1, 1], index=[5., 10.3, np.nan]) tm.assert_series_equal(result, expected)
Example #8
Source File: test_algos.py From predictive-maintenance-using-machine-learning with Apache License 2.0 | 5 votes |
def test_value_counts_normalized(self): # GH12558 s = Series([1, 2, np.nan, np.nan, np.nan]) dtypes = (np.float64, np.object, 'M8[ns]') for t in dtypes: s_typed = s.astype(t) result = s_typed.value_counts(normalize=True, dropna=False) expected = Series([0.6, 0.2, 0.2], index=Series([np.nan, 2.0, 1.0], dtype=t)) tm.assert_series_equal(result, expected) result = s_typed.value_counts(normalize=True, dropna=True) expected = Series([0.5, 0.5], index=Series([2.0, 1.0], dtype=t)) tm.assert_series_equal(result, expected)
Example #9
Source File: test_algos.py From predictive-maintenance-using-machine-learning with Apache License 2.0 | 5 votes |
def test_value_counts_uint64(self): arr = np.array([2**63], dtype=np.uint64) expected = Series([1], index=[2**63]) result = algos.value_counts(arr) tm.assert_series_equal(result, expected) arr = np.array([-1, 2**63], dtype=object) expected = Series([1, 1], index=[-1, 2**63]) result = algos.value_counts(arr) # 32-bit linux has a different ordering if not compat.is_platform_32bit(): tm.assert_series_equal(result, expected)
Example #10
Source File: test_algos.py From predictive-maintenance-using-machine-learning with Apache License 2.0 | 5 votes |
def test_categorical_zeroes(self): # keep the `d` category with 0 s = Series(Categorical( list('bbbaac'), categories=list('abcd'), ordered=True)) result = s.value_counts() expected = Series([3, 2, 1, 0], index=Categorical( ['b', 'a', 'c', 'd'], categories=list('abcd'), ordered=True)) tm.assert_series_equal(result, expected, check_index_type=True)
Example #11
Source File: base.py From Splunking-Crime with GNU Affero General Public License v3.0 | 5 votes |
def value_counts(self, normalize=False, sort=True, ascending=False, bins=None, dropna=True): """ Returns object containing counts of unique values. The resulting object will be in descending order so that the first element is the most frequently-occurring element. Excludes NA values by default. Parameters ---------- normalize : boolean, default False If True then the object returned will contain the relative frequencies of the unique values. sort : boolean, default True Sort by values ascending : boolean, default False Sort in ascending order bins : integer, optional Rather than count values, group them into half-open bins, a convenience for pd.cut, only works with numeric data dropna : boolean, default True Don't include counts of NaN. Returns ------- counts : Series """ from pandas.core.algorithms import value_counts result = value_counts(self, sort=sort, ascending=ascending, normalize=normalize, bins=bins, dropna=dropna) return result
Example #12
Source File: test_algos.py From elasticintel with GNU General Public License v3.0 | 5 votes |
def test_value_counts(self): np.random.seed(1234) from pandas.core.reshape.tile import cut arr = np.random.randn(4) factor = cut(arr, 4) # assert isinstance(factor, n) result = algos.value_counts(factor) breaks = [-1.194, -0.535, 0.121, 0.777, 1.433] expected_index = pd.IntervalIndex.from_breaks( breaks).astype('category') expected = Series([1, 1, 1, 1], index=expected_index) tm.assert_series_equal(result.sort_index(), expected.sort_index())
Example #13
Source File: test_algos.py From elasticintel with GNU General Public License v3.0 | 5 votes |
def test_value_counts_bins(self): s = [1, 2, 3, 4] result = algos.value_counts(s, bins=1) expected = Series([4], index=IntervalIndex.from_tuples([(0.996, 4.0)])) tm.assert_series_equal(result, expected) result = algos.value_counts(s, bins=2, sort=False) expected = Series([2, 2], index=IntervalIndex.from_tuples([(0.996, 2.5), (2.5, 4.0)])) tm.assert_series_equal(result, expected)
Example #14
Source File: test_algos.py From elasticintel with GNU General Public License v3.0 | 5 votes |
def test_value_counts_dtypes(self): result = algos.value_counts([1, 1.]) assert len(result) == 1 result = algos.value_counts([1, 1.], bins=1) assert len(result) == 1 result = algos.value_counts(Series([1, 1., '1'])) # object assert len(result) == 2 pytest.raises(TypeError, lambda s: algos.value_counts(s, bins=1), ['1', 1])
Example #15
Source File: test_algos.py From elasticintel with GNU General Public License v3.0 | 5 votes |
def test_value_counts_nat(self): td = Series([np.timedelta64(10000), pd.NaT], dtype='timedelta64[ns]') dt = pd.to_datetime(['NaT', '2014-01-01']) for s in [td, dt]: vc = algos.value_counts(s) vc_with_na = algos.value_counts(s, dropna=False) assert len(vc) == 1 assert len(vc_with_na) == 2 exp_dt = Series({Timestamp('2014-01-01 00:00:00'): 1}) tm.assert_series_equal(algos.value_counts(dt), exp_dt) # TODO same for (timedelta)
Example #16
Source File: test_algos.py From predictive-maintenance-using-machine-learning with Apache License 2.0 | 5 votes |
def test_categorical_nans(self): s = Series(Categorical(list('aaaaabbbcc'))) # 4,3,2,1 (nan) s.iloc[1] = np.nan result = s.value_counts() expected = Series([4, 3, 2], index=CategoricalIndex( ['a', 'b', 'c'], categories=['a', 'b', 'c'])) tm.assert_series_equal(result, expected, check_index_type=True) result = s.value_counts(dropna=False) expected = Series([ 4, 3, 2, 1 ], index=CategoricalIndex(['a', 'b', 'c', np.nan])) tm.assert_series_equal(result, expected, check_index_type=True) # out of order s = Series(Categorical( list('aaaaabbbcc'), ordered=True, categories=['b', 'a', 'c'])) s.iloc[1] = np.nan result = s.value_counts() expected = Series([4, 3, 2], index=CategoricalIndex( ['a', 'b', 'c'], categories=['b', 'a', 'c'], ordered=True)) tm.assert_series_equal(result, expected, check_index_type=True) result = s.value_counts(dropna=False) expected = Series([4, 3, 2, 1], index=CategoricalIndex( ['a', 'b', 'c', np.nan], categories=['b', 'a', 'c'], ordered=True)) tm.assert_series_equal(result, expected, check_index_type=True)
Example #17
Source File: test_algos.py From elasticintel with GNU General Public License v3.0 | 5 votes |
def test_categorical_zeroes(self): # keep the `d` category with 0 s = Series(pd.Categorical( list('bbbaac'), categories=list('abcd'), ordered=True)) result = s.value_counts() expected = Series([3, 2, 1, 0], index=pd.Categorical( ['b', 'a', 'c', 'd'], categories=list('abcd'), ordered=True)) tm.assert_series_equal(result, expected, check_index_type=True)
Example #18
Source File: test_algos.py From elasticintel with GNU General Public License v3.0 | 5 votes |
def test_categorical_nans(self): s = Series(pd.Categorical(list('aaaaabbbcc'))) # 4,3,2,1 (nan) s.iloc[1] = np.nan result = s.value_counts() expected = Series([4, 3, 2], index=pd.CategoricalIndex( ['a', 'b', 'c'], categories=['a', 'b', 'c'])) tm.assert_series_equal(result, expected, check_index_type=True) result = s.value_counts(dropna=False) expected = Series([ 4, 3, 2, 1 ], index=CategoricalIndex(['a', 'b', 'c', np.nan])) tm.assert_series_equal(result, expected, check_index_type=True) # out of order s = Series(pd.Categorical( list('aaaaabbbcc'), ordered=True, categories=['b', 'a', 'c'])) s.iloc[1] = np.nan result = s.value_counts() expected = Series([4, 3, 2], index=pd.CategoricalIndex( ['a', 'b', 'c'], categories=['b', 'a', 'c'], ordered=True)) tm.assert_series_equal(result, expected, check_index_type=True) result = s.value_counts(dropna=False) expected = Series([4, 3, 2, 1], index=pd.CategoricalIndex( ['a', 'b', 'c', np.nan], categories=['b', 'a', 'c'], ordered=True)) tm.assert_series_equal(result, expected, check_index_type=True)
Example #19
Source File: test_algos.py From predictive-maintenance-using-machine-learning with Apache License 2.0 | 5 votes |
def test_value_counts_dtypes(self): result = algos.value_counts([1, 1.]) assert len(result) == 1 result = algos.value_counts([1, 1.], bins=1) assert len(result) == 1 result = algos.value_counts(Series([1, 1., '1'])) # object assert len(result) == 2 pytest.raises(TypeError, lambda s: algos.value_counts(s, bins=1), ['1', 1])
Example #20
Source File: test_algos.py From elasticintel with GNU General Public License v3.0 | 5 votes |
def test_dropna(self): # https://github.com/pandas-dev/pandas/issues/9443#issuecomment-73719328 tm.assert_series_equal( Series([True, True, False]).value_counts(dropna=True), Series([2, 1], index=[True, False])) tm.assert_series_equal( Series([True, True, False]).value_counts(dropna=False), Series([2, 1], index=[True, False])) tm.assert_series_equal( Series([True, True, False, None]).value_counts(dropna=True), Series([2, 1], index=[True, False])) tm.assert_series_equal( Series([True, True, False, None]).value_counts(dropna=False), Series([2, 1, 1], index=[True, False, np.nan])) tm.assert_series_equal( Series([10.3, 5., 5.]).value_counts(dropna=True), Series([2, 1], index=[5., 10.3])) tm.assert_series_equal( Series([10.3, 5., 5.]).value_counts(dropna=False), Series([2, 1], index=[5., 10.3])) tm.assert_series_equal( Series([10.3, 5., 5., None]).value_counts(dropna=True), Series([2, 1], index=[5., 10.3])) # 32-bit linux has a different ordering if not compat.is_platform_32bit(): result = Series([10.3, 5., 5., None]).value_counts(dropna=False) expected = Series([2, 1, 1], index=[5., 10.3, np.nan]) tm.assert_series_equal(result, expected)
Example #21
Source File: test_algos.py From elasticintel with GNU General Public License v3.0 | 5 votes |
def test_value_counts_normalized(self): # GH12558 s = Series([1, 2, np.nan, np.nan, np.nan]) dtypes = (np.float64, np.object, 'M8[ns]') for t in dtypes: s_typed = s.astype(t) result = s_typed.value_counts(normalize=True, dropna=False) expected = Series([0.6, 0.2, 0.2], index=Series([np.nan, 2.0, 1.0], dtype=t)) tm.assert_series_equal(result, expected) result = s_typed.value_counts(normalize=True, dropna=True) expected = Series([0.5, 0.5], index=Series([2.0, 1.0], dtype=t)) tm.assert_series_equal(result, expected)
Example #22
Source File: test_algos.py From elasticintel with GNU General Public License v3.0 | 5 votes |
def test_value_counts_uint64(self): arr = np.array([2**63], dtype=np.uint64) expected = Series([1], index=[2**63]) result = algos.value_counts(arr) tm.assert_series_equal(result, expected) arr = np.array([-1, 2**63], dtype=object) expected = Series([1, 1], index=[-1, 2**63]) result = algos.value_counts(arr) # 32-bit linux has a different ordering if not compat.is_platform_32bit(): tm.assert_series_equal(result, expected)
Example #23
Source File: test_algos.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_value_counts(self): np.random.seed(1234) from pandas.core.reshape.tile import cut arr = np.random.randn(4) factor = cut(arr, 4) # assert isinstance(factor, n) result = algos.value_counts(factor) breaks = [-1.194, -0.535, 0.121, 0.777, 1.433] index = IntervalIndex.from_breaks(breaks).astype(CDT(ordered=True)) expected = Series([1, 1, 1, 1], index=index) tm.assert_series_equal(result.sort_index(), expected.sort_index())
Example #24
Source File: test_algos.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_value_counts_bins(self): s = [1, 2, 3, 4] result = algos.value_counts(s, bins=1) expected = Series([4], index=IntervalIndex.from_tuples([(0.996, 4.0)])) tm.assert_series_equal(result, expected) result = algos.value_counts(s, bins=2, sort=False) expected = Series([2, 2], index=IntervalIndex.from_tuples([(0.996, 2.5), (2.5, 4.0)])) tm.assert_series_equal(result, expected)
Example #25
Source File: test_algos.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_value_counts_dtypes(self): result = algos.value_counts([1, 1.]) assert len(result) == 1 result = algos.value_counts([1, 1.], bins=1) assert len(result) == 1 result = algos.value_counts(Series([1, 1., '1'])) # object assert len(result) == 2 pytest.raises(TypeError, lambda s: algos.value_counts(s, bins=1), ['1', 1])
Example #26
Source File: test_algos.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_value_counts_nat(self): td = Series([np.timedelta64(10000), pd.NaT], dtype='timedelta64[ns]') dt = pd.to_datetime(['NaT', '2014-01-01']) for s in [td, dt]: vc = algos.value_counts(s) vc_with_na = algos.value_counts(s, dropna=False) assert len(vc) == 1 assert len(vc_with_na) == 2 exp_dt = Series({Timestamp('2014-01-01 00:00:00'): 1}) tm.assert_series_equal(algos.value_counts(dt), exp_dt) # TODO same for (timedelta)
Example #27
Source File: test_algos.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_categorical_nans(self): s = Series(Categorical(list('aaaaabbbcc'))) # 4,3,2,1 (nan) s.iloc[1] = np.nan result = s.value_counts() expected = Series([4, 3, 2], index=CategoricalIndex( ['a', 'b', 'c'], categories=['a', 'b', 'c'])) tm.assert_series_equal(result, expected, check_index_type=True) result = s.value_counts(dropna=False) expected = Series([ 4, 3, 2, 1 ], index=CategoricalIndex(['a', 'b', 'c', np.nan])) tm.assert_series_equal(result, expected, check_index_type=True) # out of order s = Series(Categorical( list('aaaaabbbcc'), ordered=True, categories=['b', 'a', 'c'])) s.iloc[1] = np.nan result = s.value_counts() expected = Series([4, 3, 2], index=CategoricalIndex( ['a', 'b', 'c'], categories=['b', 'a', 'c'], ordered=True)) tm.assert_series_equal(result, expected, check_index_type=True) result = s.value_counts(dropna=False) expected = Series([4, 3, 2, 1], index=CategoricalIndex( ['a', 'b', 'c', np.nan], categories=['b', 'a', 'c'], ordered=True)) tm.assert_series_equal(result, expected, check_index_type=True)
Example #28
Source File: test_algos.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_categorical_zeroes(self): # keep the `d` category with 0 s = Series(Categorical( list('bbbaac'), categories=list('abcd'), ordered=True)) result = s.value_counts() expected = Series([3, 2, 1, 0], index=Categorical( ['b', 'a', 'c', 'd'], categories=list('abcd'), ordered=True)) tm.assert_series_equal(result, expected, check_index_type=True)
Example #29
Source File: test_algos.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_dropna(self): # https://github.com/pandas-dev/pandas/issues/9443#issuecomment-73719328 tm.assert_series_equal( Series([True, True, False]).value_counts(dropna=True), Series([2, 1], index=[True, False])) tm.assert_series_equal( Series([True, True, False]).value_counts(dropna=False), Series([2, 1], index=[True, False])) tm.assert_series_equal( Series([True, True, False, None]).value_counts(dropna=True), Series([2, 1], index=[True, False])) tm.assert_series_equal( Series([True, True, False, None]).value_counts(dropna=False), Series([2, 1, 1], index=[True, False, np.nan])) tm.assert_series_equal( Series([10.3, 5., 5.]).value_counts(dropna=True), Series([2, 1], index=[5., 10.3])) tm.assert_series_equal( Series([10.3, 5., 5.]).value_counts(dropna=False), Series([2, 1], index=[5., 10.3])) tm.assert_series_equal( Series([10.3, 5., 5., None]).value_counts(dropna=True), Series([2, 1], index=[5., 10.3])) # 32-bit linux has a different ordering if not compat.is_platform_32bit(): result = Series([10.3, 5., 5., None]).value_counts(dropna=False) expected = Series([2, 1, 1], index=[5., 10.3, np.nan]) tm.assert_series_equal(result, expected)
Example #30
Source File: test_algos.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_value_counts_normalized(self): # GH12558 s = Series([1, 2, np.nan, np.nan, np.nan]) dtypes = (np.float64, np.object, 'M8[ns]') for t in dtypes: s_typed = s.astype(t) result = s_typed.value_counts(normalize=True, dropna=False) expected = Series([0.6, 0.2, 0.2], index=Series([np.nan, 2.0, 1.0], dtype=t)) tm.assert_series_equal(result, expected) result = s_typed.value_counts(normalize=True, dropna=True) expected = Series([0.5, 0.5], index=Series([2.0, 1.0], dtype=t)) tm.assert_series_equal(result, expected)