Python Examples of pandas.crosstab

Source File: test_pivot.py From predictive-maintenance-using-machine-learning with Apache License 2.0

6 votes

def test_crosstab_with_empties(self):
        """crosstab with an all-NaN ``values`` column, normalized and raw."""
        frame = pd.DataFrame({
            'a': [1, 2, 2, 2, 2],
            'b': [3, 3, 4, 4, 4],
            'c': [np.nan] * 5,
        })
        row_index = pd.Index([1, 2], name='a', dtype='int64')
        col_index = pd.Index([3, 4], name='b')

        # Every normalize mode is expected to produce an all-zero float table.
        all_zero = pd.DataFrame([[0.0, 0.0], [0.0, 0.0]],
                                index=row_index, columns=col_index)
        for mode in (True, 'index', 'columns'):
            observed = pd.crosstab(frame.a, frame.b, values=frame.c,
                                   aggfunc='count', normalize=mode)
            tm.assert_frame_equal(all_zero, observed)

        # Without normalization the (a=1, b=4) cell, which has no rows in
        # the input, is expected to be NaN rather than 0.
        with_nan = pd.DataFrame([[0.0, np.nan], [0.0, 0.0]],
                                index=row_index, columns=col_index)
        observed = pd.crosstab(frame.a, frame.b, values=frame.c,
                               aggfunc='count', normalize=False)
        tm.assert_frame_equal(with_nan, observed)

Source File: test_pivot.py From vnpy_crypto with MIT License

6 votes

def test_crosstab_ndarray(self):
        """crosstab on bare ndarrays matches crosstab on the named Series."""
        arr_a = np.random.randint(0, 5, size=100)
        arr_b = np.random.randint(0, 3, size=100)
        arr_c = np.random.randint(0, 10, size=100)
        frame = DataFrame({'a': arr_a, 'b': arr_b, 'c': arr_c})

        # one array as rows, two as columns; names supplied explicitly
        from_arrays = crosstab(arr_a, [arr_b, arr_c],
                               rownames=['a'], colnames=('b', 'c'))
        from_series = crosstab(frame['a'], [frame['b'], frame['c']])
        tm.assert_frame_equal(from_arrays, from_series)

        # the transposed layout: two arrays as rows, one as columns
        from_arrays = crosstab([arr_b, arr_c], arr_a,
                               colnames=['a'], rownames=('b', 'c'))
        from_series = crosstab([frame['b'], frame['c']], frame['a'])
        tm.assert_frame_equal(from_arrays, from_series)

        # unnamed inputs receive the default axis names
        unnamed = crosstab(self.df['A'].values, self.df['C'].values)
        assert unnamed.index.name == 'row_0'
        assert unnamed.columns.name == 'col_0'

Source File: contingency_tables.py From vnpy_crypto with MIT License

6 votes

def from_data(cls, data, shift_zeros=True):
        """
        Build a Table object from raw two-column data.

        Parameters
        ----------
        data : array-like
            Raw observations.  The first column supplies the row
            variable of the contingency table and the second column
            supplies the column variable.
        shift_zeros : boolean
            Passed through to the constructor.  If True, and if there
            are any zeros in the contingency table, add 0.5 to all four
            cells of the table.
        """

        # DataFrames are sliced positionally; anything else is assumed
        # to support 2-D ``[:, j]`` indexing.
        if isinstance(data, pd.DataFrame):
            row_var, col_var = data.iloc[:, 0], data.iloc[:, 1]
        else:
            row_var, col_var = data[:, 0], data[:, 1]
        return cls(pd.crosstab(row_var, col_var), shift_zeros)

Source File: contingency_tables.py From vnpy_crypto with MIT License

6 votes

def from_data(cls, data, shift_zeros=True):
        """
        Create a Table by cross-tabulating raw data.

        Parameters
        ----------
        data : array-like
            Raw observations; only the first two columns are used to
            build the contingency table.
        shift_zeros : boolean
            Forwarded to the constructor.  If True and any cell count
            is zero, add 0.5 to all values in the table.

        Returns
        -------
        A Table instance.
        """

        is_frame = isinstance(data, pd.DataFrame)
        # Positional access for DataFrames, plain 2-D indexing otherwise.
        first = data.iloc[:, 0] if is_frame else data[:, 0]
        second = data.iloc[:, 1] if is_frame else data[:, 1]
        counts = pd.crosstab(first, second)

        return cls(counts, shift_zeros)

Source File: test_contingency_tables.py From vnpy_crypto with MIT License

6 votes

def test_from_data(self):
        """StratifiedTable.from_data matches a table built stratum by stratum."""

        np.random.seed(241)
        df = pd.DataFrame(index=range(100), columns=("v1", "v2", "strat"))
        df["v1"] = np.random.randint(0, 2, 100)
        df["v2"] = np.random.randint(0, 2, 100)
        # ten strata of ten consecutive rows each
        df["strat"] = np.kron(np.arange(10), np.ones(10))

        stratum_rows = (np.arange(10 * k, 10 * (k + 1)) for k in range(10))
        tables = [pd.crosstab(df.loc[rows, "v1"], df.loc[rows, "v2"])
                  for rows in stratum_rows]

        by_hand = ctab.StratifiedTable(tables)
        via_from_data = ctab.StratifiedTable.from_data("v1", "v2", "strat", df)

        assert_equal(by_hand.summary().as_text(),
                     via_from_data.summary().as_text())

Source File: test_contingency_tables.py From vnpy_crypto with MIT License

6 votes

def test_SquareTable_from_data():
    """SquareTable gives the same summary from a crosstab, raw data or ndarray."""

    np.random.seed(434)
    df = pd.DataFrame(index=range(100), columns=["v1", "v2"])
    df["v1"] = np.random.randint(0, 5, 100)
    df["v2"] = np.random.randint(0, 5, 100)
    table = pd.crosstab(df["v1"], df["v2"])

    from_table = ctab.SquareTable(table)
    from_frame = ctab.SquareTable.from_data(df)
    from_array = ctab.SquareTable(np.asarray(table))

    # All three construction routes must summarise identically.
    text_table = from_table.summary().as_text()
    text_frame = from_frame.summary().as_text()
    assert_equal(text_table, text_frame)
    assert_equal(from_frame.summary().as_text(),
                 from_array.summary().as_text())

    description = str(from_table)
    assert_equal(
        description.startswith('A 5x5 contingency table with counts:'), True)
    assert_equal(from_table.table[0, 0], 8.)

Source File: test_mosaicplot.py From vnpy_crypto with MIT License

6 votes

def test_mosaic_empty_cells():
    """Smoke test: ``mosaic`` runs on data whose crosstab has empty cells.

    Nothing is asserted about the output; the test only checks that both
    call styles complete without raising.
    """
    # SMOKE test  see #2286
    import pandas as pd
    # Two label columns keyed by the same row labels (60-70); most
    # (id1, id2) combinations never occur together.
    mydata = pd.DataFrame({'id2': {64: 'Angelica',
                                   65: 'DXW_UID', 66: 'casuid01',
                                   67: 'casuid01', 68: 'EC93_uid',
                                   69: 'EC93_uid', 70: 'EC93_uid',
                                   60: 'DXW_UID',  61: 'AtmosFox',
                                   62: 'DXW_UID', 63: 'DXW_UID'},
                           'id1': {64: 'TGP',
                                   65: 'Retention01', 66: 'default',
                                   67: 'default', 68: 'Musa_EC_9_3',
                                   69: 'Musa_EC_9_3', 70: 'Musa_EC_9_3',
                                   60: 'default', 61: 'default',
                                   62: 'default', 63: 'default'}})

    ct = pd.crosstab(mydata.id1, mydata.id2)
    # First call style: the transposed crosstab flattened with unstack().
    fig, vals = mosaic(ct.T.unstack())
    pylab.close('all')
    # Second call style: the raw frame plus the names of the columns to use.
    fig, vals = mosaic(mydata, ['id1','id2'])
    pylab.close('all')

Source File: test_pivot.py From vnpy_crypto with MIT License

6 votes

def test_crosstab_errors(self):
        """Invalid crosstab argument combinations raise ValueError (Issue 12578)."""
        frame = pd.DataFrame({'a': [1, 2, 2, 2, 2],
                              'b': [3, 3, 4, 4, 4],
                              'c': [1, 1, np.nan, 1, 1]})
        rows, cols = frame.a, frame.b

        # values and aggfunc are only accepted together
        with tm.assert_raises_regex(
                ValueError, 'values cannot be used without an aggfunc.'):
            pd.crosstab(rows, cols, values=frame.c)

        with tm.assert_raises_regex(
                ValueError, 'aggfunc cannot be used without values'):
            pd.crosstab(rows, cols, aggfunc=np.mean)

        # an unknown string and a number are both rejected for normalize
        for bad_normalize in ('42', 42):
            with tm.assert_raises_regex(
                    ValueError, 'Not a valid normalize argument'):
                pd.crosstab(rows, cols, normalize=bad_normalize)

        with tm.assert_raises_regex(
                ValueError, 'Not a valid margins argument'):
            pd.crosstab(rows, cols, normalize='all', margins=42)

Source File: test_pivot.py From predictive-maintenance-using-machine-learning with Apache License 2.0

6 votes

def test_crosstab_errors(self):
        """Invalid crosstab argument combinations raise ValueError (Issue 12578)."""
        frame = pd.DataFrame({'a': [1, 2, 2, 2, 2],
                              'b': [3, 3, 4, 4, 4],
                              'c': [1, 1, np.nan, 1, 1]})
        rows, cols = frame.a, frame.b

        # values and aggfunc are only accepted together
        with pytest.raises(
                ValueError,
                match='values cannot be used without an aggfunc.'):
            pd.crosstab(rows, cols, values=frame.c)

        with pytest.raises(
                ValueError, match='aggfunc cannot be used without values'):
            pd.crosstab(rows, cols, aggfunc=np.mean)

        # an unknown string and a number are both rejected for normalize
        for bad_normalize in ('42', 42):
            with pytest.raises(
                    ValueError, match='Not a valid normalize argument'):
                pd.crosstab(rows, cols, normalize=bad_normalize)

        with pytest.raises(
                ValueError, match='Not a valid margins argument'):
            pd.crosstab(rows, cols, normalize='all', margins=42)

Source File: decisionTree.py From statistical_learning with Apache License 2.0

6 votes

def SplitData(self, df):
        """Choose the best feature to split on and partition ``df`` by it.

        The last column of ``df`` is taken as the label; every other
        column is a candidate feature.  Each feature is scored from its
        crosstab against the label via ``self.calg``, and the label
        distribution is scored via ``self.calH``.

        Returns
        -------
        None
            When the best score is below ``self.eps``.
        generator of (name, value, subset)
            Otherwise: one tuple per distinct value of the chosen
            feature, where ``subset`` is the matching rows of ``df``
            with the chosen feature column dropped.

        Raises
        ------
        ValueError
            If ``self.method`` is neither "ID3" nor "C4.5".
        """
        # Fail early with a clear message; without this check an unknown
        # method would leave ``g`` unbound further down.
        if self.method not in ("ID3", "C4.5"):
            raise ValueError(
                "unsupported method %r; expected 'ID3' or 'C4.5'"
                % (self.method,))

        labels = df.iloc[:, -1]
        data = df.iloc[:, :-1]
        # use crosstab to count the frequency
        cbs = (pd.crosstab(data.iloc[:, i], labels)
               for i in range(data.columns.size))
        y_c = labels.groupby(labels).count()
        # entropy of y
        HD = self.calH(y_c)
        HDA = [self.calg(cb) for cb in cbs]
        # NOTE(review): HDA is a plain list, so the arithmetic below
        # presumably relies on calH returning a NumPy scalar that coerces
        # the list to an array -- confirm against calH.
        if self.method == "ID3":
            g = HD-HDA
        else:  # "C4.5"
            g = 1-HDA/HD
        if g.max() < self.eps:
            return None
        # the split location
        split = g.argmax()
        name = df.columns[split]
        # divide into parts
        gp = df.groupby(df.iloc[:, split])
        return ((name, i, d.drop(name, axis=1)) for i, d in gp)

Source File: test_replication_kw_97.py From respy with MIT License

6 votes

def test_distribution_of_lagged_choices():
    """Simulated first-period lagged-choice shares match the example data."""
    params, options, observed = rp.get_example_model("kw_97_extended")

    options["n_periods"] = 1
    options["simulated_agents"] = 10_000

    simulated = rp.get_simulate_func(params, options)(params)

    def first_period_shares(frame):
        # Column-normalized crosstab of lagged choice by schooling
        # experience, restricted to period 0.
        first = frame.query("Period == 0")
        return pd.crosstab(
            first.Lagged_Choice_1, first.Experience_School, normalize="columns"
        )

    expected = first_period_shares(observed)
    calculated = first_period_shares(simulated)

    # Allow absolute differences of up to 0.04 in the shares.
    np.testing.assert_allclose(expected, calculated, atol=0.04)

Source File: test_pivot.py From recruit with Apache License 2.0

6 votes

def test_crosstab_errors(self):
        """Each misuse of crosstab arguments raises ValueError (Issue 12578)."""
        df = pd.DataFrame({'a': [1, 2, 2, 2, 2], 'b': [3, 3, 4, 4, 4],
                           'c': [1, 1, np.nan, 1, 1]})

        # (expected message, extra keyword arguments), checked in order
        cases = [
            ('values cannot be used without an aggfunc.',
             {'values': df.c}),
            ('aggfunc cannot be used without values',
             {'aggfunc': np.mean}),
            ('Not a valid normalize argument',
             {'normalize': '42'}),
            ('Not a valid normalize argument',
             {'normalize': 42}),
            ('Not a valid margins argument',
             {'normalize': 'all', 'margins': 42}),
        ]
        for error, kwargs in cases:
            with pytest.raises(ValueError, match=error):
                pd.crosstab(df.a, df.b, **kwargs)

Source File: test_pivot.py From recruit with Apache License 2.0

6 votes

def test_crosstab_with_empties(self):
        """Counting an all-NaN ``values`` column through crosstab."""
        df = pd.DataFrame({'a': [1, 2, 2, 2, 2], 'b': [3, 3, 4, 4, 4],
                           'c': [np.nan, np.nan, np.nan, np.nan, np.nan]})

        def expected_frame(cells):
            # 2x2 frame indexed by the distinct values of 'a' and 'b'
            return pd.DataFrame(
                cells,
                index=pd.Index([1, 2], name='a', dtype='int64'),
                columns=pd.Index([3, 4], name='b'))

        def count_c(normalize):
            return pd.crosstab(df.a, df.b, values=df.c, aggfunc='count',
                               normalize=normalize)

        # normalized tables are expected to be all zeros
        empty = expected_frame([[0.0, 0.0], [0.0, 0.0]])
        for i in [True, 'index', 'columns']:
            tm.assert_frame_equal(empty, count_c(i))

        # un-normalized: the never-observed (1, 4) cell is expected as NaN
        nans = expected_frame([[0.0, np.nan], [0.0, 0.0]])
        tm.assert_frame_equal(nans, count_c(False))

Source File: contingency_tables.py From Splunking-Crime with GNU Affero General Public License v3.0

6 votes

def from_data(cls, data, shift_zeros=True):
        """
        Alternate constructor: tabulate raw data into a Table.

        Parameters
        ----------
        data : array-like
            Raw data; the contingency table is constructed from its
            first two columns.
        shift_zeros : boolean
            Handed to the constructor unchanged.  If True and any cell
            count is zero, add 0.5 to all values in the table.

        Returns
        -------
        A Table instance.
        """

        if not isinstance(data, pd.DataFrame):
            # non-DataFrame input must support 2-D ``[:, j]`` indexing
            table = pd.crosstab(data[:, 0], data[:, 1])
            return cls(table, shift_zeros)

        table = pd.crosstab(data.iloc[:, 0], data.iloc[:, 1])
        return cls(table, shift_zeros)

Source File: contingency_tables.py From Splunking-Crime with GNU Affero General Public License v3.0

6 votes

def from_data(cls, data, shift_zeros=True):
        """
        Tabulate raw data and wrap the result in a Table object.

        Parameters
        ----------
        data : array-like
            The raw data.  Column 0 defines the rows of the table and
            column 1 defines its columns.
        shift_zeros : boolean
            Given to the constructor as-is.  If True, and if there are
            any zeros in the contingency table, add 0.5 to all four
            cells of the table.
        """

        if isinstance(data, pd.DataFrame):
            pair = (data.iloc[:, 0], data.iloc[:, 1])
        else:
            pair = (data[:, 0], data[:, 1])
        table = pd.crosstab(*pair)
        return cls(table, shift_zeros)

Source File: test_pivot.py From predictive-maintenance-using-machine-learning with Apache License 2.0

6 votes

def test_crosstab_ndarray(self):
        """Raw-array inputs with explicit names equal the Series-based result."""
        a = np.random.randint(0, 5, size=100)
        b = np.random.randint(0, 3, size=100)
        c = np.random.randint(0, 10, size=100)
        df = DataFrame({'a': a, 'b': b, 'c': c})

        # (array call, equivalent Series call), compared pairwise
        comparisons = [
            (crosstab(a, [b, c], rownames=['a'], colnames=('b', 'c')),
             crosstab(df['a'], [df['b'], df['c']])),
            (crosstab([b, c], a, colnames=['a'], rownames=('b', 'c')),
             crosstab([df['b'], df['c']], df['a'])),
        ]
        for result, expected in comparisons:
            tm.assert_frame_equal(result, expected)

        # without names, the axes get the default labels
        result = crosstab(self.df['A'].values, self.df['C'].values)
        assert (result.index.name, result.columns.name) == ('row_0', 'col_0')

Source File: evaluate.py From toad with MIT License

6 votes

def crosstab_data(columns_var, row_var, data, unique_num, *args):
    """Cross-tabulate two prepared variables of ``data``, with 'All' margins.

    Both variables are first run through ``merger_data``; the series it
    returns are cross-tabulated with ``margins=True`` and ``dropna=False``.
    When ``merger_data`` also returns bins for a variable, that axis is
    relabelled through ``rename_columns``, keeping the 'All' margin last.

    Parameters
    ----------
    columns_var, row_var
        Variables placed on the column and row axis respectively; passed
        to ``merger_data``.
    data
        Source data, passed to ``merger_data``.
    unique_num
        Passed to ``merger_data`` unchanged.
    *args
        Four extra positional values are read: ``args[0]`` and ``args[1]``
        go to ``merger_data`` for the column and row variable, while
        ``args[2]`` and ``args[3]`` go to ``rename_columns`` for the
        column and row labels.

    Returns
    -------
    The crosstab result, with axes relabelled where bins were available.
    """
    columns_data, _, columns_bins = merger_data(
        data, columns_var, unique_num, args[0])
    row_data, _, row_bins = merger_data(data, row_var, unique_num, args[1])
    result = pd.crosstab(row_data, columns_data, margins=True, dropna=False)

    # Labels are assigned directly instead of via
    # ``set_axis(..., inplace=True)``: the ``inplace`` keyword was removed
    # in pandas 2.0, while attribute assignment works on every version.
    if columns_bins is not None:
        columns = result.columns.tolist()
        columns.remove('All')
        columns_bins_list = rename_columns(columns, columns_bins, args[2])
        columns_bins_list.append('All')
        result.columns = columns_bins_list
    if row_bins is not None:
        index = result.index.tolist()
        index.remove('All')
        index_bins_list = rename_columns(index, row_bins, args[3])
        index_bins_list.append('All')
        result.index = index_bins_list
    return result


# Write the binning and plots of all high-IV variables to Excel

Source File: crosstabs.py From audit-ai with MIT License

6 votes

def crosstab_df(labels, decisions):
    """
    Cross-tabulate group labels against decisions.

    Parameters
    ------------
    labels : array_like
        categorical values such as ['M', 'F']; they become the rows
    decisions : array_like
        boolean / binary values; they become the columns

    Returns
    --------
    crosstab : the pandas crosstab of labels by decisions, e.g.
                    False True
        TopGroup       5    4
        BottomGroup    3    4
        i.e. cell counts [[5, 4], [3, 4]]
    """
    # Wrapping both inputs in Series lets crosstab accept plain lists.
    label_series = pd.Series(labels)
    decision_series = pd.Series(decisions)
    return pd.crosstab(label_series, decision_series)

Source File: test_pivot.py From recruit with Apache License 2.0

6 votes

def test_crosstab_ndarray(self):
        """ndarray arguments behave like the corresponding DataFrame columns."""
        a = np.random.randint(0, 5, size=100)
        b = np.random.randint(0, 3, size=100)
        c = np.random.randint(0, 10, size=100)

        df = DataFrame({'a': a, 'b': b, 'c': c})
        col_a, col_b, col_c = df['a'], df['b'], df['c']

        # 'a' on the rows, ('b', 'c') on the columns
        tm.assert_frame_equal(
            crosstab(a, [b, c], rownames=['a'], colnames=('b', 'c')),
            crosstab(col_a, [col_b, col_c]))

        # ('b', 'c') on the rows, 'a' on the columns
        tm.assert_frame_equal(
            crosstab([b, c], a, colnames=['a'], rownames=('b', 'c')),
            crosstab([col_b, col_c], col_a))

        # assign arbitrary names
        anonymous = crosstab(self.df['A'].values, self.df['C'].values)
        assert anonymous.index.name == 'row_0'
        assert anonymous.columns.name == 'col_0'

Source File: test_pivot.py From vnpy_crypto with MIT License

6 votes

def test_crosstab_with_empties(self):
        """Check crosstab output when every aggregated value is NaN."""
        df = pd.DataFrame({'a': [1, 2, 2, 2, 2], 'b': [3, 3, 4, 4, 4],
                           'c': [np.nan, np.nan, np.nan, np.nan, np.nan]})
        axes = dict(index=pd.Index([1, 2], name='a', dtype='int64'),
                    columns=pd.Index([3, 4], name='b'))
        shared = dict(values=df.c, aggfunc='count')

        # any kind of normalization is expected to give all zeros
        empty = pd.DataFrame([[0.0, 0.0], [0.0, 0.0]], **axes)
        for i in [True, 'index', 'columns']:
            calculated = pd.crosstab(df.a, df.b, normalize=i, **shared)
            tm.assert_frame_equal(empty, calculated)

        # raw counts: the combination a=1, b=4 never occurs -> NaN expected
        nans = pd.DataFrame([[0.0, np.nan], [0.0, 0.0]], **axes)
        calculated = pd.crosstab(df.a, df.b, normalize=False, **shared)
        tm.assert_frame_equal(nans, calculated)

Source File: test_pivot.py From predictive-maintenance-using-machine-learning with Apache License 2.0

5 votes

def test_crosstab_with_numpy_size(self):
        """crosstab with ``aggfunc=np.size`` and margins (GH 4003)."""
        df = pd.DataFrame({'A': ['one', 'one', 'two', 'three'] * 6,
                           'B': ['A', 'B', 'C'] * 8,
                           'C': ['foo', 'foo', 'foo', 'bar', 'bar', 'bar'] * 4,
                           'D': np.random.randn(24),
                           'E': np.random.randn(24)})
        result = pd.crosstab(index=[df['A'], df['B']],
                             columns=[df['C']],
                             margins=True,
                             aggfunc=np.size,
                             values=df['D'])

        # Row index: every (A, B) pair, followed by the ('All', '') margin.
        expected_index = pd.MultiIndex(
            levels=[['All', 'one', 'three', 'two'], ['', 'A', 'B', 'C']],
            codes=[[1, 1, 1, 2, 2, 2, 3, 3, 3, 0],
                   [1, 2, 3, 1, 2, 3, 1, 2, 3, 0]],
            names=['A', 'B'])
        expected_column = pd.Index(['bar', 'foo', 'All'],
                                   dtype='object',
                                   name='C')

        # The three distinct row patterns, laid out as (bar, foo, All).
        both = [2., 2., 4.]
        bar_only = [2., np.nan, 2.]
        foo_only = [np.nan, 2., 2.]
        expected_data = np.array([both, both, both,
                                  bar_only, foo_only, bar_only,
                                  foo_only, bar_only, foo_only,
                                  [12., 12., 24.]])
        expected = pd.DataFrame(expected_data,
                                index=expected_index,
                                columns=expected_column)
        tm.assert_frame_equal(result, expected)

Source File: _histograms.py From epiScanpy with BSD 3-Clause "New" or "Revised" License

5 votes

def cluster_composition(adata, cluster, condition, xlabel='cell cluster',
                        ylabel='cell count', title=None, save=False):
    """
    Bar plot of cell counts per cluster, one bar series per condition.

    The counts come from a crosstab of ``adata.obs[condition]`` (rows)
    against ``adata.obs[cluster]`` (columns).  The figure is shown with
    ``plt.show()`` and optionally saved first.

    Parameters
    ----------
    adata
        Object whose ``obs`` table holds both annotation columns.
    cluster : str
        Name of the ``obs`` column used for the x axis.
    condition : str
        Name of the ``obs`` column giving one bar series (and legend
        entry) per distinct value.
    xlabel, ylabel : str
        Axis labels.
    title : str or None
        Plot title.
    save : bool or str
        ``False`` (or ``None``) skips saving.  A string ending in '.png'
        or '.pdf' is appended to 'cluster_composition_' to form the file
        name.  Any other value saves to 'cluster_composition.png'.
    """
    contingency_table = pd.crosstab(
        adata.obs[condition],
        adata.obs[cluster],
        margins=True
    )

    categories = sorted(set(adata.obs[cluster]))
    conditions = sorted(set(adata.obs[condition]))

    # One bar series per condition.  Rows are read by position and the
    # trailing 'All' margin column is dropped.
    # NOTE(review): assumes the crosstab rows/columns come back in the same
    # order as the sorted sets above -- confirm for categorical columns.
    p_part = [
        plt.bar(categories, contingency_table.iloc[index][0:-1].values)
        for index in range(len(conditions))
    ]

    # Plots the bar chart
    plt.legend(tuple(p[0] for p in p_part), tuple(conditions))
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    plt.title(title)

    # ``None`` and other non-string values used to crash on ``save.split``;
    # they are now treated as "skip" and "use the default name".
    if save not in (None, False):
        if isinstance(save, str) and save.split('.')[-1] in ('png', 'pdf'):
            filename = '_'.join(['cluster_composition', save])
        else:
            filename = 'cluster_composition.png'
        plt.savefig(filename, dpi=300, bbox_inches="tight")

    plt.show()

Source File: random_forest.py From Speculator with MIT License

5 votes

def confusion_matrix(self, actual, preds):
        """Cross-tabulate actual values (rows, '(A)') against predictions (columns, '(P)')."""
        matrix = crosstab(actual, preds,
                          rownames=['(A)'], colnames=['(P)'])
        return matrix

Source File: NaiveBayes.py From statistical_learning with Apache License 2.0

5 votes

def __init__(self, data, lam=0):
        """Estimate class and per-feature conditional frequencies.

        ``data`` is turned into a DataFrame whose last column is the
        class label.  ``lam`` is added to every count before the counts
        are normalized.

        Sets ``self.y_p`` (normalized class counts) and ``self.cb`` (one
        column-normalized feature-by-class table per feature column).
        """
        frame = pd.DataFrame(data)
        label_col = frame.shape[1] - 1
        target = frame[label_col]

        class_counts = target.groupby(target).count() + lam
        self.y_p = class_counts / class_counts.sum()

        self.cb = []
        for feature_col in range(label_col):
            smoothed = pd.crosstab(frame[feature_col], target) + lam
            self.cb.append(smoothed / smoothed.sum())

Source File: test_pivot.py From predictive-maintenance-using-machine-learning with Apache License 2.0

5 votes

def test_crosstab_unsorted_order(self):
        """crosstab sorts both axes even when the input is unsorted."""
        frame = pd.DataFrame({"b": [3, 1, 2], 'a': [5, 4, 6]},
                             index=['C', 'A', 'B'])

        # Expected: sorted row labels against sorted (b, a) pairs, with
        # exactly one observation per row on the diagonal.
        expected_rows = pd.Index(['A', 'B', 'C'], name='row_0')
        expected_cols = pd.MultiIndex.from_tuples(
            [(1, 4), (2, 6), (3, 5)], names=['b', 'a'])
        expected = pd.DataFrame([[1, 0, 0], [0, 1, 0], [0, 0, 1]],
                                index=expected_rows,
                                columns=expected_cols)

        result = pd.crosstab(frame.index, [frame.b, frame.a])
        tm.assert_frame_equal(result, expected)

Source File: test_pivot.py From predictive-maintenance-using-machine-learning with Apache License 2.0

5 votes

def test_crosstab_tuple_name(self, names):
        """Series names taken from ``names`` carry over to the crosstab axes."""
        left_name, right_name = names[0], names[1]
        left = pd.Series(range(3), name=left_name)
        right = pd.Series(range(1, 4), name=right_name)

        # Reference table: one count per (i, i + 1) pair, zeros elsewhere.
        pairs = pd.MultiIndex.from_arrays([range(3), range(1, 4)],
                                          names=names)
        expected = pd.Series(1, index=pairs).unstack(1, fill_value=0)

        tm.assert_frame_equal(pd.crosstab(left, right), expected)

Source File: test_pivot.py From predictive-maintenance-using-machine-learning with Apache License 2.0

5 votes

def test_crosstab_dup_index_names(self):
        """Crossing a Series with itself keeps the shared name on both axes (GH 13279)."""
        series = pd.Series(range(3), name='foo')
        axis = pd.Index(range(3), name='foo')

        # Each value co-occurs only with itself: an identity matrix.
        expected = pd.DataFrame(np.eye(3, dtype=np.int64),
                                index=axis, columns=axis)
        tm.assert_frame_equal(pd.crosstab(series, series), expected)

Source File: metrics.py From reportgen with MIT License

5 votes

def info_value(X,y,bins='auto'):
    '''Compute the information value (IV) between X and y.

    X is optionally discretised with ``pd.cut`` and then cross-tabulated
    against y.  For a two-class y, with g_k and b_k the counts of the two
    classes in bin k and n_g and n_b the class totals:

        IV = sum_k (g_k/n_g - b_k/n_b) * log2((g_k/n_g) / (b_k/n_b))

    For more than two classes, a one-vs-rest IV is computed for each class
    and the results are summed, weighted by each class's share of the
    observations.

    Parameters
    ----------
    X : pandas Series
        The variable to evaluate.
    y : pandas Series
        The class labels.
    bins : passed to ``pd.cut``, or None
        Binning for X.  ``None`` uses the values of X as they are.

    Returns
    -------
    The information value as a float.
    '''
    # NOTE(review): the default 'auto' is forwarded to pd.cut unchanged --
    # confirm that pd.cut accepts it, otherwise callers must pass bins.
    if bins is not None:
        X=pd.cut(X,bins)
    ctable=pd.crosstab(X,y)
    if ctable.shape[1]==2:
        # Two classes: normalize each class column to a distribution over
        # the bins and apply the formula directly.
        ctable=ctable/ctable.sum()
        IV=((ctable.iloc[:,0]-ctable.iloc[:,1])*np.log2(ctable.iloc[:,0]/ctable.iloc[:,1])).sum()
        return IV

    # Share of observations falling in each class, used as weights below.
    p=ctable.sum()/ctable.sum().sum()
    IV=0
    for cc in ctable.columns:
        # One-vs-rest table: class cc against all other classes combined.
        ctable_bin=pd.concat([ctable[cc],ctable.loc[:,~(ctable.columns==cc)].sum(axis=1)],axis=1)
        ctable_bin=ctable_bin/ctable_bin.sum()
        IV_bin=((ctable_bin.iloc[:,0]-ctable_bin.iloc[:,1])*np.log2(ctable_bin.iloc[:,0]/ctable_bin.iloc[:,1])).sum()
        IV+=IV_bin*p[cc]
    return IV



# Compute the entropy of a discrete random variable

Source File: test_pivot.py From predictive-maintenance-using-machine-learning with Apache License 2.0

5 votes

def test_crosstab_no_overlap(self):
        """Series whose indexes share no labels give an empty crosstab."""
        # GS 10291
        left = pd.Series([1, 2, 3], index=[1, 2, 3])
        right = pd.Series([4, 5, 6], index=[4, 5, 6])

        tm.assert_frame_equal(crosstab(left, right), pd.DataFrame())

Source File: test_pivot.py From predictive-maintenance-using-machine-learning with Apache License 2.0

5 votes

def test_crosstab_dropna(self):
        """``dropna=False`` keeps column combinations that never occur (GH 3820)."""
        row_values = np.array(['foo', 'foo', 'foo', 'bar',
                               'bar', 'foo', 'foo'], dtype=object)
        col_b = np.array(['one', 'one', 'two', 'one',
                          'two', 'two', 'two'], dtype=object)
        col_c = np.array(['dull', 'dull', 'dull', 'dull',
                          'dull', 'shiny', 'shiny'], dtype=object)

        table = pd.crosstab(row_values, [col_b, col_c], rownames=['a'],
                            colnames=['b', 'c'], dropna=False)

        # ('one', 'shiny') is absent from the data but must still be listed.
        expected_columns = MultiIndex.from_tuples(
            [('one', 'dull'), ('one', 'shiny'),
             ('two', 'dull'), ('two', 'shiny')],
            names=['b', 'c'])
        tm.assert_index_equal(table.columns, expected_columns)

Python pandas.crosstab() Examples