Python Examples of pandas.MultiIndex.from_arrays

Source File: test_excel.py From recruit with Apache License 2.0

10 votes

def test_to_excel_multiindex(self, merge_cells, engine, ext):
    """Write a frame with a two-level row index to Excel and read it back.

    The frame taken from ``self.frame`` is given a ``MultiIndex`` in place,
    written a few times to ``self.path`` and finally round-tripped through
    ``read_excel`` with both index columns restored.
    """
    frame = self.frame
    # Two rows of consecutive integers, one row per index level.
    level_values = np.arange(len(frame.index) * 2).reshape(2, -1)
    frame.index = MultiIndex.from_arrays(level_values,
                                         names=['first', 'second'])

    # Writing without a header and with a column subset must not raise.
    frame.to_excel(self.path, 'test1', header=False)
    frame.to_excel(self.path, 'test1', columns=['A', 'B'])

    # round trip
    frame.to_excel(self.path, 'test1', merge_cells=merge_cells)
    workbook = ExcelFile(self.path)
    round_tripped = read_excel(workbook, 'test1', index_col=[0, 1])
    tm.assert_frame_equal(frame, round_tripped)

    # GH13511

Source File: test_duplicates.py From recruit with Apache License 2.0

6 votes

def test_unique_level(idx, level):
    """Check ``unique(level=...)`` against ``get_level_values`` (GH #17896).

    Parameters
    ----------
    idx
        Index under test; presumably a MultiIndex supplied by a fixture,
        since both ``unique(level=...)`` and ``get_level_values`` are
        called on it.
    level
        Level identifier forwarded unchanged to both calls.
    """
    # GH #17896 - with level= argument
    result = idx.unique(level=level)
    expected = idx.get_level_values(level).unique()
    tm.assert_index_equal(result, expected)

    # With already unique level
    mi = MultiIndex.from_arrays([[1, 3, 2, 4], [1, 3, 2, 5]],
                                names=['first', 'second'])
    result = mi.unique(level=level)
    expected = mi.get_level_values(level)
    tm.assert_index_equal(result, expected)

    # With empty MI
    mi = MultiIndex.from_arrays([[], []], names=['first', 'second'])
    result = mi.unique(level=level)
    expected = mi.get_level_values(level)
    # The empty case previously computed both values without comparing them.
    tm.assert_index_equal(result, expected)

Source File: test_excel.py From recruit with Apache License 2.0

6 votes

def test_to_excel_multiindex_cols(self, merge_cells, engine, ext):
    """Round-trip a frame with MultiIndex rows *and* columns through Excel.

    When ``merge_cells`` is falsy the file is read back with a single header
    row, so the expected column labels are rebuilt as dot-joined strings
    before the frames are compared.
    """
    frame = self.frame
    level_values = np.arange(len(frame.index) * 2).reshape(2, -1)
    frame.index = MultiIndex.from_arrays(level_values,
                                         names=['first', 'second'])
    frame.columns = MultiIndex.from_tuples([(40, 1), (40, 2),
                                            (50, 1), (50, 2)])

    # Two header rows only when the cells were merged on write.
    header = [0, 1] if merge_cells else 0

    # round trip
    frame.to_excel(self.path, 'test1', merge_cells=merge_cells)
    workbook = ExcelFile(self.path)
    df = read_excel(workbook, 'test1', header=header, index_col=[0, 1])
    if not merge_cells:
        per_level = frame.columns.format(sparsify=False,
                                         adjoin=False, names=False)
        frame.columns = [".".join(map(str, parts))
                         for parts in zip(*per_level)]
    tm.assert_frame_equal(frame, df)

Source File: test_compare.py From recordlinkage with BSD 3-Clause "New" or "Revised" License

6 votes

def test_dates(self):
    """Compare two datetime columns pairwise and check the scores."""
    left_dates = ['2005/11/23', nan, '2004/11/23', '2010/01/10',
                  '2010/10/30']
    right_dates = ['2005/11/23', '2010/12/31', '2005/11/23', '2010/10/01',
                   '2010/9/30']

    A = DataFrame({'col': to_datetime(left_dates)})
    B = DataFrame({'col': to_datetime(right_dates)})
    # Pair row i of A with row i of B.
    ix = MultiIndex.from_arrays([A.index.values, B.index.values])

    comp = recordlinkage.Compare()
    comp.date('col', 'col')
    scores = comp.compute(ix, A, B)[0]

    expected = Series([1, 0, 0, 0.5, 0.5], index=ix, name=0)
    pdt.assert_series_equal(scores, expected)

Source File: test_compare.py From recordlinkage with BSD 3-Clause "New" or "Revised" License

6 votes

def test_numeric(self):
    """Register the same 'step' numeric comparison three ways.

    The method and offset are passed positionally, by keyword, and mixed;
    every resulting column is expected to hold the same scores.
    """
    A = DataFrame({'col': [1, 1, 1, nan, 0]})
    B = DataFrame({'col': [1, 2, 3, nan, nan]})
    ix = MultiIndex.from_arrays([A.index.values, B.index.values])

    comp = recordlinkage.Compare()
    comp.numeric('col', 'col', 'step', offset=2)
    comp.numeric('col', 'col', method='step', offset=2)
    comp.numeric('col', 'col', 'step', 2)
    result = comp.compute(ix, A, B)

    # Basics: each of the three feature columns carries identical values.
    for column in range(3):
        expected = Series([1.0, 1.0, 1.0, 0.0, 0.0], index=ix, name=column)
        pdt.assert_series_equal(result[column], expected)

Source File: test_compare.py From recordlinkage with BSD 3-Clause "New" or "Revised" License

6 votes

def test_feature_multicolumn_input(self):
    """Run a base compare feature that takes two columns from each frame."""
    # test using classes and the base class
    left_values = ['abc', 'abc', 'abc', 'abc', 'abc']
    right_values = ['abc', 'abd', 'abc', 'abc', '123']

    A = DataFrame({'col1': left_values, 'col2': left_values})
    B = DataFrame({'col1': right_values, 'col2': right_values})
    ix = MultiIndex.from_arrays([A.index.values, B.index.values])

    def all_ones(s1_1, s1_2, s2_1, s2_2):
        # Stub comparison: one score of 1.0 per record pair.
        return np.ones(len(s1_1))

    feature = BaseCompareFeature(['col1', 'col2'], ['col1', 'col2'])
    feature._f_compare_vectorized = all_ones
    feature.compute(ix, A, B)

Source File: pivot.py From recruit with Apache License 2.0

6 votes

def pivot(data, index=None, columns=None, values=None):
    """Reshape ``data`` so that the entries of ``columns`` become columns.

    Parameters
    ----------
    data : DataFrame
        Frame to reshape.
    index : label, optional
        Column whose values become the row labels. When omitted, the
        existing index of ``data`` is used.
    columns : label
        Column whose values become the new column labels.
    values : label, tuple or list-like, optional
        Column(s) that fill the reshaped frame. When omitted, every
        remaining column is kept.

    Returns
    -------
    The result of unstacking the ``columns`` level of the intermediate
    object indexed by (row labels, ``columns``).
    """
    if values is None:
        # Keep the current index (appending to it) when no index is named.
        keep_existing = index is None
        keys = [columns] if keep_existing else [index, columns]
        reshaped = data.set_index(keys, append=keep_existing)
        return reshaped.unstack(columns)

    row_labels = data.index if index is None else data[index]
    combined = MultiIndex.from_arrays([row_labels, data[columns]])

    if isinstance(values, tuple) or not is_list_like(values):
        # A tuple counts as a single column name, just like a scalar label.
        reshaped = data._constructor_sliced(data[values].values,
                                            index=combined)
    else:
        reshaped = data._constructor(data[values].values, index=combined,
                                     columns=values)
    return reshaped.unstack(columns)

Source File: test_compare.py From recordlinkage with BSD 3-Clause "New" or "Revised" License

6 votes

def test_pass_list_of_features(self):
    """Build ``Compare`` from a feature list and check the column labels."""
    from recordlinkage.compare import FrequencyA, VariableA, VariableB

    # setup datasets and record pairs
    values = ['abc', 'abc', 'abc', 'abc', 'abc']
    A = DataFrame({'col': values})
    B = DataFrame({'col': values})
    ix = MultiIndex.from_arrays([np.arange(5), np.arange(5)])

    # test with label indexing type
    comp_label = recordlinkage.Compare(features=[
        VariableA('col', label='y1'),
        VariableB('col', label='y2'),
        FrequencyA('col', label='y3'),
    ])
    result_label = comp_label.compute(ix, A, B)

    # Iterating the result yields its column labels, in feature order.
    assert list(result_label) == ["y1", "y2", "y3"]

Source File: test_compare.py From recordlinkage with BSD 3-Clause "New" or "Revised" License

6 votes

def test_indexing_types(self):
    """Compare 'label' and 'position' indexing on identical data.

    Every value in both frames is the same string, so reversing ``B`` must
    not change the exact-match scores under either indexing type.
    """
    # this test needs improvement
    values = ['abc', 'abc', 'abc', 'abc', 'abc']
    A = DataFrame({'col': values})
    B = DataFrame({'col': values})
    B_reversed = B[::-1].copy()
    ix = MultiIndex.from_arrays([np.arange(5), np.arange(5)])

    def exact_scores(indexing_type):
        # One exact comparison on 'col', computed against the reversed frame.
        comp = recordlinkage.Compare(indexing_type=indexing_type)
        comp.exact('col', 'col')
        return comp.compute(ix, A, B_reversed)

    result_label = exact_scores('label')
    result_position = exact_scores('position')

    assert (result_position.values == 1).all(axis=0)

    pdt.assert_frame_equal(result_label, result_position)

Source File: test_compare.py From recordlinkage with BSD 3-Clause "New" or "Revised" License

6 votes

def test_compare_custom_vectorized_dedup(self):
    """Run a custom vectorized comparison against a single frame.

    Only ``A`` is handed to ``compute``, so the record pairs in ``ix`` refer
    to rows of the same frame. The comparison is checked once without and
    once with an explicit column label.
    """
    A = DataFrame({'col': ['abc', 'abc', 'abc', 'abc', 'abc']})
    ix = MultiIndex.from_arrays([[0, 1, 2, 3, 4], [1, 2, 3, 4, 0]])

    def all_ones(s1, s2):
        # ``np.int`` was removed from NumPy; the builtin ``int`` yields the
        # same default integer dtype.
        return np.ones(len(s1), dtype=int)

    # test without label
    comp = recordlinkage.Compare()
    comp.compare_vectorized(all_ones, 'col', 'col')
    result = comp.compute(ix, A)
    expected = DataFrame([1, 1, 1, 1, 1], index=ix)
    pdt.assert_frame_equal(result, expected)

    # test with label
    comp = recordlinkage.Compare()
    comp.compare_vectorized(all_ones, 'col', 'col', label='test')
    result = comp.compute(ix, A)
    expected = DataFrame([1, 1, 1, 1, 1], index=ix, columns=['test'])
    pdt.assert_frame_equal(result, expected)

Source File: test_ix.py From vnpy_crypto with MIT License

6 votes

def test_ix_general(self):
    """Select by a (float, int) key on a two-level index (GH 2817).

    The lookup is done first on the unsorted frame, where a
    ``PerformanceWarning`` is expected, and again after sorting the index.
    """
    # ix general issues

    # GH 2817
    data = {'amount': {0: 700, 1: 600, 2: 222, 3: 333, 4: 444},
            'col': {0: 3.5, 1: 3.5, 2: 4.0, 3: 4.0, 4: 4.0},
            'year': {0: 2012, 1: 2011, 2: 2012, 3: 2012, 4: 2012}}
    df = DataFrame(data).set_index(keys=['col', 'year'])
    key = (4.0, 2012)

    # emits a PerformanceWarning, ok
    with tm.assert_produces_warning(PerformanceWarning):
        tm.assert_frame_equal(df.loc[key], df.iloc[2:])

    # this is ok
    df.sort_index(inplace=True)
    selected = df.loc[key]

    # col has float dtype, result should be Float64Index
    expected_index = MultiIndex.from_arrays([[4.] * 3, [2012] * 3],
                                            names=['col', 'year'])
    expected = DataFrame({'amount': [222, 333, 444]}, index=expected_index)
    tm.assert_frame_equal(selected, expected)

Source File: test_compare.py From recordlinkage with BSD 3-Clause "New" or "Revised" License

6 votes

def test_compare_custom_instance_type(self):
    """Run a custom vectorized comparison and expect a column of ones.

    NOTE(review): the local ``call`` helper, which asserts that both inputs
    are ``np.ndarray`` instances, is never passed to ``compare_vectorized``;
    a lambda without those assertions is registered instead, so the type
    check never runs. Confirm which types recordlinkage hands to the
    callable before wiring ``call`` in.
    """
    A = DataFrame({'col': ['abc', 'abc', 'abc', 'abc', 'abc']})
    B = DataFrame({'col': ['abc', 'abd', 'abc', 'abc', '123']})
    ix = MultiIndex.from_arrays([A.index.values, B.index.values])

    def call(s1, s2):

        # this should raise on incorrect types
        assert isinstance(s1, np.ndarray)
        assert isinstance(s2, np.ndarray)

        # ``np.int`` was removed from NumPy; builtin ``int`` is equivalent.
        return np.ones(len(s1), dtype=int)

    comp = recordlinkage.Compare()
    comp.compare_vectorized(lambda s1, s2: np.ones(len(s1), dtype=int),
                            'col', 'col')
    result = comp.compute(ix, A, B)
    expected = DataFrame([1, 1, 1, 1, 1], index=ix)
    pdt.assert_frame_equal(result, expected)

Source File: test_compare.py From recordlinkage with BSD 3-Clause "New" or "Revised" License

6 votes

def test_date_incorrect_dtype(self):
    """Expect ``ValueError`` when one side of a date comparison is text.

    ``col`` holds the raw strings and ``col1`` the parsed datetimes; the
    comparison is attempted with the string column on either side.
    """
    left_raw = ['2005/11/23', nan, '2004/11/23', '2010/01/10', '2010/10/30']
    right_raw = ['2005/11/23', '2010/12/31', '2005/11/23', '2010/10/01',
                 '2010/9/30']
    A = DataFrame({'col': left_raw})
    B = DataFrame({'col': right_raw})
    ix = MultiIndex.from_arrays([A.index.values, B.index.values])

    A['col1'] = to_datetime(A['col'])
    B['col1'] = to_datetime(B['col'])

    # String column on the left, datetime column on the right.
    comp = recordlinkage.Compare()
    comp.date('col', 'col1')
    with pytest.raises(ValueError):
        comp.compute(ix, A, B)

    # Datetime column on the left, string column on the right.
    comp = recordlinkage.Compare()
    comp.date('col1', 'col')
    with pytest.raises(ValueError):
        comp.compute(ix, A, B)

Source File: test_compare.py From recordlinkage with BSD 3-Clause "New" or "Revised" License

6 votes

def test_geo(self):
    """Compare coordinates with the 'step' method and an offset of 50."""
    # Utrecht, Amsterdam, Rotterdam (Cities in The Netherlands)
    utrecht = (52.0842455, 5.0124516)
    amsterdam = (52.3747388, 4.7585305)
    rotterdam = (51.9280573, 4.4203581)

    def as_frame(cities):
        # One row per city, latitude and longitude in separate columns.
        return DataFrame({
            'lat': [lat for lat, _ in cities],
            'lng': [lng for _, lng in cities],
        })

    A = as_frame([utrecht, amsterdam, rotterdam])
    B = as_frame([amsterdam, rotterdam, utrecht])
    ix = MultiIndex.from_arrays([A.index.values, B.index.values])

    comp = recordlinkage.Compare()
    comp.geo('lat', 'lng', 'lat', 'lng', method='step',
             offset=50)  # 50 km range
    result = comp.compute(ix, A, B)

    # Missing values as default [36.639460, 54.765854, 44.092472]
    expected = Series([1.0, 0.0, 1.0], index=ix, name=0)
    pdt.assert_series_equal(result[0], expected)

Source File: test_compare.py From recordlinkage with BSD 3-Clause "New" or "Revised" License

6 votes

def test_defaults(self):
    """Check that the default string method matches explicit levenshtein."""
    # default algorithm is levenshtein algorithm
    # test default values are identical to levenshtein
    left = [u'str_abc', u'str_abc', u'str_abc', nan, u'hsdkf']
    right = [u'str_abc', u'str_abd', u'jaskdfsd', nan, nan]
    A = DataFrame({'col': left})
    B = DataFrame({'col': right})
    ix = MultiIndex.from_arrays([A.index.values, B.index.values])

    comp = recordlinkage.Compare()
    comp.string('col', 'col', label='default')
    comp.string('col', 'col', method='levenshtein', label='with_args')
    result = comp.compute(ix, A, B)

    # Drop the series names so only the values and index are compared.
    default_scores = result['default'].rename(None)
    explicit_scores = result['with_args'].rename(None)
    pdt.assert_series_equal(default_scores, explicit_scores)

Source File: test_compare.py From recordlinkage with BSD 3-Clause "New" or "Revised" License

6 votes

def test_fuzzy(self):
    """Run every string similarity method and check the score range.

    With ``missing_value=0`` no score may be null, and all scores must lie
    in the closed interval [0, 1].
    """
    A = DataFrame({
        'col': [u'str_abc', u'str_abc', u'str_abc', nan, u'hsdkf']
    })
    B = DataFrame({'col': [u'str_abc', u'str_abd', u'jaskdfsd', nan, nan]})
    ix = MultiIndex.from_arrays([A.index.values, B.index.values])

    methods = ('jaro', 'q_gram', 'cosine', 'jaro_winkler',
               'dameraulevenshtein', 'levenshtein')
    comp = recordlinkage.Compare()
    for method in methods:
        comp.string('col', 'col', method=method, missing_value=0)
    result = comp.compute(ix, A, B)

    # The leftover debug ``print(result)`` was dropped; the assertions below
    # are the actual checks.
    assert result.notnull().all(1).all(0)
    assert (result[result.notnull()] >= 0).all(1).all(0)
    assert (result[result.notnull()] <= 1).all(1).all(0)

Source File: test_apply.py From recruit with Apache License 2.0

6 votes

def test_groupby_apply_none_first():
    """Apply a function that returns ``None`` for some groups (GH 12824).

    In the first frame the small group comes last, in the second it comes
    first; both orderings are checked.
    """
    # GH 12824. Tests if apply returns None first.
    def first_and_last(x):
        # Groups with fewer than two rows contribute nothing.
        if x.shape[0] < 2:
            return None
        return x.iloc[[0, -1]]

    none_last = DataFrame({'groups': [1, 1, 1, 2], 'vars': [0, 1, 2, 3]})
    none_first = DataFrame({'groups': [1, 2, 2, 2], 'vars': [0, 1, 2, 3]})

    result1 = none_last.groupby('groups').apply(first_and_last)
    result2 = none_first.groupby('groups').apply(first_and_last)

    expected1 = DataFrame(
        {'groups': [1, 1], 'vars': [0, 2]},
        index=MultiIndex.from_arrays([[1, 1], [0, 2]],
                                     names=['groups', None]))
    expected2 = DataFrame(
        {'groups': [2, 2], 'vars': [1, 3]},
        index=MultiIndex.from_arrays([[2, 2], [1, 3]],
                                     names=['groups', None]))
    tm.assert_frame_equal(result1, expected1)
    tm.assert_frame_equal(result2, expected2)

Source File: test_apply.py From recruit with Apache License 2.0

6 votes

def test_apply_multiindex_fail():
    """Group a MultiIndexed frame and add a min-max scaled column."""
    outer = [0, 0, 0, 1, 1, 1]
    inner = [1, 2, 3, 1, 2, 3]
    index = MultiIndex.from_arrays([outer, inner])
    df = DataFrame({'d': [1., 1., 1., 2., 2., 2.],
                    'c': np.tile(['a', 'b', 'c'], 2),
                    'v': np.arange(1., 7.)}, index=index)

    def add_scaled(group):
        # Rescale 'v' to the [0, 1] range within the group.
        values = group['v']
        low = values.min()
        group['v2'] = (values - low) / (values.max() - low)
        return group

    result = df.groupby('d').apply(add_scaled)

    expected = df.copy()
    expected['v2'] = np.tile([0., 0.5, 1], 2)

    tm.assert_frame_equal(result, expected)

Source File: test_value_counts.py From recruit with Apache License 2.0

6 votes

def test_series_groupby_value_counts(df, keys, bins, n, m):
    """Compare grouped ``value_counts`` with ``apply(Series.value_counts)``.

    Every combination of the five boolean switches (group sort, normalize,
    sort, ascending, dropna) is exercised.
    """

    def rebuild_index(obj):
        # Recreate the MultiIndex from its level values, keeping the names.
        levels = [obj.index.get_level_values(i)
                  for i in range(obj.index.nlevels)]
        obj.index = MultiIndex.from_arrays(levels, names=obj.index.names)
        return obj

    for flags in product((False, True), repeat=5):
        isort, normalize, sort, ascending, dropna = flags

        kwargs = {'normalize': normalize, 'sort': sort,
                  'ascending': ascending, 'dropna': dropna, 'bins': bins}

        left = df.groupby(keys, sort=isort)['3rd'].value_counts(**kwargs)

        right = df.groupby(keys, sort=isort)['3rd'].apply(
            Series.value_counts, **kwargs)
        right.index.names = right.index.names[:-1] + ['3rd']

        # have to sort on index because of unstable sort on values
        left = rebuild_index(left)  # xref GH9212
        right = rebuild_index(right)
        tm.assert_series_equal(left.sort_index(), right.sort_index())

Source File: test_nth.py From recruit with Apache License 2.0

6 votes

def test_nth_multi_index_as_expected():
    """Take the first row of each (A, B) group and check the MultiIndex."""
    # PR 9090, related to issue 8979
    # test nth on MultiIndex
    col_a = ['foo', 'foo', 'foo', 'foo', 'bar', 'bar', 'bar', 'bar',
             'foo', 'foo', 'foo']
    col_b = ['one', 'one', 'one', 'two', 'one', 'one', 'one', 'two',
             'two', 'two', 'one']
    col_c = ['dull', 'dull', 'shiny', 'dull', 'dull', 'shiny', 'shiny',
             'dull', 'shiny', 'shiny', 'shiny']
    three_group = DataFrame({'A': col_a, 'B': col_b, 'C': col_c})

    result = three_group.groupby(['A', 'B']).nth(0)

    expected_index = MultiIndex.from_arrays(
        [['bar', 'bar', 'foo', 'foo'], ['one', 'two', 'one', 'two']],
        names=['A', 'B'])
    expected = DataFrame({'C': ['dull', 'dull', 'dull', 'dull']},
                         index=expected_index)
    assert_frame_equal(result, expected)

Source File: test_transform.py From recruit with Apache License 2.0

6 votes

def test_transform_mixed_type():
    """Apply a mutating function per group on a mixed-dtype frame."""
    outer = [0, 0, 0, 1, 1, 1]
    inner = [1, 2, 3, 1, 2, 3]
    index = MultiIndex.from_arrays([outer, inner])
    df = DataFrame({'d': [1., 1., 1., 2., 2., 2.],
                    'c': np.tile(['a', 'b', 'c'], 2),
                    'v': np.arange(1., 7.)}, index=index)

    def f(group):
        # Adds a column to the group, then keeps only its first row.
        group['g'] = group['d'] * 2
        return group[:1]

    grouped = df.groupby('c')
    result = grouped.apply(f)

    assert result['d'].dtype == np.float64

    # this is by definition a mutating operation!
    with option_context('mode.chained_assignment', None):
        for key, group in grouped:
            per_group = f(group)
            assert_frame_equal(per_group, result.loc[key])

Source File: test_compare.py From recordlinkage with BSD 3-Clause "New" or "Revised" License

6 votes

def test_freq_nan(self, missing_value):
    """Check ``Frequency`` scores when the left column contains NaN.

    The left column holds the values 0..9 repeated ten times each, with the
    last ten entries replaced by NaN; those rows are expected to receive
    ``missing_value`` and all others 1/10.
    """
    # the part to test
    from recordlinkage.compare import Frequency

    # data
    left_values = np.repeat(np.arange(10, dtype=np.float64), 10)
    left_values[90:] = np.nan
    right_values = np.tile(np.arange(20, dtype=np.float64), 5)

    # convert to pandas data
    A = DataFrame({'col': left_values})
    B = DataFrame({'col': right_values})
    ix = MultiIndex.from_arrays([A.index.values, B.index.values])

    comp = recordlinkage.Compare()
    comp.add(Frequency(left_on='col', missing_value=missing_value))
    result = comp.compute(ix, A, B)

    expected_scores = np.ones((100, )) / 10
    expected_scores[90:] = missing_value
    pdt.assert_frame_equal(result, DataFrame(expected_scores, index=ix))

Source File: numpy_records.py From arctic with GNU Lesser General Public License v2.1

6 votes

def _index_from_records(self, recarr):
        """Rebuild a pandas index from the index fields of a record array.

        The names of the index fields are read from
        ``recarr.dtype.metadata['index']``. Optional timezone information is
        read from ``recarr.dtype.metadata['index_tz']``: a single value for a
        one-level index, a per-level sequence for a multi-level index.

        Parameters
        ----------
        recarr
            Structured/record array whose dtype carries the metadata
            described above.

        Returns
        -------
        ``Index`` when exactly one index field is listed, otherwise a
        ``MultiIndex`` built from one level per listed field.
        """
        index = recarr.dtype.metadata['index']

        if len(index) == 1:
            # Copy the field so the index does not share memory with recarr.
            rtn = Index(np.copy(recarr[str(index[0])]), name=index[0])
            if isinstance(rtn, DatetimeIndex) and 'index_tz' in recarr.dtype.metadata:
                # Values are localized as UTC, then converted to the recorded zone.
                rtn = rtn.tz_localize('UTC').tz_convert(recarr.dtype.metadata['index_tz'])
        else:
            level_arrays = []
            # Missing 'index_tz' means no level gets timezone handling.
            index_tz = recarr.dtype.metadata.get('index_tz', [])
            for level_no, index_name in enumerate(index):
                # build each index level separately to ensure we end up with the right index dtype
                level = Index(np.copy(recarr[str(index_name)]))
                if level_no < len(index_tz):
                    tz = index_tz[level_no]
                    if tz is not None:
                        if not isinstance(level, DatetimeIndex) and len(level) == 0:
                            # index type information got lost during save as the index was empty, cast back
                            level = DatetimeIndex([], tz=tz)
                        else:
                            level = level.tz_localize('UTC').tz_convert(tz)
                level_arrays.append(level)
            rtn = MultiIndex.from_arrays(level_arrays, names=index)
        return rtn

Source File: test_ix.py From recruit with Apache License 2.0

6 votes

def test_ix_general(self):
    """Look up a (float, int) key on a two-level index (GH 2817).

    The unsorted lookup is expected to emit a ``PerformanceWarning``; after
    sorting the index the same key must return the three matching rows.
    """
    # ix general issues

    # GH 2817
    amount = {0: 700, 1: 600, 2: 222, 3: 333, 4: 444}
    col = {0: 3.5, 1: 3.5, 2: 4.0, 3: 4.0, 4: 4.0}
    year = {0: 2012, 1: 2011, 2: 2012, 3: 2012, 4: 2012}
    df = DataFrame({'amount': amount, 'col': col, 'year': year})
    df = df.set_index(keys=['col', 'year'])
    key = (4.0, 2012)

    # emits a PerformanceWarning, ok
    with tm.assert_produces_warning(PerformanceWarning):
        tm.assert_frame_equal(df.loc[key], df.iloc[2:])

    # this is ok
    df.sort_index(inplace=True)
    res = df.loc[key]

    # col has float dtype, result should be Float64Index
    levels = [[4.] * 3, [2012] * 3]
    index = MultiIndex.from_arrays(levels, names=['col', 'year'])
    expected = DataFrame({'amount': [222, 333, 444]}, index=index)
    tm.assert_frame_equal(res, expected)

Source File: test_contains.py From recruit with Apache License 2.0

6 votes

def test_isin():
    """Check ``MultiIndex.isin`` with tuple values, including an empty index."""
    values = [('foo', 2), ('bar', 3), ('quux', 4)]

    labels = ['qux', 'baz', 'foo', 'bar']
    populated = MultiIndex.from_arrays([labels, np.arange(4)])
    mask = populated.isin(values)
    # Only ('foo', 2) and ('bar', 3) occur in the index.
    tm.assert_numpy_array_equal(mask, np.array([False, False, True, True]))

    # empty, return dtype bool
    empty = MultiIndex.from_arrays([[], []])
    mask = empty.isin(values)
    assert len(mask) == 0
    assert mask.dtype == np.bool_

Source File: test_get_set.py From recruit with Apache License 2.0

6 votes

def test_get_level_values_na():
    """Check ``get_level_values`` on levels containing NaN/NaT or no data."""
    # An all-NaN level next to a mixed object level.
    mixed = pd.MultiIndex.from_arrays([[np.nan] * 3, ['a', np.nan, 1]])
    tm.assert_index_equal(mixed.get_level_values(0),
                          pd.Index([np.nan, np.nan, np.nan]))
    tm.assert_index_equal(mixed.get_level_values(1),
                          pd.Index(['a', np.nan, 1]))

    # A datetime level that contains NaT.
    stamps = pd.DatetimeIndex([0, 1, pd.NaT])
    with_dates = pd.MultiIndex.from_arrays([['a', 'b', 'b'], stamps])
    tm.assert_index_equal(with_dates.get_level_values(1),
                          pd.DatetimeIndex([0, 1, pd.NaT]))

    # Two empty levels.
    empty = pd.MultiIndex.from_arrays([[], []])
    tm.assert_index_equal(empty.get_level_values(0),
                          pd.Index([], dtype=object))

Source File: test_constructor.py From recruit with Apache License 2.0

6 votes

def test_from_arrays_empty():
    """Check ``MultiIndex.from_arrays`` with zero, one and several empty arrays."""
    # 0 levels
    with pytest.raises(ValueError,
                       match="Must pass non-zero number of levels/codes"):
        MultiIndex.from_arrays(arrays=[])

    # 1 level
    single = MultiIndex.from_arrays(arrays=[[]], names=['A'])
    assert isinstance(single, MultiIndex)
    tm.assert_index_equal(single.levels[0], Index([], name='A'))

    # N levels
    for n_levels in (2, 3):
        names = list('ABC')[:n_levels]
        built = MultiIndex.from_arrays(arrays=[[]] * n_levels, names=names)
        expected = MultiIndex(levels=[[]] * n_levels,
                              codes=[[]] * n_levels,
                              names=names)
        tm.assert_index_equal(built, expected)

Source File: test_constructor.py From recruit with Apache License 2.0

6 votes

def test_from_arrays_index_series_categorical():
    """Check that categorical levels survive ``from_arrays`` (GH13743).

    The same pair of categorical indexes (one unordered, one ordered) is fed
    in as indexes, as Series and as their underlying ``.values``.
    """
    # GH13743
    letters = list("abcaab")
    categories = list("bac")
    idx1 = pd.CategoricalIndex(letters, categories=categories, ordered=False)
    idx2 = pd.CategoricalIndex(letters, categories=categories, ordered=True)

    def check_levels(multi):
        # Both levels must come back as the original categorical indexes.
        tm.assert_index_equal(multi.get_level_values(0), idx1)
        tm.assert_index_equal(multi.get_level_values(1), idx2)

    check_levels(pd.MultiIndex.from_arrays([idx1, idx2]))
    check_levels(pd.MultiIndex.from_arrays([pd.Series(idx1),
                                            pd.Series(idx2)]))
    check_levels(pd.MultiIndex.from_arrays([idx1.values, idx2.values]))

Source File: test_transform.py From vnpy_crypto with MIT License

6 votes

def test_transform_mixed_type():
    """Group a mixed-dtype frame and apply a function that mutates groups."""
    level_values = [[0, 0, 0, 1, 1, 1], [1, 2, 3, 1, 2, 3]]
    index = MultiIndex.from_arrays(level_values)
    df = DataFrame({'d': [1., 1., 1., 2., 2., 2.],
                    'c': np.tile(['a', 'b', 'c'], 2),
                    'v': np.arange(1., 7.)}, index=index)

    def f(group):
        # Writes a new column into the group and returns its first row only.
        group['g'] = group['d'] * 2
        return group[:1]

    grouped = df.groupby('c')
    result = grouped.apply(f)

    assert result['d'].dtype == np.float64

    # this is by definition a mutating operation!
    with option_context('mode.chained_assignment', None):
        for key, group in grouped:
            head = f(group)
            assert_frame_equal(head, result.loc[key])

Source File: test_constructor.py From recruit with Apache License 2.0

5 votes

def test_from_arrays_index_series_period():
    """Check that period levels survive ``from_arrays`` as index or Series."""
    idx1 = pd.period_range('2011-01-01', freq='D', periods=3)
    idx2 = pd.period_range('2015-01-01', freq='H', periods=3)

    from_indexes = pd.MultiIndex.from_arrays([idx1, idx2])
    tm.assert_index_equal(from_indexes.get_level_values(0), idx1)
    tm.assert_index_equal(from_indexes.get_level_values(1), idx2)

    from_series = pd.MultiIndex.from_arrays([pd.Series(idx1),
                                             pd.Series(idx2)])
    tm.assert_index_equal(from_series.get_level_values(0), idx1)
    tm.assert_index_equal(from_series.get_level_values(1), idx2)

    # Both construction routes must yield the same index.
    tm.assert_index_equal(from_indexes, from_series)

Python pandas.MultiIndex.from_arrays() Examples