Python pandas.MultiIndex.from_arrays() Examples

The following are 30 code examples of pandas.MultiIndex.from_arrays(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module pandas.MultiIndex , or try the search function .
Example #1
Source File: test_excel.py    From recruit with Apache License 2.0 10 votes vote down vote up
def test_to_excel_multiindex(self, merge_cells, engine, ext):
        frame = self.frame
        arrays = np.arange(len(frame.index) * 2).reshape(2, -1)
        new_index = MultiIndex.from_arrays(arrays,
                                           names=['first', 'second'])
        frame.index = new_index

        frame.to_excel(self.path, 'test1', header=False)
        frame.to_excel(self.path, 'test1', columns=['A', 'B'])

        # round trip
        frame.to_excel(self.path, 'test1', merge_cells=merge_cells)
        reader = ExcelFile(self.path)
        df = read_excel(reader, 'test1', index_col=[0, 1])
        tm.assert_frame_equal(frame, df)

    # GH13511 
Example #2
Source File: test_duplicates.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_unique_level(idx, level):
    # GH #17896 - with level= argument
    result = idx.unique(level=level)
    expected = idx.get_level_values(level).unique()
    tm.assert_index_equal(result, expected)

    # With already unique level
    mi = MultiIndex.from_arrays([[1, 3, 2, 4], [1, 3, 2, 5]],
                                names=['first', 'second'])
    result = mi.unique(level=level)
    expected = mi.get_level_values(level)
    tm.assert_index_equal(result, expected)

    # With empty MI
    mi = MultiIndex.from_arrays([[], []], names=['first', 'second'])
    result = mi.unique(level=level)
    expected = mi.get_level_values(level) 
Example #3
Source File: test_excel.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_to_excel_multiindex_cols(self, merge_cells, engine, ext):
        frame = self.frame
        arrays = np.arange(len(frame.index) * 2).reshape(2, -1)
        new_index = MultiIndex.from_arrays(arrays,
                                           names=['first', 'second'])
        frame.index = new_index

        new_cols_index = MultiIndex.from_tuples([(40, 1), (40, 2),
                                                 (50, 1), (50, 2)])
        frame.columns = new_cols_index
        header = [0, 1]
        if not merge_cells:
            header = 0

        # round trip
        frame.to_excel(self.path, 'test1', merge_cells=merge_cells)
        reader = ExcelFile(self.path)
        df = read_excel(reader, 'test1', header=header,
                        index_col=[0, 1])
        if not merge_cells:
            fm = frame.columns.format(sparsify=False,
                                      adjoin=False, names=False)
            frame.columns = [".".join(map(str, q)) for q in zip(*fm)]
        tm.assert_frame_equal(frame, df) 
Example #4
Source File: test_compare.py    From recordlinkage with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def test_dates(self):

        A = DataFrame({
            'col':
            to_datetime(
                ['2005/11/23', nan, '2004/11/23', '2010/01/10', '2010/10/30'])
        })
        B = DataFrame({
            'col':
            to_datetime([
                '2005/11/23', '2010/12/31', '2005/11/23', '2010/10/01',
                '2010/9/30'
            ])
        })
        ix = MultiIndex.from_arrays([A.index.values, B.index.values])

        comp = recordlinkage.Compare()
        comp.date('col', 'col')
        result = comp.compute(ix, A, B)[0]

        expected = Series([1, 0, 0, 0.5, 0.5], index=ix, name=0)

        pdt.assert_series_equal(result, expected) 
Example #5
Source File: test_compare.py    From recordlinkage with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def test_numeric(self):

        A = DataFrame({'col': [1, 1, 1, nan, 0]})
        B = DataFrame({'col': [1, 2, 3, nan, nan]})
        ix = MultiIndex.from_arrays([A.index.values, B.index.values])

        comp = recordlinkage.Compare()
        comp.numeric('col', 'col', 'step', offset=2)
        comp.numeric('col', 'col', method='step', offset=2)
        comp.numeric('col', 'col', 'step', 2)
        result = comp.compute(ix, A, B)

        # Basics
        expected = Series([1.0, 1.0, 1.0, 0.0, 0.0], index=ix, name=0)
        pdt.assert_series_equal(result[0], expected)

        # Basics
        expected = Series([1.0, 1.0, 1.0, 0.0, 0.0], index=ix, name=1)
        pdt.assert_series_equal(result[1], expected)

        # Basics
        expected = Series([1.0, 1.0, 1.0, 0.0, 0.0], index=ix, name=2)
        pdt.assert_series_equal(result[2], expected) 
Example #6
Source File: test_compare.py    From recordlinkage with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def test_feature_multicolumn_input(self):
        # test using classes and the base class

        A = DataFrame({
            'col1': ['abc', 'abc', 'abc', 'abc', 'abc'],
            'col2': ['abc', 'abc', 'abc', 'abc', 'abc']
        })
        B = DataFrame({
            'col1': ['abc', 'abd', 'abc', 'abc', '123'],
            'col2': ['abc', 'abd', 'abc', 'abc', '123']
        })
        ix = MultiIndex.from_arrays([A.index.values, B.index.values])

        feature = BaseCompareFeature(['col1', 'col2'], ['col1', 'col2'])
        feature._f_compare_vectorized = \
            lambda s1_1, s1_2, s2_1, s2_2: np.ones(len(s1_1))
        feature.compute(ix, A, B) 
Example #7
Source File: pivot.py    From recruit with Apache License 2.0 6 votes vote down vote up
def pivot(data, index=None, columns=None, values=None):
    if values is None:
        cols = [columns] if index is None else [index, columns]
        append = index is None
        indexed = data.set_index(cols, append=append)
    else:
        if index is None:
            index = data.index
        else:
            index = data[index]
        index = MultiIndex.from_arrays([index, data[columns]])

        if is_list_like(values) and not isinstance(values, tuple):
            # Exclude tuple because it is seen as a single column name
            indexed = data._constructor(data[values].values, index=index,
                                        columns=values)
        else:
            indexed = data._constructor_sliced(data[values].values,
                                               index=index)
    return indexed.unstack(columns) 
Example #8
Source File: test_compare.py    From recordlinkage with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def test_pass_list_of_features(self):

        from recordlinkage.compare import FrequencyA, VariableA, VariableB

        # setup datasets and record pairs
        A = DataFrame({'col': ['abc', 'abc', 'abc', 'abc', 'abc']})
        B = DataFrame({'col': ['abc', 'abc', 'abc', 'abc', 'abc']})
        ix = MultiIndex.from_arrays([np.arange(5), np.arange(5)])

        # test with label indexing type

        features = [
            VariableA('col', label='y1'),
            VariableB('col', label='y2'),
            FrequencyA('col', label='y3')
        ]
        comp_label = recordlinkage.Compare(features=features)
        result_label = comp_label.compute(ix, A, B)

        assert list(result_label) == ["y1", "y2", "y3"] 
Example #9
Source File: test_compare.py    From recordlinkage with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def test_indexing_types(self):
        # test the two types of indexing

        # this test needs improvement

        A = DataFrame({'col': ['abc', 'abc', 'abc', 'abc', 'abc']})
        B = DataFrame({'col': ['abc', 'abc', 'abc', 'abc', 'abc']})
        B_reversed = B[::-1].copy()
        ix = MultiIndex.from_arrays([np.arange(5), np.arange(5)])

        # test with label indexing type
        comp_label = recordlinkage.Compare(indexing_type='label')
        comp_label.exact('col', 'col')
        result_label = comp_label.compute(ix, A, B_reversed)

        # test with position indexing type
        comp_position = recordlinkage.Compare(indexing_type='position')
        comp_position.exact('col', 'col')
        result_position = comp_position.compute(ix, A, B_reversed)

        assert (result_position.values == 1).all(axis=0)

        pdt.assert_frame_equal(result_label, result_position) 
Example #10
Source File: test_compare.py    From recordlinkage with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def test_compare_custom_vectorized_dedup(self):

        A = DataFrame({'col': ['abc', 'abc', 'abc', 'abc', 'abc']})
        ix = MultiIndex.from_arrays([[0, 1, 2, 3, 4], [1, 2, 3, 4, 0]])

        # test without label
        comp = recordlinkage.Compare()
        comp.compare_vectorized(lambda s1, s2: np.ones(len(s1), dtype=np.int),
                                'col', 'col')
        result = comp.compute(ix, A)
        expected = DataFrame([1, 1, 1, 1, 1], index=ix)
        pdt.assert_frame_equal(result, expected)

        # test with label
        comp = recordlinkage.Compare()
        comp.compare_vectorized(
            lambda s1, s2: np.ones(len(s1), dtype=np.int),
            'col',
            'col',
            label='test')
        result = comp.compute(ix, A)
        expected = DataFrame([1, 1, 1, 1, 1], index=ix, columns=['test'])
        pdt.assert_frame_equal(result, expected) 
Example #11
Source File: test_ix.py    From vnpy_crypto with MIT License 6 votes vote down vote up
def test_ix_general(self):

        # ix general issues

        # GH 2817
        data = {'amount': {0: 700, 1: 600, 2: 222, 3: 333, 4: 444},
                'col': {0: 3.5, 1: 3.5, 2: 4.0, 3: 4.0, 4: 4.0},
                'year': {0: 2012, 1: 2011, 2: 2012, 3: 2012, 4: 2012}}
        df = DataFrame(data).set_index(keys=['col', 'year'])
        key = 4.0, 2012

        # emits a PerformanceWarning, ok
        with tm.assert_produces_warning(PerformanceWarning):
            tm.assert_frame_equal(df.loc[key], df.iloc[2:])

        # this is ok
        df.sort_index(inplace=True)
        res = df.loc[key]

        # col has float dtype, result should be Float64Index
        index = MultiIndex.from_arrays([[4.] * 3, [2012] * 3],
                                       names=['col', 'year'])
        expected = DataFrame({'amount': [222, 333, 444]}, index=index)
        tm.assert_frame_equal(res, expected) 
Example #12
Source File: test_compare.py    From recordlinkage with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def test_compare_custom_instance_type(self):

        A = DataFrame({'col': ['abc', 'abc', 'abc', 'abc', 'abc']})
        B = DataFrame({'col': ['abc', 'abd', 'abc', 'abc', '123']})
        ix = MultiIndex.from_arrays([A.index.values, B.index.values])

        def call(s1, s2):

            # this should raise on incorrect types
            assert isinstance(s1, np.ndarray)
            assert isinstance(s2, np.ndarray)

            return np.ones(len(s1), dtype=np.int)

        comp = recordlinkage.Compare()
        comp.compare_vectorized(lambda s1, s2: np.ones(len(s1), dtype=np.int),
                                'col', 'col')
        result = comp.compute(ix, A, B)
        expected = DataFrame([1, 1, 1, 1, 1], index=ix)
        pdt.assert_frame_equal(result, expected) 
Example #13
Source File: test_compare.py    From recordlinkage with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def test_date_incorrect_dtype(self):

        A = DataFrame({
            'col':
            ['2005/11/23', nan, '2004/11/23', '2010/01/10', '2010/10/30']
        })
        B = DataFrame({
            'col': [
                '2005/11/23', '2010/12/31', '2005/11/23', '2010/10/01',
                '2010/9/30'
            ]
        })
        ix = MultiIndex.from_arrays([A.index.values, B.index.values])

        A['col1'] = to_datetime(A['col'])
        B['col1'] = to_datetime(B['col'])

        comp = recordlinkage.Compare()
        comp.date('col', 'col1')
        pytest.raises(ValueError, comp.compute, ix, A, B)

        comp = recordlinkage.Compare()
        comp.date('col1', 'col')
        pytest.raises(ValueError, comp.compute, ix, A, B) 
Example #14
Source File: test_compare.py    From recordlinkage with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def test_geo(self):

        # Utrecht, Amsterdam, Rotterdam (Cities in The Netherlands)
        A = DataFrame({
            'lat': [52.0842455, 52.3747388, 51.9280573],
            'lng': [5.0124516, 4.7585305, 4.4203581]
        })
        B = DataFrame({
            'lat': [52.3747388, 51.9280573, 52.0842455],
            'lng': [4.7585305, 4.4203581, 5.0124516]
        })
        ix = MultiIndex.from_arrays([A.index.values, B.index.values])

        comp = recordlinkage.Compare()
        comp.geo(
            'lat', 'lng', 'lat', 'lng', method='step',
            offset=50)  # 50 km range
        result = comp.compute(ix, A, B)

        # Missing values as default [36.639460, 54.765854, 44.092472]
        expected = Series([1.0, 0.0, 1.0], index=ix, name=0)
        pdt.assert_series_equal(result[0], expected) 
Example #15
Source File: test_compare.py    From recordlinkage with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def test_defaults(self):

        # default algorithm is levenshtein algorithm
        # test default values are indentical to levenshtein

        A = DataFrame({
            'col': [u'str_abc', u'str_abc', u'str_abc', nan, u'hsdkf']
        })
        B = DataFrame({'col': [u'str_abc', u'str_abd', u'jaskdfsd', nan, nan]})
        ix = MultiIndex.from_arrays([A.index.values, B.index.values])

        comp = recordlinkage.Compare()
        comp.string('col', 'col', label='default')
        comp.string('col', 'col', method='levenshtein', label='with_args')
        result = comp.compute(ix, A, B)

        pdt.assert_series_equal(
            result['default'].rename(None),
            result['with_args'].rename(None)
        ) 
Example #16
Source File: test_compare.py    From recordlinkage with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def test_fuzzy(self):

        A = DataFrame({
            'col': [u'str_abc', u'str_abc', u'str_abc', nan, u'hsdkf']
        })
        B = DataFrame({'col': [u'str_abc', u'str_abd', u'jaskdfsd', nan, nan]})
        ix = MultiIndex.from_arrays([A.index.values, B.index.values])

        comp = recordlinkage.Compare()
        comp.string('col', 'col', method='jaro', missing_value=0)
        comp.string('col', 'col', method='q_gram', missing_value=0)
        comp.string('col', 'col', method='cosine', missing_value=0)
        comp.string('col', 'col', method='jaro_winkler', missing_value=0)
        comp.string('col', 'col', method='dameraulevenshtein', missing_value=0)
        comp.string('col', 'col', method='levenshtein', missing_value=0)
        result = comp.compute(ix, A, B)

        print(result)

        assert result.notnull().all(1).all(0)
        assert (result[result.notnull()] >= 0).all(1).all(0)
        assert (result[result.notnull()] <= 1).all(1).all(0) 
Example #17
Source File: test_apply.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_groupby_apply_none_first():
    # GH 12824. Tests if apply returns None first.
    test_df1 = DataFrame({'groups': [1, 1, 1, 2], 'vars': [0, 1, 2, 3]})
    test_df2 = DataFrame({'groups': [1, 2, 2, 2], 'vars': [0, 1, 2, 3]})

    def test_func(x):
        if x.shape[0] < 2:
            return None
        return x.iloc[[0, -1]]

    result1 = test_df1.groupby('groups').apply(test_func)
    result2 = test_df2.groupby('groups').apply(test_func)
    index1 = MultiIndex.from_arrays([[1, 1], [0, 2]],
                                    names=['groups', None])
    index2 = MultiIndex.from_arrays([[2, 2], [1, 3]],
                                    names=['groups', None])
    expected1 = DataFrame({'groups': [1, 1], 'vars': [0, 2]},
                          index=index1)
    expected2 = DataFrame({'groups': [2, 2], 'vars': [1, 3]},
                          index=index2)
    tm.assert_frame_equal(result1, expected1)
    tm.assert_frame_equal(result2, expected2) 
Example #18
Source File: test_apply.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_apply_multiindex_fail():
    index = MultiIndex.from_arrays([[0, 0, 0, 1, 1, 1], [1, 2, 3, 1, 2, 3]
                                    ])
    df = DataFrame({'d': [1., 1., 1., 2., 2., 2.],
                    'c': np.tile(['a', 'b', 'c'], 2),
                    'v': np.arange(1., 7.)}, index=index)

    def f(group):
        v = group['v']
        group['v2'] = (v - v.min()) / (v.max() - v.min())
        return group

    result = df.groupby('d').apply(f)

    expected = df.copy()
    expected['v2'] = np.tile([0., 0.5, 1], 2)

    tm.assert_frame_equal(result, expected) 
Example #19
Source File: test_value_counts.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_series_groupby_value_counts(df, keys, bins, n, m):

    def rebuild_index(df):
        arr = list(map(df.index.get_level_values, range(df.index.nlevels)))
        df.index = MultiIndex.from_arrays(arr, names=df.index.names)
        return df

    for isort, normalize, sort, ascending, dropna \
            in product((False, True), repeat=5):

        kwargs = dict(normalize=normalize, sort=sort,
                      ascending=ascending, dropna=dropna, bins=bins)

        gr = df.groupby(keys, sort=isort)
        left = gr['3rd'].value_counts(**kwargs)

        gr = df.groupby(keys, sort=isort)
        right = gr['3rd'].apply(Series.value_counts, **kwargs)
        right.index.names = right.index.names[:-1] + ['3rd']

        # have to sort on index because of unstable sort on values
        left, right = map(rebuild_index, (left, right))  # xref GH9212
        tm.assert_series_equal(left.sort_index(), right.sort_index()) 
Example #20
Source File: test_nth.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_nth_multi_index_as_expected():
    # PR 9090, related to issue 8979
    # test nth on MultiIndex
    three_group = DataFrame(
        {'A': ['foo', 'foo', 'foo', 'foo', 'bar', 'bar', 'bar', 'bar',
               'foo', 'foo', 'foo'],
         'B': ['one', 'one', 'one', 'two', 'one', 'one', 'one', 'two',
               'two', 'two', 'one'],
         'C': ['dull', 'dull', 'shiny', 'dull', 'dull', 'shiny', 'shiny',
               'dull', 'shiny', 'shiny', 'shiny']})
    grouped = three_group.groupby(['A', 'B'])
    result = grouped.nth(0)
    expected = DataFrame(
        {'C': ['dull', 'dull', 'dull', 'dull']},
        index=MultiIndex.from_arrays([['bar', 'bar', 'foo', 'foo'],
                                      ['one', 'two', 'one', 'two']],
                                     names=['A', 'B']))
    assert_frame_equal(result, expected) 
Example #21
Source File: test_transform.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_transform_mixed_type():
    index = MultiIndex.from_arrays([[0, 0, 0, 1, 1, 1], [1, 2, 3, 1, 2, 3]
                                    ])
    df = DataFrame({'d': [1., 1., 1., 2., 2., 2.],
                    'c': np.tile(['a', 'b', 'c'], 2),
                    'v': np.arange(1., 7.)}, index=index)

    def f(group):
        group['g'] = group['d'] * 2
        return group[:1]

    grouped = df.groupby('c')
    result = grouped.apply(f)

    assert result['d'].dtype == np.float64

    # this is by definition a mutating operation!
    with option_context('mode.chained_assignment', None):
        for key, group in grouped:
            res = f(group)
            assert_frame_equal(res, result.loc[key]) 
Example #22
Source File: test_compare.py    From recordlinkage with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def test_freq_nan(self, missing_value):

        # data
        array_repeated = np.repeat(np.arange(10, dtype=np.float64), 10)
        array_repeated[90:] = np.nan
        array_tiled = np.tile(np.arange(20, dtype=np.float64), 5)

        # convert to pandas data
        A = DataFrame({'col': array_repeated})
        B = DataFrame({'col': array_tiled})
        ix = MultiIndex.from_arrays([A.index.values, B.index.values])

        # the part to test
        from recordlinkage.compare import Frequency

        comp = recordlinkage.Compare()
        comp.add(Frequency(left_on='col', missing_value=missing_value))
        result = comp.compute(ix, A, B)

        expected_np = np.ones((100, )) / 10
        expected_np[90:] = missing_value
        expected = DataFrame(expected_np, index=ix)
        pdt.assert_frame_equal(result, expected) 
Example #23
Source File: numpy_records.py    From arctic with GNU Lesser General Public License v2.1 6 votes vote down vote up
def _index_from_records(self, recarr):
        index = recarr.dtype.metadata['index']

        if len(index) == 1:
            rtn = Index(np.copy(recarr[str(index[0])]), name=index[0])
            if isinstance(rtn, DatetimeIndex) and 'index_tz' in recarr.dtype.metadata:
                rtn = rtn.tz_localize('UTC').tz_convert(recarr.dtype.metadata['index_tz'])
        else:
            level_arrays = []
            index_tz = recarr.dtype.metadata.get('index_tz', [])
            for level_no, index_name in enumerate(index):
                # build each index level separately to ensure we end up with the right index dtype
                level = Index(np.copy(recarr[str(index_name)]))
                if level_no < len(index_tz):
                    tz = index_tz[level_no]
                    if tz is not None:
                        if not isinstance(level, DatetimeIndex) and len(level) == 0:
                            # index type information got lost during save as the index was empty, cast back
                            level = DatetimeIndex([], tz=tz)
                        else:
                            level = level.tz_localize('UTC').tz_convert(tz)
                level_arrays.append(level)
            rtn = MultiIndex.from_arrays(level_arrays, names=index)
        return rtn 
Example #24
Source File: test_ix.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_ix_general(self):

        # ix general issues

        # GH 2817
        data = {'amount': {0: 700, 1: 600, 2: 222, 3: 333, 4: 444},
                'col': {0: 3.5, 1: 3.5, 2: 4.0, 3: 4.0, 4: 4.0},
                'year': {0: 2012, 1: 2011, 2: 2012, 3: 2012, 4: 2012}}
        df = DataFrame(data).set_index(keys=['col', 'year'])
        key = 4.0, 2012

        # emits a PerformanceWarning, ok
        with tm.assert_produces_warning(PerformanceWarning):
            tm.assert_frame_equal(df.loc[key], df.iloc[2:])

        # this is ok
        df.sort_index(inplace=True)
        res = df.loc[key]

        # col has float dtype, result should be Float64Index
        index = MultiIndex.from_arrays([[4.] * 3, [2012] * 3],
                                       names=['col', 'year'])
        expected = DataFrame({'amount': [222, 333, 444]}, index=index)
        tm.assert_frame_equal(res, expected) 
Example #25
Source File: test_contains.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_isin():
    values = [('foo', 2), ('bar', 3), ('quux', 4)]

    idx = MultiIndex.from_arrays([
        ['qux', 'baz', 'foo', 'bar'],
        np.arange(4)
    ])
    result = idx.isin(values)
    expected = np.array([False, False, True, True])
    tm.assert_numpy_array_equal(result, expected)

    # empty, return dtype bool
    idx = MultiIndex.from_arrays([[], []])
    result = idx.isin(values)
    assert len(result) == 0
    assert result.dtype == np.bool_ 
Example #26
Source File: test_get_set.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_get_level_values_na():
    arrays = [[np.nan, np.nan, np.nan], ['a', np.nan, 1]]
    index = pd.MultiIndex.from_arrays(arrays)
    result = index.get_level_values(0)
    expected = pd.Index([np.nan, np.nan, np.nan])
    tm.assert_index_equal(result, expected)

    result = index.get_level_values(1)
    expected = pd.Index(['a', np.nan, 1])
    tm.assert_index_equal(result, expected)

    arrays = [['a', 'b', 'b'], pd.DatetimeIndex([0, 1, pd.NaT])]
    index = pd.MultiIndex.from_arrays(arrays)
    result = index.get_level_values(1)
    expected = pd.DatetimeIndex([0, 1, pd.NaT])
    tm.assert_index_equal(result, expected)

    arrays = [[], []]
    index = pd.MultiIndex.from_arrays(arrays)
    result = index.get_level_values(0)
    expected = pd.Index([], dtype=object)
    tm.assert_index_equal(result, expected) 
Example #27
Source File: test_constructor.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_from_arrays_empty():
    # 0 levels
    msg = "Must pass non-zero number of levels/codes"
    with pytest.raises(ValueError, match=msg):
        MultiIndex.from_arrays(arrays=[])

    # 1 level
    result = MultiIndex.from_arrays(arrays=[[]], names=['A'])
    assert isinstance(result, MultiIndex)
    expected = Index([], name='A')
    tm.assert_index_equal(result.levels[0], expected)

    # N levels
    for N in [2, 3]:
        arrays = [[]] * N
        names = list('ABC')[:N]
        result = MultiIndex.from_arrays(arrays=arrays, names=names)
        expected = MultiIndex(levels=[[]] * N, codes=[[]] * N,
                              names=names)
        tm.assert_index_equal(result, expected) 
Example #28
Source File: test_constructor.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_from_arrays_index_series_categorical():
    # GH13743
    idx1 = pd.CategoricalIndex(list("abcaab"), categories=list("bac"),
                               ordered=False)
    idx2 = pd.CategoricalIndex(list("abcaab"), categories=list("bac"),
                               ordered=True)

    result = pd.MultiIndex.from_arrays([idx1, idx2])
    tm.assert_index_equal(result.get_level_values(0), idx1)
    tm.assert_index_equal(result.get_level_values(1), idx2)

    result2 = pd.MultiIndex.from_arrays([pd.Series(idx1), pd.Series(idx2)])
    tm.assert_index_equal(result2.get_level_values(0), idx1)
    tm.assert_index_equal(result2.get_level_values(1), idx2)

    result3 = pd.MultiIndex.from_arrays([idx1.values, idx2.values])
    tm.assert_index_equal(result3.get_level_values(0), idx1)
    tm.assert_index_equal(result3.get_level_values(1), idx2) 
Example #29
Source File: test_transform.py    From vnpy_crypto with MIT License 6 votes vote down vote up
def test_transform_mixed_type():
    index = MultiIndex.from_arrays([[0, 0, 0, 1, 1, 1], [1, 2, 3, 1, 2, 3]
                                    ])
    df = DataFrame({'d': [1., 1., 1., 2., 2., 2.],
                    'c': np.tile(['a', 'b', 'c'], 2),
                    'v': np.arange(1., 7.)}, index=index)

    def f(group):
        group['g'] = group['d'] * 2
        return group[:1]

    grouped = df.groupby('c')
    result = grouped.apply(f)

    assert result['d'].dtype == np.float64

    # this is by definition a mutating operation!
    with option_context('mode.chained_assignment', None):
        for key, group in grouped:
            res = f(group)
            assert_frame_equal(res, result.loc[key]) 
Example #30
Source File: test_constructor.py    From recruit with Apache License 2.0 5 votes vote down vote up
def test_from_arrays_index_series_period():
    idx1 = pd.period_range('2011-01-01', freq='D', periods=3)
    idx2 = pd.period_range('2015-01-01', freq='H', periods=3)
    result = pd.MultiIndex.from_arrays([idx1, idx2])
    tm.assert_index_equal(result.get_level_values(0), idx1)
    tm.assert_index_equal(result.get_level_values(1), idx2)

    result2 = pd.MultiIndex.from_arrays([pd.Series(idx1), pd.Series(idx2)])
    tm.assert_index_equal(result2.get_level_values(0), idx1)
    tm.assert_index_equal(result2.get_level_values(1), idx2)

    tm.assert_index_equal(result, result2)