Python pandas.MultiIndex.from_arrays() Examples
The following are 30
code examples of pandas.MultiIndex.from_arrays().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module
pandas.MultiIndex
, or try the search function
.
Example #1
Source File: test_excel.py From recruit with Apache License 2.0 | 10 votes |
def test_to_excel_multiindex(self, merge_cells, engine, ext): frame = self.frame arrays = np.arange(len(frame.index) * 2).reshape(2, -1) new_index = MultiIndex.from_arrays(arrays, names=['first', 'second']) frame.index = new_index frame.to_excel(self.path, 'test1', header=False) frame.to_excel(self.path, 'test1', columns=['A', 'B']) # round trip frame.to_excel(self.path, 'test1', merge_cells=merge_cells) reader = ExcelFile(self.path) df = read_excel(reader, 'test1', index_col=[0, 1]) tm.assert_frame_equal(frame, df) # GH13511
Example #2
Source File: test_duplicates.py From recruit with Apache License 2.0 | 6 votes |
def test_unique_level(idx, level): # GH #17896 - with level= argument result = idx.unique(level=level) expected = idx.get_level_values(level).unique() tm.assert_index_equal(result, expected) # With already unique level mi = MultiIndex.from_arrays([[1, 3, 2, 4], [1, 3, 2, 5]], names=['first', 'second']) result = mi.unique(level=level) expected = mi.get_level_values(level) tm.assert_index_equal(result, expected) # With empty MI mi = MultiIndex.from_arrays([[], []], names=['first', 'second']) result = mi.unique(level=level) expected = mi.get_level_values(level)
Example #3
Source File: test_excel.py From recruit with Apache License 2.0 | 6 votes |
def test_to_excel_multiindex_cols(self, merge_cells, engine, ext): frame = self.frame arrays = np.arange(len(frame.index) * 2).reshape(2, -1) new_index = MultiIndex.from_arrays(arrays, names=['first', 'second']) frame.index = new_index new_cols_index = MultiIndex.from_tuples([(40, 1), (40, 2), (50, 1), (50, 2)]) frame.columns = new_cols_index header = [0, 1] if not merge_cells: header = 0 # round trip frame.to_excel(self.path, 'test1', merge_cells=merge_cells) reader = ExcelFile(self.path) df = read_excel(reader, 'test1', header=header, index_col=[0, 1]) if not merge_cells: fm = frame.columns.format(sparsify=False, adjoin=False, names=False) frame.columns = [".".join(map(str, q)) for q in zip(*fm)] tm.assert_frame_equal(frame, df)
Example #4
Source File: test_compare.py From recordlinkage with BSD 3-Clause "New" or "Revised" License | 6 votes |
def test_dates(self): A = DataFrame({ 'col': to_datetime( ['2005/11/23', nan, '2004/11/23', '2010/01/10', '2010/10/30']) }) B = DataFrame({ 'col': to_datetime([ '2005/11/23', '2010/12/31', '2005/11/23', '2010/10/01', '2010/9/30' ]) }) ix = MultiIndex.from_arrays([A.index.values, B.index.values]) comp = recordlinkage.Compare() comp.date('col', 'col') result = comp.compute(ix, A, B)[0] expected = Series([1, 0, 0, 0.5, 0.5], index=ix, name=0) pdt.assert_series_equal(result, expected)
Example #5
Source File: test_compare.py From recordlinkage with BSD 3-Clause "New" or "Revised" License | 6 votes |
def test_numeric(self): A = DataFrame({'col': [1, 1, 1, nan, 0]}) B = DataFrame({'col': [1, 2, 3, nan, nan]}) ix = MultiIndex.from_arrays([A.index.values, B.index.values]) comp = recordlinkage.Compare() comp.numeric('col', 'col', 'step', offset=2) comp.numeric('col', 'col', method='step', offset=2) comp.numeric('col', 'col', 'step', 2) result = comp.compute(ix, A, B) # Basics expected = Series([1.0, 1.0, 1.0, 0.0, 0.0], index=ix, name=0) pdt.assert_series_equal(result[0], expected) # Basics expected = Series([1.0, 1.0, 1.0, 0.0, 0.0], index=ix, name=1) pdt.assert_series_equal(result[1], expected) # Basics expected = Series([1.0, 1.0, 1.0, 0.0, 0.0], index=ix, name=2) pdt.assert_series_equal(result[2], expected)
Example #6
Source File: test_compare.py From recordlinkage with BSD 3-Clause "New" or "Revised" License | 6 votes |
def test_feature_multicolumn_input(self): # test using classes and the base class A = DataFrame({ 'col1': ['abc', 'abc', 'abc', 'abc', 'abc'], 'col2': ['abc', 'abc', 'abc', 'abc', 'abc'] }) B = DataFrame({ 'col1': ['abc', 'abd', 'abc', 'abc', '123'], 'col2': ['abc', 'abd', 'abc', 'abc', '123'] }) ix = MultiIndex.from_arrays([A.index.values, B.index.values]) feature = BaseCompareFeature(['col1', 'col2'], ['col1', 'col2']) feature._f_compare_vectorized = \ lambda s1_1, s1_2, s2_1, s2_2: np.ones(len(s1_1)) feature.compute(ix, A, B)
Example #7
Source File: pivot.py From recruit with Apache License 2.0 | 6 votes |
def pivot(data, index=None, columns=None, values=None): if values is None: cols = [columns] if index is None else [index, columns] append = index is None indexed = data.set_index(cols, append=append) else: if index is None: index = data.index else: index = data[index] index = MultiIndex.from_arrays([index, data[columns]]) if is_list_like(values) and not isinstance(values, tuple): # Exclude tuple because it is seen as a single column name indexed = data._constructor(data[values].values, index=index, columns=values) else: indexed = data._constructor_sliced(data[values].values, index=index) return indexed.unstack(columns)
Example #8
Source File: test_compare.py From recordlinkage with BSD 3-Clause "New" or "Revised" License | 6 votes |
def test_pass_list_of_features(self): from recordlinkage.compare import FrequencyA, VariableA, VariableB # setup datasets and record pairs A = DataFrame({'col': ['abc', 'abc', 'abc', 'abc', 'abc']}) B = DataFrame({'col': ['abc', 'abc', 'abc', 'abc', 'abc']}) ix = MultiIndex.from_arrays([np.arange(5), np.arange(5)]) # test with label indexing type features = [ VariableA('col', label='y1'), VariableB('col', label='y2'), FrequencyA('col', label='y3') ] comp_label = recordlinkage.Compare(features=features) result_label = comp_label.compute(ix, A, B) assert list(result_label) == ["y1", "y2", "y3"]
Example #9
Source File: test_compare.py From recordlinkage with BSD 3-Clause "New" or "Revised" License | 6 votes |
def test_indexing_types(self): # test the two types of indexing # this test needs improvement A = DataFrame({'col': ['abc', 'abc', 'abc', 'abc', 'abc']}) B = DataFrame({'col': ['abc', 'abc', 'abc', 'abc', 'abc']}) B_reversed = B[::-1].copy() ix = MultiIndex.from_arrays([np.arange(5), np.arange(5)]) # test with label indexing type comp_label = recordlinkage.Compare(indexing_type='label') comp_label.exact('col', 'col') result_label = comp_label.compute(ix, A, B_reversed) # test with position indexing type comp_position = recordlinkage.Compare(indexing_type='position') comp_position.exact('col', 'col') result_position = comp_position.compute(ix, A, B_reversed) assert (result_position.values == 1).all(axis=0) pdt.assert_frame_equal(result_label, result_position)
Example #10
Source File: test_compare.py From recordlinkage with BSD 3-Clause "New" or "Revised" License | 6 votes |
def test_compare_custom_vectorized_dedup(self): A = DataFrame({'col': ['abc', 'abc', 'abc', 'abc', 'abc']}) ix = MultiIndex.from_arrays([[0, 1, 2, 3, 4], [1, 2, 3, 4, 0]]) # test without label comp = recordlinkage.Compare() comp.compare_vectorized(lambda s1, s2: np.ones(len(s1), dtype=np.int), 'col', 'col') result = comp.compute(ix, A) expected = DataFrame([1, 1, 1, 1, 1], index=ix) pdt.assert_frame_equal(result, expected) # test with label comp = recordlinkage.Compare() comp.compare_vectorized( lambda s1, s2: np.ones(len(s1), dtype=np.int), 'col', 'col', label='test') result = comp.compute(ix, A) expected = DataFrame([1, 1, 1, 1, 1], index=ix, columns=['test']) pdt.assert_frame_equal(result, expected)
Example #11
Source File: test_ix.py From vnpy_crypto with MIT License | 6 votes |
def test_ix_general(self): # ix general issues # GH 2817 data = {'amount': {0: 700, 1: 600, 2: 222, 3: 333, 4: 444}, 'col': {0: 3.5, 1: 3.5, 2: 4.0, 3: 4.0, 4: 4.0}, 'year': {0: 2012, 1: 2011, 2: 2012, 3: 2012, 4: 2012}} df = DataFrame(data).set_index(keys=['col', 'year']) key = 4.0, 2012 # emits a PerformanceWarning, ok with tm.assert_produces_warning(PerformanceWarning): tm.assert_frame_equal(df.loc[key], df.iloc[2:]) # this is ok df.sort_index(inplace=True) res = df.loc[key] # col has float dtype, result should be Float64Index index = MultiIndex.from_arrays([[4.] * 3, [2012] * 3], names=['col', 'year']) expected = DataFrame({'amount': [222, 333, 444]}, index=index) tm.assert_frame_equal(res, expected)
Example #12
Source File: test_compare.py From recordlinkage with BSD 3-Clause "New" or "Revised" License | 6 votes |
def test_compare_custom_instance_type(self): A = DataFrame({'col': ['abc', 'abc', 'abc', 'abc', 'abc']}) B = DataFrame({'col': ['abc', 'abd', 'abc', 'abc', '123']}) ix = MultiIndex.from_arrays([A.index.values, B.index.values]) def call(s1, s2): # this should raise on incorrect types assert isinstance(s1, np.ndarray) assert isinstance(s2, np.ndarray) return np.ones(len(s1), dtype=np.int) comp = recordlinkage.Compare() comp.compare_vectorized(lambda s1, s2: np.ones(len(s1), dtype=np.int), 'col', 'col') result = comp.compute(ix, A, B) expected = DataFrame([1, 1, 1, 1, 1], index=ix) pdt.assert_frame_equal(result, expected)
Example #13
Source File: test_compare.py From recordlinkage with BSD 3-Clause "New" or "Revised" License | 6 votes |
def test_date_incorrect_dtype(self): A = DataFrame({ 'col': ['2005/11/23', nan, '2004/11/23', '2010/01/10', '2010/10/30'] }) B = DataFrame({ 'col': [ '2005/11/23', '2010/12/31', '2005/11/23', '2010/10/01', '2010/9/30' ] }) ix = MultiIndex.from_arrays([A.index.values, B.index.values]) A['col1'] = to_datetime(A['col']) B['col1'] = to_datetime(B['col']) comp = recordlinkage.Compare() comp.date('col', 'col1') pytest.raises(ValueError, comp.compute, ix, A, B) comp = recordlinkage.Compare() comp.date('col1', 'col') pytest.raises(ValueError, comp.compute, ix, A, B)
Example #14
Source File: test_compare.py From recordlinkage with BSD 3-Clause "New" or "Revised" License | 6 votes |
def test_geo(self): # Utrecht, Amsterdam, Rotterdam (Cities in The Netherlands) A = DataFrame({ 'lat': [52.0842455, 52.3747388, 51.9280573], 'lng': [5.0124516, 4.7585305, 4.4203581] }) B = DataFrame({ 'lat': [52.3747388, 51.9280573, 52.0842455], 'lng': [4.7585305, 4.4203581, 5.0124516] }) ix = MultiIndex.from_arrays([A.index.values, B.index.values]) comp = recordlinkage.Compare() comp.geo( 'lat', 'lng', 'lat', 'lng', method='step', offset=50) # 50 km range result = comp.compute(ix, A, B) # Missing values as default [36.639460, 54.765854, 44.092472] expected = Series([1.0, 0.0, 1.0], index=ix, name=0) pdt.assert_series_equal(result[0], expected)
Example #15
Source File: test_compare.py From recordlinkage with BSD 3-Clause "New" or "Revised" License | 6 votes |
def test_defaults(self): # default algorithm is levenshtein algorithm # test default values are indentical to levenshtein A = DataFrame({ 'col': [u'str_abc', u'str_abc', u'str_abc', nan, u'hsdkf'] }) B = DataFrame({'col': [u'str_abc', u'str_abd', u'jaskdfsd', nan, nan]}) ix = MultiIndex.from_arrays([A.index.values, B.index.values]) comp = recordlinkage.Compare() comp.string('col', 'col', label='default') comp.string('col', 'col', method='levenshtein', label='with_args') result = comp.compute(ix, A, B) pdt.assert_series_equal( result['default'].rename(None), result['with_args'].rename(None) )
Example #16
Source File: test_compare.py From recordlinkage with BSD 3-Clause "New" or "Revised" License | 6 votes |
def test_fuzzy(self): A = DataFrame({ 'col': [u'str_abc', u'str_abc', u'str_abc', nan, u'hsdkf'] }) B = DataFrame({'col': [u'str_abc', u'str_abd', u'jaskdfsd', nan, nan]}) ix = MultiIndex.from_arrays([A.index.values, B.index.values]) comp = recordlinkage.Compare() comp.string('col', 'col', method='jaro', missing_value=0) comp.string('col', 'col', method='q_gram', missing_value=0) comp.string('col', 'col', method='cosine', missing_value=0) comp.string('col', 'col', method='jaro_winkler', missing_value=0) comp.string('col', 'col', method='dameraulevenshtein', missing_value=0) comp.string('col', 'col', method='levenshtein', missing_value=0) result = comp.compute(ix, A, B) print(result) assert result.notnull().all(1).all(0) assert (result[result.notnull()] >= 0).all(1).all(0) assert (result[result.notnull()] <= 1).all(1).all(0)
Example #17
Source File: test_apply.py From recruit with Apache License 2.0 | 6 votes |
def test_groupby_apply_none_first(): # GH 12824. Tests if apply returns None first. test_df1 = DataFrame({'groups': [1, 1, 1, 2], 'vars': [0, 1, 2, 3]}) test_df2 = DataFrame({'groups': [1, 2, 2, 2], 'vars': [0, 1, 2, 3]}) def test_func(x): if x.shape[0] < 2: return None return x.iloc[[0, -1]] result1 = test_df1.groupby('groups').apply(test_func) result2 = test_df2.groupby('groups').apply(test_func) index1 = MultiIndex.from_arrays([[1, 1], [0, 2]], names=['groups', None]) index2 = MultiIndex.from_arrays([[2, 2], [1, 3]], names=['groups', None]) expected1 = DataFrame({'groups': [1, 1], 'vars': [0, 2]}, index=index1) expected2 = DataFrame({'groups': [2, 2], 'vars': [1, 3]}, index=index2) tm.assert_frame_equal(result1, expected1) tm.assert_frame_equal(result2, expected2)
Example #18
Source File: test_apply.py From recruit with Apache License 2.0 | 6 votes |
def test_apply_multiindex_fail(): index = MultiIndex.from_arrays([[0, 0, 0, 1, 1, 1], [1, 2, 3, 1, 2, 3] ]) df = DataFrame({'d': [1., 1., 1., 2., 2., 2.], 'c': np.tile(['a', 'b', 'c'], 2), 'v': np.arange(1., 7.)}, index=index) def f(group): v = group['v'] group['v2'] = (v - v.min()) / (v.max() - v.min()) return group result = df.groupby('d').apply(f) expected = df.copy() expected['v2'] = np.tile([0., 0.5, 1], 2) tm.assert_frame_equal(result, expected)
Example #19
Source File: test_value_counts.py From recruit with Apache License 2.0 | 6 votes |
def test_series_groupby_value_counts(df, keys, bins, n, m): def rebuild_index(df): arr = list(map(df.index.get_level_values, range(df.index.nlevels))) df.index = MultiIndex.from_arrays(arr, names=df.index.names) return df for isort, normalize, sort, ascending, dropna \ in product((False, True), repeat=5): kwargs = dict(normalize=normalize, sort=sort, ascending=ascending, dropna=dropna, bins=bins) gr = df.groupby(keys, sort=isort) left = gr['3rd'].value_counts(**kwargs) gr = df.groupby(keys, sort=isort) right = gr['3rd'].apply(Series.value_counts, **kwargs) right.index.names = right.index.names[:-1] + ['3rd'] # have to sort on index because of unstable sort on values left, right = map(rebuild_index, (left, right)) # xref GH9212 tm.assert_series_equal(left.sort_index(), right.sort_index())
Example #20
Source File: test_nth.py From recruit with Apache License 2.0 | 6 votes |
def test_nth_multi_index_as_expected(): # PR 9090, related to issue 8979 # test nth on MultiIndex three_group = DataFrame( {'A': ['foo', 'foo', 'foo', 'foo', 'bar', 'bar', 'bar', 'bar', 'foo', 'foo', 'foo'], 'B': ['one', 'one', 'one', 'two', 'one', 'one', 'one', 'two', 'two', 'two', 'one'], 'C': ['dull', 'dull', 'shiny', 'dull', 'dull', 'shiny', 'shiny', 'dull', 'shiny', 'shiny', 'shiny']}) grouped = three_group.groupby(['A', 'B']) result = grouped.nth(0) expected = DataFrame( {'C': ['dull', 'dull', 'dull', 'dull']}, index=MultiIndex.from_arrays([['bar', 'bar', 'foo', 'foo'], ['one', 'two', 'one', 'two']], names=['A', 'B'])) assert_frame_equal(result, expected)
Example #21
Source File: test_transform.py From recruit with Apache License 2.0 | 6 votes |
def test_transform_mixed_type(): index = MultiIndex.from_arrays([[0, 0, 0, 1, 1, 1], [1, 2, 3, 1, 2, 3] ]) df = DataFrame({'d': [1., 1., 1., 2., 2., 2.], 'c': np.tile(['a', 'b', 'c'], 2), 'v': np.arange(1., 7.)}, index=index) def f(group): group['g'] = group['d'] * 2 return group[:1] grouped = df.groupby('c') result = grouped.apply(f) assert result['d'].dtype == np.float64 # this is by definition a mutating operation! with option_context('mode.chained_assignment', None): for key, group in grouped: res = f(group) assert_frame_equal(res, result.loc[key])
Example #22
Source File: test_compare.py From recordlinkage with BSD 3-Clause "New" or "Revised" License | 6 votes |
def test_freq_nan(self, missing_value): # data array_repeated = np.repeat(np.arange(10, dtype=np.float64), 10) array_repeated[90:] = np.nan array_tiled = np.tile(np.arange(20, dtype=np.float64), 5) # convert to pandas data A = DataFrame({'col': array_repeated}) B = DataFrame({'col': array_tiled}) ix = MultiIndex.from_arrays([A.index.values, B.index.values]) # the part to test from recordlinkage.compare import Frequency comp = recordlinkage.Compare() comp.add(Frequency(left_on='col', missing_value=missing_value)) result = comp.compute(ix, A, B) expected_np = np.ones((100, )) / 10 expected_np[90:] = missing_value expected = DataFrame(expected_np, index=ix) pdt.assert_frame_equal(result, expected)
Example #23
Source File: numpy_records.py From arctic with GNU Lesser General Public License v2.1 | 6 votes |
def _index_from_records(self, recarr): index = recarr.dtype.metadata['index'] if len(index) == 1: rtn = Index(np.copy(recarr[str(index[0])]), name=index[0]) if isinstance(rtn, DatetimeIndex) and 'index_tz' in recarr.dtype.metadata: rtn = rtn.tz_localize('UTC').tz_convert(recarr.dtype.metadata['index_tz']) else: level_arrays = [] index_tz = recarr.dtype.metadata.get('index_tz', []) for level_no, index_name in enumerate(index): # build each index level separately to ensure we end up with the right index dtype level = Index(np.copy(recarr[str(index_name)])) if level_no < len(index_tz): tz = index_tz[level_no] if tz is not None: if not isinstance(level, DatetimeIndex) and len(level) == 0: # index type information got lost during save as the index was empty, cast back level = DatetimeIndex([], tz=tz) else: level = level.tz_localize('UTC').tz_convert(tz) level_arrays.append(level) rtn = MultiIndex.from_arrays(level_arrays, names=index) return rtn
Example #24
Source File: test_ix.py From recruit with Apache License 2.0 | 6 votes |
def test_ix_general(self): # ix general issues # GH 2817 data = {'amount': {0: 700, 1: 600, 2: 222, 3: 333, 4: 444}, 'col': {0: 3.5, 1: 3.5, 2: 4.0, 3: 4.0, 4: 4.0}, 'year': {0: 2012, 1: 2011, 2: 2012, 3: 2012, 4: 2012}} df = DataFrame(data).set_index(keys=['col', 'year']) key = 4.0, 2012 # emits a PerformanceWarning, ok with tm.assert_produces_warning(PerformanceWarning): tm.assert_frame_equal(df.loc[key], df.iloc[2:]) # this is ok df.sort_index(inplace=True) res = df.loc[key] # col has float dtype, result should be Float64Index index = MultiIndex.from_arrays([[4.] * 3, [2012] * 3], names=['col', 'year']) expected = DataFrame({'amount': [222, 333, 444]}, index=index) tm.assert_frame_equal(res, expected)
Example #25
Source File: test_contains.py From recruit with Apache License 2.0 | 6 votes |
def test_isin(): values = [('foo', 2), ('bar', 3), ('quux', 4)] idx = MultiIndex.from_arrays([ ['qux', 'baz', 'foo', 'bar'], np.arange(4) ]) result = idx.isin(values) expected = np.array([False, False, True, True]) tm.assert_numpy_array_equal(result, expected) # empty, return dtype bool idx = MultiIndex.from_arrays([[], []]) result = idx.isin(values) assert len(result) == 0 assert result.dtype == np.bool_
Example #26
Source File: test_get_set.py From recruit with Apache License 2.0 | 6 votes |
def test_get_level_values_na(): arrays = [[np.nan, np.nan, np.nan], ['a', np.nan, 1]] index = pd.MultiIndex.from_arrays(arrays) result = index.get_level_values(0) expected = pd.Index([np.nan, np.nan, np.nan]) tm.assert_index_equal(result, expected) result = index.get_level_values(1) expected = pd.Index(['a', np.nan, 1]) tm.assert_index_equal(result, expected) arrays = [['a', 'b', 'b'], pd.DatetimeIndex([0, 1, pd.NaT])] index = pd.MultiIndex.from_arrays(arrays) result = index.get_level_values(1) expected = pd.DatetimeIndex([0, 1, pd.NaT]) tm.assert_index_equal(result, expected) arrays = [[], []] index = pd.MultiIndex.from_arrays(arrays) result = index.get_level_values(0) expected = pd.Index([], dtype=object) tm.assert_index_equal(result, expected)
Example #27
Source File: test_constructor.py From recruit with Apache License 2.0 | 6 votes |
def test_from_arrays_empty(): # 0 levels msg = "Must pass non-zero number of levels/codes" with pytest.raises(ValueError, match=msg): MultiIndex.from_arrays(arrays=[]) # 1 level result = MultiIndex.from_arrays(arrays=[[]], names=['A']) assert isinstance(result, MultiIndex) expected = Index([], name='A') tm.assert_index_equal(result.levels[0], expected) # N levels for N in [2, 3]: arrays = [[]] * N names = list('ABC')[:N] result = MultiIndex.from_arrays(arrays=arrays, names=names) expected = MultiIndex(levels=[[]] * N, codes=[[]] * N, names=names) tm.assert_index_equal(result, expected)
Example #28
Source File: test_constructor.py From recruit with Apache License 2.0 | 6 votes |
def test_from_arrays_index_series_categorical(): # GH13743 idx1 = pd.CategoricalIndex(list("abcaab"), categories=list("bac"), ordered=False) idx2 = pd.CategoricalIndex(list("abcaab"), categories=list("bac"), ordered=True) result = pd.MultiIndex.from_arrays([idx1, idx2]) tm.assert_index_equal(result.get_level_values(0), idx1) tm.assert_index_equal(result.get_level_values(1), idx2) result2 = pd.MultiIndex.from_arrays([pd.Series(idx1), pd.Series(idx2)]) tm.assert_index_equal(result2.get_level_values(0), idx1) tm.assert_index_equal(result2.get_level_values(1), idx2) result3 = pd.MultiIndex.from_arrays([idx1.values, idx2.values]) tm.assert_index_equal(result3.get_level_values(0), idx1) tm.assert_index_equal(result3.get_level_values(1), idx2)
Example #29
Source File: test_transform.py From vnpy_crypto with MIT License | 6 votes |
def test_transform_mixed_type(): index = MultiIndex.from_arrays([[0, 0, 0, 1, 1, 1], [1, 2, 3, 1, 2, 3] ]) df = DataFrame({'d': [1., 1., 1., 2., 2., 2.], 'c': np.tile(['a', 'b', 'c'], 2), 'v': np.arange(1., 7.)}, index=index) def f(group): group['g'] = group['d'] * 2 return group[:1] grouped = df.groupby('c') result = grouped.apply(f) assert result['d'].dtype == np.float64 # this is by definition a mutating operation! with option_context('mode.chained_assignment', None): for key, group in grouped: res = f(group) assert_frame_equal(res, result.loc[key])
Example #30
Source File: test_constructor.py From recruit with Apache License 2.0 | 5 votes |
def test_from_arrays_index_series_period(): idx1 = pd.period_range('2011-01-01', freq='D', periods=3) idx2 = pd.period_range('2015-01-01', freq='H', periods=3) result = pd.MultiIndex.from_arrays([idx1, idx2]) tm.assert_index_equal(result.get_level_values(0), idx1) tm.assert_index_equal(result.get_level_values(1), idx2) result2 = pd.MultiIndex.from_arrays([pd.Series(idx1), pd.Series(idx2)]) tm.assert_index_equal(result2.get_level_values(0), idx1) tm.assert_index_equal(result2.get_level_values(1), idx2) tm.assert_index_equal(result, result2)