Python pandas.MultiIndex() Examples
The following are 30 code examples of pandas.MultiIndex().
Example #1
Source File: From recruit with Apache License 2.0 | 6 votes |
def setup_method(self, method):
    """Build one sample index per index flavour and register them.

    The samples are stored on ``self.indices`` keyed by a descriptive name,
    after which ``self.setup_indices()`` is invoked. ``method`` is accepted
    but not used in this body.
    """
    size = 100
    samples = {}
    # Entries are created in a fixed order so the resulting mapping (and any
    # generator state consumed by the helpers) matches a single dict(...) call.
    samples['unicodeIndex'] = tm.makeUnicodeIndex(size)
    samples['strIndex'] = tm.makeStringIndex(size)
    samples['dateIndex'] = tm.makeDateIndex(size)
    samples['periodIndex'] = tm.makePeriodIndex(size)
    samples['tdIndex'] = tm.makeTimedeltaIndex(size)
    samples['intIndex'] = tm.makeIntIndex(size)
    samples['uintIndex'] = tm.makeUIntIndex(size)
    samples['rangeIndex'] = tm.makeRangeIndex(size)
    samples['floatIndex'] = tm.makeFloatIndex(size)
    samples['boolIndex'] = Index([True, False])
    samples['catIndex'] = tm.makeCategoricalIndex(size)
    samples['empty'] = Index([])
    samples['tuples'] = MultiIndex.from_tuples(
        lzip(['foo', 'bar', 'baz'], [1, 2, 3])
    )
    samples['repeats'] = Index([0, 0, 1, 1, 2, 2])

    self.indices = samples
    self.setup_indices()
Example #2
Source File: From recruit with Apache License 2.0 | 6 votes |
def test_constructor_non_hashable_name(self, indices):
    """Setting a non-hashable name must raise ``TypeError`` (GH 20527).

    Both ``rename`` and ``set_names`` are exercised. MultiIndex inputs are
    skipped here.
    """
    if isinstance(indices, MultiIndex):
        # NOTE(review): the message was truncated to "multiindex handled in";
        # the file name is restored from upstream — confirm.
        pytest.skip("multiindex handled in test_multi.py")

    message = " must be a hashable type"
    renamed = [['1']]  # the inner list is the (unhashable) candidate name

    # With .rename()
    with pytest.raises(TypeError, match=message):
        indices.rename(name=renamed)

    # With .set_names()
    with pytest.raises(TypeError, match=message):
        indices.set_names(names=renamed)
Example #3
Source File: From recordlinkage with BSD 3-Clause "New" or "Revised" License | 6 votes |
def _link_index(self, df_a, df_b): """Build an index for linking two datasets. Parameters ---------- df_a : (tuple of) pandas.Series The data of the left DataFrame to build the index with. df_b : (tuple of) pandas.Series The data of the right DataFrame to build the index with. Returns ------- pandas.MultiIndex A pandas.MultiIndex with record pairs. Each record pair contains the index values of two records. """ raise NotImplementedError( "Not possible to call index for the BaseEstimator" )
Example #4
Source File: From recruit with Apache License 2.0 | 6 votes |
def test_droplevel(self, indices):
    """Check ``droplevel`` on flat indexes (GH 21115).

    Dropping no levels returns an equal index, dropping the only level
    raises ``ValueError``, and an unknown level raises ``KeyError``.
    """
    if isinstance(indices, MultiIndex):
        # MultiIndex is tested separately.
        return

    assert indices.droplevel([]).equals(indices)

    # The level can be given either as the bare name or wrapped in a list.
    for level in indices.name, [indices.name]:
        if isinstance(indices.name, tuple) and level is indices.name:
            # GH 21121 : droplevel with tuple name
            continue
        with pytest.raises(ValueError):
            indices.droplevel(level)

    for level in 'wrong', ['wrong']:
        with pytest.raises(KeyError):
            indices.droplevel(level)
Example #5
Source File: From recruit with Apache License 2.0 | 6 votes |
def test_duplicated(self, indices, keep):
    """Compare ``Index.duplicated`` with ``Series.duplicated`` on resampled data."""
    if len(indices) == 0 or isinstance(indices, (MultiIndex, RangeIndex)):
        # MultiIndex tested separately in:
        # tests/indexes/multi/test_unique_and_duplicates
        pytest.skip('Skip check for empty Index, MultiIndex, RangeIndex')

    index_cls = type(indices)
    unique_idx = index_cls(indices)
    if unique_idx.has_duplicates:
        # The duplicated-method is under test, so the exact positions of the
        # duplicates must be known up front. Pre-existing duplicates would
        # spoil that, hence they are dropped. drop_duplicates itself relies
        # on duplicated, but the result is cross-checked against
        # Series.duplicated, which is tested separately.
        unique_idx = unique_idx.drop_duplicates()

    size = len(unique_idx)
    repeat_factor = 10
    picks = np.random.choice(size, repeat_factor * size)
    expected = pd.Series(picks).duplicated(keep=keep).values

    resampled = index_cls(unique_idx.values[picks])
    tm.assert_numpy_array_equal(resampled.duplicated(keep=keep), expected)
Example #6
Source File: From recordlinkage with BSD 3-Clause "New" or "Revised" License | 6 votes |
def fit_predict(self, comparison_vectors, match_index=None):
    """Train the classifier and predict on the same comparison vectors.

    Parameters
    ----------
    comparison_vectors : pandas.DataFrame
        The comparison vectors.
    match_index : pandas.MultiIndex
        The true matches.

    Returns
    -------
    pandas.Series
        A pandas Series with the labels 1 (for the matches) and 0 (for
        the non-matches).

    Notes
    -----
    A ``return_type`` argument is deprecated and not accepted here; use
    ``recordlinkage.set_option('classification.return_type', 'index')``
    instead.
    """
    # Fit first, then predict on the very same vectors.
    self.fit(comparison_vectors, match_index)
    result = self.predict(comparison_vectors)

    return result
Example #7
Source File: From recordlinkage with BSD 3-Clause "New" or "Revised" License | 6 votes |
def _febrl_links(df): """Get the links of a FEBRL dataset.""" index = df.index.to_series() keys = index.str.extract(r'rec-(\d+)', expand=True)[0] index_int = numpy.arange(len(df)) df_helper = pandas.DataFrame({ 'key': keys, 'index': index_int }) # merge the two frame and make MultiIndex. pairs_df = df_helper.merge( df_helper, on='key' )[['index_x', 'index_y']] pairs_df = pairs_df[pairs_df['index_x'] > pairs_df['index_y']] return pandas.MultiIndex( levels=[df.index.values, df.index.values], codes=[pairs_df['index_x'].values, pairs_df['index_y'].values], names=[None, None], verify_integrity=False )
Example #8
Source File: From arctic with GNU Lesser General Public License v2.1 | 6 votes |
def test_data_info_cols(library):
    """Write a MultiIndex-ed frame and check the metadata from ``get_info``."""
    row_index = MultiIndex.from_tuples([(1, "ab"), (2, "bb"), (3, "cb")])
    frame = DataFrame(data=[100, 200, 300], index=row_index)
    library.write('test_data', frame)
    info = library.get_info('test_data')
    # {'dtype': [('level_0', '<i8'), ('level_1', 'S2'), ('0', '<i8')],
    #  'col_names': {u'index': [u'level_0', u'level_1'], u'columns': [u'0'], 'index_tz': [None, None]},
    #  'type': u'pandasdf',
    #  'handler': 'PandasDataFrameStore',
    #  'rows': 3,
    #  'segment_count': 1,
    #  'size': 50}
    assert 'size' in info
    assert info['segment_count'] == 1
    assert info['rows'] == 3
    assert info['handler'] == 'PandasDataFrameStore'
    assert info['type'] == 'pandasdf'
    assert info['col_names'] == {
        'index': ['level_0', u'level_1'],
        'columns': [u'0'],
        'index_tz': [None, None],
    }

    dtype_fields = info['dtype']
    assert len(dtype_fields) == 3
    assert dtype_fields[0][0] == 'level_0'
    assert dtype_fields[1][0] == 'level_1'
    assert dtype_fields[2][0] == '0'
Example #9
Source File: From recordlinkage with BSD 3-Clause "New" or "Revised" License | 6 votes |
def true_positives(links_true, links_pred):
    """Count the number of True Positives.

    Returns the number of correctly predicted links, also called the number of
    True Positives (TP).

    Parameters
    ----------
    links_true: pandas.MultiIndex, pandas.DataFrame, pandas.Series
        The true (or actual) links.
    links_pred: pandas.MultiIndex, pandas.DataFrame, pandas.Series
        The predicted links.

    Returns
    -------
    int
        The number of correctly predicted links.
    """
    links_true = _get_multiindex(links_true)
    links_pred = _get_multiindex(links_pred)

    # Use the explicit set operation: `&` on Index objects is no longer a
    # set intersection in recent pandas. This also mirrors the
    # `.difference()` calls used by the sibling metrics.
    return len(links_true.intersection(links_pred))
Example #10
Source File: From recordlinkage with BSD 3-Clause "New" or "Revised" License | 6 votes |
def false_positives(links_true, links_pred):
    """Count the number of False Positives.

    A False Positive (FP) is a pair that is predicted as a link although it
    is a true non-link.

    Parameters
    ----------
    links_true: pandas.MultiIndex, pandas.DataFrame, pandas.Series
        The true (or actual) links.
    links_pred: pandas.MultiIndex, pandas.DataFrame, pandas.Series
        The predicted links.

    Returns
    -------
    int
        The number of false positives.
    """
    actual = _get_multiindex(links_true)
    predicted = _get_multiindex(links_pred)

    # Predicted pairs that are absent from the true links.
    wrongly_linked = predicted.difference(actual)
    return len(wrongly_linked)
Example #11
Source File: From recordlinkage with BSD 3-Clause "New" or "Revised" License | 6 votes |
def false_negatives(links_true, links_pred):
    """Count the number of False Negatives.

    A False Negative (FN) is a true link that is predicted as a non-link.

    Parameters
    ----------
    links_true: pandas.MultiIndex, pandas.DataFrame, pandas.Series
        The true (or actual) links.
    links_pred: pandas.MultiIndex, pandas.DataFrame, pandas.Series
        The predicted links.

    Returns
    -------
    int
        The number of false negatives.
    """
    actual = _get_multiindex(links_true)
    predicted = _get_multiindex(links_pred)

    # True pairs that the prediction missed.
    missed = actual.difference(predicted)
    return len(missed)
Example #12
Source File: From recordlinkage with BSD 3-Clause "New" or "Revised" License | 6 votes |
def test_iterative(self):
    """Test the iterative behaviour.

    Indexing ``self.a`` in two halves and appending the results must give the
    same pairs as indexing it in one go.
    """
    # SINGLE STEP
    one_shot_indexer = Full()
    one_shot_pairs = one_shot_indexer.index((self.a, self.b))
    expected = pd.DataFrame(index=one_shot_pairs).sort_index()

    # MULTI STEP
    chunked_indexer = Full()
    first_half = chunked_indexer.index((self.a[0:50], self.b))
    second_half = chunked_indexer.index((self.a[50:100], self.b))
    combined = first_half.append(second_half)
    # The pairs are wrapped in a frame so they can be ordered with sort_index.
    actual = pd.DataFrame(index=combined).sort_index()

    pdt.assert_frame_equal(expected, actual)
Example #13
Source File: From recordlinkage with BSD 3-Clause "New" or "Revised" License | 6 votes |
def test_index_names_pandas023(self, index_class):
    """Level names of the produced pair index must not collide.

    Pandas changes the behaviour of MultiIndex names; this test tests
    compatibility. Both frames get an index named ``'index'``; when the
    resulting pair index is named, its two level names have to differ.
    """
    # make an index for each dataframe with a new index name
    index_a = pd.Index(self.a.index, name='index')
    df_a = pd.DataFrame(self.a, index=index_a)

    index_b = pd.Index(self.b.index, name='index')
    df_b = pd.DataFrame(self.b, index=index_b)

    # make the index (linking)
    pairs_link = index_class._link_index(df_a, df_b)
    if pairs_link.names[0] is not None:
        assert pairs_link.names[0] != pairs_link.names[1]

    # make the index (deduplication)
    pairs_dedup = index_class._dedup_index(df_a)
    # Guard on the dedup result itself; guarding on pairs_link here would
    # skip or run this check based on the wrong index.
    if pairs_dedup.names[0] is not None:
        assert pairs_dedup.names[0] != pairs_dedup.names[1]
Example #14
Source File: From recordlinkage with BSD 3-Clause "New" or "Revised" License | 6 votes |
def test_lower_triangular(self, index_class):
    """All produced pairs must lie in the strict lower triangle."""
    # make an index for each dataframe with a new index name
    named_index = pd.Index(self.a.index, name='index')
    df_a = pd.DataFrame(self.a, index=named_index)
    pairs = index_class.index(df_a)

    # expected: every (row, col) combination with row > col
    record_labels = [df_a.index.values, df_a.index.values]
    triangle_codes = np.tril_indices(len(df_a.index), k=-1)
    full_pairs = pd.MultiIndex(
        levels=record_labels,
        codes=triangle_codes,
        verify_integrity=False,
    )

    # all pairs are in the lower triangle of the matrix.
    outside = pairs.difference(full_pairs)
    assert len(outside) == 0
Example #15
Source File: From recordlinkage with BSD 3-Clause "New" or "Revised" License | 6 votes |
def test_krebs_dataset_download():
    """Load the Krebsregister dataset after clearing the data home.

    Checks that the ten block archives exist afterwards and that the returned
    objects have the expected types and sizes.
    """
    # remove downloaded datasets
    clear_data_home()

    krebs_data, krebs_matches = load_krebsregister()

    for i in range(1, 11):
        assert Path(get_data_home(), "krebsregister",
                    "block_{}.zip".format(i)).is_file()

    # `assert type(x), T` treats T as the assertion message and never fails;
    # isinstance performs the intended type check.
    assert isinstance(krebs_data, pandas.DataFrame)
    assert isinstance(krebs_matches, pandas.MultiIndex)

    # count the number of records
    assert len(krebs_data) == 5749132
    assert len(krebs_matches) == 20931
Example #16
Source File: From arctic with GNU Lesser General Public License v2.1 | 6 votes |
def multi_index_insert_row(df, index_row, values_row):
    """Return a new dataframe with a row inserted for a multi-index dataframe.

    This will sort the rows according to the ordered multi-index levels.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to extend; its columns are used for the new row.
    index_row : sequence
        One index value per level for the new row.
    values_row : list-like
        Values for the new row, passed to ``pd.DataFrame``.

    Returns
    -------
    pandas.DataFrame
        The frame including the new row.
    """
    levels = [[i] for i in index_row]
    positions = [[0] for i in index_row]
    # The keyword was renamed from ``labels`` to ``codes`` in pandas 0.24.
    if PD_VER < '0.24.0':
        row_index = pd.MultiIndex(levels=levels, labels=positions)
    else:
        row_index = pd.MultiIndex(levels=levels, codes=positions)
    row = pd.DataFrame(values_row, index=row_index, columns=df.columns)
    df = pd.concat((df, row))

    # With fewer than two rows there is no previous row to compare against;
    # indexing [-2] would raise IndexError for an initially empty frame.
    if df.index.lexsort_depth == len(index_row) and (
            len(df) < 2 or df.index[-2] < df.index[-1]):
        # We've just appended a row to an already-sorted dataframe
        return df

    # The df wasn't sorted or the row has to be put in the middle somewhere
    return df.sort_index()
Example #17
Source File: From respy with MIT License | 6 votes |
def _infer_choices_with_experience(params, options): """Infer choices with experiences. Example ------- >>> options = {"covariates": {"a": "exp_white_collar + exp_a", "b": "exp_b >= 2"}} >>> index = pd.MultiIndex.from_product([["category"], ["a", "b"]]) >>> params = pd.Series(index=index, dtype="object") >>> _infer_choices_with_experience(params, options) ['a', 'b', 'white_collar'] """ covariates = options["covariates"] parameters = params.index.get_level_values(1) used_covariates = [cov for cov in covariates if cov in parameters] matches = [] for param in parameters: matches += re.findall(r"\bexp_([A-Za-z_]+)\b", str(param)) for cov in used_covariates: matches += re.findall(r"\bexp_([A-Za-z_]+)\b", covariates[cov]) return sorted(set(matches))
Example #18
Source File: From arctic with GNU Lesser General Public License v2.1 | 5 votes |
def groupby_asof(df, as_of=None, dt_col='sample_dt', asof_col='observed_dt'):
    '''
    Select the latest rows of a bitemporal dataframe as-of a certain date.

    Parameters
    ----------
    df: ``pd.DataFrame``
        Dataframe with a MultiIndex index
    as_of: ``datetime``
        Return a timeseries with values observed <= this as-of date. By default, the latest observed
        values will be returned.
    dt_col: ``str`` or ``int``
        Name or index of the column in the MultiIndex that is the sample date
    asof_col: ``str`` or ``int``
        Name or index of the column in the MultiIndex that is the observed date
    '''
    if as_of and as_of.tzinfo is None:
        # A naive as-of date gets mktz()'s zone when the observed-date level
        # is timezone-aware.
        observed_level = df.index.get_level_values(asof_col)
        if observed_level.tz is not None:
            as_of = as_of.replace(tzinfo=mktz())

    return fancy_group_by(
        df,
        grouping_level=dt_col,
        aggregate_level=asof_col,
        method='last',
        max_=as_of,
    )


# ----------------------- Insert/Append ---------------------------- #
Example #19
Source File: From arctic with GNU Lesser General Public License v2.1 | 5 votes |
def test_save_read_pandas_empty_series_with_datetime_multiindex_with_timezone(library):
    """Round-trip an empty Series whose MultiIndex has a tz-aware level."""
    levels = (pd.DatetimeIndex([], tz="America/Chicago"), pd.Index([]))
    try:
        # hack to support modern and older versions of pandas
        empty_index = pd.MultiIndex(levels=levels, codes=([], []))
    except TypeError:
        # Older pandas does not know ``codes``; it was called ``labels``.
        empty_index = pd.MultiIndex(levels=levels, labels=([], []))
    df = Series(data=[], index=empty_index)
    library.write('pandas', df)
    # NOTE(review): `library.read(` was missing from the snippet and is
    # restored here — confirm against the store API.
    saved_df = library.read('pandas').data
    assert empty_index.equal_levels(saved_df.index), \
        "Index timezone information should be maintained, even when empty"
Example #20
Source File: From pyTD with MIT License | 5 votes |
# NOTE(review): this snippet is damaged. The callee on the `data =` line and
# the operand(s) around `==` in the final assert are missing, so it is not
# valid Python as shown. The visible asserts expect `data` to be a DataFrame
# whose columns are a MultiIndex containing "AAPL", "TSLA" and "MSFT".
# TODO: restore the missing expressions from the original project source.
def test_batch_history_pandas(self): data =["AAPL", "TSLA", "MSFT"], output_format='pandas') assert isinstance(data, pd.DataFrame) assert isinstance(data.columns, pd.MultiIndex) assert "AAPL" in data.columns assert "TSLA" in data.columns assert "MSFT" in data.columns assert data.iloc[0] ==, 1, 2)
Example #21
Source File: From respy with MIT License | 5 votes |
def _create_tidy_data(data, moment_set_labels): """Create tidy data from list of pandas.DataFrames.""" counter = itertools.count() tidy_data = [] for series_or_df, label in zip(data, moment_set_labels): # Join index levels for MultiIndex objects. if isinstance(series_or_df.index, pd.MultiIndex): series_or_df = series_or_df.rename(index=str) series_or_df.index = series_or_df.index.to_flat_index().str.join("_") # If moments are a pandas.Series, convert into pandas.DataFrame. if isinstance(series_or_df, pd.Series): # Unnamed pandas.Series receive a name based on a counter. if is None: series_or_df = series_or_df.to_frame(name=next(counter)) else: series_or_df = series_or_df.to_frame() # Create pandas.DataFrame in tidy format. tidy_df = series_or_df.unstack() tidy_df.index.names = ("moment_column", "moment_index") tidy_df.rename("value", inplace=True) tidy_df = tidy_df.reset_index() tidy_df["moment_set"] = label tidy_data.append(tidy_df) return pd.concat(tidy_data, ignore_index=True)
Example #22
Source File: From recordlinkage with BSD 3-Clause "New" or "Revised" License | 5 votes |
def test_fit_predict_unsupervised(self, classifier):
    """``fit`` followed by ``predict`` must agree with ``fit_predict``."""
    cl = classifier()
    # NOTE(review): the fit step was missing from the snippet; without it
    # predict runs on an unfitted classifier and cannot be compared with
    # fit_predict. Restored — confirm against the original test.
    cl.fit(self.X_train)
    result = cl.predict(self.X_train)
    assert isinstance(result, pd.MultiIndex)

    cl2 = classifier()
    expected = cl2.fit_predict(self.X_train)
    assert isinstance(expected, pd.MultiIndex)
    assert result.values.shape == expected.values.shape

    pdt.assert_index_equal(result, expected)
Example #23
Source File: From PoseWarper with Apache License 2.0 | 5 votes |
def new_event_dataframe():
    """Create a new DataFrame for event tracking.

    Returns
    -------
    pandas.DataFrame
        An empty frame indexed by (``FrameId``, ``Event``) with the columns
        ``Type``, ``OId``, ``HId`` and ``D``.
    """
    empty_levels = [[], []]
    empty_codes = [[], []]
    level_names = ['FrameId', 'Event']
    try:
        idx = pd.MultiIndex(levels=empty_levels, codes=empty_codes, names=level_names)
    except TypeError:
        # pandas < 0.24 only accepts the old ``labels`` keyword.
        idx = pd.MultiIndex(levels=empty_levels, labels=empty_codes, names=level_names)

    cats = pd.Categorical([], categories=['FP', 'MISS', 'SWITCH', 'MATCH'])
    # NOTE(review): OId/HId are declared with dtype=str although the original
    # inline notes talk about float — confirm the intended dtype.
    df = pd.DataFrame(
        OrderedDict([
            ('Type', pd.Series(cats)),         # Type of event. One of FP (false positive), MISS, SWITCH, MATCH
            ('OId', pd.Series(dtype=str)),     # Object ID or -1 if FP.
            ('HId', pd.Series(dtype=str)),     # Hypothesis ID or NaN if MISS.
            ('D', pd.Series(dtype=float)),     # Distance or NaN when FP or MISS
        ]),
        index=idx
    )
    return df
Example #24
Source File: From recruit with Apache License 2.0 | 5 votes |
def test_copy_and_deepcopy(self, indices):
    """``copy``/``deepcopy`` give equal but distinct indexes; ``copy(name=)`` renames."""
    from copy import copy, deepcopy

    if isinstance(indices, MultiIndex):
        pytest.skip('Skip check for MultiIndex')

    for func in (copy, deepcopy):
        idx_copy = func(indices)
        assert idx_copy is not indices
        assert idx_copy.equals(indices)

    new_copy = indices.copy(deep=True, name="banana")
    # The left operand was missing from the snippet; the copy made just above
    # with name="banana" is the only object this can refer to.
    assert new_copy.name == "banana"
Example #25
Source File: From arctic with GNU Lesser General Public License v2.1 | 5 votes |
def test_save_read_pandas_series_with_multiindex_and_name(library):
    """Round-trip a named Series with a MultiIndex through the library."""
    df = Series(data=['A', 'BC', 'DEF'],
                index=MultiIndex.from_tuples([(1, 2), (1, 3), (2, 2)]),
                name='Foo')
    library.write('pandas', df)
    # NOTE(review): `library.read(` was missing from the snippet and is
    # restored here — confirm against the store API.
    saved_df = library.read('pandas').data
    assert np.all(df.values == saved_df.values)
    # NOTE(review): the left operand was missing; the round-tripped object is
    # checked here, presumably the intent — confirm against the original.
    assert saved_df.name == 'Foo'
Example #26
Source File: From arctic with GNU Lesser General Public License v2.1 | 5 votes |
def test_save_read_pandas_dataframe_with_multiindex(library):
    """Round-trip a DataFrame with a MultiIndex through the library."""
    df = DataFrame(data=['A', 'BC', 'DEF'],
                   index=MultiIndex.from_tuples([(1, 2), (1, 3), (2, 2)]))
    library.write('pandas', df)
    # NOTE(review): `library.read(` was missing from the snippet and is
    # restored here — confirm against the store API.
    saved_df = library.read('pandas').data
    assert np.all(df.values == saved_df.values)
Example #27
Source File: From arctic with GNU Lesser General Public License v2.1 | 5 votes |
def test_save_read_pandas_dataframe_with_unicode_index_name(library):
    """Round-trip a DataFrame whose single index level has a unicode name."""
    index = MultiIndex.from_tuples(
        [(np.datetime64(dt(2013, 1, 1)),),
         (np.datetime64(dt(2013, 1, 2)),),
         (np.datetime64(dt(2013, 1, 3)),)],
        names=[u'DATETIME'])
    df = DataFrame(data=['A', 'BC', 'DEF'], index=index)
    library.write('pandas', df)
    # NOTE(review): `library.read(` was missing from the snippet and is
    # restored here — confirm against the store API.
    saved_df = library.read('pandas').data
    assert np.all(df.values == saved_df.values)
Example #28
Source File: From recruit with Apache License 2.0 | 5 votes |
def test_set_name_methods(self, indices):
    """``set_names`` returns a renamed copy; in-place ``rename`` mutates and returns None."""
    new_name = "This is the new name for this index"

    # don't test a MultiIndex here (as it's tested separately)
    if isinstance(indices, MultiIndex):
        pytest.skip('Skip check for MultiIndex')

    original_name = indices.name
    new_ind = indices.set_names([new_name])
    assert new_ind.name == new_name
    # set_names without inplace must leave the original untouched
    assert indices.name == original_name

    res = indices.rename(new_name, inplace=True)

    # should return None
    assert res is None
    assert indices.name == new_name
    assert indices.names == [new_name]
    # with pytest.raises(TypeError, match="list-like"):
    #    # should still fail even if it would be the right length
    #    ind.set_names("a")
    with pytest.raises(ValueError, match="Level must be None"):
        indices.set_names("a", level=0)

    # rename in place just leaves tuples and other containers alone
    name = ('A', 'B')
    indices.rename(name, inplace=True)
    assert indices.name == name
    assert indices.names == [name]
Example #29
Source File: From recruit with Apache License 2.0 | 5 votes |
def test_to_flat_index(self, indices):
    """For a non-MultiIndex, ``to_flat_index`` must equal the input (22866)."""
    is_multi = isinstance(indices, MultiIndex)
    if is_multi:
        pytest.skip("Separate expectation for MultiIndex")

    flattened = indices.to_flat_index()
    tm.assert_index_equal(flattened, indices)
Example #30
Source File: From arctic with GNU Lesser General Public License v2.1 | 5 votes |
def test_save_read_pandas_series_with_multiindex(library):
    """Round-trip a Series with a MultiIndex through the library."""
    df = Series(data=['A', 'BC', 'DEF'],
                index=MultiIndex.from_tuples([(1, 2), (1, 3), (2, 2)]))
    library.write('pandas', df)
    # NOTE(review): `library.read(` was missing from the snippet and is
    # restored here — confirm against the store API.
    saved_df = library.read('pandas').data
    assert np.all(df.values == saved_df.values)