Python pandas.MultiIndex() Examples
The following are 30 code examples of pandas.MultiIndex().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module pandas, or try the search function.
Example #1
Source File: test_base.py From recruit with Apache License 2.0 | 6 votes |
def setup_method(self, method):
    """Build the ``self.indices`` fixture table, then run ``setup_indices``.

    ``self.indices`` maps a descriptive key to one sample index per index
    flavour under test. ``method`` is accepted but not used here.
    """
    self.indices = {
        # Flavours built by the ``tm.make*Index`` factories, each called
        # with 100.
        "unicodeIndex": tm.makeUnicodeIndex(100),
        "strIndex": tm.makeStringIndex(100),
        "dateIndex": tm.makeDateIndex(100),
        "periodIndex": tm.makePeriodIndex(100),
        "tdIndex": tm.makeTimedeltaIndex(100),
        "intIndex": tm.makeIntIndex(100),
        "uintIndex": tm.makeUIntIndex(100),
        "rangeIndex": tm.makeRangeIndex(100),
        "floatIndex": tm.makeFloatIndex(100),
        # Small hand-written cases.
        "boolIndex": Index([True, False]),
        "catIndex": tm.makeCategoricalIndex(100),
        "empty": Index([]),
        "tuples": MultiIndex.from_tuples(
            lzip(['foo', 'bar', 'baz'], [1, 2, 3])
        ),
        "repeats": Index([0, 0, 1, 1, 2, 2]),
    }
    self.setup_indices()
Example #2
Source File: test_common.py From recruit with Apache License 2.0 | 6 votes |
def test_constructor_non_hashable_name(self, indices):
    """Renaming an index to a non-hashable name must raise ``TypeError``."""
    # GH 20527
    if isinstance(indices, MultiIndex):
        pytest.skip("multiindex handled in test_multi.py")

    message = "Index.name must be a hashable type"
    renamed = [['1']]

    # Same expectation for .rename(name=...) and .set_names(names=...),
    # exercised in that order.
    for method_name, keyword in (("rename", "name"), ("set_names", "names")):
        renamer = getattr(indices, method_name)
        with pytest.raises(TypeError, match=message):
            renamer(**{keyword: renamed})
Example #3
Source File: base.py From recordlinkage with BSD 3-Clause "New" or "Revised" License | 6 votes |
def _link_index(self, df_a, df_b): """Build an index for linking two datasets. Parameters ---------- df_a : (tuple of) pandas.Series The data of the left DataFrame to build the index with. df_b : (tuple of) pandas.Series The data of the right DataFrame to build the index with. Returns ------- pandas.MultiIndex A pandas.MultiIndex with record pairs. Each record pair contains the index values of two records. """ raise NotImplementedError( "Not possible to call index for the BaseEstimator" )
Example #4
Source File: test_common.py From recruit with Apache License 2.0 | 6 votes |
def test_droplevel(self, indices):
    """Check ``droplevel`` on flat indexes: no-op, own name, unknown name."""
    # GH 21115
    if isinstance(indices, MultiIndex):
        # Tested separately in test_multi.py
        return

    # Dropping no levels leaves the index unchanged.
    assert indices.droplevel([]).equals(indices)

    name = indices.name
    name_is_tuple = isinstance(name, tuple)
    for level in (name, [name]):
        # GH 21121 : droplevel with tuple name
        if not (name_is_tuple and level is name):
            with pytest.raises(ValueError):
                indices.droplevel(level)

    # A level that does not exist is a KeyError, scalar or list alike.
    for level in ('wrong', ['wrong']):
        with pytest.raises(KeyError):
            indices.droplevel(level)
Example #5
Source File: test_common.py From recruit with Apache License 2.0 | 6 votes |
def test_duplicated(self, indices, keep):
    """Cross-check ``Index.duplicated`` against ``Series.duplicated``."""
    skipped_types = (MultiIndex, RangeIndex)
    if not len(indices) or isinstance(indices, skipped_types):
        # MultiIndex tested separately in:
        # tests/indexes/multi/test_unique_and_duplicates
        pytest.skip('Skip check for empty Index, MultiIndex, RangeIndex')

    index_cls = type(indices)
    unique_idx = index_cls(indices)
    if unique_idx.has_duplicates:
        # To know exactly which positions are duplicates in the result, the
        # starting index must be duplicate-free. Using drop_duplicates looks
        # circular (it invokes duplicated), but the expectation below comes
        # from Series.duplicated, which is tested separately.
        unique_idx = unique_idx.drop_duplicates()

    size = len(unique_idx)
    oversample = 10
    # Draw positions with replacement so duplicates occur.
    picks = np.random.choice(size, oversample * size)
    expected = pd.Series(picks).duplicated(keep=keep).values

    resampled = index_cls(unique_idx.values[picks])
    tm.assert_numpy_array_equal(resampled.duplicated(keep=keep), expected)
Example #6
Source File: base.py From recordlinkage with BSD 3-Clause "New" or "Revised" License | 6 votes |
def fit_predict(self, comparison_vectors, match_index=None):
    """Train the classifier and predict on the same comparison vectors.

    Calls ``self.fit(comparison_vectors, match_index)`` followed by
    ``self.predict(comparison_vectors)``.

    Parameters
    ----------
    comparison_vectors : pandas.DataFrame
        The comparison vectors.
    match_index : pandas.MultiIndex
        The true matches. Defaults to None.

    Returns
    -------
    The value returned by ``self.predict`` for ``comparison_vectors``.
    """
    self.fit(comparison_vectors, match_index)
    return self.predict(comparison_vectors)
Example #7
Source File: febrl.py From recordlinkage with BSD 3-Clause "New" or "Revised" License | 6 votes |
def _febrl_links(df): """Get the links of a FEBRL dataset.""" index = df.index.to_series() keys = index.str.extract(r'rec-(\d+)', expand=True)[0] index_int = numpy.arange(len(df)) df_helper = pandas.DataFrame({ 'key': keys, 'index': index_int }) # merge the two frame and make MultiIndex. pairs_df = df_helper.merge( df_helper, on='key' )[['index_x', 'index_y']] pairs_df = pairs_df[pairs_df['index_x'] > pairs_df['index_y']] return pandas.MultiIndex( levels=[df.index.values, df.index.values], codes=[pairs_df['index_x'].values, pairs_df['index_y'].values], names=[None, None], verify_integrity=False )
Example #8
Source File: test_pandas_store.py From arctic with GNU Lesser General Public License v2.1 | 6 votes |
def test_data_info_cols(library):
    """``get_info`` reports the expected metadata for a MultiIndex frame."""
    index = MultiIndex.from_tuples([(1, "ab"), (2, "bb"), (3, "cb")])
    frame = DataFrame(data=[100, 200, 300], index=index)
    library.write('test_data', frame)
    md = library.get_info('test_data')
    # Example of the metadata this test was written against:
    # {'dtype': [('level_0', '<i8'), ('level_1', 'S2'), ('0', '<i8')],
    #  'col_names': {u'index': [u'level_0', u'level_1'], u'columns': [u'0'], 'index_tz': [None, None]},
    #  'type': u'pandasdf',
    #  'handler': 'PandasDataFrameStore',
    #  'rows': 3,
    #  'segment_count': 1,
    #  'size': 50}
    assert 'size' in md

    expected_scalars = (
        ('segment_count', 1),
        ('rows', 3),
        ('handler', 'PandasDataFrameStore'),
        ('type', 'pandasdf'),
    )
    for key, value in expected_scalars:
        assert md[key] == value

    assert md['col_names'] == {'index': ['level_0', u'level_1'], 'columns': [u'0'], 'index_tz': [None, None]}

    # Only the field names are checked, not the dtype strings.
    assert len(md['dtype']) == 3
    for position, field_name in enumerate(('level_0', 'level_1', '0')):
        assert md['dtype'][position][0] == field_name
Example #9
Source File: measures.py From recordlinkage with BSD 3-Clause "New" or "Revised" License | 6 votes |
def true_positives(links_true, links_pred):
    """Count the number of True Positives.

    Returns the number of correctly predicted links, also called the number
    of True Positives (TP).

    Parameters
    ----------
    links_true: pandas.MultiIndex, pandas.DataFrame, pandas.Series
        The true (or actual) links.
    links_pred: pandas.MultiIndex, pandas.DataFrame, pandas.Series
        The predicted links.

    Returns
    -------
    int
        The number of correctly predicted links.
    """
    links_true = _get_multiindex(links_true)
    links_pred = _get_multiindex(links_pred)

    # Use the explicit set operation: ``&`` between Index objects is no
    # longer a set intersection on current pandas. This also matches the
    # ``.difference()`` calls in false_positives / false_negatives.
    return len(links_true.intersection(links_pred))
Example #10
Source File: measures.py From recordlinkage with BSD 3-Clause "New" or "Revised" License | 6 votes |
def false_positives(links_true, links_pred):
    """Count the number of False Positives.

    Returns the number of predicted links that are not true links (true
    non-links predicted as links). This value is known as the number of
    False Positives (FP).

    Parameters
    ----------
    links_true: pandas.MultiIndex, pandas.DataFrame, pandas.Series
        The true (or actual) links.
    links_pred: pandas.MultiIndex, pandas.DataFrame, pandas.Series
        The predicted links.

    Returns
    -------
    int
        The number of false positives.
    """
    actual = _get_multiindex(links_true)
    predicted = _get_multiindex(links_pred)

    # Predicted pairs that do not appear among the true pairs.
    wrongly_predicted = predicted.difference(actual)
    return len(wrongly_predicted)
Example #11
Source File: measures.py From recordlinkage with BSD 3-Clause "New" or "Revised" License | 6 votes |
def false_negatives(links_true, links_pred):
    """Count the number of False Negatives.

    Returns the number of true links that were not predicted (true links
    predicted as non-links). This value is known as the number of False
    Negatives (FN).

    Parameters
    ----------
    links_true: pandas.MultiIndex, pandas.DataFrame, pandas.Series
        The true (or actual) links.
    links_pred: pandas.MultiIndex, pandas.DataFrame, pandas.Series
        The predicted links.

    Returns
    -------
    int
        The number of false negatives.
    """
    actual = _get_multiindex(links_true)
    predicted = _get_multiindex(links_pred)

    # True pairs that are missing from the predicted pairs.
    missed = actual.difference(predicted)
    return len(missed)
Example #12
Source File: test_indexing.py From recordlinkage with BSD 3-Clause "New" or "Revised" License | 6 votes |
def test_iterative(self):
    """Indexing in two chunks must give the same pairs as a single pass."""

    def as_sorted_frame(pair_index):
        # Not possible to sort the MultiIndex directly here, so wrap it in
        # a frame and sort that.
        return pd.DataFrame(index=pair_index).sort_index()

    # SINGLE STEP
    indexer = Full()
    single_pass = as_sorted_frame(indexer.index((self.a, self.b)))

    # MULTI STEP
    indexer = Full()
    first_half = indexer.index((self.a[0:50], self.b))
    second_half = indexer.index((self.a[50:100], self.b))
    two_pass = as_sorted_frame(first_half.append(second_half))

    pdt.assert_frame_equal(single_pass, two_pass)
Example #13
Source File: test_indexing.py From recordlinkage with BSD 3-Clause "New" or "Revised" License | 6 votes |
def test_index_names_pandas023(self, index_class):
    """Level names of the generated pair index must not collide.

    Pandas changed the behaviour of MultiIndex names:
    https://github.com/pandas-dev/pandas/pull/18882
    https://github.com/J535D165/recordlinkage/issues/55
    This test checks compatibility with that change.
    """
    # make an index for each dataframe with a new index name
    index_a = pd.Index(self.a.index, name='index')
    df_a = pd.DataFrame(self.a, index=index_a)

    index_b = pd.Index(self.b.index, name='index')
    df_b = pd.DataFrame(self.b, index=index_b)

    # make the index (linking two frames)
    pairs_link = index_class._link_index(df_a, df_b)
    if pairs_link.names[0] is not None:
        assert pairs_link.names[0] != pairs_link.names[1]

    # make the index (deduplication of one frame)
    pairs_dedup = index_class._dedup_index(df_a)
    # Guard on the dedup result itself. The guard previously looked at
    # ``pairs_link``, so this check could be skipped or applied based on
    # the wrong index.
    if pairs_dedup.names[0] is not None:
        assert pairs_dedup.names[0] != pairs_dedup.names[1]
Example #14
Source File: test_indexing.py From recordlinkage with BSD 3-Clause "New" or "Revised" License | 6 votes |
def test_lower_triangular(self, index_class):
    """Every generated pair must lie in the strict lower triangle."""
    # make an index for the dataframe with a new index name
    named_index = pd.Index(self.a.index, name='index')
    df_a = pd.DataFrame(self.a, index=named_index)
    pairs = index_class.index(df_a)

    # expected: all (row, col) positions strictly below the diagonal
    labels = df_a.index.values
    lower_codes = np.tril_indices(len(df_a.index), k=-1)
    full_pairs = pd.MultiIndex(
        levels=[labels, labels],
        codes=lower_codes,
        verify_integrity=False,
    )

    # all pairs are in the lower triangle of the matrix.
    outside = pairs.difference(full_pairs)
    assert len(outside) == 0
Example #15
Source File: test_datasets.py From recordlinkage with BSD 3-Clause "New" or "Revised" License | 6 votes |
def test_krebs_dataset_download():
    """Load the Krebsregister dataset and check files, types and sizes."""
    # remove downloaded datasets
    clear_data_home()

    krebs_data, krebs_matches = load_krebsregister()

    # each of the ten blocks must be present as a zip file
    for i in range(1, 11):
        assert Path(get_data_home(), "krebsregister",
                    "block_{}.zip".format(i)).is_file()

    # check the types. ``assert type(x), T`` always passes because ``T`` is
    # parsed as the assertion message, so use isinstance instead.
    assert isinstance(krebs_data, pandas.DataFrame)
    assert isinstance(krebs_matches, pandas.MultiIndex)

    # count the number of records
    assert len(krebs_data) == 5749132
    assert len(krebs_matches) == 20931
Example #16
Source File: multi_index.py From arctic with GNU Lesser General Public License v2.1 | 6 votes |
def multi_index_insert_row(df, index_row, values_row):
    """Return a new dataframe with a row inserted for a multi-index dataframe.

    The rows of the result are sorted according to the multi-index.

    Parameters
    ----------
    df : pd.DataFrame
        Frame with a MultiIndex index.
    index_row : sequence
        One index value per level for the new row.
    values_row : list-like
        Data for the new row, passed to ``pd.DataFrame`` together with
        ``df.columns``.

    Returns
    -------
    pd.DataFrame
        ``df`` plus the new row, in sorted index order.
    """
    levels = [[value] for value in index_row]
    codes = [[0] for _ in index_row]
    try:
        row_index = pd.MultiIndex(levels=levels, codes=codes)
    except TypeError:
        # pandas < 0.24 only accepts the keyword under its old name. Trying
        # the constructor avoids comparing version strings, which is
        # lexicographic rather than numeric.
        row_index = pd.MultiIndex(levels=levels, labels=codes)

    row = pd.DataFrame(values_row, index=row_index, columns=df.columns)
    df = pd.concat((df, row))

    # Fast path: the row was appended to an already-sorted frame. This does
    # not index ``df.index[-2]`` (an IndexError when ``df`` was empty) and
    # does not use ``lexsort_depth`` (gone from current pandas).
    if df.index.is_monotonic_increasing:
        return df

    # The df wasn't sorted or the row has to be put in the middle somewhere
    return df.sort_index()
Example #17
Source File: model_processing.py From respy with MIT License | 6 votes |
def _infer_choices_with_experience(params, options): """Infer choices with experiences. Example ------- >>> options = {"covariates": {"a": "exp_white_collar + exp_a", "b": "exp_b >= 2"}} >>> index = pd.MultiIndex.from_product([["category"], ["a", "b"]]) >>> params = pd.Series(index=index, dtype="object") >>> _infer_choices_with_experience(params, options) ['a', 'b', 'white_collar'] """ covariates = options["covariates"] parameters = params.index.get_level_values(1) used_covariates = [cov for cov in covariates if cov in parameters] matches = [] for param in parameters: matches += re.findall(r"\bexp_([A-Za-z_]+)\b", str(param)) for cov in used_covariates: matches += re.findall(r"\bexp_([A-Za-z_]+)\b", covariates[cov]) return sorted(set(matches))
Example #18
Source File: multi_index.py From arctic with GNU Lesser General Public License v2.1 | 5 votes |
def groupby_asof(df, as_of=None, dt_col='sample_dt', asof_col='observed_dt'):
    '''
    Select the latest rows of a bitemporal dataframe as-of a certain date.

    Parameters
    ----------
    df: ``pd.DataFrame``
        Dataframe with a MultiIndex index
    as_of: ``datetime``
        Return a timeseries with values observed <= this as-of date. By
        default, the latest observed values will be returned.
    dt_col: ``str`` or ``int``
        Name or index of the column in the MultiIndex that is the sample date
    asof_col: ``str`` or ``int``
        Name or index of the column in the MultiIndex that is the observed date
    '''
    # A naive ``as_of`` cannot be compared against a tz-aware index level,
    # so give it the timezone returned by ``mktz()`` in that case.
    needs_tz = (
        as_of
        and as_of.tzinfo is None
        and df.index.get_level_values(asof_col).tz is not None
    )
    if needs_tz:
        as_of = as_of.replace(tzinfo=mktz())

    return fancy_group_by(
        df,
        grouping_level=dt_col,
        aggregate_level=asof_col,
        method='last',
        max_=as_of,
    )


# ----------------------- Insert/Append ---------------------------- #
Example #19
Source File: test_pandas_store.py From arctic with GNU Lesser General Public License v2.1 | 5 votes |
def test_save_read_pandas_empty_series_with_datetime_multiindex_with_timezone(library):
    """An empty tz-aware MultiIndex Series keeps its index levels on read."""

    def make_levels():
        # First level is a tz-aware, empty DatetimeIndex.
        return (pd.DatetimeIndex([], tz="America/Chicago"), pd.Index([]))

    no_codes = ([], [])
    try:
        # hack to support modern and older versions of pandas
        empty_index = pd.MultiIndex(levels=make_levels(), codes=no_codes)
    except Exception:
        empty_index = pd.MultiIndex(levels=make_levels(), labels=no_codes)

    series = Series(data=[], index=empty_index)
    library.write('pandas', series)
    restored = library.read('pandas').data
    assert empty_index.equal_levels(restored.index), "Index timezone information should be maintained, even when empty"
Example #20
Source File: test_market.py From pyTD with MIT License | 5 votes |
def test_batch_history_pandas(self):
    """A batch price-history request returns one column group per symbol."""
    symbols = ["AAPL", "TSLA", "MSFT"]
    data = pyTD.market.get_price_history(symbols, output_format='pandas')

    assert isinstance(data, pd.DataFrame)
    assert isinstance(data.columns, pd.MultiIndex)

    # Every requested symbol shows up in the column index.
    for symbol in symbols:
        assert symbol in data.columns

    first_row = data.iloc[0]
    assert first_row.name.date() == datetime.date(2018, 1, 2)
Example #21
Source File: method_of_simulated_moments.py From respy with MIT License | 5 votes |
def _create_tidy_data(data, moment_set_labels): """Create tidy data from list of pandas.DataFrames.""" counter = itertools.count() tidy_data = [] for series_or_df, label in zip(data, moment_set_labels): # Join index levels for MultiIndex objects. if isinstance(series_or_df.index, pd.MultiIndex): series_or_df = series_or_df.rename(index=str) series_or_df.index = series_or_df.index.to_flat_index().str.join("_") # If moments are a pandas.Series, convert into pandas.DataFrame. if isinstance(series_or_df, pd.Series): # Unnamed pandas.Series receive a name based on a counter. if series_or_df.name is None: series_or_df = series_or_df.to_frame(name=next(counter)) else: series_or_df = series_or_df.to_frame() # Create pandas.DataFrame in tidy format. tidy_df = series_or_df.unstack() tidy_df.index.names = ("moment_column", "moment_index") tidy_df.rename("value", inplace=True) tidy_df = tidy_df.reset_index() tidy_df["moment_set"] = label tidy_data.append(tidy_df) return pd.concat(tidy_data, ignore_index=True)
Example #22
Source File: test_classify.py From recordlinkage with BSD 3-Clause "New" or "Revised" License | 5 votes |
def test_fit_predict_unsupervised(self, classifier):
    """``fit`` then ``predict`` must agree with ``fit_predict``."""
    # Two-step: fit, then predict on the same data.
    two_step = classifier()
    two_step.fit(self.X_train)
    result = two_step.predict(self.X_train)
    assert isinstance(result, pd.MultiIndex)

    # One-step on a fresh classifier.
    one_step = classifier()
    expected = one_step.fit_predict(self.X_train)
    assert isinstance(expected, pd.MultiIndex)

    assert result.values.shape == expected.values.shape
    pdt.assert_index_equal(result, expected)
Example #23
Source File: mot.py From PoseWarper with Apache License 2.0 | 5 votes |
def new_event_dataframe():
    """Create a new, empty DataFrame for event tracking.

    Returns
    -------
    pd.DataFrame
        Empty frame indexed by a two-level MultiIndex named
        ``('FrameId', 'Event')`` with the columns ``Type``, ``OId``,
        ``HId`` and ``D``.
    """
    index_names = ['FrameId', 'Event']
    try:
        idx = pd.MultiIndex(levels=[[], []], codes=[[], []], names=index_names)
    except TypeError:
        # pandas < 0.24 only knows this keyword as ``labels``. Newer
        # releases dropped ``labels``, so ``codes`` is tried first.
        idx = pd.MultiIndex(levels=[[], []], labels=[[], []], names=index_names)

    cats = pd.Categorical([], categories=['FP', 'MISS', 'SWITCH', 'MATCH'])
    # NOTE(review): the original comments on OId/HId mention "using float",
    # but both columns are created with dtype=str. Confirm which is intended.
    df = pd.DataFrame(
        OrderedDict([
            ('Type', pd.Series(cats)),          # Type of event. One of FP (false positive), MISS, SWITCH, MATCH
            ('OId', pd.Series(dtype=str)),      # Object ID or -1 if FP.
            ('HId', pd.Series(dtype=str)),      # Hypothesis ID or NaN if MISS.
            ('D', pd.Series(dtype=float)),      # Distance or NaN when FP or MISS
        ]),
        index=idx
    )
    return df
Example #24
Source File: test_common.py From recruit with Apache License 2.0 | 5 votes |
def test_copy_and_deepcopy(self, indices):
    """Copies are new, equal objects; ``copy(name=...)`` sets the name."""
    import copy as copy_module

    if isinstance(indices, MultiIndex):
        pytest.skip('Skip check for MultiIndex')

    # Shallow copy first, then deep copy.
    for duplicate in (copy_module.copy, copy_module.deepcopy):
        clone = duplicate(indices)
        assert clone is not indices
        assert clone.equals(indices)

    renamed_clone = indices.copy(deep=True, name="banana")
    assert renamed_clone.name == "banana"
Example #25
Source File: test_pandas_store.py From arctic with GNU Lesser General Public License v2.1 | 5 votes |
def test_save_read_pandas_series_with_multiindex_and_name(library):
    """A named MultiIndex Series keeps its values and name after a round-trip."""
    df = Series(data=['A', 'BC', 'DEF'],
                index=MultiIndex.from_tuples([(1, 2), (1, 3), (2, 2)]),
                name='Foo')
    library.write('pandas', df)
    saved_df = library.read('pandas').data
    assert np.all(df.values == saved_df.values)
    # Check the object that was read back. Asserting on ``df`` (the series
    # just constructed with name='Foo') can never fail and says nothing
    # about the store.
    assert saved_df.name == 'Foo'
Example #26
Source File: test_pandas_store.py From arctic with GNU Lesser General Public License v2.1 | 5 votes |
def test_save_read_pandas_dataframe_with_multiindex(library):
    """A MultiIndex DataFrame keeps its values after a write/read round-trip."""
    pair_index = MultiIndex.from_tuples([(1, 2), (1, 3), (2, 2)])
    frame = DataFrame(data=['A', 'BC', 'DEF'], index=pair_index)

    library.write('pandas', frame)
    restored = library.read('pandas').data

    assert np.all(frame.values == restored.values)
Example #27
Source File: test_pandas_store.py From arctic with GNU Lesser General Public License v2.1 | 5 votes |
def test_save_read_pandas_dataframe_with_unicode_index_name(library):
    """A frame whose single index level has a unicode name round-trips."""
    # One-element tuples: a MultiIndex with a single datetime level.
    stamps = [(np.datetime64(dt(2013, 1, day)),) for day in (1, 2, 3)]
    dated_index = MultiIndex.from_tuples(stamps, names=[u'DATETIME'])
    frame = DataFrame(data=['A', 'BC', 'DEF'], index=dated_index)

    library.write('pandas', frame)
    restored = library.read('pandas').data

    assert np.all(frame.values == restored.values)
Example #28
Source File: test_common.py From recruit with Apache License 2.0 | 5 votes |
def test_set_name_methods(self, indices):
    """Exercise ``set_names`` and in-place ``rename`` on a flat index."""
    # A MultiIndex is not tested here (it is tested separately).
    if isinstance(indices, MultiIndex):
        pytest.skip('Skip check for MultiIndex')

    new_name = "This is the new name for this index"
    name_before = indices.name

    # set_names returns a renamed index and leaves the original untouched.
    renamed = indices.set_names([new_name])
    assert renamed.name == new_name
    assert indices.name == name_before

    # In-place rename should return None.
    outcome = indices.rename(new_name, inplace=True)
    assert outcome is None
    assert indices.name == new_name
    assert indices.names == [new_name]

    # Disabled check kept from the original:
    # with pytest.raises(TypeError, match="list-like"):
    #    # should still fail even if it would be the right length
    #    ind.set_names("a")
    with pytest.raises(ValueError, match="Level must be None"):
        indices.set_names("a", level=0)

    # rename in place just leaves tuples and other containers alone
    tuple_name = ('A', 'B')
    indices.rename(tuple_name, inplace=True)
    assert indices.name == tuple_name
    assert indices.names == [tuple_name]
Example #29
Source File: test_common.py From recruit with Apache License 2.0 | 5 votes |
def test_to_flat_index(self, indices):
    """For a flat index, ``to_flat_index`` returns an equal index."""
    # 22866
    if isinstance(indices, MultiIndex):
        pytest.skip("Separate expectation for MultiIndex")

    flattened = indices.to_flat_index()
    tm.assert_index_equal(flattened, indices)
Example #30
Source File: test_pandas_store.py From arctic with GNU Lesser General Public License v2.1 | 5 votes |
def test_save_read_pandas_series_with_multiindex(library):
    """A MultiIndex Series keeps its values after a write/read round-trip."""
    pair_index = MultiIndex.from_tuples([(1, 2), (1, 3), (2, 2)])
    series = Series(data=['A', 'BC', 'DEF'], index=pair_index)

    library.write('pandas', series)
    restored = library.read('pandas').data

    assert np.all(series.values == restored.values)