Python pandas.merge_asof() Examples
The following are 30
code examples of pandas.merge_asof().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module
pandas
, or try the search function
.
Example #1
Source File: test_merge_asof.py From recruit with Apache License 2.0 | 6 votes |
def test_multi_index(self): # MultiIndex is prohibited trades = self.trades.set_index(['time', 'price']) quotes = self.quotes.set_index('time') with pytest.raises(MergeError): merge_asof(trades, quotes, left_index=True, right_index=True) trades = self.trades.set_index('time') quotes = self.quotes.set_index(['time', 'bid']) with pytest.raises(MergeError): merge_asof(trades, quotes, left_index=True, right_index=True)
Example #2
Source File: test_merge_asof.py From recruit with Apache License 2.0 | 6 votes |
def test_forward_by(self): # GH14887 left = pd.DataFrame({'a': [1, 5, 10, 12, 15], 'b': ['X', 'X', 'Y', 'Z', 'Y'], 'left_val': ['a', 'b', 'c', 'd', 'e']}) right = pd.DataFrame({'a': [1, 6, 11, 15, 16], 'b': ['X', 'Z', 'Y', 'Z', 'Y'], 'right_val': [1, 6, 11, 15, 16]}) expected = pd.DataFrame({'a': [1, 5, 10, 12, 15], 'b': ['X', 'X', 'Y', 'Z', 'Y'], 'left_val': ['a', 'b', 'c', 'd', 'e'], 'right_val': [1, np.nan, 11, 15, 16]}) result = pd.merge_asof(left, right, on='a', by='b', direction='forward') assert_frame_equal(result, expected)
Example #3
Source File: test_merge_asof.py From recruit with Apache License 2.0 | 6 votes |
def test_nearest_by(self): # GH14887 left = pd.DataFrame({'a': [1, 5, 10, 12, 15], 'b': ['X', 'X', 'Z', 'Z', 'Y'], 'left_val': ['a', 'b', 'c', 'd', 'e']}) right = pd.DataFrame({'a': [1, 6, 11, 15, 16], 'b': ['X', 'Z', 'Z', 'Z', 'Y'], 'right_val': [1, 6, 11, 15, 16]}) expected = pd.DataFrame({'a': [1, 5, 10, 12, 15], 'b': ['X', 'X', 'Z', 'Z', 'Y'], 'left_val': ['a', 'b', 'c', 'd', 'e'], 'right_val': [1, 1, 11, 11, 16]}) result = pd.merge_asof(left, right, on='a', by='b', direction='nearest') assert_frame_equal(result, expected)
Example #4
Source File: test_merge_asof.py From vnpy_crypto with MIT License | 6 votes |
def test_on_and_index(self): # 'on' parameter and index together is prohibited trades = self.trades.set_index('time') quotes = self.quotes.set_index('time') with pytest.raises(MergeError): merge_asof(trades, quotes, left_on='price', left_index=True, right_index=True) trades = self.trades.set_index('time') quotes = self.quotes.set_index('time') with pytest.raises(MergeError): merge_asof(trades, quotes, right_on='bid', left_index=True, right_index=True)
Example #5
Source File: test_merge_asof.py From vnpy_crypto with MIT License | 6 votes |
def test_allow_exact_matches_and_tolerance3(self): # GH 13709 df1 = pd.DataFrame({ 'time': pd.to_datetime(['2016-07-15 13:30:00.030', '2016-07-15 13:30:00.030']), 'username': ['bob', 'charlie']}) df2 = pd.DataFrame({ 'time': pd.to_datetime(['2016-07-15 13:30:00.000', '2016-07-15 13:30:00.030']), 'version': [1, 2]}) result = pd.merge_asof(df1, df2, on='time', allow_exact_matches=False, tolerance=pd.Timedelta('10ms')) expected = pd.DataFrame({ 'time': pd.to_datetime(['2016-07-15 13:30:00.030', '2016-07-15 13:30:00.030']), 'username': ['bob', 'charlie'], 'version': [np.nan, np.nan]}) assert_frame_equal(result, expected)
Example #6
Source File: test_merge_asof.py From recruit with Apache License 2.0 | 6 votes |
def test_on_float(self): # mimics how to determine the minimum-price variation df1 = pd.DataFrame({ 'price': [5.01, 0.0023, 25.13, 340.05, 30.78, 1040.90, 0.0078], 'symbol': list("ABCDEFG")}, columns=['symbol', 'price']) df2 = pd.DataFrame({ 'price': [0.0, 1.0, 100.0], 'mpv': [0.0001, 0.01, 0.05]}, columns=['price', 'mpv']) df1 = df1.sort_values('price').reset_index(drop=True) result = pd.merge_asof(df1, df2, on='price') expected = pd.DataFrame({ 'symbol': list("BGACEDF"), 'price': [0.0023, 0.0078, 5.01, 25.13, 30.78, 340.05, 1040.90], 'mpv': [0.0001, 0.0001, 0.01, 0.01, 0.01, 0.05, 0.05]}, columns=['symbol', 'price', 'mpv']) assert_frame_equal(result, expected)
Example #7
Source File: test_merge_asof.py From vnpy_crypto with MIT License | 6 votes |
def test_forward_by(self): # GH14887 left = pd.DataFrame({'a': [1, 5, 10, 12, 15], 'b': ['X', 'X', 'Y', 'Z', 'Y'], 'left_val': ['a', 'b', 'c', 'd', 'e']}) right = pd.DataFrame({'a': [1, 6, 11, 15, 16], 'b': ['X', 'Z', 'Y', 'Z', 'Y'], 'right_val': [1, 6, 11, 15, 16]}) expected = pd.DataFrame({'a': [1, 5, 10, 12, 15], 'b': ['X', 'X', 'Y', 'Z', 'Y'], 'left_val': ['a', 'b', 'c', 'd', 'e'], 'right_val': [1, np.nan, 11, 15, 16]}) result = pd.merge_asof(left, right, on='a', by='b', direction='forward') assert_frame_equal(result, expected)
Example #8
Source File: test_merge_asof.py From vnpy_crypto with MIT License | 6 votes |
def test_nearest_by(self): # GH14887 left = pd.DataFrame({'a': [1, 5, 10, 12, 15], 'b': ['X', 'X', 'Z', 'Z', 'Y'], 'left_val': ['a', 'b', 'c', 'd', 'e']}) right = pd.DataFrame({'a': [1, 6, 11, 15, 16], 'b': ['X', 'Z', 'Z', 'Z', 'Y'], 'right_val': [1, 6, 11, 15, 16]}) expected = pd.DataFrame({'a': [1, 5, 10, 12, 15], 'b': ['X', 'X', 'Z', 'Z', 'Y'], 'left_val': ['a', 'b', 'c', 'd', 'e'], 'right_val': [1, 1, 11, 11, 16]}) result = pd.merge_asof(left, right, on='a', by='b', direction='nearest') assert_frame_equal(result, expected)
Example #9
Source File: test_merge_asof.py From recruit with Apache License 2.0 | 6 votes |
def test_on_and_index(self): # 'on' parameter and index together is prohibited trades = self.trades.set_index('time') quotes = self.quotes.set_index('time') with pytest.raises(MergeError): merge_asof(trades, quotes, left_on='price', left_index=True, right_index=True) trades = self.trades.set_index('time') quotes = self.quotes.set_index('time') with pytest.raises(MergeError): merge_asof(trades, quotes, right_on='bid', left_index=True, right_index=True)
Example #10
Source File: test_merge_asof.py From vnpy_crypto with MIT License | 6 votes |
def test_multi_index(self): # MultiIndex is prohibited trades = self.trades.set_index(['time', 'price']) quotes = self.quotes.set_index('time') with pytest.raises(MergeError): merge_asof(trades, quotes, left_index=True, right_index=True) trades = self.trades.set_index('time') quotes = self.quotes.set_index(['time', 'bid']) with pytest.raises(MergeError): merge_asof(trades, quotes, left_index=True, right_index=True)
Example #11
Source File: test_merge_asof.py From vnpy_crypto with MIT License | 6 votes |
def test_on_float(self): # mimics how to determine the minimum-price variation df1 = pd.DataFrame({ 'price': [5.01, 0.0023, 25.13, 340.05, 30.78, 1040.90, 0.0078], 'symbol': list("ABCDEFG")}, columns=['symbol', 'price']) df2 = pd.DataFrame({ 'price': [0.0, 1.0, 100.0], 'mpv': [0.0001, 0.01, 0.05]}, columns=['price', 'mpv']) df1 = df1.sort_values('price').reset_index(drop=True) result = pd.merge_asof(df1, df2, on='price') expected = pd.DataFrame({ 'symbol': list("BGACEDF"), 'price': [0.0023, 0.0078, 5.01, 25.13, 30.78, 340.05, 1040.90], 'mpv': [0.0001, 0.0001, 0.01, 0.01, 0.01, 0.05, 0.05]}, columns=['symbol', 'price', 'mpv']) assert_frame_equal(result, expected)
Example #12
Source File: test_merge_asof.py From recruit with Apache License 2.0 | 6 votes |
def test_allow_exact_matches_and_tolerance3(self): # GH 13709 df1 = pd.DataFrame({ 'time': pd.to_datetime(['2016-07-15 13:30:00.030', '2016-07-15 13:30:00.030']), 'username': ['bob', 'charlie']}) df2 = pd.DataFrame({ 'time': pd.to_datetime(['2016-07-15 13:30:00.000', '2016-07-15 13:30:00.030']), 'version': [1, 2]}) result = pd.merge_asof(df1, df2, on='time', allow_exact_matches=False, tolerance=pd.Timedelta('10ms')) expected = pd.DataFrame({ 'time': pd.to_datetime(['2016-07-15 13:30:00.030', '2016-07-15 13:30:00.030']), 'username': ['bob', 'charlie'], 'version': [np.nan, np.nan]}) assert_frame_equal(result, expected)
Example #13
Source File: test_merge_asof.py From vnpy_crypto with MIT License | 6 votes |
def test_tolerance_tz(self): # GH 14844 left = pd.DataFrame( {'date': pd.DatetimeIndex(start=pd.to_datetime('2016-01-02'), freq='D', periods=5, tz=pytz.timezone('UTC')), 'value1': np.arange(5)}) right = pd.DataFrame( {'date': pd.DatetimeIndex(start=pd.to_datetime('2016-01-01'), freq='D', periods=5, tz=pytz.timezone('UTC')), 'value2': list("ABCDE")}) result = pd.merge_asof(left, right, on='date', tolerance=pd.Timedelta('1 day')) expected = pd.DataFrame( {'date': pd.DatetimeIndex(start=pd.to_datetime('2016-01-02'), freq='D', periods=5, tz=pytz.timezone('UTC')), 'value1': np.arange(5), 'value2': list("BCDEE")}) assert_frame_equal(result, expected)
Example #14
Source File: test_merge_asof.py From recruit with Apache License 2.0 | 6 votes |
def test_tolerance_tz(self): # GH 14844 left = pd.DataFrame( {'date': pd.date_range(start=pd.to_datetime('2016-01-02'), freq='D', periods=5, tz=pytz.timezone('UTC')), 'value1': np.arange(5)}) right = pd.DataFrame( {'date': pd.date_range(start=pd.to_datetime('2016-01-01'), freq='D', periods=5, tz=pytz.timezone('UTC')), 'value2': list("ABCDE")}) result = pd.merge_asof(left, right, on='date', tolerance=pd.Timedelta('1 day')) expected = pd.DataFrame( {'date': pd.date_range(start=pd.to_datetime('2016-01-02'), freq='D', periods=5, tz=pytz.timezone('UTC')), 'value1': np.arange(5), 'value2': list("BCDEE")}) assert_frame_equal(result, expected)
Example #15
Source File: test_merge_asof.py From vnpy_crypto with MIT License | 5 votes |
def test_basic_no_by(self): f = lambda x: x[x.ticker == 'MSFT'].drop('ticker', axis=1) \ .reset_index(drop=True) # just use a single ticker expected = f(self.asof) trades = f(self.trades) quotes = f(self.quotes) result = merge_asof(trades, quotes, on='time') assert_frame_equal(result, expected)
Example #16
Source File: test_merge_asof.py From vnpy_crypto with MIT License | 5 votes |
def test_with_duplicates(self, datapath): q = pd.concat([self.quotes, self.quotes]).sort_values( ['time', 'ticker']).reset_index(drop=True) result = merge_asof(self.trades, q, on='time', by='ticker') expected = self.read_data(datapath, 'asof.csv') assert_frame_equal(result, expected)
Example #17
Source File: test_merge_asof.py From vnpy_crypto with MIT License | 5 votes |
def test_allow_exact_matches_and_tolerance2(self): # GH 13695 df1 = pd.DataFrame({ 'time': pd.to_datetime(['2016-07-15 13:30:00.030']), 'username': ['bob']}) df2 = pd.DataFrame({ 'time': pd.to_datetime(['2016-07-15 13:30:00.000', '2016-07-15 13:30:00.030']), 'version': [1, 2]}) result = pd.merge_asof(df1, df2, on='time') expected = pd.DataFrame({ 'time': pd.to_datetime(['2016-07-15 13:30:00.030']), 'username': ['bob'], 'version': [2]}) assert_frame_equal(result, expected) result = pd.merge_asof(df1, df2, on='time', allow_exact_matches=False) expected = pd.DataFrame({ 'time': pd.to_datetime(['2016-07-15 13:30:00.030']), 'username': ['bob'], 'version': [1]}) assert_frame_equal(result, expected) result = pd.merge_asof(df1, df2, on='time', allow_exact_matches=False, tolerance=pd.Timedelta('10ms')) expected = pd.DataFrame({ 'time': pd.to_datetime(['2016-07-15 13:30:00.030']), 'username': ['bob'], 'version': [np.nan]}) assert_frame_equal(result, expected)
Example #18
Source File: test_merge_asof.py From vnpy_crypto with MIT License | 5 votes |
def test_allow_exact_matches_and_tolerance_forward(self): # GH14887 left = pd.DataFrame({'a': [1, 5, 10], 'left_val': ['a', 'b', 'c']}) right = pd.DataFrame({'a': [1, 3, 4, 6, 11], 'right_val': [1, 3, 4, 6, 11]}) expected = pd.DataFrame({'a': [1, 5, 10], 'left_val': ['a', 'b', 'c'], 'right_val': [np.nan, 6, 11]}) result = pd.merge_asof(left, right, on='a', direction='forward', allow_exact_matches=False, tolerance=1) assert_frame_equal(result, expected)
Example #19
Source File: test_merge_asof.py From vnpy_crypto with MIT License | 5 votes |
def test_on_specialized_type(self): # GH13936 for dtype in [np.uint8, np.uint16, np.uint32, np.uint64, np.int8, np.int16, np.int32, np.int64, np.float16, np.float32, np.float64]: df1 = pd.DataFrame({ 'value': [5, 2, 25, 100, 78, 120, 79], 'symbol': list("ABCDEFG")}, columns=['symbol', 'value']) df1.value = dtype(df1.value) df2 = pd.DataFrame({ 'value': [0, 80, 120, 125], 'result': list('xyzw')}, columns=['value', 'result']) df2.value = dtype(df2.value) df1 = df1.sort_values('value').reset_index(drop=True) if dtype == np.float16: with pytest.raises(MergeError): pd.merge_asof(df1, df2, on='value') continue result = pd.merge_asof(df1, df2, on='value') expected = pd.DataFrame( {'symbol': list("BACEGDF"), 'value': [2, 5, 25, 78, 79, 100, 120], 'result': list('xxxxxyz') }, columns=['symbol', 'value', 'result']) expected.value = dtype(expected.value) assert_frame_equal(result, expected)
Example #20
Source File: test_merge_asof.py From vnpy_crypto with MIT License | 5 votes |
def test_by_int(self): # we specialize by type, so test that this is correct df1 = pd.DataFrame({ 'time': pd.to_datetime(['20160525 13:30:00.020', '20160525 13:30:00.030', '20160525 13:30:00.040', '20160525 13:30:00.050', '20160525 13:30:00.060']), 'key': [1, 2, 1, 3, 2], 'value1': [1.1, 1.2, 1.3, 1.4, 1.5]}, columns=['time', 'key', 'value1']) df2 = pd.DataFrame({ 'time': pd.to_datetime(['20160525 13:30:00.015', '20160525 13:30:00.020', '20160525 13:30:00.025', '20160525 13:30:00.035', '20160525 13:30:00.040', '20160525 13:30:00.055', '20160525 13:30:00.060', '20160525 13:30:00.065']), 'key': [2, 1, 1, 3, 2, 1, 2, 3], 'value2': [2.1, 2.2, 2.3, 2.4, 2.5, 2.6, 2.7, 2.8]}, columns=['time', 'key', 'value2']) result = pd.merge_asof(df1, df2, on='time', by='key') expected = pd.DataFrame({ 'time': pd.to_datetime(['20160525 13:30:00.020', '20160525 13:30:00.030', '20160525 13:30:00.040', '20160525 13:30:00.050', '20160525 13:30:00.060']), 'key': [1, 2, 1, 3, 2], 'value1': [1.1, 1.2, 1.3, 1.4, 1.5], 'value2': [2.2, 2.1, 2.3, 2.4, 2.7]}, columns=['time', 'key', 'value1', 'value2']) assert_frame_equal(result, expected)
Example #21
Source File: test_merge_asof.py From vnpy_crypto with MIT License | 5 votes |
def test_on_float_by_int(self): # type specialize both "by" and "on" parameters df1 = pd.DataFrame({ 'symbol': list("AAABBBCCC"), 'exch': [1, 2, 3, 1, 2, 3, 1, 2, 3], 'price': [3.26, 3.2599, 3.2598, 12.58, 12.59, 12.5, 378.15, 378.2, 378.25]}, columns=['symbol', 'exch', 'price']) df2 = pd.DataFrame({ 'exch': [1, 1, 1, 2, 2, 2, 3, 3, 3], 'price': [0.0, 1.0, 100.0, 0.0, 5.0, 100.0, 0.0, 5.0, 1000.0], 'mpv': [0.0001, 0.01, 0.05, 0.0001, 0.01, 0.1, 0.0001, 0.25, 1.0]}, columns=['exch', 'price', 'mpv']) df1 = df1.sort_values('price').reset_index(drop=True) df2 = df2.sort_values('price').reset_index(drop=True) result = pd.merge_asof(df1, df2, on='price', by='exch') expected = pd.DataFrame({ 'symbol': list("AAABBBCCC"), 'exch': [3, 2, 1, 3, 1, 2, 1, 2, 3], 'price': [3.2598, 3.2599, 3.26, 12.5, 12.58, 12.59, 378.15, 378.2, 378.25], 'mpv': [0.0001, 0.0001, 0.01, 0.25, 0.01, 0.01, 0.05, 0.1, 0.25]}, columns=['symbol', 'exch', 'price', 'mpv']) assert_frame_equal(result, expected)
Example #22
Source File: test_merge_asof.py From vnpy_crypto with MIT License | 5 votes |
def test_tolerance_forward(self): # GH14887 left = pd.DataFrame({'a': [1, 5, 10], 'left_val': ['a', 'b', 'c']}) right = pd.DataFrame({'a': [1, 2, 3, 7, 11], 'right_val': [1, 2, 3, 7, 11]}) expected = pd.DataFrame({'a': [1, 5, 10], 'left_val': ['a', 'b', 'c'], 'right_val': [1, np.nan, 11]}) result = pd.merge_asof(left, right, on='a', direction='forward', tolerance=1) assert_frame_equal(result, expected)
Example #23
Source File: test_merge_asof.py From vnpy_crypto with MIT License | 5 votes |
def test_basic2(self, datapath): expected = self.read_data(datapath, 'asof2.csv') trades = self.read_data(datapath, 'trades2.csv') quotes = self.read_data(datapath, 'quotes2.csv', dedupe=True) result = merge_asof(trades, quotes, on='time', by='ticker') assert_frame_equal(result, expected)
Example #24
Source File: test_merge_asof.py From vnpy_crypto with MIT License | 5 votes |
def test_multiby_indexed(self): # GH15676 left = pd.DataFrame([ [pd.to_datetime('20160602'), 1, 'a'], [pd.to_datetime('20160602'), 2, 'a'], [pd.to_datetime('20160603'), 1, 'b'], [pd.to_datetime('20160603'), 2, 'b']], columns=['time', 'k1', 'k2']).set_index('time') right = pd.DataFrame([ [pd.to_datetime('20160502'), 1, 'a', 1.0], [pd.to_datetime('20160502'), 2, 'a', 2.0], [pd.to_datetime('20160503'), 1, 'b', 3.0], [pd.to_datetime('20160503'), 2, 'b', 4.0]], columns=['time', 'k1', 'k2', 'value']).set_index('time') expected = pd.DataFrame([ [pd.to_datetime('20160602'), 1, 'a', 1.0], [pd.to_datetime('20160602'), 2, 'a', 2.0], [pd.to_datetime('20160603'), 1, 'b', 3.0], [pd.to_datetime('20160603'), 2, 'b', 4.0]], columns=['time', 'k1', 'k2', 'value']).set_index('time') result = pd.merge_asof(left, right, left_index=True, right_index=True, by=['k1', 'k2']) assert_frame_equal(expected, result) with pytest.raises(MergeError): pd.merge_asof(left, right, left_index=True, right_index=True, left_by=['k1', 'k2'], right_by=['k1'])
Example #25
Source File: test_merge_asof.py From vnpy_crypto with MIT License | 5 votes |
def test_basic_left_by_right_by(self): # GH14253 expected = self.asof trades = self.trades quotes = self.quotes result = merge_asof(trades, quotes, on='time', left_by='ticker', right_by='ticker') assert_frame_equal(result, expected)
Example #26
Source File: test_merge_asof.py From vnpy_crypto with MIT License | 5 votes |
def test_basic_left_index_right_index(self): expected = self.asof.set_index('time') trades = self.trades.set_index('time') quotes = self.quotes.set_index('time') result = merge_asof(trades, quotes, left_index=True, right_index=True, by='ticker') assert_frame_equal(result, expected)
Example #27
Source File: test_merge_asof.py From vnpy_crypto with MIT License | 5 votes |
def test_basic_right_index(self): expected = self.asof trades = self.trades quotes = self.quotes.set_index('time') result = merge_asof(trades, quotes, left_on='time', right_index=True, by='ticker') assert_frame_equal(result, expected)
Example #28
Source File: test_merge_asof.py From vnpy_crypto with MIT License | 5 votes |
def test_basic_categorical(self): expected = self.asof trades = self.trades.copy() trades.ticker = trades.ticker.astype('category') quotes = self.quotes.copy() quotes.ticker = quotes.ticker.astype('category') expected.ticker = expected.ticker.astype('category') result = merge_asof(trades, quotes, on='time', by='ticker') assert_frame_equal(result, expected)
Example #29
Source File: test_merge_asof.py From vnpy_crypto with MIT License | 5 votes |
def test_basic(self): expected = self.asof trades = self.trades quotes = self.quotes result = merge_asof(trades, quotes, on='time', by='ticker') assert_frame_equal(result, expected)
Example #30
Source File: test_merge_asof.py From vnpy_crypto with MIT License | 5 votes |
def test_examples4(self): """ doc-string examples """ # GH14887 left = pd.DataFrame({'a': [1, 5, 10], 'left_val': ['a', 'b', 'c']}) right = pd.DataFrame({'a': [1, 2, 3, 6, 7], 'right_val': [1, 2, 3, 6, 7]}) expected = pd.DataFrame({'a': [1, 5, 10], 'left_val': ['a', 'b', 'c'], 'right_val': [1, 6, 7]}) result = pd.merge_asof(left, right, on='a', direction='nearest') assert_frame_equal(result, expected)