Python toolz.groupby() Examples
The following are 9 code examples of toolz.groupby().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module toolz, or try the search function.
Example #1
Source File: peer.py From pyquarkchain with MIT License | 5 votes |
def is_valid_connection_candidate(self, candidate: Node) -> bool:
    """Decide whether *candidate* is acceptable as a new connection.

    Limits how many connected peers may share a single remote IP
    address, to avoid concentrating connections on one host.
    """
    # Bucket the currently connected nodes by their remote IP address.
    nodes_by_ip = groupby(
        operator.attrgetter("remote.address.ip"),
        self.connected_nodes.values(),
    )
    same_ip_nodes = nodes_by_ip.get(candidate.address.ip, [])
    # NOTE(review): the intent reads as "no more than 2 nodes per IP",
    # but `<= 2` admits a third connection when two already exist;
    # preserved as-is to keep behavior identical.
    return len(same_ip_nodes) <= 2
Example #2
Source File: asset_restrictions.py From pylivetrader with Apache License 2.0 | 5 votes |
def __init__(self, restrictions):
    """Index *restrictions* per asset, sorted by effective date.

    Builds a dict mapping each asset to the list of its restrictions,
    ordered by ascending ``effective_date``.
    """
    grouped = groupby(lambda restriction: restriction.asset, restrictions)
    by_asset = {}
    for asset, asset_restrictions in iteritems(grouped):
        by_asset[asset] = sorted(
            asset_restrictions,
            key=lambda r: r.effective_date,
        )
    self._restrictions_by_asset = by_asset
Example #3
Source File: base_network.py From cloudformation-environmentbase with BSD 2-Clause "Simplified" License | 5 votes |
def _get_subnet_config_w_cidr(self, network_config):
    """Yield subnet configs from ``network_config`` with a ``'cidr'`` key added.

    Subnet configs (from ``self._get_subnet_config_w_az``) are grouped by
    their ``'size'`` and carved out of the network's base CIDR block.  When
    a size group exhausts its current block, allocation falls back to CIDR
    ranges left over from earlier groups.

    Parameters
    ----------
    network_config : dict
        Reads ``'network_cidr_base'`` (default ``'172.16.0.0'``),
        ``'network_cidr_size'`` (default ``'20'``) and
        ``'first_network_address_block'``.

    Yields
    ------
    dict
        A copy of each subnet config with its allocated ``'cidr'`` string.
    """
    network_cidr_base = str(network_config.get('network_cidr_base', '172.16.0.0'))
    network_cidr_size = str(network_config.get('network_cidr_size', '20'))
    # NOTE(review): read but never used below — preserved for parity with
    # the original; confirm whether it was meant to seed `net`.
    first_network_address_block = str(
        network_config.get('first_network_address_block', network_cidr_base)
    )

    base_cidr = network_cidr_base + '/' + network_cidr_size
    net = netaddr.IPNetwork(base_cidr)

    grouped_subnet = groupby('size', self._get_subnet_config_w_az(network_config))
    subnet_groups = sorted(grouped_subnet.items())
    available_cidrs = []

    for subnet_size, subnet_configs in subnet_groups:
        newcidrs = net.subnet(int(subnet_size))
        for subnet_config in subnet_configs:
            try:
                # Builtin next() instead of the Python-2-only .next()
                # method, so this also runs on Python 3.
                cidr = next(newcidrs)
            except StopIteration:
                # Current block exhausted: continue carving from CIDRs
                # left over from earlier size groups.
                net = next(chain(*reversed(available_cidrs)))
                newcidrs = net.subnet(int(subnet_size))
                cidr = next(newcidrs)
            new_config = assoc(subnet_config, 'cidr', str(cidr))
            yield new_config
        else:
            # Save the unconsumed remainder of this size's block for
            # potential reuse by later groups.
            net = next(newcidrs)
            available_cidrs.append(newcidrs)
Example #4
Source File: earnings_estimates.py From catalyst with Apache License 2.0 | 5 votes |
def get_zeroth_quarter_idx(self, stacked_last_per_qtr):
    """Select the upcoming release for each simulation date.

    Keeps only releases on or after each simulation date, then picks the
    first such release per (date, sid) group.

    Parameters
    ----------
    stacked_last_per_qtr : pd.DataFrame
        Indexed by calendar dates, sid, and normalized quarters, one row
        per latest estimate, sorted by event date.

    Returns
    -------
    next_releases_per_date_index : pd.MultiIndex
        Calendar dates, sid, and normalized quarters for only the rows
        that have a next event.
    """
    sim_dates = stacked_last_per_qtr.index.get_level_values(SIMULATION_DATES)
    on_or_after = stacked_last_per_qtr[EVENT_DATE_FIELD_NAME] >= sim_dates
    next_releases_per_date = (
        stacked_last_per_qtr.loc[on_or_after]
        .groupby(
            level=[SIMULATION_DATES, SID_FIELD_NAME],
            as_index=False,
        )
        # Rows are sorted by event date, so nth(0) is the soonest
        # upcoming release in each group.
        .nth(0)
    )
    return next_releases_per_date.index
Example #5
Source File: earnings_estimates.py From catalyst with Apache License 2.0 | 5 votes |
def get_zeroth_quarter_idx(self, stacked_last_per_qtr):
    """Select the most recent past release for each simulation date.

    Keeps only releases on or before each simulation date, then picks
    the last such release per (date, sid) group.

    Parameters
    ----------
    stacked_last_per_qtr : pd.DataFrame
        Indexed by calendar dates, sid, and normalized quarters, one row
        per latest estimate, sorted by event date.

    Returns
    -------
    previous_releases_per_date_index : pd.MultiIndex
        Calendar dates, sid, and normalized quarters for only the rows
        that have a previous event.
    """
    sim_dates = stacked_last_per_qtr.index.get_level_values(SIMULATION_DATES)
    on_or_before = stacked_last_per_qtr[EVENT_DATE_FIELD_NAME] <= sim_dates
    previous_releases_per_date = (
        stacked_last_per_qtr.loc[on_or_before]
        .groupby(
            level=[SIMULATION_DATES, SID_FIELD_NAME],
            as_index=False,
        )
        # Rows are sorted by event date, so nth(-1) is the most recent
        # release in each group.
        .nth(-1)
    )
    return previous_releases_per_date.index
Example #6
Source File: events.py From catalyst with Apache License 2.0 | 5 votes |
def split_next_and_previous_event_columns(self, requested_columns):
    """Partition *requested_columns* by which event value they load.

    Parameters
    ----------
    requested_columns : iterable[BoundColumn]

    Returns
    -------
    next_cols, previous_cols : iterable[BoundColumn], iterable[BoundColumn]
        ``requested_columns`` split into those producing values from the
        next event and those producing values from the previous event.

    Raises
    ------
    ValueError
        If a column appears in neither ``next_value_columns`` nor
        ``previous_value_columns``.
    """
    def classify(column):
        # Direction is determined by which mapping declares the column.
        if column in self.next_value_columns:
            return 'next'
        if column in self.previous_value_columns:
            return 'previous'
        raise ValueError(
            "{c} not found in next_value_columns "
            "or previous_value_columns".format(c=column)
        )

    buckets = groupby(classify, requested_columns)
    return buckets.get('next', ()), buckets.get('previous', ())
Example #7
Source File: asset_restrictions.py From catalyst with Apache License 2.0 | 5 votes |
def __init__(self, restrictions):
    """Map each asset to its restrictions, sorted by effective date."""
    def effective_date_of(restriction):
        return restriction.effective_date

    # Bucket restrictions per asset, then sort each bucket ascending by
    # effective_date.
    grouped = groupby(lambda r: r.asset, restrictions)
    self._restrictions_by_asset = {
        asset: sorted(bucket, key=effective_date_of)
        for asset, bucket in iteritems(grouped)
    }
Example #8
Source File: lib_grouping.py From sidekick with MIT License | 5 votes |
def group_by(key: Func, seq: Seq) -> dict:
    """
    Group *seq* into a dict of lists, keyed by the result of ``key``.

    Examples:
        >>> sk.group_by((X % 2), range(5))
        {0: [0, 2, 4], 1: [1, 3]}

    See Also:
        :func:`reduce_by`
        :func:`fold_by`
    """
    groups = groupby(key, seq)
    return groups
Example #9
Source File: earnings_estimates.py From catalyst with Apache License 2.0 | 4 votes |
def get_adjustments(self,
                    zero_qtr_data,
                    requested_qtr_data,
                    last_per_qtr,
                    dates,
                    assets,
                    columns,
                    **kwargs):
    """
    Creates an AdjustedArray from the given estimates data for the given
    dates.

    Parameters
    ----------
    zero_qtr_data : pd.DataFrame
        The 'time zero' data for each calendar date per sid.
    requested_qtr_data : pd.DataFrame
        The requested quarter data for each calendar date per sid.
    last_per_qtr : pd.DataFrame
        A DataFrame with a column MultiIndex of [self.estimates.columns,
        normalized_quarters, sid] that allows easily getting the timeline
        of estimates for a particular sid for a particular quarter.
    dates : pd.DatetimeIndex
        The calendar dates for which estimates data is requested.
    assets : pd.Int64Index
        An index of all the assets from the raw data.
    columns : list of BoundColumn
        The columns for which adjustments need to be calculated.
    kwargs :
        Additional keyword arguments that should be forwarded to
        `get_adjustments_for_sid` and to be used in computing adjustments
        for each sid.

    Returns
    -------
    col_to_all_adjustments : dict[int -> AdjustedArray]
        A dictionary of all adjustments that should be applied.
    """
    # NOTE: mutates the caller's frame in place (inplace=True).
    zero_qtr_data.sort_index(inplace=True)
    # Here we want to get the LAST record from each group of records
    # corresponding to a single quarter. This is to ensure that we select
    # the most up-to-date event date in case the event date changes.
    quarter_shifts = zero_qtr_data.groupby(
        level=[SID_FIELD_NAME, NORMALIZED_QUARTERS]
    ).nth(-1)
    # Populated as a side effect of the per-sid apply() below.
    col_to_all_adjustments = {}
    # Map each asset id to its positional index in `assets`.
    sid_to_idx = dict(zip(assets, range(len(assets))))
    quarter_shifts.groupby(level=SID_FIELD_NAME).apply(
        self.get_adjustments_for_sid,
        dates,
        requested_qtr_data,
        last_per_qtr,
        sid_to_idx,
        columns,
        col_to_all_adjustments,
        **kwargs
    )
    return col_to_all_adjustments