Python numpy.searchsorted() Examples
The following are 30 code examples of numpy.searchsorted(), extracted from open-source projects. Each example notes its original project and source file.
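Before the project examples, a minimal sketch of what numpy.searchsorted does: it returns the indices at which values would have to be inserted into an already-sorted array to keep it sorted, with side='left'/'right' controlling where ties land.

import numpy as np

a = np.array([1, 3, 3, 7])                   # must already be sorted
print(np.searchsorted(a, 3))                 # 1  (leftmost insertion point for a tie)
print(np.searchsorted(a, 3, side='right'))   # 3  (after the existing 3s)
print(np.searchsorted(a, [0, 4, 9]))         # [0 3 4]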
Example #1
Source File: sparse.py From recruit with Apache License 2.0

def _first_fill_value_loc(self):
    """
    Get the location of the first missing value.

    Returns
    -------
    int
    """
    if len(self) == 0 or self.sp_index.npoints == len(self):
        return -1

    indices = self.sp_index.to_int_index().indices
    if not len(indices) or indices[0] > 0:
        return 0

    diff = indices[1:] - indices[:-1]
    return np.searchsorted(diff, 2) + 1
Example #2
Source File: Collection.py From fullrmc with GNU Affero General Public License v3.0

def get_real_index(self, relativeIndex):
    """
    Compute real index of the given relativeIndex considering
    already collected indexes.

    :Parameters:
        #. relativeIndex (int): Atom relative index to already collected indexes.

    :Returns:
        #. index (int): Atom real index.
    """
    ### THIS IS NOT TESTED YET.
    indexes = np.array(sorted(self.indexes))
    shift = np.searchsorted(a=indexes, v=relativeIndex, side='left')
    index = relativeIndex + shift
    for idx in indexes[shift:]:
        if idx > index:
            break
        index += 1
    return index
Example #3
Source File: utils.py From dynamic-training-with-apache-mxnet-on-aws with Apache License 2.0

def sample_categorical(prob, rng):
    """Sample from independent categorical distributions

    Each batch is an independent categorical distribution.

    Parameters
    ----------
    prob : numpy.ndarray
        Probability of the categorical distribution. Shape --> (batch_num, category_num)
    rng : numpy.random.RandomState

    Returns
    -------
    ret : numpy.ndarray
        Sampling result. Shape --> (batch_num,)
    """
    ret = numpy.empty(prob.shape[0], dtype=numpy.float32)
    for ind in range(prob.shape[0]):
        ret[ind] = numpy.searchsorted(numpy.cumsum(prob[ind]), rng.rand()).clip(
            min=0.0, max=prob.shape[1] - 0.5)
    return ret
Example #4
Source File: pregenerate_training_data.py From tpu_pretrain with Apache License 2.0

def sample_doc(self, current_idx, sentence_weighted=True):
    # Uses the current iteration counter to ensure we don't sample the same doc twice
    if sentence_weighted:
        # With sentence weighting, we sample docs proportionally to their sentence length
        if self.doc_cumsum is None or len(self.doc_cumsum) != len(self.doc_lengths):
            self._precalculate_doc_weights()
        rand_start = self.doc_cumsum[current_idx]
        rand_end = rand_start + self.cumsum_max - self.doc_lengths[current_idx]
        sentence_index = randrange(rand_start, rand_end) % self.cumsum_max
        sampled_doc_index = np.searchsorted(self.doc_cumsum, sentence_index, side='right')
    else:
        # If we don't use sentence weighting, then every doc has an equal chance to be chosen
        sampled_doc_index = (current_idx + randrange(1, len(self.doc_lengths))) % len(self.doc_lengths)
    assert sampled_doc_index != current_idx
    if self.reduce_memory:
        return self.document_shelf[str(sampled_doc_index)]
    else:
        return self.documents[sampled_doc_index]
Example #5
Source File: MoveGenerator.py From fullrmc with GNU Affero General Public License v3.0

def move(self, coordinates):
    """
    Move coordinates.

    :Parameters:
        #. coordinates (np.ndarray): The coordinates on which to apply the transformation.

    :Returns:
        #. coordinates (np.ndarray): The new coordinates after applying the transformation.
    """
    if self.__randomize:
        index = INT_TYPE(np.searchsorted(self.__selectionScheme, generate_random_float()))
        moveGenerator = self.__collection[index]
    else:
        moveGenerator = self.__collection[self.__step]
        self.__step = (self.__step + 1) % len(self.__collection)
    # perform the move
    return moveGenerator.move(coordinates)
Example #6
Source File: Collection.py From fullrmc with GNU Affero General Public License v3.0

def collect(self, index, dataDict, check=True):
    """
    Collect atom given its index.

    :Parameters:
        #. index (int): The atom index to collect.
        #. dataDict (dict): The atom data dict to collect.
        #. check (boolean): Whether to check dataDict keys before collecting.
           If set to False, user promises that collected data is a dictionary
           and contains the needed keys.
    """
    assert not self.is_collected(index), LOGGER.error("attempting to collect an already collected atom of index '%i'" % index)
    # add data
    if check:
        assert isinstance(dataDict, dict), LOGGER.error("dataDict must be a dictionary of data where keys are dataKeys")
        assert tuple(sorted(dataDict)) == self.__dataKeys, LOGGER.error("dataDict keys don't match promised dataKeys")
    self.__collectedData[index] = dataDict
    # set indexes sorted array
    idx = np.searchsorted(a=self.__indexesSortedArray, v=index, side='left')
    self.__indexesSortedArray = np.insert(self.__indexesSortedArray, idx, index)
    # set state
    self.__state = str(uuid.uuid1())
Example #7
Source File: Collection.py From fullrmc with GNU Affero General Public License v3.0

def release(self, index):
    """
    Release atom from list of collected atoms and return its collected data.

    :Parameters:
        #. index (int): The atom index to release.

    :Returns:
        #. dataDict (dict): The released atom collected data.
    """
    if not self.is_collected(index):
        LOGGER.warn("Attempting to release atom %i that is not collected." % index)
        return
    index = self.__collectedData.pop(index)
    # set indexes sorted array
    idx = np.searchsorted(a=self.__indexesSortedArray, v=index, side='left')
    self.__indexesSortedArray = np.insert(self.__indexesSortedArray, idx, index)
    # set state
    self.__state = str(uuid.uuid1())
    # return
    return index
Example #8
Source File: pfilter.py From pfilter with MIT License

def residual_resample(weights):
    n = len(weights)
    indices = np.zeros(n, np.uint32)
    # take int(N*w) copies of each weight
    num_copies = (n * weights).astype(np.uint32)
    k = 0
    for i in range(n):
        for _ in range(num_copies[i]):  # make n copies
            indices[k] = i
            k += 1
    # use multinomial resample on the residual to fill up the rest.
    residual = weights - num_copies  # get fractional part
    residual /= np.sum(residual)
    cumsum = np.cumsum(residual)
    cumsum[-1] = 1
    indices[k:n] = np.searchsorted(cumsum, np.random.uniform(0, 1, n - k))
    return indices
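The last three lines above are the standard inverse-CDF (multinomial) resampling trick. A standalone numpy-only sketch of that pattern, with a hypothetical weight vector assumed to be normalized:

import numpy as np

weights = np.array([0.1, 0.4, 0.2, 0.3])   # hypothetical normalized particle weights
cumsum = np.cumsum(weights)
cumsum[-1] = 1.0                            # guard against floating-point round-off
draws = np.random.uniform(0, 1, 10)
indices = np.searchsorted(cumsum, draws)    # index i is drawn with probability weights[i]
print(indices)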
Example #9
Source File: selection.py From pyshgp with MIT License

def select(self, population: Population, n: int = 1) -> Sequence[Individual]:
    """Return `n` individuals from the population.

    Parameters
    ----------
    population
        A Population of Individuals.
    n : int
        The number of parents to select from the population. Default is 1.

    Returns
    -------
    Sequence[Individual]
        The selected Individuals.
    """
    super().select(population, n)
    population_total_errors = np.array([i.total_error for i in population])
    sum_of_total_errors = np.sum(population_total_errors)
    probabilities = 1.0 - (population_total_errors / sum_of_total_errors)
    selected_ndxs = np.searchsorted(np.cumsum(probabilities), random(n))
    return [population[ndx] for ndx in selected_ndxs]
Example #10
Source File: carbonara.py From gnocchi with Apache License 2.0

def __init__(self, ts, granularity, start=None):
    # NOTE(sileht): The whole class assumes ts is ordered and doesn't have
    # duplicate timestamps. It uses numpy.unique, which sorts, but we always
    # assume the order to be the same as the input.
    self.granularity = granularity
    self.can_derive = isinstance(granularity, numpy.timedelta64)
    self.start = start
    if start is None:
        self._ts = ts
        self._ts_for_derive = ts
    else:
        self._ts = ts[numpy.searchsorted(ts['timestamps'], start):]
        if self.can_derive:
            start_derive = start - granularity
            self._ts_for_derive = ts[
                numpy.searchsorted(ts['timestamps'], start_derive):
            ]
    if self.can_derive:
        self.indexes = round_timestamp(self._ts['timestamps'], granularity)
    elif calendar.GROUPINGS.get(granularity):
        self.indexes = calendar.GROUPINGS.get(granularity)(
            self._ts['timestamps'])
    self.tstamps, self.counts = numpy.unique(self.indexes, return_counts=True)
Example #11
Source File: carbonara.py From gnocchi with Apache License 2.0

def __getitem__(self, key):
    if isinstance(key, numpy.datetime64):
        idx = numpy.searchsorted(self.timestamps, key)
        if self.timestamps[idx] == key:
            return self[idx]
        raise KeyError(key)
    if isinstance(key, slice):
        if isinstance(key.start, numpy.datetime64):
            start = numpy.searchsorted(self.timestamps, key.start)
        else:
            start = key.start
        if isinstance(key.stop, numpy.datetime64):
            stop = numpy.searchsorted(self.timestamps, key.stop)
        else:
            stop = key.stop
        key = slice(start, stop, key.step)
    return self.ts[key]
Example #12
Source File: carbonara.py From gnocchi with Apache License 2.0

def set_values(self, values, before_truncate_callback=None):
    """Set the timestamps and values in this timeseries.

    :param values: A sorted timeseries array.
    :param before_truncate_callback: A callback function to call before
                                     truncating the BoundTimeSerie to its
                                     maximum size.
    :return: None or the return value of before_truncate_callback
    """
    if self.block_size is not None and len(self.ts) != 0:
        index = numpy.searchsorted(values['timestamps'],
                                   self.first_block_timestamp())
        values = values[index:]
    super(BoundTimeSerie, self).set_values(values)
    if before_truncate_callback:
        return_value = before_truncate_callback(self)
    else:
        return_value = None
    self._truncate()
    return return_value
Example #13
Source File: inference_utils.py From ffn with Apache License 2.0

def compute_histogram_lut(image):
    """Computes the inverted CDF of image intensity.

    Args:
        image: 2d numpy array containing the image

    Returns:
        a 256-element numpy array representing a lookup table `lut`, such that
        lut[uniform_image] will transform `uniform_image` with a uniform
        intensity distribution to have an intensity distribution matching
        `image`.
    """
    cdf, bins = skimage.exposure.cumulative_distribution(image)
    lut = np.zeros(256, dtype=np.uint8)
    for i in range(0, 256):
        lut[i] = bins[np.searchsorted(cdf, i / 255.0)]
    return lut
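A minimal usage sketch for the function above, assuming it is importable together with scikit-image; the reference and uniform images here are hypothetical random arrays used only for illustration:

import numpy as np

rng = np.random.default_rng(0)
reference = rng.integers(0, 256, size=(64, 64)).astype(np.uint8)       # hypothetical target image
uniform_image = rng.integers(0, 256, size=(64, 64)).astype(np.uint8)   # image to be remapped

lut = compute_histogram_lut(reference)
matched = lut[uniform_image]   # intensities of `matched` now roughly follow `reference`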
Example #14
Source File: test_bayestar.py From dustmaps with GNU General Public License v2.0

def _interp_ebv(self, datum, dist):
    """
    Calculate samples of E(B-V) at an arbitrary distance (in kpc) for one
    test coordinate.
    """
    dm = 5. * (np.log10(dist) + 2.)
    idx_ceil = np.searchsorted(datum['DM_bin_edges'], dm)
    if idx_ceil == 0:
        dist_0 = 10.**(datum['DM_bin_edges'][0]/5. - 2.)
        return dist/dist_0 * datum['samples'][:, 0]
    elif idx_ceil == len(datum['DM_bin_edges']):
        return datum['samples'][:, -1]
    else:
        dm_ceil = datum['DM_bin_edges'][idx_ceil]
        dm_floor = datum['DM_bin_edges'][idx_ceil-1]
        a = (dm_ceil - dm) / (dm_ceil - dm_floor)
        return ((1.-a) * datum['samples'][:, idx_ceil]
                + a * datum['samples'][:, idx_ceil-1])
Example #15
Source File: period.py From recruit with Apache License 2.0

def asof_locs(self, where, mask):
    """
    where : array of timestamps
    mask : array of booleans where data is not NA
    """
    where_idx = where
    if isinstance(where_idx, DatetimeIndex):
        where_idx = PeriodIndex(where_idx.values, freq=self.freq)

    locs = self._ndarray_values[mask].searchsorted(
        where_idx._ndarray_values, side='right')

    locs = np.where(locs > 0, locs - 1, 0)
    result = np.arange(len(self))[mask].take(locs)

    first = mask.argmax()
    result[(locs == 0) & (where_idx._ndarray_values <
                          self._ndarray_values[first])] = -1

    return result
Example #16
Source File: _pandas_ndarray_store.py From arctic with GNU Lesser General Public License v2.1

def _index_range(self, version, symbol, date_range=None, **kwargs):
    """
    Given a version, read the segment_index and return the chunks associated
    with the date_range. As the segment index is (id -> last datetime) we need
    to take care in choosing the correct chunks.
    """
    if date_range and 'segment_index' in version:
        # index is read-only but it's never written to
        index = np.frombuffer(decompress(version['segment_index']), dtype=INDEX_DTYPE)
        dtcol = self._datetime64_index(index)
        if dtcol and len(index):
            dts = index[dtcol]
            start, end = _start_end(date_range, dts)
            if start > dts[-1]:
                return -1, -1
            idxstart = min(np.searchsorted(dts, start), len(dts) - 1)
            idxend = min(np.searchsorted(dts, end, side='right'), len(dts) - 1)
            return int(index['index'][idxstart]), int(index['index'][idxend] + 1)
    return super(PandasStore, self)._index_range(version, symbol, **kwargs)
Example #17
Source File: ls_fap.py From feets with MIT License

def fap_bootstrap(
    Z, fmax, t, y, dy, normalization="standard", n_bootstraps=1000,
    random_seed=None,
):
    rng = np.random.RandomState(random_seed)

    def bootstrapped_power():
        resample = rng.randint(0, len(y), len(y))  # sample with replacement
        ls_boot = LombScargle(t, y[resample], dy[resample])
        freq, power = ls_boot.autopower(
            normalization=normalization, maximum_frequency=fmax
        )
        return power.max()

    pmax = np.array([bootstrapped_power() for i in range(n_bootstraps)])
    pmax.sort()
    return 1 - np.searchsorted(pmax, Z) / len(pmax)
Example #18
Source File: period.py From recruit with Apache License 2.0

def searchsorted(self, value, side='left', sorter=None):
    if isinstance(value, Period):
        if value.freq != self.freq:
            msg = DIFFERENT_FREQ.format(cls=type(self).__name__,
                                        own_freq=self.freqstr,
                                        other_freq=value.freqstr)
            raise IncompatibleFrequency(msg)
        value = value.ordinal
    elif isinstance(value, compat.string_types):
        try:
            value = Period(value, freq=self.freq).ordinal
        except DateParseError:
            raise KeyError("Cannot interpret '{}' as period".format(value))

    return self._ndarray_values.searchsorted(value, side=side, sorter=sorter)
Example #19
Source File: population.py From ibllib with MIT License

def _index_of(arr, lookup):
    """Replace scalars in an array by their indices in a lookup table.

    Implicitly assume that:

    * All elements of arr and lookup are non-negative integers.
    * All elements of arr belong to lookup.

    This is not checked for performance reasons.
    """
    # Equivalent of np.digitize(arr, lookup) - 1, but much faster.
    # TODO: assertions to disable in production for performance reasons.
    # TODO: np.searchsorted(lookup, arr) is faster on small arrays with large
    # values.
    lookup = np.asarray(lookup, dtype=np.int32)
    m = (lookup.max() if len(lookup) else 0) + 1
    tmp = np.zeros(m + 1, dtype=np.int)
    # Ensure that -1 values are kept.
    tmp[-1] = -1
    if len(lookup):
        tmp[lookup] = np.arange(len(lookup))
    return tmp[arr]
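The second TODO comment above points at a searchsorted-based alternative. A minimal numpy-only sketch of that variant, assuming lookup is already sorted (which the lookup-table version does not require):

import numpy as np

lookup = np.array([2, 5, 7, 11])      # sorted lookup table
arr = np.array([7, 2, 11, 5])
print(np.searchsorted(lookup, arr))   # [2 0 3 1] -- position of each value in lookup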
Example #20
Source File: graph.py From jwalk with Apache License 2.0

def encode_edges(edges, nodes):
    """Encode data with dictionary

    Args:
        edges (np.ndarray): np array of the form [node1, node2].
        nodes (np.array): list of unique nodes

    Returns:
        np.ndarray: relabeled edges

    Examples:
        >>> import numpy as np
        >>> edges = np.array([['A', 'B'], ['A', 'C']])
        >>> nodes = np.array(['C', 'B', 'A'])
        >>> print(encode_edges(edges, nodes))
        [[2 1]
         [2 0]]
    """
    sidx = nodes.argsort()
    relabeled_edges = sidx[np.searchsorted(nodes, edges, sorter=sidx)]
    return relabeled_edges
Example #21
Source File: dmc.py From pyqmc with MIT License

def branch(configs, weights):
    """
    Perform branching on a set of walkers by stochastic reconfiguration

    Walkers are resampled with probability proportional to the weights, and
    the new weights are all set to be equal to the average weight.

    Args:
        configs: (nconfig,nelec,3) walker coordinates
        weights: (nconfig,) walker weights

    Returns:
        configs: resampled walker configurations
        weights: (nconfig,) all weights are equal to average weight
    """
    nconfig = configs.configs.shape[0]
    wtot = np.sum(weights)
    probability = np.cumsum(weights / wtot)
    base = np.random.rand()
    newinds = np.searchsorted(probability, (base + np.arange(nconfig) / nconfig) % 1.0)
    configs.resample(newinds)
    weights.fill(wtot / nconfig)
    return configs, weights
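The cumsum + searchsorted pattern above is systematic (comb) resampling. A standalone numpy-only sketch with hypothetical weights, independent of the walker objects used in the example:

import numpy as np

weights = np.array([0.5, 1.5, 1.0, 2.0])           # hypothetical unnormalized walker weights
probability = np.cumsum(weights / weights.sum())   # normalized CDF, ends at 1.0

base = np.random.rand()                             # single random offset for the whole comb
comb = (base + np.arange(len(weights)) / len(weights)) % 1.0
newinds = np.searchsorted(probability, comb)        # indices drawn roughly proportionally to weight
print(newinds)   # heavy walkers appear multiple times, light ones may vanish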
Example #22
Source File: cascade_lifetime.py From news-popularity-prediction with Apache License 2.0

def get_k_based_on_lifetime(data_frame, lifetime, min_k, max_k):
    lifetime_col = data_frame["timestamp"] - data_frame["timestamp"].iloc[0]
    lifetime_col = lifetime_col.iloc[min_k:]

    index = np.searchsorted(lifetime_col, lifetime)
    index = max(0, index[0] - 1)

    k = min_k + index

    if lifetime_col.size > (index + 1):
        next_t = lifetime_col.iloc[index + 1]
        if k == min_k:
            if lifetime_col.iloc[index] == lifetime_col.iloc[index + 1]:
                k += 1
                if lifetime_col.size > (index + 2):
                    next_t = lifetime_col.iloc[index + 2]
                else:
                    next_t = np.nan
    else:
        next_t = np.nan

    return k, next_t
Example #23
Source File: carbonara.py From gnocchi with Apache License 2.0

def truncate(self, oldest_point=None):
    """Truncate the time series up to oldest_point excluded.

    :param oldest_point: Oldest point to keep from, this excluded.
                         Default is the aggregation timespan.
    :type oldest_point: numpy.datetime64 or numpy.timedelta64
    :return: The oldest point that could have been kept.
    """
    last = self.last
    if last is None:
        return
    if oldest_point is None:
        oldest_point = self.aggregation.timespan
        if oldest_point is None:
            return
    if isinstance(oldest_point, numpy.timedelta64):
        oldest_point = last - oldest_point
    index = numpy.searchsorted(self.ts['timestamps'], oldest_point, side='right')
    self.ts = self.ts[index:]
    return oldest_point
Example #24
Source File: period.py From recruit with Apache License 2.0

def _get_string_slice(self, key):
    if not self.is_monotonic:
        raise ValueError('Partial indexing only valid for '
                         'ordered time series')

    key, parsed, reso = parse_time_string(key, self.freq)
    grp = resolution.Resolution.get_freq_group(reso)
    freqn = resolution.get_freq_group(self.freq)
    if reso in ['day', 'hour', 'minute', 'second'] and not grp < freqn:
        raise KeyError(key)

    t1, t2 = self._parsed_string_to_bounds(reso, parsed)
    return slice(self.searchsorted(t1.ordinal, side='left'),
                 self.searchsorted(t2.ordinal, side='right'))
Example #25
Source File: base.py From recruit with Apache License 2.0

def _searchsorted_monotonic(self, label, side='left'):
    if self.is_monotonic_increasing:
        return self.searchsorted(label, side=side)
    elif self.is_monotonic_decreasing:
        # np.searchsorted expects ascending sort order, have to reverse
        # everything for it to work (element ordering, search side and
        # resulting value).
        pos = self[::-1].searchsorted(label, side='right' if side == 'left'
                                      else 'left')
        return len(self) - pos

    raise ValueError('index must be monotonic increasing or decreasing')
Example #26
Source File: utils.py From mars with Apache License 2.0

def calc_columns_index(column_name, df):
    """
    Calculate the chunk index on the axis 1 according to the selected column.

    :param column_name: selected column name
    :param df: input tiled DataFrame
    :return: chunk index on the columns axis
    """
    column_nsplits = df.nsplits[1]
    column_loc = df.columns_value.to_pandas().get_loc(column_name)
    return np.searchsorted(np.cumsum(column_nsplits), column_loc + 1)
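The same pattern works outside Mars: a minimal numpy-only sketch (with hypothetical chunk widths) that maps a global column position to the chunk holding it, by searching the cumulative split sizes:

import numpy as np

nsplits = [3, 4, 2]             # hypothetical chunk widths along axis 1
bounds = np.cumsum(nsplits)     # [3 7 9] -> exclusive upper bound of each chunk

for column_loc in (0, 3, 8):
    chunk_index = np.searchsorted(bounds, column_loc + 1)
    print(column_loc, "->", chunk_index)   # 0 -> 0, 3 -> 1, 8 -> 2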
Example #27
Source File: test_datetime_index.py From recruit with Apache License 2.0

def test_resample_size():
    n = 10000
    dr = date_range('2015-09-19', periods=n, freq='T')
    ts = Series(np.random.randn(n), index=np.random.choice(dr, n))

    left = ts.resample('7T').size()
    ix = date_range(start=left.index.min(), end=ts.index.max(), freq='7T')

    bins = np.searchsorted(ix.values, ts.index.values, side='right')
    val = np.bincount(bins, minlength=len(ix) + 1)[1:].astype('int64',
                                                              copy=False)

    right = Series(val, index=ix)
    assert_series_equal(left, right)
Example #28
Source File: histograms.py From recruit with Apache License 2.0

def _search_sorted_inclusive(a, v):
    """
    Like `searchsorted`, but where the last item in `v` is placed on the right.

    In the context of a histogram, this makes the last bin edge inclusive
    """
    return np.concatenate((
        a.searchsorted(v[:-1], 'left'),
        a.searchsorted(v[-1:], 'right')
    ))
Example #29
Source File: sparse.py From recruit with Apache License 2.0

def searchsorted(self, v, side="left", sorter=None):
    msg = "searchsorted requires high memory usage."
    warnings.warn(msg, PerformanceWarning, stacklevel=2)
    if not is_scalar(v):
        v = np.asarray(v)
    v = np.asarray(v)
    return np.asarray(self, dtype=self.dtype.subtype).searchsorted(
        v, side, sorter
    )
Example #30
Source File: test_datetime_index.py From recruit with Apache License 2.0

def test_resample_group_info(n, k):
    # GH10914

    # use a fixed seed to always have the same uniques
    prng = np.random.RandomState(1234)

    dr = date_range(start='2015-08-27', periods=n // 10, freq='T')
    ts = Series(prng.randint(0, n // k, n).astype('int64'),
                index=prng.choice(dr, n))

    left = ts.resample('30T').nunique()
    ix = date_range(start=ts.index.min(), end=ts.index.max(),
                    freq='30T')

    vals = ts.values
    bins = np.searchsorted(ix.values, ts.index, side='right')

    sorter = np.lexsort((vals, bins))
    vals, bins = vals[sorter], bins[sorter]

    mask = np.r_[True, vals[1:] != vals[:-1]]
    mask |= np.r_[True, bins[1:] != bins[:-1]]

    arr = np.bincount(bins[mask] - 1,
                      minlength=len(ix)).astype('int64', copy=False)
    right = Series(arr, index=ix)

    assert_series_equal(left, right)