Python sklearn.utils.gen_even_slices() Examples
The following are 9 code examples of sklearn.utils.gen_even_slices(), drawn from open-source projects. The source file, project, and license are noted above each example. You may also want to check out the other available functions and classes of the module sklearn.utils.
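Before diving into the project code, it helps to see what gen_even_slices(n, n_packs) actually yields: n_packs slice objects that partition range(n) as evenly as possible, with any remainder spread over the earliest slices. A minimal sketch:

from sklearn.utils import gen_even_slices

# Ten samples split into three packs: the remainder goes to the
# earliest slices, so the pack sizes come out as 4, 3 and 3.
print(list(gen_even_slices(10, 3)))
# [slice(0, 4, None), slice(4, 7, None), slice(7, 10, None)]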
Example #1
Source File: audio.py From freesound-classification with Apache License 2.0
import random

import numpy as np
from sklearn.utils import gen_even_slices


def shuffle_audio(audio, chunk_length=0.5, sr=None):
    # Split the signal into ~chunk_length-second chunks and shuffle them.
    n_chunks = int((audio.size / sr) / chunk_length)
    if n_chunks in (0, 1):
        return audio
    slices = list(gen_even_slices(audio.size, n_chunks))
    random.shuffle(slices)
    shuffled = np.concatenate([audio[s] for s in slices])
    return shuffled
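A quick usage sketch for the function above, reusing its imports; the sample rate and signal here are made-up stand-ins:

# Hypothetical call: shuffle ~0.5 s chunks of two seconds of noise.
sr = 22050                            # assumed sample rate
audio = np.random.randn(2 * sr)       # synthetic 1-D signal
shuffled = shuffle_audio(audio, chunk_length=0.5, sr=sr)
assert shuffled.shape == audio.shape  # same samples, different order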
Example #2
Source File: test_utils.py From Mastering-Elasticsearch-7.0 with MIT License
from itertools import chain

from sklearn.utils import gen_even_slices
# Test helpers live in scikit-learn's (older) testing module.
from sklearn.utils.testing import assert_array_equal, assert_raises_regex


def test_gen_even_slices():
    # check that gen_even_slices contains all samples
    some_range = range(10)
    joined_range = list(chain(*[some_range[slice] for slice in
                                gen_even_slices(10, 3)]))
    assert_array_equal(some_range, joined_range)

    # check that passing negative n_chunks raises an error
    slices = gen_even_slices(10, -1)
    assert_raises_regex(ValueError, "gen_even_slices got n_packs=-1, must be"
                        " >=1", next, slices)
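Note that gen_even_slices is a generator function, so the validation of n_packs only runs once the generator is advanced; that is why the test hands `next` to assert_raises_regex rather than the call itself. The same behavior, spelled out:

from sklearn.utils import gen_even_slices

slices = gen_even_slices(10, -1)  # no error yet: generators are lazy
try:
    next(slices)                  # validation fires on the first advance
except ValueError as err:
    print(err)                    # gen_even_slices got n_packs=-1, must be >=1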
Example #3
Source File: pairwise.py From trajminer with MIT License
import numpy as np
from joblib import Parallel, delayed
from sklearn.utils import gen_even_slices


def pairwise_similarity(X, Y=None, measure=None, n_jobs=1):
    """Computes the similarity between trajectories in X and Y.

    Parameters
    ----------
    X : array-like, shape: (n_trajectories_X, n_points, n_features)
        Input data.
    Y : array-like, shape: (n_trajectories_Y, n_points, n_features)
        Input data. If ``None``, the output will be the pairwise
        similarities between all samples in ``X``.
    measure : SimilarityMeasure object (default=None)
        The similarity measure to use for computing similarities. See
        :mod:`trajminer.similarity`.
    n_jobs : int (default=1)
        The number of parallel jobs.

    Returns
    -------
    similarities : array
        An array with shape (n_trajectories_X, n_trajectories_Y).
    """
    def compute_slice(X, Y, s):
        matrix = np.zeros(shape=(len(X), len(Y)))
        for i in range(s.start + 1, len(X)):
            for j in range(0, min(len(Y), i - s.start)):
                matrix[i][j] = measure.similarity(X[i], Y[j])
        return matrix

    upper = Y is not None
    Y = X if Y is None else Y  # compare to None explicitly; truth-testing an array is ambiguous
    func = delayed(compute_slice)
    similarity = Parallel(n_jobs=n_jobs, verbose=0)(
        func(X, Y[s], s) for s in gen_even_slices(len(Y), n_jobs))
    similarity = np.hstack(similarity)

    if not upper:
        similarity += similarity.transpose() + np.identity(len(X))

    return similarity
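Examples #3 through #9 all follow the same parallelization pattern: cut the index range into n_jobs even slices, hand each slice to a joblib worker, and stitch the partial results back together in order. A stripped-down sketch of that pattern, with a stand-in worker function:

import numpy as np
from joblib import Parallel, delayed
from sklearn.utils import gen_even_slices

def process(data, s):
    # Stand-in worker: transform one contiguous slice of the data.
    return [x * 2 for x in data[s]]

data = list(range(100))
n_jobs = 4
parts = Parallel(n_jobs=n_jobs)(
    delayed(process)(data, s) for s in gen_even_slices(len(data), n_jobs))
result = np.concatenate(parts)  # Parallel preserves task order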
Example #4
Source File: trajectory_data.py From trajminer with MIT License
def _to_csv(self, file, n_jobs):
    lat_lon = -1
    tids = self.get_tids()

    def build_lines(s):
        lines = []
        for i in range(s.start, s.stop):
            tid = tids[i]
            label = self.get_label(tid)
            traj = self.get_trajectory(tid)
            for p in traj:
                if lat_lon > -1:
                    p[lat_lon] = str(p[lat_lon][0]) + \
                        ',' + str(p[lat_lon][1])
                fmt = str(p)[1:-1].replace(', ', ',').replace("'", '')
                lines.append(str(tid) + ',' + str(label) + ',' + fmt)
        return lines

    with open(file, 'w') as out:
        header = 'tid,label'

        for i, attr in enumerate(self.get_attributes()):
            if attr == 'lat_lon':
                header += ',lat,lon'
                lat_lon = i
            else:
                header += ',' + attr

        out.write(header + '\n')
        func = delayed(build_lines)
        lines = Parallel(n_jobs=n_jobs, verbose=0)(
            func(s) for s in gen_even_slices(len(tids), n_jobs))
        lines = np.concatenate(lines)
        lines = '\n'.join(lines)
        out.write(lines)
        out.close()
Example #5
Source File: test_utils.py From twitter-stock-recommendation with MIT License
from itertools import chain

from sklearn.utils import gen_even_slices
# Test helpers live in scikit-learn's (older) testing module.
from sklearn.utils.testing import assert_array_equal, assert_raises_regex


def test_gen_even_slices():
    # check that gen_even_slices contains all samples
    some_range = range(10)
    joined_range = list(chain(*[some_range[slice] for slice in
                                gen_even_slices(10, 3)]))
    assert_array_equal(some_range, joined_range)

    # check that passing negative n_chunks raises an error
    slices = gen_even_slices(10, -1)
    assert_raises_regex(ValueError, "gen_even_slices got n_packs=-1, must be"
                        " >=1", next, slices)
Example #6
Source File: generate_sub_final_ensemble.py From kaggle_carvana_segmentation with MIT License
# NOTE: Python 2 snippet (print statements); config, CARVANA,
# average_from_files, load_from_files and create_submission are
# project-local helpers.
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('-j', '--n_jobs', type=int, default=1, metavar='N',
                        help='number of parallel jobs')
    parser.add_argument('--load', action='store_true',
                        help='load pregenerated probs from folder?')
    parser.add_argument('--no_save', action='store_true',
                        help='not save probs as pngs?')
    args = parser.parse_args()

    probs_dirs = [
        ('test_scratch2', 1.0),
        ('test_vgg11v1_final', 1.0),
        ('albu27.09', 1.0),
        ('ternaus27', 1.0),
    ]
    w_sum = sum([x[1] for x in probs_dirs])
    print 'W_sum=', w_sum
    probs_dirs = map(lambda x: (Path(join(config.submissions_dir, x[0])),
                                float(x[1]) / w_sum),
                     probs_dirs)
    print 'Weights:', [x[1] for x in probs_dirs]
    output_dir = Path(config.submissions_dir) / (
        'ens_scratch2(1)_v1-final(1)_al27(1)_te27(1)')

    with open(str(output_dir) + '.txt', mode='w') as f:
        f.write('Following models were averaged:\n')
        for l, w in probs_dirs:
            f.write(str(l) + '; weight={}\n'.format(w))
            print str(l.stem) + '; weight={}\n'.format(w)
    print '===='

    test_pathes = CARVANA.get_test_paths(is_hq=True)
    print 'Reading from', map(str, probs_dirs)
    print 'output_dir', output_dir

    if not args.load:
        fd = delayed(average_from_files)
        ret = Parallel(n_jobs=args.n_jobs, verbose=0)(
            fd(test_pathes[s], probs_dirs=probs_dirs, output_dir=output_dir,
               is_quiet=(i > 0), should_save_masks=not args.no_save)
            for i, s in enumerate(gen_even_slices(len(test_pathes),
                                                  args.n_jobs)))
    else:
        fd = delayed(load_from_files)
        ret = Parallel(n_jobs=args.n_jobs, verbose=0)(
            fd(test_pathes[s], output_dir=output_dir, is_quiet=(i > 0))
            for i, s in enumerate(gen_even_slices(len(test_pathes),
                                                  args.n_jobs)))

    df = pd.concat(ret, axis=0)
    output_path = str(output_dir) + '.csv'
    create_submission(df, str(output_path))
Example #7
Source File: generate_sub_average.py From kaggle_carvana_segmentation with MIT License
# NOTE: Python 2 snippet (print statements, xrange, list-returning map);
# config, CARVANA and the *_from_files helpers are project-local.
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('-j', '--n_jobs', type=int, default=1, metavar='N',
                        help='number of parallel jobs')
    parser.add_argument('--load', action='store_true',
                        help='load pregenerated probs from folder?')
    parser.add_argument('--net_name', choices=['scratch', 'vgg11v1'])
    args = parser.parse_args()

    print 'config.submissions_dir', config.submissions_dir

    if args.net_name == 'vgg11v1':
        probs_dirs = list()
        for fold_id in xrange(7):
            dirs = glob.glob(join(
                config.submissions_dir,
                'test_probs_vgg11v1_s1993_im1024_gacc1_aug1_v2fold{}.7_noreg_epoch*'.format(fold_id)))
            epochs = map(lambda x: int(x.rsplit('_epoch', 1)[1]), dirs)
            last_epoch_dir = sorted(zip(epochs, dirs))[-1][1]
            probs_dirs.append(last_epoch_dir)
        print map(lambda x: os.path.basename(x), probs_dirs)
        output_dir = Path(config.submissions_dir) / ('test_vgg11v1_final')
    elif args.net_name == 'scratch':
        probs_dirs = list()
        for fold_id in xrange(7):
            dirs = glob.glob(join(
                config.submissions_dir,
                'test_probs_scratch_s1993_im1024_aug1_fold{}.7_epoch*'.format(fold_id)))
            epochs = map(lambda x: int(x.rsplit('_epoch', 1)[1]), dirs)
            last_epoch_dir = sorted(zip(epochs, dirs))[-1][1]
            probs_dirs.append(last_epoch_dir)
        print map(lambda x: os.path.basename(x), probs_dirs)
        output_dir = Path(config.submissions_dir) / ('test_scratch2')
    else:
        raise ValueError('Unknown net_name {}'.format(args.net_name))

    probs_dirs = map(Path, probs_dirs)

    with open(str(output_dir) + '.txt', mode='w') as f:
        f.write('Following models were averaged:\n')
        for l in probs_dirs:
            f.write(str(l) + '\n')

    test_pathes = CARVANA.get_test_paths(is_hq=True)
    print 'Reading from', map(str, probs_dirs)
    print 'output_dir', output_dir

    if not args.load:
        fd = delayed(average_from_files)
        ret = Parallel(n_jobs=args.n_jobs, verbose=0)(
            fd(test_pathes[s], probs_dirs=probs_dirs, output_dir=output_dir,
               is_quiet=(i > 0))
            for i, s in enumerate(gen_even_slices(len(test_pathes),
                                                  args.n_jobs)))
    else:
        fd = delayed(load_from_files)
        ret = Parallel(n_jobs=args.n_jobs, verbose=0)(
            fd(test_pathes[s], output_dir=output_dir, is_quiet=(i > 0))
            for i, s in enumerate(gen_even_slices(len(test_pathes),
                                                  args.n_jobs)))

    df = pd.concat(ret, axis=0)
    output_path = str(output_dir) + '.csv'
    create_submission(df, str(output_path))
Example #8
Source File: segmentation.py From trajminer with MIT License
def fit_transform(self, X):
    """Fit and segment trajectories.

    Parameters
    ----------
    X : :class:`trajminer.TrajectoryData`
        Input dataset to segment.

    Returns
    -------
    X_out : :class:`trajminer.TrajectoryData`
        Segmented dataset.
    """
    tids = X.get_tids()

    def segment(X, s):
        def check_segment(p1, p2):
            b = []
            for i, attr in enumerate(self.attributes):
                f = self.thresholds[attr]
                b.append(f(p1[i], p2[i]))
            return np.any(b) if self.mode == 'any' else np.all(b)

        ret = []
        for t in range(s.start, s.stop):
            subret = []
            traj = X.get_trajectory(tids[t])
            seg = [traj[0]]  # current segment under construction
            for i in range(1, len(traj)):
                if check_segment(traj[i - 1], traj[i]):
                    subret.append(seg)
                    seg = [traj[i]]
                else:
                    seg.append(traj[i])
            subret.append(seg)
            ret.append(subret)
        return ret

    func = delayed(segment)
    segments = Parallel(n_jobs=self.n_jobs, verbose=0)(
        func(X, s) for s in gen_even_slices(len(X.get_trajectories()),
                                            self.n_jobs))
    labels = X.get_labels()
    segments = np.squeeze(segments)
    new_labels = None

    if labels is not None:
        new_labels = []
        for idx, l in enumerate(labels):
            new_labels.extend(np.full(len(segments[idx]), l))

    segments = np.squeeze(segments)
    new_tids = np.r_[1:len(segments) + 1]
    return TrajectoryData(attributes=X.get_attributes(), data=segments,
                          tids=new_tids, labels=new_labels)
Example #9
Source File: filter.py From trajminer with MIT License
import numpy as np
from joblib import Parallel, delayed
from sklearn.utils import gen_even_slices

from trajminer import TrajectoryData  # the package's dataset class


def filter_duplicate_points(data, criterium, remove_first=True,
                            inplace=True, n_jobs=1):
    """Removes duplicates of trajectory points according to the given
    criteria.

    Parameters
    ----------
    data : :class:`trajminer.TrajectoryData`
        The dataset to be filtered.
    criterium : callable
        A callable that takes two trajectory points and decides whether or
        not they are duplicates. If `True`, then one of the points is
        removed from the dataset (the first or the last point, depending
        on the `remove_first` parameter).
    remove_first : bool (default=True)
        If `True`, then whenever duplicates are found, the first point is
        removed. Otherwise, the last one is removed from the dataset.
    inplace : bool (default=True)
        If `True` modifies the current object, otherwise returns a new
        object.
    n_jobs : int (default=1)
        The number of parallel jobs.

    Returns
    -------
    dataset : :class:`trajminer.TrajectoryData`
        The filtered dataset. If `inplace=True`, then returns the modified
        current object.
    """
    tids = data.get_tids()

    def filter_slice(s):
        n_data = []
        for t in range(s.start, s.stop):
            traj = np.copy(data.get_trajectory(tids[t]))
            i = 1
            while i < len(traj):
                if not criterium(traj[i-1], traj[i]):
                    i += 1
                elif remove_first:
                    traj = np.delete(traj, i-1, axis=0)
                else:
                    traj = np.delete(traj, i, axis=0)
            n_data.append(traj)
        return n_data

    func = delayed(filter_slice)
    ret = Parallel(n_jobs=n_jobs, verbose=0)(
        func(s) for s in gen_even_slices(len(tids), n_jobs))
    n_data = np.concatenate(ret)

    if inplace:
        data._update(data.get_attributes(), n_data, data.get_tids(),
                     data.get_labels())
        return data

    return TrajectoryData(data.get_attributes(), n_data, data.get_tids(),
                          data.get_labels())
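For illustration only, a criterium callable could mark consecutive points as duplicates when every attribute matches; the dataset object in the commented call is a hypothetical stand-in:

# Hypothetical criterium: points are duplicates when all attributes match.
def same_point(p1, p2):
    return all(a == b for a, b in zip(p1, p2))

# filtered = filter_duplicate_points(dataset, criterium=same_point,
#                                    remove_first=True, inplace=False)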