Python sklearn.utils.gen_even_slices() Examples
The following are 9 code examples of sklearn.utils.gen_even_slices(), drawn from open-source projects. The source file, project, and license are noted above each example. You may also want to check out the other available functions and classes of the module sklearn.utils.
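Before diving into the project code, it helps to see what gen_even_slices(n, n_packs) actually yields: n_packs slice objects that partition range(n) as evenly as possible, with any remainder spread over the earliest slices. A minimal sketch:

from sklearn.utils import gen_even_slices

# Ten samples split into three packs: the remainder goes to the
# earliest slices, so the pack sizes come out as 4, 3 and 3.
print(list(gen_even_slices(10, 3)))
# [slice(0, 4, None), slice(4, 7, None), slice(7, 10, None)]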
Example #1
Source File: audio.py From freesound-classification with Apache License 2.0
import random

import numpy as np
from sklearn.utils import gen_even_slices


def shuffle_audio(audio, chunk_length=0.5, sr=None):
    # Split the signal into ~chunk_length-second chunks and shuffle them.
    n_chunks = int((audio.size / sr) / chunk_length)
    if n_chunks in (0, 1):
        return audio
    slices = list(gen_even_slices(audio.size, n_chunks))
    random.shuffle(slices)
    shuffled = np.concatenate([audio[s] for s in slices])
    return shuffled
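A quick usage sketch for the function above, reusing its imports; the sample rate and signal here are made-up stand-ins:

# Hypothetical call: shuffle ~0.5 s chunks of two seconds of noise.
sr = 22050                            # assumed sample rate
audio = np.random.randn(2 * sr)       # synthetic 1-D signal
shuffled = shuffle_audio(audio, chunk_length=0.5, sr=sr)
assert shuffled.shape == audio.shape  # same samples, different order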
Example #2
Source File: test_utils.py From Mastering-Elasticsearch-7.0 with MIT License
from itertools import chain

from sklearn.utils import gen_even_slices
# Test helpers live in scikit-learn's (older) testing module.
from sklearn.utils.testing import assert_array_equal, assert_raises_regex


def test_gen_even_slices():
    # check that gen_even_slices contains all samples
    some_range = range(10)
    joined_range = list(chain(*[some_range[slice] for slice in
                                gen_even_slices(10, 3)]))
    assert_array_equal(some_range, joined_range)

    # check that passing negative n_chunks raises an error
    slices = gen_even_slices(10, -1)
    assert_raises_regex(ValueError, "gen_even_slices got n_packs=-1, must be"
                        " >=1", next, slices)
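Note that gen_even_slices is a generator function, so the validation of n_packs only runs once the generator is advanced; that is why the test hands `next` to assert_raises_regex rather than the call itself. The same behavior, spelled out:

from sklearn.utils import gen_even_slices

slices = gen_even_slices(10, -1)  # no error yet: generators are lazy
try:
    next(slices)                  # validation fires on the first advance
except ValueError as err:
    print(err)                    # gen_even_slices got n_packs=-1, must be >=1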
Example #3
Source File: pairwise.py From trajminer with MIT License
import numpy as np
from joblib import Parallel, delayed
from sklearn.utils import gen_even_slices


def pairwise_similarity(X, Y=None, measure=None, n_jobs=1):
    """Computes the similarity between trajectories in X and Y.

    Parameters
    ----------
    X : array-like, shape: (n_trajectories_X, n_points, n_features)
        Input data.
    Y : array-like, shape: (n_trajectories_Y, n_points, n_features)
        Input data. If ``None``, the output will be the pairwise
        similarities between all samples in ``X``.
    measure : SimilarityMeasure object (default=None)
        The similarity measure to use for computing similarities. See
        :mod:`trajminer.similarity`.
    n_jobs : int (default=1)
        The number of parallel jobs.

    Returns
    -------
    similarities : array
        An array with shape (n_trajectories_X, n_trajectories_Y).
    """
    def compute_slice(X, Y, s):
        matrix = np.zeros(shape=(len(X), len(Y)))
        for i in range(s.start + 1, len(X)):
            for j in range(0, min(len(Y), i - s.start)):
                matrix[i][j] = measure.similarity(X[i], Y[j])
        return matrix

    upper = Y is not None
    Y = X if Y is None else Y  # compare to None explicitly; truth-testing an array is ambiguous
    func = delayed(compute_slice)
    similarity = Parallel(n_jobs=n_jobs, verbose=0)(
        func(X, Y[s], s) for s in gen_even_slices(len(Y), n_jobs))
    similarity = np.hstack(similarity)

    if not upper:
        similarity += similarity.transpose() + np.identity(len(X))

    return similarity
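Examples #3 through #9 all follow the same parallelization pattern: cut the index range into n_jobs even slices, hand each slice to a joblib worker, and stitch the partial results back together in order. A stripped-down sketch of that pattern, with a stand-in worker function:

import numpy as np
from joblib import Parallel, delayed
from sklearn.utils import gen_even_slices

def process(data, s):
    # Stand-in worker: transform one contiguous slice of the data.
    return [x * 2 for x in data[s]]

data = list(range(100))
n_jobs = 4
parts = Parallel(n_jobs=n_jobs)(
    delayed(process)(data, s) for s in gen_even_slices(len(data), n_jobs))
result = np.concatenate(parts)  # Parallel preserves task order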
Example #4
Source File: trajectory_data.py From trajminer with MIT License
def _to_csv(self, file, n_jobs):
    lat_lon = -1
    tids = self.get_tids()

    def build_lines(s):
        lines = []
        for i in range(s.start, s.stop):
            tid = tids[i]
            label = self.get_label(tid)
            traj = self.get_trajectory(tid)
            for p in traj:
                if lat_lon > -1:
                    p[lat_lon] = str(p[lat_lon][0]) + \
                        ',' + str(p[lat_lon][1])
                fmt = str(p)[1:-1].replace(', ', ',').replace("'", '')
                lines.append(str(tid) + ',' + str(label) + ',' + fmt)
        return lines

    with open(file, 'w') as out:
        header = 'tid,label'

        for i, attr in enumerate(self.get_attributes()):
            if attr == 'lat_lon':
                header += ',lat,lon'
                lat_lon = i
            else:
                header += ',' + attr

        out.write(header + '\n')
        func = delayed(build_lines)
        lines = Parallel(n_jobs=n_jobs, verbose=0)(
            func(s) for s in gen_even_slices(len(tids), n_jobs))
        lines = np.concatenate(lines)
        lines = '\n'.join(lines)
        out.write(lines)
        out.close()
Example #5
Source File: test_utils.py From twitter-stock-recommendation with MIT License
from itertools import chain

from sklearn.utils import gen_even_slices
# Test helpers live in scikit-learn's (older) testing module.
from sklearn.utils.testing import assert_array_equal, assert_raises_regex


def test_gen_even_slices():
    # check that gen_even_slices contains all samples
    some_range = range(10)
    joined_range = list(chain(*[some_range[slice] for slice in
                                gen_even_slices(10, 3)]))
    assert_array_equal(some_range, joined_range)

    # check that passing negative n_chunks raises an error
    slices = gen_even_slices(10, -1)
    assert_raises_regex(ValueError, "gen_even_slices got n_packs=-1, must be"
                        " >=1", next, slices)
Example #6
Source File: generate_sub_final_ensemble.py From kaggle_carvana_segmentation with MIT License
# NOTE: Python 2 snippet (print statements); config, CARVANA,
# average_from_files, load_from_files and create_submission are
# project-local helpers.
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('-j', '--n_jobs', type=int, default=1, metavar='N',
                        help='number of parallel jobs')
    parser.add_argument('--load', action='store_true',
                        help='load pregenerated probs from folder?')
    parser.add_argument('--no_save', action='store_true',
                        help='not save probs as pngs?')
    args = parser.parse_args()

    probs_dirs = [
        ('test_scratch2', 1.0),
        ('test_vgg11v1_final', 1.0),
        ('albu27.09', 1.0),
        ('ternaus27', 1.0),
    ]
    w_sum = sum([x[1] for x in probs_dirs])
    print 'W_sum=', w_sum
    probs_dirs = map(lambda x: (Path(join(config.submissions_dir, x[0])),
                                float(x[1]) / w_sum),
                     probs_dirs)
    print 'Weights:', [x[1] for x in probs_dirs]
    output_dir = Path(config.submissions_dir) / (
        'ens_scratch2(1)_v1-final(1)_al27(1)_te27(1)')

    with open(str(output_dir) + '.txt', mode='w') as f:
        f.write('Following models were averaged:\n')
        for l, w in probs_dirs:
            f.write(str(l) + '; weight={}\n'.format(w))
            print str(l.stem) + '; weight={}\n'.format(w)
    print '===='

    test_pathes = CARVANA.get_test_paths(is_hq=True)
    print 'Reading from', map(str, probs_dirs)
    print 'output_dir', output_dir

    if not args.load:
        fd = delayed(average_from_files)
        ret = Parallel(n_jobs=args.n_jobs, verbose=0)(
            fd(test_pathes[s], probs_dirs=probs_dirs, output_dir=output_dir,
               is_quiet=(i > 0), should_save_masks=not args.no_save)
            for i, s in enumerate(gen_even_slices(len(test_pathes),
                                                  args.n_jobs)))
    else:
        fd = delayed(load_from_files)
        ret = Parallel(n_jobs=args.n_jobs, verbose=0)(
            fd(test_pathes[s], output_dir=output_dir, is_quiet=(i > 0))
            for i, s in enumerate(gen_even_slices(len(test_pathes),
                                                  args.n_jobs)))

    df = pd.concat(ret, axis=0)
    output_path = str(output_dir) + '.csv'
    create_submission(df, str(output_path))
Example #7
Source File: generate_sub_average.py From kaggle_carvana_segmentation with MIT License
# NOTE: Python 2 snippet (print statements, xrange, list-returning map);
# config, CARVANA and the *_from_files helpers are project-local.
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('-j', '--n_jobs', type=int, default=1, metavar='N',
                        help='number of parallel jobs')
    parser.add_argument('--load', action='store_true',
                        help='load pregenerated probs from folder?')
    parser.add_argument('--net_name', choices=['scratch', 'vgg11v1'])
    args = parser.parse_args()

    print 'config.submissions_dir', config.submissions_dir

    if args.net_name == 'vgg11v1':
        probs_dirs = list()
        for fold_id in xrange(7):
            dirs = glob.glob(join(
                config.submissions_dir,
                'test_probs_vgg11v1_s1993_im1024_gacc1_aug1_v2fold{}.7_noreg_epoch*'.format(fold_id)))
            epochs = map(lambda x: int(x.rsplit('_epoch', 1)[1]), dirs)
            last_epoch_dir = sorted(zip(epochs, dirs))[-1][1]
            probs_dirs.append(last_epoch_dir)
        print map(lambda x: os.path.basename(x), probs_dirs)
        output_dir = Path(config.submissions_dir) / ('test_vgg11v1_final')
    elif args.net_name == 'scratch':
        probs_dirs = list()
        for fold_id in xrange(7):
            dirs = glob.glob(join(
                config.submissions_dir,
                'test_probs_scratch_s1993_im1024_aug1_fold{}.7_epoch*'.format(fold_id)))
            epochs = map(lambda x: int(x.rsplit('_epoch', 1)[1]), dirs)
            last_epoch_dir = sorted(zip(epochs, dirs))[-1][1]
            probs_dirs.append(last_epoch_dir)
        print map(lambda x: os.path.basename(x), probs_dirs)
        output_dir = Path(config.submissions_dir) / ('test_scratch2')
    else:
        raise ValueError('Unknown net_name {}'.format(args.net_name))

    probs_dirs = map(Path, probs_dirs)

    with open(str(output_dir) + '.txt', mode='w') as f:
        f.write('Following models were averaged:\n')
        for l in probs_dirs:
            f.write(str(l) + '\n')

    test_pathes = CARVANA.get_test_paths(is_hq=True)
    print 'Reading from', map(str, probs_dirs)
    print 'output_dir', output_dir

    if not args.load:
        fd = delayed(average_from_files)
        ret = Parallel(n_jobs=args.n_jobs, verbose=0)(
            fd(test_pathes[s], probs_dirs=probs_dirs, output_dir=output_dir,
               is_quiet=(i > 0))
            for i, s in enumerate(gen_even_slices(len(test_pathes),
                                                  args.n_jobs)))
    else:
        fd = delayed(load_from_files)
        ret = Parallel(n_jobs=args.n_jobs, verbose=0)(
            fd(test_pathes[s], output_dir=output_dir, is_quiet=(i > 0))
            for i, s in enumerate(gen_even_slices(len(test_pathes),
                                                  args.n_jobs)))

    df = pd.concat(ret, axis=0)
    output_path = str(output_dir) + '.csv'
    create_submission(df, str(output_path))
Example #8
Source File: segmentation.py From trajminer with MIT License
def fit_transform(self, X):
    """Fit and segment trajectories.

    Parameters
    ----------
    X : :class:`trajminer.TrajectoryData`
        Input dataset to segment.

    Returns
    -------
    X_out : :class:`trajminer.TrajectoryData`
        Segmented dataset.
    """
    tids = X.get_tids()

    def segment(X, s):
        def check_segment(p1, p2):
            b = []
            for i, attr in enumerate(self.attributes):
                f = self.thresholds[attr]
                b.append(f(p1[i], p2[i]))
            return np.any(b) if self.mode == 'any' else np.all(b)

        ret = []
        for t in range(s.start, s.stop):
            subret = []
            traj = X.get_trajectory(tids[t])
            seg = [traj[0]]  # current segment under construction
            for i in range(1, len(traj)):
                if check_segment(traj[i - 1], traj[i]):
                    subret.append(seg)
                    seg = [traj[i]]
                else:
                    seg.append(traj[i])
            subret.append(seg)
            ret.append(subret)
        return ret

    func = delayed(segment)
    segments = Parallel(n_jobs=self.n_jobs, verbose=0)(
        func(X, s) for s in gen_even_slices(len(X.get_trajectories()),
                                            self.n_jobs))
    labels = X.get_labels()
    segments = np.squeeze(segments)
    new_labels = None

    if labels is not None:
        new_labels = []
        for idx, l in enumerate(labels):
            new_labels.extend(np.full(len(segments[idx]), l))

    segments = np.squeeze(segments)
    new_tids = np.r_[1:len(segments) + 1]
    return TrajectoryData(attributes=X.get_attributes(), data=segments,
                          tids=new_tids, labels=new_labels)
Example #9
Source File: filter.py From trajminer with MIT License
import numpy as np
from joblib import Parallel, delayed
from sklearn.utils import gen_even_slices

from trajminer import TrajectoryData  # the package's dataset class


def filter_duplicate_points(data, criterium, remove_first=True,
                            inplace=True, n_jobs=1):
    """Removes duplicates of trajectory points according to the given
    criteria.

    Parameters
    ----------
    data : :class:`trajminer.TrajectoryData`
        The dataset to be filtered.
    criterium : callable
        A callable that takes two trajectory points and decides whether or
        not they are duplicates. If `True`, then one of the points is
        removed from the dataset (the first or the last point, depending
        on the `remove_first` parameter).
    remove_first : bool (default=True)
        If `True`, then whenever duplicates are found, the first point is
        removed. Otherwise, the last one is removed from the dataset.
    inplace : bool (default=True)
        If `True` modifies the current object, otherwise returns a new
        object.
    n_jobs : int (default=1)
        The number of parallel jobs.

    Returns
    -------
    dataset : :class:`trajminer.TrajectoryData`
        The filtered dataset. If `inplace=True`, then returns the modified
        current object.
    """
    tids = data.get_tids()

    def filter_slice(s):
        n_data = []
        for t in range(s.start, s.stop):
            traj = np.copy(data.get_trajectory(tids[t]))
            i = 1
            while i < len(traj):
                if not criterium(traj[i-1], traj[i]):
                    i += 1
                elif remove_first:
                    traj = np.delete(traj, i-1, axis=0)
                else:
                    traj = np.delete(traj, i, axis=0)
            n_data.append(traj)
        return n_data

    func = delayed(filter_slice)
    ret = Parallel(n_jobs=n_jobs, verbose=0)(
        func(s) for s in gen_even_slices(len(tids), n_jobs))
    n_data = np.concatenate(ret)

    if inplace:
        data._update(data.get_attributes(), n_data, data.get_tids(),
                     data.get_labels())
        return data

    return TrajectoryData(data.get_attributes(), n_data, data.get_tids(),
                          data.get_labels())
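For illustration only, a criterium callable could mark consecutive points as duplicates when every attribute matches; the dataset object in the commented call is a hypothetical stand-in:

# Hypothetical criterium: points are duplicates when all attributes match.
def same_point(p1, p2):
    return all(a == b for a, b in zip(p1, p2))

# filtered = filter_duplicate_points(dataset, criterium=same_point,
#                                    remove_first=True, inplace=False)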