Python numpy.lexsort() Examples
The following are 30 code examples of numpy.lexsort(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module numpy, or try the search function.
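Before working through the project code below, it may help to recall how np.lexsort() treats its arguments: it takes a sequence of keys, uses the last key as the primary sort key, and returns an array of integer indices rather than sorted values. A minimal sketch (the names here are only for illustration):

import numpy as np

surnames = np.array(['Hertz', 'Galilei', 'Hertz'])
first_names = np.array(['Heinrich', 'Galileo', 'Gustav'])

# The last key (surnames) is the primary key; first_names only breaks ties.
order = np.lexsort((first_names, surnames))
print(order)  # [1 2 0]
print([surnames[i] + ', ' + first_names[i] for i in order])
# ['Galilei, Galileo', 'Hertz, Gustav', 'Hertz, Heinrich']

Many of the examples below rely on exactly this convention, for instance by passing keys in reversed order so that the first column becomes the most significant one.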
Example #1
Source File: test_algos.py From recruit with Apache License 2.0 | 6 votes |
def test_groupsort_indexer():
    a = np.random.randint(0, 1000, 100).astype(np.int64)
    b = np.random.randint(0, 1000, 100).astype(np.int64)

    result = libalgos.groupsort_indexer(a, 1000)[0]

    # need to use a stable sort
    # np.argsort returns int, groupsort_indexer
    # always returns int64
    expected = np.argsort(a, kind='mergesort')
    expected = expected.astype(np.int64)

    tm.assert_numpy_array_equal(result, expected)

    # compare with lexsort
    # np.lexsort returns int, groupsort_indexer
    # always returns int64
    key = a * 1000 + b
    result = libalgos.groupsort_indexer(key, 1000000)[0]
    expected = np.lexsort((b, a))
    expected = expected.astype(np.int64)

    tm.assert_numpy_array_equal(result, expected)
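The comparison in this test works because, for 0 <= b < 1000, the composite key a * 1000 + b encodes "sort by a, then by b", which is exactly what np.lexsort((b, a)) computes. A small sketch of that equivalence (the values here are invented for illustration):

import numpy as np

a = np.array([2, 1, 2, 1])
b = np.array([5, 7, 3, 7])
# A stable argsort of the composite key matches lexsort with `a` primary, `b` secondary.
assert np.array_equal(np.lexsort((b, a)),
                      np.argsort(a * 1000 + b, kind='mergesort'))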
Example #2
Source File: np_conserved.py From tenpy with GNU General Public License v3.0 | 6 votes |
def isort_qdata(self):
    """(Lexicographically) sort ``self._qdata``; in place.

    Lexsort ``self._qdata`` and ``self._data`` and set ``self._qdata_sorted = True``.
    """
    if self._qdata_sorted:
        return
    if len(self._qdata) < 2:
        self._qdata_sorted = True
        return
    perm = np.lexsort(self._qdata.T)
    self._qdata = self._qdata[perm, :]
    self._data = [self._data[p] for p in perm]
    self._qdata_sorted = True

# reshaping ===============================================================
Example #3
Source File: test_lattice.py From tenpy with GNU General Public License v3.0 | 6 votes |
def test_possible_couplings():
    lat_reg = lattice.Honeycomb(2, 3, [None, None], order="snake", bc="periodic", bc_MPS="infinite")
    lat_irreg = lattice.IrregularLattice(lat_reg, remove=[[0, 0, 0]])
    u0, u1 = 0, 1
    for lat in [lat_reg, lat_irreg]:
        for dx in [(0, 0), (0, 1), (2, 1), (-1, -1)]:
            print("dx =", dx)
            mps0, mps1, lat_indices, coupling_shape = lat.possible_couplings(u0, u1, dx)
            ops = [(None, [0, 0], u0), (None, dx, u1)]
            m_ijkl, m_lat_indices, m_coupling_shape = lat.possible_multi_couplings(ops)
            assert coupling_shape == m_coupling_shape
            if len(lat_indices) == 0:
                continue
            sort = np.lexsort(lat_indices.T)
            mps0, mps1, lat_indices = mps0[sort], mps1[sort], lat_indices[sort, :]
            assert m_ijkl.shape == (len(mps0), 2)
            m_sort = np.lexsort(m_lat_indices.T)
            m_ijkl, m_lat_indices = m_ijkl[m_sort, :], m_lat_indices[m_sort, :]
            npt.assert_equal(m_lat_indices, lat_indices)
            npt.assert_equal(mps0, m_ijkl[:, 0])
            npt.assert_equal(mps1, m_ijkl[:, 1])
Example #4
Source File: lattice.py From tenpy with GNU General Public License v3.0 | 6 votes |
def order(self, order_):
    # update the value itself
    self._order = order_
    # and the other stuff which is cached
    self._perm = np.lexsort(order_.T)
    # use advanced numpy indexing...
    self._mps2lat_vals_idx = np.empty(self.shape, np.intp)
    self._mps2lat_vals_idx[tuple(order_.T)] = np.arange(self.N_sites)
    # versions for fixed u
    self._mps_fix_u = []
    self._mps2lat_vals_idx_fix_u = []
    for u in range(len(self.unit_cell)):
        mps_fix_u = np.nonzero(order_[:, -1] == u)[0]
        self._mps_fix_u.append(mps_fix_u)
        mps2lat_vals_idx = np.empty(self.Ls, np.intp)
        mps2lat_vals_idx[tuple(order_[mps_fix_u, :-1].T)] = np.arange(self.N_cells)
        self._mps2lat_vals_idx_fix_u.append(mps2lat_vals_idx)
    self._mps_fix_u = tuple(self._mps_fix_u)
Example #5
Source File: coo.py From lambda-packs with MIT License | 6 votes |
def _sum_duplicates(self, row, col, data):
    # Assumes (data, row, col) not in canonical format.
    if len(data) == 0:
        return row, col, data
    order = np.lexsort((row, col))
    row = row[order]
    col = col[order]
    data = data[order]
    unique_mask = ((row[1:] != row[:-1]) |
                   (col[1:] != col[:-1]))
    unique_mask = np.append(True, unique_mask)
    row = row[unique_mask]
    col = col[unique_mask]
    unique_inds, = np.nonzero(unique_mask)
    data = np.add.reduceat(data, unique_inds, dtype=self.dtype)
    return row, col, data
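The same pattern -- lexsort the coordinates so duplicates become adjacent, mark the first entry of each group, then np.add.reduceat over the group starts -- also works outside this scipy method. A minimal standalone sketch with made-up COO data:

import numpy as np

row = np.array([0, 1, 0, 1])
col = np.array([2, 0, 2, 0])
data = np.array([1.0, 2.0, 3.0, 4.0])

order = np.lexsort((row, col))    # identical (row, col) pairs become adjacent
row, col, data = row[order], col[order], data[order]
unique_mask = np.append(True, (row[1:] != row[:-1]) | (col[1:] != col[:-1]))
sums = np.add.reduceat(data, np.nonzero(unique_mask)[0])
print(row[unique_mask], col[unique_mask], sums)  # [1 0] [0 2] [6. 4.]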
Example #6
Source File: multi.py From vnpy_crypto with MIT License | 6 votes |
def is_monotonic_increasing(self):
    """
    return if the index is monotonic increasing (only equal or
    increasing) values.
    """
    # reversed() because lexsort() wants the most significant key last.
    values = [self._get_level_values(i).values
              for i in reversed(range(len(self.levels)))]
    try:
        sort_order = np.lexsort(values)
        return Index(sort_order).is_monotonic
    except TypeError:
        # we have mixed types and np.lexsort is not happy
        return Index(self.values).is_monotonic
Example #7
Source File: misc.py From pymoo with Apache License 2.0 | 6 votes |
def get_duplicates(M):
    res = []
    I = np.lexsort([M[:, i] for i in reversed(range(0, M.shape[1]))])
    S = M[I, :]

    i = 0
    while i < S.shape[0] - 1:
        l = []
        while np.all(S[i, :] == S[i + 1, :]):
            l.append(I[i])
            i += 1
        if len(l) > 0:
            l.append(I[i])
            res.append(l)
        i += 1

    return res
Example #8
Source File: reference_direction.py From pymoo with Apache License 2.0 | 6 votes |
def do(self):
    # set the random seed if it is provided
    if self.seed is not None:
        np.random.seed(self.seed)

    if self.n_dim == 1:
        return np.array([[1.0]])
    else:
        val = self._do()
        if isinstance(val, tuple):
            ref_dirs, other = val[0], val[1:]
        else:
            ref_dirs = val

        if self.scaling is not None:
            ref_dirs = scale_reference_directions(ref_dirs, self.scaling)

        # sort the reference directions if desired
        if self.lexsort:
            I = np.lexsort([ref_dirs[:, j] for j in range(ref_dirs.shape[1])][::-1])
            ref_dirs = ref_dirs[I]

        return ref_dirs
Example #9
Source File: multi.py From recruit with Apache License 2.0 | 6 votes |
def is_monotonic_increasing(self):
    """
    return if the index is monotonic increasing (only equal or
    increasing) values.
    """
    # reversed() because lexsort() wants the most significant key last.
    values = [self._get_level_values(i).values
              for i in reversed(range(len(self.levels)))]
    try:
        sort_order = np.lexsort(values)
        return Index(sort_order).is_monotonic
    except TypeError:
        # we have mixed types and np.lexsort is not happy
        return Index(self.values).is_monotonic
Example #10
Source File: test_algos.py From vnpy_crypto with MIT License | 6 votes |
def test_groupsort_indexer():
    a = np.random.randint(0, 1000, 100).astype(np.int64)
    b = np.random.randint(0, 1000, 100).astype(np.int64)

    result = libalgos.groupsort_indexer(a, 1000)[0]

    # need to use a stable sort
    # np.argsort returns int, groupsort_indexer
    # always returns int64
    expected = np.argsort(a, kind='mergesort')
    expected = expected.astype(np.int64)

    tm.assert_numpy_array_equal(result, expected)

    # compare with lexsort
    # np.lexsort returns int, groupsort_indexer
    # always returns int64
    key = a * 1000 + b
    result = libalgos.groupsort_indexer(key, 1000000)[0]
    expected = np.lexsort((b, a))
    expected = expected.astype(np.int64)

    tm.assert_numpy_array_equal(result, expected)
Example #11
Source File: test_resample.py From vnpy_crypto with MIT License | 6 votes |
def test_resample_group_info(self):
    # GH10914
    for n, k in product((10000, 100000), (10, 100, 1000)):
        dr = date_range(start='2015-08-27', periods=n // 10, freq='T')
        ts = Series(np.random.randint(0, n // k, n).astype('int64'),
                    index=np.random.choice(dr, n))

        left = ts.resample('30T').nunique()
        ix = date_range(start=ts.index.min(), end=ts.index.max(),
                        freq='30T')

        vals = ts.values
        bins = np.searchsorted(ix.values, ts.index, side='right')

        sorter = np.lexsort((vals, bins))
        vals, bins = vals[sorter], bins[sorter]

        mask = np.r_[True, vals[1:] != vals[:-1]]
        mask |= np.r_[True, bins[1:] != bins[:-1]]

        arr = np.bincount(bins[mask] - 1,
                          minlength=len(ix)).astype('int64', copy=False)
        right = Series(arr, index=ix)

        assert_series_equal(left, right)
Example #12
Source File: groupby.py From vnpy_crypto with MIT License | 5 votes |
def label_info(self):
    # return the labels of items in original grouped axis
    labels, _, _ = self.group_info
    if self.indexer is not None:
        sorter = np.lexsort((labels, self.indexer))
        labels = labels[sorter]
    return labels
Example #13
Source File: test_regression.py From auto-alt-text-lambda-api with MIT License | 5 votes |
def test_lexsort(self, level=rlevel):
    # Lexsort memory error
    v = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10])
    assert_equal(np.lexsort(v), 0)
Example #14
Source File: test_regression.py From auto-alt-text-lambda-api with MIT License | 5 votes |
def test_lexsort_invalid_sequence(self):
    # Issue gh-4123
    class BuggySequence(object):
        def __len__(self):
            return 4

        def __getitem__(self, key):
            raise KeyError

    assert_raises(KeyError, np.lexsort, BuggySequence())
Example #15
Source File: test_multiarray.py From Computable with MIT License | 5 votes |
def test_basic(self):
    a = [1, 2, 1, 3, 1, 5]
    b = [0, 4, 5, 6, 2, 3]
    idx = np.lexsort((b, a))
    expected_idx = np.array([0, 4, 2, 1, 3, 5])
    assert_array_equal(idx, expected_idx)

    x = np.vstack((b, a))
    idx = np.lexsort(x)
    assert_array_equal(idx, expected_idx)

    assert_array_equal(x[1][idx], np.sort(x[1]))
Example #16
Source File: test_sorting.py From vnpy_crypto with MIT License | 5 votes |
def test_sort_index_different_sortorder(self):
    A = np.arange(20).repeat(5)
    B = np.tile(np.arange(5), 20)

    indexer = np.random.permutation(100)
    A = A.take(indexer)
    B = B.take(indexer)

    df = DataFrame({'A': A, 'B': B,
                    'C': np.random.randn(100)})

    # use .sort_values #9816
    with tm.assert_produces_warning(FutureWarning):
        df.sort_index(by=['A', 'B'], ascending=[1, 0])

    result = df.sort_values(by=['A', 'B'], ascending=[1, 0])

    ex_indexer = np.lexsort((df.B.max() - df.B, df.A))
    expected = df.take(ex_indexer)
    assert_frame_equal(result, expected)

    # test with multiindex, too
    idf = df.set_index(['A', 'B'])

    result = idf.sort_index(ascending=[1, 0])
    expected = idf.take(ex_indexer)
    assert_frame_equal(result, expected)

    # also, Series!
    result = idf['C'].sort_index(ascending=[1, 0])
    assert_series_equal(result, expected['C'])
Example #17
Source File: multi.py From vnpy_crypto with MIT License | 5 votes |
def _partial_tup_index(self, tup, side='left'):
    if len(tup) > self.lexsort_depth:
        raise UnsortedIndexError(
            'Key length (%d) was greater than MultiIndex'
            ' lexsort depth (%d)' % (len(tup), self.lexsort_depth))

    n = len(tup)
    start, end = 0, len(self)
    zipped = zip(tup, self.levels, self.labels)
    for k, (lab, lev, labs) in enumerate(zipped):
        section = labs[start:end]

        if lab not in lev:
            if not lev.is_type_compatible(lib.infer_dtype([lab])):
                raise TypeError('Level type mismatch: %s' % lab)

            # short circuit
            loc = lev.searchsorted(lab, side=side)
            if side == 'right' and loc >= 0:
                loc -= 1
            return start + section.searchsorted(loc, side=side)

        idx = lev.get_loc(lab)
        if k < n - 1:
            end = start + section.searchsorted(idx, side='right')
            start = start + section.searchsorted(idx, side='left')
        else:
            return start + section.searchsorted(idx, side=side)
Example #18
Source File: test_regression.py From Computable with MIT License | 5 votes |
def test_lexsort_buffer_length(self):
    """Ticket #1217, don't segfault."""
    a = np.ones(100, dtype=np.int8)
    b = np.ones(100, dtype=np.int32)
    i = np.lexsort((a[::-1], b))
    assert_equal(i, np.arange(100, dtype=np.int))
Example #19
Source File: defragTrees.py From defragTrees with MIT License | 5 votes |
def __uniqueRows(X):
    B = X[np.lexsort(X.T)]
    idx = np.r_[True, np.any(B[1:] != B[:-1], axis=tuple(range(1, X.ndim)))]
    Z = B[idx]
    return Z
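This helper lexsorts the rows of X (np.lexsort(X.T) treats the last row of X.T, i.e. the last column of X, as the primary key) so that identical rows end up adjacent, then keeps only rows that differ from their predecessor. A tiny sketch of the same idea on an invented 2-D array:

import numpy as np

X = np.array([[1, 2], [0, 1], [1, 2]])
B = X[np.lexsort(X.T)]                               # identical rows are now adjacent
keep = np.r_[True, np.any(B[1:] != B[:-1], axis=1)]  # keep the first row of each group
print(B[keep])                                       # [[0 1] [1 2]]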
Example #20
Source File: test_regression.py From Computable with MIT License | 5 votes |
def test_mem_lexsort_strings(self, level=rlevel):
    """Ticket #298"""
    lst = ['abc', 'cde', 'fgh']
    np.lexsort((lst,))
Example #21
Source File: test_regression.py From vnpy_crypto with MIT License | 5 votes |
def test_lexsort(self):
    # Lexsort memory error
    v = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10])
    assert_equal(np.lexsort(v), 0)
Example #22
Source File: resampling_dataset.py From fairseq with MIT License | 5 votes |
def ordered_indices(self):
    if self.batch_by_size:
        order = [
            np.arange(len(self)),
            self.sizes,
        ]  # No need to handle `self.shuffle == True`
        return np.lexsort(order)
    else:
        return np.arange(len(self))
Example #23
Source File: monolingual_dataset.py From fairseq with MIT License | 5 votes |
def ordered_indices(self):
    """Return an ordered list of indices. Batches will
    be constructed based on this order."""
    if self.shuffle:
        order = [np.random.permutation(len(self))]
    else:
        order = [np.arange(len(self))]
    order.append(self.sizes)
    return np.lexsort(order)
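This ordered_indices() idiom recurs in several of the fairseq datasets in this list: self.sizes is appended last, so np.lexsort sorts primarily by example length, and the random permutation (or np.arange) only decides the order within groups of equal length. A short sketch of that behaviour with made-up sizes:

import numpy as np

sizes = np.array([7, 3, 7, 3, 5])               # hypothetical example lengths
order = np.lexsort([np.random.permutation(len(sizes)), sizes])
print(sizes[order])                             # [3 3 5 7 7]: grouped by length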
Example #24
Source File: subsample_dataset.py From fairseq with MIT License | 5 votes |
def ordered_indices(self):
    """Return an ordered list of indices. Batches will
    be constructed based on this order."""
    if self.shuffle:
        order = [np.random.permutation(len(self))]
    else:
        order = [np.arange(len(self))]
    order.append(self.sizes)
    return np.lexsort(order)
Example #25
Source File: masked_lm_dataset.py From fairseq with MIT License | 5 votes |
def ordered_indices(self):
    """
    Return an ordered list of indices. Batches will be constructed based
    on this order.
    """
    if self.shuffle:
        return np.random.permutation(len(self))
    else:
        order = [np.arange(len(self))]
        order.append(self.sizes)
        return np.lexsort(order)
Example #26
Source File: sort_dataset.py From fairseq with MIT License | 5 votes |
def ordered_indices(self):
    return np.lexsort(self.sort_order)
Example #27
Source File: device_layout.py From phidl with MIT License | 5 votes |
def hash_geometry(self, precision=1e-4):
    """
    Algorithm:
    hash(
        hash(First layer information: [layer1, datatype1]),
        hash(Polygon 1 on layer 1 points: [(x1,y1),(x2,y2),(x3,y3)]),
        hash(Polygon 2 on layer 1 points: [(x1,y1),(x2,y2),(x3,y3),(x4,y4)]),
        hash(Polygon 3 on layer 1 points: [(x1,y1),(x2,y2),(x3,y3)]),
        hash(Second layer information: [layer2, datatype2]),
        hash(Polygon 1 on layer 2 points: [(x1,y1),(x2,y2),(x3,y3),(x4,y4)]),
        hash(Polygon 2 on layer 2 points: [(x1,y1),(x2,y2),(x3,y3)]),
    )
    ...

    Note: For each layer, each polygon is individually hashed and then the
    polygon hashes are sorted, to ensure the hash stays constant regardless
    of the ordering of the polygons. Similarly, the layers are sorted by
    (layer, datatype).
    """
    polygons_by_spec = self.get_polygons(by_spec=True)
    layers = np.array(list(polygons_by_spec.keys()))
    sorted_layers = layers[np.lexsort((layers[:, 0], layers[:, 1]))]

    # A random offset which fixes common rounding errors intrinsic
    # to floating point math. Example: with a precision of 0.1, the
    # floating points 7.049999 and 7.050001 round to different values
    # (7.0 and 7.1), but offset values (7.220485 and 7.220487) don't
    magic_offset = 0.17048614

    final_hash = hashlib.sha1()
    for layer in sorted_layers:
        layer_hash = hashlib.sha1(layer.astype(np.int64)).digest()
        polygons = polygons_by_spec[tuple(layer)]
        polygons = [((p / precision) + magic_offset).astype(np.int64) for p in polygons]
        polygon_hashes = np.sort([hashlib.sha1(p).digest() for p in polygons])
        final_hash.update(layer_hash)
        for ph in polygon_hashes:
            final_hash.update(ph)

    return final_hash.hexdigest()
Example #28
Source File: utils.py From mars with Apache License 2.0 | 5 votes |
def split_indexes_into_chunks(nsplits, indexes, ret_is_asc=True):
    indexes = np.asarray(indexes)
    chunk_idxes = np.empty_like(indexes)
    cum_nsplits = [np.cumsum(nsplit) for nsplit in nsplits]
    for i, cum_nsplit, index in zip(itertools.count(0), cum_nsplits, indexes):
        # handle negative value in index
        if hasattr(index, 'flags') and not index.flags.writeable:
            index = index.copy()
        index = np.add(index, cum_nsplit[-1], out=index, where=index < 0)
        sorted_idx = np.argsort(index)

        if np.any(index >= cum_nsplit[-1]):
            idx = index[index >= cum_nsplit[-1]][0]
            raise IndexError('index {0} is out of bounds with size {1}'.format(
                idx, cum_nsplit[-1]))

        chunk_idx = np.searchsorted(cum_nsplit, index[sorted_idx], side='right')
        chunk_idxes[i, sorted_idx] = chunk_idx

    chunk_idxes_asc = False
    if ret_is_asc:
        chunk_idxes_asc = is_asc_sorted(np.lexsort(chunk_idxes[::-1]))

    chunk_index_to_indexes = OrderedDict()
    chunk_index_to_poses = OrderedDict()
    poses = np.arange(len(indexes[0]))
    for idx in itertools.product(*(range(len(nsplit)) for nsplit in nsplits)):
        cond = (chunk_idxes == np.array(idx).reshape((len(idx), 1))).all(axis=0)
        filtered = indexes[:, cond]
        for i in range(len(indexes)):
            filtered[i] = filtered[i] - (cum_nsplits[i][idx[i] - 1] if idx[i] > 0 else 0)
        chunk_index_to_indexes[idx] = filtered
        chunk_index_to_poses[idx] = poses[cond]

    if ret_is_asc:
        return chunk_index_to_indexes, chunk_index_to_poses, chunk_idxes_asc
    return chunk_index_to_indexes, chunk_index_to_poses
Example #29
Source File: monolingual_dataset.py From crosentgec with GNU General Public License v3.0 | 5 votes |
def ordered_indices(self):
    """Ordered indices for batching."""
    if self.shuffle:
        order = [np.random.permutation(len(self))]
    else:
        order = [np.arange(len(self))]
    order.append(self.sizes)
    return np.lexsort(order)
Example #30
Source File: test_lattice.py From tenpy with GNU General Public License v3.0 | 5 votes |
def test_IrregularLattice():
    s1 = site.SpinHalfSite('Sz')
    s2 = site.SpinSite(0.5, 'Sz')
    reg = lattice.Honeycomb(3, 3, [s1, s2], bc=['open', 'periodic'])
    ir = lattice.IrregularLattice(reg, [[1, 1, 0], [1, 1, 1], [0, 0, 0]],
                                  ([[1, 1, 2], [1, 1, 3]], [7, 10]),
                                  [s2, s2], [[-0.1, 0.0], [0.1, 0.0]])
    known = {
        # written down by hand for this particular case
        (0, 1, (0, 0)): {'i': [5, 11, 0, 12, 1, 7, 13],
                         'j': [8, 14, 3, 15, 4, 10, 16]},
        (1, 0, (1, 0)): {'i': [2, 8, 4, 10],
                         'j': [5, 11, 7, 13]},
        (1, 0, (0, 1)): {'i': [2, 14, 3, 15, 10, 16],
                         'j': [0, 12, 1, 13, 5, 11]},
    }
    for (u0, u1, dx), expect in known.items():
        i, j, lat, sh = ir.possible_couplings(u0, u1, dx)
        print(i, j)
        sort = np.lexsort(lat.T)
        i = i[sort]
        j = j[sort]
        npt.assert_equal(i, np.array(expect['i']))
        npt.assert_equal(j, np.array(expect['j']))
        ops = [(None, dx, u1), (None, [0, 0], u0)]
        m_ji, m_lat_indices, m_coupling_shape = ir.possible_multi_couplings(ops)
        sort = np.lexsort(m_lat_indices.T)
        npt.assert_equal(m_ji[sort, 1], np.array(expect['i']))
        npt.assert_equal(m_ji[sort, 0], np.array(expect['j']))