Python numpy.union1d() Examples
The following are 30 code examples of numpy.union1d(), collected from open-source projects. The originating project, source file, and license are listed above each example.
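In short, numpy.union1d(ar1, ar2) flattens both inputs, concatenates them, and returns the sorted array of unique values — equivalent to unique(concatenate((ar1, ar2))). A minimal sketch of the behavior the examples below build on (the array values here are only illustrative):

import numpy as np
from functools import reduce

a = np.array([3, 1, 2, 2])
b = np.array([[2, 4], [5, 1]])     # non-1D inputs are flattened first
print(np.union1d(a, b))            # [1 2 3 4 5] -- sorted, duplicates removed

# Several examples below union more than two arrays by chaining
# union1d with functools.reduce:
arrays = ([1, 3, 4, 3], [3, 1, 2, 1], [6, 3, 4, 2])
print(reduce(np.union1d, arrays))  # [1 2 3 4 6]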
Example #1
Source File: test_datasets.py From AmpliGraph with Apache License 2.0
def test_yago_3_10():
    yago_3_10 = load_yago3_10()
    assert len(yago_3_10['train']) == 1079040
    assert len(yago_3_10['valid']) == 5000 - 22
    assert len(yago_3_10['test']) == 5000 - 18
    # ent_train = np.union1d(np.unique(yago_3_10["train"][:, 0]), np.unique(yago_3_10["train"][:, 2]))
    # ent_valid = np.union1d(np.unique(yago_3_10["valid"][:, 0]), np.unique(yago_3_10["valid"][:, 2]))
    # ent_test = np.union1d(np.unique(yago_3_10["test"][:, 0]), np.unique(yago_3_10["test"][:, 2]))
    # assert len(set(ent_valid) - set(ent_train)) == 22
    # assert len(set(ent_test) - ((set(ent_valid) & set(ent_train)) | set(ent_train))) == 18
    # distinct_ent = np.union1d(np.union1d(ent_train, ent_valid), ent_test)
    # distinct_rel = np.union1d(np.union1d(np.unique(yago_3_10["train"][:, 1]), np.unique(yago_3_10["train"][:, 1])),
    #                           np.unique(yago_3_10["train"][:, 1]))
    # assert len(distinct_ent) == 123182
    # assert len(distinct_rel) == 37
Example #2
Source File: eval.py From Pointnet2.ScanNet with MIT License
def compute_miou(coords, preds, targets, weights):
    coords, preds, targets, weights = filter_points(coords, preds, targets, weights)
    seen_classes = np.unique(targets)
    mask = np.zeros(CONF.NUM_CLASSES)
    mask[seen_classes] = 1

    pointmiou = np.zeros(CONF.NUM_CLASSES)
    voxmiou = np.zeros(CONF.NUM_CLASSES)

    uvidx, uvlabel, _ = point_cloud_label_to_surface_voxel_label_fast(
        coords,
        np.concatenate((np.expand_dims(targets, 1), np.expand_dims(preds, 1)), axis=1),
        res=0.02)
    for l in seen_classes:
        target_label = np.arange(targets.shape[0])[targets == l]
        pred_label = np.arange(preds.shape[0])[preds == l]
        num_intersection_label = np.intersect1d(pred_label, target_label).shape[0]
        num_union_label = np.union1d(pred_label, target_label).shape[0]
        pointmiou[l] = num_intersection_label / (num_union_label + 1e-8)

        target_label_vox = uvidx[(uvlabel[:, 0] == l)]
        pred_label_vox = uvidx[(uvlabel[:, 1] == l)]
        num_intersection_label_vox = np.intersect1d(pred_label_vox, target_label_vox).shape[0]
        num_union_label_vox = np.union1d(pred_label_vox, target_label_vox).shape[0]
        voxmiou[l] = num_intersection_label_vox / (num_union_label_vox + 1e-8)

    return pointmiou, voxmiou, mask
Example #3
Source File: vecm.py From vnpy_crypto with MIT License
def cov_params_wo_det(self):
    # rows & cols to be dropped (related to deterministic terms inside the
    # cointegration relation)
    start_i = self.neqs**2  # first elements belong to alpha @ beta.T
    end_i = start_i + self.neqs * self.det_coef_coint.shape[0]
    to_drop_i = np.arange(start_i, end_i)

    # rows & cols to be dropped (related to deterministic terms outside of
    # the cointegration relation)
    cov = self.cov_params_default
    cov_size = len(cov)
    to_drop_o = np.arange(cov_size - self.det_coef.size, cov_size)

    to_drop = np.union1d(to_drop_i, to_drop_o)

    mask = np.ones(cov.shape, dtype=bool)
    mask[to_drop] = False
    mask[:, to_drop] = False
    cov_size_new = mask.sum(axis=0)[0]
    return cov[mask].reshape((cov_size_new, cov_size_new))

# standard errors:
Example #4
Source File: test_image.py From attention-lvcsr with MIT License
def test_list_batch_source(self):
    # Make sure that with enough epochs we sample everything.
    stream = RandomFixedSizeCrop(self.batch_stream, (5, 4),
                                 which_sources=('source2',))
    seen_indices = numpy.array([], dtype='uint8')
    for i in range(30):
        for batch in stream.get_epoch_iterator():
            for example in batch[1]:
                assert example.shape == (2, 5, 4)
                seen_indices = numpy.union1d(seen_indices,
                                             example.flatten())
            assert len(batch[1]) in (1, 2)
        if self.source2_biggest == len(seen_indices):
            break
    else:
        assert False
Example #5
Source File: test_merge_execute.py From mars with Apache License 2.0
def testUnion1dExecution(self):
    rs = np.random.RandomState(0)
    raw1 = rs.random(10)
    raw2 = rs.random(9)

    t1 = tensor(raw1, chunk_size=3)
    t2 = tensor(raw2, chunk_size=4)

    t = union1d(t1, t2, aggregate_size=1)
    res = self.executor.execute_tensor(t, concat=True)[0]
    expected = np.union1d(raw1, raw2)
    np.testing.assert_array_equal(res, expected)

    t = union1d(t1, t2)
    res = self.executor.execute_tensor(t, concat=True)[0]
    expected = np.union1d(raw1, raw2)
    np.testing.assert_array_equal(res, expected)
Example #6
Source File: array.py From unyt with BSD 3-Clause "New" or "Revised" License
def uunion1d(arr1, arr2):
    """Find the union of two arrays.

    A wrapper around numpy.union1d that preserves units. All input arrays
    must have the same units. See the documentation of numpy.union1d for
    full details.

    Examples
    --------
    >>> from unyt import cm
    >>> A = [1, 2, 3]*cm
    >>> B = [2, 3, 4]*cm
    >>> uunion1d(A, B)
    unyt_array([1, 2, 3, 4], 'cm')
    """
    v = np.union1d(arr1, arr2)
    v = _validate_numpy_wrapper_units(v, [arr1, arr2])
    return v
Example #7
Source File: test_split.py From Mastering-Elasticsearch-7.0 with MIT License
def test_stratified_shuffle_split_overlap_train_test_bug():
    # See https://github.com/scikit-learn/scikit-learn/issues/6121 for
    # the original bug report
    y = [0, 1, 2, 3] * 3 + [4, 5] * 5
    X = np.ones_like(y)

    sss = StratifiedShuffleSplit(n_splits=1,
                                 test_size=0.5, random_state=0)

    train, test = next(sss.split(X=X, y=y))

    # no overlap
    assert_array_equal(np.intersect1d(train, test), [])

    # complete partition
    assert_array_equal(np.union1d(train, test), np.arange(len(y)))
Example #8
Source File: test_split.py From Mastering-Elasticsearch-7.0 with MIT License
def test_stratified_shuffle_split_multilabel():
    # fix for issue 9037
    for y in [np.array([[0, 1], [1, 0], [1, 0], [0, 1]]),
              np.array([[0, 1], [1, 1], [1, 1], [0, 1]])]:
        X = np.ones_like(y)
        sss = StratifiedShuffleSplit(n_splits=1, test_size=0.5, random_state=0)
        train, test = next(sss.split(X=X, y=y))
        y_train = y[train]
        y_test = y[test]

        # no overlap
        assert_array_equal(np.intersect1d(train, test), [])

        # complete partition
        assert_array_equal(np.union1d(train, test), np.arange(len(y)))

        # correct stratification of entire rows
        # (by design, here y[:, 0] uniquely determines the entire row of y)
        expected_ratio = np.mean(y[:, 0])
        assert_equal(expected_ratio, np.mean(y_train[:, 0]))
        assert_equal(expected_ratio, np.mean(y_test[:, 0]))
Example #9
Source File: electrode_placement.py From simnibs with GNU General Public License v3.0
def _optimize_2D(nodes, triangles, stay=[]):
    ''' Optimize the locations of the points by moving them towards the center
    of their patch. This is done iteratively for all points for a number of
    iterations and using a .05 step length '''
    edges, tr_edges, adjacency_list = _edge_list(triangles)
    boundary = edges[adjacency_list[:, 1] == -1].reshape(-1)
    stay = np.union1d(boundary, stay)
    stay = stay.astype(int)
    n_iter = 5
    step_length = .05
    mean_bar = np.zeros_like(nodes)
    new_nodes = np.copy(nodes)
    k = np.bincount(triangles.reshape(-1), minlength=len(nodes))
    for n in range(n_iter):
        bar = np.mean(new_nodes[triangles], axis=1)
        for i in range(2):
            mean_bar[:, i] = np.bincount(triangles.reshape(-1),
                                         weights=np.repeat(bar[:, i], 3),
                                         minlength=len(nodes))
        mean_bar /= k[:, None]
        new_nodes += step_length * (mean_bar - new_nodes)
        new_nodes[stay] = nodes[stay]
    return new_nodes
Example #10
Source File: lshutils.py From Fly-LSH with MIT License
def compute_recall(self, n_points, nnn, sr):
    sample_indices = np.random.choice(self.numsamples, n_points)
    recalls = []
    elapsed = []
    numpredicted = []
    for qidx in sample_indices:
        start = time.time()
        #preds = np.array([m.query_bins(qidx, sr) for m in self.models])
        predicted = self.firstmodel.query_bins(qidx, sr)  #reduce(np.union1d, preds)
        if len(predicted) < nnn:
            raise ValueError('Not a good search radius')
        numpredicted.append(len(predicted))
        l1distances = np.array([np.sum((m.hashes[predicted, :] ^ m.hashes[qidx, :]), axis=1)
                                for m in self.models])
        rankings = l1distances.mean(axis=0).argsort()
        #trusted_model = self.models[np.argmax([len(p) for p in preds])]
        #rankings = np.sum((trusted_model.hashes[predicted, :] ^ trusted_model.hashes[qidx, :]), axis=1).argsort()
        predicted = predicted[rankings][:nnn]
        elapsed.append(time.time() - start)
        trueNNs = self.firstmodel.true_nns(qidx, nnn)
        recalls.append(len(set(predicted) & set(trueNNs)) / nnn)
    return [np.mean(recalls), np.std(recalls), np.mean(elapsed), np.std(elapsed),
            np.mean(numpredicted), np.std(numpredicted)]
Example #11
Source File: test_protocol.py From AmpliGraph with Apache License 2.0
def test_evaluate_performance_too_many_entities_warning():
    X = load_yago3_10()
    model = TransE(batches_count=200, seed=0, epochs=1, k=5, eta=1, verbose=True)
    model.fit(X['train'])

    # no entity list declared
    with pytest.warns(UserWarning):
        evaluate_performance(X['test'][::100], model, verbose=True, corrupt_side='o')

    # with larger than threshold entity list
    with pytest.warns(UserWarning):
        # TOO_MANY_ENT_TH threshold is set to 50,000 entities. Using explicit value to comply with linting
        # and thus avoiding exporting unused global variable.
        entities_subset = np.union1d(np.unique(X["train"][:, 0]),
                                     np.unique(X["train"][:, 2]))[:50000]
        evaluate_performance(X['test'][::100], model, verbose=True, corrupt_side='o',
                             entities_subset=entities_subset)

    # with small entity list (no exception expected)
    evaluate_performance(X['test'][::100], model, verbose=True, corrupt_side='o',
                         entities_subset=entities_subset[:10])

    # with smaller dataset, no entity list declared (no exception expected)
    X_wn18rr = load_wn18rr()
    model_wn18 = TransE(batches_count=200, seed=0, epochs=1, k=5, eta=1, verbose=True)
    model_wn18.fit(X_wn18rr['train'])
    evaluate_performance(X_wn18rr['test'][::100], model_wn18, verbose=True, corrupt_side='o')
Example #12
Source File: PeakAreaCorrections.py From pax with BSD 3-Clause "New" or "Revised" License
def transform_event(self, event):
    for peak in event.peaks:
        # check that there is a position
        if not len(peak.reconstructed_positions):
            continue
        try:
            # Get x,y position from peak
            xy = peak.get_position_from_preferred_algorithm(self.config['xy_posrec_preference'])
        except ValueError:
            self.log.debug("Could not find any position from the chosen algorithms")
            continue
        try:
            peak.s2_saturation_correction *= saturation_correction(
                peak=peak,
                channels_in_pattern=self.config['channels_top'],
                expected_pattern=self.s2_patterns.expected_pattern((xy.x, xy.y)),
                confused_channels=np.union1d(peak.saturated_channels,
                                             self.zombie_pmts_s2),
                log=self.log)
        except exceptions.CoordinateOutOfRangeException:
            self.log.debug("Expected light pattern at coordinates "
                           "(%f, %f) consists of only zeros!" % (xy.x, xy.y))
    return event
Example #13
Source File: test_datasets.py From AmpliGraph with Apache License 2.0
def test_wn18rr():
    wn18rr = load_wn18rr()

    ent_train = np.union1d(np.unique(wn18rr["train"][:, 0]), np.unique(wn18rr["train"][:, 2]))
    ent_valid = np.union1d(np.unique(wn18rr["valid"][:, 0]), np.unique(wn18rr["valid"][:, 2]))
    ent_test = np.union1d(np.unique(wn18rr["test"][:, 0]), np.unique(wn18rr["test"][:, 2]))
    distinct_ent = np.union1d(np.union1d(ent_train, ent_valid), ent_test)
    distinct_rel = np.union1d(np.union1d(np.unique(wn18rr["train"][:, 1]),
                                         np.unique(wn18rr["train"][:, 1])),
                              np.unique(wn18rr["train"][:, 1]))

    assert len(wn18rr['train']) == 86835
    # - 210 because 210 triples containing unseen entities are removed
    assert len(wn18rr['valid']) == 3034 - 210
    # - 210 because 210 triples containing unseen entities are removed
    assert len(wn18rr['test']) == 3134 - 210
Example #14
Source File: MDLP.py From Discretization-MDLPC with GNU General Public License v3.0
def feature_boundary_points(self, values):
    '''
    Given an attribute, find all potential cut_points (boundary points)
    :param feature: feature of interest
    :param partition_index: indices of rows for which feature value falls within interval of interest
    :return: array with potential cut_points
    '''
    missing_mask = np.isnan(values)
    data_partition = np.concatenate([values[:, np.newaxis], self._class_labels], axis=1)
    data_partition = data_partition[~missing_mask]

    # sort data by values
    data_partition = data_partition[data_partition[:, 0].argsort()]

    # Get unique values in column
    unique_vals = np.unique(data_partition[:, 0])  # each of these could be a bin boundary

    # Find if when feature changes there are different class values
    boundaries = []
    for i in range(1, unique_vals.size):  # By definition the first unique value cannot be a boundary
        previous_val_idx = np.where(data_partition[:, 0] == unique_vals[i - 1])[0]
        current_val_idx = np.where(data_partition[:, 0] == unique_vals[i])[0]
        merged_classes = np.union1d(data_partition[previous_val_idx, 1],
                                    data_partition[current_val_idx, 1])
        if merged_classes.size > 1:
            boundaries += [unique_vals[i]]
    boundaries_offset = np.array([previous_item(unique_vals, var) for var in boundaries])
    return (np.array(boundaries) + boundaries_offset) / 2
Example #15
Source File: lshutils.py From Fly-LSH with MIT License
def compute_ens_mAP(self, n_points, nnn, sr):
    sample_indices = np.random.choice(self.numsamples, n_points)
    allAPs = []
    elapsed = []
    numpredicted = []
    ms = lambda l: (np.mean(l), np.std(l))
    for qidx in sample_indices:
        start = time.time()
        preds = np.array([m.query_bins(qidx, sr) for m in self.models])
        predicted = reduce(np.union1d, preds)
        if len(predicted) < nnn:
            #raise ValueError('Not a good search radius')
            continue
        numpredicted.append(len(predicted))
        l1distances = np.array([np.sum((m.hashes[predicted, :] ^ m.hashes[qidx, :]), axis=1)
                                for m in self.models])
        rankings = l1distances.mean(axis=0).argsort()
        #trusted_model = self.models[np.argmax([len(p) for p in preds])]
        #rankings = np.sum((trusted_model.hashes[predicted, :] ^ trusted_model.hashes[qidx, :]), axis=1).argsort()
        predicted = predicted[rankings][:nnn]
        elapsed.append(time.time() - start)
        trueNNs = self.firstmodel.true_nns(qidx, nnn)
        allAPs.append(self.firstmodel.AP(predicted, trueNNs))
    if len(allAPs) < 0.8 * n_points:
        raise ValueError('Not a good search radius')
    return [*ms(allAPs), *ms(elapsed), *ms(numpredicted)]
Example #16
Source File: classify_keyword_endpoint.py From nupic.fluent with GNU Affero General Public License v3.0
def trainModel(self, samples, labels):
    """
    Train the classifier on the input sample and label. Use Cortical.io's
    keyword extraction to get the most relevant terms then get the
    intersection of those bitmaps

    @param samples    (dictionary)  Dictionary, containing text, sparsity,
                                    and bitmap
    @param labels     (int)         Reference index for the classification
                                    of this sample.
    """
    for sample, sample_labels in zip(samples, labels):
        keywords = self.client.extractKeywords(sample["text"])

        # No keywords were found
        if len(keywords) == 0:
            # Get each token in the sample so the union is not empty
            keywords = sample["text"].split(" ")

        union = numpy.zeros(0)
        for word in keywords:
            bitmap = self._encodeText(word)
            union = numpy.union1d(bitmap, union).astype(int)

        for label in sample_labels:
            if label not in self.categoryBitmaps:
                self.categoryBitmaps[label] = union

            intersection = numpy.intersect1d(union, self.categoryBitmaps[label])
            if intersection.size == 0:
                # Don't want to lose all the old information
                union = numpy.union1d(union, self.categoryBitmaps[label]).astype(int)
                # Need to sample to stay sparse
                count = len(union)
                sampleIndices = random.sample(xrange(count), min(count, self.w))
                intersection = numpy.sort(union[sampleIndices])

            self.categoryBitmaps[label] = intersection
Example #17
Source File: arraysetops.py From Splunking-Crime with GNU Affero General Public License v3.0
def union1d(ar1, ar2):
    """
    Find the union of two arrays.

    Return the unique, sorted array of values that are in either of the two
    input arrays.

    Parameters
    ----------
    ar1, ar2 : array_like
        Input arrays. They are flattened if they are not already 1D.

    Returns
    -------
    union1d : ndarray
        Unique, sorted union of the input arrays.

    See Also
    --------
    numpy.lib.arraysetops : Module with a number of other functions for
                            performing set operations on arrays.

    Examples
    --------
    >>> np.union1d([-1, 0, 1], [-2, 0, 2])
    array([-2, -1, 0, 1, 2])

    To find the union of more than two arrays, use functools.reduce:

    >>> from functools import reduce
    >>> reduce(np.union1d, ([1, 3, 4, 3], [3, 1, 2, 1], [6, 3, 4, 2]))
    array([1, 2, 3, 4, 6])
    """
    return unique(np.concatenate((ar1, ar2)))
Example #18
Source File: competing_methods.py From reveal-graph-embedding with Apache License 2.0
def jaccard(c_1, c_2):
    """
    Calculates the Jaccard similarity between two sets of nodes. Called by mroc.

    Inputs:  - c_1: Community (set of nodes) 1.
             - c_2: Community (set of nodes) 2.

    Outputs: - jaccard_similarity: The Jaccard similarity of these two communities.
    """
    nom = np.intersect1d(c_1, c_2).size
    denom = np.union1d(c_1, c_2).size

    return nom/denom
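Since intersect1d and union1d both deduplicate their inputs, the ratio above is the standard Jaccard index over the distinct node IDs. A quick hand-check with two hypothetical communities:

import numpy as np

c_1 = [1, 2, 3, 3]
c_2 = [2, 3, 4]
# intersection {2, 3} has size 2; union {1, 2, 3, 4} has size 4
print(np.intersect1d(c_1, c_2).size / np.union1d(c_1, c_2).size)  # 0.5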
Example #19
Source File: arraysetops.py From ImageFusion with MIT License
def union1d(ar1, ar2):
    """
    Find the union of two arrays.

    Return the unique, sorted array of values that are in either of the two
    input arrays.

    Parameters
    ----------
    ar1, ar2 : array_like
        Input arrays. They are flattened if they are not already 1D.

    Returns
    -------
    union1d : ndarray
        Unique, sorted union of the input arrays.

    See Also
    --------
    numpy.lib.arraysetops : Module with a number of other functions for
                            performing set operations on arrays.

    Examples
    --------
    >>> np.union1d([-1, 0, 1], [-2, 0, 2])
    array([-2, -1, 0, 1, 2])
    """
    return unique(np.concatenate((ar1, ar2)))
Example #20
Source File: core.py From PhenoGraph with MIT License
def graph2binary(filename, graph):
    """
    Write (weighted) graph to binary file filename.bin
    :param filename:
    :param graph:
    :return None: graph is written to filename.bin
    """
    tic = time.time()
    # Unpack values in graph
    i, j = graph.nonzero()
    s = graph.data

    # place i and j in single array as edge list
    ij = np.hstack((i[:, np.newaxis], j[:, np.newaxis]))
    # add dummy self-edges for vertices at the END of the list with no neighbors
    ijmax = np.union1d(i, j).max()
    n = graph.shape[0]
    missing = np.arange(ijmax + 1, n)
    for q in missing:
        ij = np.append(ij, [[q, q]], axis=0)
        s = np.append(s, [0.], axis=0)

    # Check data types: int32 for indices, float64 for weights
    if ij.dtype != np.int32:
        ij = ij.astype('int32')
    if s.dtype != np.float64:
        s = s.astype('float64')

    # write to file (NB f.writelines is ~10x faster than np.tofile(f))
    with open(filename + '.bin', 'w+b') as f:
        f.writelines([e for t in zip(ij, s) for e in t])
    print("Wrote graph to binary file in {} seconds".format(time.time() - tic))
Example #21
Source File: classify_context.py From nupic.fluent with GNU Affero General Public License v3.0
def trainModel(self, samples, labels):
    """
    Train the classifier on the input sample and label. Use Cortical.io's
    keyword extraction to get the most relevant terms then get the
    intersection of those bitmaps

    @param samples    (dictionary)  Dictionary, containing text, sparsity,
                                    and bitmap
    @param labels     (int)         Reference index for the classification
                                    of this sample.
    """
    for sample, sample_labels in zip(samples, labels):
        bitmaps = [sample["bitmap"].tolist()]
        context = self.client.getContextFromText(bitmaps, maxResults=5,
                                                 getFingerprint=True)
        if len(context) != 0:
            union = numpy.zeros(0)
            for c in context:
                bitmap = c["fingerprint"]["positions"]
                union = numpy.union1d(bitmap, union).astype(int)

            for label in sample_labels:
                # Haven't seen the label before
                if label not in self.categoryBitmaps:
                    self.categoryBitmaps[label] = union

                intersection = numpy.intersect1d(union, self.categoryBitmaps[label])
                if intersection.size == 0:
                    # Don't want to lose all the old information
                    union = numpy.union1d(union, self.categoryBitmaps[label]).astype(int)
                    # Need to sample to stay sparse
                    count = len(union)
                    sampleIndices = random.sample(xrange(count), min(count, self.w))
                    intersection = numpy.sort(union[sampleIndices])

                self.categoryBitmaps[label] = intersection
Example #22
Source File: features.py From deepbgc with MIT License
def fit(self, X, y=None):
    self.unique_values = np.union1d(self.unique_values, X[self.column])
    return self
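Because union1d deduplicates and sorts on every call, repeated fits accumulate the set of distinct values seen in the column so far. A toy sketch of the same accumulation pattern (class name, column name, and data are invented for illustration; they are not part of the deepbgc API):

import numpy as np

class UniqueValueAccumulator:
    """Toy sketch: collect the distinct values of one column across fits."""
    def __init__(self, column):
        self.column = column
        self.unique_values = np.array([], dtype=int)  # starts empty

    def fit(self, X, y=None):
        # each call merges the new column values into the running union
        self.unique_values = np.union1d(self.unique_values, X[self.column])
        return self

acc = UniqueValueAccumulator('token_id')
acc.fit({'token_id': [3, 1, 2]})
acc.fit({'token_id': [2, 5]})
print(acc.unique_values)  # [1 2 3 5]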
Example #23
Source File: qt_vectors_layer.py From napari with BSD 3-Clause "New" or "Revised" License
def _get_property_values(self):
    """Get the current property values from the Vectors layer

    Returns
    -------
    property_values : np.ndarray
        array of the union of the property names (keys) in
        Vectors.properties and Vectors._property_choices
    """
    property_choices = [*self.layer._property_choices]
    properties = [*self.layer.properties]
    property_values = np.union1d(property_choices, properties)

    return property_values
Example #24
Source File: arraysetops.py From predictive-maintenance-using-machine-learning with Apache License 2.0
def union1d(ar1, ar2):
    """
    Find the union of two arrays.

    Return the unique, sorted array of values that are in either of the two
    input arrays.

    Parameters
    ----------
    ar1, ar2 : array_like
        Input arrays. They are flattened if they are not already 1D.

    Returns
    -------
    union1d : ndarray
        Unique, sorted union of the input arrays.

    See Also
    --------
    numpy.lib.arraysetops : Module with a number of other functions for
                            performing set operations on arrays.

    Examples
    --------
    >>> np.union1d([-1, 0, 1], [-2, 0, 2])
    array([-2, -1, 0, 1, 2])

    To find the union of more than two arrays, use functools.reduce:

    >>> from functools import reduce
    >>> reduce(np.union1d, ([1, 3, 4, 3], [3, 1, 2, 1], [6, 3, 4, 2]))
    array([1, 2, 3, 4, 6])
    """
    return unique(np.concatenate((ar1, ar2), axis=None))
Example #25
Source File: utilities.py From muDIC with MIT License
def find_inconsistent(ep, ny):
    rem1 = np.where(ep > 1.)
    rem2 = np.where(ep < 0.)
    rem3 = np.where(ny > 1.)
    rem4 = np.where(ny < 0.)
    return reduce(np.union1d, [rem1[0], rem2[0], rem3[0], rem4[0]])
Example #26
Source File: arraysetops.py From GraphicDesignPatternByPython with MIT License
def union1d(ar1, ar2):
    """
    Find the union of two arrays.

    Return the unique, sorted array of values that are in either of the two
    input arrays.

    Parameters
    ----------
    ar1, ar2 : array_like
        Input arrays. They are flattened if they are not already 1D.

    Returns
    -------
    union1d : ndarray
        Unique, sorted union of the input arrays.

    See Also
    --------
    numpy.lib.arraysetops : Module with a number of other functions for
                            performing set operations on arrays.

    Examples
    --------
    >>> np.union1d([-1, 0, 1], [-2, 0, 2])
    array([-2, -1, 0, 1, 2])

    To find the union of more than two arrays, use functools.reduce:

    >>> from functools import reduce
    >>> reduce(np.union1d, ([1, 3, 4, 3], [3, 1, 2, 1], [6, 3, 4, 2]))
    array([1, 2, 3, 4, 6])
    """
    return unique(np.concatenate((ar1, ar2), axis=None))
Example #27
Source File: test_image.py From attention-lvcsr with MIT License
def test_ndarray_batch_source(self):
    # Make sure that with enough epochs we sample everything.
    stream = RandomFixedSizeCrop(self.batch_stream, (5, 4),
                                 which_sources=('source1',))
    seen_indices = numpy.array([], dtype='uint8')
    for i in range(30):
        for batch in stream.get_epoch_iterator():
            assert batch[0].shape[1:] == (3, 5, 4)
            assert batch[0].shape[0] in (1, 2)
            seen_indices = numpy.union1d(seen_indices,
                                         batch[0].flatten())
        if 3 * 7 * 5 == len(seen_indices):
            break
    else:
        assert False
Example #28
Source File: lshutils.py From Fly-LSH with MIT License
def query_highd_bins(self, qidx, order=False):
    if not hasattr(self, 'highd_bins'):
        raise ValueError('high dimensional bins for model not created')
    valid_bins = self.highd_pointstobins[qidx]
    all_points = reduce(np.union1d,
                        np.array([self.highd_binstopoints[idx] for idx in valid_bins]))
    if order:
        l1distances = (self.hashes[qidx, :] ^ self.hashes[all_points, :]).sum(axis=1)
        all_points = all_points[l1distances.argsort()]
    return all_points
Example #29
Source File: lshutils.py From Fly-LSH with MIT License
def query_lowd_bins(self, qidx, search_radius=1, order=False):
    if not hasattr(self, 'lowd_bins'):
        raise ValueError('low dimensional bins for model not created')
    query_bin = self.lowd_bins[self.lowd_pointstobins[qidx]]
    valid_bins = np.flatnonzero((query_bin[None, :] ^ self.lowd_bins).sum(axis=1) <= 2 * search_radius)
    all_points = reduce(np.union1d,
                        np.array([self.lowd_binstopoints[idx] for idx in valid_bins]))
    if order:
        l1distances = (self.hashes[qidx, :] ^ self.hashes[all_points, :]).sum(axis=1)
        all_points = all_points[l1distances.argsort()]
    return all_points
Example #30
Source File: lshutils.py From Fly-LSH with MIT License
def query_bins(self, qidx, search_radius=1, order=True):
    if not hasattr(self, 'bins'):
        raise ValueError('Bins for model not created')
    query_bin = self.bins[self.pointstobins[qidx]]
    valid_bins = np.flatnonzero((query_bin[None, :] ^ self.bins).sum(axis=1) <= search_radius)
    all_points = reduce(np.union1d,
                        np.array([self.binstopoints[idx] for idx in valid_bins]))
    if order:
        l1distances = (self.hashes[qidx, :] ^ self.hashes[all_points, :]).sum(axis=1)
        all_points = all_points[l1distances.argsort()]
    return all_points