Python numpy.ma.masked_equal() Examples

The following are 12 code examples of numpy.ma.masked_equal(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module numpy.ma , or try the search function .
Example #1
Source File: mstats_basic.py    From Computable with MIT License 6 votes vote down vote up
def kruskalwallis(*args):
    output = argstoarray(*args)
    ranks = ma.masked_equal(rankdata(output, use_missing=False), 0)
    sumrk = ranks.sum(-1)
    ngrp = ranks.count(-1)
    ntot = ranks.count()
#    ssbg = (sumrk**2/ranks.count(-1)).sum() - ranks.sum()**2/ntotal
#    H = ssbg / (ntotal*(ntotal+1)/12.)
    H = 12./(ntot*(ntot+1)) * (sumrk**2/ngrp).sum() - 3*(ntot+1)
    # Tie correction
    ties = count_tied_groups(ranks)
    T = 1. - np.sum(v*(k**3-k) for (k,v) in iteritems(ties))/float(ntot**3-ntot)
    if T == 0:
        raise ValueError('All numbers are identical in kruskal')
    H /= T
    #
    df = len(output) - 1
    prob = stats.chisqprob(H,df)
    return (H, prob) 
Example #2
Source File: corex.py    From discrete_sieve with Apache License 2.0 6 votes vote down vote up
def transform(self, X, details=False):
        """
        Label hidden factors for (possibly previously unseen) samples of data.
        Parameters: samples of data, X, shape = [n_samples, n_visible]
        Returns: , shape = [n_samples, n_hidden]
        """
        Xm = ma.masked_equal(X, self.missing_values)
        log_marg_x = self.calculate_marginals_on_samples(self.theta, Xm)
        p_y_given_x, log_z = self.calculate_latent(log_marg_x)
        labels = self.label(p_y_given_x)
        if details == 'surprise':
            # Totally experimental
            log_marg_x = self.calculate_marginals_on_samples(self.theta, Xm, return_ratio=False)
            n_samples = Xm.shape[0]
            surprise = []
            for l in range(n_samples):
                q = - sum([max([log_marg_x[j,l,i,labels[l, j]]
                                for j in range(self.n_hidden)])
                           for i in range(self.n_visible)])
                surprise.append(q)
            return p_y_given_x, log_z, np.array(surprise)
        elif details:
            return p_y_given_x, log_z
        else:
            return labels 
Example #3
Source File: corex.py    From bio_corex with Apache License 2.0 6 votes vote down vote up
def transform(self, X, details=False):
        """
        Label hidden factors for (possibly previously unseen) samples of data.
        Parameters: samples of data, X, shape = [n_samples, n_visible]
        Returns: , shape = [n_samples, n_hidden]
        """
        Xm = ma.masked_equal(X, self.missing_values)
        p_y_given_x, log_z = self.calculate_latent(self.theta, Xm)
        labels = self.label(p_y_given_x)
        if details == 'surprise':
            # Totally experimental
            log_marg_x = self.calculate_marginals_on_samples(self.theta, Xm, return_ratio=False)
            n_samples = Xm.shape[0]
            surprise = []
            for l in range(n_samples):
                q = - sum([max([log_marg_x[j,l,i,labels[l, j]]
                                for j in range(self.n_hidden)])
                           for i in range(self.n_visible)])
                surprise.append(q)
            return p_y_given_x, log_z, np.array(surprise)
        elif details:
            return p_y_given_x, log_z
        else:
            return labels 
Example #4
Source File: test_filter.py    From pfilter with MIT License 5 votes vote down vote up
def test_partial_missing():
    # check that
    pf = ParticleFilter(
        prior_fn=lambda n: np.random.normal(0, 1, (n, 4)), n_particles=100
    )
    for i in range(10):
        masked_input = ma.masked_equal(np.array([1, 999, 0, 999]), 999)
        pf.update(masked_input)
        pf.update(np.array([1, 1, 1, 1]))
        assert np.allclose(np.sum(pf.weights), 1.0)
        assert len(pf.weights) == len(pf.particles) == 100 
Example #5
Source File: mstats_basic.py    From lambda-packs with MIT License 5 votes vote down vote up
def kruskal(*args):
    """
    Compute the Kruskal-Wallis H-test for independent samples

    Parameters
    ----------
    sample1, sample2, ... : array_like
       Two or more arrays with the sample measurements can be given as
       arguments.

    Returns
    -------
    statistic : float
       The Kruskal-Wallis H statistic, corrected for ties
    pvalue : float
       The p-value for the test using the assumption that H has a chi
       square distribution

    Notes
    -----
    For more details on `kruskal`, see `stats.kruskal`.

    """
    output = argstoarray(*args)
    ranks = ma.masked_equal(rankdata(output, use_missing=False), 0)
    sumrk = ranks.sum(-1)
    ngrp = ranks.count(-1)
    ntot = ranks.count()
    H = 12./(ntot*(ntot+1)) * (sumrk**2/ngrp).sum() - 3*(ntot+1)
    # Tie correction
    ties = count_tied_groups(ranks)
    T = 1. - np.sum(v*(k**3-k) for (k,v) in iteritems(ties))/float(ntot**3-ntot)
    if T == 0:
        raise ValueError('All numbers are identical in kruskal')

    H /= T
    df = len(output) - 1
    prob = distributions.chi2.sf(H, df)
    return KruskalResult(H, prob) 
Example #6
Source File: mstats_basic.py    From GraphicDesignPatternByPython with MIT License 5 votes vote down vote up
def kruskal(*args):
    """
    Compute the Kruskal-Wallis H-test for independent samples

    Parameters
    ----------
    sample1, sample2, ... : array_like
       Two or more arrays with the sample measurements can be given as
       arguments.

    Returns
    -------
    statistic : float
       The Kruskal-Wallis H statistic, corrected for ties
    pvalue : float
       The p-value for the test using the assumption that H has a chi
       square distribution

    Notes
    -----
    For more details on `kruskal`, see `stats.kruskal`.

    """
    output = argstoarray(*args)
    ranks = ma.masked_equal(rankdata(output, use_missing=False), 0)
    sumrk = ranks.sum(-1)
    ngrp = ranks.count(-1)
    ntot = ranks.count()
    H = 12./(ntot*(ntot+1)) * (sumrk**2/ngrp).sum() - 3*(ntot+1)
    # Tie correction
    ties = count_tied_groups(ranks)
    T = 1. - sum(v*(k**3-k) for (k,v) in iteritems(ties))/float(ntot**3-ntot)
    if T == 0:
        raise ValueError('All numbers are identical in kruskal')

    H /= T
    df = len(output) - 1
    prob = distributions.chi2.sf(H, df)
    return KruskalResult(H, prob) 
Example #7
Source File: masks.py    From argos with GNU General Public License v3.0 5 votes vote down vote up
def maskedEqual(array, missingValue):
    """ Mask an array where equal to a given (missing)value.

        Unfortunately ma.masked_equal does not work with structured arrays. See:
        https://mail.scipy.org/pipermail/numpy-discussion/2011-July/057669.html

        If the data is a structured array the mask is applied for every field (i.e. forming a
        logical-and). Otherwise ma.masked_equal is called.
    """
    if array_is_structured(array):
        # Enforce the array to be masked
        if not isinstance(array, ma.MaskedArray):
            array = ma.MaskedArray(array)

        # Set the mask separately per field
        for nr, field in enumerate(array.dtype.names):
            if hasattr(missingValue, '__len__'):
                fieldMissingValue = missingValue[nr]
            else:
                fieldMissingValue = missingValue

            array[field] = ma.masked_equal(array[field], fieldMissingValue)

        check_class(array, ma.MaskedArray) # post-condition check
        return array
    else:
        # masked_equal works with missing is None
        result = ma.masked_equal(array, missingValue, copy=False)
        check_class(result, ma.MaskedArray) # post-condition check
        return result 
Example #8
Source File: mstats_basic.py    From Splunking-Crime with GNU Affero General Public License v3.0 5 votes vote down vote up
def kruskal(*args):
    """
    Compute the Kruskal-Wallis H-test for independent samples

    Parameters
    ----------
    sample1, sample2, ... : array_like
       Two or more arrays with the sample measurements can be given as
       arguments.

    Returns
    -------
    statistic : float
       The Kruskal-Wallis H statistic, corrected for ties
    pvalue : float
       The p-value for the test using the assumption that H has a chi
       square distribution

    Notes
    -----
    For more details on `kruskal`, see `stats.kruskal`.

    """
    output = argstoarray(*args)
    ranks = ma.masked_equal(rankdata(output, use_missing=False), 0)
    sumrk = ranks.sum(-1)
    ngrp = ranks.count(-1)
    ntot = ranks.count()
    H = 12./(ntot*(ntot+1)) * (sumrk**2/ngrp).sum() - 3*(ntot+1)
    # Tie correction
    ties = count_tied_groups(ranks)
    T = 1. - np.sum(v*(k**3-k) for (k,v) in iteritems(ties))/float(ntot**3-ntot)
    if T == 0:
        raise ValueError('All numbers are identical in kruskal')

    H /= T
    df = len(output) - 1
    prob = distributions.chi2.sf(H, df)
    return KruskalResult(H, prob) 
Example #9
Source File: vis_corex.py    From bio_corex with Apache License 2.0 5 votes vote down vote up
def calculate_log_latent(corex, X):
    """"Calculate the log probabilities for hidden factors for each sample, with high precision."""
    Xm = ma.masked_equal(X, corex.missing_values)
    n_samples, n_visible = Xm.shape
    n_hidden, dim_hidden, ram = corex.n_hidden, corex.dim_hidden, corex.ram
    log_p_y_given_x_unnorm = np.empty((n_hidden, n_samples, dim_hidden))
    memory_size = float(n_samples * n_visible * n_hidden * dim_hidden * 64) / 1000**3  # GB
    batch_size = np.clip(int(ram * n_samples / memory_size), 1, n_samples)
    for l in range(0, n_samples, batch_size):
        log_marg_x = corex.calculate_marginals_on_samples(corex.theta, Xm[l:l+batch_size])  # LLRs for each sample, for each var.
        log_p_y_given_x_unnorm[:, l:l+batch_size, :] = corex.log_p_y + np.einsum('ikl,ijkl->ijl', corex.alpha, log_marg_x, optimize=False)
    return normalize_latent(log_p_y_given_x_unnorm, n_hidden) 
Example #10
Source File: _gridprop_import_grdecl.py    From xtgeo with GNU Lesser General Public License v3.0 5 votes vote down vote up
def import_grdecl_prop(self, pfile, name="unknown", grid=None):
    """Read a GRDECL ASCII property record"""

    if grid is None:
        raise ValueError("A grid instance is required as argument")

    self._ncol = grid.ncol
    self._nrow = grid.nrow
    self._nlay = grid.nlay
    self._name = name
    self._filesrc = pfile
    actnumv = grid.get_actnum().values

    # This requires that the Python part clean up comments
    # etc, and make a tmp file.
    fds, tmpfile = mkstemp(prefix="tmpxtgeo")
    os.close(fds)

    with open(pfile.name, "r") as oldfile, open(tmpfile, "w") as newfile:
        for line in oldfile:
            if not (re.search(r"^--", line) or re.search(r"^\s+$", line)):
                newfile.write(line)

    # now read the property
    nlen = self._ncol * self._nrow * self._nlay
    ier, values = _cxtgeo.grd3d_import_grdecl_prop(
        tmpfile, self._ncol, self._nrow, self._nlay, name, nlen, 0,
    )

    os.remove(tmpfile)

    if ier != 0:
        raise xtgeo.KeywordNotFoundError(
            "Cannot import {}, not present in file {}?".format(name, pfile)
        )

    self.values = values.reshape(self.dimensions)
    self.values = ma.masked_equal(self.values, actnumv == 0) 
Example #11
Source File: corex.py    From discrete_sieve with Apache License 2.0 4 votes vote down vote up
def fit_transform(self, X):
        """Fit CorEx on the data

        Parameters
        ----------
        X : array-like, shape = [n_samples, n_visible]
            The data.

        Returns
        -------
        Y: array-like, shape = [n_samples, n_hidden]
           Learned values for each latent factor for each sample.
           Y's are sorted so that Y_1 explains most correlation, etc.
        """

        Xm = ma.masked_equal(X, self.missing_values)

        best_tc = -np.inf
        for n_rep in range(self.n_repeat):

            self.initialize_parameters(X)

            for nloop in range(self.max_iter):

                self.log_p_y = self.calculate_p_y(self.p_y_given_x)
                self.theta = self.calculate_theta(Xm, self.p_y_given_x)

                log_marg_x = self.calculate_marginals_on_samples(self.theta, Xm)  # LLRs for each sample, for each var.

                self.p_y_given_x, self.log_z = self.calculate_latent(log_marg_x)

                self.update_tc(self.log_z)  # Calculate TC and record history to check convergence

                self.print_verbose()
                if self.convergence():
                    break

            if self.tc > best_tc:
                best_tc = self.tc
                best_dict = self.__dict__.copy()
        self.__dict__ = best_dict
        if self.verbose:
            print 'Best tc:', self.tc

        self.sort_and_output(Xm)

        return self.labels 
Example #12
Source File: corex.py    From bio_corex with Apache License 2.0 4 votes vote down vote up
def fit_transform(self, X):
        """Fit CorEx on the data

        Parameters
        ----------
        X : array-like, shape = [n_samples, n_visible]
            The data.

        Returns
        -------
        Y: array-like, shape = [n_samples, n_hidden]
           Learned values for each latent factor for each sample.
           Y's are sorted so that Y_1 explains most correlation, etc.
        """

        if self.n_cpu == 1:
            self.pool = None
        else:
            self.pool = Pool(self.n_cpu)
        Xm = ma.masked_equal(X, self.missing_values)
        best_tc = -np.inf
        for n_rep in range(self.n_repeat):

            self.initialize_parameters(X)

            for nloop in range(self.max_iter):

                self.log_p_y = self.calculate_p_y(self.p_y_given_x)
                self.theta = self.calculate_theta(Xm, self.p_y_given_x)

                if self.n_hidden > 1:  # Structure learning step
                    self.update_alpha(self.p_y_given_x, self.theta, Xm, self.tcs)

                self.p_y_given_x, self.log_z = self.calculate_latent(self.theta, Xm)

                self.update_tc(self.log_z)  # Calculate TC and record history to check convergence

                self.print_verbose()
                if self.convergence():
                    break

            if self.verbose:
                print('Overall tc:', self.tc)
            if self.tc > best_tc:
                best_tc = self.tc
                best_dict = self.__dict__.copy()  # TODO: what happens if n_cpu > 1 and n_repeat > 1? Does pool get copied? Probably not...just a pointer to the same object... Seems fine.
        self.__dict__ = best_dict
        if self.verbose:
            print('Best tc:', self.tc)

        self.sort_and_output(Xm)
        if self.pool is not None:
            self.pool.close()
            self.pool = None
        return self.labels