Python numpy.ma.masked_equal() Examples
The following are 12 code examples of numpy.ma.masked_equal(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module numpy.ma, or try the search function.
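Before the project examples, here is a minimal, self-contained sketch (not taken from any of the projects below) of what numpy.ma.masked_equal() does: it returns a masked array in which every element equal to the given value is hidden, so reductions such as mean() and count() ignore those entries. The sentinel value 999 is an arbitrary choice for this illustration.

import numpy as np
import numpy.ma as ma

# 999 is an arbitrary sentinel marking missing readings in this sketch.
data = np.array([1.0, 999.0, 3.0, 999.0, 5.0])

# Mask every element equal to the sentinel; the result is a MaskedArray.
masked = ma.masked_equal(data, 999.0)

print(masked)          # [1.0 -- 3.0 -- 5.0]
print(masked.mean())   # 3.0, since reductions skip the masked entries
print(masked.count())  # 3 unmasked values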
Example #1
Source File: mstats_basic.py From Computable with MIT License | 6 votes |
def kruskalwallis(*args):
    output = argstoarray(*args)
    ranks = ma.masked_equal(rankdata(output, use_missing=False), 0)
    sumrk = ranks.sum(-1)
    ngrp = ranks.count(-1)
    ntot = ranks.count()
#    ssbg = (sumrk**2/ranks.count(-1)).sum() - ranks.sum()**2/ntotal
#    H = ssbg / (ntotal*(ntotal+1)/12.)
    H = 12./(ntot*(ntot+1)) * (sumrk**2/ngrp).sum() - 3*(ntot+1)
    # Tie correction
    ties = count_tied_groups(ranks)
    T = 1. - np.sum(v*(k**3-k) for (k,v) in iteritems(ties))/float(ntot**3-ntot)
    if T == 0:
        raise ValueError('All numbers are identical in kruskal')
    H /= T
    #
    df = len(output) - 1
    prob = stats.chisqprob(H,df)
    return (H, prob)
Example #2
Source File: corex.py From discrete_sieve with Apache License 2.0 | 6 votes |
def transform(self, X, details=False):
    """
    Label hidden factors for (possibly previously unseen) samples of data.
    Parameters: samples of data, X, shape = [n_samples, n_visible]
    Returns: , shape = [n_samples, n_hidden]
    """
    Xm = ma.masked_equal(X, self.missing_values)
    log_marg_x = self.calculate_marginals_on_samples(self.theta, Xm)
    p_y_given_x, log_z = self.calculate_latent(log_marg_x)
    labels = self.label(p_y_given_x)
    if details == 'surprise':
        # Totally experimental
        log_marg_x = self.calculate_marginals_on_samples(self.theta, Xm, return_ratio=False)
        n_samples = Xm.shape[0]
        surprise = []
        for l in range(n_samples):
            q = - sum([max([log_marg_x[j, l, i, labels[l, j]] for j in range(self.n_hidden)])
                       for i in range(self.n_visible)])
            surprise.append(q)
        return p_y_given_x, log_z, np.array(surprise)
    elif details:
        return p_y_given_x, log_z
    else:
        return labels
Example #3
Source File: corex.py From bio_corex with Apache License 2.0 | 6 votes |
def transform(self, X, details=False):
    """
    Label hidden factors for (possibly previously unseen) samples of data.
    Parameters: samples of data, X, shape = [n_samples, n_visible]
    Returns: , shape = [n_samples, n_hidden]
    """
    Xm = ma.masked_equal(X, self.missing_values)
    p_y_given_x, log_z = self.calculate_latent(self.theta, Xm)
    labels = self.label(p_y_given_x)
    if details == 'surprise':
        # Totally experimental
        log_marg_x = self.calculate_marginals_on_samples(self.theta, Xm, return_ratio=False)
        n_samples = Xm.shape[0]
        surprise = []
        for l in range(n_samples):
            q = - sum([max([log_marg_x[j, l, i, labels[l, j]] for j in range(self.n_hidden)])
                       for i in range(self.n_visible)])
            surprise.append(q)
        return p_y_given_x, log_z, np.array(surprise)
    elif details:
        return p_y_given_x, log_z
    else:
        return labels
Example #4
Source File: test_filter.py From pfilter with MIT License | 5 votes |
def test_partial_missing():
    # check that partially missing observations (masked entries) are handled
    pf = ParticleFilter(
        prior_fn=lambda n: np.random.normal(0, 1, (n, 4)), n_particles=100
    )
    for i in range(10):
        masked_input = ma.masked_equal(np.array([1, 999, 0, 999]), 999)
        pf.update(masked_input)
        pf.update(np.array([1, 1, 1, 1]))
        assert np.allclose(np.sum(pf.weights), 1.0)
        assert len(pf.weights) == len(pf.particles) == 100
Example #5
Source File: mstats_basic.py From lambda-packs with MIT License | 5 votes |
def kruskal(*args):
    """
    Compute the Kruskal-Wallis H-test for independent samples

    Parameters
    ----------
    sample1, sample2, ... : array_like
       Two or more arrays with the sample measurements can be given as
       arguments.

    Returns
    -------
    statistic : float
       The Kruskal-Wallis H statistic, corrected for ties
    pvalue : float
       The p-value for the test using the assumption that H has a chi
       square distribution

    Notes
    -----
    For more details on `kruskal`, see `stats.kruskal`.

    """
    output = argstoarray(*args)
    ranks = ma.masked_equal(rankdata(output, use_missing=False), 0)
    sumrk = ranks.sum(-1)
    ngrp = ranks.count(-1)
    ntot = ranks.count()
    H = 12./(ntot*(ntot+1)) * (sumrk**2/ngrp).sum() - 3*(ntot+1)
    # Tie correction
    ties = count_tied_groups(ranks)
    T = 1. - np.sum(v*(k**3-k) for (k,v) in iteritems(ties))/float(ntot**3-ntot)
    if T == 0:
        raise ValueError('All numbers are identical in kruskal')

    H /= T
    df = len(output) - 1
    prob = distributions.chi2.sf(H, df)
    return KruskalResult(H, prob)
Example #6
Source File: mstats_basic.py From GraphicDesignPatternByPython with MIT License | 5 votes |
def kruskal(*args):
    """
    Compute the Kruskal-Wallis H-test for independent samples

    Parameters
    ----------
    sample1, sample2, ... : array_like
       Two or more arrays with the sample measurements can be given as
       arguments.

    Returns
    -------
    statistic : float
       The Kruskal-Wallis H statistic, corrected for ties
    pvalue : float
       The p-value for the test using the assumption that H has a chi
       square distribution

    Notes
    -----
    For more details on `kruskal`, see `stats.kruskal`.

    """
    output = argstoarray(*args)
    ranks = ma.masked_equal(rankdata(output, use_missing=False), 0)
    sumrk = ranks.sum(-1)
    ngrp = ranks.count(-1)
    ntot = ranks.count()
    H = 12./(ntot*(ntot+1)) * (sumrk**2/ngrp).sum() - 3*(ntot+1)
    # Tie correction
    ties = count_tied_groups(ranks)
    T = 1. - sum(v*(k**3-k) for (k,v) in iteritems(ties))/float(ntot**3-ntot)
    if T == 0:
        raise ValueError('All numbers are identical in kruskal')

    H /= T
    df = len(output) - 1
    prob = distributions.chi2.sf(H, df)
    return KruskalResult(H, prob)
Example #7
Source File: masks.py From argos with GNU General Public License v3.0 | 5 votes |
def maskedEqual(array, missingValue):
    """ Mask an array where equal to a given (missing) value.

        Unfortunately ma.masked_equal does not work with structured arrays. See:
        https://mail.scipy.org/pipermail/numpy-discussion/2011-July/057669.html

        If the data is a structured array the mask is applied for every field
        (i.e. forming a logical-and). Otherwise ma.masked_equal is called.
    """
    if array_is_structured(array):
        # Enforce the array to be masked
        if not isinstance(array, ma.MaskedArray):
            array = ma.MaskedArray(array)

        # Set the mask separately per field
        for nr, field in enumerate(array.dtype.names):
            if hasattr(missingValue, '__len__'):
                fieldMissingValue = missingValue[nr]
            else:
                fieldMissingValue = missingValue

            array[field] = ma.masked_equal(array[field], fieldMissingValue)

        check_class(array, ma.MaskedArray)  # post-condition check
        return array
    else:
        # masked_equal works with missing is None
        result = ma.masked_equal(array, missingValue, copy=False)
        check_class(result, ma.MaskedArray)  # post-condition check
        return result
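As the docstring above notes, ma.masked_equal does not handle structured arrays directly, which is why the helper masks each field separately. A minimal sketch of that per-field workaround, using a hypothetical two-field dtype and assuming -1 as the missing value, could look like this:

import numpy as np
import numpy.ma as ma

# Hypothetical structured array; -1 is the assumed missing value for both fields.
records = np.array([(1, 2.0), (-1, 3.5), (4, -1.0)],
                   dtype=[('count', 'i4'), ('value', 'f4')])

masked = ma.MaskedArray(records)
for field in masked.dtype.names:
    # Mask each field on its own, mirroring the per-field loop in maskedEqual above.
    masked[field] = ma.masked_equal(masked[field], -1)

print(masked)  # fields equal to -1 are now masked, field by field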
Example #8
Source File: mstats_basic.py From Splunking-Crime with GNU Affero General Public License v3.0 | 5 votes |
def kruskal(*args):
    """
    Compute the Kruskal-Wallis H-test for independent samples

    Parameters
    ----------
    sample1, sample2, ... : array_like
       Two or more arrays with the sample measurements can be given as
       arguments.

    Returns
    -------
    statistic : float
       The Kruskal-Wallis H statistic, corrected for ties
    pvalue : float
       The p-value for the test using the assumption that H has a chi
       square distribution

    Notes
    -----
    For more details on `kruskal`, see `stats.kruskal`.

    """
    output = argstoarray(*args)
    ranks = ma.masked_equal(rankdata(output, use_missing=False), 0)
    sumrk = ranks.sum(-1)
    ngrp = ranks.count(-1)
    ntot = ranks.count()
    H = 12./(ntot*(ntot+1)) * (sumrk**2/ngrp).sum() - 3*(ntot+1)
    # Tie correction
    ties = count_tied_groups(ranks)
    T = 1. - np.sum(v*(k**3-k) for (k,v) in iteritems(ties))/float(ntot**3-ntot)
    if T == 0:
        raise ValueError('All numbers are identical in kruskal')

    H /= T
    df = len(output) - 1
    prob = distributions.chi2.sf(H, df)
    return KruskalResult(H, prob)
Example #9
Source File: vis_corex.py From bio_corex with Apache License 2.0 | 5 votes |
def calculate_log_latent(corex, X):
    """Calculate the log probabilities for hidden factors for each sample, with high precision."""
    Xm = ma.masked_equal(X, corex.missing_values)
    n_samples, n_visible = Xm.shape
    n_hidden, dim_hidden, ram = corex.n_hidden, corex.dim_hidden, corex.ram
    log_p_y_given_x_unnorm = np.empty((n_hidden, n_samples, dim_hidden))
    memory_size = float(n_samples * n_visible * n_hidden * dim_hidden * 64) / 1000**3  # GB
    batch_size = np.clip(int(ram * n_samples / memory_size), 1, n_samples)
    for l in range(0, n_samples, batch_size):
        log_marg_x = corex.calculate_marginals_on_samples(corex.theta, Xm[l:l+batch_size])  # LLRs for each sample, for each var.
        log_p_y_given_x_unnorm[:, l:l+batch_size, :] = corex.log_p_y + np.einsum(
            'ikl,ijkl->ijl', corex.alpha, log_marg_x, optimize=False)
    return normalize_latent(log_p_y_given_x_unnorm, n_hidden)
Example #10
Source File: _gridprop_import_grdecl.py From xtgeo with GNU Lesser General Public License v3.0 | 5 votes |
def import_grdecl_prop(self, pfile, name="unknown", grid=None):
    """Read a GRDECL ASCII property record"""
    if grid is None:
        raise ValueError("A grid instance is required as argument")

    self._ncol = grid.ncol
    self._nrow = grid.nrow
    self._nlay = grid.nlay
    self._name = name
    self._filesrc = pfile
    actnumv = grid.get_actnum().values

    # This requires that the Python part clean up comments
    # etc, and make a tmp file.
    fds, tmpfile = mkstemp(prefix="tmpxtgeo")
    os.close(fds)

    with open(pfile.name, "r") as oldfile, open(tmpfile, "w") as newfile:
        for line in oldfile:
            if not (re.search(r"^--", line) or re.search(r"^\s+$", line)):
                newfile.write(line)

    # now read the property
    nlen = self._ncol * self._nrow * self._nlay
    ier, values = _cxtgeo.grd3d_import_grdecl_prop(
        tmpfile, self._ncol, self._nrow, self._nlay, name, nlen, 0,
    )
    os.remove(tmpfile)

    if ier != 0:
        raise xtgeo.KeywordNotFoundError(
            "Cannot import {}, not present in file {}?".format(name, pfile)
        )

    self.values = values.reshape(self.dimensions)
    self.values = ma.masked_equal(self.values, actnumv == 0)
Example #11
Source File: corex.py From discrete_sieve with Apache License 2.0 | 4 votes |
def fit_transform(self, X):
    """Fit CorEx on the data

    Parameters
    ----------
    X : array-like, shape = [n_samples, n_visible]
        The data.

    Returns
    -------
    Y : array-like, shape = [n_samples, n_hidden]
        Learned values for each latent factor for each sample.
        Y's are sorted so that Y_1 explains most correlation, etc.
    """
    Xm = ma.masked_equal(X, self.missing_values)
    best_tc = -np.inf
    for n_rep in range(self.n_repeat):
        self.initialize_parameters(X)

        for nloop in range(self.max_iter):
            self.log_p_y = self.calculate_p_y(self.p_y_given_x)
            self.theta = self.calculate_theta(Xm, self.p_y_given_x)
            log_marg_x = self.calculate_marginals_on_samples(self.theta, Xm)  # LLRs for each sample, for each var.
            self.p_y_given_x, self.log_z = self.calculate_latent(log_marg_x)
            self.update_tc(self.log_z)  # Calculate TC and record history to check convergence
            self.print_verbose()
            if self.convergence():
                break

        if self.tc > best_tc:
            best_tc = self.tc
            best_dict = self.__dict__.copy()
    self.__dict__ = best_dict

    if self.verbose:
        print('Best tc:', self.tc)

    self.sort_and_output(Xm)
    return self.labels
Example #12
Source File: corex.py From bio_corex with Apache License 2.0 | 4 votes |
def fit_transform(self, X):
    """Fit CorEx on the data

    Parameters
    ----------
    X : array-like, shape = [n_samples, n_visible]
        The data.

    Returns
    -------
    Y : array-like, shape = [n_samples, n_hidden]
        Learned values for each latent factor for each sample.
        Y's are sorted so that Y_1 explains most correlation, etc.
    """
    if self.n_cpu == 1:
        self.pool = None
    else:
        self.pool = Pool(self.n_cpu)
    Xm = ma.masked_equal(X, self.missing_values)
    best_tc = -np.inf
    for n_rep in range(self.n_repeat):
        self.initialize_parameters(X)

        for nloop in range(self.max_iter):
            self.log_p_y = self.calculate_p_y(self.p_y_given_x)
            self.theta = self.calculate_theta(Xm, self.p_y_given_x)
            if self.n_hidden > 1:  # Structure learning step
                self.update_alpha(self.p_y_given_x, self.theta, Xm, self.tcs)
            self.p_y_given_x, self.log_z = self.calculate_latent(self.theta, Xm)
            self.update_tc(self.log_z)  # Calculate TC and record history to check convergence
            self.print_verbose()
            if self.convergence():
                break

        if self.verbose:
            print('Overall tc:', self.tc)
        if self.tc > best_tc:
            best_tc = self.tc
            best_dict = self.__dict__.copy()
            # TODO: what happens if n_cpu > 1 and n_repeat > 1? Does pool get copied?
            # Probably not... just a pointer to the same object... Seems fine.
    self.__dict__ = best_dict

    if self.verbose:
        print('Best tc:', self.tc)

    self.sort_and_output(Xm)
    if self.pool is not None:
        self.pool.close()
        self.pool = None
    return self.labels