Python scipy.stats.entropy() Examples
The following are 30 code examples of scipy.stats.entropy(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module scipy.stats, or try the search function.
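Before the project examples, here is a minimal, self-contained sketch (not taken from any of the projects below) of the two things scipy.stats.entropy() computes: the Shannon entropy of a distribution pk, and the Kullback-Leibler divergence when a second distribution qk is given. Unnormalized counts are accepted and normalized internally.

import numpy as np
from scipy.stats import entropy

p = np.array([0.5, 0.25, 0.25])
print(entropy(p))          # Shannon entropy in nats (natural log by default)
print(entropy(p, base=2))  # 1.5 bits

q = np.array([0.4, 0.4, 0.2])
print(entropy(p, q, base=2))  # Kullback-Leibler divergence D(p || q) in bits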
Example #1
Source File: extraction.py From git2net with GNU Affero General Public License v3.0 | 7 votes |
def text_entropy(text):
    """ Computes entropy for a given text based on UTF8 alphabet.

    Args:
        text: string to compute the text entropy for

    Returns:
        text_entropy: text entropy of the given string
    """
    # we only consider UTF8 characters to compute the text entropy
    pk = [text.count(chr(i)) for i in range(256)]
    if sum(pk) == 0:
        text_entropy = None
    else:
        text_entropy = entropy(pk, base=2)
    return text_entropy
Example #2
Source File: evaluation_metrics.py From PointFlow with MIT License | 7 votes |
def jensen_shannon_divergence(P, Q):
    if np.any(P < 0) or np.any(Q < 0):
        raise ValueError('Negative values.')
    if len(P) != len(Q):
        raise ValueError('Non equal size.')

    P_ = P / np.sum(P)  # Ensure probabilities.
    Q_ = Q / np.sum(Q)

    e1 = entropy(P_, base=2)
    e2 = entropy(Q_, base=2)
    e_sum = entropy((P_ + Q_) / 2.0, base=2)
    res = e_sum - ((e1 + e2) / 2.0)

    res2 = _jsdiv(P_, Q_)

    if not np.allclose(res, res2, atol=10e-5, rtol=0):
        warnings.warn('Numerical values of two JSD methods don\'t agree.')

    return res
Example #3
Source File: actions.py From reinvent-randomized with MIT License | 6 votes |
def _nll_stats(self, sampled_nlls, validation_nlls, training_nlls):
    self._add_histogram("nll_plot/sampled", sampled_nlls)
    self._add_histogram("nll_plot/validation", validation_nlls)
    self._add_histogram("nll_plot/training", training_nlls)

    self._add_scalars("nll/avg", {
        "sampled": sampled_nlls.mean(),
        "validation": validation_nlls.mean(),
        "training": training_nlls.mean()
    })

    self._add_scalars("nll/var", {
        "sampled": sampled_nlls.var(),
        "validation": validation_nlls.var(),
        "training": training_nlls.var()
    })

    def jsd(dists):
        min_size = min(len(dist) for dist in dists)
        dists = [dist[:min_size] for dist in dists]
        num_dists = len(dists)
        avg_dist = np.sum(dists, axis=0) / num_dists
        return np.sum([sps.entropy(dist, avg_dist) for dist in dists]) / num_dists

    self._add_scalar("nll_plot/jsd_joined", jsd([sampled_nlls, training_nlls, validation_nlls]))
Example #4
Source File: entropy.py From netrd with MIT License | 6 votes |
def js_divergence(P, Q):
    """Jensen-Shannon divergence between `P` and `Q`.

    Parameters
    ----------
    P, Q (np.ndarray)
        Two discrete distributions represented as 1D arrays. They are
        assumed to have the same support.

    Returns
    -------
    float
        The Jensen-Shannon divergence between `P` and `Q`.

    """
    M = 0.5 * (P + Q)
    return 0.5 * (sp_entropy(P, M, base=2) + sp_entropy(Q, M, base=2))
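As a quick sanity check (not part of netrd), js_divergence as defined above can be exercised on two small distributions; sp_entropy here stands for scipy.stats.entropy, the alias used in that module. The divergence is zero for identical inputs and, with base-2 logarithms, bounded above by 1.

import numpy as np
from scipy.stats import entropy as sp_entropy

P = np.array([0.5, 0.3, 0.2])
Q = np.array([0.1, 0.4, 0.5])
print(js_divergence(P, P))  # 0.0 for identical distributions
print(js_divergence(P, Q))  # strictly positive, at most 1.0 in base 2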
Example #5
Source File: _paga.py From scanpy with BSD 3-Clause "New" or "Revised" License | 6 votes |
def paga_expression_entropies(adata) -> List[float]:
    """Compute the median expression entropy for each node-group.

    Parameters
    ----------
    adata : AnnData
        Annotated data matrix.

    Returns
    -------
    Entropies of median expressions for each node.
    """
    from scipy.stats import entropy

    groups_order, groups_masks = _utils.select_groups(
        adata, key=adata.uns['paga']['groups']
    )
    entropies = []
    for mask in groups_masks:
        X_mask = adata.X[mask].todense()
        x_median = np.nanmedian(X_mask, axis=1, overwrite_input=True)
        x_probs = (x_median - np.nanmin(x_median)) / (np.nanmax(x_median) - np.nanmin(x_median))
        entropies.append(entropy(x_probs))
    return entropies
Example #6
Source File: test_distributions.py From GraphicDesignPatternByPython with MIT License | 6 votes |
def test_genextreme_entropy():
    # regression test for gh-5181
    euler_gamma = 0.5772156649015329

    h = stats.genextreme.entropy(-1.0)
    assert_allclose(h, 2*euler_gamma + 1, rtol=1e-14)

    h = stats.genextreme.entropy(0)
    assert_allclose(h, euler_gamma + 1, rtol=1e-14)

    h = stats.genextreme.entropy(1.0)
    assert_equal(h, 1)

    h = stats.genextreme.entropy(-2.0, scale=10)
    assert_allclose(h, euler_gamma*3 + np.log(10) + 1, rtol=1e-14)

    h = stats.genextreme.entropy(10)
    assert_allclose(h, -9*euler_gamma + 1, rtol=1e-14)

    h = stats.genextreme.entropy(-10)
    assert_allclose(h, 11*euler_gamma + 1, rtol=1e-14)
Example #7
Source File: selection.py From StageDP with MIT License | 6 votes |
def select(self, features, freq_table):
    """ Select features via some criteria

    :type features: dict
    :param features: features vocab

    :type freq_table: 2-D numpy.array
    :param freq_table: frequency table with rows as features,
        columns as frequency values
    """
    if self.method == 'frequency':
        feat_vals = self.frequency(features, freq_table)
    elif self.method == 'entropy':
        feat_vals = self.entropy(features, freq_table)
    elif self.method == 'freq-entropy':
        feat_vals = self.freq_entropy(features, freq_table)
    else:
        raise KeyError("Unrecognized method")
    new_features = self.rank(feat_vals)
    return new_features
Example #8
Source File: qgan.py From qiskit-aqua with Apache License 2.0 | 6 votes |
def _run(self):
    """
    Run qGAN training

    Returns:
        dict: with generator(discriminator) parameters & loss, relative entropy
    Raises:
        AquaError: invalid backend
    """
    if self._quantum_instance.backend_name in ('unitary_simulator', 'clifford_simulator'):
        raise AquaError(
            'Chosen backend not supported - '
            'Set backend either to statevector_simulator, qasm_simulator'
            ' or actual quantum hardware')
    self.train()

    return self._ret
Example #9
Source File: disagreement.py From modAL with MIT License | 6 votes |
def consensus_entropy(committee: BaseCommittee, X: modALinput, **predict_proba_kwargs) -> np.ndarray:
    """
    Calculates the consensus entropy for the Committee. First it computes the class probabilities of X
    for each learner in the Committee, then calculates the consensus probability distribution by averaging
    the individual class probabilities for each learner. The entropy of the consensus probability
    distribution is the consensus entropy of the Committee, which is returned.

    Args:
        committee: The :class:`modAL.models.BaseCommittee` instance for which the consensus entropy is to be calculated.
        X: The data for which the consensus entropy is to be calculated.
        **predict_proba_kwargs: Keyword arguments for the :meth:`predict_proba` of the Committee.

    Returns:
        Consensus entropy of the Committee for the samples in X.
    """
    try:
        proba = committee.predict_proba(X, **predict_proba_kwargs)
    except NotFittedError:
        return np.zeros(shape=(X.shape[0],))

    entr = np.transpose(entropy(np.transpose(proba)))
    return entr
Example #10
Source File: uncertainty.py From modAL with MIT License | 6 votes |
def classifier_entropy(classifier: BaseEstimator, X: modALinput, **predict_proba_kwargs) -> np.ndarray:
    """
    Entropy of the classifier's predictions for the provided samples.

    Args:
        classifier: The classifier for which the prediction entropy is to be measured.
        X: The samples for which the prediction entropy is to be measured.
        **predict_proba_kwargs: Keyword arguments to be passed for the :meth:`predict_proba` of the classifier.

    Returns:
        Entropy of the class probabilities.
    """
    try:
        classwise_uncertainty = classifier.predict_proba(X, **predict_proba_kwargs)
    except NotFittedError:
        return np.zeros(shape=(X.shape[0], ))

    return np.transpose(entropy(np.transpose(classwise_uncertainty)))
Example #11
Source File: core_tests.py From modAL with MIT License | 6 votes |
def test_vote_entropy(self):
    for n_samples in range(1, 10):
        for n_classes in range(1, 10):
            for true_query_idx in range(n_samples):
                # 1. fitted committee
                vote_return = np.zeros(shape=(n_samples, n_classes), dtype=np.int16)
                vote_return[true_query_idx] = np.asarray(range(n_classes), dtype=np.int16)
                committee = mock.MockCommittee(classes_=np.asarray(range(n_classes)), vote_return=vote_return)
                vote_entr = modAL.disagreement.vote_entropy(
                    committee, np.random.rand(n_samples, n_classes)
                )
                true_entropy = np.zeros(shape=(n_samples, ))
                true_entropy[true_query_idx] = entropy(np.ones(n_classes)/n_classes)
                np.testing.assert_array_almost_equal(vote_entr, true_entropy)

                # 2. unfitted committee
                committee = mock.MockCommittee(fitted=False)
                true_entropy = np.zeros(shape=(n_samples,))
                vote_entr = modAL.disagreement.vote_entropy(
                    committee, np.random.rand(n_samples, n_classes)
                )
                np.testing.assert_almost_equal(vote_entr, true_entropy)
Example #12
Source File: core_tests.py From modAL with MIT License | 6 votes |
def test_consensus_entropy(self):
    for n_samples in range(1, 10):
        for n_classes in range(2, 10):
            for true_query_idx in range(n_samples):
                # 1. fitted committee
                proba = np.zeros(shape=(n_samples, n_classes))
                proba[:, 0] = 1.0
                proba[true_query_idx] = np.ones(n_classes)/n_classes
                committee = mock.MockCommittee(predict_proba_return=proba)
                consensus_entropy = modAL.disagreement.consensus_entropy(
                    committee, np.random.rand(n_samples, n_classes)
                )
                true_entropy = np.zeros(shape=(n_samples,))
                true_entropy[true_query_idx] = entropy(np.ones(n_classes) / n_classes)
                np.testing.assert_array_almost_equal(consensus_entropy, true_entropy)

                # 2. unfitted committee
                committee = mock.MockCommittee(fitted=False)
                true_entropy = np.zeros(shape=(n_samples,))
                consensus_entropy = modAL.disagreement.consensus_entropy(
                    committee, np.random.rand(n_samples, n_classes)
                )
                np.testing.assert_almost_equal(consensus_entropy, true_entropy)
Example #13
Source File: evaluation_metrics.py From PointFlow with MIT License | 5 votes |
def entropy_of_occupancy_grid(pclouds, grid_resolution, in_sphere=False, verbose=False):
    """Given a collection of point-clouds, estimate the entropy of the random variables
    corresponding to occupancy-grid activation patterns.
    Inputs:
        pclouds: (numpy array) #point-clouds x points per point-cloud x 3
        grid_resolution (int) size of occupancy grid that will be used.
    """
    epsilon = 10e-4
    bound = 0.5 + epsilon
    if abs(np.max(pclouds)) > bound or abs(np.min(pclouds)) > bound:
        if verbose:
            warnings.warn('Point-clouds are not in unit cube.')

    if in_sphere and np.max(np.sqrt(np.sum(pclouds ** 2, axis=2))) > bound:
        if verbose:
            warnings.warn('Point-clouds are not in unit sphere.')

    grid_coordinates, _ = unit_cube_grid_point_cloud(grid_resolution, in_sphere)
    grid_coordinates = grid_coordinates.reshape(-1, 3)
    grid_counters = np.zeros(len(grid_coordinates))
    grid_bernoulli_rvars = np.zeros(len(grid_coordinates))
    nn = NearestNeighbors(n_neighbors=1).fit(grid_coordinates)

    for pc in pclouds:
        _, indices = nn.kneighbors(pc)
        indices = np.squeeze(indices)
        for i in indices:
            grid_counters[i] += 1
        indices = np.unique(indices)
        for i in indices:
            grid_bernoulli_rvars[i] += 1

    acc_entropy = 0.0
    n = float(len(pclouds))
    for g in grid_bernoulli_rvars:
        if g > 0:
            p = float(g) / n
            acc_entropy += entropy([p, 1.0 - p])

    return acc_entropy / len(grid_counters), grid_counters
Example #14
Source File: entropy.py From gwin with GNU General Public License v3.0 | 5 votes |
def kl(samples1, samples2, pdf1=False, pdf2=False, bins=30,
       hist_min=None, hist_max=None):
    """ Computes the Kullback-Leibler divergence for a single parameter
    from two distributions.

    Parameters
    ----------
    samples1 : numpy.array
        Samples or probability density function (must also set `pdf1=True`).
    samples2 : numpy.array
        Samples or probability density function (must also set `pdf2=True`).
    pdf1 : bool
        Set to `True` if `samples1` is a probability density function already.
    pdf2 : bool
        Set to `True` if `samples2` is a probability density function already.
    bins : int
        Number of bins to use when calculating probability density function
        from a set of samples of the distribution.
    hist_min : numpy.float64
        Minimum of the distributions' values to use.
    hist_max : numpy.float64
        Maximum of the distributions' values to use.

    Returns
    -------
    numpy.float64
        The Kullback-Leibler divergence value.
    """
    hist_range = (hist_min, hist_max)
    if not pdf1:
        samples1, _ = numpy.histogram(samples1, bins=bins,
                                      range=hist_range, normed=True)
    if not pdf2:
        samples2, _ = numpy.histogram(samples2, bins=bins,
                                      range=hist_range, normed=True)
    return stats.entropy(samples1, qk=samples2)
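A hypothetical usage sketch for the helper above (not from gwin): two Gaussian sample sets are binned over a shared range and compared. Note that numpy.histogram's normed argument used in the function has been replaced by density in recent NumPy releases.

import numpy

rng = numpy.random.RandomState(0)
samples1 = rng.normal(0.0, 1.0, size=10000)
samples2 = rng.normal(0.5, 1.0, size=10000)

# Bin both sample sets over the same range so the histograms align bin-for-bin.
print(kl(samples1, samples2, bins=30, hist_min=-5.0, hist_max=5.0))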
Example #15
Source File: feature_squeezing.py From EvadeML-Zoo with MIT License | 5 votes |
def kl(x1, x2):
    assert x1.shape == x2.shape
    # x1_2d, x2_2d = reshape_2d(x1), reshape_2d(x2)

    # Transpose to [?, #num_examples]
    x1_2d_t = x1.transpose()
    x2_2d_t = x2.transpose()

    # pdb.set_trace()
    e = entropy(x1_2d_t, x2_2d_t)
    e[np.where(e==np.inf)] = 2
    return e
Example #16
Source File: toy_world_state.py From mcts with BSD 2-Clause "Simplified" License | 5 votes |
def reward(self, parent, action):
    if (self.pos == self.world.goal).all():
        print("g", end="")
        return 100
    else:
        reward = -1
        if self.world.information_gain:
            for a in self.actions:
                reward += entropy(parent.belief[a], self.belief[a])
        return reward
Example #17
Source File: TestCode.py From aktaion with Apache License 2.0 | 5 votes |
def H(data, iterator=range_bytes):
    if not data:
        return 0
    entropy = 0
    for x in iterator():
        p_x = float(data.count(chr(x)))/len(data)
        if p_x > 0:
            entropy += - p_x*math.log(p_x, 2)
    return entropy

#def main ():
#    for row in fileinput.input():
#        string = row.rstrip('\n')
#        print ("%s: %f" % (string, H(string, range_printable)))
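For comparison (not part of aktaion), the same quantity can be computed with scipy.stats.entropy by passing it the raw character counts, since unnormalized counts are normalized internally. This sketch assumes range_bytes iterates over the byte values 0..255, as the function above expects.

from scipy.stats import entropy

def H_scipy(data):
    # Count occurrences of each byte value; scipy normalizes the counts.
    counts = [data.count(chr(x)) for x in range(256)]
    return entropy(counts, base=2)

# For plain ASCII strings this agrees with H(data) up to floating-point error.
print(H_scipy("hello entropy"))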
Example #18
Source File: posterior_utils.py From scVI with MIT License | 5 votes |
def entropy_from_indices(indices):
    return entropy(np.array(np.unique(indices, return_counts=True)[1].astype(np.int32)))
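A small illustration (not from scVI) of what this helper measures: the Shannon entropy of the empirical distribution of label counts, which is maximal when the labels are evenly represented.

import numpy as np
from scipy.stats import entropy

print(entropy_from_indices(np.array([0, 0, 1, 1, 2, 2])))  # log(3), evenly mixed labels
print(entropy_from_indices(np.array([0, 0, 0, 0, 0, 1])))  # much lower, skewed labels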
Example #19
Source File: qgan.py From qiskit-aqua with Apache License 2.0 | 5 votes |
def get_rel_entr(self) -> float:
    """ Get relative entropy between target and trained distribution """
    samples_gen, prob_gen = self._generator.get_output(self._quantum_instance)
    temp = np.zeros(len(self._grid_elements))
    for j, sample in enumerate(samples_gen):
        for i, element in enumerate(self._grid_elements):
            if sample == element:
                temp[i] += prob_gen[j]
    prob_gen = temp
    prob_gen = [1e-8 if x == 0 else x for x in prob_gen]
    rel_entr = entropy(prob_gen, self._prob_data)
    return rel_entr
Example #20
Source File: qgan.py From qiskit-aqua with Apache License 2.0 | 5 votes |
def rel_entr(self) -> List[float]:
    """ Returns relative entropy between target and trained distribution """
    return self._rel_entr
Example #21
Source File: qgan.py From qiskit-aqua with Apache License 2.0 | 5 votes |
def tol_rel_ent(self, t):
    """
    Set tolerance for relative entropy

    Args:
        t (float): or None, Set tolerance level for relative entropy.
            If the training achieves relative entropy equal or lower than tolerance it finishes.
    """
    self._tol_rel_ent = t
Example #22
Source File: qgan.py From qiskit-aqua with Apache License 2.0 | 5 votes |
def tol_rel_ent(self):
    """ Returns tolerance for relative entropy """
    return self._tol_rel_ent
Example #23
Source File: selection.py From StageDP with MIT License | 5 votes |
def test():
    vocab = {'hello': 0, 'data': 1, 'computer': 2}
    freq_table = [[23, 23, 23, 23], [23, 1, 4, 5], [1, 34, 1, 1]]
    freq_table = numpy.array(freq_table)
    fs = FeatureSelector(topn=2, method='freq-entropy')
    newvocab = fs.select(vocab, freq_table)
    print(newvocab)
Example #24
Source File: selection.py From StageDP with MIT License | 5 votes |
def freq_entropy(self, features, freq_table):
    """ """
    feat_vals = {}
    feat_freqs = self.frequency(features, freq_table)
    feat_ents = self.entropy(features, freq_table)
    for feat in features.keys():
        freq = feat_freqs[feat]
        ent = feat_ents[feat]
        feat_vals[feat] = numpy.log(freq + 1e-3) * (ent + 1e-3)
    return feat_vals
Example #25
Source File: selection.py From StageDP with MIT License | 5 votes |
def entropy(self, features, freq_table):
    """ """
    feat_vals = {}
    for (feat, idx) in features.items():
        freq = freq_table[idx, :]
        feat_vals[feat] = 1 / (entropy(freq) + 1e-3)
    return feat_vals
Example #26
Source File: posterior_utils.py From scVI with MIT License | 5 votes |
def entropy_batch_mixing(
    latent_space, batches, n_neighbors=50, n_pools=50, n_samples_per_pool=100
):
    def entropy(hist_data):
        n_batches = len(np.unique(hist_data))
        if n_batches > 2:
            raise ValueError("Should be only two clusters for this metric")
        frequency = np.mean(hist_data == 1)
        if frequency == 0 or frequency == 1:
            return 0
        return -frequency * np.log(frequency) - (1 - frequency) * np.log(1 - frequency)

    n_neighbors = min(n_neighbors, len(latent_space) - 1)
    nne = NearestNeighbors(n_neighbors=1 + n_neighbors, n_jobs=8)
    nne.fit(latent_space)
    kmatrix = nne.kneighbors_graph(latent_space) - scipy.sparse.identity(
        latent_space.shape[0]
    )

    score = 0
    for t in range(n_pools):
        indices = np.random.choice(
            np.arange(latent_space.shape[0]), size=n_samples_per_pool
        )
        score += np.mean(
            [
                entropy(
                    batches[
                        kmatrix[indices].nonzero()[1][
                            kmatrix[indices].nonzero()[0] == i
                        ]
                    ]
                )
                for i in range(n_samples_per_pool)
            ]
        )
    return score / float(n_pools)
Example #27
Source File: test_distributions.py From GraphicDesignPatternByPython with MIT License | 5 votes |
def test_entropy(self):
    assert_allclose(self.norm_template.entropy(),
                    stats.norm.entropy(loc=1.0, scale=2.5),
                    rtol=0.05)
Example #28
Source File: infotheo.py From vnpy_crypto with MIT License | 5 votes |
def bitstonats(X):
    """
    Converts from bits to nats
    """
    return logbasechange(2, np.e) * X

#TODO: make this entropy, and then have different measures as
#a method
Example #29
Source File: test_distributions.py From GraphicDesignPatternByPython with MIT License | 5 votes |
def test_entropy_2d_zero(self):
    pk = [[0.1, 0.2], [0.6, 0.3], [0.3, 0.5]]
    qk = [[0.0, 0.1], [0.3, 0.6], [0.5, 0.3]]
    assert_array_almost_equal(stats.entropy(pk, qk),
                              [np.inf, 0.18609809])

    pk[0][0] = 0.0
    assert_array_almost_equal(stats.entropy(pk, qk),
                              [0.17403988, 0.18609809])
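The test above exercises two behaviours of scipy.stats.entropy(): 2-D inputs are treated column by column (along axis 0), and a zero in qk where the corresponding pk entry is nonzero makes the Kullback-Leibler divergence infinite. A minimal illustration:

import numpy as np
from scipy.stats import entropy

pk = np.array([[0.1, 0.2], [0.6, 0.3], [0.3, 0.5]])
qk = np.array([[0.0, 0.1], [0.3, 0.6], [0.5, 0.3]])

# One divergence per column; column 0 is inf because qk[0, 0] == 0 while pk[0, 0] > 0.
print(entropy(pk, qk))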
Example #30
Source File: ir2tagsets.py From plastering with MIT License | 5 votes |
def ir2tagset_al_query_entropy(self,
                               target_prob_mat,
                               #target_prob,
                               target_srcids,
                               learning_srcids,
                               target_building,
                               inc_num
                               ):
    assert len(target_srcids) == target_prob_mat.shape[0]
    entropies = get_entropy(target_prob_mat.T)
    sorted_entropies = sorted([(srcid, ent) for srcid, ent
                               in zip(target_srcids, entropies)],
                              key=itemgetter(1))
    cluster_dict = self.building_cluster_dict[target_building]
    added_cids = []
    todo_srcids = []
    new_srcid_cnt = 0
    for srcid, ent in sorted_entropies:
        if srcid in learning_srcids:
            continue
        the_cid = None
        for cid, cluster in cluster_dict.items():
            if srcid in cluster:
                the_cid = cid
                break
        if the_cid in added_cids:
            continue
        added_cids.append(the_cid)
        todo_srcids.append(srcid)
        new_srcid_cnt += 1
        if new_srcid_cnt == inc_num:
            break
    return todo_srcids