Python Examples of scipy.stats.mstats.gmean

Source File: stacking_predict.py From argus-freesound with MIT License

6 votes

def stacking_pred(experiment_dir, stack_probs):
    print(f"Start predict: {experiment_dir}")

    pred_lst = []
    for fold in config.folds:
        print("Predict fold", fold)
        fold_dir = experiment_dir / f'fold_{fold}'
        model_path = get_best_model_path(fold_dir)
        print("Model path", model_path)
        predictor = StackPredictor(model_path, STACK_BATCH_SIZE,
                                   device=DEVICE)
        pred = predictor.predict(stack_probs)
        pred_lst.append(pred)

    preds = gmean(pred_lst, axis=0)
    return preds

Source File: stacking_predict.py From argus-freesound with MIT License

6 votes

def experiment_pred(experiment_dir, images_lst):
    print(f"Start predict: {experiment_dir}")
    transforms = get_transforms(False, CROP_SIZE)

    pred_lst = []
    for fold in config.folds:
        print("Predict fold", fold)
        fold_dir = experiment_dir / f'fold_{fold}'
        model_path = get_best_model_path(fold_dir)
        print("Model path", model_path)
        predictor = Predictor(model_path, transforms,
                              BATCH_SIZE,
                              (config.audio.n_mels, CROP_SIZE),
                              (config.audio.n_mels, CROP_SIZE//TILE_STEP),
                              device=DEVICE)

        pred = pred_test(predictor, images_lst)
        pred_lst.append(pred)

    preds = gmean(pred_lst, axis=0)
    return preds

Source File: test_mstats_basic.py From Computable with MIT License

6 votes

def test_1D(self):
        a = (1,2,3,4)
        actual = mstats.gmean(a)
        desired = np.power(1*2*3*4,1./4.)
        assert_almost_equal(actual, desired,decimal=14)

        desired1 = mstats.gmean(a,axis=-1)
        assert_almost_equal(actual, desired1, decimal=14)
        assert_(not isinstance(desired1, ma.MaskedArray))

        a = ma.array((1,2,3,4),mask=(0,0,0,1))
        actual = mstats.gmean(a)
        desired = np.power(1*2*3,1./3.)
        assert_almost_equal(actual, desired,decimal=14)

        desired1 = mstats.gmean(a,axis=-1)
        assert_almost_equal(actual, desired1, decimal=14)

Source File: test_mstats_basic.py From GraphicDesignPatternByPython with MIT License

6 votes

def test_1D(self):
        a = (1,2,3,4)
        actual = mstats.gmean(a)
        desired = np.power(1*2*3*4,1./4.)
        assert_almost_equal(actual, desired, decimal=14)

        desired1 = mstats.gmean(a,axis=-1)
        assert_almost_equal(actual, desired1, decimal=14)
        assert_(not isinstance(desired1, ma.MaskedArray))

        a = ma.array((1,2,3,4),mask=(0,0,0,1))
        actual = mstats.gmean(a)
        desired = np.power(1*2*3,1./3.)
        assert_almost_equal(actual, desired,decimal=14)

        desired1 = mstats.gmean(a,axis=-1)
        assert_almost_equal(actual, desired1, decimal=14)

Source File: eleven.py From eleven with BSD 2-Clause "Simplified" License

6 votes

def calculate_all_nfs(sample_frame, ranked_targets, ref_sample):
    """For a set of n ranked_genes, calculates normalization factors NF_1,
    NF_2, ..., NF_n. NF_i represents the normalization factor generated by
    considering the first i targets in ranked_targets.
    
    calculate_nf (which returns only NF_n) is probably more
    useful for routine analysis.

    :param DataFrame sample_frame: A sample data frame.
    :param iterable ranked_targets: A list or Series of target names, in order
        of descending stability (ascending M).
    :param string ref_sample: The name of the sample to normalize against.
    :return: a DataFrame with columns 1, 2, ..., n containing normalization
        factors NF_1, ..., NF_n for each sample, indexed by sample name.
    :rtype: DataFrame
    """

    # Returns a DataFrame, where rows represent samples and columns represent a number of reference genes.
    grouped = sample_frame.groupby(['Target', 'Sample'])['Cq'].aggregate(average_cq)
    samples = sample_frame['Sample'].unique()
    nfs = {}
    for i in xrange(1, len(ranked_targets)+1):
        nfs[i] = gmean([pow(2, -grouped.ix[zip(repeat(ref_gene), samples)] + grouped.ix[ref_gene, ref_sample]) for ref_gene in ranked_targets[:i]])
    return pd.DataFrame(nfs, index=samples)

Source File: lda.py From topic-stability with Apache License 2.0

5 votes

def __rerank_terms( self, num_terms, k, mallet_weights_path, eps = 1e-9 ):
		"""
		Implements the term re-weighting method proposed by Blei and Lafferty.
		"""
		log.debug( "Reweighting terms  ..." )
		# Parse weights for all terms and topics
		W = np.zeros( (num_terms, k) )
		with open(mallet_weights_path) as f:
			lines = [line.rstrip() for line in f]
			for l in lines:
				if len(l) == 0 or l.startswith("#"):
					continue
				parts = l.split("\t")
				if len(parts) < 3:
					continue
				topic_index = int(parts[0])
				term_index = int(parts[1])		
				W[term_index,topic_index] = float(parts[2])
		# Calculate geometric means
		gmeans = gmean( W, axis = 1 )
		# Reweight the terms
		# TODO: vectorize this
		for row in range(num_terms):
			if gmeans[row] <= eps or np.isnan( gmeans[row] ):
				continue
			for col in range(k):
				if W[row,col] <= eps:
					continue
				lp = np.log( W[row,col] / gmeans[row] )
				if np.isnan( lp ):
					continue
				W[row,col] = W[row,col] * lp
		# Get new term rankings per topic
		rankings = []
		for topic_index in range(k):
			ranking = np.argsort( W[:,topic_index] )[::-1].tolist()
			rankings.append( ranking )
		return rankings

Source File: run.py From tfdiffeq with MIT License

5 votes

def main():

    sol = dict()
    for method in ['dopri5', 'adams']:
        for tol in [1e-3, 1e-6, 1e-9]:
            print('======= {} | tol={:e} ======='.format(method, tol))
            nfes = []
            times = []
            errs = []
            for c in ['A', 'B', 'C', 'D', 'E']:
                for i in ['1', '2', '3', '4', '5']:
                    diffeq, init, _ = getattr(detest, c + i)()
                    t0, y0 = init()
                    diffeq = NFEDiffEq(diffeq)

                    if not c + i in sol:
                        sol[c + i] = odeint(
                            diffeq, y0, tf.stack([t0, tf.convert_to_tensor(20., dtype=tf.float64)]), atol=1e-12, rtol=1e-12,
                            method='dopri5'
                        )[1]
                        diffeq.nfe = 0

                    start_time = time.time()
                    est = odeint(diffeq, y0, tf.stack([t0, tf.convert_to_tensor(20., dtype=tf.float64)]), atol=tol, rtol=tol,
                                 method=method)
                    time_spent = time.time() - start_time

                    error = tf.sqrt(tf.reduce_mean((sol[c + i] - est[1])**2))

                    errs.append(error.numpy())
                    nfes.append(diffeq.nfe)
                    times.append(time_spent)

                    print('{}: NFE {} | Time {} | Err {:e}'.format(c + i, diffeq.nfe, time_spent, error.numpy()))

            print('Total NFE {} | Total Time {} | GeomAvg Error {:e}'.format(np.sum(nfes), np.sum(times), gmean(errs)))

Source File: multivariate_convolve.py From alphacsc with BSD 3-Clause "New" or "Revised" License

5 votes

def benchmark():
    n_atoms_range = [1, 2, 4, 8, 16]
    n_channels_range = [10, 20, 40, 80, 160]
    n_times_atom_range = [10, 20, 40, 80, 160]
    n_runs = (len(n_atoms_range) * len(n_channels_range) *
              len(n_times_atom_range) * len(all_func))

    k = 0
    results = []
    for n_atoms in n_atoms_range:
        for n_channels in n_channels_range:
            for n_times_atom in n_times_atom_range:
                for func in all_func:
                    print('%d/%d, %s' % (k, n_runs, func.__name__))
                    k += 1
                    results.append(
                        run_one(n_atoms, n_channels, n_times_atom, func))

    df = pd.DataFrame(results, columns=[
        'n_atoms', 'n_channels', 'n_times_atom', 'func', 'duration'
    ])
    fig, axes = plt.subplots(2, 2, figsize=(10, 8))
    axes = axes.ravel()

    def plot(index, ax):
        pivot = df.pivot_table(columns='func', index=index, values='duration',
                               aggfunc=gmean)
        pivot.plot(ax=ax)
        ax.set_xscale('log')
        ax.set_yscale('log')
        ax.set_ylabel('duration')

    plot('n_atoms', axes[0])
    plot('n_times_atom', axes[1])
    plot('n_channels', axes[2])
    plt.tight_layout()
    plt.show()

Source File: convolve_ztz.py From alphacsc with BSD 3-Clause "New" or "Revised" License

5 votes

def benchmark():
    n_atoms_range = [1, 3, 9]
    n_channels_range = [1, 25, 50, 100, 200]
    n_times_atom_range = [8, 32, 128]

    n_runs = (len(n_atoms_range) * len(n_channels_range) * len(
        n_times_atom_range) * len(all_func))

    k = 0
    results = []
    for n_atoms in n_atoms_range:
        for n_channels in n_channels_range:
            for n_times_atom in n_times_atom_range:
                for func in all_func:
                    print('%d/%d, %s' % (k, n_runs, func.__name__))
                    k += 1
                    results.append(
                        run_one(n_atoms, n_channels, n_times_atom, func))

    df = pd.DataFrame(results, columns=[
        'n_atoms', 'n_channels', 'n_times_atom', 'func', 'duration'
    ])
    fig, axes = plt.subplots(2, 2, figsize=(10, 8))
    axes = axes.ravel()

    def plot(index, ax):
        pivot = df.pivot_table(columns='func', index=index, values='duration',
                               aggfunc=gmean)
        pivot.plot(ax=ax)
        ax.set_xscale('log')
        ax.set_yscale('log')
        ax.set_ylabel('duration')

    plot('n_atoms', axes[0])
    plot('n_times_atom', axes[1])
    plot('n_channels', axes[2])
    # plot('n_times_valid', axes[3])
    plt.tight_layout()
    plt.show()

Source File: eleven.py From eleven with BSD 2-Clause "Simplified" License

5 votes

def calculate_nf(sample_frame, ref_targets, ref_sample):
    """Calculates a normalization factor from the geometric mean of the
    expression of all ref_targets, normalized to a reference sample.

    :param DataFrame sample_frame: A sample data frame.
    :param iterable ref_targets: A list or Series of target names.
    :param string ref_sample: The name of the sample to normalize against.
    :return: a Series indexed by sample name containing normalization factors
        for each sample.
    """
    grouped = sample_frame.groupby(['Target', 'Sample'])['Cq'].aggregate(average_cq)
    samples = sample_frame['Sample'].unique()
    nfs = gmean([pow(2, -grouped.ix[zip(repeat(ref_gene), samples)] + grouped.ix[ref_gene, ref_sample]) for ref_gene in ref_targets])
    return pd.Series(nfs, index=samples)

Source File: create_phylowgs_inputs.py From phylowgs with GNU General Public License v3.0

5 votes

def impute_missing_total_reads(total_reads, missing_variant_confidence):
  # Change NaNs to masked values via SciPy.
  masked_total_reads = ma.fix_invalid(total_reads)

  # Going forward, suppose you have v variants and s samples in a v*s matrix of
  # read counts. Missing values are masked.

  # Calculate geometric mean of variant read depth in each sample. Result: s*1
  sample_means = gmean(masked_total_reads, axis=0)
  assert np.sum(sample_means <= 0) == np.sum(np.isnan(sample_means)) == 0
  # Divide every variant's read count by its mean sample read depth to get read
  # depth enrichment relative to other variants in sample. Result: v*s
  normalized_to_sample = np.dot(masked_total_reads, np.diag(1./sample_means))
  # For each variant, calculate geometric mean of its read depth enrichment
  # across samples. Result: v*1
  variant_mean_reads = gmean(normalized_to_sample, axis=1)
  assert np.sum(variant_mean_reads <= 0) == np.sum(np.isnan(variant_mean_reads)) == 0

  # Convert 1D arrays to vectors to permit matrix multiplication.
  imputed_counts = np.dot(variant_mean_reads.reshape((-1, 1)), sample_means.reshape((1, -1)))
  nan_coords = np.where(np.isnan(total_reads))
  total_reads[nan_coords] = imputed_counts[nan_coords]
  assert np.sum(total_reads <= 0) == np.sum(np.isnan(total_reads)) == 0

  total_reads[nan_coords] *= missing_variant_confidence
  return np.floor(total_reads).astype(np.int)

Source File: test_mstats_basic.py From GraphicDesignPatternByPython with MIT License

5 votes

def test_gmean(self):
        for n in self.get_n():
            x, y, xm, ym = self.generate_xy_sample(n)
            r = stats.gmean(abs(x))
            rm = stats.mstats.gmean(abs(xm))
            assert_allclose(r, rm, rtol=1e-13)

            r = stats.gmean(abs(y))
            rm = stats.mstats.gmean(abs(ym))
            assert_allclose(r, rm, rtol=1e-13)

Source File: test_mstats_basic.py From GraphicDesignPatternByPython with MIT License

5 votes

def test_2D(self):
        a = ma.array(((1, 2, 3, 4), (1, 2, 3, 4), (1, 2, 3, 4)),
                     mask=((0, 0, 0, 0), (1, 0, 0, 1), (0, 1, 1, 0)))
        actual = mstats.gmean(a)
        desired = np.array((1,2,3,4))
        assert_array_almost_equal(actual, desired, decimal=14)

        desired1 = mstats.gmean(a,axis=0)
        assert_array_almost_equal(actual, desired1, decimal=14)

        actual = mstats.gmean(a, -1)
        desired = ma.array((np.power(1*2*3*4,1./4.),
                            np.power(2*3,1./2.),
                            np.power(1*4,1./2.)))
        assert_array_almost_equal(actual, desired, decimal=14)

Source File: test_mstats_basic.py From GraphicDesignPatternByPython with MIT License

5 votes

def test_1D_float96(self):
        a = ma.array((1,2,3,4), mask=(0,0,0,1))
        actual_dt = mstats.gmean(a, dtype=np.float96)
        desired_dt = np.power(1 * 2 * 3, 1. / 3.).astype(np.float96)
        assert_almost_equal(actual_dt, desired_dt, decimal=14)
        assert_(actual_dt.dtype == desired_dt.dtype)

Source File: run.py From torchdiffeq with MIT License

5 votes

def main():

    sol = dict()
    for method in ['dopri5', 'adams']:
        for tol in [1e-3, 1e-6, 1e-9]:
            print('======= {} | tol={:e} ======='.format(method, tol))
            nfes = []
            times = []
            errs = []
            for c in ['A', 'B', 'C', 'D', 'E']:
                for i in ['1', '2', '3', '4', '5']:
                    diffeq, init, _ = getattr(detest, c + i)()
                    t0, y0 = init()
                    diffeq = NFEDiffEq(diffeq)

                    if not c + i in sol:
                        sol[c + i] = odeint(
                            diffeq, y0, torch.stack([t0, torch.tensor(20.)]), atol=1e-12, rtol=1e-12, method='dopri5'
                        )[1]
                        diffeq.nfe = 0

                    start_time = time.time()
                    est = odeint(diffeq, y0, torch.stack([t0, torch.tensor(20.)]), atol=tol, rtol=tol, method=method)
                    time_spent = time.time() - start_time

                    error = torch.sqrt(torch.mean((sol[c + i] - est[1])**2))

                    errs.append(error.item())
                    nfes.append(diffeq.nfe)
                    times.append(time_spent)

                    print('{}: NFE {} | Time {} | Err {:e}'.format(c + i, diffeq.nfe, time_spent, error.item()))

            print('Total NFE {} | Total Time {} | GeomAvg Error {:e}'.format(np.sum(nfes), np.sum(times), gmean(errs)))

Source File: test_mstats_basic.py From Computable with MIT License

5 votes

def test_2D(self):
        a = ma.array(((1,2,3,4),(1,2,3,4),(1,2,3,4)),
                     mask=((0,0,0,0),(1,0,0,1),(0,1,1,0)))
        actual = mstats.gmean(a)
        desired = np.array((1,2,3,4))
        assert_array_almost_equal(actual, desired, decimal=14)
        #
        desired1 = mstats.gmean(a,axis=0)
        assert_array_almost_equal(actual, desired1, decimal=14)
        #
        actual = mstats.gmean(a, -1)
        desired = ma.array((np.power(1*2*3*4,1./4.),
                            np.power(2*3,1./2.),
                            np.power(1*4,1./2.)))
        assert_array_almost_equal(actual, desired, decimal=14)

Source File: utils.py From argus-freesound with MIT License

5 votes

def gmean_preds_blend(probs_df_lst):
    blend_df = probs_df_lst[0]
    blend_values = np.stack([df.loc[blend_df.index.values].values
                             for df in probs_df_lst], axis=0)
    blend_values = gmean(blend_values, axis=0)

    blend_df.values[:] = blend_values
    return blend_df

Source File: multivariate_convolve_2.py From alphacsc with BSD 3-Clause "New" or "Revised" License

4 votes

def benchmark():
    n_atoms_range = [1, 10, 100]
    sparsity_range = np.logspace(-4, -1, 5)
    n_times_atom_range = [10, 40, 160]
    n_times_valid_range = [200, 800, 3200]

    n_runs = (len(n_atoms_range) * len(sparsity_range) * len(
        n_times_atom_range) * len(n_times_valid_range) * len(all_func))

    k = 0
    results = []
    for n_atoms in n_atoms_range:
        for sparsity in sparsity_range:
            for n_times_atom in n_times_atom_range:
                for n_times_valid in n_times_valid_range:
                    for func in all_func:
                        print('%d/%d, %s' % (k, n_runs, func.__name__))
                        k += 1
                        results.append(
                            run_one(n_atoms, sparsity, n_times_atom,
                                    n_times_valid, func))

    df = pd.DataFrame(results, columns=[
        'n_atoms', 'sparsity', 'n_times_atom', 'n_times_valid', 'func',
        'duration'
    ])
    fig, axes = plt.subplots(2, 2, figsize=(10, 8))
    axes = axes.ravel()

    def plot(index, ax):
        pivot = df.pivot_table(columns='func', index=index, values='duration',
                               aggfunc=gmean)
        pivot.plot(ax=ax)
        ax.set_xscale('log')
        ax.set_yscale('log')
        ax.set_ylabel('duration')

    plot('n_atoms', axes[0])
    plot('n_times_atom', axes[1])
    plot('sparsity', axes[2])
    plot('n_times_valid', axes[3])
    plt.tight_layout()
    plt.show()

Source File: compute_ztz.py From alphacsc with BSD 3-Clause "New" or "Revised" License

4 votes

def benchmark():
    n_atoms_range = [1, 3, 9]
    sparsity_range = [0.01, 0.03, 0.1, 0.3]
    n_times_atom_range = [8, 32, 128]
    n_times_valid_range = [1000, 30000, 10000]

    n_runs = (len(n_atoms_range) * len(sparsity_range) * len(
        n_times_atom_range) * len(n_times_valid_range) * len(all_func))

    k = 0
    results = []
    for n_atoms in n_atoms_range:
        for sparsity in sparsity_range:
            for n_times_atom in n_times_atom_range:
                for n_times_valid in n_times_valid_range:
                    for func in all_func:
                        print('%d/%d, %s' % (k, n_runs, func.__name__))
                        k += 1
                        results.append(
                            run_one(n_atoms, sparsity, n_times_atom,
                                    n_times_valid, func))

    df = pd.DataFrame(results, columns=[
        'n_atoms', 'sparsity', 'n_times_atom', 'n_times_valid', 'func',
        'duration'
    ])
    fig, axes = plt.subplots(2, 2, figsize=(10, 8))
    axes = axes.ravel()

    def plot(index, ax):
        pivot = df.pivot_table(columns='func', index=index, values='duration',
                               aggfunc=gmean)
        pivot.plot(ax=ax)
        ax.set_xscale('log')
        ax.set_yscale('log')
        ax.set_ylabel('duration')

    plot('n_atoms', axes[0])
    plot('n_times_atom', axes[1])
    plot('sparsity', axes[2])
    plot('n_times_valid', axes[3])
    plt.tight_layout()
    plt.show()

Source File: compute_ztX.py From alphacsc with BSD 3-Clause "New" or "Revised" License

4 votes

def benchmark():
    n_atoms_range = [1, 4, 16]
    sparsity_range = np.logspace(-4, -1, 5)
    n_times_atom_range = [10, 40, 160]
    n_times_valid_range = [200, 800, 3200]

    n_runs = (len(n_atoms_range) * len(sparsity_range) * len(
        n_times_atom_range) * len(n_times_valid_range) * len(all_func))

    k = 0
    results = []
    for n_atoms in n_atoms_range:
        for sparsity in sparsity_range:
            for n_times_atom in n_times_atom_range:
                for n_times_valid in n_times_valid_range:
                    for func in all_func:
                        print('%d/%d, %s' % (k, n_runs, func.__name__))
                        k += 1
                        results.append(
                            run_one(n_atoms, sparsity, n_times_atom,
                                    n_times_valid, func))

    df = pd.DataFrame(results, columns=[
        'n_atoms', 'sparsity', 'n_times_atom', 'n_times_valid', 'func',
        'duration'
    ])
    fig, axes = plt.subplots(2, 2, figsize=(10, 8))
    axes = axes.ravel()

    def plot(index, ax):
        pivot = df.pivot_table(columns='func', index=index, values='duration',
                               aggfunc=gmean)
        pivot.plot(ax=ax)
        ax.set_xscale('log')
        ax.set_yscale('log')
        ax.set_ylabel('duration')

    plot('n_atoms', axes[0])
    plot('n_times_atom', axes[1])
    plot('sparsity', axes[2])
    plot('n_times_valid', axes[3])
    plt.tight_layout()
    plt.show()

Python scipy.stats.mstats.gmean() Examples