Python scipy.stats Examples

The following are 29 code examples of the scipy.stats module, collected from open-source projects. The source file, project, and license are noted above each example. You may also want to check out all available functions and classes of scipy.stats.
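
Most of the examples below follow the same frozen-distribution pattern: a distribution from scipy.stats is instantiated with fixed parameters and then queried for its pdf, cdf, or random samples. A minimal sketch (parameter values here are arbitrary):

import scipy.stats

dist = scipy.stats.norm(loc=0.0, scale=1.0)   # frozen standard normal
print(dist.pdf(0.0))                          # density at 0 (~0.3989)
print(dist.cdf(1.96))                         # cumulative probability (~0.975)
print(dist.rvs(size=5, random_state=0))       # five reproducible random draws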
Example #1
Source File: cookbook.py    From PyDDM with MIT License
def apply(self, solution):
        # Make sure params are within range
        assert self.ndsigma > 0, "Invalid st parameter"
        # Extract components of the solution object for convenience
        corr = solution.corr
        err = solution.err
        dt = solution.model.dt
        # Create the weights for different timepoints
        times = np.asarray(list(range(-len(corr), len(corr))))*dt
        weights = scipy.stats.norm(scale=self.ndsigma, loc=self.nondectime).pdf(times)
        if np.sum(weights) > 0:
            weights /= np.sum(weights) # Ensure it integrates to 1
        newcorr = np.convolve(weights, corr, mode="full")[len(corr):(2*len(corr))]
        newerr = np.convolve(weights, err, mode="full")[len(corr):(2*len(corr))]
        return Solution(newcorr, newerr, solution.model,
                        solution.conditions, solution.undec)
# End OverlayNonDecisionGaussian

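The overlay above smooths the response-time histograms by convolving them with a normalized Gaussian kernel built from scipy.stats.norm. A standalone sketch of that step, with invented data and parameter values:

import numpy as np
import scipy.stats

dt, ndsigma, nondectime = 0.01, 0.05, 0.3          # made-up model parameters
corr = np.exp(-np.linspace(0, 5, 500))             # stand-in for solution.corr

times = np.arange(-len(corr), len(corr)) * dt
weights = scipy.stats.norm(scale=ndsigma, loc=nondectime).pdf(times)
if weights.sum() > 0:
    weights /= weights.sum()                       # kernel integrates to 1
smoothed = np.convolve(weights, corr, mode="full")[len(corr):2 * len(corr)]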
Example #2
Source File: sampler.py    From batchflow with Apache License 2.0
def _get_method_by_alias(alias, module, tf_distributions=None):
    """ Fetch fullname of a randomizer from ``scipy.stats``, ``tensorflow`` or
    ``numpy`` by its alias or fullname.
    """
    rnd_submodules = {'np': np.random,
                      'tf': tf_distributions,
                      'ss': ss}
    # fetch fullname
    fullname = ALIASES.get(alias, {module: alias for module in ['np', 'tf', 'ss']}).get(module, None)
    if fullname is None:
        raise ValueError("Distribution %s has no implementaion in module %s" % (alias, module))

    # check that the randomizer is implemented in corresponding module
    if not hasattr(rnd_submodules[module], fullname):
        raise ValueError("Distribution %s has no implementaion in module %s" % (fullname, module))

    return fullname 
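
A reduced sketch of the alias lookup, with a hypothetical ALIASES table covering only numpy and scipy.stats:

import numpy as np
import scipy.stats as ss

ALIASES = {'n': {'np': 'normal', 'ss': 'norm'},
           'u': {'np': 'uniform', 'ss': 'uniform'}}

def get_sampler(alias, module='ss'):
    fullname = ALIASES.get(alias, {}).get(module, alias)
    submodule = {'np': np.random, 'ss': ss}[module]
    if not hasattr(submodule, fullname):
        raise ValueError("Distribution %s has no implementation in module %s"
                         % (fullname, module))
    return getattr(submodule, fullname)

print(get_sampler('n').rvs(size=3))    # scipy.stats.norm samples
print(get_sampler('n', 'np')(size=3))  # np.random.normal samples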
Example #3
Source File: linear_model.py    From vnpy_crypto with MIT License
def get_influence(self):
        """
        get an instance of Influence with influence and outlier measures

        Returns
        -------
        infl : Influence instance
            the instance has methods to calculate the main influence and
            outlier measures for the OLS regression

        See also
        --------
        statsmodels.stats.outliers_influence.OLSInfluence
        """
        from statsmodels.stats.outliers_influence import OLSInfluence
        return OLSInfluence(self) 
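
A usage sketch (synthetic data, not from the original project) showing what get_influence returns in practice:

import numpy as np
import statsmodels.api as sm

rng = np.random.default_rng(0)
X = sm.add_constant(rng.normal(size=(50, 2)))
y = X @ np.array([1.0, 2.0, -1.0]) + rng.normal(size=50)
results = sm.OLS(y, X).fit()
infl = results.get_influence()        # OLSInfluence instance
print(infl.summary_frame().head())    # leverage, Cook's distance, dffits, ...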
Example #4
Source File: test_analytics.py    From vnpy_crypto with MIT License
def test_kurt(self):
        from scipy.stats import kurtosis
        alt = lambda x: kurtosis(x, bias=False)
        self._check_stat_op('kurt', alt)

        index = MultiIndex(levels=[['bar'], ['one', 'two', 'three'], [0, 1]],
                           labels=[[0, 0, 0, 0, 0, 0], [0, 1, 2, 0, 1, 2],
                                   [0, 1, 0, 1, 0, 1]])
        s = Series(np.random.randn(6), index=index)
        tm.assert_almost_equal(s.kurt(), s.kurt(level=0)['bar'])

        # test corner cases, kurt() returns NaN unless there's at least 4
        # values
        min_N = 4
        for i in range(1, min_N + 1):
            s = Series(np.ones(i))
            df = DataFrame(np.ones((i, i)))
            if i < min_N:
                assert np.isnan(s.kurt())
                assert np.isnan(df.kurt()).all()
            else:
                assert 0 == s.kurt()
                assert (df.kurt() == 0).all() 
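
The alt lambda above matters because pandas' kurt() is the bias-corrected (Fisher) estimator; a quick check of that correspondence on random data:

import numpy as np
import pandas as pd
from scipy.stats import kurtosis

s = pd.Series(np.random.randn(100))
assert np.isclose(s.kurt(), kurtosis(s, bias=False))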
Example #5
Source File: gaussian_fitting.py    From FRETBursts with GNU General Public License v2.0
def test_gaussian_fit():
    m0 = 0.1
    s0 = 0.4
    size = 500

    s = R.normal(size=size, loc=m0, scale=s0)
    #s = s[s<0.4]
    mu, sig = gaussian_fit(s)
    mu1, sig1 = S.norm.fit(s)
    mu2, sig2 = gaussian_fit_ml(s)

    print("ECDF ", mu, sig)
    print("ML         ", mu1, sig1)
    print("ML (manual)", mu2, sig2)

    H = np.histogram(s, bins=20, density=True)
    h = H[0]
    bw = H[1][1] - H[1][0]
    #bins_c = H[1][:-1]+0.5*bw
    bar(H[1][:-1], H[0], bw, alpha=0.3)

    x = np.r_[s.min()-1:s.max()+1:200j]
    plot(x, normpdf(x,m0,s0), lw=2, color='grey')
    plot(x, normpdf(x,mu,sig), lw=2, color='r', alpha=0.5)
    plot(x, normpdf(x,mu1,sig1), lw=2, color='b', alpha=0.5) 
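
The normpdf used here came from matplotlib.mlab, which no longer ships it; a drop-in replacement sketch using scipy.stats:

import scipy.stats

def normpdf(x, mu, sigma):
    # Gaussian density, standing in for the removed matplotlib.mlab.normpdf
    return scipy.stats.norm.pdf(x, loc=mu, scale=sigma)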
Example #6
Source File: anova.py    From DIVE-backend with GNU General Public License v3.0
def run_anova(df, independent_variables_names, dependent_variables_names, NUM_GROUPS_CUTOFF=15):
    '''
    Returns either a dictionary with the ANOVA stats or an empty list (if the
    ANOVA test is not valid).
    df : dataframe
    independent_variables : list of independent variables, where each is of the form [type, name, num_bins (0 means it will be treated as continuous)]
    dependent_variables : list of dependent variables, where each is of the form [type, name]
    '''
    num_independent_variables = len(independent_variables_names)
    num_dependent_variables = len(dependent_variables_names)

    transformed_data = add_binned_columns_to_df(df, independent_variables_names, dependent_variables_names)
    if num_dependent_variables == 1:
        first_dependent_variable = dependent_variables_names[0]
        return anova(transformed_data, independent_variables_names, first_dependent_variable)

    return [] 
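
The anova helper this delegates to is project-specific, but the one-way test itself is available directly in scipy.stats; a minimal sketch with made-up groups:

import scipy.stats

g1, g2, g3 = [4.1, 5.0, 4.8], [5.5, 6.1, 5.9], [4.0, 4.4, 4.2]
f_stat, p_val = scipy.stats.f_oneway(g1, g2, g3)  # F statistic and p-value
print(f_stat, p_val)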
Example #7
Source File: test_nonparametric.py    From pingouin with GNU General Public License v3.0
def test_mwu(self):
        """Test function mwu"""
        mwu_scp = scipy.stats.mannwhitneyu(x, y, use_continuity=True,
                                           alternative='two-sided')
        mwu_pg = mwu(x, y, tail='two-sided')
        # Similar to R: wilcox.test(df$x, df$y, paired = FALSE, exact = FALSE)
        # Note that the RBC values are compared to JASP in test_pairwise.py
        assert mwu_scp[0] == mwu_pg.at['MWU', 'U-val']
        assert mwu_scp[1] == mwu_pg.at['MWU', 'p-val']
        # One-sided
        assert np.median(x) > np.median(y)  # Tail = greater, x > y
        assert (mwu(x, y, tail='one-sided').at['MWU', 'p-val'] ==
                mwu(x, y, tail='greater').at['MWU', 'p-val'])
        assert (mwu(x, y, tail='less').at['MWU', 'p-val'] ==
                scipy.stats.mannwhitneyu(x, y, use_continuity=True,
                                         alternative='less')[1]) 
Example #8
Source File: test_nonparametric.py    From pingouin with GNU General Public License v3.0
def test_wilcoxon(self):
        """Test function wilcoxon"""
        # R: wilcox.test(df$x, df$y, paired = TRUE, exact = FALSE)
        # The V value is slightly different between SciPy and R
        # The p-value, however, is almost identical
        wc_scp = scipy.stats.wilcoxon(x2, y2, correction=True)
        wc_pg = wilcoxon(x2, y2, tail='two-sided')
        assert wc_scp[0] == wc_pg.at['Wilcoxon', 'W-val'] == 20.5  # JASP
        assert wc_scp[1] == wc_pg.at['Wilcoxon', 'p-val']
        wc_pg_less = wilcoxon(x2, y2, tail='less')
        wc_pg_greater = wilcoxon(x2, y2, tail='greater')
        wc_pg_ones = wilcoxon(x2, y2, tail='one-sided')
        pd.testing.assert_frame_equal(wc_pg_ones, wc_pg_less)
        # Note that the RBC values are compared to JASP in test_pairwise.py
        # The RBC values in JASP do not change according to the tail.
        assert round(wc_pg.at['Wilcoxon', 'RBC'], 3) == -0.379
        assert round(wc_pg_less.at['Wilcoxon', 'RBC'], 3) == -0.379
        assert round(wc_pg_greater.at['Wilcoxon', 'RBC'], 3) == -0.379
        # CLES is compared to:
        # https://janhove.github.io/reporting/2016/11/16/common-language-effect-sizes
        assert round(wc_pg.at['Wilcoxon', 'CLES'], 3) == 0.396
        assert round(wc_pg_less.at['Wilcoxon', 'CLES'], 3) == 0.604
        assert round(wc_pg_greater.at['Wilcoxon', 'CLES'], 3) == 0.396 
Example #9
Source File: gaussian_fitting.py    From FRETBursts with GNU General Public License v2.0
def gaussian2d_fit(sx, sy, guess=[0.5,1]):
    """2D-Gaussian fit of samples S using a fit to the empirical CDF."""
    assert sx.size == sy.size

    ## Empirical CDF
    ecdfx = [np.sort(sx), np.arange(0.5,sx.size+0.5)*1./sx.size]
    ecdfy = [np.sort(sy), np.arange(0.5,sy.size+0.5)*1./sy.size]

    ## Analytical gaussian CDF
    gauss_cdf = lambda x, mu, sigma: 0.5*(1+erf((x-mu)/(np.sqrt(2)*sigma)))

    ## Fitting the empirical CDF
    fitfunc = lambda p, x: gauss_cdf(x, p[0], p[1])
    errfunc = lambda p, x, y: fitfunc(p, x) - y
    px,v = leastsq(errfunc, x0=guess, args=(ecdfx[0],ecdfx[1]))
    py,v = leastsq(errfunc, x0=guess, args=(ecdfy[0],ecdfy[1]))
    print("2D Gaussian CDF fit", px, py)

    mux, sigmax = px[0], px[1]
    muy, sigmay = py[0], py[1]
    return mux, sigmax, muy, sigmay 
Example #10
Source File: eval.py    From woe with MIT License
def wald_test(model,X):
    '''
    :param model: a model file that should have predict_proba() function
    :param X: dataset features DataFrame
    :return: the value of wald_stats,p_value
    '''
    pred_probs = np.matrix(model.predict_proba(X))
    X_design = np.hstack((np.ones(shape=(X.shape[0], 1)), X))
    diag_array = np.multiply(pred_probs[:, 0], pred_probs[:, 1]).A1
    V = scipy.sparse.diags(diag_array)
    m1 = X_design.T * V
    m2 = m1.dot(X_design)
    cov_mat = np.linalg.inv(m2)

    model_params = np.hstack((model.intercept_[0], model.coef_[0]))
    wald_stats = (model_params / np.sqrt(np.diag(cov_mat))) ** 2

    # The Wald statistic is asymptotically chi-squared with 1 df under H0,
    # so the p-value is the upper-tail probability of that distribution
    # (taking the pdf of scipy.stats.wald does not give a p-value).
    p_value = scipy.stats.chi2.sf(wald_stats, 1)

    return wald_stats,p_value 
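
A hedged usage sketch with scikit-learn, since any model exposing predict_proba, intercept_ and coef_ satisfies the contract above (data here is synthetic):

import numpy as np
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression

X, y = make_classification(n_samples=200, n_features=3, n_informative=3,
                           n_redundant=0, random_state=0)
model = LogisticRegression().fit(X, y)
wald_stats, p_value = wald_test(model, X)  # one statistic per coefficient
print(wald_stats, p_value)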
Example #11
Source File: parameters.py    From imgaug with MIT License
def _draw_samples(self, size, random_state):
        # pylint: disable=invalid-name
        loc = self.loc.draw_sample(random_state=random_state)
        scale = self.scale.draw_sample(random_state=random_state)
        low = self.low.draw_sample(random_state=random_state)
        high = self.high.draw_sample(random_state=random_state)
        seed = random_state.generate_seed_()
        if low > high:
            low, high = high, low
        assert scale >= 0, "Expected scale to be >=0, got %.4f." % (scale,)
        if scale == 0:
            return np.full(size, fill_value=loc, dtype=np.float32)
        a = (low - loc) / scale
        b = (high - loc) / scale
        tnorm = scipy.stats.truncnorm(a=a, b=b, loc=loc, scale=scale)

        # Using a seed here works with both np.random interfaces.
        # Last time tried, scipy crashed when providing just
        # random_state.generator on the new np.random interface.
        return tnorm.rvs(size=size, random_state=seed).astype(np.float32) 
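
truncnorm takes its bounds a and b in standard units relative to loc and scale, which is exactly why the code above rescales low and high; a quick check of that parameterization:

import scipy.stats

loc, scale, low, high = 0.5, 2.0, -1.0, 3.0
a, b = (low - loc) / scale, (high - loc) / scale
samples = scipy.stats.truncnorm(a=a, b=b, loc=loc, scale=scale).rvs(
    size=1000, random_state=0)
assert samples.min() >= low and samples.max() <= high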
Example #12
Source File: univariate_selection.py    From Mastering-Elasticsearch-7.0 with MIT License
def _clean_nans(scores):
    """
    Fixes Issue #1240: NaNs can't be properly compared, so change them to the
    smallest value of scores' dtype. -inf seems to be unreliable.
    """
    # XXX where should this function be called? fit? scoring functions
    # themselves?
    scores = as_float_array(scores, copy=True)
    scores[np.isnan(scores)] = np.finfo(scores.dtype).min
    return scores


######################################################################
# Scoring functions


# The following function is a rewriting of scipy.stats.f_oneway
# Contrary to the scipy.stats.f_oneway implementation it does not
# copy the data while keeping the inputs unchanged. 
Example #13
Source File: distribution_check.py    From hydrology with GNU General Public License v3.0
def plot(fcts, data):
    import matplotlib.pyplot as plt
    import numpy as np
    import scipy.stats

    # plot data ("normed" was removed from matplotlib; "density" replaces it)
    plt.hist(data, density=True, bins=max(10, len(data) // 10))

    # plot fitted probability densities
    for fct in fcts:
        dist = getattr(scipy.stats, fct)  # look up by name instead of eval
        params = dist.fit(data)
        f = dist(*params)  # freeze the fitted distribution
        x = np.linspace(f.ppf(0.001), f.ppf(0.999), 500)
        plt.plot(x, f.pdf(x), lw=3, label=fct)
    plt.legend(loc='best', frameon=False)
    plt.title("Top " + str(len(fcts)) + " Results")
    plt.show()
Example #14
Source File: H_err_dp.py    From pyrad with GNU General Public License v3.0
def optim(WORK,handle, minsamp, CUT1, CUT2, datatype, haplos):
    name = handle.split("/")[-1].replace(".clustS.gz","")
    D = consensus(handle, minsamp, CUT1, CUT2, datatype)
    P = makeP(D)
    Tab = table_c(D)
    del D
    #H,E = scipy.optimize.fmin(LL,x0,(P,Tab),maxiter=500,maxfun=200,ftol=0.0001,disp=False,full_output=False)
    if haplos == 1:
        x0 = [0.001]
        H = 0.
        E = scipy.optimize.fmin(LL_haploid,x0,(P,Tab),disp=False,full_output=False)
    else:
        x0 = [0.01,0.001]
        H,E = scipy.optimize.fmin(LL,x0,(P,Tab),disp=False,full_output=False)
    del Tab
    outfile = open(WORK+"stats/."+name+".temp",'w')
    outfile.write("\t".join([name.strip(".gz"),str(round(H,8))[0:10],str(round(E,8))[0:10],"\n"]))
    outfile.close()
    sys.stderr.write(".") 
Example #15
Source File: univariate_selection.py    From Mastering-Elasticsearch-7.0 with MIT License
def _chisquare(f_obs, f_exp):
    """Fast replacement for scipy.stats.chisquare.

    Version from https://github.com/scipy/scipy/pull/2525 with additional
    optimizations.
    """
    f_obs = np.asarray(f_obs, dtype=np.float64)

    k = len(f_obs)
    # Reuse f_obs for chi-squared statistics
    chisq = f_obs
    chisq -= f_exp
    chisq **= 2
    with np.errstate(invalid="ignore"):
        chisq /= f_exp
    chisq = chisq.sum(axis=0)
    return chisq, special.chdtrc(k - 1, chisq) 
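
A quick agreement check against scipy.stats.chisquare, with made-up counts (note that _chisquare reuses its f_obs argument in place, hence the copy):

import numpy as np
import scipy.stats

f_obs = np.array([16., 18., 16., 14., 12., 12.])
f_exp = np.full(6, f_obs.mean())
chisq, p = _chisquare(f_obs.copy(), f_exp)
assert np.allclose((chisq, p), scipy.stats.chisquare(f_obs, f_exp))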
Example #16
Source File: test_analytics.py    From predictive-maintenance-using-machine-learning with Apache License 2.0
def test_corr_rank(self):
        import scipy
        import scipy.stats as stats

        # kendall and spearman
        A = tm.makeTimeSeries()
        B = tm.makeTimeSeries()
        A[-5:] = A[:5]
        result = A.corr(B, method='kendall')
        expected = stats.kendalltau(A, B)[0]
        tm.assert_almost_equal(result, expected)

        result = A.corr(B, method='spearman')
        expected = stats.spearmanr(A, B)[0]
        tm.assert_almost_equal(result, expected)

        # these methods got rewritten in 0.8
        if LooseVersion(scipy.__version__) < LooseVersion('0.9'):
            pytest.skip("skipping corr rank because of scipy version "
                        "{0}".format(scipy.__version__))

        # results from R
        A = Series(
            [-0.89926396, 0.94209606, -1.03289164, -0.95445587, 0.76910310, -
             0.06430576, -2.09704447, 0.40660407, -0.89926396, 0.94209606])
        B = Series(
            [-1.01270225, -0.62210117, -1.56895827, 0.59592943, -0.01680292,
             1.17258718, -1.06009347, -0.10222060, -0.89076239, 0.89372375])
        kexp = 0.4319297
        sexp = 0.5853767
        tm.assert_almost_equal(A.corr(B, method='kendall'), kexp)
        tm.assert_almost_equal(A.corr(B, method='spearman'), sexp) 
Example #17
Source File: copula.py    From pycopula with Apache License 2.0
def cdf(self, x):
        self._check_dimension(x)
        # Map the uniform marginals to Student-t quantiles
        tv = np.asarray([scipy.stats.t.ppf(u, df=self.df) for u in x])

        def fun(a, b):
            return multivariate_t_distribution(np.asarray([a, b]), np.asarray([0, 0]), self.R, self.df, self.dim)

        # CDF of the copula: integrate the bivariate t density up to the
        # quantiles, with -10 standing in for -infinity
        lim_0 = lambda x: -10
        lim_1 = lambda x: tv[1]
        return scipy.integrate.dblquad(fun, -10, tv[0], lim_0, lim_1)[0]
Example #18
Source File: test_analytics.py    From predictive-maintenance-using-machine-learning with Apache License 2.0
def test_corr(self, datetime_series):
        import scipy.stats as stats

        # full overlap
        tm.assert_almost_equal(datetime_series.corr(datetime_series), 1)

        # partial overlap
        tm.assert_almost_equal(datetime_series[:15].corr(datetime_series[5:]),
                               1)

        assert isna(datetime_series[:15].corr(datetime_series[5:],
                    min_periods=12))

        ts1 = datetime_series[:15].reindex(datetime_series.index)
        ts2 = datetime_series[5:].reindex(datetime_series.index)
        assert isna(ts1.corr(ts2, min_periods=12))

        # No overlap
        assert np.isnan(datetime_series[::2].corr(datetime_series[1::2]))

        # all NA
        cp = datetime_series[:10].copy()
        cp[:] = np.nan
        assert isna(cp.corr(cp))

        A = tm.makeTimeSeries()
        B = tm.makeTimeSeries()
        result = A.corr(B)
        expected, _ = stats.pearsonr(A, B)
        tm.assert_almost_equal(result, expected) 
Example #19
Source File: models.py    From pyprocessmacro with MIT License
def _estimate(self):
        """
        Estimates the direct effect of X on Y, and return the results into as a dictionary.
        :return: dict
            A dictionary of parameters and model estimates.
        """
        mod_values = [i for i in product(*self._moderators_values)]
        mod_symb = self._moderators_symb
        betas, se, llci, ulci = self._get_conditional_direct_effects(
            mod_symb, mod_values
        )
        t = betas / se
        if self._is_logit:
            p = stats.norm.sf(np.abs(t)) * 2
        else:
            df_e = self._model.estimation_results["df_e"]
            p = stats.t.sf(np.abs(t), df_e) * 2
        estimation_results = {
            "betas": betas,
            "se": se,
            "t": t,
            "p": p,
            "llci": llci,
            "ulci": ulci,
        }
        return estimation_results 
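
The two branches compute a two-sided p-value by doubling the survival function of |t|, against the normal for logit models and Student's t otherwise; a standalone sketch with made-up values:

import numpy as np
import scipy.stats as stats

t = np.array([2.1, -0.3])
p_norm = stats.norm.sf(np.abs(t)) * 2     # logit case
p_t = stats.t.sf(np.abs(t), 30) * 2       # OLS case with df_e = 30 (made up)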
Example #20
Source File: models.py    From pyprocessmacro with MIT License
def model_summary(self):
        """
        The summary of the model statistics: R², F-stats, etc...
        :return: A DataFrame of model statistics
        """
        results = self.estimation_results
        statistics = ["R2", "adjR2", "mse", "F", "df_r", "df_e", "F_pval"]
        row = [[results[s] for s in statistics]]
        df = pd.DataFrame(
            row,
            index=[""],
            columns=["R²", "Adj. R²", "MSE", "F", "df1", "df2", "p-value"],
        )
        return df 
Example #21
Source File: analysis.py    From mtgencode with MIT License
def main(infile, verbose = False):
    lm = ngrams.build_ngram_model(jdecode.mtg_open_file(str(os.path.join(datadir, 'output.txt'))),
                            3, separate_lines=True, verbose=True)
    stats = get_statistics(infile, lm=lm, sep=True, verbose=verbose)
    print_statistics(stats) 
Example #22
Source File: analysis.py    From mtgencode with MIT License
def print_statistics(stats, ident = 0):
    for k in stats:
        if isinstance(stats[k], OrderedDict):
            print(' ' * ident + str(k) + ':')
            print_statistics(stats[k], ident=ident+2)
        elif isinstance(stats[k], dict):
            print(' ' * ident + str(k) + ': <dict with ' + str(len(stats[k])) + ' entries>')
        elif isinstance(stats[k], list):
            print(' ' * ident + str(k) + ': <list with ' + str(len(stats[k])) + ' entries>')
        else:
            print(' ' * ident + str(k) + ': ' + str(stats[k])) 
Example #23
Source File: H_err_dp.py    From pyrad with GNU General Public License v3.0
def L1(E,P,N):
    """probability homozygous"""
    h = []
    s = sum(N)
    for i,l in enumerate(N):
        p = P[i]
        b = scipy.stats.binom.pmf(s-l,s,E)
        h.append(p*b)
    return sum(h) 
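
binom.pmf(k, n, p) is P(X = k) for k successes out of n trials; in the sequencing context above, for example, the probability of 2 read errors out of 10 reads at error rate 0.01:

import scipy.stats

print(scipy.stats.binom.pmf(2, 10, 0.01))  # ~0.0042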
Example #24
Source File: demo_model.py    From momi2 with GNU General Public License v3.0
def add_size_param(self, name, N0=None, lower=1, upper=1e10, rgen=None):
        """Add a size parameter to the demographic model.

        :param str name: Parameter name
        :param float N0: Starting value. If None, use ``rgen`` to \
        randomly sample
        :param float lower: Lower bound
        :param float upper: Upper bound
        :param function rgen: Function to sample a random starting \
        value. If None, a truncated exponential with rate ``1 / N_e``
        """
        if rgen is None:
            scale = self.N_e
            truncexpon = scipy.stats.truncexpon(b=(upper-lower)/scale,
                                                loc=lower, scale=scale)
            def rgen(params):
                return truncexpon.rvs()

        def scale_transform(x, p):
            return np.log(x)

        self.add_parameter(name, N0,
                           scaled_lower=scale_transform(lower, None),
                           scaled_upper=scale_transform(upper, None),
                           scale_transform=scale_transform,
                           unscale_transform=lambda x, p: np.exp(x),
                           rgen=rgen) 
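
truncexpon's support is [loc, loc + b*scale], so the b, loc and scale chosen above pin the sampler to the [lower, upper] bounds; a quick check with smaller, made-up numbers:

import scipy.stats

lower, upper, scale = 1.0, 100.0, 10.0
truncexpon = scipy.stats.truncexpon(b=(upper - lower) / scale,
                                    loc=lower, scale=scale)
draws = truncexpon.rvs(size=1000, random_state=0)
assert draws.min() >= lower and draws.max() <= upper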
Example #25
Source File: distribution_check.py    From hydrology with GNU General Public License v3.0
def check(data, fct, verbose=False):
    # fit the data set against the given probability distribution
    parameters = getattr(scipy.stats, fct).fit(data)  # look up by name instead of eval
    # apply the two-sided Kolmogorov-Smirnov test
    D, p = scipy.stats.kstest(data, fct, args=parameters)

    if math.isnan(p): p = 0
    if math.isnan(D): D = 0

    if verbose:
        print(fct.ljust(16) + "p: " + str(p).ljust(25) + "D: " + str(D))

    return (fct, p, D)

######################################################################################## 
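
A usage sketch (synthetic data, assuming the function's module imports math and scipy.stats): a distribution that matches the data should get a high Kolmogorov-Smirnov p-value, a mismatched one a low p-value:

import numpy as np

data = np.random.default_rng(0).normal(loc=2.0, scale=0.5, size=200)
print(check(data, "norm", verbose=True))   # good fit, high p expected
print(check(data, "expon", verbose=True))  # poor fit, low p expected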
Example #26
Source File: thinkdsp.py    From Lie_to_me with MIT License
def estimate_slope(self, low=1, high=-12000):
        """Runs linear regression on log cumulative power vs log frequency.

        returns: slope, inter, r2, p, stderr
        """
        #print self.fs[low:high]
        #print self.cs[low:high]
        x = np.log(self.fs[low:high])
        y = np.log(self.cs[low:high])
        t = scipy.stats.linregress(x,y)
        return t 
Example #27
Source File: thinkdsp.py    From Lie_to_me with MIT License
def estimate_slope(self):
        """Runs linear regression on log power vs log frequency.

        returns: slope, inter, r2, p, stderr
        """
        x = np.log(self.fs[1:])
        y = np.log(self.power[1:])
        t = scipy.stats.linregress(x,y)
        return t 
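
Both estimate_slope variants return the scipy.stats.linregress result, whose fields are (slope, intercept, rvalue, pvalue, stderr); note rvalue is r, not r squared. A standalone sketch on synthetic log-log data:

import numpy as np
import scipy.stats

x = np.log(np.arange(1, 100))
y = -2.0 * x + 0.1 * np.random.default_rng(0).normal(size=99)
result = scipy.stats.linregress(x, y)
print(result.slope, result.rvalue ** 2)  # slope near -2, r^2 near 1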
Example #28
Source File: gaussian_fitting.py    From FRETBursts with GNU General Public License v2.0
def two_gaussian2d_fit(sx, sy, guess=[0., 1., 1., 1., 0.5]):
    """2D two-Gaussian fit of samples S using a fit to the empirical CDF.

    `guess` holds starting values for the five parameters
    [mu1, sig1, mu2, sig2, alpha] of each marginal mixture.
    """
    ## UNFINISHED (I have 2 alphas along the xy projections)
    assert sx.size == sy.size

    ## Empirical CDF
    ecdfx = [np.sort(sx), np.arange(0.5,sx.size+0.5)*1./sx.size]
    ecdfy = [np.sort(sy), np.arange(0.5,sy.size+0.5)*1./sy.size]

    ## Analytical gaussian CDF
    gauss_cdf = lambda x, mu, sigma: 0.5*(1+erf((x-mu)/(np.sqrt(2)*sigma)))

    gauss2d_cdf = lambda X,Y,mx,sx,my,sy: gauss_cdf(X,mx,sx)*gauss_cdf(Y,my,sy)

    two_cdf = lambda x, m1, s1, m2, s2, a:\
        a*gauss_cdf(x,m1,s1)+(1-a)*gauss_cdf(x,m2,s2)

    two2d_cdf = lambda X,Y, mx1, sx1, mx2, sx2, my1, sy1, my2, sy2, a:\
        a*gauss2d_cdf(X,Y,mx1,sx1,my1,sy1)+(1-a)*gauss2d_cdf(X,Y,mx2,sx2,my2,sy2)

    ## Fitting the empirical CDF
    fitfunc = lambda p, x: two_cdf(x, *p)
    errfunc = lambda p, x, y: fitfunc(p, x) - y
    fitfunc2d = lambda p, X,Y: two2d_cdf(X,Y, *p)
    errfunc2d = lambda p, X,Y,Z: fitfunc2d(p, X,Y) - Z

    px,v = leastsq(errfunc, x0=guess, args=(ecdfx[0],ecdfx[1]))
    py,v = leastsq(errfunc, x0=guess, args=(ecdfy[0],ecdfy[1]))
    print("2D Two-Gaussians CDF fit", px, py)

    mux1, sigmax1, mux2, sigmax2, alphax = px
    muy1, sigmay1, muy2, sigmay2, alphay = py
    return (mux1, sigmax1, mux2, sigmax2, alphax,
            muy1, sigmay1, muy2, sigmay2, alphay)
Example #29
Source File: cem.py    From visual_dynamics with MIT License
def iteration(self):
        if self.use_truncnorm:
            tn = scipy.stats.truncnorm((self.th_low - self.th_mean) / self.th_std, (self.th_high - self.th_mean) / self.th_std)
            ths = np.array([self.th_mean + dth for dth in self.th_std[None, :] * tn.rvs((self.batch_size, self.th_mean.size))])
            assert np.all(self.th_low <= ths) and np.all(ths <= self.th_high)
        else:
            ths = np.array([self.th_mean + dth for dth in self.th_std[None, :] * np.random.randn(self.batch_size, self.th_mean.size)])
        ys = np.array([self.f(th) for th in ths])
        elite_inds = ys.argsort()[::-1][:self.n_elite]
        elite_ths = ths[elite_inds]
        self.th_mean = elite_ths.mean(axis=0)
        self.th_std = elite_ths.std(axis=0)
        return ys.mean()