Python Examples of rpy2.robjects.FloatVector

Source File: test_serialization.py From rpy2 with GNU General Public License v2.0

6 votes

def test_pickle():
    tmp_file = tempfile.NamedTemporaryFile()
    robj = robjects.baseenv["pi"]
    pickle.dump(robj, tmp_file)
    tmp_file.flush()
    tmp_file.seek(0)
    robj_again = pickle.load(tmp_file)
    tmp_file.close()

    assert isinstance(robj, robjects.FloatVector)

    # Check that underlying R objects are identical.
    assert robjects.baseenv["identical"](robj,
                                         robj_again)[0]
    # Check the instance dict is also identical
    assert set(robj.__dict__.keys()) == set(robj_again.__dict__.keys())

Source File: moment_est.py From OptimalPortfolio with MIT License

6 votes

def sample_coM3(invariants):
    """
    Calculates sample third order co-moment matrix
    Taps into the R package PerformanceAnalytics through rpy2

    :param invariants: sample data of market invariants
    :type invariants: pd.Dataframe
    :param frequency: time horizon of projection, default set ot 252 days
    :type frequency: int
    :return: sample skew dataframe
    """
    
    importr('PerformanceAnalytics')
    if not isinstance(invariants, pd.DataFrame):
        warnings.warn("invariants not a pd.Dataframe", RuntimeWarning)
        invariants = pd.DataFrame(invariants)
    p = invariants.shape[1]
    coskew_function = robjects.r('M3.MM')
    r_inv_vec = robjects.FloatVector(np.concatenate(invariants.values))
    r_invariants = robjects.r.matrix(r_inv_vec,nrow=p,ncol=p)
    r_M3 = coskew_function(r_invariants)
    
    return np.matrix(r_M3)

Source File: moment_est.py From OptimalPortfolio with MIT License

6 votes

def sample_coM4(invariants):
    """
    Calculates sample fourth order co-moment matrix
    Taps into the R package PerformanceAnalytics through rpy2

    :param invariants: sample data of market invariants
    :type invariants: pd.Dataframe
    :param frequency: time horizon of projection, default set ot 252 days
    :type frequency: int
    :return: sample skew dataframe
    """
    
    importr('PerformanceAnalytics')
    if not isinstance(invariants, pd.DataFrame):
        warnings.warn("invariants not a pd.Dataframe", RuntimeWarning)
        invariants = pd.DataFrame(invariants)
    p = invariants.shape[1]
    coskew_function = robjects.r('M4.MM')
    r_inv_vec = robjects.FloatVector(np.concatenate(invariants.values))
    r_invariants = robjects.r.matrix(r_inv_vec,nrow=p,ncol=p)
    r_M4 = coskew_function(r_invariants)
    
    return np.matrix(r_M4)

Source File: RComparisonBase.py From tbats with MIT License

5 votes

def r_tbats(self, y, components):
        components = components.copy()
        if 'seasonal_periods' in components:
            components['seasonal_periods'] = ro.IntVector(components['seasonal_periods'])
        importr('forecast')
        r_bats_func = ro.r['tbats']
        r_forecast = ro.r['forecast']
        r_y = ro.FloatVector(list(y))
        r_model = r_bats_func(r_y, **components)
        summary = r_forecast(r_model)
        # predictions = np.array(summary.rx('fitted')).flatten()
        return summary, r_model

Source File: benchmarks.py From rpy2 with GNU General Public License v2.0

5 votes

def setup_func(kind):
#-- setup_sum-begin
    n = 20000
    x_list = [random.random() for i in range(n)]
    module = None
    if kind == "array.array":
        import array as module
        res = module.array('f', x_list)
    elif kind == "numpy.array":
        import numpy as module
        res = module.array(x_list, 'f')
    elif kind == "FloatVector":
        import rpy2.robjects as module
        res = module.FloatVector(x_list)
    elif kind == "FloatSexpVector":
        import rpy2.rinterface as module
        module.initr()
        res = module.FloatSexpVector(x_list)
    elif kind == "FloatSexpVector-memoryview-array":
        import rpy2.rinterface as module
        module.initr()
        tmp = module.FloatSexpVector(x_list)
        mv = tmp.memoryview()
        res = array.array(mv.format, mv)
    elif kind == "list":
        res = x_list
    elif kind == "R":
        import rpy2.robjects as module
        res = module.rinterface.FloatSexpVector(x_list)
        module.globalenv['x'] = res
        res = None
#-- setup_sum-end
    else:
        raise ValueError("Unknown kind '%s'" %kind)
    return (res, module)

Source File: test_vector.py From rpy2 with GNU General Public License v2.0

5 votes

def test_sample_probabilities():
    vec = robjects.IntVector(range(100))
    spl = vec.sample(10, probabilities=robjects.FloatVector([.01] * 100))
    assert len(spl) == 10

Source File: test_vector.py From rpy2 with GNU General Public License v2.0

5 votes

def test_sequence_to_vector():
    res = robjects.sequence_to_vector((1, 2, 3))
    assert isinstance(res, robjects.IntVector)

    res = robjects.sequence_to_vector((1, 2, 3.0))
    assert isinstance(res, robjects.FloatVector)

    res = robjects.sequence_to_vector(('ab', 'cd', 'ef'))
    assert isinstance(res, robjects.StrVector)

    with pytest.raises(ValueError):
        robjects.sequence_to_vector(list())

Source File: test_vector.py From rpy2 with GNU General Public License v2.0

5 votes

def test_float_repr():
    vec = robjects.vectors.FloatVector((1,2,3))
    r = repr(vec).split('\n')
    assert r[-1].startswith('[')
    assert r[-1].endswith(']')
    assert len(r[-1].split(',')) == 3

Source File: test_vector.py From rpy2 with GNU General Public License v2.0

5 votes

def test_nareal():
    vec = robjects.FloatVector((1.0, 2.0, 3.0))
    vec[0] = robjects.NA_Real
    assert robjects.baseenv['is.na'](vec)[0] is True

Source File: PipelineTransfacMatch.py From CGATPipelines with MIT License

5 votes

def nullDistPermutations(tfbs_genes, matched_genes, nPerms=1000):
    '''
    Randomly generate a null distribution of genes from the background
    set, match for pCpG and total number of genes.  Calculate
    enrichment of these genes for the given TFBS.  Permuate nPerm times.
    Return a median enrichment and p-value from the empirical
    cumulative frequency distribution.
    '''

    null_dist = []

    for i in range(0, nPerms):
        null_genes = genNullGeneSet(matched_genes)
        null_counts = countTFBSEnrichment(tfbs_genes, null_genes)
        null_enrich = null_counts[0] / float(null_counts[1])
        null_dist.append(null_enrich)

    # null_dist = robjects.FloatVector([float(x) for x in null_dist])
    # null_dist_r = robjects.FloatVector([float(x) for x in null_dist])

    # null_ecdf = ecdf(null_dist)
    out_dict = {}
    out_dict['null'] = null_dist
    out_dict['median'] = np.median(null_enrich)

    return out_dict

Source File: nonpar.py From DeepIV with MIT License

5 votes

def fit_and_evaluate(x,z,t,y,df):
    '''
    Fit and evaluate non-parametric regression using  Darolles, Fan, Florens and Renault (2011)

    Implemented in the `np` package in R.

    See [the np package documation](https://cran.r-project.org/web/packages/np/np.pdf) for details.
    '''
    npr=importr('np')
    y_R = robjects.FloatVector(list(y.flatten()))
    (x_eval, t_eval), y_true = test_points(df, 10000)
    mod = npr.npregiv(y_R, t, z, x=x, zeval=t_eval, xeval=x_eval,
                    method="Tikhonov", p=0, optim_method ="BFGS")
    return ((y_true - to_array(mod.rx2('phi.eval')))**2).mean()

Source File: RComparisonBase.py From tbats with MIT License

5 votes

def r_bats(self, y, components):
        components = components.copy()
        if 'seasonal_periods' in components:
            components['seasonal_periods'] = ro.IntVector(components['seasonal_periods'])
        importr('forecast')
        r_bats_func = ro.r['bats']
        r_forecast = ro.r['forecast']
        r_y = ro.FloatVector(list(y))
        r_model = r_bats_func(r_y, **components)
        summary = r_forecast(r_model)
        # predictions = np.array(summary.rx('fitted')).flatten()
        return summary, r_model

Source File: manager.py From iterativeWGCNA with GNU General Public License v2.0

5 votes

def histogram(self, vline=None, params=None):
        '''
        plot histogram with vline at x=vline
        '''
        self.params['x'] = ro.FloatVector(self.data)
        self.params['labels'] = False

        if params is not None:
            self.params.update(params)

        graphics().hist(**self.params)

        if vline is not None:
            lineParams = {'v': vline, 'col': 'red'}
            graphics().abline(**lineParams)

Source File: plotting.py From grocsvs with MIT License

5 votes

def barPlot(dict_, keysInOrder=None, printCounts=True, ylim=None, *args, **kwdargs):
    """ Plot a bar plot

    Args:
        dict_: a dictionary of name -> value, where value is the height of the bar
            use a collections.OrderedDict() to easily convey the order of the groups
        keysInOrder: an optional ordering of the keys in dict_ (alternate option to using collections.OrderedDict)
        printCounts: option to print the counts on top of each bar

    additional kwdargs are passed directly to r.barplot()
    """

    if not keysInOrder:
        keysInOrder = dict_.keys()
    
    heights = ro.FloatVector([dict_[key] for key in keysInOrder])

    kwdargs["names.arg"] = ro.StrVector(keysInOrder)

    if ylim is None:
        if printCounts:
            ylim = [min(heights), max(heights)*1.1]
        else:
            ylim = [min(heights), max(heights)]

    x = r.barplot(heights, ylim=ro.FloatVector(ylim), *args, **kwdargs)

    if printCounts:
        heightsStrings = ["{:.2g}".format(height) for height in heights]
        r.text(x, ro.FloatVector(heights), ro.StrVector(heightsStrings), pos=3)
    return x

Source File: qc.py From grocsvs with MIT License

5 votes

def visualize_report(tenx_sample_infos, outpath):
    try:
        from rpy2 import robjects as ro
        from grocsvs import plotting
        
        
        ro.r.pdf(outpath)

        frag_lengths = [numpy.log10(sample_info["frag_length_info"]["sampled"]) for sample_info in tenx_sample_infos.values()]
        max_ = max(numpy.percentile(cur_frag_lengths, 99.5) for cur_frag_lengths in frag_lengths)

        plotting.ecdf(frag_lengths, tenx_sample_infos.keys(), xlim=[0, max_], main="Fragment length distribution",
            xlab="Fragment length (log10)", legendWhere="bottomright")

        bc_counts = dict((name, sample_info["good_bc_count"]) for name, sample_info in tenx_sample_infos.items())
        plotting.barPlot(bc_counts, main="Number of high-quality barcodes", ylim=[0, 1.1*max(bc_counts.values())])

        # oldpar = r.par(mfrow=[min(3, len(tenx_sample_infos)), 2])
        oldpar = ro.r.par(mfrow=[2,1])

        for name, tenx_sample_info in tenx_sample_infos.items():
            C_Rs = tenx_sample_info["coverage_of_fragments"]
            C_Rs = C_Rs["coverages"] / C_Rs["lengths"].astype(float)

            ro.r.hist(ro.FloatVector(C_Rs), breaks=50, xlab="Fragment coverage by short-reads (C_R)", main=name)
            ro.r.hist(ro.FloatVector(tenx_sample_info["physical_depths"]), breaks=100, xlab="Coverage by long fragments (C_F)",
                main=name)

        ro.r.par(oldpar)

        ro.r["dev.off"]()
    except:
        with open(outpath, "w") as f:
            f.write("[the visual report requires rpy2 to be correctly installed]")

Source File: plotting.py From svviz with MIT License

4 votes

def ecdf(vectors, labels=None, colors=["red", "blue", "orange", "violet", "green", "brown"],
         xlab="", ylab="cumulative fraction", main="", legendWhere="topleft", 
         lty=1, lwd=1, legendArgs=None, labelsIncludeN=True, **ecdfKwdArgs):
    """ Take a list of lists, convert them to vectors, and plots them sequentially on a CDF """

    if ro is None:
        return

    #print "MEANS:", main
    #for vector, label in zip(convertToVectors, labels):
    #    print label, numpy.mean(vector)
    
    def _expand(item):
        try:
            iter(item)
            return item
        except TypeError:
            return [item] * len(vectors)
            
    
    lty = _expand(lty)
    lwd = _expand(lwd)


    if not "xlim" in ecdfKwdArgs or ecdfKwdArgs["xlim"] is None:
        xlim = [min(min(vector) for vector in vectors if len(vector) > 0),
                max(max(vector) for vector in vectors if len(vector) > 0)]
        ecdfKwdArgs["xlim"] = xlim

    ecdfKwdArgs["xlim"] = ro.FloatVector(xlim)


    started = False
    for i, vector in enumerate(vectors):
        if len(vector) > 0:
            vector = ro.FloatVector(vector)
            ecdfKwdArgs.update({"verticals":True, "do.points":False, 
                                "col.hor":colors[(i)%len(colors)],
                                "col.vert":colors[(i)%len(colors)],
                                "lty":lty[(i)%len(lty)],
                                "lwd":lwd[(i)%len(lwd)]})
            ecdf = r.ecdf(vector)

            if not started:
                r.plot(ecdf, main=main, xlab=xlab, ylab=ylab, **ecdfKwdArgs)
                started = True
            else:
                r.plot(ecdf, add=True, **ecdfKwdArgs)

    if labels is not None:
        if labelsIncludeN:
            labelsWithN = []
            for i, label in enumerate(labels):
                labelsWithN.append(label+" (n=%d)"%len(vectors[i]))
        else:
            labelsWithN = labels
        legendArgs = asdict(legendArgs, {"cex":0.7})
        r.legend(legendWhere, legend=ro.StrVector(labelsWithN), lty=ro.IntVector(lty),
                 lwd=ro.IntVector([lwdi*2 for lwdi in lwd]), col=ro.StrVector(colors),
                 bg="white", **legendArgs)

Source File: pipeline_transfacmatch.py From CGATPipelines with MIT License

4 votes

def collateEnrichmentOfTFBS(infiles, outfile):
    '''
    Concatenate the enrichment scores for all gene sets into a single table
    and recalculate qvalues.
    '''

    def _fetch(table_name):
        table_name = table_name.replace("-", "_")
        # retrieve table
        dbh = sqlite3.connect(PARAMS["database_name"])
        cc = dbh.cursor()
        data = cc.execute("SELECT * FROM %s" % table_name).fetchall()
        cc.close()

        # return pandas dataframe
        headers = [d[0] for d in cc.description]
        return pandas.DataFrame.from_records(data, columns=headers)

    first = True
    for infile in infiles:
        table_name = filenameToTablename(P.snip(os.path.basename(infile)))
        table_name = table_name + "_significance"
        geneset_id = table_name.split("_")[0]
        if first:
            # set up first dataframe
            first = False
            df = _fetch(table_name)
            # removing qvalue as it will be recalculated
            try:
                df.drop("qvalue", axis=1, inplace=True)
            except ValueError:
                # qvalue not contained in df
                pass
            # adding column for geneset_id
            geneset_id = [geneset_id, ] * len(df.index)
            df["geneset_id"] = geneset_id
            continue

        # append successive dataframes
        df_n = _fetch(table_name)
        df_n.drop("qvalue", axis=1, inplace=True)
        geneset_id = [geneset_id, ] * len(df_n.index)
        df_n["geneset_id"] = geneset_id
        df = df.append(df_n)

    # get globally adjusted pvalues
    p_vals = df["pvalue"].tolist()
    padjpy = R["p.adjust"]
    # returns a python FloatVector object
    q_vals = padjpy(p_vals)
    df["qvalue"] = [x for x in q_vals]

    df.to_csv(outfile, sep="\t", index=False)

Source File: PipelineTransfacMatch.py From CGATPipelines with MIT License

4 votes

def permuteTFBSEnrich(tfbs_table,
                      fg_gc,
                      bg_gc,
                      nPerms,
                      bg_stat):
    '''
    Generate p-value from empirical cumulative frequency distribution
    for all TFBS by permutation.
    '''
    results_dict = {}
    fg_geneset = fg_gc.index.tolist()
    matched_genes = matchGenesByComposition(bg_gc, fg_gc, bg_stat)
    tfbs_all = set(tfbs_table.index)

    ecdf_py = R['ecdf']
    for tfbs in tfbs_all:
        tfbs_res = {}
        tfbs_genes = TFBSgeneset(tfbs, tfbs_table)
        fg_genes_counters = countTFBSEnrichment(tfbs_genes, fg_geneset)

        n_foregenes = fg_genes_counters[0]
        n_tfbsgenes = fg_genes_counters[1]
        total_fg = len(fg_geneset)
        fore_enrich = n_foregenes / float(n_tfbsgenes)
        if fore_enrich > 0:
            null_perms = nullDistPermutations(tfbs_genes=tfbs_genes,
                                              matched_genes=matched_genes,
                                              nPerms=nPerms)
            null_dist = null_perms['null']
            null_median = null_perms['median']
            null_dist_r = robjects.FloatVector([f for f in null_dist])
            null_ecdf = ecdf_py(null_dist_r)
            null_func = null_ecdf(fore_enrich)
            pvalue = 1.00 - null_func[0]

        else:
            null_median = 0
            pvalue = 1.0

        tfbs_res['nForegenes'] = n_foregenes
        tfbs_res['tForegenes'] = total_fg
        tfbs_res['nTFBSGenes'] = n_tfbsgenes
        tfbs_res['propForeInTFBS'] = fore_enrich
        tfbs_res['null_median'] = null_median
        tfbs_res['pvalue'] = pvalue

        results_dict[tfbs] = tfbs_res

    return results_dict

Source File: handler.py From lambda-r-survival-stats with Apache License 2.0

4 votes

def calculate_survival_stats(times, events, values_by_record):
    """
    @param times: time elapsed before the event occurs, or when subject is censored
    @param events: 1 indicates event was observed; 0 indicates event did not occur
    @param values_by_record: two dimensional double array.  Each row is the predictor values for a record (ex: gene)
    @return: array where each element contains the hazard ratio and pvalue for the record
    """
    # flatten values of two dimensional array for use by R
    # in R, matrices are simply an array where you specify number of columns per row
    flattened_values = [y for row in values_by_record for y in row]

    t = robjects.FloatVector(times)
    e = robjects.IntVector(events)
    v = robjects.FloatVector(flattened_values)

    # convert flattened values into an R matrix
    m = robjects.r['matrix'](v, nrow=len(values_by_record), byrow=True)

    #load R library
    r('library(survival)')

    # assign variables in R
    r.assign('valuesMatrix', m)
    r.assign('numSamples', len(times))
    r.assign('times', t)
    r.assign('events', e)

    # calculate statistics by applying coxph to each record's values
    logging.debug('Calculating stats')
    r("""res <- apply(valuesMatrix,1, function(values) {
      coxlist = try(coxph(Surv(times,events)~values + cluster(1:numSamples[1])))
      return(c(summary(coxlist)$coefficients[2], summary(coxlist)$coefficients[6]))
      })""")
    logging.debug('Done calculating stats')

    # convert results
    r_res = robjects.r['res']
    res_iter = iter(r_res)
    results = []
    for hazard in res_iter:
        pval = next(res_iter)
        results.append({'hazard': hazard, 'pval': pval})
    return results

Source File: velocity_pseudotime.py From scvelo with BSD 3-Clause "New" or "Revised" License

4 votes

def principal_curve(data, basis="pca", n_comps=4, clusters_list=None, copy=False):
    """Computes the principal curve
    Arguments
    ---------
    data: :class:`~anndata.AnnData`
        Annotated data matrix.
    basis: `str` (default: `'pca'`)
        Basis to use for computing the principal curve.
    n_comps: `int` (default: 4)
        Number of pricipal components to be used.
    copy: `bool`, (default: `False`)
        Return a copy instead of writing to adata.
    Returns
    -------
    Returns or updates `adata` with the attributes
    principal_curve: `.uns`
        dictionary containing `projections`, `ixsort` and `arclength`
    """
    adata = data.copy() if copy else data
    import rpy2.robjects as robjects
    from rpy2.robjects.packages import importr

    if clusters_list is not None:
        cell_subset = np.array(
            [label in clusters_list for label in adata.obs["clusters"]]
        )
        X_emb = adata[cell_subset].obsm[f"X_{basis}"][:, :n_comps]
    else:
        cell_subset = None
        X_emb = adata.obsm[f"X_{basis}"][:, :n_comps]

    n_obs, n_dim = X_emb.shape

    # convert array to R matrix
    xvec = robjects.FloatVector(X_emb.T.reshape((X_emb.size)))
    X_R = robjects.r.matrix(xvec, nrow=n_obs, ncol=n_dim)

    fit = importr("princurve").principal_curve(X_R)

    adata.uns["principal_curve"] = dict()
    adata.uns["principal_curve"]["ixsort"] = ixsort = np.array(fit[1]) - 1
    adata.uns["principal_curve"]["projections"] = np.array(fit[0])[ixsort]
    adata.uns["principal_curve"]["arclength"] = np.array(fit[2])
    adata.uns["principal_curve"]["cell_subset"] = cell_subset

    return adata if copy else None

Source File: plotting.py From grocsvs with MIT License

4 votes

def ecdf(vectors, labels=None, colors=["red", "blue", "orange", "violet", "green", "brown"],
         xlab="", ylab="cumulative fraction", main="", legendWhere="topleft", 
         lty=1, lwd=1, legendArgs=None, labelsIncludeN=True, **ecdfKwdArgs):
    """ Take a list of lists, convert them to vectors, and plots them sequentially on a CDF """

    if ro is None:
        return

    #print "MEANS:", main
    #for vector, label in zip(convertToVectors, labels):
    #    print label, numpy.mean(vector)
    
    def _expand(item):
        try:
            iter(item)
            return item
        except TypeError:
            return [item] * len(vectors)
            
    
    lty = _expand(lty)
    lwd = _expand(lwd)


    if not "xlim" in ecdfKwdArgs or ecdfKwdArgs["xlim"] is None:
        xlim = [min(min(vector) for vector in vectors if len(vector) > 0),
                max(max(vector) for vector in vectors if len(vector) > 0)]
        ecdfKwdArgs["xlim"] = xlim

    ecdfKwdArgs["xlim"] = ro.FloatVector(ecdfKwdArgs["xlim"])


    started = False
    for i, vector in enumerate(vectors):
        if len(vector) > 0:
            vector = ro.FloatVector(vector)
            ecdfKwdArgs.update({"verticals":True, "do.points":False, 
                                "col.hor":colors[(i)%len(colors)],
                                "col.vert":colors[(i)%len(colors)],
                                "lty":lty[(i)%len(lty)],
                                "lwd":lwd[(i)%len(lwd)]})
            ecdf = r.ecdf(vector)

            if not started:
                r.plot(ecdf, main=main, xlab=xlab, ylab=ylab, **ecdfKwdArgs)
                started = True
            else:
                r.plot(ecdf, add=True, **ecdfKwdArgs)

    if labels is not None:
        if labelsIncludeN:
            labelsWithN = []
            for i, label in enumerate(labels):
                labelsWithN.append(label+" (n=%d)"%len(vectors[i]))
        else:
            labelsWithN = labels
        legendArgs = asdict(legendArgs, {"cex":0.7})
        r.legend(legendWhere, legend=ro.StrVector(labelsWithN), lty=ro.IntVector(lty),
                 lwd=ro.IntVector([lwdi*2 for lwdi in lwd]), col=ro.StrVector(colors),
                 bg="white", **legendArgs)

Python rpy2.robjects.FloatVector() Examples