Python Examples of rpy2.robjects.StrVector

Source File: common.py From Computable with MIT License

6 votes

def _convert_vector(obj):
    """Convert an R vector into a pandas Series, a list, or a converted matrix.

    Int and Str vectors are delegated to their dedicated converters.  For any
    other vector, the first of the R attributes ``names``, ``tsp`` or
    ``labels`` that is present supplies the index of a ``pd.Series``.  Objects
    of R class ``dist`` are turned into a matrix and passed to
    ``convert_robj``; everything else becomes a plain ``list``.
    """
    if isinstance(obj, robj.IntVector):
        return _convert_int_vector(obj)
    if isinstance(obj, robj.StrVector):
        return _convert_str_vector(obj)

    # The attribute names tell us whether the vector carries something that
    # can serve as an index; with no usable attributes, fall back to a list.
    try:
        attr_names = set(r['attributes'](obj).names)
    except AttributeError:
        return list(obj)

    # (attribute to look for, R function that yields the index), in priority
    # order.
    index_sources = (
        ('names', 'names'),
        ('tsp', 'time'),
        ('labels', 'labels'),
    )
    for attr, r_function in index_sources:
        if attr in attr_names:
            return pd.Series(list(obj), index=r[r_function](obj))

    if _rclass(obj) != 'dist':
        return list(obj)

    # For 'eurodist'. WARNING: This results in a DataFrame, not a Series or
    # list.
    return convert_robj(r['as.matrix'](obj))

Source File: network.py From iterativeWGCNA with GNU General Public License v2.0

6 votes

def plot_module_eigengene(self, module):
    '''
    barchart illustrating module eigengene

    Builds the barplot parameters for the eigengene of `module` (bar
    heights, symmetric y-limits, per-bar colors, sample labels, title)
    and hands them to an RManager, whose barchart() is then called.
    '''
    eigengene = self.eigengenes.get_module_eigengene(module)

    params = {}
    params['height'] = base().as_numeric(eigengene)

    # y-axis is symmetric around zero, sized by the largest absolute value
    limit = max(abs(base().max(eigengene)[0]), abs(base().min(eigengene)[0]))
    ylim = [-1 * limit, limit]
    # the limits are floating point values; an integer vector cannot
    # represent fractional bounds, so pass them as a FloatVector
    params['ylim'] = ro.FloatVector(ylim)

    # positive bars are red, all others blue
    colors = ["red" if e[0] > 0 else "blue" for e in eigengene]
    params['col'] = ro.StrVector(colors)

    params['border'] = ro.NA_Logical
    params['las'] = 2
    params['names.arg'] = ro.StrVector(self.eigengenes.samples())
    params['cex.names'] = 0.6
    params['main'] = "Eigengene: " + module
    manager = RManager(eigengene, params)
    manager.barchart()

Source File: genes.py From iterativeWGCNA with GNU General Public License v2.0

6 votes

def update_membership(self, genes, blocks):
    '''
    fetches new module membership from WGCNA
    blocks and updates relevant genes

    genes  -- gene identifiers; looked up one by one in the
              extracted modules
    blocks -- WGCNA blocks passed to rsnippets.extractModules

    Genes labelled '0' or 'grey' are recorded as 'UNCLASSIFIED';
    all others are recorded as '<iteration>_M<label>' and marked
    as classified in the current iteration.
    '''
    modules = rsnippets.extractModules(blocks, ro.StrVector(genes))
    # if the feature is in the subset
    # update, otherwise leave as is
    for gene in genes:
        # .rx returns a FloatVector which introduces
        # a .0 to the numeric labels when converted to string
        # which needs to be removed
        # note: R array starts at index 1, python at 0
        module = str(modules.rx(gene, 1)[0])
        # strip only the trailing '.0'; a blanket replace would also
        # remove '.0' occurring elsewhere in the label
        if module.endswith('.0'):
            module = module[:-2]

        if module in ('0', 'grey'):
            module = 'UNCLASSIFIED'
        else:
            module = self.iteration + '_' + 'M' + str(module)
            self.__update_classified_iteration(gene, self.iteration)
        self.__update_module(gene, module)

    return None

Source File: test_dataframe.py From rpy2 with GNU General Public License v2.0

5 votes

def test_init_stringsasfactors():
    """The ``stringsasfactor`` flag decides the type of a string column."""
    columns = {'a': robjects.IntVector((1,2)),
               'b': robjects.StrVector(('c', 'd'))}
    # (flag value, expected type of column 'b'), checked in this order
    expectations = (
        (True, robjects.FactorVector),
        (False, robjects.StrVector),
    )
    for as_factor, expected_type in expectations:
        frame = robjects.DataFrame(columns, stringsasfactor=as_factor)
        assert isinstance(frame.rx2('b'), expected_type)

Source File: test_robjects.py From rpy2 with GNU General Public License v2.0

5 votes

def test_rclass_set():
    """Assigning to ``rclass`` puts the new class name in front of the old ones."""
    vec = robjects.r('1:3')
    previous = vec.rclass
    prefix = robjects.StrVector(('Foo', ))
    vec.rclass = prefix + vec.rclass
    assert vec.rclass[0] == 'Foo'
    assert previous[0] == vec.rclass[1]

Source File: test_dataframe.py From rpy2 with GNU General Public License v2.0

5 votes

def test_to_csvfile():
    """Round-trip a DataFrame through a CSV file and check its dimensions."""
    import os

    # Only the file name is needed, so the handle is closed right away;
    # delete=False keeps the file on disk for the round trip.
    fh = tempfile.NamedTemporaryFile(mode = "w", delete = False)
    fh.close()
    try:
        d = {'letter': robjects.StrVector('abc'),
             'value' : robjects.IntVector((1, 2, 3))}
        dataf = robjects.DataFrame(d)
        dataf.to_csvfile(fh.name)
        dataf = robjects.DataFrame.from_csvfile(fh.name)
        assert dataf.nrow == 3
        assert dataf.ncol == 2
    finally:
        # delete=False means nothing else removes the file; clean it up
        # whether or not the assertions passed.
        os.unlink(fh.name)

Source File: test_dataframe.py From rpy2 with GNU General Public License v2.0

5 votes

def test_colnames_set():
    """Column names of a data frame can be replaced through ``colnames``."""
    frame = robjects.r('data.frame(a=1:2, b=I(c("a", "b")))')
    replacement = robjects.StrVector('de')
    frame.colnames = replacement
    assert tuple(frame.colnames) == ('d', 'e')

Source File: test_dataframe.py From rpy2 with GNU General Public License v2.0

5 votes

def test_cbind():
    """``cbind`` accepts another data frame or a named vector."""
    r_code = 'data.frame(a=1:2, b=I(c("a", "b")))'

    def count_named_a(frame):
        # number of columns called 'a'
        return len([name for name in frame.colnames if name == 'a'])

    # binding two data frames keeps all four columns, two of them named 'a'
    combined = robjects.r(r_code)
    combined = combined.cbind(robjects.r(r_code))
    assert combined.ncol == 4
    assert count_named_a(combined) == 2

    # binding a keyword-named vector adds one column with that name
    combined = robjects.r(r_code)
    combined = combined.cbind(a = robjects.StrVector(("c", "d")))
    assert combined.ncol == 3
    assert count_named_a(combined) == 2

Source File: test_packages.py From rpy2 with GNU General Public License v2.0

5 votes

def tests_package_from_env(self):
    """A Package built from an environment exposes its symbols as attributes."""
    env = robjects.Environment()
    env['a'] = robjects.StrVector('abcd')
    env['b'] = robjects.IntVector((1,2,3))
    env['c'] = robjects.r(''' function(x) x^2''')
    pck = robjects.packages.Package(env, "dummy_package")
    # (attribute on the package, expected wrapper type)
    expected = (
        ('a', robjects.Vector),
        ('b', robjects.Vector),
        ('c', robjects.Function),
    )
    for attribute, wrapper_type in expected:
        assert isinstance(getattr(pck, attribute), wrapper_type)

Source File: test_packages.py From rpy2 with GNU General Public License v2.0

5 votes

def test_new_with_dot(self):
    """The R symbol 'a.a' is reachable as the attribute ``a_a``."""
    env = robjects.Environment()
    env['a.a'] = robjects.StrVector('abcd')
    env['b'] = robjects.IntVector((1,2,3))
    env['c'] = robjects.r(''' function(x) x^2''')

    pck = robjects.packages.Package(env, "dummy_package")

    dotted, plain, func = pck.a_a, pck.b, pck.c
    assert isinstance(dotted, robjects.Vector)
    assert isinstance(plain, robjects.Vector)
    assert isinstance(func, robjects.Function)

Source File: test_packages.py From rpy2 with GNU General Public License v2.0

5 votes

def test_new_with_dot_conflict(self):
    """Building a Package from 'a.a_a' and 'a_a.a' raises LibraryError."""
    env = robjects.Environment()
    bindings = (
        ('a.a_a', robjects.StrVector('abcd')),
        ('a_a.a', robjects.IntVector((1,2,3))),
        ('c', robjects.r(''' function(x) x^2''')),
    )
    for symbol, value in bindings:
        env[symbol] = value

    with pytest.raises(packages.LibraryError):
        robjects.packages.Package(env, "dummy_package")

Source File: test_packages.py From rpy2 with GNU General Public License v2.0

5 votes

def test_new_with_dot_conflict2(self):
    """An R symbol named like an existing Package attribute raises LibraryError."""
    env = robjects.Environment()
    # Take the first name reported by dir() on a package built from the
    # still-empty environment and bind it as an R symbol.
    probe = packages.Package(env, "foo")
    taken_name = dir(probe)[0]
    env[taken_name] = robjects.StrVector('abcd')
    with pytest.raises(packages.LibraryError):
        robjects.packages.Package(env, "dummy_package")

Source File: test_vector.py From rpy2 with GNU General Public License v2.0

5 votes

def test_contains():
    """Membership tests work through the ``.ro`` accessor of a StrVector."""
    strings = robjects.StrVector(('abc', 'def', 'ghi'))
    assert 'def' in strings.ro
    assert not ('foo' in strings.ro)

Source File: test_vector.py From rpy2 with GNU General Public License v2.0

5 votes

def test_nacharacter():
    """An element set to ``NA_Character`` is reported as NA by R's ``is.na``."""
    chars = robjects.StrVector('abc')
    chars[0] = robjects.NA_Character
    is_na = robjects.baseenv['is.na']
    na_flags = is_na(chars)
    assert na_flags[0] is True

Source File: test_vector.py From rpy2 with GNU General Public License v2.0

5 votes

def test_items():
    """``items()`` yields (name, value) pairs of a named vector."""
    numbers = robjects.IntVector(range(3))
    numbers.names = robjects.StrVector('abc')

    seen_names = [name for name, _ in numbers.items()]
    assert seen_names == ['a', 'b', 'c']

    seen_values = [value for _, value in numbers.items()]
    assert seen_values == [0, 1, 2]

Source File: test_vector.py From rpy2 with GNU General Public License v2.0

5 votes

def test_sequence_to_vector():
    """``sequence_to_vector`` picks the vector type from the element types."""
    # (input sequence, expected vector type), checked in this order
    cases = (
        ((1, 2, 3), robjects.IntVector),
        ((1, 2, 3.0), robjects.FloatVector),
        (('ab', 'cd', 'ef'), robjects.StrVector),
    )
    for sequence, expected_type in cases:
        converted = robjects.sequence_to_vector(sequence)
        assert isinstance(converted, expected_type)

    # an empty sequence is rejected
    with pytest.raises(ValueError):
        robjects.sequence_to_vector([])

Source File: test_array.py From rpy2 with GNU General Public License v2.0

5 votes

def test_names_get():
    """``names`` of a FloatArray returns the dimnames given at construction."""
    row_labels = robjects.StrVector(['a', 'b', 'c'])
    col_labels = robjects.StrVector(['d', 'e'])
    dimnames = robjects.r.list(row_labels, col_labels)

    matrix = robjects.r.matrix(1, nrow=3, ncol=2, dimnames = dimnames)
    array = robjects.vectors.FloatArray(matrix)
    names = array.names

    identical = robjects.r.identical
    for axis in (0, 1):
        assert identical(dimnames[axis], names[axis])[0]

Source File: test_array.py From rpy2 with GNU General Public License v2.0

5 votes

def test_colnames():
    """Column names start as NULL, can be set, and must match the column count."""
    mat = robjects.r.matrix(robjects.IntVector(range(4)), nrow=2, ncol=2)
    assert mat.colnames == rinterface.NULL

    mat.colnames = robjects.StrVector(('a', 'b'))
    assert len(mat.colnames) == 2
    for position, expected in enumerate(('a', 'b')):
        assert mat.colnames[position] == expected

    # three names for two columns must be rejected
    with pytest.raises(ValueError):
        mat.colnames = robjects.StrVector(('a', 'b', 'c'))

Source File: test_array.py From rpy2 with GNU General Public License v2.0

5 votes

def test_rownames():
    """Row names start as NULL, can be set, and must match the row count."""
    values = robjects.IntVector(range(4))
    mat = robjects.r.matrix(values, nrow=2, ncol=2)
    assert mat.rownames == rinterface.NULL

    new_names = ('c', 'd')
    mat.rownames = robjects.StrVector(new_names)
    assert len(mat.rownames) == 2
    assert mat.rownames[0] == 'c'
    assert mat.rownames[1] == 'd'

    # three names for two rows must be rejected
    with pytest.raises(ValueError):
        mat.rownames = robjects.StrVector(('a', 'b', 'c'))

Source File: test_dataframe.py From rpy2 with GNU General Public License v2.0

5 votes

def test_init_from_OrdDict():
    """A DataFrame can be built from an ``rlc.OrdDict`` of vectors."""
    column_pairs = (
        ('a', robjects.IntVector((1,2))),
        ('b', robjects.StrVector(('c', 'd'))),
    )
    ordered = rlc.OrdDict(c=column_pairs)
    frame = robjects.DataFrame(ordered)
    first_a = frame.rx2('a')[0]
    assert first_a == 1

Source File: plotting.py From grocsvs with MIT License

5 votes

def barPlot(dict_, keysInOrder=None, printCounts=True, ylim=None, *args, **kwdargs):
    """ Draw a bar plot through r.barplot()

    Args:
        dict_: a dictionary of name -> value, where value is the height of the bar
            use a collections.OrderedDict() to easily convey the order of the groups
        keysInOrder: an optional ordering of the keys in dict_ (alternate option to using collections.OrderedDict)
        printCounts: option to print the counts on top of each bar
        ylim: optional [lower, upper] y-axis limits; when None they are taken from
            the smallest and largest heights, with the upper limit multiplied by 1.1
            if printCounts is set

    additional args/kwdargs are passed directly to r.barplot()

    Returns:
        whatever r.barplot() returns
    """

    keys = keysInOrder if keysInOrder else dict_.keys()

    heights = ro.FloatVector([dict_[name] for name in keys])

    kwdargs["names.arg"] = ro.StrVector(keys)

    if ylim is None:
        upper = max(heights)
        if printCounts:
            # leave headroom above the tallest bar for the printed count
            upper = upper*1.1
        ylim = [min(heights), upper]

    x = r.barplot(heights, ylim=ro.FloatVector(ylim), *args, **kwdargs)

    if printCounts:
        # write each height, formatted to two significant digits, above its bar
        labels = list(map("{:.2g}".format, heights))
        r.text(x, ro.FloatVector(heights), ro.StrVector(labels), pos=3)

    return x

Source File: eigengenes.py From iterativeWGCNA with GNU General Public License v2.0

5 votes

def extract_subset(self, modules):
    '''
    return the submatrix of self.matrix whose rows are
    selected by the given module names

    when self.debug is set, progress messages go to the logger
    and marker lines are printed on the R side
    '''
    if self.debug:
        self.logger.debug("Extracting eigengenes for the following modules:")
        self.logger.debug(modules)
        self.logger.debug("Converting module list to ro.StrVector; see R-log")
        ro.r("print('Converting module list to ro.StrVector to extract eigengenes:')")

    moduleNames = ro.StrVector(modules)

    if self.debug:
        self.logger.debug(moduleNames)
        self.logger.debug("Extracted submatrix, see R-log")
        ro.r("print('Extracted eigengene submatrix:')")

    # select rows by name, keep all columns
    subMatrix = self.matrix.rx(moduleNames, True)

    if self.debug:
        self.logger.debug(subMatrix)

    return subMatrix

Source File: manager.py From iterativeWGCNA with GNU General Public License v2.0

5 votes

def heatmap_annotation_data_frame(self, categories, annotation):
    '''
    takes a dict of gene->value and creates a data frame;
    assumes annotation is an ordered dict

    the annotation is turned into an R data frame, transposed,
    converted back to a data frame, and its column names are
    replaced by the given categories
    '''
    asFrame = ro.DataFrame(annotation)
    transposed = base().t(asFrame)
    df = base().as_data_frame(transposed)
    df.colnames = ro.StrVector(categories)
    return df

Source File: manager.py From iterativeWGCNA with GNU General Public License v2.0

5 votes

def heatmap_annotation_key(self, name, colors):
    '''
    generates the color key for the annotation from a dict:
    a ListVector with a single entry, `name`, holding the
    dict values as a StrVector named by the dict keys
    '''
    colorVector = ro.StrVector(list(colors.values()))
    colorVector.names = colors.keys()

    return ro.ListVector(OrderedDict([(name, colorVector)]))

Source File: network.py From iterativeWGCNA with GNU General Public License v2.0

5 votes

def calculate_degree_modularity(self, targetModule):
    '''
    calculates in degree (kIn) and out degree (kOut)
    for the target module and stores them, together with
    the derived density, in self.modules[targetModule]

    density = kIn / (size * (size - 1) / 2)
    NOTE: the denominator is zero for a module size of 0 or 1,
    in which case the division raises ZeroDivisionError
    '''
    members = self.__get_module_members(targetModule)
    degree = rsnippets.degree(self.adjacency, ro.StrVector(members),
                              self.args.edgeWeight)

    moduleInfo = self.modules[targetModule]
    moduleInfo['kIn'] = int(degree.rx2('kIn')[0])
    moduleInfo['kOut'] = int(degree.rx2('kOut')[0])

    size = float(moduleInfo['size'])
    halfPairs = size * (size - 1.0)/2.0
    moduleInfo['density'] = float(moduleInfo['kIn'])/halfPairs

Source File: wgcna.py From iterativeWGCNA with GNU General Public License v2.0

5 votes

def module_eigengenes(self, membership):
    '''
    wrapper for moduleEigengenes function
    calculates eigengenes from profiles &
    module membership (gene -> membership dict)

    softPower is taken from self.params['power'] and
    falls back to 6 when no power is configured
    '''

    if self.debug:
        for message in ("Running WGCNA moduleEigengenes function",
                        "Module assignments:",
                        membership):
            self.logger.debug(message)

    if 'power' in self.params:
        softPower = self.params['power']
    else:
        softPower = 6

    params = {
        'softPower': softPower,
        'expr': base().as_data_frame(self.transpose_data()),
    }

    if self.debug:
        self.logger.debug("Converting membership list to ro.StrVector; see R-log")
        ro.r("print('Converting membership list to ro.StrVector for WGCNA moduleEigengenes:')")

    colors = ro.StrVector(list(membership))
    params['colors'] = colors

    if self.debug:
        self.logger.debug(params['colors'])

    return wgcna().moduleEigengenes(**params)

Source File: genes.py From iterativeWGCNA with GNU General Public License v2.0

5 votes

def load_membership(self, fileName=None):
    '''
    loads membership from a tab-delimited file
    (default: "final-membership.txt") and updates the module
    of every gene in self.genes from its 'Module' column;
    logs how many genes were classified / unclassified
    '''
    if fileName is None:
        fileName = "final-membership.txt"

    membership = ro.DataFrame.from_csvfile(
        fileName, sep='\t', header=True, row_names=1, as_is=True)

    if self.debug:
        self.logger.debug("Loaded membership from file " + fileName + "; see R-log")
        ro.r("print('Loaded membership from file -- head of file:')")
        self.logger.debug(membership.head())

    # add 1 b/c of python/rpy2/R inconsistency
    index = membership.names.index('Module') + 1

    if self.debug:
        self.logger.debug("Adjusted index of Module column: " + str(index))

    classified = 0
    unclassified = 0
    for g in self.genes:
        # strange, but necessary so that rpy2 will treat numeric gene ids as strings
        # python str() conversion did not work
        geneKey = ro.StrVector([str(g)])[0]

        module = membership.rx(geneKey, index)[0]

        if module == 'UNCLASSIFIED':
            unclassified += 1
        else:
            classified += 1
        self.__update_module(g, module)

    self.logger.info("Loaded {} classified genes".format(classified))
    self.logger.info("Loaded {} unclassified genes".format(unclassified))

Source File: test_dataframe.py From rpy2 with GNU General Public License v2.0

5 votes

def test_init_from_dict():
    """A DataFrame can be built from a plain dict of vectors."""
    columns = dict(a=robjects.IntVector((1,2)),
                   b=robjects.StrVector(('c', 'd')))
    frame = robjects.DataFrame(columns)
    first_a = frame.rx2('a')[0]
    assert first_a == 1

Source File: PipelineTimeseries.py From CGATPipelines with MIT License

4 votes

def covarFilter(infile,
                time_points,
                replicates,
                quantile):
    '''
    Filter gene list based on the distribution of the
    sums of the covariance of each gene.  This is highly
    recommended to reduce the total number of genes used
    in the dynamic time warping clustering to reduce the
    computational time.  The threshold is placed at the
    intersection of the expected and observed value
    for the given quantile.

    Arguments
    ---------
    infile:
        tab-separated table read with a header row and the first
        column as index; its `replicates` and `times` columns are
        dropped and missing values are replaced by 0.0
    time_points:
        time points; sorted IN PLACE (the caller's object is
        modified) and combined with `replicates`
    replicates:
        replicate labels; every (time point, replicate) pair is
        generated with itertools.product
    quantile:
        formatted as an integer and used as the index into the
        vectors returned by R's quantile()

    Returns
    -------
    the transpose of the object returned by
    pandas2i.ri2py('filtered_frame')
    '''

    # NOTE: .sort() mutates the argument passed in by the caller
    time_points.sort()
    # one (time point, replicate) pair per combination; the two
    # vectors below hold the pair members in matching order
    time_rep_comb = [x for x in itertools.product(time_points, replicates)]
    time_cond = ro.StrVector([x[0] for x in time_rep_comb])
    rep_cond = ro.StrVector([x[1] for x in time_rep_comb])
    df = pd.read_table(infile, sep="\t", header=0, index_col=0)

    df.drop(['replicates'], inplace=True, axis=1)
    df.drop(['times'], inplace=True, axis=1)
    df = df.fillna(0.0)

    # make the cleaned table available to R under the name diff_data
    R.assign('diff_data', df)

    E.info("loading data frame")

    # need to be careful about column headers and transposing data frames

    # the R representation of each vector is spliced into the R source
    R('''trans_data <- data.frame(diff_data)''')
    R('''times <- c(%s)''' % time_cond.r_repr())
    R('''replicates <- c(%s)''' % rep_cond.r_repr())

    # calculate the covariance matrix for all genes
    # sum each gene's covariance vector

    E.info("calculating sum of covariance of expression")

    # observed sums (sum.covar) and the values expected from a normal
    # distribution with the same mean and sd (exp.covar), plus the
    # quantiles of both
    R('''covar.mat <- abs(cov(trans_data))''')
    R('''sum.covar <- rowSums(covar.mat)''')
    R('''exp.covar <- abs(qnorm(ppoints(sum.covar),'''
      '''mean=mean(sum.covar), sd=sd(sum.covar)))''')
    R('''sum.covar.quant <- quantile(sum.covar)''')
    R('''exp.covar.quant <- quantile(exp.covar)''')

    E.info("filter on quantile")

    # keep genes whose summed covariance exceeds both the observed and
    # the expected value at the requested quantile index; `quantile`
    # is picked up from locals() by the %(quantile)i placeholders
    R('''filtered_genes <- names(sum.covar[sum.covar > '''
      '''sum.covar.quant[%(quantile)i]'''
      ''' & sum.covar > exp.covar.quant[%(quantile)i]])''' % locals())
    R('''filtered_frame <- data.frame(diff_data[, filtered_genes],'''
      '''times, replicates)''')

    # NOTE(review): ri2py is given the variable name as a Python string,
    # not an R object - confirm this resolves the R-side filtered_frame
    filtered_frame = pandas2i.ri2py('filtered_frame').T

    return filtered_frame

Source File: PipelineTimeseries.py From CGATPipelines with MIT License

4 votes

def conditionDESeq2(data_frame, header, alpha, res_dir):
    '''
    Perform DESeq2-based analysis of condition:time interaction
    dependent differential expression

    Arguments
    ---------
    data_frame:
        count table; each column name is split on "." and read as
        <condition>.<time>.<replicate>, the index supplies the gene
        names
    header:
        label used in the log message and in the names of the
        output plots
    alpha:
        value passed as `alpha` to plotMA, formatted to three
        decimal places
    res_dir:
        directory that receives <header>-dispersions.png and
        <header>-MAplot.png

    Returns
    -------
    the object returned by com.load_data('res.df')
    '''

    E.info("Differential expression testing for %s" % header)
    cols = data_frame.columns
    counts = com.convert_to_r_dataframe(data_frame)
    # design variables are parsed from the column names:
    # field 0 = condition, field 1 = time, field 2 = replicate
    des_times = ro.IntVector([x.split(".")[1] for x in cols])
    des_reps = ro.StrVector([x.split(".")[2] for x in cols])
    des_cond = ro.StrVector([x.split(".")[0] for x in cols])
    genes = ro.StrVector([x for x in data_frame.index])

    # setup counts table and design frame

    R('''suppressPackageStartupMessages(library("DESeq2"))''')
    # send R output to /dev/null until the matching sink(file=NULL) below
    R('''sink(file="/dev/null")''')
    # the R representation of each vector is spliced into the R source
    R('''times <- as.factor(%s)''' % des_times.r_repr())
    R('''reps <- c(%s)''' % des_reps.r_repr())
    R('''condition <- c(%s)''' % des_cond.r_repr())
    R('''design <- data.frame(times, reps, condition)''')
    R('''counts <- data.frame(%s)''' % counts.r_repr())
    R('''genes <- c(%s)''' % genes.r_repr())
    R('''rownames(counts) <- genes''')
    R('''rownames(design) <- colnames(counts)''')

    # use DESeq() with LRT and reduced formula.  Use effect
    # size moderation

    # full model includes the times:condition interaction, the reduced
    # model omits it; results are ordered by adjusted p-value
    R('''dds <- DESeqDataSetFromMatrix(countData=counts, '''
      '''colData=design, '''
      '''design=~reps + times + condition + times:condition)''')
    R('''dds <- DESeq(dds, test="LRT", '''
      '''reduced=~reps + times + condition, betaPrior=T)''')
    R('''res <- results(dds)[order(results(dds)$padj, na.last=T), ]''')
    R('''res.df <- data.frame(res)''')

    # generate dispersion and MA plots
    R('''png("%s/%s-dispersions.png")''' % (res_dir,
                                            header))
    R('''plotDispEsts(dds)''')
    R('''dev.off()''')

    R('''png("%s/%s-MAplot.png")''' % (res_dir,
                                       header))
    R('''plotMA(res, alpha=%0.3f, ylim=c(-5,5))''' % alpha)
    R('''dev.off()''')
    # stop diverting R output
    R('''sink(file=NULL)''')

    # fetch the R-side result table res.df
    df = com.load_data('res.df')

    return df

Python rpy2.robjects.StrVector() Examples