Python Examples of loompy.connect

Source File: utils.py From pySCENIC with GNU General Public License v3.0

6 votes

def load_exp_matrix_as_loom(fname,
                            return_sparse=False,
                            attribute_name_cell_id: str = ATTRIBUTE_NAME_CELL_IDENTIFIER,
                            attribute_name_gene: str = ATTRIBUTE_NAME_GENE) -> pd.DataFrame:
    """
    Read the expression matrix stored in a loom file.

    The file is opened read-only and without validation.

    :param fname: The name of the loom file to load.
    :param return_sparse: If true, return the matrix in sparse form together with the
        gene and cell identifiers instead of building a dataframe.
    :param attribute_name_cell_id: Name of the column attribute that holds the cell identifiers.
    :param attribute_name_gene: Name of the row attribute that holds the gene names.
    :return: By default a 2-dimensional dataframe (rows = cells x columns = genes).
        With ``return_sparse`` set, a tuple ``(matrix, genes, cells)`` where ``matrix`` is the
        transposed main layer in CSC format, ``genes`` is a ``pd.Series`` of the gene attribute
        and ``cells`` is the cell identifier attribute as stored in the file.
    """
    with lp.connect(fname, mode='r', validate=False) as ds:
        if not return_sparse:
            # A loom file always stores genes as rows and cells (or aggregates
            # of cells) as columns, so the frame is built in that orientation
            # and transposed afterwards.
            genes_by_cells = pd.DataFrame(data=ds[:, :],
                                          index=ds.ra[attribute_name_gene],
                                          columns=ds.ca[attribute_name_cell_id])
            return genes_by_cells.T

        sparse_mtx = ds.layers[''].sparse().T.tocsc()
        gene_names = pd.Series(ds.ra[attribute_name_gene])
        cell_ids = ds.ca[attribute_name_cell_id]
    return sparse_mtx, gene_names, cell_ids

Source File: analysis.py From velocyto.py with BSD 2-Clause "Simplified" License

6 votes

def __init__(self, loom_filepath: str) -> None:
        """Load the layers and attributes of a loom file into memory

        The "spliced", "unspliced" and "ambiguous" layers are stored as `S`, `U` and `A`,
        the column and row attributes as the dictionaries `ca` and `ra`.

        Arguments
        ---------
        loom_filepath: str
            path of the loom file to read; it is kept as `self.loom_filepath`
        """
        self.loom_filepath = loom_filepath
        ds = loompy.connect(self.loom_filepath)
        try:
            self.S = ds.layer["spliced"][:, :]
            self.U = ds.layer["unspliced"][:, :]
            self.A = ds.layer["ambiguous"][:, :]
            self.ca = dict(ds.col_attrs.items())
            self.ra = dict(ds.row_attrs.items())
        finally:
            # Release the file handle even if a layer or attribute could not be read
            ds.close()

        self.initial_cell_size = self.S.sum(0)
        self.initial_Ucell_size = self.U.sum(0)

        try:
            valid_fraction = np.mean(self.ca["_Valid"])
        except KeyError:
            # The file did not specify the _Valid column attribute
            pass
        else:
            if valid_fraction < 1:
                # logging.warn is a deprecated alias of logging.warning
                logging.warning(f"fraction of _Valid cells is {valid_fraction} but all will be taken in consideration")

Source File: loom_file_handler.py From SCope with GNU General Public License v3.0

5 votes

def load_loom_file(self, partial_md5_hash: str, file_path: str, abs_file_path: str, mode: str = "r"):
        """Open a loom file (without validation) and hand the connection to ``self.add_loom``.

        If opening ``abs_file_path`` raises ``KeyError``, the error is logged, ``file_path`` is
        removed from disk and ``None`` is returned. Otherwise the result of ``self.add_loom``
        is returned.
        """
        try:
            connection = lp.connect(abs_file_path, mode=mode, validate=False)
        except KeyError as err:
            logger.error(err)
            os.remove(file_path)
            logger.warning(f"Deleting malformed loom {file_path}")
            return None
        else:
            registration = {
                "partial_md5_hash": partial_md5_hash,
                "file_path": file_path,
                "abs_file_path": abs_file_path,
                "loom_connection": connection,
            }
            return self.add_loom(**registration)

Source File: analysis.py From velocyto.py with BSD 2-Clause "Simplified" License

5 votes

def reload_raw(self, substitute: bool=False) -> None:
        """Reload raw data as it was before filtering steps

        The layers and attributes are read again from `self.loom_filepath`. Everything is
        read first and only then stored on the object, so a failed read leaves the object
        unchanged and the file is always closed.

        Arguments
        ---------
        substitute: bool=False
            if True `S, U, A, ca, ra` will be all overwritten
            if False `S, U, A, ca, ra` will be loaded separately as `raw_S, raw_U, raw_A, raw_ca, raw_ra`

        The per-cell totals are recomputed as well: `initial_cell_size` and `initial_Ucell_size`
        when substituting, `raw_initial_cell_size` and `raw_initial_Ucell_size` otherwise.
        """
        ds = loompy.connect(self.loom_filepath)
        try:
            spliced = ds.layer["spliced"][:, :]
            unspliced = ds.layer["unspliced"][:, :]
            ambiguous = ds.layer["ambiguous"][:, :]
            col_attrs = dict(ds.col_attrs.items())
            row_attrs = dict(ds.row_attrs.items())
        finally:
            # Release the file handle even if a layer or attribute could not be read
            ds.close()

        if substitute:
            self.S = spliced
            self.U = unspliced
            self.A = ambiguous
            self.initial_cell_size = self.S.sum(0)
            self.initial_Ucell_size = self.U.sum(0)
            self.ca = col_attrs
            self.ra = row_attrs
        else:
            self.raw_S = spliced
            self.raw_U = unspliced
            self.raw_A = ambiguous
            self.raw_initial_cell_size = self.raw_S.sum(0)
            self.raw_initial_Ucell_size = self.raw_U.sum(0)
            self.raw_ca = col_attrs
            self.raw_ra = row_attrs

Source File: preprocessing.py From scHPF with BSD 2-Clause "Simplified" License

5 votes

def load_loom(filename):
    """Read the count matrix and gene attributes from a loom file

    Parameters
    ----------
    filename: str
        file to load

    Returns
    -------
    coo : coo_matrix
        cell x gene sparse count matrix
    genes : Dataframe
        Dataframe of gene attributes.  Attributes are ordered so
        Accession and Gene are the first columns, if those attributes are
        present
    """
    import loompy

    # everything needed is pulled out of the file while the connection is open
    with loompy.connect(filename) as ds:
        gene_table = pd.DataFrame(dict(ds.ra.items()))
        cell_by_gene = ds.sparse().T

    # Accession and Gene (when present) lead; the remaining columns follow
    leading = [name for name in ('Accession', 'Gene')
               if name in gene_table.columns]
    trailing = gene_table.columns.difference(leading).tolist()

    return cell_by_gene, gene_table[leading + trailing]

Source File: loom_validator.py From loompy with BSD 2-Clause "Simplified" License

5 votes

def validate(self, path: str, strictness: str = "speconly") -> bool:
		"""
		Validate a file for conformance to the Loom specification

		Args:
			path: 			Full path to the file to be validated
			strictness:		"speconly" or "conventions"

		Returns:
			True if every check that was run passed, False otherwise. A help link is
			appended to ``self.errors`` for each kind of check that failed.

		Remarks:
			In "speconly" mode, conformance is assessed relative to the file format specification
			at http://linnarssonlab.org/loompy/format/. In "conventions" mode, conformance is additionally
			assessed relative to attribute name and data type conventions given at http://linnarssonlab.org/loompy/conventions/.

			If ``self.version`` has not been set, it is read from the file before validating.
		"""
		with h5py.File(path, mode="r") as f:
			if self.version is None:
				self.version = get_loom_spec_version(f)
			valid1 = self.validate_spec(f)
			if not valid1:
				self.errors.append("For help, see http://linnarssonlab.org/loompy/format/")

		# The conventions check only runs when explicitly requested; otherwise it counts as passed
		valid2 = True
		if strictness == "conventions":
			with loompy.connect(path, mode="r") as ds:
				valid2 = self.validate_conventions(ds)
				if not valid2:
					self.errors.append("For help, see http://linnarssonlab.org/loompy/conventions/")

		return valid1 and valid2

Source File: loompy.py From loompy with BSD 2-Clause "Simplified" License

5 votes

def __enter__(self) -> Any:
		"""
		Context manager entry, to support the "with loompy.connect(..)" construct

		Returns:
			This same object, which is what the ``as`` clause of the ``with`` statement binds.
		"""
		return self

Source File: loompy.py From loompy with BSD 2-Clause "Simplified" License

5 votes

def __exit__(self, type: Any, value: Any, traceback: Any) -> None:
		"""
		Context manager exit, to support the "with loompy.connect(..)" construct

		Raises ValueError if either dimension of ``self.shape`` is zero; otherwise
		closes the connection unless it is already closed. The exception arguments
		are not inspected.
		"""
		has_rows_and_columns = self.shape[0] != 0 and self.shape[1] != 0
		if not has_rows_and_columns:
			raise ValueError("Newly created loom file must be filled with data before leaving the 'with' statement")
		if self.closed:
			return
		self.close(True)

Source File: loompy.py From loompy with BSD 2-Clause "Simplified" License

5 votes

def create_append(filename: str, layers: Union[np.ndarray, Dict[str, np.ndarray], loompy.LayerManager], row_attrs: Dict[str, np.ndarray], col_attrs: Dict[str, np.ndarray], *, file_attrs: Dict[str, str] = None, fill_values: Dict[str, np.ndarray] = None) -> None:
	"""
	**DEPRECATED** - Use `new` instead; see https://github.com/linnarsson-lab/loompy/issues/42

	If ``filename`` does not exist it is created from the given layers and attributes.
	If it already exists, the layers are added to it as new columns (``row_attrs`` and
	``file_attrs`` are not used in that case).
	"""
	deprecated("'create_append' is deprecated. See https://github.com/linnarsson-lab/loompy/issues/42")
	if not os.path.exists(filename):
		create(filename, layers, row_attrs, col_attrs, file_attrs=file_attrs)
		return
	with connect(filename) as ds:
		ds.add_columns(layers, col_attrs, fill_values=fill_values)

Source File: loompy.py From loompy with BSD 2-Clause "Simplified" License

5 votes

def new(filename: str, *, file_attrs: Optional[Dict[str, str]] = None) -> LoomConnection:
	"""
	Create an empty Loom file, and return it as a context manager.

	Args:
		filename:		Path of the file to create; a leading "~/" is expanded to the user's home directory
		file_attrs:		Optional global attributes to store in the file; a value of None is stored as the string "None"

	Returns:
		An open LoomConnection to the new file, with the 'CreationDate' and
		'LOOM_SPEC_VERSION' attributes already set.

	Remarks:
		If storing the attributes fails, the connection is closed before the exception is re-raised.
	"""
	if filename.startswith("~/"):
		filename = os.path.expanduser(filename)
	if file_attrs is None:
		file_attrs = {}

	# Create the file (empty).
	# Yes, this might cause an exception, which we prefer to send to the caller.
	# The with-block guarantees the HDF5 handle is closed even if that happens.
	with h5py.File(name=filename, mode='w') as f:
		f.create_group('/attrs')  # v3.0.0
		f.create_group('/layers')
		f.create_group('/row_attrs')
		f.create_group('/col_attrs')
		f.create_group('/row_graphs')
		f.create_group('/col_graphs')

	ds = connect(filename, validate=False)
	try:
		for name, value in file_attrs.items():
			ds.attrs[name] = "None" if value is None else value
		# store creation date
		ds.attrs['CreationDate'] = timestamp()
		ds.attrs["LOOM_SPEC_VERSION"] = loompy.loom_spec_version
	except BaseException:
		# The caller never receives the connection in this case, so close it here
		ds.close()
		raise
	return ds

Source File: loompy.py From loompy with BSD 2-Clause "Simplified" License

5 votes

def connect(filename: str, mode: str = 'r+', *, validate: bool = True, spec_version: str = "3.0.0") -> LoomConnection:
	"""
	Establish a connection to a .loom file.

	Args:
		filename:		Path to the Loom file to open
		mode:			Read/write mode, 'r+' (read/write) or 'r' (read-only), defaults to 'r+'
		validate:		Validate the file structure against the Loom file format specification
		spec_version:	The loom file spec version to validate against (e.g. "2.0.1" or "old")
	Returns:
		A LoomConnection instance.

	Remarks:
		This function should typically be used as a context manager (i.e. inside a ``with``-block):

		.. highlight:: python
		.. code-block:: python

			import loompy
			with loompy.connect("mydata.loom") as ds:
				print(ds.ca.keys())

		This ensures that the file will be closed automatically when the context block ends

		Note: if validation is requested, an exception is raised if validation fails.
	"""
	# NOTE(review): spec_version is accepted but not forwarded to LoomConnection here;
	# only filename, mode and validate are passed on. Confirm whether that is intended.
	return LoomConnection(filename, mode, validate=validate)

Source File: test_connection.py From loompy with BSD 2-Clause "Simplified" License

5 votes

def test_scan_with_default_ordering(self) -> None:
        """Scanning without a key must return the same data as scanning with key="key", on both axes."""
        with loompy.connect(self.file.name) as ds:
            for axis in [0, 1]:
                _, _, view = next(iter(ds.scan(axis=axis)))
                no_ordering_data = view[:, :]

                _, _, view = next(iter(ds.scan(axis=axis, key="key")))
                original_ordering_data = view[:, :]

                # Compare inside the loop so that each axis is checked, not only the last one scanned
                np.testing.assert_almost_equal(no_ordering_data, original_ordering_data,
                                               err_msg="Default ordering should same as in file")

Source File: test_main.py From loompy with BSD 2-Clause "Simplified" License

5 votes

def test_new() -> None:
	"""Fill a file created with loompy.new in two batches of 100 columns and check the final shape."""
	zeros = np.zeros((20, 100))
	row_attrs = {"Gene": list("ABCDEFGHILJKLMNOPQRS")}
	col_attrs = {"Cell": np.arange(100)}
	with loompy.new("test.loom") as ds:
		for _ in range(2):
			ds.add_columns(zeros, col_attrs, row_attrs=row_attrs)
	with loompy.connect("test.loom") as ds:
		assert ds.shape == (20, 200)

Source File: test_main.py From loompy with BSD 2-Clause "Simplified" License

5 votes

def test_sparse() -> None:
	"""Store a sparse matrix as main matrix and as a named layer, and read both back densely and sparsely."""
	n_rows, n_cols = 1000, 100
	eye = sparse.eye(n_rows, n_cols)
	row_attrs = {'g_id': np.arange(n_rows)}
	col_attrs = {'c_id': np.arange(n_cols)}

	loompy.create('test.loom', eye, row_attrs, col_attrs)
	with loompy.connect("test.loom") as ds:
		ds["layer"] = eye
		# main matrix, dense then sparse
		assert np.all(ds[:, :] == eye.toarray())
		assert np.all(ds.sparse().data == eye.tocoo().data)
		# named layer, dense then sparse
		named = ds.layers["layer"]
		assert np.all(named[:, :] == eye.toarray())
		assert np.all(ds.layers["layer"].sparse().data == eye.tocoo().data)

Source File: smfish.py From scVI with MIT License

4 votes

def populate(self):
        """Load the smFISH loom file and populate the dataset from it.

        The file ``self.filenames[0]`` inside ``self.save_path`` is read, the transposed
        main matrix is passed to ``populate_from_data`` together with the cluster labels,
        gene names, cell type names and the X/Y coordinates. When
        ``self.use_high_level_cluster`` is set, the clusters are then mapped to the major
        cell types below and the dataset is filtered to those types.
        """
        logger.info("Loading smFISH dataset")
        # Read everything needed while the file is open, so the connection is always closed
        with loompy.connect(os.path.join(self.save_path, self.filenames[0])) as ds:
            # np.str was only an alias of the builtin str and has been removed from NumPy
            gene_names = ds.ra["Gene"].astype(str)
            labels = ds.ca["ClusterID"].reshape(-1, 1)
            tmp_cell_types = np.asarray(ds.ca["ClusterName"])
            x_coord = ds.ca["X"].reshape((-1, 1))
            y_coord = ds.ca["Y"].reshape((-1, 1))
            data = ds[:, :].T

        # Name of each cluster id, taken from the first cell carrying that id;
        # ids that never occur keep an empty name
        u_labels, u_index = np.unique(labels.ravel(), return_index=True)
        cell_types = ["" for _ in range(max(u_labels) + 1)]
        for i, index in zip(u_labels, u_index):
            cell_types[i] = tmp_cell_types[index]
        cell_types = np.asarray(cell_types, dtype=str)

        self.populate_from_data(
            X=data,
            labels=labels,
            gene_names=gene_names,
            cell_types=cell_types,
            cell_attributes_dict={"x_coord": x_coord, "y_coord": y_coord},
            remap_attributes=False,
        )
        major_clusters = {
            (3, 2): "Astrocytes",
            (7, 26): "Endothelials",
            (18, 17, 14, 19, 15, 16, 20): "Inhibitory",
            (29, 28): "Microglias",
            (32, 33, 30, 22, 21): "Oligodendrocytes",
            (9, 8, 10, 6, 5, 4, 12, 1, 13): "Pyramidals",
        }
        if self.use_high_level_cluster:
            self.map_cell_types(major_clusters)
            # Keep exactly the major cell types, in the order they are listed above
            self.filter_cell_types(list(major_clusters.values()))

        self.remap_categorical_attributes()

Source File: loompy.py From loompy with BSD 2-Clause "Simplified" License

4 votes

def combine(files: List[str], output_file: str, key: str = None, file_attrs: Dict[str, str] = None, batch_size: int = 1000, convert_attrs: bool = False) -> None:
	"""
	Combine two or more loom files and save as a new loom file

	Args:
		files (list of str):    the list of input files (full paths)
		output_file (str):      full path of the output loom file
		key (string):           Row attribute to use to verify row ordering
		file_attrs (dict):      file attributes (title, description, url, etc.)
		batch_size (int):       limits the batch or cols/rows read in memory (default: 1000)
		convert_attrs (bool):   convert file attributes that differ between files into column attributes

	Returns:
		Nothing, but creates a new loom file combining the input files.
		Note that the loom files must have exactly the same
		number of rows, and must have exactly the same column attributes.
		Named layers not present in the first file are discarded.

		.. warning::
			If you don't give a ``key`` argument, the files will be combined without changing
			the ordering of rows or columns. Row attributes will be taken from the first file.
			Hence, if rows are not in the same order in all files, the result may be meaningless.

		To guard against this issue, you are strongly advised to provide a ``key`` argument,
		which is used to sort files while merging. The ``key`` should be the name of a row
		attribute that contains a unique value for each row. For example, to order rows by
		the attribute ``Accession``:

		.. highlight:: python
		.. code-block:: python

			import loompy
			loompy.combine(files, output_file, key="Accession")

	Raises:
		ValueError: if ``files`` is empty.
	"""
	if file_attrs is None:
		file_attrs = {}

	if len(files) == 0:
		raise ValueError("The input file list was empty")

	# The output starts as a copy of the first input; the remaining files are appended to it
	copyfile(files[0], output_file)

	ds = connect(output_file)
	try:
		for name, value in file_attrs.items():
			ds.attrs[name] = value

		for f in files[1:]:
			ds.add_loom(f, key, batch_size=batch_size, convert_attrs=convert_attrs)
	finally:
		# Close the output file even if setting an attribute or appending a file fails
		ds.close()

Python loompy.connect() Examples