Python pysam.Tabixfile() Examples

The following are 5 code examples of pysam.Tabixfile(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module pysam, or try the search function.
Example #1
Source File: annotations.py    From svviz with MIT License 5 votes vote down vote up
def tabix(self):
        """Return the cached tabix handle, opening it on first access.

        The handle is created from ``self.tabixPath`` with ``pysam.Tabixfile``
        only when ``self._tabix`` is still ``None``; afterwards the stored
        object is returned unchanged.
        """
        if self._tabix is not None:
            return self._tabix

        self._tabix = pysam.Tabixfile(self.tabixPath)
        return self._tabix
Example #2
Source File: tabix.py    From svviz with MIT License 5 votes vote down vote up
def ensureIndexed(bedPath, preset="bed", trySorting=True):
    """Make sure an annotation file is bgzf-compressed and tabix-indexed.

    If ``bedPath`` does not end in ".gz", a compressed copy is created next to
    it (unless one already exists) and that copy is used from then on. A
    ".tbi" index is created when missing. Finally the first record is checked
    for the minimum number of tab-delimited fields required by ``preset``.

    Args:
        bedPath: Path to the annotation file (plain text or ".gz").
        preset: Preset name forwarded to ``pysam.tabix_index``; "bed" and
            "gff" additionally get a field-count check.
        trySorting: Not used by this function; kept so existing callers that
            pass it keep working.

    Returns:
        The path of the compressed, indexed file.

    Raises:
        Exception: If the compressed file or the index does not exist after
            the attempt to create it.
        AnnotationError: If the file contains no records, or the first record
            has too few fields for the preset.
    """
    if not bedPath.endswith(".gz"):
        compressedPath = bedPath + ".gz"
        if not os.path.exists(compressedPath):
            logging.info("bgzf compressing {}".format(bedPath))
            pysam.tabix_compress(bedPath, compressedPath)
            if not os.path.exists(compressedPath):
                raise Exception("Failed to create compress {preset} file for {file}; make sure the {preset} file is "
                    "sorted and the directory is writeable".format(preset=preset, file=bedPath))
        bedPath = compressedPath

    if not os.path.exists(bedPath+".tbi"):
        logging.info("creating tabix index for {}".format(bedPath))
        pysam.tabix_index(bedPath, preset=preset)
        if not os.path.exists(bedPath+".tbi"):
            raise Exception("Failed to create tabix index file for {file}; make sure the {preset} file is "
                "sorted and the directory is writeable".format(preset=preset, file=bedPath))

    # Only the first record is needed; close the handle explicitly so the
    # underlying file is not left open after validation.
    tabixFile = pysam.Tabixfile(bedPath)
    try:
        line = next(tabixFile.fetch(), None)
    finally:
        tabixFile.close()

    # An empty file used to surface as a bare StopIteration from next().
    if line is None:
        raise AnnotationError("No records found in {file}".format(file=bedPath))

    fieldCount = len(line.strip().split("\t"))
    if fieldCount < 6 and preset == "bed":
        raise AnnotationError("BED files need to have at least 6 (tab-delimited) fields (including "
            "chrom, start, end, name, score, strand; score is unused)")
    if fieldCount < 9 and preset == "gff":
        raise AnnotationError("GFF/GTF files need to have at least 9 tab-delimited fields")

    return bedPath


# def sortFile(uncompressedPath, preset):
#     if preset == "bed":
#         fields = {"chrom":0, "start":1, "end":2}
#     elif preset == "gff":
#         fields = {"chrom":0, "start":3, "end":4}

#     sortCommand = "sort -k{chrom}V -k{start}n -k{end}n".format(**fields)

#     tabixCommand = "{sort} {path} | bgzip > {path}.gz".format(sort=sortCommand, path=uncompressedPath)

#     logging.info("Trying to sort input annotation file with command:")
#     logging.info("  {}".format(tabixCommand))

#     subprocess.check_call(tabixCommand, shell=True) 
Example #3
Source File: bed.py    From vgraph with Apache License 2.0 5 votes vote down vote up
def tabix(self):
        """Return the tabix index for this BedFile, opening it if needed.

        When ``self._tabix`` is falsy, ``pysam`` is imported locally and a
        ``pysam.Tabixfile`` for ``self.filename`` is stored on the instance.
        """
        if not self._tabix:
            # Imported here, as in the original, so pysam is only required
            # when the index is actually opened.
            import pysam
            self._tabix = pysam.Tabixfile(self.filename)

        return self._tabix
Example #4
Source File: bedgraph.py    From NucleoATAC with MIT License 5 votes vote down vote up
def __init__(self, bedgraph):
        """Open ``bedgraph`` with ``pysam.Tabixfile`` and keep it as ``self.tbx``."""
        handle = pysam.Tabixfile(bedgraph)
        self.tbx = handle
Example #5
Source File: vcf_utils.py    From metasv with BSD 2-Clause "Simplified" License 5 votes vote down vote up
def merge_vcfs(in_vcfs_dir, contigs, out_vcf):
    """Concatenate per-contig VCFs into one file and tabix-index the result.

    For each contig, ``<in_vcfs_dir>/<contig.name>.vcf.gz`` is read if it
    exists; contigs without such a file are skipped. The header is taken from
    the first file found and written once, followed by every record of every
    file in the order of ``contigs``.

    Args:
        in_vcfs_dir: Directory holding the per-contig ".vcf.gz" files.
        contigs: Iterable of objects with a ``name`` attribute.
        out_vcf: Path of the merged VCF to write; it is then passed to
            ``pysam.tabix_index`` with ``preset="vcf"`` and ``force=True``.
    """
    logger.info("Mergings per-chromosome VCFs from %s" % in_vcfs_dir)
    header_done = False
    with open(out_vcf, "w") as out_vcf_file:
        for contig in contigs:
            chr_vcf = os.path.join(in_vcfs_dir, "%s.vcf.gz" % contig.name)
            if not os.path.isfile(chr_vcf):
                continue

            chr_tabix_file = pysam.Tabixfile(chr_vcf)
            try:
                if not header_done:
                    print_header(chr_tabix_file.header, out_vcf_file)
                    # Previously never set, so every per-contig file
                    # re-emitted its header into the merged output.
                    header_done = True
                for entry in chr_tabix_file.fetch():
                    out_vcf_file.write("%s\n" % entry)
            finally:
                chr_tabix_file.close()

    # Index only after the output file has been flushed and closed.
    pysam.tabix_index(out_vcf, force=True, preset="vcf")