Python pysam.TabixFile() Examples
The following are 14 code examples of pysam.TabixFile(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module pysam, or try the search function.
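Before the project examples, here is a minimal sketch of the typical TabixFile workflow, assuming a hypothetical bgzip-compressed, tabix-indexed BED file named data.bed.gz (with its data.bed.gz.tbi index alongside):

import pysam

# Open the hypothetical indexed file; pysam looks for data.bed.gz.tbi next to it.
tbx = pysam.TabixFile("data.bed.gz")

# Contigs listed in the index.
print(tbx.contigs)

# fetch() yields one string per record overlapping the region
# (coordinates are 0-based, half-open).
for row in tbx.fetch("chr1", 1000000, 1010000):
    print(row.split("\t"))

# With parser=pysam.asTuple(), rows come back as indexable tuple proxies instead.
for row in tbx.fetch("chr1", 1000000, 1010000, parser=pysam.asTuple()):
    print(row[0], row[1], row[2])

tbx.close()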
Example #1
Source File: BedGraphTrack.py From pyGenomeTracks with GNU General Public License v3.0 | 6 votes |
def __init__(self, properties_dict):
    super(BedGraphTrack, self).__init__(properties_dict)
    self.load_file()

    self.tbx2 = None
    self.interval_tree2 = None
    if 'second_file' in self.properties['operation']:
        if self.properties['second_file'] is None:
            raise InputError("operation: {} requires to set the parameter"
                             " second_file."
                             "".format(self.properties['operation']))
        else:
            if self.properties['second_file'].endswith(".bgz"):
                # from the tabix file it is not possible to know the
                # global min and max
                try:
                    self.tbx2 = pysam.TabixFile(self.properties['second_file'])
                except IOError:
                    self.interval_tree2, __, __ = file_to_intervaltree(self.properties['second_file'])
            # load the file as an interval tree
            else:
                self.interval_tree2, __, __ = file_to_intervaltree(self.properties['second_file'])
Example #2
Source File: NFRCalling.py From NucleoATAC with MIT License | 6 votes |
def findNFRs(self):
    """find NFR regions"""
    region = np.ones(self.length())
    tbx = pysam.TabixFile(self.params.calls)
    nucs = []
    if self.chrom in tbx.contigs:
        for row in tbx.fetch(self.chrom, self.start, self.end, parser=pysam.asTuple()):
            nucs.append(int(row[1]))
    for j in xrange(1, len(nucs)):
        left = nucs[j - 1] + 73
        right = nucs[j] - 72
        if right <= left:
            continue
        candidate = NFR(left, right, self)
        if candidate.min_upper < self.params.max_occ_upper and candidate.occ < self.params.max_occ:
            self.nfrs.append(candidate)
Example #3
Source File: samtools_variants.py From ariba with GNU General Public License v3.0 | 6 votes |
def _get_read_depths(cls, read_depths_file, sequence_name, position):
    '''Returns total read depth and depth of reads supporting alternative (if present)'''
    assert os.path.exists(read_depths_file)
    assert os.path.exists(read_depths_file + '.tbi')
    tbx = pysam.TabixFile(read_depths_file)
    try:
        rows = [x for x in tbx.fetch(sequence_name, position, position + 1)]
    except:
        return None

    if len(rows) > 1:  # which happens with indels, multiple lines for same base of reference
        test_rows = [x for x in rows if x.rstrip().split()[3] != '.']
        if len(test_rows) != 1:
            rows = [rows[-1]]
        else:
            rows = test_rows

    if len(rows) == 1:
        r, p, ref_base, alt_base, ref_counts, alt_counts = rows[0].rstrip().split()
        bases = ref_base if alt_base == '.' else ref_base + ',' + alt_base
        return bases, int(ref_counts), alt_counts
    else:
        return None
Example #4
Source File: bedtrack.py From genomeview with MIT License | 5 votes |
def fetch_from_tabix(path, chrom, start, end):
    import pysam

    bed = pysam.TabixFile(path)

    chrom = match_chrom_format(chrom, bed.contigs)
    for locus in bed.fetch(chrom, start, end):
        locus = locus.split()
        yield tx_from_bedfields(locus)
Example #5
Source File: feature.py From igv-reports with MIT License | 5 votes |
def __init__(self, path):
    tabix = None
    if path.endswith(".gz"):
        # Might be a tabix file
        try:
            tabix = pysam.TabixFile(path)
        except:
            tabix = None

    if tabix:
        self.reader = _Tabix(tabix)
    else:
        self.reader = _NonIndexed(path)
Example #6
Source File: BedGraphTrack.py From pyGenomeTracks with GNU General Public License v3.0 | 5 votes |
def load_file(self):
    self.tbx = None
    # try to load a tabix file if available
    if self.properties['file'].endswith(".bgz"):
        # from the tabix file it is not possible to know the
        # global min and max
        try:
            self.tbx = pysam.TabixFile(self.properties['file'])
        except IOError:
            self.interval_tree, __, __ = file_to_intervaltree(self.properties['file'])
    # load the file as an interval tree
    else:
        self.interval_tree, __, __ = file_to_intervaltree(self.properties['file'])
    self.num_fields = None
Example #7
Source File: test_clinvar_match.py From CharGer with GNU General Public License v3.0 | 5 votes |
def clinvar_tabix(test_root):
    return TabixFile(
        str(
            test_root.joinpath("examples/annotations/clinvar_chrom_22_only.b37.tsv.gz")
        ),
        encoding="utf8",
    )
Example #8
Source File: call_readclouds.py From grocsvs with MIT License | 5 votes |
def load_fragments(options, sample, dataset, chrom=None, start=None, end=None,
                   usecols=None, min_reads_per_frag=1):
    if start is not None:
        if start < 0:
            raise Exception("start coord is negative: {}:{}-{}".format(chrom, start, end))
    if end is not None:
        if start >= end:
            raise Exception("end coord is before start: {}:{}-{}".format(chrom, start, end))

    readclouds_path = os.path.join(
        options.results_dir,
        "CombineReadcloudsStep",
        "readclouds.{}.{}.tsv.gz".format(sample.name, dataset.id))

    tabix = pysam.TabixFile(readclouds_path)

    if chrom is not None and chrom not in tabix.contigs:
        # print("MISSING:", chrom)
        return pandas.DataFrame(columns="chrom start_pos end_pos bc num_reads obs_len hap".split())

    if usecols is not None and "num_reads" not in usecols:
        usecols.append("num_reads")

    s = StringIO.StringIO("\n".join(tabix.fetch(chrom, start, end)))
    readclouds = pandas.read_table(s, header=None, names=Readcloud._fields, usecols=usecols)
    readclouds["chrom"] = readclouds["chrom"].astype("string")

    if min_reads_per_frag > 0:
        readclouds = readclouds.loc[readclouds["num_reads"] > min_reads_per_frag]

    return readclouds
Example #9
Source File: file_utils.py From pheweb with GNU Affero General Public License v3.0 | 5 votes |
def IndexedVariantFileReader(phenocode):
    filepath = common_filepaths['pheno_gz'](phenocode)
    with read_gzip(filepath) as f:
        reader = csv.reader(f, dialect='pheweb-internal-dialect')
        fields = next(reader)
    if fields[0].startswith('#'):
        # previous version of PheWeb commented the header line
        fields[0] = fields[0][1:]
    for field in fields:
        assert field in conf.parse.per_variant_fields or field in conf.parse.per_assoc_fields, field
    colidxs = {field: idx for idx, field in enumerate(fields)}
    with pysam.TabixFile(filepath, parser=None) as tabix_file:
        yield _ivfr(tabix_file, colidxs)
Example #10
Source File: file_utils.py From pheweb with GNU Affero General Public License v3.0 | 5 votes |
def context(self):
    with pysam.TabixFile(self._filepath, parser=None) as tabix_file:
        yield _mr(tabix_file, self._colidxs, self._colidxs_for_pheno, self._info_for_pheno)
Example #11
Source File: samplot.py From samplot with MIT License | 5 votes |
def get_tabix_iter(chrm, start, end, datafile):
    """Gets an iterator from a tabix BED/GFF/GFF3 file

    Used to avoid chrX vs. X notation issues when extracting data
    from annotation files
    """
    tbx = pysam.TabixFile(datafile)
    itr = None
    try:
        itr = tbx.fetch(chrm, max(0, start - 1000), end + 1000)
    except ValueError:
        # try and account for chr/no chr prefix
        if chrm[:3] == "chr":
            chrm = chrm[3:]
        else:
            chrm = "chr" + chrm

        try:
            itr = tbx.fetch(chrm, max(0, start - 1000), end + 1000)
        except ValueError as e:
            print(
                "Warning: Could not fetch "
                + chrm
                + ":"
                + str(start)
                + "-"
                + str(end)
                + " from "
                + datafile,
                file=sys.stderr,
            )
            print(e)
    return itr
Example #12
Source File: datasets.py From grocsvs with MIT License | 4 votes |
def validate(self):
    assert os.path.exists(self.bam), "missing bam file '{}' for sample '{}' and dataset '{}'".format(
        self.bam, self.sample.name, self.id)

# @staticmethod
# def from_longranger_dir(self, longranger_dir):
#     fragments = os.path.join(longranger_dir,
#                              "PHASER_SVCALLER_CS/PHASER_SVCALLER/_REPORTER/"
#                              "REPORT_SINGLE_PARTITION/fork0/files/fragments.h5")
#     bam = os.path.join(longranger_dir,
#                        "PHASER_SVCALLER_CS/PHASER_SVCALLER/ATTACH_PHASING/"
#                        "fork0/files/phased_possorted_bam.bam")
#     phased_fragments = os.path.join(longranger_dir,
#                                     "10XSARCOMAC1/PHASER_SVCALLER_CS/PHASER_SVCALLER/"
#                                     "_SNPINDEL_PHASER/PHASE_SNPINDELS/fork0/files/"
#                                     "fragment_phasing.tsv.gz")
#     self.validate()
#     return TenXDataset(bam, fragments, phased_fragments)

# def load_phased_fragments(self, chrom=None, start=None, end=None):
#     columns = ["chrom", "start_pos", "end_pos", "phase_set", "ps_start",
#                "ps_end", "bc", "h0", "h1", "hmix", "unkn"]
#     try:
#         tabix = pysam.TabixFile(self.phased_fragments)
#         s = StringIO.StringIO("\n".join(tabix.fetch(chrom, start, end)))
#         frags = pandas.read_table(s)
#         frags.columns = columns
#     except (IOError, ValueError):
#         frags = pandas.DataFrame(columns=columns)
#     return frags

# def load_fragments(self, chrom=None, start=None, end=None):
#     tabix = pysam.TabixFile()
#     try:
#         fragments = utilities.read_data_frame(self.fragments)
#         goodbcs = utilities.get_good_barcodes(fragments)
#         fragments = fragments.loc[fragments["bc"].isin(goodbcs)]
#         # fragments = fragments.loc[fragments["num_reads"]>5]
#         if chrom is not None:
#             fragments = fragments.loc[fragments["chrom"]==chrom]
#         return fragments
#     except:
#         logging.exception("Unable to load fragments from fragments file "
#                           "'{}'".format(self.fragments))
#         raise
Example #13
Source File: _ingest.py From cooler with BSD 3-Clause "New" or "Revised" License | 4 votes |
def __init__(
    self, filepath, chromsizes, bins, map=map, n_chunks=1, is_one_based=False, **kwargs
):
    try:
        import pysam
    except ImportError:
        raise ImportError("pysam is required to read tabix files")

    import dill
    import pickle

    dill.settings["protocol"] = pickle.HIGHEST_PROTOCOL
    self._map = map
    self.n_chunks = n_chunks
    self.is_one_based = bool(is_one_based)
    self.C2 = kwargs.pop("C2", 3)
    self.P2 = kwargs.pop("P2", 4)

    # all requested contigs will be placed in the output matrix
    self.gs = GenomeSegmentation(chromsizes, bins)

    # find available contigs in the contact list
    self.filepath = filepath
    self.n_records = None
    with pysam.TabixFile(filepath, "r", encoding="ascii") as f:
        try:
            self.file_contigs = [c.decode("ascii") for c in f.contigs]
        except AttributeError:
            self.file_contigs = f.contigs
        if not len(self.file_contigs):
            raise RuntimeError("No reference sequences found.")

    # warn about requested contigs not seen in the contact list
    for chrom in self.gs.contigs:
        if chrom not in self.file_contigs:
            warnings.warn(
                "Did not find contig "
                + " '{}' in contact list file.".format(chrom)
            )

    warnings.warn(
        "NOTE: When using the Tabix aggregator, make sure the order of "
        "chromosomes in the provided chromsizes agrees with the chromosome "
        "ordering of read ends in the contact list file."
    )
Example #14
Source File: read_store.py From ariba with GNU General Public License v3.0 | 4 votes |
def get_reads(self, cluster_name, out1, out2=None, fasta=False, log_fh=None, wanted_ids=None):
    total_reads = 0
    total_bases = 0
    if log_fh is not None:
        print('Getting reads for', cluster_name, 'from', self.outfile, file=log_fh)

    tabix_file = pysam.TabixFile(self.outfile)
    f_out1 = pyfastaq.utils.open_file_write(out1)

    if out2 is None:
        f_out2 = f_out1
    else:
        f_out2 = pyfastaq.utils.open_file_write(out2)

    for line in tabix_file.fetch(reference=cluster_name):
        cluster, number, seq, qual = line.rstrip().split()
        number = int(number)
        if wanted_ids is not None:
            new_number = number if number % 2 else number - 1
            if new_number not in wanted_ids:
                continue

        if number % 2 == 0:
            if fasta:
                print('>' + str(number - 1) + '/2', seq, sep='\n', file=f_out2)
            else:
                print('@' + str(number - 1) + '/2', seq, '+', qual, sep='\n', file=f_out2)
        else:
            if fasta:
                print('>' + str(number) + '/1', seq, sep='\n', file=f_out1)
            else:
                print('@' + str(number) + '/1', seq, '+', qual, sep='\n', file=f_out1)

        total_reads += 1
        total_bases += len(qual)

    pyfastaq.utils.close(f_out1)
    if out2 is not None:
        pyfastaq.utils.close(f_out2)

    if log_fh is not None:
        print('Finished getting reads for', cluster_name, 'from', self.outfile, file=log_fh)

    return total_reads, total_bases