Python pysam.FastaFile() Examples
The following are 30
code examples of pysam.FastaFile().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module
pysam
, or try the search function
.
Example #1
Source File: bindetect_functions.py From TOBIAS with MIT License | 6 votes |
def get_gc_content(regions, fasta): """ Get GC content from regions in fasta """ nuc_count = {"T":0, "t":0, "A":0, "a":0, "G":1, "g":1, "C":1, "c":1} gc = 0 total = 0 fasta_obj = pysam.FastaFile(fasta) for region in regions: seq = fasta_obj.fetch(region.chrom, region.start, region.end) gc += sum([nuc_count.get(nuc, 0.5) for nuc in seq]) total += region.end - region.start fasta_obj.close() gc_content = gc / float(total) return(gc_content) #---------------------------------------------------------------------------------------------------------# #------------------------------------------- Main functions ----------------------------------------------# #---------------------------------------------------------------------------------------------------------#
Example #2
Source File: fasta.py From igv-reports with MIT License | 6 votes |
def get_data(fasta_file,region=None): if None == region: with open(fasta_file,"r") as f: return(f.read()) else : if isinstance(region,str): region = regions.parse_region(region) chr = region["chr"] start = region["start"] - 1 end = region["end"] fasta = pysam.FastaFile(fasta_file) slice_seq = fasta.fetch(chr, start, end) fasta.close() return slice_seq
Example #3
Source File: util.py From pomoxis with Mozilla Public License 2.0 | 6 votes |
def reverse_bed(): """Convert bed-file coordinates to coordinates on the reverse strand.""" parser = argparse.ArgumentParser( prog='reverse_bed', description='Convert bed-file coordinates to coordinates on the reverse strand.', formatter_class=argparse.ArgumentDefaultsHelpFormatter) parser.add_argument('bed_in', help='Input bed file.') parser.add_argument('ref_fasta', help='Input reference fasta file.') parser.add_argument('bed_out', help='Output bed file.') args = parser.parse_args() fasta = pysam.FastaFile(args.ref_fasta) lengths = dict(zip(fasta.references, fasta.lengths)) d = pd.read_csv(args.bed_in, sep='\t', names=['chrom', 'start', 'stop']) d['chrom_length'] = d['chrom'].map(lambda x: lengths[x]) d['rc_stop'] = d['chrom_length'] - d['start'] d['rc_start'] = d['chrom_length'] - d['stop'] d['chrom_rc'] = d['chrom'] + '_rc' d[['chrom_rc', 'rc_start', 'rc_stop']].to_csv(args.bed_out, index=False, header=False, sep='\t')
Example #4
Source File: backend.py From genomelake with BSD 3-Clause "New" or "Revised" License | 6 votes |
def extract_fasta_to_file(fasta, output_dir, mode="bcolz", overwrite=False): assert mode in _array_writer makedirs(output_dir, exist_ok=overwrite) fasta_file = FastaFile(fasta) file_shapes = {} for chrom, size in zip(fasta_file.references, fasta_file.lengths): data = np.zeros((size, NUM_SEQ_CHARS), dtype=np.float32) seq = fasta_file.fetch(chrom) one_hot_encode_sequence(seq, data) file_shapes[chrom] = data.shape _array_writer[mode](data, os.path.join(output_dir, chrom)) with open(os.path.join(output_dir, "metadata.json"), "w") as fp: json.dump( { "file_shapes": file_shapes, "type": "array_{}".format(mode), "source": fasta, }, fp, )
Example #5
Source File: orf_test.py From mikado with GNU Lesser General Public License v3.0 | 6 votes |
def test_neg_in_serialise(self): line = "tr_c114_g1_i1.mrna1.89\tProdigal_v2.6.3\tCDS\t2\t205\t28.7\t-\t0\t\ ID=85_1;partial=11;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.407;\ conf=99.86;score=28.71;cscore=27.10;sscore=1.61;rscore=0.00;uscore=0.00;tscore=1.61;" line = GFF.GffLine(line) self.assertFalse(line.header) self.assertIsNotNone(line.id) logger = create_default_logger("test_neg_in_serialise", "DEBUG") fasta = pkg_resources.resource_filename("Mikado.tests", "mikado_prepared.fasta") fai = pysam.FastaFile(fasta) bed = bed12.BED12(line, logger=logger, max_regression=0.1, start_adjustment=True, fasta_index=fai, transcriptomic=True) self.assertFalse(bed.invalid, bed.invalid_reason)
Example #6
Source File: test_system_calls.py From mikado with GNU Lesser General Public License v3.0 | 5 votes |
def setUpClass(cls): cls.fai = pysam.FastaFile(pkg_resources.resource_filename("Mikado.tests", "chr5.fas.gz"))
Example #7
Source File: fasta_utils.py From brie with Apache License 2.0 | 5 votes |
def __init__(self, fasta_file): self.f = pysam.FastaFile(fasta_file)
Example #8
Source File: fasta_utils.py From models with MIT License | 5 votes |
def __init__(self, fasta_file): self.f = pysam.FastaFile(fasta_file)
Example #9
Source File: SlamSeqFile.py From slamdunk with GNU Affero General Public License v3.0 | 5 votes |
def __init__(self, bamFile, referenceFile, snps): self._bamFile = pysam.AlignmentFile(bamFile, "rb") # Get version from BAM file self.bamVersion = "None" if('PG' in self._bamFile.header): for pg in self._bamFile.header['PG']: if pg['ID'] == "slamdunk": self.bamVersion = pg['VN'] self._referenceFile = pysam.FastaFile(referenceFile) self._snps = snps
Example #10
Source File: checking.py From mikado with GNU Lesser General Public License v3.0 | 5 votes |
def __setstate__(self, state): self.__dict__.update(state) create_queue_logger(self) self.fasta = pysam.FastaFile(self.__fasta)
Example #11
Source File: bed12.py From mikado with GNU Lesser General Public License v3.0 | 5 votes |
def __set_fasta_index(fasta_index): if isinstance(fasta_index, dict): # check that this is a bona fide dictionary ... assert isinstance( fasta_index[numpy.random.choice(fasta_index.keys(), 1)], Bio.SeqRecord.SeqRecord) elif fasta_index is not None: if isinstance(fasta_index, (str, bytes)): if isinstance(fasta_index, bytes): fasta_index = fasta_index.decode() assert os.path.exists(fasta_index) fasta_index = pysam.FastaFile(fasta_index) else: assert isinstance(fasta_index, pysam.FastaFile), type(fasta_index) return fasta_index
Example #12
Source File: test_modifications.py From mikado with GNU Lesser General Public License v3.0 | 5 votes |
def setUpClass(cls): cls.fai = pysam.FastaFile(pkg_resources.resource_filename("Mikado.tests", "chr5.fas.gz"))
Example #13
Source File: test_modifications.py From mikado with GNU Lesser General Public License v3.0 | 5 votes |
def test_basic_padding(self): logger = create_null_logger("test_basic_padding") logger.setLevel("INFO") template = self.reference.copy() template.id = "AT5G66600.3_exp" template.strip_cds() template.unfinalize() template.remove_exon((26575000, 26575410)) # First exon template.start = 26574650 template.add_exon((26574970, 26575410)) # New exon, template at 5' template.add_exon((26574650, 26574820)) # New UTR exon template.remove_exon((26578519, 26578625)) # Last exon template.end = 26579700 template.add_exon((26578519, 26578725)) template.add_exon((26579325, 26579700)) template.finalize() fai = pysam.FastaFile(pkg_resources.resource_filename("Mikado.tests", "chr5.fas.gz")) new5 = expand_transcript(self.reference, None, template, fai, logger) self.assertIn((26574970, 26575410), new5.exons) self.assertIn((26574650, 26574820), new5.exons) self.assertEqual(template.start, new5.start) self.assertEqual(self.reference.end, new5.end) new3 = expand_transcript(self.reference, template, None, fai, logger) self.assertIn((26578519, 26578725), new3.exons) self.assertIn((26579325, 26579700), new3.exons) self.assertEqual(self.reference.start, new3.start) self.assertEqual(template.end, new5.end) new53 = expand_transcript(self.reference, template, template, fai, logger) self.assertIn((26574970, 26575410), new53.exons) self.assertIn((26574650, 26574820), new53.exons) self.assertIn((26578519, 26578725), new53.exons) self.assertIn((26579325, 26579700), new53.exons) self.assertEqual(template.start, new53.start) self.assertEqual(template.end, new53.end)
Example #14
Source File: test_system_calls.py From mikado with GNU Lesser General Public License v3.0 | 5 votes |
def setUpClass(cls): cls.fai = pysam.FastaFile(pkg_resources.resource_filename("Mikado.tests", "chr5.fas.gz"))
Example #15
Source File: tcounter.py From slamdunk with GNU Affero General Public License v3.0 | 5 votes |
def genomewideReadSeparation(referenceFile, snpsFile, bam, minBaseQual, outputBAMPrefix, conversionThreshold, log): ref = pysam.FastaFile(referenceFile) snps = SNPtools.SNPDictionary(snpsFile) snps.read() # Go through one chr after the other testFile = SlamSeqBamFile(bam, referenceFile, snps) samFile = pysam.AlignmentFile(bam, "rb") chromosomes = testFile.getChromosomes() backgroundReadFileName = outputBAMPrefix + "_backgroundReads.bam" tcReadFileName = outputBAMPrefix + "_TCReads.bam" backgroundReadFile = pysam.AlignmentFile(backgroundReadFileName, "wb", template=samFile) tcReadFile = pysam.AlignmentFile(tcReadFileName, "wb", template=samFile) tcReadDict = dict() for chromosome in chromosomes: readIterator = testFile.readsInChromosome(chromosome, minBaseQual, conversionThreshold) for read in readIterator: if (read.isTcRead) : tcReadDict[read.name] = 0 for read in samFile.fetch(): if read.query_name in tcReadDict: tcReadFile.write(read) else: backgroundReadFile.write(read) backgroundReadFile.close() tcReadFile.close() pysamIndex(backgroundReadFileName) pysamIndex(tcReadFileName)
Example #16
Source File: test_system_calls.py From mikado with GNU Lesser General Public License v3.0 | 5 votes |
def setUpClass(cls): cls.fai = pysam.FastaFile(pkg_resources.resource_filename("Mikado.tests", "chr5.fas.gz"))
Example #17
Source File: prepare_misc_test.py From mikado with GNU Lesser General Public License v3.0 | 5 votes |
def setUpClass(cls): cls.fasta = pkg_resources.resource_filename("Mikado.tests", "chr5.fas.gz") cls.fai = pysam.FastaFile(cls.fasta)
Example #18
Source File: locus_test.py From mikado with GNU Lesser General Public License v3.0 | 5 votes |
def setUpClass(cls): cls.fai = pysam.FastaFile(pkg_resources.resource_filename("Mikado.tests", "chr5.fas.gz"))
Example #19
Source File: locus_test.py From mikado with GNU Lesser General Public License v3.0 | 5 votes |
def setUpClass(cls): cls.fai = pysam.FastaFile(pkg_resources.resource_filename("Mikado.tests", "chr5.fas.gz"))
Example #20
Source File: test_transcript_checker.py From mikado with GNU Lesser General Public License v3.0 | 5 votes |
def setUpClass(cls): # Prepare the genome cls.fasta = pkg_resources.resource_filename("Mikado.tests", "chr5.fas.gz") # cls.fasta_temp = tempfile.NamedTemporaryFile(mode="wb", delete=False, suffix=".fa.gz") # cls.fasta_temp.write(pkg_resources.resource_stream("Mikado.tests", "chr5.fas.gz").read()) cls.fasta = pysam.FastaFile(cls.fasta) cls._model = Transcript() cls._model.chrom = "Chr5" cls._model.start, cls._model.end, cls._model.strand, cls._model.id, cls._model.parent = ( 26584797, 26595528, "+", "c58_g1_i3.mrna1.19", "c58_g1_i3.path1.19" ) cls._model.add_exons([(26584797, 26584879), (26585220, 26585273), (26585345, 26585889), (26585982, 26586294), (26586420, 26586524), (26586638, 26586850), (26586934, 26586996), (26587084, 26587202), (26587287, 26587345), (26587427, 26587472), (26595411, 26595528)]) cls._model.finalize() cls._exons = [cls.fasta.fetch(cls._model.chrom, line[0] - 1, line[1]) for line in sorted(cls._model.exons)] # We need the whole genomic fragment cls._model_fasta = cls.fasta.fetch(cls._model.chrom, cls._model.start - 1, cls._model.end)
Example #21
Source File: blast_serialiser.py From mikado with GNU Lesser General Public License v3.0 | 5 votes |
def __determine_sequences(self, query_seqs, target_seqs): """Private method to assign the sequence file variables if necessary. :param query_seqs: :param target_seqs: :return: """ if isinstance(query_seqs, str): assert os.path.exists(query_seqs) self.query_seqs = pysam.FastaFile(query_seqs) elif query_seqs is None: self.query_seqs = None else: self.logger.warn("Query type: %s", type(query_seqs)) # assert "SeqIO.index" in repr(query_seqs) self.query_seqs = query_seqs self.target_seqs = [] for target in target_seqs: if not os.path.exists(target): raise ValueError("{} not found!".format(target)) self.target_seqs.append(pysam.FastaFile(target)) return
Example #22
Source File: orf.py From mikado with GNU Lesser General Public License v3.0 | 5 votes |
def line_parser_func(handle, fai, send_queue): fai = pysam.FastaFile(fai) for num, line in enumerate(open(handle)): if line[0] == "#": send_queue.put((num, line, None)) else: _f = line.split("\t") if _f[0] not in fai: seq = None else: seq = zlib.compress(fai[line.split("\t")[0]].encode(), 1) send_queue.put_nowait((num, line, seq)) send_queue.put("EXIT")
Example #23
Source File: fabgz.py From biocommons.seqrepo with Apache License 2.0 | 5 votes |
def close(self): if self._fh: self._fh.close() self._fh = None subprocess.check_call([self._bgzip_exe, "--force", self._basepath]) os.rename(self._basepath + ".gz", self.filename) # open file with FastaFile to create indexes, then make all read-only _fh = FastaFile(self.filename) _fh.close() os.chmod(self.filename, stat.S_IRUSR | stat.S_IRGRP | stat.S_IROTH) os.chmod(self.filename + ".fai", stat.S_IRUSR | stat.S_IRGRP | stat.S_IROTH) os.chmod(self.filename + ".gzi", stat.S_IRUSR | stat.S_IRGRP | stat.S_IROTH) _logger.info("{} written; added {} sequences".format(self.filename, len(self._added)))
Example #24
Source File: fasta_utils.py From models with MIT License | 5 votes |
def __init__(self, fasta_file): self.f = pysam.FastaFile(fasta_file)
Example #25
Source File: dataloader.py From models with MIT License | 5 votes |
def __init__(self, fasta_file): self.f = pysam.FastaFile(fasta_file)
Example #26
Source File: fasta_utils.py From models with MIT License | 5 votes |
def __init__(self, fasta_file): self.f = pysam.FastaFile(fasta_file)
Example #27
Source File: genomesource.py From genomeview with MIT License | 5 votes |
def fasta(self): if self._fasta is None: # import pyfaidx # self._fasta = pyfaidx.Fasta(self.path, as_raw=True) import pysam self._fasta = pysam.FastaFile(self.path) return self._fasta
Example #28
Source File: test_references.py From ga4gh-server with Apache License 2.0 | 5 votes |
def __init__(self, referenceSetId, fastaFile): super(ReferenceSetTest, self).__init__(referenceSetId, fastaFile) self._fastaFile = pysam.FastaFile(fastaFile)
Example #29
Source File: references.py From ga4gh-server with Apache License 2.0 | 5 votes |
def openFile(self, dataFile): return pysam.FastaFile(dataFile)
Example #30
Source File: extractors.py From genomelake with BSD 3-Clause "New" or "Revised" License | 5 votes |
def __init__(self, datafile, use_strand=False, **kwargs): """Fasta file extractor NOTE: The extractor is not thread-save. If you with to use it with multiprocessing, create a new extractor object in each process. Args: datafile (str): path to the bigwig file use_strand (bool): if True, the extracted sequence is reverse complemented in case interval.strand == "-" """ super(FastaExtractor, self).__init__(datafile, **kwargs) self.use_strand = use_strand self.fasta = FastaFile(self._datafile)