Python write fasta
39 Python code examples are found related to "write fasta".
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
Example 1
Source File: seq_io.py From GTDBTk with GNU General Public License v3.0 | 7 votes |
def write_fasta(seqs, fasta_file, wrap=80):
    """Write sequences to a FASTA file, wrapping long sequence lines.

    Parameters
    ----------
    seqs : dict[seq_id] -> seq
        Sequences indexed by sequence id.
    fasta_file : str
        Path of the file to write.
    wrap : int
        Number of AA/NT written per line before the sequence is wrapped.
    """
    with open(fasta_file, 'w') as handle:
        for seq_id, sequence in seqs.items():
            handle.write('>{}\n'.format(seq_id))
            # Consecutive slices of at most `wrap` characters each.
            pieces = (sequence[start:start + wrap]
                      for start in range(0, len(sequence), wrap))
            handle.writelines('{}\n'.format(piece) for piece in pieces)
Example 2
Source File: dna2proteins.py From dna2proteins with MIT License | 7 votes |
def write_fasta(dictionary, filename):
    """
    Takes a dictionary and writes it to a fasta file.

    Each key is written verbatim on its own line (no '>' is added here),
    followed by its value wrapped with textwrap.wrap at a width of 60.
    Must specify the filename when calling the function.
    """
    import textwrap
    with open(filename, "w") as outfile:
        for key, value in dictionary.items():
            outfile.write(key + "\n")
            outfile.write("\n".join(textwrap.wrap(value, 60)))
            outfile.write("\n")
    # print() call form works on both Python 2 and 3; the bare print
    # statement is a SyntaxError on Python 3.
    print("Success! File written")

## Swaps DNA sequences for proteins
Example 3
Source File: seq_io.py From SqueezeMeta with GNU General Public License v3.0 | 6 votes |
def write_fasta(seqs, output_file):
    """Write sequences to fasta file.

    If the output file name ends with '.gz', it will be compressed
    using gzip.

    Parameters
    ----------
    seqs : dict[seq_id] -> seq
        Sequences indexed by sequence id.
    output_file : str
        Name of fasta file to produce.
    """
    if output_file.endswith('.gz'):
        # Text mode: the records below are str, which a binary-mode gzip
        # handle rejects on Python 3.
        fout = gzip.open(output_file, 'wt')
    else:
        fout = open(output_file, 'w')

    # The context manager closes the handle even if a write fails.
    with fout:
        for seq_id, seq in viewitems(seqs):
            fout.write('>' + seq_id + '\n')
            fout.write(seq + '\n')
Example 4
Source File: fasta.py From ssbio with MIT License | 6 votes |
def write_fasta_file(seq_records, outname, outdir=None, outext='.faa', force_rerun=False):
    """Write a FASTA file for a SeqRecord or a list of SeqRecord objects.

    Args:
        seq_records (SeqRecord, list): SeqRecord or a list of SeqRecord objects
        outname: Name of the output file which will have outext appended to it
        outdir: Path to directory to output sequences to
        outext: Extension of FASTA file, default ".faa"
        force_rerun: If file should be overwritten if it exists

    Returns:
        str: Path to output FASTA file.

    """
    target_dir = outdir if outdir else ''
    outfile = ssbio.utils.outfile_maker(inname='', outname=outname, outdir=target_dir, outext=outext)

    # Only write when the rerun check says so; the path is returned either way.
    needs_write = ssbio.utils.force_rerun(flag=force_rerun, outfile=outfile)
    if needs_write:
        SeqIO.write(seq_records, outfile, "fasta")

    return outfile
Example 5
Source File: seq_io.py From catch with MIT License | 6 votes |
def write_probe_fasta(probes, out_fn):
    """Write probe sequences to a FASTA file.

    Each probe produces two lines: a header line and then the full
    sequence on a single line. The header is probe.header when that is
    set (truthy); otherwise it is 'probe_' followed by probe.identifier().

    Args:
        probes: list of instances of probe.Probe
        out_fn: path to FASTA file to write
    """
    with open(out_fn, 'w') as out:
        for probe in probes:
            if not probe.header:
                header_line = '>probe_%s\n' % probe.identifier()
            else:
                header_line = '>' + probe.header + '\n'
            out.write(header_line)
            out.write(probe.seq_str + '\n')
Example 6
Source File: IceIterative2.py From cDNA_Cupcake with BSD 3-Clause Clear License | 6 votes |
def write_in_fasta(self, cid, write_all=False):
    """
    Write the input FASTA for cluster cid to the path given by
    self.clusterInFa(cid) and return that path.

    If write_all is True, write every sequence id in self.uc[cid].
    Otherwise, write a random subsample of at most
    self.dagcon_in_fa_subsample of them.
    """
    in_filename = op.join(self.clusterInFa(cid))
    members = self.uc[cid]
    if write_all:
        chosen = members
    else:
        sample_size = min(self.dagcon_in_fa_subsample, len(members))
        chosen = random.sample(members, sample_size)
    with open(in_filename, 'w') as out:
        for seqid in chosen:
            record = self.seq_dict[seqid]
            out.write(">{0}\n{1}\n".format(seqid, record.sequence))
    return in_filename
Example 7
Source File: seq_parser.py From GetOrganelle with GNU General Public License v3.0 | 6 votes |
def write_fasta_with_list(out_dir, matrix, overwrite):
    """Write named sequences (stored as lists of characters) to a FASTA file.

    :param out_dir: path of the output file.
    :param matrix: indexable where matrix[0] holds the sequence names,
        matrix[1] the sequences (each an iterable of strings that is
        joined with '') and matrix[2] the line width; a falsy width
        writes every sequence on a single line.
    :param overwrite: when false and out_dir already exists, '_' is
        inserted before the last '.' of the path until it is unused.
    """
    if not overwrite:
        while os.path.exists(out_dir):
            out_dir = '.'.join(out_dir.split('.')[:-1]) + '_.' + out_dir.split('.')[-1]
    names = matrix[0]
    seqs = matrix[1]
    width = matrix[2]
    # Context manager so the handle is closed even when a write fails.
    with open(out_dir, 'w') as fasta_file:
        for i, name in enumerate(names):
            fasta_file.write('>' + name + '\n')
            seq = seqs[i]
            if width:
                # max(..., 1) keeps the single empty line that has always
                # been written for an empty sequence.
                for start in range(0, max(len(seq), 1), width):
                    fasta_file.write(''.join(seq[start:start + width]) + '\n')
            else:
                fasta_file.write(''.join(seq) + '\n')


# deprecated since GetOrganelle 1.6.3
Example 8
Source File: check_annotations.py From GetOrganelle with GNU General Public License v3.0 | 6 votes |
def write_fasta(out_dir, matrix, overwrite):
    """Write named sequences (stored as strings) to a FASTA file.

    :param out_dir: path of the output file.
    :param matrix: indexable where matrix[0] holds the sequence names,
        matrix[1] the sequence strings and matrix[2] the line width; a
        falsy width writes every sequence on a single line.
    :param overwrite: when false and out_dir already exists, '_' is
        inserted before the last '.' of the path until it is unused.
    """
    if not overwrite:
        while os.path.exists(out_dir):
            out_dir = '.'.join(out_dir.split('.')[:-1])+'_.'+out_dir.split('.')[-1]
    names = matrix[0]
    seqs = matrix[1]
    width = matrix[2]
    # Context manager so the handle is closed even when a write fails.
    with open(out_dir, 'w') as fasta_file:
        for i, name in enumerate(names):
            fasta_file.write('>'+name+'\n')
            seq = seqs[i]
            if width:
                # max(..., 1) keeps the single empty line that has always
                # been written for an empty sequence.
                for start in range(0, max(len(seq), 1), width):
                    fasta_file.write(seq[start:start + width]+'\n')
            else:
                fasta_file.write(seq+'\n')
Example 9
Source File: stitch.py From medaka with Mozilla Public License 2.0 | 5 votes |
def write_fasta(filename, contigs):
    """Write a fasta file from tuples of (name, sequence).

    Every tuple becomes a '>name' header line followed by the whole
    sequence on one line.

    :param filename: output filename.
    :param contigs: tuples of the form (sequence name, base sequence).
    """
    with open(filename, 'w') as handle:
        handle.writelines(
            '>{}\n{}\n'.format(contig_name, bases)
            for contig_name, bases in contigs
        )
Example 10
Source File: FileIO.py From cDNA_Cupcake with BSD 3-Clause Clear License | 5 votes |
def write_preClusterSet_to_fasta(pCS, output_filename, fasta_d):
    """
    Write one representative record per cluster in pCS.S to a FASTA file.

    For every cid, one member of pCS.S[cid].members is picked with
    random.choice and looked up in fasta_d; its .id is written as the
    header and its .seq as the sequence.
    """
    with open(output_filename, 'w') as out:
        for cid in pCS.S:
            chosen_member = random.choice(pCS.S[cid].members)
            rep = fasta_d[chosen_member]
            out.write(">{0}\n{1}\n".format(rep.id, rep.seq))
Example 11
Source File: util.py From picrust2 with GNU General Public License v3.0 | 5 votes |
def write_fasta(seq, outfile):
    """Write a dict of sequence id -> sequence string to a FASTA file.

    Records are written in sorted id order so the output file is
    reproducible; each sequence is written on a single line.
    """
    # Context manager closes the handle even if a write raises.
    with open(outfile, "w") as out_fasta:
        for s in sorted(seq.keys()):
            out_fasta.write(">" + s + "\n")
            out_fasta.write(seq[s] + "\n")
Example 12
Source File: concoct_csv_to_fasta.py From EdwardsLab with MIT License | 5 votes |
def write_fasta_files(faf, odir, bins, maxb, verbose=False):
    """
    Read the sequences from faf and write them into a set of files in odir.

    One file bin_<i>.fna is opened for every i in 0..maxb; files that
    received no sequence are removed again at the end.

    :param faf: The source fasta file
    :param odir: the output directory (created, including parents, if missing)
    :param bins: the hash of contigs -> bin
    :param maxb: the maximum bin number
    :param verbose: more output
    :return: nada
    """
    # makedirs handles nested paths and avoids the exists()/mkdir() race.
    os.makedirs(odir, exist_ok=True)

    bin_paths = [os.path.join(odir, f"bin_{i}.fna") for i in range(maxb + 1)]
    outputfiles = []
    written_to = set()
    try:
        for path in bin_paths:
            outputfiles.append(open(path, 'w'))

        for fa, seq in stream_fasta(faf, True):
            # The bin lookup uses the header text up to the first space.
            faid = fa.split(" ")[0]
            if faid not in bins:
                if verbose:
                    sys.stderr.write(f"Sequence {faid} not found in a bin\n")
                continue
            outputfiles[bins[faid]].write(">{}\n{}\n".format(fa, seq))
            written_to.add(bins[faid])
    finally:
        # Close every handle that was opened, even if streaming failed.
        for o in outputfiles:
            o.close()

    for i, path in enumerate(bin_paths):
        if i not in written_to:
            os.remove(path)
Example 13
Source File: fasta.py From antismash with GNU Affero General Public License v3.0 | 5 votes |
def write_fasta(names: List[str], seqs: List[str], filename: str) -> None:
    """ Writes name/sequence pairs to file in FASTA format

        Names and sequences are paired positionally with zip, so any
        surplus entries in the longer list are not written.

        Arguments:
            names: a list of sequence identifiers
            seqs: a list of sequences as strings
            filename: the filename to write the FASTA formatted data to

        Returns:
            None
    """
    # Context manager closes the file even if a write raises.
    with open(filename, "w") as out_file:
        for name, seq in zip(names, seqs):
            out_file.write(">%s\n%s\n" % (name, seq))
Example 14
Source File: run_glimmerhmm.py From antismash with GNU Affero General Public License v3.0 | 5 votes |
def write_search_fasta(record: Record) -> str:
    """ Writes a record in FASTA format to '<record.id>.fasta', a relative
        path that resolves against the current directory.

        Arguments:
            record: the record to write; it is converted with
                    record.to_biopython() before being written

        Returns:
            the name of the file created
    """
    fasta_name = f"{record.id}.fasta"
    biopython_records = [record.to_biopython()]
    with open(fasta_name, 'w') as out:
        seqio.write(biopython_records, out, 'fasta')
    return fasta_name
Example 15
Source File: utils.py From wgd with GNU General Public License v3.0 | 5 votes |
def write_fasta(seq_dict, output_file):
    """
    Write a sequence dictionary to a fasta file, one header line and one
    sequence line per entry.

    :param seq_dict: sequence dictionary, see :py:func:`read_fasta`
    :param output_file: output file name
    :return: the output file name that was passed in
    """
    with open(output_file, 'w') as handle:
        for seq_id, sequence in seq_dict.items():
            header_line = '>' + seq_id + '\n'
            handle.write(header_line)
            handle.write(sequence + '\n')
    return output_file
Example 16
Source File: assembly.py From dnaplotlib with MIT License | 5 votes |
def write_to_fasta(entries, col_length = 20) :
    """Format (name, sequence) pairs as FASTA text and return it as a string.

    Each sequence is split into lines of at most col_length characters
    joined by '\\n'; complete records are joined by '\\r\\n'. Nothing is
    written to disk and the result has no trailing line break.
    """
    def _format_record(seq_name, nts):
        # One '>name' header followed by the wrapped sequence lines.
        wrapped = '\n'.join(
            nts[start:start + col_length]
            for start in range(0, len(nts), col_length)
        )
        return '>%s\n%s' % (seq_name, wrapped)

    return '\r\n'.join(_format_record(seq_name, nts) for seq_name, nts in entries)
Example 17
Source File: tb.py From ariba with GNU General Public License v3.0 | 5 votes |
def write_prepareref_fasta_file(outfile, gene_coords, genes_need_upstream, genes_non_upstream, upstream_before=100, upstream_after=100):
    '''Writes fasta file to be used with -f option of prepareref.

    Sequences are sliced out of the NC_000962.3 reference loaded from
    data_dir. Genes in genes_non_upstream are written over their full
    start..end span. Genes in genes_need_upstream are written as a window
    around their start coordinate, with '_upstream' appended to the id.
    Whenever start is not less than end the slice is reverse-complemented.'''
    loaded = {}
    pyfastaq.tasks.file_to_dict(os.path.join(data_dir, 'NC_000962.3.fa.gz'), loaded)
    ref_seq = loaded['NC_000962.3']

    with open(outfile, 'w') as f:
        for gene in genes_non_upstream:
            start = gene_coords[gene]['start']
            end = gene_coords[gene]['end']
            forward = start < end
            # Slice from the smaller to the larger coordinate, inclusive.
            low, high = (start, end) if forward else (end, start)
            gene_fa = pyfastaq.sequences.Fasta(gene, ref_seq[low:high + 1])
            if not forward:
                gene_fa.revcomp()
            print(gene_fa, file=f)

        for gene in genes_need_upstream:
            start = gene_coords[gene]['start']
            end = gene_coords[gene]['end']
            if start < end:
                window = ref_seq[start - upstream_before:start + upstream_after]
                gene_fa = pyfastaq.sequences.Fasta(gene, window)
            else:
                window = ref_seq[start - upstream_after + 1:start + upstream_before + 1]
                gene_fa = pyfastaq.sequences.Fasta(gene, window)
                gene_fa.revcomp()

            gene_fa.id += '_upstream'
            print(gene_fa, file=f)
Example 18
Source File: reference_data.py From ariba with GNU General Public License v3.0 | 5 votes |
def write_seqs_to_fasta(self, outfile, names):
    '''Writes self.sequence(name) for every name, in sorted name order,
    to outfile, which is opened and closed through pyfastaq.utils.'''
    f_out = pyfastaq.utils.open_file_write(outfile)
    try:
        for name in sorted(names):
            print(self.sequence(name), file=f_out)
    finally:
        # Close the handle even if a sequence lookup or write fails.
        pyfastaq.utils.close(f_out)
Example 19
Source File: SequenceSearcher.py From biskit with GNU General Public License v3.0 | 5 votes |
def writeFasta( self, frecords, fastaOut ):
    """
    Create fasta file for given set of records. Each record is written
    as returned by its format('fasta') method.

    @param frecords: list of Bio.Blast.Records
    @type  frecords: [Bio.Blast.Record]
    @param fastaOut: file name (passed through T.absfile before opening)
    @type  fastaOut: str
    """
    ## context manager closes the file even if formatting a record fails
    with open( T.absfile(fastaOut), 'w' ) as f:
        for r in frecords:
            f.write( r.format('fasta') )  ## note better use direct SeqIO
Example 20
Source File: SequenceSearcher.py From biskit with GNU General Public License v3.0 | 5 votes |
def writeFastaClustered( self, fastaOut=None ):
    """
    Write non-redundant set of template sequences to fasta file.
    The records come from self.getClusteredRecords().

    @param fastaOut: target file; a missing or empty value falls back to
                     self.outFolder + self.F_FASTA_NR
                     (default: L{F_FASTA_NR})
    @type  fastaOut: str
    """
    if not fastaOut:
        fastaOut = self.outFolder + self.F_FASTA_NR

    clustered = self.getClusteredRecords()
    self.writeFasta( clustered, fastaOut )
Example 21
Source File: SequenceSearcher.py From biskit with GNU General Public License v3.0 | 5 votes |
def writeFastaAll( self, fastaOut=None ):
    """
    Write all found template sequences (self.frecords) to fasta file.

    @param fastaOut: target file; a missing or empty value falls back to
                     self.outFolder + self.F_FASTA_ALL
                     (default: L{F_FASTA_ALL})
    @type  fastaOut: str OR None
    """
    if not fastaOut:
        fastaOut = self.outFolder + self.F_FASTA_ALL

    self.writeFasta( self.frecords, fastaOut )
Example 22
Source File: download.py From fauna with GNU Affero General Public License v3.0 | 5 votes |
def write_fasta(self, viruses, fname, sep='|', fasta_fields=['strain', 'virus', 'accession'], **kwargs):
    """Write virus records to a FASTA file.

    The header of each record is the values of fasta_fields joined by
    sep; a field that is absent from the record or is None is written as
    '?'. The sequence comes from virus['sequence'].

    If the file cannot be opened, 'ERROR' is printed and the process
    exits with status 2. Extra keyword arguments are accepted and ignored.
    NOTE: the default fasta_fields list is shared between calls; it is
    only read here, never modified.
    """
    try:
        handle = open(fname, 'w')
    except IOError:
        print('ERROR')
        sys.exit(2)

    # Context manager closes the file even if a record is malformed.
    with handle:
        for virus in viruses:
            fields = [str(virus[field]) if (field in virus and virus[field] is not None) else '?'
                      for field in fasta_fields]
            handle.write(">" + sep.join(fields) + '\n')
            handle.write(virus['sequence'] + "\n")
Example 23
Source File: seqUtils.py From SqueezeMeta with GNU General Public License v3.0 | 5 votes |
def writeFasta(seqs, outputFile):
    '''Write sequences to FASTA file.

    seqs maps sequence id -> sequence string. If outputFile ends with
    '.gz' the output is gzip-compressed.'''
    if outputFile.endswith('.gz'):
        # Text mode: the records below are str, which a binary-mode gzip
        # handle rejects on Python 3.
        fout = gzip.open(outputFile, 'wt')
    else:
        fout = open(outputFile, 'w')

    # Context manager closes the handle even if a write fails.
    with fout:
        for seqId, seq in seqs.items():
            fout.write('>' + seqId + '\n')
            fout.write(seq + '\n')
Example 24
Source File: seed.py From iva with GNU General Public License v3.0 | 5 votes |
def write_fasta(self, filename, name):
    '''Writes self.seq to filename as a single FASTA record whose header
    is name. The file is opened and closed through pyfastaq.utils.'''
    f = pyfastaq.utils.open_file_write(filename)
    try:
        print('>' + name, file=f)
        print(self.seq, file=f)
    finally:
        # Close the handle even if a write fails.
        pyfastaq.utils.close(f)
Example 25
Source File: FileIO.py From cDNA_Cupcake with BSD 3-Clause Clear License | 5 votes |
def write_select_seqs_to_fasta(fasta_filename, seqids, output_filename, mode='w'):
    """
    Write the records named in seqids, looked up in fasta_filename, to
    output_filename.

    fasta_filename --- FASTA file opened with LazyFastaReader
    seqids --- iterable of sequence ids to copy, in output order
    output_filename --- file to write
    mode --- mode passed to open(), e.g. 'w' or 'a'
    """
    # Read from the file the caller asked for (this used to be a
    # hard-coded 'isoseq_flnc.fasta').
    d = LazyFastaReader(fasta_filename)
    with open(output_filename, mode) as f:
        # Iterate over the requested ids (previously an undefined name
        # was looked up exactly once).
        for seqid in seqids:
            r = d[seqid]
            f.write(">{0}\n{1}\n".format(r.id, r.seq))
Example 26
Source File: FileIO.py From cDNA_Cupcake with BSD 3-Clause Clear License | 5 votes |
def write_seqids_to_fasta(seqids, output_filename, fasta_d):
    """
    Write the records for seqids to a FASTA file.

    Each seqid is looked up in fasta_d; the record's .id becomes the
    header and its .seq the sequence line.
    """
    with open(output_filename, 'w') as out:
        records = (fasta_d[seqid] for seqid in seqids)
        out.writelines(">{0}\n{1}\n".format(rec.id, rec.seq) for rec in records)
Example 27
Source File: bio.py From Comparative-Annotation-Toolkit with Apache License 2.0 | 5 votes |
def write_fasta(path_or_handle, name, seq, chunk_size=100, validate=None):
    """Writes out one fasta record.

    :param path_or_handle: a str path, which is opened with opengz and
        closed again here, or an already open handle, which is written to
        and left open.
    :param name: record name written after '>'.
    :param seq: sequence string.
    :param chunk_size: number of characters per sequence line.
    :param validate: None to skip validation, or 'DNA' / 'protein' to
        check every character against that alphabet. Any other value
        rejects every character.
    :raises RuntimeError: if seq is not a string or fails validation.
        Both checks run before the output is opened.
    """
    if not isinstance(seq, str):
        raise RuntimeError("Sequence is not unicode or string")

    if validate is not None:
        # Compare by value: identity ('is') on strings is not reliable.
        if validate == 'DNA':
            valid_chars = set('ACGTUYSWKMBDHVNacgtuyswkmbdhvn.-*')
        elif validate == 'protein':
            valid_chars = set('ABCDEFGHIKLMPQSRTVWXYZUabcdefghiklmpqsrtvwxyzuNn.-*')
        else:
            valid_chars = set()
        # Explicit check instead of assert, which is stripped under -O.
        bad_chars = {x for x in seq if x not in valid_chars}
        if bad_chars:
            raise RuntimeError("Invalid FASTA character(s) seen in fasta sequence: {}".format(bad_chars))

    owns_handle = isinstance(path_or_handle, str)
    fh = opengz(path_or_handle, 'w') if owns_handle else path_or_handle
    try:
        fh.write(">%s\n" % name)
        for i in range(0, len(seq), chunk_size):
            fh.write("%s\n" % seq[i:i+chunk_size])
    finally:
        # Only close handles opened here; caller-owned handles stay open.
        if owns_handle:
            fh.close()
Example 28
Source File: simBench.py From V-pipe with Apache License 2.0 | 5 votes |
def write_fasta(haplotype_seqs, outdir):
    """Write each haplotype to its own FASTA file and concatenate them.

    Haplotype number idx is written to <outdir>/haplotype<idx>.fasta with
    the header 'haplotype<idx>'. All of those files are then concatenated
    with sh.cat into <outdir>/haplotypes.fasta.
    """
    fasta_record = collections.namedtuple("fasta_record", "id seq")
    output_files = []
    for idx, haplotype in enumerate(haplotype_seqs):
        haplotype_id = "haplotype" + str(idx)
        record = fasta_record(id=haplotype_id, seq=haplotype)
        output_file = os.path.join(outdir, haplotype_id + ".fasta")
        output_files.append(output_file)
        with open(output_file, 'w') as outfile:
            outfile.write(">{}\n{}\n".format(record.id, record.seq))

    combined = os.path.join(outdir, "haplotypes.fasta")
    sh.cat(output_files, _out=combined)
Example 29
Source File: __main__.py From vamb with MIT License | 5 votes |
def write_fasta(outdir, clusterspath, fastapath, contignames, contiglengths, minfasta, logfile):
    """Write FASTA bins for every cluster whose total length is at least minfasta.

    Clusters are read from clusterspath; a cluster's size is the sum of
    the lengths (contiglengths, paired positionally with contignames) of
    its contigs. Only contigs of the kept clusters are loaded from
    fastapath, and the bins are written below <outdir>/bins. Progress and
    timing are reported through log() to logfile.
    """
    begintime = time.time()

    log('\nWriting FASTA files', logfile)
    log('Minimum FASTA size: {}'.format(minfasta), logfile, 1)

    lengthof = dict(zip(contignames, contiglengths))

    with open(clusterspath) as file:
        clusters = vamb.vambtools.read_clusters(file)

    # Keep only the clusters that are large enough.
    filtered_clusters = {
        cluster: contigs
        for cluster, contigs in clusters.items()
        if sum(lengthof[contig] for contig in contigs) >= minfasta
    }

    del lengthof, clusters

    # Names of all contigs that have to be loaded from the FASTA file.
    keep = set()
    for contigs in filtered_clusters.values():
        keep.update(contigs)

    with vamb.vambtools.Reader(fastapath, 'rb') as file:
        fastadict = vamb.vambtools.loadfasta(file, keep=keep)

    bindir = os.path.join(outdir, "bins")
    vamb.vambtools.write_bins(bindir, filtered_clusters, fastadict, maxbins=None)

    ncontigs = sum(len(contigs) for contigs in filtered_clusters.values())
    nfiles = len(filtered_clusters)
    print('', file=logfile)
    log('Wrote {} contigs to {} FASTA files'.format(ncontigs, nfiles), logfile, 1)

    elapsed = round(time.time() - begintime, 2)
    log('Wrote FASTA in {} seconds'.format(elapsed), logfile, 1)
Example 30
Source File: trees_msa.py From OrthoFinder with GNU General Public License v3.0 | 5 votes |
def WriteSeqsToFasta(self, seqs, outFilename):
    """Write the stored sequences for seqs to outFilename.

    The ids (s.ToString() of every entry) are ordered by self.SortSeqs.
    For each id present in self.SeqLists a '>id' header and the stored
    text are written; the stored text is written as-is, with no line
    break added. Ids missing from self.SeqLists are reported with print
    and skipped.
    """
    with open(outFilename, 'w') as handle:
        ordered_ids = self.SortSeqs([s.ToString() for s in seqs])
        for seq_id in ordered_ids:
            if seq_id not in self.SeqLists:
                print(("ERROR: %s not found" % seq_id))
                continue
            handle.write(">%s\n" % seq_id)
            handle.write(self.SeqLists[seq_id])
Example 31
Source File: trees_msa.py From OrthoFinder with GNU General Public License v3.0 | 5 votes |
def WriteFastaFiles(self, fastaWriter, ogs, idDict, qBoth):
    """Write one FASTA file per orthogroup through fastaWriter.

    The files named by GetFastaFilename(i, True) are written with
    WriteSeqsToFasta_withNewAccessions, but only when the first of them
    does not exist yet. When qBoth is true, the files named by
    GetFastaFilename(i) are also written with WriteSeqsToFasta.
    """
    # The results ones are now written by default after orthogroups, check they're not already there
    results_missing = not os.path.exists(self.GetFastaFilename(0, True))
    if results_missing:
        for iOg, og in enumerate(ogs):
            results_path = self.GetFastaFilename(iOg, True)
            fastaWriter.WriteSeqsToFasta_withNewAccessions(og, results_path, idDict)
    if qBoth:
        for iOg, og in enumerate(ogs):
            working_path = self.GetFastaFilename(iOg)
            fastaWriter.WriteSeqsToFasta(og, working_path)
Example 32
Source File: read_utils.py From SVE with GNU General Public License v3.0 | 5 votes |
def write_fasta_by_chrom(ss, chrom_fasta_dir, chrom_base=''):
    """Write every sequence in ss to its own FASTA file.

    The file for a sequence s is <chrom_fasta_dir>/<chrom_base><s.name>.fa
    and its content is produced by s.write_to_fasta_file(handle).
    Returns the list of paths written, in input order.
    """
    written_paths = []
    for s in ss:
        path = chrom_fasta_dir+'/'+chrom_base+s.name+'.fa'
        written_paths.append(path)
        with open(path, 'w') as handle:
            s.write_to_fasta_file(handle)
    return written_paths
Example 33
Source File: read_utils.py From SVE with GNU General Public License v3.0 | 5 votes |
def write_fasta_mask(M,json_path):
    """Serialize the mask M as JSON to json_path and return True."""
    with open(json_path,'w') as json_file:
        json.dump(M, json_file)
    return True

#compute an expectation given randomly distributed short reads for the RD windows (hist bins)
Example 34
Source File: read_utils.py From SVE with GNU General Public License v3.0 | 5 votes |
def write_fasta(seqs, fasta_path):
    """Write sequence objects to a single FASTA file and return True.

    seqs is either a list of objects providing write_to_fasta_file(handle),
    written in list order, or a dict of name -> such object, written in
    order of the names left-padded with zeros to equal length (so '2'
    sorts before '10'). Any other type leaves the file empty.
    """
    with open(fasta_path, 'w') as fasta:
        if isinstance(seqs, list):
            for seq in seqs:
                seq.write_to_fasta_file(fasta)
        elif isinstance(seqs, dict):
            # Compute the padding width once instead of once per key.
            width = max(len(name) for name in seqs) if seqs else 0
            for name in sorted(seqs, key=lambda x: x.zfill(width)):
                seqs[name].write_to_fasta_file(fasta)
    return True

#ss is a HTSeq Sequence list?
Example 35
Source File: fasta.py From ssbio with MIT License | 5 votes |
def write_seq_as_temp_fasta(seq):
    """Write a sequence as a temporary FASTA file

    The sequence is cast to a SeqRecord with the id 'tempfasta' and
    written through write_fasta_file with outname 'temp' and
    force_rerun=True.

    Args:
        seq (str, Seq, SeqRecord): Sequence string, Biopython Seq or SeqRecord object

    Returns:
        str: Path to temporary FASTA file (located in system temporary files directory)

    """
    record = ssbio.protein.sequence.utils.cast_to_seq_record(seq, id='tempfasta')
    temp_dir = tempfile.gettempdir()
    return write_fasta_file(seq_records=record, outname='temp', outdir=temp_dir, force_rerun=True)
Example 36
Source File: fasta.py From ssbio with MIT License | 5 votes |
def write_fasta_file_from_dict(indict, outname, outdir=None, outext='.faa', force_rerun=False):
    """Write a FASTA file for a dictionary of IDs and their sequence strings.

    Args:
        indict: Input dictionary with keys as IDs and values as sequence strings
        outname: Name of the output file which will have outext appended to it
        outdir: Path to directory to output sequences to
        outext: Extension of FASTA file, default ".faa"
        force_rerun: If file should be overwritten if it exists

    Returns:
        str: Path to output FASTA file.

    """
    target_dir = outdir if outdir else ''
    outfile = ssbio.utils.outfile_maker(inname='', outname=outname, outdir=target_dir, outext=outext)

    if ssbio.utils.force_rerun(flag=force_rerun, outfile=outfile):
        # Each ID/sequence pair becomes one SeqRecord carrying that ID.
        records = [
            ssbio.protein.sequence.utils.cast_to_seq_record(sequence, id=seq_id)
            for seq_id, sequence in indict.items()
        ]
        SeqIO.write(records, outfile, "fasta")

    return outfile
Example 37
Source File: toilInterface.py From Comparative-Annotation-Toolkit with Apache License 2.0 | 5 votes |
def write_fasta_to_filestore(toil, fasta_local_path):
    """
    Convenience function that loads a fasta and its associated gdx/flat file into the fileStore.
    Assumes that the paths are consistent with the requirements (i.e. $path.gdx and $path.flat)
    :param toil: Toil context manager
    :param fasta_local_path: Path to local fasta to load.
    :return: Tuple of fileStore IDs for fasta, fasta_gdx, fasta_flat
    """
    # Import the fasta itself first, then its .gdx and .flat companions.
    local_paths = [fasta_local_path + suffix for suffix in ('', '.gdx', '.flat')]
    file_ids = [
        FileID.forPath(toil.importFile('file:///' + local_path), local_path)
        for local_path in local_paths
    ]
    fasta_file_id, gdx_file_id, flat_file_id = file_ids
    return fasta_file_id, gdx_file_id, flat_file_id
Example 38
Source File: model.py From pmx with GNU Lesser General Public License v3.0 | 4 votes |
def writeFASTA( self, filename, title = ""):
    """Write the sequence of every chain to filename in FASTA format.

    If title is empty, self.title with each run of whitespace replaced
    by '_' is used. A model with exactly one chain gets the header
    '> <title>'; otherwise every chain gets '> <title>_chain_<chain.id>'.
    Sequences come from chain.get_sequence(), one line per chain.
    """
    if not title:
        title = '_'.join(self.title.split())
    # Context manager: the file is now closed (and flushed) on exit.
    # write() replaces the Python 2 only 'print >>fp' syntax.
    with open(filename,"w") as fp:
        if len(self.chains) == 1:
            fp.write('> %s\n' % title)
            fp.write(str(self.chains[0].get_sequence()) + '\n')
        else:
            for chain in self.chains:
                fp.write('> %s_chain_%s\n' % (title, chain.id))
                fp.write(str(chain.get_sequence()) + '\n')

## def writeGRO( self, filename, title = ''):
## fp = open(filename,'w')
## if self.unity == 'nm': fac = 1.
## else: fac = 0.1
## if not title:
## title = self.title
## print >>fp, title
## print >>fp, "%5d" % len(self.atoms)
## if self.atoms[0].v[0] != 0.000 : bVel = True
## else: bVel = False
## if bVel:
## gro_format = "%8.3f%8.3f%8.3f%8.4f%8.4f%8.4f"
## else:
## gro_format = "%8.3f%8.3f%8.3f"
## for atom in self.atoms:
## resid = (atom.resnr)%100000
## at_id = (atom.id)%100000
## ff = "%5d%-5.5s%5.5s%5d" % (resid, atom.resname, atom.name, at_id)
## if bVel:
## ff+=gro_format % (atom.x[XX]*fac, atom.x[YY]*fac, atom.x[ZZ]*fac,
## atom.v[XX], atom.v[YY], atom.v[ZZ])
## else:
## ff+=gro_format % (atom.x[XX]*fac, atom.x[YY]*fac, atom.x[ZZ]*fac )
## print >>fp, ff
## if self.box[XX][YY] or self.box[XX][ZZ] or self.box[YY][XX] or \
## self.box[YY][ZZ] or self.box[ZZ][XX] or self.box[ZZ][YY]:
## bTric = False
## ff = "%10.5f%10.5f%10.5f%10.5f%10.5f%10.5f%10.5f%10.5f%10.5f"
## else:
## bTric = True
## ff = "%10.5f%10.5f%10.5f"
## if bTric:
## print >>fp, ff % (self.box[XX][XX],self.box[YY][YY],self.box[ZZ][ZZ])
## else:
## print >>fp, ff % (self.box[XX][XX],self.box[YY][YY],self.box[ZZ][ZZ],
## self.box[XX][YY],self.box[XX][ZZ],self.box[YY][XX],
## self.box[YY][ZZ],self.box[ZZ][XX],self.box[ZZ][YY])
## fp.close()
Example 39
Source File: tree.py From augur with GNU Affero General Public License v3.0 | 4 votes |
def write_out_informative_fasta(compress_seq, alignment, stripFile=None):
    """Write a FASTA of the informative sites only and return its path.

    compress_seq is read through its 'sequences', 'reference' and
    'positions' entries. For every position not listed in the optional
    strip file, the base of each sequence is collected (falling back to
    the reference base when the sequence has no entry there). A site is
    kept when, ignoring '-' and 'N', it shows more than one distinct base
    and is not just two bases with one of them occurring once. The output
    'informative_sites.fasta' is placed next to `alignment`.
    """
    from Bio import SeqIO
    from Bio.SeqRecord import SeqRecord
    from Bio.Seq import Seq

    sequences = compress_seq['sequences']
    ref = compress_seq['reference']
    positions = compress_seq['positions']

    #If want to exclude sites from initial treebuild, read in here
    strip_pos = load_mask_sites(stripFile) if stripFile else []

    #Get sequence names
    seqNames = list(sequences.keys())

    #Check non-ref sites to see if informative
    printPositionMap = False    #If true, prints file mapping Fasta position to real position
    sites = []
    pos = []

    for key in positions:
        if key in strip_pos:
            continue

        full_pattern = []
        for seq_name in sequences.keys():
            #looping try/except is faster than list comprehension
            try:
                full_pattern.append(sequences[seq_name][key])
            except KeyError:
                full_pattern.append(ref[key])

        if '-' in full_pattern or 'N' in full_pattern:
            #remove gaps/Ns to see if otherwise informative
            called = [base for base in full_pattern if base != '-' and base != 'N']
        else:
            called = full_pattern

        bases, counts = np.unique(called, return_counts=True)
        n_bases = len(bases)
        #If not all - or N, not all same base, and >1 differing base, append
        if n_bases != 0 and n_bases != 1 and not (n_bases == 2 and min(counts) == 1):
            sites.append(full_pattern)
            pos.append("\t".join([str(len(pos)+1), str(key)]))

    #Rotate and convert to SeqRecord
    # rot90 turns the site rows into one row per sequence, in reversed
    # sequence order, hence the reversed name list.
    align = np.rot90(np.asarray(sites))
    seqNamesCorr = list(reversed(seqNames))
    toFasta = [SeqRecord(id=seq_name, seq=Seq("".join(align[i])), description='')
               for i, seq_name in enumerate(seqNamesCorr)]

    fasta_file = os.path.join(os.path.dirname(alignment), 'informative_sites.fasta')

    #now output this as fasta to read into raxml or iqtree
    SeqIO.write(toFasta, fasta_file, 'fasta')

    #If want a position map, print:
    if printPositionMap:
        with open(fasta_file+".positions.txt", 'w', encoding='utf-8') as the_file:
            the_file.write("\n".join(pos))

    return fasta_file