Python Bio.SeqIO.read() Examples
The following are 30
code examples of Bio.SeqIO.read().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module
Bio.SeqIO
, or try the search function
.
Example #1
Source File: uniprot.py From ssbio with MIT License | 6 votes |
def seq(self): """Seq: Get the Seq object from the sequence file, metadata file, or in memory""" if self.sequence_file: log.debug('{}: reading sequence from sequence file {}'.format(self.id, self.sequence_path)) tmp_sr = SeqIO.read(self.sequence_path, 'fasta') return tmp_sr.seq elif self.metadata_file: log.debug('{}: reading sequence from metadata file {}'.format(self.id, self.metadata_path)) tmp_sr = SeqIO.read(self.metadata_path, 'uniprot-xml') return tmp_sr.seq else: if not self._seq: log.debug('{}: no sequence stored in memory'.format(self.id)) else: log.debug('{}: reading sequence from memory'.format(self.id)) return self._seq
Example #2
Source File: align.py From augur with GNU Affero General Public License v3.0 | 6 votes |
def prettify_alignment(aln): ''' Converts all bases to uppercase and removes auto reverse-complement prefix (_R_). This modifies the alignment in place. Parameters ---------- aln : MultipleSeqAlign Biopython Alignment ''' for seq in aln: seq.seq = seq.seq.upper() # AlignIO.read repeats the ID in the name and description field if seq.id.startswith("_R_"): seq.id = seq.id[3:] print("Sequence \"{}\" was reverse-complemented by the alignment program.".format(seq.id)) if seq.name.startswith("_R_"): seq.name = seq.name[3:] if seq.description.startswith("_R_"): seq.description = seq.description[3:]
Example #3
Source File: intronerate.py From HybPiper with GNU General Public License v3.0 | 6 votes |
def remove_exons(gff_filename,supercontig_filename,mode="all"): '''Given a supercontig and corresponding annotation, remove the exon sequences. In "intron" mode, only return sequences specifically annotated as introns''' exon_starts = [] exon_ends = [] gff = open(gff_filename).readlines() for line in gff: line = line.rstrip().split("\t") if len(line) > 2: if line[2] == "exon": exon_starts.append(int(line[3])) exon_ends.append(int(line[4])) supercontig = SeqIO.read(supercontig_filename,'fasta') exonless_contig = SeqRecord(Seq(''),id=supercontig.id) start = 0 for exon in range(len(exon_starts)): exonless_contig += supercontig[start:exon_starts[exon]-1] start = exon_ends[exon] exonless_contig += supercontig[start:] exonless_contig.description = '' return exonless_contig
Example #4
Source File: abi.py From TBProfiler with GNU General Public License v3.0 | 6 votes |
def __init__(self,in_obj,prefix): if isinstance(in_obj[0],str): self.filenames = in_obj self.records = [SeqIO.read(x,'abi') for x in self.filenames] self.prefix = prefix elif isinstance(in_obj[0],SeqIO.SeqRecord): self.records = in_obj self.prefix = prefix self.quals = {} for rec in self.records: self.quals[rec.name] = [ord(x) for x in rec.annotations["abif_raw"]["PCON1"]] self.trimmed_coords = {} for rec in self.records: trim_seq = str(SeqIO.AbiIO._abi_trim(rec).seq) start = str(rec.seq).index(trim_seq) end = start+len(trim_seq) self.trimmed_coords[rec.name] = list(range(start,end))
Example #5
Source File: utils.py From augur with GNU Affero General Public License v3.0 | 6 votes |
def load_mask_sites(mask_file): """Load masking sites from either a BED file or a masking file. Parameters ---------- mask_file: str Path to the BED or masking file Returns ------- list[int] Sorted list of unique zero-indexed sites """ if mask_file.lower().endswith(".bed"): mask_sites = read_bed_file(mask_file) else: mask_sites = read_mask_file(mask_file) print("%d masking sites read from %s" % (len(mask_sites), mask_file)) return mask_sites
Example #6
Source File: utils.py From augur with GNU Affero General Public License v3.0 | 6 votes |
def annotate_parents_for_tree(tree): """Annotate each node in the given tree with its parent. >>> import io >>> tree = Bio.Phylo.read(io.StringIO("(A, (B, C))"), "newick") >>> not any([hasattr(node, "parent") for node in tree.find_clades()]) True >>> tree = annotate_parents_for_tree(tree) >>> tree.root.parent is None True >>> all([hasattr(node, "parent") for node in tree.find_clades()]) True """ tree.root.parent = None for node in tree.find_clades(order="level"): for child in node.clades: child.parent = node # Return the tree. return tree
Example #7
Source File: sequence_data.py From treetime with MIT License | 6 votes |
def ref(self, in_ref): """ Parameters ---------- in_ref : file name, str, Bio.Seq.Seq, Bio.SeqRecord.SeqRecord reference sequence will read and stored a byte array """ read_from_file=False if in_ref and isfile(in_ref): for fmt in ['fasta', 'genbank']: try: in_ref = SeqIO.read(in_ref, fmt) self.logger("SequenceData: loaded reference sequence as %s format"%fmt,1) read_from_file=True break except: continue if not read_from_file: raise TypeError('SequenceData.ref: reference sequence file %s could not be parsed, fasta and genbank formats are supported.') if in_ref: self._ref = seq2array(in_ref, fill_overhangs=False, word_length=self.word_length) self.full_length = self._ref.shape[0] self.compressed_to_full_sequence_map = None self.multiplicity = None
Example #8
Source File: SequenceSearcher.py From biskit with GNU General Public License v3.0 | 6 votes |
def __copyFileHandle(self, result_handle, fname ): """ Copy the blast result to a file. The input file handle will be empty afterwards! Use the returned string handle instead. @param result_handle: file handle returned by NCBI* blast runners @type result_handle: file @param fname : absolute path to output file @type fname : str @return: a fresh 'file' (string) handle with the output @rtype : cStringIO.StringI """ str_results = result_handle.read() try: save_file = open( fname, 'w' ) save_file.write( str_results ) save_file.close() finally: return cStringIO.StringIO( str_results )
Example #9
Source File: fasta.py From ssbio with MIT License | 6 votes |
def fasta_files_equal(seq_file1, seq_file2): """Check equality of a FASTA file to another FASTA file Args: seq_file1: Path to a FASTA file seq_file2: Path to another FASTA file Returns: bool: If the sequences are the same """ # Load already set representative sequence seq1 = SeqIO.read(open(seq_file1), 'fasta') # Load kegg sequence seq2 = SeqIO.read(open(seq_file2), 'fasta') # Test equality if str(seq1.seq) == str(seq2.seq): return True else: return False
Example #10
Source File: uniprot.py From ssbio with MIT License | 6 votes |
def metadata_path_unset(self): """Copy features to memory and remove the association of the metadata file.""" if not self.metadata_file: raise IOError('No metadata file to unset') log.debug('{}: reading from metadata file {}'.format(self.id, self.metadata_path)) tmp_sr = SeqIO.read(self.metadata_path, 'uniprot-xml') tmp_feats = tmp_sr.features # TODO: should this be in separate unset functions? self.metadata_dir = None self.metadata_file = None self.features = tmp_feats if self.sequence_file: tmp_sr = tmp_sr.seq self.sequence_dir = None self.sequence_file = None self.seq = tmp_sr
Example #11
Source File: uniprot.py From ssbio with MIT License | 6 votes |
def features(self): """list: Get the features from the feature file, metadata file, or in memory""" if self.feature_file: log.debug('{}: reading features from feature file {}'.format(self.id, self.feature_path)) with open(self.feature_path) as handle: feats = list(GFF.parse(handle)) if len(feats) > 1: log.warning('Too many sequences in GFF') else: return feats[0].features elif self.metadata_file: log.debug('{}: reading features from metadata file {}'.format(self.id, self.metadata_path)) tmp_sr = SeqIO.read(self.metadata_path, 'uniprot-xml') return tmp_sr.features else: return self._features
Example #12
Source File: avian_flu_upload.py From fauna with GNU Affero General Public License v3.0 | 5 votes |
def __init__(self, **kwargs): upload.__init__(self, **kwargs) self.grouping_upload_fields = ['vtype', 'subtype', 'lineage'] # patterns from the subtype and lineage fields in the GISAID fasta file self.patterns = {('a / h1n1', 'pdm09'): ('a', 'h1n1', 'seasonal_h1n1pdm'), ('a / h1n2', ''): ('a', 'h1n2', None), ('a / h1n2', 'seasonal'): ('a', 'h1n2', 'seasonal_h1n2'), ('a / h2n2', ''): ('a', 'h2n2', None), ('a / h3n2', ''): ('a', 'h3n2', 'seasonal_h3n2'), ('a / h3n2', 'seasonal'): ('a', 'h3n2', 'seasonal_h3n2'), ('a / h3n3', ''): ('a', 'h3n3', None), ('a / h5n1', ''): ('a', 'h5n1', None), ('a / h5n6', ''): ('a', 'h5n6', None), ('a / h6n1', ''): ('a', 'h6n1', None), ('a / h7n1', ''): ('a', 'h7n1', None), ('a / h7n2', ''): ('a', 'h7n2', None), ('a / h7n3', ''): ('a', 'h7n3', None), ('a / h7n7', ''): ('a', 'h7n7', None), ('a / h7n9', ''): ('a', 'h7n9', None), ('a / h9n2', ''): ('a', 'h9n2', None), ('a / h10n7', ''): ('a', 'h10n7', None), ('a / h10n8', ''): ('a', 'h10n8', None), ('a / h11', ''): ('a', 'h11', None), ('b / h0n0', 'victoria'): ('b', None, 'seasonal_vic'), ('b / h0n0', 'yamagata'): ('b', None, 'seasonal_yam'), ('b', 'victoria'): ('b', None, 'seasonal_vic'), ('b', 'yamagata'): ('b', None, 'seasonal_yam'), ('h5n1',''): ('a', 'h5n1', None), ('h7n9',''): ('a', 'h7n9', None), ('h9n2',''): ('a', 'h9n2', None)} self.outgroups = {lineage: SeqIO.read('source-data/'+lineage+'_outgroup.gb', 'genbank') for lineage in ['H3N2', 'H1N1pdm', 'Vic', 'Yam']} self.outgroup_patterns = {'H3N2': ('a', 'h3n2', 'seasonal_h3n2'), 'H1N1': ('a', 'h1n1', 'seasonal_h1n1'), 'H1N1pdm': ('a', 'h1n1', 'seasonal_h1n1pdm'), 'Vic': ('b', None, 'seasonal_vic'), 'Yam': ('b', None, 'seasonal_yam')} self.strain_fix_fname = "source-data/flu_strain_name_fix.tsv" self.location_fix_fname = "source-data/flu_location_fix.tsv" self.virus_to_sequence_transfer_fields = ['submission_date'] self.fix = set()
Example #13
Source File: SequenceSearcher.py From biskit with GNU General Public License v3.0 | 5 votes |
def fastaRecordFromId_remote( self, id ): """ experimental: fetch fasta records from remote database """ from Bio import Entrez from Bio import SeqIO Entrez.email = "A.N.Other@example.com" if self.verbose: self.log.add_nobreak( 'r' ) handle = Entrez.efetch(db="protein", rettype="fasta", id=id) frecord = SeqIO.read(handle, "fasta") frecord.id = str(id) handle.close() return frecord
Example #14
Source File: flu_update.py From fauna with GNU Affero General Public License v3.0 | 5 votes |
def __init__(self, **kwargs): update.__init__(self, **kwargs) flu_upload.__init__(self, **kwargs) self.outgroups = {lineage: SeqIO.read('source-data/'+lineage+'_outgroup.gb', 'genbank') for lineage in ['H3N2', 'H1N1', 'H1N1pdm', 'Vic', 'Yam']} self.outgroup_patterns = {'H3N2': ('a', 'h3n2', 'seasonal_h3n2'), 'H1N1': ('a', 'h1n1', 'seasonal_h1n1'), 'H1N1pdm': ('a', 'h1n1', 'seasonal_h1n1pdm'), 'Vic': ('b', None, 'seasonal_vic'), 'Yam': ('b', None, 'seasonal_yam')}
Example #15
Source File: models.py From mykrobe with MIT License | 5 votes |
def _parse_genbank(self, genbank): d = {} with open(genbank, 'r') as infile: for feat in SeqIO.read(infile, "genbank").features: if feat.type == "gene": try: name = feat.qualifiers.get( "gene", feat.qualifiers.get("db_xref"))[0] except TypeError: pass else: # SeqIO converts to 0-based strand = int(feat.location.strand) if strand == 1: forward = True elif strand == -1: forward = False else: raise ValueError("Strand must be 1 or -1") d[name] = Gene( name, self.reference, start=feat.location.start + 1, end=feat.location.end, forward=forward) return d
Example #16
Source File: flu_upload.py From fauna with GNU Affero General Public License v3.0 | 5 votes |
def __init__(self, **kwargs): upload.__init__(self, **kwargs) self.grouping_upload_fields = ['vtype', 'subtype', 'lineage'] # patterns from the subtype and lineage fields in the GISAID fasta file self.patterns = {('a / h1n1', 'pdm09'): ('a', 'h1n1', 'seasonal_h1n1pdm'), ('a / h1n2', ''): ('a', 'h1n2', None), ('a / h1n2', 'seasonal'): ('a', 'h1n2', 'seasonal_h1n2'), ('a / h2n2', ''): ('a', 'h2n2', None), ('a / h3n2', ''): ('a', 'h3n2', 'seasonal_h3n2'), ('a / h3n2', 'seasonal'): ('a', 'h3n2', 'seasonal_h3n2'), ('a / h3n3', ''): ('a', 'h3n3', None), ('a / h5n1', ''): ('a', 'h5n1', None), ('a / h5n6', ''): ('a', 'h5n6', None), ('a / h6n1', ''): ('a', 'h6n1', None), ('a / h7n1', ''): ('a', 'h7n1', None), ('a / h7n2', ''): ('a', 'h7n2', None), ('a / h7n3', ''): ('a', 'h7n3', None), ('a / h7n7', ''): ('a', 'h7n7', None), ('a / h7n9', ''): ('a', 'h7n9', None), ('a / h9n2', ''): ('a', 'h9n2', None), ('a / h10n7', ''): ('a', 'h10n7', None), ('a / h10n8', ''): ('a', 'h10n8', None), ('a / h11', ''): ('a', 'h11', None), ('b / h0n0', 'victoria'): ('b', None, 'seasonal_vic'), ('b / h0n0', 'yamagata'): ('b', None, 'seasonal_yam'), ('b', 'victoria'): ('b', None, 'seasonal_vic'), ('b', 'yamagata'): ('b', None, 'seasonal_yam')} self.outgroups = {lineage: SeqIO.read('source-data/'+lineage+'_outgroup.gb', 'genbank') for lineage in ['H3N2', 'H1N1pdm', 'Vic', 'Yam']} self.outgroup_patterns = {'H3N2': ('a', 'h3n2', 'seasonal_h3n2'), 'H1N1': ('a', 'h1n1', 'seasonal_h1n1'), 'H1N1pdm': ('a', 'h1n1', 'seasonal_h1n1pdm'), 'Vic': ('b', None, 'seasonal_vic'), 'Yam': ('b', None, 'seasonal_yam')} self.strain_fix_fname = "source-data/flu_strain_name_fix.tsv" self.location_fix_fname = "source-data/flu_location_fix.tsv" self.location_label_fix_fname = "source-data/flu_fix_location_label.tsv" self.virus_to_sequence_transfer_fields = ['submission_date'] self.fix = set()
Example #17
Source File: handleGB.py From PhyloSuite with GNU General Public License v3.0 | 5 votes |
def get_tax_id(self, query_name): """to get data from ncbi taxomomy, we need to have the taxid. we can get that by passing the species name to esearch, which will return the tax id""" query_name = query_name.replace(' ', "+").strip() Entrez.email = 'A.N.Other@example.com' search = Entrez.esearch(term=query_name, db="taxonomy", retmode="xml") record = Entrez.read(search) return record['IdList'][0] if record['IdList'] else None
Example #18
Source File: handleGB.py From PhyloSuite with GNU General Public License v3.0 | 5 votes |
def fetchContentsByIDs(self, IDs, base=None, proportion=None, processSig=None): contents = "" for num, ID in enumerate(IDs): ID_path = self.fetchRecordPath(ID) with open(ID_path, encoding="utf-8", errors='ignore') as f: contents += f.read() if processSig: processSig.emit(base + (num+1)*proportion/len(IDs)) return contents # def fetchIDsByContents(self, contents): # '''注意这个ID是locus的ID''' # rgx = re.compile(r"(?sm)LOCUS {7}(\S+).+?^//\s*?(?=LOCUS|$)") # return rgx.findall(contents)
Example #19
Source File: handleGB.py From PhyloSuite with GNU General Public License v3.0 | 5 votes |
def merge_file_contents(self, files, base=None, proportion=None, processSig=None): all_content = "" for num, file in enumerate(files): with open(file, encoding="utf-8", errors='ignore') as f: all_content += f.read() if processSig: processSig.emit(base + (num+1)*proportion/len(files)) return all_content
Example #20
Source File: handleGB.py From PhyloSuite with GNU General Public License v3.0 | 5 votes |
def fetchCBS(self): dict_ = {"1":"0", "2":"1", "5":"4", "9":"7"} out = ["NA", "NA", "NA", "NA", "NA", "NA", "NA", "NA"] if str(self.codeTable) not in dict_: return out ##记得改 code = dict_[str(self.codeTable)] os.chdir(self.path) infile = "codonW_infile.fas" outfile = "codonW_outfile.txt" blkfile = "codonW_blk.txt" errorfile = "codonW_error.fas" with open(self.path + os.sep + infile, "w", encoding="utf-8") as f: f.write(">seq\n%s\n"%self.seq) command = '"%s" "%s" "%s" "%s" -all_indices -nomenu -silent -noblk -code %s'%(self.codonW, infile, outfile, blkfile, code) # print(command) popen = self.factory.init_popen(command) try: while True: try: out_line = popen.stdout.readline().decode("utf-8", errors='ignore') except UnicodeDecodeError: out_line = popen.stdout.readline().decode("gbk", errors='ignore') if out_line == "" and popen.poll() is not None: break except: pass ## 读取输出结果 if not os.path.exists(self.path + os.sep + outfile): with open(errorfile, "a", encoding="utf-8") as f2: f2.write(command + "\n" + self.seq + "\n") # print("error seq.:", self.seq) else: with open(self.path + os.sep + outfile, encoding="utf-8", errors="ignore") as f1: content = f1.read() try: list_ = content.split("\n")[1].split("\t") out = list_[5:9] + list_[11:15] except IndexError: with open(errorfile, "a", encoding="utf-8") as f2: f2.write(command + "\n" + self.seq + "\n") for num, i in enumerate(out): if (not self.is_float(i)) and (not self.is_int(i)): out[num] = "NA" return out
Example #21
Source File: flu_upload.py From fauna with GNU Affero General Public License v3.0 | 5 votes |
def align_flu(self, doc, min_score_percentage=0.85, **kwargs): ''' align with sequence from outgroup to determine subtype and lineage :return: True if determined grouping, False otherwise ''' try: scores = [] from Bio.Seq import Seq from Bio.SeqRecord import SeqRecord from Bio.Alphabet import IUPAC from Bio import AlignIO record = SeqRecord(Seq(doc['sequence'], IUPAC.ambiguous_dna), id=doc['strain']) for olineage, oseq in self.outgroups.items(): SeqIO.write([oseq, record], "temp_in.fasta", "fasta") os.system("mafft --auto temp_in.fasta > temp_out.fasta 2>tmp") tmp_aln = np.array(AlignIO.read('temp_out.fasta', 'fasta')) scores.append((olineage, (tmp_aln[0]==tmp_aln[1]).sum())) scores.sort(key = lambda x:x[1], reverse=True) if scores[0][1]>min_score_percentage*len(record.seq): print("Lineage based on similarity:", scores[0][0], doc['strain'], len(record.seq), scores) return self.outgroup_patterns[scores[0][0]] else: print("Couldn't parse virus subtype and lineage from aligning sequence: ", doc['strain'], len(record.seq), scores) return None except: print("Alignment failed: " + doc['strain']) return None
Example #22
Source File: sf_miscellaneous.py From pan-genome-analysis with GNU General Public License v3.0 | 5 votes |
def gbk_seq(each_gb_path, gbk_name): # get sequence from gbk file gb = each_gb_path + gbk_name genome = SeqIO.read(gb,'genbank') allseq = genome.seq return allseq
Example #23
Source File: intronerate.py From HybPiper with GNU General Public License v3.0 | 5 votes |
def parse_gff(filename): '''Parse a GFF file created for a single gene, return a list of lists containing the annotation info''' with open(filename) as gff_file: gff_dump = gff_file.read() gff_split = gff_dump.split("# --- END OF GFF DUMP ---") raw_hits = [x.split('\n') for x in gff_split[:-1]] hits = [] for h in raw_hits: new_hit = [] for line in h: if not line.startswith("#") and not line == "": new_hit.append(line.rstrip().split('\t')) hits.append(new_hit) return hits
Example #24
Source File: paralog_retriever.py From HybPiper with GNU General Public License v3.0 | 5 votes |
def retrieve_seqs(path,name,gene): seqs_to_write = None if os.path.isdir(os.path.join(path,name,gene,name,'paralogs')): seqs_to_write = [x for x in SeqIO.parse(os.path.join(path,name,gene,name,'paralogs','{}_paralogs.fasta'.format(gene)),'fasta')] num_seqs = str(len(seqs_to_write)) elif os.path.isfile(os.path.join(path,name,gene,name,'sequences','FNA','{}.FNA'.format(gene))): seqs_to_write = SeqIO.read(os.path.join(path,name,gene,name,'sequences','FNA','{}.FNA'.format(gene)),'fasta') num_seqs = "1" if seqs_to_write: SeqIO.write(seqs_to_write,sys.stdout,'fasta') else: num_seqs = "0" return num_seqs
Example #25
Source File: depth_calculator.py From HybPiper with GNU General Public License v3.0 | 5 votes |
def merge_seqs(genelist,prefix,file_type="coding"): '''Given a list of gene sequences, retreive the sequences and generate a single FASTA file.''' with open("{}_sequences.fasta".format(file_type),"w") as outfile: for gene in genelist: if file_type == "coding": seqfile = "{}/{}/sequences/FNA/{}.FNA".format(gene,prefix,gene) else: seqfile = "{}/{}/sequences/intron/{}_supercontig.fasta".format(gene,prefix,gene) seq = SeqIO.read(seqfile,'fasta') seq.id = seq.id + "-" + gene SeqIO.write(seq,outfile,'fasta')
Example #26
Source File: align.py From augur with GNU Affero General Public License v3.0 | 5 votes |
def read_alignment(fname): try: return AlignIO.read(fname, 'fasta') except Exception as error: raise AlignmentError("\nERROR: Problem reading in {}: {}".format(fname, str(error)))
Example #27
Source File: align.py From augur with GNU Affero General Public License v3.0 | 5 votes |
def read_sequences(*fnames): """return list of sequences from all fnames""" seqs = {} try: for fname in fnames: for record in SeqIO.parse(fname, 'fasta'): if record.name in seqs and record.seq != seqs[record.name].seq: raise AlignmentError("Detected duplicate input strains \"%s\" but the sequences are different." % record.name) # if the same sequence then we can proceed (and we only take one) seqs[record.name] = record except FileNotFoundError: raise AlignmentError("\nCannot read sequences -- make sure the file %s exists and contains sequences in fasta format" % fname) except ValueError as error: raise AlignmentError("\nERROR: Problem reading in {}: {}".format(fname, str(error))) return list(seqs.values())
Example #28
Source File: export_v2.py From augur with GNU Affero General Public License v3.0 | 5 votes |
def set_description(data_json, cmd_line_description_file): """ Read Markdown file provided by *cmd_line_description_file* and set `meta.description` in *data_json* to the text provided. """ try: with open(cmd_line_description_file, encoding='utf-8') as description_file: markdown_text = description_file.read() data_json['meta']['description'] = markdown_text except FileNotFoundError: fatal("Provided desciption file {} does not exist".format(cmd_line_description_file))
Example #29
Source File: export_v2.py From augur with GNU Affero General Public License v3.0 | 5 votes |
def get_root_sequence(root_node, ref=None, translations=None): ''' create a json structure that contains the sequence of the root, both as nucleotide and as translations. This allows look-up of the sequence for all states, including those that are not variable. Parameters ---------- root_node : dict data associated with the node ref : str, optional filename of the root sequence translations : str, optional file name of translations Returns ------- dict dict of nucleotide sequence and translations ''' root_sequence = {} if ref and translations: from Bio import SeqIO refseq = SeqIO.read(ref, 'fasta') root_sequence['nuc']=str(refseq.seq) for gene in SeqIO.parse(translations, 'fasta'): root_sequence[gene.id] = str(gene.seq) else: root_sequence["nuc"] = root_node["sequence"] root_sequence.update(root_node["aa_sequences"]) return root_sequence
Example #30
Source File: uniprot.py From ssbio with MIT License | 5 votes |
def metadata_path(self, m_path): """Provide pointers to the paths of the metadata file Args: m_path: Path to metadata file """ if not m_path: self.metadata_dir = None self.metadata_file = None else: if not op.exists(m_path): raise OSError('{}: file does not exist!'.format(m_path)) if not op.dirname(m_path): self.metadata_dir = '.' else: self.metadata_dir = op.dirname(m_path) self.metadata_file = op.basename(m_path) # Parse the metadata file using Biopython's built in SeqRecord parser # Just updating IDs and stuff tmp_sr = SeqIO.read(self.metadata_path, 'uniprot-xml') parsed = parse_uniprot_xml_metadata(tmp_sr) self.update(parsed, overwrite=True)