Python pysam.FastxFile() Examples

The following are 4 code examples of pysam.FastxFile().
Example #1
Source: the qcat project (Mozilla Public License 2.0)
def create_reference(mapping, reference_file, fasta_path):
    """Write a combined reference FASTA from the files named in ``mapping``.

    Each file matching the glob pattern ``fasta_path`` is identified by its
    basename without the final extension. Files whose identifier is a key of
    ``mapping`` are read with ``pysam.FastxFile`` and every record in them is
    written to ``reference_file``; all other files are skipped.

    :param mapping: dict keyed by file identifier. Each value must be
        indexable; its first element becomes the id of the output records.
    :param reference_file: path of the FASTA file to (over)write.
    :param fasta_path: glob pattern selecting the input fasta/q files.
    :returns: the number of records written.
    :raises RuntimeError: if the number of records written differs from the
        number of keys in ``mapping``.
    """
    ref_count = 0
    with open(reference_file, "w") as ref:
        for path in glob.glob(fasta_path):
            file_id = os.path.splitext(os.path.basename(path))[0]
            if file_id not in mapping:
                # Nothing from this file is wanted, so do not open it at all.
                continue
            with pysam.FastxFile(path) as fh:
                for entry in fh:
                    ref_count += 1
                    header = ">" + str(mapping[file_id][0]) + " " + entry.name
                    # NOTE: the comment is printed as-is, so a record without
                    # a comment (entry.comment is None) yields the literal
                    # text "None" at the end of the header line.
                    print(header, entry.comment, file=ref)
                    print(entry.sequence, file=ref)
    # The count is per record, so this check only passes when each mapped
    # file contributed exactly one record on average.
    if ref_count != len(mapping):
        raise RuntimeError(
            "Couldn't find all references, please check mappings!")
    return ref_count
Example #2
Source: the pomoxis project (Mozilla Public License 2.0)
def split_fastx(fname, output, chunksize=10000):
    """Split records in a fasta/q into fixed lengths.

    Every input record is cut into consecutive pieces by the ``chunks``
    helper and each piece is written as its own record named
    ``<name>_chunk<i>``. Records without qualities are written in FASTA
    format, records with qualities in FASTQ format.

    :param fname: input filename.
    :param output: output filename.
    :param chunksize: (maximum) length of output records.
    """
    with open(output, 'w') as fout:
        with pysam.FastxFile(fname, persist=False) as fin:
            for rec in fin:
                name = rec.name
                seq = rec.sequence
                qual = rec.quality
                # Record the chunk length in the comment, keeping any
                # comment the input record already had.
                if rec.comment is None:
                    comment = 'chunk_length={}'.format(chunksize)
                else:
                    comment = '{} chunk_length={}'.format(
                        rec.comment, chunksize)
                if qual is None:
                    # FASTA input: sequence only.
                    for i, s in enumerate(chunks(seq, chunksize)):
                        chunk_name = '{}_chunk{}'.format(name, i)
                        fout.write(">{} {}\n{}\n".format(
                            chunk_name, comment, ''.join(s)))
                else:
                    # FASTQ input: split sequence and qualities in step so
                    # each chunk keeps its matching quality string.
                    paired = zip(chunks(seq, chunksize),
                                 chunks(qual, chunksize))
                    for i, (s, q) in enumerate(paired):
                        chunk_name = '{}_chunk{}'.format(name, i)
                        fout.write('@{} {}\n{}\n+\n{}\n'.format(
                            chunk_name, comment, ''.join(s), ''.join(q)))
Example #3
Source: the pomoxis project (Mozilla Public License 2.0)
def get_seq_lens(fastx):
    """Get sequence lengths from fastx file.

    :param fastx: path of the fasta/q file to read.
    :returns: list with the length of each record's sequence, in file order.
    """
    # Use the context manager so the file handle is closed once the lengths
    # have been collected instead of being left to garbage collection.
    with pysam.FastxFile(fastx) as fh:
        return [len(rec.sequence) for rec in fh]
Example #4
Source: the medaka project (Mozilla Public License 2.0)
def multi_from_fastx(cls, fastx,
                     take_all=False, read_id=None, depth_filter=1,
                     length_filter=0):
    """Create multiple `Read` s from a fasta/q file.

    It is assumed that subreads are grouped by read and named with the read
    id followed by an underscore, i.e. the read id is everything before the
    first ``_`` in the record name.

    :param cls: callable invoked as ``cls(read_id, subreads)`` to build each
        yielded object.
    :param fastx: input file path.
    :param take_all: skip check on subread_ids, take all subreads in one
        `Read`.
    :param read_id: name of `Read`. Only used for `take_all == True`. If
        not given the basename of the input file is used.
    :param depth_filter: require reads to have at least this many subreads.
    :param length_filter: require reads to have a median subread length
        above this value.
    :yields: ``cls(read_id, subreads)`` for every group of subreads passing
        both filters.
    """
    # NOTE(review): this takes `cls` and is presumably bound as a classmethod
    # in its original class; the decorator is not visible here -- confirm.
    # NOTE(review): the default of `length_filter` was not visible in the
    # source this was restored from; 0 is assumed -- confirm.

    # At least one subread is always required, which also guarantees the
    # median below is never taken over an empty list.
    depth_filter = max(1, depth_filter)
    if take_all:
        if read_id is None:
            read_id = os.path.splitext(os.path.basename(fastx))[0]
    else:
        # When grouping by name the read id is taken from the records, so
        # start without one.
        read_id = None

    def _passes_filters(group):
        """Return True if `group` satisfies the depth and length filters."""
        if len(group) < depth_filter:
            return False
        return np.median([len(x.seq) for x in group]) > length_filter

    subreads = []
    with pysam.FastxFile(fastx) as fh:
        for entry in fh:
            if not take_all:
                cur_read_id = entry.name.split("_")[0]
                if cur_read_id != read_id:
                    # A new read starts: emit the one collected so far.
                    if _passes_filters(subreads):
                        yield cls(read_id, subreads)
                    read_id = cur_read_id
                    subreads = []
            # Records with an empty sequence are dropped.
            if len(entry.sequence) > 0:
                subreads.append(Subread(entry.name, entry.sequence))

        # Emit the final (or, with take_all, the only) group.
        if _passes_filters(subreads):
            yield cls(read_id, subreads)