Python pysam.index() Examples

The following are 30 code examples of pysam.index(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module pysam , or try the search function .
Example #1
Source File: pipeline_cufflinks_optimization.py    From CGATPipelines with MIT License 6 votes vote down vote up
def linkBamToWorkingDirs(infiles, outfile):
    '''
    symlink the bam file and index to the working directories
    for execution of the transcript building pipeline
    '''

    bamfile = P.snip(infiles[0], ".bai")
    indexfile = infiles[0]
    directories = [P.snip(logfile, ".log") for logfile in infiles[1]]

    for directory in directories:
        os.symlink(os.path.abspath(bamfile), os.path.join(directory, bamfile))
        os.symlink(
            os.path.abspath(indexfile), os.path.join(directory, indexfile))
    updateFile(outfile)

###################################################################
###################################################################
################################################################### 
Example #2
Source File: download_example_data.py    From ga4gh-server with Apache License 2.0 6 votes vote down vote up
def createBamHeader(self, baseHeader):
        """
        Creates a new bam header based on the specified header from the
        parent BAM file.
        """
        header = dict(baseHeader)
        newSequences = []
        for index, referenceInfo in enumerate(header['SQ']):
            if index < self.numChromosomes:
                referenceName = referenceInfo['SN']
                # The sequence dictionary in the BAM file has to match up
                # with the sequence ids in the data, so we must be sure
                # that these still match up.
                assert referenceName == self.chromosomes[index]
                newReferenceInfo = {
                    'AS': self.referenceSetName,
                    'SN': referenceName,
                    'LN': 0,  # FIXME
                    'UR': 'http://example.com',
                    'M5': 'dbb6e8ece0b5de29da56601613007c2a',  # FIXME
                    'SP': 'Human'
                }
                newSequences.append(newReferenceInfo)
        header['SQ'] = newSequences
        return header 
Example #3
Source File: utils.py    From smallrnaseq with GNU General Public License v3.0 6 votes vote down vote up
def get_aligned_reads(samfile, collapsed=None, readcounts=None):
    """Get all aligned reads from a sam file into a pandas dataframe"""

    sam = HTSeq.SAM_Reader(str(samfile))
    f=[]
    for a in sam:
        if a.aligned == True:
            seq = a.read.seq.decode()
            f.append((seq,a.read.name,a.iv.chrom,a.iv.start,a.iv.end,a.iv.strand))
        #else:
        #    f.append((seq,a.read.name,'_unmapped'))
    counts = pd.DataFrame(f, columns=['seq','read','name','start','end','strand'])
    counts['length'] = counts.seq.str.len()
    counts = counts.drop(['read'],1)
    if collapsed is not None:
        readcounts = read_collapsed_file(collapsed)
    if readcounts is not None:
        counts = counts.merge(readcounts, on='seq')
        counts['align_id'] = counts.index
    return counts 
Example #4
Source File: utils.py    From GelReportModels with Apache License 2.0 6 votes vote down vote up
def parseArgs(self):
        description = "{} to {} conversion tool".format(
            self.inputFileFormat, self.outputFileFormat)
        parser = argparse.ArgumentParser(
            description=description)
        inputHelpText = "the name of the {} file to read".format(
            self.inputFileFormat)
        parser.add_argument(
            "inputFile", help=inputHelpText)
        outputHelpText = "the name of the {} file to write".format(
            self.outputFileFormat)
        defaultOutputFilePath = "out.{}".format(
            self.outputFileFormat.lower())
        parser.add_argument(
            "--outputFile", "-o", default=defaultOutputFilePath,
            help=outputHelpText)
        parser.add_argument(
            "--numLines", "-n", default=10,
            help="the number of lines to write")
        parser.add_argument(
            "--skipIndexing", default=False, action='store_true',
            help="don't create an index file")
        args = parser.parse_args()
        self.args = args 
Example #5
Source File: sambamconverter.py    From READemption with ISC License 6 votes vote down vote up
def sam_to_bam(self, sam_path, bam_path_prefix):
        if self._sam_file_is_empty(sam_path) is True:
            # pysam will generate an error if an emtpy SAM file will
            # be converted. Due to this an empty bam file with the
            # same header information will be generated from scratch
            self._generate_empty_bam_file(sam_path, bam_path_prefix)
            # Remove SAM file
            os.remove(sam_path)
            return
        temp_unsorted_bam_path = self._temp_unsorted_bam_path(
            bam_path_prefix)
        # Generate unsorted BAM file
        pysam.samtools.view(
            "-b", "-o{}".format(temp_unsorted_bam_path), sam_path,
            catch_stdout=False)
        # Generate sorted BAM file
        pysam.sort(temp_unsorted_bam_path, "-o", bam_path_prefix + ".bam")
        # Generate index for BAM file
        pysam.index("%s.bam" % bam_path_prefix)
        # Remove unsorted BAM file
        os.remove(temp_unsorted_bam_path)
        # Remove SAM file
        os.remove(sam_path) 
Example #6
Source File: rle.py    From medaka with Mozilla Public License 2.0 6 votes vote down vote up
def rlebam(args):
    """Entry point for merging run length information for fast5s to bam."""
    logger = medaka.common.get_named_logger('BAMDecor')
    read_index = medaka.common.read_key_value_tsv(args.read_index)
    logger.info("Found {} read in index\n".format(len(read_index)))

    def _ingress():
        for line in sys.stdin:
            if line[0] == '@':
                yield line.rstrip(), None, None, None
            else:
                read_id, flag, _ = line.split('\t', 2)
                is_rev = bool(int(flag) & 16)
                fname = read_index[read_id]
                yield line.rstrip(), read_id, is_rev, fname

    with concurrent.futures.ProcessPoolExecutor(
            max_workers=args.workers) as executor:
        for results in executor.map(
                _rle_bam_hdf_worker, _ingress(), chunksize=10):
            print(results) 
Example #7
Source File: smolecule.py    From medaka with Mozilla Public License 2.0 6 votes vote down vote up
def write_bam(fname, alignments, header, bam=True):
    """Write a `.bam` file for a set of alignments.

    :param fname: output filename.
    :param alignments: a list of `Alignment` tuples.
    :param header: bam header
    :param bam: write bam, else sam

    """
    mode = 'wb' if bam else 'w'
    with pysam.AlignmentFile(fname, mode, header=header) as fh:
        for ref_id, subreads in enumerate(alignments):
            for aln in sorted(subreads, key=lambda x: x.rstart):
                a = pysam.AlignedSegment()
                a.reference_id = ref_id
                a.query_name = aln.qname
                a.query_sequence = aln.seq
                a.reference_start = aln.rstart
                a.cigarstring = aln.cigar
                a.flag = aln.flag
                a.mapping_quality = 60
                fh.write(a)
    if mode == 'wb':
        pysam.index(fname) 
Example #8
Source File: BAMtoJunctionBED_alt.py    From altanalyze with Apache License 2.0 6 votes vote down vote up
def exportIndexes(input_dir):
    import unique
    bam_dirs = unique.read_directory(input_dir)
    print 'Building BAM index files',
    for file in bam_dirs:
        if string.lower(file[-4:]) == '.bam':
            bam_dir = input_dir+'/'+file
            bamf = pysam.AlignmentFile(bam_dir, "rb" )
            ### Is there an indexed .bai for the BAM? Check.
            try:
                for entry in bamf.fetch():
                    codes = map(lambda x: x[0],entry.cigar)
                    break
            except Exception:
                ### Make BAM Indexv lciv9df8scivx 
                print '.',
                bam_dir = str(bam_dir)
                #On Windows, this indexing step will fail if the __init__ pysam file line 51 is not set to - catch_stdout = False
                pysam.index(bam_dir) 
Example #9
Source File: BAMtoJunctionBED.py    From altanalyze with Apache License 2.0 6 votes vote down vote up
def exportIndexes(input_dir):
    import unique
    bam_dirs = unique.read_directory(input_dir)
    print 'Building BAM index files',
    for file in bam_dirs:
        if string.lower(file[-4:]) == '.bam':
            bam_dir = input_dir+'/'+file
            bamf = pysam.Samfile(bam_dir, "rb" )
            ### Is there an indexed .bai for the BAM? Check.
            try:
                for entry in bamf.fetch():
                    codes = map(lambda x: x[0],entry.cigar)
                    break
            except Exception:
                ### Make BAM Indexv lciv9df8scivx 
                print '.',
                bam_dir = str(bam_dir)
                #On Windows, this indexing step will fail if the __init__ pysam file line 51 is not set to - catch_stdout = False
                pysam.index(bam_dir)
                bamf = pysam.Samfile(bam_dir, "rb" ) 
Example #10
Source File: Opossum.py    From Opossum with GNU General Public License v3.0 6 votes vote down vote up
def CigarIndex(cigar, pos) :

	position = 0
	k = 0 # cigar index
	addpos = pos # extra positions

	for cigartype, cigarlength in cigar :

		position += cigarlength

		if position < pos : 
			k += 1
			addpos -= cigarlength
		else : return k, addpos



# Converts string of cigar symbol characters into the format used by Pysam 
Example #11
Source File: PipelinePeakcalling.py    From CGATPipelines with MIT License 6 votes vote down vote up
def getMacsPeakShiftEstimate(infile):
    '''
    Parses the peak shift estimate file from the estimateInsertSize
    function and returns the fragment size, which is used in the macs2
    postprocessing steps as the "offset" for bed2table
    Parameters
    ----------
    infile: str
        path to input file
    '''

    with IOTools.openFile(infile, "r") as inf:

        header = inf.readline().strip().split("\t")
        values = inf.readline().strip().split("\t")

    fragment_size_mean_ix = header.index("fragmentsize_mean")
    fragment_size = int(float(values[fragment_size_mean_ix]))

    return fragment_size 
Example #12
Source File: PipelinePeakcalling.py    From CGATPipelines with MIT License 6 votes vote down vote up
def makeBamLink(currentname, newname):
    '''
    Makes soft links to an existing bam file and its index - used instead
    of copying files.
    Generates and runs a command line statement.

    Parameters:
    currentname: str
        path to original file
    newname: str
        path to link location
    '''
    cwd = os.getcwd()
    os.system("""
    ln -s %(cwd)s/%(currentname)s %(cwd)s/%(newname)s;
    ln -s %(cwd)s/%(currentname)s.bai %(cwd)s/%(newname)s.bai;
    """ % locals()) 
Example #13
Source File: PipelineIDR.py    From CGATPipelines with MIT License 6 votes vote down vote up
def mergeBams(infile_list, outfile):
    infile_list = " ".join(infile_list)
    job_memory = "5G"
    statement = ("samtools merge - %(infile_list)s"
                 " | samtools sort - -o %(outfile)s"
                 " 2>%(outfile)s.log;"
                 " checkpoint;"
                 " samtools index %(outfile)s"
                 " 2>%(outfile)s.bai.log")
    P.run()

##########################################################################
##########################################################################
##########################################################################
# Run Peak Calling For IDR
########################################################################## 
Example #14
Source File: download_example_data.py    From ga4gh-server with Apache License 2.0 5 votes vote down vote up
def _downloadBam(self, sample):
        samplePath = '{}/alignment/'.format(sample)
        study = self.studyMap[sample]
        sourceFileName = (
            '{}.mapped.ILLUMINA.bwa.{}.'
            'low_coverage.20120522.bam'.format(sample, study))
        destFileName = os.path.join(
            self.dirName, "{}.bam".format(sample))
        baseUrl = self.getBamBaseUrl()
        sampleUrl = os.path.join(baseUrl, samplePath, sourceFileName)
        indexUrl = sampleUrl + ".bai"
        localIndexFile = os.path.join(self.tempDir, sourceFileName + ".bai")
        self._downloadIndex(indexUrl, localIndexFile)
        remoteFile = pysam.AlignmentFile(
            sampleUrl, filepath_index=localIndexFile)
        header = self.createBamHeader(remoteFile.header)
        self.log("Writing '{}'".format(destFileName))
        localFile = pysam.AlignmentFile(
            destFileName, 'wb', header=header)
        for chromosome in self.chromosomes:
            self.log("chromosome {}".format(chromosome))
            iterator = remoteFile.fetch(
                chromosome.encode('utf-8'),
                start=self.chromMinMax.getMinPos(chromosome),
                end=self.chromMinMax.getMaxPos(chromosome))
            for index, record in enumerate(iterator):
                # We only write records where we have the references
                # for the next mate. TODO we should take the positions
                # of these reads into account later when calculating
                # our reference bounds.
                if record.next_reference_id < self.numChromosomes:
                    if index >= self.maxReads:
                        break
                    localFile.write(record)
            self.log("{} records written".format(index))
        remoteFile.close()
        localFile.close()
        self.log("Indexing '{}'".format(destFileName))
        pysam.index(destFileName.encode('utf-8'))
        self.bamFilePaths.append(
            (destFileName, destFileName + ".bai")) 
Example #15
Source File: PipelinePeakcalling.py    From CGATPipelines with MIT License 5 votes vote down vote up
def mergeSortIndex(bamfiles, out):
    '''
    Merge bamfiles into a single sorted, indexed outfile.
    Generates and runs a command line statement.

    Example Statement:
    samtools merge ctmpljriXY.bam K9-13-1_filtered.bam K9-13-2_filtered.bam
    K9-13-3_filtered.bam;
    samtools sort ctmpljriXY.bam -o ctmpYH6llm.bam;
    samtools index ctmpYH6llm.bam;
    mv ctmpYH6llm.bam 13_Heart_pooled_filtered.bam;
    mv ctmpYH6llm.bam.bai 13_Heart_pooled_filtered.bam.bai;

    Parameters
    ----------
    bamfiles: list
        list of paths to bam files to merge
    out: str
        path to output file

    '''
    infiles = " ".join(bamfiles)
    T1 = P.getTempFilename(".")
    T2 = P.getTempFilename(".")
    statement = """samtools merge %(T1)s.bam %(infiles)s;
    samtools sort %(T1)s.bam -o %(T2)s.bam;
    samtools index %(T2)s.bam;
    mv %(T2)s.bam %(out)s;
    mv %(T2)s.bam.bai %(out)s.bai""" % locals()
    P.run()
    os.remove("%s.bam" % T1)
    os.remove(T1)
    os.remove(T2) 
Example #16
Source File: utils.py    From GelReportModels with Apache License 2.0 5 votes vote down vote up
def convert(self):
        # set flags
        if self.inputFileFormat == AlignmentFileConstants.SAM:
            inputFlags = "r"
        elif self.inputFileFormat == AlignmentFileConstants.BAM:
            inputFlags = "rb"
        if self.outputFileFormat == AlignmentFileConstants.SAM:
            outputFlags = "wh"
        elif self.outputFileFormat == AlignmentFileConstants.BAM:
            outputFlags = "wb"
        # open files
        inputFile = pysam.AlignmentFile(
            self.args.inputFile, inputFlags)
        outputFile = pysam.AlignmentFile(
            self.args.outputFile, outputFlags, header=inputFile.header)
        outputFilePath = outputFile.filename
        log("Creating alignment file '{}'".format(outputFilePath))
        # write new file
        for _ in range(self.args.numLines):
            alignedSegment = inputFile.next()
            outputFile.write(alignedSegment)
        # clean up
        inputFile.close()
        outputFile.close()
        # create index file
        if (not self.args.skipIndexing and
                self.outputFileFormat == AlignmentFileConstants.BAM):
            indexFilePath = "{}.{}".format(
                outputFilePath, AlignmentFileConstants.BAI.lower())
            log("Creating index file '{}'".format(indexFilePath))
            pysam.index(outputFilePath) 
Example #17
Source File: pipeline_cufflinks_optimization.py    From CGATPipelines with MIT License 5 votes vote down vote up
def indexBam(infile, outfile):
    '''
    index the reduced bam file
    '''
    pysam.index(infile)

###################################################################
###################################################################
################################################################### 
Example #18
Source File: app.py    From svviz with MIT License 5 votes vote down vote up
def saveReads(dataHub, nameExtra=None):
    if dataHub.args.save_reads:
        logging.info("* Saving relevant reads *")
        for i, sample in enumerate(dataHub):
            outbam_path = dataHub.args.save_reads
            if not outbam_path.endswith(".bam"):
                outbam_path += ".bam"

            if len(dataHub.samples) > 1:
                logging.debug("Using i = {}".format(i))
                outbam_path = outbam_path.replace(".bam", ".{}.bam".format(i))

            if nameExtra is not None:
                outbam_path = outbam_path.replace(".bam", ".{}.bam".format(nameExtra))

            logging.info("  Outpath: {}".format(outbam_path))

            # print out just the reads we're interested for use later
            bam_small = pysam.Samfile(outbam_path, "wb", template=sample.bam)
            for read in sample.reads:
                bam_small.write(read)

            for read in sample.readStatistics.reads:
                bam_small.write(read)

            bam_small.close()
            sorted_path = outbam_path.replace(".bam", ".sorted")
            pysam.sort(outbam_path, sorted_path)
            pysam.index(sorted_path+".bam") 
Example #19
Source File: download_example_data.py    From ga4gh-server with Apache License 2.0 5 votes vote down vote up
def _downloadIndex(self, indexUrl, localIndexFile):
        self.log("Downloading index from {} to {}".format(
            indexUrl, localIndexFile))
        response = urllib2.urlopen(indexUrl)
        with open(localIndexFile, "w") as destFile:
            destFile.write(response.read()) 
Example #20
Source File: PipelineChipseq.py    From CGATPipelines with MIT License 5 votes vote down vote up
def buildSimpleNormalizedBAM(infiles, outfile, nreads):
    '''normalize a bam file to given number of counts
       by random sampling
    '''
    infile, countfile = infiles

    pysam_in = pysam.Samfile(infile, "rb")

    fh = IOTools.openFile(countfile, "r")
    readcount = int(fh.read())
    fh.close()

    threshold = float(nreads) / float(readcount)

    pysam_out = pysam.Samfile(outfile, "wb", template=pysam_in)

    # iterate over mapped reads thinning by the threshold
    ninput, noutput = 0, 0
    for read in pysam_in.fetch():
        ninput += 1
        if random.random() <= threshold:
            pysam_out.write(read)
            noutput += 1

    pysam_in.close()
    pysam_out.close()
    pysam.index(outfile)

    E.info("buildNormalizedBam: %i input, %i output (%5.2f%%), should be %i" %
           (ninput, noutput, 100.0 * noutput / ninput, nreads)) 
Example #21
Source File: PipelinePeakcalling.py    From CGATPipelines with MIT License 5 votes vote down vote up
def normalize(infile, larger_nreads, outfile, smaller_nreads):
    threshold = float(smaller_nreads) / float(larger_nreads)

    pysam_in = pysam.Samfile(infile, "rb")
    pysam_out = pysam.Samfile(outfile, "wb", template=pysam_in)

    for read in pysam_in.fetch():
        if random.random() <= threshold:
            pysam_out.write(read)

    pysam_in.close()
    pysam_out.close()
    pysam.index(outfile)

    return outfile 
Example #22
Source File: PipelineIntervals.py    From CGATPipelines with MIT License 5 votes vote down vote up
def buildSimpleNormalizedBAM(bamfile, outfile, nreads):
    '''normalize a bam file to given number of counts
       by random sampling

    This method assumes that unmapped reads and multi-mapping
    reads have been removed from the :term:`bam` file.

    Arguments
    ---------
    bamfile : string
        Input file in :term:`bam` format.
    outfile : string
        Filename of output file in :term:`bam` format.
    nreads :int
        Number of reads to normalize to.
    '''

    readcount = BamTools.getNumReads(bamfile)

    pysam_in = pysam.Samfile(bamfile, "rb")

    threshold = float(nreads) / float(readcount)

    pysam_out = pysam.Samfile(outfile, "wb", template=pysam_in)

    # iterate over mapped reads thinning by the threshold
    ninput, noutput = 0, 0
    for read in pysam_in.fetch():
        ninput += 1
        if random.random() <= threshold:
            pysam_out.write(read)
            noutput += 1

    pysam_in.close()
    pysam_out.close()
    pysam.index(outfile)

    E.info("buildNormalizedBam: %i input, %i output (%5.2f%%), should be %i" %
           (ninput, noutput, 100.0 * noutput / ninput, nreads)) 
Example #23
Source File: misc.py    From slamdunk with GNU Affero General Public License v3.0 5 votes vote down vote up
def pysamIndex(outputBam):
    pysam.index(outputBam)  # @UndefinedVariable 
Example #24
Source File: tcounter.py    From slamdunk with GNU Affero General Public License v3.0 5 votes vote down vote up
def pysamIndex(outputBam):
    pysam.index(outputBam)  # @UndefinedVariable 
Example #25
Source File: Opossum.py    From Opossum with GNU General Public License v3.0 5 votes vote down vote up
def ReadCigarLength(cigar, k=100) :

	if k < len(cigar) :
		cigarlist = cigar[:(k+1)]
	else :
		cigarlist = cigar

	l = sum([c[1] for c in cigarlist])
		
	return l


# Input cigar and pos denoting index within a cigar transformed into a string sequence.
# Return what is the index in the original cigar and how many extra positions from the beginning
# of this index is the current position. 
Example #26
Source File: Opossum.py    From Opossum with GNU General Public License v3.0 5 votes vote down vote up
def UpdateFlag(flag) :

	if (flag & 16) == 16 : return 16
	else : return 0


# Returns the cigar length when cigar is expressed base by base,
# up to the index k of cigarlist (default is a very large index) 
Example #27
Source File: Opossum.py    From Opossum with GNU General Public License v3.0 5 votes vote down vote up
def MatchingBases(md, cigar, FromBeginning) :

	if not FromBeginning : 
		md = md[::-1]
		cigar = cigar[::-1]

	i = 0 # md index
	number = ''
	mlen = len(md)

	while i < mlen and md[i] in '0123456789' :
		number += md[i]
		i += 1

	if not number : number = '0'
	if FromBeginning : number = int(number)
	else : number = int(number[::-1])

	# Go through cigar to find possible inserts
	clen = 0	
	for cigartype, cigarlength in cigar :
		if cigartype in [1,3] :
			break	
		elif cigartype == 5 :
			pass
		else :
			clen += cigarlength

	if number < clen :
		number += AddClips(cigar, True) # cigar has been reversed here!

	return min(number, clen)


# Takes flag as input and returns whether alignment is primary (True) or not (False).
# If additional input parameter is set to True, then also check whether read is properly
# paired (definition of properly paired depends on mapper). 
Example #28
Source File: alignment_file.py    From ga4gh-server with Apache License 2.0 5 votes vote down vote up
def parseArgs(self):
        description = "{} to {} conversion tool".format(
            self.inputFileFormat, self.outputFileFormat)
        parser = argparse.ArgumentParser(
            description=description)
        inputHelpText = "the name of the {} file to read".format(
            self.inputFileFormat)
        parser.add_argument(
            "inputFile", help=inputHelpText)
        outputHelpText = "the name of the {} file to write".format(
            self.outputFileFormat)
        defaultOutputFilePath = "out.{}".format(
            self.outputFileFormat.lower())
        parser.add_argument(
            "--outputFile", "-o", default=defaultOutputFilePath,
            help=outputHelpText)
        parser.add_argument(
            "--numLines", "-n", default=10,
            help="the number of lines to write")
        parser.add_argument(
            "--skipIndexing", default=False, action='store_true',
            help="don't create an index file")
        args = parser.parse_args()
        self.args = args 
Example #29
Source File: alignment_file.py    From ga4gh-server with Apache License 2.0 5 votes vote down vote up
def convert(self):
        # set flags
        if self.inputFileFormat == AlignmentFileConstants.SAM:
            inputFlags = "r"
        elif self.inputFileFormat == AlignmentFileConstants.BAM:
            inputFlags = "rb"
        if self.outputFileFormat == AlignmentFileConstants.SAM:
            outputFlags = "wh"
        elif self.outputFileFormat == AlignmentFileConstants.BAM:
            outputFlags = "wb"
        # open files
        inputFile = pysam.AlignmentFile(
            self.args.inputFile, inputFlags)
        outputFile = pysam.AlignmentFile(
            self.args.outputFile, outputFlags, header=inputFile.header)
        outputFilePath = outputFile.filename
        utils.log("Creating alignment file '{}'".format(outputFilePath))
        # write new file
        for _ in xrange(self.args.numLines):
            alignedSegment = inputFile.next()
            outputFile.write(alignedSegment)
        # clean up
        inputFile.close()
        outputFile.close()
        # create index file
        if (not self.args.skipIndexing and
                self.outputFileFormat == AlignmentFileConstants.BAM):
            indexFilePath = "{}.{}".format(
                outputFilePath, AlignmentFileConstants.BAI.lower())
            utils.log("Creating index file '{}'".format(indexFilePath))
            pysam.index(outputFilePath) 
Example #30
Source File: PipelinePeakcalling.py    From CGATPipelines with MIT License 5 votes vote down vote up
def getContigSizes(idx):
    idx_file = pd.read_csv(idx, "\t",
                           header=None,
                           names=["Name", "Length", "Mapped", "Unmapped"])

    contigs = idx_file[['Name', 'Length']]

    contig_file = idx.replace(".idxstats", ".contig")

    contigs.to_csv(contig_file, sep="\t", header=None, index=None)

    return contig_file