htsjdk.samtools.util.IOUtil#assertFileIsWritable

Source File: AddCommentsToBam.java From picard with MIT License

6 votes

protected int doWork() {
    IOUtil.assertFileIsReadable(INPUT);
    IOUtil.assertFileIsWritable(OUTPUT);

    if (INPUT.getAbsolutePath().endsWith(".sam")) {
        throw new PicardException("SAM files are not supported");
    }

    final SAMFileHeader samFileHeader = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).getFileHeader(INPUT);
    for (final String comment : COMMENT) {
        if (comment.contains("\n")) {
            throw new PicardException("Comments can not contain a new line");
        }
        samFileHeader.addComment(comment);
    }

    BamFileIoUtils.reheaderBamFile(samFileHeader, INPUT, OUTPUT, CREATE_MD5_FILE, CREATE_INDEX);

    return 0;
}

Source File: SamToFastq.java From picard with MIT License

6 votes

private File makeReadGroupFile(final SAMReadGroupRecord readGroup, final String preExtSuffix) {
    String fileName = null;
    if (RG_TAG.equalsIgnoreCase("PU")) {
        fileName = readGroup.getPlatformUnit();
    } else if (RG_TAG.equalsIgnoreCase("ID")) {
        fileName = readGroup.getReadGroupId();
    }
    if (fileName == null) {
        throw new PicardException("The selected RG_TAG: " + RG_TAG + " is not present in the bam header.");
    }
    fileName = IOUtil.makeFileNameSafe(fileName);
    if (preExtSuffix != null) {
        fileName += preExtSuffix;
    }
    fileName += COMPRESS_OUTPUTS_PER_RG ? ".fastq.gz" : ".fastq";

    final File result = (OUTPUT_DIR != null)
            ? new File(OUTPUT_DIR, fileName)
            : new File(fileName);
    IOUtil.assertFileIsWritable(result);
    return result;
}

Source File: InsertSizeMetricsCollector.java From gatk with BSD 3-Clause "New" or "Revised" License

6 votes

/**
 * Calls R script to plot histogram(s) in PDF.
 */
private void writeHistogramPDF(final String inputName) throws RScriptExecutorException {
    // path to Picard R script for producing histograms in PDF files.
    final String R_SCRIPT = "insertSizeHistogram.R";

    File histFile = new File(inputArgs.histogramPlotFile);
    IOUtil.assertFileIsWritable(histFile);

    final RScriptExecutor executor = new RScriptExecutor();
    executor.addScript(new Resource(R_SCRIPT, InsertSizeMetricsCollector.class));
    executor.addArgs(
            inputArgs.output,           // text-based metrics file
            histFile.getAbsolutePath(), // PDF graphics file
            inputName                   // input bam file
    );
    if (inputArgs.histogramWidth != null) {
        executor.addArgs(String.valueOf(inputArgs.histogramWidth));
    }
    executor.exec();
}

Source File: BamTagOfTagCounts.java From Drop-seq with MIT License

6 votes

@Override
protected int doWork() {

	IOUtil.assertFileIsReadable(INPUT);
	IOUtil.assertFileIsWritable(OUTPUT);
	PrintStream out = new ErrorCheckingPrintStream(IOUtil.openFileForWriting(OUTPUT));

	writeHeader(out);

	TagOfTagResults<String,String> results= getResults(this.INPUT, this.PRIMARY_TAG, this.SECONDARY_TAG, this.FILTER_PCR_DUPLICATES, this.MINIMUM_MAPPING_QUALITY);

	for (String k: results.getKeys()) {
		Set<String> values = results.getValues(k);
		writeStats(k, values, out);
	}
	out.close();

	return(0);
}

Source File: SamFormatConverter.java From picard with MIT License

6 votes

/**
 * Convert a file from one of sam/bam/cram format to another based on the extension of output.
 *
 * @param input             input file in one of sam/bam/cram format
 * @param output            output to write converted file to, the conversion is based on the extension of this filename
 * @param referenceSequence the reference sequence to use, necessary when reading/writing cram
 * @param createIndex       whether or not an index should be written alongside the output file
 */
public static void convert(final File input, final File output, final File referenceSequence, final Boolean createIndex) {
    IOUtil.assertFileIsReadable(input);
    IOUtil.assertFileIsWritable(output);
    final SamReader reader = SamReaderFactory.makeDefault().referenceSequence(referenceSequence).open(input);
    final SAMFileWriter writer = new SAMFileWriterFactory().makeWriter(reader.getFileHeader(), true, output, referenceSequence);
    if (createIndex && writer.getFileHeader().getSortOrder() != SAMFileHeader.SortOrder.coordinate) {
        throw new PicardException("Can't CREATE_INDEX unless sort order is coordinate");
    }

    final ProgressLogger progress = new ProgressLogger(Log.getInstance(SamFormatConverter.class));
    for (final SAMRecord rec : reader) {
        writer.addAlignment(rec);
        progress.record(rec);
    }
    CloserUtil.close(reader);
    writer.close();
}

Source File: ExtractSequences.java From picard with MIT License

5 votes

@Override
protected int doWork() {
    IOUtil.assertFileIsReadable(INTERVAL_LIST);
    IOUtil.assertFileIsReadable(REFERENCE_SEQUENCE);
    IOUtil.assertFileIsWritable(OUTPUT);

    final IntervalList intervals = IntervalList.fromFile(INTERVAL_LIST);
    final ReferenceSequenceFile ref = ReferenceSequenceFileFactory.getReferenceSequenceFile(REFERENCE_SEQUENCE);
    SequenceUtil.assertSequenceDictionariesEqual(intervals.getHeader().getSequenceDictionary(), ref.getSequenceDictionary());

    final BufferedWriter out = IOUtil.openFileForBufferedWriting(OUTPUT);

    for (final Interval interval : intervals) {
        final ReferenceSequence seq = ref.getSubsequenceAt(interval.getContig(), interval.getStart(), interval.getEnd());
        final byte[] bases = seq.getBases();
        if (interval.isNegativeStrand()) SequenceUtil.reverseComplement(bases);

        try {
            out.write(">");
            out.write(interval.getName());
            out.write("\n");

            for (int i=0; i<bases.length; ++i) {
                if (i > 0 && i % LINE_LENGTH == 0) out.write("\n");
                out.write(bases[i]);
            }

            out.write("\n");
        }
        catch (IOException ioe) {
            throw new PicardException("Error writing to file " + OUTPUT.getAbsolutePath(), ioe);

        }
    }

    CloserUtil.close(out);

    return 0;
}

Source File: VcfFormatConverter.java From picard with MIT License

5 votes

@Override
protected int doWork() {
    final ProgressLogger progress = new ProgressLogger(LOG, 10000);
    
    IOUtil.assertFileIsReadable(INPUT);
    IOUtil.assertFileIsWritable(OUTPUT);

    final VCFFileReader reader = new VCFFileReader(INPUT, REQUIRE_INDEX);
    final VCFHeader header = new VCFHeader(reader.getFileHeader());
    final SAMSequenceDictionary sequenceDictionary = header.getSequenceDictionary();
    if (CREATE_INDEX && sequenceDictionary == null) {
        throw new PicardException("A sequence dictionary must be available in the input file when creating indexed output.");
    }

    final VariantContextWriterBuilder builder = new VariantContextWriterBuilder()
            .setOutputFile(OUTPUT)
            .setReferenceDictionary(sequenceDictionary);
    if (CREATE_INDEX)
        builder.setOption(Options.INDEX_ON_THE_FLY);
    else
        builder.unsetOption(Options.INDEX_ON_THE_FLY);
    final VariantContextWriter writer = builder.build();
    writer.writeHeader(header);
    final CloseableIterator<VariantContext> iterator = reader.iterator();

    while (iterator.hasNext()) {
        final VariantContext context = iterator.next();
        writer.add(context);
        progress.record(context.getContig(), context.getStart());
    }

    CloserUtil.close(iterator);
    CloserUtil.close(reader);
    writer.close();

    return 0;
}

Source File: CreateVerifyIDIntensityContaminationMetricsFile.java From picard with MIT License

5 votes

@Override
protected int doWork() {
    IOUtil.assertFileIsReadable(INPUT);
    final File metricsFile = new File(OUTPUT + "." + FILE_EXTENSION);
    IOUtil.assertFileIsWritable(metricsFile);

    final MetricsFile<VerifyIDIntensityContaminationMetrics, ?> verifyIDIntensityContaminationMetricsFile = getMetricsFile();

    final Pattern HEADER_PATTERN = Pattern.compile("^ID\\s+%Mix\\s+LLK\\s+LLK0\\s*$");
    final Pattern DASHES_PATTERN = Pattern.compile("^[-]+$");
    final Pattern DATA_PATTERN = Pattern.compile("^(\\d+)\\s+([0-9]*\\.?[0-9]+)\\s+([-0-9]*\\.?[0-9]+)\\s+([-0-9]*\\.?[0-9]+)\\s*$");
    try (BufferedReader br = new BufferedReader(new FileReader(INPUT))) {
        String line;
        line = br.readLine();
        lineMatch(line, HEADER_PATTERN);
        line = br.readLine();
        lineMatch(line, DASHES_PATTERN);
        while ((line = br.readLine()) != null) {
            final Matcher m = lineMatch(line, DATA_PATTERN);
            // Load up and store the metrics
            final VerifyIDIntensityContaminationMetrics metrics = new VerifyIDIntensityContaminationMetrics();
            metrics.ID = Integer.parseInt(m.group(1));
            metrics.PCT_MIX = Double.parseDouble(m.group(2));
            metrics.LLK = Double.parseDouble(m.group(3));
            metrics.LLK0 = Double.parseDouble(m.group(4));

            verifyIDIntensityContaminationMetricsFile.addMetric(metrics);
        }
        verifyIDIntensityContaminationMetricsFile.write(metricsFile);
    } catch (IOException e) {
        throw new PicardException("Error parsing VerifyIDIntensity Output", e);
    }
    return 0;
}

Source File: CollectGcBiasMetrics.java From picard with MIT License

5 votes

@Override
protected void setup(final SAMFileHeader header, final File samFile) {
    IOUtil.assertFileIsWritable(CHART_OUTPUT);
    IOUtil.assertFileIsWritable(SUMMARY_OUTPUT);
    IOUtil.assertFileIsReadable(REFERENCE_SEQUENCE);

    //Calculate windowsByGc for the reference sequence
    final int[] windowsByGc = GcBiasUtils.calculateRefWindowsByGc(BINS, REFERENCE_SEQUENCE, SCAN_WINDOW_SIZE);

    //Delegate actual collection to GcBiasMetricCollector
    multiCollector = new GcBiasMetricsCollector(METRIC_ACCUMULATION_LEVEL, windowsByGc, header.getReadGroups(), SCAN_WINDOW_SIZE, IS_BISULFITE_SEQUENCED, ALSO_IGNORE_DUPLICATES);
}

Source File: MeanQualityByCycle.java From picard with MIT License

5 votes

@Override
protected void setup(final SAMFileHeader header, final File samFile) {
    IOUtil.assertFileIsWritable(CHART_OUTPUT);
    // If we're working with a single library, assign that library's name
    // as a suffix to the plot title
    final List<SAMReadGroupRecord> readGroups = header.getReadGroups();
    if (readGroups.size() == 1) {
        plotSubtitle = StringUtil.asEmptyIfNull(readGroups.get(0).getLibrary());
    }
}

Source File: DGEMatrix.java From Drop-seq with MIT License

5 votes

public void writeDropSeqMatrixMarket(final File output, final boolean formatAsInteger, final boolean transpose) {
	try {
		IOUtil.assertFileIsWritable(output);
		final int cardinality;
		if (expressionMatrix instanceof SparseMatrix)
			cardinality = ((SparseMatrix) expressionMatrix).cardinality();
		else
			cardinality = expressionMatrix.columns() * expressionMatrix.columns();
		final MatrixMarketWriter writer = new MatrixMarketWriter(output, MatrixMarketConstants.ElementType.real,
				transpose? expressionMatrix.columns(): expressionMatrix.rows(),
				transpose? expressionMatrix.rows(): expressionMatrix.columns(),
				cardinality, this.getGenes(),this.getCellBarcodes(),
				MatrixMarketConstants.GENES, MatrixMarketConstants.CELL_BARCODES);
		if (expressionMatrix instanceof SparseMatrix) {
               final SparseMatrix mat = (SparseMatrix)expressionMatrix;
               final MatrixIterator it = mat.nonZeroIterator();
               while (it.hasNext()) {
				final double val = it.next();
                   final int row = it.rowIndex();
                   final int col = it.columnIndex();
				writeMatrixMarketTriplet(writer, row, col, val, formatAsInteger, transpose);
               }
           } else
			for (int i = 0; i < expressionMatrix.rows(); ++i)
				for (int j = 0; j < expressionMatrix.columns(); ++j) {
					double exp = expressionMatrix.get(i, j);
					if (exp != SPARSE_VALUE)
						writeMatrixMarketTriplet(writer, i, j, exp, formatAsInteger, transpose);
				}
		writer.close();
	} catch (IOException e) {
		throw new RuntimeException("Trouble writing " + output.getAbsolutePath(), e);
	}
}

Source File: TagReadWithGeneExonFunction.java From Drop-seq with MIT License

5 votes

@Override
protected int doWork() {
    IOUtil.assertFileIsReadable(this.INPUT);
    IOUtil.assertFileIsReadable(this.ANNOTATIONS_FILE);
    if (this.SUMMARY!=null) IOUtil.assertFileIsWritable(this.SUMMARY);
    IOUtil.assertFileIsWritable(this.OUTPUT);

    SamReader inputSam = SamReaderFactory.makeDefault().open(INPUT);

    SAMFileHeader header = inputSam.getFileHeader();
    SamHeaderUtil.addPgRecord(header, this);
    SAMSequenceDictionary bamDict = header.getSequenceDictionary();

    final OverlapDetector<Gene> geneOverlapDetector = GeneAnnotationReader.loadAnnotationsFile(ANNOTATIONS_FILE, bamDict);
    SAMFileWriter writer= new SAMFileWriterFactory().makeSAMOrBAMWriter(header, true, OUTPUT);

    for (SAMRecord r: inputSam) {
        pl.record(r);

        if (!r.getReadUnmappedFlag())
            r=	setAnnotations(r, geneOverlapDetector);
        writer.addAlignment(r);
    }

    CloserUtil.close(inputSam);
    writer.close();
    if (this.USE_STRAND_INFO) log.info(this.metrics.toString());
    if (SUMMARY==null) return 0;

    //process summary
    MetricsFile<ReadTaggingMetric, Integer> outFile = new MetricsFile<>();
    outFile.addMetric(this.metrics);
    outFile.write(this.SUMMARY);
    return 0;

}

Source File: CollectInsertSizeMetrics.java From picard with MIT License

5 votes

@Override protected void setup(final SAMFileHeader header, final File samFile) {
    IOUtil.assertFileIsWritable(OUTPUT);
    IOUtil.assertFileIsWritable(Histogram_FILE);

    //Delegate actual collection to InsertSizeMetricCollector
    multiCollector = new InsertSizeMetricsCollector(METRIC_ACCUMULATION_LEVEL, header.getReadGroups(), MINIMUM_PCT,
            HISTOGRAM_WIDTH, MIN_HISTOGRAM_WIDTH, DEVIATIONS, INCLUDE_DUPLICATES);
}

Source File: GatherVcfsCloud.java From gatk with BSD 3-Clause "New" or "Revised" License

4 votes

@Override
protected Object doWork() {
    log.info("Checking inputs.");
    final List<Path> inputPaths = inputs.stream().map(IOUtils::getPath).collect(Collectors.toList());

    if(!ignoreSafetyChecks) {
        for (final Path f : inputPaths) {
            IOUtil.assertFileIsReadable(f);
        }
    }

    IOUtil.assertFileIsWritable(output);

    final SAMSequenceDictionary sequenceDictionary = getHeader(inputPaths.get(0)).getSequenceDictionary();

    if (createIndex && sequenceDictionary == null) {
        throw new UserException("In order to index the resulting VCF, the input VCFs must contain ##contig lines.");
    }

    if( !ignoreSafetyChecks) {
        log.info("Checking file headers and first records to ensure compatibility.");
        assertSameSamplesAndValidOrdering(inputPaths, disableContigOrderingCheck);
    }

    if(gatherType == GatherType.AUTOMATIC) {
        if ( canBlockCopy(inputPaths, output) ) {
            gatherType = GatherType.BLOCK;
        } else {
            gatherType = GatherType.CONVENTIONAL;
        }
    }

    if (gatherType == GatherType.BLOCK && !canBlockCopy(inputPaths, output)) {
        throw new UserException.BadInput(
                "Requested block copy but some files are not bgzipped, all inputs and the output must be bgzipped to block copy");
    }

    switch (gatherType) {
        case BLOCK:
            log.info("Gathering by copying gzip blocks. Will not be able to validate position non-overlap of files.");
            if (createIndex) {
                log.warn("Index creation not currently supported when gathering block compressed VCFs.");
            }
            gatherWithBlockCopying(inputPaths, output, cloudPrefetchBuffer);
            break;
        case CONVENTIONAL:
            log.info("Gathering by conventional means.");
            gatherConventionally(sequenceDictionary, createIndex, inputPaths, output, cloudPrefetchBuffer, disableContigOrderingCheck);
            break;
        default:
            throw new GATKException.ShouldNeverReachHereException("Invalid gather type: " + gatherType + ".  Please report this bug to the developers.");
    }
    return null;
}

Source File: NonNFastaSize.java From picard with MIT License

4 votes

@Override
protected int doWork() {
    IOUtil.assertFileIsReadable(INPUT);
    IOUtil.assertFileIsWritable(OUTPUT);

    // set up the reference and a mask so that we only count the positions requested by the user
    final ReferenceSequenceFile ref = ReferenceSequenceFileFactory.getReferenceSequenceFile(INPUT);
    final ReferenceSequenceMask referenceSequenceMask;
    if (INTERVALS != null) {
        IOUtil.assertFileIsReadable(INTERVALS);
        final IntervalList intervalList = IntervalList.fromFile(INTERVALS);
        referenceSequenceMask = new IntervalListReferenceSequenceMask(intervalList);
    } else {
        final SAMFileHeader header = new SAMFileHeader();
        header.setSequenceDictionary(ref.getSequenceDictionary());
        referenceSequenceMask = new WholeGenomeReferenceSequenceMask(header);
    }

    long nonNbases = 0L;

    for (final SAMSequenceRecord rec : ref.getSequenceDictionary().getSequences()) {
        // pull out the contig and set up the bases
        final ReferenceSequence sequence = ref.getSequence(rec.getSequenceName());
        final byte[] bases = sequence.getBases();
        StringUtil.toUpperCase(bases);

        for (int i = 0; i < bases.length; i++) {
            // only investigate this position if it's within our mask
            if (referenceSequenceMask.get(sequence.getContigIndex(), i+1)) {
                nonNbases += bases[i] == SequenceUtil.N ? 0 : 1;
            }
        }
    }

    try {
        final BufferedWriter out = IOUtil.openFileForBufferedWriting(OUTPUT);
        out.write(nonNbases + "\n");
        out.close();
    }
    catch (IOException ioe) {
        throw new PicardException("Error writing to file " + OUTPUT.getAbsolutePath(), ioe);
    }

    return 0;
}

Source File: CollectWgsMetrics.java From picard with MIT License

4 votes

@Override
protected int doWork() {
    IOUtil.assertFileIsReadable(INPUT);
    IOUtil.assertFileIsWritable(OUTPUT);
    IOUtil.assertFileIsReadable(REFERENCE_SEQUENCE);
    INTERVALS = intervalArugmentCollection.getIntervalFile();
    if (INTERVALS != null) {
        IOUtil.assertFileIsReadable(INTERVALS);
    }
    if (THEORETICAL_SENSITIVITY_OUTPUT != null) {
        IOUtil.assertFileIsWritable(THEORETICAL_SENSITIVITY_OUTPUT);
    }

    // it doesn't make sense for the locus accumulation cap to be lower than the coverage cap
    if (LOCUS_ACCUMULATION_CAP < COVERAGE_CAP) {
        log.warn("Setting the LOCUS_ACCUMULATION_CAP to be equal to the COVERAGE_CAP (" + COVERAGE_CAP + ") because it should not be lower");
        LOCUS_ACCUMULATION_CAP = COVERAGE_CAP;
    }

    // Setup all the inputs
    final ProgressLogger progress = new ProgressLogger(log, 10000000, "Processed", "loci");
    final ReferenceSequenceFileWalker refWalker = new ReferenceSequenceFileWalker(REFERENCE_SEQUENCE);
    final SamReader in = getSamReader();
    final AbstractLocusIterator iterator = getLocusIterator(in);

    final List<SamRecordFilter> filters = new ArrayList<>();
    final CountingFilter adapterFilter = new CountingAdapterFilter();
    final CountingFilter mapqFilter = new CountingMapQFilter(MINIMUM_MAPPING_QUALITY);
    final CountingFilter dupeFilter = new CountingDuplicateFilter();
    final CountingPairedFilter pairFilter = new CountingPairedFilter();
    // The order in which filters are added matters!
    filters.add(new SecondaryAlignmentFilter()); // Not a counting filter because we never want to count reads twice
    filters.add(adapterFilter);
    filters.add(mapqFilter);
    filters.add(dupeFilter);
    if (!COUNT_UNPAIRED) {
        filters.add(pairFilter);
    }
    iterator.setSamFilters(filters);
    iterator.setMappingQualityScoreCutoff(0); // Handled separately because we want to count bases
    iterator.setIncludeNonPfReads(false);

    final AbstractWgsMetricsCollector<?> collector = getCollector(COVERAGE_CAP, getIntervalsToExamine());
    final WgsMetricsProcessor processor = getWgsMetricsProcessor(progress, refWalker, iterator, collector);
    processor.processFile();

    final MetricsFile<WgsMetrics, Integer> out = getMetricsFile();
    processor.addToMetricsFile(out, INCLUDE_BQ_HISTOGRAM, dupeFilter, adapterFilter, mapqFilter, pairFilter);
    out.write(OUTPUT);

    if (THEORETICAL_SENSITIVITY_OUTPUT != null) {
        // Write out theoretical sensitivity results.
        final MetricsFile<TheoreticalSensitivityMetrics, ?> theoreticalSensitivityMetrics = getMetricsFile();
        log.info("Calculating theoretical sentitivity at " + ALLELE_FRACTION.size() + " allele fractions.");
        List<TheoreticalSensitivityMetrics> tsm = TheoreticalSensitivity.calculateSensitivities(SAMPLE_SIZE, collector.getUnfilteredDepthHistogram(), collector.getUnfilteredBaseQHistogram(), ALLELE_FRACTION);
        theoreticalSensitivityMetrics.addAllMetrics(tsm);
        theoreticalSensitivityMetrics.write(THEORETICAL_SENSITIVITY_OUTPUT);
    }

    return 0;
}

Source File: ComputeUMISharing.java From Drop-seq with MIT License

4 votes

@Override
protected int doWork() {
    IOUtil.assertFileIsReadable(INPUT);
    IOUtil.assertFileIsWritable(OUTPUT);
    // Make sure this is modifiable
    EDIT_DISTANCE = new ArrayList<>(EDIT_DISTANCE);
    while (EDIT_DISTANCE.size() < COUNT_TAG.size()) {
        EDIT_DISTANCE.add(0);
    }
    parentEditDistanceMatcher = new ParentEditDistanceMatcher(this.COUNT_TAG, this.EDIT_DISTANCE, this.FIND_INDELS, this.NUM_THREADS);

    SamReader reader = SamReaderFactory.makeDefault().open(INPUT);
    final ProgressLogger progressLogger = new ProgressLogger(log, 1000000, "Sorting");
    Iterator<SAMRecord> iter = reader.iterator();
    if (LOCUS_FUNCTION_LIST.size() > 0) {
        iter = new GeneFunctionIteratorWrapper(iter, this.GENE_NAME_TAG,
                this.GENE_STRAND_TAG, this.GENE_FUNCTION_TAG, false, this.STRAND_STRATEGY,
                this.LOCUS_FUNCTION_LIST);
    }

    CloseableIterator<SAMRecord> sortedIter = SortingIteratorFactory.create(SAMRecord.class, iter,
            PARENT_CHILD_COMPARATOR, new BAMRecordCodec(reader.getFileHeader()), MAX_RECORDS_IN_RAM,
            (SortingIteratorFactory.ProgressCallback<SAMRecord>) progressLogger::record);

    PeekableIterator<List<SAMRecord>> subgroupIterator =
            new PeekableIterator<List<SAMRecord>>(new GroupingIterator<SAMRecord>(
                    new ProgressLoggingIterator(sortedIter, new ProgressLogger(log, 1000000, "Grouping")),
                    GROUPING_COMPARATOR));

    MetricsFile<UmiSharingMetrics, Integer> outFile = getMetricsFile();
    List<SAMRecord> parentSubgroup = null;
    Set<TagValues> parentTuples = new HashSet<>();

    while (subgroupIterator.hasNext()) {
        if (parentSubgroup == null ||
        !parentSubgroup.get(0).getAttribute(COLLAPSE_TAG).equals(subgroupIterator.peek().get(0).getAttribute(COLLAPSE_TAG))) {
            parentSubgroup = subgroupIterator.next();
            parentTuples = parentEditDistanceMatcher.getValues(parentSubgroup);                
        } else {                
            final List<SAMRecord> childSubgroup = subgroupIterator.next();
            final Set<TagValues> childTuples = parentEditDistanceMatcher.getValues(childSubgroup);                
            final UmiSharingMetrics metrics = new UmiSharingMetrics();
            metrics.PARENT = parentSubgroup.get(0).getAttribute(COLLAPSE_TAG).toString();
            metrics.CHILD = childSubgroup.get(0).getAttribute(UNCOLLAPSED_TAG).toString();
            metrics.NUM_PARENT = parentTuples.size();
            metrics.NUM_CHILD = childTuples.size();
            metrics.NUM_SHARED = parentEditDistanceMatcher.computeNumShared(parentTuples, childTuples);
            metrics.FRAC_SHARED = metrics.NUM_SHARED/(double)metrics.NUM_CHILD;
            outFile.addMetric(metrics);
        }
    }
    BufferedWriter w = IOUtil.openFileForBufferedWriting(OUTPUT);
    outFile.write(w);
    try {
        w.close();
    } catch (IOException e) {
        throw new RuntimeIOException("Problem writing " + OUTPUT.getAbsolutePath(), e);
    }
    CloserUtil.close(reader);
    return 0;
}

Source File: CollapseTagWithContext.java From Drop-seq with MIT License

4 votes

@Override
protected int doWork() {
	int vc = validateCommands();
	if (vc>0) return vc;

	if (this.COUNT_TAGS_EDIT_DISTANCE>0) this.medUMI = new MapBarcodesByEditDistance(false);

	med = new MapBarcodesByEditDistance(false, this.NUM_THREADS, 0);
	
	PrintStream outMetrics = null;
	if (this.ADAPTIVE_ED_METRICS_FILE!=null) {
		outMetrics = new ErrorCheckingPrintStream(IOUtil.openFileForWriting(this.ADAPTIVE_ED_METRICS_FILE));
		writeAdaptiveEditDistanceMetricsHeader(this.ADAPTIVE_ED_METRICS_ED_LIST, outMetrics);
	}
	
	if (this.MUTATIONAL_COLLAPSE_METRICS_FILE!=null) {
		med = new MapBarcodesByEditDistance(true, this.NUM_THREADS, 1000);
		outMetrics = new ErrorCheckingPrintStream(IOUtil.openFileForWriting(this.MUTATIONAL_COLLAPSE_METRICS_FILE));
		writeMutationalCollapseMetricsHeader(this.ADAPTIVE_ED_METRICS_ED_LIST, outMetrics);
	}

	IOUtil.assertFileIsReadable(INPUT);
       IOUtil.assertFileIsWritable(OUTPUT);

       SamReader reader = SamReaderFactory.makeDefault().open(INPUT);
       SAMFileHeader header =  reader.getFileHeader();
       SortOrder sortOrder= header.getSortOrder();
       
       SAMFileWriter writer = getWriter (reader);
       final ObjectSink<SAMRecord> recSink = new SamWriterSink(writer);
               
       PeekableGroupingIterator<SAMRecord> groupingIter = orderReadsByTagsPeekable(reader, this.COLLAPSE_TAG, this.CONTEXT_TAGS, this.READ_MQ, this.OUT_TAG, recSink);

       log.info("Collapsing tag and writing results");

       if (!LOW_MEMORY_MODE) 
       	fasterIteration(groupingIter, writer, outMetrics);
       else
       	lowMemoryIteration(groupingIter, writer, outMetrics, header);
               
       log.info("Re-sorting output BAM in "+ sortOrder.toString()+ " if neccesary");
       CloserUtil.close(groupingIter);
       CloserUtil.close(reader);
       writer.close();
       if (outMetrics!=null) CloserUtil.close(outMetrics);
       log.info("DONE");
	return 0;
}

Source File: MarkIlluminaAdapters.java From picard with MIT License

4 votes

@Override
protected int doWork() {
    IOUtil.assertFileIsReadable(INPUT);
    IOUtil.assertFileIsWritable(METRICS);

    final SamReader in = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).open(INPUT);
    final SAMFileHeader.SortOrder order = in.getFileHeader().getSortOrder();
    SAMFileWriter out = null;
    if (OUTPUT != null) {
        IOUtil.assertFileIsWritable(OUTPUT);
        out = new SAMFileWriterFactory().makeSAMOrBAMWriter(in.getFileHeader(), true, OUTPUT);
    }

    final Histogram<Integer> histo = new Histogram<Integer>("clipped_bases", "read_count");

    // Combine any adapters and custom adapter pairs from the command line into an array for use in clipping
    final AdapterPair[] adapters;
    {
        final List<AdapterPair> tmp = new ArrayList<AdapterPair>();
        tmp.addAll(ADAPTERS);
        if (FIVE_PRIME_ADAPTER != null && THREE_PRIME_ADAPTER != null) {
            tmp.add(new CustomAdapterPair(FIVE_PRIME_ADAPTER, THREE_PRIME_ADAPTER));
        }
        adapters = tmp.toArray(new AdapterPair[tmp.size()]);
    }

    ////////////////////////////////////////////////////////////////////////
    // Main loop that consumes reads, clips them and writes them to the output
    ////////////////////////////////////////////////////////////////////////
    final ProgressLogger progress = new ProgressLogger(log, 1000000, "Read");
    final SAMRecordIterator iterator = in.iterator();

    final AdapterMarker adapterMarker = new AdapterMarker(ADAPTER_TRUNCATION_LENGTH, adapters).
            setMaxPairErrorRate(MAX_ERROR_RATE_PE).setMinPairMatchBases(MIN_MATCH_BASES_PE).
            setMaxSingleEndErrorRate(MAX_ERROR_RATE_SE).setMinSingleEndMatchBases(MIN_MATCH_BASES_SE).
            setNumAdaptersToKeep(NUM_ADAPTERS_TO_KEEP).
            setThresholdForSelectingAdaptersToKeep(PRUNE_ADAPTER_LIST_AFTER_THIS_MANY_ADAPTERS_SEEN);

    while (iterator.hasNext()) {
        final SAMRecord rec = iterator.next();
        final SAMRecord rec2 = rec.getReadPairedFlag() && iterator.hasNext() ? iterator.next() : null;
        rec.setAttribute(ReservedTagConstants.XT, null);

        // Do the clipping one way for PE and another for SE reads
        if (rec.getReadPairedFlag()) {
            // Assert that the input file is in query name order only if we see some PE reads
            if (order != SAMFileHeader.SortOrder.queryname) {
                throw new PicardException("Input BAM file must be sorted by queryname");
            }

            if (rec2 == null) throw new PicardException("Missing mate pair for paired read: " + rec.getReadName());
            rec2.setAttribute(ReservedTagConstants.XT, null);

            // Assert that we did in fact just get two mate pairs
            if (!rec.getReadName().equals(rec2.getReadName())) {
                throw new PicardException("Adjacent reads expected to be mate-pairs have different names: " +
                        rec.getReadName() + ", " + rec2.getReadName());
            }

            // establish which of pair is first and which second
            final SAMRecord first, second;

            if (rec.getFirstOfPairFlag() && rec2.getSecondOfPairFlag()) {
                first = rec;
                second = rec2;
            } else if (rec.getSecondOfPairFlag() && rec2.getFirstOfPairFlag()) {
                first = rec2;
                second = rec;
            } else {
                throw new PicardException("Two reads with same name but not correctly marked as 1st/2nd of pair: " + rec.getReadName());
            }

            adapterMarker.adapterTrimIlluminaPairedReads(first, second);
        } else {
            adapterMarker.adapterTrimIlluminaSingleRead(rec);
        }

        // Then output the records, update progress and metrics
        for (final SAMRecord r : new SAMRecord[]{rec, rec2}) {
            if (r != null) {
                progress.record(r);
                if (out != null) out.addAlignment(r);

                final Integer clip = r.getIntegerAttribute(ReservedTagConstants.XT);
                if (clip != null) histo.increment(r.getReadLength() - clip + 1);
            }
        }
    }

    if (out != null) out.close();

    // Lastly output the metrics to file
    final MetricsFile<?, Integer> metricsFile = getMetricsFile();
    metricsFile.setHistogram(histo);
    metricsFile.write(METRICS);

    CloserUtil.close(in);
    return 0;
}

Source File: FilterBam.java From Drop-seq with MIT License

4 votes

@Override
protected int doWork() {
	IOUtil.assertFileIsReadable(INPUT);
	IOUtil.assertFileIsWritable(OUTPUT);
	buildPatterns();

	SamReader in = SamReaderFactory.makeDefault().open(INPUT);

	SAMFileHeader fileHeader = editSequenceDictionary(in.getFileHeader().clone());
	SamHeaderUtil.addPgRecord(fileHeader, this);
	SAMFileWriter out = new SAMFileWriterFactory().makeSAMOrBAMWriter(fileHeader, true, OUTPUT);
	ProgressLogger progLog=new ProgressLogger(log);

	final boolean sequencesRemoved = fileHeader.getSequenceDictionary().getSequences().size() != in.getFileHeader().getSequenceDictionary().getSequences().size();
	
	FilteredReadsMetric m = new FilteredReadsMetric();
	
	for (final SAMRecord r : in) {
		progLog.record(r);
		if (!filterRead(r)) {
               String sequenceName = stripReferencePrefix(r.getReferenceName());
               String mateSequenceName = null;
               if (r.getMateReferenceIndex() != -1)
				mateSequenceName = stripReferencePrefix(r.getMateReferenceName());
		    if (sequencesRemoved || sequenceName != null) {
		        if (sequenceName == null)
					sequenceName = r.getReferenceName();
                   // Even if sequence name has not been edited, if sequences have been removed, need to set
                   // reference name again to invalidate reference index cache.
                   r.setReferenceName(sequenceName);
               }
               if (r.getMateReferenceIndex() != -1 && (sequencesRemoved || mateSequenceName != null)) {
                   if (mateSequenceName == null)
					mateSequenceName = r.getMateReferenceName();
                   // It's possible that the mate was mapped to a reference sequence that has been dropped from
                   // the sequence dictionary.  If so, set the mate to be unmapped.
                   if (fileHeader.getSequenceDictionary().getSequence(mateSequenceName) != null)
					r.setMateReferenceName(mateSequenceName);
				else {
                       r.setMateUnmappedFlag(true);
                       r.setMateReferenceIndex(SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX);
                       r.setMateAlignmentStart(0);
                   }
               }
			out.addAlignment(r);
			m.READS_ACCEPTED++;
		} else {
			m.READS_REJECTED++;
		}
	}
	// write the summary if the summary file is not null.
	writeSummary(this.SUMMARY, m);
       CloserUtil.close(in);
       out.close();
       FilterProgramUtils.reportAndCheckFilterResults("reads", m.READS_ACCEPTED, m.READS_REJECTED,
			PASSING_READ_THRESHOLD, log);
	return (0);
}

Java Code Examples for htsjdk.samtools.util.IOUtil#assertFileIsWritable()