Java Code Examples for htsjdk.samtools.util.IOUtil#assertFileIsReadable()
The following examples show how to use htsjdk.samtools.util.IOUtil#assertFileIsReadable().
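Before the project examples, here is a minimal, self-contained sketch of the pattern they all share: validate every input (and output) up front so a missing or unreadable file fails immediately with an htsjdk SAMException instead of partway through processing. The class name and file paths below are placeholders for illustration and are not taken from any of the projects.

import java.io.File;

import htsjdk.samtools.SAMException;
import htsjdk.samtools.util.IOUtil;

public class AssertFileIsReadableExample {
    public static void main(final String[] args) {
        final File input = new File("reads.bam");            // placeholder input path
        final File output = new File("reads.converted.bam"); // placeholder output path
        try {
            // Throws SAMException if the file does not exist or cannot be read
            IOUtil.assertFileIsReadable(input);
            // The examples below typically pair the readability check with a writability check
            IOUtil.assertFileIsWritable(output);
            System.out.println("Inputs validated; safe to start processing.");
        } catch (final SAMException e) {
            System.err.println("Input validation failed: " + e.getMessage());
        }
    }
}

An overload accepting java.nio.file.Path is used the same way in Examples 6, 17, and 18 below.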
Example 1
Source File: MergePedIntoVcf.java From picard with MIT License | 6 votes |
@Override
protected int doWork() {
    IOUtil.assertFileIsReadable(PED_FILE);
    IOUtil.assertFileIsReadable(MAP_FILE);
    IOUtil.assertFileIsReadable(ZCALL_THRESHOLDS_FILE);
    IOUtil.assertFileIsWritable(OUTPUT);
    try {
        parseZCallThresholds();
        final ZCallPedFile zCallPedFile = ZCallPedFile.fromFile(PED_FILE, MAP_FILE);
        final VCFFileReader vcfFileReader = new VCFFileReader(ORIGINAL_VCF, false);
        final VCFHeader header = vcfFileReader.getFileHeader();
        if (header.getGenotypeSamples().size() > 1) {
            throw new PicardException("MergePedIntoVCF only works with single-sample VCFs.");
        }
        addAdditionalHeaderFields(header);
        writeVcf(vcfFileReader.iterator(), OUTPUT, vcfFileReader.getFileHeader().getSequenceDictionary(), header, zCallPedFile);
    } catch (Exception e) {
        e.printStackTrace();
    }
    return 0;
}
Example 2
Source File: BamToBfqWriter.java From picard with MIT License | 6 votes |
/**
 * Constructor
 *
 * @param bamFile           the BAM file to read from
 * @param outputPrefix      the directory and file prefix for the binary fastq files
 * @param total             the total number of records that should be written, drawn evenly
 *                          from throughout the file (null for all).
 * @param chunk             the maximum number of records that should be written to any one file
 * @param pairedReads       whether these reads are from a paired-end run
 * @param namePrefix        The string to be stripped off the read name
 *                          before writing to the bfq file. May be null, in which case
 *                          the name will not be trimmed.
 * @param includeNonPfReads whether to include non pf-reads
 * @param clipAdapters      whether to replace adapters as marked with XT:i clipping position attribute
 */
public BamToBfqWriter(final File bamFile, final String outputPrefix, final Integer total,
                      final Integer chunk, final boolean pairedReads, String namePrefix,
                      boolean includeNonPfReads, boolean clipAdapters, Integer basesToWrite) {
    IOUtil.assertFileIsReadable(bamFile);
    this.bamFile = bamFile;
    this.outputPrefix = outputPrefix;
    this.pairedReads = pairedReads;
    if (total != null) {
        final double writeable = (double) countWritableRecords();
        this.increment = (int) Math.floor(writeable / total.doubleValue());
        if (this.increment == 0) {
            this.increment = 1;
        }
    }
    if (chunk != null) {
        this.chunk = chunk;
    }
    this.namePrefix = namePrefix;
    this.nameTrim = namePrefix != null ? namePrefix.length() : 0;
    this.includeNonPfReads = includeNonPfReads;
    this.clipAdapters = clipAdapters;
    this.basesToWrite = basesToWrite;
}
Example 3
Source File: AddCommentsToBam.java From picard with MIT License | 6 votes |
protected int doWork() {
    IOUtil.assertFileIsReadable(INPUT);
    IOUtil.assertFileIsWritable(OUTPUT);

    if (INPUT.getAbsolutePath().endsWith(".sam")) {
        throw new PicardException("SAM files are not supported");
    }

    final SAMFileHeader samFileHeader = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).getFileHeader(INPUT);
    for (final String comment : COMMENT) {
        if (comment.contains("\n")) {
            throw new PicardException("Comments can not contain a new line");
        }
        samFileHeader.addComment(comment);
    }

    BamFileIoUtils.reheaderBamFile(samFileHeader, INPUT, OUTPUT, CREATE_MD5_FILE, CREATE_INDEX);
    return 0;
}
Example 4
Source File: SamFormatConverter.java From picard with MIT License | 6 votes |
/**
 * Convert a file from one of sam/bam/cram format to another based on the extension of output.
 *
 * @param input             input file in one of sam/bam/cram format
 * @param output            output to write converted file to, the conversion is based on the extension of this filename
 * @param referenceSequence the reference sequence to use, necessary when reading/writing cram
 * @param createIndex       whether or not an index should be written alongside the output file
 */
public static void convert(final File input, final File output, final File referenceSequence, final Boolean createIndex) {
    IOUtil.assertFileIsReadable(input);
    IOUtil.assertFileIsWritable(output);

    final SamReader reader = SamReaderFactory.makeDefault().referenceSequence(referenceSequence).open(input);
    final SAMFileWriter writer = new SAMFileWriterFactory().makeWriter(reader.getFileHeader(), true, output, referenceSequence);
    if (createIndex && writer.getFileHeader().getSortOrder() != SAMFileHeader.SortOrder.coordinate) {
        throw new PicardException("Can't CREATE_INDEX unless sort order is coordinate");
    }

    final ProgressLogger progress = new ProgressLogger(Log.getInstance(SamFormatConverter.class));
    for (final SAMRecord rec : reader) {
        writer.addAlignment(rec);
        progress.record(rec);
    }
    CloserUtil.close(reader);
    writer.close();
}
Example 5
Source File: SortSam.java From picard with MIT License | 6 votes |
protected int doWork() {
    IOUtil.assertFileIsReadable(INPUT);
    IOUtil.assertFileIsWritable(OUTPUT);

    final SamReader reader = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).open(INPUT);
    reader.getFileHeader().setSortOrder(SORT_ORDER.getSortOrder());
    final SAMFileWriter writer = new SAMFileWriterFactory().makeSAMOrBAMWriter(reader.getFileHeader(), false, OUTPUT);
    writer.setProgressLogger(
            new ProgressLogger(log, (int) 1e7, "Wrote", "records from a sorting collection"));

    final ProgressLogger progress = new ProgressLogger(log, (int) 1e7, "Read");
    for (final SAMRecord rec : reader) {
        writer.addAlignment(rec);
        progress.record(rec);
    }

    log.info("Finished reading inputs, merging and writing to output now.");

    CloserUtil.close(reader);
    writer.close();
    return 0;
}
Example 6
Source File: PicardIndexedFastaSequenceFile.java From chipster with MIT License | 6 votes |
/**
 * Open the given indexed fasta sequence file. Throw an exception if the file cannot be opened.
 *
 * @param path  The file to open.
 * @param index Pre-built FastaSequenceIndex, for the case in which one does not exist on disk.
 */
public PicardIndexedFastaSequenceFile(final Path path, final FastaSequenceIndex index) {
    super(path);
    if (index == null) throw new IllegalArgumentException("Null index for fasta " + path);
    this.index = index;
    IOUtil.assertFileIsReadable(path);
    try {
        this.channel = Files.newByteChannel(path);
    } catch (IOException e) {
        throw new SAMException("Fasta file should be readable but is not: " + path, e);
    }
    reset();

    if (getSequenceDictionary() != null)
        sanityCheckDictionaryAgainstIndex(path.toAbsolutePath().toString(), sequenceDictionary, index);
}
Example 7
Source File: CheckTerminatorBlock.java From picard with MIT License | 6 votes |
@Override
protected int doWork() {
    IOUtil.assertFileIsReadable(INPUT);
    try {
        final FileTermination term = BlockCompressedInputStream.checkTermination(INPUT);
        System.err.println(term.name());
        if (term == FileTermination.DEFECTIVE) {
            return 100;
        } else {
            return 0;
        }
    } catch (IOException ioe) {
        throw new PicardException("Exception reading terminator block of file: " + INPUT.getAbsolutePath());
    }
}
Example 8
Source File: MMapBackedIteratorFactory.java From picard with MIT License | 5 votes |
private static void checkFactoryVars(final int headerSize, final File binaryFile) {
    IOUtil.assertFileIsReadable(binaryFile);

    if (headerSize < 0) {
        throw new PicardException("Header size cannot be negative. HeaderSize(" + headerSize + ") for file " + binaryFile.getAbsolutePath());
    }

    if (headerSize > binaryFile.length()) {
        throw new PicardException("Header size(" + headerSize + ") is greater than file size(" + binaryFile.length() + ") for file " + binaryFile.getAbsolutePath());
    }
}
Example 9
Source File: SplitReadsIntegrationTest.java From gatk with BSD 3-Clause "New" or "Revised" License | 5 votes |
private int getReadCounts(final Path tempDirectory, final String baseName, final String fileName) {
    final File path = tempDirectory.resolve(fileName).toFile();
    IOUtil.assertFileIsReadable(path);

    final SamReader in = SamReaderFactory.makeDefault()
            .referenceSequence(new File(getTestDataDir(), getReferenceSequenceName(baseName)))
            .open(path);
    int count = 0;
    for (@SuppressWarnings("unused") final SAMRecord rec : in) {
        count++;
    }
    CloserUtil.close(in);
    return count;
}
Example 10
Source File: SelectCellsByNumTranscripts.java From Drop-seq with MIT License | 5 votes |
public static List<String> readBarcodes(final File file) {
    try {
        IOUtil.assertFileIsReadable(file);
        final BufferedReader reader = IOUtil.openFileForBufferedReading(file);
        final List<String> ret = new ArrayList<>();
        String barcode;
        while ((barcode = reader.readLine()) != null)
            if (!barcode.isEmpty())
                ret.add(barcode);
        CloserUtil.close(reader);
        return ret;
    } catch (IOException e) {
        throw new RuntimeIOException("Exception reading " + file.getAbsolutePath());
    }
}
Example 11
Source File: GatherMolecularBarcodeDistributionByGene.java From Drop-seq with MIT License | 5 votes |
@Override
protected int doWork() {
    IOUtil.assertFileIsReadable(INPUT);
    IOUtil.assertFileIsWritable(OUTPUT);

    BufferedWriter out = IOUtil.openFileForBufferedWriting(OUTPUT);
    writePerTranscriptHeader(out);

    Set<String> cellBarcodes = new HashSet<>(new BarcodeListRetrieval().getCellBarcodes(
            this.INPUT, this.CELL_BARCODE_TAG, this.MOLECULAR_BARCODE_TAG,
            this.GENE_NAME_TAG, this.GENE_STRAND_TAG, this.GENE_FUNCTION_TAG,
            this.STRAND_STRATEGY, this.LOCUS_FUNCTION_LIST, this.CELL_BC_FILE,
            this.READ_MQ, this.MIN_NUM_TRANSCRIPTS_PER_CELL, this.MIN_NUM_GENES_PER_CELL,
            this.MIN_NUM_READS_PER_CELL, this.NUM_CORE_BARCODES, this.EDIT_DISTANCE,
            this.MIN_BC_READ_THRESHOLD));

    UMIIterator umiIterator = new UMIIterator(SamFileMergeUtil.mergeInputs(Collections.singletonList(this.INPUT), false),
            GENE_NAME_TAG, GENE_STRAND_TAG, GENE_FUNCTION_TAG,
            this.STRAND_STRATEGY, this.LOCUS_FUNCTION_LIST, this.CELL_BARCODE_TAG,
            this.MOLECULAR_BARCODE_TAG, this.READ_MQ, false, cellBarcodes, true, false);

    UMICollection batch;
    while ((batch = umiIterator.next()) != null)
        if (batch.isEmpty() == false) {
            String cellTag = batch.getCellBarcode();
            if (cellBarcodes.contains(cellTag) || cellBarcodes.isEmpty())
                writePerTranscriptStats(batch.getGeneName(), batch.getCellBarcode(),
                        batch.getMolecularBarcodeCountsCollapsed(this.EDIT_DISTANCE), out);
        }

    CloserUtil.close(umiIterator);

    try {
        out.close();
    } catch (IOException io) {
        throw new TranscriptomeException("Problem writing file", io);
    }
    return 0;
}
Example 12
Source File: UpdateVcfSequenceDictionary.java From picard with MIT License | 5 votes |
@Override
protected int doWork() {
    IOUtil.assertFileIsReadable(INPUT);
    IOUtil.assertFileIsReadable(SEQUENCE_DICTIONARY);
    IOUtil.assertFileIsWritable(OUTPUT);

    final SAMSequenceDictionary samSequenceDictionary = SAMSequenceDictionaryExtractor.extractDictionary(SEQUENCE_DICTIONARY.toPath());

    final VCFFileReader fileReader = new VCFFileReader(INPUT, false);
    final VCFHeader fileHeader = fileReader.getFileHeader();

    final VariantContextWriterBuilder builder = new VariantContextWriterBuilder()
            .setReferenceDictionary(samSequenceDictionary)
            .clearOptions();
    if (CREATE_INDEX)
        builder.setOption(Options.INDEX_ON_THE_FLY);

    final VariantContextWriter vcfWriter = builder.setOutputFile(OUTPUT).build();
    fileHeader.setSequenceDictionary(samSequenceDictionary);
    vcfWriter.writeHeader(fileHeader);

    final ProgressLogger progress = new ProgressLogger(log, 10000);
    final CloseableIterator<VariantContext> iterator = fileReader.iterator();
    while (iterator.hasNext()) {
        final VariantContext context = iterator.next();
        vcfWriter.add(context);
        progress.record(context.getContig(), context.getStart());
    }

    CloserUtil.close(iterator);
    CloserUtil.close(fileReader);
    vcfWriter.close();

    return 0;
}
Example 13
Source File: BiasedBarcodeCollectionFactory.java From Drop-seq with MIT License | 5 votes |
/**
 * Get a list of cell barcodes that can be queried in the BAM.
 *
 * @param inputFiles      One or more BAM files
 * @param cellBarcodeFile A file containing a list of cell barcodes, can be null.
 * @param numBarcodes     A count of the number of barcodes to extract. Use if cellBarcodeFile is null.
 * @param cellBarcodeTag  The cell barcode tag. Use if cellBarcodeFile is null.
 * @param readMQ          The map quality of reads to use if cellBarcodeFile is null.
 * @return
 */
public List<String> getCellBarcodes(final List<File> inputFiles, final File cellBarcodeFile,
                                    final int numBarcodes, final String cellBarcodeTag, final int readMQ) {
    if (cellBarcodeFile != null) {
        IOUtil.assertFileIsReadable(cellBarcodeFile);
        List<String> cellBarcodes = ParseBarcodeFile.readCellBarcodeFile(cellBarcodeFile);
        log.info("Found " + cellBarcodes.size() + " cell barcodes in file");
        return (cellBarcodes);
    }
    log.info("Gathering barcodes for the top [" + numBarcodes + "] cells");
    return new BarcodeListRetrieval().getListCellBarcodesByReadCount(
            SamFileMergeUtil.mergeInputs(inputFiles, false, samReaderFactory).iterator,
            cellBarcodeTag, readMQ, null, numBarcodes);
}
Example 14
Source File: CreateVerifyIDIntensityContaminationMetricsFile.java From picard with MIT License | 5 votes |
@Override
protected int doWork() {
    IOUtil.assertFileIsReadable(INPUT);
    final File metricsFile = new File(OUTPUT + "." + FILE_EXTENSION);
    IOUtil.assertFileIsWritable(metricsFile);

    final MetricsFile<VerifyIDIntensityContaminationMetrics, ?> verifyIDIntensityContaminationMetricsFile = getMetricsFile();

    final Pattern HEADER_PATTERN = Pattern.compile("^ID\\s+%Mix\\s+LLK\\s+LLK0\\s*$");
    final Pattern DASHES_PATTERN = Pattern.compile("^[-]+$");
    final Pattern DATA_PATTERN = Pattern.compile("^(\\d+)\\s+([0-9]*\\.?[0-9]+)\\s+([-0-9]*\\.?[0-9]+)\\s+([-0-9]*\\.?[0-9]+)\\s*$");

    try (BufferedReader br = new BufferedReader(new FileReader(INPUT))) {
        String line;
        line = br.readLine();
        lineMatch(line, HEADER_PATTERN);
        line = br.readLine();
        lineMatch(line, DASHES_PATTERN);
        while ((line = br.readLine()) != null) {
            final Matcher m = lineMatch(line, DATA_PATTERN);

            // Load up and store the metrics
            final VerifyIDIntensityContaminationMetrics metrics = new VerifyIDIntensityContaminationMetrics();
            metrics.ID = Integer.parseInt(m.group(1));
            metrics.PCT_MIX = Double.parseDouble(m.group(2));
            metrics.LLK = Double.parseDouble(m.group(3));
            metrics.LLK0 = Double.parseDouble(m.group(4));
            verifyIDIntensityContaminationMetricsFile.addMetric(metrics);
        }
        verifyIDIntensityContaminationMetricsFile.write(metricsFile);
    } catch (IOException e) {
        throw new PicardException("Error parsing VerifyIDIntensity Output", e);
    }

    return 0;
}
Example 15
Source File: DetectBeadSynthesisErrors.java From Drop-seq with MIT License | 5 votes |
@Override
protected String[] customCommandLineValidation() {
    for (final File input : INPUT)
        IOUtil.assertFileIsReadable(input);
    IOUtil.assertFileIsWritable(this.OUTPUT_STATS);
    IOUtil.assertFileIsWritable(this.SUMMARY);
    if (this.REPORT != null) IOUtil.assertFileIsWritable(this.REPORT);
    if (this.CELL_BC_FILE != null & this.NUM_BARCODES != null)
        throw new TranscriptomeException("Must specify at most one of CELL_BC_FILE or NUM_BARCODES");
    if (OUTPUT != null) IOUtil.assertFileIsWritable(this.OUTPUT);
    return super.customCommandLineValidation();
}
Example 16
Source File: RevertSam.java From picard with MIT License | 4 votes |
protected int doWork() {
    IOUtil.assertFileIsReadable(INPUT);
    ValidationUtil.assertWritable(OUTPUT, OUTPUT_BY_READGROUP);

    final boolean sanitizing = SANITIZE;
    final SamReader in = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).validationStringency(VALIDATION_STRINGENCY).open(INPUT);
    final SAMFileHeader inHeader = in.getFileHeader();
    ValidationUtil.validateHeaderOverrides(inHeader, SAMPLE_ALIAS, LIBRARY_NAME);

    ////////////////////////////////////////////////////////////////////////////
    // Build the output writer with an appropriate header based on the options
    ////////////////////////////////////////////////////////////////////////////
    final boolean presorted = isPresorted(inHeader, SORT_ORDER, sanitizing);
    if (SAMPLE_ALIAS != null) overwriteSample(inHeader.getReadGroups(), SAMPLE_ALIAS);
    if (LIBRARY_NAME != null) overwriteLibrary(inHeader.getReadGroups(), LIBRARY_NAME);
    final SAMFileHeader singleOutHeader = createOutHeader(inHeader, SORT_ORDER, REMOVE_ALIGNMENT_INFORMATION);
    inHeader.getReadGroups().forEach(readGroup -> singleOutHeader.addReadGroup(readGroup));

    final Map<String, File> outputMap;
    final Map<String, SAMFileHeader> headerMap;
    if (OUTPUT_BY_READGROUP) {
        if (inHeader.getReadGroups().isEmpty()) {
            throw new PicardException(INPUT + " does not contain Read Groups");
        }

        final String defaultExtension;
        if (OUTPUT_BY_READGROUP_FILE_FORMAT == FileType.dynamic) {
            defaultExtension = getDefaultExtension(INPUT.toString());
        } else {
            defaultExtension = "." + OUTPUT_BY_READGROUP_FILE_FORMAT.toString();
        }

        outputMap = createOutputMap(OUTPUT_MAP, OUTPUT, defaultExtension, inHeader.getReadGroups());
        ValidationUtil.assertAllReadGroupsMapped(outputMap, inHeader.getReadGroups());
        headerMap = createHeaderMap(inHeader, SORT_ORDER, REMOVE_ALIGNMENT_INFORMATION);
    } else {
        outputMap = null;
        headerMap = null;
    }

    if (RESTORE_HARDCLIPS && !REMOVE_ALIGNMENT_INFORMATION) {
        throw new PicardException("Cannot revert sam file when RESTORE_HARDCLIPS is true and REMOVE_ALIGNMENT_INFORMATION is false.");
    }

    final SAMFileWriterFactory factory = new SAMFileWriterFactory();
    final RevertSamWriter out = new RevertSamWriter(OUTPUT_BY_READGROUP, headerMap, outputMap, singleOutHeader, OUTPUT, presorted, factory, REFERENCE_SEQUENCE);

    ////////////////////////////////////////////////////////////////////////////
    // Build a sorting collection to use if we are sanitizing
    ////////////////////////////////////////////////////////////////////////////
    final RevertSamSorter sorter;
    if (sanitizing) sorter = new RevertSamSorter(OUTPUT_BY_READGROUP, headerMap, singleOutHeader, MAX_RECORDS_IN_RAM);
    else sorter = null;

    final ProgressLogger progress = new ProgressLogger(log, 1000000, "Reverted");
    for (final SAMRecord rec : in) {
        // Weed out non-primary and supplemental read as we don't want duplicates in the reverted file!
        if (rec.isSecondaryOrSupplementary()) continue;

        // log the progress before you revert because otherwise the "last read position" might not be accurate
        progress.record(rec);

        // Actually do the reverting of the remaining records
        revertSamRecord(rec);

        if (sanitizing) sorter.add(rec);
        else out.addAlignment(rec);
    }
    CloserUtil.close(in);

    ////////////////////////////////////////////////////////////////////////////
    // Now if we're sanitizing, clean up the records and write them to the output
    ////////////////////////////////////////////////////////////////////////////
    if (!sanitizing) {
        out.close();
    } else {
        final Map<SAMReadGroupRecord, FastqQualityFormat> readGroupToFormat;
        try {
            readGroupToFormat = createReadGroupFormatMap(inHeader, REFERENCE_SEQUENCE, VALIDATION_STRINGENCY, INPUT, RESTORE_ORIGINAL_QUALITIES);
        } catch (final PicardException e) {
            log.error(e.getMessage());
            return -1;
        }

        final long[] sanitizeResults = sanitize(readGroupToFormat, sorter, out);
        final long discarded = sanitizeResults[0];
        final long total = sanitizeResults[1];
        out.close();

        final double discardRate = discarded / (double) total;
        final NumberFormat fmt = new DecimalFormat("0.000%");
        log.info("Discarded " + discarded + " out of " + total + " (" + fmt.format(discardRate) + ") reads in order to sanitize output.");

        if (discardRate > MAX_DISCARD_FRACTION) {
            throw new PicardException("Discarded " + fmt.format(discardRate) + " which is above MAX_DISCARD_FRACTION of " + fmt.format(MAX_DISCARD_FRACTION));
        }
    }

    return 0;
}
Example 17
Source File: GatherVcfsCloud.java From gatk with BSD 3-Clause "New" or "Revised" License | 4 votes |
@Override
protected Object doWork() {
    log.info("Checking inputs.");
    final List<Path> inputPaths = inputs.stream().map(IOUtils::getPath).collect(Collectors.toList());
    if (!ignoreSafetyChecks) {
        for (final Path f : inputPaths) {
            IOUtil.assertFileIsReadable(f);
        }
    }

    IOUtil.assertFileIsWritable(output);

    final SAMSequenceDictionary sequenceDictionary = getHeader(inputPaths.get(0)).getSequenceDictionary();

    if (createIndex && sequenceDictionary == null) {
        throw new UserException("In order to index the resulting VCF, the input VCFs must contain ##contig lines.");
    }

    if (!ignoreSafetyChecks) {
        log.info("Checking file headers and first records to ensure compatibility.");
        assertSameSamplesAndValidOrdering(inputPaths, disableContigOrderingCheck);
    }

    if (gatherType == GatherType.AUTOMATIC) {
        if (canBlockCopy(inputPaths, output)) {
            gatherType = GatherType.BLOCK;
        } else {
            gatherType = GatherType.CONVENTIONAL;
        }
    }

    if (gatherType == GatherType.BLOCK && !canBlockCopy(inputPaths, output)) {
        throw new UserException.BadInput(
                "Requested block copy but some files are not bgzipped, all inputs and the output must be bgzipped to block copy");
    }

    switch (gatherType) {
        case BLOCK:
            log.info("Gathering by copying gzip blocks. Will not be able to validate position non-overlap of files.");
            if (createIndex) {
                log.warn("Index creation not currently supported when gathering block compressed VCFs.");
            }
            gatherWithBlockCopying(inputPaths, output, cloudPrefetchBuffer);
            break;
        case CONVENTIONAL:
            log.info("Gathering by conventional means.");
            gatherConventionally(sequenceDictionary, createIndex, inputPaths, output, cloudPrefetchBuffer, disableContigOrderingCheck);
            break;
        default:
            throw new GATKException.ShouldNeverReachHereException("Invalid gather type: " + gatherType + ". Please report this bug to the developers.");
    }
    return null;
}
Example 18
Source File: ReadsPathDataSource.java From gatk with BSD 3-Clause "New" or "Revised" License | 4 votes |
/**
 * Initialize this data source with multiple SAM/BAM/CRAM files, explicit indices for those files,
 * and a custom SamReaderFactory.
 *
 * @param samPaths paths to SAM/BAM/CRAM files, not null
 * @param samIndices indices for all of the SAM/BAM/CRAM files, in the same order as samPaths. May be null,
 *                   in which case index paths are inferred automatically.
 * @param customSamReaderFactory SamReaderFactory to use, if null a default factory with no reference and validation
 *                               stringency SILENT is used.
 * @param cloudWrapper caching/prefetching wrapper for the data, if on Google Cloud.
 * @param cloudIndexWrapper caching/prefetching wrapper for the index, if on Google Cloud.
 */
public ReadsPathDataSource( final List<Path> samPaths, final List<Path> samIndices,
                            SamReaderFactory customSamReaderFactory,
                            Function<SeekableByteChannel, SeekableByteChannel> cloudWrapper,
                            Function<SeekableByteChannel, SeekableByteChannel> cloudIndexWrapper ) {
    Utils.nonNull(samPaths);
    Utils.nonEmpty(samPaths, "ReadsPathDataSource cannot be created from empty file list");

    if ( samIndices != null && samPaths.size() != samIndices.size() ) {
        throw new UserException(String.format("Must have the same number of BAM/CRAM/SAM paths and indices. Saw %d BAM/CRAM/SAMs but %d indices",
                                              samPaths.size(), samIndices.size()));
    }

    readers = new LinkedHashMap<>(samPaths.size() * 2);
    backingPaths = new LinkedHashMap<>(samPaths.size() * 2);
    indicesAvailable = true;

    final SamReaderFactory samReaderFactory =
            customSamReaderFactory == null ?
                SamReaderFactory.makeDefault().validationStringency(ReadConstants.DEFAULT_READ_VALIDATION_STRINGENCY) :
                customSamReaderFactory;

    int samCount = 0;
    for ( final Path samPath : samPaths ) {
        // Ensure each file can be read
        try {
            IOUtil.assertFileIsReadable(samPath);
        }
        catch ( SAMException|IllegalArgumentException e ) {
            throw new UserException.CouldNotReadInputFile(samPath.toString(), e);
        }

        Function<SeekableByteChannel, SeekableByteChannel> wrapper =
                (BucketUtils.isEligibleForPrefetching(samPath) ? cloudWrapper : Function.identity());
        // if samIndices==null then we'll guess the index name from the file name.
        // If the file's on the cloud, then the search will only consider locations that are also
        // in the cloud.
        Function<SeekableByteChannel, SeekableByteChannel> indexWrapper =
                ((samIndices != null && BucketUtils.isEligibleForPrefetching(samIndices.get(samCount))
                  || (samIndices == null && BucketUtils.isEligibleForPrefetching(samPath)))
                        ? cloudIndexWrapper : Function.identity());

        SamReader reader;
        if ( samIndices == null ) {
            reader = samReaderFactory.open(samPath, wrapper, indexWrapper);
        }
        else {
            final SamInputResource samResource = SamInputResource.of(samPath, wrapper);
            Path indexPath = samIndices.get(samCount);
            samResource.index(indexPath, indexWrapper);
            reader = samReaderFactory.open(samResource);
        }

        // Ensure that each file has an index
        if ( ! reader.hasIndex() ) {
            indicesAvailable = false;
        }

        readers.put(reader, null);
        backingPaths.put(reader, samPath);
        ++samCount;
    }

    // Prepare a header merger only if we have multiple readers
    headerMerger = samPaths.size() > 1 ? createHeaderMerger() : null;
}
Example 19
Source File: DownsampleSam.java From picard with MIT License | 4 votes |
@Override
protected int doWork() {
    IOUtil.assertFileIsReadable(INPUT);
    IOUtil.assertFileIsWritable(OUTPUT);

    // Warn the user if they are running with P=1 or P=0 (which are legal, but odd)
    if (PROBABILITY == 1) {
        log.warn("Running DownsampleSam with PROBABILITY=1! This will likely just recreate the input file.");
    }

    if (PROBABILITY == 0) {
        log.warn("Running DownsampleSam with PROBABILITY=0! This will create an empty file.");
    }

    if (RANDOM_SEED == null) {
        RANDOM_SEED = new Random().nextInt();
        log.warn(String.format(
                "Drawing a random seed because RANDOM_SEED was not set. Set RANDOM_SEED to %s to reproduce these results in the future.", RANDOM_SEED));
    }

    final SamReader in = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).open(SamInputResource.of(INPUT));
    final SAMFileWriter out = new SAMFileWriterFactory().makeSAMOrBAMWriter(in.getFileHeader(), true, OUTPUT);
    final ProgressLogger progress = new ProgressLogger(log, (int) 1e7, "Wrote");
    final DownsamplingIterator iterator = DownsamplingIteratorFactory.make(in, STRATEGY, PROBABILITY, ACCURACY, RANDOM_SEED);
    final QualityYieldMetricsCollector metricsCollector = new QualityYieldMetricsCollector(true, false, false);

    while (iterator.hasNext()) {
        final SAMRecord rec = iterator.next();
        out.addAlignment(rec);
        if (METRICS_FILE != null) metricsCollector.acceptRecord(rec, null);
        progress.record(rec);
    }

    out.close();
    CloserUtil.close(in);
    final NumberFormat fmt = new DecimalFormat("0.00%");
    log.info("Finished downsampling.");
    log.info("Kept ", iterator.getAcceptedCount(), " out of ", iterator.getSeenCount(), " reads (", fmt.format(iterator.getAcceptedFraction()), ").");

    if (METRICS_FILE != null) {
        final MetricsFile<QualityYieldMetrics, Integer> metricsFile = getMetricsFile();
        metricsCollector.finish();
        metricsCollector.addMetricsToFile(metricsFile);
        metricsFile.write(METRICS_FILE);
    }

    return 0;
}
Example 20
Source File: MarkIlluminaAdapters.java From picard with MIT License | 4 votes |
@Override
protected int doWork() {
    IOUtil.assertFileIsReadable(INPUT);
    IOUtil.assertFileIsWritable(METRICS);

    final SamReader in = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).open(INPUT);
    final SAMFileHeader.SortOrder order = in.getFileHeader().getSortOrder();
    SAMFileWriter out = null;
    if (OUTPUT != null) {
        IOUtil.assertFileIsWritable(OUTPUT);
        out = new SAMFileWriterFactory().makeSAMOrBAMWriter(in.getFileHeader(), true, OUTPUT);
    }

    final Histogram<Integer> histo = new Histogram<Integer>("clipped_bases", "read_count");

    // Combine any adapters and custom adapter pairs from the command line into an array for use in clipping
    final AdapterPair[] adapters;
    {
        final List<AdapterPair> tmp = new ArrayList<AdapterPair>();
        tmp.addAll(ADAPTERS);
        if (FIVE_PRIME_ADAPTER != null && THREE_PRIME_ADAPTER != null) {
            tmp.add(new CustomAdapterPair(FIVE_PRIME_ADAPTER, THREE_PRIME_ADAPTER));
        }
        adapters = tmp.toArray(new AdapterPair[tmp.size()]);
    }

    ////////////////////////////////////////////////////////////////////////
    // Main loop that consumes reads, clips them and writes them to the output
    ////////////////////////////////////////////////////////////////////////
    final ProgressLogger progress = new ProgressLogger(log, 1000000, "Read");
    final SAMRecordIterator iterator = in.iterator();

    final AdapterMarker adapterMarker = new AdapterMarker(ADAPTER_TRUNCATION_LENGTH, adapters).
            setMaxPairErrorRate(MAX_ERROR_RATE_PE).setMinPairMatchBases(MIN_MATCH_BASES_PE).
            setMaxSingleEndErrorRate(MAX_ERROR_RATE_SE).setMinSingleEndMatchBases(MIN_MATCH_BASES_SE).
            setNumAdaptersToKeep(NUM_ADAPTERS_TO_KEEP).
            setThresholdForSelectingAdaptersToKeep(PRUNE_ADAPTER_LIST_AFTER_THIS_MANY_ADAPTERS_SEEN);

    while (iterator.hasNext()) {
        final SAMRecord rec = iterator.next();
        final SAMRecord rec2 = rec.getReadPairedFlag() && iterator.hasNext() ? iterator.next() : null;
        rec.setAttribute(ReservedTagConstants.XT, null);

        // Do the clipping one way for PE and another for SE reads
        if (rec.getReadPairedFlag()) {
            // Assert that the input file is in query name order only if we see some PE reads
            if (order != SAMFileHeader.SortOrder.queryname) {
                throw new PicardException("Input BAM file must be sorted by queryname");
            }

            if (rec2 == null) throw new PicardException("Missing mate pair for paired read: " + rec.getReadName());
            rec2.setAttribute(ReservedTagConstants.XT, null);

            // Assert that we did in fact just get two mate pairs
            if (!rec.getReadName().equals(rec2.getReadName())) {
                throw new PicardException("Adjacent reads expected to be mate-pairs have different names: " +
                        rec.getReadName() + ", " + rec2.getReadName());
            }

            // establish which of pair is first and which second
            final SAMRecord first, second;

            if (rec.getFirstOfPairFlag() && rec2.getSecondOfPairFlag()) {
                first = rec;
                second = rec2;
            } else if (rec.getSecondOfPairFlag() && rec2.getFirstOfPairFlag()) {
                first = rec2;
                second = rec;
            } else {
                throw new PicardException("Two reads with same name but not correctly marked as 1st/2nd of pair: " + rec.getReadName());
            }

            adapterMarker.adapterTrimIlluminaPairedReads(first, second);
        } else {
            adapterMarker.adapterTrimIlluminaSingleRead(rec);
        }

        // Then output the records, update progress and metrics
        for (final SAMRecord r : new SAMRecord[]{rec, rec2}) {
            if (r != null) {
                progress.record(r);
                if (out != null) out.addAlignment(r);

                final Integer clip = r.getIntegerAttribute(ReservedTagConstants.XT);
                if (clip != null) histo.increment(r.getReadLength() - clip + 1);
            }
        }
    }

    if (out != null) out.close();

    // Lastly output the metrics to file
    final MetricsFile<?, Integer> metricsFile = getMetricsFile();
    metricsFile.setHistogram(histo);
    metricsFile.write(METRICS);

    CloserUtil.close(in);
    return 0;
}