Java Code Examples for htsjdk.samtools.util.IOUtil#assertFileIsReadable()
The following examples show how to use htsjdk.samtools.util.IOUtil#assertFileIsReadable().
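Before the project examples, here is a minimal, self-contained sketch of the pattern they all share: validate every input (and output) up front so a missing or unreadable file fails immediately with an htsjdk SAMException instead of partway through processing. The class name and file paths below are placeholders for illustration and are not taken from any of the projects.

import java.io.File;

import htsjdk.samtools.SAMException;
import htsjdk.samtools.util.IOUtil;

public class AssertFileIsReadableExample {
    public static void main(final String[] args) {
        final File input = new File("reads.bam");            // placeholder input path
        final File output = new File("reads.converted.bam"); // placeholder output path
        try {
            // Throws SAMException if the file does not exist or cannot be read
            IOUtil.assertFileIsReadable(input);
            // The examples below typically pair the readability check with a writability check
            IOUtil.assertFileIsWritable(output);
            System.out.println("Inputs validated; safe to start processing.");
        } catch (final SAMException e) {
            System.err.println("Input validation failed: " + e.getMessage());
        }
    }
}

An overload accepting java.nio.file.Path is used the same way in Examples 6, 17, and 18 below.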
Example 1
Source File: MergePedIntoVcf.java From picard with MIT License | 6 votes |
@Override
protected int doWork() {
    IOUtil.assertFileIsReadable(PED_FILE);
    IOUtil.assertFileIsReadable(MAP_FILE);
    IOUtil.assertFileIsReadable(ZCALL_THRESHOLDS_FILE);
    IOUtil.assertFileIsWritable(OUTPUT);
    try {
        parseZCallThresholds();
        final ZCallPedFile zCallPedFile = ZCallPedFile.fromFile(PED_FILE, MAP_FILE);
        final VCFFileReader vcfFileReader = new VCFFileReader(ORIGINAL_VCF, false);
        final VCFHeader header = vcfFileReader.getFileHeader();
        if (header.getGenotypeSamples().size() > 1) {
            throw new PicardException("MergePedIntoVCF only works with single-sample VCFs.");
        }
        addAdditionalHeaderFields(header);
        writeVcf(vcfFileReader.iterator(), OUTPUT, vcfFileReader.getFileHeader().getSequenceDictionary(), header, zCallPedFile);
    } catch (Exception e) {
        e.printStackTrace();
    }
    return 0;
}
Example 2
Source File: BamToBfqWriter.java From picard with MIT License | 6 votes |
/**
 * Constructor
 *
 * @param bamFile           the BAM file to read from
 * @param outputPrefix      the directory and file prefix for the binary fastq files
 * @param total             the total number of records that should be written, drawn evenly
 *                          from throughout the file (null for all).
 * @param chunk             the maximum number of records that should be written to any one file
 * @param pairedReads       whether these reads are from a paired-end run
 * @param namePrefix        The string to be stripped off the read name
 *                          before writing to the bfq file. May be null, in which case
 *                          the name will not be trimmed.
 * @param includeNonPfReads whether to include non pf-reads
 * @param clipAdapters      whether to replace adapters as marked with XT:i clipping position attribute
 */
public BamToBfqWriter(final File bamFile, final String outputPrefix, final Integer total,
                      final Integer chunk, final boolean pairedReads, String namePrefix,
                      boolean includeNonPfReads, boolean clipAdapters, Integer basesToWrite) {
    IOUtil.assertFileIsReadable(bamFile);
    this.bamFile = bamFile;
    this.outputPrefix = outputPrefix;
    this.pairedReads = pairedReads;
    if (total != null) {
        final double writeable = (double) countWritableRecords();
        this.increment = (int) Math.floor(writeable / total.doubleValue());
        if (this.increment == 0) {
            this.increment = 1;
        }
    }
    if (chunk != null) {
        this.chunk = chunk;
    }
    this.namePrefix = namePrefix;
    this.nameTrim = namePrefix != null ? namePrefix.length() : 0;
    this.includeNonPfReads = includeNonPfReads;
    this.clipAdapters = clipAdapters;
    this.basesToWrite = basesToWrite;
}
Example 3
Source File: AddCommentsToBam.java From picard with MIT License | 6 votes |
protected int doWork() {
    IOUtil.assertFileIsReadable(INPUT);
    IOUtil.assertFileIsWritable(OUTPUT);

    if (INPUT.getAbsolutePath().endsWith(".sam")) {
        throw new PicardException("SAM files are not supported");
    }

    final SAMFileHeader samFileHeader = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).getFileHeader(INPUT);
    for (final String comment : COMMENT) {
        if (comment.contains("\n")) {
            throw new PicardException("Comments can not contain a new line");
        }
        samFileHeader.addComment(comment);
    }

    BamFileIoUtils.reheaderBamFile(samFileHeader, INPUT, OUTPUT, CREATE_MD5_FILE, CREATE_INDEX);
    return 0;
}
Example 4
Source File: SamFormatConverter.java From picard with MIT License | 6 votes |
/**
 * Convert a file from one of sam/bam/cram format to another based on the extension of output.
 *
 * @param input             input file in one of sam/bam/cram format
 * @param output            output to write converted file to, the conversion is based on the extension of this filename
 * @param referenceSequence the reference sequence to use, necessary when reading/writing cram
 * @param createIndex       whether or not an index should be written alongside the output file
 */
public static void convert(final File input, final File output, final File referenceSequence, final Boolean createIndex) {
    IOUtil.assertFileIsReadable(input);
    IOUtil.assertFileIsWritable(output);

    final SamReader reader = SamReaderFactory.makeDefault().referenceSequence(referenceSequence).open(input);
    final SAMFileWriter writer = new SAMFileWriterFactory().makeWriter(reader.getFileHeader(), true, output, referenceSequence);
    if (createIndex && writer.getFileHeader().getSortOrder() != SAMFileHeader.SortOrder.coordinate) {
        throw new PicardException("Can't CREATE_INDEX unless sort order is coordinate");
    }

    final ProgressLogger progress = new ProgressLogger(Log.getInstance(SamFormatConverter.class));
    for (final SAMRecord rec : reader) {
        writer.addAlignment(rec);
        progress.record(rec);
    }
    CloserUtil.close(reader);
    writer.close();
}
Example 5
Source File: SortSam.java From picard with MIT License | 6 votes |
protected int doWork() {
    IOUtil.assertFileIsReadable(INPUT);
    IOUtil.assertFileIsWritable(OUTPUT);

    final SamReader reader = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).open(INPUT);
    reader.getFileHeader().setSortOrder(SORT_ORDER.getSortOrder());
    final SAMFileWriter writer = new SAMFileWriterFactory().makeSAMOrBAMWriter(reader.getFileHeader(), false, OUTPUT);
    writer.setProgressLogger(
            new ProgressLogger(log, (int) 1e7, "Wrote", "records from a sorting collection"));

    final ProgressLogger progress = new ProgressLogger(log, (int) 1e7, "Read");
    for (final SAMRecord rec : reader) {
        writer.addAlignment(rec);
        progress.record(rec);
    }

    log.info("Finished reading inputs, merging and writing to output now.");

    CloserUtil.close(reader);
    writer.close();
    return 0;
}
Example 6
Source File: PicardIndexedFastaSequenceFile.java From chipster with MIT License | 6 votes |
/**
 * Open the given indexed fasta sequence file. Throw an exception if the file cannot be opened.
 *
 * @param path  The file to open.
 * @param index Pre-built FastaSequenceIndex, for the case in which one does not exist on disk.
 */
public PicardIndexedFastaSequenceFile(final Path path, final FastaSequenceIndex index) {
    super(path);
    if (index == null) throw new IllegalArgumentException("Null index for fasta " + path);
    this.index = index;
    IOUtil.assertFileIsReadable(path);
    try {
        this.channel = Files.newByteChannel(path);
    } catch (IOException e) {
        throw new SAMException("Fasta file should be readable but is not: " + path, e);
    }
    reset();

    if (getSequenceDictionary() != null)
        sanityCheckDictionaryAgainstIndex(path.toAbsolutePath().toString(), sequenceDictionary, index);
}
Example 7
Source File: CheckTerminatorBlock.java From picard with MIT License | 6 votes |
@Override
protected int doWork() {
    IOUtil.assertFileIsReadable(INPUT);
    try {
        final FileTermination term = BlockCompressedInputStream.checkTermination(INPUT);
        System.err.println(term.name());
        if (term == FileTermination.DEFECTIVE) {
            return 100;
        } else {
            return 0;
        }
    } catch (IOException ioe) {
        throw new PicardException("Exception reading terminator block of file: " + INPUT.getAbsolutePath());
    }
}
Example 8
Source File: MMapBackedIteratorFactory.java From picard with MIT License | 5 votes |
private static void checkFactoryVars(final int headerSize, final File binaryFile) {
    IOUtil.assertFileIsReadable(binaryFile);

    if (headerSize < 0) {
        throw new PicardException("Header size cannot be negative. HeaderSize(" + headerSize + ") for file " + binaryFile.getAbsolutePath());
    }

    if (headerSize > binaryFile.length()) {
        throw new PicardException("Header size(" + headerSize + ") is greater than file size(" + binaryFile.length() + ") for file " + binaryFile.getAbsolutePath());
    }
}
Example 9
Source File: SplitReadsIntegrationTest.java From gatk with BSD 3-Clause "New" or "Revised" License | 5 votes |
private int getReadCounts(final Path tempDirectory, final String baseName, final String fileName) {
    final File path = tempDirectory.resolve(fileName).toFile();
    IOUtil.assertFileIsReadable(path);

    final SamReader in = SamReaderFactory.makeDefault()
            .referenceSequence(new File(getTestDataDir(), getReferenceSequenceName(baseName)))
            .open(path);
    int count = 0;
    for (@SuppressWarnings("unused") final SAMRecord rec : in) {
        count++;
    }
    CloserUtil.close(in);
    return count;
}
Example 10
Source File: SelectCellsByNumTranscripts.java From Drop-seq with MIT License | 5 votes |
public static List<String> readBarcodes(final File file) {
    try {
        IOUtil.assertFileIsReadable(file);
        final BufferedReader reader = IOUtil.openFileForBufferedReading(file);
        final List<String> ret = new ArrayList<>();
        String barcode;
        while ((barcode = reader.readLine()) != null)
            if (!barcode.isEmpty())
                ret.add(barcode);
        CloserUtil.close(reader);
        return ret;
    } catch (IOException e) {
        throw new RuntimeIOException("Exception reading " + file.getAbsolutePath());
    }
}
Example 11
Source File: GatherMolecularBarcodeDistributionByGene.java From Drop-seq with MIT License | 5 votes |
@Override
protected int doWork() {
    IOUtil.assertFileIsReadable(INPUT);
    IOUtil.assertFileIsWritable(OUTPUT);

    BufferedWriter out = IOUtil.openFileForBufferedWriting(OUTPUT);
    writePerTranscriptHeader(out);

    Set<String> cellBarcodes = new HashSet<>(new BarcodeListRetrieval().getCellBarcodes(
            this.INPUT, this.CELL_BARCODE_TAG, this.MOLECULAR_BARCODE_TAG,
            this.GENE_NAME_TAG, this.GENE_STRAND_TAG, this.GENE_FUNCTION_TAG,
            this.STRAND_STRATEGY, this.LOCUS_FUNCTION_LIST, this.CELL_BC_FILE,
            this.READ_MQ, this.MIN_NUM_TRANSCRIPTS_PER_CELL, this.MIN_NUM_GENES_PER_CELL,
            this.MIN_NUM_READS_PER_CELL, this.NUM_CORE_BARCODES, this.EDIT_DISTANCE,
            this.MIN_BC_READ_THRESHOLD));

    UMIIterator umiIterator = new UMIIterator(SamFileMergeUtil.mergeInputs(Collections.singletonList(this.INPUT), false),
            GENE_NAME_TAG, GENE_STRAND_TAG, GENE_FUNCTION_TAG,
            this.STRAND_STRATEGY, this.LOCUS_FUNCTION_LIST, this.CELL_BARCODE_TAG,
            this.MOLECULAR_BARCODE_TAG, this.READ_MQ, false, cellBarcodes, true, false);

    UMICollection batch;
    while ((batch = umiIterator.next()) != null)
        if (batch.isEmpty() == false) {
            String cellTag = batch.getCellBarcode();
            if (cellBarcodes.contains(cellTag) || cellBarcodes.isEmpty())
                writePerTranscriptStats(batch.getGeneName(), batch.getCellBarcode(),
                        batch.getMolecularBarcodeCountsCollapsed(this.EDIT_DISTANCE), out);
        }

    CloserUtil.close(umiIterator);

    try {
        out.close();
    } catch (IOException io) {
        throw new TranscriptomeException("Problem writing file", io);
    }
    return 0;
}
Example 12
Source File: UpdateVcfSequenceDictionary.java From picard with MIT License | 5 votes |
@Override
protected int doWork() {
    IOUtil.assertFileIsReadable(INPUT);
    IOUtil.assertFileIsReadable(SEQUENCE_DICTIONARY);
    IOUtil.assertFileIsWritable(OUTPUT);

    final SAMSequenceDictionary samSequenceDictionary = SAMSequenceDictionaryExtractor.extractDictionary(SEQUENCE_DICTIONARY.toPath());

    final VCFFileReader fileReader = new VCFFileReader(INPUT, false);
    final VCFHeader fileHeader = fileReader.getFileHeader();

    final VariantContextWriterBuilder builder = new VariantContextWriterBuilder()
            .setReferenceDictionary(samSequenceDictionary)
            .clearOptions();
    if (CREATE_INDEX)
        builder.setOption(Options.INDEX_ON_THE_FLY);

    final VariantContextWriter vcfWriter = builder.setOutputFile(OUTPUT).build();
    fileHeader.setSequenceDictionary(samSequenceDictionary);
    vcfWriter.writeHeader(fileHeader);

    final ProgressLogger progress = new ProgressLogger(log, 10000);
    final CloseableIterator<VariantContext> iterator = fileReader.iterator();
    while (iterator.hasNext()) {
        final VariantContext context = iterator.next();
        vcfWriter.add(context);
        progress.record(context.getContig(), context.getStart());
    }

    CloserUtil.close(iterator);
    CloserUtil.close(fileReader);
    vcfWriter.close();

    return 0;
}
Example 13
Source File: BiasedBarcodeCollectionFactory.java From Drop-seq with MIT License | 5 votes |
/**
 * Get a list of cell barcodes that can be queried in the BAM.
 *
 * @param inputFiles      One or more BAM files
 * @param cellBarcodeFile A file containing a list of cell barcodes, can be null.
 * @param numBarcodes     A count of the number of barcodes to extract. Use if cellBarcodeFile is null.
 * @param cellBarcodeTag  The cell barcode tag. Use if cellBarcodeFile is null.
 * @param readMQ          The map quality of reads to use if cellBarcodeFile is null.
 * @return
 */
public List<String> getCellBarcodes(final List<File> inputFiles, final File cellBarcodeFile,
                                    final int numBarcodes, final String cellBarcodeTag, final int readMQ) {
    if (cellBarcodeFile != null) {
        IOUtil.assertFileIsReadable(cellBarcodeFile);
        List<String> cellBarcodes = ParseBarcodeFile.readCellBarcodeFile(cellBarcodeFile);
        log.info("Found " + cellBarcodes.size() + " cell barcodes in file");
        return (cellBarcodes);
    }
    log.info("Gathering barcodes for the top [" + numBarcodes + "] cells");
    return new BarcodeListRetrieval().getListCellBarcodesByReadCount(
            SamFileMergeUtil.mergeInputs(inputFiles, false, samReaderFactory).iterator,
            cellBarcodeTag, readMQ, null, numBarcodes);
}
Example 14
Source File: CreateVerifyIDIntensityContaminationMetricsFile.java From picard with MIT License | 5 votes |
@Override
protected int doWork() {
    IOUtil.assertFileIsReadable(INPUT);
    final File metricsFile = new File(OUTPUT + "." + FILE_EXTENSION);
    IOUtil.assertFileIsWritable(metricsFile);

    final MetricsFile<VerifyIDIntensityContaminationMetrics, ?> verifyIDIntensityContaminationMetricsFile = getMetricsFile();

    final Pattern HEADER_PATTERN = Pattern.compile("^ID\\s+%Mix\\s+LLK\\s+LLK0\\s*$");
    final Pattern DASHES_PATTERN = Pattern.compile("^[-]+$");
    final Pattern DATA_PATTERN = Pattern.compile("^(\\d+)\\s+([0-9]*\\.?[0-9]+)\\s+([-0-9]*\\.?[0-9]+)\\s+([-0-9]*\\.?[0-9]+)\\s*$");

    try (BufferedReader br = new BufferedReader(new FileReader(INPUT))) {
        String line;
        line = br.readLine();
        lineMatch(line, HEADER_PATTERN);
        line = br.readLine();
        lineMatch(line, DASHES_PATTERN);
        while ((line = br.readLine()) != null) {
            final Matcher m = lineMatch(line, DATA_PATTERN);

            // Load up and store the metrics
            final VerifyIDIntensityContaminationMetrics metrics = new VerifyIDIntensityContaminationMetrics();
            metrics.ID = Integer.parseInt(m.group(1));
            metrics.PCT_MIX = Double.parseDouble(m.group(2));
            metrics.LLK = Double.parseDouble(m.group(3));
            metrics.LLK0 = Double.parseDouble(m.group(4));
            verifyIDIntensityContaminationMetricsFile.addMetric(metrics);
        }
        verifyIDIntensityContaminationMetricsFile.write(metricsFile);
    } catch (IOException e) {
        throw new PicardException("Error parsing VerifyIDIntensity Output", e);
    }

    return 0;
}
Example 15
Source File: DetectBeadSynthesisErrors.java From Drop-seq with MIT License | 5 votes |
@Override
protected String[] customCommandLineValidation() {
    for (final File input : INPUT)
        IOUtil.assertFileIsReadable(input);
    IOUtil.assertFileIsWritable(this.OUTPUT_STATS);
    IOUtil.assertFileIsWritable(this.SUMMARY);
    if (this.REPORT != null) IOUtil.assertFileIsWritable(this.REPORT);
    if (this.CELL_BC_FILE != null & this.NUM_BARCODES != null)
        throw new TranscriptomeException("Must specify at most one of CELL_BC_FILE or NUM_BARCODES");
    if (OUTPUT != null) IOUtil.assertFileIsWritable(this.OUTPUT);
    return super.customCommandLineValidation();
}
Example 16
Source File: RevertSam.java From picard with MIT License | 4 votes |
protected int doWork() {
    IOUtil.assertFileIsReadable(INPUT);
    ValidationUtil.assertWritable(OUTPUT, OUTPUT_BY_READGROUP);

    final boolean sanitizing = SANITIZE;
    final SamReader in = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).validationStringency(VALIDATION_STRINGENCY).open(INPUT);
    final SAMFileHeader inHeader = in.getFileHeader();
    ValidationUtil.validateHeaderOverrides(inHeader, SAMPLE_ALIAS, LIBRARY_NAME);

    ////////////////////////////////////////////////////////////////////////////
    // Build the output writer with an appropriate header based on the options
    ////////////////////////////////////////////////////////////////////////////
    final boolean presorted = isPresorted(inHeader, SORT_ORDER, sanitizing);
    if (SAMPLE_ALIAS != null) overwriteSample(inHeader.getReadGroups(), SAMPLE_ALIAS);
    if (LIBRARY_NAME != null) overwriteLibrary(inHeader.getReadGroups(), LIBRARY_NAME);
    final SAMFileHeader singleOutHeader = createOutHeader(inHeader, SORT_ORDER, REMOVE_ALIGNMENT_INFORMATION);
    inHeader.getReadGroups().forEach(readGroup -> singleOutHeader.addReadGroup(readGroup));

    final Map<String, File> outputMap;
    final Map<String, SAMFileHeader> headerMap;
    if (OUTPUT_BY_READGROUP) {
        if (inHeader.getReadGroups().isEmpty()) {
            throw new PicardException(INPUT + " does not contain Read Groups");
        }

        final String defaultExtension;
        if (OUTPUT_BY_READGROUP_FILE_FORMAT == FileType.dynamic) {
            defaultExtension = getDefaultExtension(INPUT.toString());
        } else {
            defaultExtension = "." + OUTPUT_BY_READGROUP_FILE_FORMAT.toString();
        }

        outputMap = createOutputMap(OUTPUT_MAP, OUTPUT, defaultExtension, inHeader.getReadGroups());
        ValidationUtil.assertAllReadGroupsMapped(outputMap, inHeader.getReadGroups());
        headerMap = createHeaderMap(inHeader, SORT_ORDER, REMOVE_ALIGNMENT_INFORMATION);
    } else {
        outputMap = null;
        headerMap = null;
    }

    if (RESTORE_HARDCLIPS && !REMOVE_ALIGNMENT_INFORMATION) {
        throw new PicardException("Cannot revert sam file when RESTORE_HARDCLIPS is true and REMOVE_ALIGNMENT_INFORMATION is false.");
    }

    final SAMFileWriterFactory factory = new SAMFileWriterFactory();
    final RevertSamWriter out = new RevertSamWriter(OUTPUT_BY_READGROUP, headerMap, outputMap, singleOutHeader, OUTPUT, presorted, factory, REFERENCE_SEQUENCE);

    ////////////////////////////////////////////////////////////////////////////
    // Build a sorting collection to use if we are sanitizing
    ////////////////////////////////////////////////////////////////////////////
    final RevertSamSorter sorter;
    if (sanitizing) sorter = new RevertSamSorter(OUTPUT_BY_READGROUP, headerMap, singleOutHeader, MAX_RECORDS_IN_RAM);
    else sorter = null;

    final ProgressLogger progress = new ProgressLogger(log, 1000000, "Reverted");
    for (final SAMRecord rec : in) {
        // Weed out non-primary and supplemental read as we don't want duplicates in the reverted file!
        if (rec.isSecondaryOrSupplementary()) continue;

        // log the progress before you revert because otherwise the "last read position" might not be accurate
        progress.record(rec);

        // Actually do the reverting of the remaining records
        revertSamRecord(rec);

        if (sanitizing) sorter.add(rec);
        else out.addAlignment(rec);
    }
    CloserUtil.close(in);

    ////////////////////////////////////////////////////////////////////////////
    // Now if we're sanitizing, clean up the records and write them to the output
    ////////////////////////////////////////////////////////////////////////////
    if (!sanitizing) {
        out.close();
    } else {
        final Map<SAMReadGroupRecord, FastqQualityFormat> readGroupToFormat;
        try {
            readGroupToFormat = createReadGroupFormatMap(inHeader, REFERENCE_SEQUENCE, VALIDATION_STRINGENCY, INPUT, RESTORE_ORIGINAL_QUALITIES);
        } catch (final PicardException e) {
            log.error(e.getMessage());
            return -1;
        }

        final long[] sanitizeResults = sanitize(readGroupToFormat, sorter, out);
        final long discarded = sanitizeResults[0];
        final long total = sanitizeResults[1];
        out.close();

        final double discardRate = discarded / (double) total;
        final NumberFormat fmt = new DecimalFormat("0.000%");
        log.info("Discarded " + discarded + " out of " + total + " (" + fmt.format(discardRate) + ") reads in order to sanitize output.");

        if (discardRate > MAX_DISCARD_FRACTION) {
            throw new PicardException("Discarded " + fmt.format(discardRate) + " which is above MAX_DISCARD_FRACTION of " + fmt.format(MAX_DISCARD_FRACTION));
        }
    }

    return 0;
}
Example 17
Source File: GatherVcfsCloud.java From gatk with BSD 3-Clause "New" or "Revised" License | 4 votes |
@Override
protected Object doWork() {
    log.info("Checking inputs.");
    final List<Path> inputPaths = inputs.stream().map(IOUtils::getPath).collect(Collectors.toList());
    if (!ignoreSafetyChecks) {
        for (final Path f : inputPaths) {
            IOUtil.assertFileIsReadable(f);
        }
    }

    IOUtil.assertFileIsWritable(output);

    final SAMSequenceDictionary sequenceDictionary = getHeader(inputPaths.get(0)).getSequenceDictionary();

    if (createIndex && sequenceDictionary == null) {
        throw new UserException("In order to index the resulting VCF, the input VCFs must contain ##contig lines.");
    }

    if (!ignoreSafetyChecks) {
        log.info("Checking file headers and first records to ensure compatibility.");
        assertSameSamplesAndValidOrdering(inputPaths, disableContigOrderingCheck);
    }

    if (gatherType == GatherType.AUTOMATIC) {
        if (canBlockCopy(inputPaths, output)) {
            gatherType = GatherType.BLOCK;
        } else {
            gatherType = GatherType.CONVENTIONAL;
        }
    }

    if (gatherType == GatherType.BLOCK && !canBlockCopy(inputPaths, output)) {
        throw new UserException.BadInput(
                "Requested block copy but some files are not bgzipped, all inputs and the output must be bgzipped to block copy");
    }

    switch (gatherType) {
        case BLOCK:
            log.info("Gathering by copying gzip blocks. Will not be able to validate position non-overlap of files.");
            if (createIndex) {
                log.warn("Index creation not currently supported when gathering block compressed VCFs.");
            }
            gatherWithBlockCopying(inputPaths, output, cloudPrefetchBuffer);
            break;
        case CONVENTIONAL:
            log.info("Gathering by conventional means.");
            gatherConventionally(sequenceDictionary, createIndex, inputPaths, output, cloudPrefetchBuffer, disableContigOrderingCheck);
            break;
        default:
            throw new GATKException.ShouldNeverReachHereException("Invalid gather type: " + gatherType + ". Please report this bug to the developers.");
    }
    return null;
}
Example 18
Source File: ReadsPathDataSource.java From gatk with BSD 3-Clause "New" or "Revised" License | 4 votes |
/**
 * Initialize this data source with multiple SAM/BAM/CRAM files, explicit indices for those files,
 * and a custom SamReaderFactory.
 *
 * @param samPaths paths to SAM/BAM/CRAM files, not null
 * @param samIndices indices for all of the SAM/BAM/CRAM files, in the same order as samPaths. May be null,
 *                   in which case index paths are inferred automatically.
 * @param customSamReaderFactory SamReaderFactory to use, if null a default factory with no reference and validation
 *                               stringency SILENT is used.
 * @param cloudWrapper caching/prefetching wrapper for the data, if on Google Cloud.
 * @param cloudIndexWrapper caching/prefetching wrapper for the index, if on Google Cloud.
 */
public ReadsPathDataSource( final List<Path> samPaths, final List<Path> samIndices,
                            SamReaderFactory customSamReaderFactory,
                            Function<SeekableByteChannel, SeekableByteChannel> cloudWrapper,
                            Function<SeekableByteChannel, SeekableByteChannel> cloudIndexWrapper ) {
    Utils.nonNull(samPaths);
    Utils.nonEmpty(samPaths, "ReadsPathDataSource cannot be created from empty file list");

    if ( samIndices != null && samPaths.size() != samIndices.size() ) {
        throw new UserException(String.format("Must have the same number of BAM/CRAM/SAM paths and indices. Saw %d BAM/CRAM/SAMs but %d indices",
                                              samPaths.size(), samIndices.size()));
    }

    readers = new LinkedHashMap<>(samPaths.size() * 2);
    backingPaths = new LinkedHashMap<>(samPaths.size() * 2);
    indicesAvailable = true;

    final SamReaderFactory samReaderFactory =
            customSamReaderFactory == null ?
                SamReaderFactory.makeDefault().validationStringency(ReadConstants.DEFAULT_READ_VALIDATION_STRINGENCY) :
                customSamReaderFactory;

    int samCount = 0;
    for ( final Path samPath : samPaths ) {
        // Ensure each file can be read
        try {
            IOUtil.assertFileIsReadable(samPath);
        }
        catch ( SAMException|IllegalArgumentException e ) {
            throw new UserException.CouldNotReadInputFile(samPath.toString(), e);
        }

        Function<SeekableByteChannel, SeekableByteChannel> wrapper =
                (BucketUtils.isEligibleForPrefetching(samPath) ? cloudWrapper : Function.identity());
        // if samIndices==null then we'll guess the index name from the file name.
        // If the file's on the cloud, then the search will only consider locations that are also
        // in the cloud.
        Function<SeekableByteChannel, SeekableByteChannel> indexWrapper =
                ((samIndices != null && BucketUtils.isEligibleForPrefetching(samIndices.get(samCount))
                  || (samIndices == null && BucketUtils.isEligibleForPrefetching(samPath)))
                        ? cloudIndexWrapper : Function.identity());

        SamReader reader;
        if ( samIndices == null ) {
            reader = samReaderFactory.open(samPath, wrapper, indexWrapper);
        }
        else {
            final SamInputResource samResource = SamInputResource.of(samPath, wrapper);
            Path indexPath = samIndices.get(samCount);
            samResource.index(indexPath, indexWrapper);
            reader = samReaderFactory.open(samResource);
        }

        // Ensure that each file has an index
        if ( ! reader.hasIndex() ) {
            indicesAvailable = false;
        }

        readers.put(reader, null);
        backingPaths.put(reader, samPath);
        ++samCount;
    }

    // Prepare a header merger only if we have multiple readers
    headerMerger = samPaths.size() > 1 ? createHeaderMerger() : null;
}
Example 19
Source File: DownsampleSam.java From picard with MIT License | 4 votes |
@Override
protected int doWork() {
    IOUtil.assertFileIsReadable(INPUT);
    IOUtil.assertFileIsWritable(OUTPUT);

    // Warn the user if they are running with P=1 or P=0 (which are legal, but odd)
    if (PROBABILITY == 1) {
        log.warn("Running DownsampleSam with PROBABILITY=1! This will likely just recreate the input file.");
    }

    if (PROBABILITY == 0) {
        log.warn("Running DownsampleSam with PROBABILITY=0! This will create an empty file.");
    }

    if (RANDOM_SEED == null) {
        RANDOM_SEED = new Random().nextInt();
        log.warn(String.format(
                "Drawing a random seed because RANDOM_SEED was not set. Set RANDOM_SEED to %s to reproduce these results in the future.", RANDOM_SEED));
    }

    final SamReader in = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).open(SamInputResource.of(INPUT));
    final SAMFileWriter out = new SAMFileWriterFactory().makeSAMOrBAMWriter(in.getFileHeader(), true, OUTPUT);
    final ProgressLogger progress = new ProgressLogger(log, (int) 1e7, "Wrote");
    final DownsamplingIterator iterator = DownsamplingIteratorFactory.make(in, STRATEGY, PROBABILITY, ACCURACY, RANDOM_SEED);
    final QualityYieldMetricsCollector metricsCollector = new QualityYieldMetricsCollector(true, false, false);

    while (iterator.hasNext()) {
        final SAMRecord rec = iterator.next();
        out.addAlignment(rec);
        if (METRICS_FILE != null) metricsCollector.acceptRecord(rec, null);
        progress.record(rec);
    }

    out.close();
    CloserUtil.close(in);
    final NumberFormat fmt = new DecimalFormat("0.00%");
    log.info("Finished downsampling.");
    log.info("Kept ", iterator.getAcceptedCount(), " out of ", iterator.getSeenCount(), " reads (", fmt.format(iterator.getAcceptedFraction()), ").");

    if (METRICS_FILE != null) {
        final MetricsFile<QualityYieldMetrics, Integer> metricsFile = getMetricsFile();
        metricsCollector.finish();
        metricsCollector.addMetricsToFile(metricsFile);
        metricsFile.write(METRICS_FILE);
    }

    return 0;
}
Example 20
Source File: MarkIlluminaAdapters.java From picard with MIT License | 4 votes |
@Override
protected int doWork() {
    IOUtil.assertFileIsReadable(INPUT);
    IOUtil.assertFileIsWritable(METRICS);

    final SamReader in = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).open(INPUT);
    final SAMFileHeader.SortOrder order = in.getFileHeader().getSortOrder();
    SAMFileWriter out = null;
    if (OUTPUT != null) {
        IOUtil.assertFileIsWritable(OUTPUT);
        out = new SAMFileWriterFactory().makeSAMOrBAMWriter(in.getFileHeader(), true, OUTPUT);
    }

    final Histogram<Integer> histo = new Histogram<Integer>("clipped_bases", "read_count");

    // Combine any adapters and custom adapter pairs from the command line into an array for use in clipping
    final AdapterPair[] adapters;
    {
        final List<AdapterPair> tmp = new ArrayList<AdapterPair>();
        tmp.addAll(ADAPTERS);
        if (FIVE_PRIME_ADAPTER != null && THREE_PRIME_ADAPTER != null) {
            tmp.add(new CustomAdapterPair(FIVE_PRIME_ADAPTER, THREE_PRIME_ADAPTER));
        }
        adapters = tmp.toArray(new AdapterPair[tmp.size()]);
    }

    ////////////////////////////////////////////////////////////////////////
    // Main loop that consumes reads, clips them and writes them to the output
    ////////////////////////////////////////////////////////////////////////
    final ProgressLogger progress = new ProgressLogger(log, 1000000, "Read");
    final SAMRecordIterator iterator = in.iterator();

    final AdapterMarker adapterMarker = new AdapterMarker(ADAPTER_TRUNCATION_LENGTH, adapters).
            setMaxPairErrorRate(MAX_ERROR_RATE_PE).setMinPairMatchBases(MIN_MATCH_BASES_PE).
            setMaxSingleEndErrorRate(MAX_ERROR_RATE_SE).setMinSingleEndMatchBases(MIN_MATCH_BASES_SE).
            setNumAdaptersToKeep(NUM_ADAPTERS_TO_KEEP).
            setThresholdForSelectingAdaptersToKeep(PRUNE_ADAPTER_LIST_AFTER_THIS_MANY_ADAPTERS_SEEN);

    while (iterator.hasNext()) {
        final SAMRecord rec = iterator.next();
        final SAMRecord rec2 = rec.getReadPairedFlag() && iterator.hasNext() ? iterator.next() : null;
        rec.setAttribute(ReservedTagConstants.XT, null);

        // Do the clipping one way for PE and another for SE reads
        if (rec.getReadPairedFlag()) {
            // Assert that the input file is in query name order only if we see some PE reads
            if (order != SAMFileHeader.SortOrder.queryname) {
                throw new PicardException("Input BAM file must be sorted by queryname");
            }

            if (rec2 == null) throw new PicardException("Missing mate pair for paired read: " + rec.getReadName());
            rec2.setAttribute(ReservedTagConstants.XT, null);

            // Assert that we did in fact just get two mate pairs
            if (!rec.getReadName().equals(rec2.getReadName())) {
                throw new PicardException("Adjacent reads expected to be mate-pairs have different names: " +
                        rec.getReadName() + ", " + rec2.getReadName());
            }

            // establish which of pair is first and which second
            final SAMRecord first, second;

            if (rec.getFirstOfPairFlag() && rec2.getSecondOfPairFlag()) {
                first = rec;
                second = rec2;
            } else if (rec.getSecondOfPairFlag() && rec2.getFirstOfPairFlag()) {
                first = rec2;
                second = rec;
            } else {
                throw new PicardException("Two reads with same name but not correctly marked as 1st/2nd of pair: " + rec.getReadName());
            }

            adapterMarker.adapterTrimIlluminaPairedReads(first, second);
        } else {
            adapterMarker.adapterTrimIlluminaSingleRead(rec);
        }

        // Then output the records, update progress and metrics
        for (final SAMRecord r : new SAMRecord[]{rec, rec2}) {
            if (r != null) {
                progress.record(r);
                if (out != null) out.addAlignment(r);

                final Integer clip = r.getIntegerAttribute(ReservedTagConstants.XT);
                if (clip != null) histo.increment(r.getReadLength() - clip + 1);
            }
        }
    }

    if (out != null) out.close();

    // Lastly output the metrics to file
    final MetricsFile<?, Integer> metricsFile = getMetricsFile();
    metricsFile.setHistogram(histo);
    metricsFile.write(METRICS);

    CloserUtil.close(in);
    return 0;
}