Java Code Examples for htsjdk.samtools.SAMRecordIterator#next()
The following examples show how to use
htsjdk.samtools.SAMRecordIterator#next() .
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: MapBarcodesByEditDistanceTest.java From Drop-seq with MIT License | 6 votes |
private UMISharingData readUMISharingData (File f, String cellBarcode, ParentEditDistanceMatcher matcher) { Map<String, Set<TagValues>> umisPerBarcode = new HashMap<String, Set<TagValues>>(); ObjectCounter<String> barcodes = new ObjectCounter<>(); SamReader reader = SamReaderFactory.makeDefault().open(UmiSharingData); SAMRecordIterator iter = reader.iterator(); while (iter.hasNext()) { SAMRecord r = iter.next(); String barcode = r.getStringAttribute(cellBarcode); TagValues v = matcher.getValues(r); Set<TagValues> s = umisPerBarcode.get(barcode); if (s==null) { s = new HashSet<TagValues>(); umisPerBarcode.put(barcode, s); } if (!s.contains(v)) barcodes.increment(barcode); s.add(v); } UMISharingData result = new UMISharingData(); result.barcodes=barcodes; result.umisPerBarcode=umisPerBarcode; return result; }
Example 2
Source File: MergeBamAlignmentTest.java From picard with MIT License | 6 votes |
@Test(dataProvider="data") public void testSortingOnSamAlignmentMerger(final File unmapped, final File aligned, final boolean sorted, final boolean coordinateSorted, final String testName) throws IOException { final File target = File.createTempFile("target", "bam"); target.deleteOnExit(); final SamAlignmentMerger merger = new SamAlignmentMerger(unmapped, target, fasta, null, true, false, false, Collections.singletonList(aligned), 1, null, null, null, null, null, null, Collections.singletonList(SamPairUtil.PairOrientation.FR), coordinateSorted ? SAMFileHeader.SortOrder.coordinate : SAMFileHeader.SortOrder.queryname, new BestMapqPrimaryAlignmentSelectionStrategy(), false, false, 30); merger.mergeAlignment(Defaults.REFERENCE_FASTA); Assert.assertEquals(sorted, !merger.getForceSort()); final SAMRecordIterator it = SamReaderFactory.makeDefault().open(target).iterator(); int aln = 0; while (it.hasNext()) { final SAMRecord rec = it.next(); if (!rec.getReadUnmappedFlag()) { aln++; } } Assert.assertEquals(aln, 6, "Incorrect number of aligned reads in merged BAM file"); }
Example 3
Source File: FastqToSamTest.java From picard with MIT License | 6 votes |
private void convertFileAndVerifyRecordCount(final int expectedCount, final String fastqFilename1, final String fastqFilename2, final FastqQualityFormat version, final boolean permissiveFormat, final boolean useSequentialFastqs) throws IOException { final File samFile = convertFile(fastqFilename1, fastqFilename2, version, permissiveFormat, useSequentialFastqs); final SamReader samReader = SamReaderFactory.makeDefault().open(samFile); final SAMRecordIterator iterator = samReader.iterator(); int actualCount = 0; while (iterator.hasNext()) { iterator.next(); actualCount++; } samReader.close(); Assert.assertEquals(actualCount, expectedCount); }
Example 4
Source File: TrimStartingSequenceTest.java From Drop-seq with MIT License | 5 votes |
@Test public void testTrimStartingSequenceClp() throws IOException { final TrimStartingSequence clp = new TrimStartingSequence(); clp.INPUT = INPUT; clp.OUTPUT = File.createTempFile("TrimStartingSequenceTest.", ".sam"); clp.OUTPUT.deleteOnExit(); clp.OUTPUT_SUMMARY = File.createTempFile("TrimStartingSequenceTest.", ".summary"); clp.OUTPUT_SUMMARY.deleteOnExit(); clp.SEQUENCE = "AAGCAGTGGTATCAACGCAGAGTGAATGGG"; clp.MISMATCHES=0; clp.NUM_BASES=5; Assert.assertEquals(clp.doWork(), 0); // Confirm that the expected stuff is trimmed. final SamReader outputReader = SamReaderFactory.makeDefault().open(clp.OUTPUT); final SAMRecordIterator outputIterator = outputReader.iterator(); final SamReader inputReader = SamReaderFactory.makeDefault().open(clp.INPUT); for (final SAMRecord inputRec: inputReader) { Assert.assertTrue(outputIterator.hasNext()); final SAMRecord outputRec = outputIterator.next(); final String inputBases = inputRec.getReadString(); final String outputBases = outputRec.getReadString(); Assert.assertTrue(inputBases.endsWith(outputBases)); final String trimmedBases = inputBases.substring(0, inputBases.length() - outputBases.length()); Assert.assertTrue(clp.SEQUENCE.endsWith(trimmedBases)); } Assert.assertFalse(outputIterator.hasNext()); CloserUtil.close(Arrays.asList(outputReader, inputReader)); }
Example 5
Source File: BAMIOITCase.java From dataflow-java with Apache License 2.0 | 5 votes |
@Test public void openBAMTest() throws IOException { GCSOptions popts = PipelineOptionsFactory.create().as(GCSOptions.class); final Storage.Objects storageClient = Transport.newStorageClient(popts).build().objects(); SamReader samReader = BAMIO.openBAM(storageClient, TEST_BAM_FNAME, ValidationStringency.DEFAULT_STRINGENCY); SAMRecordIterator iterator = samReader.query("1", 550000, 560000, false); int readCount = 0; while (iterator.hasNext()) { iterator.next(); readCount++; } Assert.assertEquals("Unexpected count of unmapped reads", EXPECTED_UNMAPPED_READS_COUNT, readCount); }
Example 6
Source File: HeaderInfo.java From dataflow-java with Apache License 2.0 | 5 votes |
public static HeaderInfo getHeaderFromBAMFile(Storage.Objects storage, String BAMPath, Iterable<Contig> explicitlyRequestedContigs) throws IOException { HeaderInfo result = null; // Open and read start of BAM LOG.info("Reading header from " + BAMPath); final SamReader samReader = BAMIO .openBAM(storage, BAMPath, ValidationStringency.DEFAULT_STRINGENCY); final SAMFileHeader header = samReader.getFileHeader(); Contig firstContig = getFirstExplicitContigOrNull(header, explicitlyRequestedContigs); if (firstContig == null) { final SAMSequenceRecord seqRecord = header.getSequence(0); firstContig = new Contig(seqRecord.getSequenceName(), -1, -1); } LOG.info("Reading first chunk of reads from " + BAMPath); final SAMRecordIterator recordIterator = samReader.query( firstContig.referenceName, (int)firstContig.start + 1, (int)firstContig.end + 1, false); Contig firstShard = null; while (recordIterator.hasNext() && result == null) { SAMRecord record = recordIterator.next(); final int alignmentStart = record.getAlignmentStart(); if (firstShard == null && alignmentStart > firstContig.start && (alignmentStart < firstContig.end || firstContig.end == -1)) { firstShard = new Contig(firstContig.referenceName, alignmentStart, alignmentStart); LOG.info("Determined first shard to be " + firstShard); result = new HeaderInfo(header, firstShard); } } recordIterator.close(); samReader.close(); if (result == null) { throw new IOException("Did not find reads for the first contig " + firstContig.toString()); } LOG.info("Finished header reading from " + BAMPath); return result; }
Example 7
Source File: CleanSamTester.java From picard with MIT License | 5 votes |
protected void test() { try { final SamFileValidator validator = new SamFileValidator(new PrintWriter(System.out), 8000); // Validate it has the expected cigar validator.setIgnoreWarnings(true); validator.setVerbose(true, 1000); validator.setErrorsToIgnore(Arrays.asList(SAMValidationError.Type.MISSING_READ_GROUP)); SamReaderFactory factory = SamReaderFactory.makeDefault().validationStringency(ValidationStringency.LENIENT); SamReader samReader = factory.open(getOutput()); final SAMRecordIterator iterator = samReader.iterator(); while (iterator.hasNext()) { final SAMRecord rec = iterator.next(); Assert.assertEquals(rec.getCigarString(), expectedCigar); if (SAMUtils.hasMateCigar(rec)) { Assert.assertEquals(SAMUtils.getMateCigarString(rec), expectedCigar); } } CloserUtil.close(samReader); // Run validation on the output file samReader = factory.open(getOutput()); final boolean validated = validator.validateSamFileVerbose(samReader, null); CloserUtil.close(samReader); Assert.assertTrue(validated, "ValidateSamFile failed"); } finally { IOUtil.recursiveDelete(getOutputDir().toPath()); } }
Example 8
Source File: SetNmMdAndUqTagsTest.java From picard with MIT License | 5 votes |
private void validateUq(final File input, final File reference) { final SamReader reader = SamReaderFactory.makeDefault().referenceSequence(reference).open(input); final SAMRecordIterator iterator = reader.iterator(); while (iterator.hasNext()){ SAMRecord rec = iterator.next(); if (!rec.getReadUnmappedFlag()) Assert.assertNotNull(rec.getAttribute("UQ")); } }
Example 9
Source File: MergeBamAlignmentTest.java From picard with MIT License | 4 votes |
/** * Read a single fragment read from a file, and create one or more aligned records for the read pair based on * the lists, merge with the original read, and assert expected results. * @param description * @param hitSpecs List that describes the aligned SAMRecords to create. * @param expectedPrimaryHitIndex Expected value for the HI tag in the primary alignment in the merged output. * @param expectedNumReads Expected number of SAMRecords in the merged output. * @param expectedPrimaryMapq Mapq of both ends of primary alignment in the merged output. * @throws Exception */ @Test(dataProvider = "testFragmentMultiHitWithFilteringTestCases") public void testFragmentMultiHitWithFiltering(final String description, final List<HitSpec> hitSpecs, final Integer expectedPrimaryHitIndex, final int expectedNumReads, final int expectedPrimaryMapq) throws Exception { // Create the aligned file by copying bases, quals, readname from the unmapped read, and conforming to each HitSpec. final File unmappedSam = new File(TEST_DATA_DIR, "multihit.filter.fragment.unmapped.sam"); final SAMRecordIterator unmappedSamFileIterator = SamReaderFactory.makeDefault().open(unmappedSam).iterator(); final SAMRecord unmappedRec = unmappedSamFileIterator.next(); unmappedSamFileIterator.close(); final File alignedSam = File.createTempFile("aligned.", ".sam"); alignedSam.deleteOnExit(); final SAMFileHeader alignedHeader = new SAMFileHeader(); alignedHeader.setSortOrder(SAMFileHeader.SortOrder.queryname); alignedHeader.setSequenceDictionary(SamReaderFactory.makeDefault().getFileHeader(sequenceDict).getSequenceDictionary()); final SAMFileWriter alignedWriter = new SAMFileWriterFactory().makeSAMWriter(alignedHeader, true, alignedSam); for (int i = 0; i < hitSpecs.size(); ++i) { final HitSpec hitSpec = hitSpecs.get(i); final SAMRecord mappedRec = makeRead(alignedHeader, unmappedRec, hitSpec, i); if (mappedRec != null) { alignedWriter.addAlignment(mappedRec); } } alignedWriter.close(); // Merge aligned file with original unmapped file. final File mergedSam = File.createTempFile("merged.", ".sam"); mergedSam.deleteOnExit(); doMergeAlignment(unmappedSam, Collections.singletonList(alignedSam), null, null, null, null, false, true, false, 1, "0", "1.0", "align!", "myAligner", false, fasta, mergedSam, null, null, null, null, null, null); assertSamValid(mergedSam); // Tally metrics and check for agreement with expected. final SamReader mergedReader = SamReaderFactory.makeDefault().open(mergedSam); int numReads = 0; Integer primaryHitIndex = null; int primaryMapq = 0; for (final SAMRecord rec : mergedReader) { ++numReads; if (!rec.getNotPrimaryAlignmentFlag() && !rec.getReadUnmappedFlag()) { final Integer hitIndex = rec.getIntegerAttribute(SAMTag.HI.name()); final int newHitIndex = (hitIndex == null? -1: hitIndex); Assert.assertNull(primaryHitIndex); primaryHitIndex = newHitIndex; primaryMapq = rec.getMappingQuality(); } } Assert.assertEquals(numReads, expectedNumReads); Assert.assertEquals(primaryHitIndex, expectedPrimaryHitIndex); Assert.assertEquals(primaryMapq, expectedPrimaryMapq); }
Example 10
Source File: MergeBamAlignmentTest.java From picard with MIT License | 4 votes |
/** * Read a single paired-end read from a file, and create one or more aligned records for the read pair based on * the lists, merge with the original paired-end read, and assert expected results. * @param description * @param firstOfPair List that describes the aligned SAMRecords to create for the first end. * @param secondOfPair List that describes the aligned SAMRecords to create for the second end. * @param expectedPrimaryHitIndex Expected value for the HI tag in the primary alignment in the merged output. * @param expectedNumFirst Expected number of first-of-pair SAMRecords in the merged output. * @param expectedNumSecond Expected number of second-of-pair SAMRecords in the merged output. * @param expectedPrimaryMapq Sum of mapqs of both ends of primary alignment in the merged output. * @throws Exception */ @Test(dataProvider = "testPairedMultiHitWithFilteringTestCases") public void testPairedMultiHitWithFiltering(final String description, final List<HitSpec> firstOfPair, final List<HitSpec> secondOfPair, final Integer expectedPrimaryHitIndex, final int expectedNumFirst, final int expectedNumSecond, final int expectedPrimaryMapq) throws Exception { // Create the aligned file by copying bases, quals, readname from the unmapped read, and conforming to each HitSpec. final File unmappedSam = new File(TEST_DATA_DIR, "multihit.filter.unmapped.sam"); final SAMRecordIterator unmappedSamFileIterator = SamReaderFactory.makeDefault().open(unmappedSam).iterator(); final SAMRecord firstUnmappedRec = unmappedSamFileIterator.next(); final SAMRecord secondUnmappedRec = unmappedSamFileIterator.next(); unmappedSamFileIterator.close(); final File alignedSam = File.createTempFile("aligned.", ".sam"); alignedSam.deleteOnExit(); final SAMFileHeader alignedHeader = new SAMFileHeader(); alignedHeader.setSortOrder(SAMFileHeader.SortOrder.queryname); alignedHeader.setSequenceDictionary(SamReaderFactory.makeDefault().getFileHeader(sequenceDict).getSequenceDictionary()); final SAMFileWriter alignedWriter = new SAMFileWriterFactory().makeSAMWriter(alignedHeader, true, alignedSam); for (int i = 0; i < Math.max(firstOfPair.size(), secondOfPair.size()); ++i) { final HitSpec firstHitSpec = firstOfPair.isEmpty()? null: firstOfPair.get(i); final HitSpec secondHitSpec = secondOfPair.isEmpty()? null: secondOfPair.get(i); final SAMRecord first = makeRead(alignedHeader, firstUnmappedRec, firstHitSpec, true, i); final SAMRecord second = makeRead(alignedHeader, secondUnmappedRec, secondHitSpec, false, i); if (first != null && second != null) SamPairUtil.setMateInfo(first, second); if (first != null) { if (second == null) first.setMateUnmappedFlag(true); alignedWriter.addAlignment(first); } if (second != null) { if (first == null) second.setMateUnmappedFlag(true); alignedWriter.addAlignment(second); } } alignedWriter.close(); // Merge aligned file with original unmapped file. final File mergedSam = File.createTempFile("merged.", ".sam"); mergedSam.deleteOnExit(); doMergeAlignment(unmappedSam, Collections.singletonList(alignedSam), null, null, null, null, false, true, false, 1, "0", "1.0", "align!", "myAligner", true, fasta, mergedSam, null, null, null, null, null, null); assertSamValid(mergedSam); // Tally metrics and check for agreement with expected. final SamReader mergedReader = SamReaderFactory.makeDefault().open(mergedSam); int numFirst = 0; int numSecond = 0; Integer primaryHitIndex = null; int primaryMapq = 0; for (final SAMRecord rec : mergedReader) { if (rec.getFirstOfPairFlag()) ++numFirst; if (rec.getSecondOfPairFlag()) ++numSecond; if (!rec.getNotPrimaryAlignmentFlag() && !rec.getReadUnmappedFlag()) { final Integer hitIndex = rec.getIntegerAttribute(SAMTag.HI.name()); final int newHitIndex = (hitIndex == null? -1: hitIndex); if (primaryHitIndex == null) primaryHitIndex = newHitIndex; else Assert.assertEquals(newHitIndex, primaryHitIndex.intValue()); primaryMapq += rec.getMappingQuality(); } } Assert.assertEquals(numFirst, expectedNumFirst); Assert.assertEquals(numSecond, expectedNumSecond); Assert.assertEquals(primaryHitIndex, expectedPrimaryHitIndex); Assert.assertEquals(primaryMapq, expectedPrimaryMapq); }
Example 11
Source File: ReorderSam.java From picard with MIT License | 4 votes |
/** * Helper function that writes reads from iterator it into writer out, updating each SAMRecord along the way * according to the newOrder mapping from dictionary index -> index. Name is used for printing only. */ private void writeReads(final SAMFileWriter out, final SAMRecordIterator it, final Map<Integer, Integer> newOrder, final String name) { long counter = 0; log.info(" Processing " + name); while (it.hasNext()) { counter++; final SAMRecord read = it.next(); final int oldRefIndex = read.getReferenceIndex(); final int oldMateIndex = read.getMateReferenceIndex(); final int newRefIndex = newOrderIndex(read, oldRefIndex, newOrder); read.setHeader(out.getFileHeader()); read.setReferenceIndex(newRefIndex); // read becoming unmapped if (oldRefIndex != NO_ALIGNMENT_REFERENCE_INDEX && newRefIndex == NO_ALIGNMENT_REFERENCE_INDEX) { read.setAlignmentStart(NO_ALIGNMENT_START); read.setReadUnmappedFlag(true); read.setCigarString(SAMRecord.NO_ALIGNMENT_CIGAR); read.setMappingQuality(SAMRecord.NO_MAPPING_QUALITY); } final int newMateIndex = newOrderIndex(read, oldMateIndex, newOrder); if (oldMateIndex != NO_ALIGNMENT_REFERENCE_INDEX && newMateIndex == NO_ALIGNMENT_REFERENCE_INDEX) { // mate becoming unmapped read.setMateAlignmentStart(NO_ALIGNMENT_START); read.setMateUnmappedFlag(true); read.setAttribute(SAMTag.MC.name(), null); // Set the Mate Cigar String to null } read.setMateReferenceIndex(newMateIndex); out.addAlignment(read); } it.close(); log.info("Wrote " + counter + " reads"); }
Example 12
Source File: Reader.java From dataflow-java with Apache License 2.0 | 4 votes |
/** * To compare how sharded reading works vs. plain HTSJDK sequential iteration, * this method implements such iteration. * This makes it easier to discover errors such as reads that are somehow * skipped by a sharded approach. */ public static Iterable<Read> readSequentiallyForTesting(Objects storageClient, String storagePath, Contig contig, ReaderOptions options) throws IOException { Stopwatch timer = Stopwatch.createStarted(); SamReader samReader = BAMIO.openBAM(storageClient, storagePath, options.getStringency()); SAMRecordIterator iterator = samReader.queryOverlapping(contig.referenceName, (int) contig.start + 1, (int) contig.end); List<Read> reads = new ArrayList<Read>(); int recordsBeforeStart = 0; int recordsAfterEnd = 0; int mismatchedSequence = 0; int recordsProcessed = 0; Filter filter = setupFilter(options, contig.referenceName); while (iterator.hasNext()) { SAMRecord record = iterator.next(); final boolean passesFilter = passesFilter(record, filter, contig.referenceName); if (!passesFilter) { mismatchedSequence++; continue; } if (record.getAlignmentStart() < contig.start) { recordsBeforeStart++; continue; } if (record.getAlignmentStart() > contig.end) { recordsAfterEnd++; continue; } reads.add(ReadUtils.makeReadGrpc(record)); recordsProcessed++; } timer.stop(); LOG.info("NON SHARDED: Processed " + recordsProcessed + " in " + timer + ". Speed: " + (recordsProcessed*1000)/timer.elapsed(TimeUnit.MILLISECONDS) + " reads/sec" + ", skipped other sequences " + mismatchedSequence + ", skippedBefore " + recordsBeforeStart + ", skipped after " + recordsAfterEnd); return reads; }
Example 13
Source File: WriteBAIFn.java From dataflow-java with Apache License 2.0 | 4 votes |
@ProcessElement public void processElement(DoFn<String, String>.ProcessContext c) throws Exception { Metrics.counter(WriteBAIFn.class, "Initialized Indexing Shard Count").inc(); Stopwatch stopWatch = Stopwatch.createStarted(); final HeaderInfo header = c.sideInput(headerView); final String bamFilePath = c.sideInput(writtenBAMFilerView); final Iterable<KV<Integer, Long>> sequenceShardSizes = c.sideInput(sequenceShardSizesView); final String sequenceName = c.element(); final int sequenceIndex = header.header.getSequence(sequenceName).getSequenceIndex(); final String baiFilePath = bamFilePath + "-" + String.format("%02d",sequenceIndex) + "-" + sequenceName + ".bai"; long offset = 0; int skippedReferences = 0; long bytesToProcess = 0; for (KV<Integer, Long> sequenceShardSize : sequenceShardSizes) { if (sequenceShardSize.getKey() < sequenceIndex) { offset += sequenceShardSize.getValue(); skippedReferences++; } else if (sequenceShardSize.getKey() == sequenceIndex) { bytesToProcess = sequenceShardSize.getValue(); } } LOG.info("Generating BAI index: " + baiFilePath); LOG.info("Reading BAM file: " + bamFilePath + " for reference " + sequenceName + ", skipping " + skippedReferences + " references at offset " + offset + ", expecting to process " + bytesToProcess + " bytes"); Options options = c.getPipelineOptions().as(Options.class); final Storage.Objects storage = Transport.newStorageClient( options .as(GCSOptions.class)) .build() .objects(); final SamReader reader = BAMIO.openBAM(storage, bamFilePath, ValidationStringency.SILENT, true, offset); final OutputStream outputStream = Channels.newOutputStream( new GcsUtil.GcsUtilFactory().create(options) .create(GcsPath.fromUri(baiFilePath), BAMIO.BAM_INDEX_FILE_MIME_TYPE)); final BAMShardIndexer indexer = new BAMShardIndexer(outputStream, reader.getFileHeader(), sequenceIndex); long processedReads = 0; long skippedReads = 0; // create and write the content if (bytesToProcess > 0) { SAMRecordIterator it = reader.iterator(); boolean foundRecords = false; while (it.hasNext()) { SAMRecord r = it.next(); if (!r.getReferenceName().equals(sequenceName)) { if (foundRecords) { LOG.info("Finishing index building for " + sequenceName + " after processing " + processedReads); break; } skippedReads++; continue; } else if (!foundRecords) { LOG.info("Found records for refrence " + sequenceName + " after skipping " + skippedReads); foundRecords = true; } indexer.processAlignment(r); processedReads++; } it.close(); } else { LOG.info("No records for refrence " + sequenceName + ": writing empty index "); } long noCoordinateReads = indexer.finish(); c.output(baiFilePath); c.output(NO_COORD_READS_COUNT_TAG, noCoordinateReads); LOG.info("Generated " + baiFilePath + ", " + processedReads + " reads, " + noCoordinateReads + " no coordinate reads, " + skippedReads + ", skipped reads"); stopWatch.stop(); Metrics.distribution(WriteBAIFn.class, "Indexing Shard Processing Time (sec)").update( stopWatch.elapsed(TimeUnit.SECONDS)); Metrics.counter(WriteBAIFn.class, "Finished Indexing Shard Count").inc(); Metrics.counter(WriteBAIFn.class, "Indexed reads").inc(processedReads); Metrics.counter(WriteBAIFn.class, "Indexed no coordinate reads").inc(noCoordinateReads); Metrics.distribution(WriteBAIFn.class, "Reads Per Indexing Shard").update(processedReads); }
Example 14
Source File: MarkIlluminaAdapters.java From picard with MIT License | 4 votes |
@Override protected int doWork() { IOUtil.assertFileIsReadable(INPUT); IOUtil.assertFileIsWritable(METRICS); final SamReader in = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).open(INPUT); final SAMFileHeader.SortOrder order = in.getFileHeader().getSortOrder(); SAMFileWriter out = null; if (OUTPUT != null) { IOUtil.assertFileIsWritable(OUTPUT); out = new SAMFileWriterFactory().makeSAMOrBAMWriter(in.getFileHeader(), true, OUTPUT); } final Histogram<Integer> histo = new Histogram<Integer>("clipped_bases", "read_count"); // Combine any adapters and custom adapter pairs from the command line into an array for use in clipping final AdapterPair[] adapters; { final List<AdapterPair> tmp = new ArrayList<AdapterPair>(); tmp.addAll(ADAPTERS); if (FIVE_PRIME_ADAPTER != null && THREE_PRIME_ADAPTER != null) { tmp.add(new CustomAdapterPair(FIVE_PRIME_ADAPTER, THREE_PRIME_ADAPTER)); } adapters = tmp.toArray(new AdapterPair[tmp.size()]); } //////////////////////////////////////////////////////////////////////// // Main loop that consumes reads, clips them and writes them to the output //////////////////////////////////////////////////////////////////////// final ProgressLogger progress = new ProgressLogger(log, 1000000, "Read"); final SAMRecordIterator iterator = in.iterator(); final AdapterMarker adapterMarker = new AdapterMarker(ADAPTER_TRUNCATION_LENGTH, adapters). setMaxPairErrorRate(MAX_ERROR_RATE_PE).setMinPairMatchBases(MIN_MATCH_BASES_PE). setMaxSingleEndErrorRate(MAX_ERROR_RATE_SE).setMinSingleEndMatchBases(MIN_MATCH_BASES_SE). setNumAdaptersToKeep(NUM_ADAPTERS_TO_KEEP). setThresholdForSelectingAdaptersToKeep(PRUNE_ADAPTER_LIST_AFTER_THIS_MANY_ADAPTERS_SEEN); while (iterator.hasNext()) { final SAMRecord rec = iterator.next(); final SAMRecord rec2 = rec.getReadPairedFlag() && iterator.hasNext() ? iterator.next() : null; rec.setAttribute(ReservedTagConstants.XT, null); // Do the clipping one way for PE and another for SE reads if (rec.getReadPairedFlag()) { // Assert that the input file is in query name order only if we see some PE reads if (order != SAMFileHeader.SortOrder.queryname) { throw new PicardException("Input BAM file must be sorted by queryname"); } if (rec2 == null) throw new PicardException("Missing mate pair for paired read: " + rec.getReadName()); rec2.setAttribute(ReservedTagConstants.XT, null); // Assert that we did in fact just get two mate pairs if (!rec.getReadName().equals(rec2.getReadName())) { throw new PicardException("Adjacent reads expected to be mate-pairs have different names: " + rec.getReadName() + ", " + rec2.getReadName()); } // establish which of pair is first and which second final SAMRecord first, second; if (rec.getFirstOfPairFlag() && rec2.getSecondOfPairFlag()) { first = rec; second = rec2; } else if (rec.getSecondOfPairFlag() && rec2.getFirstOfPairFlag()) { first = rec2; second = rec; } else { throw new PicardException("Two reads with same name but not correctly marked as 1st/2nd of pair: " + rec.getReadName()); } adapterMarker.adapterTrimIlluminaPairedReads(first, second); } else { adapterMarker.adapterTrimIlluminaSingleRead(rec); } // Then output the records, update progress and metrics for (final SAMRecord r : new SAMRecord[]{rec, rec2}) { if (r != null) { progress.record(r); if (out != null) out.addAlignment(r); final Integer clip = r.getIntegerAttribute(ReservedTagConstants.XT); if (clip != null) histo.increment(r.getReadLength() - clip + 1); } } } if (out != null) out.close(); // Lastly output the metrics to file final MetricsFile<?, Integer> metricsFile = getMetricsFile(); metricsFile.setHistogram(histo); metricsFile.write(METRICS); CloserUtil.close(in); return 0; }
Example 15
Source File: ReadGenoAndAsFromIndividual.java From systemsgenetics with GNU General Public License v3.0 | 4 votes |
public static String get_allele_specific_overlap_at_snp(GeneticVariant this_variant, int sample_index, String chromosome, String position, SamReader bam_file){ int pos_int = Integer.parseInt(position); Alleles all_variants = this_variant.getVariantAlleles(); Character ref_allele_char = all_variants.getAllelesAsChars()[0]; String ref_allele = ref_allele_char.toString(); //System.out.println("ref_allele: " + ref_allele); Character alt_allele_char = all_variants.getAllelesAsChars()[1]; String alt_allele = alt_allele_char.toString(); //System.out.println("alt_allele: " + alt_allele); int ref_overlap = 0; int alt_overlap = 0; int no__overlap = 0; // now determine per individual the sample variants. // I'm assuming the ordering is the same as the individual names created // by the getSampleNames() method. // Otherwise the data will be nicely permuted, and I will have to convert some stuff. int position_of_snp = Integer.parseInt(position); //Check to make sure the variant position is not 0. if(position_of_snp <= 0){ System.out.println("A SNP was read with a position lower than 1. This is illegal"); System.out.println("Please adapt your genotype files by removing SNPs with these illegal positions"); System.out.println("\tchr: " + chromosome + " pos: " + position); throw new IllegalDataException("Variant Position was less than 1"); } SAMRecordIterator all_reads_in_region; try{ all_reads_in_region = bam_file.queryOverlapping(chromosome, position_of_snp, position_of_snp); } catch(IllegalArgumentException e){ System.out.println("Found an error when trying the following input:"); System.out.println("chr:\t"+chromosome); System.out.println("pos:\t"+ position); System.out.println("If these values look correct, please make sure your bam file is sorted AND indexed by samtools."); System.out.println("If the problem persists, perhaps the chromosome (or sequence) are not the same in the genotype or bam file"); all_reads_in_region = null; System.exit(0); } String bases = ""; while(all_reads_in_region.hasNext()){ SAMRecord read_in_region = all_reads_in_region.next(); Character base_in_read = get_base_at_position(read_in_region, pos_int); if(GlobalVariables.verbosity >= 100){ System.out.println("base_in_read: " + base_in_read); } if( base_in_read == ref_allele.charAt(0) ){ ref_overlap++; } else if(base_in_read == alt_allele.charAt(0) ){ alt_overlap++; }else if(base_in_read == '!' || base_in_read == 'N'){ continue; }else{ no__overlap++; } } //This line below cost me a day to figure out the error. all_reads_in_region.close(); String string_for_output; string_for_output = Integer.toString(ref_overlap) + "\t" + Integer.toString(alt_overlap) + "\t" + Integer.toString(no__overlap); return(string_for_output); }
Example 16
Source File: Cram2Fastq.java From cramtools with Apache License 2.0 | 4 votes |
@Override public void doRun() throws IOException { super.doRun(); fo.close(); if (fo.empty) return; log.info("Sorting overflow BAM: ", fo.file.length()); SAMFileReader.setDefaultValidationStringency(ValidationStringency.SILENT); SAMFileReader r = new SAMFileReader(fo.file); SAMRecordIterator iterator = r.iterator(); if (!iterator.hasNext()) { r.close(); fo.file.delete(); return; } SAMRecord r1 = iterator.next(); SAMRecord r2 = null; counter = multiFastqOutputter.getCounter(); log.info("Counter=" + counter); while (!brokenPipe.get() && iterator.hasNext()) { r2 = iterator.next(); if (r1.getReadName().equals(r2.getReadName())) { print(r1, r2); counter++; r1 = null; if (!iterator.hasNext()) break; r1 = iterator.next(); r2 = null; } else { print(r1, 0); r1 = r2; r2 = null; counter++; } } if (r1 != null) print(r1, 0); r.close(); fo.file.delete(); }
Example 17
Source File: TestBAMRecordView.java From cramtools with Apache License 2.0 | 4 votes |
@Test public void test() throws IOException { byte[] buf = new byte[1024]; BAMRecordView view = new BAMRecordView(buf); view.setRefID(0); view.setAlignmentStart(77); view.setMappingScore(44); view.setIndexBin(99); view.setFlags(555); view.setMateRefID(0); view.setMateAlStart(78); view.setInsertSize(133); view.setReadName("name1"); view.setCigar(TextCigarCodec.decode("10M")); view.setBases("AAAAAAAAAA".getBytes()); view.setQualityScores("BBBBBBBBBB".getBytes()); int id = 'A' << 16 | 'M' << 8 | 'A'; view.addTag(id, "Q".getBytes(), 0, 1); int len = view.finish(); System.out.println(Arrays.toString(Arrays.copyOf(buf, len))); ByteArrayOutputStream baos = new ByteArrayOutputStream(); SAMFileHeader header = new SAMFileHeader(); header.addSequence(new SAMSequenceRecord("14", 14)); ByteArrayOutputStream baos2 = new ByteArrayOutputStream(); SAMFileWriter writer = new SAMFileWriterFactory().makeBAMWriter(header, true, baos2); SAMRecord record = new SAMRecord(header); record.setReferenceIndex(0); record.setAlignmentStart(1); record.setCigarString("10M"); record.setFlags(555); record.setMappingQuality(44); record.setMateReferenceIndex(0); record.setMateAlignmentStart(0); record.setInferredInsertSize(133); record.setReadName("name1"); record.setReadBases("AAAAAAAAAA".getBytes()); record.setBaseQualities("BBBBBBBBBB".getBytes()); record.setAttribute("AM", 'Q'); System.out.println("BAMFileWriter.addAlignment():"); writer.addAlignment(record); System.out.println("."); writer.close(); System.out.println("------------------------------------------"); System.out.println(); System.out.println(new String(baos2.toByteArray())); System.out.println(); SAMFileReader.setDefaultValidationStringency(ValidationStringency.SILENT); SAMFileReader reader2 = new SAMFileReader(new ByteArrayInputStream(baos2.toByteArray())); SAMRecordIterator iterator = reader2.iterator(); while (iterator.hasNext()) { record = iterator.next(); System.out.println(record.getSAMString()); } System.out.println("------------------------------------------"); BlockCompressedOutputStream bcos = new BlockCompressedOutputStream(baos, null); bcos.write("BAM\1".getBytes()); bcos.write(toByteArray(header)); CramInt.writeInt32(header.getSequenceDictionary().size(), bcos); for (final SAMSequenceRecord sequenceRecord : header.getSequenceDictionary().getSequences()) { byte[] bytes = sequenceRecord.getSequenceName().getBytes(); CramInt.writeInt32(bytes.length + 1, bcos); bcos.write(sequenceRecord.getSequenceName().getBytes()); bcos.write(0); CramInt.writeInt32(sequenceRecord.getSequenceLength(), bcos); } bcos.write(buf, 0, len); bcos.close(); System.out.println(new String(baos.toByteArray())); SAMFileReader reader = new SAMFileReader(new ByteArrayInputStream(baos.toByteArray())); iterator = reader.iterator(); while (iterator.hasNext()) { record = iterator.next(); System.out.println(record.getSAMString()); } reader.close(); }