htsjdk.samtools.SamReaderFactory Java Examples
The following examples show how to use
htsjdk.samtools.SamReaderFactory.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: CleanSamTest.java From picard with MIT License | 6 votes |
@Test(dataProvider = "testCleanSamDataProvider") public void testCleanSam(final String samFile, final String expectedCigar) throws IOException { final File cleanedFile = File.createTempFile(samFile + ".", ".sam"); cleanedFile.deleteOnExit(); final String[] args = new String[]{ "INPUT=" + new File(TEST_DATA_DIR, samFile).getAbsolutePath(), "OUTPUT=" + cleanedFile.getAbsolutePath() }; Assert.assertEquals(runPicardCommandLine(args), 0); final SamFileValidator validator = new SamFileValidator(new PrintWriter(System.out), 8000); validator.setIgnoreWarnings(true); validator.setVerbose(true, 1000); validator.setErrorsToIgnore(Arrays.asList(SAMValidationError.Type.MISSING_READ_GROUP)); SamReader samReader = SamReaderFactory.makeDefault().validationStringency(ValidationStringency.LENIENT).open(cleanedFile); final SAMRecord rec = samReader.iterator().next(); samReader.close(); Assert.assertEquals(rec.getCigarString(), expectedCigar); samReader = SamReaderFactory.makeDefault().validationStringency(ValidationStringency.LENIENT).open(cleanedFile); final boolean validated = validator.validateSamFileVerbose(samReader, null); samReader.close(); Assert.assertTrue(validated, "ValidateSamFile failed"); }
Example #2
Source File: GetHetCoverageIntegrationTest.java From gatk-protected with BSD 3-Clause "New" or "Revised" License | 6 votes |
@BeforeClass public void initHeaders() throws IOException { try (final SamReader normalBamReader = SamReaderFactory.makeDefault().open(NORMAL_BAM_FILE); final SamReader tumorBamReader = SamReaderFactory.makeDefault().open(TUMOR_BAM_FILE)) { normalHeader = normalBamReader.getFileHeader(); tumorHeader = tumorBamReader.getFileHeader(); normalHetPulldownExpected = new Pulldown(normalHeader); normalHetPulldownExpected.add(new AllelicCount(new SimpleInterval("1", 11522, 11522), 7, 4)); normalHetPulldownExpected.add(new AllelicCount(new SimpleInterval("1", 12098, 12098), 8, 6)); normalHetPulldownExpected.add(new AllelicCount(new SimpleInterval("1", 14630, 14630), 9, 8)); normalHetPulldownExpected.add(new AllelicCount(new SimpleInterval("2", 14689, 14689), 6, 9)); normalHetPulldownExpected.add(new AllelicCount(new SimpleInterval("2", 14982, 14982), 6, 5)); tumorHetPulldownExpected = new Pulldown(tumorHeader); tumorHetPulldownExpected.add(new AllelicCount(new SimpleInterval("1", 11522, 11522), 7, 4)); tumorHetPulldownExpected.add(new AllelicCount(new SimpleInterval("1", 12098, 12098), 8, 6)); tumorHetPulldownExpected.add(new AllelicCount(new SimpleInterval("1", 14630, 14630), 9, 8)); tumorHetPulldownExpected.add(new AllelicCount(new SimpleInterval("2", 14689, 14689), 6, 9)); tumorHetPulldownExpected.add(new AllelicCount(new SimpleInterval("2", 14982, 14982), 6, 5)); } }
Example #3
Source File: ApplyBQSRIntegrationTest.java From gatk with BSD 3-Clause "New" or "Revised" License | 6 votes |
@Test public void testAddingPG() throws IOException { final File inFile = new File(resourceDir, "NA12878.oq.read_consumes_zero_ref_bases.bam"); final File outFile = GATKBaseTest.createTempFile("testAddingPG", ".bam"); final String[] args = new String[] { "--input", inFile.getAbsolutePath(), "--" + StandardArgumentDefinitions.BQSR_TABLE_LONG_NAME, resourceDir + "NA12878.oq.gatk4.recal.gz", "--use-original-qualities", "--" + StandardArgumentDefinitions.ADD_OUTPUT_SAM_PROGRAM_RECORD, "--output", outFile.getAbsolutePath() }; runCommandLine(args); //The expected output is actually the same as inputs for this read (this ignores the PGs header) SamAssertionUtils.assertSamsEqual(outFile, inFile); //input has no GATK ApplyBQSR in headers Assert.assertNull(SamReaderFactory.makeDefault().open(inFile).getFileHeader().getProgramRecord("GATK ApplyBQSR")); //output has a GATK ApplyBQSR in headers Assert.assertNotNull(SamReaderFactory.makeDefault().open(outFile).getFileHeader().getProgramRecord("GATK ApplyBQSR")); }
Example #4
Source File: SamBamUtils.java From chipster with MIT License | 6 votes |
public void indexBam(File bamFile, File baiFile) { SAMFileReader.setDefaultValidationStringency(ValidationStringency.SILENT); final SamReader bam; // input from a normal file IOUtil.assertFileIsReadable(bamFile); bam = SamReaderFactory.makeDefault().referenceSequence(null) .enable(SamReaderFactory.Option.INCLUDE_SOURCE_IN_RECORDS) .open(bamFile); if (bam.type() != SamReader.Type.BAM_TYPE) { throw new SAMException("Input file must be bam file, not sam file."); } if (!bam.getFileHeader().getSortOrder().equals(SAMFileHeader.SortOrder.coordinate)) { throw new SAMException("Input bam file must be sorted by coordinate"); } BAMIndexer.createIndex(bam, baiFile); CloserUtil.close(bam); }
Example #5
Source File: ReadsSparkSource.java From gatk with BSD 3-Clause "New" or "Revised" License | 6 votes |
/** * Loads the header using Hadoop-BAM. * @param filePathSpecifier path to the bam. * @param referencePathSpecifier Reference path or null if not available. Reference is required for CRAM files. * @return the header for the bam. */ public SAMFileHeader getHeader(final GATKPath filePathSpecifier, final GATKPath referencePathSpecifier) { final GATKPath cramReferencePathSpec = checkCramReference(ctx, filePathSpecifier, referencePathSpecifier); // GCS case if (BucketUtils.isGcsUrl(filePathSpecifier)) { final SamReaderFactory factory = SamReaderFactory.makeDefault() .validationStringency(validationStringency) .referenceSequence(cramReferencePathSpec == null ? null : referencePathSpecifier.toPath()); try (final ReadsDataSource readsDataSource = new ReadsPathDataSource(Collections.singletonList(filePathSpecifier.toPath()), factory)) { return readsDataSource.getHeader(); } } // local file or HDFs case try { return HtsjdkReadsRddStorage.makeDefault(ctx) .validationStringency(validationStringency) .referenceSourcePath(cramReferencePathSpec == null ? null : cramReferencePathSpec.getRawInputString()) .read(filePathSpecifier.getRawInputString()) .getHeader(); } catch (IOException | IllegalArgumentException e) { throw new UserException("Failed to read bam header from " + filePathSpecifier.getRawInputString() + "\n Caused by:" + e.getMessage(), e); } }
Example #6
Source File: TestCRAMInputFormatOnHDFS.java From Hadoop-BAM with MIT License | 6 votes |
@Test public void testReader() throws Exception { int expectedCount = 0; SamReader samReader = SamReaderFactory.makeDefault() .referenceSequence(new File(URI.create(reference))).open(new File(input)); for (SAMRecord r : samReader) { expectedCount++; } CRAMInputFormat inputFormat = new CRAMInputFormat(); List<InputSplit> splits = inputFormat.getSplits(jobContext); assertEquals(1, splits.size()); RecordReader<LongWritable, SAMRecordWritable> reader = inputFormat .createRecordReader(splits.get(0), taskAttemptContext); reader.initialize(splits.get(0), taskAttemptContext); int actualCount = 0; while (reader.nextKeyValue()) { actualCount++; } assertEquals(expectedCount, actualCount); }
Example #7
Source File: SamFormatConverter.java From picard with MIT License | 6 votes |
/** * Convert a file from one of sam/bam/cram format to another based on the extension of output. * * @param input input file in one of sam/bam/cram format * @param output output to write converted file to, the conversion is based on the extension of this filename * @param referenceSequence the reference sequence to use, necessary when reading/writing cram * @param createIndex whether or not an index should be written alongside the output file */ public static void convert(final File input, final File output, final File referenceSequence, final Boolean createIndex) { IOUtil.assertFileIsReadable(input); IOUtil.assertFileIsWritable(output); final SamReader reader = SamReaderFactory.makeDefault().referenceSequence(referenceSequence).open(input); final SAMFileWriter writer = new SAMFileWriterFactory().makeWriter(reader.getFileHeader(), true, output, referenceSequence); if (createIndex && writer.getFileHeader().getSortOrder() != SAMFileHeader.SortOrder.coordinate) { throw new PicardException("Can't CREATE_INDEX unless sort order is coordinate"); } final ProgressLogger progress = new ProgressLogger(Log.getInstance(SamFormatConverter.class)); for (final SAMRecord rec : reader) { writer.addAlignment(rec); progress.record(rec); } CloserUtil.close(reader); writer.close(); }
Example #8
Source File: PrintReadsIntegrationTest.java From gatk with BSD 3-Clause "New" or "Revised" License | 6 votes |
@Test public void testNoConflictPG() throws IOException { final File inFile = new File(TEST_DATA_DIR, "print_reads_withPG.sam"); final File outFile = GATKBaseTest.createTempFile("testNoConflictRG", ".sam"); final String[] args = new String[] { "--input" , inFile.getAbsolutePath(), "--" + StandardArgumentDefinitions.ADD_OUTPUT_SAM_PROGRAM_RECORD, "--output", outFile.getAbsolutePath() }; runCommandLine(args); //Make sure contents are the same SamAssertionUtils.assertSamsEqual(outFile, inFile); //input has GATK PrintReads not NOT GATK PrintReads.1 in headers Assert.assertNotNull(SamReaderFactory.makeDefault().open(inFile).getFileHeader().getProgramRecord("GATK PrintReads")); Assert.assertNull(SamReaderFactory.makeDefault().open(inFile).getFileHeader().getProgramRecord("GATK PrintReads.1")); //output has both GATK PrintReads and GATK PrintReads.1 in headers Assert.assertNotNull(SamReaderFactory.makeDefault().open(outFile).getFileHeader().getProgramRecord("GATK PrintReads")); Assert.assertNotNull(SamReaderFactory.makeDefault().open(outFile).getFileHeader().getProgramRecord("GATK PrintReads.1")); }
Example #9
Source File: TagReadWithGeneFunctionTest.java From Drop-seq with MIT License | 6 votes |
@Test public void testTagCodingUTR () { SamReader inputSam = SamReaderFactory.makeDefault().open(testBAMFile); SAMFileHeader header = inputSam.getFileHeader(); SAMSequenceDictionary bamDict = header.getSequenceDictionary(); final OverlapDetector<Gene> geneOverlapDetector = GeneAnnotationReader.loadAnnotationsFile(annotationsFile, bamDict); String recName="H575CBGXY:2:21206:5655:8103"; SAMRecord r = getRecord(testBAMFile, recName); TagReadWithGeneFunction tagger = new TagReadWithGeneFunction(); r=tagger.setAnnotations(r, geneOverlapDetector, false); String geneName = r.getStringAttribute("gn"); String geneStrand = r.getStringAttribute("gs"); String function = r.getStringAttribute("gf"); Assert.assertEquals(geneName, "Elp2"); Assert.assertEquals(geneStrand, "+"); Assert.assertEquals(function, "UTR"); Assert.assertEquals(r.getStringAttribute("XF"), LocusFunction.UTR.name()); }
Example #10
Source File: SAMHeaderReader.java From Hadoop-BAM with MIT License | 6 votes |
/** Does not close the stream. */ public static SAMFileHeader readSAMHeaderFrom( final InputStream in, final Configuration conf) { final ValidationStringency stringency = getValidationStringency(conf); SamReaderFactory readerFactory = SamReaderFactory.makeDefault() .setOption(SamReaderFactory.Option.EAGERLY_DECODE, false) .setUseAsyncIo(false); if (stringency != null) { readerFactory.validationStringency(stringency); } final ReferenceSource refSource = getReferenceSource(conf); if (null != refSource) { readerFactory.referenceSource(refSource); } return readerFactory.open(SamInputResource.of(in)).getFileHeader(); }
Example #11
Source File: AddCommentsToBamTest.java From picard with MIT License | 6 votes |
@Test public void testAddCommentsToBam() throws Exception { final File outputFile = File.createTempFile("addCommentsToBamTest.", BamFileIoUtils.BAM_FILE_EXTENSION); outputFile.deleteOnExit(); runIt(INPUT_FILE, outputFile, commentList); final SAMFileHeader newHeader = SamReaderFactory.makeDefault().getFileHeader(outputFile); // The original comments are massaged when they're added to the header. Perform the same massaging here, // and then compare the lists final List<String> massagedComments = new LinkedList<String>(); for (final String comment : commentList) { massagedComments.add(SAMTextHeaderCodec.COMMENT_PREFIX + comment); } Assert.assertEquals(newHeader.getComments(), massagedComments); outputFile.delete(); }
Example #12
Source File: RevertBaseQualityScoresIntegrationTest.java From gatk with BSD 3-Clause "New" or "Revised" License | 6 votes |
private static boolean hasOriginalQualScores( final File bam ) throws IOException { try ( final SamReader reader = SamReaderFactory.makeDefault().open(bam) ) { for ( SAMRecord read : reader ) { final byte[] originalBaseQualities = read.getOriginalBaseQualities(); Assert.assertNotNull(originalBaseQualities); final byte[] baseQualities = read.getBaseQualities(); Assert.assertEquals(originalBaseQualities.length, baseQualities.length); for (int i = 0; i < originalBaseQualities.length; ++i) { if (originalBaseQualities[i] != baseQualities[i]) { return false; } } } } return true; }
Example #13
Source File: TagReadWithGeneExonFunctionTest.java From Drop-seq with MIT License | 6 votes |
@Test public void testTagCodingUTR () { SamReader inputSam = SamReaderFactory.makeDefault().open(testBAMFile); SAMFileHeader header = inputSam.getFileHeader(); SAMSequenceDictionary bamDict = header.getSequenceDictionary(); final OverlapDetector<Gene> geneOverlapDetector = GeneAnnotationReader.loadAnnotationsFile(annotationsFile, bamDict); String recName="H575CBGXY:2:21206:5655:8103"; SAMRecord r = getRecord(testBAMFile, recName); TagReadWithGeneExonFunction tagger = new TagReadWithGeneExonFunction(); r=tagger.setAnnotations(r, geneOverlapDetector); String geneName = r.getStringAttribute("GE"); String geneStrand = r.getStringAttribute("GS"); Assert.assertEquals(geneName, "Elp2"); Assert.assertEquals(geneStrand, "+"); Assert.assertEquals(r.getStringAttribute("XF"), LocusFunction.UTR.name()); }
Example #14
Source File: MergeSamFilesTest.java From picard with MIT License | 6 votes |
/** * Confirm that unsorted input can result in coordinate sorted output, with index created. */ @Test public void unsortedInputSortedOutputTest() throws Exception { final File unsortedInputTestDataDir = new File(TEST_DATA_DIR, "unsorted_input"); final File mergedOutput = File.createTempFile("unsortedInputSortedOutputTest.", BamFileIoUtils.BAM_FILE_EXTENSION); mergedOutput.deleteOnExit(); final File mergedOutputIndex = new File(mergedOutput.getParent(), IOUtil.basename(mergedOutput)+ BAMIndex.BAI_INDEX_SUFFIX); mergedOutputIndex.deleteOnExit(); final String[] args = { "I=" + new File(unsortedInputTestDataDir, "1.sam").getAbsolutePath(), "I=" + new File(unsortedInputTestDataDir, "2.sam").getAbsolutePath(), "O=" + mergedOutput.getAbsolutePath(), "CREATE_INDEX=true", "SO=coordinate" }; final int mergeExitStatus = runPicardCommandLine(args); Assert.assertEquals(mergeExitStatus, 0); final SamReader reader = SamReaderFactory.makeDefault().open(mergedOutput); Assert.assertEquals(reader.getFileHeader().getSortOrder(), SAMFileHeader.SortOrder.coordinate); Assert.assertTrue(reader.hasIndex()); new ValidateSamTester().assertSamValid(mergedOutput); Assert.assertTrue(mergedOutputIndex.delete()); CloserUtil.close(reader); }
Example #15
Source File: CountBamLinesApplication.java From hmftools with GNU General Public License v3.0 | 6 votes |
private void run() throws IOException, ExecutionException, InterruptedException { LOGGER.info("Reading GC Profile"); final Multimap<Chromosome, GCProfile> gcProfiles = GCProfileFactory.loadGCContent(config.windowSize(), config.gcProfilePath()); final SamReaderFactory readerFactory = readerFactory(config); final CountSupplier countSupplier = new CountSupplier(config.tumor(), config.outputDirectory(), config.windowSize(), config.minMappingQuality(), executorService, readerFactory); final Multimap<Chromosome, CobaltCount> readCounts = countSupplier.fromBam(config.referenceBamPath(), config.tumorBamPath()); final RatioSupplier ratioSupplier = new RatioSupplier(config.reference(), config.tumor(), config.outputDirectory()); final Multimap<Chromosome, CobaltRatio> ratios = ratioSupplier.generateRatios(gcProfiles, readCounts); final String outputFilename = CobaltRatioFile.generateFilenameForWriting(config.outputDirectory(), config.tumor()); LOGGER.info("Persisting cobalt ratios to {}", outputFilename); versionInfo.write(config.outputDirectory()); CobaltRatioFile.write(outputFilename, ratios); new RatioSegmentation(executorService, config.outputDirectory()).applySegmentation(config.reference(), config.tumor()); }
Example #16
Source File: AmberApplication.java From hmftools with GNU General Public License v3.0 | 6 votes |
private void runTumorOnly() throws InterruptedException, ExecutionException, IOException { final SamReaderFactory readerFactory = readerFactory(config); final ListMultimap<Chromosome, BaseDepth> allNormal = emptyNormalHetSites(sites); final ListMultimap<Chromosome, TumorBAF> tumorBAFMap = tumorBAF(readerFactory, allNormal); final List<TumorBAF> tumorBAFList = tumorBAFMap.values() .stream() .filter(x -> x.tumorRefSupport() >= config.tumorOnlyMinSupport()) .filter(x -> x.tumorAltSupport() >= config.tumorOnlyMinSupport()) .filter(x -> isFinite(x.refFrequency()) && Doubles.greaterOrEqual(x.refFrequency(), config.tumorOnlyMinVaf())) .filter(x -> isFinite(x.altFrequency()) && Doubles.greaterOrEqual(x.altFrequency(), config.tumorOnlyMinVaf())) .sorted() .collect(toList()); final List<AmberBAF> amberBAFList = tumorBAFList.stream().map(AmberBAF::create).filter(x -> Double.isFinite(x.tumorBAF())).collect(toList()); persistence.persisQC(amberBAFList, Lists.newArrayList()); persistence.persistVersionInfo(versionInfo); persistence.persistBafVcf(tumorBAFList, new AmberHetNormalEvidence()); persistence.persistBAF(amberBAFList); }
Example #17
Source File: TumorContaminationEvidence.java From hmftools with GNU General Public License v3.0 | 6 votes |
public TumorContaminationEvidence(int typicalReadDepth, int minMappingQuality, int minBaseQuality, final String contig, final String bamFile, final SamReaderFactory samReaderFactory, final List<BaseDepth> baseDepths) { this.bafFactory = new BaseDepthFactory(minBaseQuality); this.contig = contig; this.bamFile = bamFile; this.samReaderFactory = samReaderFactory; this.evidenceMap = Maps.newHashMap(); final List<ModifiableBaseDepth> tumorRecords = Lists.newArrayList(); for (BaseDepth baseDepth : baseDepths) { ModifiableBaseDepth modifiableBaseDepth = BaseDepthFactory.create(baseDepth); evidenceMap.put(baseDepth, modifiableBaseDepth); tumorRecords.add(modifiableBaseDepth); } this.selector = GenomePositionSelectorFactory.create(tumorRecords); final GenomeRegions builder = new GenomeRegions(contig, typicalReadDepth); baseDepths.forEach(x -> builder.addPosition(x.position())); this.supplier = new SAMSlicer(minMappingQuality, builder.build()); }
Example #18
Source File: ReplaceSamHeader.java From picard with MIT License | 6 votes |
/** * Do the work after command line has been parsed. * RuntimeException may be thrown by this method, and are reported appropriately. * * @return program exit status. */ @Override protected int doWork() { IOUtil.assertFileIsReadable(INPUT); IOUtil.assertFileIsReadable(HEADER); IOUtil.assertFileIsWritable(OUTPUT); final SAMFileHeader replacementHeader = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).getFileHeader(HEADER); if (BamFileIoUtils.isBamFile(INPUT)) { blockCopyReheader(replacementHeader); } else { standardReheader(replacementHeader); } return 0; }
Example #19
Source File: ReplaceSamHeader.java From picard with MIT License | 6 votes |
private void standardReheader(final SAMFileHeader replacementHeader) { final SamReader recordReader = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).validationStringency(ValidationStringency.SILENT).open(INPUT); if (replacementHeader.getSortOrder() != recordReader.getFileHeader().getSortOrder()) { throw new PicardException("Sort orders of INPUT (" + recordReader.getFileHeader().getSortOrder().name() + ") and HEADER (" + replacementHeader.getSortOrder().name() + ") do not agree."); } final SAMFileWriter writer = new SAMFileWriterFactory().makeSAMOrBAMWriter(replacementHeader, true, OUTPUT); final ProgressLogger progress = new ProgressLogger(Log.getInstance(ReplaceSamHeader.class)); for (final SAMRecord rec : recordReader) { rec.setHeader(replacementHeader); writer.addAlignment(rec); progress.record(rec); } writer.close(); CloserUtil.close(recordReader); }
Example #20
Source File: BamSlicerApplication.java From hmftools with GNU General Public License v3.0 | 6 votes |
private static void sliceFromVCF(@NotNull CommandLine cmd) throws IOException { String inputPath = cmd.getOptionValue(INPUT); String vcfPath = cmd.getOptionValue(VCF); int proximity = Integer.parseInt(cmd.getOptionValue(PROXIMITY, "500")); SamReaderFactory readerFactory = createFromCommandLine(cmd); SamReader reader = readerFactory.open(new File(inputPath)); QueryInterval[] intervals = getIntervalsFromVCF(vcfPath, reader.getFileHeader(), proximity); CloseableIterator<SAMRecord> iterator = reader.queryOverlapping(intervals); SAMFileWriter writer = new SAMFileWriterFactory().setCreateIndex(true) .makeBAMWriter(reader.getFileHeader(), true, new File(cmd.getOptionValue(OUTPUT))); writeToSlice(writer, iterator); writer.close(); reader.close(); }
Example #21
Source File: BAMIO.java From dataflow-java with Apache License 2.0 | 6 votes |
private static SamReader openBAMReader(SamInputResource resource, ValidationStringency stringency, boolean includeFileSource, long offset) throws IOException { SamReaderFactory samReaderFactory = SamReaderFactory .makeDefault() .validationStringency(stringency) .enable(SamReaderFactory.Option.CACHE_FILE_BASED_INDEXES); if (includeFileSource) { samReaderFactory.enable(SamReaderFactory.Option.INCLUDE_SOURCE_IN_RECORDS); } if (offset == 0) { return samReaderFactory.open(resource); } LOG.info("Initializing seeking reader with the offset of " + offset); SeekingBAMFileReader primitiveReader = new SeekingBAMFileReader(resource, false, stringency, DefaultSAMRecordFactory.getInstance(), offset); final SeekingReaderAdapter reader = new SeekingReaderAdapter(primitiveReader, resource); samReaderFactory.reapplyOptions(reader); return reader; }
Example #22
Source File: MapBarcodesByEditDistanceTest.java From Drop-seq with MIT License | 6 votes |
private UMISharingData readUMISharingData (File f, String cellBarcode, ParentEditDistanceMatcher matcher) { Map<String, Set<TagValues>> umisPerBarcode = new HashMap<String, Set<TagValues>>(); ObjectCounter<String> barcodes = new ObjectCounter<>(); SamReader reader = SamReaderFactory.makeDefault().open(UmiSharingData); SAMRecordIterator iter = reader.iterator(); while (iter.hasNext()) { SAMRecord r = iter.next(); String barcode = r.getStringAttribute(cellBarcode); TagValues v = matcher.getValues(r); Set<TagValues> s = umisPerBarcode.get(barcode); if (s==null) { s = new HashSet<TagValues>(); umisPerBarcode.put(barcode, s); } if (!s.contains(v)) barcodes.increment(barcode); s.add(v); } UMISharingData result = new UMISharingData(); result.barcodes=barcodes; result.umisPerBarcode=umisPerBarcode; return result; }
Example #23
Source File: GatherBamFilesTest.java From picard with MIT License | 6 votes |
@Test public void sanityCheckTheGathering() throws Exception { final File outputFile = File.createTempFile("gatherBamFilesTest.samFile.", BamFileIoUtils.BAM_FILE_EXTENSION); outputFile.deleteOnExit(); final List<String> args = new ArrayList<String>(); for (final File splitBam : SPLIT_BAMS) { args.add("INPUT=" + splitBam.getAbsolutePath()); } args.add("OUTPUT=" + outputFile); runPicardCommandLine(args); try (final SamReader samReader1 = SamReaderFactory.makeDefault().open(ORIG_BAM); final SamReader samReader2 = SamReaderFactory.makeDefault().open(SPLIT_BAMS.get(0))) { final SamComparison samComparison = new SamComparison(samReader1, samReader2); Assert.assertFalse(samComparison.areEqual()); } }
Example #24
Source File: IntervalTagComparatorTest.java From Drop-seq with MIT License | 6 votes |
/** * Tests ordering of a few SAMRecords without use of a dictionary file. * This means that chromosomes are sorted in natural string ordering. * The difference is illustrated below, assuming that the sequence dictionary file has chromosomes * in the order 1,2,3,4,5,6,7,8,9,10,11....so chromosome 2 has index 2, chromosome 10 has index 10. * > sort (c("2", "10"), decreasing=F) * [1] "10" "2" * > sort (c(2, 10), decreasing=F) * [1] 2 10 */ @Test public void orderRecordsTestWithDictFile () { SamReader inputSam = SamReaderFactory.makeDefault().open(this.dictFile); List<SAMRecord> recs = getRecords(); Integer a = 1; // these start at 0 Integer b = 9; int exp = a.compareTo(b); // when we sort without sequence dictionary, we expect the chr10 tagged read to show up first. IntervalTagComparator c = new IntervalTagComparator(this.intervalTag, inputSam.getFileHeader().getSequenceDictionary()); int comp = c.compare(recs.get(0), recs.get(1)); // assert sorting same order. Assert.assertEquals(exp>0, comp>0); }
Example #25
Source File: SequenceDictionaryIntersectionTest.java From Drop-seq with MIT License | 6 votes |
@Test public void testNoMatch() { final File leftDictFile = new File(TESTDATA_DIR, CHR_BASENAME + "sam"); final SamReader leftDict = SamReaderFactory.makeDefault().open(leftDictFile); try { final SAMSequenceDictionary nonMatchingDict = new SAMSequenceDictionary( leftDict.getFileHeader().getSequenceDictionary().getSequences().stream(). map((s) -> new SAMSequenceRecord("xyz_" + s.getSequenceName(), s.getSequenceLength())). collect(Collectors.toList())); SequenceDictionaryIntersection sdi = new SequenceDictionaryIntersection( leftDict, leftDictFile.getName(), nonMatchingDict, "with prefixes"); Assert.assertEquals(sdi.getIntersection().size(), 0); System.out.println("Terse with descriptions: " + sdi.message(false)); System.out.println("Verbose with descriptions: " + sdi.message(true)); sdi = new SequenceDictionaryIntersection(leftDict, nonMatchingDict); Assert.assertEquals(sdi.getIntersection().size(), 0); System.out.println("Terse without descriptions: " + sdi.message(false) + "\n"); System.out.println("Verbose without descriptions: " + sdi.message(true) + "\n"); } finally { CloserUtil.close(leftDict); } }
Example #26
Source File: UnmarkDuplicatesIntegrationTest.java From gatk with BSD 3-Clause "New" or "Revised" License | 6 votes |
private static int getDuplicateCountForBam(final File bam, final File referenceFile) throws IOException { int duplicateCount = 0; final SamReaderFactory factory = SamReaderFactory.makeDefault(); if(referenceFile != null) { factory.referenceSequence(referenceFile); } try ( final SamReader reader = factory.open(bam) ) { for ( SAMRecord read : reader ) { if ( read.getDuplicateReadFlag() ) { ++duplicateCount; } } } return duplicateCount; }
Example #27
Source File: SAMRecordReader.java From Hadoop-BAM with MIT License | 5 votes |
private SamReader createSamReader(InputStream in, ValidationStringency stringency) { SamReaderFactory readerFactory = SamReaderFactory.makeDefault() .setOption(SamReaderFactory.Option.EAGERLY_DECODE, false) .setUseAsyncIo(false); if (stringency != null) { readerFactory.validationStringency(stringency); } return readerFactory.open(SamInputResource.of(in)); }
Example #28
Source File: GeneFunctionIteratorWrapperTest.java From Drop-seq with MIT License | 5 votes |
private SAMRecord getRecord (final String recName) { SamReader inputSam = SamReaderFactory.makeDefault().open(testBAMFile); for (SAMRecord r: inputSam) if (r.getReadName().equals(recName)) return (r); Assert.fail("Should never happen"); throw new IllegalArgumentException("Never found " + recName); }
Example #29
Source File: AggregatedTagOrderIteratorTest.java From Drop-seq with MIT License | 5 votes |
private Iterator<List<SAMRecord>> filterSortAndGroupByTags(final File bamFile, final String...tags) { final SamReader reader = SamReaderFactory.makeDefault().enable(SamReaderFactory.Option.EAGERLY_DECODE).open(bamFile); final Iterator<SAMRecord> filteringIterator = new MissingTagFilteringIterator(reader.iterator(), tags); final List<Comparator<SAMRecord>> comparators = new ArrayList<>(tags.length); for (final String tag: tags) { comparators.add(new StringTagComparator(tag)); } final MultiComparator<SAMRecord> comparator = new MultiComparator<>(comparators); final CloseableIterator<SAMRecord> sortingIterator = SamRecordSortingIteratorFactory.create(reader.getFileHeader(), filteringIterator, comparator, null); return new GroupingIterator<>(sortingIterator, comparator); }
Example #30
Source File: MergeBamAlignmentTest.java From picard with MIT License | 5 votes |
/** * Various scenarios for EarliestFragmentStrategy. Confirms that one of the expected ones is selected. * Note that there may be an arbitrary selection due to a tie. */ @Test(dataProvider = "testEarliestFragmentStrategyDataProvider") public void testEarliestFragmentStrategy(final String testName, final MultipleAlignmentSpec[] specs) throws IOException { final File output = File.createTempFile(testName, ".sam"); output.deleteOnExit(); final File[] sams = createSamFilesToBeMerged(specs); doMergeAlignment(sams[0], Collections.singletonList(sams[1]), null, null, null, null, false, true, false, 1, "0", "1.0", "align!", "myAligner", true, fasta, output, SamPairUtil.PairOrientation.FR, MergeBamAlignment.PrimaryAlignmentStrategy.EarliestFragment, ONE_OF_THE_BEST_TAG, null, false, null); final SamReader mergedReader = SamReaderFactory.makeDefault().open(output); boolean seenPrimary = false; for (final SAMRecord rec : mergedReader) { if (!rec.getNotPrimaryAlignmentFlag()) { seenPrimary = true; final Integer oneOfTheBest = rec.getIntegerAttribute(ONE_OF_THE_BEST_TAG); Assert.assertEquals(oneOfTheBest, new Integer(1), "Read not marked as one of the best is primary: " + rec); } } CloserUtil.close(mergedReader); Assert.assertTrue(seenPrimary, "Never saw primary alignment"); }