htsjdk.samtools.SAMFileHeader Java Examples
The following examples show how to use
htsjdk.samtools.SAMFileHeader.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: IntervalUtilsUnitTest.java From gatk with BSD 3-Clause "New" or "Revised" License | 6 votes |
@Test(expectedExceptions={UserException.CouldNotReadInputFile.class, UserException.MalformedGenomeLoc.class, UserException.MalformedFile.class}, dataProvider="invalidIntervalTestData") public void testIntervalFileToListInvalidPicardIntervalHandling(GenomeLocParser genomeLocParser, String contig, int intervalStart, int intervalEnd ) throws Exception { final SAMFileHeader picardFileHeader = new SAMFileHeader(); picardFileHeader.addSequence(genomeLocParser.getContigInfo("1")); picardFileHeader.addSequence(new SAMSequenceRecord("2", 100000)); final IntervalList picardIntervals = new IntervalList(picardFileHeader); picardIntervals.add(new Interval(contig, intervalStart, intervalEnd, true, "dummyname")); final File picardIntervalFile = createTempFile("testIntervalFileToListInvalidPicardIntervalHandling", ".interval_list"); picardIntervals.write(picardIntervalFile); // loadIntervals() will validate all intervals against the sequence dictionary in our genomeLocParser, // and should throw for all intervals in our invalidIntervalTestData set IntervalUtils.parseIntervalArguments(genomeLocParser, picardIntervalFile.getAbsolutePath()); }
Example #2
Source File: SAMFileDestination.java From gatk with BSD 3-Clause "New" or "Revised" License | 6 votes |
/** * Create a new file destination. * * @param outPath path where output is written (doesn't have to be local) * @param createBamOutIndex true to create an index file for the bamout * @param createBamOutMD5 true to create an md5 file for the bamout * @param sourceHeader SAMFileHeader used to seed the output SAMFileHeader for this destination, must not be null * @param haplotypeReadGroupID read group ID used when writing haplotypes as reads */ public SAMFileDestination( final Path outPath, final boolean createBamOutIndex, final boolean createBamOutMD5, final SAMFileHeader sourceHeader, final String haplotypeReadGroupID) { super(sourceHeader, haplotypeReadGroupID); samWriter = new SAMFileGATKReadWriter(ReadUtils.createCommonSAMWriter( outPath, null, getBAMOutputHeader(), // use the header derived from the source header by HaplotypeBAMDestination false, createBamOutIndex, createBamOutMD5 )); }
Example #3
Source File: SATagBuilderUnitTests.java From gatk with BSD 3-Clause "New" or "Revised" License | 6 votes |
@Test public void testAddTag() { final SAMFileHeader header = ArtificialReadUtils.createArtificialSamHeader(); GATKRead read = ArtificialReadUtils.createArtificialRead("40M"); read.setAttribute("SA","2,500,+,3S2=1X2=2S,60,1;"); SATagBuilder builder = new SATagBuilder(read); GATKRead read1 = ArtificialReadUtils.createArtificialRead(header, "read1", 1, 10000, new byte[] {}, new byte[] {}, "4M"); read1.setIsSupplementaryAlignment(true); GATKRead read2 = ArtificialReadUtils.createArtificialRead(header, "read2", 2, 10001, new byte[] {}, new byte[] {}, "4S"); read2.setIsSupplementaryAlignment(true); SATagBuilder read2builder = new SATagBuilder(read2); GATKRead read3 = ArtificialReadUtils.createArtificialRead(header, "read3", 3, 10003, new byte[] {}, new byte[] {}, "4D"); read3.setIsSupplementaryAlignment(false); builder.addTag(read1).addTag(read3).addTag(read2builder).setSATag(); Assert.assertEquals(read.getAttributeAsString("SA"),"4,10003,+,4D,0,*;2,500,+,3S2=1X2=2S,60,1;2,10000,+,4M,0,*;3,10001,+,4S,0,*;"); }
Example #4
Source File: Merge.java From cramtools with Apache License 2.0 | 6 votes |
private static SAMFileHeader mergeHeaders(List<RecordSource> sources) { SAMFileHeader header = new SAMFileHeader(); for (RecordSource source : sources) { SAMFileHeader h = source.reader.getFileHeader(); for (SAMSequenceRecord seq : h.getSequenceDictionary().getSequences()) { if (header.getSequenceDictionary().getSequence(seq.getSequenceName()) == null) header.addSequence(seq); } for (SAMProgramRecord pro : h.getProgramRecords()) { if (header.getProgramRecord(pro.getProgramGroupId()) == null) header.addProgramRecord(pro); } for (String comment : h.getComments()) header.addComment(comment); for (SAMReadGroupRecord rg : h.getReadGroups()) { if (header.getReadGroup(rg.getReadGroupId()) == null) header.addReadGroup(rg); } } return header; }
Example #5
Source File: PathSeqBwaSpark.java From gatk with BSD 3-Clause "New" or "Revised" License | 6 votes |
/** * Writes RDD of reads to path. Note writeReads() is not used because there are separate paired/unpaired outputs. * Header sequence dictionary is reduced to only those that were aligned to. */ private void writeBam(final JavaRDD<GATKRead> reads, final String inputBamPath, final boolean isPaired, final JavaSparkContext ctx, SAMFileHeader header) { //Only retain header sequences that were aligned to. //This invokes an action and therefore the reads must be cached. reads.persist(StorageLevel.MEMORY_AND_DISK_SER()); header = PSBwaUtils.removeUnmappedHeaderSequences(header, reads, logger); final String outputPath = isPaired ? outputPaired : outputUnpaired; try { ReadsSparkSink.writeReads(ctx, outputPath, null, reads, header, shardedOutput ? ReadsWriteFormat.SHARDED : ReadsWriteFormat.SINGLE, PSUtils.pathseqGetRecommendedNumReducers(inputBamPath, numReducers, getTargetPartitionSize()), shardedPartsDir, true, splittingIndexGranularity); } catch (final IOException e) { throw new UserException.CouldNotCreateOutputFile(outputPath, "Writing failed", e); } }
Example #6
Source File: FindBreakpointEvidenceSpark.java From gatk with BSD 3-Clause "New" or "Revised" License | 6 votes |
static SVIntervalTree<SVInterval> findGenomewideHighCoverageIntervalsToIgnore(final FindBreakpointEvidenceSparkArgumentCollection params, final ReadMetadata readMetadata, final JavaSparkContext ctx, final SAMFileHeader header, final JavaRDD<GATKRead> unfilteredReads, final SVReadFilter filter, final Logger logger, final Broadcast<ReadMetadata> broadcastMetadata) { final int capacity = header.getSequenceDictionary().getSequences().stream() .mapToInt(seqRec -> (seqRec.getSequenceLength() + DEPTH_WINDOW_SIZE - 1)/DEPTH_WINDOW_SIZE).sum(); final List<SVInterval> depthIntervals = new ArrayList<>(capacity); for (final SAMSequenceRecord sequenceRecord : header.getSequenceDictionary().getSequences()) { final int contigID = readMetadata.getContigID(sequenceRecord.getSequenceName()); final int contigLength = sequenceRecord.getSequenceLength(); for (int i = 1; i < contigLength; i = i + DEPTH_WINDOW_SIZE) { depthIntervals.add(new SVInterval(contigID, i, Math.min(contigLength, i + DEPTH_WINDOW_SIZE))); } } final List<SVInterval> highCoverageSubintervals = findHighCoverageSubintervalsAndLog( params, ctx, broadcastMetadata, depthIntervals, unfilteredReads, filter, logger); final SVIntervalTree<SVInterval> highCoverageSubintervalTree = new SVIntervalTree<>(); highCoverageSubintervals.forEach(i -> highCoverageSubintervalTree.put(i, i)); return highCoverageSubintervalTree; }
Example #7
Source File: FilterBam.java From Drop-seq with MIT License | 6 votes |
private SAMFileHeader editSequenceDictionary(final SAMFileHeader fileHeader) { if (DROP_REJECTED_REF || !STRIP_REF_PREFIX.isEmpty()) { // Make mutable copy of sequence list final ArrayList<SAMSequenceRecord> sequences = new ArrayList<>(fileHeader.getSequenceDictionary().getSequences()); final ListIterator<SAMSequenceRecord> it = sequences.listIterator(); while (it.hasNext()) { final SAMSequenceRecord sequence = it.next(); if (DROP_REJECTED_REF && filterReference(sequence.getSequenceName())) it.remove(); else { final String editedSequenceName = stripReferencePrefix(sequence.getSequenceName()); if (editedSequenceName != null) it.set(cloneWithNewName(sequence, editedSequenceName)); } } fileHeader.getSequenceDictionary().setSequences(sequences); } return fileHeader; }
Example #8
Source File: SamBamUtils.java From chipster with MIT License | 6 votes |
public static void sortSamBam(File samBamFile, File sortedBamFile) { SAMFileReader.setDefaultValidationStringency(ValidationStringency.SILENT); SAMFileReader reader = new SAMFileReader(IOUtil.openFileForReading(samBamFile)); SAMFileWriter writer = null; try { reader.getFileHeader().setSortOrder(SAMFileHeader.SortOrder.coordinate); writer = new SAMFileWriterFactory().makeBAMWriter(reader.getFileHeader(), false, sortedBamFile); Iterator<SAMRecord> iterator = reader.iterator(); while (iterator.hasNext()) { writer.addAlignment(iterator.next()); } } finally { closeIfPossible(reader); closeIfPossible(writer); } }
Example #9
Source File: PreprocessingTools.java From halvade with GNU General Public License v3.0 | 6 votes |
public int callElPrep(String input, String output, String rg, int threads, SAMRecordIterator SAMit, SAMFileHeader header, String dictFile, boolean updateRG, boolean keepDups, String RGID) throws InterruptedException, QualityException { SAMRecord sam; SAMFileWriterFactory factory = new SAMFileWriterFactory(); SAMFileWriter Swriter = factory.makeSAMWriter(header, true, new File(input)); int reads = 0; while(SAMit.hasNext()) { sam = SAMit.next(); if(updateRG) sam.setAttribute(SAMTag.RG.name(), RGID); Swriter.addAlignment(sam); reads++; } Swriter.close(); String customArgs = HalvadeConf.getCustomArgs(context.getConfiguration(), "elprep", ""); String[] command = CommandGenerator.elPrep(bin, input, output, threads, true, rg, null, !keepDups, customArgs); long estimatedTime = runProcessAndWait("elPrep", command); if(context != null) context.getCounter(HalvadeCounters.TIME_ELPREP).increment(estimatedTime); return reads; }
Example #10
Source File: Feature.java From abra2 with MIT License | 6 votes |
public static int findFirstOverlappingRegion(SAMFileHeader samHeader, SAMRecordWrapper read, int readStart, int readEnd, List<Feature> regions, int start) { if (start < 0) { start = 0; } for (int idx=start; idx<regions.size(); idx++) { Feature region = regions.get(idx); if ( (read.getSamRecord().getReferenceIndex() < samHeader.getSequenceDictionary().getSequenceIndex(region.getSeqname())) || (read.getSamRecord().getReferenceName().equals(region.getSeqname()) && readStart < region.getStart()) ) { // This read is in between regions // TODO: adjust start region here return -1; } else if (region.overlaps(read.getSamRecord().getReferenceName(), readStart, readEnd)) { return idx; } } // This read is beyond all regions return -1; }
Example #11
Source File: ReadsSparkSource.java From gatk with BSD 3-Clause "New" or "Revised" License | 6 votes |
/** * Loads ADAM reads stored as Parquet. * @param inputPathSpecifier path to the Parquet data * @return RDD of (ADAM-backed) GATKReads from the file. */ public JavaRDD<GATKRead> getADAMReads(final GATKPath inputPathSpecifier, final TraversalParameters traversalParameters, final SAMFileHeader header) throws IOException { Job job = Job.getInstance(ctx.hadoopConfiguration()); AvroParquetInputFormat.setAvroReadSchema(job, AlignmentRecord.getClassSchema()); Broadcast<SAMFileHeader> bHeader; if (header == null) { bHeader= ctx.broadcast(null); } else { bHeader = ctx.broadcast(header); } @SuppressWarnings("unchecked") JavaRDD<AlignmentRecord> recordsRdd = ctx.newAPIHadoopFile( inputPathSpecifier.getRawInputString(), AvroParquetInputFormat.class, Void.class, AlignmentRecord.class, job.getConfiguration()) .values(); JavaRDD<GATKRead> readsRdd = recordsRdd.map(record -> new BDGAlignmentRecordToGATKReadAdapter(record, bHeader.getValue())); JavaRDD<GATKRead> filteredRdd = readsRdd.filter(record -> samRecordOverlaps(record.convertToSAMRecord(header), traversalParameters)); return fixPartitionsIfQueryGrouped(ctx, header, filteredRdd); }
Example #12
Source File: ReadPileupUnitTest.java From gatk with BSD 3-Clause "New" or "Revised" License | 6 votes |
@Test public void testSortedIterator() throws Exception { final SAMFileHeader header = ArtificialReadUtils.createArtificialSamHeader(); final GATKRead read1 = ArtificialReadUtils.createArtificialRead(header, "10M"); final GATKRead read2 = ArtificialReadUtils.createArtificialRead(header, "10M"); read1.setPosition(new SimpleInterval("22", 200, 210)); read2.setPosition(new SimpleInterval("22", 208, 218)); read1.setMatePosition(read2); read2.setMatePosition(read1); final Locatable loc = new SimpleInterval("22", 208, 208); final Map<String, ReadPileup> stratified = new LinkedHashMap<>(); stratified.put("sample1", new ReadPileup(loc, Arrays.asList(read2), 0)); stratified.put("sample2", new ReadPileup(loc, Arrays.asList(read1), 9)); final ReadPileup combined = new ReadPileup(loc, stratified); final Iterator<PileupElement> sortedIterator = combined.sortedIterator(); Assert.assertSame(sortedIterator.next().getRead(), read1); Assert.assertSame(sortedIterator.next().getRead(), read2); }
Example #13
Source File: GenomeLoc.java From gatk with BSD 3-Clause "New" or "Revised" License | 6 votes |
/** * Calculates the distance between two genomeLocs across contigs (if necessary). * * Returns minDistance(other) if in same contig. * Works with intervals! * Uses the SAMFileHeader to extract the size of the contigs and follows the order in the dictionary. * * @param other the genome loc to compare to * @param samFileHeader the contig information * @return the sum of all the bases in between the genomeLocs, including entire contigs */ public long distanceAcrossContigs(final GenomeLoc other, final SAMFileHeader samFileHeader) { if (onSameContig(other)) { return minDistance(other); } // add the distance from the first genomeLoc to the end of it's contig and the distance from the // second genomeLoc to the beginning of it's contig. long distance = 0; if (contigIndex < other.contigIndex) { distance += samFileHeader.getSequence(contigIndex).getSequenceLength() - stop; distance += other.start; } else { distance += samFileHeader.getSequence(other.contigIndex).getSequenceLength() - other.stop; distance += start; } // add any contig (in its entirety) in between the two genomeLocs for (int i=Math.min(this.contigIndex, other.contigIndex) + 1; i < Math.max(this.contigIndex, other.contigIndex); i++) { distance += samFileHeader.getSequence(i).getSequenceLength(); } return distance; }
Example #14
Source File: GatherBamFiles.java From picard with MIT License | 6 votes |
/** * Simple implementation of a gather operations that uses SAMFileReaders and Writers in order to concatenate * multiple BAM files. */ private static void gatherNormally(final List<File> inputs, final File output, final boolean createIndex, final boolean createMd5, final File referenceFasta) { final SAMFileHeader header; { header = SamReaderFactory.makeDefault().referenceSequence(referenceFasta).getFileHeader(inputs.get(0)); } final SAMFileWriter out = new SAMFileWriterFactory().setCreateIndex(createIndex).setCreateMd5File(createMd5).makeSAMOrBAMWriter(header, true, output); for (final File f : inputs) { log.info("Gathering " + f.getAbsolutePath()); final SamReader in = SamReaderFactory.makeDefault().referenceSequence(referenceFasta).open(f); for (final SAMRecord rec : in) out.addAlignment(rec); CloserUtil.close(in); } out.close(); }
Example #15
Source File: SamUtilsTest.java From rtg-tools with BSD 2-Clause "Simplified" License | 6 votes |
public void testRunCommentUpdate() { final SAMFileHeader sf = new SAMFileHeader(); sf.addComment("READ-SDF-ID:" + Long.toHexString(123456)); final UUID uuid = UUID.randomUUID(); sf.addComment(SamUtils.RUN_ID_ATTRIBUTE + uuid); sf.addComment("TEMPLATE-SDF-ID:$$$$$$$"); sf.addComment("BLAH:KLJERLWJ"); SamUtils.updateRunId(sf); for (final String s : sf.getComments()) { if (s.startsWith(SamUtils.RUN_ID_ATTRIBUTE)) { assertFalse(s.contains(uuid.toString())); } } }
Example #16
Source File: FastqToSam.java From picard with MIT License | 6 votes |
/** Creates a simple header with the values provided on the command line. */ public SAMFileHeader createSamFileHeader() { final SAMReadGroupRecord rgroup = new SAMReadGroupRecord(this.READ_GROUP_NAME); rgroup.setSample(this.SAMPLE_NAME); if (this.LIBRARY_NAME != null) rgroup.setLibrary(this.LIBRARY_NAME); if (this.PLATFORM != null) rgroup.setPlatform(this.PLATFORM); if (this.PLATFORM_UNIT != null) rgroup.setPlatformUnit(this.PLATFORM_UNIT); if (this.SEQUENCING_CENTER != null) rgroup.setSequencingCenter(SEQUENCING_CENTER); if (this.PREDICTED_INSERT_SIZE != null) rgroup.setPredictedMedianInsertSize(PREDICTED_INSERT_SIZE); if (this.DESCRIPTION != null) rgroup.setDescription(this.DESCRIPTION); if (this.RUN_DATE != null) rgroup.setRunDate(this.RUN_DATE); if (this.PLATFORM_MODEL != null) rgroup.setPlatformModel(this.PLATFORM_MODEL); if (this.PROGRAM_GROUP != null) rgroup.setProgramGroup(this.PROGRAM_GROUP); final SAMFileHeader header = new SAMFileHeader(); header.addReadGroup(rgroup); for (final String comment : COMMENT) { header.addComment(comment); } header.setSortOrder(this.SORT_ORDER); return header ; }
Example #17
Source File: Feature.java From abra2 with MIT License | 6 votes |
public static List<Integer> findAllOverlappingRegions(SAMFileHeader samHeader, SAMRecordWrapper read, List<Feature> regions, int start) { List<Integer> overlappingRegions = new ArrayList<Integer>(); for (Span span : read.getSpanningRegions()) { int idx = findFirstOverlappingRegion(samHeader, read, span.start, span.end, regions, start); if (idx > -1) { overlappingRegions.add(idx); boolean isOverlap = true; idx += 1; while (isOverlap && idx < regions.size()) { Feature region = regions.get(idx); if (region.overlaps(read.getSamRecord().getReferenceName(), span.start, span.end)) { overlappingRegions.add(idx); } else { isOverlap = false; } idx += 1; } } } return overlappingRegions; }
Example #18
Source File: VarDictLauncher.java From VarDictJava with MIT License | 5 votes |
/** * Read map of chromosome lengths * @param bam BAM file name * @return Map of chromosome lengths. Key - chromosome name, value - length * @throws IOException if BAM/SAM file can't be opened */ public static Map<String, Integer> readChr(String bam) throws IOException { try (SamReader reader = SamReaderFactory.makeDefault().open(new File(bam))) { SAMFileHeader header = reader.getFileHeader(); Map<String, Integer> chrs = new HashMap<>(); for (SAMSequenceRecord record : header.getSequenceDictionary().getSequences()) { record.getSequenceLength(); String sn = record.getSequenceName(); int ln = record.getSequenceLength(); chrs.put(sn, ln); } return chrs; } }
Example #19
Source File: GATKReadFilterPluginDescriptorTest.java From gatk with BSD 3-Clause "New" or "Revised" License | 5 votes |
private GATKRead simpleGoodRead( final SAMFileHeader header ) { final String cigarString = "101M"; final Cigar cigar = TextCigarCodec.decode(cigarString); GATKRead read = createRead(header, cigar, 1, 0, 10); read.setMappingQuality(50); return read; }
Example #20
Source File: ReadGroupCovariateUnitTest.java From gatk with BSD 3-Clause "New" or "Revised" License | 5 votes |
private static void runTest(final SAMReadGroupRecord rg, final String expected, final ReadGroupCovariate covariate) { final SAMFileHeader header = ArtificialReadUtils.createArtificialSamHeaderWithReadGroup(rg); GATKRead read = ArtificialReadUtils.createRandomRead(header, 10); read.setReadGroup(rg.getReadGroupId()); ReadCovariates readCovariates = new ReadCovariates(read.getLength(), 1, new CovariateKeyCache()); covariate.recordValues(read, header, readCovariates, true); verifyCovariateArray(readCovariates.getMismatchesKeySet(), expected, covariate); }
Example #21
Source File: ExampleCollectSingleMetricsSpark.java From gatk with BSD 3-Clause "New" or "Revised" License | 5 votes |
/** * Execute the actual metrics collection. * @param filteredReads Input reads, already filtered using the filter returned by {@link #getDefaultReadFilters(SAMFileHeader)} * @param samHeader SAMFileHeader for the input */ @Override protected void collectMetrics( final JavaRDD<GATKRead> filteredReads, final SAMFileHeader samHeader) { exampleSingleCollector.collectMetrics(filteredReads, samHeader); }
Example #22
Source File: RevertSam.java From picard with MIT License | 5 votes |
private SAMFileHeader createOutHeader( final SAMFileHeader inHeader, final SAMFileHeader.SortOrder sortOrder, final boolean removeAlignmentInformation) { final SAMFileHeader outHeader = new SAMFileHeader(); outHeader.setSortOrder(sortOrder); if (!removeAlignmentInformation) { outHeader.setSequenceDictionary(inHeader.getSequenceDictionary()); outHeader.setProgramRecords(inHeader.getProgramRecords()); } return outHeader; }
Example #23
Source File: CollectGcBiasMetricsTest.java From picard with MIT License | 5 votes |
public void setupTest2(final int ID, final String readGroupId, final SAMReadGroupRecord readGroupRecord, final String sample, final String library, final SAMFileHeader header, final SAMRecordSetBuilder setBuilder) throws IOException { final String separator = ":"; final int contig1 = 0; final int contig2 = 1; final int contig3 = 2; readGroupRecord.setSample(sample); readGroupRecord.setPlatform(platform); readGroupRecord.setLibrary(library); readGroupRecord.setPlatformUnit(readGroupId); setBuilder.setReadGroup(readGroupRecord); setBuilder.setUseNmFlag(true); setBuilder.setHeader(header); final int max = 800; final int min = 1; final Random rg = new Random(5); //add records that align to all 3 chr in reference file for (int i = 0; i < NUM_READS; i++) { final int start = rg.nextInt(max) + min; final String newReadName = READ_NAME + separator + ID + separator + i; if (i<=NUM_READS/3) { setBuilder.addPair(newReadName, contig1, start + ID, start + ID + LENGTH); } else if (i< (NUM_READS - (NUM_READS/3))) { setBuilder.addPair(newReadName, contig2, start + ID, start + ID + LENGTH); } else { setBuilder.addPair(newReadName, contig3, start + ID, start + ID + LENGTH); } } }
Example #24
Source File: HaplotypeCallerSpark.java From gatk-protected with BSD 3-Clause "New" or "Revised" License | 5 votes |
/** * @return an {@link OverlapDetector} loaded with {@link ShardBoundary} * based on the -L intervals */ private static OverlapDetector<ShardBoundary> getShardBoundaryOverlapDetector(final SAMFileHeader header, final List<SimpleInterval> intervals, final int readShardSize, final int readShardPadding) { final OverlapDetector<ShardBoundary> shardBoundaryOverlapDetector = new OverlapDetector<>(0, 0); intervals.stream() .flatMap(interval -> Shard.divideIntervalIntoShards(interval, readShardSize, readShardPadding, header.getSequenceDictionary()).stream()) .forEach(boundary -> shardBoundaryOverlapDetector.addLhs(boundary, boundary.getPaddedInterval())); return shardBoundaryOverlapDetector; }
Example #25
Source File: SAMRecordWriter.java From Hadoop-BAM with MIT License | 5 votes |
private void init( OutputStream output, SAMFileHeader header, boolean writeHeader) throws IOException { this.header = header; writer = new SAMTextWriter(output); writer.setSortOrder(header.getSortOrder(), false); if (writeHeader) writer.setHeader(header); }
Example #26
Source File: Fragment.java From gatk with BSD 3-Clause "New" or "Revised" License | 5 votes |
public Fragment(final GATKRead first, final SAMFileHeader header, int partitionIndex, MarkDuplicatesScoringStrategy scoringStrategy, Map<String, Byte> headerLibraryMap) { super(partitionIndex, first.getName()); this.score = scoringStrategy.score(first); this.R1R = first.isReverseStrand(); this.key = ReadsKey.getKeyForFragment(ReadUtils.getStrandedUnclippedStart(first), isRead1ReverseStrand(), (short)ReadUtils.getReferenceIndex(first, header), headerLibraryMap.get(MarkDuplicatesSparkUtils.getLibraryForRead(first, header, LibraryIdGenerator.UNKNOWN_LIBRARY))); }
Example #27
Source File: SATagBuilderUnitTests.java From gatk with BSD 3-Clause "New" or "Revised" License | 5 votes |
@Test public static void testSetReadsAsSupplementalNo() { final SAMFileHeader header = ArtificialReadUtils.createArtificialSamHeader(); GATKRead primarySup = ArtificialReadUtils.createArtificialRead(header, "read1", 1, 10000, new byte[] {}, new byte[] {}, "4M"); primarySup.setMappingQuality(100); primarySup.setAttribute("NM",20); primarySup.setIsReverseStrand(true); GATKRead secondarySup = ArtificialReadUtils.createArtificialRead(header, "read1", 2, 10001, new byte[] {}, new byte[] {}, "4S"); secondarySup.setMappingQuality(200); List<GATKRead> sups = new ArrayList<>(); sups.add(secondarySup); SATagBuilder.setReadsAsSupplemental(primarySup,sups); Assert.assertEquals(primarySup.getAttributeAsString("SA"), "3,10001,+,4S,200,*;"); Assert.assertEquals(primarySup.isSupplementaryAlignment(), false); Assert.assertEquals(secondarySup.getAttributeAsString("SA"), "2,10000,-,4M,100,20;"); Assert.assertEquals(secondarySup.isSupplementaryAlignment(), true); GATKRead tertiarySup = ArtificialReadUtils.createArtificialRead(header, "read1", 3, 10003, new byte[] {}, new byte[] {}, "4D"); tertiarySup.setMappingQuality(200); primarySup.clearAttribute("SA"); secondarySup.clearAttribute("SA"); sups.add(tertiarySup); SATagBuilder.setReadsAsSupplemental(primarySup,sups); Assert.assertEquals(sups.size(), 2); Assert.assertEquals(sups.get(0).getAttributeAsString("SA"), "2,10000,-,4M,100,20;4,10003,+,4D,200,*;"); Assert.assertTrue(sups.get(1).getAttributeAsString("SA").startsWith("2,10000,-,4M,100,20;")); Assert.assertTrue(primarySup.getAttributeAsString("SA").contains("4,10003,+,4D,200,*;")); Assert.assertTrue(primarySup.getAttributeAsString("SA").contains("3,10001,+,4S,200,*;")); GATKRead unmappedSup = ArtificialReadUtils.createArtificialUnmappedRead(header,new byte[] {}, new byte[] {}); primarySup.clearAttribute("SA"); sups.clear(); sups.add(unmappedSup); SATagBuilder.setReadsAsSupplemental(primarySup,sups); Assert.assertEquals(primarySup.getAttributeAsString("SA"), "*,0,+,*,0,*;"); Assert.assertEquals(sups.get(0).getAttributeAsString("SA"), "2,10000,-,4M,100,20;"); }
Example #28
Source File: GATKProtectedVariantContextUtilsUnitTest.java From gatk-protected with BSD 3-Clause "New" or "Revised" License | 5 votes |
@Test public void testGetPileup() { final SAMFileHeader header = ArtificialReadUtils.createArtificialSamHeader(1, 1, 1000); final Locatable loc = new SimpleInterval("chr1", 10, 10); final int readLength = 3; //this read doesn't overlap {@code loc} final GATKRead read1 = ArtificialReadUtils.createArtificialRead(header, "read1", 0, 1, readLength); read1.setBases(Utils.dupBytes((byte) 'A', readLength)); read1.setBaseQualities(Utils.dupBytes((byte) 30, readLength)); read1.setCigar("3M"); //this read overlaps {@code loc} with a Q30 'A' final GATKRead read2 = ArtificialReadUtils.createArtificialRead(header, "read2", 0, 10, readLength); read2.setBases(Utils.dupBytes((byte) 'A', readLength)); read2.setBaseQualities(Utils.dupBytes((byte) 30, readLength)); read2.setCigar("3M"); //this read overlaps {@code loc} with a Q20 'C' (due to the deletion) final GATKRead read3 = ArtificialReadUtils.createArtificialRead(header, "read3", 0, 7, readLength); read3.setBases(Utils.dupBytes((byte) 'C', readLength)); read3.setBaseQualities(Utils.dupBytes((byte) 20, readLength)); read3.setCigar("1M1D2M"); //this read doesn't overlap {@code loc} due to the deletion final GATKRead read4 = ArtificialReadUtils.createArtificialRead(header, "read4", 0, 7, readLength); read4.setBases(Utils.dupBytes((byte) 'C', readLength)); read4.setBaseQualities(Utils.dupBytes((byte) 20, readLength)); read4.setCigar("1M5D2M"); final ReadPileup pileup = GATKProtectedVariantContextUtils.getPileup(loc, Arrays.asList(read1, read2, read3, read4)); // the pileup should contain a Q30 'A' and a Q20 'C' final int[] counts = pileup.getBaseCounts(); Assert.assertEquals(counts, new int[]{1, 1, 0, 0}); }
Example #29
Source File: MarkDuplicatesSpark.java From gatk with BSD 3-Clause "New" or "Revised" License | 5 votes |
private boolean treatAsReadGroupOrdered(SAMFileHeader header, boolean treatUnsortedAsReadGrouped) { final SAMFileHeader.SortOrder sortOrder = header.getSortOrder(); if( ReadUtils.isReadNameGroupedBam(header) ){ return true; } else if ( treatUnsortedAsReadGrouped && (sortOrder.equals(SAMFileHeader.SortOrder.unknown) || sortOrder.equals(SAMFileHeader.SortOrder.unsorted))) { logger.warn("Input bam was marked as " + sortOrder.toString() + " but " + TREAT_UNSORTED_AS_ORDERED + " is specified so it's being treated as read name grouped"); return true; } return false; }
Example #30
Source File: AssemblyRegionIterator.java From gatk with BSD 3-Clause "New" or "Revised" License | 5 votes |
/** * Constructs an AssemblyRegionIterator over a provided read shard * @param readShard MultiIntervalShard containing the reads that will go into the assembly regions. * Must have a MAPPED filter set on it. * @param readHeader header for the reads * @param reference source of reference bases (may be null) * @param features source of arbitrary features (may be null) * @param evaluator evaluator used to determine whether a locus is active */ public AssemblyRegionIterator(final MultiIntervalShard<GATKRead> readShard, final SAMFileHeader readHeader, final ReferenceDataSource reference, final FeatureManager features, final AssemblyRegionEvaluator evaluator, final AssemblyRegionArgumentCollection assemblyRegionArgs) { Utils.nonNull(readShard); Utils.nonNull(readHeader); Utils.nonNull(evaluator); assemblyRegionArgs.validate(); this.readShard = readShard; this.readHeader = readHeader; this.reference = reference; this.features = features; this.evaluator = evaluator; this.assemblyRegionArgs = assemblyRegionArgs; this.readyRegion = null; this.previousRegionReads = null; this.pendingRegions = new ArrayDeque<>(); this.readCachingIterator = new ReadCachingIterator(readShard.iterator()); this.readCache = new ArrayDeque<>(); this.activityProfile = new BandPassActivityProfile(assemblyRegionArgs.maxProbPropagationDistance, assemblyRegionArgs.activeProbThreshold, BandPassActivityProfile.MAX_FILTER_SIZE, BandPassActivityProfile.DEFAULT_SIGMA, readHeader); // We wrap our LocusIteratorByState inside an IntervalAlignmentContextIterator so that we get empty loci // for uncovered locations. This is critical for reproducing GATK 3.x behavior! this.libs = new LocusIteratorByState(readCachingIterator, DownsamplingMethod.NONE, false, ReadUtils.getSamplesFromHeader(readHeader), readHeader, true); final IntervalLocusIterator intervalLocusIterator = new IntervalLocusIterator(readShard.getIntervals().iterator()); this.locusIterator = new IntervalAlignmentContextIterator(libs, intervalLocusIterator, readHeader.getSequenceDictionary()); readyRegion = loadNextAssemblyRegion(); }