htsjdk.samtools.SAMFileHeader Java Exaples

Source File: IntervalUtilsUnitTest.java From gatk with BSD 3-Clause "New" or "Revised" License

6 votes

@Test(expectedExceptions={UserException.CouldNotReadInputFile.class, UserException.MalformedGenomeLoc.class, UserException.MalformedFile.class}, dataProvider="invalidIntervalTestData")
public void testIntervalFileToListInvalidPicardIntervalHandling(GenomeLocParser genomeLocParser,
                                   String contig, int intervalStart, int intervalEnd ) throws Exception {

    final SAMFileHeader picardFileHeader = new SAMFileHeader();
    picardFileHeader.addSequence(genomeLocParser.getContigInfo("1"));
    picardFileHeader.addSequence(new SAMSequenceRecord("2", 100000));

    final IntervalList picardIntervals = new IntervalList(picardFileHeader);
    picardIntervals.add(new Interval(contig, intervalStart, intervalEnd, true, "dummyname"));

    final File picardIntervalFile = createTempFile("testIntervalFileToListInvalidPicardIntervalHandling", ".interval_list");
    picardIntervals.write(picardIntervalFile);

    // loadIntervals() will validate all intervals against the sequence dictionary in our genomeLocParser,
    // and should throw for all intervals in our invalidIntervalTestData set
    IntervalUtils.parseIntervalArguments(genomeLocParser, picardIntervalFile.getAbsolutePath());
}

Source File: SAMFileDestination.java From gatk with BSD 3-Clause "New" or "Revised" License

6 votes

/**
 * Create a new file destination.
 *
 * @param outPath path where output is written (doesn't have to be local)
 * @param createBamOutIndex true to create an index file for the bamout
 * @param createBamOutMD5 true to create an md5 file for the bamout
 * @param sourceHeader SAMFileHeader used to seed the output SAMFileHeader for this destination, must not be null
 * @param haplotypeReadGroupID  read group ID used when writing haplotypes as reads
 */
public SAMFileDestination(
        final Path outPath,
        final boolean createBamOutIndex,
        final boolean createBamOutMD5,
        final SAMFileHeader sourceHeader,
        final String haplotypeReadGroupID)
{
    super(sourceHeader, haplotypeReadGroupID);
    samWriter = new SAMFileGATKReadWriter(ReadUtils.createCommonSAMWriter(
            outPath,
            null,
            getBAMOutputHeader(), // use the header derived from the source header by HaplotypeBAMDestination
            false,
            createBamOutIndex,
            createBamOutMD5
    ));
}

Source File: SATagBuilderUnitTests.java From gatk with BSD 3-Clause "New" or "Revised" License

6 votes

@Test
public void testAddTag() {
    final SAMFileHeader header = ArtificialReadUtils.createArtificialSamHeader();
    GATKRead read = ArtificialReadUtils.createArtificialRead("40M");
    read.setAttribute("SA","2,500,+,3S2=1X2=2S,60,1;");
    SATagBuilder builder = new SATagBuilder(read);

    GATKRead read1 = ArtificialReadUtils.createArtificialRead(header, "read1", 1, 10000, new byte[] {}, new byte[] {}, "4M");
    read1.setIsSupplementaryAlignment(true);
    GATKRead read2 = ArtificialReadUtils.createArtificialRead(header, "read2", 2, 10001, new byte[] {}, new byte[] {}, "4S");
    read2.setIsSupplementaryAlignment(true);
    SATagBuilder read2builder = new SATagBuilder(read2);
    GATKRead read3 = ArtificialReadUtils.createArtificialRead(header, "read3", 3, 10003, new byte[] {}, new byte[] {}, "4D");
    read3.setIsSupplementaryAlignment(false);

    builder.addTag(read1).addTag(read3).addTag(read2builder).setSATag();
    Assert.assertEquals(read.getAttributeAsString("SA"),"4,10003,+,4D,0,*;2,500,+,3S2=1X2=2S,60,1;2,10000,+,4M,0,*;3,10001,+,4S,0,*;");
}

Source File: Merge.java From cramtools with Apache License 2.0

6 votes

private static SAMFileHeader mergeHeaders(List<RecordSource> sources) {
	SAMFileHeader header = new SAMFileHeader();
	for (RecordSource source : sources) {
		SAMFileHeader h = source.reader.getFileHeader();

		for (SAMSequenceRecord seq : h.getSequenceDictionary().getSequences()) {
			if (header.getSequenceDictionary().getSequence(seq.getSequenceName()) == null)
				header.addSequence(seq);
		}

		for (SAMProgramRecord pro : h.getProgramRecords()) {
			if (header.getProgramRecord(pro.getProgramGroupId()) == null)
				header.addProgramRecord(pro);
		}

		for (String comment : h.getComments())
			header.addComment(comment);

		for (SAMReadGroupRecord rg : h.getReadGroups()) {
			if (header.getReadGroup(rg.getReadGroupId()) == null)
				header.addReadGroup(rg);
		}
	}

	return header;
}

Source File: PathSeqBwaSpark.java From gatk with BSD 3-Clause "New" or "Revised" License

6 votes

/**
 * Writes RDD of reads to path. Note writeReads() is not used because there are separate paired/unpaired outputs.
 * Header sequence dictionary is reduced to only those that were aligned to.
 */
private void writeBam(final JavaRDD<GATKRead> reads, final String inputBamPath, final boolean isPaired,
                      final JavaSparkContext ctx, SAMFileHeader header) {

    //Only retain header sequences that were aligned to.
    //This invokes an action and therefore the reads must be cached.
    reads.persist(StorageLevel.MEMORY_AND_DISK_SER());
    header = PSBwaUtils.removeUnmappedHeaderSequences(header, reads, logger);

    final String outputPath = isPaired ? outputPaired : outputUnpaired;
    try {
        ReadsSparkSink.writeReads(ctx, outputPath, null, reads, header,
                shardedOutput ? ReadsWriteFormat.SHARDED : ReadsWriteFormat.SINGLE,
                PSUtils.pathseqGetRecommendedNumReducers(inputBamPath, numReducers, getTargetPartitionSize()), shardedPartsDir, true, splittingIndexGranularity);
    } catch (final IOException e) {
        throw new UserException.CouldNotCreateOutputFile(outputPath, "Writing failed", e);
    }
}

Source File: FindBreakpointEvidenceSpark.java From gatk with BSD 3-Clause "New" or "Revised" License

6 votes

static SVIntervalTree<SVInterval> findGenomewideHighCoverageIntervalsToIgnore(final FindBreakpointEvidenceSparkArgumentCollection params,
                                                                              final ReadMetadata readMetadata,
                                                                              final JavaSparkContext ctx,
                                                                              final SAMFileHeader header,
                                                                              final JavaRDD<GATKRead> unfilteredReads,
                                                                              final SVReadFilter filter,
                                                                              final Logger logger,
                                                                              final Broadcast<ReadMetadata> broadcastMetadata) {
    final int capacity = header.getSequenceDictionary().getSequences().stream()
            .mapToInt(seqRec -> (seqRec.getSequenceLength() + DEPTH_WINDOW_SIZE - 1)/DEPTH_WINDOW_SIZE).sum();
    final List<SVInterval> depthIntervals = new ArrayList<>(capacity);
    for (final SAMSequenceRecord sequenceRecord : header.getSequenceDictionary().getSequences()) {
        final int contigID = readMetadata.getContigID(sequenceRecord.getSequenceName());
        final int contigLength = sequenceRecord.getSequenceLength();
        for (int i = 1; i < contigLength; i = i + DEPTH_WINDOW_SIZE) {
            depthIntervals.add(new SVInterval(contigID, i, Math.min(contigLength, i + DEPTH_WINDOW_SIZE)));
        }
    }

    final List<SVInterval> highCoverageSubintervals = findHighCoverageSubintervalsAndLog(
            params, ctx, broadcastMetadata, depthIntervals, unfilteredReads, filter, logger);
    final SVIntervalTree<SVInterval> highCoverageSubintervalTree = new SVIntervalTree<>();
    highCoverageSubintervals.forEach(i -> highCoverageSubintervalTree.put(i, i));

    return highCoverageSubintervalTree;
}

Source File: FilterBam.java From Drop-seq with MIT License

6 votes

private SAMFileHeader editSequenceDictionary(final SAMFileHeader fileHeader) {
 if (DROP_REJECTED_REF || !STRIP_REF_PREFIX.isEmpty()) {
     // Make mutable copy of sequence list
        final ArrayList<SAMSequenceRecord> sequences = new ArrayList<>(fileHeader.getSequenceDictionary().getSequences());
        final ListIterator<SAMSequenceRecord> it = sequences.listIterator();
        while (it.hasNext()) {
            final SAMSequenceRecord sequence = it.next();
            if (DROP_REJECTED_REF && filterReference(sequence.getSequenceName()))
	it.remove();
else {
                final String editedSequenceName = stripReferencePrefix(sequence.getSequenceName());
                if (editedSequenceName != null)
		it.set(cloneWithNewName(sequence, editedSequenceName));
            }
        }
        fileHeader.getSequenceDictionary().setSequences(sequences);
    }
 return fileHeader;
}

Source File: SamBamUtils.java From chipster with MIT License

6 votes

public static void sortSamBam(File samBamFile, File sortedBamFile) {
	
	SAMFileReader.setDefaultValidationStringency(ValidationStringency.SILENT);
	SAMFileReader reader = new SAMFileReader(IOUtil.openFileForReading(samBamFile));
	SAMFileWriter writer = null;
	try {
		
		reader.getFileHeader().setSortOrder(SAMFileHeader.SortOrder.coordinate);
		writer = new SAMFileWriterFactory().makeBAMWriter(reader.getFileHeader(), false, sortedBamFile);
		Iterator<SAMRecord> iterator = reader.iterator();
		while (iterator.hasNext()) {
			writer.addAlignment(iterator.next());
		}
		
	} finally {
		closeIfPossible(reader);
		closeIfPossible(writer);
	}
}

Source File: PreprocessingTools.java From halvade with GNU General Public License v3.0

6 votes

public int callElPrep(String input, String output, String rg, int threads, 
        SAMRecordIterator SAMit,
        SAMFileHeader header, String dictFile, boolean updateRG, boolean keepDups, String RGID) throws InterruptedException, QualityException {
    
    SAMRecord sam;
    SAMFileWriterFactory factory = new SAMFileWriterFactory();
    SAMFileWriter Swriter = factory.makeSAMWriter(header, true, new File(input));
    
    int reads = 0;
    while(SAMit.hasNext()) {
        sam = SAMit.next();
        if(updateRG)
            sam.setAttribute(SAMTag.RG.name(), RGID);
        Swriter.addAlignment(sam);
        reads++;
    }
    Swriter.close();
    
    String customArgs = HalvadeConf.getCustomArgs(context.getConfiguration(), "elprep", "");  
    String[] command = CommandGenerator.elPrep(bin, input, output, threads, true, rg, null, !keepDups, customArgs);
    long estimatedTime = runProcessAndWait("elPrep", command);
    if(context != null)
        context.getCounter(HalvadeCounters.TIME_ELPREP).increment(estimatedTime);
    
    return reads;
}

Source File: Feature.java From abra2 with MIT License

6 votes

public static int findFirstOverlappingRegion(SAMFileHeader samHeader, SAMRecordWrapper read, int readStart, int readEnd, List<Feature> regions, int start) {
	if (start < 0) {
		start = 0;
	}

	for (int idx=start; idx<regions.size(); idx++) {
		Feature region = regions.get(idx);
		if ( (read.getSamRecord().getReferenceIndex() < samHeader.getSequenceDictionary().getSequenceIndex(region.getSeqname())) ||
			 (read.getSamRecord().getReferenceName().equals(region.getSeqname()) && readStart < region.getStart()) ) {
			
			// This read is in between regions
			// TODO: adjust start region here
			return -1;
		} else if (region.overlaps(read.getSamRecord().getReferenceName(), readStart, readEnd)) {
			return idx;
		}
	}
	
	// This read is beyond all regions
	return -1;
}

Source File: ReadsSparkSource.java From gatk with BSD 3-Clause "New" or "Revised" License

6 votes

/**
 * Loads ADAM reads stored as Parquet.
 * @param inputPathSpecifier path to the Parquet data
 * @return RDD of (ADAM-backed) GATKReads from the file.
 */
public JavaRDD<GATKRead> getADAMReads(final GATKPath inputPathSpecifier, final TraversalParameters traversalParameters, final SAMFileHeader header) throws IOException {
    Job job = Job.getInstance(ctx.hadoopConfiguration());
    AvroParquetInputFormat.setAvroReadSchema(job, AlignmentRecord.getClassSchema());
    Broadcast<SAMFileHeader> bHeader;
    if (header == null) {
        bHeader= ctx.broadcast(null);
    } else {
        bHeader = ctx.broadcast(header);
    }
    @SuppressWarnings("unchecked")
    JavaRDD<AlignmentRecord> recordsRdd = ctx.newAPIHadoopFile(
            inputPathSpecifier.getRawInputString(), AvroParquetInputFormat.class, Void.class, AlignmentRecord.class, job.getConfiguration())
            .values();
    JavaRDD<GATKRead> readsRdd = recordsRdd.map(record -> new BDGAlignmentRecordToGATKReadAdapter(record, bHeader.getValue()));
    JavaRDD<GATKRead> filteredRdd = readsRdd.filter(record -> samRecordOverlaps(record.convertToSAMRecord(header), traversalParameters));

    return fixPartitionsIfQueryGrouped(ctx, header, filteredRdd);
}

Source File: ReadPileupUnitTest.java From gatk with BSD 3-Clause "New" or "Revised" License

6 votes

@Test
public void testSortedIterator() throws Exception {
    final SAMFileHeader header = ArtificialReadUtils.createArtificialSamHeader();
    final GATKRead read1 = ArtificialReadUtils.createArtificialRead(header, "10M");
    final GATKRead read2 = ArtificialReadUtils.createArtificialRead(header, "10M");
    read1.setPosition(new SimpleInterval("22", 200, 210));
    read2.setPosition(new SimpleInterval("22", 208, 218));
    read1.setMatePosition(read2);
    read2.setMatePosition(read1);
    final Locatable loc = new SimpleInterval("22", 208, 208);
    final Map<String, ReadPileup> stratified = new LinkedHashMap<>();
    stratified.put("sample1", new ReadPileup(loc, Arrays.asList(read2), 0));
    stratified.put("sample2", new ReadPileup(loc, Arrays.asList(read1), 9));
    final ReadPileup combined = new ReadPileup(loc, stratified);
    final Iterator<PileupElement> sortedIterator = combined.sortedIterator();
    Assert.assertSame(sortedIterator.next().getRead(), read1);
    Assert.assertSame(sortedIterator.next().getRead(), read2);
}

Source File: GenomeLoc.java From gatk with BSD 3-Clause "New" or "Revised" License

6 votes

/**
 * Calculates the distance between two genomeLocs across contigs (if necessary).
 *
 * Returns minDistance(other) if in same contig.
 * Works with intervals!
 * Uses the SAMFileHeader to extract the size of the contigs and follows the order in the dictionary.
 *
 * @param other         the genome loc to compare to
 * @param samFileHeader the contig information
 * @return the sum of all the bases in between the genomeLocs, including entire contigs
 */
public long distanceAcrossContigs(final GenomeLoc other, final SAMFileHeader samFileHeader) {
    if (onSameContig(other)) {
        return minDistance(other);
    }

    // add the distance from the first genomeLoc to the end of it's contig and the distance from the
    // second genomeLoc to the beginning of it's contig.
    long distance = 0;
    if (contigIndex < other.contigIndex) {
        distance += samFileHeader.getSequence(contigIndex).getSequenceLength() - stop;
        distance += other.start;
    } else {
        distance += samFileHeader.getSequence(other.contigIndex).getSequenceLength() - other.stop;
        distance += start;
    }

    // add any contig (in its entirety) in between the two genomeLocs
    for (int i=Math.min(this.contigIndex, other.contigIndex) + 1; i < Math.max(this.contigIndex, other.contigIndex); i++) {
        distance += samFileHeader.getSequence(i).getSequenceLength();
    }
    return distance;
}

Source File: GatherBamFiles.java From picard with MIT License

6 votes

/**
 * Simple implementation of a gather operations that uses SAMFileReaders and Writers in order to concatenate
 * multiple BAM files.
 */
private static void gatherNormally(final List<File> inputs, final File output, final boolean createIndex, final boolean createMd5,
                                   final File referenceFasta) {
    final SAMFileHeader header;
    {
        header = SamReaderFactory.makeDefault().referenceSequence(referenceFasta).getFileHeader(inputs.get(0));
    }

    final SAMFileWriter out = new SAMFileWriterFactory().setCreateIndex(createIndex).setCreateMd5File(createMd5).makeSAMOrBAMWriter(header, true, output);

    for (final File f : inputs) {
        log.info("Gathering " + f.getAbsolutePath());
        final SamReader in = SamReaderFactory.makeDefault().referenceSequence(referenceFasta).open(f);
        for (final SAMRecord rec : in) out.addAlignment(rec);
        CloserUtil.close(in);
    }

    out.close();
}

Source File: SamUtilsTest.java From rtg-tools with BSD 2-Clause "Simplified" License

6 votes

public void testRunCommentUpdate() {
  final SAMFileHeader sf = new SAMFileHeader();
  sf.addComment("READ-SDF-ID:" + Long.toHexString(123456));
  final UUID uuid = UUID.randomUUID();
  sf.addComment(SamUtils.RUN_ID_ATTRIBUTE + uuid);
  sf.addComment("TEMPLATE-SDF-ID:$$$$$$$");
  sf.addComment("BLAH:KLJERLWJ");

  SamUtils.updateRunId(sf);

  for (final String s : sf.getComments()) {
    if (s.startsWith(SamUtils.RUN_ID_ATTRIBUTE)) {
      assertFalse(s.contains(uuid.toString()));
    }
  }
}

Source File: FastqToSam.java From picard with MIT License

6 votes

/** Creates a simple header with the values provided on the command line. */
public SAMFileHeader createSamFileHeader() {
    final SAMReadGroupRecord rgroup = new SAMReadGroupRecord(this.READ_GROUP_NAME);
    rgroup.setSample(this.SAMPLE_NAME);
    if (this.LIBRARY_NAME != null) rgroup.setLibrary(this.LIBRARY_NAME);
    if (this.PLATFORM != null) rgroup.setPlatform(this.PLATFORM);
    if (this.PLATFORM_UNIT != null) rgroup.setPlatformUnit(this.PLATFORM_UNIT);
    if (this.SEQUENCING_CENTER != null) rgroup.setSequencingCenter(SEQUENCING_CENTER);
    if (this.PREDICTED_INSERT_SIZE != null) rgroup.setPredictedMedianInsertSize(PREDICTED_INSERT_SIZE);
    if (this.DESCRIPTION != null) rgroup.setDescription(this.DESCRIPTION);
    if (this.RUN_DATE != null) rgroup.setRunDate(this.RUN_DATE);
    if (this.PLATFORM_MODEL != null) rgroup.setPlatformModel(this.PLATFORM_MODEL);
    if (this.PROGRAM_GROUP != null) rgroup.setProgramGroup(this.PROGRAM_GROUP);

    final SAMFileHeader header = new SAMFileHeader();
    header.addReadGroup(rgroup);

    for (final String comment : COMMENT) {
        header.addComment(comment);
    }

    header.setSortOrder(this.SORT_ORDER);
    return header ;
}

Source File: Feature.java From abra2 with MIT License

6 votes

public static List<Integer> findAllOverlappingRegions(SAMFileHeader samHeader, SAMRecordWrapper read, List<Feature> regions, int start) {
	List<Integer> overlappingRegions = new ArrayList<Integer>();
	
	for (Span span : read.getSpanningRegions()) {
	
		int idx = findFirstOverlappingRegion(samHeader, read, span.start, span.end, regions, start);
		if (idx > -1) {
			overlappingRegions.add(idx);
			boolean isOverlap = true;
			idx += 1;
			
			while (isOverlap && idx < regions.size()) {
				Feature region = regions.get(idx);
				if (region.overlaps(read.getSamRecord().getReferenceName(), span.start, span.end)) {
					overlappingRegions.add(idx);
				} else {
					isOverlap = false;
				}
				idx += 1;
			}
		}
	}
	
	return overlappingRegions;
}

Source File: VarDictLauncher.java From VarDictJava with MIT License

5 votes

/**
 * Read map of chromosome lengths
 * @param bam BAM file name
 * @return Map of chromosome lengths. Key - chromosome name, value - length
 * @throws IOException if BAM/SAM file can't be opened
 */
public static Map<String, Integer> readChr(String bam) throws IOException {
    try (SamReader reader = SamReaderFactory.makeDefault().open(new File(bam))) {
        SAMFileHeader header = reader.getFileHeader();
        Map<String, Integer> chrs = new HashMap<>();
        for (SAMSequenceRecord record : header.getSequenceDictionary().getSequences()) {
            record.getSequenceLength();
            String sn = record.getSequenceName();
            int ln = record.getSequenceLength();
            chrs.put(sn, ln);
        }
        return chrs;
    }
}

Source File: GATKReadFilterPluginDescriptorTest.java From gatk with BSD 3-Clause "New" or "Revised" License

5 votes

private GATKRead simpleGoodRead( final SAMFileHeader header ) {
    final String cigarString = "101M";
    final Cigar cigar = TextCigarCodec.decode(cigarString);
    GATKRead read = createRead(header, cigar, 1, 0, 10);
    read.setMappingQuality(50);
    return read;
}

Source File: ReadGroupCovariateUnitTest.java From gatk with BSD 3-Clause "New" or "Revised" License

5 votes

private static void runTest(final SAMReadGroupRecord rg, final String expected, final ReadGroupCovariate covariate) {
    final SAMFileHeader header = ArtificialReadUtils.createArtificialSamHeaderWithReadGroup(rg);

    GATKRead read = ArtificialReadUtils.createRandomRead(header, 10);
    read.setReadGroup(rg.getReadGroupId());

    ReadCovariates readCovariates = new ReadCovariates(read.getLength(), 1, new CovariateKeyCache());
    covariate.recordValues(read, header, readCovariates, true);
    verifyCovariateArray(readCovariates.getMismatchesKeySet(), expected, covariate);

}

Source File: ExampleCollectSingleMetricsSpark.java From gatk with BSD 3-Clause "New" or "Revised" License

5 votes

/**
 * Execute the actual metrics collection.
 * @param filteredReads Input reads, already filtered using the filter returned by {@link #getDefaultReadFilters(SAMFileHeader)}
 * @param samHeader SAMFileHeader for the input
 */
@Override
protected void collectMetrics(
        final JavaRDD<GATKRead> filteredReads,
        final SAMFileHeader samHeader) {
    exampleSingleCollector.collectMetrics(filteredReads, samHeader);
}

Source File: RevertSam.java From picard with MIT License

5 votes

private SAMFileHeader createOutHeader(
        final SAMFileHeader inHeader,
        final SAMFileHeader.SortOrder sortOrder,
        final boolean removeAlignmentInformation) {

    final SAMFileHeader outHeader = new SAMFileHeader();
    outHeader.setSortOrder(sortOrder);
    if (!removeAlignmentInformation) {
        outHeader.setSequenceDictionary(inHeader.getSequenceDictionary());
        outHeader.setProgramRecords(inHeader.getProgramRecords());
    }
    return outHeader;
}

Source File: CollectGcBiasMetricsTest.java From picard with MIT License

5 votes

public void setupTest2(final int ID, final String readGroupId, final SAMReadGroupRecord readGroupRecord, final String sample,
                       final String library, final SAMFileHeader header, final SAMRecordSetBuilder setBuilder)
        throws IOException {

    final String separator = ":";
    final int contig1 = 0;
    final int contig2 = 1;
    final int contig3 = 2;
    readGroupRecord.setSample(sample);
    readGroupRecord.setPlatform(platform);
    readGroupRecord.setLibrary(library);
    readGroupRecord.setPlatformUnit(readGroupId);
    setBuilder.setReadGroup(readGroupRecord);
    setBuilder.setUseNmFlag(true);

    setBuilder.setHeader(header);

    final int max = 800;
    final int min = 1;
    final Random rg = new Random(5);

    //add records that align to all 3 chr in reference file
    for (int i = 0; i < NUM_READS; i++) {
        final int start = rg.nextInt(max) + min;
        final String newReadName = READ_NAME + separator + ID + separator + i;

        if (i<=NUM_READS/3) {
            setBuilder.addPair(newReadName, contig1, start + ID, start + ID + LENGTH);
        } else if (i< (NUM_READS - (NUM_READS/3))) {
            setBuilder.addPair(newReadName, contig2, start + ID, start + ID + LENGTH);
        } else {
            setBuilder.addPair(newReadName, contig3, start + ID, start + ID + LENGTH);
        }
    }
}

Source File: HaplotypeCallerSpark.java From gatk-protected with BSD 3-Clause "New" or "Revised" License

5 votes

/**
 * @return an {@link OverlapDetector} loaded with {@link ShardBoundary}
 * based on the -L intervals
 */
private static OverlapDetector<ShardBoundary> getShardBoundaryOverlapDetector(final SAMFileHeader header, final List<SimpleInterval> intervals, final int readShardSize, final int readShardPadding) {
    final OverlapDetector<ShardBoundary> shardBoundaryOverlapDetector = new OverlapDetector<>(0, 0);
    intervals.stream()
            .flatMap(interval -> Shard.divideIntervalIntoShards(interval, readShardSize, readShardPadding, header.getSequenceDictionary()).stream())
            .forEach(boundary -> shardBoundaryOverlapDetector.addLhs(boundary, boundary.getPaddedInterval()));
    return shardBoundaryOverlapDetector;
}

Source File: SAMRecordWriter.java From Hadoop-BAM with MIT License

5 votes

private void init(
		OutputStream output, SAMFileHeader header, boolean writeHeader)
	throws IOException
{
	this.header = header;
	writer = new SAMTextWriter(output);

	writer.setSortOrder(header.getSortOrder(), false);
	if (writeHeader)
		writer.setHeader(header);
}

Source File: Fragment.java From gatk with BSD 3-Clause "New" or "Revised" License

5 votes

public Fragment(final GATKRead first, final SAMFileHeader header, int partitionIndex, MarkDuplicatesScoringStrategy scoringStrategy, Map<String, Byte> headerLibraryMap) {
    super(partitionIndex, first.getName());

    this.score = scoringStrategy.score(first);
    this.R1R = first.isReverseStrand();
    this.key = ReadsKey.getKeyForFragment(ReadUtils.getStrandedUnclippedStart(first),
            isRead1ReverseStrand(),
            (short)ReadUtils.getReferenceIndex(first, header),
            headerLibraryMap.get(MarkDuplicatesSparkUtils.getLibraryForRead(first, header, LibraryIdGenerator.UNKNOWN_LIBRARY)));
}

Source File: SATagBuilderUnitTests.java From gatk with BSD 3-Clause "New" or "Revised" License

5 votes

@Test
public static void testSetReadsAsSupplementalNo() {
    final SAMFileHeader header = ArtificialReadUtils.createArtificialSamHeader();
    GATKRead primarySup = ArtificialReadUtils.createArtificialRead(header, "read1", 1, 10000, new byte[] {}, new byte[] {}, "4M");
    primarySup.setMappingQuality(100);
    primarySup.setAttribute("NM",20);
    primarySup.setIsReverseStrand(true);
    GATKRead secondarySup = ArtificialReadUtils.createArtificialRead(header, "read1", 2, 10001, new byte[] {}, new byte[] {}, "4S");
    secondarySup.setMappingQuality(200);
    List<GATKRead> sups = new ArrayList<>();
    sups.add(secondarySup);

    SATagBuilder.setReadsAsSupplemental(primarySup,sups);
    Assert.assertEquals(primarySup.getAttributeAsString("SA"), "3,10001,+,4S,200,*;");
    Assert.assertEquals(primarySup.isSupplementaryAlignment(), false);
    Assert.assertEquals(secondarySup.getAttributeAsString("SA"), "2,10000,-,4M,100,20;");
    Assert.assertEquals(secondarySup.isSupplementaryAlignment(), true);

    GATKRead tertiarySup = ArtificialReadUtils.createArtificialRead(header, "read1", 3, 10003, new byte[] {}, new byte[] {}, "4D");
    tertiarySup.setMappingQuality(200);
    primarySup.clearAttribute("SA");
    secondarySup.clearAttribute("SA");
    sups.add(tertiarySup);

    SATagBuilder.setReadsAsSupplemental(primarySup,sups);
    Assert.assertEquals(sups.size(), 2);
    Assert.assertEquals(sups.get(0).getAttributeAsString("SA"), "2,10000,-,4M,100,20;4,10003,+,4D,200,*;");
    Assert.assertTrue(sups.get(1).getAttributeAsString("SA").startsWith("2,10000,-,4M,100,20;"));
    Assert.assertTrue(primarySup.getAttributeAsString("SA").contains("4,10003,+,4D,200,*;"));
    Assert.assertTrue(primarySup.getAttributeAsString("SA").contains("3,10001,+,4S,200,*;"));

    GATKRead unmappedSup = ArtificialReadUtils.createArtificialUnmappedRead(header,new byte[] {}, new byte[] {});
    primarySup.clearAttribute("SA");
    sups.clear();
    sups.add(unmappedSup);

    SATagBuilder.setReadsAsSupplemental(primarySup,sups);
    Assert.assertEquals(primarySup.getAttributeAsString("SA"), "*,0,+,*,0,*;");
    Assert.assertEquals(sups.get(0).getAttributeAsString("SA"), "2,10000,-,4M,100,20;");
}

Source File: GATKProtectedVariantContextUtilsUnitTest.java From gatk-protected with BSD 3-Clause "New" or "Revised" License

5 votes

@Test
public void testGetPileup() {
    final SAMFileHeader header = ArtificialReadUtils.createArtificialSamHeader(1, 1, 1000);
    final Locatable loc = new SimpleInterval("chr1", 10, 10);
    final int readLength = 3;

    //this read doesn't overlap {@code loc}
    final GATKRead read1 = ArtificialReadUtils.createArtificialRead(header, "read1", 0, 1, readLength);
    read1.setBases(Utils.dupBytes((byte) 'A', readLength));
    read1.setBaseQualities(Utils.dupBytes((byte) 30, readLength));
    read1.setCigar("3M");

    //this read overlaps {@code loc} with a Q30 'A'
    final GATKRead read2 = ArtificialReadUtils.createArtificialRead(header, "read2", 0, 10, readLength);
    read2.setBases(Utils.dupBytes((byte) 'A', readLength));
    read2.setBaseQualities(Utils.dupBytes((byte) 30, readLength));
    read2.setCigar("3M");

    //this read overlaps {@code loc} with a Q20 'C' (due to the deletion)
    final GATKRead read3 = ArtificialReadUtils.createArtificialRead(header, "read3", 0, 7, readLength);
    read3.setBases(Utils.dupBytes((byte) 'C', readLength));
    read3.setBaseQualities(Utils.dupBytes((byte) 20, readLength));
    read3.setCigar("1M1D2M");

    //this read doesn't overlap {@code loc} due to the deletion
    final GATKRead read4 = ArtificialReadUtils.createArtificialRead(header, "read4", 0, 7, readLength);
    read4.setBases(Utils.dupBytes((byte) 'C', readLength));
    read4.setBaseQualities(Utils.dupBytes((byte) 20, readLength));
    read4.setCigar("1M5D2M");

    final ReadPileup pileup = GATKProtectedVariantContextUtils.getPileup(loc, Arrays.asList(read1, read2, read3, read4));

    // the pileup should contain a Q30 'A' and a Q20 'C'
    final int[] counts = pileup.getBaseCounts();
    Assert.assertEquals(counts, new int[]{1, 1, 0, 0});

}

Source File: MarkDuplicatesSpark.java From gatk with BSD 3-Clause "New" or "Revised" License

5 votes

private boolean treatAsReadGroupOrdered(SAMFileHeader header, boolean treatUnsortedAsReadGrouped) {
    final SAMFileHeader.SortOrder sortOrder = header.getSortOrder();
    if( ReadUtils.isReadNameGroupedBam(header) ){
        return true;
    } else if ( treatUnsortedAsReadGrouped && (sortOrder.equals(SAMFileHeader.SortOrder.unknown) || sortOrder.equals(SAMFileHeader.SortOrder.unsorted))) {
        logger.warn("Input bam was marked as " + sortOrder.toString() + " but " + TREAT_UNSORTED_AS_ORDERED + " is specified so it's being treated as read name grouped");
        return true;
    }
    return false;
}

Source File: AssemblyRegionIterator.java From gatk with BSD 3-Clause "New" or "Revised" License

5 votes

/**
 * Constructs an AssemblyRegionIterator over a provided read shard
 *  @param readShard MultiIntervalShard containing the reads that will go into the assembly regions.
 *                  Must have a MAPPED filter set on it.
 * @param readHeader header for the reads
 * @param reference source of reference bases (may be null)
 * @param features source of arbitrary features (may be null)
 * @param evaluator evaluator used to determine whether a locus is active
 */
public AssemblyRegionIterator(final MultiIntervalShard<GATKRead> readShard,
                              final SAMFileHeader readHeader,
                              final ReferenceDataSource reference,
                              final FeatureManager features,
                              final AssemblyRegionEvaluator evaluator,
                              final AssemblyRegionArgumentCollection assemblyRegionArgs) {

    Utils.nonNull(readShard);
    Utils.nonNull(readHeader);
    Utils.nonNull(evaluator);
    assemblyRegionArgs.validate();


    this.readShard = readShard;
    this.readHeader = readHeader;
    this.reference = reference;
    this.features = features;
    this.evaluator = evaluator;
    this.assemblyRegionArgs = assemblyRegionArgs;

    this.readyRegion = null;
    this.previousRegionReads = null;
    this.pendingRegions = new ArrayDeque<>();
    this.readCachingIterator = new ReadCachingIterator(readShard.iterator());
    this.readCache = new ArrayDeque<>();
    this.activityProfile = new BandPassActivityProfile(assemblyRegionArgs.maxProbPropagationDistance, assemblyRegionArgs.activeProbThreshold, BandPassActivityProfile.MAX_FILTER_SIZE, BandPassActivityProfile.DEFAULT_SIGMA, readHeader);

    // We wrap our LocusIteratorByState inside an IntervalAlignmentContextIterator so that we get empty loci
    // for uncovered locations. This is critical for reproducing GATK 3.x behavior!
    this.libs = new LocusIteratorByState(readCachingIterator, DownsamplingMethod.NONE, false, ReadUtils.getSamplesFromHeader(readHeader), readHeader, true);
    final IntervalLocusIterator intervalLocusIterator = new IntervalLocusIterator(readShard.getIntervals().iterator());
    this.locusIterator = new IntervalAlignmentContextIterator(libs, intervalLocusIterator, readHeader.getSequenceDictionary());

    readyRegion = loadNextAssemblyRegion();
}

htsjdk.samtools.SAMFileHeader Java Examples