htsjdk.samtools.reference.ReferenceSequence Java Examples

The following examples show how to use htsjdk.samtools.reference.ReferenceSequence. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: AbstractWgsMetricsCollectorTest.java    From picard with MIT License 6 votes vote down vote up
@Test
public void testForCollectorWithoutData(){
    long[] templateQualHistogram =  new long[127];
    long[] templateHistogramArray = new long[11];
    CollectWgsMetrics collectWgsMetrics = new CollectWgsMetrics();
    AbstractWgsMetricsCollector collector = new AbstractWgsMetricsCollector(collectWgsMetrics,
            10, createIntervalList()) {
        @Override
        public void addInfo(AbstractLocusInfo info, ReferenceSequence ref, boolean referenceBaseN) {
        }
    };
    assertEquals(templateHistogramArray, collector.highQualityDepthHistogramArray);
    assertEquals(templateQualHistogram, collector.unfilteredBaseQHistogramArray);
    assertEquals(0, collector.basesExcludedByCapping);
    assertEquals(0, collector.basesExcludedByOverlap);
    assertEquals(0, collector.basesExcludedByBaseq);
}
 
Example #2
Source File: ReferenceResource.java    From VarDictJava with MIT License 6 votes vote down vote up
/**
 * Method convert fasta data to array contains header and nucleotide bases.
 * @param fasta path to fasta file
 * @param chr chromosome name of region
 * @param start start position of region
 * @param end end position of region
 * @return array of nucleotide bases in the region of fasta
 */
public String[] retrieveSubSeq(String fasta, String chr, int start, int end) {
    try {
        IndexedFastaSequenceFile idx = fetchFasta(fasta);
        ReferenceSequence seq = idx.getSubsequenceAt(chr, start, end);
        byte[] bases = seq.getBases();
        return new String[]{">" + chr + ":" + start + "-" + end, bases != null ? new String(bases) : ""};
    } catch (SAMException e){
        if (UNABLE_FIND_CONTIG.matcher(e.getMessage()).find()){
            throw new WrongFastaOrBamException(chr, e);
        } else if (WRONG_START_OR_END.matcher(e.getMessage()).find()){
            throw new RegionBoundariesException(chr, start, end, e);
        } else {
            throw e;
        }
    }
}
 
Example #3
Source File: SequenceDictionaryUtils.java    From picard with MIT License 6 votes vote down vote up
/**
 * Create one SAMSequenceRecord from a single fasta sequence
 */
private SAMSequenceRecord makeSequenceRecord(final ReferenceSequence refSeq) {
    final SAMSequenceRecord ret = new SAMSequenceRecord(refSeq.getName(), refSeq.length());

    // Compute MD5 of upcased bases
    final byte[] bases = refSeq.getBases();
    for (int i = 0; i < bases.length; ++i) {
        bases[i] = StringUtil.toUpperCase(bases[i]);
    }

    ret.setAttribute(SAMSequenceRecord.MD5_TAG, md5Hash(bases));
    if (genomeAssembly != null) {
        ret.setAttribute(SAMSequenceRecord.ASSEMBLY_TAG, genomeAssembly);
    }
    ret.setAttribute(SAMSequenceRecord.URI_TAG, uri);
    if (species != null) {
        ret.setAttribute(SAMSequenceRecord.SPECIES_TAG, species);
    }
    return ret;
}
 
Example #4
Source File: BaitDesigner.java    From picard with MIT License 6 votes vote down vote up
@Override
List<Bait> design(final BaitDesigner designer, final Interval target, final ReferenceSequence reference) {
    final List<Bait> baits = new LinkedList<Bait>();
    final int baitSize = designer.BAIT_SIZE;
    final int baitOffset = designer.BAIT_OFFSET;
    final int lastPossibleBaitStart = Math.min(target.getEnd(), reference.length() - baitSize);
    final int baitCount = 1 + (int) Math.floor((lastPossibleBaitStart - target.getStart()) / (double) baitOffset);

    int i = 0;
    for (int start = target.getStart(); start < lastPossibleBaitStart; start += baitOffset) {
        final Bait bait = new Bait(target.getContig(),
                start,
                CoordMath.getEnd(start, baitSize),
                target.isNegativeStrand(),
                designer.makeBaitName(target.getName(), ++i, baitCount));
        bait.addBases(reference, designer.DESIGN_ON_TARGET_STRAND);
        baits.add(bait);
    }
    return baits;
}
 
Example #5
Source File: GcBiasUtils.java    From picard with MIT License 6 votes vote down vote up
public static int[] calculateRefWindowsByGc(final int windows, final File referenceSequence, final int windowSize) {
    final ReferenceSequenceFile refFile = ReferenceSequenceFileFactory.getReferenceSequenceFile(referenceSequence);
    ReferenceSequence ref;

    final int [] windowsByGc = new int [windows];

    while ((ref = refFile.nextSequence()) != null) {
        final byte[] refBases = ref.getBases();
        StringUtil.toUpperCase(refBases);
        final int refLength = refBases.length;
        final int lastWindowStart = refLength - windowSize;

        final CalculateGcState state = new GcBiasUtils().new CalculateGcState();

        for (int i = 1; i < lastWindowStart; ++i) {
            final int windowEnd = i + windowSize;
            final int gcBin = calculateGc(refBases, i, windowEnd, state);
            if (gcBin != -1) windowsByGc[gcBin]++;
        }
    }

    return windowsByGc;
}
 
Example #6
Source File: MaskReferenceSequence.java    From Drop-seq with MIT License 6 votes vote down vote up
private void processByPartialContig (final ReferenceSequenceFile ref, final FastaSequenceFileWriter writer, final File intervalListFile) {
	SAMSequenceDictionary sd = ref.getSequenceDictionary();
	// validate that the intervals and the reference have the same sequence dictionary.
	IntervalList iList = IntervalList.fromFile(intervalListFile);
	iList.getHeader().getSequenceDictionary().assertSameDictionary(sd);
	// map the intervals to a map to each contig.
	Map<String, List<Interval>> intervalsPerContig = getIntervalsForContig(iList);

	for (SAMSequenceRecord r: sd.getSequences()) {
		String contig = r.getSequenceName();
		log.info("Processing partial contig " + contig);
		// this list can be null.
		List<Interval> intervalsToMask = intervalsPerContig.get(contig);
		ReferenceSequence rs = ref.getSequence(contig);
		writeSequence(rs, intervalsToMask, writer);
	}
}
 
Example #7
Source File: CreateSequenceDictionary.java    From varsim with BSD 2-Clause "Simplified" License 6 votes vote down vote up
/**
 * Create one SAMSequenceRecord from a single fasta sequence
 */
private SAMSequenceRecord makeSequenceRecord(final ReferenceSequence refSeq) {
  final SAMSequenceRecord ret = new SAMSequenceRecord(refSeq.getName(), refSeq.length());

  // Compute MD5 of upcased bases
  final byte[] bases = refSeq.getBases();
  for (int i = 0; i < bases.length; ++i) {
    bases[i] = StringUtil.toUpperCase(bases[i]);
  }

  ret.setAttribute(SAMSequenceRecord.MD5_TAG, md5Hash(bases));
  if (GENOME_ASSEMBLY != null) {
    ret.setAttribute(SAMSequenceRecord.ASSEMBLY_TAG, GENOME_ASSEMBLY);
  }
  ret.setAttribute(SAMSequenceRecord.URI_TAG, URI);
  if (SPECIES != null) {
    ret.setAttribute(SAMSequenceRecord.SPECIES_TAG, SPECIES);
  }
  return ret;
}
 
Example #8
Source File: FastWgsMetricsCollector.java    From picard with MIT License 6 votes vote down vote up
@Override
public void addInfo(final AbstractLocusInfo<EdgingRecordAndOffset> info, final ReferenceSequence ref, boolean referenceBaseN) {
    prepareCollector(info);
    for (final EdgingRecordAndOffset record : info.getRecordAndOffsets()) {
        final String readName = record.getReadName();
        Optional<Set<EdgingRecordAndOffset>> recordsAndOffsetsForName = Optional.ofNullable((Set<EdgingRecordAndOffset>)readsNames.get(readName));
        if (record.getType() == EdgingRecordAndOffset.Type.BEGIN) {
            processRecord(info.getPosition(), ref, record, recordsAndOffsetsForName.orElse(new HashSet<>()));
        } else {
            recordsAndOffsetsForName.ifPresent(
                    edgingRecordAndOffsets -> removeRecordFromMap(record,
                            edgingRecordAndOffsets));
        }
    }
    if (!referenceBaseN) {
        final int readNamesSize = pileupSize.get(info.getPosition());
        final int highQualityDepth = Math.min(readNamesSize, coverageCap);
        if (highQualityDepth < readNamesSize) {
            basesExcludedByCapping += readNamesSize - coverageCap;
        }
        highQualityDepthHistogramArray[highQualityDepth]++;
        unfilteredDepthHistogramArray[unfilteredDepthSize.get(info.getPosition())]++;
    }
}
 
Example #9
Source File: CollectBaseDistributionByCycle.java    From picard with MIT License 5 votes vote down vote up
@Override
protected void acceptRead(final SAMRecord rec, final ReferenceSequence ref) {
    if ((PF_READS_ONLY) && (rec.getReadFailsVendorQualityCheckFlag())) {
        return;
    }
    if ((ALIGNED_READS_ONLY) && (rec.getReadUnmappedFlag())) {
        return;
    }
    if (rec.isSecondaryOrSupplementary()) {
        return;
    }
    hist.addRecord(rec);
}
 
Example #10
Source File: CollectSequencingArtifactMetrics.java    From picard with MIT License 5 votes vote down vote up
private String getRefContext(final ReferenceSequence ref, final int contextStartIndex, final int contextFullLength) {
    // cache the upper-cased string version of this reference so we don't need to create a string for every base in every read
    if (currentRefIndex != ref.getContigIndex()) {
        currentRefString = new String(ref.getBases()).toUpperCase();
        currentRefIndex = ref.getContigIndex();
    }
    return currentRefString.substring(contextStartIndex, contextStartIndex + contextFullLength);
}
 
Example #11
Source File: AlignmentSummaryMetricsCollector.java    From picard with MIT License 5 votes vote down vote up
public void addRecord(final SAMRecord record, final ReferenceSequence ref) {
    if (record.getNotPrimaryAlignmentFlag()) {
        // only want 1 count per read so skip non primary alignments
        return;
    }

    collectReadData(record);
    collectQualityData(record, ref);
}
 
Example #12
Source File: MultiLevelCollector.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Construct a argument of ARGTYPE using the given SAMRecord and ReferenceSequence then pass
 * this value to all collectors that should include this record
 */
public void acceptRecord(final SAMRecord record, final ReferenceSequence refSeq) {
    final ARGTYPE arg = makeArg(record, refSeq);

    for(final Distributor collector : outputOrderedDistributors) {
        collector.acceptRecord(arg, record.getReadGroup());
    }
}
 
Example #13
Source File: LiftoverVcfTest.java    From picard with MIT License 5 votes vote down vote up
@Test
public void testReverseComplementFailureDoesNotErrorOut() {
    final VariantContextBuilder builder = new VariantContextBuilder().source("test").loc("chr1", 1, 4);
    final Allele originalRef = Allele.create("CCCC", true);
    final Allele originalAlt = Allele.create("C", false);
    builder.alleles(Arrays.asList(originalRef, originalAlt));

    final Interval interval = new Interval("chr1", 1, 4, true, "test ");

    final String reference = "ATGATGATGA";
    final ReferenceSequence refSeq = new ReferenceSequence("chr1", 10, reference.getBytes());

    // we don't actually care what the results are here -- we just want to make sure that it doesn't fail
    final VariantContextBuilder result = LiftoverUtils.reverseComplementVariantContext(builder.make(), interval, refSeq);
}
 
Example #14
Source File: WgsMetricsProcessorImpl.java    From picard with MIT License 5 votes vote down vote up
/**
 * Method gets the data from iterator for each locus and processes it with the help of collector.
 */
@Override
public void processFile() {
    long counter = 0;

    while (iterator.hasNext()) {
        final AbstractLocusInfo<T> info = iterator.next();
        final ReferenceSequence ref = refWalker.get(info.getSequenceIndex());
        boolean referenceBaseN = collector.isReferenceBaseN(info.getPosition(), ref);
        collector.addInfo(info, ref, referenceBaseN);
        if (referenceBaseN) {
            continue;
        }

        progress.record(info.getSequenceName(), info.getPosition());
        if (collector.isTimeToStop(++counter)) {
            break;
        }
        collector.setCounter(counter);
    }
    // check that we added the same number of bases to the raw coverage histogram and the base quality histograms
    final long sumBaseQ = Arrays.stream(collector.unfilteredBaseQHistogramArray).sum();
    final long sumDepthHisto = LongStream.rangeClosed(0, collector.coverageCap).map(i -> (i * collector.unfilteredDepthHistogramArray[(int) i])).sum();
    if (sumBaseQ != sumDepthHisto) {
        log.error("Coverage and baseQ distributions contain different amount of bases!");
    }
}
 
Example #15
Source File: GcBiasMetricsCollector.java    From picard with MIT License 5 votes vote down vote up
@Override
public void acceptRecord(final GcBiasCollectorArgs args) {
    final SAMRecord rec = args.getRec();
    if (logCounter < 100 && rec.getReadBases().length == 0) {
        log.warn("Omitting read " + rec.getReadName() + " with '*' in SEQ field.");
        if (++logCounter == 100) {
            log.warn("There are more than 100 reads with '*' in SEQ field in file.");
        }
        return;
    }
    if (!rec.getReadUnmappedFlag()) {
        if (referenceIndex != rec.getReferenceIndex() || gc == null) {
            final ReferenceSequence ref = args.getRef();
            refBases = ref.getBases();
            StringUtil.toUpperCase(refBases);
            final int refLength = refBases.length;
            final int lastWindowStart = refLength - scanWindowSize;
            gc = GcBiasUtils.calculateAllGcs(refBases, lastWindowStart, scanWindowSize);
            referenceIndex = rec.getReferenceIndex();
        }

        addReadToGcData(rec, this.gcData);
        if (ignoreDuplicates && !rec.getDuplicateReadFlag()) {
            addReadToGcData(rec, this.gcDataNonDups);
        }
    } else {
        updateTotalClusters(rec, this.gcData);
        if (ignoreDuplicates && !rec.getDuplicateReadFlag()) {
            updateTotalClusters(rec, this.gcDataNonDups);
        }
    }
}
 
Example #16
Source File: LiftoverVcfTest.java    From picard with MIT License 5 votes vote down vote up
@Test(dataProvider = "indelFlipData")
public void testFlipIndel(final VariantContext source, final ReferenceSequence reference, final VariantContext result) {

    final LiftOver liftOver = new LiftOver(CHAIN_FILE);
    final Interval originalLocus = new Interval(source.getContig(), source.getStart(), source.getEnd());
    final Interval target = liftOver.liftOver(originalLocus);
    if (target != null && !target.isNegativeStrand()) {
        throw new RuntimeException("not reversed");
    }

    final VariantContext flipped = LiftoverUtils.liftVariant(source, target, reference, false, false);

    VcfTestUtils.assertEquals(flipped, result);
}
 
Example #17
Source File: GatherGeneGCLength.java    From Drop-seq with MIT License 5 votes vote down vote up
public void writeTranscriptSequence (final Gene gene, final ReferenceSequence fastaRef, final SAMSequenceDictionary dict, final FastaSequenceFileWriter outSequence ) {

		for (Transcript t : gene) {
			String sequence=getTranscriptSequence(t, fastaRef, dict);
			outSequence.	writeSequence(gene.getName(), sequence, t.name);
		}
	}
 
Example #18
Source File: MaskReferenceSequence.java    From Drop-seq with MIT License 5 votes vote down vote up
private void processByWholeContig (final ReferenceSequenceFile ref, final FastaSequenceFileWriter writer, final List<String> contigPatternToIgnore) {
	SAMSequenceDictionary sd = ref.getSequenceDictionary();
	Set<String> contigsToIgnore = selectContigsToIgnore(sd, contigPatternToIgnore);
	// write out each contig.

	for (SAMSequenceRecord r: sd.getSequences()) {
		String contig = r.getSequenceName();
		log.info("Processing complete contig " + contig);
		ReferenceSequence rs = ref.getSequence(contig);
		boolean setSequenceToN = contigsToIgnore.contains(contig);
		writeSequence(rs, setSequenceToN, writer);
	}

}
 
Example #19
Source File: InsertSizeMetricsCollector.java    From picard with MIT License 5 votes vote down vote up
@Override
protected InsertSizeCollectorArgs makeArg(SAMRecord samRecord, ReferenceSequence refSeq) {
    final int insertSize = Math.abs(samRecord.getInferredInsertSize());
    final SamPairUtil.PairOrientation orientation = SamPairUtil.getPairOrientation(samRecord);

    return new InsertSizeCollectorArgs(insertSize, orientation);
}
 
Example #20
Source File: BGZF_ReferenceSequenceFile.java    From cramtools with Apache License 2.0 5 votes vote down vote up
@Override
public ReferenceSequence nextSequence() {
	if (iterator == null)
		iterator = index.keySet().iterator();

	if (!iterator.hasNext())
		return null;

	String name = iterator.next();
	try {
		return findSequence(name, 0, 0);
	} catch (IOException e) {
		throw new RuntimeException(e);
	}
}
 
Example #21
Source File: BGZF_ReferenceSequenceFile.java    From cramtools with Apache License 2.0 5 votes vote down vote up
@Override
public ReferenceSequence getSubsequenceAt(String contig, long start, long stop) {
	try {
		return findSequence(contig, start, stop);
	} catch (IOException e) {
		throw new RuntimeException(e);
	}
}
 
Example #22
Source File: ValidateReference.java    From Drop-seq with MIT License 5 votes vote down vote up
private void validateReferenceBases(File referenceFile) {
    final ReferenceSequenceFile refSeqFile = ReferenceSequenceFileFactory.getReferenceSequenceFile(referenceFile, true);
    ReferenceSequence sequence;
    while ((sequence = refSeqFile.nextSequence()) != null) {
        for (final byte base: sequence.getBases()) {
            if (!IUPAC_TABLE[base]) {
                messages.baseErrors = String.format("WARNING: AT least one invalid base '%c' (decimal %d) in reference sequence named %s",
                        StringUtil.byteToChar(base), base, sequence.getName());
                break;
            }
        }
    }
}
 
Example #23
Source File: FastWgsMetricsCollector.java    From picard with MIT License 5 votes vote down vote up
private void processRecord(int position, ReferenceSequence ref, EdgingRecordAndOffset record, Set<EdgingRecordAndOffset> recordsAndOffsetsForName) {
    long processedLoci = counter;
    readsNames.put(record.getReadName(), recordsAndOffsetsForName);
    final byte[] qualities = record.getBaseQualities();
    final byte[] bases = record.getRecord().getReadBases();
    for (int i = 0; i < record.getLength(); i++) {
        final int index = i + record.getRefPos();
        if (isReferenceBaseN(index, ref)) {
            continue;
        }
        final byte quality = qualities[i + record.getOffset()];
        if (quality <= 2) {
            basesExcludedByBaseq++;
        } else {
            if (unfilteredDepthSize.get(index) < coverageCap) {
                unfilteredBaseQHistogramArray[quality]++;
                unfilteredDepthSize.increment(index);
            }
            if (quality < collectWgsMetrics.MINIMUM_BASE_QUALITY || SequenceUtil.isNoCall(bases[i + record.getOffset()])){
                basesExcludedByBaseq++;
            } else {
                final int bsq = excludeByQuality(recordsAndOffsetsForName, index);
                if (recordsAndOffsetsForName.size() - bsq > 0) {
                    basesExcludedByOverlap++;
                } else {
                    pileupSize.increment(index);
                }
            }
        }
        if (isTimeToStop(++processedLoci)) {
            break;
        }
    }
    recordsAndOffsetsForName.add(record);
}
 
Example #24
Source File: CollectWgsMetrics.java    From picard with MIT License 5 votes vote down vote up
@Override
public void addInfo(final AbstractLocusInfo<SamLocusIterator.RecordAndOffset> info, final ReferenceSequence ref, boolean referenceBaseN) {

    if (referenceBaseN) {
        return;
    }
    // Figure out the coverage while not counting overlapping reads twice, and excluding various things
    final HashSet<String> readNames = new HashSet<>(info.getRecordAndOffsets().size());
    int pileupSize = 0;
    int unfilteredDepth = 0;

    for (final SamLocusIterator.RecordAndOffset recs : info.getRecordAndOffsets()) {
        if (recs.getBaseQuality() <= 2) { ++basesExcludedByBaseq;   continue; }

        // we add to the base quality histogram any bases that have quality > 2
        // the raw depth may exceed the coverageCap before the high-quality depth does. So stop counting once we reach the coverage cap.
        if (unfilteredDepth < coverageCap) {
            unfilteredBaseQHistogramArray[recs.getRecord().getBaseQualities()[recs.getOffset()]]++;
            unfilteredDepth++;
        }

        if (recs.getBaseQuality() < collectWgsMetrics.MINIMUM_BASE_QUALITY ||
                SequenceUtil.isNoCall(recs.getReadBase())) {
            ++basesExcludedByBaseq;
            continue;
        }
        if (!readNames.add(recs.getRecord().getReadName())) {
            ++basesExcludedByOverlap;
            continue;
        }
        pileupSize++;
    }
    final int highQualityDepth = Math.min(pileupSize, coverageCap);
    if (highQualityDepth < pileupSize) basesExcludedByCapping += pileupSize - coverageCap;
    highQualityDepthHistogramArray[highQualityDepth]++;
    unfilteredDepthHistogramArray[unfilteredDepth]++;
}
 
Example #25
Source File: LiftoverVcfTest.java    From picard with MIT License 5 votes vote down vote up
@Test(dataProvider = "indelNoFlipData")
public void testLiftOverIndels(final LiftOver liftOver, final ReferenceSequence reference, final VariantContext source, final VariantContext result) {

    final Interval target = liftOver.liftOver(new Interval(source.getContig(), source.getStart(), source.getEnd()), 1);

    final VariantContext vc = LiftoverUtils.liftVariant(source, target, reference, false, false);
    VcfTestUtils.assertEquals(vc, result);
}
 
Example #26
Source File: MeanQualityByCycle.java    From picard with MIT License 5 votes vote down vote up
@Override
protected void acceptRead(final SAMRecord rec, final ReferenceSequence ref) {
    // Skip unwanted records
    if (PF_READS_ONLY && rec.getReadFailsVendorQualityCheckFlag()) return;
    if (ALIGNED_READS_ONLY && rec.getReadUnmappedFlag()) return;
    if (rec.isSecondaryOrSupplementary()) return;

    q.addRecord(rec);
    oq.addRecord(rec);
}
 
Example #27
Source File: CachingIndexedFastaSequenceFileUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
@Test(dataProvider = "fastas")
public void testCachingIndexedFastaReaderTwoStage(Path fasta, Path unzipped, int cacheSize, int querySize) throws IOException {
    try(final ReferenceSequenceFile uncached = ReferenceSequenceFileFactory.getReferenceSequenceFile(unzipped);
        final CachingIndexedFastaSequenceFile caching = new CachingIndexedFastaSequenceFile(fasta, getCacheSize(cacheSize), true, false)) {

        SAMSequenceRecord contig = uncached.getSequenceDictionary().getSequence(0);

        int middleStart = (contig.getSequenceLength() - querySize) / 2;
        int middleStop = middleStart + querySize;

        logger.debug(String.format(
                "Checking contig %s length %d with cache size %d and query size %d with intermediate query",
                contig.getSequenceName(), contig.getSequenceLength(), cacheSize, querySize));

        for (int i = 0; i < contig.getSequenceLength(); i += 10) {
            int start = i;
            int stop = start + querySize;
            if (stop <= contig.getSequenceLength()) {
                ReferenceSequence grabMiddle = caching.getSubsequenceAt(contig.getSequenceName(), middleStart,
                                                                        middleStop);
                ReferenceSequence cachedVal = caching.getSubsequenceAt(contig.getSequenceName(), start, stop);
                ReferenceSequence uncachedVal = uncached.getSubsequenceAt(contig.getSequenceName(), start, stop);

                Assert.assertEquals(cachedVal.getName(), uncachedVal.getName());
                Assert.assertEquals(cachedVal.getContigIndex(), uncachedVal.getContigIndex());
                Assert.assertEquals(cachedVal.getBases(), uncachedVal.getBases());
            }
        }
    }
}
 
Example #28
Source File: ExtractSequences.java    From picard with MIT License 5 votes vote down vote up
@Override
protected int doWork() {
    IOUtil.assertFileIsReadable(INTERVAL_LIST);
    IOUtil.assertFileIsReadable(REFERENCE_SEQUENCE);
    IOUtil.assertFileIsWritable(OUTPUT);

    final IntervalList intervals = IntervalList.fromFile(INTERVAL_LIST);
    final ReferenceSequenceFile ref = ReferenceSequenceFileFactory.getReferenceSequenceFile(REFERENCE_SEQUENCE);
    SequenceUtil.assertSequenceDictionariesEqual(intervals.getHeader().getSequenceDictionary(), ref.getSequenceDictionary());

    final BufferedWriter out = IOUtil.openFileForBufferedWriting(OUTPUT);

    for (final Interval interval : intervals) {
        final ReferenceSequence seq = ref.getSubsequenceAt(interval.getContig(), interval.getStart(), interval.getEnd());
        final byte[] bases = seq.getBases();
        if (interval.isNegativeStrand()) SequenceUtil.reverseComplement(bases);

        try {
            out.write(">");
            out.write(interval.getName());
            out.write("\n");

            for (int i=0; i<bases.length; ++i) {
                if (i > 0 && i % LINE_LENGTH == 0) out.write("\n");
                out.write(bases[i]);
            }

            out.write("\n");
        }
        catch (IOException ioe) {
            throw new PicardException("Error writing to file " + OUTPUT.getAbsolutePath(), ioe);

        }
    }

    CloserUtil.close(out);

    return 0;
}
 
Example #29
Source File: ReferenceFileSparkSource.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
public Map<String, ReferenceBases> getAllReferenceBases() throws IOException {
    try ( ReferenceSequenceFile referenceSequenceFile = ReferenceSequenceFileFactory.getReferenceSequenceFile(getReferencePath()) ) {
        Map<String, ReferenceBases> bases = new LinkedHashMap<>();
        ReferenceSequence seq;
        while ( (seq = referenceSequenceFile.nextSequence()) != null ) {
            String name = seq.getName();
            bases.put(name, new ReferenceBases(seq.getBases(), new SimpleInterval(name, 1, seq.length())));
        }
        return bases;
    }
}
 
Example #30
Source File: BAQ.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
public BAQCalculationResult calcBAQFromHMM(GATKRead read, ReferenceDataSource refDS) {
    final SimpleInterval referenceWindow = getReferenceWindowForRead(read, getBandWidth());

    if ( referenceWindow.getEnd() > refDS.getSequenceDictionary().getSequence(read.getContig()).getSequenceLength() ) {
        return null;
    } else {
        // now that we have the start and stop, get the reference sequence covering it
        final ReferenceSequence refSeq = refDS.queryAndPrefetch(referenceWindow.getContig(), referenceWindow.getStart(), referenceWindow.getEnd());
        return calcBAQFromHMM(read, refSeq.getBases(), (referenceWindow.getStart() - read.getStart()));
    }
}