htsjdk.samtools.util.Interval Java Examples
The following examples show how to use
htsjdk.samtools.util.Interval.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: BaitDesigner.java From picard with MIT License | 6 votes |
@Override List<Bait> design(final BaitDesigner designer, final Interval target, final ReferenceSequence reference) { final List<Bait> baits = new LinkedList<Bait>(); final int baitSize = designer.BAIT_SIZE; final int baitOffset = designer.BAIT_OFFSET; final int lastPossibleBaitStart = Math.min(target.getEnd(), reference.length() - baitSize); final int baitCount = 1 + (int) Math.floor((lastPossibleBaitStart - target.getStart()) / (double) baitOffset); int i = 0; for (int start = target.getStart(); start < lastPossibleBaitStart; start += baitOffset) { final Bait bait = new Bait(target.getContig(), start, CoordMath.getEnd(start, baitSize), target.isNegativeStrand(), designer.makeBaitName(target.getName(), ++i, baitCount)); bait.addBases(reference, designer.DESIGN_ON_TARGET_STRAND); baits.add(bait); } return baits; }
Example #2
Source File: SNPUMIBasePileupTest.java From Drop-seq with MIT License | 6 votes |
@Test(enabled=true) public void testAddBaseQuals () { int snpPos=76227022; Interval snpInterval = new Interval("HUMAN_1", snpPos, snpPos, true, "test"); SNPUMIBasePileup p = new SNPUMIBasePileup(snpInterval, "ACADM", "fake_cell", "AAAAA"); char [] bases = {'A', 'A'}; byte [] quals = {27,55}; byte [] bases2 = new byte [bases.length]; StringUtil.charsToBytes(bases, 0, bases.length, bases2, 0); boolean passes=true; try { p.setBasesAndQualities(bases2, quals); } catch (IllegalArgumentException e) { passes=false; } Assert.assertTrue(passes); }
Example #3
Source File: IntervalUtilsUnitTest.java From gatk with BSD 3-Clause "New" or "Revised" License | 6 votes |
@Test(expectedExceptions={UserException.MalformedFile.class, UserException.MalformedGenomeLoc.class}, dataProvider="invalidIntervalTestData") public void testLoadIntervalsInvalidPicardIntervalHandling(GenomeLocParser genomeLocParser, String contig, int intervalStart, int intervalEnd ) throws Exception { SAMFileHeader picardFileHeader = new SAMFileHeader(); picardFileHeader.addSequence(genomeLocParser.getContigInfo("1")); // Intentionally add an extra contig not in our genomeLocParser's sequence dictionary // so that we can add intervals to the Picard interval file for contigs not in our dictionary picardFileHeader.addSequence(new SAMSequenceRecord("2", 100000)); IntervalList picardIntervals = new IntervalList(picardFileHeader); picardIntervals.add(new Interval(contig, intervalStart, intervalEnd, true, "dummyname")); File picardIntervalFile = createTempFile("testLoadIntervalsInvalidPicardIntervalHandling", ".intervals"); picardIntervals.write(picardIntervalFile); List<String> intervalArgs = new ArrayList<>(1); intervalArgs.add(picardIntervalFile.getAbsolutePath()); // loadIntervals() will validate all intervals against the sequence dictionary in our genomeLocParser, // and should throw for all intervals in our invalidIntervalTestData set IntervalUtils.loadIntervals(intervalArgs, IntervalSetRule.UNION, IntervalMergingRule.ALL, 0, genomeLocParser); }
Example #4
Source File: SNPUMICellReadIteratorWrapper.java From Drop-seq with MIT License | 6 votes |
/** * Filters and copies reads for generating SNPCellUMIBasePileups. * Filters out reads in which the read cell barcode does not match a barcode in the list (if list is not null) * Reads that are marked as secondary or supplementary are filtered out * Filters reads based on read map quality, removing reads below that quality * Optionally filters reads where the annotated gene and the strand of the read don't match, or can clone a read and return it multiple times * if the read maps to more than one gene and <assignReadsToAllGenes> is true. * @param underlyingIterator Source of reads * @param cellBarcodeTag The cell barcode BAM tag * @param cellBarcodeList A list of cell barcodes, or null to ignore. If populated, reads where the cellBarcodeTag matches one of these Strings will be retained * @param geneTag The gene/exon tag. * @param strandTag The strand tag * @param readMQ The minimum map quality of a read to be retained. */ public SNPUMICellReadIteratorWrapper(final Iterator<SAMRecord> underlyingIterator, final IntervalList snpIntervals, final String cellBarcodeTag, final Collection<String> cellBarcodeList, final String geneTag, final String snpTag, final int readMQ) { super(underlyingIterator); this.cellBarcodeTag = cellBarcodeTag; this.cellBarcodeList = new HashSet<String>(cellBarcodeList); this.geneTag=geneTag; this.snpTag=snpTag; // construct OverlapDetector OverlapDetector<Interval> od = new OverlapDetector<>(0, 0); od.addAll(snpIntervals.getIntervals(), snpIntervals.getIntervals()); this.snpIntervals=od; }
Example #5
Source File: SNPUMIBasePileupTest.java From Drop-seq with MIT License | 6 votes |
@Test(enabled=true) public void testAllReadsPileup() { SamReader reader = SamReaderFactory.makeDefault().open(smallBAMFile); Iterator<SAMRecord> iter = reader.iterator(); int snpPos=76227022; Interval snpInterval = new Interval("HUMAN_1", snpPos, snpPos, true, "test"); SNPUMIBasePileup p = new SNPUMIBasePileup(snpInterval, "ACADM", "fake_cell", "AAAAA"); while (iter.hasNext()) p.addRead(iter.next()); List<Character> bases= p.getBasesAsCharacters(); ObjectCounter<Character> baseCounter = new ObjectCounter<>(); for (Character c: bases) baseCounter.increment(c); Assert.assertEquals(6, baseCounter.getCountForKey('A')); Assert.assertEquals(7, baseCounter.getCountForKey('G')); Assert.assertNotNull(p.toString()); }
Example #6
Source File: GenomicsDBImport.java From gatk with BSD 3-Clause "New" or "Revised" License | 6 votes |
/** * Before traversal, fix configuration parameters and initialize * GenomicsDB. Hard-coded to handle only VCF files and headers */ @Override public void onTraversalStart() { if (getIntervalsFromExistingWorkspace) { final IntervalList outputList = new IntervalList(getBestAvailableSequenceDictionary()); intervals.forEach(i -> outputList.add(new Interval(i.getContig(), i.getStart(), i.getEnd()))); final Path intervalListOutputPath = IOUtils.getPath(intervalListOutputPathString); outputList.write(intervalListOutputPath.toFile()); return; } String workspaceDir = BucketUtils.makeFilePathAbsolute(overwriteCreateOrCheckWorkspace()); vidMapJSONFile = IOUtils.appendPathToDir(workspaceDir, GenomicsDBConstants.DEFAULT_VIDMAP_FILE_NAME); callsetMapJSONFile = IOUtils.appendPathToDir(workspaceDir, GenomicsDBConstants.DEFAULT_CALLSETMAP_FILE_NAME); vcfHeaderFile = IOUtils.appendPathToDir(workspaceDir, GenomicsDBConstants.DEFAULT_VCFHEADER_FILE_NAME); if (doIncrementalImport) { logger.info("Callset Map JSON file will be re-written to " + callsetMapJSONFile); logger.info("Incrementally importing to workspace - " + workspaceDir); } else { logger.info("Vid Map JSON file will be written to " + vidMapJSONFile); logger.info("Callset Map JSON file will be written to " + callsetMapJSONFile); logger.info("Complete VCF Header will be written to " + vcfHeaderFile); logger.info("Importing to workspace - " + workspaceDir); } initializeInputPreloadExecutorService(); }
Example #7
Source File: CreateSnpIntervalFromVcfTest.java From Drop-seq with MIT License | 6 votes |
@Test public void testBasic() throws IOException { final CreateSnpIntervalFromVcf clp = new CreateSnpIntervalFromVcf(); clp.INPUT = TEST_FILE; clp.OUTPUT = File.createTempFile("CreateSnpIntervalFromVcfTest.", ".intervals"); clp.OUTPUT.deleteOnExit(); Assert.assertEquals(clp.doWork(), 0); final IntervalList intervals = IntervalList.fromFile(clp.OUTPUT); // 6 variants in input, but one is an indel and one is filtered Assert.assertEquals(intervals.size(), EXPECTED_START_POS.length); final Iterator<Interval> it = intervals.iterator(); for (final int startPos : EXPECTED_START_POS) { Assert.assertTrue(it.hasNext()); final Interval interval = it.next(); Assert.assertEquals(interval.getContig(), EXPECTED_CONTIG); Assert.assertEquals(interval.getStart(), startPos); Assert.assertEquals(interval.length(), 1); } Assert.assertFalse(it.hasNext()); }
Example #8
Source File: TargetMetricsCollector.java From picard with MIT License | 6 votes |
/** Emits a file with per base coverage if an output file has been set. */ private void emitPerBaseCoverageIfRequested() { if (this.perBaseOutput == null) return; final PrintWriter out = new PrintWriter(IOUtil.openFileForBufferedWriting(this.perBaseOutput)); out.println("chrom\tpos\ttarget\tcoverage"); for (final Map.Entry<Interval,Coverage> entry : this.highQualityCoverageByTarget.entrySet()) { final Interval interval = entry.getKey(); final String chrom = interval.getContig(); final int firstBase = interval.getStart(); final int[] cov = entry.getValue().getDepths(); for (int i = 0; i < cov.length; ++i) { out.print(chrom); out.print('\t'); out.print(firstBase + i); out.print('\t'); out.print(interval.getName()); out.print('\t'); out.print(cov[i]); out.println(); } } out.close(); }
Example #9
Source File: BAMInputFormat.java From Hadoop-BAM with MIT License | 6 votes |
/** * Converts an interval in SimpleInterval format into an htsjdk QueryInterval. * * In doing so, a header lookup is performed to convert from contig name to index * * @param interval interval to convert * @param sequenceDictionary sequence dictionary used to perform the conversion * @return an equivalent interval in QueryInterval format */ private static QueryInterval convertSimpleIntervalToQueryInterval( final Interval interval, final SAMSequenceDictionary sequenceDictionary ) { if (interval == null) { throw new IllegalArgumentException("interval may not be null"); } if (sequenceDictionary == null) { throw new IllegalArgumentException("sequence dictionary may not be null"); } final int contigIndex = sequenceDictionary.getSequenceIndex(interval.getContig()); if ( contigIndex == -1 ) { throw new IllegalArgumentException("Contig " + interval.getContig() + " not present in reads sequence " + "dictionary"); } return new QueryInterval(contigIndex, interval.getStart(), interval.getEnd()); }
Example #10
Source File: MultiCellDigitalAlleleCountsIterator.java From Drop-seq with MIT License | 6 votes |
@Override public MultiCellDigitalAlleleCounts next() { if (!iter.hasNext()) return (null); // get the pileup object and make the initial DAC with the first pileup. DigitalAlleleCounts dac = iter.next(); MultiCellDigitalAlleleCounts multiDAC = new MultiCellDigitalAlleleCounts(dac.getSnpInterval(), dac.getGene()); String currentGene = dac.getGene(); Interval currentSNP = dac.getSnpInterval(); multiDAC.add(dac); // get SNPUMIBasePileup objects until the gene, cell, or snpInterval changes. while (this.iter.hasNext()) { dac=iter.peek(); // check the next object to see if it should be added. if (dac.getGene().equals(currentGene) && dac.getSnpInterval().equals(currentSNP)) { dac=iter.next(); // convert from the peeked object to the real object to advance the iterator. multiDAC.add(dac); } else { break; // the next peeked object was a a pileup for a different DAC, quit the loop. } } return (multiDAC); }
Example #11
Source File: IntervalTagComparator.java From Drop-seq with MIT License | 6 votes |
public static int compare (final Interval i1, final Interval i2, final SAMSequenceDictionary dict) { int result = 0; // if there's a sequence dictionary, compare on the index of the sequence instead of the contig name. if (dict!=null) { int seqIdx1 = dict.getSequenceIndex(i1.getContig()); int seqIdx2 = dict.getSequenceIndex(i2.getContig()); result = seqIdx1 - seqIdx2; } else result = i1.getContig().compareTo(i2.getContig()); // same as Interval.compareTo if (result == 0) { if (i1.getStart() == i2.getStart()) result = i1.getEnd() - i2.getEnd(); else result = i1.getStart() - i2.getStart(); // added bonus, sort on interval names to tie break, if both intervals have names. if (result==0) { String n1 = i1.getName(); String n2 = i2.getName(); if (n1!=null && n2!=null) result = n1.compareTo(n2); } } return result; }
Example #12
Source File: FilterIntervals.java From gatk with BSD 3-Clause "New" or "Revised" License | 6 votes |
@Override public Object doWork() { validateArguments(); resolveAndValidateIntervals(); final SimpleIntervalCollection filteredIntervals = filterIntervals(); logger.info(String.format("Writing filtered intervals to %s...", outputFilteredIntervalsFile.getAbsolutePath())); final IntervalList filteredIntervalList = new IntervalList(filteredIntervals.getMetadata().getSequenceDictionary()); filteredIntervals.getIntervals().forEach(i -> filteredIntervalList.add(new Interval(i))); filteredIntervalList.write(outputFilteredIntervalsFile); logger.info(String.format("%s complete.", getClass().getSimpleName())); return null; }
Example #13
Source File: IntervalListToBed.java From picard with MIT License | 6 votes |
@Override protected int doWork() { IOUtil.assertFileIsReadable(INPUT); IOUtil.assertFileIsWritable(OUTPUT); IntervalList intervals = IntervalList.fromFile(INPUT); if (SORT) intervals = intervals.sorted(); try { final BufferedWriter out = IOUtil.openFileForBufferedWriting(OUTPUT); for (final Interval i : intervals) { final String strand = i.isNegativeStrand() ? "-" : "+"; final List<?> fields = CollectionUtil.makeList(i.getContig(), i.getStart()-1, i.getEnd(), i.getName(), SCORE, strand); out.append(fields.stream().map(String::valueOf).collect(Collectors.joining("\t"))); out.newLine(); } out.close(); } catch (IOException ioe) { throw new RuntimeIOException(ioe); } return 0; }
Example #14
Source File: TestBAMInputFormat.java From Hadoop-BAM with MIT License | 6 votes |
@Test public void testIntervalsAndUnmapped() throws Exception { input = BAMTestUtil.writeBamFile(1000, SAMFileHeader.SortOrder.coordinate) .getAbsolutePath(); List<Interval> intervals = new ArrayList<Interval>(); intervals.add(new Interval("chr21", 5000, 9999)); // includes two unpaired fragments intervals.add(new Interval("chr21", 20000, 22999)); completeSetupWithBoundedTraversal(intervals, true); jobContext.getConfiguration().setInt(FileInputFormat.SPLIT_MAXSIZE, 40000); BAMInputFormat inputFormat = new BAMInputFormat(); List<InputSplit> splits = inputFormat.getSplits(jobContext); assertEquals(2, splits.size()); List<SAMRecord> split0Records = getSAMRecordsFromSplit(inputFormat, splits.get(0)); List<SAMRecord> split1Records = getSAMRecordsFromSplit(inputFormat, splits.get(1)); assertEquals(16, split0Records.size()); assertEquals(2, split1Records.size()); }
Example #15
Source File: TestVCFInputFormat.java From Hadoop-BAM with MIT License | 6 votes |
@Parameterized.Parameters public static Collection<Object> data() { return Arrays.asList(new Object[][] { {"test.vcf", NUM_SPLITS.ANY, null}, {"test.vcf.gz", NUM_SPLITS.EXACTLY_ONE, null}, {"test.vcf.bgzf.gz", NUM_SPLITS.ANY, null}, // BCF tests currently fail due to https://github.com/samtools/htsjdk/issues/507 // {"test.uncompressed.bcf", NUM_SPLITS.ANY, null}, // {"test.bgzf.bcf", NUM_SPLITS.ANY, null}, {"HiSeq.10000.vcf", NUM_SPLITS.MORE_THAN_ONE, null}, {"HiSeq.10000.vcf.gz", NUM_SPLITS.EXACTLY_ONE, null}, {"HiSeq.10000.vcf.bgzf.gz", NUM_SPLITS.MORE_THAN_ONE, null}, {"HiSeq.10000.vcf.bgzf.gz", NUM_SPLITS.EXACTLY_ONE, new Interval("chr1", 2700000, 2800000)}, // chosen to fall in one split {"HiSeq.10000.vcf.bgz", NUM_SPLITS.MORE_THAN_ONE, null}, {"HiSeq.10000.vcf.bgz", NUM_SPLITS.EXACTLY_ONE, new Interval("chr1", 2700000, 2800000)} // chosen to fall in one split }); }
Example #16
Source File: SNPUMIBasePileupTest.java From Drop-seq with MIT License | 5 votes |
@Test(enabled=true) public void singleReadTest2() { int snpPos=76227022; Interval snpInterval = new Interval("HUMAN_1", snpPos, snpPos, true, "test"); SNPUMIBasePileup p = new SNPUMIBasePileup(snpInterval, "ACADM", "fake_cell", "AAAAA"); SAMRecord r1 = getReadByName("NS500217:67:H14GMBGXX:1:13306:23964:12839", this.smallBAMFile); p.addRead(r1); List<Character> bases = p.getBasesAsCharacters(); Assert.assertTrue('G'==bases.get(0)); Assert.assertTrue(32==p.getQualities().get(0)); }
Example #17
Source File: ByIntervalListVariantContextIterator.java From picard with MIT License | 5 votes |
/** If the current iterator is null or exhausted, move to the next interval. */ private void advance() { while ((currentIterator == null || !currentIterator.hasNext()) && this.intervals.hasNext()) { if (currentIterator != null) currentCloseableIterator.close(); final Interval interval = this.intervals.next(); final Interval previousInterval = this.lastInterval; this.currentCloseableIterator = this.reader.query(interval.getContig(), interval.getStart(), interval.getEnd()); this.currentIterator = this.currentCloseableIterator.stream().filter ( ctx -> null == previousInterval || !overlapsInterval(ctx, previousInterval) ).iterator(); this.lastInterval = interval; } }
Example #18
Source File: VcfFileSegmentGeneratorTest.java From picard with MIT License | 5 votes |
@Test public void ensureOverlapExclusionTest() { final OverlapDetector<Interval> oneTinyIntervalDetector = new OverlapDetector<Interval>(0, 0); final Interval theInterval = new Interval("1", 5, 10); oneTinyIntervalDetector.addLhs(theInterval, theInterval); final VcfFileSegmentGenerator noFilter = VcfFileSegmentGenerator.byWholeContigSubdividingWithWidth(TEN_MILLION); Assert.assertEquals(Iterables.size(noFilter.forVcf(VCF_WITH_LOGS_OF_GAPS)), 382); // The number of subdivisions of 10 million of this vcf final VcfFileSegmentGenerator allFiltered = VcfFileSegmentGenerator.excludingNonOverlaps(noFilter, oneTinyIntervalDetector); Assert.assertEquals(Iterables.size(allFiltered.forVcf(VCF_WITH_LOGS_OF_GAPS)), 1); }
Example #19
Source File: LiftoverUtils.java From picard with MIT License | 5 votes |
protected static VariantContextBuilder liftSimpleVariantContext(VariantContext source, Interval target) { if (target == null || source.getReference().length() != target.length()) { return null; } // Build the new variant context. Note: this will copy genotypes, annotations and filters final VariantContextBuilder builder = new VariantContextBuilder(source); builder.chr(target.getContig()); builder.start(target.getStart()); builder.stop(target.getEnd()); return builder; }
Example #20
Source File: MendelianViolationDetector.java From picard with MIT License | 5 votes |
/** * Tests whether the variant is within one of the pseudo-autosomal regions */ private boolean isInPseudoAutosomalRegion(final String chr, final int pos) { for (final Interval par : parIntervals) { if (par.getContig().equals(chr) && pos >= par.getStart() && pos <= par.getEnd()) return true; } return false; }
Example #21
Source File: MendelianViolationDetector.java From picard with MIT License | 5 votes |
MendelianViolationDetector(final Set<String> skip_chroms, final Set<String> male_chroms, final Set<String> female_chroms, final double min_het_fraction, final int min_gq, final int min_dp, final List<MendelianViolationMetrics> trios, final List<Interval> parIntervals, final ProgressLogger logger) { SKIP_CHROMS = skip_chroms; MALE_CHROMS = male_chroms; FEMALE_CHROMS = female_chroms; MIN_HET_FRACTION = min_het_fraction; MIN_GQ = min_gq; MIN_DP = min_dp; this.trios = trios; this.parIntervals = parIntervals; this.logger = logger; familyToViolations = new MendelianViolationsByFamily(); }
Example #22
Source File: FindMendelianViolations.java From picard with MIT License | 5 votes |
private Set<Interval> parseIntervalLists(final Set<String> intervalLists){ // Parse the PARs final Set<Interval> intervals = new HashSet<>(intervalLists.size()); for (final String par : intervalLists) { final String[] splits1 = par.split(":"); final String[] splits2 = splits1[1].split("-"); intervals.add(new Interval(splits1[0], Integer.parseInt(splits2[0]), Integer.parseInt(splits2[1]))); } return intervals; }
Example #23
Source File: BaitDesigner.java From picard with MIT License | 5 votes |
/** Writes a Bait out in fasta format to an output BufferedWriter. */ private void writeBaitFasta(final BufferedWriter out, final Interval i, final boolean rc) { try { final Bait bait = (Bait) i; out.append(">"); out.append(bait.getName()); out.newLine(); final String sequence = getBaitSequence(bait, rc); out.append(sequence); out.newLine(); } catch (IOException ioe) { throw new PicardException("Error writing out bait information.", ioe); } }
Example #24
Source File: ExtractSequences.java From picard with MIT License | 5 votes |
@Override protected int doWork() { IOUtil.assertFileIsReadable(INTERVAL_LIST); IOUtil.assertFileIsReadable(REFERENCE_SEQUENCE); IOUtil.assertFileIsWritable(OUTPUT); final IntervalList intervals = IntervalList.fromFile(INTERVAL_LIST); final ReferenceSequenceFile ref = ReferenceSequenceFileFactory.getReferenceSequenceFile(REFERENCE_SEQUENCE); SequenceUtil.assertSequenceDictionariesEqual(intervals.getHeader().getSequenceDictionary(), ref.getSequenceDictionary()); final BufferedWriter out = IOUtil.openFileForBufferedWriting(OUTPUT); for (final Interval interval : intervals) { final ReferenceSequence seq = ref.getSubsequenceAt(interval.getContig(), interval.getStart(), interval.getEnd()); final byte[] bases = seq.getBases(); if (interval.isNegativeStrand()) SequenceUtil.reverseComplement(bases); try { out.write(">"); out.write(interval.getName()); out.write("\n"); for (int i=0; i<bases.length; ++i) { if (i > 0 && i % LINE_LENGTH == 0) out.write("\n"); out.write(bases[i]); } out.write("\n"); } catch (IOException ioe) { throw new PicardException("Error writing to file " + OUTPUT.getAbsolutePath(), ioe); } } CloserUtil.close(out); return 0; }
Example #25
Source File: CollectTargetedMetricsTest.java From picard with MIT License | 5 votes |
@Test public void testCoverageGetTotalOverflow() { final Interval interval = new Interval("chr1", 1, 2); final TargetMetricsCollector.Coverage coverage = new TargetMetricsCollector.Coverage(interval, 0); for (int offset = 0; offset <= interval.length(); offset++) { coverage.addBase(offset, Integer.MAX_VALUE - 1); } Assert.assertEquals((long)coverage.getTotal(), 2 * (long)(Integer.MAX_VALUE - 1)); }
Example #26
Source File: SNPUMIBasePileupTest.java From Drop-seq with MIT License | 5 votes |
@Test(enabled=true) public void singleReadTest1() { int snpPos=76227022; Interval snpInterval = new Interval("HUMAN_1", snpPos, snpPos, true, "test"); SNPUMIBasePileup p = new SNPUMIBasePileup(snpInterval, "ACADM", "fake_cell", "AAAAA"); SAMRecord r1 = getReadByName("NS500217:67:H14GMBGXX:4:13611:8735:3829", this.smallBAMFile); p.addRead(r1); List<Character> bases = p.getBasesAsCharacters(); Assert.assertTrue('A'==bases.get(0)); Assert.assertTrue(8==p.getQualities().get(0)); }
Example #27
Source File: ByIntervalListVariantContextIteratorTest.java From picard with MIT License | 5 votes |
@Test public void testNoOverlapDifferentContig() { final IntervalList intervalList = new IntervalList(header); intervalList.add(new Interval("3", 167166899, 167166899)); final VCFFileReader reader = getReader(CEU_TRIOS_SNPS_VCF); final Iterator<VariantContext> iterator = new ByIntervalListVariantContextIterator(reader, intervalList); Assert.assertFalse(iterator.hasNext()); reader.close(); }
Example #28
Source File: SplitIntervals.java From gatk with BSD 3-Clause "New" or "Revised" License | 5 votes |
@Override public void onTraversalStart() { ParamUtils.isPositive(scatterCount, "scatter-count must be > 0."); if (!outputDir.exists() && !outputDir.mkdir()) { throw new RuntimeIOException("Unable to create directory: " + outputDir.getAbsolutePath()); } // in general dictionary will be from the reference, but using -I reads.bam or -F variants.vcf // to use the sequence dict from a bam or vcf is also supported final SAMSequenceDictionary sequenceDictionary = getBestAvailableSequenceDictionary(); final List<SimpleInterval> intervals = hasUserSuppliedIntervals() ? intervalArgumentCollection.getIntervals(sequenceDictionary) : IntervalUtils.getAllIntervalsForReference(sequenceDictionary).stream() .filter(contig -> contig.getLengthOnReference() >= minContigSize) .collect(Collectors.toList()); final IntervalList intervalList = new IntervalList(sequenceDictionary); intervals.stream().map(si -> new Interval(si.getContig(), si.getStart(), si.getEnd())).forEach(intervalList::add); final IntervalListScatterer scatterer = subdivisionMode.make(); final List<IntervalList> scattered = scatterer.scatter(intervalList, scatterCount); // optionally split interval lists that contain intervals from multiple contigs final List<IntervalList> scatteredFinal = !dontMixContigs ? scattered : scattered.stream().flatMap(il -> il.getIntervals().stream() .collect(Collectors.groupingBy(Interval::getContig)).entrySet().stream() // group each interval list into sublists .sorted(Comparator.comparingInt(entry -> sequenceDictionary.getSequenceIndex(entry.getKey()))) // sort entries by contig .map(entry -> entry.getValue()) // discard the keys and just keep the lists of intervals .map(list -> { final IntervalList singleContigList = new IntervalList(sequenceDictionary); singleContigList.addall(list); return singleContigList; }) // turn the intervals back into an IntervalList ).collect(Collectors.toList()); final int maxNumberOfPlaces = Math.max((int)Math.floor(Math.log10(scatterCount-1))+1, DEFAULT_NUMBER_OF_DIGITS); final String formatString = "%0" + maxNumberOfPlaces + "d"; IntStream.range(0, scatteredFinal.size()).forEach(n -> scatteredFinal.get(n).write(new File(outputDir, String.format(formatString, n) + extension))); }
Example #29
Source File: ByIntervalListVariantContextIteratorTest.java From picard with MIT License | 5 votes |
@Test public void testVariantOverlappingMultipleIntervalsIsReturnedOnlyOnce() { final IntervalList intervalList = new IntervalList(header); intervalList.add(new Interval("12", 68921962, 68921962)); // deletion spans this intervalList.add(new Interval("12", 68921964, 68921964)); // deletion spans this final VCFFileReader reader = getReader(CEU_TRIOS_INDELS_VCF); final Iterator<VariantContext> iterator = new ByIntervalListVariantContextIterator(reader, intervalList); Assert.assertTrue(iterator.hasNext()); final VariantContext ctx = iterator.next(); Assert.assertEquals(ctx.getStart(), 68921960); Assert.assertEquals(ctx.getEnd(), 68921966); Assert.assertFalse(iterator.hasNext()); reader.close(); }
Example #30
Source File: GQuadruplexTest.java From Drop-seq with MIT License | 5 votes |
@Test public void f() { // handy examples http://bsbe.iiti.ac.in/bsbe/ipdb/g4dna.php // I trimmed a set of starting T's off the 5' end. String seq1 = "GGGGACTTTCCGGGAGGCGTGGGGGTTTTTGGGGG"; // not a G-quadraplex. Random sequence from the start of chromosome 1, hg19. String seq4 = "GGACGCATTTAAAGCAGTGTGTAAAGAGACATTTATAGCACTAAATGCCCACAAGAGACCTCTGCCTGAGAACGTGGGTTTCAGCCTAAGAGTTGTAATA"; List<GQuadruplex> r1 = GQuadruplex.find("valid1", seq1); List<GQuadruplex> r4 = GQuadruplex.find("invalid1", seq4); Assert.assertNotNull(r1); Assert.assertTrue(r1.size()==1); GQuadruplex t1= r1.get(0); // only 1 is found. Assert.assertEquals(35, t1.getMatchInterval().length()); Assert.assertEquals(t1.getMatchInterval(), new Interval ("valid1", 1,35)); //G1: GGGG //L1: ACTTTCC //G2: GGG //L2: AGGCGTG //G3: GGGG //L3: TTTTTGG //G4: GGG Assert.assertEquals(t1.getG1(), "GGGG"); Assert.assertEquals(t1.getG2(), "GGG"); Assert.assertEquals(t1.getG3(), "GGGG"); Assert.assertEquals(t1.getG4(), "GGG"); Assert.assertEquals(t1.getL1(), "ACTTTCC"); Assert.assertEquals(t1.getL2(), "AGGCGTG"); Assert.assertEquals(t1.getL3(), "TTTTTGG"); t1.toString(); }