Java Code Examples for htsjdk.samtools.SAMSequenceDictionary#addSequence()
The following examples show how to use
htsjdk.samtools.SAMSequenceDictionary#addSequence() .
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: BGZF_ReferenceSequenceFile.java From cramtools with Apache License 2.0 | 6 votes |
public BGZF_ReferenceSequenceFile(File file) throws FileNotFoundException { if (!file.canRead()) throw new RuntimeException("Cannot find or read fasta file: " + file.getAbsolutePath()); File indexFile = new File(file.getAbsolutePath() + ".fai"); if (!indexFile.canRead()) throw new RuntimeException("Cannot find or read fasta index file: " + indexFile.getAbsolutePath()); Scanner scanner = new Scanner(indexFile); int seqID = 0; dictionary = new SAMSequenceDictionary(); while (scanner.hasNextLine()) { String line = scanner.nextLine(); FAIDX_FastaIndexEntry entry = FAIDX_FastaIndexEntry.fromString(seqID++, line); index.put(entry.getName(), entry); dictionary.addSequence(new SAMSequenceRecord(entry.getName(), entry.getLen())); } scanner.close(); if (index.isEmpty()) log.warn("No entries in the index: " + indexFile.getAbsolutePath()); is = new BlockCompressedInputStream(new SeekableFileStream(file)); }
Example 2
Source File: SamRegionRestrictionTest.java From rtg-tools with BSD 2-Clause "Simplified" License | 6 votes |
public void testSomeMethod() { SamRegionRestriction srr = new SamRegionRestriction("fooo"); final SAMSequenceDictionary sdd = new SAMSequenceDictionary(); sdd.addSequence(new SAMSequenceRecord("fooo", 9876)); assertEquals(0, srr.resolveStart()); assertEquals(9876, srr.resolveEnd(sdd)); assertEquals("fooo", srr.getSequenceName()); assertEquals(RegionRestriction.MISSING, srr.getStart()); assertEquals(RegionRestriction.MISSING, srr.getEnd()); assertEquals("fooo", srr.toString()); srr = new SamRegionRestriction("fooo", 75, 555); assertEquals(75, srr.resolveStart()); assertEquals(555, srr.resolveEnd(sdd)); assertEquals("fooo", srr.getSequenceName()); assertEquals(75, srr.getStart()); assertEquals(555, srr.getEnd()); assertEquals("fooo:76-555", srr.toString()); srr = new SamRegionRestriction("fooo", 75, RegionRestriction.MISSING); assertEquals("fooo:76", srr.toString()); }
Example 3
Source File: HaplotypeMapTest.java From picard with MIT License | 6 votes |
@DataProvider(name="haplotypeMapForWriting") public Object[][] haplotypeMapForWriting() { SAMFileHeader header = new SAMFileHeader(); header.setSortOrder(SAMFileHeader.SortOrder.coordinate); SAMSequenceDictionary sd = new SAMSequenceDictionary(); sd.addSequence(new SAMSequenceRecord("chr1", 101)); sd.addSequence(new SAMSequenceRecord("chr2", 101)); sd.addSequence(new SAMSequenceRecord("chr3", 101)); header.setSequenceDictionary(sd); HaplotypeMap newMap = new HaplotypeMap(header); HaplotypeBlock t1 = new HaplotypeBlock(0.151560926); t1.addSnp(new Snp("snp2", "chr1", 83, (byte)'A', (byte)'G', .16, null)); t1.addSnp(new Snp("snp1", "chr1", 51, (byte)'T', (byte)'C', 0.15, Collections.singletonList("SQNM_1CHIP_FingerprintAssays"))); newMap.addHaplotype(t1); HaplotypeBlock t2 = new HaplotypeBlock(.02d); t2.addSnp(new Snp("snp3", "chr2", 24, (byte)'C', (byte)'A', .20, null)); newMap.addHaplotype(t2); return new Object[][]{{newMap}}; }
Example 4
Source File: PSFilterTest.java From gatk with BSD 3-Clause "New" or "Revised" License | 6 votes |
@Test(groups = "spark") public void testDoSetPairFlags() { final JavaSparkContext ctx = SparkContextFactory.getTestSparkContext(); final SAMSequenceDictionary seq = new SAMSequenceDictionary(); seq.addSequence(new SAMSequenceRecord("test_seq", 1000)); final SAMFileHeader header = new SAMFileHeader(seq); final List<GATKRead> readList = makeReadSet(header); final JavaRDD<GATKRead> reads = ctx.parallelize(readList); ; final List<GATKRead> result = PSFilter.setPairFlags(reads, 100).collect(); Assert.assertEquals(result.size(), 6); for (final GATKRead read : result) { if (read.getName().equals("paired_1") || read.getName().equals("paired_2")) { Assert.assertTrue(read.isPaired()); } else { Assert.assertFalse(read.isPaired()); } } }
Example 5
Source File: IntervalUtilsUnitTest.java From gatk with BSD 3-Clause "New" or "Revised" License | 6 votes |
@Test public void testCompareLocatables() throws Exception { final SAMSequenceDictionary dict = new SAMSequenceDictionary(); dict.addSequence(new SAMSequenceRecord("1", 1000)); dict.addSequence(new SAMSequenceRecord("2", 1000)); final SimpleInterval chr1_1_100 = new SimpleInterval("1:1-100"); final SimpleInterval chr1_5_100 = new SimpleInterval("1:5-100"); final SimpleInterval chr2_1_100 = new SimpleInterval("2:1-100"); // equal intervals comparison return 0 Assert.assertEquals(IntervalUtils.compareLocatables(chr1_1_100, chr1_1_100, dict), 0); Assert.assertEquals(IntervalUtils.compareLocatables(chr1_5_100, chr1_5_100, dict), 0); Assert.assertEquals(IntervalUtils.compareLocatables(chr2_1_100, chr2_1_100, dict), 0); // first < second return negative Assert.assertTrue(IntervalUtils.compareLocatables(chr1_1_100, chr1_5_100, dict) < 0); Assert.assertTrue(IntervalUtils.compareLocatables(chr1_1_100, chr2_1_100, dict) < 0); Assert.assertTrue(IntervalUtils.compareLocatables(chr1_5_100, chr2_1_100, dict) < 0); // first > second return positive Assert.assertTrue(IntervalUtils.compareLocatables(chr2_1_100, chr1_1_100, dict) > 0); Assert.assertTrue(IntervalUtils.compareLocatables(chr2_1_100, chr1_5_100, dict) > 0); Assert.assertTrue(IntervalUtils.compareLocatables(chr1_5_100, chr1_1_100, dict) > 0); }
Example 6
Source File: IntervalUtilsUnitTest.java From gatk with BSD 3-Clause "New" or "Revised" License | 6 votes |
@DataProvider public Object[][] testLocatableOrderingData() { final SAMSequenceDictionary dict = new SAMSequenceDictionary(); dict.addSequence(new SAMSequenceRecord("1", 1000)); dict.addSequence(new SAMSequenceRecord("2", 1000)); return new Object[][] { // first, second, dictionary, expectedIsBefore, expectedIsAfter, expectedContigComparison { new SimpleInterval("1", 1, 100), new SimpleInterval("1", 200, 300), dict, true, false, 0 }, { new SimpleInterval("1", 1, 100), new SimpleInterval("1", 101, 200), dict, true, false, 0 }, { new SimpleInterval("1", 1, 100), new SimpleInterval("1", 100, 200), dict, false, false, 0 }, { new SimpleInterval("1", 1, 100), new SimpleInterval("1", 99, 200), dict, false, false, 0 }, { new SimpleInterval("1", 200, 300), new SimpleInterval("1", 1, 100), dict, false, true, 0 }, { new SimpleInterval("1", 101, 200), new SimpleInterval("1", 1, 100), dict, false, true, 0 }, { new SimpleInterval("1", 100, 200), new SimpleInterval("1", 1, 100), dict, false, false, 0 }, { new SimpleInterval("1", 99, 200), new SimpleInterval("1", 1, 100), dict, false, false, 0 }, { new SimpleInterval("1", 1, 100), new SimpleInterval("2", 1, 100), dict, true, false, -1 }, { new SimpleInterval("1", 1, 100), new SimpleInterval("2", 101, 200), dict, true, false, -1 }, { new SimpleInterval("1", 101, 200), new SimpleInterval("2", 1, 100), dict, true, false, -1 }, { new SimpleInterval("2", 1, 100), new SimpleInterval("1", 1, 100), dict, false, true, 1 }, { new SimpleInterval("2", 101, 200), new SimpleInterval("1", 1, 100), dict, false, true, 1 }, { new SimpleInterval("2", 1, 100), new SimpleInterval("1", 101, 200), dict, false, true, 1 } }; }
Example 7
Source File: GenomeLocParserUnitTest.java From gatk with BSD 3-Clause "New" or "Revised" License | 6 votes |
@Test public void testContigHasColon() { SAMFileHeader header = new SAMFileHeader(); header.setSortOrder(htsjdk.samtools.SAMFileHeader.SortOrder.coordinate); SAMSequenceDictionary dict = new SAMSequenceDictionary(); SAMSequenceRecord rec = new SAMSequenceRecord("c:h:r1", 10); rec.setSequenceLength(10); dict.addSequence(rec); header.setSequenceDictionary(dict); final GenomeLocParser myGenomeLocParser = new GenomeLocParser(header.getSequenceDictionary()); GenomeLoc loc = myGenomeLocParser.parseGenomeLoc("c:h:r1:4-5"); assertEquals(0, loc.getContigIndex()); assertEquals(loc.getStart(), 4); assertEquals(loc.getStop(), 5); }
Example 8
Source File: IntervalUtilsUnitTest.java From gatk with BSD 3-Clause "New" or "Revised" License | 6 votes |
@Test public void testMalformedAmbiguousQuerySucceeds() { final SAMSequenceDictionary sd = getAmbiguousSequenceDictionary(); final String queryString = "contig:abc-dce"; sd.addSequence(new SAMSequenceRecord("contig", 99)); sd.addSequence(new SAMSequenceRecord(queryString, 99)); // the query string matches as a full contig, but also has a prefix that matches the contig "contig". When // viewed as the latter, however, the start-end positions fail to parse as numbers. This resolves as a query // against the longer contig "contig:abc-dce", and logs a warning HashSet<SimpleInterval> expectedIntervals = new HashSet<>(); expectedIntervals.add(new SimpleInterval(queryString, 1, 99)); Assert.assertEquals( new HashSet<>(IntervalUtils.getResolvedIntervals(queryString, sd)), new HashSet<>(expectedIntervals)); }
Example 9
Source File: IntervalUtilsUnitTest.java From gatk with BSD 3-Clause "New" or "Revised" License | 6 votes |
@Test public void testTerminatingPlusSign() { // special case test for a query "prefix:+", where "prefix:" is a valid contig final SAMSequenceDictionary sd = getAmbiguousSequenceDictionary(); final String contigPrefix = "prefix:"; sd.addSequence(new SAMSequenceRecord(contigPrefix, 99)); Assert.assertEquals( new HashSet<>(IntervalUtils.getResolvedIntervals(contigPrefix + "+", sd)), new HashSet<>()); sd.addSequence(new SAMSequenceRecord(contigPrefix + "+", 99)); HashSet<SimpleInterval> expectedIntervals = new HashSet<>(); expectedIntervals.add(new SimpleInterval("prefix:+", 1, 99)); Assert.assertEquals( new HashSet<>(IntervalUtils.getResolvedIntervals(contigPrefix + "+", sd)), expectedIntervals); }
Example 10
Source File: IntervalUtilsUnitTest.java From gatk with BSD 3-Clause "New" or "Revised" License | 5 votes |
@Test(expectedExceptions = NumberFormatException.class) public void testMalformedQueryFail() { final SAMSequenceDictionary sd = getAmbiguousSequenceDictionary(); sd.addSequence(new SAMSequenceRecord("contig:1-10", 99)); sd.addSequence(new SAMSequenceRecord("contig", 99)); IntervalUtils.getResolvedIntervals("contig:abc-dce", sd); }
Example 11
Source File: SamBamUtils.java From chipster with MIT License | 5 votes |
public void normaliseBam(File bamFile, File normalisedBamFile) { // Read in a BAM file and its header SAMFileReader.setDefaultValidationStringency(ValidationStringency.SILENT); SAMFileReader reader = new SAMFileReader(IOUtil.openFileForReading(bamFile)); SAMFileWriter writer = null; try { SAMFileHeader normalisedHeader = reader.getFileHeader(); // Alter the chromosome names in header's SAMSequenceDictionary SAMSequenceDictionary normalisedDictionary = new SAMSequenceDictionary(); for (SAMSequenceRecord sequenceRecord : normalisedHeader.getSequenceDictionary().getSequences()) { // Normalise chromosome String sequenceName = chromosomeNormaliser.normaliseChromosome(sequenceRecord.getSequenceName()); normalisedDictionary.addSequence(new SAMSequenceRecord(sequenceName, sequenceRecord.getSequenceLength())); } normalisedHeader.setSequenceDictionary(normalisedDictionary); // Write new BAM file with normalised chromosome names writer = new SAMFileWriterFactory().makeBAMWriter(normalisedHeader, true, normalisedBamFile); for (final SAMRecord rec : reader) { rec.setHeader(normalisedHeader); writer.addAlignment(rec); } } finally { closeIfPossible(reader); closeIfPossible(writer); } }
Example 12
Source File: HalvadeConf.java From halvade with GNU General Public License v3.0 | 5 votes |
public static SAMSequenceDictionary getSequenceDictionary(Configuration conf) throws IOException { int counter = conf.getInt(dictionaryCount, 0); SAMSequenceDictionary dict = new SAMSequenceDictionary(); for(int i = 0; i < counter; i++) { String seqName = conf.get(dictionarySequenceName + i); int seqLength = conf.getInt(dictionarySequenceLength + i, 0); SAMSequenceRecord seq = new SAMSequenceRecord(seqName, seqLength); dict.addSequence(seq); } return dict; }
Example 13
Source File: IntervalOverlappingIteratorUnitTest.java From gatk with BSD 3-Clause "New" or "Revised" License | 5 votes |
@DataProvider(name="data") public Object[][] getData() { // the sequence dictionary final SAMSequenceDictionary dictionary = new SAMSequenceDictionary(); dictionary.addSequence(new SAMSequenceRecord("1", 1000000)); dictionary.addSequence(new SAMSequenceRecord("2", 1000000)); // the set of intervals final List<SimpleInterval> intervals_1 = Arrays.asList(new SimpleInterval("1:500-600"), new SimpleInterval("1:700-800")); final List<SimpleInterval> intervals_2 = Arrays.asList(new SimpleInterval("2:100-200"), new SimpleInterval("2:400-1000")); // some records final SimpleInterval record_1_1_100 = new SimpleInterval("1:1-100"); final SimpleInterval record_1_1_800 = new SimpleInterval("1:1-800"); final SimpleInterval record_1_500_600 = new SimpleInterval("1:500-600"); final SimpleInterval record_1_700_750 = new SimpleInterval("1:700-750"); final SimpleInterval record_2_100_150 = new SimpleInterval("2:100-150"); final SimpleInterval record_2_900_999 = new SimpleInterval("2:900-999"); // test cases return new Object[][] { // first record starts before the first interval, second record overlaps the first interval {intervals_1, dictionary, new SimpleInterval[]{record_1_1_100, record_1_500_600, record_2_900_999}, new SimpleInterval[]{record_1_500_600}}, // first record starts after the first interval, second interval overlaps the first record {intervals_1, dictionary, new SimpleInterval[]{record_1_700_750, record_2_900_999}, new SimpleInterval[]{record_1_700_750}}, // first interval is on a later contig than the first record, but overlaps later records {intervals_2, dictionary, new SimpleInterval[]{record_1_1_100, record_2_900_999}, new SimpleInterval[]{record_2_900_999}}, // first interval is on an earlier contig than the first record, but later records overlap later intervals {ListUtils.union(intervals_1, intervals_2), dictionary, new SimpleInterval[]{record_2_100_150, record_2_900_999}, new SimpleInterval[]{record_2_100_150, record_2_900_999}}, // no records overlap any intervals {intervals_1, dictionary, new SimpleInterval[]{record_2_900_999}, new SimpleInterval[0]}, // an interval overlaps multiple records {intervals_1, dictionary, new SimpleInterval[]{record_1_1_800, record_1_500_600, record_2_900_999}, new SimpleInterval[]{record_1_1_800, record_1_500_600}}, // a record overlaps multiple intervals {intervals_1, dictionary, new SimpleInterval[]{record_1_1_800, record_2_100_150}, new SimpleInterval[]{record_1_1_800}} }; }
Example 14
Source File: GenomeLocParserUnitTest.java From gatk with BSD 3-Clause "New" or "Revised" License | 5 votes |
@Test public void testParseUnknownSequenceLength() { SAMSequenceDictionary seqDict = new SAMSequenceDictionary(); seqDict.addSequence(new SAMSequenceRecord("1", SAMSequenceRecord.UNKNOWN_SEQUENCE_LENGTH)); Assert.assertEquals(seqDict.getSequence("1").getSequenceLength(), SAMSequenceRecord.UNKNOWN_SEQUENCE_LENGTH); GenomeLocParser myLocParser = new GenomeLocParser(seqDict); GenomeLoc genomeLoc = myLocParser.parseGenomeLoc("1:1-99"); Assert.assertEquals(genomeLoc.getEnd(), 99); }
Example 15
Source File: IntervalTagComparatorTest.java From Drop-seq with MIT License | 5 votes |
/** * Make this a little more like actual sequence data, where reads mapped to GL are practically non existent. * @param dict * @return */ private SAMSequenceDictionary filterSD (final SAMSequenceDictionary dict) { SAMSequenceDictionary result = new SAMSequenceDictionary(); for (SAMSequenceRecord r: dict.getSequences()) if (r.getSequenceLength()>10000000) result.addSequence(r); return result; }
Example 16
Source File: IndexUtilsUnitTest.java From gatk with BSD 3-Clause "New" or "Revised" License | 5 votes |
@Test public void testIsSequenceDictionaryFromIndexNegative() throws Exception { SAMSequenceDictionary dict = new SAMSequenceDictionary(); dict.addSequence(new SAMSequenceRecord("1", 99)); dict.addSequence(new SAMSequenceRecord("2", 99)); Assert.assertFalse(IndexUtils.isSequenceDictionaryFromIndex(dict)); }
Example 17
Source File: UmiUtilTest.java From picard with MIT License | 5 votes |
@Test(dataProvider = "topStrandDataProvider") public void testIsTopStrand(final int referenceIndex, final int alignmentStart, final int mateReferenceIndex, final int mateAlignmentStart, final boolean firstOfPairFlag, final boolean negativeStrandFlag, final boolean mateNegativeStrandFlag, final boolean mapped, final boolean mateMapped, final UmiUtil.ReadStrand strand) { final int readLength = 15; final int contigLength = 500; final SAMFileHeader header = new SAMFileHeader(); final SAMSequenceDictionary sequenceDictionary = new SAMSequenceDictionary(); sequenceDictionary.addSequence(new SAMSequenceRecord("chr1", contigLength)); sequenceDictionary.addSequence(new SAMSequenceRecord("chr2", contigLength)); header.setSequenceDictionary(sequenceDictionary); final SAMRecord rec = new SAMRecord(header); rec.setReadUnmappedFlag(!mapped); rec.setMateUnmappedFlag(!mateMapped); rec.setReadPairedFlag(true); rec.setCigarString(readLength + "M"); rec.setAttribute("MC", readLength + "M"); rec.setReferenceIndex(referenceIndex); rec.setAlignmentStart(alignmentStart); rec.setMateReferenceIndex(mateReferenceIndex); rec.setMateAlignmentStart(mateAlignmentStart); rec.setFirstOfPairFlag(firstOfPairFlag); rec.setReadNegativeStrandFlag(negativeStrandFlag); rec.setMateNegativeStrandFlag(mateNegativeStrandFlag); Assert.assertEquals(UmiUtil.getStrand(rec), strand); }
Example 18
Source File: SageHotspotAnnotationTest.java From hmftools with GNU General Public License v3.0 | 5 votes |
@Test public void testComparatorUsesAlleles() { final String line1 = "15\t12345678\trs1;UCSC\tC\tA,G\t2\tPASS\tinfo;\tGT:AD:DP\t0/1:60,60:121"; final String line2 = "15\t12345678\trs1;UCSC\tC\tA,GT\t2\tPASS\tinfo;\tGT:AD:DP\t0/1:60,60:121"; SAMSequenceDictionary samSequenceDictionary = new SAMSequenceDictionary(); samSequenceDictionary.addSequence(new SAMSequenceRecord("15", 123456789)); SageHotspotAnnotation.VCComparator comparator = new SageHotspotAnnotation.VCComparator(samSequenceDictionary); assertEquals(0, comparator.compare(codec.decode(line1), codec.decode(line1))); assertNotEquals(0, comparator.compare(codec.decode(line1), codec.decode(line2))); }
Example 19
Source File: GATKVariantContextUtilsUnitTest.java From gatk with BSD 3-Clause "New" or "Revised" License | 4 votes |
private SAMSequenceDictionary makeSimpleSequenceDictionary() { final SAMSequenceDictionary seqDictionary = new SAMSequenceDictionary(); seqDictionary.addSequence(new SAMSequenceRecord("chr1", 10)); return seqDictionary; }
Example 20
Source File: AlignmentsTagsTest.java From cramtools with Apache License 2.0 | 4 votes |
private void doTest(byte[] ref, int alignmentStart, byte[] readBases) throws IOException, CloneNotSupportedException { SAMSequenceRecord sequenceRecord = new SAMSequenceRecord("1", ref.length); SAMSequenceDictionary sequenceDictionary = new SAMSequenceDictionary(); sequenceDictionary.addSequence(sequenceRecord); SAMFileHeader header = new SAMFileHeader(); header.setSequenceDictionary(sequenceDictionary); SAMRecord samRecord = new SAMRecord(header); samRecord.setReadUnmappedFlag(false); samRecord.setAlignmentStart(alignmentStart); samRecord.setReferenceIndex(0); samRecord.setReadBases(readBases); samRecord.setCigarString(samRecord.getReadLength() + "M"); ReferenceSource referenceSource = new ReferenceSource() { @Override public synchronized ReferenceRegion getRegion(SAMSequenceRecord record, int start_1based, int endInclusive_1based) throws IOException { int zbInclusiveStart = start_1based - 1; int zbExlcusiveEnd = endInclusive_1based; return new ReferenceRegion(Arrays.copyOfRange(ref, zbInclusiveStart, zbExlcusiveEnd), sequenceRecord.getSequenceIndex(), sequenceRecord.getSequenceName(), start_1based); } }; AlignmentsTags.calculateMdAndNmTags(samRecord, referenceSource, sequenceDictionary, true, true); SAMRecord checkRecord = (SAMRecord) samRecord.clone(); SequenceUtil.calculateMdAndNmTags(checkRecord, ref, true, true); // System.out.printf("TEST: ref %s, start %d, read bases %s\n", new // String(ref), alignmentStart, new String( // readBases)); // System.out // .println(referenceSource.getRegion(sequenceRecord, alignmentStart, // alignmentStart + readBases.length)); // System.out.printf("NM: %s x %s\n", samRecord.getAttribute("NM"), // checkRecord.getAttribute("NM")); // System.out.printf("MD: %s x %s\n", samRecord.getAttribute("MD"), // checkRecord.getAttribute("MD")); Assert.assertEquals(checkRecord.getAttribute("NM"), samRecord.getAttribute("NM")); Assert.assertEquals(checkRecord.getAttribute("MD"), samRecord.getAttribute("MD")); }