htsjdk.samtools.TextCigarCodec Java Examples

The following examples show how to use htsjdk.samtools.TextCigarCodec. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: CigarUtilsUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
/**
 * Compares {@code CigarUtils.countClippedBases} with hand-computed clip counts for a
 * set of CIGAR strings, checking each tail (left/right) and each clip operator (hard/soft).
 */
@Test
public void testClipCountsByHand() {
    // expected counts are ordered: leftHard, leftSoft, rightHard, rightSoft
    final List<Pair<Cigar, int[]>> cases = Arrays.asList(
            Pair.of("13H3M35D13M2I45S30H", new int[] {13, 0, 30, 45}),
            Pair.of("1H3S67M13S", new int[] {1, 3, 0, 13}),
            Pair.of("13M30S10S1H", new int[] {0, 0, 1, 40}),
            Pair.of("113S4M", new int[] {0, 113, 0, 0}),
            Pair.of("5H3H10M2S1S", new int[] {8, 0, 0, 3}),
            Pair.of("10M", new int[] {0, 0, 0, 0}),
            Pair.of("1H2H3S4S10M5S6S7H8H", new int[] {3, 7, 15, 11})
    )
            .stream()
            .map(raw -> Pair.of(TextCigarCodec.decode(raw.getLeft()), raw.getRight()))
            .collect(Collectors.toList());

    for (final Pair<Cigar, int[]> testCase : cases) {
        final Cigar cigar = testCase.getLeft();
        final int[] expected = testCase.getRight();

        Assert.assertEquals(CigarUtils.countClippedBases(cigar, ClippingTail.LEFT_TAIL, CigarOperator.HARD_CLIP), expected[0]);
        Assert.assertEquals(CigarUtils.countClippedBases(cigar, ClippingTail.LEFT_TAIL, CigarOperator.SOFT_CLIP), expected[1]);
        Assert.assertEquals(CigarUtils.countClippedBases(cigar, ClippingTail.RIGHT_TAIL, CigarOperator.HARD_CLIP), expected[2]);
        Assert.assertEquals(CigarUtils.countClippedBases(cigar, ClippingTail.RIGHT_TAIL, CigarOperator.SOFT_CLIP), expected[3]);
    }
}
 
Example #2
Source File: ReadClipperUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
/**
 * Reverting the soft clips of a read whose CIGAR is nothing but clips ("41S59H"), placed
 * at position 1 of its contig, is expected to produce an empty, unmapped read.
 */
@Test
public void testRevertSoftClippedBasesDoesntExplodeOnCompletelyClippedRead() {
    final GATKRead read = ArtificialReadUtils.createArtificialRead(TextCigarCodec.decode("41S59H"));
    // The read has to sit AT the start of the contig here: that way every reverted
    // soft-clipped base gets clipped away again and the result is an empty read.
    read.setPosition(read.getContig(), 1);

    final GATKRead reverted = ReadClipper.revertSoftClippedBases(read);

    Assert.assertEquals(reverted.getLength(), 0);
    Assert.assertTrue(reverted.isEmpty());
    Assert.assertEquals(reverted.getBases().length, 0);
    Assert.assertEquals(reverted.getBaseQualities().length, 0);
    Assert.assertEquals(reverted.numCigarElements(), 0);
    Assert.assertTrue(reverted.isUnmapped());
}
 
Example #3
Source File: ReadClipperUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
/**
 * Hard-clipping the soft-clipped bases of a read made up only of clips ("170H70S"),
 * positioned at 100 on its contig, is expected to produce an empty, unmapped read.
 */
@Test
public void testHardClipSoftClippedBasesResultsInEmptyReadDontSetNegativeStartPosition() {
    final GATKRead read = ArtificialReadUtils.createArtificialRead(TextCigarCodec.decode("170H70S"));
    // The read must be close to the contig start so that this exercises the guard against
    // assigning a negative start position while clipping.
    // See https://github.com/broadinstitute/gatk/issues/3466
    read.setPosition(read.getContig(), 100);

    final GATKRead hardClipped = ReadClipper.hardClipSoftClippedBases(read);

    Assert.assertEquals(hardClipped.getLength(), 0);
    Assert.assertTrue(hardClipped.isEmpty());
    Assert.assertEquals(hardClipped.getBases().length, 0);
    Assert.assertEquals(hardClipped.getBaseQualities().length, 0);
    Assert.assertEquals(hardClipped.numCigarElements(), 0);
    Assert.assertTrue(hardClipped.isUnmapped());
}
 
Example #4
Source File: SAMRecordUtils.java    From abra2 with MIT License 6 votes vote down vote up
/**
 * Returns the CIGAR text of the run of clip elements at the start of the read's CIGAR.
 *
 * @param read record whose CIGAR is inspected
 * @return the encoded leading clip elements, or an empty string when the CIGAR does not
 *         start with an element accepted by {@code isClip}
 */
public static String getLeadingClips(SAMRecord read) {
	List<CigarElement> clips = new ArrayList<CigarElement>();

	// Collect elements only while they are clips; the first non-clip ends the leading run.
	for (CigarElement element : read.getCigar().getCigarElements()) {
		if (!isClip(element)) {
			break;
		}
		clips.add(element);
	}

	return clips.isEmpty() ? "" : TextCigarCodec.encode(new Cigar(clips));
}
 
Example #5
Source File: CpxVariantInterpreterUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
/**
 * TestNG data provider. Each row has five elements: two alignments, a boolean, a sequence
 * dictionary (or null), and either an exception class or null.
 * NOTE(review): judging by {@code validInput.expectedOneShouldYieldToTwo}, the boolean is the
 * expected "one should yield to two" answer, and the last element is presumably the exception
 * the consuming test expects (null meaning none) -- confirm against the consuming test.
 */
@DataProvider
private Object[][] forOverlapYieldingStrategy() {
    final List<Object[]> data = new ArrayList<>(20);

    // Hand-built pair of chr1 intervals, supplied in both orders; both rows carry IllegalArgumentException.class.
    final AlignmentInterval one = new AlignmentInterval(new SimpleInterval("chr1", 100001, 100100), 1, 100, TextCigarCodec.decode("100M"), false, 60, 0, 100, ContigAlignmentsModifier.AlnModType.NONE);
    final AlignmentInterval two = new AlignmentInterval(new SimpleInterval("chr1", 100041, 100070), 33, 62, TextCigarCodec.decode("30M"), true, 30, 5, 26, ContigAlignmentsModifier.AlnModType.NONE);
    data.add(new Object[]{one, two, true, bareBoneHg38SAMSeqDict, IllegalArgumentException.class});

    data.add(new Object[]{two, one, true, bareBoneHg38SAMSeqDict, IllegalArgumentException.class});

    // Pair of alignments parsed from SAM record strings (chr1 and chr3), passed with a null dictionary.
    // NOTE(review): in this copy the second string literal is split across two physical lines,
    // which is not valid Java; it was presumably a single line in the original file -- confirm and rejoin.
    data.add(new Object[]{fromSAMRecordString("asm004677:tig00000\t2064\tchr1\t202317371\t60\t1393H50M1085H\t*\t0\t0\tGTCTTGCTCTGTTGCCCAGGCTGGAGTGCAGTAGAGCAATCATAGCTCAC\t*\tSA:Z:chr3,15736242,-,1282M1246S,60,0;chr3,15737523,-,1425S377M1D726M,60,4;\tMD:Z:41T8\tRG:Z:GATKSVContigAlignments\tNM:i:1\tAS:i:45\tXS:i:0", true),
                          fromSAMRecordString("asm004677:tig00000\t16\tchr3\t15736242\t60\t1282M1246S\t*\t0\t0\tTTATCTCTTACTGTGCCTTAATTATAAATTAAATTTTATCATCGGTATGTATAGGAAAAAAACACAGTATCTATAGGGTTTGGTACTATCCAATGTTTCAGGCATCCATAGGGGGTCCAGGAACATATCCTCTGTAGATAAGTGGGAACTACTTTGAAAAAGACATGGCAGCCACTAGCTCCCAGCCACAGAATCTTGGGAATGTTACTTTACCTATAAGTTCCTACCCCTGCAACAAAAATGAAAGACTAGACAAGTTGGCTTCAAAGGTTCTTCTGTTACCTTCTCTAAATCTTTGGTGTACACATAAACCCCAATCTTTGCTATGACACCTAGCACAGACAACGTTTGTTGTGTAAGATAACTAAATAGCAATTTTCTTAAATTGTTTTGATGGTTCTGTCACTAAAGTATTATCTTGACTAAATGTTAATAAACTACTAAAAAGCATGTTTTCTAAATTCTGCCTTCCTTTGCTATTTACTTCTGGAAGAACAATAACATATCTCTGTCTTCACATATTTTTCCTGCCACAATAATAGAACATACTTTTCCTGTTGTGTTAAGAGTTGGTATTGTTTTCCTTGGTCACCACAGGTTAACTGTGCACAAGTAGTATCTACTACATACATTTCTAAATTTGAGAAAGTTATAAAAGGAGGCAAAATTAGACAAAAATGAGAAAGATAAATAGTTCTGTATGCCTTTACTATGCAAAAATGCAAGTTCTTTAATAAGTGGGGGGTGGACAGGAGGAGAGAAAAATAATGGTCTAATTGAATGCCACCTACCTCACCATGTCCACTGAAAGCTGCATGATGTAATGCAGTCCTCCCTGCTCGATCAGATACGTTTACATTACTCAGAAGAGGTACCAAAGCTTCAGCACACTTTACAGCTTTATTAGCAGCAGCTATATGTAAAGGGGTTTGCCAATTTTTGTCTCGAGCATTAACATCTGCAGAATGCTTCAAAAGTACCTGAACTGCTTCCTAAAACATATGAAAAGTTATAAAAGACAAATGAGTTAAGAGTTTGAGAAATTATTTCTATATATAGTGTGCGTGTGTGTGTATAAATATGTATCTACATATGAAGAAAATAAAAAAGCTCTCATAGAACTCACTATAAACAAGTAGGAATTTACCCAAGGAAAGAAAATATTTTGAATGTAAAGCGATACTGAGAGTACTCAGAGCCATATAAGATGCATCAAGATCAAACATTCCCTTTCCCTACCCTACCATTAAATGCATAAACATTCCTATCAAAATGTGCAAGAAGAAATACTGACAACTATTATTCAAAAACATTCATTTCTTATATCTTATTTTTCTAGGTAACAAGATACAATTTATACTTTGAATAAATAGTTTTTTGGTTTTGTTTTTTGGTCTTGCTCTGTTGCCCAGGCTGGAGTGCAGTAGAGCAATCATAGCTCACCGTAACTTCAAATTCCTTGGCTTAAGTGATCCTCCCACCTTTGCTTCCTGAGTAGCTAGGACTACAGGTGCATGCCACCACCCCTGGCTAATTTTTTAGTTATTTTGTAGAGACAAGGTCTTGCTATGTTGCCCAGGCTGGTCTCAAACTCCTGGCCTCAAGTGATTCTCCTTCCTTGGCCTCCTGAAGTGTTGGGGTTACAAACATGAGCCAGTCATAGAGCCACACCTGGCTCTTTGAACAATTTTAAGCAATAGATTTTTTACACTGCATATATAAAAATTAAAAACATACTATTTCTCAATGTATTATACATGCTATATTTGACAAAATGAAAAATCACTAGGATCAATTTCATTAAAAAAAAAAAAAGACACAACATAGACAAGGTTCTTTTTGCAGTGCTGCTTAGAGAGAAAGTTAAGGCAATTTAAGAGTCTACTGT
CCAAGGGATTTATCTCCCAGAAGCGCATGGAAAACATCACAACACTACCTCTATAAATTGGACTCATGGCAGGATTCCAAGGAATTCAGTATGCCATCAACATTAAGCAATATAATCAGAACTAAACTCAACAAAATTTTTCTTTCAGTGATAATCCTAATTTTTGGTAAATTATCGGGGAACCTGCCCCCAATAATTCAAAGTGAGTCCTTTTCTATTTTCCCTAAGTGTCGGCTGGTCTGAGAAATAAAGGGAAAGAGTACAAAAGAGAGAAATTTTAAAGCTGGGTGTCTGGGGAGACATCACATGCCCAAGCCGCAAAACCAGCAAGTTTTTATTAGTGATTTTCAAAAGGGGAGGGAGAGTACGAATAGGGTATGAGTCACAGAGATCACATGCTTCACAAGGTAATAAAATATTACAAGGCAAATGGAGGCAGGGCGAGATCACAGGACCAGGTGAAATTAACATTGCTAATGAAGTTTTGGGCACACATTGTCATTGATAACATCTTATCAGGAGACAGGGGCCGGGCACAGTGGCGCGTGCCTGTAGTCCCAGCTACTCGGGAGGCTGAGGTGGGAGGATCGCTTGAGCCCAGGAGTTCTGGGCTGTAGTGCGCTATGCCGATCGGGTGTCC\t*\tSA:Z:chr3,15737523,-,1425S377M1D726M,60,4;chr1,202317371,-,1393S50M1085S,60,1;\tMD:Z:1282\tRG:Z:GATKSVContigAlignments\tNM:i:0\tAS:i:1282\tXS:i:0", true),
                          true, null, IllegalArgumentException.class
    });

    // One row per entry of validInputs (defined outside this method); these rows carry null as the last element.
    for (final ValidLocalData validInput : validInputs) {
        data.add(new Object[]{validInput.one, validInput.two, validInput.expectedOneShouldYieldToTwo, bareBoneHg38SAMSeqDict, null});
    }

    return data.toArray(new Object[data.size()][]);
}
 
Example #6
Source File: ContigAlignmentsModifierUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
/**
 * Splitting an alignment whose CIGAR contains a single deletion ("2S205M2D269M77S") is
 * expected to give two gap-free intervals, one on each side of the deletion.
 */
@Test(groups = "sv")
public void testGappedAlignmentBreaker_OneDeletion() {
    final Cigar gappedCigar = TextCigarCodec.decode("2S205M2D269M77S");
    final AlignmentInterval gapped = new AlignmentInterval(new SimpleInterval("1", 100, 575),
            3, 476, gappedCigar, true, 60, 0, 100, ContigAlignmentsModifier.AlnModType.NONE);

    final List<AlignmentInterval> pieces = Utils.stream(
            ContigAlignmentsModifier.splitGappedAlignment(gapped, 1, gappedCigar.getReadLength()))
            .collect(Collectors.toList());

    Assert.assertEquals(pieces.size(), 2);

    // piece before the 2-base deletion
    final AlignmentInterval expectedBeforeGap = new AlignmentInterval(new SimpleInterval("1", 100, 304),
            3, 207, TextCigarCodec.decode("2S205M346S"), true,
            60, NO_NM, NO_AS, ContigAlignmentsModifier.AlnModType.FROM_SPLIT_GAPPED_ALIGNMENT);
    Assert.assertEquals(pieces.get(0), expectedBeforeGap);

    // piece after the 2-base deletion
    final AlignmentInterval expectedAfterGap = new AlignmentInterval(new SimpleInterval("1", 307, 575),
            208, 476, TextCigarCodec.decode("207S269M77S"), true,
            60, NO_NM, NO_AS, ContigAlignmentsModifier.AlnModType.FROM_SPLIT_GAPPED_ALIGNMENT);
    Assert.assertEquals(pieces.get(1), expectedAfterGap);
}
 
Example #7
Source File: IndelShifterTest.java    From abra2 with MIT License 6 votes vote down vote up
/**
 * Shifts the 2-base deletion in "6M2D8M" left against the reference in
 * test-data/test.fa and expects the resulting CIGAR to be "2M2D12M".
 */
@Test (groups = "unit" )
public void testShiftIndelsLeft() throws Exception {
	CompareToReference2 reference = new CompareToReference2();
	reference.init("test-data/test.fa");

	/*
	TCGAATCGATATATTTCCGGAACAGACTCAG
	------CGATAT--TTCCGGAA--------- <-- orig
	------CG--ATATTTCCGGAA--------- <-- new
	1234567890123456789012
	*/
	Cigar originalCigar = TextCigarCodec.decode("6M2D8M");
	String bases = "CGATATTTCCGGAA";

	// reference start (7) and end (22) are 1-based
	Cigar shiftedCigar = indelShifter.shiftIndelsLeft(7, 22, "seq1", originalCigar, bases, reference);

	assertEquals(TextCigarCodec.encode(shiftedCigar), "2M2D12M");
}
 
Example #8
Source File: SWNativeAlignerWrapperUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
/**
 * Supplies a {@code SWNativeAlignerWrapper} whose "native" binding is backed by the pure-Java
 * Smith-Waterman aligner: {@code load} always reports success and {@code align} delegates to
 * {@code SmithWatermanJavaAligner}, returning the CIGAR as text together with the alignment offset.
 */
@Override
protected SmithWatermanAligner getAligner() {
    return new SWNativeAlignerWrapper(new SWAlignerNativeBinding() {

        @Override
        public boolean load(File tmpDir) {
            // nothing to load for the Java-backed stand-in
            return true;
        }

        @Override
        public SWNativeAlignerResult align(byte[] ref, byte[] alt, SWParameters parameters, SWOverhangStrategy overhangStrategy) {
            final SmithWatermanAlignment javaResult =
                    SmithWatermanJavaAligner.getInstance().align(ref, alt, parameters, overhangStrategy);
            final String cigarText = TextCigarCodec.encode(javaResult.getCigar());
            return new SWNativeAlignerResult(cigarText, javaResult.getAlignmentOffset());
        }
    });
}
 
Example #9
Source File: AssemblyContigAlignmentsConfigPickerUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
/**
 * TestNG data provider with a single row: a list of four chr18 alignment intervals and a map
 * from each interval to a pair of integers.
 * NOTE(review): the pair presumably holds the overlap with the previous and next alignment,
 * with -1 meaning "no neighbour" -- confirm against the consuming test.
 */
@DataProvider Object[][] forGetMaxOverlapPairs() {
    final AlignmentInterval first = new AlignmentInterval(new SimpleInterval("chr18:77371711-77373157"), 1, 1447, TextCigarCodec.decode("1447M1164S"), true, 60, AlignmentInterval.NO_NM, AlignmentInterval.NO_AS, ContigAlignmentsModifier.AlnModType.FROM_SPLIT_GAPPED_ALIGNMENT);
    final AlignmentInterval second = new AlignmentInterval(new SimpleInterval("chr18:77373176-77373513"), 1166, 1503, TextCigarCodec.decode("1165H338M1108S"), true, 60, AlignmentInterval.NO_NM, AlignmentInterval.NO_AS, ContigAlignmentsModifier.AlnModType.FROM_SPLIT_GAPPED_ALIGNMENT);
    final AlignmentInterval third = new AlignmentInterval(new SimpleInterval("chr18:77373208-77373391"), 1448, 1631, TextCigarCodec.decode("1447S184M980S"), true, 60, AlignmentInterval.NO_NM, AlignmentInterval.NO_AS, ContigAlignmentsModifier.AlnModType.FROM_SPLIT_GAPPED_ALIGNMENT);
    final AlignmentInterval fourth = new AlignmentInterval(new SimpleInterval("chr18:77373514-77374571"), 1554, 2611, TextCigarCodec.decode("1165H388S1058M"), true, 60, AlignmentInterval.NO_NM, AlignmentInterval.NO_AS, ContigAlignmentsModifier.AlnModType.FROM_SPLIT_GAPPED_ALIGNMENT);

    final List<AlignmentInterval> alignments = Arrays.asList(first, second, third, fourth);

    final Map<AlignmentInterval, Tuple2<Integer, Integer>> overlapMap = new HashMap<>(4);
    overlapMap.put(first, new Tuple2<>(-1, 282));
    overlapMap.put(second, new Tuple2<>(282, 56));
    overlapMap.put(third, new Tuple2<>(56, 78));
    overlapMap.put(fourth, new Tuple2<>(78, -1));

    return new Object[][]{
            {alignments, overlapMap}
    };
}
 
Example #10
Source File: CigarUtilsUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
/**
 * TestNG data provider "readWalkDistanceTestDataException". Every row shares the CIGAR
 * "35H40S10M20I25M30D50M55S60H" and carries two ints, a boolean and a trailing 0.
 * NOTE(review): the columns are presumably (cigar, start, distance, walk-backward flag,
 * unused expected value) and each row is presumably expected to make the method under
 * test throw -- confirm against the consuming test.
 */
@DataProvider(name = "readWalkDistanceTestDataException")
private Object[][] createReadWalkDistanceTestDataException() {
    final Cigar cigar = TextCigarCodec.decode("35H40S10M20I25M30D50M55S60H");

    return new Object[][]{
            {cigar, -1, 10, false, 0},
            {cigar, 0, 10, false, 0},
            {cigar, 41, -1, false, 0},
            {cigar, 41, 0, false, 0},

            {cigar, 1, 116, false, 0},
            {cigar, 200, 116, false, 0},

            {cigar, 96, 51, false, 0},
            {cigar, 145, 2, false, 0},
            {cigar, 146, 1, false, 0},

            {cigar, 96, 67, true, 0},
            {cigar, 40, 1, true, 0},
            {cigar, 41, 2, true, 0},
    };
}
 
Example #11
Source File: CigarUtilsUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
/**
 * TestNG data provider "readWalkDistanceTestData". Every row shares the CIGAR
 * "35H40S10M20I25M30D50M55S60H" and carries two ints, a boolean and a final int.
 * NOTE(review): the columns are presumably (cigar, start, distance, walk-backward flag,
 * expected result) -- confirm against the consuming test.
 */
@DataProvider(name = "readWalkDistanceTestData")
private Object[][] createReadWalkDistanceTestData() {
    final Cigar cigar = TextCigarCodec.decode("35H40S10M20I25M30D50M55S60H");

    return new Object[][]{
            // rows with the boolean flag set to false
            {cigar, 1, 5, false, 45},
            {cigar, 1, 10, false, 50},
            {cigar, 1, 16, false, 76},
            {cigar, 1, 64, false, 95},
            {cigar, 1, 66, false, 96},
            {cigar, 11, 5, false, 35},
            {cigar, 41, 64, false, 55},

            // rows with the boolean flag set to true
            {cigar, 146, 1, true, 2},

            {cigar, 181, 10, true, 46},
            {cigar, 181, 50, true, 86},
            {cigar, 181, 51, true, 86},
            {cigar, 181, 80, true, 86},
            {cigar, 181, 105, true, 111},
            {cigar, 181, 106, true, 132},
            {cigar, 181, 115, true, 141},
    };
}
 
Example #12
Source File: ContigAlignmentsModifierUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
/**
 * Splitting an alignment whose CIGAR contains a single insertion ("56S27M15I32M21S") is
 * expected to give two gap-free intervals, one on each side of the insertion.
 */
@Test(groups = "sv")
public void testGappedAlignmentBreaker_OneInsertion() {
    final Cigar gappedCigar = TextCigarCodec.decode("56S27M15I32M21S");
    final AlignmentInterval gapped = new AlignmentInterval(new SimpleInterval("1", 100, 158),
            57, 130, gappedCigar, true, 60, 0, 100, ContigAlignmentsModifier.AlnModType.NONE);

    final List<AlignmentInterval> pieces = Utils.stream(
            ContigAlignmentsModifier.splitGappedAlignment(gapped, 1, gappedCigar.getReadLength()))
            .collect(Collectors.toList());

    Assert.assertEquals(pieces.size(), 2);

    // piece before the 15-base insertion
    final AlignmentInterval expectedBeforeGap = new AlignmentInterval(new SimpleInterval("1", 100, 126),
            57, 83, TextCigarCodec.decode("56S27M68S"), true,
            60, NO_NM, NO_AS, ContigAlignmentsModifier.AlnModType.FROM_SPLIT_GAPPED_ALIGNMENT);
    Assert.assertEquals(pieces.get(0), expectedBeforeGap);

    // piece after the 15-base insertion
    final AlignmentInterval expectedAfterGap = new AlignmentInterval(new SimpleInterval("1", 127, 158),
            99, 130, TextCigarCodec.decode("98S32M21S"), true,
            60, NO_NM, NO_AS, ContigAlignmentsModifier.AlnModType.FROM_SPLIT_GAPPED_ALIGNMENT);
    Assert.assertEquals(pieces.get(1), expectedAfterGap);
}
 
Example #13
Source File: SimpleChimeraUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
/**
 * Builds two {@code TestData} cases, each from a pair of overlapping alignment intervals on
 * the same chromosome with opposite strand flags: a chr21 pair expected to be typed
 * {@code INTRA_CHR_STRAND_SWITCH_33} and a chr20 pair expected to be typed
 * {@code INTRA_CHR_STRAND_SWITCH_55}.
 *
 * @return a mutable list holding the two cases, chr21 first
 */
private List<TestData> casesForInvertedDuplication() {
    final List<TestData> cases = new ArrayList<>(20);

    // chr21 pair: reverse-to-forward strand switch
    final AlignmentInterval chr21First = new AlignmentInterval(new SimpleInterval("chr21:25625477-25625587"), 1, 111, TextCigarCodec.decode("111M212H"), false, 60, 0, 111, ContigAlignmentsModifier.AlnModType.NONE);
    final AlignmentInterval chr21Second = new AlignmentInterval(new SimpleInterval("chr21:25625379-25625595"), 107, 323, TextCigarCodec.decode("106S217M"), true, 60, 0, 127, ContigAlignmentsModifier.AlnModType.NONE);
    cases.add(new TestData(chr21First, chr21Second, bareBoneHg38SAMSeqDict, null,
            StrandSwitch.REVERSE_TO_FORWARD, false, true,
            false, true, TypeInferredFromSimpleChimera.INTRA_CHR_STRAND_SWITCH_33));

    // chr20 pair: forward-to-reverse strand switch
    final AlignmentInterval chr20First = new AlignmentInterval(new SimpleInterval("chr20", 48513458, 48513545), 1, 88, TextCigarCodec.decode("88M227H"), true, 39, 1, 83, ContigAlignmentsModifier.AlnModType.NONE);
    final AlignmentInterval chr20Second = new AlignmentInterval(new SimpleInterval("chr20", 48513297, 48513578), 84, 365, TextCigarCodec.decode("83S282M"), false, 60, 0, 282, ContigAlignmentsModifier.AlnModType.NONE);
    cases.add(new TestData(chr20First, chr20Second, TestUtilsForAssemblyBasedSVDiscovery.bareBoneHg38SAMSeqDict, null,
            StrandSwitch.FORWARD_TO_REVERSE, true, true,
            false, true, TypeInferredFromSimpleChimera.INTRA_CHR_STRAND_SWITCH_55));

    return cases;
}
 
Example #14
Source File: ContigAlignmentsModifierUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
/**
 * Splits an alignment whose CIGAR has hard and soft clips on both ends plus one insertion
 * and one deletion ("1H2S3M5I10M20D6M7S8H") and checks the three resulting gap-free
 * intervals. The unclipped contig length passed to the splitter is the read length plus
 * the 1 + 8 hard-clipped bases.
 */
@Test(groups = "sv")
public void testGappedAlignmentBreaker_HardAndSoftClip() {

    final Cigar cigar = TextCigarCodec.decode("1H2S3M5I10M20D6M7S8H");
    final AlignmentInterval alignmentInterval = new AlignmentInterval(new SimpleInterval("1", 100, 138),
            4, 27, cigar, true, 60, 0,
            100, ContigAlignmentsModifier.AlnModType.NONE);

    final List<AlignmentInterval> generatedARList = Utils.stream(ContigAlignmentsModifier.splitGappedAlignment(alignmentInterval,
            1, cigar.getReadLength()+1+8)).collect(Collectors.toList());

    // Pin the number of pieces (one per aligned block: 3M, 10M, 6M) so that spurious extra
    // intervals are caught, matching the other gapped-alignment-breaker tests.
    Assert.assertEquals(generatedARList.size(), 3);

    Assert.assertEquals(generatedARList.get(0), new AlignmentInterval(new SimpleInterval("1", 100, 102),
            4, 6, TextCigarCodec.decode("1H2S3M28S8H"), true,
            60, NO_NM, NO_AS, ContigAlignmentsModifier.AlnModType.FROM_SPLIT_GAPPED_ALIGNMENT));
    Assert.assertEquals(generatedARList.get(1), new AlignmentInterval(new SimpleInterval("1", 103, 112),
            12, 21, TextCigarCodec.decode("1H10S10M13S8H"), true,
            60, NO_NM, NO_AS, ContigAlignmentsModifier.AlnModType.FROM_SPLIT_GAPPED_ALIGNMENT));
    Assert.assertEquals(generatedARList.get(2), new AlignmentInterval(new SimpleInterval("1", 133, 138),
            22, 27, TextCigarCodec.decode("1H20S6M7S8H"), true,
            60, NO_NM, NO_AS, ContigAlignmentsModifier.AlnModType.FROM_SPLIT_GAPPED_ALIGNMENT));
}
 
Example #15
Source File: ReadPosRankSumTestUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Creates an artificial read with CIGAR "10M" placed on {@code CONTIG}.
 *
 * @param start position to assign to the read
 * @param mq    mapping quality to assign to the read
 * @return the configured read
 */
private GATKRead makeRead(final int start, final int mq) {
    final GATKRead artificial = ArtificialReadUtils.createArtificialRead(TextCigarCodec.decode("10M"));
    artificial.setMappingQuality(mq);
    artificial.setPosition(CONTIG, start);
    return artificial;
}
 
Example #16
Source File: QualByDepthUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Checks the QD value produced by {@code QualByDepth.annotate}: with 20 reads in the
 * likelihoods and DP=30 on the genotype, QD must equal qual / 20; once the genotype also
 * carries AD={5,5}, QD must equal qual / 10.
 */
@Test
public void testUsingReads(){
    final List<Allele> alleles = Arrays.asList(REF, ALT);
    final int readCount = 20;
    final String sampleName = "sample1";
    // DP deliberately differs from the read count so the test can tell that reads win over DP
    final int annotatedDepth = 30;
    final Genotype genotypeWithDP = new GenotypeBuilder(sampleName, alleles).DP(annotatedDepth).make();

    final double log10PError = -5;
    final double qual = -10.0 * log10PError;

    final List<GATKRead> reads = IntStream.range(0, readCount)
            .mapToObj(n -> ArtificialReadUtils.createArtificialRead(TextCigarCodec.decode("10M")))
            .collect(Collectors.toList());

    final AlleleLikelihoods<GATKRead, Allele> likelihoods =
            ArtificialAnnotationUtils.makeLikelihoods(sampleName, reads, -100.0, REF, ALT);

    final VariantContext variant = new VariantContextBuilder("test", "20", 10, 10, alleles)
            .log10PError(log10PError)
            .genotypes(Arrays.asList(genotypeWithDP))
            .make();
    final Map<String, Object> annotations = new QualByDepth().annotate(null, variant, likelihoods);
    Assert.assertNotNull(annotations, variant.toString());
    final String qdFromReads = (String)annotations.get(GATKVCFConstants.QUAL_BY_DEPTH_KEY);

    final double expectedFromReads = qual/readCount;
    Assert.assertEquals(Double.valueOf(qdFromReads), expectedFromReads, 0.0001);

    // Second scenario: when AD is present it takes precedence over everything else
    final Genotype genotypeWithAD = new GenotypeBuilder("1", alleles).DP(annotatedDepth).AD(new int[]{5,5}).make();
    final VariantContext variantWithAD = new VariantContextBuilder("test", "20", 10, 10, alleles)
            .log10PError(log10PError)
            .genotypes(Arrays.asList(genotypeWithAD))
            .make();
    final Map<String, Object> annotationsWithAD = new QualByDepth().annotate(null, variantWithAD, likelihoods);
    final String qdFromAD = (String)annotationsWithAD.get(GATKVCFConstants.QUAL_BY_DEPTH_KEY);
    final double expectedFromAD = qual/(5+5); // sum of the two AD fields
    Assert.assertEquals(Double.valueOf(qdFromAD), expectedFromAD, 0.0001);
}
 
Example #17
Source File: ContigAlignmentsModifierUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Splits an alignment with several insertions and deletions
 * ("397S118M2D26M6I50M7I26M1I8M13D72M398S") and checks that the splitter returns the six
 * expected gap-free intervals, in order.
 */
@Test(groups = "sv")
public void testGappedAlignmentBreaker_Complex() {

    final Cigar gappedCigar = TextCigarCodec.decode("397S118M2D26M6I50M7I26M1I8M13D72M398S");
    final AlignmentInterval gapped = new AlignmentInterval(new SimpleInterval("1", 100, 414),
            398, 711, gappedCigar, true, 60, 65, 100, ContigAlignmentsModifier.AlnModType.NONE);

    final List<AlignmentInterval> pieces = Utils.stream(
            ContigAlignmentsModifier.splitGappedAlignment(gapped, 1, gappedCigar.getReadLength()))
            .collect(Collectors.toList());

    Assert.assertEquals(pieces.size(), 6);

    // one expected interval per aligned (M) block of the original CIGAR
    final AlignmentInterval[] expectedPieces = {
            new AlignmentInterval(new SimpleInterval("1", 100, 217),
                    398, 515, TextCigarCodec.decode("397S118M594S"),
                    true, 60, NO_NM, NO_AS, ContigAlignmentsModifier.AlnModType.FROM_SPLIT_GAPPED_ALIGNMENT),
            new AlignmentInterval(new SimpleInterval("1", 220, 245),
                    516, 541, TextCigarCodec.decode("515S26M568S"), true,
                    60, NO_NM, NO_AS, ContigAlignmentsModifier.AlnModType.FROM_SPLIT_GAPPED_ALIGNMENT),
            new AlignmentInterval(new SimpleInterval("1", 246, 295),
                    548, 597, TextCigarCodec.decode("547S50M512S"), true,
                    60, NO_NM, NO_AS, ContigAlignmentsModifier.AlnModType.FROM_SPLIT_GAPPED_ALIGNMENT),
            new AlignmentInterval(new SimpleInterval("1", 296, 321),
                    605, 630, TextCigarCodec.decode("604S26M479S"), true,
                    60, NO_NM, NO_AS, ContigAlignmentsModifier.AlnModType.FROM_SPLIT_GAPPED_ALIGNMENT),
            new AlignmentInterval(new SimpleInterval("1", 322, 329),
                    632, 639, TextCigarCodec.decode("631S8M470S"), true,
                    60, NO_NM, NO_AS, ContigAlignmentsModifier.AlnModType.FROM_SPLIT_GAPPED_ALIGNMENT),
            new AlignmentInterval(new SimpleInterval("1", 343, 414),
                    640, 711, TextCigarCodec.decode("639S72M398S"), true,
                    60, NO_NM, NO_AS, ContigAlignmentsModifier.AlnModType.FROM_SPLIT_GAPPED_ALIGNMENT)
    };

    for (int i = 0; i < expectedPieces.length; i++) {
        Assert.assertEquals(pieces.get(i), expectedPieces[i]);
    }
}
 
Example #18
Source File: CigarUtilsUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Decodes the input CIGAR string, inverts it with {@code CigarUtils.invertCigar}, and
 * compares the re-encoded result with the expected string.
 *
 * @param cigarStrIn          CIGAR text to invert
 * @param expectedCigarStrOut expected CIGAR text after inversion
 */
@Test(dataProvider = "testData_invertCigar")
public void testInvertCigar(final String cigarStrIn, final String expectedCigarStrOut){
    final Cigar inverted = CigarUtils.invertCigar(TextCigarCodec.decode(cigarStrIn));
    Assert.assertEquals(TextCigarCodec.encode(inverted), expectedCigarStrOut);
}
 
Example #19
Source File: OrientationBiasReadCountsUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Creates an artificial, paired, forward-strand "10M" read with mapping quality 20, named
 * {@code random_read_<isRefRead>_<isF1R2Read>_<name>}.
 *
 * @param isRefRead  only used to build the read name
 * @param isF1R2Read when true the read is marked first of pair, otherwise second of pair
 * @param name       numeric suffix for the read name
 * @return the configured read
 */
private GATKRead makeRead(final boolean isRefRead, final boolean isF1R2Read, int name){
    final String readName = "random_read_" + isRefRead + "_" + isF1R2Read + "_" + name;
    final GATKRead read = ArtificialReadUtils.createArtificialRead(TextCigarCodec.decode(10 + "M"), readName);
    read.setMappingQuality(20);

    // both orientations share these two flags; only the mate number differs
    read.setIsPaired(true);
    read.setIsReverseStrand(false);
    if (isF1R2Read) {
        read.setIsFirstOfPair();
    } else {
        read.setIsSecondOfPair();
    }
    return read;
}
 
Example #20
Source File: OrientationBiasReadCountsUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Builds the inputs for an orientation-bias test: a single-base variant context at 20:10003
 * carrying genotype {@code g}, plus likelihoods built from artificial alt reads, ref reads
 * and one additional "bad" read.
 *
 * @param alt_F1R2  number of alt reads created as first of pair
 * @param alt_F2R1  number of alt reads created as second of pair
 * @param ref_F1R2  number of ref reads created as first of pair
 * @param ref_F2R1  number of ref reads created as second of pair
 * @param refAllele reference allele passed to the likelihoods
 * @param altAllele alternate allele passed to the likelihoods
 * @param alleles   alleles of the variant context
 * @param g         genotype attached to the variant context
 * @return the variant context paired with the likelihoods
 */
private Pair<VariantContext, AlleleLikelihoods<GATKRead, Allele>> makeReads(int alt_F1R2, int alt_F2R1, int ref_F1R2, int ref_F2R1, Allele refAllele, Allele altAllele, List<Allele> alleles, Genotype g) {
    // alt-supporting reads: F1R2 reads first, then F2R1 reads
    final Stream<GATKRead> altFirstOfPair = IntStream.range(0, alt_F1R2).mapToObj(i -> makeRead(false, true, i));
    final Stream<GATKRead> altSecondOfPair = IntStream.range(0, alt_F2R1).mapToObj(i -> makeRead(false, false, i));
    final List<GATKRead> altReads = Stream.concat(altFirstOfPair, altSecondOfPair).collect(Collectors.toList());

    // ref-supporting reads: same layout
    final Stream<GATKRead> refFirstOfPair = IntStream.range(0, ref_F1R2).mapToObj(i -> makeRead(true, true, i));
    final Stream<GATKRead> refSecondOfPair = IntStream.range(0, ref_F2R1).mapToObj(i -> makeRead(true, false, i));
    final List<GATKRead> refReads = Stream.concat(refFirstOfPair, refSecondOfPair).collect(Collectors.toList());

    final GATKRead badRead = ArtificialReadUtils.createArtificialRead(TextCigarCodec.decode(10 + "M"));
    badRead.setMappingQuality(20);

    final AlleleLikelihoods<GATKRead, Allele> likelihoods = ArtificialAnnotationUtils.makeLikelihoods(
            sample1, refReads, altReads, Arrays.asList(badRead), -100.0, -10.0, -1.1, refAllele, altAllele);

    final VariantContext variant = new VariantContextBuilder("test", "20", 10003, 10003, alleles)
            .genotypes(Arrays.asList(g))
            .make();
    return ImmutablePair.of(variant, likelihoods);
}
 
Example #21
Source File: ContigAlignmentsModifierUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Splits alignments whose CIGAR starts or ends with an insertion and expects two pieces in
 * each case, with the terminal insertion represented as soft clipping in the expected CIGARs.
 */
@Test(groups = "sv")
public void testGappedAlignmentBreaker_TerminalInsertionOperatorToSoftClip() {

    // scenario 1: CIGAR begins with 'I'
    final Cigar leadingInsertionCigar = TextCigarCodec.decode("10I10M5I10M");
    final AlignmentInterval leadingInsertion = new AlignmentInterval(new SimpleInterval("1", 101, 120),
            11, 35, leadingInsertionCigar, true, 60, 0,
            100, ContigAlignmentsModifier.AlnModType.NONE);

    final List<AlignmentInterval> leadingPieces = Utils.stream(
            ContigAlignmentsModifier.splitGappedAlignment(leadingInsertion, 1, leadingInsertionCigar.getReadLength()))
            .collect(Collectors.toList());

    Assert.assertEquals(leadingPieces.size(), 2);
    Assert.assertEquals(leadingPieces.get(0), new AlignmentInterval(
            new SimpleInterval("1", 101, 110), 11, 20,
            TextCigarCodec.decode("10S10M15S"),
            true, 60, NO_NM, NO_AS, ContigAlignmentsModifier.AlnModType.FROM_SPLIT_GAPPED_ALIGNMENT));
    Assert.assertEquals(leadingPieces.get(1), new AlignmentInterval(
            new SimpleInterval("1", 111, 120), 26, 35,
            TextCigarCodec.decode("25S10M"),
            true, 60, NO_NM, NO_AS, ContigAlignmentsModifier.AlnModType.FROM_SPLIT_GAPPED_ALIGNMENT));

    // scenario 2: CIGAR ends with 'I'
    final Cigar trailingInsertionCigar = TextCigarCodec.decode("10M5I10M10I");
    final AlignmentInterval trailingInsertion = new AlignmentInterval(
            new SimpleInterval("1", 101, 120), 1, 25, trailingInsertionCigar,
            true, 60, 0, 100, ContigAlignmentsModifier.AlnModType.NONE);

    final List<AlignmentInterval> trailingPieces = Utils.stream(
            ContigAlignmentsModifier.splitGappedAlignment(trailingInsertion, 1, trailingInsertionCigar.getReadLength()))
            .collect(Collectors.toList());

    Assert.assertEquals(trailingPieces.size(), 2);
    Assert.assertEquals(trailingPieces.get(0), new AlignmentInterval(
            new SimpleInterval("1", 101, 110), 1, 10,
            TextCigarCodec.decode("10M25S"), true, 60,
            NO_NM, NO_AS, ContigAlignmentsModifier.AlnModType.FROM_SPLIT_GAPPED_ALIGNMENT));
    Assert.assertEquals(trailingPieces.get(1), new AlignmentInterval(
            new SimpleInterval("1", 111, 120), 16, 25,
            TextCigarCodec.decode("15S10M10S"), true, 60,
            NO_NM, NO_AS, ContigAlignmentsModifier.AlnModType.FROM_SPLIT_GAPPED_ALIGNMENT));
}
 
Example #22
Source File: AssemblyContigAlignmentsConfigPickerUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * TestNG data provider. Each row has three elements: a list of {@code GoodAndBadMappings},
 * an int, and a list of {@code GoodAndBadMappings}.
 * NOTE(review): judging by the method name these are presumably (input configurations,
 * mapping-quality threshold, expected configurations after filtering) -- confirm against
 * the consuming test.
 */
@DataProvider
private Object[][] forFilterSecondaryConfigurationsByMappingQualityThreshold() {

    final List<Object[]> data = new ArrayList<>(20);

    // Three hand-built chr21 intervals; intervalTwo is the only one with a mapping quality below 60 (it has 10).
    AlignmentInterval intervalOne = new AlignmentInterval(
            new SimpleInterval("chr21", 100001, 100100),
            1, 100, TextCigarCodec.decode("100M220S"),
            true, 60, 0, 100, ContigAlignmentsModifier.AlnModType.NONE);
    AlignmentInterval intervalTwo = new AlignmentInterval(
            new SimpleInterval("chr21", 100099, 100122),
            99, 122, TextCigarCodec.decode("98S24M78S"),
            true, 10, 3, 241, ContigAlignmentsModifier.AlnModType.NONE);
    AlignmentInterval intervalThree = new AlignmentInterval(
            new SimpleInterval("chr21", 100123, 100200),
            223, 300,  TextCigarCodec.decode("222S78M"),
            true, 60, 0, 78, ContigAlignmentsModifier.AlnModType.NONE);
    // NOTE(review): rep1 lists intervalThree in both its first and its second argument;
    // intervalTwo may have been intended for the second one -- confirm before changing.
    final GoodAndBadMappings rep1 =
            new GoodAndBadMappings(Arrays.asList(intervalOne, intervalThree),
                    Collections.singletonList(intervalThree));
    final GoodAndBadMappings rep2 =
            new GoodAndBadMappings(Arrays.asList(intervalOne, intervalTwo, intervalThree),
                    Collections.emptyList());

    // With 0 as the second element both representations are listed as the third element.
    data.add(new Object[]{Arrays.asList(rep1, rep2), 0, Arrays.asList(rep1, rep2)});

    // With 10 as the second element only rep1 is listed as the third element.
    data.add(new Object[]{Arrays.asList(rep1, rep2), 10, Collections.singletonList(rep1)});

    // Case built from a SAM record string: the input configurations come from pickBestConfigurations.
    final AlignedContig alignedContig = fromPrimarySAMRecordString("asm031090:tig00000\t16\tchr5\t49659827\t60\t332S112M161S\t*\t0\t0\tCATTCCGTTCCGTTCCATTCCATTCCATTCCATTCTATTCGGGTTAATTCCATTCCATTCCATTCGATTGCAATCGAGTTGATTCCATTCCCTAACATTCCATTCCATTCCATTCCATTCCATTCCATTCCATTCCTTTCCATTCCATTACGGATGATTCCATTCCATTGCATTCCATTCCATTCCATTCCCCTGTACTCGGGTTGATTCCATTCCATTGCATTCCAATCCATGCCCTTCCACTCGTGTTGATTCCATTCTTTCCATTCCATTCAAGTTGAATCCATTCCATTGCAATCCATTCCATTCGATTCCATTCGATTGCACTCGGGTTGATTCCATTCCATTGCATTCCATTCCATTCCATTCCATTCCATTCCGTTCCATTCCTTTCCATTACATTCGGATTGATTCTATTCAATTCCCTTACACTCCATTACATTCCATTTCATTCCGGTAGTTTTCACTCCATTCCATTCCATTTCTCTCCATTCCATTGCACTCGGGTTGATTCCATTCCATTGCATTCCATTCCATTTGGGTAGTTTCCACTCCATTCCATTCCATTTCTCTCCATTCCATTGCACTCGGGTTGATTCCATTCC\t*\tSA:Z:chr22_KI270736v1_random,101512,+,455S56M94S,0,1;chr10,41903518,+,372S74M159S,48,7;chr20,31162579,+,37S59M509S,0,5;chr20,31188805,+,298S43M264S,0,2;chr4,49639434,+,331S37M237S,60,1;chrUn_KI270519v1,137524,+,101S37M467S,3,1;chrUn_KN707896v1_decoy,6014,-,81M15I253M5D189M67S,0,34;chrUn_KN707896v1_decoy,6436,-,517S88M,60,3;\tMD:Z:58A7C7G18T12C5\tRG:Z:GATKSVContigAlignments\tNM:i:5\tAS:i:87\tXS:i:55",
            true);
    final List<GoodAndBadMappings> goodAndBadMappings = AssemblyContigAlignmentsConfigPicker.pickBestConfigurations(alignedContig,
            new HashSet<>(Arrays.asList("chr4", "chr5", "chr10", "chr20", "")), 0.0);
    // Expected "good" alignments: the same record with a shorter SA tag (four supplementary alignments instead of eight).
    final List<AlignmentInterval> goodAfterTieBreak = fromPrimarySAMRecordString("asm031090:tig00000\t16\tchr5\t49659827\t60\t332S112M161S\t*\t0\t0\tCATTCCGTTCCGTTCCATTCCATTCCATTCCATTCTATTCGGGTTAATTCCATTCCATTCCATTCGATTGCAATCGAGTTGATTCCATTCCCTAACATTCCATTCCATTCCATTCCATTCCATTCCATTCCATTCCTTTCCATTCCATTACGGATGATTCCATTCCATTGCATTCCATTCCATTCCATTCCCCTGTACTCGGGTTGATTCCATTCCATTGCATTCCAATCCATGCCCTTCCACTCGTGTTGATTCCATTCTTTCCATTCCATTCAAGTTGAATCCATTCCATTGCAATCCATTCCATTCGATTCCATTCGATTGCACTCGGGTTGATTCCATTCCATTGCATTCCATTCCATTCCATTCCATTCCATTCCGTTCCATTCCTTTCCATTACATTCGGATTGATTCTATTCAATTCCCTTACACTCCATTACATTCCATTTCATTCCGGTAGTTTTCACTCCATTCCATTCCATTTCTCTCCATTCCATTGCACTCGGGTTGATTCCATTCCATTGCATTCCATTCCATTTGGGTAGTTTCCACTCCATTCCATTCCATTTCTCTCCATTCCATTGCACTCGGGTTGATTCCATTCC\t*\tSA:Z:chr10,41903518,+,372S74M159S,48,7;chr4,49639434,+,331S37M237S,60,1;chrUn_KI270519v1,137524,+,101S37M467S,3,1;chrUn_KN707896v1_decoy,6436,-,517S88M,60,3;\tMD:Z:58A7C7G18T12C5\tRG:Z:GATKSVContigAlignments\tNM:i:5\tAS:i:87\tXS:i:55",
            true).getAlignments();
    // Expected "bad" alignments: everything in the full contig that is not in the good list.
    final ArrayList<AlignmentInterval> copy = new ArrayList<>(alignedContig.getAlignments());
    copy.removeAll(goodAfterTieBreak);
    data.add(new Object[]{goodAndBadMappings, 0, Collections.singletonList(new GoodAndBadMappings(goodAfterTieBreak, copy))});

    return data.toArray(new Object[data.size()][]);
}
 
Example #23
Source File: ContigAlignmentsModifierUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Exercises {@code ContigAlignmentsModifier.splitGappedAlignment} on alignments whose
 * terminal insertions sit directly next to the soft/hard clips.
 * Two scenarios are checked: one without an internal gap (input must come back untouched)
 * and one with a 5-base deletion (input must be split into two intervals).
 */
@Test(groups = "sv")
public void testGappedAlignmentBreaker_TerminalInsertionNeighboringClippings(){

    // Scenario 1: insertions only at the ends, next to the clips -- no internal gap.
    final Cigar ungappedCigar = TextCigarCodec.decode("10H20S30I40M50I60S70H");
    final AlignmentInterval ungappedInterval = new AlignmentInterval(
            new SimpleInterval("1", 101, 140), 61, 100, ungappedCigar,
            true, 60, 0, 100, ContigAlignmentsModifier.AlnModType.NONE);
    // 10 and 70 match the leading/trailing H elements of the cigar (presumably because
    // getReadLength() does not count hard clips -- confirm against htsjdk).
    final int ungappedContigLength = ungappedCigar.getReadLength() + 10 + 70;
    final List<AlignmentInterval> ungappedResult =
            Utils.stream(ContigAlignmentsModifier.splitGappedAlignment(ungappedInterval, 1, ungappedContigLength))
                    .collect(Collectors.toList());
    // no internal gap, so nothing should change
    Assert.assertEquals(ungappedResult.size(), 1);
    Assert.assertEquals(ungappedResult.get(0), ungappedInterval);

    // Scenario 2: same layout, but with a 5D gap separating a 40M and a 15M stretch.
    final Cigar gappedCigar = TextCigarCodec.decode("10H20S30I40M5D15M50I60S70H");
    final AlignmentInterval gappedInterval = new AlignmentInterval(
            new SimpleInterval("1", 101, 160), 61, 115, gappedCigar,
            true, 60, 0, 100, ContigAlignmentsModifier.AlnModType.NONE);
    final int gappedContigLength = gappedCigar.getReadLength() + 10 + 70;
    final List<AlignmentInterval> gappedResult =
            Utils.stream(ContigAlignmentsModifier.splitGappedAlignment(gappedInterval, 1, gappedContigLength))
                    .collect(Collectors.toList());
    Assert.assertEquals(gappedResult.size(), 2);

    // Piece covering the 40M stretch.
    final AlignmentInterval expectedFirstPiece = new AlignmentInterval(
            new SimpleInterval("1", 101, 140), 61, 100,
            TextCigarCodec.decode("10H50S40M125S70H"), true, 60,
            NO_NM, NO_AS, ContigAlignmentsModifier.AlnModType.FROM_SPLIT_GAPPED_ALIGNMENT);
    Assert.assertEquals(gappedResult.get(0), expectedFirstPiece);

    // Piece covering the 15M stretch after the deletion.
    final AlignmentInterval expectedSecondPiece = new AlignmentInterval(
            new SimpleInterval("1", 146, 160), 101, 115,
            TextCigarCodec.decode("10H90S15M110S70H"), true, 60,
            NO_NM, NO_AS, ContigAlignmentsModifier.AlnModType.FROM_SPLIT_GAPPED_ALIGNMENT);
    Assert.assertEquals(gappedResult.get(1), expectedSecondPiece);
}
 
Example #24
Source File: CigarBuilderUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Feeds each single-element cigar string to a default {@code CigarBuilder} and checks the
 * textual form of the built cigar against {@code expected}.
 *
 * @param cigarElementStrings cigar strings; only the first element of each decoded cigar is added
 * @param expected            expected {@code toString()} of the built cigar
 */
@Test(dataProvider = "initial_and_final_deletions")
public void testInitialAndFinalDeletions(final List<String> cigarElementStrings, final String expected) {
    final CigarBuilder cigarBuilder = new CigarBuilder();
    cigarElementStrings.forEach(
            element -> cigarBuilder.add(TextCigarCodec.decode(element).getFirstCigarElement()));

    final String actual = cigarBuilder.make().toString();
    Assert.assertEquals(actual, expected);
}
 
Example #25
Source File: CigarBuilderUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Builds a cigar from the given elements and checks the builder's reported counts of
 * leading and trailing deletion bases removed.
 *
 * @param cigarElementStrings cigar strings; only the first element of each decoded cigar is added
 * @param removedLeading      expected value of {@code getLeadingDeletionBasesRemoved()}
 * @param removedTrailing     expected value of {@code getTrailingDeletionBasesRemoved()}
 */
@Test(dataProvider = "removed_deletions")
public void testRemovedDeletions(final List<String> cigarElementStrings, final int removedLeading, final int removedTrailing) {
    final CigarBuilder cigarBuilder = new CigarBuilder();
    cigarElementStrings.forEach(
            element -> cigarBuilder.add(TextCigarCodec.decode(element).getFirstCigarElement()));

    // The built cigar itself is discarded; make() is called before the counters are read.
    cigarBuilder.make();

    final int actualLeading = cigarBuilder.getLeadingDeletionBasesRemoved();
    Assert.assertEquals(actualLeading, removedLeading);
    final int actualTrailing = cigarBuilder.getTrailingDeletionBasesRemoved();
    Assert.assertEquals(actualTrailing, removedTrailing);
}
 
Example #26
Source File: CigarBuilderUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Expects an {@link IllegalStateException} while adding the given elements to a
 * {@code CigarBuilder} or while building the cigar from them.
 *
 * @param cigarElementStrings cigar strings; only the first element of each decoded cigar is added
 */
@Test(dataProvider = "invalid", expectedExceptions = IllegalStateException.class)
public void testInvalid(final List<String> cigarElementStrings) {
    final CigarBuilder cigarBuilder = new CigarBuilder();
    cigarElementStrings.forEach(
            element -> cigarBuilder.add(TextCigarCodec.decode(element).getFirstCigarElement()));

    cigarBuilder.make();
}
 
Example #27
Source File: JunctionTreeLinkedDeBruijnGraphUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Merging a dangling tail described by a {@code 5I1M} cigar is expected to return 0.
 */
@Test
public void testWholeTailIsInsertion() {
    final JunctionTreeLinkedDeBruijnGraph graph = new JunctionTreeLinkedDeBruijnGraph(10);

    // Inputs for the merge helper, in constructor-argument order.
    final byte[] firstBases = "AXXXXX".getBytes();
    final byte[] secondBases = "AAAAAA".getBytes();
    final Cigar cigar = TextCigarCodec.decode("5I1M");
    final JunctionTreeLinkedDeBruijnGraph.DanglingChainMergeHelper mergeHelper =
            new JunctionTreeLinkedDeBruijnGraph.DanglingChainMergeHelper(null, null, firstBases, secondBases, cigar);

    Assert.assertEquals(graph.mergeDanglingTail(mergeHelper), 0);
}
 
Example #28
Source File: CigarBuilderUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Builds a cigar from the "tricky" data-provider inputs with a default {@code CigarBuilder}
 * and compares its string form to {@code expected}.
 *
 * @param cigarElementStrings cigar strings; only the first element of each decoded cigar is added
 * @param expected            expected {@code toString()} of the built cigar
 */
@Test(dataProvider = "tricky")
public void testTrickyCases(final List<String> cigarElementStrings, final String expected) {
    final CigarBuilder cigarBuilder = new CigarBuilder();
    cigarElementStrings.forEach(
            element -> cigarBuilder.add(TextCigarCodec.decode(element).getFirstCigarElement()));

    final String actual = cigarBuilder.make().toString();
    Assert.assertEquals(actual, expected);
}
 
Example #29
Source File: CigarBuilderUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Same flow as the other builder tests, but the builder is created with {@code false}
 * (presumably turning off removal of terminal deletions -- confirm against CigarBuilder).
 *
 * @param cigarElementStrings cigar strings; only the first element of each decoded cigar is added
 * @param expected            expected {@code toString()} of the built cigar
 */
@Test(dataProvider = "retain_deletions")
public void testRetainDeletions(final List<String> cigarElementStrings, final String expected) {
    final CigarBuilder cigarBuilder = new CigarBuilder(false);
    cigarElementStrings.forEach(
            element -> cigarBuilder.add(TextCigarCodec.decode(element).getFirstCigarElement()));

    final String actual = cigarBuilder.make().toString();
    Assert.assertEquals(actual, expected);
}
 
Example #30
Source File: AssemblyBasedSVDiscoveryTestDataProviderForSimpleSV.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Test data for a simple insertion: a contig made of 100 'A's followed by 100 'T's, with
 * 50 'C's inserted at the A->T junction (described on the forward strand).
 *
 * @return a list of two entries, the '+' strand representation first and the '-' strand
 *         representation second; both share the same expected novel adjacency, SV types
 *         and variants
 */
private static List<TestDataForSimpleSV> forSimpleInsertion() {
    final List<TestDataForSimpleSV> testData = new ArrayList<>();

    // ---- simple insertion, '+' strand representation ----
    final String leftRefFlank = TestUtilsForAssemblyBasedSVDiscovery.makeDummySequence('A', 100);
    final String insertedSeq = TestUtilsForAssemblyBasedSVDiscovery.makeDummySequence('C', 50);
    final String rightRefFlank = TestUtilsForAssemblyBasedSVDiscovery.makeDummySequence('T', 100);
    final byte[] plusContigSeq = (leftRefFlank + insertedSeq + rightRefFlank).getBytes();
    final String plusContigName = "simple_ins_+";

    // Both breakpoints are the same single-base interval.
    final SimpleInterval expectedLeftBreakpoint = new SimpleInterval("21:17000100-17000100");
    final SimpleInterval expectedRightBreakpoint = new SimpleInterval("21:17000100-17000100");
    final BreakpointComplications expectedBreakpointComplications =
            new BreakpointComplications.SimpleInsDelOrReplacementBreakpointComplications("", insertedSeq);
    final byte[] expectedAltSeq = insertedSeq.getBytes();
    final NovelAdjacencyAndAltHaplotype expectedNovelAdjacencyAndAltHaplotype =
            new NovelAdjacencyAndAltHaplotype(expectedLeftBreakpoint, expectedRightBreakpoint, NO_SWITCH,
                    expectedBreakpointComplications, SIMPLE_INS, expectedAltSeq);

    final AlignmentInterval plusRegion1 = new AlignmentInterval(
            new SimpleInterval("21", 17000001, 17000100), 1, 100,
            TextCigarCodec.decode("100M100S"), true, 60, 0, 100, ContigAlignmentsModifier.AlnModType.NONE);
    final AlignmentInterval plusRegion2 = new AlignmentInterval(
            new SimpleInterval("21", 17000101, 17000200), 151, 250,
            TextCigarCodec.decode("100S100M"), true, 60, 0, 100, ContigAlignmentsModifier.AlnModType.NONE);
    final SimpleChimera plusExpectedChimera = new SimpleChimera(plusContigName, plusRegion1, plusRegion2,
            NO_SWITCH, true, Collections.emptyList(), NO_GOOD_MAPPING_TO_NON_CANONICAL_CHROMOSOME);
    final DistancesBetweenAlignmentsOnRefAndOnRead plusExpectedDistances =
            new DistancesBetweenAlignmentsOnRefAndOnRead(0, 50, 17000100, 17000101, 100, 151);

    // The SV types and variants below are reused unchanged for the '-' strand entry;
    // note that the variant attributes carry the '+' strand contig name.
    final List<SvType> expectedSVTypes = Collections.singletonList(
            makeInsertionType(new SimpleInterval("21:17000100-17000100"), Allele.create("G", true),50));
    final List<VariantContext> expectedVariants = Collections.singletonList(
            addStandardAttributes(
                    makeInsertion("21", 17000100, 17000100, 50, Allele.create("G", true)),
                    100, plusContigName, SimpleSVType.SupportedType.INS.name(), 17000100, 50, insertedSeq, "", insertedSeq)
                    .make());

    testData.add(new TestDataForSimpleSV(plusRegion1, plusRegion2, plusContigName, plusContigSeq, false,
            plusExpectedChimera, expectedNovelAdjacencyAndAltHaplotype, expectedSVTypes, expectedVariants,
            plusExpectedDistances, BreakpointsInference.SimpleInsertionDeletionBreakpointsInference.class));

    // ---- simple insertion, '-' strand representation ----
    final byte[] minusContigSeq = (SequenceUtil.reverseComplement(rightRefFlank)
            + SequenceUtil.reverseComplement(insertedSeq)
            + SequenceUtil.reverseComplement(leftRefFlank)).getBytes();
    final String minusContigName = "simple_ins_-";
    final AlignmentInterval minusRegion1 = new AlignmentInterval(
            new SimpleInterval("21", 17000101, 17000200), 1, 100,
            TextCigarCodec.decode("100M100S"), false, 60, 0, 100, ContigAlignmentsModifier.AlnModType.NONE);
    final AlignmentInterval minusRegion2 = new AlignmentInterval(
            new SimpleInterval("21", 17000001, 17000100), 151, 250,
            TextCigarCodec.decode("100S100M"), false, 60, 0, 100, ContigAlignmentsModifier.AlnModType.NONE);
    final DistancesBetweenAlignmentsOnRefAndOnRead minusExpectedDistances =
            new DistancesBetweenAlignmentsOnRefAndOnRead(0, 50, 17000100, 17000101, 100, 151);
    final SimpleChimera minusExpectedChimera = new SimpleChimera(minusContigName, minusRegion1, minusRegion2,
            NO_SWITCH, false, Collections.emptyList(), NO_GOOD_MAPPING_TO_NON_CANONICAL_CHROMOSOME);

    testData.add(new TestDataForSimpleSV(minusRegion1, minusRegion2, minusContigName, minusContigSeq, true,
            minusExpectedChimera, expectedNovelAdjacencyAndAltHaplotype, expectedSVTypes, expectedVariants,
            minusExpectedDistances, BreakpointsInference.SimpleInsertionDeletionBreakpointsInference.class));

    return testData;
}