Java Code Examples for htsjdk.variant.variantcontext.VariantContextBuilder#attribute()

The following examples show how to use htsjdk.variant.variantcontext.VariantContextBuilder#attribute() . You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: AnnotatedVariantProducer.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
@VisibleForTesting
static VariantContextBuilder annotateWithExternalCNVCalls(final String recordContig, final int pos, final int end,
                                                          final VariantContextBuilder inputBuilder,
                                                          final Broadcast<SAMSequenceDictionary> broadcastSequenceDictionary,
                                                          final Broadcast<SVIntervalTree<VariantContext>> broadcastCNVCalls,
                                                          final String sampleId) {
    if (broadcastCNVCalls == null)
        return inputBuilder;
    final SVInterval variantInterval = new SVInterval(broadcastSequenceDictionary.getValue().getSequenceIndex(recordContig), pos, end);
    final SVIntervalTree<VariantContext> cnvCallTree = broadcastCNVCalls.getValue();
    final String cnvCallAnnotation =
            Utils.stream(cnvCallTree.overlappers(variantInterval))
                    .map(overlapper -> formatExternalCNVCallAnnotation(overlapper.getValue(), sampleId))
                    .collect(Collectors.joining(VCFConstants.INFO_FIELD_ARRAY_SEPARATOR));
    if (!cnvCallAnnotation.isEmpty()) {
        return inputBuilder.attribute(GATKSVVCFConstants.EXTERNAL_CNV_CALLS, cnvCallAnnotation);
    } else
        return inputBuilder;
}
 
Example 2
Source File: VariantEvalUtils.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
public VariantContext ensureAnnotations(final VariantContext vc, final VariantContext vcsub) {
    final int originalAlleleCount = vc.getHetCount() + 2 * vc.getHomVarCount();
    final int newAlleleCount = vcsub.getHetCount() + 2 * vcsub.getHomVarCount();
    final boolean isSingleton = originalAlleleCount == newAlleleCount && newAlleleCount == 1;
    final boolean hasChrCountAnnotations = vcsub.hasAttribute(VCFConstants.ALLELE_COUNT_KEY) &&
            vcsub.hasAttribute(VCFConstants.ALLELE_FREQUENCY_KEY) &&
            vcsub.hasAttribute(VCFConstants.ALLELE_NUMBER_KEY);

    if ( ! isSingleton && hasChrCountAnnotations ) {
        // nothing to update
        return vcsub;
    } else {
        // have to do the work
        VariantContextBuilder builder = new VariantContextBuilder(vcsub);

        if ( isSingleton )
            builder.attribute(VariantEval.IS_SINGLETON_KEY, true);

        if ( ! hasChrCountAnnotations )
            VariantContextUtils.calculateChromosomeCounts(builder, true);

        return builder.make();
    }
}
 
Example 3
Source File: CNNScoreVariants.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
@Override
protected void secondPassApply(VariantContext variant, ReadsContext readsContext, ReferenceContext referenceContext, FeatureContext featureContext) {
    String sv = scoreScan.nextLine();
    String[] scoredVariant = sv.split("\\t");

    if (variant.getContig().equals(scoredVariant[CONTIG_INDEX])
            && Integer.toString(variant.getStart()).equals(scoredVariant[POS_INDEX])
            && variant.getReference().getBaseString().equals(scoredVariant[REF_INDEX])
            && variant.getAlternateAlleles().toString().equals(scoredVariant[ALT_INDEX])) {

        final VariantContextBuilder builder = new VariantContextBuilder(variant);
        if (scoredVariant.length > KEY_INDEX) {
            builder.attribute(scoreKey, scoredVariant[KEY_INDEX]);
        }
        vcfWriter.add(builder.make());

    } else {
        String errorMsg = "Score file out of sync with original VCF. Score file has:" + sv;
        errorMsg += "\n But VCF has:" + variant.toStringWithoutGenotypes();
        throw new GATKException(errorMsg);
    }
}
 
Example 4
Source File: CpxVariantInterpreterUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
@Test(groups = "sv")
public void testTurnIntoVariantContext() throws IOException {
    // test two contigs give the same variant and annotated accordingly
    final CpxVariantInducingAssemblyContig tig13846_3 = new CpxVariantInducingAssemblyContig(new AssemblyContigWithFineTunedAlignments(fromPrimarySAMRecordString("asm013846:tig00003\t0\tchr20\t54849615\t60\t192S49M2I55M1I50M1I246M\t*\t0\t0\tATATTTCTCAGAGGGTCTCTGGGGAGTTGATAGGCTTTGGATGTTTGCCTCCTCCAAATCTCATGTTGAAATGTAATCCCCAGTGTTGGAGGGGGGCAGATCCCTCATGAGTGGCTTGGTGCCCTTCCCATGGTAATGAGTGAGTTCTTGCTCTGTTAGTTCATGAGAGAGCTGATTGTTTAAAGGAGTCTGGCACCTCCTCTCTCTCCTTTCTTCCTCTCTCACCATGTGACACTCCTATCCCCCTTTGCCTTCTTGCCTGAGTAAAAGCTTCCTAAGGCCTCACCAGAAGCCGAGCAGATGCTGTTGCCATGCTTGTAGTCTGCAGAACCATAACCCAAATAAACCCAAGTTTTTATAAATTACCCAGCTTCAGGTATTCCTTGATAGCAACGCAAAATGGATTAACACCTAACCACAGGTGCCCACAGCTGGAACTTGCTCCTTGCCTTATGCTTTGTTGACATTTTTCCCTTCCCTGATTTGCTTTCTTCACTTTCCTCACTCTCTCATTGTGCTTCCTGGAATTATCTCCCAAATAATCAATCTGCACTTACATCCTTTTTATCTCAGGGTCTACTTTTGGAGATACCAAA\t*\tSA:Z:chr20,54849438,+,54M542H,60,0,54;chr15,68774173,+,92H63M441H,60,3,48;\tMD:Z:400\tRG:Z:GATKSVContigAlignments\tNM:i:4\tAS:i:348\tXS:i:0", true)), bareBoneHg38SAMSeqDict);;
    final CpxVariantInducingAssemblyContig tig28220_5 = new CpxVariantInducingAssemblyContig(new AssemblyContigWithFineTunedAlignments(fromPrimarySAMRecordString("asm028220:tig00005\t16\tchr20\t54849438\t60\t54M206S\t*\t0\t0\tATATTTCTCAGAGGGTCTCTGGGGAGTTGATAGGCTTTGGATGTTTGCCTCCTCCAAATCTCATGTTGAAATGTAATCCCCAGTGTTGGAGGGGGGCAGATCCCTCATGAGTGGCTTGGTGCCCTTCCCATGGTAATGAGTGAGTTCTTGCTCTGTTAGTTCATGAGAGAGCTGATTGTTTAAAGGAGTCTGGCACCTCCTCTCTCTCCTTTCTTCCTCTCTCACCATGTGACACTCCTATCCCCCTTTGCCTTCTTGCC\t*\tSA:Z:chr20,54849615,-,192H49M2I17M,60,2,52;chr15,68774173,-,92H63M105H,60,3,48;\tMD:Z:54\tRG:Z:GATKSVContigAlignments\tNM:i:0\tAS:i:54\tXS:i:0", true)), bareBoneHg38SAMSeqDict);
    final CpxVariantCanonicalRepresentation cpxVariantCanonicalRepresentation = new CpxVariantCanonicalRepresentation(tig13846_3);
    final Tuple2<CpxVariantCanonicalRepresentation, Iterable<CpxVariantInducingAssemblyContig>> tuple2 =
            new Tuple2<>(cpxVariantCanonicalRepresentation, Arrays.asList(tig13846_3, tig28220_5));
    final byte[] refBases = b38_reference_chr20_chr21.getReferenceBases(new SimpleInterval("chr20", 54849491, 54849491)).getBases();

    final VariantContextBuilder baseVariantContextBuilder = cpxVariantCanonicalRepresentation.toVariantContext(refBases);
    baseVariantContextBuilder.attribute(GATKSVVCFConstants.TOTAL_MAPPINGS, 2);
    baseVariantContextBuilder.attribute(GATKSVVCFConstants.CONTIG_NAMES, Arrays.asList(tig13846_3.getPreprocessedTig().getContigName(), tig28220_5.getPreprocessedTig().getContigName()));
    baseVariantContextBuilder.attribute(GATKSVVCFConstants.MAPPING_QUALITIES, Arrays.asList("60", "60"));
    baseVariantContextBuilder.attribute(GATKSVVCFConstants.HQ_MAPPINGS, "2");
    baseVariantContextBuilder.attribute(GATKSVVCFConstants.ALIGN_LENGTHS, Arrays.asList("54", "54"));
    baseVariantContextBuilder.attribute(GATKSVVCFConstants.MAX_ALIGN_LENGTH, "54");

    final VariantContext variantContext = CpxVariantInterpreter.turnIntoVariantContext(tuple2, b38_reference_chr20_chr21);
    VariantContextTestUtils.assertVariantContextsAreEqual(variantContext, baseVariantContextBuilder.make(), Collections.emptyList(), Collections.emptyList());
}
 
Example 5
Source File: FilterFuncotations.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
/**
 * Mark a variant as matching a set of Funcotation filters, or as matching no filters.
 */
private VariantContext applyFilters(final VariantContext variant, final Set<String> matchingFilters) {
    final VariantContextBuilder variantContextBuilder = new VariantContextBuilder(variant);
    final boolean isSignificant = !matchingFilters.isEmpty();

    // Mark the individual filters that make the variant significant, if any.
    final String clinicalSignificance = isSignificant ?
            String.join(FilterFuncotationsConstants.FILTER_DELIMITER, matchingFilters) :
            FilterFuncotationsConstants.CLINSIG_INFO_NOT_SIGNIFICANT;
    variantContextBuilder.attribute(FilterFuncotationsConstants.CLINSIG_INFO_KEY, clinicalSignificance);

    // Also set the filter field for insignificant variants, to make it easier for
    // downstream tools to extract out the interesting data.
    if (isSignificant) {
        variantContextBuilder.passFilters();
    } else {
        variantContextBuilder.filter(FilterFuncotationsConstants.NOT_CLINSIG_FILTER);
    }

    return variantContextBuilder.make();
}
 
Example 6
Source File: ExampleTwoPassVariantWalker.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
@Override
protected void secondPassApply(VariantContext variant, ReadsContext readsContext, ReferenceContext referenceContext, FeatureContext featureContext) {
    VariantContextBuilder vcb = new VariantContextBuilder(variant);
    final double qualByDepth = qualByDepths.get(counter++);
    final double distanceFromMean = Math.abs((qualByDepth - averageQualByDepth)/ Math.sqrt(sampleVarianceOfQDs));
    vcb.attribute(QD_DISTANCE_FROM_MEAN, distanceFromMean);
    vcb.attribute(COPY_OF_QD_KEY_NAME, qualByDepth);

    vcfWriter.add(vcb.make());
}
 
Example 7
Source File: AnnotatedVariantProducer.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Given novel adjacency and inferred variant types that should be linked together,
 * produce annotated, and linked VCF records.
 */
public static List<VariantContext> produceLinkedAssemblyBasedVariants(final Tuple2<SvType, SvType> linkedVariants,
                                                                      final SimpleNovelAdjacencyAndChimericAlignmentEvidence simpleNovelAdjacencyAndChimericAlignmentEvidence,
                                                                      final Broadcast<ReferenceMultiSparkSource> broadcastReference,
                                                                      final Broadcast<SAMSequenceDictionary> broadcastSequenceDictionary,
                                                                      final Broadcast<SVIntervalTree<VariantContext>> broadcastCNVCalls,
                                                                      final String sampleId,
                                                                      final String linkKey) {

    final VariantContext firstVar = produceAnnotatedVcFromAssemblyEvidence(linkedVariants._1, simpleNovelAdjacencyAndChimericAlignmentEvidence,
            broadcastReference, broadcastSequenceDictionary, broadcastCNVCalls, sampleId).make();
    final VariantContext secondVar = produceAnnotatedVcFromAssemblyEvidence(linkedVariants._2, simpleNovelAdjacencyAndChimericAlignmentEvidence,
            broadcastReference, broadcastSequenceDictionary, broadcastCNVCalls, sampleId).make();

    final VariantContextBuilder builder1 = new VariantContextBuilder(firstVar);
    builder1.attribute(linkKey, secondVar.getID());

    final VariantContextBuilder builder2 = new VariantContextBuilder(secondVar);
    builder2.attribute(linkKey, firstVar.getID());

    // manually remove inserted sequence information from RPL event-produced DEL, when it can be linked with an INS
    if (linkedVariants._1 instanceof SimpleSVType.Deletion)
        return Arrays.asList(builder1.rmAttribute(GATKSVVCFConstants.INSERTED_SEQUENCE)
                                     .rmAttribute(GATKSVVCFConstants.INSERTED_SEQUENCE_LENGTH)
                                     .rmAttribute(GATKSVVCFConstants.SEQ_ALT_HAPLOTYPE)
                                     .make(),
                             builder2.make());
    else if (linkedVariants._2 instanceof SimpleSVType.Deletion) {
        return Arrays.asList(builder1.make(),
                             builder2.rmAttribute(GATKSVVCFConstants.INSERTED_SEQUENCE)
                                     .rmAttribute(GATKSVVCFConstants.INSERTED_SEQUENCE_LENGTH)
                                     .rmAttribute(GATKSVVCFConstants.SEQ_ALT_HAPLOTYPE)
                                     .make());
    } else
        return Arrays.asList(builder1.make(), builder2.make());
}
 
Example 8
Source File: AnnotatedVariantProducer.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Produces a VC from a {@link NovelAdjacencyAndAltHaplotype}
 * (consensus among different assemblies if they all point to the same breakpoint).
 * @param inferredType                      inferred type of variant
 * @param simpleNovelAdjacencyAndChimericAlignmentEvidence novel adjacency and evidence simple chimera contig(s) that support this {@code novelAdjacencyAndAltHaplotype}
 * @param broadcastReference                broadcast reference
 * @param broadcastSequenceDictionary       broadcast reference sequence dictionary
 * @param broadcastCNVCalls                 broadcast of external CNV calls, if available, will be used for annotating the assembly based calls
 * @param sampleId                          sample identifier of the current sample
 */
public static VariantContextBuilder produceAnnotatedVcFromAssemblyEvidence(final SvType inferredType,
                                                                           final SimpleNovelAdjacencyAndChimericAlignmentEvidence simpleNovelAdjacencyAndChimericAlignmentEvidence,
                                                                           final Broadcast<ReferenceMultiSparkSource> broadcastReference,
                                                                           final Broadcast<SAMSequenceDictionary> broadcastSequenceDictionary,
                                                                           final Broadcast<SVIntervalTree<VariantContext>> broadcastCNVCalls,
                                                                           final String sampleId) {

    final NovelAdjacencyAndAltHaplotype novelAdjacencyAndAltHaplotype = simpleNovelAdjacencyAndChimericAlignmentEvidence.getNovelAdjacencyReferenceLocations();
    final List<SimpleChimera> contigEvidence = simpleNovelAdjacencyAndChimericAlignmentEvidence.getAlignmentEvidence();

    // basic information and attributes
    final VariantContextBuilder vcBuilder = inferredType.getBasicInformation();

    // attributes from complications
    novelAdjacencyAndAltHaplotype.getComplication().toVariantAttributes().forEach(vcBuilder::attribute);

    // evidence used for producing the novel adjacency
    getAssemblyEvidenceRelatedAnnotations(contigEvidence).forEach(vcBuilder::attribute);

    // alt seq for non-BND variants, and if available or not empty
    final byte[] altHaplotypeSequence = novelAdjacencyAndAltHaplotype.getAltHaplotypeSequence();
    if (inferredType instanceof BreakEndVariantType) {
        return annotateWithExternalCNVCalls(inferredType.getVariantChromosome(), inferredType.getVariantStart(), inferredType.getVariantStop(),
                vcBuilder, broadcastSequenceDictionary, broadcastCNVCalls, sampleId);
    } else if (altHaplotypeSequence != null && altHaplotypeSequence.length != 0)
        vcBuilder.attribute(GATKSVVCFConstants.SEQ_ALT_HAPLOTYPE, StringUtil.bytesToString(altHaplotypeSequence));

    return annotateWithExternalCNVCalls(inferredType.getVariantChromosome(), inferredType.getVariantStart(), inferredType.getVariantStop(),
            vcBuilder, broadcastSequenceDictionary, broadcastCNVCalls, sampleId);
}
 
Example 9
Source File: GATKToolUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
private void writeHeaderAndBadVariant(final VariantContextWriter writer) {
    final VariantContextBuilder vcBuilder = new VariantContextBuilder(
            "chr1","1", 1, 1, Arrays.asList(Allele.create("A", true)));
    vcBuilder.attribute("fake", new Object());
    final VariantContext vc = vcBuilder.make();
    final VCFHeader vcfHeader = new VCFHeader();
    writer.writeHeader(vcfHeader);
    writer.add(vc);
}
 
Example 10
Source File: AnnotatedVariantProducer.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
private static VariantContext annotateWithImpreciseEvidenceLinks(final VariantContext variant,
                                                                 final PairedStrandedIntervalTree<EvidenceTargetLink> evidenceTargetLinks,
                                                                 final SAMSequenceDictionary referenceSequenceDictionary,
                                                                 final ReadMetadata metadata,
                                                                 final int defaultUncertainty) {
    if (variant.getStructuralVariantType() == StructuralVariantType.DEL) {
        SVContext svc = SVContext.of(variant);
        final int padding = (metadata == null) ? defaultUncertainty : (metadata.getMaxMedianFragmentSize() / 2);
        PairedStrandedIntervals svcIntervals = svc.getPairedStrandedIntervals(metadata, referenceSequenceDictionary, padding);

        final Iterator<Tuple2<PairedStrandedIntervals, EvidenceTargetLink>> overlappers = evidenceTargetLinks.overlappers(svcIntervals);
        int readPairs = 0;
        int splitReads = 0;
        while (overlappers.hasNext()) {
            final Tuple2<PairedStrandedIntervals, EvidenceTargetLink> next = overlappers.next();
            readPairs += next._2.getReadPairs();
            splitReads += next._2.getSplitReads();
            overlappers.remove();
        }
        final VariantContextBuilder variantContextBuilder = new VariantContextBuilder(variant);
        if (readPairs > 0) {
            variantContextBuilder.attribute(GATKSVVCFConstants.READ_PAIR_SUPPORT, readPairs);
        }
        if (splitReads > 0) {
            variantContextBuilder.attribute(GATKSVVCFConstants.SPLIT_READ_SUPPORT, splitReads);
        }

        return variantContextBuilder.make();
    } else {
        return variant;
    }
}
 
Example 11
Source File: LiftoverUtilsTest.java    From picard with MIT License 4 votes vote down vote up
@DataProvider
public Object[][] swapRefAltData() {

    String testName = "test1";

    final List<Object[]> tests = new ArrayList<>();

    VariantContextBuilder builder = new VariantContextBuilder(testName, "chr1", 2, 2, Arrays.asList(RefA, C));
    VariantContextBuilder resultBuilder = new VariantContextBuilder(testName, "chr1", 2, 2, Arrays.asList(RefC, A));
    resultBuilder.attribute("SwappedAlleles", true);

    tests.add(new Object[]{builder.make(), resultBuilder.make()});

    testName = "test2";
    builder.source(testName);
    builder.attribute(AF, .2);

    resultBuilder.source(testName);
    resultBuilder.attribute(AF, .8);
    tests.add(new Object[]{builder.make(), resultBuilder.make()});

    testName = "test3";
    builder.source(testName);
    builder.attribute(AF, ".");

    resultBuilder.source(testName);
    resultBuilder.attribute(AF, ".");
    tests.add(new Object[]{builder.make(), resultBuilder.make()});

    testName = "test4";
    builder.source(testName);
    builder.attribute(AF, 1);

    resultBuilder.source(testName);
    resultBuilder.attribute(AF, 0D);
    tests.add(new Object[]{builder.make(), resultBuilder.make()});

    testName = "test5";
    builder.source(testName);
    builder.attribute(MAX_AF, 1);

    resultBuilder.source(testName);
    tests.add(new Object[]{builder.make(), resultBuilder.make()});

    testName = "test5";
    builder.source(testName);
    builder.attribute("AF_MAX_shouldnt_be_dropped", 1);

    resultBuilder.source(testName);
    resultBuilder.attribute("AF_MAX_shouldnt_be_dropped", 1);
    tests.add(new Object[]{builder.make(), resultBuilder.make()});

    return tests.toArray(new Object[0][]);
}
 
Example 12
Source File: TestUtilsCpxVariantInference.java    From gatk with BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
private static PreprocessedAndAnalysisReadyContigWithExpectedResults buildForCaseWithZeroSegment(final Set<String> canonicalChromosomes,
                                                                                                 final SAMSequenceDictionary refSeqDict) {
    final AssemblyContigWithFineTunedAlignments
            analysisReadyContig = makeContigAnalysisReadyForCpxBranch("asm028558:tig00002\t0\tchr20\t51740561\t60\t1653S1369M\t*\t0\t0\tTTCTCCTCCCTCAGCCTCCCGAGTAGCTGGGACTACAGGCACCCGCCACCATGCCCGGCTAATTTTTTGTGTTTTTTTTAGCAGAGACAGGGTTTCACCTTGTTAGCCAGGATGGTCTCGATCTCCTGACCTCACCTACACTCACTCTTTAATCATGGTTGTAAAAGTCAATTTCCTCCATCTTCTTTCCCAGGGGTTTAGCTGAGCCCAATCCCATCTCTGAATTTCAGGCATGATGTGCGCAGGCTCCAGGCTTAGCCAGGGAGAGCCCTGGATGTTCTGGACATGGTGACAGGCTTGAGAAGGGACAGTGAGACCCCTGGCTGGGATTTTGCTAGAAATACTTGGAAAGATGTGTTCTTCTCCCTGGGGTTGCTAAGGTAGCAGAAAACATGCCTGGGCAGGGGACAGACGTCCTGTTCCCATTGAGGGACTAGTCGGCCTGAAAATGAAGCCCCCAAGGAGGACAGCAGAGACAAGAAGTTCCCGATAGCCTTGAGAGACTGCAAAGATTCACCCACCCCTGGACTTTCAGTCTCAGAGTCAATAAAATAGCTTTTGTTGTACTTTACTTTGCTCAAGCCAGTTTACTTTGTTTCACTGTTATTTATTTATTTAGAGACAGGGTCTCACTCTGTTACCCAGGCTGCAGTGCAGTGGCGAGATCATAGCTCTCTGGAGCCTCAACCTCCTGGACTCAAGCAATCCTCCCACCTCAGCCTCCCGAGTAGCTGGGACTACAGGCATGCACCACCACACCTGGCATATTTTATAATTTTTTGTAGAGTCAGGGGTCTCCCTACATAGCCCAGGTTGGTCTCAAACTCCTGGGCTCAAGCGATCTGCCCACCTCAGCCTCCCAAAGTGCTGGGGTTACAGGTATGAGCCACCACGCCCCAGCCAACACAAACCAATTTTAGTTGGGAGTCTGTCAGCCACTATCTAAAATCTTTTAAGATTCCTGTCTGGCAGGCCAGGCGCGGTGGCTCACACCTGTATTCCCAGCACTTTGGGAGGTCAAGGCGGGTGGATTACCTGAGGTCAGGAGTTCAAGACCAGCCTGACCAACATAGTGAAACCCCGTCTCTACAAAAAATACAAAAAATTAGGCTGGGCACAGTGGCTCACACCTGTAATCCCAGCACTTTGGGAGGCCAAGGCAGGTGGATCACCTGAGGTCGGGAGTTCAAGACCAGCCTAACCAACATGAGGAAACCCCGTCTCTACTAAAAATACAAAATTAGATGGGCGTGGTGGCGCATGCCTGTAATTCAAACTACTTGGAAGGCTGAGGCAGGAGAATTGCTTGAACCCAGGAGACAGAGGTTGTGGTAAGCCAAGATCATGCCATTGTACTCCAGCATGGGCAACAAGAGTGAGACTCCATCTCAAAAAAAAAAAAAATTAGCCAGGCGTGGTGGTGGGCACCTGTAATCCCAGCTACCCTGGAGACTGAGGCAGAAGAATCGCTTGAACCCAGGAGGCGGAGATTGCAGTGAGCCAAGATTACGCCACTGCACTCCAGCCTGGGCACCAAGAGCAAAACCCTGTCTCAAAAAAATTAACAAATAAAAAGATTTCTGTCTGCCACACGGCTGGTCCATGTGTAAAGACACATTCCTGTTGGTTTTATGTGTCTTGAATTCTAATGGGTTTTGTGTTGTTGTTTTTGTTTTTTGAGACAAGGTCTCATTCTGTCACCCAGGCAGGACTGTGGTGGCACCATCATGGCTCAGCGCAGCCTCCTTTTCCCCAGGCTCAAGTGATCCTCTTGCCTCAGCCTCCCACGTGGCTGGGACTACAGGTGTGTACCACCACTCCCGGATAATTTTTTTTATTTTTTATTTTTAGTAAAGACAGTCTCACTATGTTGCCCAGGCTGGTCTCCAACTCCTGGTCTCAAGCAATCCTCCCAGTTCAGCCTCTCAAAGTGCTGGGATTACAGATGTGAGCCACAATACCCGGCCCCAATTCTAATGTTTAAAGAGTACAGTCTACACCTTAAAGCCTGCATTTTATCATCCTGTCCTCACTGCTCTGACTTCTTTACAGTTGTGCTGTCCACCTTGGCGGCTTCTACCACATGTGGCTATTTTAAGTTTCAATTAATTAAAATTAAATTTTAATTTAATTAATTAAAAATAAATTTTAATTAATTAATTAAAAATTAAAAATTCCTTTTCCCTGGCCACACTGAGTGCTCCATAGCTACATGTGACCACTAACTACGGACTAGACTGTGCAGACACAGAACATTCCATCATGGCTGAGAGTTCTACTGGGCAACACTGCTGGAGAGCATCCCTTTCAAGACACCTCCTTCCACCTTACTCTTGAACTGATGTCAATGGCAAAGGAAGCACACACGAACATTAGTAAGTGGAGCTCACAGGCCAGGCGCGGTGGCTCACGCCTGTAATCCCAGTACTTTGGGAGGCCGAGGCAGGCGGATCACGAGGTCAGGGGATCGAGACCATCTGACCAACATGGAGAAACCCCATCTCTACTAAAAATACAAAATTAGCCAGGCGTGGTGGCGCATGCCTGTAATCTCAGCTACTCAGGGGGCTGACACAGGACAATCGCTTGAACCTGGGAGGCTGAGGTTGCAGTGAGCCGAGATCACACCATTGCACTCTAGCCTGGACAATAAGAGCAAAACTCCGTCTCAAAAAATAAAAATAAAAATAAAAAAGGAAGTGGACCTCACAATGGAGGGAAACTCGCCTGAACAGAGTTTCCCAAACTTCAGCCATTTTCATACACACATGGGATTTCTATGATCTCCAAATACCTCTAGCTTCTACCTACACAATTTTGTCTAAATTTGATCACCTTACATCAGTGGTTCTCAACTACAGGCAATTTTGCACCCCCAGGGTGGGTATCTGGCAATATCTGGAAACACTGTTGTCTGTCACAACTCCAAGATGCTACTCTCATCTAGTGGATGGAGGCAGCCAAGGATGCTGCGAGACACCTTACAATGCACAGACAACCTCCACAACAGAGAAGTATCCAG\t*\tSA:Z:chr20,51739457,+,1104M1918H,60,1,1099;chr14,82163062,+,1129H275M1618H,5,25,150;chr19,10120394,+,1390H141M1491H,0,11,86;chr18,11642876,-,1866H56M1100H,60,0,56;\tMD:Z:1369\tRG:Z:GATKSVContigAlignments\tNM:i:0\tAS:i:1369\tXS:i:67",
            canonicalChromosomes, refSeqDict);

    final CpxVariantInducingAssemblyContig.BasicInfo manuallyCalculatedBasicInfo =
            new CpxVariantInducingAssemblyContig.BasicInfo("chr20", true,
                    new SimpleInterval("chr20", 51739457, 51739457), new SimpleInterval("chr20", 51741929, 51741929));

    //////////
    final List<CpxVariantInducingAssemblyContig.Jump> manuallyCalculatedJumps =
            Arrays.asList(
                    new CpxVariantInducingAssemblyContig.Jump(new SimpleInterval("chr20", 51740560, 51740560), new SimpleInterval("chr18", 11642927, 11642927), StrandSwitch.FORWARD_TO_REVERSE, 0),
                    new CpxVariantInducingAssemblyContig.Jump(new SimpleInterval("chr18", 11642876, 11642876), new SimpleInterval("chr20", 51740561, 51740561), StrandSwitch.REVERSE_TO_FORWARD, 497)
            );
    final List<SimpleInterval> manuallyCalculatedEventPrimaryChromosomeSegmentingLocations =
            Arrays.asList(
                    new SimpleInterval("chr20", 51740560, 51740560),
                    new SimpleInterval("chr20", 51740561, 51740561)
            );
    final CpxVariantInducingAssemblyContig manuallyCalculatedCpxVariantInducingAssemblyContig =
            new CpxVariantInducingAssemblyContig(analysisReadyContig,
                    manuallyCalculatedBasicInfo,
                    manuallyCalculatedJumps,
                    manuallyCalculatedEventPrimaryChromosomeSegmentingLocations);

    //////////
    final SimpleInterval manuallyCalculatedAffectedRefRegion =
            new SimpleInterval("chr20", 51740560, 51740561);
    final List<SimpleInterval> manuallyCalculatedSegments = Collections.emptyList();
    final List<String> manuallyCalculatedAltArrangementDescription =
            Arrays.asList("-chr18:11642876-11642927","UINS-497");
    final byte[] manuallyCalculatedAltSeq = "AATTAGGCTGGGCACAGTGGCTCACACCTGTAATCCCAGCACTTTGGGAGGCCAAGGCAGGTGGATCACCTGAGGTCGGGAGTTCAAGACCAGCCTAACCAACATGAGGAAACCCCGTCTCTACTAAAAATACAAAATTAGATGGGCGTGGTGGCGCATGCCTGTAATTCAAACTACTTGGAAGGCTGAGGCAGGAGAATTGCTTGAACCCAGGAGACAGAGGTTGTGGTAAGCCAAGATCATGCCATTGTACTCCAGCATGGGCAACAAGAGTGAGACTCCATCTCAAAAAAAAAAAAAATTAGCCAGGCGTGGTGGTGGGCACCTGTAATCCCAGCTACCCTGGAGACTGAGGCAGAAGAATCGCTTGAACCCAGGAGGCGGAGATTGCAGTGAGCCAAGATTACGCCACTGCACTCCAGCCTGGGCACCAAGAGCAAAACCCTGTCTCAAAAAAATTAACAAATAAAAAGATTTCTGTCTGCCACACGGCTGGTCCATGTGTAAAGACACATTCCTGTTGGTTTTATGTGTCTTGAATTCTAATGGGT".getBytes();
    final CpxVariantCanonicalRepresentation manuallyCalculatedCpxVariantCanonicalRepresentation =
            new CpxVariantCanonicalRepresentation(
                    manuallyCalculatedAffectedRefRegion,
                    manuallyCalculatedSegments,
                    manuallyCalculatedAltArrangementDescription,
                    manuallyCalculatedAltSeq);

    //////////
    final byte[] dummyRefSequence = "T".getBytes();
    final VariantContextBuilder variantContextBuilder = new VariantContextBuilder()
            .chr("chr20").start(51740560).stop(51740561)
            .alleles(Arrays.asList(Allele.create(dummyRefSequence, true), Allele.create(SimpleSVType.createBracketedSymbAlleleString(CPX_SV_SYB_ALT_ALLELE_STR), false)))
            .id("CPX_chr20:51740560-51740561")
            .attribute(VCFConstants.END_KEY, 51740561)
            .attribute(SVTYPE, GATKSVVCFConstants.CPX_SV_SYB_ALT_ALLELE_STR)
            .attribute(SVLEN, 549)
            .attribute(SEQ_ALT_HAPLOTYPE, new String(manuallyCalculatedAltSeq));
    variantContextBuilder.attribute(CPX_EVENT_ALT_ARRANGEMENTS,
            String.join(VCFConstants.INFO_FIELD_ARRAY_SEPARATOR, manuallyCalculatedAltArrangementDescription));
    final VariantContext manuallyCalculatedVariantContext = variantContextBuilder.make();

    //////////
    final Set<SimpleInterval> manuallyCalculatedTwoBaseBoundaries = new HashSet<>();
    manuallyCalculatedTwoBaseBoundaries.add(new SimpleInterval("chr20", 51739457, 51739458));
    manuallyCalculatedTwoBaseBoundaries.add(new SimpleInterval("chr20", 51740559, 51740560));
    manuallyCalculatedTwoBaseBoundaries.add(new SimpleInterval("chr20", 51740561, 51740562));
    manuallyCalculatedTwoBaseBoundaries.add(new SimpleInterval("chr20", 51741928, 51741929));

    //////////
    return new PreprocessedAndAnalysisReadyContigWithExpectedResults(
            manuallyCalculatedCpxVariantInducingAssemblyContig,
            manuallyCalculatedCpxVariantCanonicalRepresentation,
            manuallyCalculatedVariantContext,
            dummyRefSequence,
            manuallyCalculatedTwoBaseBoundaries);
}
 
Example 13
Source File: TestUtilsCpxVariantInference.java    From gatk with BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
private static PreprocessedAndAnalysisReadyContigWithExpectedResults buildForContig_22672_4(final Set<String> canonicalChromosomes,
                                                                                            final SAMSequenceDictionary refSeqDict) {
    final AssemblyContigWithFineTunedAlignments
            analysisReadyContig = makeContigAnalysisReadyForCpxBranch("asm022672:tig00004\t16\tchr9\t130954964\t60\t1631S192M60I99M24I54M156I1430M\t*\t0\t0\tTCATCCAGGTTGGAGTGCAGTGGTGCTATCTCAGCTCACTGCAGCCTCCACCCCTGGATTGAATCGATTCTCCTGCCTCAGCCTCCTGAGTAGCTGGGATTACAGGAGTGCACCACCACGCCCGGCTCATGTTTGTGTTTTTAGTAGAGACAGGGTTTCACCACATTGGCCAGGCTGGTCTCAAACTCCTGACCTCAAGTGATCTGGCTGCCTTGGCCTCCCAAAGTGCTGGGATTATAGGTGTGAGCCACCACACCCAGCCAGAGTGGGTAGTTTTTAAAACCACCACAATTGCCCGCCAGGGGCACAAAGAGGACTCATGGGGATGGGTCTGATAAGTGCTGAGCCCAGTGCTGGGCACCTGCAGGCACTCAGTAGGTGGTGGACACTTGTTTTATTATGATTATCCAGCACCTAGCACCCAGTCTACCCCAAATAGCACCATCAACATATCTTCCTGAATCCTGCCTCCCTCATTTCAGCCTTTGCTCTCAGGCAGCCAGGGGTTCCCCATTTTCAACAAGAAAAAGCCCAAACCCTTTTGCTTGGCAGTCAACCCCACCCCATCCAAACCAATCCCTCCTTTCTCTGCTGAATGTTTCCCCTGTGCCAGCCACTTCCTTCCCACCACCACACCCTTAGCCAGGCTATCACCCACCAAGAATCCTGCCCCATCTTGAGCCTCTGCCTTCATTTGAAGCCTATCATCCCCTGTGCCTCAGTATTTTCCTTCAGCTCTAGATTCTTGCAGTGCTCTAGCCATTTACACAGCCATCAAATTCCAGGCCTCTTCTGTCACTCACCGGTTCACCTCTGTCATCTTGTCTCCTGCCCAAGACTGCATCTCCTCTTGTCCCTTCTCTGAGAGGCCTGTGGTTGAGCACAGGGATGAATGAGAAAGAAACCCAGCCTGTCCTCAAGAAGTTGACAATGTGTCCCTCAAAGACTGGGCTCATTGCTGCTACCTCTGGGAGTCCCAATGTGGAGGAGAGCTCTCAGTGGGTGTGCAGAAAATCTTGTCAGAAGTTACTGTCCCTGGGCAGGGCTAATACCACCAGTATCACCATTATCATCATCACCACTATCGTCATCATCACCACCATCACCATCATTACCACCATCATCACCATCAATATCGACACCATCCTCATCATCATCATCACCACCATCATCAGCATCATCATCAGCAGCAGCACCACCATTACCATCATTATCCCTACCATCTTCATCACCACCATCACCATCACCATCACCATCATCATCACCACCATCACCATCATCACCACCACCATCATCATCACCACCATCACCACCATCATCACCATCATCATCACCATCATCACCACCATCATCACCATCATCATCATCAGCACCACCACCACTATTACCATCATTATCACTACCATCCTCATCACCACCATCACTATCACCATCACCATCATCATCACCATCATCACCATCATCATCACCATCACCATCATCACCATCACCATCACCATCATCACCATCATCATCATCACCATCACCATCATCATCACCATCATCACCACCATCATCACCATCATCATCATCAGCACCACCACCACTATTACCATCATTATCCCTACCATCTTCATCACCACCATCACCATCACCATCACCATCATCATCACCACCATCACCATCACCATCACCATCATCATCACCATCATCACCACCATCATCACCATCATCATCATCAGCACCACCACCACTATTACCATCATTATCCCTACCATCCTCATCACCACCATCACCATCACCATCACCATCATCATCACCATCACCATCATCACCATCACCATCATCAACATCACCATCACCATCATCACCATCATCATCATCACCATCACCATCATCATCACCATCATCACCACCATCATCACCATCATCAGCAGCAGCAGCACCACCACCACTATTACCATCATTATCCCTACCATCCTCATCACCACCATCACCATCACCATCATCACCATCACCATCACCATCATCACCATCACCATCATCATCATCACCACCATCATCACCATCATCATCAGCAGCAGCACCACCACCACTATTACCATCATTATTCCTACCATCCTCATCACCACCCTCACCATCACCATCATCATCATGACCATCATCATCAGCACCACCGTTATCATCATTATCCCCACCGTCCTCATCACCATCATCACCATCACCATCATCATTGTCACCATCACCATGATCATCGTCACCATCTTTATCCCCACCATCCTCATCACCATCATCACTACCATCAGCACCATCACCATCATCACCATTACCACCATTATCACCACCATCATCATCACCAACACCATCCTCAGCACTACCACCACCATCACATTCTCATTCATCCCTGTGCTCAACCACAGGCCTCTCAGAGAAGGGACAAGAGGAGATGCAGTCTTGGGCAGGAGACCACTTTTGGGGGCAAAAGCAGCTCCTGAAACCACACCCCAAAGCAGGCTCCACTCCATTCCATCAGACCCTGCAGTCAGGAAGGGCCGTGGGGTGTCCTGGCCTTCATCTGTGAGAACTGCCTTACCCATGCTGATTTCCACCCACATGGCATCAGAGGACTCCGTGCCCTGACACTACAATCCTTGTCCCCTTGTGTAGCCACCTCAGGGTCCAGCACCAACTGTCCTGTCTACCTGGAGCATAACACCATGAGGTCCCCAGCTCCTGGCTCTCCCCTGGGGGCTTGCCATGACCCGTTGGCCTGAGCTTTGGGGTGTCAGAAGCCCCCATGGAATGCACTGCTAAGACAAGCCCATCCTGCCAGCCTTCCTACCTCTCATAGCTGACCCTGCTGGGTTCTATGTTACAAATTCCCCTGCCCCAGCACCACTCTGTGACCTGCAGTGAGCGCAGCTGTCACTCCCCTCAAGGGTGCCCAGGCAATAGGGAGATGTGAGGACTGTGGGGGCATAAGAGTTGTCTCAGGGCTGTGCAGAGGAAGCCAGGGCCCCCCTAAATATCTCCAGCTCATCGCACCAGCTCCATTCTGCCCGGAGCCCCATGTCTGAGCCCTCAGCTCAGAGCAGACAACCGTGCTAATCCTGTCCCAGGCTGGTGGCCGCCCCCACTGAGGAGGGGGAGCAAGCTGCCCAGAAAGCTGGTGTGGAGTGCCCGTTAGCTGTGGATGGCATCGTGGTGCCACCGGGAGATTATCCACACGCAGCTAGTTCCCAGCAGCCGACCCCAGTGCCCCAAAAGAAGGCAGCTAGCTATTAAGGAGATTCATGGGCAGGGGTGAGGCGAGGAGGAAGGCTAATCAAGCTGTCCTGATTGTAATTGTCCGAAGGTGCTGGCCTCTCTCGTAAACATGCCAACTGCAGGCCCTGCTTCTGCGTCTCAGCAGGACTTCTCCCTGCTGGGACCGTGCTGGGGAATGTTCTTTCAACATAACTCTATTTAAATTCACATTTCCATCATCCCCCAGAGGAGCCGAGAGAGCTGAGCTGCGTGGTATAAGCCGGGAAAGGATTAGATGGGGTGGGTGTTATTTTTTTTCCTTTTATTTTCCCTTAGTGATGGAGATGGGGTTGTGGGGGGTGGGCCATTCTAGAATTCTGCAGTATTGGAACTGGAAGAGCTGTTACAAACCATCCAGCTC\t*\tSA:Z:chr9,130953867,-,1274M42I167M2163H,60,55,1318;chr9,130955093,-,1469H33M3I179M1962H,19,14,138;\tMD:Z:15T1C5T11A0T3G0A2C4C3T8T5C5T2G8G5G2G6C24T45T2C23T5C14T46T33T5C32T1433\tRG:Z:GATKSVContigAlignments\tNM:i:268\tAS:i:1347\tXS:i:176",
            canonicalChromosomes, refSeqDict);

    final CpxVariantInducingAssemblyContig.BasicInfo manuallyCalculatedBasicInfo =
            new CpxVariantInducingAssemblyContig.BasicInfo("chr9", false,
                    new SimpleInterval("chr9", 130953867, 130953867), new SimpleInterval("chr9", 130956738, 130956738));

    //////////
    final List<CpxVariantInducingAssemblyContig.Jump> manuallyCalculatedJumps =
            Arrays.asList(
                    new CpxVariantInducingAssemblyContig.Jump(new SimpleInterval("chr9", 130955309, 130955309), new SimpleInterval("chr9", 130955308, 130955308), StrandSwitch.NO_SWITCH, 156),
                    new CpxVariantInducingAssemblyContig.Jump(new SimpleInterval("chr9", 130955156, 130955156), new SimpleInterval("chr9", 130955155, 130955155), StrandSwitch.NO_SWITCH, 60),
                    new CpxVariantInducingAssemblyContig.Jump(new SimpleInterval("chr9", 130954964, 130954964), new SimpleInterval("chr9", 130955307, 130955307), StrandSwitch.NO_SWITCH, 148)
            );
    final List<SimpleInterval> manuallyCalculatedEventPrimaryChromosomeSegmentingLocations =
            Arrays.asList(
                    new SimpleInterval("chr9", 130954964, 130954964),
                    new SimpleInterval("chr9", 130955155, 130955155),
                    new SimpleInterval("chr9", 130955156, 130955156),
                    new SimpleInterval("chr9", 130955307, 130955307),
                    new SimpleInterval("chr9", 130955308, 130955308),
                    new SimpleInterval("chr9", 130955309, 130955309)
            );
    final CpxVariantInducingAssemblyContig manuallyCalculatedCpxVariantInducingAssemblyContig =
            new CpxVariantInducingAssemblyContig(analysisReadyContig,
                    manuallyCalculatedBasicInfo,
                    manuallyCalculatedJumps,
                    manuallyCalculatedEventPrimaryChromosomeSegmentingLocations);

    //////////
    final SimpleInterval manuallyCalculatedAffectedRefRegion =
            new SimpleInterval("chr9", 130954964, 130955309);
    final List<SimpleInterval> manuallyCalculatedSegments =
            Arrays.asList(
                    new SimpleInterval("chr9", 130954964, 130955155),
                    new SimpleInterval("chr9", 130955156, 130955307),
                    new SimpleInterval("chr9", 130955307, 130955308));
    final List<String> manuallyCalculatedAltArrangementDescription =
            Arrays.asList("1", "2", "UINS-148", "1", "UINS-60", "2", "3", "UINS-156");
    final byte[] manuallyCalculatedAltSeq = Arrays.copyOfRange(analysisReadyContig.getContigSequence(), 1429, 2549);
    SequenceUtil.reverseComplement(manuallyCalculatedAltSeq);
    final CpxVariantCanonicalRepresentation manuallyCalculatedCpxVariantCanonicalRepresentation =
            new CpxVariantCanonicalRepresentation(
                    manuallyCalculatedAffectedRefRegion,
                    manuallyCalculatedSegments,
                    manuallyCalculatedAltArrangementDescription,
                    manuallyCalculatedAltSeq);

    //////////
    final byte[] dummyRefSequence = "T".getBytes();
    final VariantContextBuilder variantContextBuilder = new VariantContextBuilder()
            .chr("chr9").start(130954964).stop(130955309)
            .alleles(Arrays.asList(Allele.create(dummyRefSequence, true), Allele.create(SimpleSVType.createBracketedSymbAlleleString(CPX_SV_SYB_ALT_ALLELE_STR), false)))
            .id("CPX_chr9:130954964-130955309")
            .attribute(VCFConstants.END_KEY, 130955309)
            .attribute(SVTYPE, GATKSVVCFConstants.CPX_SV_SYB_ALT_ALLELE_STR)
            .attribute(SVLEN, 774)
            .attribute(SEQ_ALT_HAPLOTYPE, new String(manuallyCalculatedAltSeq));
    variantContextBuilder.attribute(CPX_EVENT_ALT_ARRANGEMENTS,
            String.join(VCFConstants.INFO_FIELD_ARRAY_SEPARATOR, manuallyCalculatedAltArrangementDescription));
    variantContextBuilder.attribute(CPX_SV_REF_SEGMENTS,
            String.join(VCFConstants.INFO_FIELD_ARRAY_SEPARATOR, manuallyCalculatedSegments.stream().map(SimpleInterval::toString).collect(Collectors.toList())));
    final VariantContext manuallyCalculatedVariantContext = variantContextBuilder.make();

    //////////
    final Set<SimpleInterval> manuallyCalculatedTwoBaseBoundaries = new HashSet<>();
    manuallyCalculatedTwoBaseBoundaries.add(new SimpleInterval("chr9", 130955309, 130955310));
    manuallyCalculatedTwoBaseBoundaries.add(new SimpleInterval("chr9", 130956737, 130956738));
    manuallyCalculatedTwoBaseBoundaries.add(new SimpleInterval("chr9", 130955156, 130955157));
    manuallyCalculatedTwoBaseBoundaries.add(new SimpleInterval("chr9", 130955307, 130955308));
    manuallyCalculatedTwoBaseBoundaries.add(new SimpleInterval("chr9", 130954964, 130954965));
    manuallyCalculatedTwoBaseBoundaries.add(new SimpleInterval("chr9", 130955154, 130955155));
    manuallyCalculatedTwoBaseBoundaries.add(new SimpleInterval("chr9", 130953867, 130953868));
    manuallyCalculatedTwoBaseBoundaries.add(new SimpleInterval("chr9", 130955306, 130955307));

    //////////
    return new PreprocessedAndAnalysisReadyContigWithExpectedResults(
            manuallyCalculatedCpxVariantInducingAssemblyContig,
            manuallyCalculatedCpxVariantCanonicalRepresentation,
            manuallyCalculatedVariantContext,
            dummyRefSequence,
            manuallyCalculatedTwoBaseBoundaries);
}
 
Example 14
Source File: ApplyVQSR.java    From gatk with BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
/**
 * Calculate the allele-specific filter status of vc
 * @param vc
 * @param recals
 * @param builder   is modified by adding attributes
 * @return a String with the filter status for this site
 */
private String doAlleleSpecificFiltering(final VariantContext vc, final List<VariantContext> recals, final VariantContextBuilder builder) {
    double bestLod = VariantRecalibratorEngine.MIN_ACCEPTABLE_LOD_SCORE;
    final List<String> culpritStrings = new ArrayList<>();
    final List<String> lodStrings = new ArrayList<>();
    final List<String> AS_filterStrings = new ArrayList<>();

    String[] prevCulpritList = null;
    String[] prevLodList = null;
    String[] prevASfiltersList = null;

    //get VQSR annotations from previous run of ApplyRecalibration, if applicable
    if(foundINDELTranches || foundSNPTranches) {
        final String prevCulprits = vc.getAttributeAsString(GATKVCFConstants.AS_CULPRIT_KEY,"");
        prevCulpritList = prevCulprits.isEmpty()? new String[0] : prevCulprits.split(listPrintSeparator);
        final String prevLodString = vc.getAttributeAsString(GATKVCFConstants.AS_VQS_LOD_KEY,"");
        prevLodList = prevLodString.isEmpty()? new String[0] : prevLodString.split(listPrintSeparator);
        final String prevASfilters = vc.getAttributeAsString(GATKVCFConstants.AS_FILTER_STATUS_KEY,"");
        prevASfiltersList = prevASfilters.isEmpty()? new String[0] : prevASfilters.split(listPrintSeparator);
    }

    //for each allele in the current VariantContext
    for (int altIndex = 0; altIndex < vc.getNAlleles()-1; altIndex++) {
        final Allele allele = vc.getAlternateAllele(altIndex);

        //if the current allele is not part of this recalibration mode, add its annotations to the list and go to the next allele
        if (!VariantDataManager.checkVariationClass(vc, allele, MODE)) {
            updateAnnotationsWithoutRecalibrating(altIndex, prevCulpritList, prevLodList, prevASfiltersList, culpritStrings, lodStrings, AS_filterStrings);
            continue;
        }

        //if the current allele does need to have recalibration applied...

        //initialize allele-specific VQSR annotation data with values for spanning deletion
        String alleleLodString = emptyFloatValue;
        String alleleFilterString = emptyStringValue;
        String alleleCulpritString = emptyStringValue;

        //if it's not a spanning deletion, replace those allele strings with the real values
        if (!GATKVCFConstants.isSpanningDeletion(allele)) {
            VariantContext recalDatum = getMatchingRecalVC(vc, recals, allele);
            if (recalDatum == null) {
                throw new UserException("Encountered input allele which isn't found in the input recal file. Please make sure VariantRecalibrator and ApplyRecalibration were run on the same set of input variants with flag -AS. First seen at: " + vc);
            }

            //compare VQSLODs for all alleles in the current mode for filtering later
            final double lod = recalDatum.getAttributeAsDouble(GATKVCFConstants.VQS_LOD_KEY, VariantRecalibratorEngine.MIN_ACCEPTABLE_LOD_SCORE);
            if (lod > bestLod)
                bestLod = lod;

            alleleLodString = String.format("%.4f", lod);
            alleleFilterString = generateFilterString(lod);
            alleleCulpritString = recalDatum.getAttributeAsString(GATKVCFConstants.CULPRIT_KEY, ".");

            if(recalDatum != null) {
                if (recalDatum.hasAttribute(GATKVCFConstants.POSITIVE_LABEL_KEY))
                    builder.attribute(GATKVCFConstants.POSITIVE_LABEL_KEY, true);
                if (recalDatum.hasAttribute(GATKVCFConstants.NEGATIVE_LABEL_KEY))
                    builder.attribute(GATKVCFConstants.NEGATIVE_LABEL_KEY, true);
            }
        }

        //append per-allele VQSR annotations
        lodStrings.add(alleleLodString);
        AS_filterStrings.add(alleleFilterString);
        culpritStrings.add(alleleCulpritString);
    }

    // Annotate the new record with its VQSLOD, AS_FilterStatus, and the worst performing annotation
    if(!AS_filterStrings.isEmpty() )
        builder.attribute(GATKVCFConstants.AS_FILTER_STATUS_KEY, AnnotationUtils.encodeStringList(AS_filterStrings));
    if(!lodStrings.isEmpty())
        builder.attribute(GATKVCFConstants.AS_VQS_LOD_KEY, AnnotationUtils.encodeStringList(lodStrings));
    if(!culpritStrings.isEmpty())
        builder.attribute(GATKVCFConstants.AS_CULPRIT_KEY, AnnotationUtils.encodeStringList(culpritStrings));

    return generateFilterStringFromAlleles(vc, bestLod);
}
 
Example 15
Source File: LiftoverUtils.java    From picard with MIT License 4 votes vote down vote up
/**
 * method to swap the reference and alt alleles of a bi-allelic, SNP
 *
 * @param vc                   the {@link VariantContext} (bi-allelic SNP) that needs to have it's REF and ALT alleles swapped.
 * @param annotationsToReverse INFO field annotations (of double value) that will be reversed (x->1-x)
 * @param annotationsToDrop    INFO field annotations that will be dropped from the result since they are invalid when REF and ALT are swapped
 * @return a new {@link VariantContext} with alleles swapped, INFO fields modified and in the genotypes, GT, AD and PL corrected appropriately
 */
public static VariantContext swapRefAlt(final VariantContext vc, final Collection<String> annotationsToReverse, final Collection<String> annotationsToDrop) {

    if (!vc.isBiallelic() || !vc.isSNP()) {
        throw new IllegalArgumentException("swapRefAlt can only process biallelic, SNPS, found " + vc.toString());
    }

    final VariantContextBuilder swappedBuilder = new VariantContextBuilder(vc);

    swappedBuilder.attribute(SWAPPED_ALLELES, true);

    // Use getBaseString() (rather than the Allele itself) in order to create new Alleles with swapped
    // reference and non-variant attributes
    swappedBuilder.alleles(Arrays.asList(vc.getAlleles().get(1).getBaseString(), vc.getAlleles().get(0).getBaseString()));

    final Map<Allele, Allele> alleleMap = new HashMap<>();

    // A mapping from the old allele to the new allele, to be used when fixing the genotypes
    alleleMap.put(vc.getAlleles().get(0), swappedBuilder.getAlleles().get(1));
    alleleMap.put(vc.getAlleles().get(1), swappedBuilder.getAlleles().get(0));

    final GenotypesContext swappedGenotypes = GenotypesContext.create(vc.getGenotypes().size());
    for (final Genotype genotype : vc.getGenotypes()) {
        final List<Allele> swappedAlleles = new ArrayList<>();
        for (final Allele allele : genotype.getAlleles()) {
            if (allele.isNoCall()) {
                swappedAlleles.add(allele);
            } else {
                swappedAlleles.add(alleleMap.get(allele));
            }
        }
        // Flip AD
        final GenotypeBuilder builder = new GenotypeBuilder(genotype).alleles(swappedAlleles);
        if (genotype.hasAD() && genotype.getAD().length == 2) {
            final int[] ad = ArrayUtils.clone(genotype.getAD());
            ArrayUtils.reverse(ad);
            builder.AD(ad);
        } else {
            builder.noAD();
        }

        //Flip PL
        if (genotype.hasPL() && genotype.getPL().length == 3) {
            final int[] pl = ArrayUtils.clone(genotype.getPL());
            ArrayUtils.reverse(pl);
            builder.PL(pl);
        } else {
            builder.noPL();
        }
        swappedGenotypes.add(builder.make());
    }
    swappedBuilder.genotypes(swappedGenotypes);

    for (final String key : vc.getAttributes().keySet()) {
        if (annotationsToDrop.contains(key)) {
            swappedBuilder.rmAttribute(key);
        } else if (annotationsToReverse.contains(key) && !vc.getAttributeAsString(key, "").equals(VCFConstants.MISSING_VALUE_v4)) {
            final double attributeToReverse = vc.getAttributeAsDouble(key, -1);

            if (attributeToReverse < 0 || attributeToReverse > 1) {
                log.warn("Trying to reverse attribute " + key +
                        " but found value that isn't between 0 and 1: (" + attributeToReverse + ") in variant " + vc + ". Results might be wrong.");
            }
            swappedBuilder.attribute(key, 1 - attributeToReverse);
        }
    }

    return swappedBuilder.make();
}
 
Example 16
Source File: TestUtilsCpxVariantInference.java    From gatk with BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
private static PreprocessedAndAnalysisReadyContigWithExpectedResults buildForContig_2548_1(final Set<String> canonicalChromosomes,
                                                                                           final SAMSequenceDictionary refSeqDict) {
    final AssemblyContigWithFineTunedAlignments
            analysisReadyContig = makeContigAnalysisReadyForCpxBranch("asm002548:tig00001\t16\tchr2\t16224630\t60\t513M634S\t*\t0\t0\tTCAGCACAATGTCTGCTATCCAGTCACTGCTCTGTATTAGCCTCGCCTTAAATTGCTCGAATAGTAAGCAAGGGTTTATAAAGCATCTGTGTGCTAATCCCATCCTAAAGTGACAGGGATCAATTTAACCTCCTCATTTAAAAGATGAGGAGACAGGCTCAAAGGGCAAAGCCGACTCACCCAGGCTCACACAACCAGAGTGGCAGAGCCTCAGGGCTTGCCCTGCACAGCGTAGCTTCTGTTCTGAGAGGAATGCACCTGGGTTTTGTGTCTTTGGTGCTGGGGTGGTGAATGGGAGGCTCTGACCGATGGCAGAGTTCCCAGGGGATCCTTCAGGCAGCCGGGGGGCCTTTCTTATGGCAGGGCTCTGAAGCAAGTCCTGGCTGACCTTTTCTTGAAGCTGAGCTCTTCTGCCAAGACCCTTGGCCCTGCCCTTGGCCCCTGTAGAAATGCACACCTGTAAGCCTGTAATACCTTCTCCAGATCACAGCAACTAGGGAACTGCTGGAAGCCACGGAGATCAGCAGGGACCGCACCATCCCCAGAAGTGTGGATCCACCCGAGGGTCACGTGGATCCTCAAGGAGTGGAGAGTAGACCCTGAGCCACGTGGGCTTCTAGCAGTTCCCTGGGCCTGCTGGCACCAAGATGGGCTCCCTGTCACAGGCCTGACGGGACCTCTGTGAGGGTCCCAGGAGATGAAGCACATGAAGAGGGTTGGTGCACCAAGCAGGCTCACCAAATATTCACTCCCTGACCCCTTTCCCCACGCAAGCTGTGGTGGGGAAGGAGTGTTTCATGGTGGGCAGCCCAGGTGCCAGTCCCCCCCTTCCCCAGTGATGGGGTACCATTACCCCCACGACAGCCCAGCTTGGTGCCAGCAGGCCCAGGGAACTGCTGGAAGCCCACGTGGCTCAGGTCTACTCCTCACTCCTTGAGGATCCACGTGGCCCTTGGGTGGATCCACACTTCTGGGGATGGTGCGGTCCCAGCTGATCTCCCTGGGGGTCCAGTTAGCCTCATCCTCCTTCACCCAGGGTGGCTGCATGTCAGCACCTGCTCTACCTTCTACCTTAATGGCGTCCCCTAGGCTATCTGTCAGGGGAGTCTATGCAAAGGGGCCCCTTTGAACTTGCCTGTGCCACTCC\t*\tSA:Z:chr2,16225125,-,887H260M,60,0,260;chr2,16226497,-,657H170M1D53M267H,60,14,141;chr2,16225125,+,518H29M1I84M515H,60,7,66;\tMD:Z:513\tRG:Z:GATKSVContigAlignments\tNM:i:0\tAS:i:513\tXS:i:0",
            canonicalChromosomes, refSeqDict);

    final CpxVariantInducingAssemblyContig.BasicInfo manuallyCalculatedBasicInfo =
            new CpxVariantInducingAssemblyContig.BasicInfo("chr2", false,
                    new SimpleInterval("chr2", 16224630, 16224630), new SimpleInterval("chr2", 16225384, 16225384));

    //////////
    final List<CpxVariantInducingAssemblyContig.Jump> manuallyCalculatedJumps =
            Arrays.asList(
                    new CpxVariantInducingAssemblyContig.Jump(new SimpleInterval("chr2", 16225125, 16225125), new SimpleInterval("chr2", 16226720, 16226720), StrandSwitch.NO_SWITCH, 7),
                    new CpxVariantInducingAssemblyContig.Jump(new SimpleInterval("chr2", 16226497, 16226497), new SimpleInterval("chr2", 16225125, 16225125), StrandSwitch.REVERSE_TO_FORWARD, 28),
                    new CpxVariantInducingAssemblyContig.Jump(new SimpleInterval("chr2", 16225237, 16225237), new SimpleInterval("chr2", 16225142, 16225142), StrandSwitch.FORWARD_TO_REVERSE, 2)
            );
    final List<SimpleInterval> manuallyCalculatedEventPrimaryChromosomeSegmentingLocations =
            Arrays.asList(
                    new SimpleInterval("chr2", 16225125, 16225125),
                    new SimpleInterval("chr2", 16225142, 16225142),
                    new SimpleInterval("chr2", 16225237, 16225237)
            );
    final CpxVariantInducingAssemblyContig manuallyCalculatedCpxVariantInducingAssemblyContig =
            new CpxVariantInducingAssemblyContig(analysisReadyContig,
                    manuallyCalculatedBasicInfo,
                    manuallyCalculatedJumps,
                    manuallyCalculatedEventPrimaryChromosomeSegmentingLocations);

    //////////
    final SimpleInterval manuallyCalculatedAffectedRefRegion =
            new SimpleInterval("chr2", 16225125, 16225237);
    final List<SimpleInterval> manuallyCalculatedSegments =
            Arrays.asList(
                    new SimpleInterval("chr2", 16225125, 16225142),
                    new SimpleInterval("chr2", 16225142, 16225237));
    final List<String> manuallyCalculatedAltArrangementDescription =
            Arrays.asList("1", "UINS-2", "-2", "-1", "UINS-28", "chr2:16226497-16226720", "UINS-7", "1", "2");
    final byte[] manuallyCalculatedAltSeq = Arrays.copyOfRange(analysisReadyContig.getContigSequence(), 147, 652);
    SequenceUtil.reverseComplement(manuallyCalculatedAltSeq);
    final CpxVariantCanonicalRepresentation manuallyCalculatedCpxVariantCanonicalRepresentation =
            new CpxVariantCanonicalRepresentation(
                    manuallyCalculatedAffectedRefRegion,
                    manuallyCalculatedSegments,
                    manuallyCalculatedAltArrangementDescription,
                    manuallyCalculatedAltSeq);

    //////////
    final byte[] dummyRefSequence = "T".getBytes();
    final VariantContextBuilder variantContextBuilder = new VariantContextBuilder()
            .chr("chr2").start(16225125).stop(16225237)
            .alleles(Arrays.asList(Allele.create(dummyRefSequence, true), Allele.create(SimpleSVType.createBracketedSymbAlleleString(CPX_SV_SYB_ALT_ALLELE_STR), false)))
            .id("CPX_chr2:16225125-16225237")
            .attribute(VCFConstants.END_KEY, 16225237)
            .attribute(SVTYPE, GATKSVVCFConstants.CPX_SV_SYB_ALT_ALLELE_STR)
            .attribute(SVLEN, 392)
            .attribute(SEQ_ALT_HAPLOTYPE, new String(manuallyCalculatedAltSeq));
    variantContextBuilder.attribute(CPX_EVENT_ALT_ARRANGEMENTS,
            String.join(VCFConstants.INFO_FIELD_ARRAY_SEPARATOR, manuallyCalculatedAltArrangementDescription));
    variantContextBuilder.attribute(CPX_SV_REF_SEGMENTS,
            String.join(VCFConstants.INFO_FIELD_ARRAY_SEPARATOR, manuallyCalculatedSegments.stream().map(SimpleInterval::toString).collect(Collectors.toList())));
    final VariantContext manuallyCalculatedVariantContext = variantContextBuilder.make();

    //////////
    final Set<SimpleInterval> manuallyCalculatedTwoBaseBoundaries = new HashSet<>();
    manuallyCalculatedTwoBaseBoundaries.add(new SimpleInterval("chr2", 16225125, 16225126));
    manuallyCalculatedTwoBaseBoundaries.add(new SimpleInterval("chr2", 16225383, 16225384));
    manuallyCalculatedTwoBaseBoundaries.add(new SimpleInterval("chr2", 16225236, 16225237));
    manuallyCalculatedTwoBaseBoundaries.add(new SimpleInterval("chr2", 16224630, 16224631));
    manuallyCalculatedTwoBaseBoundaries.add(new SimpleInterval("chr2", 16225141, 16225142));

    //////////
    return new PreprocessedAndAnalysisReadyContigWithExpectedResults(
            manuallyCalculatedCpxVariantInducingAssemblyContig,
            manuallyCalculatedCpxVariantCanonicalRepresentation,
            manuallyCalculatedVariantContext,
            dummyRefSequence,
            manuallyCalculatedTwoBaseBoundaries);
}
 
Example 17
Source File: GtcToVcf.java    From picard with MIT License 4 votes vote down vote up
private VariantContext makeVariantContext(Build37ExtendedIlluminaManifestRecord record, final InfiniumGTCRecord gtcRecord,
                                          final InfiniumEGTFile egtFile, final ProgressLogger progressLogger) {
    // If the record is not flagged as errant in the manifest we include it in the VCF
    Allele A = record.getAlleleA();
    Allele B = record.getAlleleB();
    Allele ref = record.getRefAllele();

    final String chr = record.getB37Chr();
    final Integer position = record.getB37Pos();
    final Integer endPosition = position + ref.length() - 1;
    Integer egtIndex = egtFile.rsNameToIndex.get(record.getName());
    if (egtIndex == null) {
        throw new PicardException("Found no record in cluster file for manifest entry '" + record.getName() + "'");
    }

    progressLogger.record(chr, position);

    // Create list of unique alleles
    final List<Allele> assayAlleles = new ArrayList<>();
    assayAlleles.add(ref);

    if (A.equals(B)) {
        throw new PicardException("Found same allele (" + A.getDisplayString() + ") for A and B ");
    }

    if (!ref.equals(A, true)) {
        assayAlleles.add(A);
    }

    if (!ref.equals(B, true)) {
        assayAlleles.add(B);
    }

    final String sampleName = FilenameUtils.removeExtension(INPUT.getName());
    final Genotype genotype = getGenotype(sampleName, gtcRecord, record, A, B);

    final VariantContextBuilder builder = new VariantContextBuilder();

    builder.source(record.getName());
    builder.chr(chr);
    builder.start(position);
    builder.stop(endPosition);
    builder.alleles(assayAlleles);
    builder.log10PError(VariantContext.NO_LOG10_PERROR);
    builder.id(record.getName());
    builder.genotypes(genotype);

    VariantContextUtils.calculateChromosomeCounts(builder, false);

    //custom info fields
    builder.attribute(InfiniumVcfFields.ALLELE_A, record.getAlleleA());
    builder.attribute(InfiniumVcfFields.ALLELE_B, record.getAlleleB());
    builder.attribute(InfiniumVcfFields.ILLUMINA_STRAND, record.getIlmnStrand());
    builder.attribute(InfiniumVcfFields.PROBE_A, record.getAlleleAProbeSeq());
    builder.attribute(InfiniumVcfFields.PROBE_B, record.getAlleleBProbeSeq());
    builder.attribute(InfiniumVcfFields.BEADSET_ID, record.getBeadSetId());
    builder.attribute(InfiniumVcfFields.ILLUMINA_CHR, record.getChr());
    builder.attribute(InfiniumVcfFields.ILLUMINA_POS, record.getPosition());
    builder.attribute(InfiniumVcfFields.ILLUMINA_BUILD, record.getGenomeBuild());
    builder.attribute(InfiniumVcfFields.SOURCE, record.getSource().replace(' ', '_'));
    builder.attribute(InfiniumVcfFields.GC_SCORE, formatFloatForVcf(egtFile.totalScore[egtIndex]));

    for (InfiniumVcfFields.GENOTYPE_VALUES gtValue : InfiniumVcfFields.GENOTYPE_VALUES.values()) {
        final int ordinalValue = gtValue.ordinal();
        builder.attribute(InfiniumVcfFields.N[ordinalValue], egtFile.n[egtIndex][ordinalValue]);
        builder.attribute(InfiniumVcfFields.DEV_R[ordinalValue], formatFloatForVcf(egtFile.devR[egtIndex][ordinalValue]));
        builder.attribute(InfiniumVcfFields.MEAN_R[ordinalValue], formatFloatForVcf(egtFile.meanR[egtIndex][ordinalValue]));
        builder.attribute(InfiniumVcfFields.DEV_THETA[ordinalValue], formatFloatForVcf(egtFile.devTheta[egtIndex][ordinalValue]));
        builder.attribute(InfiniumVcfFields.MEAN_THETA[ordinalValue], formatFloatForVcf(egtFile.meanTheta[egtIndex][ordinalValue]));

        final EuclideanValues genotypeEuclideanValues = polarToEuclidean(egtFile.meanR[egtIndex][ordinalValue], egtFile.devR[egtIndex][ordinalValue],
                egtFile.meanTheta[egtIndex][ordinalValue], egtFile.devTheta[egtIndex][ordinalValue]);
        builder.attribute(InfiniumVcfFields.DEV_X[ordinalValue], formatFloatForVcf(genotypeEuclideanValues.devX));
        builder.attribute(InfiniumVcfFields.MEAN_X[ordinalValue], formatFloatForVcf(genotypeEuclideanValues.meanX));
        builder.attribute(InfiniumVcfFields.DEV_Y[ordinalValue], formatFloatForVcf(genotypeEuclideanValues.devY));
        builder.attribute(InfiniumVcfFields.MEAN_Y[ordinalValue], formatFloatForVcf(genotypeEuclideanValues.meanY));
    }


    final String rsid = record.getRsId();
    if (StringUtils.isNotEmpty(rsid)) {
        builder.attribute(InfiniumVcfFields.RS_ID, rsid);
    }
    if (egtFile.totalScore[egtIndex] == 0.0) {
        builder.filter(InfiniumVcfFields.ZEROED_OUT_ASSAY);
        if (genotype.isCalled()) {
            if (DO_NOT_ALLOW_CALLS_ON_ZEROED_OUT_ASSAYS) {
                throw new PicardException("Found a call (genotype: " + genotype + ") on a zeroed out Assay!!");
            } else {
                log.warn("Found a call (genotype: " + genotype + ") on a zeroed out Assay. " +
                        "This could occur if you called genotypes on a different cluster file than used here.");
            }
        }
    }
    if (record.isDupe()) {
        builder.filter(InfiniumVcfFields.DUPE);
    }
    return builder.make();
}
 
Example 18
Source File: FilterAlignmentArtifacts.java    From gatk with BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
@Override
public void apply(List<VariantContext> variantContexts, ReferenceContext referenceContext, final List<ReadsContext> readsContexts) {


    // for now we do one variant at a time but eventually we will want to combine all reads supporting all variants
    // into a single graph.  This is non-trivial because there may be more than one phasing between variants.
    for (final VariantContext vc : variantContexts) {
        final AssemblyRegion assemblyRegion = makeAssemblyRegionFromVariantReads(readsContexts, vc);


        // TODO: give this tool M2 Assembler args to allow override default M2ArgumentCollection?
        final AssemblyResultSet assemblyResult = AssemblyBasedCallerUtils.assembleReads(assemblyRegion, Collections.emptyList(), MTAC, bamHeader, samplesList, logger, referenceReader, assemblyEngine, ALIGNER, false);
        final AssemblyRegion regionForGenotyping = assemblyResult.getRegionForGenotyping();

        final Map<String,List<GATKRead>> reads = AssemblyBasedCallerUtils.splitReadsBySample(samplesList, bamHeader, regionForGenotyping.getReads());

        final AlleleLikelihoods<GATKRead, Haplotype> readLikelihoods = likelihoodCalculationEngine.computeReadLikelihoods(assemblyResult,samplesList,reads);
        readLikelihoods.switchToNaturalLog();
        final Map<GATKRead,GATKRead> readRealignments = AssemblyBasedCallerUtils.realignReadsToTheirBestHaplotype(readLikelihoods, assemblyResult.getReferenceHaplotype(), assemblyResult.getPaddedReferenceLoc(), ALIGNER);
        readLikelihoods.changeEvidence(readRealignments);
        writeBamOutput(assemblyResult, readLikelihoods, new HashSet<>(readLikelihoods.alleles()), regionForGenotyping.getSpan());

        final LocusIteratorByState libs = new LocusIteratorByState(regionForGenotyping.getReads().iterator(), DownsamplingMethod.NONE, false, samplesList.asListOfSamples(), bamHeader, true);

        final List<byte[]> unitigs = getUnitigs(libs);

        final VariantContextBuilder vcb = new VariantContextBuilder(vc)
                .attribute(GATKVCFConstants.UNITIG_SIZES_KEY, unitigs.stream().mapToInt(u -> u.length).toArray());

        final List<List<BwaMemAlignment>> unitigAlignments = unitigs.stream()
                .map(realignmentEngine::realign).collect(Collectors.toList());

        final List<List<BwaMemAlignment>> jointAlignments = RealignmentEngine.findJointAlignments(unitigAlignments, realignmentArgumentCollection.maxReasonableFragmentLength);
        vcb.attribute(GATKVCFConstants.JOINT_ALIGNMENT_COUNT_KEY, jointAlignments.size());
        jointAlignments.sort(Comparator.comparingInt(FilterAlignmentArtifacts::jointAlignmentScore).reversed());

        // best mapping to another contig
        if (!jointAlignments.isEmpty() && jointAlignments.get(0).get(0).getRefId() != getReferenceDictionary().getSequenceIndex(vc.getContig())) {
            vcb.filter(GATKVCFConstants.ALIGNMENT_ARTIFACT_FILTER_NAME);
        } else if (jointAlignments.size() > 1) {

            final int totalBases = unitigs.stream().mapToInt(unitig -> unitig.length).sum();
            final int scoreDiff = jointAlignmentScore(jointAlignments.get(0)) - jointAlignmentScore(jointAlignments.get(1));
            final int mismatchDiff = totalMismatches(jointAlignments.get(1)) - totalMismatches(jointAlignments.get(0));

            vcb.attribute(GATKVCFConstants.ALIGNMENT_SCORE_DIFFERENCE_KEY, scoreDiff);

            final boolean multimapping = (double) scoreDiff / totalBases < realignmentArgumentCollection.minAlignerScoreDifferencePerBase
                    && (double) mismatchDiff / totalBases < realignmentArgumentCollection.minMismatchDifferencePerBase;

            if (multimapping) {
                vcb.filter(GATKVCFConstants.ALIGNMENT_ARTIFACT_FILTER_NAME);
            }
        }

        vcfWriter.add(vcb.make());
    }
}
 
Example 19
Source File: MethylationTypeCaller.java    From gatk with BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
@Override
public void apply(AlignmentContext alignmentContext, ReferenceContext referenceContext, FeatureContext featureContext) {
    final byte referenceBase = referenceContext.getBases()[0];
    final int unconvertedBases;
    final int convertedBases;
    final byte alt;
    byte[] context = null;

    // check the forward strand for methylated coverage
    if (referenceBase == (byte)'C') {
        alt = (byte)'T';
        final ReadPileup forwardBasePileup = alignmentContext.stratify(AlignmentContext.ReadOrientation.FORWARD).getBasePileup();
        // unconverted: C, index=1; converted: T, index=3
        final int[] forwardBaseCounts = forwardBasePileup.getBaseCounts();
        unconvertedBases = forwardBaseCounts[BaseUtils.simpleBaseToBaseIndex((byte)'C')];
        convertedBases = forwardBaseCounts[BaseUtils.simpleBaseToBaseIndex((byte)'T')];

        // if there is methylated coverage
        if (unconvertedBases + convertedBases > 0) {
            context = referenceContext.getBases(0, REFERENCE_CONTEXT_LENGTH);
        }
    }
    // check the reverse strand for methylated coverage
    else if (referenceBase == (byte)'G') {
        alt = (byte)'A';
        final ReadPileup reverseBasePileup = alignmentContext.stratify(AlignmentContext.ReadOrientation.REVERSE).getBasePileup();
        // unconverted: G, index=2; converted: A, index=0
        final int[] reverseBaseCounts = reverseBasePileup.getBaseCounts();
        unconvertedBases = reverseBaseCounts[BaseUtils.simpleBaseToBaseIndex((byte)'G')];
        convertedBases = reverseBaseCounts[BaseUtils.simpleBaseToBaseIndex((byte)'A')];

        // if there is methylated coverage
        if (unconvertedBases + convertedBases > 0) {
            // get the reverse complement for context b/c we are on the reverse strand
            context = BaseUtils.simpleReverseComplement(referenceContext.getBases(REFERENCE_CONTEXT_LENGTH,0));
        }
    }
    // if reference strand does not support methylation
    else {
        return;
    }

    // if there are reads that have methylated coverage
    if (context != null) {
        final LinkedHashSet<Allele> alleles = new LinkedHashSet<>();
        alleles.add(Allele.create(referenceBase, true));
        alleles.add(Allele.create(alt, false));

        final VariantContextBuilder vcb = new VariantContextBuilder();
        vcb.chr(alignmentContext.getContig());
        vcb.start(alignmentContext.getPosition());
        vcb.stop(alignmentContext.getPosition());
        vcb.noID();
        vcb.alleles(alleles);
        vcb.noGenotypes();
        vcb.unfiltered();
        vcb.attribute(GATKVCFConstants.UNCONVERTED_BASE_COVERAGE_KEY, unconvertedBases);
        vcb.attribute(GATKVCFConstants.CONVERTED_BASE_COVERAGE_KEY, convertedBases);
        vcb.attribute(GATKVCFConstants.METHYLATION_REFERENCE_CONTEXT_KEY, new String(context));
        vcb.attribute(VCFConstants.DEPTH_KEY, alignmentContext.size());

        // write to VCF
        final VariantContext vc = vcb.make();
        vcfWriter.add(vc);
    }
}
 
Example 20
Source File: TestUtilsCpxVariantInference.java    From gatk with BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
private static PreprocessedAndAnalysisReadyContigWithExpectedResults buildForContig_2428_0(final Set<String> canonicalChromosomes,
                                                                                           final SAMSequenceDictionary refSeqDict) {
    final AssemblyContigWithFineTunedAlignments analysisReadyContig =
            makeContigAnalysisReadyForCpxBranch("asm002428:tig00000\t16\tchr2\t4450935\t60\t1216M3I256M1030S\t*\t0\t0\tTTCCTCTGTGGAGGACACAGCATTCAAGGCGCCATCTTGGAATCAGAATCACTAACCACATCAAAGCCTAATACTGGACTTCCCAACCTCCTAAACTGTAAGCAAATAATTTTGTTCATAAGTTACCCAGCCTGTGCTATTCTTCTATACCAGCACATATGGACTAAGACACTACGTATTCTAATAAGTGTGTGAAAACCCAAGATAAAATAGGCGTTATTTCCCATTTTACAGATGAGAAAACTGAAGCTTGAGGAATTTGTGAATTTGTTTAATCCAAATCTGTTAGAAAATAGTAGACCTCACCTCTTCAATAGTTGAGCATGGATAATTATGATACTAATTTTAGACTCATATTTCTTCATTTGTACATGATTTAAATCTCAGAAAAAATTTAGAATTTCTCTTTACTGTCTGTTGGTATTATGAAATGTGATAGCAGTATGTCTAGGCACAGATTTTTTTTTTTTTTTTCCTTAGCAGGGGTAGTTATTTTTTTCTTTCTTAGCAAGGGTGGCTTTTTCAGTCTGAAAGCTGAGACATACACTTTTCTCTGAGAAGAAATATTTTCTATTATTTGAATGTTTCACCTCTCCTTTTTCCTATTCTCTCTTTCTGAGACATACATTGATTCTCCAAGTTATCATCTTTCTCTCTTATCTCTATGTTTGTCTTTGGTTCTCTTTTTTCTCTGTATTCTGGGAAATTTTCATAGCTTTATTCAAGTTCATTTTTCAAGCTCTAAGAACTCTCTTTTTGTCTGATTTTACCATTTTTATTATACTAGTTGTTACAGTTTTGATAGATAAACTATCTTCCAAAATTTCTAAACGTATGGGTTTGGGAAATTGCTTTTAAAATCCTTAGGGATATAGCCAAATTACAGCATGCCAAGAAATGACAGAAATGTATTTTTTAATTAAAAAAAGGAGTATTAGTAAAGCTTCAACTTTAGAACAGGGTCTATGTCAGAGAAACACCACAGAGCAATGAAAACTGTGTTCTCCAAATTATTTAGAACGTAGGAGGATTTCCAAAATATTTATTTATTTTGAAAGTAGAATGATTAACATCTGCTGTCTATAAGCAACACTTTTGAAAAAAGGAATTTACTCCCTCCAATTTGCTCCATAGCCTACAGTTTCTTCTTTTATCTACCTCTTCCTCCTCCTCCTCCACCACCTTCTTTTCTTGTTGTTGTTGTTGTTCTTCTTCTTCCTCCTCCTCCTCTTCCTCTTCTTCTTCTTCTCCTTCTTCTTCCCCTCCCTTCCTTCCTTCCTTCTTTCCTTCCTTCCTTCCTCTTGTTCTTCTTCTTCCTCTTATTCTCCTTCTCCTTCTTCTTCTTCTCCTGCTTCTTCCTCTTCTCCTTCTTCTTCTTCCTCCTCCTCCTTCTTCTCTTCTTCTGCTTTTCTTCTCTTCTTCTTCTCTTTTCCTTCCTTCCTTCTTTCCTTCCTCTTCTTTCTCTTCTTCTTCCTCTTATTCTCCTTCTTCCTCCTCTTCTCCTTCTTCCTTCTCCTCCTCTTCCTCCTCGTTGTTCTTGTCATTGTTGTTTTTGTTGTTCTTCCTCCTCTTCTCTCTCCTCCTTCTCCTCCTCCTCCTCCTTCTTCTTCTCCTTGTCCTCCTCCTCTGTCTCTGTCTTCTTCTTTTTCTTCTTGTTCCTCTTCTTCTTCCTCTTATTCTCCTTCTTCTTCCTCCCCTTCTCCCTCTTCCGTCTTCTCCTCCTCCTCCCTCTTCCTTCTTCTCCTCCTCCTCCTTCTTTTTCTTTCTTCTTCTTTCTTCTTGTTCTTGTTCTTCTTCTTCTTCCTCCTCCTCATCCTTCTTCTCTTCTTCTGCTTTTCTTCTGTTCTTCTTCTTCTTTCCCTCCCTCCCTCCCTCCCTCCCTTCCTTCCTTCCTTCCTTCCTCTTCTTTCTCCTCTTCTTCTTCTTGTTCTTGTTCTTCTTCTTCCTCTTATTCTCCTTCTTCCTCCTCTTCTCCTTCTTCTTCCTTTTGCTCCTCTTCCTCCTTCTTGTTGTTGTTGTTCTTCTTCTTCTTCCTCTCATTCTTCTTCTTCCTCCTCTTCTCCCTCCTCCTTCTCCTCCTCCTCCTTCTTCTTCTCCTTCTCCTCCTCCTTTTTCTTCTCCTTCTCCTCCTCTTCCATCTCTGTCTTTGCCTTCTTCTTCTTGTTCCTCATCTTCTTCTTCCTCTTATTCTCCTTCTTCTTCCTTCCCTTCTCCCTCTTCCTTTTTCTCCTCCTCCTCCTTCTTTCTTCTTTCTTCCTTCTTCTTCTTTCTCTTCTTCTTCTTCTCCTTCTTCTTCTTCCTCCTCCTCCTCCCCTTCCCCCTTCCCCTCCTCCTCCTCCTTCTCCTCCTTCTCCTCCTCCTTCTTCTTCTTCCTCTTCTTCTTCTTCTTCTCCTTCTTCTTCTTCTCCTTCTTCTTCTTCTTCCTCCTCCTCCTCCCCTTCTCCCTTCCCCTCCTCCTCCTCCTCCTTCTCCTCCTCCTTCTTCTTCTTCTTCTTCTTCTTCTTC\t*\tSA:Z:chr2,4452399,-,2250H75M3D52M15I113M,60,25,157;chr9,34840411,-,2065H67M373H,33,2,57;chr10,75714830,-,1851H51M603H,0,0,51;chr2,4452298,-,1793H60M652H,60,3,45;chr6,15853372,-,1907H43M555H,60,0,43;\tMD:Z:259C382G365G125G337\tRG:Z:GATKSVContigAlignments\tNM:i:7\tAS:i:1433\tXS:i:0", canonicalChromosomes, refSeqDict);

    final CpxVariantInducingAssemblyContig.BasicInfo manuallyCalculatedBasicInfo =
            new CpxVariantInducingAssemblyContig.BasicInfo("chr2", false,
                    new SimpleInterval("chr2", 4450935, 4450935), new SimpleInterval("chr2", 4452641, 4452641));

    //////////
    final List<CpxVariantInducingAssemblyContig.Jump> manuallyCalculatedJumps =
            Arrays.asList(
                    new CpxVariantInducingAssemblyContig.Jump(new SimpleInterval("chr2", 4452399, 4452399), new SimpleInterval("chr9", 34840477, 34840477), StrandSwitch.NO_SWITCH, 118),
                    new CpxVariantInducingAssemblyContig.Jump(new SimpleInterval("chr9", 34840411, 34840411), new SimpleInterval("chr6", 15853414, 15853414), StrandSwitch.NO_SWITCH, 115),
                    new CpxVariantInducingAssemblyContig.Jump(new SimpleInterval("chr6", 15853372, 15853372), new SimpleInterval("chr2", 4452357, 4452357), StrandSwitch.NO_SWITCH, 54),
                    new CpxVariantInducingAssemblyContig.Jump(new SimpleInterval("chr2", 4452298, 4452298), new SimpleInterval("chr2", 4452406, 4452406), StrandSwitch.NO_SWITCH, 318)
            );
    final List<SimpleInterval> manuallyCalculatedEventPrimaryChromosomeSegmentingLocations =
            Arrays.asList(
                    new SimpleInterval("chr2", 4452298, 4452298),
                    new SimpleInterval("chr2", 4452357, 4452357),
                    new SimpleInterval("chr2", 4452399, 4452399),
                    new SimpleInterval("chr2", 4452406, 4452406)
            );
    final CpxVariantInducingAssemblyContig manuallyCalculatedCpxVariantInducingAssemblyContig =
            new CpxVariantInducingAssemblyContig(analysisReadyContig,
                    manuallyCalculatedBasicInfo,
                    manuallyCalculatedJumps,
                    manuallyCalculatedEventPrimaryChromosomeSegmentingLocations);

    //////////
    final SimpleInterval manuallyCalculatedAffectedRefRegion =
            new SimpleInterval("chr2", 4452298, 4452406);
    final List<SimpleInterval> manuallyCalculatedSegments =
            Arrays.asList(
                    new SimpleInterval("chr2", 4452298, 4452357),
                    new SimpleInterval("chr2", 4452357, 4452399),
                    new SimpleInterval("chr2", 4452399, 4452406));
    final List<String> manuallyCalculatedAltArrangementDescription =
            Arrays.asList("1", "2", "3", "UINS-318", "1", "UINS-54", "chr6:15853372-15853414", "UINS-115", "chr9:34840411-34840477", "UINS-118", "3");
    final byte[] manuallyCalculatedAltSeq = Arrays.copyOfRange(analysisReadyContig.getContigSequence(), 247, 1139);
    SequenceUtil.reverseComplement(manuallyCalculatedAltSeq);
    final CpxVariantCanonicalRepresentation manuallyCalculatedCpxVariantCanonicalRepresentation =
            new CpxVariantCanonicalRepresentation(
                    manuallyCalculatedAffectedRefRegion,
                    manuallyCalculatedSegments,
                    manuallyCalculatedAltArrangementDescription,
                    manuallyCalculatedAltSeq);

    //////////
    final byte[] dummyRefSequence = "T".getBytes();
    final VariantContextBuilder variantContextBuilder = new VariantContextBuilder()
            .chr("chr2").start(4452298).stop(4452406)
            .alleles(Arrays.asList(Allele.create(dummyRefSequence, true), Allele.create(SimpleSVType.createBracketedSymbAlleleString(CPX_SV_SYB_ALT_ALLELE_STR), false)))
            .id("CPX_chr2:4452298-4452406")
            .attribute(VCFConstants.END_KEY, 4452406)
            .attribute(SVTYPE, GATKSVVCFConstants.CPX_SV_SYB_ALT_ALLELE_STR)
            .attribute(SVLEN, 783)
            .attribute(SEQ_ALT_HAPLOTYPE, new String(manuallyCalculatedAltSeq));
    variantContextBuilder.attribute(CPX_EVENT_ALT_ARRANGEMENTS,
            String.join(VCFConstants.INFO_FIELD_ARRAY_SEPARATOR, manuallyCalculatedAltArrangementDescription));
    variantContextBuilder.attribute(CPX_SV_REF_SEGMENTS,
            String.join(VCFConstants.INFO_FIELD_ARRAY_SEPARATOR, manuallyCalculatedSegments.stream().map(SimpleInterval::toString).collect(Collectors.toList())));
    final VariantContext manuallyCalculatedVariantContext = variantContextBuilder.make();

    //////////
    final Set<SimpleInterval> manuallyCalculatedTwoBaseBoundaries = new HashSet<>();
    manuallyCalculatedTwoBaseBoundaries.add(new SimpleInterval("chr2", 4452399, 4452400));
    manuallyCalculatedTwoBaseBoundaries.add(new SimpleInterval("chr2", 4452640, 4452641));
    manuallyCalculatedTwoBaseBoundaries.add(new SimpleInterval("chr2", 4452298, 4452299));
    manuallyCalculatedTwoBaseBoundaries.add(new SimpleInterval("chr2", 4452356, 4452357));
    manuallyCalculatedTwoBaseBoundaries.add(new SimpleInterval("chr2", 4450935, 4450936));
    manuallyCalculatedTwoBaseBoundaries.add(new SimpleInterval("chr2", 4452405, 4452406));

    //////////
    return new PreprocessedAndAnalysisReadyContigWithExpectedResults(
            manuallyCalculatedCpxVariantInducingAssemblyContig,
            manuallyCalculatedCpxVariantCanonicalRepresentation,
            manuallyCalculatedVariantContext,
            dummyRefSequence,
            manuallyCalculatedTwoBaseBoundaries);
}