Java Code Examples for htsjdk.variant.variantcontext.VariantContextUtils#calculateChromosomeCounts()

The following examples show how to use htsjdk.variant.variantcontext.VariantContextUtils#calculateChromosomeCounts() . You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: VariantEvalUtils.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
public VariantContext ensureAnnotations(final VariantContext vc, final VariantContext vcsub) {
    final int originalAlleleCount = vc.getHetCount() + 2 * vc.getHomVarCount();
    final int newAlleleCount = vcsub.getHetCount() + 2 * vcsub.getHomVarCount();
    final boolean isSingleton = originalAlleleCount == newAlleleCount && newAlleleCount == 1;
    final boolean hasChrCountAnnotations = vcsub.hasAttribute(VCFConstants.ALLELE_COUNT_KEY) &&
            vcsub.hasAttribute(VCFConstants.ALLELE_FREQUENCY_KEY) &&
            vcsub.hasAttribute(VCFConstants.ALLELE_NUMBER_KEY);

    if ( ! isSingleton && hasChrCountAnnotations ) {
        // nothing to update
        return vcsub;
    } else {
        // have to do the work
        VariantContextBuilder builder = new VariantContextBuilder(vcsub);

        if ( isSingleton )
            builder.attribute(VariantEval.IS_SINGLETON_KEY, true);

        if ( ! hasChrCountAnnotations )
            VariantContextUtils.calculateChromosomeCounts(builder, true);

        return builder.make();
    }
}
 
Example 2
Source File: CalculateGenotypePosteriors.java    From gatk-protected with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
@Override
public void apply(final VariantContext variant,
                  final ReadsContext readsContext,
                  final ReferenceContext referenceContext,
                  final FeatureContext featureContext) {
    final Collection<VariantContext> vcs = featureContext.getValues(getDrivingVariantsFeatureInput());

    final Collection<VariantContext> otherVCs = featureContext.getValues(supportVariants);

    final int missing = supportVariants.size() - otherVCs.size();

    for ( final VariantContext vc : vcs ) {
        final VariantContext vc_familyPriors;
        final VariantContext vc_bothPriors;

        //do family priors first (if applicable)
        final VariantContextBuilder builder = new VariantContextBuilder(vc);
        //only compute family priors for biallelelic sites
        if (!skipFamilyPriors && vc.isBiallelic()){
            final GenotypesContext gc = famUtils.calculatePosteriorGLs(vc);
            builder.genotypes(gc);
        }
        VariantContextUtils.calculateChromosomeCounts(builder, false);
        vc_familyPriors = builder.make();

        if (!skipPopulationPriors) {
            vc_bothPriors = PosteriorProbabilitiesUtils.calculatePosteriorProbs(vc_familyPriors, otherVCs, missing * numRefIfMissing, globalPrior, !ignoreInputSamples, defaultToAC, useACoff);
        } else {
            final VariantContextBuilder builder2 = new VariantContextBuilder(vc_familyPriors);
            VariantContextUtils.calculateChromosomeCounts(builder, false);
            vc_bothPriors = builder2.make();
        }
        vcfWriter.add(vc_bothPriors);
    }
}
 
Example 3
Source File: CalculateGenotypePosteriors.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
@Override
public void apply(final VariantContext variant,
                  final ReadsContext readsContext,
                  final ReferenceContext referenceContext,
                  final FeatureContext featureContext) {

    final Collection<VariantContext> otherVCs = featureContext.getValues(supportVariants);

    //If no resource contains a matching variant, then add numRefIfMissing as a pseudocount to the priors
    List<VariantContext> resourcesWithMatchingStarts = otherVCs.stream()
            .filter(vc -> variant.getStart() == vc.getStart()).collect(Collectors.toList());

    //do family priors first (if applicable)
    final VariantContextBuilder builder = new VariantContextBuilder(variant);
    //only compute family priors for biallelelic sites
    if (!skipFamilyPriors && variant.isBiallelic()){
        final GenotypesContext gc = famUtils.calculatePosteriorGLs(variant);
        builder.genotypes(gc);
    }
    VariantContextUtils.calculateChromosomeCounts(builder, false);
    final VariantContext vc_familyPriors = builder.make();

    final VariantContext vc_bothPriors;
    if (!skipPopulationPriors) {
        vc_bothPriors = PosteriorProbabilitiesUtils.calculatePosteriorProbs(vc_familyPriors, resourcesWithMatchingStarts,
                resourcesWithMatchingStarts.isEmpty() ? numRefIfMissing : 0, options);
    } else {
        vc_bothPriors = vc_familyPriors;
    }
    vcfWriter.add(vc_bothPriors);
}
 
Example 4
Source File: ChromosomeCounts.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
@Override
public Map<String, Object> annotate(final ReferenceContext ref,
                                    final VariantContext vc,
                                    AlleleLikelihoods<GATKRead, Allele> likelihoods) {
    Utils.nonNull(vc);
    if ( ! vc.hasGenotypes() ) {
        return Collections.emptyMap();
    }

    return VariantContextUtils.calculateChromosomeCounts(vc, new LinkedHashMap<>(), true, Collections.emptySet());
}
 
Example 5
Source File: GtcToVcf.java    From picard with MIT License 4 votes vote down vote up
private VariantContext makeVariantContext(Build37ExtendedIlluminaManifestRecord record, final InfiniumGTCRecord gtcRecord,
                                          final InfiniumEGTFile egtFile, final ProgressLogger progressLogger) {
    // If the record is not flagged as errant in the manifest we include it in the VCF
    Allele A = record.getAlleleA();
    Allele B = record.getAlleleB();
    Allele ref = record.getRefAllele();

    final String chr = record.getB37Chr();
    final Integer position = record.getB37Pos();
    final Integer endPosition = position + ref.length() - 1;
    Integer egtIndex = egtFile.rsNameToIndex.get(record.getName());
    if (egtIndex == null) {
        throw new PicardException("Found no record in cluster file for manifest entry '" + record.getName() + "'");
    }

    progressLogger.record(chr, position);

    // Create list of unique alleles
    final List<Allele> assayAlleles = new ArrayList<>();
    assayAlleles.add(ref);

    if (A.equals(B)) {
        throw new PicardException("Found same allele (" + A.getDisplayString() + ") for A and B ");
    }

    if (!ref.equals(A, true)) {
        assayAlleles.add(A);
    }

    if (!ref.equals(B, true)) {
        assayAlleles.add(B);
    }

    final String sampleName = FilenameUtils.removeExtension(INPUT.getName());
    final Genotype genotype = getGenotype(sampleName, gtcRecord, record, A, B);

    final VariantContextBuilder builder = new VariantContextBuilder();

    builder.source(record.getName());
    builder.chr(chr);
    builder.start(position);
    builder.stop(endPosition);
    builder.alleles(assayAlleles);
    builder.log10PError(VariantContext.NO_LOG10_PERROR);
    builder.id(record.getName());
    builder.genotypes(genotype);

    VariantContextUtils.calculateChromosomeCounts(builder, false);

    //custom info fields
    builder.attribute(InfiniumVcfFields.ALLELE_A, record.getAlleleA());
    builder.attribute(InfiniumVcfFields.ALLELE_B, record.getAlleleB());
    builder.attribute(InfiniumVcfFields.ILLUMINA_STRAND, record.getIlmnStrand());
    builder.attribute(InfiniumVcfFields.PROBE_A, record.getAlleleAProbeSeq());
    builder.attribute(InfiniumVcfFields.PROBE_B, record.getAlleleBProbeSeq());
    builder.attribute(InfiniumVcfFields.BEADSET_ID, record.getBeadSetId());
    builder.attribute(InfiniumVcfFields.ILLUMINA_CHR, record.getChr());
    builder.attribute(InfiniumVcfFields.ILLUMINA_POS, record.getPosition());
    builder.attribute(InfiniumVcfFields.ILLUMINA_BUILD, record.getGenomeBuild());
    builder.attribute(InfiniumVcfFields.SOURCE, record.getSource().replace(' ', '_'));
    builder.attribute(InfiniumVcfFields.GC_SCORE, formatFloatForVcf(egtFile.totalScore[egtIndex]));

    for (InfiniumVcfFields.GENOTYPE_VALUES gtValue : InfiniumVcfFields.GENOTYPE_VALUES.values()) {
        final int ordinalValue = gtValue.ordinal();
        builder.attribute(InfiniumVcfFields.N[ordinalValue], egtFile.n[egtIndex][ordinalValue]);
        builder.attribute(InfiniumVcfFields.DEV_R[ordinalValue], formatFloatForVcf(egtFile.devR[egtIndex][ordinalValue]));
        builder.attribute(InfiniumVcfFields.MEAN_R[ordinalValue], formatFloatForVcf(egtFile.meanR[egtIndex][ordinalValue]));
        builder.attribute(InfiniumVcfFields.DEV_THETA[ordinalValue], formatFloatForVcf(egtFile.devTheta[egtIndex][ordinalValue]));
        builder.attribute(InfiniumVcfFields.MEAN_THETA[ordinalValue], formatFloatForVcf(egtFile.meanTheta[egtIndex][ordinalValue]));

        final EuclideanValues genotypeEuclideanValues = polarToEuclidean(egtFile.meanR[egtIndex][ordinalValue], egtFile.devR[egtIndex][ordinalValue],
                egtFile.meanTheta[egtIndex][ordinalValue], egtFile.devTheta[egtIndex][ordinalValue]);
        builder.attribute(InfiniumVcfFields.DEV_X[ordinalValue], formatFloatForVcf(genotypeEuclideanValues.devX));
        builder.attribute(InfiniumVcfFields.MEAN_X[ordinalValue], formatFloatForVcf(genotypeEuclideanValues.meanX));
        builder.attribute(InfiniumVcfFields.DEV_Y[ordinalValue], formatFloatForVcf(genotypeEuclideanValues.devY));
        builder.attribute(InfiniumVcfFields.MEAN_Y[ordinalValue], formatFloatForVcf(genotypeEuclideanValues.meanY));
    }


    final String rsid = record.getRsId();
    if (StringUtils.isNotEmpty(rsid)) {
        builder.attribute(InfiniumVcfFields.RS_ID, rsid);
    }
    if (egtFile.totalScore[egtIndex] == 0.0) {
        builder.filter(InfiniumVcfFields.ZEROED_OUT_ASSAY);
        if (genotype.isCalled()) {
            if (DO_NOT_ALLOW_CALLS_ON_ZEROED_OUT_ASSAYS) {
                throw new PicardException("Found a call (genotype: " + genotype + ") on a zeroed out Assay!!");
            } else {
                log.warn("Found a call (genotype: " + genotype + ") on a zeroed out Assay. " +
                        "This could occur if you called genotypes on a different cluster file than used here.");
            }
        }
    }
    if (record.isDupe()) {
        builder.filter(InfiniumVcfFields.DUPE);
    }
    return builder.make();
}
 
Example 6
Source File: MergePedIntoVcf.java    From picard with MIT License 4 votes vote down vote up
/**
 * Writes out the VariantContext objects in the order presented to the supplied output file
 * in VCF format.
 */
private void writeVcf(final CloseableIterator<VariantContext> variants,
                      final File output,
                      final SAMSequenceDictionary dict,
                      final VCFHeader vcfHeader, ZCallPedFile zCallPedFile) {

    try(VariantContextWriter writer = new VariantContextWriterBuilder()
            .setOutputFile(output)
            .setReferenceDictionary(dict)
            .setOptions(VariantContextWriterBuilder.DEFAULT_OPTIONS)
            .build()) {
        writer.writeHeader(vcfHeader);
        while (variants.hasNext()) {
            final VariantContext context = variants.next();

            final VariantContextBuilder builder = new VariantContextBuilder(context);
            if (zCallThresholds.containsKey(context.getID())) {
                final String[] zThresh = zCallThresholds.get(context.getID());
                builder.attribute(InfiniumVcfFields.ZTHRESH_X, zThresh[0]);
                builder.attribute(InfiniumVcfFields.ZTHRESH_Y, zThresh[1]);
            }
            final Genotype originalGenotype = context.getGenotype(0);
            final Map<String, Object> newAttributes = originalGenotype.getExtendedAttributes();
            final VCFEncoder vcfEncoder = new VCFEncoder(vcfHeader, false, false);
            final Map<Allele, String> alleleMap = vcfEncoder.buildAlleleStrings(context);

            final String zCallAlleles = zCallPedFile.getAlleles(context.getID());
            if (zCallAlleles == null) {
                throw new PicardException("No zCall alleles found for snp " + context.getID());
            }
            final List<Allele> zCallPedFileAlleles = buildNewAllelesFromZCall(zCallAlleles, context.getAttributes());
            newAttributes.put(InfiniumVcfFields.GTA, alleleMap.get(originalGenotype.getAllele(0)) + UNPHASED + alleleMap.get(originalGenotype.getAllele(1)));
            newAttributes.put(InfiniumVcfFields.GTZ, alleleMap.get(zCallPedFileAlleles.get(0)) + UNPHASED + alleleMap.get(zCallPedFileAlleles.get(1)));

            final Genotype newGenotype = GenotypeBuilder.create(originalGenotype.getSampleName(), zCallPedFileAlleles,
                    newAttributes);
            builder.genotypes(newGenotype);
            logger.record("0", 0);
            // AC, AF, and AN are recalculated here
            VariantContextUtils.calculateChromosomeCounts(builder, false);
            final VariantContext newContext = builder.make();
            writer.add(newContext);
        }
    }
}
 
Example 7
Source File: CombineGenotypingArrayVcfs.java    From picard with MIT License 4 votes vote down vote up
/**
 * Merges multiple VariantContexts all for the same locus into a single hybrid.
 *
 * @param variantContexts           list of VCs
 * @return new VariantContext       representing the merge of variantContexts
 */
public static VariantContext merge(final List<VariantContext> variantContexts) {
    if ( variantContexts == null || variantContexts.isEmpty() )
        return null;

    // establish the baseline info from the first VC
    final VariantContext first = variantContexts.get(0);
    final String name = first.getSource();

    final Set<String> filters = new HashSet<>();

    int depth = 0;
    double log10PError = CommonInfo.NO_LOG10_PERROR;
    boolean anyVCHadFiltersApplied = false;
    GenotypesContext genotypes = GenotypesContext.create();

    // Go through all the VCs, verify that the loci and ID and other attributes agree.
    final Map<String, Object> firstAttributes = first.getAttributes();
    for (final VariantContext vc : variantContexts ) {
        if ((vc.getStart() != first.getStart()) || (!vc.getContig().equals(first.getContig()))) {
            throw new PicardException("Mismatch in loci among input VCFs");
        }
        if (!vc.getID().equals(first.getID())) {
            throw new PicardException("Mismatch in ID field among input VCFs");
        }
        if (!vc.getReference().equals(first.getReference())) {
            throw new PicardException("Mismatch in REF allele among input VCFs");
        }
        checkThatAllelesMatch(vc, first);

        genotypes.addAll(vc.getGenotypes());

        // We always take the QUAL of the first VC with a non-MISSING qual for the combined value
        if ( log10PError == CommonInfo.NO_LOG10_PERROR )
            log10PError =  vc.getLog10PError();

        filters.addAll(vc.getFilters());
        anyVCHadFiltersApplied |= vc.filtersWereApplied();

        // add attributes
        // special case DP (add it up)
        if (vc.hasAttribute(VCFConstants.DEPTH_KEY))
            depth += vc.getAttributeAsInt(VCFConstants.DEPTH_KEY, 0);

        // Go through all attributes - if any differ (other than AC, AF, or AN) that's an error.  (recalc AC, AF, and AN later)
        for (final Map.Entry<String, Object> p : vc.getAttributes().entrySet()) {
            final String key = p.getKey();
            if ((!key.equals("AC")) && (!key.equals("AF")) && (!key.equals("AN")) && (!key.equals("devX_AB")) && (!key.equals("devY_AB"))) {
                final Object value = p.getValue();
                final Object extantValue = firstAttributes.get(key);
                if (extantValue == null) {
                    // attribute in one VCF but not another.  Die!
                    throw new PicardException("Attribute '" + key + "' not found in all VCFs");
                }
                else if (!extantValue.equals(value)) {
                    // Attribute disagrees in value between one VCF Die! (if not AC, AF, nor AN, skipped above)
                    throw new PicardException("Values for attribute '" + key + "' disagrees among input VCFs");
                }
            }
        }
    }

    if (depth > 0)
        firstAttributes.put(VCFConstants.DEPTH_KEY, String.valueOf(depth));

    final VariantContextBuilder builder = new VariantContextBuilder().source(name).id(first.getID());
    builder.loc(first.getContig(), first.getStart(), first.getEnd());
    builder.alleles(first.getAlleles());
    builder.genotypes(genotypes);

    builder.attributes(new TreeMap<>(firstAttributes));
    // AC, AF, and AN are recalculated here
    VariantContextUtils.calculateChromosomeCounts(builder, false);

    builder.log10PError(log10PError);
    if ( anyVCHadFiltersApplied ) {
        builder.filters(filters.isEmpty() ? filters : new TreeSet<>(filters));
    }

    return builder.make();
}
 
Example 8
Source File: SelectVariants.java    From gatk with BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
private void addAnnotations(final VariantContextBuilder builder, final VariantContext originalVC, final Set<String> selectedSampleNames) {
    if (fullyDecode) {
        return; // TODO -- annotations are broken with fully decoded data
    }

    if (keepOriginalChrCounts) {
        final int[] indexOfOriginalAlleleForNewAllele;
        final List<Allele> newAlleles = builder.getAlleles();
        final int numOriginalAlleles = originalVC.getNAlleles();

        // if the alleles already match up, we can just copy the previous list of counts
        if (numOriginalAlleles == newAlleles.size()) {
            indexOfOriginalAlleleForNewAllele = null;
        }
        // otherwise we need to parse them and select out the correct ones
        else {
            indexOfOriginalAlleleForNewAllele = new int[newAlleles.size() - 1];
            Arrays.fill(indexOfOriginalAlleleForNewAllele, -1);

            // note that we don't care about the reference allele at position 0
            for (int newI = 1; newI < newAlleles.size(); newI++) {
                final Allele newAlt = newAlleles.get(newI);
                for (int oldI = 0; oldI < numOriginalAlleles - 1; oldI++) {
                    if (newAlt.equals(originalVC.getAlternateAllele(oldI), false)) {
                        indexOfOriginalAlleleForNewAllele[newI - 1] = oldI;
                        break;
                    }
                }
            }
        }

        if (originalVC.hasAttribute(VCFConstants.ALLELE_COUNT_KEY)) {
            builder.attribute(GATKVCFConstants.ORIGINAL_AC_KEY,
                    getReorderedAttributes(originalVC.getAttribute(VCFConstants.ALLELE_COUNT_KEY), indexOfOriginalAlleleForNewAllele));
        }
        if (originalVC.hasAttribute(VCFConstants.ALLELE_FREQUENCY_KEY)) {
            builder.attribute(GATKVCFConstants.ORIGINAL_AF_KEY,
                    getReorderedAttributes(originalVC.getAttribute(VCFConstants.ALLELE_FREQUENCY_KEY), indexOfOriginalAlleleForNewAllele));
        }
        if (originalVC.hasAttribute(VCFConstants.ALLELE_NUMBER_KEY)) {
            builder.attribute(GATKVCFConstants.ORIGINAL_AN_KEY, originalVC.getAttribute(VCFConstants.ALLELE_NUMBER_KEY));
        }
    }

    VariantContextUtils.calculateChromosomeCounts(builder, false);

    if (keepOriginalDepth && originalVC.hasAttribute(VCFConstants.DEPTH_KEY)) {
        builder.attribute(GATKVCFConstants.ORIGINAL_DP_KEY, originalVC.getAttribute(VCFConstants.DEPTH_KEY));
    }

    boolean sawDP = false;
    int depth = 0;
    for (final String sample : selectedSampleNames ) {
        final Genotype g = originalVC.getGenotype(sample);
        if (!g.isFiltered()) {
            if (g.hasDP()) {
                depth += g.getDP();
                sawDP = true;
            }
        }
    }

    if (sawDP) {
        builder.attribute(VCFConstants.DEPTH_KEY, depth);
    }
}