Java Code Examples for htsjdk.variant.vcf.VCFHeader#getGenotypeSamples()
The following examples show how to use htsjdk.variant.vcf.VCFHeader#getGenotypeSamples().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: FilterAlignmentArtifacts.java From gatk with BSD 3-Clause "New" or "Revised" License | 6 votes |
/**
 * Prepares the tool before traversal starts.
 *
 * <p>Creates the realignment engine and the output VCF writer, writes an output header made of
 * the input header lines plus the alignment-artifact filter line, three INFO lines and the
 * default tool header lines, and then initialises the read-side state: BAM header, sample list,
 * reference reader, assembler, likelihood engine and the optional haplotype BAM writer.
 */
@Override
public void onTraversalStart() {
    realignmentEngine = new RealignmentEngine(realignmentArgumentCollection);
    vcfWriter = createVCFWriter(new File(outputVcf));

    // Output header = input header lines + lines for the annotations/filters this tool may emit.
    final VCFHeader sourceHeader = getHeaderForVariants();
    final Set<VCFHeaderLine> outputLines = new HashSet<>(sourceHeader.getMetaDataInSortedOrder());
    outputLines.add(GATKVCFHeaderLines.getFilterLine(GATKVCFConstants.ALIGNMENT_ARTIFACT_FILTER_NAME));
    final String[] infoKeys = {
            GATKVCFConstants.UNITIG_SIZES_KEY,
            GATKVCFConstants.ALIGNMENT_SCORE_DIFFERENCE_KEY,
            GATKVCFConstants.JOINT_ALIGNMENT_COUNT_KEY
    };
    for (final String infoKey : infoKeys) {
        outputLines.add(GATKVCFHeaderLines.getInfoLine(infoKey));
    }
    outputLines.addAll(getDefaultToolVCFHeaderLines());
    vcfWriter.writeHeader(new VCFHeader(outputLines, sourceHeader.getGenotypeSamples()));

    // Read-side setup.
    bamHeader = getHeaderForReads();
    samplesList = new IndexedSampleList(new ArrayList<>(ReadUtils.getSamplesFromHeader(bamHeader)));
    referenceReader = AssemblyBasedCallerUtils.createReferenceReader(
            Utils.nonNull(referenceArguments.getReferenceSpecifier()));
    assemblyEngine = MTAC.createReadThreadingAssembler();
    likelihoodCalculationEngine = AssemblyBasedCallerUtils.createLikelihoodCalculationEngine(MTAC.likelihoodArgs);

    // A haplotype BAM is only written when an output path was supplied.
    if (bamOutputPath == null) {
        haplotypeBAMWriter = Optional.empty();
    } else {
        haplotypeBAMWriter = Optional.of(new HaplotypeBAMWriter(
                HaplotypeBAMWriter.WriterType.ALL_POSSIBLE_HAPLOTYPES,
                IOUtils.getPath(bamOutputPath),
                true,
                false,
                getHeaderForSAMWriter()));
    }
}
Example 2
Source File: GnarlyGenotyper.java From gatk with BSD 3-Clause "New" or "Revised" License | 6 votes |
@Override public void onTraversalStart() { final VCFHeader inputVCFHeader = getHeaderForVariants(); if(onlyOutputCallsStartingInIntervals) { if( !intervalArgumentCollection.intervalsSpecified()) { throw new CommandLineException.MissingArgument("-L or -XL", "Intervals are required if --" + GenotypeGVCFs.ONLY_OUTPUT_CALLS_STARTING_IN_INTERVALS_FULL_NAME + " was specified."); } } intervals = intervalArgumentCollection.intervalsSpecified() ? intervalArgumentCollection.getIntervals(getBestAvailableSequenceDictionary()) : Collections.emptyList(); final SampleList samples = new IndexedSampleList(inputVCFHeader.getGenotypeSamples()); setupVCFWriter(inputVCFHeader, samples); genotyperEngine = new GnarlyGenotyperEngine(keepAllSites, genotypeArgs.MAX_ALTERNATE_ALLELES, SUMMARIZE_PLs, stripASAnnotations); Reflections reflections = new Reflections("org.broadinstitute.hellbender.tools.walkers.annotator.allelespecific"); //not InfoFieldAnnotation.class because we don't want AS_InbreedingCoeff allAlleleSpecificAnnotations.addAll(reflections.getSubTypesOf(AS_StrandBiasTest.class)); allAlleleSpecificAnnotations.addAll(reflections.getSubTypesOf(AS_RankSumTest.class)); allAlleleSpecificAnnotations.add(AS_RMSMappingQuality.class); allAlleleSpecificAnnotations.add(AS_QualByDepth.class); }
Example 3
Source File: AnnotateVcfWithExpectedAlleleFraction.java From gatk with BSD 3-Clause "New" or "Revised" License | 6 votes |
/**
 * Prepares the tool before traversal starts.
 *
 * <p>Writes an output header made of the input header lines, the expected-allele-fraction INFO
 * line and the default tool header lines, then reads the mixing fractions and stores them in the
 * order of the input header's samples.
 */
@Override
public void onTraversalStart() {
    final VCFHeader inputHeader = getHeaderForVariants();

    // Collect every header line BEFORE constructing the VCFHeader. The header takes its lines
    // from the set at construction time, so lines added to the set afterwards (as the default
    // tool lines previously were) would not appear in the written header.
    final Set<VCFHeaderLine> headerLines = new HashSet<>(inputHeader.getMetaDataInSortedOrder());
    headerLines.add(new VCFInfoHeaderLine(EXPECTED_ALLELE_FRACTION_NAME, 1, VCFHeaderLineType.Float, "expected allele fraction in pooled bam"));
    headerLines.addAll(getDefaultToolVCFHeaderLines());

    final VCFHeader vcfHeader = new VCFHeader(headerLines, inputHeader.getGenotypeSamples());
    vcfWriter = createVCFWriter(outputVcf);
    vcfWriter.writeHeader(vcfHeader);

    // Index mixing fractions by sample name, then lay them out in header sample order.
    final List<MixingFraction> mixingFractionsList = MixingFraction.readMixingFractions(inputMixingFractions);
    final Map<String, Double> mixingfractionsMap = mixingFractionsList.stream()
            .collect(Collectors.toMap(MixingFraction::getSample, MixingFraction::getMixingFraction));
    // NOTE(review): a header sample with no entry in the map makes the unboxing below throw
    // a NullPointerException - confirm whether inputs are guaranteed to cover all samples.
    mixingFractionsInSampleOrder = inputHeader.getSampleNamesInOrder().stream()
            .mapToDouble(mixingfractionsMap::get).toArray();
}
Example 4
Source File: AnnotateVcfWithExpectedAlleleFraction.java From gatk-protected with BSD 3-Clause "New" or "Revised" License | 6 votes |
/**
 * Prepares the tool before traversal starts.
 *
 * <p>Writes an output header made of the input header lines, the expected-allele-fraction INFO
 * line and the default tool header lines, then reads the mixing fractions and stores them in the
 * order of the input header's samples.
 */
@Override
public void onTraversalStart() {
    final VCFHeader inputHeader = getHeaderForVariants();

    // Collect every header line BEFORE constructing the VCFHeader. The header takes its lines
    // from the set at construction time, so lines added to the set afterwards (as the default
    // tool lines previously were) would not appear in the written header.
    final Set<VCFHeaderLine> headerLines = new HashSet<>(inputHeader.getMetaDataInSortedOrder());
    headerLines.add(new VCFInfoHeaderLine(EXPECTED_ALLELE_FRACTION_NAME, 1, VCFHeaderLineType.Float, "expected allele fraction in pooled bam"));
    headerLines.addAll(getDefaultToolVCFHeaderLines());

    final VCFHeader vcfHeader = new VCFHeader(headerLines, inputHeader.getGenotypeSamples());
    vcfWriter = createVCFWriter(outputVcf);
    vcfWriter.writeHeader(vcfHeader);

    // Index mixing fractions by sample name, then lay them out in header sample order.
    final List<MixingFraction> mixingFractionsList = MixingFraction.readMixingFractions(inputMixingFractions);
    final Map<String, Double> mixingfractionsMap = mixingFractionsList.stream()
            .collect(Collectors.toMap(MixingFraction::getSample, MixingFraction::getMixingFraction));
    // NOTE(review): a header sample with no entry in the map makes the unboxing below throw
    // a NullPointerException - confirm whether inputs are guaranteed to cover all samples.
    mixingFractionsInSampleOrder = inputHeader.getSampleNamesInOrder().stream()
            .mapToDouble(mixingfractionsMap::get).toArray();
}
Example 5
Source File: FilterMutectCalls.java From gatk-protected with BSD 3-Clause "New" or "Revised" License | 6 votes |
/**
 * Prepares the tool before traversal starts.
 *
 * <p>Builds the output header from the input header lines, the filter lines for every name in
 * {@code Mutect2FilteringEngine.M_2_FILTER_NAMES}, seven explicitly described filter lines and
 * the default tool header lines, then creates the VCF writer and writes that header.
 */
@Override
public void onTraversalStart() {
    final VCFHeader sourceHeader = getHeaderForVariants();
    final Set<VCFHeaderLine> outputLines = new HashSet<>(sourceHeader.getMetaDataInSortedOrder());

    // Filter lines whose definitions come from GATKVCFHeaderLines.
    for (final String filterName : Mutect2FilteringEngine.M_2_FILTER_NAMES) {
        outputLines.add(GATKVCFHeaderLines.getFilterLine(filterName));
    }

    // Filter lines described inline.
    outputLines.add(new VCFFilterHeaderLine(
            Mutect2FilteringEngine.ARTIFACT_IN_NORMAL_FILTER_NAME, "artifact_in_normal"));
    outputLines.add(new VCFFilterHeaderLine(
            Mutect2FilteringEngine.MEDIAN_BASE_QUALITY_DIFFERENCE_FILTER_NAME, "ref - alt median base quality"));
    outputLines.add(new VCFFilterHeaderLine(
            Mutect2FilteringEngine.MEDIAN_MAPPING_QUALITY_DIFFERENCE_FILTER_NAME, "ref - alt median mapping quality"));
    outputLines.add(new VCFFilterHeaderLine(
            Mutect2FilteringEngine.MEDIAN_CLIPPING_DIFFERENCE_FILTER_NAME, "ref - alt median clipping"));
    outputLines.add(new VCFFilterHeaderLine(
            Mutect2FilteringEngine.MEDIAN_FRAGMENT_LENGTH_DIFFERENCE_FILTER_NAME, "abs(ref - alt) median fragment length"));
    outputLines.add(new VCFFilterHeaderLine(
            Mutect2FilteringEngine.READ_POSITION_FILTER_NAME, "median distance of alt variants from end of reads"));
    outputLines.add(new VCFFilterHeaderLine(
            Mutect2FilteringEngine.CONTAMINATION_FILTER_NAME, "contamination"));

    outputLines.addAll(getDefaultToolVCFHeaderLines());

    final VCFHeader outputHeader = new VCFHeader(outputLines, sourceHeader.getGenotypeSamples());
    vcfWriter = createVCFWriter(new File(outputVcf));
    vcfWriter.writeHeader(outputHeader);
}
Example 6
Source File: GvcfMetricAccumulator.java From picard with MIT License | 5 votes |
/**
 * Records the single sample name of the GVCF described by {@code vcfHeader}.
 *
 * @param vcfHeader header whose genotype sample list is inspected
 * @throws IllegalArgumentException if the header does not list exactly one genotype sample
 */
@Override
public void setup(final VCFHeader vcfHeader) {
    final List<String> sampleNames = vcfHeader.getGenotypeSamples();

    // A missing sample list is reported the same way as an empty one.
    final int sampleCount = (sampleNames == null) ? 0 : sampleNames.size();
    if (sampleCount != 1) {
        throw new IllegalArgumentException("Expected to have exactly 1 sample in a GVCF, found " + sampleCount);
    }

    sample = sampleNames.get(0);
}
Example 7
Source File: FilterMutectCalls.java From gatk with BSD 3-Clause "New" or "Revised" License | 5 votes |
@Override public void onTraversalStart() { Utils.resetRandomGenerator(); final VCFHeader inputHeader = getHeaderForVariants(); final Set<VCFHeaderLine> headerLines = inputHeader.getMetaDataInSortedOrder().stream() .filter(line -> !line.getKey().equals(FILTERING_STATUS_VCF_KEY)) //remove header line from Mutect2 stating that calls are unfiltered. .collect(Collectors.toSet()); headerLines.add(new VCFHeaderLine(FILTERING_STATUS_VCF_KEY, "These calls have been filtered by " + FilterMutectCalls.class.getSimpleName() + " to label false positives with a list of failed filters and true positives with PASS.")); // all possible filters, even allele specific (since they can apply to the site as well GATKVCFConstants.MUTECT_FILTER_NAMES.stream().map(GATKVCFHeaderLines::getFilterLine).forEach(headerLines::add); // these are the possible allele specific filters which will be in the INFO section // when all relevant alleles (non-symbolic, etc) are filtered, the filter will be applied to the site level filter also GATKVCFConstants.MUTECT_AS_FILTER_NAMES.stream().map(GATKVCFHeaderLines::getInfoLine).forEach(headerLines::add); headerLines.addAll(getDefaultToolVCFHeaderLines()); final VCFHeader vcfHeader = new VCFHeader(headerLines, inputHeader.getGenotypeSamples()); vcfWriter = createVCFWriter(new File(outputVcf)); vcfWriter.writeHeader(vcfHeader); final File mutect2StatsTable = new File(statsTable == null ? drivingVariantFile + Mutect2.DEFAULT_STATS_EXTENSION : statsTable); filteringEngine = new Mutect2FilteringEngine(MTFAC, vcfHeader, mutect2StatsTable); if (!mutect2StatsTable.exists()) { throw new UserException.CouldNotReadInputFile("Mutect stats table " + mutect2StatsTable + " not found. When Mutect2 outputs a file calls.vcf it also creates" + " a calls.vcf" + Mutect2.DEFAULT_STATS_EXTENSION + " file. Perhaps this file was not moved along with the vcf, or perhaps it was not delocalized from a" + " virtual machine while running in the cloud." ); } }
Example 8
Source File: UpdateVCFSequenceDictionary.java From gatk with BSD 3-Clause "New" or "Revised" License | 5 votes |
@Override public void onTraversalStart() { VCFHeader inputHeader = getHeaderForVariants(); VCFHeader outputHeader = inputHeader == null ? new VCFHeader() : new VCFHeader(inputHeader.getMetaDataInInputOrder(), inputHeader.getGenotypeSamples()) ; getDefaultToolVCFHeaderLines().forEach(line -> outputHeader.addMetaDataLine(line)); sourceDictionary = getBestAvailableSequenceDictionary(); // If -replace is set, do not need to check the sequence dictionary for validity here -- it will still be // checked in our normal sequence dictionary validation. Warn and require opt-in via -replace if we're about to // clobber a valid sequence dictionary. Check the input file directly via the header rather than using the // engine, since it might dig one up from an index. if (!replace) { SAMSequenceDictionary oldDictionary = inputHeader == null ? null : inputHeader.getSequenceDictionary(); if (oldDictionary != null && !oldDictionary.getSequences().isEmpty()) { throw new CommandLineException.BadArgumentValue( String.format( "The input variant file %s already contains a sequence dictionary. " + "Use %s to force the dictionary to be replaced.", getDrivingVariantsFeatureInput().getName(), REPLACE_ARGUMENT_NAME ) ); } } outputHeader.setSequenceDictionary(sourceDictionary); vcfWriter = createVCFWriter(new File(outFile)); vcfWriter.writeHeader(outputHeader); }
Example 9
Source File: RemoveNearbyIndels.java From gatk with BSD 3-Clause "New" or "Revised" License | 5 votes |
/**
 * Prepares the tool before traversal starts: copies the input header's metadata and genotype
 * samples into a new header, adds the default tool header lines, and writes the result through
 * a newly created VCF writer.
 */
@Override
public void onTraversalStart() {
    final VCFHeader sourceHeader = getHeaderForVariants();

    final VCFHeader outputHeader =
            new VCFHeader(sourceHeader.getMetaDataInSortedOrder(), sourceHeader.getGenotypeSamples());
    getDefaultToolVCFHeaderLines().forEach(toolLine -> outputHeader.addMetaDataLine(toolLine));

    vcfWriter = createVCFWriter(new File(outputVcf));
    vcfWriter.writeHeader(outputHeader);
}
Example 10
Source File: LazyBCFGenotypesContext.java From Hadoop-BAM with MIT License | 5 votes |
/**
 * Installs the header used for decoding: creates the genotype field decoders and the field
 * dictionary, allocates one {@code GenotypeBuilder} per genotype sample, and caches the header's
 * sample ordering and sample-name-to-offset lookup.
 *
 * @param header the VCF header to decode against
 */
@Override
public void setHeader(VCFHeader header) {
    genoFieldDecoders = new BCF2GenotypeFieldDecoders(header);
    fieldDict = BCF2Utils.makeDictionary(header);

    // One builder per sample, created in header order.
    builders = new GenotypeBuilder[header.getNGenotypeSamples()];
    final List<String> sampleNames = header.getGenotypeSamples();
    int sampleIndex = 0;
    while (sampleIndex < builders.length) {
        builders[sampleIndex] = new GenotypeBuilder(sampleNames.get(sampleIndex));
        sampleIndex++;
    }

    sampleNamesInOrder = header.getSampleNamesInOrder();
    sampleNameToOffset = header.getSampleNameToOffset();
}
Example 11
Source File: RemoveNearbyIndels.java From gatk-protected with BSD 3-Clause "New" or "Revised" License | 5 votes |
/**
 * Prepares the tool before traversal starts: copies the input header's metadata and genotype
 * samples into a new header, adds the default tool header lines, and writes the result through
 * a newly created VCF writer.
 */
@Override
public void onTraversalStart() {
    final VCFHeader sourceHeader = getHeaderForVariants();

    final VCFHeader outputHeader =
            new VCFHeader(sourceHeader.getMetaDataInSortedOrder(), sourceHeader.getGenotypeSamples());
    getDefaultToolVCFHeaderLines().forEach(toolLine -> outputHeader.addMetaDataLine(toolLine));

    vcfWriter = createVCFWriter(new File(outputVcf));
    vcfWriter.writeHeader(outputHeader);
}
Example 12
Source File: FastVCFFileReader.java From imputationserver with GNU Affero General Public License v3.0 | 5 votes |
public FastVCFFileReader(String vcfFilename) throws IOException { super(vcfFilename); // load header VCFFileReader reader = new VCFFileReader(new File(vcfFilename), false); VCFHeader header = reader.getFileHeader(); samples = header.getGenotypeSamples(); samplesCount = samples.size(); variantContext = new MinimalVariantContext(samplesCount); reader.close(); parser = new VCFLineParser(samplesCount); }
Example 13
Source File: SageHotspotAnnotation.java From hmftools with GNU General Public License v3.0 | 5 votes |
/**
 * Builds the output header: a copy of the template's metadata and genotype samples, extended
 * with the hotspot, near-hotspot and recovered flag INFO lines and every INFO line of the
 * hotspot VCF header.
 *
 * @param template   header whose metadata and samples seed the output header
 * @param hotspotVCF header whose INFO lines are appended
 * @return the newly built header
 */
@NotNull
private static VCFHeader generateOutputHeader(@NotNull final VCFHeader template, @NotNull final VCFHeader hotspotVCF) {
    final VCFHeader result = new VCFHeader(template.getMetaDataInInputOrder(), template.getGenotypeSamples());

    result.addMetaDataLine(new VCFInfoHeaderLine(HOTSPOT_FLAG, 0, VCFHeaderLineType.Flag, HOTSPOT_DESCRIPTION));
    result.addMetaDataLine(new VCFInfoHeaderLine(NEAR_HOTSPOT_FLAG, 0, VCFHeaderLineType.Flag, NEAR_HOTSPOT_DESCRIPTION));
    result.addMetaDataLine(new VCFInfoHeaderLine(RECOVERED_FLAG, 0, VCFHeaderLineType.Flag, RECOVERED_FLAG_DESCRIPTION));

    // Carry over the INFO definitions declared by the hotspot file.
    hotspotVCF.getInfoHeaderLines().forEach(result::addMetaDataLine);

    return result;
}
Example 14
Source File: StructuralVariantHeader.java From hmftools with GNU General Public License v3.0 | 5 votes |
/**
 * Builds the structural variant output header: a copy of the template's metadata and genotype
 * samples, extended with the purple version line, the standard GT format line and the INFO and
 * FILTER lines listed below, added in that order.
 *
 * @param purpleVersion value written to the {@code purpleVersion} header line
 * @param template      header whose metadata and samples seed the output header
 * @return the newly built header
 */
@NotNull
public static VCFHeader generateHeader(@NotNull final String purpleVersion, @NotNull final VCFHeader template) {
    final VCFHeader header = new VCFHeader(template.getMetaDataInInputOrder(), template.getGenotypeSamples());

    final VCFHeaderLine[] additionalLines = {
            new VCFHeaderLine("purpleVersion", purpleVersion),
            VCFStandardHeaderLines.getFormatLine("GT"),
            new VCFInfoHeaderLine(StructuralVariantFactory.RECOVERED, 0, VCFHeaderLineType.Flag, RECOVERED_DESC),
            new VCFInfoHeaderLine(StructuralVariantFactory.INFERRED, 0, VCFHeaderLineType.Flag, INFERRED_DESC),
            new VCFFilterHeaderLine(INFERRED, INFERRED_DESC),
            new VCFInfoHeaderLine(StructuralVariantFactory.IMPRECISE, 0, VCFHeaderLineType.Flag, IMPRECISE_DESC),
            new VCFInfoHeaderLine(CIPOS, 2, VCFHeaderLineType.Integer, CIPOS_DESC),
            new VCFInfoHeaderLine(SVTYPE, 1, VCFHeaderLineType.String, SVTYPE_DESC),
            new VCFInfoHeaderLine(PURPLE_AF_INFO, UNBOUNDED, VCFHeaderLineType.Float, PURPLE_AF_DESC),
            new VCFInfoHeaderLine(PURPLE_CN_INFO, UNBOUNDED, VCFHeaderLineType.Float, PURPLE_CN_DESC),
            new VCFInfoHeaderLine(RECOVERY_METHOD, 1, VCFHeaderLineType.String, RECOVERY_METHOD_DESC),
            new VCFInfoHeaderLine(RECOVERY_FILTER, UNBOUNDED, VCFHeaderLineType.String, RECOVERY_FILTER_DESC),
            new VCFInfoHeaderLine(PURPLE_JUNCTION_COPY_NUMBER_INFO, 1, VCFHeaderLineType.Float, PURPLE_JUNCTION_COPY_NUMBER_DESC),
            new VCFInfoHeaderLine(PURPLE_CN_CHANGE_INFO, UNBOUNDED, VCFHeaderLineType.Float, PURPLE_CN_CHANGE_DESC)
    };
    for (final VCFHeaderLine line : additionalLines) {
        header.addMetaDataLine(line);
    }

    return header;
}
Example 15
Source File: GatherVcfsCloud.java From gatk with BSD 3-Clause "New" or "Revised" License | 4 votes |
/** Validates that all headers contain the same set of genotyped samples and that files are in order by position of first record. */ private static void assertSameSamplesAndValidOrdering(final List<Path> inputFiles, final boolean disableContigOrderingCheck) { final VCFHeader firstHeader = getHeader(inputFiles.get(0)); final SAMSequenceDictionary dict = firstHeader.getSequenceDictionary(); if ( dict == null) { throw new UserException.BadInput("The first VCF specified is missing the required sequence dictionary. " + "This is required to perform validation. You can skip this validation " + "using --"+IGNORE_SAFETY_CHECKS_LONG_NAME +" but ignoring safety checks " + "can result in invalid output."); } final VariantContextComparator comparator = new VariantContextComparator(dict); final List<String> samples = firstHeader.getGenotypeSamples(); Path lastFile = null; VariantContext lastContext = null; for (final Path f : inputFiles) { final FeatureReader<VariantContext> in = getReaderFromVCFUri(f, 0); final VCFHeader header = (VCFHeader)in.getHeader(); dict.assertSameDictionary(header.getSequenceDictionary()); final List<String> theseSamples = header.getGenotypeSamples(); if (!samples.equals(theseSamples)) { final SortedSet<String> s1 = new TreeSet<>(samples); final SortedSet<String> s2 = new TreeSet<>(theseSamples); s1.removeAll(theseSamples); s2.removeAll(samples); throw new IllegalArgumentException("VCFs do not have identical sample lists." + " Samples unique to first file: " + s1 + ". 
Samples unique to " + f.toUri().toString() + ": " + s2 + "."); } try(final CloseableIterator<VariantContext> variantIterator = in.iterator()) { if (variantIterator.hasNext()) { final VariantContext currentContext = variantIterator.next(); if (lastContext != null) { if ( disableContigOrderingCheck ) { if ( lastContext.getContig().equals(currentContext.getContig()) && lastContext.getStart() >= currentContext.getStart() ) { throw new IllegalArgumentException( "First record in file " + f.toUri().toString() + " is not after first record in " + "previous file " + lastFile.toUri().toString()); } } else { if ( comparator.compare(lastContext, currentContext) >= 0 ) { throw new IllegalArgumentException( "First record in file " + f.toUri().toString() + " is not after first record in " + "previous file " + lastFile.toUri().toString()); } } } lastContext = currentContext; lastFile = f; } } catch (final IOException e) { throw new UserException.CouldNotReadInputFile(f, e.getMessage(), e); } CloserUtil.close(in); } }
Example 16
Source File: AnnotateStrelkaWithAllelicDepth.java From hmftools with GNU General Public License v3.0 | 4 votes |
/**
 * Builds the output header: a copy of the template's metadata and genotype samples with the
 * standard {@code AD} format line added.
 *
 * @param template header whose metadata and samples seed the output header
 * @return the newly built header
 */
@NotNull
private VCFHeader generateOutputHeader(@NotNull final VCFHeader template) {
    final VCFHeader result =
            new VCFHeader(template.getMetaDataInInputOrder(), template.getGenotypeSamples());
    result.addMetaDataLine(VCFStandardHeaderLines.getFormatLine("AD"));
    return result;
}
Example 17
Source File: VcfToVariant.java From genomewarp with Apache License 2.0 | 4 votes |
@VisibleForTesting static List<VariantCall> getCalls(VariantContext vc, VCFHeader header) { List<VariantCall> toReturn = new ArrayList<>(); for (String currSample : header.getGenotypeSamples()) { if (!vc.hasGenotype(currSample)) { continue; } Genotype currGenotype = vc.getGenotype(currSample); VariantCall.Builder vcBuilder = VariantCall.newBuilder(); vcBuilder.setCallSetName(currSample); // Get GT info. final Map<Allele, Integer> alleleStrings = buildAlleleMap(vc); vcBuilder.addGenotype(alleleStrings.get(currGenotype.getAllele(0))); for (int i = 1; i < currGenotype.getPloidy(); i++) { vcBuilder.addGenotype(alleleStrings.get(currGenotype.getAllele(i))); } // Set phasing (not applicable to haploid). if (currGenotype.isPhased() && currGenotype.getPloidy() > 1) { vcBuilder.setPhaseset("*"); } // Get rest of the genotype info. Map<String, ListValue> genotypeInfo = new HashMap<>(); // Set filters if (currGenotype.isFiltered()) { genotypeInfo.put(VCFConstants.GENOTYPE_FILTER_KEY, ListValue.newBuilder() .addValues(Value.newBuilder().setStringValue(currGenotype.getFilters()).build()) .build()); } for (final String field : vc.calcVCFGenotypeKeys(header)) { // We've already handled genotype if (field.equals(VCFConstants.GENOTYPE_KEY)) { continue; } ListValue.Builder listValueBuilder = ListValue.newBuilder(); if (field.equals(VCFConstants.GENOTYPE_FILTER_KEY)) { // This field has already been dealt with continue; } else { final IntGenotypeFieldAccessors.Accessor accessor = GENOTYPE_FIELD_ACCESSORS.getAccessor(field); if (accessor != null) { // The field is a default inline field. if (!parseInlineGenotypeFields(field, vcBuilder, listValueBuilder, accessor, currGenotype)) { continue; } } else { // Other field, we'll get type/other info from header. 
if (!parseOtherGenotypeFields(field, vc, listValueBuilder, currGenotype, header)) { continue; } } } genotypeInfo.put(field, listValueBuilder.build()); } vcBuilder.putAllInfo(genotypeInfo); toReturn.add(vcBuilder.build()); } return toReturn; }