Java Code Examples for htsjdk.variant.variantcontext.VariantContext#getStart()
The following examples show how to use
htsjdk.variant.variantcontext.VariantContext#getStart().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: SegmentedCpxVariantSimpleVariantExtractor.java From gatk with BSD 3-Clause "New" or "Revised" License | 6 votes |
@VisibleForTesting static boolean deletionConsistencyCheck(final VariantContext simple, final Set<SimpleInterval> missingSegments) { if (missingSegments.isEmpty()) return false; final SimpleInterval deletedRange = new SimpleInterval(simple.getContig(), simple.getStart() + 1, simple.getEnd()); // dummy number for chr to be used in constructing SVInterval, since 2 input AI's both map to the same chr by this point final int dummyChr = 0; final SVInterval intervalOne = new SVInterval(dummyChr, deletedRange.getStart() - 1, deletedRange.getEnd()); for (final SimpleInterval missing : missingSegments) { if ( ! missing.overlaps(deletedRange) ) return false; final SVInterval intervalTwo = new SVInterval(dummyChr, missing.getStart() - 1, missing.getEnd()); // allow 1-base fuzziness from either end if ( Math.abs(missing.size() - deletedRange.size()) > 2 ) return false; if( 2 >= Math.abs( Math.min(missing.size(), deletedRange.size()) - intervalTwo.overlapLen(intervalOne) ) ){ return true; } } return false; }
Example 2
Source File: KataegisQueue.java From hmftools with GNU General Public License v3.0 | 6 votes |
/**
 * Scans the buffer starting from {@code first} and returns a snapshot of the last state in which
 * the growing window was viable.
 *
 * <p>Scanning stops as soon as a buffered context starts more than {@code MAX_ABS_DISTANCE}
 * beyond the current end of the growing window. If the window never becomes viable, the returned
 * window is the one built from {@code first} alone.
 *
 * @param first the context the window is anchored on
 * @return a copy of the growing window at its last viable state, or the initial window
 */
@NotNull
private KataegisWindow longestViableWindow(@NotNull final VariantContext first) {
    KataegisWindow bestSoFar = new KataegisWindow(first);
    final KataegisWindow growing = new KataegisWindow(first);

    for (VariantContext context : buffer) {
        final boolean beyondReach = context.getStart() - growing.end() > MAX_ABS_DISTANCE;
        if (beyondReach) {
            break;
        }

        if (candidate.test(context)) {
            growing.add(context);
        }

        if (growing.isViable(MIN_COUNT, MAX_AVG_DISTANCE)) {
            // Snapshot, because the growing window keeps being mutated.
            bestSoFar = new KataegisWindow(growing);
        }
    }

    return bestSoFar;
}
Example 3
Source File: KataegisQueue.java From hmftools with GNU General Public License v3.0 | 5 votes |
/**
 * Handles the context at the head of the buffer.
 *
 * <p>A head that is not a candidate is passed straight to the consumer. Otherwise the longest
 * viable window anchored on the head is computed, and every buffered context starting at or
 * before the window end is drained to the consumer. When the window is viable, the identifier
 * is incremented once and each drained candidate is tagged with {@code KATAEGIS_FLAG}.
 */
private void processFirstContext() {
    if (buffer.isEmpty()) {
        return;
    }

    final VariantContext head = buffer.peekFirst();
    if (!candidate.test(head)) {
        consumer.accept(buffer.pollFirst());
        return;
    }

    final KataegisWindow window = longestViableWindow(head);
    final boolean viable = window.isViable(MIN_COUNT, MAX_AVG_DISTANCE);
    if (viable) {
        identifier++;
    }

    while (!buffer.isEmpty()) {
        final VariantContext next = buffer.peekFirst();
        if (next.getStart() > window.end()) {
            // Everything left in the buffer lies past this window.
            return;
        }

        if (viable && candidate.test(next)) {
            next.getCommonInfo().putAttribute(KATAEGIS_FLAG, idPrefix + "_" + identifier, true);
        }
        consumer.accept(buffer.pollFirst());
    }
}
Example 4
Source File: SegmentedCpxVariantSimpleVariantExtractor.java From gatk with BSD 3-Clause "New" or "Revised" License | 5 votes |
/**
 * Builds an annotated interval from a variant, keeping the source variant alongside the values
 * extracted from it.
 *
 * @param vc the variant to wrap; {@code SVTYPE} defaults to the empty string and {@code SVLEN}
 *           to 0 when the attribute is absent
 */
private AnnotatedInterval(final VariantContext vc) {
    this.sourceVC = vc;

    // Location and identity.
    this.interval = new SimpleInterval(vc.getContig(), vc.getStart(), vc.getEnd());
    this.id = vc.getID();

    // Structural-variant annotations.
    this.type = vc.getAttributeAsString(SVTYPE, "");
    this.svlen = vc.getAttributeAsInt(SVLEN, 0);

    this.alleles = vc.getAlleles();
}
Example 5
Source File: StrelkaPostProcess.java From hmftools with GNU General Public License v3.0 | 5 votes |
/**
 * Exposes a variant as a {@link GenomePosition}.
 *
 * <p>The returned object is a live view: each call reads the contig and start from
 * {@code variant} at that moment.
 *
 * @param variant the variant to adapt
 * @return a position whose chromosome is the variant's contig and whose position is its start
 */
@VisibleForTesting
static GenomePosition variantGenomePosition(@NotNull final VariantContext variant) {
    return new GenomePosition() {

        @Override
        @NotNull
        public String chromosome() {
            return variant.getContig();
        }

        @Override
        public long position() {
            return variant.getStart();
        }
    };
}
Example 6
Source File: ArHetvarFilter.java From gatk with BSD 3-Clause "New" or "Revised" License | 5 votes |
/**
 * Decides whether two variant contexts describe the same site and alleles.
 *
 * <p>They match when contig, start, end and reference allele are equal, both have the same
 * number of alternate alleles, and every alternate allele of {@code v2} is present in {@code v1}.
 *
 * @param v1 first variant
 * @param v2 second variant
 * @return {@code true} if the two variants match as described above
 */
private boolean variantContextsMatch(VariantContext v1, VariantContext v2) {
    return v1.getContig().equals(v2.getContig())
            && v1.getStart() == v2.getStart()
            && v1.getEnd() == v2.getEnd()
            // Compare by value: '==' only tests object identity, so two equal reference
            // alleles held in distinct instances would wrongly be reported as different.
            && v1.getReference().equals(v2.getReference())
            && v1.getAlternateAlleles().size() == v2.getAlternateAlleles().size()
            && v1.getAlternateAlleles().containsAll(v2.getAlternateAlleles());
}
Example 7
Source File: PileupSummary.java From gatk with BSD 3-Clause "New" or "Revised" License | 5 votes |
/**
 * Summarises a read pileup at a variant site.
 *
 * <p>Only the first base of the reference allele and of the first alternate allele is used to
 * look up counts. Every base count that belongs to neither of those two bases is reported as
 * "other alt".
 *
 * @param vc     the variant; supplies contig, start, the allele-frequency attribute
 *               (defaulting to 0) and the ref / first-alt alleles
 * @param pileup the pileup whose per-base counts are summarised
 */
public PileupSummary(final VariantContext vc, final ReadPileup pileup) {
    this.contig = vc.getContig();
    this.position = vc.getStart();
    this.alleleFrequency = vc.getAttributeAsDouble(VCFConstants.ALLELE_FREQUENCY_KEY, 0);

    final byte firstAltBase = vc.getAlternateAllele(0).getBases()[0];
    final byte firstRefBase = vc.getReference().getBases()[0];
    final int altIndex = BaseUtils.simpleBaseToBaseIndex(firstAltBase);
    final int refIndex = BaseUtils.simpleBaseToBaseIndex(firstRefBase);

    final int[] countsPerBase = pileup.getBaseCounts();
    this.altCount = countsPerBase[altIndex];
    this.refCount = countsPerBase[refIndex];
    this.totalCount = (int) MathUtils.sum(countsPerBase);
    this.otherAltsCount = this.totalCount - this.altCount - this.refCount;
}
Example 8
Source File: SVVCFReader.java From gatk with BSD 3-Clause "New" or "Revised" License | 5 votes |
/**
 * Loads the structural variants of a truth VCF into an interval tree keyed by padded breakpoint
 * interval, with the variant ID as the value.
 *
 * <p>Records without a structural variant type are skipped. For DEL, INV and CNV the interval is
 * {@code [start - padding, end + padding]}; for INS, DUP and BND it is
 * {@code [start - padding, start + padding]}.
 *
 * @param truthVCF   path of the truth VCF
 * @param dictionary sequence dictionary used to translate contig names into indices
 * @param padding    number of bases added on both sides of each interval
 * @return the populated interval tree
 * @throws UserException if a record's contig is not present in {@code dictionary}
 */
public static SVIntervalTree<String> readBreakpointsFromTruthVCF(final String truthVCF,
                                                                 final SAMSequenceDictionary dictionary,
                                                                 final int padding) {
    final SVIntervalTree<String> tree = new SVIntervalTree<>();

    try (final FeatureDataSource<VariantContext> truthSource =
                 new FeatureDataSource<>(truthVCF, null, 0, VariantContext.class)) {
        for (final VariantContext vc : truthSource) {
            final StructuralVariantType svType = vc.getStructuralVariantType();
            if (svType == null) {
                continue;
            }

            final String eventName = vc.getID();
            final int contigID = dictionary.getSequenceIndex(vc.getContig());
            if (contigID < 0) {
                throw new UserException("VCF contig " + vc.getContig() + " does not appear in dictionary.");
            }

            final int start = vc.getStart();

            // Pick the coordinate that anchors the right-hand side of the padded interval.
            final int rightAnchor;
            switch (svType) {
                case DEL:
                case INV:
                case CNV:
                    rightAnchor = vc.getEnd();
                    break;
                case INS:
                case DUP:
                case BND:
                    rightAnchor = start;
                    break;
                default:
                    // Any other type contributes no breakpoint.
                    continue;
            }

            tree.put(new SVInterval(contigID, start - padding, rightAnchor + padding), eventName);
        }
    }

    return tree;
}
Example 9
Source File: LiftoverVcfTest.java From picard with MIT License | 5 votes |
/**
 * Lifts the locus of {@code source} over with {@code CHAIN_FILE}, lifts the variant itself onto
 * the resulting interval, and compares the outcome with {@code result}.
 *
 * <p>The test aborts with a {@link RuntimeException} when the lifted interval exists but is not
 * on the negative strand.
 */
@Test(dataProvider = "indelFlipData")
public void testFlipIndel(final VariantContext source, final ReferenceSequence reference, final VariantContext result) {
    final Interval sourceLocus = new Interval(source.getContig(), source.getStart(), source.getEnd());

    final LiftOver liftOver = new LiftOver(CHAIN_FILE);
    final Interval target = liftOver.liftOver(sourceLocus);

    final boolean liftedButNotReversed = target != null && !target.isNegativeStrand();
    if (liftedButNotReversed) {
        throw new RuntimeException("not reversed");
    }

    final VariantContext flipped = LiftoverUtils.liftVariant(source, target, reference, false, false);
    VcfTestUtils.assertEquals(flipped, result);
}
Example 10
Source File: KataegisWindow.java From hmftools with GNU General Public License v3.0 | 4 votes |
KataegisWindow(final VariantContext context) { this.contig = context.getContig(); this.start = context.getStart(); this.end = this.start; this.count = 0; }
Example 11
Source File: FuncotatorEngineUnitTest.java From gatk with BSD 3-Clause "New" or "Revised" License | 4 votes |
@Test(dataProvider = "provideGt") public void testGetFuncotationFactoriesAndCreateFuncotationMapForVariant(final File vcfFile, final List<String> correspondingGeneName, final boolean[] hasClinvarHit) { final Pair<VCFHeader, List<VariantContext>> entireVcf = VariantContextTestUtils.readEntireVCFIntoMemory(vcfFile.getAbsolutePath()); final Map<Path, Properties> configData = DataSourceUtils.getAndValidateDataSourcesFromPaths("hg19", Collections.singletonList(DS_PIK3CA_DIR)); final Pair<VCFHeader, List<VariantContext>> vcfFileContents = VariantContextTestUtils.readEntireVCFIntoMemory(vcfFile.getAbsolutePath()); // Set up our arguments: final FuncotatorVariantArgumentCollection funcotatorArguments = new FuncotatorVariantArgumentCollection(); funcotatorArguments.referenceVersion = BaseFuncotatorArgumentCollection.FuncotatorReferenceVersionHg19; funcotatorArguments.transcriptSelectionMode = TranscriptSelectionMode.CANONICAL; funcotatorArguments.lookaheadFeatureCachingInBp = FuncotatorArgumentDefinitions.LOOKAHEAD_CACHE_IN_BP_DEFAULT_VALUE; // Create the metadata directly from the input. 
final FuncotatorEngine funcotatorEngine = new FuncotatorEngine( funcotatorArguments, vcfFileContents.getLeft().getSequenceDictionary(), VcfFuncotationMetadata.create(new ArrayList<>(entireVcf.getLeft().getInfoHeaderLines())), DataSourceUtils.createDataSourceFuncotationFactoriesForDataSources( configData, new LinkedHashMap<>(), TranscriptSelectionMode.CANONICAL, new HashSet<>(), new DummyPlaceholderGatkTool(), FuncotatorArgumentDefinitions.LOOKAHEAD_CACHE_IN_BP_DEFAULT_VALUE, new FlankSettings(0, 0), false, FuncotatorUtils.DEFAULT_MIN_NUM_BASES_FOR_VALID_SEGMENT) ); for (int i = 0; i < entireVcf.getRight().size(); i++) { final VariantContext vc = entireVcf.getRight().get(i); final SimpleInterval variantInterval = new SimpleInterval(vc.getContig(), vc.getStart(), vc.getEnd()); final ReferenceContext referenceContext = new ReferenceContext(ReferenceDataSource.of(Paths.get(FuncotatorReferenceTestUtils.retrieveHg19Chr3Ref())), variantInterval); final FeatureContext featureContext = FuncotatorTestUtils.createFeatureContext(funcotatorEngine.getFuncotationFactories(), "TEST", variantInterval, 0,0,0, null); final FuncotationMap funcotationMap = funcotatorEngine.createFuncotationMapForVariant(vc, referenceContext, featureContext); // Check that all of the transcripts at this location have the same gene name as the corresponding gene. // The ground truth selected has the same gene name for all transcripts. // Also, input VCF has no multiallelics. for (final String txId : funcotationMap.getTranscriptList()) { Assert.assertEquals(funcotationMap.getFieldValue(txId, "Gencode_19_hugoSymbol", vc.getAlternateAllele(0)), correspondingGeneName.get(i)); Assert.assertTrue((funcotationMap.getFieldValue(txId, "dummy_ClinVar_VCF_ALLELEID", vc.getAlternateAllele(0)).isEmpty()) != hasClinvarHit[i]); } } }
Example 12
Source File: CosmicFuncotationFactory.java From gatk with BSD 3-Clause "New" or "Revised" License | 4 votes |
/**
 * Creates one funcotation per alternate allele of {@code variant}, each listing the protein
 * changes of the database records that overlap the variant, with their occurrence counts.
 *
 * <p>For every Gencode funcotation the database is queried by gene name. A result row is counted
 * when its genome position overlaps the variant; otherwise, when the Gencode funcotation has a
 * protein change, the row is counted if its protein position overlaps that change.
 *
 * @param variant             the variant being annotated
 * @param referenceContext    not used by this implementation
 * @param featureList         not used by this implementation
 * @param gencodeFuncotations Gencode funcotations supplying gene names and protein changes
 * @return one funcotation per alternate allele; counts are formatted as
 *         {@code change(count)} joined with {@code |}
 * @throws GATKException if the database query fails
 */
@Override
protected List<Funcotation> createFuncotationsOnVariant(final VariantContext variant,
                                                        final ReferenceContext referenceContext,
                                                        final List<Feature> featureList,
                                                        final List<GencodeFuncotation> gencodeFuncotations) {

    final List<Funcotation> outputFuncotations = new ArrayList<>();

    // Keep count of each overlapping mutation here:
    final Map<String, Integer> proteinChangeCounts = new LinkedHashMap<>();

    // If we have gencodeFuncotations we go through them and get the gene name
    // Then query our DB for matches on the gene name.
    // Then grab Genome position / Protein position and see if we overlap.
    // If any do, we create our CosmicFuncotation
    for (final GencodeFuncotation gencodeFuncotation : gencodeFuncotations) {
        final String geneName = gencodeFuncotation.getHugoSymbol();

        final SimpleInterval genomePosition =
                new SimpleInterval(variant.getContig(), variant.getStart(), variant.getEnd());

        final SimpleInterval proteinPosition;
        if (gencodeFuncotation.getProteinChange() != null) {
            proteinPosition = parseProteinString(gencodeFuncotation.getProteinChange());
        } else {
            proteinPosition = null;
        }

        // Bind the gene name as a parameter instead of splicing it into the SQL text: a name
        // containing a quote can no longer corrupt the query, and the value cannot be
        // mistaken for an identifier by the database.
        // Assumes RESULT_QUERY_TEMPLATE ends exactly where the gene-name value belongs, as
        // the previous string concatenation implied.
        try (final java.sql.PreparedStatement statement =
                     dbConnection.prepareStatement(RESULT_QUERY_TEMPLATE + "?;")) {
            statement.setString(1, geneName);

            try (final ResultSet resultSet = statement.executeQuery()) {
                // iterate through our results:
                while (resultSet.next()) {

                    // Get the genome position:
                    final SimpleInterval cosmicGenomePosition = getGenomePositionFromResults(resultSet);

                    // Try to match on genome position first:
                    if (cosmicGenomePosition != null && genomePosition.overlaps(cosmicGenomePosition)) {
                        // If we overlap the records, we get the protein change and add it to the map:
                        updateProteinChangeCountMap(proteinChangeCounts, resultSet);
                        continue;
                    }

                    // Get the protein position:
                    final SimpleInterval cosmicProteinPosition = getProteinPositionFromResults(resultSet);

                    // Now try to match on protein position:
                    // NOTE: We can't annotate if the protein position is null.
                    if (proteinPosition != null && proteinPosition.overlaps(cosmicProteinPosition)) {
                        // If we overlap the records, we update the counter:
                        updateProteinChangeCountMap(proteinChangeCounts, resultSet);
                    }
                }
            }
        } catch (final SQLException ex) {
            throw new GATKException("Unable to query the database for geneName: " + geneName, ex);
        }
    }

    // The rendered counts are the same for every allele, so build the string once.
    final String renderedCounts = proteinChangeCounts.entrySet().stream()
            .map(entry -> entry.getKey() + '(' + entry.getValue() + ')')
            .collect(Collectors.joining("|"));

    // Add our counts to all alternate alleles in this variant:
    for (final Allele altAllele : variant.getAlternateAlleles()) {
        outputFuncotations.add(
                TableFuncotation.create(
                        new ArrayList<>(supportedFields),
                        Collections.singletonList(renderedCounts),
                        altAllele,
                        name,
                        null
                )
        );
    }

    return outputFuncotations;
}
Example 13
Source File: CreateSnpIntervalFromVcf.java From Drop-seq with MIT License | 4 votes |
public IntervalList processData(final File vcfFile, final File sdFile, final Set<String> sample, int GQThreshold, final boolean hetSNPsOnly) { final VCFFileReader reader = new VCFFileReader(vcfFile, false); if (!VCFUtils.GQInHeader(reader)) { GQThreshold=-1; log.info("Genotype Quality [GQ] not found in header. Disabling GQ_THRESHOLD parameter"); } final VCFHeader inputVcfHeader = new VCFHeader(reader.getFileHeader().getMetaDataInInputOrder()); SAMSequenceDictionary sequenceDictionary = inputVcfHeader.getSequenceDictionary(); Set<String> sampleListFinal = sample; if (sample==null || sample.isEmpty()) { ArrayList<String> s = reader.getFileHeader().getSampleNamesInOrder(); sampleListFinal=new TreeSet<String>(s); } if (sdFile != null) sequenceDictionary = getSequenceDictionary(sdFile); final ProgressLogger progress = new ProgressLogger(this.log, 500000); final SAMFileHeader samHeader = new SAMFileHeader(); samHeader.setSequenceDictionary(sequenceDictionary); IntervalList result = new IntervalList(samHeader); // Go through the input, find sites we want to keep. final PeekableIterator<VariantContext> iterator = new PeekableIterator<>(reader.iterator()); validateRequestedSamples (iterator, sampleListFinal); while (iterator.hasNext()) { final VariantContext site = iterator.next(); progress.record(site.getContig(), site.getStart()); // for now drop any filtered site. if (site.isFiltered()) continue; // move onto the next record if the site is not a SNP or the samples aren't all heterozygous. if (!site.isSNP()) continue; if (!sitePassesFilters(site, sampleListFinal, GQThreshold, hetSNPsOnly)) continue; Interval varInt = new Interval(site.getContig(), site.getStart(), site.getEnd(), true, site.getID()); // final Interval site = findHeterozygousSites(full, SAMPLE); result.add(varInt); } CloserUtil.close(iterator); CloserUtil.close(reader); return (result); }
Example 14
Source File: HotspotEnrichment.java From hmftools with GNU General Public License v3.0 | 4 votes |
/**
 * Tests whether a variant matches a hotspot exactly: same start position, same reference bases,
 * and at least one alternate allele whose bases equal the hotspot's alt.
 *
 * @param hotspot the hotspot to look for
 * @param variant the variant to test
 * @return {@code true} on an exact match
 */
private static boolean exactMatch(@NotNull final VariantHotspot hotspot, @NotNull final VariantContext variant) {
    if (hotspot.position() != variant.getStart()) {
        return false;
    }
    if (!hotspot.ref().equals(variant.getReference().getBaseString())) {
        return false;
    }

    final String hotspotAlt = hotspot.alt();
    for (final Allele alternate : variant.getAlternateAlleles()) {
        if (alternate.getBaseString().equals(hotspotAlt)) {
            return true;
        }
    }
    return false;
}
Example 15
Source File: ReadPosRankSumTest.java From gatk with BSD 3-Clause "New" or "Revised" License | 4 votes |
@Override public boolean isUsableRead(final GATKRead read, final VariantContext vc) { Utils.nonNull(read); // we use vc.getEnd() + 1 in case of a leading indel -- if this isn't relevant getReadPosition will return empty return super.isUsableRead(read, vc) && read.getSoftStart() <= vc.getEnd() + 1 && read.getSoftEnd() >= vc.getStart(); }
Example 16
Source File: CombineGVCFsIntegrationTest.java From gatk with BSD 3-Clause "New" or "Revised" License | 4 votes |
@Test public void testCombineSomaticGvcfs() throws Exception { final File output = createTempFile("combinegvcfs", ".vcf"); final ArgumentsBuilder args = new ArgumentsBuilder(); args.addReference(new File(b37Reference)).addOutput(output) .addVCF(getTestFile("NA12878.MT.filtered.g.vcf")) .addVCF(getTestFile("NA19240.MT.filtered.g.vcf")) .add(CombineGVCFs.SOMATIC_INPUT_LONG_NAME, true); runCommandLine(args); final List<VariantContext> actualVC = getVariantContexts(output); for (final VariantContext vc : actualVC) { if (vc.getAlternateAlleles().size() > 1) { //if there's a real ALT Assert.assertTrue(vc.filtersWereApplied()); //filtering should happen during combine } else { Assert.assertFalse(vc.filtersWereApplied()); } //MT:302 has an alphabet soup of alleles in the GVCF if (vc.getStart() == 302) { Assert.assertEquals(vc.getNAlleles(), 9); } //make sure phasing is retained if (vc.getStart() == 317 || vc.getStart() == 320) { VariantContextTestUtils.assertGenotypeIsPhasedWithAttributes(vc.getGenotype(1)); } //MT:4769 in combined GVCF has uncalled alleles, but we should keep them around if (vc.getStart() == 4769) { Assert.assertEquals(vc.getNAlleles(), 4); Assert.assertTrue(vc.getAlternateAlleles().contains(Allele.create("G", false))); Assert.assertTrue(vc.getAlternateAlleles().contains(Allele.create("T", false))); Assert.assertTrue(vc.getAlternateAlleles().contains(Allele.NON_REF_ALLELE)); } //check genotype filtering if (vc.getStart() == 14872) { Assert.assertTrue(!vc.isFiltered()); Assert.assertTrue(!vc.getGenotype(0).isFiltered()); } } final List<VariantContext> expectedVC = getVariantContexts(getTestFile("twoSamples.MT.g.vcf")); final VCFHeader header = getHeaderFromFile(output); assertForEachElementInLists(actualVC, expectedVC, (a, e) -> VariantContextTestUtils.assertVariantContextsAreEqualAlleleOrderIndependent(a, e, Arrays.asList(), Collections.emptyList(), header)); }
Example 17
Source File: CustomMafFuncotationCreatorUnitTest.java From gatk with BSD 3-Clause "New" or "Revised" License | 4 votes |
/**
 * Checks the custom MAF dbSNP fields derived from funcotations of the dbSNP snippet, and that
 * adding an unrelated funcotation does not change them.
 *
 * As a side effect, this also tests (https://github.com/broadinstitute/gatk/issues/4972)
 */
@Test(dataProvider = "provideDbSnpVariants")
public void testCreateDbSnpCustomFields(final VariantContext variant, final int gtNumHits, final String gtDbSnpValStatusField) {
    final Path sourceFilePath = IOUtils.getPath(FuncotatorTestConstants.DBSNP_HG19_SNIPPET_FILE_PATH);
    final DataSourceFuncotationFactory vcfFuncotationFactory = new VcfFuncotationFactory(
            DBSNP_DS_NAME,
            "snippetTest",
            sourceFilePath,
            new LinkedHashMap<>(),
            new FeatureInput<VariantContext>(sourceFilePath.toString(), DBSNP_DS_NAME, new HashMap<>())
    );

    /* dbSNP records of relevance.
    1 10177 rs367896724 A AC . . RS=367896724;RSPOS=10177;dbSNPBuildID=138;SSR=0;SAO=0;VP=0x050000020005170026000200;GENEINFO=DDX11L1:100287102;WGT=1;VC=DIV;R5;ASP;VLD;G5A;G5;KGPhase3;CAF=0.5747,0.4253;COMMON=1
    1 10352 rs555500075 T TA . . RS=555500075;RSPOS=10352;dbSNPBuildID=142;SSR=0;SAO=0;VP=0x050000020005170026000200;GENEINFO=DDX11L1:100287102;WGT=1;VC=DIV;R5;ASP;VLD;G5A;G5;KGPhase3;CAF=0.5625,0.4375;COMMON=1
    1 10352 rs145072688 T TA . . RS=145072688;RSPOS=10353;dbSNPBuildID=134;SSR=0;SAO=0;VP=0x050000020005000002000200;GENEINFO=DDX11L1:100287102;WGT=1;VC=DIV;R5;ASP;CAF=0.5625,0.4375;COMMON=1
     */

    final String contig = variant.getContig();
    final int start = variant.getStart();
    final int end = variant.getEnd();

    // Create features from the file:
    final List<Feature> vcfFeatures;
    try (final VCFFileReader vcfReader =
                 new VCFFileReader(IOUtils.getPath(FuncotatorTestConstants.DBSNP_HG19_SNIPPET_FILE_PATH))) {
        vcfFeatures = vcfReader.query(contig, start, end).stream().collect(Collectors.toList());
    }
    Assert.assertEquals(vcfFeatures.size(), gtNumHits);

    final Map<String, List<Feature>> vcfFuncotationSourceMap = ImmutableMap.of(DBSNP_DS_NAME, vcfFeatures);

    final ReferenceContext referenceContext = new ReferenceContext(
            ReferenceDataSource.of(Paths.get(HG19_CHR1_1M_FASTA)),
            new SimpleInterval(contig, start, end));
    final FeatureContext featureContext = FuncotatorTestUtils.createFeatureContext(
            Collections.singletonList(vcfFuncotationFactory),
            "TEST_CREATE_DB_SNP_CUSTOM_FIELDS",
            new SimpleInterval(contig, start, end),
            0, 0, 0, null);

    final List<Funcotation> funcotations =
            vcfFuncotationFactory.createFuncotations(variant, referenceContext, featureContext);
    Assert.assertTrue(funcotations.size() > 0);

    // Each funcotation must carry one VLD entry per feature found at the site.
    final String vldFieldName = DBSNP_DS_NAME + "_VLD";
    for (final Funcotation f : funcotations) {
        Assert.assertEquals(
                StringUtils.split(f.getField(vldFieldName), "|").length,
                vcfFuncotationSourceMap.get(DBSNP_DS_NAME).size());
    }

    final List<Funcotation> customDbSnpFuncotations = CustomMafFuncotationCreator.createCustomMafDbSnpFields(funcotations);
    final List<String> observedValStatus = customDbSnpFuncotations.stream()
            .map(f -> f.getField(MAF_DBSNP_VAL_STATUS_FIELD))
            .collect(Collectors.toList());
    Assert.assertEquals(observedValStatus, Collections.singletonList(gtDbSnpValStatusField));

    // Now add some dummy (non-DbSNP) funcotations and make sure that we are not getting any additional custom dbsnp maf fields.
    final List<String> dummyFieldNames = Arrays.asList("foo_field", "bar_field");
    final Funcotation dummyFuncotation = TableFuncotation.create(
            dummyFieldNames,
            Arrays.asList("1", "2"),
            Allele.create("AA"),
            "DUMMY",
            FuncotationMetadataUtils.createWithUnknownAttributes(dummyFieldNames));
    funcotations.add(dummyFuncotation);

    final List<Funcotation> customDbSnpFuncotationsWithoutDummies =
            CustomMafFuncotationCreator.createCustomMafDbSnpFields(funcotations);
    Assert.assertEquals(customDbSnpFuncotationsWithoutDummies, customDbSnpFuncotations);
}
Example 18
Source File: MNVRegionValidator.java From hmftools with GNU General Public License v3.0 | 4 votes |
/**
 * Tests whether a read's alignment spans the whole region, from the region start up to the last
 * reference base of the region's last variant.
 *
 * @param record the aligned read
 * @return {@code true} if the alignment covers every position of the region
 */
private boolean containsAllMNVPositions(@NotNull final SAMRecord record) {
    final int lastIndex = region().variants().size() - 1;
    final VariantContext lastVariant = region().variants().get(lastIndex);

    if (record.getAlignmentStart() > region().start()) {
        return false;
    }

    final int lastCoveredPosition = lastVariant.getStart() + lastVariant.getReference().length() - 1;
    return record.getAlignmentEnd() >= lastCoveredPosition;
}
Example 19
Source File: TandemRepeat.java From gatk with BSD 3-Clause "New" or "Revised" License | 4 votes |
public static Pair<List<Integer>, byte[]> getNumTandemRepeatUnits(final ReferenceContext ref, final VariantContext vc) { final byte[] refBases = ref.getBases(); final int startIndex = vc.getStart() + 1 - ref.getWindow().getStart(); // +1 to exclude leading match base common to VC's ref and alt alleles return GATKVariantContextUtils.getNumTandemRepeatUnits(vc, Arrays.copyOfRange(refBases, startIndex, refBases.length)); }