htsjdk.variant.variantcontext.VariantContextComparator Java Examples
The following examples show how to use
htsjdk.variant.variantcontext.VariantContextComparator.
Each example is drawn from an open-source project; the source file, project, and license are noted above each snippet.
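Before the project examples, here is a minimal, self-contained sketch of typical usage. It is not taken from any of the projects below; the file name input.vcf and the class name are placeholders, and it assumes the VCF header declares contig lines so a comparator can be built. The comparator orders VariantContext records by contig (in sequence-dictionary or contig-list order) and then by start position, so it can be passed to standard Java sorting.

import htsjdk.variant.variantcontext.VariantContext;
import htsjdk.variant.variantcontext.VariantContextComparator;
import htsjdk.variant.vcf.VCFFileReader;
import htsjdk.variant.vcf.VCFHeader;

import java.io.File;
import java.util.ArrayList;
import java.util.List;

public class VariantContextComparatorSketch {
    public static void main(final String[] args) {
        // "input.vcf" is a placeholder path; its header must declare contigs
        try (final VCFFileReader reader = new VCFFileReader(new File("input.vcf"), false)) {
            final VCFHeader header = reader.getFileHeader();

            // Either ask the header for its record comparator...
            final VariantContextComparator fromHeader = header.getVCFRecordComparator();
            // ...or build one directly from the sequence dictionary (a contig-name list also works)
            final VariantContextComparator fromDict = new VariantContextComparator(header.getSequenceDictionary());

            // Sort records into reference order: contig first, then start position
            final List<VariantContext> variants = new ArrayList<>();
            for (final VariantContext vc : reader) {
                variants.add(vc);
            }
            variants.sort(fromDict);
        }
    }
}

The same comparator is what MergingIterator and SortingCollection take in the examples below when several inputs have to be merged, or an unsorted input sorted, in reference order.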
Example #1
Source File: MultiVariantDataSource.java From gatk with BSD 3-Clause "New" or "Revised" License
/**
 * Close any existing iterator, create a new iterator and update the local cached iterator reference.
 * @param iteratorFromSource function to retrieve individual iterator, to be applied to each data source
 * @return
 */
private Iterator<VariantContext> getMergedIteratorFromDataSources(
        final Function<FeatureDataSource<VariantContext>, Iterator<VariantContext>> iteratorFromSource) {
    // Tribble documentation states that having multiple iterators open simultaneously over the same FeatureReader
    // results in undefined behavior
    closeOpenIterationIfNecessary();

    if (featureDataSources.size() > 1) {
        final List<CloseableIterator<VariantContext>> iterators = new ArrayList<>(featureDataSources.size());
        featureDataSources.forEach(ds -> iterators.add(getCloseableIteratorWrapper(iteratorFromSource.apply(ds))));

        final VariantContextComparator varComparator = new VariantContextComparator(getSequenceDictionary());
        currentIterator = new MergingIterator<>(varComparator, iterators);
    } else {
        currentIterator = getCloseableIteratorWrapper(iteratorFromSource.apply(featureDataSources.get(0)));
    }
    return currentIterator;
}
Example #2
Source File: AbstractVcfMergingClpTester.java From picard with MIT License
/**
 * Make sure that the order of the output file is identical to the order
 * of the input files by iterating through the output, making sure that,
 * if the context is an indel (snp), the next genomic position in the indel
 * (snp) queue is the same. Also make sure that the context is in the order
 * specified by the input files.
 */
private void validateSnpAndIndelResults(final File output, final Queue<String> indelContigPositions,
                                        final Queue<String> snpContigPositions) {
    final VCFFileReader outputReader = new VCFFileReader(output, false);
    final VariantContextComparator outputComparator = outputReader.getFileHeader().getVCFRecordComparator();
    VariantContext last = null;
    final CloseableIterator<VariantContext> iterator = outputReader.iterator();
    while (iterator.hasNext()) {
        final VariantContext outputContext = iterator.next();
        if (outputContext.isIndel()) Assert.assertEquals(getContigPosition(outputContext), indelContigPositions.poll());
        if (outputContext.isSNP()) Assert.assertEquals(getContigPosition(outputContext), snpContigPositions.poll());
        if (last != null) Assert.assertTrue(outputComparator.compare(last, outputContext) <= 0);
        last = outputContext;
    }
    iterator.close();

    // We should have polled everything off the indel (snp) queues
    Assert.assertEquals(indelContigPositions.size(), 0);
    Assert.assertEquals(snpContigPositions.size(), 0);
}
Example #3
Source File: SortVcfsTest.java From picard with MIT License
/**
 * Checks the ordering and total number of variant context entries in the specified output VCF file.
 * Does NOT check explicitly that the VC genomic positions match exactly those from the inputs. We assume this behavior from other tests.
 *
 * @param output VCF file representing the output of SortVCF
 * @param expectedVariantContextCount the total number of variant context entries from all input files that were merged/sorted
 */
private void validateSortingResults(final File output, final int expectedVariantContextCount) {
    final VCFFileReader outputReader = new VCFFileReader(output, false);
    final VariantContextComparator outputComparator = outputReader.getFileHeader().getVCFRecordComparator();
    VariantContext last = null;
    int variantContextCount = 0;
    final CloseableIterator<VariantContext> iterator = outputReader.iterator();
    while (iterator.hasNext()) {
        final VariantContext outputContext = iterator.next();
        if (last != null) Assert.assertTrue(outputComparator.compare(last, outputContext) <= 0);
        last = outputContext;
        variantContextCount++;
    }
    iterator.close();
    Assert.assertEquals(variantContextCount, expectedVariantContextCount);
}
Example #4
Source File: CreateSomaticPanelOfNormals.java From gatk-protected with BSD 3-Clause "New" or "Revised" License
public Object doWork() {
    final List<File> inputVcfs = new ArrayList<>(vcfs);
    final Collection<CloseableIterator<VariantContext>> iterators = new ArrayList<>(inputVcfs.size());
    final Collection<VCFHeader> headers = new HashSet<>(inputVcfs.size());
    final VCFHeader headerOfFirstVcf = new VCFFileReader(inputVcfs.get(0), false).getFileHeader();
    final SAMSequenceDictionary sequenceDictionary = headerOfFirstVcf.getSequenceDictionary();
    final VariantContextComparator comparator = headerOfFirstVcf.getVCFRecordComparator();

    for (final File vcf : inputVcfs) {
        final VCFFileReader reader = new VCFFileReader(vcf, false);
        iterators.add(reader.iterator());
        final VCFHeader header = reader.getFileHeader();
        Utils.validateArg(comparator.isCompatible(header.getContigLines()), () -> vcf.getAbsolutePath() + " has incompatible contigs.");
        headers.add(header);
    }

    final VariantContextWriter writer = GATKVariantContextUtils.createVCFWriter(outputVcf, sequenceDictionary, false, Options.INDEX_ON_THE_FLY);
    writer.writeHeader(new VCFHeader(VCFUtils.smartMergeHeaders(headers, false)));

    final MergingIterator<VariantContext> mergingIterator = new MergingIterator<>(comparator, iterators);
    SimpleInterval currentPosition = new SimpleInterval("FAKE", 1, 1);
    final List<VariantContext> variantsAtThisPosition = new ArrayList<>(20);
    while (mergingIterator.hasNext()) {
        final VariantContext vc = mergingIterator.next();
        if (!currentPosition.overlaps(vc)) {
            processVariantsAtSamePosition(variantsAtThisPosition, writer);
            variantsAtThisPosition.clear();
            currentPosition = new SimpleInterval(vc.getContig(), vc.getStart(), vc.getStart());
        }
        variantsAtThisPosition.add(vc);
    }
    mergingIterator.close();
    writer.close();
    return "SUCCESS";
}
Example #5
Source File: AbstractConcordanceWalker.java From gatk with BSD 3-Clause "New" or "Revised" License
@Override
protected final void onStartup() {
    super.onStartup();

    initializeTruthVariantsIfNecessary();
    evalVariants = new FeatureDataSource<>(new FeatureInput<>(evalVariantsFile, "eval"), CACHE_LOOKAHEAD, VariantContext.class, cloudPrefetchBuffer, cloudIndexPrefetchBuffer);

    if ( hasUserSuppliedIntervals() ) {
        truthVariants.setIntervalsForTraversal(userIntervals);
        evalVariants.setIntervalsForTraversal(userIntervals);
    }
    dict = getBestAvailableSequenceDictionary();
    variantContextComparator = new VariantContextComparator(dict);
}
Example #6
Source File: TestVCFRoundTrip.java From Hadoop-BAM with MIT License
@Test
public void testRoundTripWithMerge() throws Exception {
    Path vcfPath = new Path("file://" + testVCFFileName);

    // run a MR job to write out a VCF file
    Path outputPath = doMapReduce(vcfPath, false);

    // merge the output
    VCFHeader vcfHeader = VCFHeaderReader.readHeaderFrom(new SeekableFileStream(new File(testVCFFileName)));
    final File outFile = File.createTempFile("testVCFWriter", testVCFFileName.substring(testVCFFileName.lastIndexOf(".")));
    outFile.deleteOnExit();
    VCFFileMerger.mergeParts(outputPath.toUri().toString(), outFile.toURI().toString(), vcfHeader);
    List<VariantContext> actualVariants = new ArrayList<>();
    VCFFileReader vcfFileReaderActual = parseVcf(outFile);
    Iterators.addAll(actualVariants, vcfFileReaderActual.iterator());

    // verify the output is the same as the input
    List<VariantContext> expectedVariants = new ArrayList<>();
    VCFFileReader vcfFileReader = parseVcf(new File(testVCFFileName));
    Iterators.addAll(expectedVariants, vcfFileReader.iterator());

    // use a VariantContextComparator to check variants are equal
    VariantContextComparator vcfRecordComparator = vcfHeader.getVCFRecordComparator();
    assertEquals(expectedVariants.size(), actualVariants.size());
    for (int i = 0; i < expectedVariants.size(); i++) {
        assertEquals(0, vcfRecordComparator.compare(expectedVariants.get(i), actualVariants.get(i)));
    }
}
Example #7
Source File: VariantContextComparatorTest.java From picard with MIT License
@Test(expectedExceptions = IllegalArgumentException.class)
public void testThrowsOnDuplicateContig() {
    final List<String> contigs = new ArrayList<String>(3);
    contigs.add("one");
    contigs.add("two");
    contigs.add("one");

    new VariantContextComparator(contigs);
}
Example #8
Source File: VariantContextComparatorTest.java From picard with MIT License
@Test
public void testCombinationTwo() {
    final VariantContext contextOne = buildVariantContext("source", "one", 150);
    final VariantContext contextTwo = buildVariantContext("source", "two", 100);
    final List<String> contigs = getOrderedContigList(contextOne, contextTwo);
    Assert.assertTrue(new VariantContextComparator(contigs).compare(contextOne, contextTwo) < 0);
    Assert.assertTrue(new VariantContextComparator(contigs).compare(contextTwo, contextOne) > 0);
}
Example #9
Source File: VariantContextComparatorTest.java From picard with MIT License
@Test
public void testCombinationOne() {
    final VariantContext contextOne = buildVariantContext("source", "one", 100);
    final VariantContext contextTwo = buildVariantContext("source", "two", 150);
    final List<String> contigs = getOrderedContigList(contextOne, contextTwo);
    Assert.assertTrue(new VariantContextComparator(contigs).compare(contextOne, contextTwo) < 0);
    Assert.assertTrue(new VariantContextComparator(contigs).compare(contextTwo, contextOne) > 0);
}
Example #10
Source File: VariantContextComparatorTest.java From picard with MIT License
@Test
public void testContigs() {
    final VariantContext contextOne = buildVariantContext("source", "one", 100);
    final VariantContext contextTwo = buildVariantContext("source", "two", 100);
    final List<String> contigs = getOrderedContigList(contextOne, contextTwo);
    Assert.assertTrue(new VariantContextComparator(contigs).compare(contextOne, contextTwo) < 0);
    Assert.assertTrue(new VariantContextComparator(contigs).compare(contextTwo, contextOne) > 0);
}
Example #11
Source File: VariantContextComparatorTest.java From picard with MIT License
@Test
public void testPositions() {
    final VariantContext contextOne = buildVariantContext("source", "one", 100);
    final VariantContext contextTwo = buildVariantContext("source", "one", 150);
    final List<String> contigs = getOrderedContigList(contextOne, contextTwo);
    Assert.assertTrue(new VariantContextComparator(contigs).compare(contextOne, contextTwo) < 0);
    Assert.assertTrue(new VariantContextComparator(contigs).compare(contextTwo, contextOne) > 0);
}
Example #12
Source File: PairedVariantSubContextIterator.java From picard with MIT License
public PairedVariantSubContextIterator(final Iterator<VariantContext> leftIterator, final String leftSample,
                                       final Iterator<VariantContext> rightIterator, final String rightSample,
                                       final SAMSequenceDictionary dict) {
    this.leftIterator = new PeekableIterator<>(leftIterator);
    this.leftSample = leftSample;
    this.rightIterator = new PeekableIterator<>(rightIterator);
    this.rightSample = rightSample;
    this.comparator = new VariantContextComparator(dict);
}
Example #13
Source File: FindMendelianViolations.java From picard with MIT License
private void writeAllViolations(final MendelianViolationDetector.Result result) {
    if (VCF_DIR != null) {
        LOG.info(String.format("Writing family violation VCFs to %s/", VCF_DIR.getAbsolutePath()));

        final VariantContextComparator vcComparator = new VariantContextComparator(inputHeader.get().getContigLines());
        final Set<VCFHeaderLine> headerLines = new LinkedHashSet<>(inputHeader.get().getMetaDataInInputOrder());

        headerLines.add(new VCFInfoHeaderLine(MendelianViolationDetector.MENDELIAN_VIOLATION_KEY, 1, VCFHeaderLineType.String, "Type of mendelian violation."));
        headerLines.add(new VCFInfoHeaderLine(MendelianViolationDetector.ORIGINAL_AC, VCFHeaderLineCount.A, VCFHeaderLineType.Integer, "Original AC"));
        headerLines.add(new VCFInfoHeaderLine(MendelianViolationDetector.ORIGINAL_AF, VCFHeaderLineCount.A, VCFHeaderLineType.Float, "Original AF"));
        headerLines.add(new VCFInfoHeaderLine(MendelianViolationDetector.ORIGINAL_AN, 1, VCFHeaderLineType.Integer, "Original AN"));

        for (final PedFile.PedTrio trio : pedFile.get().values()) {
            final File outputFile = new File(VCF_DIR, IOUtil.makeFileNameSafe(trio.getFamilyId() + IOUtil.VCF_FILE_EXTENSION));
            LOG.info(String.format("Writing %s violation VCF to %s", trio.getFamilyId(), outputFile.getAbsolutePath()));

            final VariantContextWriter out = new VariantContextWriterBuilder()
                    .setOutputFile(outputFile)
                    .unsetOption(INDEX_ON_THE_FLY)
                    .build();

            final VCFHeader newHeader = new VCFHeader(headerLines, CollectionUtil.makeList(trio.getMaternalId(), trio.getPaternalId(), trio.getIndividualId()));
            final TreeSet<VariantContext> orderedViolations = new TreeSet<>(vcComparator);

            orderedViolations.addAll(result.violations().get(trio.getFamilyId()));
            out.writeHeader(newHeader);
            orderedViolations.forEach(out::add);
            out.close();
        }
    }
}
Example #14
Source File: GtcToVcf.java From picard with MIT License
@Override
protected int doWork() {
    Sex fingerprintSex = getFingerprintSex(FINGERPRINT_GENOTYPES_VCF_FILE);
    String gtcGender = getGenderFromGtcFile(GENDER_GTC, ILLUMINA_BEAD_POOL_MANIFEST_FILE);

    try (InfiniumGTCFile infiniumGTCFile = new InfiniumGTCFile(INPUT, ILLUMINA_BEAD_POOL_MANIFEST_FILE);
         InfiniumEGTFile infiniumEGTFile = new InfiniumEGTFile(CLUSTER_FILE)) {
        final Build37ExtendedIlluminaManifest manifest = setupAndGetManifest(infiniumGTCFile);

        final VCFHeader vcfHeader = createVCFHeader(manifest, infiniumGTCFile, gtcGender, fingerprintSex,
                CLUSTER_FILE, REFERENCE_SEQUENCE, refSeq.getSequenceDictionary());

        // Setup a collection that will sort contexts properly
        // Necessary because input GTC file is not sorted
        final SortingCollection<VariantContext> contexts = SortingCollection.newInstance(
                VariantContext.class,
                new VCFRecordCodec(vcfHeader),
                new VariantContextComparator(refSeq.getSequenceDictionary()),
                MAX_RECORDS_IN_RAM,
                TMP_DIR.stream().map(File::toPath).toArray(Path[]::new));

        // fill the sorting collection
        fillContexts(contexts, infiniumGTCFile, manifest, infiniumEGTFile);

        writeVcf(contexts, OUTPUT, refSeq.getSequenceDictionary(), vcfHeader);

        return 0;
    } catch (IOException e) {
        throw new PicardException("Error processing GTC File: " + INPUT.getAbsolutePath(), e);
    }
}
Example #15
Source File: AbstractConcordanceWalker.java From gatk-protected with BSD 3-Clause "New" or "Revised" License
@Override
protected final void onStartup() {
    super.onStartup();

    initializeTruthVariantsIfNecessary();
    evalVariants = new FeatureDataSource<>(new FeatureInput<>(evalVariantsFile, "eval"), CACHE_LOOKAHEAD, VariantContext.class);

    if ( hasIntervals() ) {
        truthVariants.setIntervalsForTraversal(intervalsForTraversal);
        evalVariants.setIntervalsForTraversal(intervalsForTraversal);
    }
    dict = getBestAvailableSequenceDictionary();
    variantContextComparator = new VariantContextComparator(dict);
}
Example #16
Source File: VariantContextComparatorTest.java From picard with MIT License
@Test
public void testIdentical() {
    final VariantContext contextOne = buildVariantContext("source", "one", 100);
    final List<String> contigs = getOrderedContigList(contextOne);
    Assert.assertEquals(0, new VariantContextComparator(contigs).compare(contextOne, contextOne));
}
Example #17
Source File: MergeVcfs.java From picard with MIT License
@Override
protected int doWork() {
    final ProgressLogger progress = new ProgressLogger(log, 10000);
    final List<String> sampleList = new ArrayList<String>();
    INPUT = IOUtil.unrollFiles(INPUT, IOUtil.VCF_EXTENSIONS);
    final Collection<CloseableIterator<VariantContext>> iteratorCollection = new ArrayList<CloseableIterator<VariantContext>>(INPUT.size());
    final Collection<VCFHeader> headers = new HashSet<VCFHeader>(INPUT.size());
    VariantContextComparator variantContextComparator = null;
    SAMSequenceDictionary sequenceDictionary = null;

    if (SEQUENCE_DICTIONARY != null) {
        sequenceDictionary = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).open(SEQUENCE_DICTIONARY).getFileHeader().getSequenceDictionary();
    }

    for (final File file : INPUT) {
        IOUtil.assertFileIsReadable(file);
        final VCFFileReader fileReader = new VCFFileReader(file, false);
        final VCFHeader fileHeader = fileReader.getFileHeader();
        if (fileHeader.getContigLines().isEmpty()) {
            if (sequenceDictionary == null) {
                throw new IllegalArgumentException(SEQ_DICT_REQUIRED);
            } else {
                fileHeader.setSequenceDictionary(sequenceDictionary);
            }
        }

        if (variantContextComparator == null) {
            variantContextComparator = fileHeader.getVCFRecordComparator();
        } else {
            if (!variantContextComparator.isCompatible(fileHeader.getContigLines())) {
                throw new IllegalArgumentException(
                        "The contig entries in input file " + file.getAbsolutePath() + " are not compatible with the others.");
            }
        }

        if (sequenceDictionary == null) sequenceDictionary = fileHeader.getSequenceDictionary();

        if (sampleList.isEmpty()) {
            sampleList.addAll(fileHeader.getSampleNamesInOrder());
        } else {
            if (!sampleList.equals(fileHeader.getSampleNamesInOrder())) {
                throw new IllegalArgumentException("Input file " + file.getAbsolutePath() + " has sample entries that don't match the other files.");
            }
        }

        // add comments in the first header
        if (headers.isEmpty()) {
            COMMENT.stream().forEach(C -> fileHeader.addMetaDataLine(new VCFHeaderLine("MergeVcfs.comment", C)));
        }

        headers.add(fileHeader);
        iteratorCollection.add(fileReader.iterator());
    }

    if (CREATE_INDEX && sequenceDictionary == null) {
        throw new PicardException(String.format("Index creation failed. %s", SEQ_DICT_REQUIRED));
    }

    final VariantContextWriterBuilder builder = new VariantContextWriterBuilder()
            .setOutputFile(OUTPUT)
            .setReferenceDictionary(sequenceDictionary);

    if (CREATE_INDEX) {
        builder.setOption(Options.INDEX_ON_THE_FLY);
    } else {
        builder.unsetOption(Options.INDEX_ON_THE_FLY);
    }
    final VariantContextWriter writer = builder.build();

    writer.writeHeader(new VCFHeader(VCFUtils.smartMergeHeaders(headers, false), sampleList));

    final MergingIterator<VariantContext> mergingIterator = new MergingIterator<VariantContext>(variantContextComparator, iteratorCollection);
    while (mergingIterator.hasNext()) {
        final VariantContext context = mergingIterator.next();
        writer.add(context);
        progress.record(context.getContig(), context.getStart());
    }

    CloserUtil.close(mergingIterator);
    writer.close();

    return 0;
}
Example #18
Source File: TestVCFRoundTrip.java From Hadoop-BAM with MIT License
@Test
public void testRoundTrip() throws Exception {
    Path vcfPath = new Path("file://" + testVCFFileName);

    // run a MR job to write out a VCF file
    Path outputPath = doMapReduce(vcfPath, true);

    // verify the output is the same as the input
    List<VariantContext> expectedVariants = new ArrayList<>();
    VCFFileReader vcfFileReader = parseVcf(new File(testVCFFileName));
    Iterators.addAll(expectedVariants, vcfFileReader.iterator());

    int splits = 0;
    List<VariantContext> actualVariants = new ArrayList<>();
    File[] vcfFiles = new File(outputPath.toUri()).listFiles(
            pathname -> (!pathname.getName().startsWith(".") && !pathname.getName().startsWith("_")));
    Arrays.sort(vcfFiles); // ensure files are sorted by name
    for (File vcf : vcfFiles) {
        splits++;
        Iterators.addAll(actualVariants, parseVcf(vcf).iterator());
        if (BGZFCodec.class.equals(codecClass)) {
            assertTrue(BlockCompressedInputStream.isValidFile(
                    new BufferedInputStream(new FileInputStream(vcf))));
        } else if (BGZFEnhancedGzipCodec.class.equals(codecClass)) {
            assertTrue(VCFFormat.isGzip(
                    new BufferedInputStream(new FileInputStream(vcf))));
        }
    }
    switch (expectedSplits) {
        case EXACTLY_ONE:
            assertEquals("Should be exactly one split", 1, splits);
            break;
        case MORE_THAN_ONE:
            assertTrue("Should be more than one split", splits > 1);
            break;
        case ANY:
        default:
            break;
    }

    // use a VariantContextComparator to check variants are equal
    VCFHeader vcfHeader = VCFHeaderReader.readHeaderFrom(new SeekableFileStream(new File(testVCFFileName)));
    VariantContextComparator vcfRecordComparator = vcfHeader.getVCFRecordComparator();
    assertEquals(expectedVariants.size(), actualVariants.size());
    for (int i = 0; i < expectedVariants.size(); i++) {
        assertEquals(0, vcfRecordComparator.compare(expectedVariants.get(i), actualVariants.get(i)));
    }
}
Example #19
Source File: GatherVcfsCloud.java From gatk with BSD 3-Clause "New" or "Revised" License
/** Validates that all headers contain the same set of genotyped samples and that files are in order by position of first record. */
private static void assertSameSamplesAndValidOrdering(final List<Path> inputFiles, final boolean disableContigOrderingCheck) {
    final VCFHeader firstHeader = getHeader(inputFiles.get(0));
    final SAMSequenceDictionary dict = firstHeader.getSequenceDictionary();
    if ( dict == null) {
        throw new UserException.BadInput("The first VCF specified is missing the required sequence dictionary. " +
                "This is required to perform validation. You can skip this validation " +
                "using --" + IGNORE_SAFETY_CHECKS_LONG_NAME + " but ignoring safety checks " +
                "can result in invalid output.");
    }
    final VariantContextComparator comparator = new VariantContextComparator(dict);
    final List<String> samples = firstHeader.getGenotypeSamples();

    Path lastFile = null;
    VariantContext lastContext = null;

    for (final Path f : inputFiles) {
        final FeatureReader<VariantContext> in = getReaderFromVCFUri(f, 0);
        final VCFHeader header = (VCFHeader)in.getHeader();
        dict.assertSameDictionary(header.getSequenceDictionary());

        final List<String> theseSamples = header.getGenotypeSamples();
        if (!samples.equals(theseSamples)) {
            final SortedSet<String> s1 = new TreeSet<>(samples);
            final SortedSet<String> s2 = new TreeSet<>(theseSamples);
            s1.removeAll(theseSamples);
            s2.removeAll(samples);

            throw new IllegalArgumentException("VCFs do not have identical sample lists." +
                    " Samples unique to first file: " + s1 + ". Samples unique to " + f.toUri().toString() + ": " + s2 + ".");
        }

        try(final CloseableIterator<VariantContext> variantIterator = in.iterator()) {
            if (variantIterator.hasNext()) {
                final VariantContext currentContext = variantIterator.next();
                if (lastContext != null) {
                    if ( disableContigOrderingCheck ) {
                        if ( lastContext.getContig().equals(currentContext.getContig())
                                && lastContext.getStart() >= currentContext.getStart() ) {
                            throw new IllegalArgumentException(
                                    "First record in file " + f.toUri().toString() + " is not after first record in " +
                                            "previous file " + lastFile.toUri().toString());
                        }
                    } else {
                        if ( comparator.compare(lastContext, currentContext) >= 0 ) {
                            throw new IllegalArgumentException(
                                    "First record in file " + f.toUri().toString() + " is not after first record in " +
                                            "previous file " + lastFile.toUri().toString());
                        }
                    }
                }

                lastContext = currentContext;
                lastFile = f;
            }
        } catch (final IOException e) {
            throw new UserException.CouldNotReadInputFile(f, e.getMessage(), e);
        }

        CloserUtil.close(in);
    }
}