Java Code Examples for htsjdk.samtools.util.CloseableIterator#next()
The following examples show how to use htsjdk.samtools.util.CloseableIterator#next(). Each example is taken from an open-source project; the source file, project, and license are noted above it.
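All of the examples below follow the same basic contract: obtain a CloseableIterator from a reader or a query, consume it with hasNext()/next(), and close it (and the underlying reader) when finished. Because htsjdk's CloseableIterator extends both Iterator and Closeable, the pattern also works with try-with-resources. Here is a minimal sketch of that pattern, assuming a simple record count over a local BAM file; the class name, method name, and file argument are illustrative and not taken from any of the projects below.

import htsjdk.samtools.SAMRecord;
import htsjdk.samtools.SamReader;
import htsjdk.samtools.SamReaderFactory;
import htsjdk.samtools.util.CloseableIterator;

import java.io.File;
import java.io.IOException;

public class CloseableIteratorSketch {

    /** Counts the records in a BAM file using the hasNext()/next() loop shared by the examples below. */
    public static long countRecords(final File bam) throws IOException {
        // CloseableIterator extends both Iterator and Closeable, so try-with-resources
        // releases the underlying stream even if iteration fails part-way through.
        try (SamReader reader = SamReaderFactory.makeDefault().open(bam);
             CloseableIterator<SAMRecord> it = reader.iterator()) {
            long count = 0;
            while (it.hasNext()) {
                it.next(); // consume the record; real code would inspect it here
                count++;
            }
            return count;
        }
    }
}

Several of the examples instead call close() explicitly or delegate to CloserUtil.close(), which serves the same purpose when try-with-resources is not convenient.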
Example 1
Source File: SingleCellRnaSeqMetricsCollector.java From Drop-seq with MIT License
RnaSeqMetricsCollector getRNASeqMetricsCollector(final String cellBarcodeTag, final List<String> cellBarcodes, final File inBAM,
        final RnaSeqMetricsCollector.StrandSpecificity strand, final double rRNAFragmentPCT, final int readMQ,
        final File annotationsFile, final File rRNAIntervalsFile) {

    CollectorFactory factory = new CollectorFactory(inBAM, strand, rRNAFragmentPCT, annotationsFile, rRNAIntervalsFile);
    RnaSeqMetricsCollector collector = factory.getCollector(cellBarcodes);
    List<SAMReadGroupRecord> rg = factory.getReadGroups(cellBarcodes);

    // iterate by cell barcodes. Skip all the reads without cell barcodes.
    CloseableIterator<SAMRecord> iter = getReadsInTagOrder(inBAM, cellBarcodeTag, rg, cellBarcodes, readMQ);

    ProgressLogger p = new ProgressLogger(log, 1000000, "Accumulating metrics");
    while (iter.hasNext()) {
        SAMRecord r = iter.next();
        String cellBarcode = r.getStringAttribute(cellBarcodeTag);
        r.setAttribute("RG", cellBarcode);
        p.record(r);
        collector.acceptRecord(r, null);
    }
    collector.finish();
    return (collector);
}
Example 2
Source File: SortVcfsTest.java From picard with MIT License
/**
 * Checks the ordering and total number of variant context entries in the specified output VCF file.
 * Does NOT check explicitly that the VC genomic positions match exactly those from the inputs. We assume this behavior from other tests.
 *
 * @param output VCF file representing the output of SortVCF
 * @param expectedVariantContextCount the total number of variant context entries from all input files that were merged/sorted
 */
private void validateSortingResults(final File output, final int expectedVariantContextCount) {
    final VCFFileReader outputReader = new VCFFileReader(output, false);
    final VariantContextComparator outputComparator = outputReader.getFileHeader().getVCFRecordComparator();
    VariantContext last = null;
    int variantContextCount = 0;
    final CloseableIterator<VariantContext> iterator = outputReader.iterator();
    while (iterator.hasNext()) {
        final VariantContext outputContext = iterator.next();
        if (last != null) Assert.assertTrue(outputComparator.compare(last, outputContext) <= 0);
        last = outputContext;
        variantContextCount++;
    }
    iterator.close();
    Assert.assertEquals(variantContextCount, expectedVariantContextCount);
}
Example 3
Source File: TestFilterVcf.java From picard with MIT License
@Test(dataProvider = "goodInputVcfs") public void testJavaScript(final File input) throws Exception { final File out = VcfTestUtils.createTemporaryIndexedFile("filterVcfTestJS.", ".vcf"); final FilterVcf filterer = new FilterVcf(); filterer.INPUT = input; filterer.OUTPUT = out; filterer.JAVASCRIPT_FILE = quickJavascriptFilter("variant.getStart()%5 != 0"); final int retval = filterer.doWork(); Assert.assertEquals(retval, 0); //count the number of reads final int expectedNumber = 4; int count = 0; VCFFileReader in = new VCFFileReader(filterer.OUTPUT, false); CloseableIterator<VariantContext> iter = in.iterator(); while (iter.hasNext()) { final VariantContext ctx = iter.next(); count += (ctx.isFiltered() ? 1 : 0); } iter.close(); in.close(); Assert.assertEquals(count, expectedNumber); }
Example 4
Source File: VcfFormatConverter.java From picard with MIT License
@Override
protected int doWork() {
    final ProgressLogger progress = new ProgressLogger(LOG, 10000);
    IOUtil.assertFileIsReadable(INPUT);
    IOUtil.assertFileIsWritable(OUTPUT);

    final VCFFileReader reader = new VCFFileReader(INPUT, REQUIRE_INDEX);
    final VCFHeader header = new VCFHeader(reader.getFileHeader());
    final SAMSequenceDictionary sequenceDictionary = header.getSequenceDictionary();
    if (CREATE_INDEX && sequenceDictionary == null) {
        throw new PicardException("A sequence dictionary must be available in the input file when creating indexed output.");
    }

    final VariantContextWriterBuilder builder = new VariantContextWriterBuilder()
            .setOutputFile(OUTPUT)
            .setReferenceDictionary(sequenceDictionary);
    if (CREATE_INDEX)
        builder.setOption(Options.INDEX_ON_THE_FLY);
    else
        builder.unsetOption(Options.INDEX_ON_THE_FLY);
    final VariantContextWriter writer = builder.build();
    writer.writeHeader(header);

    final CloseableIterator<VariantContext> iterator = reader.iterator();
    while (iterator.hasNext()) {
        final VariantContext context = iterator.next();
        writer.add(context);
        progress.record(context.getContig(), context.getStart());
    }

    CloserUtil.close(iterator);
    CloserUtil.close(reader);
    writer.close();

    return 0;
}
Example 5
Source File: CleanSam.java From picard with MIT License
/**
 * Do the work after the command line has been parsed.
 * RuntimeExceptions may be thrown by this method, and are reported appropriately.
 *
 * @return program exit status.
 */
@Override
protected int doWork() {
    IOUtil.assertFileIsReadable(INPUT);
    IOUtil.assertFileIsWritable(OUTPUT);
    final SamReaderFactory factory = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE);
    if (VALIDATION_STRINGENCY == ValidationStringency.STRICT) {
        factory.validationStringency(ValidationStringency.LENIENT);
    }
    final SamReader reader = factory.open(INPUT);
    final SAMFileWriter writer = new SAMFileWriterFactory().makeSAMOrBAMWriter(reader.getFileHeader(), true, OUTPUT);
    final CloseableIterator<SAMRecord> it = reader.iterator();
    final ProgressLogger progress = new ProgressLogger(Log.getInstance(CleanSam.class));

    while (it.hasNext()) {
        final SAMRecord rec = it.next();

        // If the read (or its mate) maps off the end of the alignment, clip it
        AbstractAlignmentMerger.createNewCigarsIfMapsOffEndOfReference(rec);

        // check the read's mapping quality
        if (rec.getReadUnmappedFlag() && 0 != rec.getMappingQuality()) {
            rec.setMappingQuality(0);
        }

        writer.addAlignment(rec);
        progress.record(rec);
    }

    writer.close();
    it.close();
    CloserUtil.close(reader);
    return 0;
}
Example 6
Source File: InputValidationTest.java From imputationserver with GNU Affero General Public License v3.0
public void testTabixIndexCreationChr1() throws IOException {
    String configFolder = "test-data/configs/hapmap-chr1";
    // input folder contains no vcf or vcf.gz files
    String inputFolder = "test-data/data/single";

    // create workflow context
    WorkflowTestContext context = buildContext(inputFolder, "hapmap2");
    context.setInput("phasing", "eagle");

    // create step instance
    InputValidation inputValidation = new InputValidationMock(configFolder);

    // run and test
    boolean result = run(context, inputValidation);

    // check that the step succeeded
    assertEquals(true, result);
    assertTrue(context.hasInMemory("[OK] 1 valid VCF file(s) found."));

    // test tabix index and count snps
    String vcfFilename = inputFolder + "/minimac_test.50.vcf.gz";
    VCFFileReader vcfReader = new VCFFileReader(new File(vcfFilename),
            new File(vcfFilename + TabixUtils.STANDARD_INDEX_EXTENSION), true);
    CloseableIterator<VariantContext> snps = vcfReader.query("1", 1, 1000000000);
    int count = 0;
    while (snps.hasNext()) {
        snps.next();
        count++;
    }
    snps.close();
    vcfReader.close();

    // check snps
    assertEquals(905, count);
}
Example 7
Source File: InputValidationTest.java From imputationserver with GNU Affero General Public License v3.0
public void testTabixIndexCreationChr20() throws IOException {
    String configFolder = "test-data/configs/hapmap-chr1";
    // input folder contains no vcf or vcf.gz files
    String inputFolder = "test-data/data/chr20-phased";

    // create workflow context
    WorkflowTestContext context = buildContext(inputFolder, "hapmap2");

    // create step instance
    InputValidation inputValidation = new InputValidationMock(configFolder);

    // run and test
    boolean result = run(context, inputValidation);

    // check that the step succeeded
    assertEquals(true, result);
    assertTrue(context.hasInMemory("[OK] 1 valid VCF file(s) found."));

    // test tabix index and count snps
    String vcfFilename = inputFolder + "/chr20.R50.merged.1.330k.recode.small.vcf.gz";
    VCFFileReader vcfReader = new VCFFileReader(new File(vcfFilename),
            new File(vcfFilename + TabixUtils.STANDARD_INDEX_EXTENSION), true);
    CloseableIterator<VariantContext> snps = vcfReader.query("20", 1, 1000000000);
    int count = 0;
    while (snps.hasNext()) {
        snps.next();
        count++;
    }
    snps.close();
    vcfReader.close();

    // check snps
    assertEquals(7824, count);
}
Example 8
Source File: FastQualityControlTest.java From imputationserver with GNU Affero General Public License v3.0
public void testCountSamplesInCreatedChunk() throws IOException {
    String configFolder = "test-data/configs/hapmap-chr1";
    String inputFolder = "test-data/data/simulated-chip-1chr-imputation";

    // create workflow context
    WorkflowTestContext context = buildContext(inputFolder, "hapmap2");

    // get output directory
    String out = context.getOutput("chunksDir");

    // create step instance
    FastQualityControlMock qcStats = new FastQualityControlMock(configFolder);

    // run and test
    assertTrue(run(context, qcStats));

    for (File file : new File(out).listFiles()) {
        if (file.getName().endsWith("chunk_1_80000001_100000000.vcf.gz")) {
            VCFFileReader vcfReader = new VCFFileReader(file, false);
            CloseableIterator<VariantContext> it = vcfReader.iterator();
            if (it.hasNext()) {
                VariantContext a = it.next();
                assertEquals(255, a.getNSamples());
            }
            vcfReader.close();
        }
    }
}
Example 9
Source File: ThreadsafeTest.java From picard with MIT License
@Test
public void ensureUniqueVariantObservationsEspeciallyMultiAllelicOnesThatAppearAtChunkingBoundaries() {
    final VariantIteratorProducer.Threadsafe iteratorFactory = new VariantIteratorProducer.Threadsafe(
            VcfFileSegmentGenerator.byWholeContigSubdividingWithWidth(TEN_MILLION),
            Arrays.asList(VCF_WITH_MULTI_ALLELIC_VARIANT_AT_POSITION_10MILLION)
    );
    final Set<String> observed = new HashSet<String>();
    for (final CloseableIterator<VariantContext> i : iteratorFactory.iterators()) {
        while (i.hasNext()) {
            final VariantContext next = i.next();
            Assert.assertTrue(observed.add(next.toString()), "Second observation for " + next.toString());
        }
    }
}
Example 10
Source File: IntervalTagComparatorTest.java From Drop-seq with MIT License
/**
 * For this test, we generate a lot of data, and sort it.
 * Since the SAMRecord doesn't really need to change, generate one static record and a LOT of intervals to tag the record with.
 * Size of the SAMRecord doesn't change, but the speed of the comparator should.
 */
@Test(enabled = false)
public void testSpeed() {
    int numRecords = 30000000;
    int readLength = 60;

    SamReader inputSam = SamReaderFactory.makeDefault().open(dictFile);
    SAMSequenceDictionary dict = inputSam.getFileHeader().getSequenceDictionary();
    dict = filterSD(dict);

    long start = System.currentTimeMillis();

    GenerateIntervalTaggedSAMRecords gen = new GenerateIntervalTaggedSAMRecords(dict, intervalTag, readLength, numRecords);
    IntervalTagComparator comparator = new IntervalTagComparator(this.intervalTag);

    final CloseableIterator<SAMRecord> sortingIterator =
            SamRecordSortingIteratorFactory.create(inputSam.getFileHeader(), gen, comparator, null);

    while (sortingIterator.hasNext()) {
        SAMRecord r = sortingIterator.next();
        Assert.assertNotNull(r);
    }

    long elapsed = System.currentTimeMillis() - start;
    System.out.println("elapsed time = " + elapsed + " ms");
    System.out.println("elapsed time = " + Math.round(elapsed / 1000) + " seconds");
}
Example 11
Source File: SNPUMICellReadIteratorWrapperTest.java From Drop-seq with MIT License
/**
 * Integration style test to see if I can throw null pointers, etc with a bit of real data.
 */
@Test(enabled = true)
public void processReads() {
    List<String> cellBarcodeList = ParseBarcodeFile.readCellBarcodeFile(cellBCFile);
    IntervalList loci = IntervalList.fromFile(snpIntervals);
    SamReader reader = SamReaderFactory.makeDefault().enable(SamReaderFactory.Option.EAGERLY_DECODE).open(bamFile);

    // Filter records before sorting, to reduce I/O
    final MissingTagFilteringIterator filteringIterator =
            new MissingTagFilteringIterator(reader.iterator(), GENE_NAME_TAG, cellBarcodeTag, molBCTag);

    MapQualityFilteredIterator filteringIterator2 = new MapQualityFilteredIterator(filteringIterator, readMQ, true);

    GeneFunctionIteratorWrapper geneFunctionIteratorWrapper = new GeneFunctionIteratorWrapper(filteringIterator2,
            GENE_NAME_TAG, GENE_STRAND_TAG, GENE_FUNCTION_TAG, false, STRAND_STRATEGY, LOCUS_FUNCTION_LIST);

    SNPUMICellReadIteratorWrapper snpumiCellReadIterator = new SNPUMICellReadIteratorWrapper(geneFunctionIteratorWrapper,
            loci, cellBarcodeTag, cellBarcodeList, GENE_NAME_TAG, snpTag, readMQ);

    // create comparators in the order the data should be sorted
    final MultiComparator<SAMRecord> multiComparator = new MultiComparator<>(
            new StringTagComparator(cellBarcodeTag),
            new StringTagComparator(GENE_NAME_TAG),
            new StringTagComparator(molBCTag));

    final CloseableIterator<SAMRecord> sortingIterator =
            SamRecordSortingIteratorFactory.create(reader.getFileHeader(), snpumiCellReadIterator, multiComparator, null);

    Assert.assertTrue(sortingIterator.hasNext());
    SAMRecord nextRead = sortingIterator.next();
    List<SAMTagAndValue> tagValues = nextRead.getAttributes();
    String snpTagValue = nextRead.getStringAttribute(this.snpTag);
    CloserUtil.close(snpumiCellReadIterator);
}
Example 12
Source File: ImputationChrXTest.java From imputationserver with GNU Affero General Public License v3.0
@Test
public void testChrXLeaveOneOutPipelinePhased() throws IOException, ZipException {

    // SNP 26963697 from input excluded and imputed!
    // true genotypes:
    // 1,1|1,1|1,1|1,1,1|1,1,1|1,1|1,1,0,1|1,1|0,1,1,1,1,1,1|1,1,1|1,1|1,1|1,1|1,1|1,1|0,

    String configFolder = "test-data/configs/hapmap-chrX";
    String inputFolder = "test-data/data/chrX-phased-loo";

    File file = new File("test-data/tmp");
    if (file.exists()) {
        FileUtil.deleteDirectory(file);
    }

    // create workflow context
    WorkflowTestContext context = buildContext(inputFolder, "phase1");

    // run qc to create chunkfile
    QcStatisticsMock qcStats = new QcStatisticsMock(configFolder);
    boolean result = run(context, qcStats);
    assertTrue(result);

    // add panel to hdfs
    importRefPanel(FileUtil.path(configFolder, "ref-panels"));
    // importMinimacMap("test-data/B38_MAP_FILE.map");
    importBinaries("files/bin");

    // run imputation
    ImputationMinimac3Mock imputation = new ImputationMinimac3Mock(configFolder);
    result = run(context, imputation);
    assertTrue(result);

    // run export
    CompressionEncryptionMock export = new CompressionEncryptionMock("files");
    result = run(context, export);
    assertTrue(result);

    ZipFile zipFile = new ZipFile("test-data/tmp/local/chr_X.zip", PASSWORD.toCharArray());
    zipFile.extractAll("test-data/tmp");

    VcfFile vcfFile = VcfFileUtil.load("test-data/tmp/chrX.dose.vcf.gz", 100000000, false);

    VCFFileReader vcfReader = new VCFFileReader(new File(vcfFile.getVcfFilename()), false);
    CloseableIterator<VariantContext> it = vcfReader.iterator();

    while (it.hasNext()) {
        VariantContext line = it.next();
        if (line.getStart() == 26963697) {
            assertEquals(2, line.getHetCount());
            assertEquals(1, line.getHomRefCount());
            assertEquals(23, line.getHomVarCount());
        }
    }

    vcfReader.close();
    FileUtil.deleteDirectory(file);
}
Example 13
Source File: ImputationChrXTest.java From imputationserver with GNU Affero General Public License v3.0
@Test
public void testPipelineChrXWithEaglePhasingOnly() throws IOException, ZipException {

    if (!new File(
            "test-data/configs/hapmap-chrX-hg38/ref-panels/ALL.X.nonPAR.phase1_v3.snps_indels_svs.genotypes.all.noSingleton.recode.hg38.bcf")
            .exists()) {
        System.out.println("chrX bcf nonPAR file not available");
        return;
    }

    String configFolder = "test-data/configs/hapmap-chrX";
    String inputFolder = "test-data/data/chrX-unphased";

    // create workflow context
    WorkflowTestContext context = buildContext(inputFolder, "phase1");
    context.setInput("mode", "phasing");

    // run qc to create chunkfile
    QcStatisticsMock qcStats = new QcStatisticsMock(configFolder);
    boolean result = run(context, qcStats);
    assertTrue(result);

    // add panel to hdfs
    importRefPanel(FileUtil.path(configFolder, "ref-panels"));
    // importMinimacMap("test-data/B38_MAP_FILE.map");
    importBinaries("files/bin");

    // run imputation
    ImputationMinimac3Mock imputation = new ImputationMinimac3Mock(configFolder);
    result = run(context, imputation);
    assertTrue(result);

    // run export
    CompressionEncryptionMock export = new CompressionEncryptionMock("files");
    result = run(context, export);
    assertTrue(result);

    ZipFile zipFile = new ZipFile("test-data/tmp/local/chr_X.zip", PASSWORD.toCharArray());
    zipFile.extractAll("test-data/tmp");

    VcfFile vcfFile = VcfFileUtil.load("test-data/tmp/chrX.phased.vcf.gz", 100000000, false);

    assertEquals(true, vcfFile.isPhased());

    VCFFileReader vcfReader = new VCFFileReader(new File(vcfFile.getVcfFilename()), false);
    CloseableIterator<VariantContext> it = vcfReader.iterator();

    while (it.hasNext()) {
        VariantContext line = it.next();
        if (line.getStart() == 44322058) {
            assertEquals("A", line.getGenotype("HG00096").getGenotypeString());
            System.out.println(line.getGenotype("HG00097").getGenotypeString());
            assertEquals("A|A", line.getGenotype("HG00097").getGenotypeString());
        }
    }

    vcfReader.close();
    FileUtil.deleteDirectory("test-data/tmp");
}
Example 14
Source File: BamToDetailsConversion.java From chipster with MIT License
/**
 * Find reads in a given range.
 *
 * <p>
 * TODO add cigar to the list of returned values
 * <p>
 * TODO add pair information to the list of returned values
 *
 * @param request
 * @return
 * @throws InterruptedException
 */
@Override
protected void processDataRequest(DataRequest request) throws InterruptedException {

    // Read the given region
    CloseableIterator<SAMRecord> iterator = dataSource.query(request.start.chr, request.start.bp.intValue(), request.end.bp.intValue());

    // Produce results
    while (iterator.hasNext()) {

        List<Feature> responseList = new LinkedList<Feature>();

        // Split results into chunks
        for (int c = 0; c < RESULT_CHUNK_SIZE && iterator.hasNext(); c++) {
            SAMRecord record = iterator.next();

            // Region for this read
            Region recordRegion = new Region((long) record.getAlignmentStart(), (long) record.getAlignmentEnd(), request.start.chr);

            // Values for this read
            LinkedHashMap<DataType, Object> values = new LinkedHashMap<DataType, Object>();
            Feature read = new Feature(recordRegion, values);

            if (request.getRequestedContents().contains(DataType.ID)) {
                values.put(DataType.ID, record.getReadName());
            }

            if (request.getRequestedContents().contains(DataType.STRAND)) {
                values.put(DataType.STRAND, BamUtils.getStrand(record, coverageType));
            }

            if (request.getRequestedContents().contains(DataType.QUALITY)) {
                values.put(DataType.QUALITY, record.getBaseQualityString());
            }

            if (request.getRequestedContents().contains(DataType.CIGAR)) {
                Cigar cigar = new Cigar(read, record.getCigar());
                values.put(DataType.CIGAR, cigar);
            }

            // TODO Deal with "=" and "N" in read string
            if (request.getRequestedContents().contains(DataType.SEQUENCE)) {
                String seq = record.getReadString();
                values.put(DataType.SEQUENCE, seq);
            }

            if (request.getRequestedContents().contains(DataType.MATE_POSITION)) {
                BpCoord mate = new BpCoord((Long) (long) record.getMateAlignmentStart(),
                        new Chromosome(record.getMateReferenceName()));
                values.put(DataType.MATE_POSITION, mate);
            }

            if (request.getRequestedContents().contains(DataType.BAM_TAG_NH)) {
                Object ng = record.getAttribute("NH");
                if (ng != null) {
                    values.put(DataType.BAM_TAG_NH, (Integer) record.getAttribute("NH"));
                }
            }

            /*
             * NOTE! RegionContents created from the same read area have to be equal in the methods equals, hash and compareTo. Primitive
             * types should be ok, but objects (including tables) have to be handled in those methods separately. Otherwise tracks keep
             * adding the same reads to their read sets again and again.
             */
            responseList.add(read);
        }

        // Send result
        super.createDataResult(new DataResult(request.getStatus(), responseList));
    }

    // We are done
    iterator.close();
}
Example 15
Source File: pullLargeLengths.java From HMMRATAC with GNU General Public License v3.0
/**
 * Read the data and create a list of lengths
 */
private void read() {
    int counter = 0;
    SAMFileReader reader = new SAMFileReader(bam, index);
    ArrayList<Double> temp = new ArrayList<Double>();
    for (int i = 0; i < genome.size(); i++) {
        String chr = genome.get(i).getChrom();
        int start = genome.get(i).getStart();
        int stop = genome.get(i).getStop();
        CloseableIterator<SAMRecord> iter = reader.query(chr, start, stop, false);
        while (iter.hasNext()) {
            SAMRecord record = null;
            try {
                record = iter.next();
            } catch (SAMFormatException ex) {
                System.out.println("SAM Record is problematic. Has mapQ != 0 for unmapped read. Will continue anyway");
            }
            if (record != null) {
                if (!record.getReadUnmappedFlag() && !record.getMateUnmappedFlag() && record.getFirstOfPairFlag()) {
                    if (record.getMappingQuality() >= minQ) {
                        if (Math.abs(record.getInferredInsertSize()) > 100 && Math.abs(record.getInferredInsertSize()) < 1000) {
                            counter += 1;
                            temp.add((double) Math.abs(record.getInferredInsertSize()));
                        }
                    }
                }
            }
        }
        iter.close();
    }
    reader.close();
    lengths = new double[counter];
    for (int i = 0; i < temp.size(); i++) {
        if (temp.get(i) > 100) {
            lengths[i] = temp.get(i);
        }
    }
}
Example 16
Source File: SomaticLocusCaller.java From abra2 with MIT License
private Counts getCounts(SamReader reader, LocusInfo locus) {
    int depth = 0;
    int altCount = 0;
    int refCount = 0;

    CloseableIterator<SAMRecord> iter = reader.queryOverlapping(locus.chromosome, locus.posStart, locus.posStop);
    while (iter.hasNext()) {
        SAMRecord read = iter.next();
        if (!read.getDuplicateReadFlag()) {
            depth += 1;

            Object[] baseAndQual = getBaseAndQualAtPosition(read, locus.posStart);
            Character base = (Character) baseAndQual[0];
            int baseQual = (Integer) baseAndQual[1];
            Character refBase = c2r.getSequence(locus.chromosome, locus.posStart, 1).charAt(0);

            // Override input with actual reference
            // locus.ref = new String(new char[] { refBase });
            locus.actualRef = new String(new char[] { refBase });

            if (locus.isIndel()) {
                if (hasIndel(read, locus)) {
                    altCount += 1;
                } else if (!base.equals('N') && base.equals(refBase)) {
                    refCount += 1;
                }
            } else if (baseQual >= minBaseQual) {
                if (!base.equals('N') && base.equals(refBase)) {
                    refCount += 1;
                } else if (base.equals(locus.alt.charAt(0))) {
                    altCount += 1;
                }
            }
        }
    }

    iter.close();

    return new Counts(refCount, altCount, depth);
}
Example 17
Source File: BamToCoverageConversion.java From chipster with MIT License
private void calculateCoverage(DataRequest request, BpCoord from, BpCoord to) throws InterruptedException {

    // query data for full average bins, because merging them later would be difficult
    long start = CoverageTool.getBin(from.bp);
    long end = CoverageTool.getBin(to.bp) + CoverageTool.BIN_SIZE - 1;

    // if end coordinate is smaller than 1 Picard returns a full chromosome and we'll run out of memory
    if (end < 1) {
        end = 1;
    }

    CloseableIterator<SAMRecord> iterator = dataSource.query(from.chr, (int) start, (int) end);

    BaseStorage forwardBaseStorage = new BaseStorage();
    BaseStorage reverseBaseStorage = new BaseStorage();

    while (iterator.hasNext()) {
        SAMRecord record = iterator.next();

        LinkedHashMap<DataType, Object> values = new LinkedHashMap<DataType, Object>();
        Region recordRegion = new Region((long) record.getAlignmentStart(), (long) record.getAlignmentEnd(), request.start.chr);
        Feature read = new Feature(recordRegion, values);

        values.put(DataType.ID, record.getReadName());
        values.put(DataType.STRAND, BamUtils.getStrand(record, coverageType));

        Cigar cigar = new Cigar(read, record.getCigar());
        values.put(DataType.CIGAR, cigar);

        String seq = record.getReadString();
        values.put(DataType.SEQUENCE, seq);

        // Split read into continuous blocks (elements) by using the cigar
        List<ReadPart> parts = Cigar.splitElements(read);

        for (ReadPart part : parts) {
            if (read.values.get(DataType.STRAND) == Strand.FORWARD) {
                forwardBaseStorage.addNucleotideCounts(part);
            } else if (read.values.get(DataType.STRAND) == Strand.REVERSE) {
                reverseBaseStorage.addNucleotideCounts(part);
            }
        }
    }

    // We are done
    iterator.close();

    /* Reads that overlap query regions create nucleotide counts outside the query region.
     * Remove those extra nucleotide counts, because they don't contain all reads of those regions and would show
     * wrong information.
     */
    Region filterRegion = new Region(start, end, from.chr);
    forwardBaseStorage.filter(filterRegion);
    reverseBaseStorage.filter(filterRegion);

    // Send result
    LinkedList<Feature> resultList = new LinkedList<Feature>();
    createResultList(from, forwardBaseStorage, resultList, Strand.FORWARD);
    createResultList(from, reverseBaseStorage, resultList, Strand.REVERSE);

    LinkedList<Feature> averageCoverage = CoverageTool.average(resultList, from.chr);

    if (request.getRequestedContents().contains(DataType.COVERAGE)) {
        super.createDataResult(new DataResult(request, resultList));
    }

    if (request.getRequestedContents().contains(DataType.COVERAGE_AVERAGE)) {
        super.createDataResult(new DataResult(request, averageCoverage));
    }
}
Example 18
Source File: MergePedIntoVcf.java From picard with MIT License
/**
 * Writes out the VariantContext objects in the order presented to the supplied output file
 * in VCF format.
 */
private void writeVcf(final CloseableIterator<VariantContext> variants,
                      final File output,
                      final SAMSequenceDictionary dict,
                      final VCFHeader vcfHeader,
                      ZCallPedFile zCallPedFile) {

    try (VariantContextWriter writer = new VariantContextWriterBuilder()
            .setOutputFile(output)
            .setReferenceDictionary(dict)
            .setOptions(VariantContextWriterBuilder.DEFAULT_OPTIONS)
            .build()) {

        writer.writeHeader(vcfHeader);
        while (variants.hasNext()) {
            final VariantContext context = variants.next();

            final VariantContextBuilder builder = new VariantContextBuilder(context);
            if (zCallThresholds.containsKey(context.getID())) {
                final String[] zThresh = zCallThresholds.get(context.getID());
                builder.attribute(InfiniumVcfFields.ZTHRESH_X, zThresh[0]);
                builder.attribute(InfiniumVcfFields.ZTHRESH_Y, zThresh[1]);
            }
            final Genotype originalGenotype = context.getGenotype(0);
            final Map<String, Object> newAttributes = originalGenotype.getExtendedAttributes();
            final VCFEncoder vcfEncoder = new VCFEncoder(vcfHeader, false, false);
            final Map<Allele, String> alleleMap = vcfEncoder.buildAlleleStrings(context);

            final String zCallAlleles = zCallPedFile.getAlleles(context.getID());
            if (zCallAlleles == null) {
                throw new PicardException("No zCall alleles found for snp " + context.getID());
            }
            final List<Allele> zCallPedFileAlleles = buildNewAllelesFromZCall(zCallAlleles, context.getAttributes());
            newAttributes.put(InfiniumVcfFields.GTA, alleleMap.get(originalGenotype.getAllele(0)) + UNPHASED + alleleMap.get(originalGenotype.getAllele(1)));
            newAttributes.put(InfiniumVcfFields.GTZ, alleleMap.get(zCallPedFileAlleles.get(0)) + UNPHASED + alleleMap.get(zCallPedFileAlleles.get(1)));

            final Genotype newGenotype = GenotypeBuilder.create(originalGenotype.getSampleName(), zCallPedFileAlleles,
                    newAttributes);
            builder.genotypes(newGenotype);
            logger.record("0", 0);
            // AC, AF, and AN are recalculated here
            VariantContextUtils.calculateChromosomeCounts(builder, false);
            final VariantContext newContext = builder.make();
            writer.add(newContext);
        }
    }
}
Example 19
Source File: SplitVcfs.java From picard with MIT License
@Override
protected int doWork() {
    IOUtil.assertFileIsReadable(INPUT);
    final ProgressLogger progress = new ProgressLogger(log, 10000);

    final VCFFileReader fileReader = new VCFFileReader(INPUT, false);
    final VCFHeader fileHeader = fileReader.getFileHeader();

    final SAMSequenceDictionary sequenceDictionary =
            SEQUENCE_DICTIONARY != null
                    ? SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).getFileHeader(SEQUENCE_DICTIONARY).getSequenceDictionary()
                    : fileHeader.getSequenceDictionary();
    if (CREATE_INDEX && sequenceDictionary == null) {
        throw new PicardException("A sequence dictionary must be available (either through the input file or by setting it explicitly) when creating indexed output.");
    }

    final VariantContextWriterBuilder builder = new VariantContextWriterBuilder()
            .setReferenceDictionary(sequenceDictionary)
            .clearOptions();
    if (CREATE_INDEX)
        builder.setOption(Options.INDEX_ON_THE_FLY);

    final VariantContextWriter snpWriter = builder.setOutputFile(SNP_OUTPUT).build();
    final VariantContextWriter indelWriter = builder.setOutputFile(INDEL_OUTPUT).build();
    snpWriter.writeHeader(fileHeader);
    indelWriter.writeHeader(fileHeader);

    int incorrectVariantCount = 0;

    final CloseableIterator<VariantContext> iterator = fileReader.iterator();
    while (iterator.hasNext()) {
        final VariantContext context = iterator.next();
        if (context.isIndel()) indelWriter.add(context);
        else if (context.isSNP()) snpWriter.add(context);
        else {
            if (STRICT) throw new IllegalStateException("Found a record with type " + context.getType().name());
            else incorrectVariantCount++;
        }

        progress.record(context.getContig(), context.getStart());
    }

    if (incorrectVariantCount > 0) {
        log.debug("Found " + incorrectVariantCount + " records that didn't match SNP or INDEL");
    }

    CloserUtil.close(iterator);
    CloserUtil.close(fileReader);
    snpWriter.close();
    indelWriter.close();

    return 0;
}
Example 20
Source File: BamTagOfTagCounts.java From Drop-seq with MIT License
public TagOfTagResults<String, String> getResults(final File inputBAM, final String primaryTag, final String secondaryTag,
        final boolean filterPCRDuplicates, final Integer readQuality) {
    TagOfTagResults<String, String> result = new TagOfTagResults<>();
    SamReader reader = SamReaderFactory.makeDefault().open(inputBAM);
    CloseableIterator<SAMRecord> iter = CustomBAMIterators.getReadsInTagOrder(reader, primaryTag);
    CloserUtil.close(reader);
    String currentTag = "";
    Set<String> otherTagCollection = new HashSet<>();

    ProgressLogger progress = new ProgressLogger(log);

    while (iter.hasNext()) {
        SAMRecord r = iter.next();
        progress.record(r);
        // skip reads that don't pass filters.
        boolean discardResult = (filterPCRDuplicates && r.getDuplicateReadFlag())
                || r.getMappingQuality() < readQuality
                || r.isSecondaryOrSupplementary();
        // short circuit if read is below the quality to be considered.
        if (discardResult) continue;

        Object d = r.getAttribute(secondaryTag);
        // short circuit if there's no tag for this read.
        if (d == null) continue;

        String data = null;

        if (d instanceof String)
            data = r.getStringAttribute(secondaryTag);
        else if (d instanceof Integer)
            data = Integer.toString(r.getIntegerAttribute(secondaryTag));

        String newTag = r.getStringAttribute(primaryTag);
        if (newTag == null)
            newTag = "";

        // if you see a new tag.
        if (!currentTag.equals(newTag)) {
            // write out tag results, if any.
            if (!currentTag.equals("") && otherTagCollection.size() > 0)
                result.addEntries(currentTag, otherTagCollection);
            currentTag = newTag;
            otherTagCollection.clear();
            if (!discardResult)
                otherTagCollection = addTagToCollection(data, otherTagCollection);
        } else {
            // gather stats
            if (!discardResult)
                otherTagCollection = addTagToCollection(data, otherTagCollection);
        }
    }
    if (otherTagCollection.size() > 0)
        result.addEntries(currentTag, otherTagCollection);

    return (result);
}