htsjdk.samtools.util.CollectionUtil Java Examples
The following examples show how to use
htsjdk.samtools.util.CollectionUtil.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: GeneFromGTFBuilder.java From Drop-seq with MIT License | 6 votes |
private GeneFromGTF makeGeneWithTranscriptsFromGTFRecords(final Collection<GTFRecord> gtfRecords) { final GeneFromGTF gene = makeGeneFromGTFRecords(gtfRecords); // Remove featureType==gene before making transcripts final Collection<GTFRecord> nonGeneGTFRecords = CollectionUtil.makeCollection(new GeneAnnotationFilter(gtfRecords.iterator())); final Map<String, List<GTFRecord>> gtfLinesByTranscript = gatherByTranscriptId(nonGeneGTFRecords); for (final Map.Entry<String, List<GTFRecord>> entry : gtfLinesByTranscript.entrySet()) { if (entry.getKey() == null) // Skip gene entries continue; addTranscriptToGeneFromGTFRecords(gene, entry.getValue()); } if (!gene.iterator().hasNext()) throw new AnnotationException("No transcript in GTF for gene " + gene.getName()); return gene; }
Example #2
Source File: HaplotypeProbabilitiesTest.java From picard with MIT License | 6 votes |
@Test(dataProvider = "symmetricLODdata") public void testSymmetricLOD(final double[] llikelihoods1, final double[] llikelihoods2) { final HaplotypeBlock haplotypeBlock = new HaplotypeBlock(0.1); final Snp testSnp = new Snp("test", "chrTest", 1, (byte) 'A', (byte) 'C', .1, Collections.emptyList()); haplotypeBlock.addSnp(testSnp); final HaplotypeProbabilitiesFromGenotypeLikelihoods hp1 = new HaplotypeProbabilitiesFromGenotypeLikelihoods(haplotypeBlock); final HaplotypeProbabilitiesFromGenotypeLikelihoods hp2 = new HaplotypeProbabilitiesFromGenotypeLikelihoods(haplotypeBlock); hp1.addToLogLikelihoods(testSnp, CollectionUtil.makeList(Allele.ALT_A, Allele.REF_C), llikelihoods1); hp2.addToLogLikelihoods(testSnp, CollectionUtil.makeList(Allele.ALT_A, Allele.REF_C), llikelihoods2); TestNGUtil.assertEqualDoubleArrays(MathUtil.pNormalizeVector(hp1.getPosteriorProbabilities()), MathUtil.pNormalizeVector(MathUtil.multiply(MathUtil.pNormalizeLogProbability(llikelihoods1), haplotypeBlock.getHaplotypeFrequencies())), .00001); TestNGUtil.assertEqualDoubleArrays(MathUtil.pNormalizeVector(hp2.getPosteriorProbabilities()), MathUtil.pNormalizeVector(MathUtil.multiply(MathUtil.pNormalizeLogProbability(llikelihoods2), haplotypeBlock.getHaplotypeFrequencies())), .00001); final double ll21 = hp1.scaledEvidenceProbabilityUsingGenotypeFrequencies(hp2.getPosteriorLikelihoods()); final double ll12 = hp2.scaledEvidenceProbabilityUsingGenotypeFrequencies(hp1.getPosteriorLikelihoods()); Assert.assertTrue(TestNGUtil.compareDoubleWithAccuracy(ll12, ll21, 0.001), "found : " + ll12 + " and " + ll21); }
Example #3
Source File: ReduceGtfTest.java From Drop-seq with MIT License | 6 votes |
@Test(enabled=true, groups={"dropseq", "transcriptome"}) public void testAPITD1() { Iterator<GTFRecord> gtfIterator = parseGtf(GTF_FILE4); Assert.assertNotNull(gtfIterator); Collection<GTFRecord> records = CollectionUtil.makeCollection(gtfIterator); // gunzip -c human_APITD1.gtf.gz | grep -v CDS |grep -v start_codon |grep -v stop_codon |wc -l Assert.assertEquals(records.size(),26); final GeneFromGTFBuilder geneBuilder = new GeneFromGTFBuilder(records.iterator()); Collection<GeneFromGTF> genes = CollectionUtil.makeCollection(geneBuilder); Assert.assertEquals(genes.size(),1); final EnhanceGTFRecords enhancer = new EnhanceGTFRecords(); for (final GeneFromGTF gene : genes) Assert.assertNotNull(enhancer.enhanceGene(gene)); }
Example #4
Source File: ReduceGtfTest.java From Drop-seq with MIT License | 6 votes |
@Test(enabled=true, groups={"dropseq", "transcriptome"}) public void testAPITD1Complex() { Iterator<GTFRecord> gtfIterator = parseGtf(GTF_FILE5); Assert.assertNotNull(gtfIterator); Collection<GTFRecord> records = CollectionUtil.makeCollection(gtfIterator); // gunzip -c human_APITD1.gtf.gz | grep -v CDS |grep -v start_codon |grep -v stop_codon |wc -l Assert.assertEquals(records.size(),42); final GeneFromGTFBuilder geneBuilder = new GeneFromGTFBuilder(records.iterator()); Collection<GeneFromGTF> genes = CollectionUtil.makeCollection(geneBuilder); Assert.assertEquals(genes.size(),2); final EnhanceGTFRecords enhancer = new EnhanceGTFRecords(); for (final GeneFromGTF gene : genes) Assert.assertNotNull(enhancer.enhanceGene(gene)); }
Example #5
Source File: RExecutor.java From hmftools with GNU General Public License v3.0 | 6 votes |
private static int executeFromFile(final String rScriptName, final File scriptFile, final String... arguments) throws IOException, InterruptedException { final String[] command = new String[arguments.length + 2]; command[0] = R_EXE; command[1] = scriptFile.getAbsolutePath(); System.arraycopy(arguments, 0, command, 2, arguments.length); final File outputFile = File.createTempFile(rScriptName, ".out"); final File errorFile = File.createTempFile(rScriptName, ".error"); LOGGER.info(String.format("Executing R script via command: %s", CollectionUtil.join(Arrays.asList(command), " "))); int result = new ProcessBuilder(command).redirectError(errorFile).redirectOutput(outputFile).start().waitFor(); if (result != 0) { LOGGER.fatal("Error executing R script. Examine error file {} for details.", errorFile.toString()); } return result; }
Example #6
Source File: HaplotypeProbabilitiesTest.java From picard with MIT License | 6 votes |
@Test(dataProvider = "dataTestpEvidenceGivenPriorFromGLs") public void testpEvidenceGivenPriorFromGLs(final HaplotypeProbabilitiesFromGenotypeLikelihoods hp, final List<Snp> snps, final List<Boolean> swaps, final List<double[]> logLikelihoods) { for (int i = 0; i < snps.size(); ++i) { final Allele a = Allele.create(swaps.get(i) ? snps.get(i).getAllele2() : snps.get(i).getAllele1()); final Allele b = Allele.create(swaps.get(i) ? snps.get(i).getAllele1() : snps.get(i).getAllele2()); hp.addToLogLikelihoods(snps.get(i), CollectionUtil.makeList(a, b), logLikelihoods.get(i)); } final double[] postLogLikelihood = new double[nGenotypes]; for (final int genotype:genotypes) { postLogLikelihood[genotype] = log10(hp.getHaplotype().getHaplotypeFrequency(genotype)); for (int i = 0; i < logLikelihoods.size(); i++) { final double[] genotypeLogLikelihoods = logLikelihoods.get(i); Assert.assertEquals(genotypeLogLikelihoods.length, nGenotypes); final int swappedGenotype = swaps.get(i) ? nGenotypes - genotype - 1 : genotype; postLogLikelihood[genotype] += genotypeLogLikelihoods[swappedGenotype]; } } assertEqualDoubleArrays(hp.getPosteriorProbabilities(), MathUtil.pNormalizeLogProbability(postLogLikelihood), 1e-10); }
Example #7
Source File: BclQualityEvaluationStrategy.java From picard with MIT License | 6 votes |
/** * Reviews the qualities observed thus far and throws an exception if any are below the minimum quality threshold. */ public void assertMinimumQualities() { final Collection<String> errorTokens = new LinkedList<String>(); for (final Map.Entry<Byte, AtomicInteger> entry : this.qualityCountMap.entrySet()) { /** * We're comparing revised qualities here, not observed, but the qualities that are logged in qualityCountMap are observed * qualities. So as we iterate through it, convert observed qualities into their revised value. */ if (generateRevisedQuality(entry.getKey()) < minimumRevisedQuality) { errorTokens.add(String.format("quality %s observed %s times", entry.getKey(), entry.getValue())); } } if (!errorTokens.isEmpty()) { throw new PicardException(String.format( "Found BCL qualities that fell beneath minimum threshold of %s: %s.", minimumRevisedQuality, CollectionUtil.join(errorTokens, "; ") )); } }
Example #8
Source File: TileMetricsUtil.java From picard with MIT License | 6 votes |
private static Collection<Tile> getTileClusterRecordsV3( final Map<String, ? extends Collection<IlluminaTileMetrics>> locationToMetricsMap, final Map<Integer, Map<Integer, Collection<TilePhasingValue>>> phasingValues, final float density) { final Collection<Tile> tiles = new LinkedList<>(); for (final Map.Entry<String, ? extends Collection<IlluminaTileMetrics>> entry : locationToMetricsMap.entrySet()) { final Collection<IlluminaTileMetrics> tileRecords = entry.getValue(); final IlluminaTileMetrics record = CollectionUtil.getSoleElement(tileRecords); //only create for cluster records if (record.isClusterRecord()) { final Collection<TilePhasingValue> tilePhasingValues = phasingValues.get(record.getLaneNumber()).get(record.getTileNumber()); tiles.add(new Tile(record.getLaneNumber(), record.getTileNumber(), density, record.getMetricValue(), tilePhasingValues.toArray(new TilePhasingValue[tilePhasingValues.size()]))); } } return Collections.unmodifiableCollection(tiles); }
Example #9
Source File: IntervalListToBed.java From picard with MIT License | 6 votes |
@Override protected int doWork() { IOUtil.assertFileIsReadable(INPUT); IOUtil.assertFileIsWritable(OUTPUT); IntervalList intervals = IntervalList.fromFile(INPUT); if (SORT) intervals = intervals.sorted(); try { final BufferedWriter out = IOUtil.openFileForBufferedWriting(OUTPUT); for (final Interval i : intervals) { final String strand = i.isNegativeStrand() ? "-" : "+"; final List<?> fields = CollectionUtil.makeList(i.getContig(), i.getStart()-1, i.getEnd(), i.getName(), SCORE, strand); out.append(fields.stream().map(String::valueOf).collect(Collectors.joining("\t"))); out.newLine(); } out.close(); } catch (IOException ioe) { throw new RuntimeIOException(ioe); } return 0; }
Example #10
Source File: MarkDuplicatesTest.java From picard with MIT License | 6 votes |
@Test(dataProvider = "testOpticalDuplicateDetectionDataProvider") public void testOpticalDuplicateDetection(final File sam, final long expectedNumOpticalDuplicates) { final File outputDir = IOUtil.createTempDir(TEST_BASE_NAME + ".", ".tmp"); outputDir.deleteOnExit(); final File outputSam = new File(outputDir, TEST_BASE_NAME + ".sam"); outputSam.deleteOnExit(); final File metricsFile = new File(outputDir, TEST_BASE_NAME + ".duplicate_metrics"); metricsFile.deleteOnExit(); // Run MarkDuplicates, merging the 3 input files, and either enabling or suppressing PG header // record creation according to suppressPg. final MarkDuplicates markDuplicates = new MarkDuplicates(); markDuplicates.setupOpticalDuplicateFinder(); markDuplicates.INPUT = CollectionUtil.makeList(sam.getAbsolutePath()); markDuplicates.OUTPUT = outputSam; markDuplicates.METRICS_FILE = metricsFile; markDuplicates.TMP_DIR = CollectionUtil.makeList(outputDir); // Needed to suppress calling CommandLineProgram.getVersion(), which doesn't work for code not in a jar markDuplicates.PROGRAM_RECORD_ID = null; Assert.assertEquals(markDuplicates.doWork(), 0); Assert.assertEquals(markDuplicates.numOpticalDuplicates(), expectedNumOpticalDuplicates); IOUtil.recursiveDelete(outputDir.toPath()); }
Example #11
Source File: IntervalListScattererWithSubdivision.java From picard with MIT License | 5 votes |
@Override public List<Interval> takeSome(final Interval interval, final long idealSplitWeight, final long currentSize, final double projectSizeOfRemaining) { final long amount = idealSplitWeight - currentSize; if (amount >= interval.length()) { return CollectionUtil.makeList(interval, null); } if (amount == 0) { return CollectionUtil.makeList(null, interval); } final Interval left = new Interval( interval.getContig(), interval.getStart(), interval.getStart() + (int) amount - 1, interval.isNegativeStrand(), interval.getName() ); final Interval right = new Interval( interval.getContig(), interval.getStart() + (int) amount, interval.getEnd(), interval.isNegativeStrand(), interval.getName() ); return CollectionUtil.makeList(left, right); }
Example #12
Source File: GeneFromGTFBuilder.java From Drop-seq with MIT License | 5 votes |
private GeneFromGTF makeGeneFromGTFRecords(final Collection<GTFRecord> gtfRecords) { GTFRecord lineOne=gtfRecords.iterator().next(); String geneName=lineOne.getGeneName(); final boolean transcriptNegStrand = lineOne.isNegativeStrand(); // Figure out the extend of the gene int start = Integer.MAX_VALUE; int end = Integer.MIN_VALUE; final Set<String> geneIds = new HashSet<>(); final Set<String> chromosomes = new HashSet<>(); for (final GTFRecord r: gtfRecords) { start = Math.min(start, r.getStart()); end = Math.max(end, r.getEnd()); geneIds.add(r.getGeneID()); chromosomes.add(r.getChromosome()); } if (chromosomes.size() > 1) throw new AnnotationException("Chromosome disagreement(" + CollectionUtil.join(chromosomes, ", ") + ") in GTF file for gene " + geneName); final GeneFromGTF gene = new GeneFromGTF(lineOne.getChromosome(), start, end, transcriptNegStrand, geneName, lineOne.getFeatureType(), lineOne.getGeneID(), lineOne.getTranscriptType(), lineOne.getGeneVersion()); for (final GTFRecord gtfRecord : gtfRecords) validateGTFRecord(gtfRecord, gene); if (geneIds.size() > 1) throw new AnnotationException(String.format("Multiple gene IDs for gene %s: %s", geneName, CollectionUtil.join(geneIds, ", "))); return gene; }
Example #13
Source File: IntervalListScattererWithoutSubdivision.java From picard with MIT License | 5 votes |
@Override public List<Interval> takeSome(final Interval interval, final long idealSplitWeight, final long currentSize, final double projectedSizeOfRemaining) { final long projectedSize = currentSize + intervalWeight(interval); if (shouldIncludeInterval(idealSplitWeight, projectedSizeOfRemaining, projectedSize)) { return CollectionUtil.makeList(interval, null); } else { return CollectionUtil.makeList(null, interval); } }
Example #14
Source File: RExecutor.java From picard with MIT License | 5 votes |
/** * Executes the given R script that is stored in a file by a call to Rscript. * Blocks until the R script is complete. * * @param scriptFile the file object for the script * @param arguments any arguments required by the script * @return the return code of the R process */ public static int executeFromFile(final File scriptFile, final String... arguments) { final String[] command = new String[arguments.length + 2]; command[0] = R_EXE; command[1] = scriptFile.getAbsolutePath(); System.arraycopy(arguments, 0, command, 2, arguments.length); LOG.info(String.format("Executing R script via command: %s", CollectionUtil.join(Arrays.asList(command), " "))); return ProcessExecutor.execute(command); }
Example #15
Source File: ReadNameParserTests.java From picard with MIT License | 5 votes |
/** Tests rapidParseInt for positive and negative numbers, as well as non-digit suffixes */ @Test public void testRapidParseIntFails() { List<String> values = CollectionUtil.makeList("foo", "bar", "abc123", "-foo", "f00", "-f00"); for (String s : values) { try { ReadNameParser.rapidParseInt(s); Assert.fail("Should have failed to rapid-parse " + s + " as an int."); } catch (NumberFormatException nfe) { /* expected */ } } }
Example #16
Source File: TestFilterVcf.java From picard with MIT License | 5 votes |
/** * Tests that sites with a het allele balance < 0.4 are marked as filtered out. */ @Test public void testAbFiltering() throws Exception { final Set<String> fails = CollectionUtil.makeSet("tf2", "rs28566954", "rs28548431"); final File out = testFiltering(INPUT, ".vcf.gz", 0.4, 0, 0, Double.MAX_VALUE); final ListMap<String, String> filters = slurpFilters(out); Assert.assertEquals(sorted(filters.keySet()), sorted(fails), "Failed sites did not match expected set of failed sites."); }
Example #17
Source File: TestFilterVcf.java From picard with MIT License | 5 votes |
/** * Tests that genotypes with DP < 18 are marked as failed, but not >= 18. */ @Test public void testDpFiltering() throws Exception { final Set<String> fails = CollectionUtil.makeSet("rs71509448", "rs71628926", "rs13302979", "rs2710876"); final File out = testFiltering(INPUT, ".vcf.gz", 0, 18, 0, Double.MAX_VALUE); final ListMap<String, String> filters = slurpFilters(out); Assert.assertEquals(sorted(filters.keySet()), sorted(fails), "Failed sites did not match expected set of failed sites."); }
Example #18
Source File: TestFilterVcf.java From picard with MIT License | 5 votes |
/** * Tests that genotypes with DP < 18 are marked as failed, but not >= 18. */ @Test public void testDpFilteringToVcf() throws Exception { final Set<String> fails = CollectionUtil.makeSet("rs71509448", "rs71628926", "rs13302979", "rs2710876"); final File out = testFiltering(INPUT, ".vcf", 0, 18, 0, Double.MAX_VALUE); final ListMap<String, String> filters = slurpFilters(out); Assert.assertEquals(sorted(filters.keySet()), sorted(fails), "Failed sites did not match expected set of failed sites."); }
Example #19
Source File: TestFilterVcf.java From picard with MIT License | 5 votes |
/** * Tests that genotypes with DP < 18 are marked as failed, but not >= 18. */ @Test(dataProvider = "goodInputVcfs") public void testFsFiltering(final File input) throws Exception { final Set<String> fails = CollectionUtil.makeSet("rs13303033", "rs28548431", "rs2799066"); final File out = testFiltering(input, ".vcf.gz", 0, 0, 0, 5.0d); final ListMap<String, String> filters = slurpFilters(out); Assert.assertEquals(sorted(filters.keySet()), sorted(fails), "Failed sites did not match expected set of failed sites."); }
Example #20
Source File: TestFilterVcf.java From picard with MIT License | 5 votes |
@Test public void testCombinedFiltering() throws Exception { final TreeSet<String> fails = new TreeSet<String>(CollectionUtil.makeSet("rs13302979", "rs13303033", "rs2710876", "rs2799066", "rs28548431", "rs28566954", "rs71509448", "rs71628926", "tf2")); final File out = testFiltering(INPUT, ".vcf.gz", 0.4, 18, 22, 5.0d); final ListMap<String, String> filters = slurpFilters(out); Assert.assertEquals(new TreeSet<String>(filters.keySet()), fails, "Failed sites did not match expected set of failed sites."); }
Example #21
Source File: HaplotypeProbabilitiesTest.java From picard with MIT License | 5 votes |
@DataProvider(name = "dataTestpEvidenceGivenPriorFromGLs") public Object[][] dataTestpEvidenceGivenPriorFromGLs() { return new Object[][]{ new Object[]{ new HaplotypeProbabilitiesFromGenotypeLikelihoods(hb1), Collections.singletonList(snp1), Collections.singletonList(false), Collections.singletonList(new double[]{0, -1, -2})}, new Object[]{ new HaplotypeProbabilitiesFromGenotypeLikelihoods(hb2), Collections.singletonList(snp2), Collections.singletonList(true), Collections.singletonList(new double[]{-2, -1, 0})}, new Object[]{ new HaplotypeProbabilitiesFromGenotypeLikelihoods(hb2), CollectionUtil.makeList(snp1, snp2), CollectionUtil.makeList(false, false), CollectionUtil.makeList(new double[]{0, -1, -2}, new double[]{0, -1, -2})}, new Object[]{ new HaplotypeProbabilitiesFromGenotypeLikelihoods(hb2), CollectionUtil.makeList(snp2, snp1), CollectionUtil.makeList(false, false), CollectionUtil.makeList(new double[]{0, -1, -2}, new double[]{-1, 0, -1})}, new Object[]{ new HaplotypeProbabilitiesFromGenotypeLikelihoods(hb2), CollectionUtil.makeList(snp1, snp2), CollectionUtil.makeList(false, false), CollectionUtil.makeList(new double[]{0, -1, -2}, new double[]{-2, -1, 0})}, }; }
Example #22
Source File: RenameSampleInVcf.java From picard with MIT License | 5 votes |
@Override protected int doWork() { IOUtil.assertFileIsReadable(INPUT); IOUtil.assertFileIsWritable(OUTPUT); final VCFFileReader in = new VCFFileReader(INPUT, false); final VCFHeader header = in.getFileHeader(); if (header.getGenotypeSamples().size() > 1) { throw new IllegalArgumentException("Input VCF must be single-sample."); } if (OLD_SAMPLE_NAME != null && !OLD_SAMPLE_NAME.equals(header.getGenotypeSamples().get(0))) { throw new IllegalArgumentException("Input VCF did not contain expected sample. Contained: " + header.getGenotypeSamples().get(0)); } final EnumSet<Options> options = EnumSet.copyOf(VariantContextWriterBuilder.DEFAULT_OPTIONS); if (CREATE_INDEX) options.add(Options.INDEX_ON_THE_FLY); else options.remove(Options.INDEX_ON_THE_FLY); final VCFHeader outHeader = new VCFHeader(header.getMetaDataInInputOrder(), CollectionUtil.makeList(NEW_SAMPLE_NAME)); final VariantContextWriter out = new VariantContextWriterBuilder() .setOptions(options) .setOutputFile(OUTPUT).setReferenceDictionary(outHeader.getSequenceDictionary()).build(); out.writeHeader(outHeader); for (final VariantContext ctx : in) { out.add(ctx); } out.close(); in.close(); return 0; }
Example #23
Source File: SingleCellRnaSeqMetricsCollector.java From Drop-seq with MIT License | 5 votes |
public RnaSeqMetricsCollector getCollector(final List<String> cellBarcodes) { List<SAMReadGroupRecord> readGroups = getReadGroups(cellBarcodes); return new RnaSeqMtMetricsCollector(CollectionUtil.makeSet(MetricAccumulationLevel.READ_GROUP), readGroups, ribosomalBasesInitialValue, geneOverlapDetector, ribosomalSequenceOverlapDetector, ignoredSequenceIndices, MINIMUM_TRANSCRIPT_LENGTH, specificity, this.rnaFragPct, false, genesWithLongEnoughTranscripts); }
Example #24
Source File: CircosExecution.java From hmftools with GNU General Public License v3.0 | 5 votes |
@Nullable public Integer generateCircos(@NotNull final String inputConfig, @NotNull final String outputPath, @NotNull final String outputFile, @NotNull final String errorPath) throws IOException, InterruptedException { final File redirectErrorFile = new File(errorPath + File.separator + outputFile + ".error"); final File redirectOutputFile = new File(errorPath + File.separator + outputFile + ".out"); final String[] command = new String[8]; command[0] = executable; command[1] = "-nosvg"; command[2] = "-conf"; command[3] = new File(inputConfig).getAbsolutePath(); command[4] = "-outputdir"; command[5] = new File(outputPath).getAbsolutePath(); command[6] = "-outputfile"; command[7] = outputFile; LOGGER.info(String.format("Generating " + outputFile + " via command: %s", CollectionUtil.join(Arrays.asList(command), " "))); int result = new ProcessBuilder(command).redirectError(redirectErrorFile).redirectOutput(redirectOutputFile).start().waitFor(); if (result != 0) { LOGGER.fatal("Fatal error creating circos plot. Examine error file " + redirectErrorFile.toString() + " for details."); System.exit(1); } final File finalFile = new File(outputPath + File.separator + outputFile); if (!finalFile.exists()) { LOGGER.fatal("Failed to create file {}", finalFile.toString()); System.exit(1); } return 0; }
Example #25
Source File: TileMetricsUtil.java From picard with MIT License | 5 votes |
/** * Returns an unmodifiable collection of tile data read from the provided file. For each tile we will extract: * - lane number * - tile number * - density * - cluster ID * - Phasing & Prephasing for first template read (if available) * - Phasing & Prephasing for second template read (if available) */ public static Collection<Tile> parseTileMetrics(final File tileMetricsOutFile, final ReadStructure readStructure, final ValidationStringency validationStringency) throws FileNotFoundException { // Get the tile metrics lines from TileMetricsOut, keeping only the last value for any Lane/Tile/Code combination final Collection<IlluminaTileMetrics> tileMetrics = determineLastValueForLaneTileMetricsCode(new TileMetricsOutReader (tileMetricsOutFile, TileMetricsOutReader.TileMetricsVersion.TWO)); // Collect the tiles by lane & tile, and then collect the metrics by lane final Map<String, ? extends Collection<IlluminaTileMetrics>> locationToMetricsMap = partitionTileMetricsByLocation(tileMetrics); final Collection<Tile> tiles = new LinkedList<>(); for (final Map.Entry<String, ? extends Collection<IlluminaTileMetrics>> entry : locationToMetricsMap.entrySet()) { final Collection<IlluminaTileMetrics> tileRecords = entry.getValue(); // Get a mapping from metric code number to the corresponding IlluminaTileMetrics final Map<Integer, ? extends Collection<IlluminaTileMetrics>> codeMetricsMap = partitionTileMetricsByCode(tileRecords); final Set<Integer> observedCodes = codeMetricsMap.keySet(); if (!(observedCodes.contains(IlluminaMetricsCode.DENSITY_ID.getMetricsCode()) && observedCodes.contains(IlluminaMetricsCode.CLUSTER_ID.getMetricsCode()))) throw new PicardException(String.format("Expected to find cluster and density record codes (%s and %s) in records read for tile location %s (lane:tile), but found only %s.", IlluminaMetricsCode.CLUSTER_ID.getMetricsCode(), IlluminaMetricsCode.DENSITY_ID.getMetricsCode(), entry.getKey(), observedCodes)); final IlluminaTileMetrics densityRecord = CollectionUtil.getSoleElement(codeMetricsMap.get(IlluminaMetricsCode.DENSITY_ID.getMetricsCode())); final IlluminaTileMetrics clusterRecord = CollectionUtil.getSoleElement(codeMetricsMap.get(IlluminaMetricsCode.CLUSTER_ID.getMetricsCode())); // Snag the phasing data for each read in the read structure. For both types of phasing values, this is the median of all of the individual values seen final Collection<TilePhasingValue> tilePhasingValues = getTilePhasingValues(codeMetricsMap, readStructure, validationStringency); tiles.add(new Tile(densityRecord.getLaneNumber(), densityRecord.getTileNumber(), densityRecord.getMetricValue(), clusterRecord.getMetricValue(), tilePhasingValues.toArray(new TilePhasingValue[tilePhasingValues.size()]))); } return Collections.unmodifiableCollection(tiles); }
Example #26
Source File: VariantIteratorProducer.java From picard with MIT License | 5 votes |
@Override protected CollectionUtil.DefaultingMap<File, VCFFileReader> initialValue() { return new CollectionUtil.DefaultingMap<>(file -> { final VCFFileReader reader = new VCFFileReader(file); LOG.debug(String.format("Producing a reader of %s for %s.", file, Thread.currentThread())); allReaders.add(reader); return reader; }, true); }
Example #27
Source File: FindMendelianViolations.java From picard with MIT License | 5 votes |
private void writeAllViolations(final MendelianViolationDetector.Result result) { if (VCF_DIR != null) { LOG.info(String.format("Writing family violation VCFs to %s/", VCF_DIR.getAbsolutePath())); final VariantContextComparator vcComparator = new VariantContextComparator(inputHeader.get().getContigLines()); final Set<VCFHeaderLine> headerLines = new LinkedHashSet<>(inputHeader.get().getMetaDataInInputOrder()); headerLines.add(new VCFInfoHeaderLine(MendelianViolationDetector.MENDELIAN_VIOLATION_KEY, 1, VCFHeaderLineType.String, "Type of mendelian violation.")); headerLines.add(new VCFInfoHeaderLine(MendelianViolationDetector.ORIGINAL_AC, VCFHeaderLineCount.A, VCFHeaderLineType.Integer, "Original AC")); headerLines.add(new VCFInfoHeaderLine(MendelianViolationDetector.ORIGINAL_AF, VCFHeaderLineCount.A, VCFHeaderLineType.Float, "Original AF")); headerLines.add(new VCFInfoHeaderLine(MendelianViolationDetector.ORIGINAL_AN, 1, VCFHeaderLineType.Integer, "Original AN")); for (final PedFile.PedTrio trio : pedFile.get().values()) { final File outputFile = new File(VCF_DIR, IOUtil.makeFileNameSafe(trio.getFamilyId() + IOUtil.VCF_FILE_EXTENSION)); LOG.info(String.format("Writing %s violation VCF to %s", trio.getFamilyId(), outputFile.getAbsolutePath())); final VariantContextWriter out = new VariantContextWriterBuilder() .setOutputFile(outputFile) .unsetOption(INDEX_ON_THE_FLY) .build(); final VCFHeader newHeader = new VCFHeader(headerLines, CollectionUtil.makeList(trio.getMaternalId(), trio.getPaternalId(), trio.getIndividualId())); final TreeSet<VariantContext> orderedViolations = new TreeSet<>(vcComparator); orderedViolations.addAll(result.violations().get(trio.getFamilyId())); out.writeHeader(newHeader); orderedViolations.forEach(out::add); out.close(); } } }
Example #28
Source File: Tile.java From picard with MIT License | 5 votes |
/** For any given TileTemplateRead, we want to make sure that there is only a single TilePhasingValue */ private static Collection<TilePhasingValue> ensureSoleTilePhasingValuesPerRead(final Collection<TilePhasingValue> tilePhasingValues) { final Map<TileTemplateRead, List<TilePhasingValue>> partitionedMap = tilePhasingValues.stream().collect(Collectors.groupingBy(TilePhasingValue::getTileTemplateRead)); final Collection<TilePhasingValue> newTilePhasingValues = new LinkedList<>(); for (final TileTemplateRead read : partitionedMap.keySet()) { newTilePhasingValues.add(CollectionUtil.getSoleElement(partitionedMap.get(read))); } return newTilePhasingValues; }
Example #29
Source File: IlluminaBasecallsToFastq.java From picard with MIT License | 5 votes |
/** * For each line in the MULTIPLEX_PARAMS file create a FastqRecordsWriter and put it in the sampleBarcodeFastqWriterMap map, * where the key to the map is the concatenation of all sampleBarcodes in order for the given line. */ private void populateWritersFromMultiplexParams() { final TabbedTextFileWithHeaderParser libraryParamsParser = new TabbedTextFileWithHeaderParser(MULTIPLEX_PARAMS); final Set<String> expectedColumnLabels = CollectionUtil.makeSet("OUTPUT_PREFIX"); final List<String> sampleBarcodeColumnLabels = new ArrayList<>(); for (int i = 1; i <= readStructure.sampleBarcodes.length(); i++) { sampleBarcodeColumnLabels.add("BARCODE_" + i); } expectedColumnLabels.addAll(sampleBarcodeColumnLabels); assertExpectedColumns(libraryParamsParser.columnLabels(), expectedColumnLabels); for (final TabbedTextFileWithHeaderParser.Row row : libraryParamsParser) { List<String> sampleBarcodeValues = null; if (!sampleBarcodeColumnLabels.isEmpty()) { sampleBarcodeValues = new ArrayList<>(); for (final String sampleBarcodeLabel : sampleBarcodeColumnLabels) { sampleBarcodeValues.add(row.getField(sampleBarcodeLabel)); } } final String key = (sampleBarcodeValues == null || sampleBarcodeValues.contains("N")) ? null : StringUtil.join("", sampleBarcodeValues); if (sampleBarcodeFastqWriterMap.containsKey(key)) { //This will catch the case of having more than 1 line in a non-barcoded MULTIPLEX_PARAMS file throw new PicardException("Row for barcode " + key + " appears more than once in MULTIPLEX_PARAMS file " + MULTIPLEX_PARAMS); } final FastqRecordsWriter writer = buildWriter(new File(row.getField("OUTPUT_PREFIX"))); sampleBarcodeFastqWriterMap.put(key, writer); } if (sampleBarcodeFastqWriterMap.isEmpty()) { throw new PicardException("MULTIPLEX_PARAMS file " + MULTIPLEX_PARAMS + " does have any data rows."); } libraryParamsParser.close(); }
Example #30
Source File: BaseErrorAggregation.java From picard with MIT License | 5 votes |
public BaseErrorAggregation(final Supplier<CALCULATOR> simpleAggregatorGenerator, final ReadBaseStratification.RecordAndOffsetStratifier stratifier) { this.stratifier = stratifier; this.simpleAggregatorGenerator = simpleAggregatorGenerator; this.strataAggregatorMap = new CollectionUtil.DefaultingMap<> (ignored -> simpleAggregatorGenerator.get(), true); }