Java Code Examples for htsjdk.samtools.reference.ReferenceSequenceFile#getSequenceDictionary()

The following examples show how to use htsjdk.samtools.reference.ReferenceSequenceFile#getSequenceDictionary() . You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.

Example 1

Source File: MaskReferenceSequence.java From Drop-seq with MIT License

6 votes

private void processByPartialContig (final ReferenceSequenceFile ref, final FastaSequenceFileWriter writer, final File intervalListFile) {
	SAMSequenceDictionary sd = ref.getSequenceDictionary();
	// validate that the intervals and the reference have the same sequence dictionary.
	IntervalList iList = IntervalList.fromFile(intervalListFile);
	iList.getHeader().getSequenceDictionary().assertSameDictionary(sd);
	// map the intervals to a map to each contig.
	Map<String, List<Interval>> intervalsPerContig = getIntervalsForContig(iList);

	for (SAMSequenceRecord r: sd.getSequences()) {
		String contig = r.getSequenceName();
		log.info("Processing partial contig " + contig);
		// this list can be null.
		List<Interval> intervalsToMask = intervalsPerContig.get(contig);
		ReferenceSequence rs = ref.getSequence(contig);
		writeSequence(rs, intervalsToMask, writer);
	}
}

Example 2

Source File: FingerprintUtils.java From picard with MIT License

6 votes

/**
 * A utility function that takes a fingerprint and returns a VariantContextSet with variants representing the haplotypes in the fingerprint
 *
 * @param fingerPrint A fingerprint
 * @param reference   A reference sequence that will be used to create the VariantContexts
 * @param sample      A sample name that will be used for the genotype field
 * @return VariantContextSet with variants representing the haplotypes in the fingerprint
 */
public static VariantContextSet createVCSetFromFingerprint(final Fingerprint fingerPrint, final ReferenceSequenceFile reference, final String sample) {

    final VariantContextSet variantContexts = new VariantContextSet(reference.getSequenceDictionary());

    // check that the same snp name isn't twice in the fingerprint.
    fingerPrint.values().stream()
            .map(hp -> hp.getRepresentativeSnp().getName())
            .filter(Objects::nonNull)
            .filter(n -> !n.equals(""))
            .collect(Collectors.groupingBy(Function.identity(), Collectors.counting()))
            .entrySet()
            .stream()
            .filter(e -> e.getValue() > 1)
            .findFirst()
            .ifPresent(e -> {
                throw new IllegalArgumentException("Found same SNP name twice (" + e.getKey() + ") in fingerprint. Cannot create a VCF.");
            });

    // convert all the haplotypes to variant contexts and add them to the set.
    fingerPrint.values().stream()
            .map(hp -> getVariantContext(reference, sample, hp))
            .forEach(variantContexts::add);

    return variantContexts;
}

Example 3

Source File: MaskReferenceSequence.java From Drop-seq with MIT License

5 votes

private void processByWholeContig (final ReferenceSequenceFile ref, final FastaSequenceFileWriter writer, final List<String> contigPatternToIgnore) {
	SAMSequenceDictionary sd = ref.getSequenceDictionary();
	Set<String> contigsToIgnore = selectContigsToIgnore(sd, contigPatternToIgnore);
	// write out each contig.

	for (SAMSequenceRecord r: sd.getSequences()) {
		String contig = r.getSequenceName();
		log.info("Processing complete contig " + contig);
		ReferenceSequence rs = ref.getSequence(contig);
		boolean setSequenceToN = contigsToIgnore.contains(contig);
		writeSequence(rs, setSequenceToN, writer);
	}

}

Example 4

Source File: ScatterIntervalsByNs.java From picard with MIT License

5 votes

@Override
protected int doWork() {
    IOUtil.assertFileIsReadable(REFERENCE_SEQUENCE);
    IOUtil.assertFileIsWritable(OUTPUT);

    final ReferenceSequenceFile refFile = ReferenceSequenceFileFactory.getReferenceSequenceFile(REFERENCE_SEQUENCE, true);
    if (!refFile.isIndexed()) {
        throw new IllegalStateException("Reference file must be indexed, but no index file was found");
    }
    if (refFile.getSequenceDictionary() == null) {
        throw new IllegalStateException("Reference file must include a dictionary, but no dictionary file was found");
    }

    // get the intervals
    final IntervalList intervals = segregateReference(refFile, MAX_TO_MERGE);

    log.info(String.format("Found %d intervals in %d loci during %s seconds", intervalProgress.getCount(), locusProgress.getCount(), locusProgress.getElapsedSeconds()));

    /**********************************
     * Now output regions for calling *
     **********************************/

    final IntervalList outputIntervals = new IntervalList(intervals.getHeader().clone());
    log.info(String.format("Collecting requested type of intervals (%s)", OUTPUT_TYPE));

    intervals.getIntervals().stream().filter(i -> OUTPUT_TYPE.accepts(i.getName())).forEach(outputIntervals::add);

    log.info("Writing Intervals.");
    outputIntervals.write(OUTPUT);

    log.info(String.format("Execution ending. Total time %d seconds", locusProgress.getElapsedSeconds()));

    return 0;
}

Example 5

Source File: ReferenceHadoopSparkSource.java From gatk with BSD 3-Clause "New" or "Revised" License

4 votes

@Override
public SAMSequenceDictionary getReferenceSequenceDictionary(final SAMSequenceDictionary optReadSequenceDictionaryToMatch) {
    ReferenceSequenceFile referenceSequenceFile = ReferenceSequenceFileFactory.getReferenceSequenceFile(new GATKPath(referenceURI.toString()).toPath());
    return referenceSequenceFile.getSequenceDictionary();
}

Example 6

Source File: GenomeLocParser.java From gatk with BSD 3-Clause "New" or "Revised" License

2 votes

/**
 * set our internal reference contig order
 * @param refFile the reference file
 */
public GenomeLocParser(final ReferenceSequenceFile refFile) {
    this(refFile.getSequenceDictionary());
}