htsjdk.samtools.util.CloseableIterator#hasNext

Source File: CRAMFileWriter.java From cramtools with Apache License 2.0

6 votes

public static void main(String[] args) throws IOException {

		Log.setGlobalLogLevel(LogLevel.INFO);
		File bamFile = new File(args[0]);
		File outCramFile = new File(args[1]);
		ReferenceSource source = new ReferenceSource(new File(args[2]));
		int maxThreads = Integer.valueOf(args[3]);

		BAMFileReader reader = new BAMFileReader(bamFile, null, false, false, ValidationStringency.SILENT,
				new DefaultSAMRecordFactory());
		OutputStream os = new FileOutputStream(outCramFile);
		CRAMFileWriter writer = new CRAMFileWriter(os, source, reader.getFileHeader(), outCramFile.getName(),
				maxThreads);

		CloseableIterator<SAMRecord> iterator = reader.getIterator();

		while (iterator.hasNext()) {
			SAMRecord record = iterator.next();
			writer.addAlignment(record);
		}
		writer.close();
		reader.close();
	}

Source File: VariantAccumulatorExecutor.java From picard with MIT License

6 votes

@Override
public void run() {
    try {
        Optional<CloseableIterator<VariantContext>> readerMaybe;
        while ((readerMaybe = vcIterators.next()).isPresent()) {
            final CloseableIterator<VariantContext> reader = readerMaybe.get();
            while (reader.hasNext()) processor.accumulate(reader.next());
            reader.close();

            if (!childrenErrors.isEmpty()) {
                LOG.error(Thread.currentThread() + " aborting: observed error in another child thread.");
                break;
            }
        }
    } catch (final Throwable e) {
        childrenErrors.add(e);
        LOG.error(e, "Unexpected exception encountered in child thread.");
    } finally {
        LOG.debug(String.format("Thread %s is finishing.", Thread.currentThread()));
    }
}

Source File: InputValidationTest.java From imputationserver with GNU Affero General Public License v3.0

5 votes

public void testTabixIndexCreationChr20() throws IOException {

		String configFolder = "test-data/configs/hapmap-chr1";
		// input folder contains no vcf or vcf.gz files
		String inputFolder = "test-data/data/chr20-phased";

		// create workflow context
		WorkflowTestContext context = buildContext(inputFolder, "hapmap2");

		// create step instance
		InputValidation inputValidation = new InputValidationMock(configFolder);

		// run and test
		boolean result = run(context, inputValidation);

		// check if step is failed
		assertEquals(true, result);
		assertTrue(context.hasInMemory("[OK] 1 valid VCF file(s) found."));

		
		// test tabix index and count snps
		String vcfFilename = inputFolder + "/chr20.R50.merged.1.330k.recode.small.vcf.gz";
		VCFFileReader vcfReader = new VCFFileReader(new File(vcfFilename),
				new File(vcfFilename + TabixUtils.STANDARD_INDEX_EXTENSION), true);
		CloseableIterator<VariantContext> snps = vcfReader.query("20", 1, 1000000000);
		int count = 0;
		while (snps.hasNext()) {
			snps.next();
			count++;
		}
		snps.close();
		vcfReader.close();
		
		//check snps
		assertEquals(7824, count);

	}

Source File: VcfFormatConverter.java From picard with MIT License

5 votes

@Override
protected int doWork() {
    final ProgressLogger progress = new ProgressLogger(LOG, 10000);
    
    IOUtil.assertFileIsReadable(INPUT);
    IOUtil.assertFileIsWritable(OUTPUT);

    final VCFFileReader reader = new VCFFileReader(INPUT, REQUIRE_INDEX);
    final VCFHeader header = new VCFHeader(reader.getFileHeader());
    final SAMSequenceDictionary sequenceDictionary = header.getSequenceDictionary();
    if (CREATE_INDEX && sequenceDictionary == null) {
        throw new PicardException("A sequence dictionary must be available in the input file when creating indexed output.");
    }

    final VariantContextWriterBuilder builder = new VariantContextWriterBuilder()
            .setOutputFile(OUTPUT)
            .setReferenceDictionary(sequenceDictionary);
    if (CREATE_INDEX)
        builder.setOption(Options.INDEX_ON_THE_FLY);
    else
        builder.unsetOption(Options.INDEX_ON_THE_FLY);
    final VariantContextWriter writer = builder.build();
    writer.writeHeader(header);
    final CloseableIterator<VariantContext> iterator = reader.iterator();

    while (iterator.hasNext()) {
        final VariantContext context = iterator.next();
        writer.add(context);
        progress.record(context.getContig(), context.getStart());
    }

    CloserUtil.close(iterator);
    CloserUtil.close(reader);
    writer.close();

    return 0;
}

Source File: CleanSam.java From picard with MIT License

5 votes

/**
 * Do the work after command line has been parsed.
 * RuntimeException may be thrown by this method, and are reported appropriately.
 *
 * @return program exit status.
 */
@Override
protected int doWork() {
    IOUtil.assertFileIsReadable(INPUT);
    IOUtil.assertFileIsWritable(OUTPUT);
    final SamReaderFactory factory = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE);
    if (VALIDATION_STRINGENCY == ValidationStringency.STRICT) {
        factory.validationStringency(ValidationStringency.LENIENT);
    }
    final SamReader reader = factory.open(INPUT);
    final SAMFileWriter writer = new SAMFileWriterFactory().makeSAMOrBAMWriter(reader.getFileHeader(), true, OUTPUT);
    final CloseableIterator<SAMRecord> it = reader.iterator();
    final ProgressLogger progress = new ProgressLogger(Log.getInstance(CleanSam.class));

    // If the read (or its mate) maps off the end of the alignment, clip it
    while (it.hasNext()) {
        final SAMRecord rec = it.next();

        // If the read (or its mate) maps off the end of the alignment, clip it
        AbstractAlignmentMerger.createNewCigarsIfMapsOffEndOfReference(rec);

        // check the read's mapping quality
        if (rec.getReadUnmappedFlag() && 0 != rec.getMappingQuality()) {
            rec.setMappingQuality(0);
        }

        writer.addAlignment(rec);
        progress.record(rec);
    }

    writer.close();
    it.close();
    CloserUtil.close(reader);
    return 0;
}

Source File: InputValidationTest.java From imputationserver with GNU Affero General Public License v3.0

5 votes

public void testTabixIndexCreationChr1() throws IOException {

		String configFolder = "test-data/configs/hapmap-chr1";
		// input folder contains no vcf or vcf.gz files
		String inputFolder = "test-data/data/single";

		// create workflow context
		WorkflowTestContext context = buildContext(inputFolder, "hapmap2");
		context.setInput("phasing", "eagle");

		// create step instance
		InputValidation inputValidation = new InputValidationMock(configFolder);

		// run and test
		boolean result = run(context, inputValidation);

		// check if step is failed
		assertEquals(true, result);
		assertTrue(context.hasInMemory("[OK] 1 valid VCF file(s) found."));

		
		// test tabix index and count snps
		String vcfFilename = inputFolder + "/minimac_test.50.vcf.gz";
		VCFFileReader vcfReader = new VCFFileReader(new File(vcfFilename),
				new File(vcfFilename + TabixUtils.STANDARD_INDEX_EXTENSION), true);
		CloseableIterator<VariantContext> snps = vcfReader.query("1", 1, 1000000000);
		int count = 0;
		while (snps.hasNext()) {
			snps.next();
			count++;
		}
		snps.close();
		vcfReader.close();
		
		//check snps
		assertEquals(905, count);

	}

Source File: FastQualityControlTest.java From imputationserver with GNU Affero General Public License v3.0

5 votes

public void testCountSamplesInCreatedChunk() throws IOException {

		String configFolder = "test-data/configs/hapmap-chr1";
		String inputFolder = "test-data/data/simulated-chip-1chr-imputation";

		// create workflow context
		WorkflowTestContext context = buildContext(inputFolder, "hapmap2");

		// get output directory
		String out = context.getOutput("chunksDir");

		// create step instance
		FastQualityControlMock qcStats = new FastQualityControlMock(configFolder);

		// run and test
		assertTrue(run(context, qcStats));
		for (File file : new File(out).listFiles()) {
			if (file.getName().endsWith("chunk_1_80000001_100000000.vcf.gz")) {
				VCFFileReader vcfReader = new VCFFileReader(file, false);
				CloseableIterator<VariantContext> it = vcfReader.iterator();
				if (it.hasNext()) {
					VariantContext a = it.next();
					assertEquals(255, a.getNSamples());
				}
				vcfReader.close();
			}

		}

	}

Source File: FastQualityControlTest.java From imputationserver with GNU Affero General Public License v3.0

5 votes

public void testCountSitesForOneChunkedContig() throws IOException {

		String configFolder = "test-data/configs/hapmap-chr1";
		String inputFolder = "test-data/data/simulated-chip-1chr-imputation";

		// create workflow context
		WorkflowTestContext context = buildContext(inputFolder, "hapmap2");

		String out = context.getOutput("chunksDir");

		// create step instance
		FastQualityControlMock qcStats = new FastQualityControlMock(configFolder);

		// run and test
		assertTrue(run(context, qcStats));

		File[] files = new File(out).listFiles();
		Arrays.sort(files);

		// baseline from a earlier job execution
		int[] array = { 4588, 968, 3002, 5781, 5116, 4750, 5699, 5174, 6334, 3188, 5106, 5832, 5318 };
		int pos = 0;

		for (File file : files) {
			int count = 0;
			if (file.getName().endsWith(".gz")) {
				VCFFileReader vcfReader = new VCFFileReader(file, false);
				CloseableIterator<VariantContext> it = vcfReader.iterator();
				while (it.hasNext()) {
					it.next();
					count++;
				}
				assertEquals(array[pos], count);
				vcfReader.close();
				pos++;
			}

		}

	}

Source File: AbstractVcfMergingClpTester.java From picard with MIT License

5 votes

static Queue<String> loadContigPositions(final File inputFile) {
	final VCFFileReader reader = new VCFFileReader(inputFile, false);
	final Queue<String> contigPositions = new LinkedList<String>();
	final CloseableIterator<VariantContext> iterator = reader.iterator();
	while (iterator.hasNext()) contigPositions.add(getContigPosition(iterator.next()));
	iterator.close();
	reader.close();
	return contigPositions;
}

Source File: AbstractMarkDuplicatesCommandLineProgramTest.java From picard with MIT License

5 votes

@Test
public void testPathologicalOrderingAtTheSamePosition() {
    final AbstractMarkDuplicatesCommandLineProgramTester tester = getTester();
    tester.getSamRecordSetBuilder().setReadLength(68);

    tester.setExpectedOpticalDuplicate(1);

    tester.addMatePair("RUNID:3:1:15013:113051", 0, 129384554, 129384554, false, false, false, false, "68M", "68M", false, false, false, false, false, DEFAULT_BASE_QUALITY);
    tester.addMatePair("RUNID:3:1:15029:113060", 0, 129384554, 129384554, false, false, true, true, "68M", "68M", false, false, false, false, false, DEFAULT_BASE_QUALITY);

    // Create the pathology
    final CloseableIterator<SAMRecord> iterator = tester.getRecordIterator();
    final int[] qualityOffset = {20, 30, 10, 40}; // creates an interesting pathological ordering
    int index = 0;
    while (iterator.hasNext()) {
        final SAMRecord record = iterator.next();
        final byte[] quals = new byte[record.getReadLength()];
        for (int i = 0; i < record.getReadLength(); i++) {
            quals[i] = (byte) (qualityOffset[index] + 10);
        }
        record.setBaseQualities(quals);
        index++;
    }
    iterator.close();

    // Run the test
    tester.runTest();
}

Source File: IntervalTagComparatorTest.java From Drop-seq with MIT License

5 votes

@Test(enabled=false)
/**
 * For this test, we generate a lot of data, and sort it.
 * Since the SAMRecord doesn't really need to change, generate one static record and a LOT of intervals to tag the record with.
 * Size of the SAMRecord doesn't change, but the speed of the comparator should.
 */
public void testSpeed () {
	int numRecords = 30000000;
	int readLength=60;
	SamReader inputSam = SamReaderFactory.makeDefault().open(dictFile);
	SAMSequenceDictionary dict= inputSam.getFileHeader().getSequenceDictionary();
	dict = filterSD(dict);

	long start = System.currentTimeMillis();

	GenerateIntervalTaggedSAMRecords gen = new GenerateIntervalTaggedSAMRecords(dict, intervalTag, readLength, numRecords);
	IntervalTagComparator comparator = new IntervalTagComparator(this.intervalTag);

	final CloseableIterator<SAMRecord> sortingIterator =
            SamRecordSortingIteratorFactory.create(inputSam.getFileHeader(), gen, comparator, null);

	while(sortingIterator.hasNext()) {
		SAMRecord r = sortingIterator.next();
		Assert.assertNotNull(r);
	}
	long elapsed = System.currentTimeMillis() - start;
	System.out.println("elapsed time = " + elapsed + " ms");
	System.out.println("elapsed time = " + Math.round(elapsed/1000) + " seconds");
}

Source File: UpdateVcfSequenceDictionary.java From picard with MIT License

5 votes

@Override
protected int doWork() {
    IOUtil.assertFileIsReadable(INPUT);
    IOUtil.assertFileIsReadable(SEQUENCE_DICTIONARY);
    IOUtil.assertFileIsWritable(OUTPUT);

    final SAMSequenceDictionary samSequenceDictionary = SAMSequenceDictionaryExtractor.extractDictionary(SEQUENCE_DICTIONARY.toPath());

    final VCFFileReader fileReader = new VCFFileReader(INPUT, false);
    final VCFHeader fileHeader = fileReader.getFileHeader();

    final VariantContextWriterBuilder builder = new VariantContextWriterBuilder()
            .setReferenceDictionary(samSequenceDictionary)
            .clearOptions();
    if (CREATE_INDEX)
        builder.setOption(Options.INDEX_ON_THE_FLY);

    final VariantContextWriter vcfWriter = builder.setOutputFile(OUTPUT).build();
    fileHeader.setSequenceDictionary(samSequenceDictionary);
    vcfWriter.writeHeader(fileHeader);

    final ProgressLogger progress = new ProgressLogger(log, 10000);
    final CloseableIterator<VariantContext> iterator = fileReader.iterator();
    while (iterator.hasNext()) {
        final VariantContext context = iterator.next();
        vcfWriter.add(context);
        progress.record(context.getContig(), context.getStart());
    }

    CloserUtil.close(iterator);
    CloserUtil.close(fileReader);
    vcfWriter.close();

    return 0;
}

Source File: pullLargeLengths.java From HMMRATAC with GNU General Public License v3.0

4 votes

/**
 * Read the data and create a list of lengths
 */
private void read(){
	int counter = 0;
	SAMFileReader reader = new SAMFileReader(bam,index);
	ArrayList<Double> temp = new ArrayList<Double>();
	for (int i = 0; i < genome.size();i++){
		String chr = genome.get(i).getChrom();
		int start = genome.get(i).getStart();
		int stop = genome.get(i).getStop();
		CloseableIterator<SAMRecord> iter = reader.query(chr,start,stop,false);
		while (iter.hasNext()){
			SAMRecord record = null;
			try{
				record = iter.next();
			}
			catch(SAMFormatException ex){
				System.out.println("SAM Record is problematic. Has mapQ != 0 for unmapped read. Will continue anyway");
			}
			if(record != null){
			if(!record.getReadUnmappedFlag() && !record.getMateUnmappedFlag() && record.getFirstOfPairFlag()) {
				if (record.getMappingQuality() >= minQ){
					
					if (Math.abs(record.getInferredInsertSize()) > 100 && Math.abs(record.getInferredInsertSize())
							< 1000){
						counter+=1;
						temp.add((double)Math.abs(record.getInferredInsertSize()));
					}
				}
			}
			}
		}
		iter.close();
	}
	reader.close();
	lengths = new double[counter];
	for (int i = 0;i < temp.size();i++){
		if (temp.get(i) > 100){
			lengths[i] = temp.get(i);
		}
	}
	
}

Source File: ImputationChrXTest.java From imputationserver with GNU Affero General Public License v3.0

4 votes

@Test
public void testPipelineChrXWithEaglePhasingOnly() throws IOException, ZipException {
	
	if (!new File(
			"test-data/configs/hapmap-chrX-hg38/ref-panels/ALL.X.nonPAR.phase1_v3.snps_indels_svs.genotypes.all.noSingleton.recode.hg38.bcf")
					.exists()) {
		System.out.println("chrX bcf nonPAR file not available");
		return;
	}


	String configFolder = "test-data/configs/hapmap-chrX";
	String inputFolder = "test-data/data/chrX-unphased";

	// create workflow context
	WorkflowTestContext context = buildContext(inputFolder, "phase1");
	
	context.setInput("mode", "phasing");

	// run qc to create chunkfile
	QcStatisticsMock qcStats = new QcStatisticsMock(configFolder);
	boolean result = run(context, qcStats);

	assertTrue(result);

	// add panel to hdfs
	importRefPanel(FileUtil.path(configFolder, "ref-panels"));
	// importMinimacMap("test-data/B38_MAP_FILE.map");
	importBinaries("files/bin");

	// run imputation
	ImputationMinimac3Mock imputation = new ImputationMinimac3Mock(configFolder);
	result = run(context, imputation);
	assertTrue(result);

	// run export
	CompressionEncryptionMock export = new CompressionEncryptionMock("files");
	result = run(context, export);
	assertTrue(result);

	ZipFile zipFile = new ZipFile("test-data/tmp/local/chr_X.zip", PASSWORD.toCharArray());
	zipFile.extractAll("test-data/tmp");

	VcfFile vcfFile = VcfFileUtil.load("test-data/tmp/chrX.phased.vcf.gz", 100000000, false);
	
	assertEquals(true, vcfFile.isPhased());
	
	VCFFileReader vcfReader = new VCFFileReader(new File(vcfFile.getVcfFilename()), false);
	
	CloseableIterator<VariantContext> it = vcfReader.iterator();

	while (it.hasNext()) {

		VariantContext line = it.next();

		if (line.getStart() == 44322058) {
			assertEquals("A", line.getGenotype("HG00096").getGenotypeString());
			System.out.println(line.getGenotype("HG00097").getGenotypeString());
			assertEquals("A|A", line.getGenotype("HG00097").getGenotypeString());
		}
	}
	
	vcfReader.close();

	FileUtil.deleteDirectory("test-data/tmp");

}

Source File: BamTagOfTagCounts.java From Drop-seq with MIT License

4 votes

public TagOfTagResults<String,String> getResults (final File inputBAM, final String primaryTag, final String secondaryTag, final boolean filterPCRDuplicates, final Integer readQuality) {
	TagOfTagResults<String,String> result = new TagOfTagResults<>();
	SamReader reader = SamReaderFactory.makeDefault().open(inputBAM);
	CloseableIterator<SAMRecord> iter = CustomBAMIterators.getReadsInTagOrder(reader, primaryTag);
	CloserUtil.close(reader);
	String currentTag="";
	Set<String> otherTagCollection=new HashSet<>();

	ProgressLogger progress = new ProgressLogger(log);

	while (iter.hasNext()) {
		SAMRecord r = iter.next();
		progress.record(r);
		// skip reads that don't pass filters.
		boolean discardResult=(filterPCRDuplicates && r.getDuplicateReadFlag()) || r.getMappingQuality()<readQuality || r.isSecondaryOrSupplementary();
		// short circuit if read is below the quality to be considered.
		if (discardResult) continue;

		Object d = r.getAttribute(secondaryTag);
		// short circuit if there's no tag for this read.
		if (d==null) continue;

		String data=null;

		if (d instanceof String)
			data=r.getStringAttribute(secondaryTag);
		else if (d instanceof Integer)
			data=Integer.toString(r.getIntegerAttribute(secondaryTag));


		String newTag = r.getStringAttribute(primaryTag);
		if (newTag==null)
			newTag="";

		// if you see a new tag.
		if (!currentTag.equals(newTag)) {
			// write out tag results, if any.
			if (!currentTag.equals("") && otherTagCollection.size()>0)
				result.addEntries(currentTag, otherTagCollection);
			currentTag=newTag;
			otherTagCollection.clear();
			if (!discardResult) otherTagCollection=addTagToCollection (data,otherTagCollection);

		} else // gather stats
		if (!discardResult) otherTagCollection=addTagToCollection (data,otherTagCollection);
	}
	if (otherTagCollection.size()>0)
		result.addEntries(currentTag, otherTagCollection);

	return (result);
}

Source File: SomaticLocusCaller.java From abra2 with MIT License

4 votes

private Counts getCounts(SamReader reader, LocusInfo locus) {
		
		int depth = 0;
		int altCount = 0;
		int refCount = 0;
		
		CloseableIterator<SAMRecord> iter = reader.queryOverlapping(locus.chromosome, locus.posStart, locus.posStop);
		while (iter.hasNext()) {
			SAMRecord read = iter.next();
			if (!read.getDuplicateReadFlag()) {
				depth += 1;
				
				Object[] baseAndQual = getBaseAndQualAtPosition(read, locus.posStart);
				Character base = (Character) baseAndQual[0];
				int baseQual = (Integer) baseAndQual[1];
				Character refBase = c2r.getSequence(locus.chromosome, locus.posStart, 1).charAt(0);
				
				// Override input with actual reference
//				locus.ref = new String(new char[] { refBase });
				locus.actualRef = new String(new char[] { refBase });
				
				if (locus.isIndel()) {
					if (hasIndel(read, locus)) {
						altCount += 1;
					} else if (!base.equals('N') && base.equals(refBase)) {
						refCount += 1;
					}
				} else if (baseQual >= minBaseQual) {
					
					if (!base.equals('N') && base.equals(refBase)) {
						refCount += 1;
					} else if (base.equals(locus.alt.charAt(0))) {
						altCount += 1;
					}
				}
			}
		}
		
		iter.close();
		
		return new Counts(refCount, altCount, depth);
	}

Source File: BamToDetailsConversion.java From chipster with MIT License

4 votes

/**
 * Find reads in a given range.
 * 
 * <p>
 * TODO add cigar to the list of returned values
 * <p>
 * TODO add pair information to the list of returned values
 * 
 * @param request
 * @return
 * @throws InterruptedException 
 */
@Override
protected void processDataRequest(DataRequest request) throws InterruptedException {
	
	// Read the given region
	CloseableIterator<SAMRecord> iterator = dataSource.query(request.start.chr, request.start.bp.intValue(), request.end.bp.intValue());
	
	// Produce results
	while (iterator.hasNext()) {

		List<Feature> responseList = new LinkedList<Feature>();

		// Split results into chunks
		for (int c = 0; c < RESULT_CHUNK_SIZE && iterator.hasNext(); c++) {
			SAMRecord record = iterator.next();

			// Region for this read
			Region recordRegion = new Region((long) record.getAlignmentStart(), (long) record.getAlignmentEnd(), request.start.chr);

			// Values for this read
			LinkedHashMap<DataType, Object> values = new LinkedHashMap<DataType, Object>();

			Feature read = new Feature(recordRegion, values);

			if (request.getRequestedContents().contains(DataType.ID)) {
				values.put(DataType.ID, record.getReadName());
			}

			if (request.getRequestedContents().contains(DataType.STRAND)) {
				values.put(DataType.STRAND, BamUtils.getStrand(record, coverageType));					
			}

			if (request.getRequestedContents().contains(DataType.QUALITY)) {
				values.put(DataType.QUALITY, record.getBaseQualityString());
			}

			if (request.getRequestedContents().contains(DataType.CIGAR)) {
				Cigar cigar = new Cigar(read, record.getCigar());
				values.put(DataType.CIGAR, cigar);
			}

			// TODO Deal with "=" and "N" in read string
			if (request.getRequestedContents().contains(DataType.SEQUENCE)) {
				String seq = record.getReadString();
				values.put(DataType.SEQUENCE, seq);
			}

			if (request.getRequestedContents().contains(DataType.MATE_POSITION)) {
				
				BpCoord mate = new BpCoord((Long)(long)record.getMateAlignmentStart(),
						new Chromosome(record.getMateReferenceName()));
				
				values.put(DataType.MATE_POSITION, mate);
			}
			
			if (request.getRequestedContents().contains(DataType.BAM_TAG_NH)) {
				Object ng = record.getAttribute("NH");
				if (ng != null) {
					values.put(DataType.BAM_TAG_NH, (Integer)record.getAttribute("NH"));
				}
			}
			
			/*
			 * NOTE! RegionContents created from the same read area has to be equal in methods equals, hash and compareTo. Primary types
			 * should be ok, but objects (including tables) has to be handled in those methods separately. Otherwise tracks keep adding
			 * the same reads to their read sets again and again.
			 */
			responseList.add(read);
		}

		// Send result			
		super.createDataResult(new DataResult(request.getStatus(), responseList));			
	}

	// We are done
	iterator.close();
}

Source File: MergePedIntoVcf.java From picard with MIT License

4 votes

/**
 * Writes out the VariantContext objects in the order presented to the supplied output file
 * in VCF format.
 */
private void writeVcf(final CloseableIterator<VariantContext> variants,
                      final File output,
                      final SAMSequenceDictionary dict,
                      final VCFHeader vcfHeader, ZCallPedFile zCallPedFile) {

    try(VariantContextWriter writer = new VariantContextWriterBuilder()
            .setOutputFile(output)
            .setReferenceDictionary(dict)
            .setOptions(VariantContextWriterBuilder.DEFAULT_OPTIONS)
            .build()) {
        writer.writeHeader(vcfHeader);
        while (variants.hasNext()) {
            final VariantContext context = variants.next();

            final VariantContextBuilder builder = new VariantContextBuilder(context);
            if (zCallThresholds.containsKey(context.getID())) {
                final String[] zThresh = zCallThresholds.get(context.getID());
                builder.attribute(InfiniumVcfFields.ZTHRESH_X, zThresh[0]);
                builder.attribute(InfiniumVcfFields.ZTHRESH_Y, zThresh[1]);
            }
            final Genotype originalGenotype = context.getGenotype(0);
            final Map<String, Object> newAttributes = originalGenotype.getExtendedAttributes();
            final VCFEncoder vcfEncoder = new VCFEncoder(vcfHeader, false, false);
            final Map<Allele, String> alleleMap = vcfEncoder.buildAlleleStrings(context);

            final String zCallAlleles = zCallPedFile.getAlleles(context.getID());
            if (zCallAlleles == null) {
                throw new PicardException("No zCall alleles found for snp " + context.getID());
            }
            final List<Allele> zCallPedFileAlleles = buildNewAllelesFromZCall(zCallAlleles, context.getAttributes());
            newAttributes.put(InfiniumVcfFields.GTA, alleleMap.get(originalGenotype.getAllele(0)) + UNPHASED + alleleMap.get(originalGenotype.getAllele(1)));
            newAttributes.put(InfiniumVcfFields.GTZ, alleleMap.get(zCallPedFileAlleles.get(0)) + UNPHASED + alleleMap.get(zCallPedFileAlleles.get(1)));

            final Genotype newGenotype = GenotypeBuilder.create(originalGenotype.getSampleName(), zCallPedFileAlleles,
                    newAttributes);
            builder.genotypes(newGenotype);
            logger.record("0", 0);
            // AC, AF, and AN are recalculated here
            VariantContextUtils.calculateChromosomeCounts(builder, false);
            final VariantContext newContext = builder.make();
            writer.add(newContext);
        }
    }
}

Source File: ViewSam.java From picard with MIT License

4 votes

/**
 * This is factored out of doWork only for unit testing.
 */
int writeSamText(PrintStream printStream) {
    try {
        final CloseableIterator<SAMRecord> samRecordsIterator;
        final SamReader samReader = SamReaderFactory.makeDefault()
                .referenceSequence(REFERENCE_SEQUENCE)
                .open(SamInputResource.of(INPUT));

        // if we are only using the header or we aren't using intervals, then use the reader as the iterator.
        // otherwise use the SamRecordIntervalIteratorFactory to make an interval-ed iterator
        if (HEADER_ONLY || INTERVAL_LIST == null) {
            samRecordsIterator = samReader.iterator();
        } else {
            IOUtil.assertFileIsReadable(INTERVAL_LIST);

            final List<Interval> intervals = IntervalList.fromFile(INTERVAL_LIST).uniqued().getIntervals();
            samRecordsIterator = new SamRecordIntervalIteratorFactory().makeSamRecordIntervalIterator(samReader, intervals, samReader.hasIndex());
        }
        final AsciiWriter writer = new AsciiWriter(printStream);
        final SAMFileHeader header = samReader.getFileHeader();
        if (!RECORDS_ONLY) {
            if (header.getTextHeader() != null) {
                writer.write(header.getTextHeader());
            } else {
                // Headers that are too large are not retained as text, so need to regenerate text
                new SAMTextHeaderCodec().encode(writer, header, true);
            }
        }
        if (!HEADER_ONLY) {
            while (samRecordsIterator.hasNext()) {
                final SAMRecord rec = samRecordsIterator.next();

                if (printStream.checkError()) {
                    return 1;
                }

                if (this.ALIGNMENT_STATUS == AlignmentStatus.Aligned && rec.getReadUnmappedFlag()) continue;
                if (this.ALIGNMENT_STATUS == AlignmentStatus.Unaligned && !rec.getReadUnmappedFlag()) continue;

                if (this.PF_STATUS == PfStatus.PF && rec.getReadFailsVendorQualityCheckFlag()) continue;
                if (this.PF_STATUS == PfStatus.NonPF && !rec.getReadFailsVendorQualityCheckFlag()) continue;
                writer.write(rec.getSAMString());
            }
        }
        writer.flush();
        if (printStream.checkError()) {
            return 1;
        }
        CloserUtil.close(writer);
        CloserUtil.close(samRecordsIterator);
        return 0;
    } catch (IOException e) {
        throw new PicardException("Exception writing SAM text", e);
    }
}

Source File: MakeSitesOnlyVcf.java From picard with MIT License

4 votes

@Override
protected int doWork() {
    IOUtil.assertFileIsReadable(INPUT);
    IOUtil.assertFileIsWritable(OUTPUT);

    final VCFFileReader reader = new VCFFileReader(INPUT, false);
    final VCFHeader inputVcfHeader = new VCFHeader(reader.getFileHeader().getMetaDataInInputOrder());
    final SAMSequenceDictionary sequenceDictionary = inputVcfHeader.getSequenceDictionary();

    if (CREATE_INDEX && sequenceDictionary == null) {
        throw new PicardException("A sequence dictionary must be available (either through the input file or by setting it explicitly) when creating indexed output.");
    }

    final ProgressLogger progress = new ProgressLogger(Log.getInstance(MakeSitesOnlyVcf.class), 10000);

    // Setup the site-only file writer
    final VariantContextWriterBuilder builder = new VariantContextWriterBuilder()
            .setOutputFile(OUTPUT)
            .setReferenceDictionary(sequenceDictionary);
    if (CREATE_INDEX)
        builder.setOption(Options.INDEX_ON_THE_FLY);
    else
        builder.unsetOption(Options.INDEX_ON_THE_FLY);
    final VariantContextWriter writer = builder.build();

    final VCFHeader header = new VCFHeader(inputVcfHeader.getMetaDataInInputOrder(), SAMPLE);
    writer.writeHeader(header);

    // Go through the input, strip the records and write them to the output
    final CloseableIterator<VariantContext> iterator = reader.iterator();
    while (iterator.hasNext()) {
        final VariantContext full = iterator.next();
        final VariantContext site = subsetToSamplesWithOriginalAnnotations(full, SAMPLE);
        writer.add(site);
        progress.record(site.getContig(), site.getStart());
    }

    CloserUtil.close(iterator);
    CloserUtil.close(reader);
    writer.close();

    return 0;
}

Java Code Examples for htsjdk.samtools.util.CloseableIterator#hasNext()