htsjdk.tribble.readers.LineIterator Java Examples
The following examples show how to use htsjdk.tribble.readers.LineIterator.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: VariantHotspotFile.java From hmftools with GNU General Public License v3.0 | 6 votes |
/**
 * Reads a VCF file and groups the hotspots built from its records by chromosome.
 * Records whose contig is not accepted by {@code HumanChromosome.contains} are skipped.
 *
 * @param fileName location of the VCF file handed to the feature reader
 * @return multimap from chromosome to the hotspots created via {@code fromVariantContext}
 * @throws IOException if iterating or closing the reader fails
 */
@NotNull
public static ListMultimap<Chromosome, VariantHotspot> readFromVCF(@NotNull final String fileName) throws IOException {
    final ListMultimap<Chromosome, VariantHotspot> hotspotsByChromosome = ArrayListMultimap.create();

    try (final AbstractFeatureReader<VariantContext, LineIterator> vcfReader =
            AbstractFeatureReader.getFeatureReader(fileName, new VCFCodec(), false)) {
        for (final VariantContext context : vcfReader.iterator()) {
            // Guard clause: ignore anything outside the known chromosomes.
            if (!HumanChromosome.contains(context.getContig())) {
                continue;
            }
            hotspotsByChromosome.put(HumanChromosome.fromString(context.getContig()), fromVariantContext(context));
        }
    }

    return hotspotsByChromosome;
}
Example #2
Source File: TableCodec.java From gatk with BSD 3-Clause "New" or "Revised" License | 6 votes |
@Override public List<String> readActualHeader(final LineIterator reader) { boolean isFirst = true; while (reader.hasNext()) { final String line = reader.peek(); // Peek to avoid reading non-header data if ( isFirst && ! line.startsWith(COMMENT_DELIMITER) && headerDelimiter != null && ! line.startsWith(headerDelimiter) ) { throw new UserException.MalformedFile("TableCodec file does not have a header"); } isFirst &= line.startsWith(COMMENT_DELIMITER); if (headerDelimiter == null || line.startsWith(headerDelimiter)) { reader.next(); // "Commit" the peek if (!header.isEmpty()) { throw new UserException.MalformedFile("Input table file seems to have two header lines. The second is = " + line); } final String[] spl = line.split(delimiter_regex); Collections.addAll(header, spl); return header; } else if (line.startsWith(COMMENT_DELIMITER)) { reader.next(); // "Commit" the peek } else { break; } } return header; }
Example #3
Source File: RefSeqCodec.java From gatk with BSD 3-Clause "New" or "Revised" License | 6 votes |
@Override public Feature decodeLoc(final LineIterator lineIterator) { final String line = lineIterator.next(); if (line.startsWith(COMMENT_LINE_CHARACTER)){ return null; } final String fields[] = line.split(LINE_DELIMITER); if (fields.length < MINIMUM_LINE_FIELD_COUNT){ throw new TribbleException("RefSeq (decodeLoc) : Unable to parse line -> " + line + ", we expected at least 16 columns, we saw " + fields.length); } final String contig_name = fields[CONTIG_INDEX]; try { return new RefSeqFeature(new SimpleInterval(contig_name, Integer.parseInt(fields[INTERVAL_LEFT_BOUND_INDEX])+1, Integer.parseInt(fields[INTERVAL_RIGHT_BOUND_INDEX]))); //TODO maybe except for malformed simple intervals? Genome locs had that } catch ( NumberFormatException e ) { throw new UserException.MalformedFile("Could not parse location from line: " + line); } }
Example #4
Source File: GencodeGtfCodec.java From gatk with BSD 3-Clause "New" or "Revised" License | 6 votes |
@Override List<String> readActualHeader(final LineIterator reader) { // Clear our version number too: versionNumber = -1; // Read in the header lines: ingestHeaderLines(reader); // Validate our header: validateHeader(header, true); // Set our version number: setVersionNumber(); // Set our line number to be the line of the first actual Feature: currentLineNum = header.size() + 1; return header; }
Example #5
Source File: EnsemblGtfCodec.java From gatk with BSD 3-Clause "New" or "Revised" License | 6 votes |
@Override List<String> readActualHeader(final LineIterator reader) { // Read in the header lines: ingestHeaderLines(reader); // Validate our header: validateHeader(header, true); // Set our line number to be the line of the first actual Feature: currentLineNum = header.size() + 1; // Set up our version number: populateVersionNumber(); return header; }
Example #6
Source File: TableCodecUnitTest.java From gatk with BSD 3-Clause "New" or "Revised" License | 6 votes |
/**
 * Reads a header followed by one data line, then decodes the data line and checks
 * both the named columns and the location parsed from its first field.
 */
@Test
public void testDecodeHeader2() {
    final TableCodec codec = new TableCodec();
    final String dataLine = "1:1 1 2 3";
    final LineIterator lines = new LineIteratorImpl(makeReader(asList("HEADER a b c", dataLine)));

    final List<String> headerColumns = codec.readActualHeader(lines);
    Assert.assertEquals(headerColumns, asList("HEADER", "a", "b", "c"));

    final TableFeature feature = codec.decode(dataLine);

    // Column values looked up by header name.
    Assert.assertEquals(feature.get("a"), "1");
    Assert.assertEquals(feature.get("b"), "2");
    Assert.assertEquals(feature.get("c"), "3");

    // Location taken from the first field.
    Assert.assertEquals(feature.getLocation().getContig(), "1");
    Assert.assertEquals(feature.getContig(), "1");
    Assert.assertEquals(feature.getLocation().getStart(), 1);
    Assert.assertEquals(feature.getLocation().getEnd(), 1);
}
Example #7
Source File: GenotypeGVCFsIntegrationTest.java From gatk-protected with BSD 3-Clause "New" or "Revised" License | 6 votes |
/**
 * Returns a list of VariantContext records from a VCF file.
 *
 * The underlying file stream is opened in a try-with-resources block so that it is
 * closed on every path, including when header parsing or decoding throws.
 *
 * @param vcfFile VCF file
 * @return list of VariantContext records, in the order the lines were read
 * @throws IOException if the file does not exist or can not be opened
 */
private static List<VariantContext> getVariantContexts(final File vcfFile) throws IOException {
    final VCFCodec codec = new VCFCodec();
    final List<VariantContext> VCs = new ArrayList<>();

    try (final FileInputStream s = new FileInputStream(vcfFile)) {
        final LineIterator lineIteratorVCF = codec.makeSourceFromStream(new PositionalBufferedStream(s));
        // The header must be consumed first so the codec can decode the records.
        codec.readHeader(lineIteratorVCF);

        while (lineIteratorVCF.hasNext()) {
            final String line = lineIteratorVCF.next();
            Assert.assertFalse(line == null);
            VCs.add(codec.decode(line));
        }
    }

    return VCs;
}
Example #8
Source File: GencodeFuncotationFactoryUnitTest.java From gatk with BSD 3-Clause "New" or "Revised" License | 6 votes |
private List<Feature> getCntn4Features() throws IOException { final GencodeGtfCodec gencodeGtfCodec = new GencodeGtfCodec(); Assert.assertTrue(gencodeGtfCodec.canDecode(CNTN4_GENCODE_ANNOTATIONS_FILE_NAME)); final List<Feature> gencodeFeatures = new ArrayList<>(); try (BufferedInputStream bufferedInputStream = new BufferedInputStream( new FileInputStream(CNTN4_GENCODE_ANNOTATIONS_FILE_NAME) ) ) { // Get the line iterator: final LineIterator lineIterator = gencodeGtfCodec.makeSourceFromStream(bufferedInputStream); // Get the header (required for the read to work correctly): gencodeGtfCodec.readHeader(lineIterator); while (lineIterator.hasNext()) { gencodeFeatures.add(gencodeGtfCodec.decode(lineIterator)); } Assert.assertTrue(gencodeFeatures.size() > 1); } return gencodeFeatures; }
Example #9
Source File: AmberSiteFactory.java From hmftools with GNU General Public License v3.0 | 6 votes |
/**
 * Builds Amber sites from the records of a VCF file, grouped by chromosome.
 * Only records that are not filtered and whose contig is accepted by
 * {@code HumanChromosome.contains} are kept.
 *
 * @param vcfFile location of the VCF file handed to the feature reader
 * @return multimap from chromosome to the sites built from the kept records
 * @throws IOException if iterating or closing the reader fails
 */
@NotNull
public static ListMultimap<Chromosome, AmberSite> sites(@NotNull final String vcfFile) throws IOException {
    final ListMultimap<Chromosome, AmberSite> sitesByChromosome = ArrayListMultimap.create();

    try (final AbstractFeatureReader<VariantContext, LineIterator> vcfReader =
            getFeatureReader(vcfFile, new VCFCodec(), false)) {
        for (final VariantContext record : vcfReader.iterator()) {
            // Skip filtered records and records on unknown contigs.
            if (!record.isNotFiltered() || !HumanChromosome.contains(record.getContig())) {
                continue;
            }

            final HumanChromosome chromosome = HumanChromosome.fromString(record.getContig());
            final AmberSite site = ImmutableAmberSite.builder()
                    .chromosome(record.getContig())
                    .position(record.getStart())
                    .ref(record.getReference().getBaseString())
                    .alt(record.getAlternateAllele(0).getBaseString())
                    .snpCheck(record.hasAttribute("SNPCHECK"))
                    .build();
            sitesByChromosome.put(chromosome, site);
        }
    }

    return sitesByChromosome;
}
Example #10
Source File: GenomicsDBImportIntegrationTest.java From gatk with BSD 3-Clause "New" or "Revised" License | 6 votes |
/**
 * Imports two GVCFs into a GenomicsDB workspace and checks that the sequence dictionary
 * read back from GenomicsDB equals the one in the first input GVCF's header.
 */
@Test
public void testPreserveContigOrderingInHeader() throws IOException {
    final String workspace = createTempDir("testPreserveContigOrderingInHeader-").getAbsolutePath() + "/workspace";
    final String firstGVCF = GENOMICSDB_TEST_DIR + "testHeaderContigLineSorting1.g.vcf";
    final String secondGVCF = GENOMICSDB_TEST_DIR + "testHeaderContigLineSorting2.g.vcf";

    ArrayList<SimpleInterval> intervals = new ArrayList<SimpleInterval>(
            Arrays.asList(new SimpleInterval("chr20", 17959479, 17959479)));

    writeToGenomicsDB(Arrays.asList(firstGVCF, secondGVCF), intervals, workspace, 0, false, 0, 1);

    try (final FeatureReader<VariantContext> genomicsDBFeatureReader =
                 getGenomicsDBFeatureReader(workspace, b38_reference_20_21);
         final AbstractFeatureReader<VariantContext, LineIterator> inputGVCFReader =
                 AbstractFeatureReader.getFeatureReader(GENOMICSDB_TEST_DIR + "testHeaderContigLineSorting1.g.vcf", new VCFCodec(), true)) {

        final VCFHeader genomicsDBHeader = (VCFHeader) genomicsDBFeatureReader.getHeader();
        final SAMSequenceDictionary dictionaryFromGenomicsDB = genomicsDBHeader.getSequenceDictionary();

        final VCFHeader inputGVCFHeader = (VCFHeader) inputGVCFReader.getHeader();
        final SAMSequenceDictionary dictionaryFromInputGVCF = inputGVCFHeader.getSequenceDictionary();

        Assert.assertEquals(dictionaryFromGenomicsDB, dictionaryFromInputGVCF,
                "Sequence dictionary from GenomicsDB does not match original sequence dictionary from input GVCF");
    }
}
Example #11
Source File: TableCodecUnitTest.java From gatk with BSD 3-Clause "New" or "Revised" License | 5 votes |
/**
 * A leading '#' comment line is skipped and the following line is returned as the header.
 */
@Test
public void testDecodeComment() {
    final TableCodec codec = new TableCodec();
    final LineIterator lines = new LineIteratorImpl(makeReader(asList("#HEADER a b c", "HEADER d e f")));

    final List<String> headerColumns = codec.readActualHeader(lines);

    Assert.assertEquals(headerColumns, asList("HEADER", "d", "e", "f"));
}
Example #12
Source File: TableCodecUnitTest.java From gatk with BSD 3-Clause "New" or "Revised" License | 5 votes |
/**
 * With two header-looking lines in the input, a single readActualHeader call
 * returns the columns of the first one.
 */
@Test
public void testTwoHeaders() {
    final TableCodec codec = new TableCodec();
    final LineIterator lines = new LineIteratorImpl(makeReader(asList("HEADER a b c", "HEADER d e f")));

    final List<String> headerColumns = codec.readActualHeader(lines);

    Assert.assertEquals(headerColumns, asList("HEADER", "a", "b", "c"));
}
Example #13
Source File: TableCodecUnitTest.java From gatk with BSD 3-Clause "New" or "Revised" License | 5 votes |
/**
 * When every input line is a '#' comment, the returned header is empty.
 */
@Test
public void testDecodeOnlyComments() {
    final TableCodec codec = new TableCodec();
    final LineIterator lines = new LineIteratorImpl(makeReader(asList("#HEADER a b c", "#HEADER d e f")));

    final List<String> headerColumns = codec.readActualHeader(lines);

    Assert.assertEquals(headerColumns, emptyList());
}
Example #14
Source File: EnsemblGtfCodecUnitTest.java From gatk with BSD 3-Clause "New" or "Revised" License | 5 votes |
@Test(dataProvider = "decodeTestProvider") public void testDecode( final String filePath, final List<GencodeGtfFeature> expected, final String expectedUcscVersion) throws IOException { final EnsemblGtfCodec ensemblGtfCodec = new EnsemblGtfCodec(); try (final BufferedInputStream bufferedInputStream = new BufferedInputStream( new FileInputStream(filePath) ) ) { // Get the line iterator: final LineIterator lineIterator = ensemblGtfCodec.makeSourceFromStream(bufferedInputStream); // Get the header (required for the read to work correctly): ensemblGtfCodec.readHeader(lineIterator); // Setup our expected data iterator: final Iterator<GencodeGtfFeature> expectedIterator = expected.iterator(); // Now read our features and make sure they're what we expect: // NOTE: We only decode the number of features expect to see. int numDecoded = 0; while ( lineIterator.hasNext() && (numDecoded < expected.size()) ) { final GencodeGtfFeature feature = ensemblGtfCodec.decode(lineIterator); Assert.assertTrue(expectedIterator.hasNext()); for ( final GencodeGtfFeature subFeature : feature.getAllFeatures() ) { Assert.assertEquals(subFeature.getUcscGenomeVersion(), expectedUcscVersion); } final GencodeGtfFeature expectedFeature = expectedIterator.next(); // Big equals check: Assert.assertEquals(feature, expectedFeature); ++numDecoded; } } }
Example #15
Source File: TableCodecUnitTest.java From gatk with BSD 3-Clause "New" or "Revised" License | 5 votes |
/**
 * Input whose only line is a data line must be rejected with
 * {@code UserException.MalformedFile}.
 */
@Test(expectedExceptions = UserException.MalformedFile.class)
public void testDecodeFailsNoHeader() {
    final TableCodec codec = new TableCodec();
    final LineIterator lines = new LineIteratorImpl(makeReader(asList("1:1 1 2 3")));

    codec.readActualHeader(lines);
}
Example #16
Source File: TableCodecUnitTest.java From gatk with BSD 3-Clause "New" or "Revised" License | 5 votes |
/**
 * A single header line is split into its column names.
 */
@Test
public void testDecodeHeader() {
    final TableCodec codec = new TableCodec();
    final LineIterator lines = new LineIteratorImpl(makeReader(asList("HEADER a b c")));

    final List<String> headerColumns = codec.readActualHeader(lines);

    Assert.assertEquals(headerColumns, asList("HEADER", "a", "b", "c"));
}
Example #17
Source File: LineIteratorReaderUnitTest.java From gatk with BSD 3-Clause "New" or "Revised" License | 5 votes |
/**
 * Drains a {@code LineIteratorReader} through a fixed-size char buffer and checks that
 * the concatenated chunks reproduce the original text.
 *
 * @param textSize   unused here; supplied by the data provider
 * @param lineSize   unused here; supplied by the data provider
 * @param bufferSize size of the char buffer passed to each read call
 * @param text       the text served by the line iterator and expected back
 */
@Test(dataProvider="testParameter")
public void testRead(@SuppressWarnings("unused") final int textSize,
                     @SuppressWarnings("unused") final int lineSize,
                     final int bufferSize,
                     final String text) {
    final LineIteratorReader reader = new LineIteratorReader(new TestLineIterator(text));
    final char[] chunk = new char[bufferSize];
    final StringBuilder collected = new StringBuilder();

    // A negative return value from read ends the loop.
    for (int charsRead = reader.read(chunk, 0, bufferSize);
         charsRead >= 0;
         charsRead = reader.read(chunk, 0, bufferSize)) {
        collected.append(chunk, 0, charsRead);
    }

    Assert.assertEquals(collected.toString(), text);
}
Example #18
Source File: LongISLNDReadAlignmentMap.java From varsim with BSD 2-Clause "Simplified" License | 5 votes |
public LongISLNDReadAlignmentMap(final Collection<String> readAlignmentMapFiles) throws IOException { for (final String readAlignmentMapFileName : readAlignmentMapFiles) { log.info("Reading in read map from " + readAlignmentMapFileName); final AbstractFeatureReader<BEDFeature, LineIterator> featureReader = AbstractFeatureReader.getFeatureReader(readAlignmentMapFileName, new BEDCodec(), false); try { final CloseableTribbleIterator<BEDFeature> featureIterator = featureReader.iterator(); while (featureIterator.hasNext()) { final BEDFeature feature = featureIterator.next(); readAlignmentMap.put(feature.getName(), new LongISLNDReadMapRecord(feature).toReadMapRecord()); } } finally { featureReader.close(); } } }
Example #19
Source File: SimpleReference.java From varsim with BSD 2-Clause "Simplified" License | 5 votes |
/**
 * Sums the non-N base counts over every region listed in a BED file.
 *
 * The file stream is opened in a try-with-resources block so that it is closed on every
 * path; the original left it open.
 *
 * @param regions BED file listing the regions to count over
 * @return total of {@code getNumNonNBases(start, end)} across all decoded regions
 * @throws IOException if the file cannot be opened, read or closed
 */
public long getNumNonNBases(final File regions) throws IOException {
    loadAllSequences();
    long count = 0;
    final FeatureCodec<BEDFeature, LineIterator> bedCodec = new BEDCodec(BEDCodec.StartOffset.ONE);

    try (final FileInputStream regionStream = new FileInputStream(regions)) {
        final LineIterator lineIterator = new AsciiLineReaderIterator(new AsciiLineReader(regionStream));
        while (lineIterator.hasNext()) {
            final BEDFeature bedFeature = bedCodec.decode(lineIterator);
            // NOTE(review): htsjdk's BEDCodec is understood to return null for blank,
            // comment, "track" and "browser" lines; skip those rather than hit an NPE.
            if (bedFeature == null) {
                continue;
            }
            count += data.get(new ChrString(bedFeature.getContig()))
                    .getNumNonNBases(bedFeature.getStart(), bedFeature.getEnd());
        }
    }

    return count;
}
Example #20
Source File: SAMPileupCodec.java From gatk with BSD 3-Clause "New" or "Revised" License | 5 votes |
/**
 * Location-only decode, for fast indexing: consumes one line and builds a
 * single-position feature from its first two fields.
 *
 * @param lineIterator source of lines; exactly one line is consumed
 * @return a {@code SimpleFeature} whose start and end are both the parsed position
 */
@Override
public Feature decodeLoc(final LineIterator lineIterator) throws IOException {
    final String line = lineIterator.next();
    // Limit of -1 keeps trailing empty fields in the split result.
    final String[] fields = SPLIT_PATTERN.split(line, -1);
    final int position = parseInteger(fields[1], "position");
    return new SimpleFeature(fields[0], position, position);
}
Example #21
Source File: AnnotatedIntervalCodec.java From gatk with BSD 3-Clause "New" or "Revised" License | 5 votes |
/**
 * Delegates header parsing to {@code xsvLocatableTableCodec}, then builds and stores an
 * {@code AnnotatedIntervalHeader} from the values that codec exposes afterwards.
 *
 * @param reader line source positioned at the start of the file
 * @return the newly built header, also kept in the {@code header} field
 */
@Override
public AnnotatedIntervalHeader readActualHeader(final LineIterator reader) {
    // The delegate must read the header first; the getters below are called after it.
    xsvLocatableTableCodec.readActualHeader(reader);

    header = new AnnotatedIntervalHeader(
            xsvLocatableTableCodec.getContigColumn(),
            xsvLocatableTableCodec.getStartColumn(),
            xsvLocatableTableCodec.getEndColumn(),
            xsvLocatableTableCodec.getHeaderWithoutLocationColumns(),
            xsvLocatableTableCodec.renderSamFileHeader());

    return header;
}
Example #22
Source File: StructuralVariantFileLoader.java From hmftools with GNU General Public License v3.0 | 5 votes |
/**
 * Feeds every record of a VCF file into a {@code StructuralVariantFactory} built with
 * the given filter and returns the factory's results.
 *
 * @param vcfFileLocation location of the VCF file handed to the feature reader
 * @param filter          filter passed to the factory
 * @return whatever {@code StructuralVariantFactory.results()} yields after all records were added
 * @throws IOException if iterating or closing the reader fails
 */
@NotNull
public static List<StructuralVariant> fromFile(@NotNull String vcfFileLocation, @NotNull VariantContextFilter filter)
        throws IOException {
    final StructuralVariantFactory variantFactory = new StructuralVariantFactory(filter);

    try (final AbstractFeatureReader<VariantContext, LineIterator> vcfReader =
            AbstractFeatureReader.getFeatureReader(vcfFileLocation, new VCFCodec(), false)) {
        for (final VariantContext context : vcfReader.iterator()) {
            variantFactory.addVariantContext(context);
        }
    }

    return variantFactory.results();
}
Example #23
Source File: SomaticVariantFactory.java From hmftools with GNU General Public License v3.0 | 5 votes |
/**
 * Streams somatic variants from a VCF file to {@code consumer}.
 *
 * Before any record is read the header is checked: the tumor sample, and the reference
 * and RNA samples when given, must be present, and the "AD" format line must exist.
 * Records passing {@code filter} are converted with {@code createVariant} and, when a
 * variant is produced, handed to the consumer.
 *
 * @param tumor     tumor sample name; must be in the VCF header
 * @param reference optional reference sample name; checked only when non-null
 * @param rna       optional RNA sample name; checked only when non-null
 * @param vcfFile   location of the VCF file handed to the feature reader
 * @param consumer  receives each created variant
 * @throws IOException              if iterating or closing the reader fails
 * @throws IllegalArgumentException if a required sample or the "AD" format line is missing
 */
public void fromVCFFile(@NotNull final String tumor, @Nullable final String reference, @Nullable final String rna,
        @NotNull final String vcfFile, Consumer<SomaticVariant> consumer) throws IOException {
    // The unused intermediate list from the original was dropped: variants are streamed straight to the consumer.
    try (final AbstractFeatureReader<VariantContext, LineIterator> reader = getFeatureReader(vcfFile, new VCFCodec(), false)) {
        final VCFHeader header = (VCFHeader) reader.getHeader();

        if (!sampleInFile(tumor, header)) {
            throw new IllegalArgumentException("Sample " + tumor + " not found in vcf file " + vcfFile);
        }

        if (reference != null && !sampleInFile(reference, header)) {
            throw new IllegalArgumentException("Sample " + reference + " not found in vcf file " + vcfFile);
        }

        if (rna != null && !sampleInFile(rna, header)) {
            throw new IllegalArgumentException("Sample " + rna + " not found in vcf file " + vcfFile);
        }

        if (!header.hasFormatLine("AD")) {
            throw new IllegalArgumentException("Allelic depths is a required format field in vcf file " + vcfFile);
        }

        for (VariantContext variant : reader.iterator()) {
            if (filter.test(variant)) {
                createVariant(tumor, reference, rna, variant).ifPresent(consumer);
            }
        }
    }
}
Example #24
Source File: SimpleCountCodec.java From gatk with BSD 3-Clause "New" or "Revised" License | 5 votes |
@Override public SampleLocatableMetadata readActualHeader(final LineIterator reader) { final List<String> samHeaderLines = new ArrayList<>(SAM_HEADER_LINES_INITIAL_CAPACITY); //we check that the SAM header lines and the column header line are present in the correct order, then return the mandatory column header boolean isSAMHeaderPresent = false; while (reader.hasNext()) { final String line = reader.peek(); if (line.startsWith(CopyNumberFormatsUtils.COMMENT_PREFIX)) { isSAMHeaderPresent = true; samHeaderLines.add(line); reader.next(); } else { if (!isSAMHeaderPresent) { throw new UserException.MalformedFile("SAM header lines must be at the beginning of the file."); } else if (!line.startsWith(COLUMN_HEADER_STRING)) { throw new UserException.MalformedFile("File does not have a column header."); } else { //we just peeked at the column header line, so we need to advance past it reader.next(); break; } } } final SAMFileHeader samFileHeader = new SAMTextHeaderCodec() .decode(BufferedLineReader.fromString(StringUtils.join(samHeaderLines, System.lineSeparator())), null); return MetadataUtils.fromHeader(samFileHeader, Metadata.Type.SAMPLE_LOCATABLE); }
Example #25
Source File: BEDFileLoader.java From hmftools with GNU General Public License v3.0 | 5 votes |
/**
 * Loads the regions of a BED file into a sorted multimap keyed by chromosome.
 *
 * Entries with {@code end < start} are logged and skipped. An entry on the same
 * chromosome as the last accepted region whose start is not beyond that region's end is
 * also logged and skipped.
 *
 * @param bedFile location of the BED file handed to the feature reader
 * @return accepted regions grouped by chromosome
 * @throws IOException if iterating or closing the reader fails
 */
@NotNull
public static SortedSetMultimap<String, GenomeRegion> fromBedFile(@NotNull String bedFile) throws IOException {
    final SortedSetMultimap<String, GenomeRegion> regionsByChromosome = TreeMultimap.create();

    String lastChromosome = null;
    GenomeRegion lastRegion = null;

    try (final AbstractFeatureReader<BEDFeature, LineIterator> bedReader = getFeatureReader(bedFile, new BEDCodec(), false)) {
        for (final BEDFeature entry : bedReader.iterator()) {
            final String chromosome = entry.getContig();
            final long start = entry.getStart();
            final long end = entry.getEnd();

            if (end < start) {
                LOGGER.warn("Invalid genome region found in chromosome {}: start={}, end={}", chromosome, start, end);
                continue;
            }

            final GenomeRegion region = GenomeRegions.create(chromosome, start, end);

            final boolean overlapsLast =
                    lastRegion != null && chromosome.equals(lastChromosome) && lastRegion.end() >= start;
            if (overlapsLast) {
                LOGGER.warn("BED file is not sorted, please fix! Current={}, Previous={}", region, lastRegion);
                continue;
            }

            // Only accepted regions update the "last seen" state.
            regionsByChromosome.put(chromosome, region);
            lastChromosome = chromosome;
            lastRegion = region;
        }
    }

    return regionsByChromosome;
}
Example #26
Source File: GermlineVcfReader.java From hmftools with GNU General Public License v3.0 | 5 votes |
/**
 * Loads germline SVs from one VCF file and runs the configured post-processing steps.
 *
 * Every record is passed to {@code processVariant}. If that leaves
 * {@code mSampleGermlineSVs} empty, nothing further happens. Otherwise assembled links
 * are annotated and disruptions are checked when the corresponding config flags are set,
 * and the SVs are written out.
 *
 * The feature reader is opened in a try-with-resources block so that it is closed as
 * soon as reading finishes or fails; the original never closed it.
 *
 * @param vcfFile location of the VCF file handed to the feature reader
 */
private void processVcf(final String vcfFile)
{
    try
    {
        LOGGER.info("processing germline VCF({})", vcfFile);

        mSampleGermlineSVs.clear();
        mSvFactory = new StructuralVariantFactory(new AlwaysPassFilter());

        try (final AbstractFeatureReader<VariantContext, LineIterator> reader = AbstractFeatureReader.getFeatureReader(
                vcfFile, new VCFCodec(), false))
        {
            reader.iterator().forEach(x -> processVariant(x));
        }

        if(mSampleGermlineSVs.isEmpty())
            return;

        if (mConfig.LinkByAssembly)
            annotateAssembledLinks(mSvAssemblyData);

        if(mConfig.CheckDisruptions)
        {
            final String sampleId = mSampleGermlineSVs.get(0).SampleId;
            mGeneImpact.findDisruptiveVariants(sampleId, mSampleGermlineSVs);
        }

        writeSVs();
    }
    catch(IOException e)
    {
        LOGGER.error("error reading vcf({}): {}", vcfFile, e.toString());
    }
}
Example #27
Source File: MakeVcfSampleNameMap.java From picard with MIT License | 5 votes |
/**
 * Opens a VCF feature reader for the given path, addressed by its absolute URI string.
 *
 * @param variantPath path of the VCF to open
 * @return a reader created with a {@code VCFCodec}, no index requirement and identity wrappers
 * @throws PicardException wrapping any {@code TribbleException} raised while creating the reader
 */
private static AbstractFeatureReader<VariantContext, LineIterator> getReaderFromPath(final Path variantPath) {
    final String uri = variantPath.toAbsolutePath().toUri().toString();

    final AbstractFeatureReader<VariantContext, LineIterator> vcfReader;
    try {
        vcfReader = AbstractFeatureReader.getFeatureReader(
                uri, null, new VCFCodec(), false, Function.identity(), Function.identity());
    } catch (final TribbleException e) {
        throw new PicardException("Failed to create reader from " + uri, e);
    }
    return vcfReader;
}
Example #28
Source File: MakeVcfSampleNameMap.java From picard with MIT License | 5 votes |
/**
 * Reads the VCF header of the file at the given path, closing the reader afterwards.
 *
 * @param variantPath path of the VCF whose header is wanted
 * @return the non-null header
 * @throws PicardException if the header is null, or wrapping an {@code IOException}
 *         raised while reading
 */
private static VCFHeader getHeaderFromPath(final Path variantPath) {
    try (final AbstractFeatureReader<VariantContext, LineIterator> vcfReader = getReaderFromPath(variantPath)) {
        final VCFHeader vcfHeader = (VCFHeader) vcfReader.getHeader();
        if (vcfHeader != null) {
            return vcfHeader;
        }
        throw new PicardException("Null header found in " + variantPath.toUri() + ".");
    } catch (final IOException e) {
        throw new PicardException("Error while reading VCF header from " + variantPath.toUri(), e);
    }
}
Example #29
Source File: LineIteratorReaderUnitTest.java From gatk-protected with BSD 3-Clause "New" or "Revised" License | 5 votes |
/**
 * Drains a {@code LineIteratorReader} through a fixed-size char buffer and checks that
 * the concatenated chunks reproduce the original text.
 *
 * @param textSize   unused here; supplied by the data provider
 * @param lineSize   unused here; supplied by the data provider
 * @param bufferSize size of the char buffer passed to each read call
 * @param text       the text served by the line iterator and expected back
 */
@Test(dataProvider="testParameter")
public void testRead(@SuppressWarnings("unused") final int textSize,
                     @SuppressWarnings("unused") final int lineSize,
                     final int bufferSize,
                     final String text) {
    final LineIteratorReader reader = new LineIteratorReader(new TestLineIterator(text));
    final char[] chunk = new char[bufferSize];
    final StringBuilder collected = new StringBuilder();

    // A negative return value from read ends the loop.
    for (int charsRead = reader.read(chunk, 0, bufferSize);
         charsRead >= 0;
         charsRead = reader.read(chunk, 0, bufferSize)) {
        collected.append(chunk, 0, charsRead);
    }

    Assert.assertEquals(collected.toString(), text);
}
Example #30
Source File: TargetCodec.java From gatk-protected with BSD 3-Clause "New" or "Revised" License | 5 votes |
/**
 * Wraps the line iterator in a {@code LineIteratorReader}, builds a
 * {@code TargetTableReader} on top of it and returns that reader's columns.
 * Both readers are kept in fields.
 *
 * @param reader line source positioned at the start of the target table
 * @return the value of {@code sourceTargetTableReader.columns()}
 * @throws GATKException wrapping any {@code IOException} raised while creating the table reader
 */
@Override
public Object readActualHeader(final LineIterator reader) {
    sourceReader = new LineIteratorReader(reader);

    try {
        sourceTargetTableReader = new TargetTableReader(sourceReader);
        return sourceTargetTableReader.columns();
    } catch (final IOException ex) {
        throw new GATKException("cannot read target table", ex);
    }
}