htsjdk.tribble.bed.BEDFeature Java Examples
The following examples show how to use
htsjdk.tribble.bed.BEDFeature.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: ConvertBedToTargetFile.java From gatk-protected with BSD 3-Clause "New" or "Revised" License | 6 votes |
@Override protected Object doWork() { final FeatureCodec<? extends Feature, ?> codec = FeatureManager.getCodecForFile(inputBedFile); final Class<? extends Feature> featureType = codec.getFeatureType(); if (BEDFeature.class.isAssignableFrom(featureType)) { final FeatureDataSource<? extends BEDFeature> source = new FeatureDataSource<>(inputBedFile); try { final List<Target> targets = StreamSupport.stream(source.spliterator(), false).map(ConvertBedToTargetFile::createTargetFromBEDFeature) .collect(Collectors.toList()); TargetWriter.writeTargetsToFile(outFile, targets); } catch (final TribbleException e) { throw new UserException.BadInput(String.format("'%s' has a .bed extension but does not seem to be a valid BED file.", inputBedFile.getAbsolutePath())); } } else { throw new UserException.BadInput(String.format("'%s' does not seem to be a BED file.", inputBedFile.getAbsolutePath())); } return "SUCCESS"; }
Example #2
Source File: FeatureManagerUnitTest.java From gatk with BSD 3-Clause "New" or "Revised" License | 6 votes |
@Test public void testHandleRequestForValidFeatureInputIterator() { final ValidFeatureArgumentSource toolInstance = new ValidFeatureArgumentSource(); // Initialize two of the FeatureInput fields as they would be initialized by the argument-parsing // system to simulate a run of the tool with two FeatureInputs. toolInstance.variantContextFeatureInput = new FeatureInput<>(FEATURE_MANAGER_TEST_DIRECTORY + "feature_data_source_test.vcf"); toolInstance.bedListFeatureInput.add(new FeatureInput<>(FEATURE_MANAGER_TEST_DIRECTORY + "minimal_bed_file.bed")); final FeatureManager manager = new FeatureManager(toolInstance); final Iterator<VariantContext> vcIterator = manager.getFeatureIterator(toolInstance.variantContextFeatureInput); final Iterator<BEDFeature> bedIterator = manager.getFeatureIterator(toolInstance.bedListFeatureInput.get(0)); final List<VariantContext> variants = Utils.stream(vcIterator).collect(Collectors.toList()); final List<BEDFeature> beds = Utils.stream(bedIterator).collect(Collectors.toList()); Assert.assertEquals(variants.size(), 26); Assert.assertEquals(variants.get(4).getStart(),280); Assert.assertEquals(beds.size(), 1); }
Example #3
Source File: ExampleFeatureWalker.java From gatk with BSD 3-Clause "New" or "Revised" License | 6 votes |
@Override public void apply(final BEDFeature feature, final ReadsContext readsContext, final ReferenceContext referenceContext, final FeatureContext featureContext) { outputStream.println("Current feature: " + toBedFeatureString(feature)); if ( referenceContext.hasBackingDataSource() ) { printReferenceBases(referenceContext); } if ( readsContext.hasBackingDataSource() ) { printReads(readsContext); } if ( featureContext.hasBackingDataSource() ) { printVariants(featureContext); } }
Example #4
Source File: FeatureManagerUnitTest.java From gatk with BSD 3-Clause "New" or "Revised" License | 6 votes |
@Test public void testHandleRequestForValidFeatureInputs() { ValidFeatureArgumentSource toolInstance = new ValidFeatureArgumentSource(); // Initialize two of the FeatureInput fields as they would be initialized by the argument-parsing // system to simulate a run of the tool with two FeatureInputs. toolInstance.variantContextFeatureInput = new FeatureInput<>(FEATURE_MANAGER_TEST_DIRECTORY + "feature_data_source_test.vcf"); toolInstance.bedListFeatureInput.add(new FeatureInput<>(FEATURE_MANAGER_TEST_DIRECTORY + "minimal_bed_file.bed")); FeatureManager manager = new FeatureManager(toolInstance); List<VariantContext> vcFeatures = manager.getFeatures(toolInstance.variantContextFeatureInput, new SimpleInterval("1", 1, 2000)); List<BEDFeature> bedFeatures = manager.getFeatures(toolInstance.bedListFeatureInput.get(0), new SimpleInterval("1", 1, 1)); Assert.assertEquals(vcFeatures.size(), 14, "Wrong number of Features returned from VariantContext test Feature file"); Assert.assertEquals(bedFeatures.size(), 1, "Wrong number of Features returned from BED test Feature file"); }
Example #5
Source File: AnnotateIntervals.java From gatk with BSD 3-Clause "New" or "Revised" License | 6 votes |
/** * @param interval assumed to have non-zero length */ @Override Double apply(final Locatable interval, final ReferenceContext referenceContext, final FeatureContext featureContext) { double lengthWeightedSum = 0.; final List<BEDFeature> features = featureContext.getValues(trackPath); for (final BEDFeature feature : features) { final double scoreOrNaN = (double) feature.getScore(); final double score = Double.isNaN(scoreOrNaN) ? 1. : scoreOrNaN; // missing or NaN score -> score = 1 lengthWeightedSum += score * CoordMath.getOverlap( feature.getStart(), feature.getEnd() - 1, // zero-based interval.getStart(), interval.getEnd()); // one-based } return lengthWeightedSum / interval.getLengthOnReference(); }
Example #6
Source File: XGBoostEvidenceFilter.java From gatk with BSD 3-Clause "New" or "Revised" License | 6 votes |
/** * Calculate fractional overlap of BreakpointEvidence with genome tract data. * Separately sum the number of base pairs in genomeIntervals that overlap evidence (allowing base pairs to * count multiple times if there is overlap in genomeIntervals) then divide by length of evidence. returned * value will be double >= 0, but may be larger than 1 if any genomeIntervals overlap each other. */ private static double getGenomeIntervalsOverlap(final BreakpointEvidence evidence, final FeatureDataSource<BEDFeature> genomeIntervals, final ReadMetadata readMetadata) { final SVInterval location = evidence.getLocation(); final SimpleInterval simpleInterval = new SimpleInterval(readMetadata.getContigName(location.getContig()), location.getStart(), location.getEnd() - 1); // "end - 1" because SimpleIntervals are closed on both ends int overlap = 0; for(final Iterator<BEDFeature> overlapperItr = genomeIntervals.query(simpleInterval); overlapperItr.hasNext();) { final BEDFeature overlapper = overlapperItr.next(); // " + 1" because genome tract data is semi-closed, but BEDFeature is fully closed final int overlapLength = Math.min(simpleInterval.getEnd(), overlapper.getEnd()) + 1 - Math.max(simpleInterval.getStart(), overlapper.getStart()); overlap += overlapLength; } return overlap / (double)simpleInterval.size(); }
Example #7
Source File: BEDFileLoader.java From hmftools with GNU General Public License v3.0 | 5 votes |
@NotNull public static SortedSetMultimap<String, GenomeRegion> fromBedFile(@NotNull String bedFile) throws IOException { final SortedSetMultimap<String, GenomeRegion> regionMap = TreeMultimap.create(); String prevChromosome = null; GenomeRegion prevRegion = null; try (final AbstractFeatureReader<BEDFeature, LineIterator> reader = getFeatureReader(bedFile, new BEDCodec(), false)) { for (final BEDFeature bedFeature : reader.iterator()) { final String chromosome = bedFeature.getContig(); final long start = bedFeature.getStart(); final long end = bedFeature.getEnd(); if (end < start) { LOGGER.warn("Invalid genome region found in chromosome {}: start={}, end={}", chromosome, start, end); } else { final GenomeRegion region = GenomeRegions.create(chromosome, start, end); if (prevRegion != null && chromosome.equals(prevChromosome) && prevRegion.end() >= start) { LOGGER.warn("BED file is not sorted, please fix! Current={}, Previous={}", region, prevRegion); } else { regionMap.put(chromosome, region); prevChromosome = chromosome; prevRegion = region; } } } } return regionMap; }
Example #8
Source File: SimpleReference.java From varsim with BSD 2-Clause "Simplified" License | 5 votes |
public long getNumNonNBases(final File regions) throws IOException { loadAllSequences(); long count = 0; final FeatureCodec<BEDFeature, LineIterator> bedCodec = new BEDCodec(BEDCodec.StartOffset.ONE); final LineIterator lineIterator = new AsciiLineReaderIterator(new AsciiLineReader(new FileInputStream(regions))); while (lineIterator.hasNext()) { final BEDFeature bedFeature = bedCodec.decode(lineIterator); count += data.get(new ChrString(bedFeature.getContig())).getNumNonNBases(bedFeature.getStart(), bedFeature.getEnd()); } return count; }
Example #9
Source File: LongISLNDReadAlignmentMap.java From varsim with BSD 2-Clause "Simplified" License | 5 votes |
public LongISLNDReadAlignmentMap(final Collection<String> readAlignmentMapFiles) throws IOException { for (final String readAlignmentMapFileName : readAlignmentMapFiles) { log.info("Reading in read map from " + readAlignmentMapFileName); final AbstractFeatureReader<BEDFeature, LineIterator> featureReader = AbstractFeatureReader.getFeatureReader(readAlignmentMapFileName, new BEDCodec(), false); try { final CloseableTribbleIterator<BEDFeature> featureIterator = featureReader.iterator(); while (featureIterator.hasNext()) { final BEDFeature feature = featureIterator.next(); readAlignmentMap.put(feature.getName(), new LongISLNDReadMapRecord(feature).toReadMapRecord()); } } finally { featureReader.close(); } } }
Example #10
Source File: LongISLNDReadMapRecord.java From varsim with BSD 2-Clause "Simplified" License | 5 votes |
LongISLNDReadMapRecord(final BEDFeature bedFeature) { chromosome = new ChrString(bedFeature.getContig()); start = bedFeature.getStart(); end = bedFeature.getEnd(); readName = bedFeature.getName(); strand = bedFeature.getStrand(); }
Example #11
Source File: FeatureManagerUnitTest.java From gatk with BSD 3-Clause "New" or "Revised" License | 5 votes |
@DataProvider(name = "DetectFeatureTypeFromFeatureInputFieldTestData") public Object[][] getDetectFeatureTypeFromFeatureInputFieldTestData() { return new Object[][] { { "variantContextFeatureInput", VariantContext.class }, { "variantContextListFeatureInput", VariantContext.class }, { "bedFeatureInput", BEDFeature.class }, { "bedListFeatureInput", BEDFeature.class } }; }
Example #12
Source File: FeatureManagerUnitTest.java From gatk with BSD 3-Clause "New" or "Revised" License | 5 votes |
@Test(expectedExceptions = UserException.WrongFeatureType.class) public void testRestrictCodecSelectionToWrongFeatureType() { final File vcf = new File(FEATURE_MANAGER_TEST_DIRECTORY + "minimal_vcf4_file.vcf"); // If we require BED Features from this vcf file, we should get a type mismatch exception FeatureManager.getCodecForFile(vcf.toPath(), BEDFeature.class); }
Example #13
Source File: AnnotateIntervals.java From gatk with BSD 3-Clause "New" or "Revised" License | 5 votes |
private static void checkForOverlaps(final FeatureManager featureManager, final FeatureInput<BEDFeature> featureTrackPath, final SAMSequenceDictionary sequenceDictionary) { final List<BEDFeature> features = IteratorUtils.toList(featureManager.getFeatureIterator(featureTrackPath)); final GenomeLocParser parser = new GenomeLocParser(sequenceDictionary); final List<GenomeLoc> genomeLocs = IntervalUtils.genomeLocsFromLocatables(parser, features); final List<GenomeLoc> mergedGenomeLocs = IntervalUtils.mergeIntervalLocations( genomeLocs, IntervalMergingRule.OVERLAPPING_ONLY); if (genomeLocs.size() != mergedGenomeLocs.size()) { throw new UserException.BadInput(String.format("Feature track %s contains overlapping intervals; " + "these should be merged.", featureTrackPath)); } }
Example #14
Source File: AnnotateIntervals.java From gatk with BSD 3-Clause "New" or "Revised" License | 4 votes |
SegmentalDuplicationContentAnnotator(final FeatureInput<BEDFeature> segmentalDuplicationTrackPath) { super(segmentalDuplicationTrackPath); }
Example #15
Source File: AnnotateIntervals.java From gatk with BSD 3-Clause "New" or "Revised" License | 4 votes |
MappabilityAnnotator(final FeatureInput<BEDFeature> mappabilityTrackPath) { super(mappabilityTrackPath); }
Example #16
Source File: AnnotateIntervals.java From gatk with BSD 3-Clause "New" or "Revised" License | 4 votes |
BEDLengthWeightedAnnotator(final FeatureInput<BEDFeature> trackPath) { this.trackPath = trackPath; }
Example #17
Source File: ExampleFeatureWalker.java From gatk with BSD 3-Clause "New" or "Revised" License | 4 votes |
@Override protected boolean isAcceptableFeatureType(Class<? extends Feature> featureType) { return featureType.isAssignableFrom(BEDFeature.class); }
Example #18
Source File: BedToIntervalList.java From picard with MIT License | 4 votes |
@Override protected int doWork() { IOUtil.assertFileIsReadable(INPUT); IOUtil.assertFileIsReadable(SEQUENCE_DICTIONARY); IOUtil.assertFileIsWritable(OUTPUT); try { // create a new header that we will assign the dictionary provided by the SAMSequenceDictionaryExtractor to. final SAMFileHeader header = new SAMFileHeader(); final SAMSequenceDictionary samSequenceDictionary = SAMSequenceDictionaryExtractor.extractDictionary(SEQUENCE_DICTIONARY.toPath()); header.setSequenceDictionary(samSequenceDictionary); // set the sort order to be sorted by coordinate, which is actually done below // by getting the .uniqued() intervals list before we write out the file header.setSortOrder(SAMFileHeader.SortOrder.coordinate); final IntervalList intervalList = new IntervalList(header); final FeatureReader<BEDFeature> bedReader = AbstractFeatureReader.getFeatureReader(INPUT.getAbsolutePath(), new BEDCodec(), false); final CloseableTribbleIterator<BEDFeature> iterator = bedReader.iterator(); final ProgressLogger progressLogger = new ProgressLogger(LOG, (int) 1e6); while (iterator.hasNext()) { final BEDFeature bedFeature = iterator.next(); final String sequenceName = bedFeature.getContig(); final int start = bedFeature.getStart(); final int end = bedFeature.getEnd(); // NB: do not use an empty name within an interval final String name; if (bedFeature.getName().isEmpty()) { name = null; } else { name = bedFeature.getName(); } final SAMSequenceRecord sequenceRecord = header.getSequenceDictionary().getSequence(sequenceName); // Do some validation if (null == sequenceRecord) { if (DROP_MISSING_CONTIGS) { LOG.info(String.format("Dropping interval with missing contig: %s:%d-%d", sequenceName, bedFeature.getStart(), bedFeature.getEnd())); missingIntervals++; missingRegion += bedFeature.getEnd() - bedFeature.getStart(); continue; } throw new PicardException(String.format("Sequence '%s' was not found in the sequence dictionary", sequenceName)); } else if (start < 1) { throw new PicardException(String.format("Start on sequence '%s' was less than one: %d", sequenceName, start)); } else if (sequenceRecord.getSequenceLength() < start) { throw new PicardException(String.format("Start on sequence '%s' was past the end: %d < %d", sequenceName, sequenceRecord.getSequenceLength(), start)); } else if ((end == 0 && start != 1 ) //special case for 0-length interval at the start of a contig || end < 0 ) { throw new PicardException(String.format("End on sequence '%s' was less than one: %d", sequenceName, end)); } else if (sequenceRecord.getSequenceLength() < end) { throw new PicardException(String.format("End on sequence '%s' was past the end: %d < %d", sequenceName, sequenceRecord.getSequenceLength(), end)); } else if (end < start - 1) { throw new PicardException(String.format("On sequence '%s', end < start-1: %d <= %d", sequenceName, end, start)); } final boolean isNegativeStrand = bedFeature.getStrand() == Strand.NEGATIVE; final Interval interval = new Interval(sequenceName, start, end, isNegativeStrand, name); intervalList.add(interval); progressLogger.record(sequenceName, start); } CloserUtil.close(bedReader); if (DROP_MISSING_CONTIGS) { if (missingRegion == 0) { LOG.info("There were no missing regions."); } else { LOG.warn(String.format("There were %d missing regions with a total of %d bases", missingIntervals, missingRegion)); } } // Sort and write the output IntervalList out = intervalList; if (SORT) { out = out.sorted(); } if (UNIQUE) { out = out.uniqued(); } out.write(OUTPUT); LOG.info(String.format("Wrote %d intervals spanning a total of %d bases", out.getIntervals().size(),out.getBaseCount())); } catch (final IOException e) { throw new RuntimeException(e); } return 0; }
Example #19
Source File: ConvertBedToTargetFile.java From gatk-protected with BSD 3-Clause "New" or "Revised" License | 4 votes |
protected static Target createTargetFromBEDFeature(final BEDFeature bedFeature) { return new Target(bedFeature.getName(), new SimpleInterval(bedFeature.getContig(), bedFeature.getStart(), bedFeature.getEnd())); }
Example #20
Source File: ReplicationOriginAnnotator.java From hmftools with GNU General Public License v3.0 | 4 votes |
public void loadReplicationOrigins(final String filename) { if(filename.isEmpty()) return; try { String currentChr = ""; List<ReplicationOriginRegion> regionList = null; int regionCount = 0; final AbstractFeatureReader<BEDFeature, LineIterator> reader = getFeatureReader(filename, new BEDCodec(), false); for (final BEDFeature bedFeature : reader.iterator()) { final String chromosome = refGenomeChromosome(bedFeature.getContig(), RG_VERSION); if (!chromosome.equals(currentChr)) { currentChr = chromosome; regionList = mReplicationOrigins.get(chromosome); if (regionList == null) { regionList = Lists.newArrayList(); mReplicationOrigins.put(currentChr, regionList); } } ++regionCount; double originValue = Double.parseDouble(bedFeature.getName()) / 100; regionList.add(new ReplicationOriginRegion( chromosome, bedFeature.getStart(), bedFeature.getEnd(), originValue)); } LNX_LOGGER.debug("loaded {} replication origins", regionCount); } catch(IOException exception) { LNX_LOGGER.error("Failed to read replication origin BED file({})", filename); } }