Python intervaltree.IntervalTree() Examples
The following are 30
code examples of intervaltree.IntervalTree().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module intervaltree, or try the search function.
Example #1
Source File: hicBuildMatrix.py From HiCExplorer with GNU General Public License v3.0 | 8 votes |
def intervalListToIntervalTree(interval_list):
    r"""
    Build one interval tree per chromosome from a list of genomic bins.

    Every element of ``interval_list`` is a sequence whose first three
    items are ``(chrom, start, end)``.  The interval created for an element
    carries, as its data, the position of that element in
    ``interval_list``, so the id can be used to look up the matching
    row/column index of the hic matrix.

    :param interval_list: list of tuples starting with chrom, start, end
    :return: dict mapping each chromosome name to its IntervalTree

    >>> bin_list = [('chrX', 0, 50000), ('chrX', 50000, 100000)]
    >>> res = intervalListToIntervalTree(bin_list)
    >>> sorted(res['chrX'])
    [Interval(0, 50000, 0), Interval(50000, 100000, 1)]
    """
    trees_by_chrom = {}

    for position, entry in enumerate(interval_list):
        chrom, start, end = entry[0:3]
        try:
            tree = trees_by_chrom[chrom]
        except KeyError:
            # first bin seen on this chromosome
            tree = trees_by_chrom[chrom] = IntervalTree()
        tree.add(Interval(start, end, position))

    return trees_by_chrom
Example #2
Source File: bitwrap.py From pfp with MIT License | 6 votes |
def __init__(self, stream):
    """Set up the bit-wrapped view of a byte stream.

    :stream: The normal byte stream
    """
    self._stream = stream
    self.closed = False

    # Empty deque bound to ``_bits`` and empty interval tree bound to
    # ``range_set``; both start out with no content.
    self._bits = collections.deque()
    self.range_set = IntervalTree()

    # Bitfields are assumed to end on an even boundary.  Without that
    # assumption the entire stream would be treated as a single bit
    # stream with no padding.
    self.padded = True
Example #3
Source File: basenji_data_read.py From basenji with Apache License 2.0 | 6 votes |
def read_blacklist(blacklist_bed, black_buffer=20):
    """Construct interval trees of blacklist regions for each chromosome.

    Args:
      blacklist_bed: Path to a BED file of blacklist regions.  ``None``, or
        a path that is not an existing file, yields an empty result.
      black_buffer: Amount by which every region is extended on both sides;
        the extended start is clamped at 0.

    Returns:
      dict mapping chromosome name to an ``intervaltree.IntervalTree``; each
      buffered region is stored in its chromosome's tree with data ``True``.
    """
    black_chr_trees = {}

    if blacklist_bed is None or not os.path.isfile(blacklist_bed):
        return black_chr_trees

    # A context manager guarantees the handle is closed, also when a
    # malformed line raises part-way through the file.
    with open(blacklist_bed) as blacklist_in:
        for line in blacklist_in:
            a = line.split()
            if not a:
                # blank line (e.g. a trailing newline at the end of the file)
                continue

            chrm = a[0]
            start = max(0, int(a[1]) - black_buffer)
            end = int(a[2]) + black_buffer

            if chrm not in black_chr_trees:
                black_chr_trees[chrm] = intervaltree.IntervalTree()

            black_chr_trees[chrm][start:end] = True

    return black_chr_trees
Example #4
Source File: basenji_data_hic_read.py From basenji with Apache License 2.0 | 6 votes |
def read_blacklist(blacklist_bed, black_buffer=20):
    """Construct interval trees of blacklist regions for each chromosome.

    Args:
      blacklist_bed: Path to a BED file of blacklist regions.  ``None``, or
        a path that is not an existing file, yields an empty result.
      black_buffer: Amount by which every region is extended on both sides;
        the extended start is clamped at 0.

    Returns:
      dict mapping chromosome name to an ``intervaltree.IntervalTree``; each
      buffered region is stored in its chromosome's tree with data ``True``.
    """
    black_chr_trees = {}

    if blacklist_bed is None or not os.path.isfile(blacklist_bed):
        return black_chr_trees

    # A context manager guarantees the handle is closed, also when a
    # malformed line raises part-way through the file.
    with open(blacklist_bed) as blacklist_in:
        for line in blacklist_in:
            a = line.split()
            if not a:
                # blank line (e.g. a trailing newline at the end of the file)
                continue

            chrm = a[0]
            start = max(0, int(a[1]) - black_buffer)
            end = int(a[2]) + black_buffer

            if chrm not in black_chr_trees:
                black_chr_trees[chrm] = intervaltree.IntervalTree()

            black_chr_trees[chrm][start:end] = True

    return black_chr_trees


################################################################################
# __main__
################################################################################
Example #5
Source File: ragoo.py From RaGOO with MIT License | 6 votes |
def remove_gff_breaks(gff_ins, breaks):
    """
    Drop the candidate break points that land inside a gff feature.

    The candidates come from misassembly correction; any of them that falls
    within the interval of a gff feature is discarded. This should be called
    once per contig.
    :param gff_ins: List of GFFLines
    :param breaks: candidate break points
    :return: list of the candidates that hit no feature interval
    """
    # Index the feature intervals of the gff lines in an interval tree
    feature_tree = IntervalTree()
    for gff_line in gff_ins:
        begin, end = gff_line.start, gff_line.end
        # Features whose start equals their end are left out of the tree
        if begin != end:
            feature_tree[begin:end] = (begin, end)

    surviving = []
    for candidate in breaks:
        # A point query returning nothing means no feature covers the point
        if not feature_tree[candidate]:
            surviving.append(candidate)
    return surviving
Example #6
Source File: test_vcf.py From medaka with Mozilla Public License 2.0 | 6 votes |
def intervaltree_prep(h1, h2, ref_seq):
    """Mock up trees as the `VCFReader` class would, to test medaka.vcf._merge_variants.

    :param h1, h2: iterable of variants in first and second haplotype, respectively.
    :param ref_seq: str, reference sequence (not used by this helper).

    :returns: (one `intervaltree.Interval` taken from the merged tree of all
        variants, [`intervaltree.IntervalTree` for each haplotype])
    """
    trees = []
    for haplotype_variants in (h1, h2):
        hap_tree = intervaltree.IntervalTree()
        for variant in haplotype_variants:
            # each variant spans its reference allele
            begin = variant.pos
            end = variant.pos + len(variant.ref)
            hap_tree.add(intervaltree.Interval(begin, end, data=variant))
        trees.append(hap_tree)

    only_overlapping = True
    pooled = trees[0].all_intervals.union(trees[1].all_intervals)
    comb_tree = intervaltree.IntervalTree(pooled)
    # if strict, merge only overlapping intervals (not adjacent ones)
    comb_tree.merge_overlaps(
        strict=only_overlapping,
        data_initializer=list(),
        data_reducer=lambda x, y: x + [y],
    )
    comb_interval = list(comb_tree.all_intervals)[0]
    return comb_interval, trees
Example #7
Source File: vlines.py From CoolBox with GNU General Public License v3.0 | 6 votes |
def __intervaltree_from_list(self, vlines_list):
    """Group vertical-line positions into one interval tree per chromosome.

    Each element of ``vlines_list`` may be a string (handed to
    ``GenomeRange``), a tuple (its first item is the chromosome and its
    second item is used as both start and end), or a ``GenomeRange``.
    Anything else raises ``ValueError``.

    Returns a dict mapping chromosome to an IntervalTree in which every
    range is stored over ``start:end + 1`` with the range itself as data.
    """
    from intervaltree import IntervalTree

    trees = {}
    for item in vlines_list:
        if isinstance(item, str):
            grange = GenomeRange(item)
        elif isinstance(item, tuple):
            grange = GenomeRange(item[0], item[1], item[1])
        elif isinstance(item, GenomeRange):
            grange = item
        else:
            raise ValueError("position must be a tuple or string.")

        chrom = grange.chrom
        if chrom not in trees:
            trees[chrom] = IntervalTree()
        trees[chrom][grange.start:grange.end + 1] = grange

    return trees
Example #8
Source File: datasets.py From RFHO with MIT License | 6 votes |
def __init__(self, data, row_sentence_bounds, window=5, process_all=False):
    """
    Manage windowed input data (like TIMIT).

    :param data: Numpy matrix. Each row should be an example data
    :param row_sentence_bounds: Numpy matrix with bounds for padding. TODO add default NONE
    :param window: half-window size
    :param process_all: (default False) if True adds context to all data at object
                        initialization. Otherwise the windowed data is created in runtime.
    """
    self.window = window
    self.data = data

    # A windowed row is (2 * window + 1) times as wide as an input row.
    base_shape = self.data.shape
    context_width = 2 * self.window + 1
    self.shape = (base_shape[0], context_width * base_shape[1])

    # One interval per bounds row; the second bound is pushed out by one
    # when the interval is built.
    sentence_intervals = []
    for bounds in row_sentence_bounds:
        sentence_intervals.append(it.Interval(int(bounds[0]), int(bounds[1]) + 1))
    self.tree = it.IntervalTree(sentence_intervals)

    if process_all:
        print('adding context to all the dataset', end='- ')
        self.data = self.generate_all()
        print('DONE')

    self.process_all = process_all
Example #9
Source File: decoder.py From DMCUProg with MIT License | 6 votes |
def _build_function_search_tree(self):
    """Fill ``self.function_tree`` with one ``low_pc:high_pc`` entry per subprogram.

    Each entry carries a ``FunctionInfo`` built from the subprogram's
    ``DW_AT_name``, ``DW_AT_low_pc`` and ``DW_AT_high_pc`` attributes.  A
    subprogram for which processing raises ``KeyError`` is skipped.
    """
    self.function_tree = IntervalTree()

    for subprogram in self.subprograms:
        try:
            attrs = subprogram.attributes
            name = attrs['DW_AT_name'].value
            low_pc = attrs['DW_AT_low_pc'].value
            high_pc = attrs['DW_AT_high_pc'].value

            # Skip subprograms excluded from the link.
            if low_pc == 0:
                continue

            # If high_pc is not explicitly an address, then it's an offset
            # from the low_pc value.
            if attrs['DW_AT_high_pc'].form != 'DW_FORM_addr':
                high_pc = low_pc + high_pc

            info = FunctionInfo(name=name, subprogram=subprogram,
                                low_pc=low_pc, high_pc=high_pc)
            self.function_tree.addi(low_pc, high_pc, info)
        except KeyError:
            # KeyError anywhere above: move on to the next subprogram
            continue
Example #10
Source File: decoder.py From DMCUProg with MIT License | 6 votes |
def _build_symbol_search_tree(self):
    """Fill ``self.symbol_tree`` and ``self.symbol_dict`` from the symbol table.

    Only ``STT_FUNC`` and ``STT_OBJECT`` symbols are recorded.  Each one
    gets a ``SymbolInfo`` that is stored in ``symbol_dict`` under the
    symbol name and in ``symbol_tree`` over the symbol's address range.
    """
    self.symbol_tree = IntervalTree()
    wanted_types = ('STT_FUNC', 'STT_OBJECT')

    for sym in self.symtab.iter_symbols():
        # Only look for functions and objects.
        entry_type = sym.entry['st_info']['type']
        if entry_type not in wanted_types:
            continue

        address = sym.entry['st_value']
        reported_size = sym.entry['st_size']

        # Cannot put an empty interval into the tree, so the range inserted
        # for a zero-sized symbol is one unit long.  The SymbolInfo still
        # records the size as reported.
        span = 1 if reported_size == 0 else reported_size

        info = SymbolInfo(name=sym.name, address=address,
                          size=reported_size, type=entry_type)

        # Add to symbol dict.
        self.symbol_dict[sym.name] = info

        # Add to symbol tree.
        self.symbol_tree.addi(address, address + span, info)
Example #11
Source File: score_function.py From genmod with MIT License | 6 votes |
def __init__(self, match_type, equal=False):
    """Create the logger and the empty lookup containers of a score function.

    :param match_type: one of
        ['integer','float','flag','character','string']
    :param equal: stored as ``_equal``; applies when the score is the same
        as the value found
    """
    super(ScoreFunction, self).__init__()
    self.logger = logging.getLogger(__name__)
    debug = self.logger.debug

    debug("Initializing match_type to:{0}".format(match_type))
    self.match_type = match_type

    debug("Initializing string_dict to:{}")
    self._string_dict = {}

    debug("Initializing interval_tree")
    self._interval_tree = IntervalTree()

    debug("Initializing value_dict")
    self._value_dict = {}

    debug("Initializing not_reported_score to 0")
    self._not_reported_score = 0

    debug("Initializing reported_score to 0")
    # only for 'flag'
    self._reported_score = 0

    # If the score is the same as the value found:
    debug("Initializing equal to {0}".format(equal))
    self._equal = equal
Example #12
Source File: client.py From PyBase with Apache License 2.0 | 6 votes |
def __init__(self, zkquorum, pool_size):
    """Record the connection settings and create the empty caches and locks.

    :param zkquorum: location of the ZooKeeper quorum (csv)
    :param pool_size: connection pool size per region server (and master!)
    """
    self.zkquorum = zkquorum
    self.pool_size = pool_size

    # Persistent connection to the master server; none has been made yet.
    self.master_client = None

    # Region cache: an IntervalTree whose ranges represent the known row
    # keys that fall within a specific region, so that any 'region look
    # up' is O(logn).
    self.region_cache = IntervalTree()

    # Maps a client's host:port to the client instance.
    self.reverse_client_cache = {}

    # Mutex used for all caching operations.
    self._cache_lock = Lock()

    # Mutex that lets only one thread at a time request meta information
    # from the master.
    self._master_lookup_lock = Lock()
Example #13
Source File: gtf.py From seqc with GNU General Public License v2.0 | 6 votes |
def translate(self, chromosome, strand, pos):
    """Map a chromosome, strand and position onto a gene identifier.

    The lookup goes through the IntervalTree stored for the chromosome and
    strand, which makes the search for the matching identifier fast.

    :param bytes chromosome: chromosome for this alignment
    :param bytes strand: strand for this alignment (one of ['+', '-'])
    :param int pos: position of the alignment within the chromosome
    :return int|None: the gene_id when exactly one distinct gene is found at
      the position; None when there are none or several, or when a lookup
      raises KeyError
    """
    # todo remove duplicate exons during construction to save time
    try:
        hits = self._chromosomes_to_genes[chromosome][strand][pos]
        gene_ids = {hit.data for hit in hits}
        if len(gene_ids) != 1:
            return None  # not exactly one gene
        return first(gene_ids)  # just right
    except KeyError:
        return None  # no gene
Example #14
Source File: genotype_with_reference.py From pacbio_variant_caller with MIT License | 5 votes |
def has_gaps_in_region(read, region):
    """
    Return True when more than one aligned block of the given pysam read
    overlaps the given pybedtools.Interval, ``region``.
    """
    # More than one block inside the interval (the SV event itself) means
    # the read's alignment to the reference has gaps inside the SV.
    aligned_blocks = intervaltree.IntervalTree()
    for block in read.get_blocks():
        begin, end = block[0], block[1]
        aligned_blocks[begin:end] = block

    overlapping = aligned_blocks[region.start:region.end]
    return len(overlapping) > 1
Example #15
Source File: transcript.py From mikado with GNU Lesser General Public License v3.0 | 5 votes |
def segmenttree(self):
    """
    Return the segment tree, recalculating it first when its size does not
    equal the number of exons plus the number of introns.

    :rtype: intervaltree.IntervalTree
    """
    stored_size = len(self.__segmenttree)
    expected_size = self.exon_num + len(self.introns)
    if stored_size != expected_size:
        self._calculate_segment_tree()
    return self.__segmenttree
Example #16
Source File: paf.py From dgenies with GNU General Public License v3.0 | 5 votes |
def build_summary_stats(self, status_file):
    """
    Get summary of identity

    The summary is also written as JSON to ``<paf>.summary``.

    :param status_file: path of the status file; it is removed once the
        summary is written, or renamed with a ".fail" suffix when the paf
        file could not be parsed
    :return: table with percents by category (None when parsing failed)
    """
    summary_file = self.paf + ".summary"
    self.parse_paf(False, False)

    if not self.parsed:
        shutil.move(status_file, status_file + ".fail")
        return None

    # "-1" starts at the full length; every other category starts at 0
    percents = {"-1": self.len_t}
    position_idy = IntervalTree()

    for cat in sorted(self.lines.keys()):
        percents[cat] = 0
        for line in self.lines[cat]:
            first_pos, second_pos = line[0], line[1]
            start = min(first_pos, second_pos)
            end = max(first_pos, second_pos) + 1
            position_idy[start:end] = int(cat)

    percents = self._remove_overlaps(position_idy, percents)
    for cat in percents:
        percents[cat] = percents[cat] / self.len_t * 100

    with open(summary_file, "w") as summary_out:
        summary_out.write(json.dumps(percents))

    os.remove(status_file)
    return percents
Example #17
Source File: coverageByEnds.py From SDA with MIT License | 5 votes |
def defineRegions():
    """Load every file listed in ``regionFiles`` into the ``regions`` trees.

    Each non-blank line is split on whitespace; its first three fields are
    the chromosome, start and end.  The region is stored in the
    chromosome's IntervalTree over ``start:end + 1`` with a fresh ``[0, 0]``
    counter pair as data.
    """
    for myfile in regionFiles:
        # The context manager closes the handle; previously the file was
        # opened inline and never closed.
        with open(myfile) as region_in:
            for line in region_in:
                fields = line.split()
                if not fields:
                    # blank line, nothing to record
                    continue

                Chr = fields[0]
                start = int(fields[1])
                end = int(fields[2])

                if Chr not in regions:
                    regions[Chr] = intervaltree.IntervalTree()

                # first value is number of starts in region, second is
                # number of ends in region
                regions[Chr][start:end + 1] = [0, 0]
Example #18
Source File: BedReader.py From slamdunk with GNU Affero General Public License v3.0 | 5 votes |
def bedToIntervallTree(bed):
    """Read ``bed`` with ``BedIterator`` into one IntervalTree per chromosome.

    Every record is stored over ``start:stop + 1`` with its name as data.
    Returns the dict mapping chromosome to tree.
    """
    trees = {}

    for record in BedIterator(bed):
        chrom = record.chromosome
        if chrom not in trees:
            trees[chrom] = IntervalTree()
        trees[chrom][record.start:(record.stop + 1)] = record.name

    return trees
Example #19
Source File: paf.py From dgenies with GNU General Public License v3.0 | 5 votes |
def _add_percents(self, percents, item): """ Update percents with interval :param percents: initial percents :type percents: dict :param item: interval from IntervalTree :type item: Interval :return: new percents :rtype: dict """ i_count = item.length() percents[str(item.data)] += i_count percents["-1"] -= i_count return percents
Example #20
Source File: ragoo.py From RaGOO with MIT License | 5 votes |
def get_location_confidence(in_ctg_alns):
    """
    Return the fraction of the contig's reference span covered by alignments.

    The reference span runs from the smallest reference start to the
    largest reference end.  All alignments are put in an interval tree, the
    ones overlapping the span are clamped to it, and the length of the
    union of the clamped intervals is divided by the span length.

    :param in_ctg_alns: object exposing the parallel sequences
        ``ref_headers``, ``ref_starts`` and ``ref_ends``
    :return: 0 when no interval overlaps the span, otherwise
        covered length / span length
    """
    min_pos = min(in_ctg_alns.ref_starts)
    max_pos = max(in_ctg_alns.ref_ends)

    # Put the reference start and end position for every alignment into the tree.
    # NOTE(review): intervaltree presumably rejects an alignment whose start
    # equals its end (null interval) - confirm whether such rows can occur.
    t = IntervalTree()
    alignments = zip(in_ctg_alns.ref_headers, in_ctg_alns.ref_starts, in_ctg_alns.ref_ends)
    for _, ref_start, ref_end in alignments:
        t[ref_start:ref_end] = (ref_start, ref_end)

    overlaps = t[min_pos:max_pos]
    if not overlaps:
        return 0

    # Clamp every interval to the span.  New tuples are built because the
    # stored data are tuples, which cannot be modified by item assignment.
    bounded_list = sorted(
        (max(iv.data[0], min_pos), min(iv.data[1], max_pos)) for iv in overlaps
    )

    # Sweep the sorted intervals, counting only territory not covered yet.
    # The sweep starts at min_pos, which no clamped start can precede.
    ovlp_range = 0
    covered_until = min_pos
    for start, end in bounded_list:
        start_new_terr = max(start, covered_until)
        ovlp_range += max(0, end - start_new_terr)
        covered_until = max(covered_until, end)

    return ovlp_range / (max_pos - min_pos)
Example #21
Source File: db_info.py From bootloader_instrumentation_suite with MIT License | 5 votes |
def write_interval_info(self, hwname, pclo=None, pchi=None, substage_names=[], substage_entries={}):
    """Collect the destination ranges of recorded writes.

    For a hardware name containing "framac" the result is a list of
    ``(destlo, desthi)`` pairs for the rows matching the ``pclo``/``pchi``
    range query.  Otherwise the writes table is walked in 'index' order and
    each write is added, as ``dest:dest + writesize``, to the interval tree
    of the substage that is current when the write is reached.

    :param hwname: name used to look up the writes table
    :param pclo: low pc bound (used by the "framac" query only)
    :param pchi: high pc bound (used by the "framac" query only)
    :param substage_names: substage numbers to collect intervals for; only
        read here, never modified
    :param substage_entries: indexable by substage number, giving the
        ``(lopc, hipc)`` entry range of that substage; only read here
    :return: list of pairs ("framac"), or dict mapping substage number to
        ``intervaltree.IntervalTree``
    """
    wt = self._get_writestable(hwname)
    if "framac" in hwname:
        # NOTE(review): unlike the call further down, this call hands no
        # table to pytable_utils.get_rows - confirm against its signature.
        query = '(%d <= writepclo) & (%d <= writepchi) & (writepclo < %d) & (writepchi <= %d)' % \
            (utils.addr_lo(pclo), utils.addr_hi(pclo), utils.addr_lo(pchi), utils.addr_hi(pchi))
        return [(r['destlo'], r['desthi']) for r in pytable_utils.get_rows(query)]
    else:
        fns = substage_entries
        substages = substage_names
        num = 0
        intervals = {n: intervaltree.IntervalTree() for n in substages}
        for r in wt.read_sorted('index'):
            pc = long(r['pc'])
            if num < len(fns) - 1:
                # check if we found the entrypoint to the next stage
                (lopc, hipc) = substage_entries[num + 1]
                if (lopc <= pc) and (pc < hipc):
                    num += 1
            if num in substages:
                start = long(r['dest'])
                # Both placeholders need a value, so the two address halves
                # go into the format as one tuple; the first matching row
                # then supplies the write size.
                size_query = '(pclo == %d) & (pchi == %d)' % (utils.addr_lo(pc), utils.addr_hi(pc))
                end = start + pytable_utils.get_rows(wt, size_query)[0]['writesize']
                intervals[num].add(intervaltree.Interval(start, end))
        return intervals
Example #22
Source File: variant.py From medaka with Mozilla Public License 2.0 | 5 votes |
def samples_to_bed(args):
    """Write a bed file from samples in a datastore file.

    Reads the samples indexed from ``args.inputs`` and writes the merged
    intervals of every contig to ``args.output``.
    """
    logger = medaka.common.get_named_logger('Variants')
    index = medaka.datastore.DataIndex(args.inputs)
    trees = collections.defaultdict(intervaltree.IntervalTree)

    logger.info("Building interval tree")
    for sample_name, _ in index.samples:
        decoded = medaka.common.Sample.decode_sample_name(sample_name)
        # start and end are string repr of floats (major.minor coordinates)
        start = int(float(decoded['start']))
        end = int(float(decoded['end']))
        # intervaltree intervals and bed file intervals are end-exclusive
        # (they don't contain the last coordinate), whereas a sample does
        # include its last position - hence one is added to the end.
        trees[decoded['ref_name']].add(intervaltree.Interval(start, end + 1))

    with open(args.output, 'w') as fh:
        for contig, tree in trees.items():
            # strict=False: consecutive samples can start and end on the
            # same major (the overlap is in minor), so samples that abut
            # without overlapping in major coords are merged as well
            tree.merge_overlaps(strict=False)
            logger.info("Writing intervals for {}".format(contig))
            fh.writelines(
                "{}\t{}\t{}\n".format(contig, region.begin, region.end)
                for region in sorted(tree.all_intervals)
            )
    logger.info("All done, bed file written to {}".format(args.output))
Example #23
Source File: vcf.py From medaka with Mozilla Public License 2.0 | 5 votes |
def variants(self):
    """Yield diploid variants.

    Chromosomes are visited in loose version-sorted order and the merged
    variants of each chromosome are yielded sorted by position.

    :yields `medaka.vcf.Variant` objs

    """
    for chrom in medaka.common.loose_version_sort(self.chroms):
        self.logger.info('Merging variants in chrom {}'.format(chrom))
        trees = [vcf._tree[chrom] for vcf in self.vcfs]

        # Tag every variant with its haplotype so that otherwise identical
        # variants in both trees are not treated as identical (we need to
        # be able to distinguish between 0/1 and 1/1)
        for hap_index, hap_tree in enumerate(trees):
            for hap_interval in hap_tree.all_intervals:
                hap_interval.data.info['mhap'] = hap_index

        pooled = trees[0].all_intervals.union(trees[1].all_intervals)
        comb = intervaltree.IntervalTree(pooled)
        # if strict, merge only overlapping intervals (not adjacent ones)
        comb.merge_overlaps(
            strict=self.only_overlapping,
            data_initializer=list(),
            data_reducer=lambda x, y: x + [y])

        ref_seq = self.fasta.fetch(chrom).upper()
        merged = [
            _merge_variants(
                interval, trees, ref_seq,
                detailed_info=self.detailed_info,
                discard_phase=self.discard_phase)
            for interval in comb.all_intervals
        ]
        yield from sorted(merged, key=lambda x: x.pos)
Example #24
Source File: vcf.py From medaka with Mozilla Public License 2.0 | 5 votes |
def index(self):
    """Index the input file for faster fetches.

    The first caller to obtain the parse lock parses the file into
    ``self._tree`` (one interval tree per chromosome) and marks the reader
    as indexed.  A caller that finds the lock taken waits for it, then
    checks that the indexing has happened.

    :raises IOError: if, after waiting for the lock, the file has still
        not been indexed.
    """
    # calling this method implies caching
    self.cache = True
    if self._indexed or not self.cache:
        return

    if self._parse_lock.acquire(blocking=False):
        try:
            # clear out an incomplete parse, actually this doesn't matter
            # since the values in the tree are set-like.
            self._tree = collections.defaultdict(intervaltree.IntervalTree)
            for variant in self._parse():
                self._tree[variant.chrom][
                    variant.pos:variant.pos + len(variant.ref)] = variant
            # record we've done a complete parse (not reached if the
            # parse raised)
            self._indexed = True
        finally:
            self._parse_lock.release()
    else:
        # Wait for the lock to be released.  It is only needed for the
        # wait, so it is handed straight back; keeping it would leave the
        # lock held forever and block every later acquirer.
        with self._parse_lock:
            indexed = self._indexed
        if not indexed:
            raise IOError("Waited for parsing, but parsing did not occur.")
Example #25
Source File: subsample_bam.py From pomoxis with Mozilla Public License 2.0 | 5 votes |
def _nearest_overlapping_point(src, point): """Find the interval with the closest start point to a given point. :param src: IntervalTree instance. :param point: query point. :returns: Interval instance of interval with closest start. """ items = src.at(point) if len(items) == 0: return None items = sorted(items, key=lambda x: x.end - x.begin, reverse=True) items.sort(key=lambda x: abs(x.begin - point)) return items[0]
Example #26
Source File: util.py From pomoxis with Mozilla Public License 2.0 | 5 votes |
def intervaltrees_from_bed(path_to_bed):
    """Create a dict of intervaltrees from a .bed file, indexed by chrom.

    :param path_to_bed: str, path to .bed file.

    :returns: { str chrom: `intervaltree.IntervalTree` obj }.
    """
    trees_by_chrom = defaultdict(intervaltree.IntervalTree)
    for record in yield_from_bed(path_to_bed):
        chrom, start, stop = record
        chrom_tree = trees_by_chrom[chrom]
        chrom_tree.add(intervaltree.Interval(begin=start, end=stop))
    return trees_by_chrom
Example #27
Source File: add_genes.py From pheweb with GNU Affero General Public License v3.0 | 5 votes |
def __init__(self, interval_tuples): '''intervals is like [('22', 12321, 12345, 'APOL1'), ...]''' self._its = {} self._gene_starts = {} self._gene_ends = {} for interval_tuple in interval_tuples: chrom, pos_start, pos_end, gene_name = interval_tuple assert isinstance(pos_start, int) assert isinstance(pos_end, int) if chrom not in self._its: self._its[chrom] = intervaltree.IntervalTree() self._gene_starts[chrom] = [] self._gene_ends[chrom] = [] self._its[chrom].add(intervaltree.Interval(pos_start, pos_end, gene_name)) self._gene_starts[chrom].append((pos_start, gene_name)) self._gene_ends[chrom].append((pos_end, gene_name)) for chrom in self._its: self._gene_starts[chrom] = BisectFinder(self._gene_starts[chrom]) self._gene_ends[chrom] = BisectFinder(self._gene_ends[chrom])
Example #28
Source File: cytoband.py From scout with BSD 3-Clause "New" or "Revised" License | 5 votes |
def parse_cytoband(lines):
    """Parse iterable with cytoband coordinates

    Blank lines are skipped.  A leading "chr" prefix is removed from the
    chromosome name; names without that prefix are kept unchanged.

    Args:
        lines(iterable): Strings on format "chr1\\t2300000\\t5400000\\tp36.32\\tgpos25"

    Returns:
        cytobands(dict): Dictionary with chromosome names as keys and
                         interval trees as values; each band is stored over
                         start:stop with the band name as data
    """
    cytobands = {}
    for line in lines:
        line = line.rstrip()
        if not line:
            # nothing to parse on an empty line
            continue
        splitted_line = line.split("\t")

        chrom = splitted_line[0]
        # Remove the prefix only.  str.lstrip("chr") would strip any
        # leading run of the characters 'c', 'h' and 'r', mangling names
        # such as "hs37d5".
        if chrom.startswith("chr"):
            chrom = chrom[len("chr"):]

        start = int(splitted_line[1])
        stop = int(splitted_line[2])
        name = splitted_line[3]

        if chrom not in cytobands:
            # Create a new interval tree for this chromosome
            cytobands[chrom] = intervaltree.IntervalTree()
        # Add the interval to the tree
        cytobands[chrom][start:stop] = name

    return cytobands
Example #29
Source File: pt.py From hase with BSD 2-Clause "Simplified" License | 5 votes |
def cfg(self) -> CFG:
    """Build a CFG whose basic blocks are derived from ``self.edges``.

    The edges are processed in ``sort_edges`` order.  A ``JumpTarget`` edge
    closes the block in progress (if any) and starts a new one at its
    instruction; any other edge sets the block's terminator and adds the
    edge's target instruction to the block's successors.
    """
    basic_blocks = IntervalTree()  # type: IntervalTree
    leader = None  # type: Optional[Instruction]
    terminator = None  # type: Optional[Instruction]
    successors = []  # type: List[Instruction]

    ordered_edges = sorted(self.edges, key=sort_edges)
    for edge in ordered_edges:
        if not isinstance(edge, JumpTarget):
            # we should not see jump originating from code that we not
            # jumped to before
            if terminator is not None and terminator != edge.instr:
                print("foo")
            terminator = edge.instr
            successors.append(edge.target_instr)
            continue

        # A jump target: flush the block collected so far, start the next
        if leader is not None:
            self.append_basic_block(
                basic_blocks, leader, terminator, edge.instr, successors
            )
        leader = edge.instr
        terminator = None
        successors = []

    # Flush the block that was still open when the edges ran out
    if terminator is not None and leader is not None:
        self.append_basic_block(
            basic_blocks, leader, terminator, edge.instr, successors
        )

    return CFG(basic_blocks)
Example #30
Source File: pt.py From hase with BSD 2-Clause "Simplified" License | 5 votes |
def append_basic_block(
    self,
    basic_blocks: IntervalTree,
    leader: Instruction,
    terminator: Optional[Instruction],
    instr: Instruction,
    successors: List[Instruction],
) -> None:
    """Store ``successors`` in ``basic_blocks`` over the block's address range.

    The range starts at ``leader.ip``.  With a terminator it ends at
    ``terminator.ip + terminator.size``.  Without one it ends at
    ``instr.ip`` and ``instr`` is appended to ``successors`` first.
    """
    if terminator is not None:
        block_end = terminator.ip + terminator.size
    else:
        block_end = instr.ip
        successors.append(instr)

    basic_blocks[leader.ip : block_end] = successors