Python pybedtools.Interval() Examples

The following are 30 code examples of pybedtools.Interval(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module pybedtools , or try the search function .
Example #1
Source File: dataloader.py    From models with MIT License 7 votes vote down vote up
def __getitem__(self, idx):
        if self.fasta_extractor is None:
            self.fasta_extractor = FastaStringExtractor(self.fasta_file, use_strand=True,
                                                         force_upper=self.force_upper)
        if self.vcf is None:
            self.vcf = MultiSampleVCF(self.vcf_file)
        if self.vcf_extractor is None:
            self.vcf_extractor = VariantSeqExtractor(self.fasta_file)
        
        entry = self.bed.iloc[idx]
        entry_id = entry["id"]
        entry_chr = entry["chr"]
        entry_pos = entry["pos"]
        entry_strand = entry["strand"]
        
        ref_exons = []
        var_exons = []
        exon_pos_strings = []
        exon_var_strings = []
        for exon in entry_pos:
            # We get the interval
            interval = pybedtools.Interval(to_scalar(entry_chr), to_scalar(exon[0]), 
                                           to_scalar(exon[1]), strand=to_scalar(entry_strand))
            exon_pos_strings.append("%s-%s" % (str(exon[0]),str(exon[1])))

            # We get the reference sequence
            ref_seq = self.fasta_extractor.extract(interval)

            # We get the variants, insert them and also save them as metadata
            variants = list(self.vcf.fetch_variants(interval))
            if len(variants) == 0:
                ref_exons.append(ref_seq)
                var_exons.append(ref_seq)
            else:
                var_seq = self.vcf_extractor.extract(interval, variants=variants,
                    anchor=0, fixed_len=False)
                var_string = ";".join([str(var) for var in variants])

                ref_exons.append(ref_seq)
                var_exons.append(var_seq)
                exon_var_strings.append(var_string)
        
        # Combine
        if entry_strand == "-":
            ref_exons.reverse()
            var_exons.reverse()
        ref_seq = "".join(ref_exons)
        var_seq = "".join(var_exons)
        pos_string = ";".join(exon_pos_strings)
        var_string = ";".join(exon_var_strings)
        
        return {
            "inputs": np.array([ref_seq, var_seq]),
            "metadata": {
                "id": entry_id,
                "chr": entry_chr,
                "exon_positions": pos_string,
                "strand": entry_strand,
                "variants": var_string
            }
        } 
Example #2
Source File: generate_sv_intervals.py    From metasv with BSD 2-Clause "Simplified" License 6 votes vote down vote up
def merged_interval_features(feature, bam_handle,find_svtype=False):
    support_list = feature.name.split(",")
    locations = sorted(map(int, support_list[0:-1:3]))
    other_bp_ends = support_list[-1] if not find_svtype else ','.join(support_list[1::3])
    num_unique_locations = len(set(locations))
    count_str = ",".join(["%s,%s" % (i, c) for (i, c) in collections.Counter(locations).items()])
    plus_support = len([i for i in support_list[2::3] if i == "+"])
    minus_support = len(locations) - plus_support
    locations_span = max(locations) - min(locations)
    interval_readcount = bam_handle.count(reference=str(feature.chrom), start=feature.start, end=feature.end)
    info = {"SC_PLUS_SUPPORT":plus_support, "SC_MINUS_SUPPORT":minus_support, 
            "SC_LOCATIONS_SPAN":locations_span, "SC_NUM_UNIQUE_LOCATIONS":num_unique_locations,
            "SC_COUNT_STR": count_str, "SC_COVERAGE":interval_readcount, "SC_OTHER_BP_ENDS": other_bp_ends, 
            "SC_SC_BP_ENDS": "%s-%s"%(feature.start, feature.end)}
    name = "%s,%s,0,SC" % (
        base64.b64encode(json.dumps(info)), feature.fields[6].split(',')[0])

    return pybedtools.Interval(feature.chrom, feature.start, feature.end, name=name, score=feature.score,
                               otherfields=[str(interval_readcount)]+feature.fields[6:]) 
Example #3
Source File: generate_final_vcf.py    From metasv with BSD 2-Clause "Simplified" License 6 votes vote down vote up
def find_idp(feature, wiggle):
    n = len(feature.fields) / 2
    if feature.chrom != feature.fields[n]:
        return None
    start_dup = feature.start
    end_dup = feature.end
    start_del = int(feature.fields[n + 1])
    end_del = int(feature.fields[n + 2])
    if abs(start_del - end_del) > (abs(start_dup - end_dup) - wiggle):
        return None
    dist_ends = [abs(start_del - start_dup), abs(end_del - end_dup)]
    if min(dist_ends) > wiggle:
        return None
    del_pos = start_del if dist_ends[0] > dist_ends[1] else end_del
    name = "%s,%s" % (feature.name, feature.fields[n + 3])
    score = "%s,%s" % (feature.score, feature.fields[n + 4])
    return pybedtools.Interval(feature.chrom, feature.start, feature.end,
                               name=name, score=score,
                               otherfields=["%d" % del_pos,
                                            "%d-%d" % (start_del, end_del)]) 
Example #4
Source File: generate_sv_intervals.py    From metasv with BSD 2-Clause "Simplified" License 6 votes vote down vote up
def find_other_bp_interval(feature,pad):
    name_fields = feature.name.split(",")
    sv_type = name_fields[1]
    sv_methods = name_fields[3]
    sv_length = int(name_fields[2])
    info = json.loads(base64.b64decode(name_fields[0]))

    other_bps=map(lambda y: map(int,y['SC_OTHER_BP_ENDS'].split('-')),info["SC_SUBINTERVAL_INFOs"])
    start_interval=min(map(lambda x:x[0],other_bps))
    end_interval=max(map(lambda x:x[1],other_bps))
    soft_clip_location = (start_interval+end_interval)/2
    sc_bp=feature.start if abs(soft_clip_location-feature.start)>abs(soft_clip_location-feature.end) else feature.end
    other_bp_field="%d-%d"%(max(sc_bp-pad,0),sc_bp+pad)
    name="%d,%d,+,%s"%(soft_clip_location,sc_bp,other_bp_field)
    return pybedtools.Interval(feature.chrom, start_interval, end_interval, name=name, score="1",
                               otherfields=[sv_type]) 
Example #5
Source File: generate_final_vcf.py    From metasv with BSD 2-Clause "Simplified" License 6 votes vote down vote up
def filter_itxs(feature):
    n = len(feature.fields) / 2
    del_interval_idp = map(int, feature.fields[7].split("-"))
    del_interval_itx_1 = map(int,
                             feature.fields[n + 7].split(",")[0].split("-"))
    del_interval_itx_2 = map(int,
                             feature.fields[n + 7].split(",")[1].split("-"))
    if filter(lambda x: abs(x[0] - del_interval_idp[0]) + abs(
                    x[1] - del_interval_idp[1]) == 0,
              [del_interval_itx_1, del_interval_itx_2]) and "LowQual" not in \
            feature.fields[n + 4]:
        return None
    return pybedtools.Interval(feature.chrom, feature.start, feature.end,
                               name=feature.name,
                               score=feature.score,
                               otherfields=feature.fields[6:n]) 
Example #6
Source File: generate_sv_intervals.py    From metasv with BSD 2-Clause "Simplified" License 6 votes vote down vote up
def get_full_interval(feature, pad):
    name_fields = feature.name.split(",")
    info = json.loads(base64.b64decode(name_fields[0]))
    other_bp_ends=info["SC_OTHER_BP_ENDS"]
    start = feature.start
    end = feature.end
    sv_type = name_fields[1]
    if "-" in other_bp_ends:
        other_bp_start,other_bp_end=map(lambda x:int(x),other_bp_ends.split("-"))
        if other_bp_start != 0 or other_bp_end != 0:
            start = min((feature.start+feature.end)/2,(other_bp_start+other_bp_end)/2)
            end = max((feature.start+feature.end)/2,(other_bp_start+other_bp_end)/2)

    sv_len = 0 if sv_type == "INS" else max(end-start,0)
    info["SOURCES"] = "%s-%d-%s-%d-%d-SoftClip" % (feature.chrom, start, feature.chrom, end, sv_len)
    name = "%s,%s,%d,%s"%(base64.b64encode(json.dumps(info)),sv_type,sv_len,'SC')

    return pybedtools.Interval(feature.chrom, start, end, name=name, score=feature.score,
                                   otherfields=feature.fields[6:]) 
Example #7
Source File: sv_interval.py    From metasv with BSD 2-Clause "Simplified" License 6 votes vote down vote up
def to_bed_interval(self, sample_name):
        if self.start <= 0:
            return None
        if self.sv_type not in ["DEL", "INS", "INV", "ITX", "CTX", "DUP"]: return None
        # if not self.sub_intervals and list(self.sources)[0] == "HaplotypeCaller": return None
        # if len(self.sources) == 1 and list(self.sources)[0] == "HaplotypeCaller": return None
        if self.sv_type == "INS":
            end = self.end + 1
        elif self.sv_type in ["ITX","CTX"] :
            end = self.start + 1
        else:
            end = self.end
        info = self.get_info()
        
        if self.sv_type in ["ITX", "CTX"]:
            info["POS2"] = self.end
            info["END"] = end
            info["CHR2"] = self.chrom2
            if not self.is_precise:
                info.update({"IMPRECISE": True})

        return pybedtools.Interval(self.chrom, self.start, end, name="%s,%s,%d,%s" % (
            base64.b64encode(json.dumps(info)), self.sv_type, self.length,
            ";".join(self._get_svmethods())),
            score=str(len(self.sources))) 
Example #8
Source File: annotate_bed_for_ml.py    From metasv with BSD 2-Clause "Simplified" License 6 votes vote down vote up
def add_weighted_score(in_bed, score_bed):
    out_bed = in_bed.intersect(score_bed, wao=True).saveas(os.path.join(args.tmpdir, "score.bed"))

    bed_array = []
    last_interval = pybedtools.Interval("", 0, 0)
    map_value = 0.0
    for interval in out_bed:
        if interval.chrom != last_interval.chrom or interval.start != last_interval.start or interval.end != last_interval.end:
            if last_interval.chrom:
                bed_array.append(tuple(last_interval.fields[:-5]) + (str(map_value),))
            map_value = 0.0
            last_interval = interval
        if float(interval.fields[-1]) > 0:
            map_value += float(interval.fields[-1]) * float(interval.fields[-2]) / float(interval.length)

    if last_interval.chrom:
        bed_array.append(tuple(last_interval.fields[:-5]) + (str(map_value),))

    return pybedtools.BedTool(bed_array) 
Example #9
Source File: annotate_bed_for_ml_del.py    From metasv with BSD 2-Clause "Simplified" License 6 votes vote down vote up
def add_weighted_score(in_bed, score_bed):
    out_bed = in_bed.intersect(score_bed, wao=True).saveas(os.path.join(args.tmpdir, "score.bed"))

    bed_array = []
    last_interval = pybedtools.Interval("", 0, 0)
    map_value = 0.0
    for interval in out_bed:
        if interval.chrom != last_interval.chrom or interval.start != last_interval.start or interval.end != last_interval.end:
            if last_interval.chrom:
                bed_array.append(tuple(last_interval.fields[:-5]) + (str(map_value),))
            map_value = 0.0
            last_interval = interval
        if float(interval.fields[-1]) > 0:
            map_value += float(interval.fields[-1]) * float(interval.fields[-2]) / float(interval.length)

    if last_interval.chrom:
        bed_array.append(tuple(last_interval.fields[:-5]) + (str(map_value),))

    return pybedtools.BedTool(bed_array) 
Example #10
Source File: sequence.py    From kipoiseq with MIT License 6 votes vote down vote up
def __getitem__(self, idx):
        """Returns (pybedtools.Interval, labels)
        """
        row = self.df.iloc[idx]

        # TODO: use kipoiseq.dataclasses.interval instead of pybedtools
        import pybedtools
        interval = pybedtools.create_interval_from_list(
            [to_scalar(x) for x in row.iloc[:self.bed_columns]])

        if self.ignore_targets or self.n_tasks == 0:
            labels = {}
        else:
            labels = row.iloc[self.bed_columns:].values.astype(
                self.label_dtype)
        return interval, labels 
Example #11
Source File: genotype_with_reference.py    From pacbio_variant_caller with MIT License 6 votes vote down vote up
def soft_clips_at_breakpoint(read, region):
    """
    Returns True if the given pysam read maps up to the edge of the given
    pybedtools.Interval, ``region`` and soft clips at that edge.

    Two cases are possible:

    1. The end of the read soft clips such that the read's reference end stops
    at the breakpoint start and the end of the read is softclipped.

    2. The beginning of the read soft clips such that the read's reference start
    begins at the breakpoint end and the beginning of the read is softclipped.

    In both cases, the read should be mapped in its best location in the
    reference ("perfect mapping").
    """
    return (
        (read.reference_end == region.start and read.cigar[-1][0] == BAM_CSOFT_CLIP) or
        (read.reference_start == region.end + 1 and read.cigar[0][0] == BAM_CSOFT_CLIP)
    ) 
Example #12
Source File: genotype_with_reference.py    From pacbio_variant_caller with MIT License 5 votes vote down vote up
def spans_region(read, region):
    """
    Returns True if the given pysam read spans the given pybedtools.Interval,
    ``region``.
    """
    return has_perfect_mapping(read) and read.reference_start <= region.start and read.reference_end >= region.end 
Example #13
Source File: genotype_with_reference.py    From pacbio_variant_caller with MIT License 5 votes vote down vote up
def has_gaps_in_region(read, region):
    """
    Returns True if the given pysam read spans the given pybedtools.Interval,
    ``region``.
    """
    # If the given read has gaps in its alignment to the reference inside the
    # given interval (more than one block inside the SV event itself), there are
    # gaps inside the SV.
    tree = intervaltree.IntervalTree()
    for block in read.get_blocks():
        tree[block[0]:block[1]] = block

    return len(tree[region.start:region.end]) > 1 
Example #14
Source File: generate_sv_intervals.py    From metasv with BSD 2-Clause "Simplified" License 5 votes vote down vote up
def remove_ins_padding(feature, pad):
    name_fields = feature.name.split(",")
    sv_type = name_fields[1]
    return pybedtools.Interval(feature.chrom, feature.start+pad,
         max(feature.end-pad,feature.start+pad+1), name=feature.name, score=feature.score) if sv_type  == "INS" else feature 
Example #15
Source File: genotype_with_reference.py    From pacbio_variant_caller with MIT License 5 votes vote down vote up
def pair_spans_regions(read_pair, regions):
    """
    Returns True if the given pysam reads spans the given pybedtools.Interval
    list, ``regions``. Read pairs can span the regions even if either read
    overlaps the region breakpoints.
    """
    return len(read_pair) == 2 and read_pair[0].reference_start < regions[0].start and read_pair[1].reference_end > regions[-1].end 
Example #16
Source File: genotype_with_reference.py    From pacbio_variant_caller with MIT License 5 votes vote down vote up
def get_insert_sizes_for_region(bam, region):
    """
    Return insert sizes reads in the given pysam.AlignmentFile, ``bam``, and the
    given pybedtools.Interval, ``region``.
    """
    return [read.tlen for read in bam.fetch(region.chrom, region.start, region.end)
            if has_perfect_mapping(read) and not read.mate_is_unmapped and read.tlen >= 0 and read.tlen <= 1000] 
Example #17
Source File: generate_final_vcf.py    From metasv with BSD 2-Clause "Simplified" License 5 votes vote down vote up
def find_ctx(feature, overlap_ratio=0.9):
    n = len(feature.fields) / 2
    start_del_ins = int(feature.fields[n + 1])
    end_del_ins = int(feature.fields[n + 2])
    name = "%s,%s" % (feature.name, feature.fields[n + 3])
    score = "%s,%s" % (feature.score, feature.fields[n + 4])
    return pybedtools.Interval(feature.chrom, feature.start, feature.end,
                               name=name, score=score,
                               otherfields=[".", "%d-%d" % (
                               start_del_ins, end_del_ins)]) 
Example #18
Source File: utils.py    From FactorNet with MIT License 5 votes vote down vote up
def get_genome_bed():
    genome_sizes_info = np.loadtxt(genome_sizes_file, dtype=str)
    chroms = list(genome_sizes_info[:,0])
    chroms_sizes = list(genome_sizes_info[:,1].astype(int))
    genome_bed = []
    for chrom, chrom_size in zip(chroms, chroms_sizes):
        genome_bed.append(Interval(chrom, 0, chrom_size))
    genome_bed = BedTool(genome_bed)
    return chroms, chroms_sizes, genome_bed 
Example #19
Source File: generate_final_vcf.py    From metasv with BSD 2-Clause "Simplified" License 5 votes vote down vote up
def build_chr2_ins(feature, thr_top=0.15):
    sc_chr2_str = feature.fields[6]
    if sc_chr2_str == ".":
        return []
    sub_str = map(lambda x: [x.split(";")[0], map(int, x.split(";")[1:])],
                  sc_chr2_str.split(","))
    chr2_dict = {}
    for chr2, poses in sub_str:
        if chr2 not in chr2_dict:
            chr2_dict[chr2] = []
        chr2_dict[chr2].append(poses)

    chr2_dict = {k: [sum(map(lambda x: x[0], v)), min(map(lambda x: x[1], v)),
                     max(map(lambda x: x[2], v))] for k, v in
                 chr2_dict.iteritems()}
    sorted_chr2 = sorted(chr2_dict.items(), key=lambda x: x[1][0], reverse=True)
    n_reads = sum(map(lambda x: x[1][0], sorted_chr2))
    top_chr2s = filter(
        lambda x: x[1][0] > (thr_top * n_reads) and x[0] not in ["-1",
                                                                 feature.chrom],
        sorted_chr2)
    if not top_chr2s:
        return []
    ctx_intervals = []
    for chr2, [cnt, start, end] in top_chr2s:
        ctx_intervals.append(pybedtools.Interval(chr2, start, end,
                                                 name=feature.name,
                                                 score=feature.score))
    return ctx_intervals 
Example #20
Source File: utils.py    From FactorNet with MIT License 5 votes vote down vote up
def data_to_bed(data):
    intervals = []
    for datum in data:
        chrom = datum[0]
        start = datum[1]
        stop = datum[2]
        intervals.append(Interval(chrom, start, stop))
    return BedTool(intervals) 
Example #21
Source File: generate_final_vcf.py    From metasv with BSD 2-Clause "Simplified" License 5 votes vote down vote up
def find_itx(feature, wiggle):
    n = len(feature.fields) / 2
    start_idp1 = feature.start
    end_idp1 = feature.end
    start_idp2 = int(feature.fields[n + 1])
    end_idp2 = int(feature.fields[n + 2])
    dist_ends = [abs(start_idp1 - start_idp2), abs(end_idp1 - end_idp2)]
    if min(dist_ends) > wiggle:
        return None
    del_pos1 = int(feature.fields[6])
    del_pos2 = int(feature.fields[n + 6])
    if abs(del_pos1 - del_pos2) > wiggle:
        return None

    del_interval1 = map(int, feature.fields[7].split("-"))
    del_interval2 = map(int, feature.fields[n + 7].split("-"))
    lr_1 = 1 if abs(del_pos1 - del_interval1[0]) < abs(
        del_pos1 - del_interval1[1]) else 0
    lr_2 = 1 if abs(del_pos2 - del_interval2[0]) < abs(
        del_pos2 - del_interval2[1]) else 0
    if lr_1 == lr_2 or lr_2 < lr_1:
        return None

    del_id_2 = feature.name.split(",")[-1]
    del_filter_2 = feature.score.split(",")[-1]
    name = "%s,%s" % (feature.name, del_id_2)
    score = "%s,%s" % (feature.score, del_filter_2)

    return pybedtools.Interval(feature.chrom, feature.start, feature.end,
                               name=name, score=score,
                               otherfields=["%d" % ((del_pos1 + del_pos2) / 2),
                                            "%d-%d,%d-%d" % (
                                            del_interval1[0], del_interval1[1],
                                            del_interval2[0],
                                            del_interval2[1])]) 
Example #22
Source File: utils.py    From FactorNet with MIT License 5 votes vote down vote up
def make_blacklist():
    blacklist = BedTool(blacklist_file)
    blacklist = blacklist.slop(g=genome_sizes_file, b=L)
    # Add ends of the chromosomes to the blacklist
    genome_sizes_info = np.loadtxt(genome_sizes_file, dtype=str)
    chroms = list(genome_sizes_info[:,0])
    chroms_sizes = list(genome_sizes_info[:,1].astype(int))
    blacklist2 = []
    for chrom, size in zip(chroms, chroms_sizes):
        blacklist2.append(Interval(chrom, 0, L))
        blacklist2.append(Interval(chrom, size - L, size))
    blacklist2 = BedTool(blacklist2)
    blacklist = blacklist.cat(blacklist2)
    return blacklist 
Example #23
Source File: test_extractors.py    From genomelake with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def test_bigwig_and_intervals():
    bw_path = "tests/data/test_bigwig.bw"
    intervals = [Interval("chr1", 0, 10), Interval("chr2", 0, 10)]
    expected_chr1 = np.array([0.1] * 10, dtype=np.float32)
    expected_chr2 = np.array([0] + [9] * 9, dtype=np.float32)
    expected_data = np.stack([expected_chr1, expected_chr2])

    return (bw_path, intervals, expected_data) 
Example #24
Source File: test_BedDataset.py    From kipoiseq with MIT License 5 votes vote down vote up
def test_bed3(tmpdir):
    bed_file = write_tmp('chr1\t1\t2\nchr1\t1\t3', tmpdir)
    bt = BedDataset(bed_file)
    assert bt.n_tasks == 0
    assert len(bt) == 2
    assert np.all(bt.df[0] == 'chr1')
    assert bt[0] == (Interval("chr1", 1, 2), {})
    assert bt[1] == (Interval("chr1", 1, 3), {}) 
Example #25
Source File: test_BedDataset.py    From kipoiseq with MIT License 5 votes vote down vote up
def test_bed3_labels(tmpdir):
    bed_file = write_tmp('chr1\t1\t2\t1\t0\nchr1\t1\t3\t0\t1', tmpdir)
    bt = BedDataset(bed_file)
    assert np.all(bt.get_targets() == np.array([[1, 0],
                                                [0, 1]]))
    assert len(bt) == 2
    assert bt.n_tasks == 2
    assert np.all(bt.df[0] == 'chr1')
    assert bt[0][0] == Interval("chr1", 1, 2)
    assert np.all(bt[0][1] == np.array([1, 0]))

    assert bt[1][0] == Interval("chr1", 1, 3)
    assert np.all(bt[1][1] == np.array([0, 1]))
    assert len(bt) == 2 
Example #26
Source File: test_BedDataset.py    From kipoiseq with MIT License 5 votes vote down vote up
def test_incl_excl_chromosomes(tmpdir):
    bed_file = write_tmp(
        'chr1\t1\t2\t1\t0\nchr2\t1\t3\t0\t1\nchr3\t1\t3\t0\t1', tmpdir)
    bt = BedDataset(bed_file)
    assert len(bt) == 3

    bt = BedDataset(bed_file, incl_chromosomes=['chr1'])
    assert len(bt) == 1
    assert bt[0][0] == Interval("chr1", 1, 2)

    bt = BedDataset(bed_file, excl_chromosomes=['chr1'])
    assert len(bt) == 2
    assert bt[0][0] == Interval("chr2", 1, 3) 
Example #27
Source File: test_BedDataset.py    From kipoiseq with MIT License 5 votes vote down vote up
def test_tsvreader(tsv_file, num_chr, label_dtype):
    ds = BedDataset(tsv_file, label_dtype=label_dtype, num_chr=num_chr)
    interval, labels = ds[0]
    assert isinstance(interval, Interval)
    if not num_chr:
        assert interval.chrom.startswith("chr")
    assert isinstance(labels[0], label_dtype)
    assert interval.start == 2
    assert interval.end == 4 
Example #28
Source File: dataclasses.py    From kipoiseq with MIT License 5 votes vote down vote up
def from_pybedtools(cls, interval):
        """Create the ranges object from `pybedtools.Interval`

        # Arguments
            interval: `pybedtools.Interval` instance
        """
        return cls(chrom=interval.chrom,
                   start=interval.start,
                   end=interval.stop,
                   name=interval.name,
                   score=interval.score,
                   strand=interval.strand,
                   attrs=dict(interval.attrs or dict())
                   ) 
Example #29
Source File: test_extractors.py    From genomelake with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def test_fasta_extractor_valid_intervals():
    extractor = FastaExtractor("tests/data/fasta_test.fa")
    intervals = [Interval("chr1", 0, 10), Interval("chr2", 0, 10)]
    expected_data = np.array(
        [
            [
                [1., 0., 0., 0.],
                [0., 1., 0., 0.],
                [0., 1., 0., 0.],
                [0., 0., 1., 0.],
                [0., 0., 0., 1.],
                [1., 0., 0., 0.],
                [0., 1., 0., 0.],
                [0., 1., 0., 0.],
                [0., 0., 1., 0.],
                [0., 0., 0., 1.],
            ],
            [
                [1., 0., 0., 0.],
                [0., 1., 0., 0.],
                [0., 0., 1., 0.],
                [0., 0., 0., 1.],
                [0.25, 0.25, 0.25, 0.25],
                [1., 0., 0., 0.],
                [0., 1., 0., 0.],
                [0., 0., 1., 0.],
                [0., 0., 0., 1.],
                [0.25, 0.25, 0.25, 0.25],
            ],
        ],
        dtype=np.float32,
    )
    data = extractor(intervals)
    assert (data == expected_data).all() 
Example #30
Source File: test_extractors.py    From genomelake with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def test_fasta_extractor_over_chr_end():
    extractor = FastaExtractor("tests/data/fasta_test.fa")
    intervals = [Interval("chr1", 0, 100), Interval("chr1", 1, 101)]
    with pytest.raises(ValueError):
        data = extractor(intervals)