Python spacy.tokens.Span() Examples
The following are 30 code examples of spacy.tokens.Span(), drawn from open-source projects. The source file, project, and license are noted above each example. You may also want to check out all other available functions and classes of the spacy.tokens module.
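Before the project examples, here is a minimal sketch (not taken from any of the projects below) of the two common ways to obtain a Span: slicing a Doc, and calling the Span constructor directly. The blank pipeline and the "GPE" label are illustrative choices, not requirements.

    # Minimal sketch: two common ways to create a spacy.tokens.Span.
    import spacy
    from spacy.tokens import Span

    nlp = spacy.blank("en")                  # lightweight pipeline, no model download needed
    doc = nlp("Berlin is the capital of Germany")

    span_a = doc[0:1]                        # slicing a Doc yields a Span ("Berlin")
    span_b = Span(doc, 0, 1, label="GPE")    # explicit constructor with an entity label

    print(span_a.text, span_b.label_)        # Berlin GPE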
Example #1
Source File: document.py From neuralcoref with MIT License | 6 votes |
def __new__(
    cls,
    span,
    mention_index,
    utterance_index,
    utterance_start_sent,
    speaker=None,
    gold_label=None,
    *args,
    **kwargs,
):
    # We need to override __new__, see
    # http://cython.readthedocs.io/en/latest/src/userguide/special_methods.html
    obj = spacy.tokens.Span.__new__(
        cls, span.doc, span.start, span.end, *args, **kwargs
    )
    return obj
Example #2
Source File: helpers.py From errudite with GNU General Public License v2.0 | 6 votes |
def span_to_json(sentence: Span, sid: int = 0) -> List[Dict]:
    '''
    @param <Span> sentence: sentence in span type
    @return <Dict> json-serialized sentences
    '''
    if sentence is None:
        return None
    j_sentence = [{
        'idx': t.i,
        'text': t.text,
        'ner': t.ent_type_,
        'lemma': t.lemma_,
        'pos': t.pos_,
        'tag': t.tag_,
        'whitespace': t.whitespace_,
        'sid': sid
        # 'matches': []
    } for t in sentence]
    return j_sentence
Example #3
Source File: helpers.py From errudite with GNU General Public License v2.0 | 6 votes |
def gen_text_from_sent_list(sentences: List[Span]) -> str:
    '''
    Concatenate the text of a list of sentence spans, preserving each
    sentence's trailing whitespace.
    '''
    return ''.join([s.text + s[-1].whitespace_ for s in sentences])
Example #4
Source File: helpers.py From errudite with GNU General Public License v2.0 | 6 votes |
def convert_doc(doc: Union[Doc, Span, 'Target'], strict_format: str = None):
    def _strict_doc(doc):
        if not doc:
            return None
        if type(doc) == str:
            return doc
        if strict_format == 'doc':
            return doc if type(doc) == Doc else doc.as_doc()
        if strict_format == 'span':
            return doc if type(doc) == Span else doc[:]
        return doc

    def _convert(doc):
        if type(doc) == str:
            return doc
        if type(doc) == Doc or type(doc) == Span:
            return _strict_doc(doc)
        else:
            return _strict_doc(getattr(doc, 'doc', None))

    if not doc:
        return None
    if type(doc) == list:
        return [_convert(d) for d in doc]
    else:
        return _convert(doc)
Example #5
Source File: context.py From errudite with GNU General Public License v2.0 | 6 votes |
def get_sentence(self, sid: Union[int, List[int]] = 0, doc: Doc = None) -> Union[Span, List[Span]]:
    """Query a sentence in a paragraph.

    Keyword Arguments:
        sid {Union[int, List[int]]} -- the sentence id, or a list of sentence ids (default: {0})

    Returns:
        Union[Span, List[Span]] -- the sentence(s)
    """
    if doc:
        sentences = list(doc.sents)
    else:
        sentences = list(self.doc.sents)
    if type(sid) == int or type(sid) == float:
        if int(sid) >= 0 and int(sid) < len(sentences):
            return sentences[int(sid)]
    # else if it's an array of sentence ids
    sid = [int(s) for s in sid if s >= 0 and s < len(sentences)]
    if len(sid) > 0:
        filtered = [sentences[s] for s in sid]
        return filtered[0] if len(filtered) == 1 else filtered
    if sentences:
        return sentences[0]
    return None
Example #6
Source File: spacy_np_annotator.py From nlp-architect with Apache License 2.0 | 5 votes |
def get_noun_phrases(doc: Doc) -> [Span]:
    """
    Get noun phrase tags from a spacy annotated document.

    Args:
        doc (Doc): a spacy type document

    Returns:
        a list of noun phrase Span objects
    """
    assert hasattr(doc._, "noun_phrases"), "no noun_phrase attributes in document"
    return doc._.noun_phrases
Example #7
Source File: spacy_np_annotator.py From nlp-architect with Apache License 2.0 | 5 votes |
def __call__(self, doc: Doc) -> Doc:
    """
    Annotate the document with noun phrase spans
    """
    spans = []
    doc_vecs = []
    doc_chars = []
    doc_lens = []
    if len(doc) < 1:
        return doc
    for sentence in doc.sents:
        features = self._feature_extractor([t.text for t in sentence])
        if isinstance(features, tuple):
            doc_vec = features[0]
            doc_chars.append(features[1])
        else:
            doc_vec = features
        doc_vecs.append(doc_vec)
        doc_lens.append(len(doc_vec))
    doc_vectors = pad_sentences(np.asarray(doc_vecs))
    inputs = doc_vectors
    if self.char_vocab:
        max_len = doc_vectors.shape[1]
        padded_chars = np.zeros((len(doc_chars), max_len, self.model.max_word_len))
        for idx, d in enumerate(doc_chars):
            d = d[:max_len]
            padded_chars[idx, -d.shape[0]:] = d
        inputs = [inputs, padded_chars]
    np_indexes = self._infer_chunks(inputs, doc_lens)
    for s, e in np_indexes:
        np_span = Span(doc, s, e)
        spans.append(np_span)
    spans = _NPPostprocessor.process(spans)
    set_noun_phrases(doc, spans)
    return doc
Example #8
Source File: spacy_np_annotator.py From nlp-architect with Apache License 2.0 | 5 votes |
def set_noun_phrases(doc: Doc, nps: [Span]) -> None:
    """
    Set noun phrase tags

    Args:
        doc (Doc): a spacy type document
        nps ([Span]): a list of Spans
    """
    assert hasattr(doc._, "noun_phrases"), "no noun_phrase attributes in document"
    doc._.set("noun_phrases", nps)
Example #9
Source File: spacy_np_annotator.py From nlp-architect with Apache License 2.0 | 5 votes |
def process(cls, noun_phrases: [Span]) -> [Span]:
    new_phrases = []
    for phrase in noun_phrases:
        p = _NPPostprocessor._phrase_process(phrase)
        if p is not None and len(p) > 0:
            new_phrases.append(p)
    return new_phrases
Example #10
Source File: markov_engine.py From armchair-expert with MIT License | 5 votes |
def span_to_bigram(span: Span) -> list:
    grams = []
    for a_idx, a in enumerate(span):
        for b_idx, b in enumerate(span):
            dist = b_idx - a_idx
            if dist == 0:
                continue
            elif abs(dist) <= MARKOV_WINDOW_SIZE:
                grams.append([a, b, dist])
    return grams
Example #11
Source File: spacy_np_annotator.py From nlp-architect with Apache License 2.0 | 5 votes |
def _phrase_process(cls, phrase: Span) -> Span:
    last_phrase = None
    while phrase != last_phrase:
        last_phrase = phrase
        for func_args in post_processing_rules:
            pf = func_args[0]
            args = func_args[1:]
            if len(args) > 0:
                phrase = pf(phrase, *args)
            else:
                phrase = pf(phrase)
            if phrase is None:
                break
    return phrase
Example #12
Source File: volume_unit_component.py From medaCy with GNU General Public License v3.0 | 5 votes |
def __call__(self, doc):
    nlp = self.nlp
    with doc.retokenize() as retokenizer:
        # match and tag volume units
        matches = self.volume_matcher(doc)
        for match_id, start, end in matches:
            span = Span(doc, start, end, label=nlp.vocab.strings['volume_unit'])
            for token in span:
                token._.feature_is_volume_unit = True
            if len(span) > 1:
                retokenizer.merge(span)
            doc.ents = list(doc.ents) + [span]
    return doc
Example #13
Source File: frequency_unit_component.py From medaCy with GNU General Public License v3.0 | 5 votes |
def __call__(self, doc):
    nlp = self.nlp
    with doc.retokenize() as retokenizer:
        # match and tag frequency indicators
        matches = self.frequency_matcher(doc)
        for match_id, start, end in matches:
            span = Span(doc, start, end, label=nlp.vocab.strings['frequency_indicator'])
            for token in span:
                token._.feature_is_frequency_indicator = True
            if len(span) > 1:
                retokenizer.merge(span)
            doc.ents = list(doc.ents) + [span]
    return doc
Example #14
Source File: time_unit_component.py From medaCy with GNU General Public License v3.0 | 5 votes |
def __call__(self, doc):
    nlp = self.nlp
    with doc.retokenize() as retokenizer:
        # match and tag time units
        matches = self.time_matcher(doc)
        for match_id, start, end in matches:
            span = Span(doc, start, end, label=nlp.vocab.strings['time_unit'])
            for token in span:
                token._.feature_is_time_unit = True
            if len(span) > 1:
                retokenizer.merge(span)
            doc.ents = list(doc.ents) + [span]
    return doc
Example #15
Source File: measurement_unit_component.py From medaCy with GNU General Public License v3.0 | 5 votes |
def __call__(self, doc):
    nlp = self.nlp
    with doc.retokenize() as retokenizer:
        # match units of measurement (x/y, etc.)
        matches = self.unit_of_measurement_matcher(doc)
        for match_id, start, end in matches:
            span = Span(doc, start, end, label=nlp.vocab.strings['measurement_unit'])
            for token in span:
                token._.feature_is_measurement_unit = True
            if len(span) > 1:
                retokenizer.merge(span)
            doc.ents = list(doc.ents) + [span]
    return doc
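Examples #12 through #15 share the same structure: match a pattern, wrap each match in a labeled Span, flag its tokens, merge multi-token spans, and append the span to doc.ents. The stripped-down sketch below shows that shared pattern; tag_matches, unit_matcher, and the "unit" label are hypothetical placeholders, not medaCy names.

    # Generic sketch of the pattern shared by the medaCy components above.
    from spacy.tokens import Span

    def tag_matches(doc, unit_matcher, label="unit"):
        with doc.retokenize() as retokenizer:
            for match_id, start, end in unit_matcher(doc):
                span = Span(doc, start, end, label=label)
                if len(span) > 1:
                    retokenizer.merge(span)   # merges are applied when the with-block exits
                doc.ents = list(doc.ents) + [span]
        return doc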
Example #16
Source File: lexicon_component.py From medaCy with GNU General Public License v3.0 | 5 votes |
def __call__(self, doc):
    """
    Runs a document through the lexicon component. Utilizes SpaCy's PhraseMatcher
    to find spans in the doc that match the lexicon and overlays the appropriate
    label as 'feature_is_label_from_lexicon' over all tokens in the span.
    :param doc:
    :return:
    """
    logging.debug("Called Lexicon Component")
    matcher = PhraseMatcher(self.nlp.vocab, max_length=10)
    for label in self.lexicon:
        Token.set_extension('feature_is_' + label + '_from_lexicon', default=False, force=True)
        patterns = [self.nlp.make_doc(term) for term in self.lexicon[label]]
        logging.debug(patterns)
        matcher.add(label, None, *patterns)
    matches = matcher(doc)
    for match_id, start, end in matches:
        span = Span(doc, start, end)
        logging.debug(span)
        if span is not None:
            logging.debug('Lexicon term matched: %s Label: %s'
                          % (span.text, self.nlp.vocab.strings[match_id]))
            for token in span:
                token._.set('feature_is_' + self.nlp.vocab.strings[match_id] + '_from_lexicon', True)
    return doc
Example #17
Source File: dataset_reader.py From errudite with GNU General Public License v2.0 | 5 votes |
def _compute_span_info(self, instance: Instance, spans: Span,
                       feature_list: List[str], target: str, info_idxes):
    if target not in instance.entries:
        target_name = f'prediction(model="{target}")'
        target = 'predictions'
    else:
        target_name = target
    if len(list(np.unique(feature_list))) > 2:
        return info_idxes
    span_features = [
        get_token_feature(t, feature_list[idx]).strip() for idx, t in enumerate(spans)
    ]
    if any([not s or s in ["(", ")", ","] for s in span_features]):
        return info_idxes
    if any([f not in VBs + WHs + NNs and feature_list[idx] == 'tag'
            for idx, f in enumerate(span_features)]):
        return info_idxes
    pattern = ' '.join(span_features)
    if pattern not in info_idxes[target]:
        info_idxes[target][pattern] = defaultdict(dict)
        info_idxes[target][pattern]['cover'] = defaultdict(dict)
    if target != 'predictions':
        info_idxes[target][pattern]['cover']['total'][instance.key()] = True
    predictions = instance.get_entry('predictions') or []
    for prediction in predictions:
        model = prediction.model
        if target == 'predictions':
            if model not in target_name:
                continue
        else:
            info_idxes[target][pattern]['cover'][model][instance.key()] = True
        if instance.is_incorrect(model):
            info_idxes[target][pattern][model][instance.key()] = True
    return info_idxes
Example #18
Source File: linguistic.py From errudite with GNU General Public License v2.0 | 5 votes |
def STRING(target: Union['Target', Span]) -> str:
    """Get the raw string from a given span or target.

    Parameters
    ----------
    target : Union[Target, Span]
        The target to be converted to string.

    Returns
    -------
    str
        The string.
    """
    output = ""
    try:
        if not target:
            raise DSLValueError(f"No valid input to [ STRING ]. target: {target}")
        else:
            target = convert_list(target)
            doc = convert_doc(target)[0]
            if 'label' in target[0].__class__.__name__.lower():
                output = target[0].get_label()
            elif doc:
                output = doc.text
    except DSLValueError as e:
        raise(e)
    except Exception as e:
        ex = Exception(f"Unknown exception from [ STRING ]: {e}")
        raise(ex)
    else:
        return output
Example #19
Source File: length.py From errudite with GNU General Public License v2.0 | 5 votes |
def length(docs: Union['Target', Span, List[Union['Target', Span]]]) -> int:
    """
    The length of a given span, in tokens. If the input is a list,
    take the min length of all spans in the list.

    Parameters
    ----------
    docs : Union[Target, Span, List[Union[Target, Span]]]
        The input doc(s) for computing the length.

    Returns
    -------
    int
        The length.
    """
    output = 0
    try:
        def length_(doc):
            return len(convert_doc(doc)) if doc else 0
        if docs is None:
            raise DSLValueError(f"No valid input to [ length ]. input: {docs}")
        elif type(docs) == list and len(docs) > 0:
            output = min([length_(doc) for doc in docs])
        else:
            output = length_(docs)
    except DSLValueError as e:
        raise(e)
    except Exception as e:
        ex = Exception(f"Unknown exception from [ length ]: {e}")
        raise(ex)
    else:
        return output
Example #20
Source File: helpers.py From errudite with GNU General Public License v2.0 | 5 votes |
def spans_to_json(sentences: List[Span]) -> Dict:
    '''
    @param <Span[]> sentences: sentences in span type
    @return <Dict> json-serialized sentences
    '''
    spans = []
    for sid, sentence in enumerate(sentences):
        spans += span_to_json(sentence, sid=sid)
    return spans
Example #21
Source File: mq.py From pydata2019-nlp-system with Apache License 2.0 | 5 votes |
def default(self, obj):
    if isinstance(obj, Span):
        return str(obj)
    return json.JSONEncoder.default(self, obj)
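The excerpt above only shows the default() hook of a json.JSONEncoder subclass. A minimal usage sketch follows; the class name SpanEncoder is hypothetical, since the enclosing class is not shown in the excerpt.

    # Usage sketch: plug the encoder into json.dumps via the cls argument.
    import json
    import spacy
    from spacy.tokens import Span

    class SpanEncoder(json.JSONEncoder):
        def default(self, obj):
            # Serialize spaCy Span objects as their text; defer everything else.
            if isinstance(obj, Span):
                return str(obj)
            return json.JSONEncoder.default(self, obj)

    nlp = spacy.blank("en")
    doc = nlp("New York is a city")
    payload = {"entity": Span(doc, 0, 2)}        # Span covering "New York"
    print(json.dumps(payload, cls=SpanEncoder))  # {"entity": "New York"}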
Example #22
Source File: document.py From neuralcoref with MIT License | 5 votes |
def exact_match(self, mention2):
    """ Does the Mention lowercase text match another Mention/Span lowercase text """
    return self.lower_ == mention2.lower_
Example #23
Source File: document.py From neuralcoref with MIT License | 5 votes |
def heads_agree(self, mention2):
    """ Does the root of the Mention match the root of another Mention/Span """
    # we allow same-type NEs to not match perfectly,
    # but rather one could be included in the other, e.g., "George" -> "George Bush"
    if (
        self.in_entities
        and mention2.in_entities
        and self.entity_label == mention2.entity_label
        and (
            self.root.lower_ in mention2.lower_
            or mention2.root.lower_ in self.lower_
        )
    ):
        return True
    return self.root.lower_ == mention2.root.lower_
Example #24
Source File: document.py From neuralcoref with MIT License | 5 votes |
def _get_type(self):
    """ Find the type of the Span """
    conj = ["CC", ","]
    prp = ["PRP", "PRP$"]
    proper = ["NNP", "NNPS"]
    if any(t.tag_ in conj and t.ent_type_ not in ACCEPTED_ENTS for t in self):
        mention_type = MENTION_TYPE["LIST"]
    elif self.root.tag_ in prp:
        mention_type = MENTION_TYPE["PRONOMINAL"]
    elif self.root.ent_type_ in ACCEPTED_ENTS or self.root.tag_ in proper:
        mention_type = MENTION_TYPE["PROPER"]
    else:
        mention_type = MENTION_TYPE["NOMINAL"]
    return mention_type
Example #25
Source File: document.py From neuralcoref with MIT License | 5 votes |
def __init__(
    self,
    span,
    mention_index,
    utterance_index,
    utterances_start_sent,
    speaker=None,
    gold_label=None,
):
    """
    Arguments:
        span (spaCy Span): the spaCy span from which the Mention object is created
        mention_index (int): index of the Mention in the Document
        utterance_index (int): index of the utterance of the Mention in the Document
        utterances_start_sent (int): index of the first sentence of the utterance of the Mention
            in the Document (an utterance can comprise several sentences)
        speaker (Speaker): the speaker of the mention
        gold_label (anything): a gold label associated to the Mention (for training)
    """
    self.index = mention_index
    self.utterance_index = utterance_index
    self.utterances_sent = utterances_start_sent + self._get_doc_sent_number()
    self.speaker = speaker
    self.gold_label = gold_label
    self.spans_embeddings = None
    self.words_embeddings = None
    self.features = None
    self.features_ = None
    self.spans_embeddings_ = None
    self.words_embeddings_ = None
    self.mention_type = self._get_type()
    self.propers = set(self.content_words)
    self.entity_label = self._get_entity_label()
    self.in_entities = self._get_in_entities()
Example #26
Source File: token.py From errudite with GNU General Public License v2.0 | 4 votes |
def token(docs: Union[Span, 'Target'],
          idxes: Union[int, List[int]] = None,
          pattern: Union[str, List[str]] = None) -> Union[Span, Token]:
    """
    Get a list of tokens from the target based on idxes (sub-list) and pattern.
    Note that ``idxes`` runs before ``pattern``. That is, if the idxes exist,
    the pattern filters the idxed doc tokens.

    Parameters
    ----------
    docs : Union[Target, Span]
        The doc to be queried.
    idxes : Union[int, List[int]], optional
        Retrieve the sub-list of tokens from docs, with idx(es). By default None
    pattern : Union[str, List[str]], optional
        Used to filter and get the sub-list of spans in the doc span list.
        Pattern allows linguistic annotations and automatically detects queries
        on POS tags and entity types, in ALL CAPS. For example, ``(what, which) NOUN``
        may query all docs that have ``what NOUN`` or ``which NOUN``.
        If a list, then all the patterns in a list are "OR". By default None

    Returns
    -------
    Union[Span, Token]
        The queried sub-list.
    """
    output = []
    try:
        if not docs:
            raise DSLValueError("No input to [ token ].")
        docs_ = token_idxes(docs, idxes=idxes)
        if pattern:
            output = token_pattern(docs_, pattern)
        else:
            output = docs_
    except DSLValueError as e:
        raise(e)
    except Exception as e:
        ex = Exception(f"Unknown exception from [ token ]: {e}")
        raise(ex)
    else:
        return output
Example #27
Source File: token.py From errudite with GNU General Public License v2.0 | 4 votes |
def has_pattern(docs: Union[Doc, Span, 'Target', List],
                idxes: Union[int, List[int]] = None,
                pattern: Union[str, List[str]] = None) -> bool:
    """
    Determine whether the targeted span contains a certain pattern.

    Parameters
    ----------
    docs : Union[Target, Span]
        The doc to be queried.
    idxes : Union[int, List[int]], optional
        Retrieve the sub-list of tokens from docs, with idx(es). By default None
    pattern : Union[str, List[str]], optional
        Used to filter and get the sub-list of spans in the doc span list.
        Pattern allows linguistic annotations and automatically detects queries
        on POS tags and entity types, in ALL CAPS. For example, ``(what, which) NOUN``
        may query all docs that have "what NOUN" or "which NOUN".
        If a list, then all the patterns in a list are "OR". By default None

    Returns
    -------
    bool
        Whether the span/target has the pattern or not.
    """
    output = False
    try:
        if pattern is None:
            raise DSLValueError(f"[ {pattern} ] is not a valid pattern to [ has_pattern ].")
        else:
            tokens = token(docs, idxes=idxes, pattern=pattern)
            if type(docs) == list:
                output = any([o and length(o) > 0 for o in tokens])
            else:
                output = tokens and length(tokens) > 0
    except DSLValueError as e:
        raise(e)
    except Exception as e:
        ex = Exception(f"Unknown exception from [ has_pattern ]: {e}")
        raise(ex)
    else:
        return output
Example #28
Source File: token.py From errudite with GNU General Public License v2.0 | 4 votes |
def boundary_with(docs: Union[Span, 'Target'],
                  pattern: Union[str, List[str]],
                  direction: str = 'start') -> bool:
    """
    Determine whether the targeted span contains a certain pattern at the
    beginning or the end of the doc.
    *When using the DSL parser*, this function can be called in alternative ways,
    with ``direction`` being automatically filled in: ``[starts_with|ends_with](...)``.

    Parameters
    ----------
    docs : Union[Target, Span]
        The doc to be queried.
    pattern : Union[str, List[str]]
        The same as in `has_pattern`.
    direction : str
        Either to test the "start" or the "end" of the doc.

    Returns
    -------
    bool
        Whether the span/target starts/ends with the pattern or not.
    """
    output = False
    try:
        if pattern is None:
            raise DSLValueError(f"[ {pattern} ] is not a valid pattern to [ boundary_with ].")
        pattern = convert_list(pattern)
        pattern_arr = merge_list([parse_cmd(p).gen_pattern_list() for p in pattern])
        if type(pattern_arr) in [list, tuple]:
            while type(pattern_arr[0]) in [list, tuple]:
                pattern_arr = pattern_arr[0]
        idx_length = len(pattern_arr)
        if direction == 'start':
            idxes = [0, idx_length]
        else:
            idxes = [-idx_length - 1, 0]  # to also cover the ending cmd
        output = has_pattern(docs, idxes=idxes, pattern=pattern)
    except DSLValueError as e:
        raise(e)
    except Exception as e:
        ex = Exception(f"Unknown exception from [ {direction}s_with ]: {e}")
        raise(ex)
    else:
        return output
Example #29
Source File: token.py From errudite with GNU General Public License v2.0 | 4 votes |
def token_pattern(docs: Union[Doc, Span, 'Target', List],
                  pattern: Union[str, List[str]]) -> bool:
    output = []
    try:
        global CUR_SAVED_RULE
        if not pattern:
            # special case: just return everything
            output = docs
        else:
            if not docs:
                raise DSLValueError("No given doc to [ token_pattern ].")
            docs = convert_list(convert_doc(docs, strict_format='doc'))
            pattern = convert_list(pattern)
            pattern_key = 'pattern' + '::'.join(pattern)
            if pattern_key != CUR_SAVED_RULE:
                # define a matcher only when it's not the same rule currently used.
                patterns = merge_list([parse_cmd(p).gen_pattern_list() for p in pattern])
                if patterns:
                    if 'matcher' in matcher:
                        matcher.remove('matcher')
                    matcher.add('matcher', None, *patterns)
                    CUR_SAVED_RULE = pattern_key
            returned_spans = []
            for doc in docs:
                for _, start, end in matcher(doc):
                    returned_spans.append(doc[start:end])
            if len(returned_spans) == 1:
                output = returned_spans[0]
            if not returned_spans:
                pass
                # raise DSLValueError(f"No match found for {pattern} in {docs}.")
            else:
                output = returned_spans
    except DSLValueError as e:
        raise(e)
    except Exception as e:
        ex = Exception(f"Unknown exception from [ token_pattern ]: {e}")
        raise(ex)
    else:
        return output
Example #30
Source File: get_sentence.py From errudite with GNU General Public License v2.0 | 4 votes |
def sentence(answer: 'QAAnswer', context: 'Context', shift: Union[int, List[int]] = 0) -> Span:
    """
    *Machine Comprehension only* Get the sentence that contains a given answer.
    Shift indicates if neighboring sentences should be included.

    Parameters
    ----------
    answer : QAAnswer
        The selected answer.
    context : Context
        The context target of a given instance.
        *Automatically filled in when using the DSL parser.*
    shift : Union[int, List[int]], optional
        Shift indicates if neighboring sentences should be included, by default 0.
        If ``shift == 0``, the actual sentence is returned; if ``shift == [-2, -1, 1, 2]``,
        the four sentences surrounding the answer sentence are returned.

    Returns
    -------
    Span
        The selected sentence that contains the answer.
    """
    output = None
    try:
        if not context or context.__class__.__name__ != "Context":
            raise DSLValueError(f"Cannot retrieve the sentence, due to invalid context: [ {context} ].")
        if not answer or \
            not ("Answer" in answer.__class__.__name__ or
                 (type(answer) == list and "Answer" in answer[0].__class__.__name__)):
            raise DSLValueError(f"Cannot retrieve the sentence, due to invalid answer: [ {answer} ].")
        # only getting one sentence
        if type(answer) != list and type(shift) != list:
            output = context.get_sentence(answer.sid)
        # multiple sentences: convert both into lists
        answer = convert_list(answer)
        shift = convert_list(shift)
        sids = []
        for a in answer:
            sids += [a.sid + r for r in shift]
        sids = np.unique(sids)
        output = context.get_sentence(sids)
    except DSLValueError as e:
        raise(e)
    except Exception as e:
        ex = Exception(f"Unknown exception from [ sentence ]: {e}")
        raise(ex)
    else:
        return output