Python spacy.tokens.Span() Examples

The following are 30 code examples of spacy.tokens.Span(), drawn from open-source projects. The source file, project, and license for each example are noted above it. You may also want to check out all available functions and classes of the module spacy.tokens.
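Before the examples, here is a minimal sketch of constructing a Span by hand. It assumes spaCy v2.1+ and the en_core_web_sm model, neither of which is part of the examples below:

import spacy
from spacy.tokens import Span

nlp = spacy.load("en_core_web_sm")
doc = nlp("Apple is looking at buying a U.K. startup.")

# Slicing a Doc yields a Span; Span(doc, start, end) builds one explicitly.
span = Span(doc, 0, 1, label="ORG")
print(span.text, span.label_)  # Apple ORG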
Example #1
Source File: document.py    From neuralcoref with MIT License
def __new__(
        cls,
        span,
        mention_index,
        utterance_index,
        utterance_start_sent,
        speaker=None,
        gold_label=None,
        *args,
        **kwargs,
    ):
        # We need to override __new__; see http://cython.readthedocs.io/en/latest/src/userguide/special_methods.html
        obj = spacy.tokens.Span.__new__(
            cls, span.doc, span.start, span.end, *args, **kwargs
        )
        return obj 
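Because spacy.tokens.Span is a Cython extension type, a subclass such as neuralcoref's Mention has to allocate its instances through Span.__new__ with the backing doc and token offsets. A hypothetical construction call (the full argument list is documented in Example #25):

# Hypothetical: doc is any spaCy Doc; wrap tokens 2..5 as a Mention.
mention = Mention(doc[2:5], 0, 0, 0)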
Example #2
Source File: helpers.py    From errudite with GNU General Public License v2.0
def span_to_json(sentence: Span, sid: int = 0) -> List[Dict]:
    '''
    @param  <Span> sentence: the sentence, as a Span
    @return <List[Dict]> json-serialized tokens of the sentence
    '''
    if sentence is None:
        return None
    j_sentence = [{
        'idx': t.i,
        'text': t.text,
        'ner': t.ent_type_,
        'lemma': t.lemma_,
        'pos': t.pos_,
        'tag': t.tag_,
        'whitespace': t.whitespace_,
        'sid': sid
        } for t in sentence]
    return j_sentence 
Example #3
Source File: helpers.py    From errudite with GNU General Public License v2.0
def gen_text_from_sent_list(sentences: List[Span]) -> str:
    '''
    Concatenate a list of sentence spans into a single string, re-attaching
    each sentence's trailing whitespace.
    '''
    return ''.join([s.text + s[-1].whitespace_ for s in sentences]) 
Example #4
Source File: helpers.py    From errudite with GNU General Public License v2.0
def convert_doc(doc: Union[Doc, Span, 'Target'], strict_format: str=None):
    def _strict_doc(doc):
        if not doc:
            return None
        if type(doc) == str:
            return doc
        if strict_format == 'doc':
            return doc if type(doc) == Doc else doc.as_doc()
        if strict_format == 'span':
            return doc if type(doc) == Span else doc[:]
        return doc
    def _convert(doc):
        if type(doc) == str:
            return doc
        if type(doc) == Doc or type(doc) == Span:
            return _strict_doc(doc)
        else:
            return _strict_doc(getattr(doc, 'doc', None))
    if not doc:
        return None
    if type(doc) == list:
        return [ _convert(d) for d in doc ]
    else:
        return _convert(doc) 
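A hypothetical call, normalizing mixed inputs to one format:

# Hypothetical: doc is any spaCy Doc. Span -> Doc via as_doc(); Docs stay
# Docs; plain strings pass through unchanged.
as_docs = convert_doc([doc, doc[1:3], "raw text"], strict_format="doc")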
Example #5
Source File: context.py    From errudite with GNU General Public License v2.0
def get_sentence(self, sid: Union[int, List[int]]=0, doc: Doc=None) -> Union[Span, List[Span]]:
        """Query a sentence in a paragraph.
        
        Keyword Arguments:
            sid {Union[int, List[int]]} -- the sentence id, or a list of sentence ids (default: {0})
            doc {Doc} -- query this doc instead of self.doc (default: {None})
        
        Returns:
            Union[Span, List[Span]] -- the queried sentence(s)
        """
        if doc:
            sentences = list(doc.sents)
        else:
            sentences = list(self.doc.sents)
        if type(sid) == int or type(sid) == float:
            if int(sid) >= 0 and int(sid) < len(sentences):
                return sentences[int(sid)]
        # else if it's an array
        sid = [int(s) for s in sid if s >= 0 and s < len(sentences)]
        if len(sid) > 0:
            filtered = [sentences[s] for s in sid]
            return filtered[0] if len(filtered) == 1 else filtered
        if sentences:
            return sentences[0]
        return None 
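A hypothetical call (context is an errudite Context wrapping a paragraph):

first = context.get_sentence(0)            # a single Span
window = context.get_sentence([0, 1, 2])   # a list of Spans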
Example #6
Source File: spacy_np_annotator.py    From nlp-architect with Apache License 2.0
def get_noun_phrases(doc: Doc) -> [Span]:
    """
    Get noun phrase tags from a spacy annotated document.

    Args:
        doc (Doc): a spacy type document

    Returns:
        a list of noun phrase Span objects
    """
    assert hasattr(doc._, "noun_phrases"), "no noun_phrase attributes in document"
    return doc._.noun_phrases 
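This helper (and set_noun_phrases in Example #8) assumes a custom noun_phrases extension is already registered on Doc; the registration happens elsewhere in nlp-architect. A minimal sketch of what it would look like:

from spacy.tokens import Doc

# Assumed registration; without it, doc._.noun_phrases would raise.
if not Doc.has_extension("noun_phrases"):
    Doc.set_extension("noun_phrases", default=None)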
Example #7
Source File: spacy_np_annotator.py    From nlp-architect with Apache License 2.0
def __call__(self, doc: Doc) -> Doc:
        """
        Annotate the document with noun phrase spans
        """
        spans = []
        doc_vecs = []
        doc_chars = []
        doc_lens = []
        if len(doc) < 1:
            return doc
        for sentence in doc.sents:
            features = self._feature_extractor([t.text for t in sentence])
            if isinstance(features, tuple):
                doc_vec = features[0]
                doc_chars.append(features[1])
            else:
                doc_vec = features
            doc_vecs.append(doc_vec)
            doc_lens.append(len(doc_vec))
        doc_vectors = pad_sentences(np.asarray(doc_vecs))
        inputs = doc_vectors
        if self.char_vocab:
            max_len = doc_vectors.shape[1]
            padded_chars = np.zeros((len(doc_chars), max_len, self.model.max_word_len))
            for idx, d in enumerate(doc_chars):
                d = d[:max_len]
                padded_chars[idx, -d.shape[0] :] = d
            inputs = [inputs, padded_chars]
        np_indexes = self._infer_chunks(inputs, doc_lens)
        for s, e in np_indexes:
            np_span = Span(doc, s, e)
            spans.append(np_span)
        spans = _NPPostprocessor.process(spans)
        set_noun_phrases(doc, spans)
        return doc 
Example #8
Source File: spacy_np_annotator.py    From nlp-architect with Apache License 2.0
def set_noun_phrases(doc: Doc, nps: [Span]) -> None:
    """
    Set noun phrase tags

    Args:
        doc (Doc): a spacy type document
        nps ([Span]): a list of Spans
    """
    assert hasattr(doc._, "noun_phrases"), "no noun_phrase attributes in document"
    doc._.set("noun_phrases", nps) 
Example #9
Source File: spacy_np_annotator.py    From nlp-architect with Apache License 2.0
def process(cls, noun_phrases: [Span]) -> [Span]:
        new_phrases = []
        for phrase in noun_phrases:
            p = _NPPostprocessor._phrase_process(phrase)
            if p is not None and len(p) > 0:
                new_phrases.append(p)
        return new_phrases 
Example #10
Source File: markov_engine.py    From armchair-expert with MIT License
def span_to_bigram(span: Span) -> list:
        """Collect [token_a, token_b, distance] triples for every ordered pair
        of tokens in the span that lie within MARKOV_WINDOW_SIZE of each other."""
        grams = []

        for a_idx, a in enumerate(span):
            for b_idx, b in enumerate(span):

                dist = b_idx - a_idx
                if dist == 0:
                    continue

                elif abs(dist) <= MARKOV_WINDOW_SIZE:
                    grams.append([a, b, dist])

        return grams 
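A usage sketch, assuming span_to_bigram is importable and MARKOV_WINDOW_SIZE (a module-level constant in armchair-expert) is in scope:

import spacy

nlp = spacy.blank("en")
doc = nlp("the quick brown fox")

# doc[:] is a Span over the whole doc; each gram is [token_a, token_b, distance].
for a, b, dist in span_to_bigram(doc[:]):
    print(a.text, b.text, dist)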
Example #11
Source File: spacy_np_annotator.py    From nlp-architect with Apache License 2.0
def _phrase_process(cls, phrase: Span) -> Span:
        last_phrase = None
        while phrase != last_phrase:
            last_phrase = phrase
            for func_args in post_processing_rules:
                pf = func_args[0]
                args = func_args[1:]
                if len(args) > 0:
                    phrase = pf(phrase, *args)
                else:
                    phrase = pf(phrase)
                if phrase is None:
                    # a rule rejected the phrase; stop rather than feeding
                    # None back through the remaining rules
                    return None
        return phrase 
Example #12
Source File: volume_unit_component.py    From medaCy with GNU General Public License v3.0
def __call__(self, doc):
        nlp = self.nlp
        with doc.retokenize() as retokenizer:
            # match and tag volume units
            matches = self.volume_matcher(doc)
            for match_id, start, end in matches:
                span = Span(doc, start, end, label=nlp.vocab.strings['volume_unit'])
                for token in span:
                    token._.feature_is_volume_unit = True
                if len(span) > 1:
                    retokenizer.merge(span)
                doc.ents = list(doc.ents) + [span]
        return doc 
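The volume_matcher used above is built in the component's __init__, which is not shown. A hypothetical construction with spaCy's rule Matcher (v2-style add(), the same API used in Example #16):

import spacy
from spacy.matcher import Matcher

nlp = spacy.blank("en")
matcher = Matcher(nlp.vocab)
# Hypothetical patterns; medaCy's real pattern list is more extensive.
matcher.add("volume_unit", None, [{"LOWER": "ml"}], [{"LOWER": "liter"}])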
Example #13
Source File: frequency_unit_component.py    From medaCy with GNU General Public License v3.0
def __call__(self, doc):
        nlp = self.nlp
        with doc.retokenize() as retokenizer:
            # match and tag frequency indicators
            matches = self.frequency_matcher(doc)
            for match_id, start, end in matches:
                span = Span(doc, start, end, label=nlp.vocab.strings['frequency_indicator'])
                for token in span:
                    token._.feature_is_frequency_indicator = True
                if len(span) > 1:
                    retokenizer.merge(span)
                doc.ents = list(doc.ents) + [span]
        return doc 
Example #14
Source File: time_unit_component.py    From medaCy with GNU General Public License v3.0
def __call__(self, doc):
        nlp = self.nlp
        with doc.retokenize() as retokenizer:
            # match and tag time units
            matches = self.time_matcher(doc)
            for match_id, start, end in matches:
                span = Span(doc, start, end, label=nlp.vocab.strings['time_unit'])
                for token in span:
                    token._.feature_is_time_unit = True
                if len(span) > 1:
                    retokenizer.merge(span)
                doc.ents = list(doc.ents) + [span]
        return doc 
Example #15
Source File: measurement_unit_component.py    From medaCy with GNU General Public License v3.0
def __call__(self, doc):
        nlp = self.nlp
        with doc.retokenize() as retokenizer:
            # match and tag units of measurement (e.g., x/y)
            matches = self.unit_of_measurement_matcher(doc)
            for match_id, start, end in matches:
                span = Span(doc, start, end, label=nlp.vocab.strings['measurement_unit'])
                for token in span:
                    token._.feature_is_measurement_unit = True
                if len(span) > 1:
                    retokenizer.merge(span)
                doc.ents = list(doc.ents) + [span]
        return doc 
Example #16
Source File: lexicon_component.py    From medaCy with GNU General Public License v3.0
def __call__(self, doc):
        """
        Runs a document through the lexicon component.  Utilizes SpaCy's PhraseMatcher to find spans
        in the doc that match the lexicon and overlays the appropriate label as 'feature_is_label_from_lexicon'
        over all tokens in the span.
        :param doc:
        :return:
        """
        logging.debug("Called Lexicon Component")

        matcher = PhraseMatcher(self.nlp.vocab, max_length=10)
        for label in self.lexicon:
            Token.set_extension('feature_is_' + label + '_from_lexicon', default=False, force=True)
            patterns = [self.nlp.make_doc(term) for term in self.lexicon[label]]
            logging.debug(patterns)
            matcher.add(label, None, *patterns)
        matches = matcher(doc)
        for match_id, start, end in matches:
            span = Span(doc, start, end)
            logging.debug(span)
            if span is not None:
                logging.debug('Lexicon term matched: %s Label: %s' % (span.text, self.nlp.vocab.strings[match_id]))
                for token in span:
                    token._.set('feature_is_' + self.nlp.vocab.strings[match_id] + '_from_lexicon', True)

        return doc 
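After the component runs, every token in a matched span carries the corresponding flag. A hypothetical read-back, assuming the lexicon defined a 'drug' label, nlp is a loaded pipeline, and lexicon_component is an instance of the class above:

doc = lexicon_component(nlp("patient was given aspirin"))
for token in doc:
    if token._.get("feature_is_drug_from_lexicon"):
        print(token.text, "matched the drug lexicon")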
Example #17
Source File: dataset_reader.py    From errudite with GNU General Public License v2.0
def _compute_span_info(self, 
        instance: Instance, spans: Span, feature_list: List[str], target: str, info_idxes):
        if target not in instance.entries:
            target_name = f'prediction(model="{target}")'
            target = 'predictions'
        else:
            target_name = target
        if len(list(np.unique(feature_list))) > 2:
            return info_idxes
        span_features = [ get_token_feature(t, feature_list[idx]).strip() for idx, t in enumerate(spans) ]
        if any([not s or s in ["(", ")", ","] for s in span_features]):
            return info_idxes
        if any([ f not in VBs + WHs + NNs and feature_list[idx] == 'tag' for idx, f in enumerate(span_features) ]):
            return info_idxes
        pattern = ' '.join(span_features)
        if pattern not in info_idxes[target]:
            info_idxes[target][pattern] = defaultdict(dict)
            info_idxes[target][pattern]['cover'] = defaultdict(dict)
        if target != 'predictions':
            info_idxes[target][pattern]['cover']['total'][instance.key()] = True
        predictions = instance.get_entry('predictions') or []    
        for prediction in predictions:
            model = prediction.model
            if target == 'predictions': 
                if model not in target_name:
                    continue
                else:
                    info_idxes[target][pattern]['cover'][model][instance.key()] = True      
            if instance.is_incorrect(model):
                info_idxes[target][pattern][model][instance.key()] = True
        return info_idxes 
Example #18
Source File: linguistic.py    From errudite with GNU General Public License v2.0
def STRING(target: Union['Target', Span]) -> str:
    """Get the raw string from a given span or target.
    
    Parameters
    ----------
    target : Union[Target, Span]
        The target to be converted to string.
    
    Returns
    -------
    str
        The string.
    """
    output = ""
    try:
        if not target:
            raise DSLValueError(f"No valid input to [ STRING ]. target: {target}")
        else:
            target = convert_list(target)
            doc = convert_doc(target)[0]
            if 'label' in target[0].__class__.__name__.lower():
                output = target[0].get_label()
            elif doc:
                output = doc.text
    except DSLValueError as e:
        raise e
    except Exception as e:
        raise Exception(f"Unknown exception from [ STRING ]: {e}")
    else:
        return output
Example #19
Source File: length.py    From errudite with GNU General Public License v2.0
def length(
    docs: Union['Target', Span, List[Union['Target', Span]]]) -> int:
    """
    The length of a given span, in tokens.
    If the input is a List, take the min length of all spans in the list.
    
    Parameters
    ----------
    docs : Union[Target, Span, List[Union[Target, Span]]]
        The input doc(s) for computing the length.
    
    Returns
    -------
    int
        The length.
    """
    output = 0
    try:
        def length_(doc):
            return len(convert_doc(doc)) if doc else 0
        if docs is None:
            raise DSLValueError(f"No valid input to [ length ]. input: {docs}")
        elif type(docs) == list and len(docs) > 0:
            output = min([ length_(doc) for doc in docs ])
        else:
            output = length_(docs)
    except DSLValueError as e:
        raise e
    except Exception as e:
        raise Exception(f"Unknown exception from [ length ]: {e}")
    else:
        return output
Example #20
Source File: helpers.py    From errudite with GNU General Public License v2.0
def spans_to_json(sentences: List[Span]) -> List[Dict]:
    '''
    @param  <List[Span]> sentences: the sentences, as Spans
    @return <List[Dict]> json-serialized tokens of all sentences
    '''
    spans = []
    for sid, sentence in enumerate(sentences):
        spans += span_to_json(sentence, sid=sid)
    return spans 
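A usage sketch tying span_to_json and spans_to_json together (assumes the errudite helpers are importable and a pipeline that sets sentence boundaries):

import spacy

nlp = spacy.load("en_core_web_sm")
doc = nlp("First sentence. Second sentence.")

tokens = spans_to_json(list(doc.sents))
print(tokens[0]["text"], tokens[0]["sid"])  # First 0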
Example #21
Source File: mq.py    From pydata2019-nlp-system with Apache License 2.0
def default(self, obj):
        if isinstance(obj, Span): return str(obj)
        return json.JSONEncoder.default(self, obj) 
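The default hook above lives on a json.JSONEncoder subclass; a self-contained sketch (the class name SpanEncoder is assumed, mq.py does not show it):

import json

import spacy
from spacy.tokens import Span

class SpanEncoder(json.JSONEncoder):
    def default(self, obj):
        # Serialize spans as their text; defer everything else to the base class.
        if isinstance(obj, Span):
            return str(obj)
        return json.JSONEncoder.default(self, obj)

nlp = spacy.blank("en")
doc = nlp("hello world again")
print(json.dumps({"sentence": doc[0:2]}, cls=SpanEncoder))  # {"sentence": "hello world"}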
Example #22
Source File: document.py    From neuralcoref with MIT License
def exact_match(self, mention2):
        """ Does the Mention lowercase text matches another Mention/Span lowercase text"""
        return self.lower_ == mention2.lower_ 
Example #23
Source File: document.py    From neuralcoref with MIT License
def heads_agree(self, mention2):
        """ Does the root of the Mention match the root of another Mention/Span"""
        # we allow same-type NEs to not match perfectly,
        # but rather one could be included in the other, e.g., "George" -> "George Bush"
        if (
            self.in_entities
            and mention2.in_entities
            and self.entity_label == mention2.entity_label
            and (
                self.root.lower_ in mention2.lower_
                or mention2.root.lower_ in self.lower_
            )
        ):
            return True
        return self.root.lower_ == mention2.root.lower_ 
Example #24
Source File: document.py    From neuralcoref with MIT License
def _get_type(self):
        """ Find the type of the Span """
        conj = ["CC", ","]
        prp = ["PRP", "PRP$"]
        proper = ["NNP", "NNPS"]
        if any(t.tag_ in conj and t.ent_type_ not in ACCEPTED_ENTS for t in self):
            mention_type = MENTION_TYPE["LIST"]
        elif self.root.tag_ in prp:
            mention_type = MENTION_TYPE["PRONOMINAL"]
        elif self.root.ent_type_ in ACCEPTED_ENTS or self.root.tag_ in proper:
            mention_type = MENTION_TYPE["PROPER"]
        else:
            mention_type = MENTION_TYPE["NOMINAL"]
        return mention_type 
Example #25
Source File: document.py    From neuralcoref with MIT License
def __init__(
        self,
        span,
        mention_index,
        utterance_index,
        utterances_start_sent,
        speaker=None,
        gold_label=None,
    ):
        """
        Arguments:
            span (spaCy Span): the spaCy span from which to create the Mention object
            mention_index (int): index of the Mention in the Document
            utterance_index (int): index of the utterance of the Mention in the Document
            utterances_start_sent (int): index of the first sentence of the utterance of the Mention in the Document
                (an utterance can comprise several sentences)
            speaker (Speaker): the speaker of the mention
            gold_label (anything): a gold label associated with the Mention (for training)
        """
        self.index = mention_index
        self.utterance_index = utterance_index
        self.utterances_sent = utterances_start_sent + self._get_doc_sent_number()
        self.speaker = speaker
        self.gold_label = gold_label
        self.spans_embeddings = None
        self.words_embeddings = None
        self.features = None

        self.features_ = None
        self.spans_embeddings_ = None
        self.words_embeddings_ = None

        self.mention_type = self._get_type()
        self.propers = set(self.content_words)
        self.entity_label = self._get_entity_label()
        self.in_entities = self._get_in_entities() 
Example #26
Source File: token.py    From errudite with GNU General Public License v2.0
def token(
    docs: Union[Span, 'Target'],
    idxes: Union[int, List[int]]=None,
    pattern: Union[str, List[str]]=None) -> Union[Span, Token]:
    """
    Get a list of tokens from the target based on idxes (sub-list) and 
    pattern. Note that ``idxes`` runs before ``pattern``. 
    That is, if the idxes exist, the pattern filters the idxed doc tokens.
    
    Parameters
    ----------
    docs : Union[Target, Span]
        The doc to be queried.
    idxes : Union[int, List[int]], optional
        Retrieve the sub-list of tokens from docs, with idx(es). By default None
    pattern : Union[str, List[str]], optional
        Used to filter and get the sub-list of spans in the doc span list.
        Pattern allows linguistic annotations and automatically detects queries 
        on POS tags and entity types, in ALL CAPS. For example,
        ``(what, which) NOUN`` may query all docs that have ``what NOUN`` or 
        ``which NOUN``. If a list, then all the patterns in a list are "OR".
        By default None
    
    Returns
    -------
    Union[Span, Token]
        The queried sub-list.
    """
    output = []
    try:
        if not docs:
            raise DSLValueError("No input to [ token ].")
        docs_ = token_idxes(docs, idxes=idxes)
        if pattern:
            output = token_pattern(docs_, pattern)
        else:
            output = docs_
    except DSLValueError as e:
        raise e
    except Exception as e:
        raise Exception(f"Unknown exception from [ token ]: {e}")
    else:
        return output
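A hypothetical call using the pattern syntax the docstring describes (question stands in for any Target or Span):

# Filter a question down to its "what NOUN" / "which NOUN" sub-spans.
sub = token(question, pattern="(what, which) NOUN")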
Example #27
Source File: token.py    From errudite with GNU General Public License v2.0
def has_pattern(
    docs: Union[Doc, Span, 'Target', List],
    idxes: Union[int, List[int]]=None,
    pattern: Union[str, List[str]]=None) -> bool:
    """
    To determine whether the targeted span contains a certain pattern.
    
    Parameters
    ----------
    docs : Union[Target, Span]
        The doc to be queried.
    idxes : Union[int, List[int]], optional
        Retrieve the sub-list of tokens from docs, with idx(es). By default None
    pattern : Union[str, List[str]], optional
        Used to filter and get the sub-list of spans in the doc span list.
        Pattern allows linguistic annotations and automatically detects queries 
        on POS tags and entity types, in ALL CAPS. For example,
        ``(what, which) NOUN`` may query all docs that have "what NOUN" or 
        "which NOUN". If a list, then all the patterns in a list are "OR".
        By default None
    
    Returns
    -------
    bool
        Whether the span/target has the pattern or not.
    """
    output = False
    try:
        if pattern is None:
            raise DSLValueError(f"[ {pattern} ] is not a valid pattern to [ has_pattern ].")
        else:
            tokens = token(docs, idxes=idxes, pattern=pattern)
            if ( type(docs) == list ):
                output = any([ o and length(o) > 0 for o in tokens ])
            else:
                output = tokens and length(tokens) > 0
    except DSLValueError as e:
        raise e
    except Exception as e:
        raise Exception(f"Unknown exception from [ has_pattern ]: {e}")
    else:
        return output
Example #28
Source File: token.py    From errudite with GNU General Public License v2.0
def boundary_with(
    docs: Union[Span, 'Target'],
    pattern: Union[str, List[str]],
    direction: str='start') -> bool:
    """
    To determine whether the targeted span contains a certain pattern, at the beginning
    or the end of the doc.

    *When using the DSL parser*, this function can be called in alternative ways, 
    with ``direction`` being automatically filled in: 
    ``[starts_with|ends_with](...)``.
    
    Parameters
    ----------
    docs : Union[Target, Span]
        The doc to be queried.
    pattern : Union[str, List[str]]
        The same as in `has_pattern`.
    direction : str
        Either to test the "start" or the "end" of the doc.
    
    Returns
    -------
    bool
        Whether the span/target starts/ends with the pattern or not.
    """
    output = False
    try:
        if pattern is None:
            raise DSLValueError(f"[ {pattern} ] is not a valid pattern to [ boundary_with ].")
        pattern = convert_list(pattern)
        pattern_arr = merge_list([
            parse_cmd(p).gen_pattern_list() for p in pattern])
        if type(pattern_arr) in [ list, tuple ]:
            while type(pattern_arr[0]) in [ list, tuple ]:
                pattern_arr = pattern_arr[0]
        idx_length = len(pattern_arr)
        if direction == 'start':
            idxes = [ 0, idx_length ]
        else:
            idxes = [ -idx_length-1, 0 ] # to also cover the ending cmd
        output = has_pattern(docs, idxes=idxes, pattern=pattern)
    except DSLValueError as e:
        raise e
    except Exception as e:
        raise Exception(f"Unknown exception from [ {direction}s_with ]: {e}")
    else:
        return output
Example #29
Source File: token.py    From errudite with GNU General Public License v2.0
def token_pattern(
    docs: Union[Doc, Span, 'Target', List],
    pattern: Union[str, List[str]]) -> Union[Span, List[Span]]:
    output = []
    try:
        global CUR_SAVED_RULE
        if not pattern: # special case: just return everything
            output = docs
        else:
            if not docs:
                raise DSLValueError("No given doc to [ token_pattern ].")
            docs = convert_list(convert_doc(docs, strict_format='doc'))
            pattern = convert_list(pattern)
            pattern_key = 'pattern' + '::'.join(pattern)
            if pattern_key != CUR_SAVED_RULE:
                # define a matcher only when it's not the same rule currently used.
                patterns = merge_list([
                    parse_cmd(p).gen_pattern_list() for p in pattern])
                if patterns:
                    if 'matcher' in matcher:
                        matcher.remove('matcher')
                    matcher.add('matcher', None, *patterns)
                    CUR_SAVED_RULE = pattern_key
            returned_spans = []
            for doc in docs:
                for _, start, end in matcher(doc):
                    returned_spans.append(doc[start:end])
            if len(returned_spans) == 1:
                output = returned_spans[0]
            elif returned_spans:
                output = returned_spans
            # if nothing matched, output stays [] rather than raising
    except DSLValueError as e:
        raise e
    except Exception as e:
        raise Exception(f"Unknown exception from [ token_pattern ]: {e}")
    else:
        return output
Example #30
Source File: get_sentence.py    From errudite with GNU General Public License v2.0
def sentence(
    answer: 'QAAnswer', context: 'Context', 
    shift: Union[int, List[int]]=0) -> Span:
    """
    *Machine Comprehension only* Get the sentence that contains a given answer. 
    Shift indicates if neighboring sentences should be included.
    
    Parameters
    ----------
    answer : QAAnswer
        The selected answer.
    context : Context
        The context target of a given instance.
        *Automatically filled in when using the DSL parser.*
    shift : Union[int, List[int]], optional
        Shift indicates if neighboring sentences should be included, by default 0.
        If ``shift==0``, then the actual sentence is returned; 
        if ``shift==[-2,-1,1,2]``, then the four sentences surrounding the answer sentence are returned.
    
    Returns
    -------
    Span
        The selected sentence that contains the answer.
    """
    output = None
    try:
        if not context or context.__class__.__name__ != "Context":
            raise DSLValueError(f"Cannot retrieve the sentence, due to invalid context: [ {context} ].")
        if not answer or \
            not ("Answer" in answer.__class__.__name__ or \
            (type(answer) == list and "Answer" in answer[0].__class__.__name__)):
            raise DSLValueError(f"Cannot retrieve the sentence, due to invalid answer: [ {answer} ].")
        # only getting one sentence
        if type(answer) != list and type(shift) != list:
            output = context.get_sentence(answer.sid)
        # multiple sentences. Convert both into list
        answer = convert_list(answer)
        shift = convert_list(shift)
        sids = []
        for a in answer:
            sids += [a.sid + r for r in shift ]
        sids = np.unique(sids)
        output = context.get_sentence(sids)
    except DSLValueError as e:
        raise e
    except Exception as e:
        raise Exception(f"Unknown exception from [ sentence ]: {e}")
    else:
        return output