Python spacy.matcher Examples

The following are 9 code examples of the spacy.matcher module. You can go to the original project or source file by following the link above each example. You may also want to check out the other available functions and classes of the spacy module.
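All of the examples below target the spaCy 2.x Matcher API, where Matcher.add takes a rule name, an optional on-match callback (None here), and one or more token-pattern lists; in spaCy 3.x the callback moved to an on_match keyword and the patterns became a single list argument. A minimal sketch of the 2.x API used throughout this page:

import spacy
from spacy.matcher import Matcher

nlp = spacy.blank('en')
matcher = Matcher(nlp.vocab)
# One pattern: the token "hello" (case-insensitive) followed by "world".
matcher.add('HELLO_WORLD', None, [{'LOWER': 'hello'}, {'LOWER': 'world'}])

doc = nlp('Hello world!')
for match_id, start, end in matcher(doc):
    print(nlp.vocab.strings[match_id], doc[start:end].text)  # HELLO_WORLD Hello world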
Example #1
Source File: ecommerce_preprocess.py    From DeepPavlov with Apache License 2.0
# Excerpt from a class __init__; requires: import re; from typing import
# Iterable, Optional; from spacy.matcher import Matcher.
# `_try_load_spacy_model` is a project-internal helper.
def __init__(self, spacy_model: str = 'en_core_web_sm', disable: Optional[Iterable[str]] = None, **kwargs):
    if disable is None:
        disable = ['parser', 'ner']

    self.model = _try_load_spacy_model(spacy_model, disable=disable)

    # Register custom boolean token flags on the vocab; the returned flag IDs
    # can be used as attribute keys in Matcher patterns.
    below = lambda text: bool(re.compile(r'below|cheap').match(text))
    BELOW = self.model.vocab.add_flag(below)

    above = lambda text: bool(re.compile(r'above|start').match(text))
    ABOVE = self.model.vocab.add_flag(above)

    self.matcher = Matcher(self.model.vocab)

    # e.g. "below/cheaper [than] [from] [$] <number tagged as MONEY>"
    self.matcher.add('below', None, [{BELOW: True}, {'LOWER': 'than', 'OP': '?'},
                                     {'LOWER': 'from', 'OP': '?'}, {'ORTH': '$', 'OP': '?'},
                                     {'ENT_TYPE': 'MONEY', 'LIKE_NUM': True}])

    self.matcher.add('above', None, [{ABOVE: True}, {'LOWER': 'than', 'OP': '?'},
                                     {'LOWER': 'from', 'OP': '?'}, {'ORTH': '$', 'OP': '?'},
                                     {'ENT_TYPE': 'MONEY', 'LIKE_NUM': True}])
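The Vocab.add_flag call (a spaCy 2.x API) registers a custom boolean lexeme attribute and returns a flag ID that Matcher patterns can use like a built-in attribute. A minimal, self-contained sketch of the mechanism:

import spacy
from spacy.matcher import Matcher

nlp = spacy.blank('en')
# Custom flag: True for any token whose text starts with 'below' or 'cheap'.
IS_BELOW = nlp.vocab.add_flag(lambda text: text.lower().startswith(('below', 'cheap')))

matcher = Matcher(nlp.vocab)
matcher.add('BELOW', None, [{IS_BELOW: True}])
doc = nlp('something cheaper than that')
print([doc[s:e].text for _, s, e in matcher(doc)])  # ['cheaper']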
Example #2
Source File: grounding_concepts.py    From KagNet with MIT License
def match_mentioned_concepts(nlp, sents, answers, batch_id=-1):
    # `load_matcher` and `ground_mentioned_concepts` are defined in Examples #7
    # and #9 below; `hard_ground` is another helper from the same file.
    # Requires: from tqdm import tqdm.
    matcher = load_matcher(nlp)

    res = []
    for sid, s in tqdm(enumerate(sents), total=len(sents), desc="grounding batch_id:%d" % batch_id):
        a = answers[sid]
        all_concepts = ground_mentioned_concepts(nlp, matcher, s, a)
        answer_concepts = ground_mentioned_concepts(nlp, matcher, a)
        question_concepts = all_concepts - answer_concepts
        if len(question_concepts) == 0:
            question_concepts = hard_ground(nlp, s)  # fallback; rarely needed
        if len(answer_concepts) == 0:
            print(a)
            answer_concepts = hard_ground(nlp, a)  # fallback for some edge cases
            print(answer_concepts)

        res.append({"sent": s, "ans": a, "qc": list(question_concepts), "ac": list(answer_concepts)})
    return res
Example #3
Source File: doc.py    From textpipe with MIT License
def match(self, matcher):
    """
    Run a spaCy matcher over the cleaned content

    >>> import spacy.matcher
    >>> from textpipe.doc import Doc
    >>> matcher = spacy.matcher.Matcher(spacy.lang.en.English().vocab)
    >>> matcher.add('HASHTAG', None, [{'ORTH': '#'}, {'IS_ASCII': True}])
    >>> Doc('Test with #hashtag').match(matcher)
    [('#hashtag', 'HASHTAG')]
    """
    return [(self._spacy_doc[start:end].text, matcher.vocab.strings[match_id])
            for match_id, start, end in matcher(self._spacy_doc)]
Example #4
Source File: question_rules.py    From squash-generation with MIT License
def judgemental(question):
    # `nlp` is a module-level spaCy pipeline; requires
    # `from spacy.matcher import PhraseMatcher`.
    matcher = PhraseMatcher(nlp.vocab)
    matcher.add('JUDGEMENT', None, nlp('your'), nlp('you'), nlp('Your'), nlp('You'))
    return len(matcher(question)) > 0
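PhraseMatcher matches exact token sequences (by ORTH by default), so this rule fires on any question containing "you"/"your" in either capitalization. A hypothetical check, assuming `nlp` is an English pipeline loaded at module level:

print(judgemental(nlp('Do you agree with this?')))         # True: 'you' matches
print(judgemental(nlp('What is the capital of France?')))  # False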
Example #5
Source File: utils.py    From ResumeParser with MIT License
def extract_name(nlp_text, matcher):
    '''
    Helper function to extract name from spacy nlp text

    :param nlp_text: object of `spacy.tokens.doc.Doc`
    :param matcher: object of `spacy.matcher.Matcher`
    :return: string of full name
    '''
    # `cs` is the project's constants module; NAME_PATTERN is a token pattern.
    pattern = [cs.NAME_PATTERN]

    matcher.add('NAME', None, *pattern)

    matches = matcher(nlp_text)

    # Return the text of the first match only.
    for match_id, start, end in matches:
        span = nlp_text[start:end]
        return span.text
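For context, cs.NAME_PATTERN comes from the project's constants module. A plausible shape is a POS-based pattern matching two consecutive proper nouns; this is an assumption for illustration, not the verified definition:

# Hypothetical definition of cs.NAME_PATTERN; check the project's constants
# module for the exact pattern.
NAME_PATTERN = [{'POS': 'PROPN'}, {'POS': 'PROPN'}]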
Example #6
Source File: resume_parser.py    From ResumeParser with MIT License
def __get_basic_details(self):
    name       = utils.extract_name(self.__nlp, matcher=self.__matcher)
    email      = utils.extract_email(self.__text)
    mobile     = utils.extract_mobile_number(self.__text)
    skills     = utils.extract_skills(self.__nlp, self.__noun_chunks)
    edu        = utils.extract_education([sent.string.strip() for sent in self.__nlp.sents])
    experience = utils.extract_experience(self.__text)
    entities   = utils.extract_entity_sections(self.__text_raw)
    self.__details['name'] = name
    self.__details['email'] = email
    self.__details['mobile_number'] = mobile
    self.__details['skills'] = skills
    self.__details['education'] = edu
    self.__details['experience'] = experience
    try:
        self.__details['competencies'] = utils.extract_competencies(self.__text_raw, entities['experience'])
        self.__details['measurable_results'] = utils.extract_measurable_results(self.__text_raw, entities['experience'])
    except KeyError:
        self.__details['competencies'] = []
        self.__details['measurable_results'] = []
    return
Example #7
Source File: grounding_concepts.py    From KagNet with MIT License
def load_matcher(nlp):
    # Requires: import configparser, json; from tqdm import tqdm;
    # from spacy.matcher import Matcher.
    config = configparser.ConfigParser()
    config.read("paths.cfg")
    with open(config["paths"]["matcher_patterns"], "r", encoding="utf8") as f:
        all_patterns = json.load(f)

    # One Matcher rule per concept, keyed by the concept name.
    matcher = Matcher(nlp.vocab)
    for concept, pattern in tqdm(all_patterns.items(), desc="Adding patterns to Matcher."):
        matcher.add(concept, None, pattern)
    return matcher
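The matcher_patterns file maps each concept name to one token pattern. A hypothetical shape, shown as the Python object json.load would return (the concepts and attributes are illustrative, not taken from KagNet's actual file):

all_patterns = {
    "take_a_break": [{"LEMMA": "take"}, {"LEMMA": "a"}, {"LEMMA": "break"}],
    "bank": [{"LEMMA": "bank"}],
}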
Example #8
Source File: ecommerce_preprocess.py    From DeepPavlov with Apache License 2.0
# Excerpt from the same class as Example #1; requires: import math;
# import spacy; from typing import List, Tuple.
def extract_money(self, doc: spacy.tokens.Doc) -> Tuple[List, Tuple[float, float]]:
    """Extract money entities and money related tokens from `doc`.

    Parameters:
        doc: a spaCy doc whose tokens carry tags, lemmas, etc.

    Returns:
        doc_no_money: the doc's tokens with money related tokens removed.
        money_range: money range from `money_range[0]` to `money_range[1]` extracted from the doc.
    """

    matches = self.matcher(doc)
    money_range: Tuple = ()
    doc_no_money = list(doc)
    negated = False

    for match_id, start, end in matches:
        string_id = self.model.vocab.strings[match_id]
        span = doc[start:end]
        # Check whether the trigger token is negated ("not below ...").
        for child in doc[start].children:
            if child.dep_ == 'neg':
                negated = True

        num_token = [token for token in span if token.like_num]
        if (string_id == 'below' and not negated) or (string_id == 'above' and negated):
            money_range = (0, float(num_token[0].text))

        if (string_id == 'above' and not negated) or (string_id == 'below' and negated):
            money_range = (float(num_token[0].text), math.inf)

        # Drop the matched span (plus the token right after it) from the copy.
        del doc_no_money[start:end + 1]
    return doc_no_money, money_range
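Given the 'below'/'above' rules registered in Example #1, the method turns a matched phrase into a numeric range. A hypothetical trace (assuming the pipeline tags the number as MONEY and the parser supplies the 'neg' dependency):

# "cheaper than $100" -> rule 'below', not negated -> money_range == (0, 100.0)
# "not below $100"    -> rule 'below', negated     -> money_range == (100.0, math.inf)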
Example #9
Source File: grounding_concepts.py    From KagNet with MIT License
def ground_mentioned_concepts(nlp, matcher, s, ans=""):
    # `lemmatize` and `blacklist` are module-level helpers defined elsewhere
    # in grounding_concepts.py.
    s = s.lower()
    doc = nlp(s)
    matches = matcher(doc)

    mentioned_concepts = set()
    span_to_concepts = {}

    for match_id, start, end in matches:
        span = doc[start:end].text  # the matched span
        # Skip spans that share any word with the answer.
        if len(set(span.split(" ")).intersection(set(ans.split(" ")))) > 0:
            continue
        original_concept = nlp.vocab.strings[match_id]

        # Single-word concepts are reduced to their lemma.
        if len(original_concept.split("_")) == 1:
            original_concept = list(lemmatize(nlp, original_concept))[0]

        if span not in span_to_concepts:
            span_to_concepts[span] = set()

        span_to_concepts[span].add(original_concept)

    for span, concepts in span_to_concepts.items():
        # Keep only the three shortest concept names per span.
        concepts_sorted = list(concepts)
        concepts_sorted.sort(key=len)

        shortest = concepts_sorted[0:3]
        for c in shortest:
            if c in blacklist:
                continue
            # If a lemmatized form of the concept is itself among the shortest
            # candidates, keep that form; otherwise keep the concept as-is.
            lcs = lemmatize(nlp, c)
            intersect = lcs.intersection(shortest)
            if len(intersect) > 0:
                mentioned_concepts.add(list(intersect)[0])
            else:
                mentioned_concepts.add(c)

    return mentioned_concepts