Python spacy.matcher Examples
The following are 9 code examples of spacy.matcher. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module spacy, or try the search function.
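Before the project examples, here is a minimal, self-contained sketch of the Matcher API that all of them build on. Note that the three-argument matcher.add(name, callback, pattern) signature used throughout this page is the spaCy v2 form; spaCy v3 changed it to matcher.add(name, [pattern]). The rule name and sentence below are illustrative.

import spacy
from spacy.matcher import Matcher

nlp = spacy.blank('en')  # a blank pipeline is enough for lexical patterns
matcher = Matcher(nlp.vocab)
matcher.add('HELLO_WORLD', None, [{'LOWER': 'hello'}, {'LOWER': 'world'}])

doc = nlp('Hello world! hello WORLD!')
for match_id, start, end in matcher(doc):
    # match_id is a hash; resolve it to the rule name via the StringStore
    print(nlp.vocab.strings[match_id], doc[start:end].text)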
Example #1
Source File: ecommerce_preprocess.py From DeepPavlov with Apache License 2.0 | 7 votes
def __init__(self, spacy_model: str = 'en_core_web_sm',
             disable: Optional[Iterable[str]] = None, **kwargs):
    if disable is None:
        disable = ['parser', 'ner']
    self.model = _try_load_spacy_model(spacy_model, disable=disable)

    # Register custom boolean token flags: BELOW/ABOVE are set for tokens
    # whose text starts with the given alternatives.
    below = lambda text: bool(re.compile(r'below|cheap').match(text))
    BELOW = self.model.vocab.add_flag(below)
    above = lambda text: bool(re.compile(r'above|start').match(text))
    ABOVE = self.model.vocab.add_flag(above)

    # Each pattern: flagged keyword, optional 'than'/'from'/'$', then a
    # numeric MONEY token.
    self.matcher = Matcher(self.model.vocab)
    self.matcher.add('below', None, [{BELOW: True},
                                     {'LOWER': 'than', 'OP': '?'},
                                     {'LOWER': 'from', 'OP': '?'},
                                     {'ORTH': '$', 'OP': '?'},
                                     {'ENT_TYPE': 'MONEY', 'LIKE_NUM': True}])
    self.matcher.add('above', None, [{ABOVE: True},
                                     {'LOWER': 'than', 'OP': '?'},
                                     {'LOWER': 'from', 'OP': '?'},
                                     {'ORTH': '$', 'OP': '?'},
                                     {'ENT_TYPE': 'MONEY', 'LIKE_NUM': True}])
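Example #1 relies on Vocab.add_flag, which registers a text predicate and returns a flag ID that can be used directly as a pattern key (a spaCy v2-era API). A minimal sketch of that mechanism, independent of the DeepPavlov class; the rule name and test sentence are illustrative:

import re
import spacy
from spacy.matcher import Matcher

nlp = spacy.blank('en')
# The flag is computed per lexeme from its text.
IS_BELOW_WORD = nlp.vocab.add_flag(lambda text: bool(re.match(r'below|cheap', text)))

matcher = Matcher(nlp.vocab)
matcher.add('below', None, [{IS_BELOW_WORD: True}, {'LIKE_NUM': True}])

doc = nlp('show me something below 100')
print([doc[s:e].text for _, s, e in matcher(doc)])  # expected: ['below 100']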
Example #2
Source File: grounding_concepts.py From KagNet with MIT License | 6 votes
def match_mentioned_concepts(nlp, sents, answers, batch_id=-1):
    matcher = load_matcher(nlp)
    res = []
    # print("Begin matching concepts.")
    for sid, s in tqdm(enumerate(sents), total=len(sents),
                       desc="grounding batch_id:%d" % batch_id):
        a = answers[sid]
        all_concepts = ground_mentioned_concepts(nlp, matcher, s, a)
        answer_concepts = ground_mentioned_concepts(nlp, matcher, a)
        question_concepts = all_concepts - answer_concepts
        if len(question_concepts) == 0:
            # print(s)
            question_concepts = hard_ground(nlp, s)  # not very possible
        if len(answer_concepts) == 0:
            print(a)
            answer_concepts = hard_ground(nlp, a)  # some case
            print(answer_concepts)
        res.append({"sent": s, "ans": a,
                    "qc": list(question_concepts),
                    "ac": list(answer_concepts)})
    return res
Example #3
Source File: doc.py From textpipe with MIT License | 5 votes
def match(self, matcher):
    """
    Run a SpaCy matcher over the cleaned content

    >>> import spacy.matcher
    >>> from textpipe.doc import Doc
    >>> matcher = spacy.matcher.Matcher(spacy.lang.en.English().vocab)
    >>> matcher.add('HASHTAG', None, [{'ORTH': '#'}, {'IS_ASCII': True}])
    >>> Doc('Test with #hashtag').match(matcher)
    [('#hashtag', 'HASHTAG')]
    """
    return [(self._spacy_doc[start:end].text, matcher.vocab.strings[match_id])
            for match_id, start, end in matcher(self._spacy_doc)]
Example #4
Source File: question_rules.py From squash-generation with MIT License | 5 votes
def judgemental(question):
    matcher = PhraseMatcher(nlp.vocab)
    matcher.add('JUDGEMENT', None, nlp('your'), nlp('you'), nlp('Your'), nlp('You'))
    return len(matcher(question)) > 0
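Example #4 depends on a module-level nlp pipeline. Here is a self-contained sketch of the same PhraseMatcher pattern using a blank pipeline, so no model download is needed; PhraseMatcher compares exact token text by default, which is why the original adds four capitalisation variants. The test sentences are illustrative.

import spacy
from spacy.matcher import PhraseMatcher

nlp = spacy.blank('en')
matcher = PhraseMatcher(nlp.vocab)
matcher.add('JUDGEMENT', None, nlp('your'), nlp('you'), nlp('Your'), nlp('You'))

print(len(matcher(nlp('Is your answer correct?'))) > 0)  # expected: True
print(len(matcher(nlp('Is the answer correct?'))) > 0)   # expected: False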
Example #5
Source File: utils.py From ResumeParser with MIT License | 5 votes
def extract_name(nlp_text, matcher):
    '''
    Helper function to extract name from spacy nlp text

    :param nlp_text: object of `spacy.tokens.doc.Doc`
    :param matcher: object of `spacy.matcher.Matcher`
    :return: string of full name
    '''
    pattern = [cs.NAME_PATTERN]
    matcher.add('NAME', None, *pattern)
    matches = matcher(nlp_text)
    for match_id, start, end in matches:
        span = nlp_text[start:end]
        return span.text
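cs.NAME_PATTERN above comes from the project's constants module and is not shown on this page. As a hedged stand-in, a pattern of two consecutive proper nouns reproduces the idea; it needs a tagger-equipped pipeline such as en_core_web_sm (assumed installed), and the sample sentence is illustrative:

import spacy
from spacy.matcher import Matcher

nlp = spacy.load('en_core_web_sm')
matcher = Matcher(nlp.vocab)
# Hypothetical stand-in for cs.NAME_PATTERN: two consecutive proper nouns.
matcher.add('NAME', None, [{'POS': 'PROPN'}, {'POS': 'PROPN'}])

doc = nlp('John Smith is a software engineer.')
for match_id, start, end in matcher(doc):
    print(doc[start:end].text)  # expected: John Smith
    break  # like extract_name, keep only the first match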
Example #6
Source File: resume_parser.py From ResumeParser with MIT License | 5 votes
def __get_basic_details(self):
    name = utils.extract_name(self.__nlp, matcher=self.__matcher)
    email = utils.extract_email(self.__text)
    mobile = utils.extract_mobile_number(self.__text)
    skills = utils.extract_skills(self.__nlp, self.__noun_chunks)
    edu = utils.extract_education([sent.string.strip() for sent in self.__nlp.sents])
    experience = utils.extract_experience(self.__text)
    entities = utils.extract_entity_sections(self.__text_raw)
    self.__details['name'] = name
    self.__details['email'] = email
    self.__details['mobile_number'] = mobile
    self.__details['skills'] = skills
    # self.__details['education'] = entities['education']
    self.__details['education'] = edu
    self.__details['experience'] = experience
    try:
        self.__details['competencies'] = utils.extract_competencies(self.__text_raw, entities['experience'])
        self.__details['measurable_results'] = utils.extract_measurable_results(self.__text_raw, entities['experience'])
    except KeyError:
        self.__details['competencies'] = []
        self.__details['measurable_results'] = []
    return
Example #7
Source File: grounding_concepts.py From KagNet with MIT License | 5 votes
def load_matcher(nlp):
    config = configparser.ConfigParser()
    config.read("paths.cfg")
    with open(config["paths"]["matcher_patterns"], "r", encoding="utf8") as f:
        all_patterns = json.load(f)

    # One Matcher rule per concept, keyed by the concept name.
    matcher = Matcher(nlp.vocab)
    for concept, pattern in tqdm(all_patterns.items(), desc="Adding patterns to Matcher."):
        matcher.add(concept, None, pattern)
    return matcher
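The matcher_patterns file read by load_matcher is not shown here; its expected shape is a JSON object mapping each concept name to one token-pattern list. The entries below are hypothetical illustrations of that shape, not KagNet's actual data, and assume en_core_web_sm is installed:

import spacy
from spacy.matcher import Matcher

# Hypothetical stand-in for json.load(f) on the matcher_patterns file:
all_patterns = {
    "take_a_break": [{"LEMMA": "take"}, {"LEMMA": "a"}, {"LEMMA": "break"}],
    "sunshine": [{"LEMMA": "sunshine"}],
}

nlp = spacy.load('en_core_web_sm')  # LEMMA patterns need lemmatisation
matcher = Matcher(nlp.vocab)
for concept, pattern in all_patterns.items():
    matcher.add(concept, None, pattern)

doc = nlp('I need to take a break.')
print([nlp.vocab.strings[m_id] for m_id, s, e in matcher(doc)])  # expected: ['take_a_break']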
Example #8
Source File: ecommerce_preprocess.py From DeepPavlov with Apache License 2.0 | 5 votes
def extract_money(self, doc: spacy.tokens.Doc) -> Tuple[List, Tuple[float, float]]:
    """Extract money entities and money related tokens from `doc`.

    Parameters:
        doc: a list of tokens with corresponding tags, lemmas, etc.

    Returns:
        doc_no_money: doc with no money related tokens.
        money_range: money range from `money_range[0]` to `money_range[1]`
            extracted from the doc.
    """
    matches = self.matcher(doc)
    money_range: Tuple = ()
    doc_no_money = list(doc)
    negated = False
    for match_id, start, end in matches:
        string_id = self.model.vocab.strings[match_id]
        span = doc[start:end]

        # A 'neg' dependency child (e.g. "not below ...") flips the rule.
        for child in doc[start].children:
            if child.dep_ == 'neg':
                negated = True

        num_token = [token for token in span if token.like_num == True]
        if (string_id == 'below' and negated == False) or (string_id == 'above' and negated == True):
            money_range = (0, float(num_token[0].text))
        if (string_id == 'above' and negated == False) or (string_id == 'below' and negated == True):
            money_range = (float(num_token[0].text), float(math.inf))

        # Drop the matched money tokens from the returned document.
        del doc_no_money[start:end + 1]
    return doc_no_money, money_range
Example #9
Source File: grounding_concepts.py From KagNet with MIT License | 4 votes
def ground_mentioned_concepts(nlp, matcher, s, ans=""):
    s = s.lower()
    doc = nlp(s)
    matches = matcher(doc)

    mentioned_concepts = set()
    span_to_concepts = {}
    for match_id, start, end in matches:
        span = doc[start:end].text  # the matched span
        # Skip spans that overlap with the answer tokens.
        if len(set(span.split(" ")).intersection(set(ans.split(" ")))) > 0:
            continue
        original_concept = nlp.vocab.strings[match_id]
        # print("Matched '" + span + "' to the rule '" + string_id)
        if len(original_concept.split("_")) == 1:
            original_concept = list(lemmatize(nlp, original_concept))[0]
        if span not in span_to_concepts:
            span_to_concepts[span] = set()
        span_to_concepts[span].add(original_concept)

    for span, concepts in span_to_concepts.items():
        # Prefer the shortest concept names matched for this span.
        concepts_sorted = list(concepts)
        concepts_sorted.sort(key=len)
        # mentioned_concepts.update(concepts_sorted[0:2])
        shortest = concepts_sorted[0:3]
        for c in shortest:
            if c in blacklist:
                continue
            lcs = lemmatize(nlp, c)
            intersect = lcs.intersection(shortest)
            if len(intersect) > 0:
                mentioned_concepts.add(list(intersect)[0])
            else:
                mentioned_concepts.add(c)
    # stop = timeit.default_timer()
    # print('\t Done! Time: ', "{0:.2f} sec".format(float(stop - start_time)))
    # if __name__ == "__main__":
    #     print("Sentence: " + s)
    #     print(mentioned_concepts)
    #     print()
    return mentioned_concepts