Python spacy.gold() Examples
The following are 4 code examples that use the spacy.gold module (specifically its GoldParse class).
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module spacy, or try the search function.
Example #1
Source File: crf_entity_extractor.py From Rasa_NLU_Chi with Apache License 2.0 | 7 votes |
def _from_json_to_crf(self,
                      message,  # type: Message
                      entity_offsets  # type: List[Tuple[int, int, Text]]
                      ):
    # type: (...) -> List[Tuple[Text, Text, Text, Text]]
    """Convert json examples to format of underlying crfsuite.

    The entity offsets are aligned against the message's spaCy document
    with ``GoldParse``; the resulting per-token labels are then handed,
    together with the message, to ``self._from_text_to_crf``.

    Args:
        message: Training example; its ``"spacy_doc"`` entry is read.
        entity_offsets: Entity annotations forwarded to ``GoldParse`` as
            its ``entities`` argument.

    Returns:
        The value produced by ``self._from_text_to_crf(message, ents)``.
    """
    from spacy.gold import GoldParse

    doc = message.get("spacy_doc")
    gold = GoldParse(doc, entities=entity_offsets)
    # Column 5 of every annotation row is used as the token's entity label.
    ents = [annotation[5] for annotation in gold.orig_annot]

    if '-' in ents:
        # ``Logger.warn`` is a deprecated alias; ``warning`` is the
        # supported spelling.
        logger.warning("Misaligned entity annotation in sentence '{}'. "
                       "Make sure the start and end values of the "
                       "annotated training examples end at token "
                       "boundaries (e.g. don't include trailing "
                       "whitespaces).".format(doc.text))

    if not self.component_config["BILOU_flag"]:
        bilou_prefixes = {"B", "I", "U", "L"}
        # removes BILOU prefix from label
        ents = [
            self._entity_from_label(label)
            if self._bilou_from_label(label) in bilou_prefixes
            else label
            for label in ents
        ]

    return self._from_text_to_crf(message, ents)
Example #2
Source File: CRF.py From NLU with MIT License | 6 votes |
def jsonToCrf(self, json_eg, spacy_nlp):
    """Convert a JSON example into ``(text, tag, entity label)`` token rows.

    Args:
        json_eg: Mapping with a ``'text'`` iterable of sentences and an
            ``'entities'`` iterable of dicts carrying ``'rangeFrom'``,
            ``'rangeTo'`` and ``'entity'``.
        spacy_nlp: Callable that turns a sentence into a spaCy document.

    Returns:
        A list of ``(token text, token tag, entity label)`` tuples for the
        last sentence that was processed.
    """
    # NOTE(review): statement nesting was reconstructed from a flattened
    # listing; confirm that the return belongs after the loop.
    # NOTE(review): offsets accumulate across sentences and only the final
    # sentence's rows are returned -- verify this is intended.
    entity_offsets = []
    for sentence in json_eg['text']:
        doc = spacy_nlp(sentence)
        entity_offsets.extend(
            (item['rangeFrom'], item['rangeTo'], item['entity'])
            for item in json_eg['entities']
        )
        gold = GoldParse(doc, entities=entity_offsets)
        # Column 5 of every annotation row is used as the entity label.
        labels = [row[5] for row in gold.orig_annot]
        crf_format = [
            (token.text, token.tag_, labels[idx])
            for idx, token in enumerate(doc)
        ]
    return crf_format
Example #3
Source File: crf_entity_extractor.py From rasa_nlu with Apache License 2.0 | 5 votes |
def _from_json_to_crf(self,
                      message: Message,
                      entity_offsets: List[Tuple[int, int, Text]]
                      ) -> List[Tuple[Text, Text, Text, Text]]:
    """Turn a JSON training example into the crfsuite input format.

    Per-token entity labels come from one of two places: when
    ``self.pos_features`` is set they are read from a spaCy ``GoldParse``
    built on the message's ``"spacy_doc"``; otherwise they are computed by
    ``self._bilou_tags_from_offsets`` from the message's ``"tokens"``.
    The labels and the message are then passed to
    ``self._from_text_to_crf``.
    """
    if not self.pos_features:
        ents = self._bilou_tags_from_offsets(message.get("tokens"),
                                             entity_offsets)
    else:
        from spacy.gold import GoldParse

        parsed = GoldParse(message.get("spacy_doc"),
                           entities=entity_offsets)
        # Column 5 of every annotation row is used as the entity label.
        ents = [row[5] for row in parsed.orig_annot]

    if '-' in ents:
        logger.warning("Misaligned entity annotation in sentence '{}'. "
                       "Make sure the start and end values of the "
                       "annotated training examples end at token "
                       "boundaries (e.g. don't include trailing "
                       "whitespaces or punctuation)."
                       "".format(message.text))

    if self.component_config["BILOU_flag"]:
        # BILOU tagging requested: labels are passed on untouched.
        return self._from_text_to_crf(message, ents)

    bilou_prefixes = {"B", "I", "U", "L"}
    for position, tag in enumerate(ents):
        if self._bilou_from_label(tag) in bilou_prefixes:
            # Strip the BILOU prefix, keeping only the entity name.
            ents[position] = self._entity_from_label(tag)

    return self._from_text_to_crf(message, ents)
Example #4
Source File: CRF.py From NLU with MIT License | 5 votes |
def createDataset(self, intents, spacy_nlp):
    """Build CRF training rows for every sentence of every intent.

    Args:
        intents: Iterable of mappings. Each has a ``'text'`` sequence of
            sentences and an ``'entities'`` sequence that holds, at the
            same index as each sentence, that sentence's entity dicts
            (keys ``'rangeFrom'``, ``'rangeTo'``, ``'entity'``).
        spacy_nlp: Callable that turns a sentence into a spaCy document.

    Returns:
        A list with one entry per sentence; each entry is a list of
        ``(token text, token tag, entity label)`` tuples, one per token.
    """
    dataset = []
    for intent in intents:
        for sentence_idx, sentence in enumerate(intent['text']):
            doc = spacy_nlp(sentence)
            print(doc.text)
            # Offsets are rebuilt for each sentence: they are positions
            # inside this sentence and must not leak into the next document.
            entity_offsets = [
                (entity['rangeFrom'], entity['rangeTo'], entity['entity'])
                for entity in intent['entities'][sentence_idx]
            ]
            gold = GoldParse(doc, entities=entity_offsets)
            # Column 5 of every annotation row is used as the entity label.
            ents = [annotation[5] for annotation in gold.orig_annot]
            # Index by token position (the original indexed with the
            # leftover ``entity`` dict, which fails at runtime).
            dataset.append([
                (token.text, token.tag_, ents[token_idx])
                for token_idx, token in enumerate(doc)
            ])
    return dataset