Python spacy.language() Examples
The following are 30 code examples of spacy.language().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module spacy, or try the search function.
Example #1
Source File: spacy_parser.py From fonduer with MIT License | 7 votes |
def model_installed(name: str) -> bool:
    """Tell whether a spaCy language model called ``name`` can be found.

    Adapted from
    https://github.com/explosion/spaCy/blob/master/spacy/util.py

    :param name: Name of an entry in the spaCy data directory, name of an
        installed package, or a path to a model data directory.
    :return: ``True`` if any of the three lookups succeeds, else ``False``.
    :raises IOError: If the spaCy data path is unset or does not exist.
    """
    data_path = util.get_data_path()
    if not (data_path and data_path.exists()):
        raise IOError(f"Can't find spaCy data path: {data_path}")

    entries_in_data_dir = {entry.name for entry in data_path.iterdir()}
    # Checked in order: entry in the data directory, installed package,
    # then a filesystem path to the model data.
    return bool(
        name in entries_in_data_dir
        or is_package(name)
        or Path(name).exists()
    )
Example #2
Source File: spacy_processors_test.py From forte with Apache License 2.0 | 6 votes |
def test_neg_spacy_processor(self):
    """Expect processing to fail for the 'ner' / 'xx_ent_wiki_sm' config.

    Builds a string-reader pipeline with a single ``SpacyProcessor`` and
    asserts that running it raises ``ProcessExecutionException``.
    """
    pipeline = Pipeline[DataPack]()
    pipeline.set_reader(StringReader())

    config = {
        "processors": 'ner',
        # Language code for the language to build the Pipeline
        "lang": "xx_ent_wiki_sm",
        "use_gpu": False
    }
    pipeline.add(SpacyProcessor(), config=config)
    pipeline.initialize()

    sentences = [
        "This tool is called Forte.",
        "The goal of this project to help you build NLP " "pipelines.",
        "NLP has never been made this easy before.",
    ]
    document = ' '.join(sentences)

    with self.assertRaises(ProcessExecutionException):
        _ = pipeline.process(document)
Example #3
Source File: spacy_utils.py From Rasa_NLU_Chi with Apache License 2.0 | 6 votes |
def ensure_proper_language_model(nlp):
    # type: (Optional[Language]) -> None
    """Validate that a spacy language model was really loaded.

    Raises an ``Exception`` when ``nlp`` is ``None`` or when ``nlp.path``
    is ``None``; returns ``None`` otherwise.
    """
    if nlp is None:
        message = ("Failed to load spacy language model. "
                   "Loading the model returned 'None'.")
        raise Exception(message)

    if nlp.path is None:
        # Spacy sets the path to `None` if it did not load the model
        # from disk. In this case `nlp` is an unusable stub.
        message = ("Failed to load spacy language model for "
                   "lang '{}'. Make sure you have downloaded the "
                   "correct model (https://spacy.io/docs/usage/)."
                   "".format(nlp.lang))
        raise Exception(message)
Example #4
Source File: spacy_utils.py From Rasa_NLU_Chi with Apache License 2.0 | 6 votes |
def create(cls, cfg):
    # type: (RasaNLUModelConfig) -> SpacyNLP
    """Load the configured spacy model and wrap it in a ``SpacyNLP``.

    The model name comes from the component's ``"model"`` setting; when
    that is missing or empty, ``cfg.language`` is used and written back
    into the component configuration.
    """
    import spacy

    settings = cfg.for_component(cls.name, cls.defaults)
    model_name = settings.get("model")

    # if no model is specified, we fall back to the language string
    if not model_name:
        model_name = cfg.language
        settings["model"] = cfg.language

    logger.info("Trying to load spacy model with "
                "name '{}'".format(model_name))

    nlp = spacy.load(model_name, parser=False)
    cls.ensure_proper_language_model(nlp)
    return SpacyNLP(settings, nlp)
Example #5
Source File: spacy_utils.py From rasa-for-botfront with Apache License 2.0 | 6 votes |
def ensure_proper_language_model(nlp: Optional["Language"]) -> None:
    """Raise if the spacy language model is missing or was not loaded.

    An ``Exception`` is raised when ``nlp`` is ``None`` and also when
    ``nlp.path`` is ``None``. Nothing is returned for a valid model.
    """
    if nlp is None:
        raise Exception(
            "Failed to load spacy language model. "
            "Loading the model returned 'None'."
        )

    if nlp.path is not None:
        return

    # Spacy sets the path to `None` if it did not load the model from
    # disk. In this case `nlp` is an unusable stub.
    raise Exception(
        "Failed to load spacy language model for "
        "lang '{}'. Make sure you have downloaded the "
        "correct model (https://spacy.io/docs/usage/)."
        "".format(nlp.lang)
    )
Example #6
Source File: spacy_utils.py From rasa-for-botfront with Apache License 2.0 | 6 votes |
def create(
    cls, component_config: Dict[Text, Any], config: RasaNLUModelConfig
) -> "SpacyNLP":
    """Build the component around a freshly loaded spacy model.

    ``component_config`` is merged over ``cls.defaults``. The model name is
    read from its ``"model"`` entry; when that is missing or empty,
    ``config.language`` is used and stored back under ``"model"``.
    """
    merged_config = override_defaults(cls.defaults, component_config)
    model_name = merged_config.get("model")

    # if no model is specified, we fall back to the language string
    if not model_name:
        model_name = config.language
        merged_config["model"] = config.language

    logger.info(f"Trying to load spacy model with name '{model_name}'")

    nlp = cls.load_model(model_name)
    cls.ensure_proper_language_model(nlp)
    return cls(merged_config, nlp)
Example #7
Source File: profanity_filter.py From profanity-filter with GNU General Public License v3.0 | 6 votes |
def _censor(self, text: str, return_bool=False) -> Union[str, bool]: """:return: text with any profane words censored or bool (True - text has profane words, False otherwise) if return_bool=True""" result = '' text_parts = self._split_by_language(text=text) for language, text_part in text_parts: result_part = text_part doc = self._parse(language=language, text=text_part) for token in doc: if token._.is_profane: if return_bool: return True else: result_part = self._replace_token(text=result_part, old=token, new=token._.censored) result += result_part if return_bool: return False else: return result
Example #8
Source File: spacy_component.py From profanity-filter with GNU General Public License v3.0 | 6 votes |
def __call__(self, doc: Doc, language: Language = None,
             stop_on_first_profane_word: Optional[bool] = None) -> Doc:
    """Censor ``doc`` one whitespace/punctuation-free run of tokens at a time.

    ``language`` and ``stop_on_first_profane_word`` fall back to the values
    stored on the instance when passed as ``None``. The same ``doc`` object
    is returned.
    """
    self.register_extensions(exist_ok=True)
    language = self._language if language is None else language
    if stop_on_first_profane_word is None:
        stop_on_first_profane_word = self._stop_on_first_profane_word

    start = 0
    # len(doc) is re-read on every pass rather than cached.
    while start < len(doc):
        end = start + 1
        # Extend the run until a token boundary is marked by whitespace,
        # a space token or punctuation on either side.
        while end < len(doc):
            if (doc[end - 1].whitespace_ or doc[end - 1].is_space or doc[end - 1].is_punct
                    or doc[end].is_space or doc[end].is_punct):
                break
            end += 1

        span = self._censor_spaceless_span(doc[start:end], language=language)
        if stop_on_first_profane_word and span._.is_profane:
            break
        start += len(span)
    return doc
Example #9
Source File: spacy_utils.py From rasa_nlu with Apache License 2.0 | 6 votes |
def ensure_proper_language_model(nlp: Optional['Language']) -> None:
    """Check that the spacy language model is usable.

    Raises an ``Exception`` if ``nlp`` is ``None`` or if ``nlp.path`` is
    ``None``; otherwise returns ``None``.
    """
    if nlp is None:
        not_loaded = ("Failed to load spacy language model. "
                      "Loading the model returned 'None'.")
        raise Exception(not_loaded)

    # Spacy sets the path to `None` if it did not load the model from disk.
    # In this case `nlp` is an unusable stub.
    loaded_from_disk = nlp.path is not None
    if not loaded_from_disk:
        raise Exception("Failed to load spacy language model for "
                        "lang '{}'. Make sure you have downloaded the "
                        "correct model (https://spacy.io/docs/usage/)."
                        "".format(nlp.lang))
Example #10
Source File: spacy_utils.py From rasa_nlu with Apache License 2.0 | 6 votes |
def create(cls,
           component_config: Dict[Text, Any],
           config: RasaNLUModelConfig) -> 'SpacyNLP':
    """Load a spacy model (parser disabled) and build the component.

    ``component_config`` is merged over ``cls.defaults``; when the merged
    configuration has no ``"model"`` value, ``config.language`` is used as
    the model name and stored back under ``"model"``.
    """
    import spacy

    merged_config = override_defaults(cls.defaults, component_config)
    model_name = merged_config.get("model")

    # if no model is specified, we fall back to the language string
    if not model_name:
        model_name = config.language
        merged_config["model"] = config.language

    logger.info("Trying to load spacy model with "
                "name '{}'".format(model_name))

    nlp = spacy.load(model_name, disable=['parser'])
    cls.ensure_proper_language_model(nlp)
    return cls(merged_config, nlp)
Example #11
Source File: spacy-fastext.py From word2vecVN with Apache License 2.0 | 6 votes |
def load_nlp(vectors_loc, lang=None):
    """Create a spaCy pipeline object and fill its vocab with word vectors.

    The vectors file is read in binary mode. Its first line is a header of
    two whitespace-separated fields, the second being the vector width.
    Every following line holds a word followed by that many space-separated
    float values, encoded as UTF-8.

    :param vectors_loc: Path of the vectors file to read.
    :param lang: Language code handed to ``spacy.blank``; when ``None`` a
        bare ``Language()`` is created instead.
    :return: The pipeline object whose vocab received the vectors.
    """
    if lang is None:
        nlp = Language()
    else:
        # create empty language class - this is required if you're planning
        # to save the model to disk and load it back later (models always
        # need a "lang" setting). Use 'xx' for blank multi-language class.
        nlp = spacy.blank(lang)

    with open(vectors_loc, 'rb') as file_:
        header = file_.readline()
        # The row count in the header is not needed; only the width is used.
        _nr_row, nr_dim = header.split()
        width = int(nr_dim)  # parsed once instead of once per line
        nlp.vocab.reset_vectors(width=width)
        for line in file_:
            line = line.rstrip().decode('utf8')
            # Split from the right so a word containing spaces stays whole.
            pieces = line.rsplit(' ', width)
            word = pieces[0]
            vector = numpy.asarray([float(v) for v in pieces[1:]], dtype='f')
            nlp.vocab.set_vector(word, vector)  # add the vectors to the vocab
    return nlp
Example #12
Source File: spacy_parser.py From fonduer with MIT License | 6 votes |
def _load_lang_model(self) -> None: """Load spaCy language model. If a model is not installed, download it before loading it. Currenty supported spaCy languages en English (50MB) de German (645MB) fr French (1.33GB) es Spanish (377MB) :return: """ if self.lang in self.languages: if not SpacyParser.model_installed(self.lang): download(self.lang) model = spacy.load(self.lang) elif self.lang in self.alpha_languages: language_module = importlib.import_module(f"spacy.lang.{self.lang}") language_method = getattr(language_module, self.alpha_languages[self.lang]) model = language_method() self.model = model
Example #13
Source File: spacy_processors.py From forte with Apache License 2.0 | 5 votes |
def default_configs(cls):
    """Return the parent's default config extended with spaCy settings.

    Returns:
        The mapping produced by ``super().default_configs()`` after being
        updated with the ``processors``, ``lang`` and ``use_gpu`` entries.
    """
    spacy_defaults = {
        'processors': 'tokenize, pos, lemma',
        # Language code for the language to build the Pipeline
        'lang': 'en_core_web_sm',
        'use_gpu': False,
    }
    config = super().default_configs()
    config.update(spacy_defaults)
    return config
Example #14
Source File: spacy_processors_test.py From forte with Apache License 2.0 | 5 votes |
def setUp(self):
    """Create the pipeline under test and a spaCy model loaded from the same ``lang``."""
    config = {
        "processors": "tokenize",
        # Language code for the language to build the Pipeline
        "lang": "en_core_web_sm",
        "use_gpu": False
    }

    self.spacy = Pipeline[DataPack]()
    self.spacy.set_reader(StringReader())
    self.spacy.add(SpacyProcessor(), config=config)
    self.spacy.initialize()

    # Model loaded directly through spaCy from the same "lang" value.
    self.nlp: Language = spacy.load(config['lang'])
Example #15
Source File: spacy_utils.py From Rasa_NLU_Chi with Apache License 2.0 | 5 votes |
def cache_key(cls, model_metadata):
    # type: (Metadata) -> Text
    """Build the cache key ``"<component name>-<spacy model name>"``."""
    component_meta = model_metadata.for_component(cls.name)

    # Fallback, use the language name, e.g. "en",
    # as the model name if no explicit name is defined
    fallback_name = model_metadata.language
    spacy_model_name = component_meta.get("model", fallback_name)

    return cls.name + "-" + spacy_model_name
Example #16
Source File: spacy_parser.py From fonduer with MIT License | 5 votes |
def __init__(self, vocab: Vocab) -> None:
    """Set up the custom tokenizer.

    :param vocab: The ``vocab`` attribute of the respective spacy language
        object; stored on the instance as ``self.vocab``.
    """
    self.vocab = vocab
Example #17
Source File: spacy_tokenizer.py From DeepPavlov with Apache License 2.0 | 5 votes |
def _try_load_spacy_model(model_name: str, disable: Iterable[str] = ()):
    """Load a spacy model by name, falling back to importing it as a module.

    ``spacy.load`` is tried first. If it raises ``OSError``, the module
    named ``model_name`` is imported and its ``load`` function is called.
    If that fallback fails for any reason, or yields something that is not
    a ``spacy.language.Language``, the original ``OSError`` is raised.
    """
    disabled = set(disable)
    try:
        return spacy.load(model_name, disable=disabled)
    except OSError as load_error:
        try:
            model = __import__(model_name).load(disable=disabled)
            if not isinstance(model, spacy.language.Language):
                raise RuntimeError(f'{model_name} is not a spacy model module')
        except Exception:
            # Report the first failure, not the fallback's.
            raise load_error
        return model
Example #18
Source File: spacy_component.py From profanity-filter with GNU General Public License v3.0 | 5 votes |
def __init__(self,
             profanity_filter: 'ProfanityFilter',
             nlp: spacy.language.Language,
             language: Language = None,
             stop_on_first_profane_word: bool = False):
    """Store the profanity filter, the spaCy pipeline and the default options."""
    self._profanity_filter = profanity_filter
    self._nlp = nlp  # Used only for tokenization
    self._language = language
    self._stop_on_first_profane_word = stop_on_first_profane_word

# noinspection PyProtectedMember
Example #19
Source File: spacy_utils.py From rasa-for-botfront with Apache License 2.0 | 5 votes |
def cache_key(
    cls, component_meta: Dict[Text, Any], model_metadata: "Metadata"
) -> Optional[Text]:
    """Return ``"<component name>-<spacy model name>"`` as the cache key."""
    # Fallback, use the language name, e.g. "en",
    # as the model name if no explicit name is defined
    default_model_name = model_metadata.language
    model_name = component_meta.get("model", default_model_name)

    return cls.name + "-" + model_name
Example #20
Source File: spacy_extractor.py From cookiecutter-spacy-fastapi with MIT License | 5 votes |
def __init__(
    self,
    nlp: Language,
    input_id_col: str = "id",
    input_text_col: str = "text",
):
    """Initialize the SpacyExtractor pipeline.

    nlp (spacy.language.Language): pre-loaded spacy language model
    input_id_col (str): property on each document to correlate with request
    input_text_col (str): property on each document to run the model on

    The three arguments are stored unchanged on the instance.
    """
    self.nlp = nlp
    self.input_text_col = input_text_col
    self.input_id_col = input_id_col
Example #21
Source File: spacy_utils.py From rasa_nlu with Apache License 2.0 | 5 votes |
def cache_key(cls,
              component_meta: Dict[Text, Any],
              model_metadata: 'Metadata') -> Optional[Text]:
    """Compose the cache key from the component name and the spacy model name."""
    # Fallback, use the language name, e.g. "en",
    # as the model name if no explicit name is defined
    language_name = model_metadata.language
    chosen_model = component_meta.get("model", language_name)

    return cls.name + "-" + chosen_model
Example #22
Source File: profanity_filter.py From profanity-filter with GNU General Public License v3.0 | 5 votes |
def _detect_languages(self, text: str) -> Languages:
    """Return the languages detected in ``text``, limited to ``self.languages``.

    Detection through polyglot only runs when multilingual analysis is
    enabled. Otherwise, or when detection yields nothing besides ``'un'``,
    the first entry of ``self.languages`` is used.
    """
    fallback = OrderedSet([self.languages[0]])
    detected = fallback

    if AnalysisType.MULTILINGUAL in self.analyses:
        detector = polyglot.detect.Detector(text, quiet=True)
        codes = OrderedSet([lang.code for lang in detector.languages if lang.code != 'un'])
        if codes:
            detected = codes

    return detected.intersection(self.languages)
Example #23
Source File: profanity_filter.py From profanity-filter with GNU General Public License v3.0 | 5 votes |
def _is_profane_word(self, language: Language, word: str) -> bool:
    """Return whether ``word`` is in a profane-word dictionary.

    With ``language`` set to ``None`` every dictionary is consulted;
    otherwise only the dictionary stored for that language.
    """
    if language is None:
        dictionaries = self.profane_word_dictionaries.values()
    else:
        dictionaries = [self.profane_word_dictionaries[language]]
    return any(word in dictionary for dictionary in dictionaries)
Example #24
Source File: profanity_filter.py From profanity-filter with GNU General Public License v3.0 | 5 votes |
def _get_trie(self, language: Language) -> Trie:
    """Return the first available trie, trying ``language`` before ``self.languages``.

    Returns ``None`` when no candidate language has an entry in ``self._trie``.
    """
    # noinspection PyTypeChecker
    candidates = OrderedSet([language]) | self.languages
    for candidate in candidates:
        try:
            return self._trie[candidate]
        except KeyError:
            continue
    return None
Example #25
Source File: profanity_filter.py From profanity-filter with GNU General Public License v3.0 | 5 votes |
def _is_dictionary_word(self, language: Language, word: str) -> bool:
    """Return whether any spell checker obtained for ``language`` accepts ``word``.

    A ``UnicodeEncodeError`` raised during the check is treated as "not a
    dictionary word".
    """
    try:
        for checker in self._get_spells(language=language):
            if checker.spell(word):
                return True
    except UnicodeEncodeError:
        return False
    return False
Example #26
Source File: profanity_filter.py From profanity-filter with GNU General Public License v3.0 | 5 votes |
def _lemmas(self, language: Language, word: Union[str, spacy.tokens.Token]) -> 'OrderedSet[str]':
    """Collect the candidate base forms of ``word``.

    The result holds, in order: the token text, the lower-cased spaCy lemma
    (or the lower-cased token text when the lemma is ``'-PRON-'``), the
    stems and the normal forms. A falsy ``word`` yields an empty set.
    """
    forms = OrderedSet()
    if not word:
        return forms

    token = self._make_spacy_token(language=language, word=word)
    lemma = token.lemma_
    forms.add(token.text)

    if lemma != '-PRON-':
        lemma = lemma.lower()
    else:
        lemma = token.lower_
    forms.add(lemma)

    forms |= self._stems(language=language, word=token.text)
    forms |= self._normal_forms(language=language, word=token.text)
    return forms
Example #27
Source File: profanity_filter.py From profanity-filter with GNU General Public License v3.0 | 5 votes |
def spells(self, value: Optional[Spells]) -> None:
    """Set the spell checkers, building them from the data directory when ``value`` is ``None``.

    The cache is cleared first if deep analysis is enabled. When no spell
    checker ends up available, deep analysis is removed from
    ``self.analyses``.
    """
    if AnalysisType.DEEP in self.analyses:
        self.clear_cache()

    if value is None:
        self._spells = {}
        for language in self._languages:
            dictionary_path = self._DATA_DIR / f'{language}.dic'
            affix_path = self._DATA_DIR / f'{language}.aff'
            # A language whose HunSpell setup fails is simply left out.
            with suppress(HunSpellError):
                self._spells[language] = HunSpell(dictionary_path, affix_path)
    else:
        self._spells = value

    # NOTE(review): the original nesting of this check could not be confirmed;
    # it is assumed to apply to both branches above - verify against upstream.
    if not self._spells:
        self.analyses -= {AnalysisType.DEEP}
Example #28
Source File: spacy_component.py From profanity-filter with GNU General Public License v3.0 | 5 votes |
def _censor_spaceless_span(self, span: Span, language: Language) -> Span:
    """Censor one span of tokens and record the result on its tokens.

    The span is censored as a single word. If it is profane, its tokens are
    merged into one and that token receives the censored text and the
    original profane word. Otherwise every token's ``censored`` extension
    is set to its own text. The span is returned.
    """
    # A multi-token span is censored via its string form, a single token as is.
    word = str(span) if len(span) > 1 else span[0]
    candidate = spacy_utlis.make_token(nlp=self._nlp, word=word)
    verdict = self._profanity_filter.censor_word(word=candidate, language=language)

    if not verdict.is_profane:
        for token in span:
            token._.censored = token.text
        return span

    with span.doc.retokenize() as retokenizer:
        retokenizer.merge(span)
    # Read the token only after the retokenize block has exited.
    merged = span[0]
    merged._.censored = verdict.censored
    merged._.original_profane_word = verdict.original_profane_word
    return span
Example #29
Source File: spacy_utlis.py From profanity-filter with GNU General Public License v3.0 | 5 votes |
def parse(nlp: spacy.language.Language, text: str, language: Language = None,
          use_profanity_filter: bool = False) -> Union[Doc, Token]:
    """Run ``text`` through ``nlp`` with the profanity filter component on or off.

    When ``use_profanity_filter`` is true the component receives
    ``language`` through ``component_cfg``; otherwise the component is
    listed in ``disable``.
    """
    filter_name = SpacyProfanityFilterComponent.name
    if use_profanity_filter:
        disable = []
        component_cfg = {filter_name: {'language': language}}
    else:
        disable = [filter_name]
        component_cfg = {}
    return nlp(text, disable=disable, component_cfg=component_cfg)
Example #30
Source File: spacy_utlis.py From profanity-filter with GNU General Public License v3.0 | 5 votes |
def make_token(nlp: spacy.language.Language, word: Union[str, Token]) -> Token:
    """Return ``word`` as a single token.

    Anything that already has a ``text`` attribute is returned unchanged.
    Otherwise ``word`` is parsed and all resulting tokens are merged into
    one, which is returned.
    """
    already_a_token = hasattr(word, 'text')
    if already_a_token:
        return word

    doc = parse(nlp=nlp, text=word)
    whole_doc = doc[:]
    with doc.retokenize() as retokenizer:
        retokenizer.merge(whole_doc)
    # Index only after the retokenize block has exited.
    return doc[0]