Python spacy.cli() Examples
The following are 3 code examples of spacy.cli().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module spacy, or try the search function.
Example #1
Source File: spacy_annotator.py From errudite with GNU General Public License v2.0 | 5 votes |
def load_lang_model(lang: str, disable: List[str]):
    """Load a spaCy language model, downloading and installing it if missing.

    Arguments:
        lang {str} -- language / model name (e.g. ``'en'`` or a ``*_coref``
            model name)
        disable {List[str]} -- pipeline components to disable; if only the
            tokenizer is needed, pass ['parser', 'ner', 'textcat']

    Returns:
        the loaded spaCy Language pipeline
    """
    if 'coref' in lang:
        # Coref model names may not be directly loadable; fall back to the
        # base language code (the part before the first underscore).
        # Fix: in the original scrape the `except` clause had been commented
        # out, leaving a bare `try:` (a SyntaxError) — restored here.
        try:
            return spacy.load(lang, disable=disable)
        except Exception:
            return SpacyAnnotator.load_lang_model(lang.split('_')[0], disable=disable)
    try:
        return spacy.load(lang, disable=disable)
    except OSError:
        logger.warning(f"Spacy models '{lang}' not found. Downloading and installing.")
        spacy_download(lang)
        # NOTE(mattg): The following four lines are a workaround suggested by Ines for spacy
        # 2.1.0, which removed the linking that was done in spacy 2.0. importlib doesn't find
        # packages that were installed in the same python session, so the way `spacy_download`
        # works in 2.1.0 is broken for this use case. These four lines can probably be removed
        # at some point in the future, once spacy has figured out a better way to handle this.
        # See https://github.com/explosion/spaCy/issues/3435.
        from spacy.cli import link
        from spacy.util import get_package_path
        package_path = get_package_path(lang)
        link(lang, lang, model_path=package_path)
        return spacy.load(lang, disable=disable)
Example #2
Source File: nlp_utils.py From ludwig with Apache License 2.0 | 4 votes |
def load_nlp_pipeline(language='xx'):
    """Return the cached spaCy pipeline for ``language``, loading it lazily.

    The pipeline is loaded (and, if necessary, downloaded) at most once per
    language and memoized in the module-level ``nlp_pipelines`` dict.

    :param language: two-letter language code; must be a key of
        ``language_module_registry`` (default ``'xx'``, multilingual).
    :raises ValueError: if the language is not supported.
    :returns: the loaded spaCy pipeline for ``language``.
    """
    # Guard clause instead of wrapping the whole body in `else`.
    if language not in language_module_registry:
        # Fix: original message had no space between the two concatenated
        # literals and misspelled "Supported".
        logger.error(
            'Language {} is not supported. '
            'Supported languages are: {}'.format(
                language, language_module_registry.keys()
            ))
        # Fix: original raised a bare `ValueError` with no message.
        raise ValueError('Language {} is not supported'.format(language))
    spacy_module_name = language_module_registry[language]
    global nlp_pipelines
    if nlp_pipelines[language] is None:
        logger.info('Loading NLP pipeline')
        try:
            import spacy
        except ImportError:
            logger.error(
                ' spacy is not installed. '
                'In order to install all text feature dependencies run '
                'pip install ludwig[text]'
            )
            sys.exit(-1)
        try:
            nlp_pipelines[language] = spacy.load(
                spacy_module_name,
                disable=['parser', 'tagger', 'ner']
            )
        except OSError:
            # Fix: the `{}` placeholder was never filled in the original.
            logger.info(
                ' spaCy {} model is missing, downloading it '
                '(this will only happen once)'.format(spacy_module_name)
            )
            from spacy.cli import download
            download(spacy_module_name)
            nlp_pipelines[language] = spacy.load(
                spacy_module_name,
                disable=['parser', 'tagger', 'ner']
            )
    return nlp_pipelines[language]
Example #3
Source File: embeddings.py From danlp with BSD 3-Clause "New" or "Revised" License | 4 votes |
def _process_embeddings_for_spacy(tmp_file_path: str, meta_info: dict,
                                  cache_dir: str = DEFAULT_CACHE_DIR,
                                  clean_up_raw_data: bool = True,
                                  verbose: bool = False):
    """
    Convert pretrained word2vec-format embeddings into a spaCy model.

    spaCy requires vectors in its own on-disk format, so this exports the
    binary embeddings to a temporary ``.vec`` file, runs spaCy's
    ``init_model()`` on it, and stores the result in a folder called
    ``<pretrained_embedding>.spacy`` under ``cache_dir``.

    More information on converting pretrained word embeddings to spaCy
    models: https://spacy.io/usage/vectors-similarity#custom

    :param str tmp_file_path: the file name of the embedding binary file
    :param dict meta_info: embedding metadata; ``meta_info['name']`` names the model
    :param str cache_dir: the directory for storing cached data
    :param bool clean_up_raw_data: forwarded to the raw-data preprocessing step
    :param bool verbose: forwarded to the raw-data preprocessing step
    """
    from pathlib import Path
    from spacy.cli import init_model

    model_name = meta_info['name']
    binary_path = os.path.join(cache_dir, model_name + ".bin")

    # Produce the word2vec .bin file first if it is not cached yet.
    if not os.path.isfile(binary_path):
        _process_downloaded_embeddings(tmp_file_path, meta_info, cache_dir,
                                       clean_up_raw_data, verbose)

    # Export to the plain-text .vec format that init_model() consumes.
    tmp_vec_file = model_name + ".vec"
    vectors = KeyedVectors.load_word2vec_format(binary_path, binary=True,
                                                encoding='utf8')
    assert_wv_dimensions(vectors, model_name)
    vectors.save_word2vec_format(tmp_vec_file, binary=False)

    output_dir = os.path.join(cache_dir, model_name + '.spacy')
    os.makedirs(output_dir, exist_ok=True)

    # init_model() needs an absolute path to the output directory.
    absolute_output_dir = (Path(output_dir) if os.path.isabs(output_dir)
                           else Path(os.path.join(os.getcwd(), output_dir)))
    init_model('da', absolute_output_dir, vectors_loc=tmp_vec_file)

    os.remove(tmp_vec_file)  # Clean up the vec file