Python spacy.cli Examples

The following are 3 code examples of the spacy.cli module, drawn from open-source projects. The source file, project, and license for each example are noted above it.
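All three examples reach into spacy.cli to download or build model packages at runtime. As a reminder of the basic pattern, here is a minimal sketch, assuming a recent spaCy version and using en_core_web_sm purely as an example model name:

import spacy
from spacy.cli import download

download("en_core_web_sm")          # fetch the model package if it is not installed
nlp = spacy.load("en_core_web_sm")  # then load it as usual (see Example #1 for a spaCy 2.1 caveat)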
Example #1
Source File: spacy_annotator.py    From errudite with GNU General Public License v2.0
def load_lang_model(lang: str, disable: List[str]):
    """Load a spaCy language model, downloading it first if it is not installed.

    Arguments:
        lang {str} -- name of the spaCy language model
        disable {List[str]} -- pipeline components to disable; if only the
            tokenizer is needed, this can be ['parser', 'ner', 'textcat']

    Returns:
        Language -- the loaded spaCy pipeline
    """
    if 'coref' in lang:
        try:
            return spacy.load(lang, disable=disable)
        except Exception:
            # Fall back to the base language model if the coref model cannot be loaded.
            return SpacyAnnotator.load_lang_model(lang.split('_')[0], disable=disable)
    try:
        return spacy.load(lang, disable=disable)
    except OSError:
        logger.warning(f"spaCy model '{lang}' not found. Downloading and installing.")
        spacy_download(lang)
        # NOTE(mattg): The following four lines are a workaround suggested by Ines for spacy
        # 2.1.0, which removed the linking that was done in spacy 2.0.  importlib doesn't find
        # packages that were installed in the same python session, so the way `spacy_download`
        # works in 2.1.0 is broken for this use case.  These four lines can probably be removed
        # at some point in the future, once spacy has figured out a better way to handle this.
        # See https://github.com/explosion/spaCy/issues/3435.
        from spacy.cli import link
        from spacy.util import get_package_path
        package_path = get_package_path(lang)
        link(lang, lang, model_path=package_path)
        return spacy.load(lang, disable=disable)
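A hypothetical call site for this helper, assuming it is a static method of SpacyAnnotator (as the recursive call suggests); the model name and disable list are illustrative only:

# Hypothetical usage; the model name and disabled components are examples.
nlp = SpacyAnnotator.load_lang_model('en_core_web_sm', disable=['parser', 'ner', 'textcat'])
doc = nlp('The quick brown fox jumps over the lazy dog.')
print([token.text for token in doc])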
Example #2
Source File: nlp_utils.py    From ludwig with Apache License 2.0
def load_nlp_pipeline(language='xx'):
    if language not in language_module_registry:
        logger.error(
            'Language {} is not supported. '
            'Supported languages are: {}'.format(
                language,
                list(language_module_registry.keys())
            ))
        raise ValueError('Unsupported language: {}'.format(language))
    else:
        spacy_module_name = language_module_registry[language]
    global nlp_pipelines
    if nlp_pipelines[language] is None:
        logger.info('Loading NLP pipeline')
        try:
            import spacy
        except ImportError:
            logger.error(
                'spaCy is not installed. '
                'In order to install all text feature dependencies run '
                'pip install ludwig[text]'
            )
            sys.exit(-1)

        try:
            nlp_pipelines[language] = spacy.load(
                spacy_module_name,
                disable=['parser', 'tagger', 'ner']
            )
        except OSError:
            logger.info(
                'spaCy {} model is missing, downloading it '
                '(this will only happen once)'.format(spacy_module_name)
            )
            from spacy.cli import download
            download(spacy_module_name)
            nlp_pipelines[language] = spacy.load(
                spacy_module_name,
                disable=['parser', 'tagger', 'ner']
            )

    return nlp_pipelines[language] 
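A sketch of how this function might be used for tokenization, assuming 'en' is a key in language_module_registry and the corresponding model is installed or downloadable; the input sentence is illustrative:

# Hypothetical usage: tokenize an English sentence with the cached pipeline.
nlp = load_nlp_pipeline('en')
doc = nlp('This is a sample sentence for tokenization.')
print([token.text for token in doc])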
Example #3
Source File: embeddings.py    From danlp with BSD 3-Clause "New" or "Revised" License
def _process_embeddings_for_spacy(tmp_file_path: str, meta_info: dict,
                                  cache_dir: str = DEFAULT_CACHE_DIR,
                                  clean_up_raw_data: bool = True,
                                  verbose: bool = False):
    """
    To use pretrained embeddings with spaCy the embeddings need to be stored in
    a specific format. This function converts embeddings saved in the binary
    word2vec format to a spaCy model with the init_model() function from
    spaCy. The generated files will be saved in the cache_dir under a
    folder called <pretrained_embedding>.spacy

    More information on converting pretrained word embeddings to spaCy models here:
    https://spacy.io/usage/vectors-similarity#custom

    :param str tmp_file_path: the file name of the embedding binary file
    :param str cache_dir: the directory for storing cached data
    :param bool verbose:
    """
    from pathlib import Path
    from spacy.cli import init_model

    embeddings = meta_info['name']

    bin_file_path = os.path.join(cache_dir, embeddings + ".bin")

    if not os.path.isfile(
            bin_file_path):  # Preprocess to transform to word2vec .bin format
        _process_downloaded_embeddings(tmp_file_path, meta_info, cache_dir,
                                       clean_up_raw_data, verbose)

    vec_file = embeddings + ".vec"

    word_vecs = KeyedVectors.load_word2vec_format(bin_file_path, binary=True,
                                                  encoding='utf8')
    assert_wv_dimensions(word_vecs, embeddings)
    word_vecs.save_word2vec_format(vec_file, binary=False)

    spacy_dir = os.path.join(cache_dir, embeddings + '.spacy')
    os.makedirs(spacy_dir, exist_ok=True)

    if os.path.isabs(spacy_dir):
        full_spacy_dir = Path(spacy_dir)
    else:
        full_spacy_dir = Path(os.path.join(os.getcwd(), spacy_dir))

    init_model('da', full_spacy_dir, vectors_loc=vec_file)

    os.remove(vec_file)  # Clean up the vec file
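After the conversion, the generated .spacy directory can be loaded like any other spaCy model directory. A minimal sketch, where the cache directory and embedding name are placeholders rather than real danlp values:

# Hypothetical follow-up; the paths below are placeholders.
import os
import spacy

cache_dir = '/path/to/cache_dir'        # placeholder for the cache_dir argument above
embeddings = 'pretrained_embedding'     # placeholder for meta_info['name']
nlp = spacy.load(os.path.join(cache_dir, embeddings + '.spacy'))
doc = nlp('hund')                       # Danish text, since the model language is 'da'
print(doc.vector[:5])                   # vectors come from the converted embeddings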