Python nltk.internals.find_jars_within_path() Examples
The following are 10 code examples of nltk.internals.find_jars_within_path().
You may also want to check out all available functions and classes of the module nltk.internals.
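All ten examples below follow the same basic pattern, shown here as a minimal sketch: point find_jars_within_path() at a directory, collect the .jar paths it returns, and join them into a Java classpath. The directory /opt/stanford-postagger is a placeholder used for illustration only.

import os
from nltk.internals import find_jars_within_path

# Placeholder directory; replace with wherever the Stanford or MaltParser tools are unpacked.
stanford_dir = '/opt/stanford-postagger'

# find_jars_within_path() walks the directory tree and returns the paths of all .jar files in it.
stanford_jars = find_jars_within_path(stanford_dir)

# Common follow-up in the examples below: join the jars into a single classpath string.
classpath = os.pathsep.join(stanford_jars)
print(classpath)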
Example #1
Source File: malt.py From razzy-spinner with GNU General Public License v3.0
import os
from nltk.internals import find_dir, find_jars_within_path

def find_maltparser(parser_dirname):
    """
    A function to find the MaltParser .jar file and its dependencies.
    """
    if os.path.exists(parser_dirname):  # If a full path is given.
        _malt_dir = parser_dirname
    else:  # Try to find the maltparser directory in environment variables.
        _malt_dir = find_dir(parser_dirname, env_vars=('MALT_PARSER',))
    # Check that the found directory contains all the necessary .jar files.
    _malt_jars = set(find_jars_within_path(_malt_dir))
    _jars = set(jar.rpartition('/')[2] for jar in _malt_jars)
    malt_dependencies = set(['log4j.jar', 'libsvm.jar', 'liblinear-1.8.jar'])
    assert malt_dependencies.issubset(_jars)
    assert any(filter(lambda i: i.startswith('maltparser-') and i.endswith('.jar'),
                      _jars))
    return list(_malt_jars)
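A hedged example of calling the helper above; the directory name maltparser-1.8.1 is assumed for illustration and depends on which MaltParser release is installed (otherwise the MALT_PARSER environment variable is consulted).

# Assumed example: 'maltparser-1.8.1' is a placeholder directory name. If it does not
# exist as a path, find_dir() falls back to the MALT_PARSER environment variable.
malt_jars = find_maltparser('maltparser-1.8.1')
print(malt_jars)  # paths of maltparser-*.jar, log4j.jar, libsvm.jar, liblinear-1.8.jar, ...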
Example #2
Source File: stanford-pos-tagger.py From seq2seq-keyphrase with MIT License
def __init__(self, model_filename, path_to_jar=None, encoding='utf8',
             verbose=False, java_options='-mx1000m'):
    if not self._JAR:
        warnings.warn('The StanfordTagger class is not meant to be '
                      'instantiated directly. Did you mean '
                      'StanfordPOSTagger or StanfordNERTagger?')
    self._stanford_jar = find_jar(
        self._JAR, path_to_jar,
        searchpath=(), url=_stanford_url,
        verbose=verbose)

    self._stanford_model = find_file(
        model_filename, env_vars=('STANFORD_MODELS',), verbose=verbose)

    # Adding logging jar files to classpath
    stanford_dir = os.path.split(self._stanford_jar)[0]
    self._stanford_jar = tuple(find_jars_within_path(stanford_dir))

    self._encoding = encoding
    self.java_options = java_options
Example #3
Source File: malt.py From V1EngineeringInc-Docs with Creative Commons Attribution Share Alike 4.0 International
import os
from nltk.internals import find_dir, find_jars_within_path

def find_maltparser(parser_dirname):
    """
    A function to find the MaltParser .jar file and its dependencies.
    """
    if os.path.exists(parser_dirname):  # If a full path is given.
        _malt_dir = parser_dirname
    else:  # Try to find the maltparser directory in environment variables.
        _malt_dir = find_dir(parser_dirname, env_vars=('MALT_PARSER',))
    # Check that the found directory contains all the necessary .jar files.
    _malt_jars = set(find_jars_within_path(_malt_dir))
    _jars = set(os.path.split(jar)[1] for jar in _malt_jars)
    malt_dependencies = set(['log4j.jar', 'libsvm.jar', 'liblinear-1.8.jar'])
    assert malt_dependencies.issubset(_jars)
    assert any(
        filter(lambda i: i.startswith('maltparser-') and i.endswith('.jar'), _jars)
    )
    return list(_malt_jars)
Example #4
Source File: keyphrase_test_dataset.py From seq2seq-keyphrase with MIT License
def get_postag_with_record(records, pairs):
    path = os.path.dirname(__file__)
    path = path[:path.rfind(os.sep, 0, len(path) - 10) + 1] + 'stanford-postagger/'
    print(path)
    # jar = '/Users/memray/Project/stanford/stanford-postagger/stanford-postagger.jar'
    jar = path + '/stanford-postagger.jar'
    model = path + '/models/english-bidirectional-distsim.tagger'
    pos_tagger = StanfordPOSTagger(model, jar)
    # model = '/Users/memray/Project/stanford/stanford-postagger/models/english-left3words-distsim.tagger'
    # model = '/Users/memray/Project/stanford/stanford-postagger/models/english-bidirectional-distsim.tagger'

    stanford_dir = jar.rpartition('/')[0]
    stanford_jars = find_jars_within_path(stanford_dir)
    pos_tagger._stanford_jar = ':'.join(stanford_jars)

    tagged_source = []
    # Predict on testing data
    for idx, (record, pair) in enumerate(zip(records, pairs)):  # len(test_data_plain)
        print('*' * 100)
        print('File: ' + record['name'])
        print('Input: ' + str(pair[0]))
        text = pos_tagger.tag(pair[0])
        print('[%d/%d][%d] : %s' % (idx, len(records), len(pair[0]), str(text)))

        tagged_source.append(text)

    return tagged_source
Example #5
Source File: keyphrase_test_dataset.py From seq2seq-keyphrase with MIT License
def get_postag_with_index(sources, idx2word, word2idx):
    path = os.path.dirname(__file__)
    path = path[:path.rfind(os.sep, 0, len(path) - 10) + 1] + 'stanford-postagger/'
    print(path)
    # jar = '/Users/memray/Project/stanford/stanford-postagger/stanford-postagger.jar'
    jar = path + '/stanford-postagger.jar'
    model = path + '/models/english-bidirectional-distsim.tagger'
    pos_tagger = StanfordPOSTagger(model, jar)
    # model = '/Users/memray/Project/stanford/stanford-postagger/models/english-left3words-distsim.tagger'
    # model = '/Users/memray/Project/stanford/stanford-postagger/models/english-bidirectional-distsim.tagger'

    stanford_dir = jar.rpartition('/')[0]
    stanford_jars = find_jars_within_path(stanford_dir)
    pos_tagger._stanford_jar = ':'.join(stanford_jars)

    tagged_source = []
    # Predict on testing data
    for idx in range(len(sources)):  # len(test_data_plain)
        test_s_o = sources[idx]
        source_text = keyphrase_utils.cut_zero(test_s_o, idx2word)

        text = pos_tagger.tag(source_text)
        print('[%d/%d] : %s' % (idx, len(sources), str(text)))

        tagged_source.append(text)

    return tagged_source
Example #6
Source File: test_dataset_producer.py From seq2seq-keyphrase-pytorch with Apache License 2.0
def load_pos_tagger():
    # Resolve the stanford-postagger directory relative to the 'pykp' package root.
    path = os.path.dirname(__file__)
    path = os.path.join(path[: path.rfind('pykp') + 4], 'stanford-postagger')
    print(path)
    # jar = '/Users/memray/Project/stanford/stanford-postagger/stanford-postagger.jar'
    jar = path + '/stanford-postagger.jar'
    model = path + '/models/english-bidirectional-distsim.tagger'
    pos_tagger = StanfordPOSTagger(model, jar)

    stanford_dir = jar.rpartition('/')[0]
    stanford_jars = find_jars_within_path(stanford_dir)
    pos_tagger._stanford_jar = ':'.join(stanford_jars)

    return pos_tagger
Example #7
Source File: parser.py From Lango with GNU General Public License v2.0
def __init__(self):
    self.parser = StanfordParser(
        model_path='edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz')
    stanford_dir = self.parser._classpath[0].rpartition('/')[0]
    self.parser._classpath = tuple(find_jars_within_path(stanford_dir))
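For context, a sketch of how such a wrapper might be used once the classpath has been patched. The class name Wrapper is assumed here (the original Lango class name is not shown in the snippet), while raw_parse() is part of NLTK's StanfordParser API.

# Assumed usage sketch: "Wrapper" stands in for the Lango class that owns the
# __init__ above and exposes NLTK's StanfordParser as self.parser.
wrapper = Wrapper()
sentence = 'The quick brown fox jumps over the lazy dog.'
# raw_parse() yields one or more parse trees for the sentence.
for tree in wrapper.parser.raw_parse(sentence):
    print(tree)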
Example #8
Source File: test_dataset_producer.py From OpenNMT-kpg-release with MIT License
def load_pos_tagger():
    # Resolve the stanford-postagger directory relative to the 'pykp' package root.
    path = os.path.dirname(__file__)
    path = os.path.join(path[: path.rfind('pykp') + 4], 'stanford-postagger')
    print(path)
    # jar = '/Users/memray/Project/stanford/stanford-postagger/stanford-postagger.jar'
    jar = path + '/stanford-postagger.jar'
    model = path + '/models/english-bidirectional-distsim.tagger'
    pos_tagger = StanfordPOSTagger(model, jar)

    stanford_dir = jar.rpartition('/')[0]
    stanford_jars = find_jars_within_path(stanford_dir)
    pos_tagger._stanford_jar = ':'.join(stanford_jars)

    return pos_tagger
Example #9
Source File: export_dataset.py From OpenNMT-kpg-release with MIT License
def load_pos_tagger(stanford_base_dir):
    # path = os.path.dirname(__file__)
    # path = os.path.join(file_dir[: file_dir.rfind('pykp') + 4], 'stanford-postagger')
    # print(path)
    # jar = '/Users/memray/Project/stanford/stanford-postagger/stanford-postagger.jar'
    jar = stanford_base_dir + '/stanford-postagger.jar'
    model = stanford_base_dir + '/models/english-bidirectional-distsim.tagger'
    pos_tagger = StanfordPOSTagger(model_filename=model, path_to_jar=jar)

    stanford_base_dir = jar.rpartition('/')[0]
    stanford_jars = find_jars_within_path(stanford_base_dir)
    pos_tagger._stanford_jar = ':'.join(stanford_jars)

    return pos_tagger
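A hedged usage sketch for the loader above; the base directory /opt/stanford-postagger is only a placeholder, and tag() is NLTK's StanfordPOSTagger method, which expects a list of tokens.

# Placeholder path: point this at an unpacked Stanford POS tagger distribution.
pos_tagger = load_pos_tagger('/opt/stanford-postagger')

# StanfordPOSTagger.tag() takes a list of tokens and returns (token, tag) pairs.
tokens = 'keyphrase generation with sequence to sequence models'.split()
print(pos_tagger.tag(tokens))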
Example #10
Source File: keyphrase_test_dataset.py From seq2seq-keyphrase with MIT License
def check_postag(config):
    train_set, validation_set, test_set, idx2word, word2idx = deserialize_from_file(config['dataset'])

    path = os.path.dirname(__file__)
    path = path[:path.rfind(os.sep, 0, len(path) - 10) + 1] + 'stanford-postagger/'
    jar = path + '/stanford-postagger.jar'
    model = path + '/models/english-bidirectional-distsim.tagger'
    pos_tagger = StanfordPOSTagger(model, jar)

    for dataset_name in config['testing_datasets']:
        # override the original test_set
        # test_set = load_testing_data(dataset_name, kwargs=dict(basedir=config['path']))(idx2word, word2idx, config['preprocess_type'])
        test_sets = load_additional_testing_data(config['testing_datasets'], idx2word, word2idx, config)
        test_set = test_sets[dataset_name]

        # print(dataset_name)
        # print('Avg length=%d, Max length=%d' % (np.average([len(s) for s in test_set['source']]), np.max([len(s) for s in test_set['source']])))
        test_data_plain = list(zip(*(test_set['source'], test_set['target'])))
        test_size = len(test_data_plain)

        # Alternatively to setting the CLASSPATH, add the jar and model via their paths:
        jar = '/Users/memray/Project/stanford/stanford-postagger/stanford-postagger.jar'
        # model = '/Users/memray/Project/stanford/stanford-postagger/models/english-left3words-distsim.tagger'
        model = '/Users/memray/Project/stanford/stanford-postagger/models/english-bidirectional-distsim.tagger'
        pos_tagger = StanfordPOSTagger(model, jar)

        for idx in range(len(test_data_plain)):  # len(test_data_plain)
            test_s_o, test_t_o = test_data_plain[idx]
            source = keyphrase_utils.cut_zero(test_s_o, idx2word)
            print(source)

            # Add other jars from the Stanford directory
            stanford_dir = jar.rpartition('/')[0]
            stanford_jars = find_jars_within_path(stanford_dir)
            pos_tagger._stanford_jar = ':'.join(stanford_jars)

            text = pos_tagger.tag(source)
            print(text)