Python nltk.parse.stanford.StanfordParser() Examples
The following are 8 code examples of nltk.parse.stanford.StanfordParser(), collected from open-source projects.
You may also want to check out all other available functions and classes of the nltk.parse.stanford module.
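For orientation, here is a minimal sketch of typical usage before the project examples. It assumes a local Stanford Parser download; the jar paths and the input sentence are placeholders rather than part of any example below, and in recent NLTK releases this class is deprecated in favor of nltk.parse.corenlp.

import os
from nltk.parse import stanford

# Placeholder locations of a local Stanford Parser download.
os.environ['STANFORD_PARSER'] = '/path/to/stanford-parser.jar'
os.environ['STANFORD_MODELS'] = '/path/to/stanford-parser-models.jar'

# Load the bundled English PCFG grammar.
parser = stanford.StanfordParser(
    model_path='edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz')

# raw_parse() yields nltk.tree.Tree objects, one per parse.
for tree in parser.raw_parse('The quick brown fox jumps over the lazy dog.'):
    print(tree)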
Example #1
Source File: corpus_cleaner.py From acl2017-interactive_summarizer with Apache License 2.0
def __init__(self, datasets_path, corpus_name, parse_type, lang='english'):
    self.datasets_path = datasets_path
    self.corpus_name = corpus_name
    self.corpus_path = path.join(datasets_path, corpus_name)
    self.docs_path = path.join(self.corpus_path, "docs")
    self.topics_file = path.join(self.corpus_path, "topics.xml")
    self.models_path = path.join(self.corpus_path, "models")
    self.smodels_path = path.join(self.corpus_path, "smodels")
    self.jar_path = path.join(PROJECT_PATH, "summarizer", "jars")
    os.environ['CLASSPATH'] = self.jar_path
    self.cleaned_path = path.join(datasets_path, "processed")
    if parse_type == 'parse':
        if lang == 'english':
            self.parser = stanford.StanfordParser(model_path="%s/englishPCFG.ser.gz" % (self.jar_path))
        if lang == 'german':
            self.parser = stanford.StanfordParser(model_path="%s/germanPCFG.ser.gz" % (self.jar_path))
        # self.cleaned_path = path.join(datasets_path, "processed.parse")
    if parse_type == 'props':
        # TODO
        if lang == 'english':
            self.props_parser = ClausIE.get_instance()
        if lang == 'german':
            self.parser = stanford.StanfordParser(model_path="%s/germanPCFG.ser.gz" % (self.jar_path))
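This constructor relies on the CLASSPATH environment variable, set just before the parser is created, so that NLTK can locate the Stanford Parser jars without an explicit path_to_jar argument; model_path then points at a serialized PCFG grammar stored alongside those jars.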
Example #2
Source File: svo.py From py-nltk-svo with MIT License
def __init__(self):
    """
    Initialize the SVO Methods
    """
    self.noun_types = ["NN", "NNP", "NNPS", "NNS", "PRP"]
    self.verb_types = ["VB", "VBD", "VBG", "VBN", "VBP", "VBZ"]
    self.adjective_types = ["JJ", "JJR", "JJS"]
    self.pred_verb_phrase_siblings = None
    self.parser = stanford.StanfordParser()
    self.sent_detector = nltk.data.load('tokenizers/punkt/english.pickle')
Example #3
Source File: readai.py From readAI with GNU General Public License v2.0
def main(argv):
    debug = False
    try:
        opts, args = getopt.getopt(argv, "hd", ["help", "debug"])
    except getopt.GetoptError as e:
        usage()
        sys.exit(2)
    for opt, arg in opts:
        if opt in ["-h", "--help"]:
            usage()
            sys.exit(2)
        if opt in ["-d", "--debug"]:
            debug = True

    parser = stanford.StanfordParser()
    line = raw_input("Enter line: ")
    while line != 'stop':
        sent = list(parser.raw_parse(line))[0]
        if debug:
            print sent  # print parse tree
        if sent[0].label() == "SBARQ":
            print answer(sent)
        else:
            try:
                describe(sent)
            except ValueError as e:
                print "Error describing sentence. " + str(e)
        if debug:
            print smap  # print semantic map
        line = raw_input("Enter line: ")
Example #4
Source File: rdf_triple.py From RDF-Triple-API with MIT License
def clear_data(self):
    self.parser = stanford.StanfordParser(
        model_path=r"/users/ted/stanford nlp/stanford-parser-full-2015-01-30/stanford-parser-3.5.1-models/edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz")
    self.first_NP = ''
    self.first_VP = ''
    self.parse_tree = None
    self.subject = RDF_Triple.RDF_SOP('subject')
    self.predicate = RDF_Triple.RDF_SOP('predicate', 'VB')
    self.Object = RDF_Triple.RDF_SOP('object')
Example #5
Source File: extract_sentences.py From StrepHit with GNU General Public License v3.0
def setup_extractor(self):
    self.splitter = PunktSentenceSplitter(self.language)
    self.parser = StanfordParser(path_to_jar='dev/stanford-corenlp-3.6.0.jar',
                                 path_to_models_jar='dev/stanford-corenlp-3.6.0-models.jar',
                                 java_options=' -mx2G -Djava.ext.dirs=dev/')

    self.token_to_lemma = {}
    for lemma, tokens in self.lemma_to_token.iteritems():
        for t in tokens:
            self.token_to_lemma[t] = lemma
    self.all_verbs = set(self.token_to_lemma.keys())
Example #6
Source File: compute_lu_distribution.py From StrepHit with GNU General Public License v3.0
def main(corpus, verbs, processes, outfile, sub_sentences):
    """ Compute the LU distribution in the corpus, i.e. how many LUs per sentence """
    global splitter, tagger, parser, all_verbs
    splitter = PunktSentenceSplitter('en')
    tagger = TTPosTagger('en')
    parser = StanfordParser(path_to_jar='dev/stanford-corenlp-3.6.0.jar',
                            path_to_models_jar='dev/stanford-corenlp-3.6.0-models.jar',
                            java_options=' -mx1G -Djava.ext.dirs=dev/')  # no way to make classpath work
    all_verbs = reduce(lambda x, y: x.union(y), imap(set, json.load(verbs).values()), set())
    all_verbs.discard('be')
    all_verbs.discard('have')

    args = load_corpus(corpus, 'bio', text_only=True)
    worker = worker_with_sub_sentences if sub_sentences else worker_with_sentences
    counter = defaultdict(int)

    for i, counts in enumerate(parallel.map(worker, args, processes)):
        for k, v in counts.iteritems():
            counter[k] += v

        if (i + 1) % 10000 == 0:
            logger.info('Processed %d documents', i + 1)

    counter = OrderedDict(sorted(counter.items(), key=lambda (k, v): k))
    for k, v in counter.iteritems():
        print k, v

    json.dump(counter, outfile, indent=2)
Example #7
Source File: parser.py From Lango with GNU General Public License v2.0
def __init__(self):
    self.parser = StanfordParser()
Example #8
Source File: parser.py From Lango with GNU General Public License v2.0
def __init__(self):
    self.parser = StanfordParser(
        model_path='edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz')
    stanford_dir = self.parser._classpath[0].rpartition('/')[0]
    self.parser._classpath = tuple(find_jars_within_path(stanford_dir))
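The classpath juggling in this last example is a common workaround: find_jars_within_path (from nltk.internals) collects every jar found under the Stanford Parser installation directory, so the companion models jar becomes visible to the JVM even though only the main parser jar was located automatically. Note that it touches the private _classpath attribute, so it may break with other NLTK versions.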