Python nltk.parse.stanford.StanfordParser() Examples

The following are 8 code examples of nltk.parse.stanford.StanfordParser(). You can go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module nltk.parse.stanford, or try the search function.
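Before the individual examples, here is a minimal, hedged setup sketch (the install paths are placeholders you must adapt). nltk.parse.stanford locates the parser and model jars through the STANFORD_PARSER and STANFORD_MODELS environment variables, or through explicit path_to_jar/path_to_models_jar arguments; note that newer NLTK releases deprecate this interface in favor of nltk.parse.corenlp.

import os
from nltk.parse import stanford

# Placeholder paths: point these at your own Stanford Parser download.
os.environ['STANFORD_PARSER'] = '/opt/stanford-parser'
os.environ['STANFORD_MODELS'] = '/opt/stanford-parser'

parser = stanford.StanfordParser(
    model_path='edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz')
tree = list(parser.raw_parse('The quick brown fox jumps over the lazy dog.'))[0]
tree.pretty_print()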
Example #1
Source File: corpus_cleaner.py    From acl2017-interactive_summarizer with Apache License 2.0
def __init__(self, datasets_path, corpus_name, parse_type, lang='english'):
    self.datasets_path = datasets_path
    self.corpus_name = corpus_name
    self.corpus_path = path.join(datasets_path, corpus_name)
    self.docs_path = path.join(self.corpus_path, "docs")
    self.topics_file = path.join(self.corpus_path, "topics.xml")
    self.models_path = path.join(self.corpus_path, "models")
    self.smodels_path = path.join(self.corpus_path, "smodels")
    self.jar_path = path.join(PROJECT_PATH, "summarizer", "jars")
    # NLTK locates the Stanford jars through the CLASSPATH variable.
    os.environ['CLASSPATH'] = self.jar_path
    self.cleaned_path = path.join(datasets_path, "processed")

    if parse_type == 'parse':
        if lang == 'english':
            self.parser = stanford.StanfordParser(
                model_path=path.join(self.jar_path, "englishPCFG.ser.gz"))
        elif lang == 'german':
            self.parser = stanford.StanfordParser(
                model_path=path.join(self.jar_path, "germanPCFG.ser.gz"))
            # self.cleaned_path = path.join(datasets_path, "processed.parse")
    elif parse_type == 'props':  # TODO
        if lang == 'english':
            self.props_parser = ClausIE.get_instance()
        elif lang == 'german':
            self.parser = stanford.StanfordParser(
                model_path=path.join(self.jar_path, "germanPCFG.ser.gz"))
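A hypothetical driver for this constructor (the class name, dataset arguments, and sentence are made up for illustration):

cleaner = CorpusCleaner('/data', 'duc2004', parse_type='parse', lang='english')
tree = list(cleaner.parser.raw_parse('The cat sat on the mat.'))[0]
tree.pretty_print()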
Example #2
Source File: svo.py    From py-nltk-svo with MIT License
def __init__(self):
    """
    Initialize the SVO methods.
    """
    # Penn Treebank POS tags used to classify parse-tree leaves.
    self.noun_types = ["NN", "NNP", "NNPS", "NNS", "PRP"]
    self.verb_types = ["VB", "VBD", "VBG", "VBN", "VBP", "VBZ"]
    self.adjective_types = ["JJ", "JJR", "JJS"]
    self.pred_verb_phrase_siblings = None
    self.parser = stanford.StanfordParser()
    self.sent_detector = nltk.data.load('tokenizers/punkt/english.pickle')
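To see what the POS-tag lists above are for, here is a small sketch (svo stands for an instance of the class; the sentence is made up) that filters the leaves of a parse tree by tag:

tree = list(svo.parser.raw_parse('John loves Mary'))[0]
nouns = [word for word, tag in tree.pos() if tag in svo.noun_types]
verbs = [word for word, tag in tree.pos() if tag in svo.verb_types]
# typically nouns == ['John', 'Mary'] (NNP) and verbs == ['loves'] (VBZ)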
Example #3
Source File: readai.py    From readAI with GNU General Public License v2.0
def main(argv):

    debug = False

    try:
        opts, args = getopt.getopt(argv, "hd", ["help", "debug"])
    except getopt.GetoptError:
        usage()
        sys.exit(2)
    for opt, arg in opts:
        # getopt reports long options with their leading dashes.
        if opt in ["-h", "--help"]:
            usage()
            sys.exit(2)
        if opt in ["-d", "--debug"]:
            debug = True

    parser = stanford.StanfordParser()

    line = input("Enter line: ")

    while line != 'stop':
        sent = list(parser.raw_parse(line))[0]
        if debug:
            print(sent)  # print parse tree
        if sent[0].label() == "SBARQ":
            print(answer(sent))
        else:
            try:
                describe(sent)
            except ValueError as e:
                print("Error describing sentence. " + str(e))
            if debug:
                print(smap)  # print semantic map (module-level global)
        line = input("Enter line: ")
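The dispatch above hinges on the label of the root's first child: in the Penn Treebank annotation scheme, SBARQ marks a wh-question clause. A quick check, assuming a constructed parser (the exact label can vary with the model version):

sent = list(parser.raw_parse('Where is the station?'))[0]
print(sent[0].label())  # typically 'SBARQ' here, 'S' for a declarative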
Example #4
Source File: rdf_triple.py    From RDF-Triple-API with MIT License
def clear_data(self):
    self.parser = stanford.StanfordParser(
        model_path=r"/users/ted/stanford nlp/stanford-parser-full-2015-01-30/stanford-parser-3.5.1-models/edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz")
    self.first_NP = ''
    self.first_VP = ''
    self.parse_tree = None
    self.subject = RDF_Triple.RDF_SOP('subject')
    self.predicate = RDF_Triple.RDF_SOP('predicate', 'VB')
    self.Object = RDF_Triple.RDF_SOP('object')
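The model_path above is an absolute filesystem path into an unpacked models jar; as Example #8 below shows, a resource path relative to a models jar on the CLASSPATH works as well. A hedged equivalent, assuming the models jar is already on the classpath:

parser = stanford.StanfordParser(
    model_path='edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz')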
Example #5
Source File: extract_sentences.py    From StrepHit with GNU General Public License v3.0
def setup_extractor(self):
    self.splitter = PunktSentenceSplitter(self.language)
    self.parser = StanfordParser(path_to_jar='dev/stanford-corenlp-3.6.0.jar',
                                 path_to_models_jar='dev/stanford-corenlp-3.6.0-models.jar',
                                 java_options=' -mx2G -Djava.ext.dirs=dev/')

    # Invert the lemma -> tokens mapping so each token can be looked up directly.
    self.token_to_lemma = {}
    for lemma, tokens in self.lemma_to_token.items():
        for t in tokens:
            self.token_to_lemma[t] = lemma
    self.all_verbs = set(self.token_to_lemma.keys())
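The inversion of lemma_to_token is easiest to see on toy data (the values below are made up):

lemma_to_token = {'run': ['run', 'runs', 'ran'], 'be': ['is', 'are', 'was']}
token_to_lemma = {t: lemma for lemma, tokens in lemma_to_token.items() for t in tokens}
assert token_to_lemma['ran'] == 'run'
assert set(token_to_lemma) == {'run', 'runs', 'ran', 'is', 'are', 'was'}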
Example #6
Source File: compute_lu_distribution.py    From StrepHit with GNU General Public License v3.0
def main(corpus, verbs, processes, outfile, sub_sentences):
    """ Compute the LU distribution in the corpus, i.e. how many LUs per sentence.
    """
    global splitter, tagger, parser, all_verbs
    splitter = PunktSentenceSplitter('en')
    tagger = TTPosTagger('en')
    parser = StanfordParser(path_to_jar='dev/stanford-corenlp-3.6.0.jar',
                            path_to_models_jar='dev/stanford-corenlp-3.6.0-models.jar',
                            java_options=' -mx1G -Djava.ext.dirs=dev/')  # no way to make classpath work
    # Union of all verb tokens; needs `from functools import reduce` on Python 3.
    all_verbs = reduce(lambda x, y: x.union(y), map(set, json.load(verbs).values()), set())
    all_verbs.discard('be')
    all_verbs.discard('have')

    args = load_corpus(corpus, 'bio', text_only=True)
    worker = worker_with_sub_sentences if sub_sentences else worker_with_sentences
    counter = defaultdict(int)

    for i, counts in enumerate(parallel.map(worker, args, processes)):
        for k, v in counts.items():
            counter[k] += v

        if (i + 1) % 10000 == 0:
            logger.info('Processed %d documents', i + 1)

    counter = OrderedDict(sorted(counter.items(), key=lambda kv: kv[0]))
    for k, v in counter.items():
        print(k, v)

    json.dump(counter, outfile, indent=2)
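The all_verbs construction folds the per-LU token lists into a single set; on toy data (values made up) it behaves like this:

from functools import reduce

verbs = {'statement': ['say', 'be'], 'motion': ['go', 'have', 'say']}
all_verbs = reduce(lambda x, y: x.union(y), map(set, verbs.values()), set())
all_verbs.discard('be')
all_verbs.discard('have')
# all_verbs == {'say', 'go'}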
Example #7
Source File: parser.py    From Lango with GNU General Public License v2.0
def __init__(self):
    self.parser = StanfordParser()
Example #8
Source File: parser.py    From Lango with GNU General Public License v2.0
def __init__(self):
    self.parser = StanfordParser(
        model_path='edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz')
    # Widen the classpath to every jar shipped alongside the parser jar;
    # find_jars_within_path comes from nltk.internals.
    stanford_dir = self.parser._classpath[0].rpartition('/')[0]
    self.parser._classpath = tuple(find_jars_within_path(stanford_dir))
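find_jars_within_path is an NLTK internal helper that globs for jar files under a directory; widening _classpath with its result lets the JVM resolve the parser's transitive dependencies. On its own (the directory is a placeholder for your unpacked distribution):

from nltk.internals import find_jars_within_path

jars = find_jars_within_path('/opt/stanford-parser-full-2015-01-30')
print(jars)  # every jar found under that directory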