opennlp.tools.chunker.ChunkerME Java Examples
The following examples show how to use opennlp.tools.chunker.ChunkerME.
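For orientation, here is a minimal, self-contained sketch of the usual call sequence (tokenize, POS-tag, then chunk). It is an illustrative outline rather than code from any of the projects below, and the model paths (models/en-pos-maxent.bin, models/en-chunker.bin) are assumptions that must point at the standard OpenNLP models on your machine.

import java.io.FileInputStream;
import java.io.InputStream;

import opennlp.tools.chunker.ChunkerME;
import opennlp.tools.chunker.ChunkerModel;
import opennlp.tools.postag.POSModel;
import opennlp.tools.postag.POSTaggerME;
import opennlp.tools.tokenize.SimpleTokenizer;

public class ChunkerQuickStart {
    public static void main(String[] args) throws Exception {
        // Tokenize the input sentence.
        String[] tokens = SimpleTokenizer.INSTANCE
                .tokenize("The quick brown fox jumps over the lazy dog.");

        // Assumed model locations; adjust to wherever the OpenNLP models live.
        try (InputStream posIn = new FileInputStream("models/en-pos-maxent.bin");
             InputStream chunkIn = new FileInputStream("models/en-chunker.bin")) {

            // POS-tag the tokens; the chunker needs both tokens and tags.
            POSTaggerME tagger = new POSTaggerME(new POSModel(posIn));
            String[] tags = tagger.tag(tokens);

            // Chunk: returns one BIO label (e.g. B-NP, I-NP, O) per token.
            ChunkerME chunker = new ChunkerME(new ChunkerModel(chunkIn));
            String[] chunks = chunker.chunk(tokens, tags);

            for (int i = 0; i < tokens.length; i++) {
                System.out.println(tokens[i] + "\t" + tags[i] + "\t" + chunks[i]);
            }
        }
    }
}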
Example #1
Source File: AnswerTypeEventStream.java From wiseowl with MIT License
public static void main(String[] args) throws IOException {
    if (args.length == 0) {
        System.err.println("Usage: AnswerTypeEventStream eventfile");
        System.exit(1);
    }
    int ai = 0;
    String eventFile = args[ai++];
    String modelsDirProp = System.getProperty("models.dir",
            "book/src/main" + File.separator + "opennlp-models" + File.separator + "english");
    File modelsDir = new File(modelsDirProp);
    File wordnetDir = new File(System.getProperty("wordnet.dir",
            "book/src/main" + File.separator + "WordNet-3.0" + File.separator + "dict"));

    InputStream chunkerStream = new FileInputStream(new File(modelsDir, "en-chunker.bin"));
    ChunkerModel chunkerModel = new ChunkerModel(chunkerStream);
    ChunkerME chunker = new ChunkerME(chunkerModel);
    InputStream posStream = new FileInputStream(new File(modelsDir, "en-pos-maxent.bin"));
    POSModel posModel = new POSModel(posStream);
    POSTaggerME tagger = new POSTaggerME(posModel);
    Parser parser = new ChunkParser(chunker, tagger);

    AnswerTypeContextGenerator actg = new AnswerTypeContextGenerator(wordnetDir);
    EventStream es = new AnswerTypeEventStream(eventFile, actg, parser);
    while (es.hasNext()) {
        System.out.println(es.next().toString());
    }
}
Example #2
Source File: OpenNLP.java From baleen with Apache License 2.0
@Override
public void doInitialize(UimaContext aContext) throws ResourceInitializationException {
    try {
        tokensModel.loadModel(TokenizerModel.class, getClass().getResourceAsStream("en_token.bin"));
        sentencesModel.loadModel(SentenceModel.class, getClass().getResourceAsStream("en_sent.bin"));
        posModel.loadModel(POSModel.class, getClass().getResourceAsStream("en_pos_maxent.bin"));
        chunkModel.loadModel(ChunkerModel.class, getClass().getResourceAsStream("en_chunker.bin"));
    } catch (BaleenException be) {
        getMonitor().error("Unable to load OpenNLP Language Models", be);
        throw new ResourceInitializationException(be);
    }

    try {
        sentenceDetector = new SentenceDetectorME((SentenceModel) sentencesModel.getModel());
        wordTokenizer = new TokenizerME((TokenizerModel) tokensModel.getModel());
        posTagger = new POSTaggerME((POSModel) posModel.getModel());
        phraseChunker = new ChunkerME((ChunkerModel) chunkModel.getModel());
    } catch (Exception e) {
        getMonitor().error("Unable to create OpenNLP taggers", e);
        throw new ResourceInitializationException(e);
    }
}
Example #3
Source File: BasicActions.java From knowledge-extraction with Apache License 2.0
@Test
public void testChunker() {
    try (InputStream modelIn = BasicActions.class.getClassLoader()
            .getResourceAsStream(Consts.EN_CHUNK_MODEL)) {
        String[] tokens = testTokenizer();
        String[] tags = testTagger();
        ChunkerModel chunkerModel = new ChunkerModel(modelIn);
        ChunkerME chunker = new ChunkerME(chunkerModel);
        String[] chunks = chunker.chunk(tokens, tags);
        System.out.println(Arrays.toString(chunks));
    } catch (IOException e) {
        e.printStackTrace();
    }
}
Example #4
Source File: ChunkerUnitTest.java From tutorials with MIT License
@Test
public void givenChunkerModel_whenChunk_thenChunksAreDetected() throws Exception {
    SimpleTokenizer tokenizer = SimpleTokenizer.INSTANCE;
    String[] tokens = tokenizer.tokenize("He reckons the current account deficit will narrow to only 8 billion.");

    InputStream inputStreamPOSTagger = getClass().getResourceAsStream("/models/en-pos-maxent.bin");
    POSModel posModel = new POSModel(inputStreamPOSTagger);
    POSTaggerME posTagger = new POSTaggerME(posModel);
    String[] tags = posTagger.tag(tokens);

    InputStream inputStreamChunker = new FileInputStream("src/main/resources/models/en-chunker.bin");
    ChunkerModel chunkerModel = new ChunkerModel(inputStreamChunker);
    ChunkerME chunker = new ChunkerME(chunkerModel);
    String[] chunks = chunker.chunk(tokens, tags);

    assertThat(chunks).contains("B-NP", "B-VP", "B-NP", "I-NP", "I-NP", "I-NP",
            "B-VP", "I-VP", "B-PP", "B-NP", "I-NP", "I-NP", "O");
}
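Example #4 consumes the chunker output as raw BIO labels. If you also want to know how confident the model is in each label, ChunkerME keeps the probabilities of the last decoded sequence. The helper below is a small sketch of that pattern; the class and method names are illustrative only, and it assumes a ChunkerME plus tokens and POS tags prepared as in the test above.

import opennlp.tools.chunker.ChunkerME;

final class ChunkConfidence {
    // Print each token with its BIO chunk label and the model's confidence.
    // probs() reports the probabilities of the most recent chunk(...) call,
    // so it must be invoked on the same ChunkerME right after chunking.
    static void printChunksWithConfidence(ChunkerME chunker, String[] tokens, String[] tags) {
        String[] chunks = chunker.chunk(tokens, tags);
        double[] probs = chunker.probs();
        for (int i = 0; i < tokens.length; i++) {
            System.out.printf("%-15s %-6s %-6s %.3f%n", tokens[i], tags[i], chunks[i], probs[i]);
        }
    }
}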
Example #5
Source File: WiseOwlQParserPlugin.java From wiseowl with MIT License
@SuppressWarnings("rawtypes") public void init(NamedList initArgs) { SolrParams params = SolrParams.toSolrParams(initArgs); String modelDirectory = params.get("modelDirectory", System.getProperty("model.dir"));//<co id="qqpp.model"/> String wordnetDirectory = params.get("wordnetDirectory", System.getProperty("wordnet.dir"));//<co id="qqpp.wordnet"/> if (modelDirectory != null) { File modelsDir = new File(modelDirectory); try { InputStream chunkerStream = new FileInputStream( new File(modelsDir,"en-chunker.bin")); ChunkerModel chunkerModel = new ChunkerModel(chunkerStream); chunker = new ChunkerME(chunkerModel); //<co id="qqpp.chunker"/> InputStream posStream = new FileInputStream( new File(modelsDir,"en-pos-maxent.bin")); POSModel posModel = new POSModel(posStream); tagger = new POSTaggerME(posModel); //<co id="qqpp.tagger"/> // model = new DoccatModel(new FileInputStream( //<co id="qqpp.theModel"/> // new File(modelDirectory,"en-answer.bin"))).getMaxentModel(); model = new SuffixSensitiveGISModelReader(new File(modelDirectory+"/qa/ans.bin")).getModel(); //GISModel m = new SuffixSensitiveGISModelReader(new File(modelFileName)).getModel(); probs = new double[model.getNumOutcomes()]; atcg = new AnswerTypeContextGenerator( new File(wordnetDirectory, "dict"));//<co id="qqpp.context"/> } catch (IOException e) { throw new RuntimeException(e); } } }
Example #6
Source File: AnswerTypeClassifier.java From wiseowl with MIT License
/**
 * Train the answer model
 * <p>
 * Hint:
 * <pre>
 * mvn exec:java -Dexec.mainClass=com.tamingtext.qa.AnswerTypeClassifier \
 *   -Dexec.args="dist/data/questions-train.txt en-answer.bin" \
 *   -Dmodel.dir=../../opennlp-models \
 *   -Dwordnet.dir=../../Wordnet-3.0/dict
 * </pre>
 *
 * @param args
 * @throws IOException
 */
public static void main(String[] args) throws IOException {
    if (args.length < 2) {
        System.err.println("Usage: AnswerTypeClassifier <trainFile> <modelFile>");
        System.exit(1);
    }

    String trainFile = args[0];
    File outFile = new File(args[1]);
    String modelsDirProp = System.getProperty("model.dir");
    File modelsDir = new File(modelsDirProp);
    String wordnetDir = System.getProperty("wordnet.dir");

    InputStream chunkerStream = new FileInputStream(new File(modelsDir, "en-chunker.bin"));
    ChunkerModel chunkerModel = new ChunkerModel(chunkerStream);
    ChunkerME chunker = new ChunkerME(chunkerModel);
    InputStream posStream = new FileInputStream(new File(modelsDir, "en-pos-maxent.bin"));
    POSModel posModel = new POSModel(posStream);
    POSTaggerME tagger = new POSTaggerME(posModel);
    Parser parser = new ChunkParser(chunker, tagger);
    AnswerTypeContextGenerator actg = new AnswerTypeContextGenerator(new File(wordnetDir));

    //<start id="atc.train"/>
    AnswerTypeEventStream es = new AnswerTypeEventStream(trainFile, actg, parser);
    GISModel model = GIS.trainModel(100, new TwoPassDataIndexer(es, 3)); //<co id="atc.train.do"/>
    GISModelWriter writer = new SuffixSensitiveGISModelWriter(model, outFile);
    writer.persist();
    //new DoccatModel("en", model).serialize(new FileOutputStream(outFile));
    /*
    <calloutlist>
        <callout arearefs="atc.train.do"><para>Using the event stream, which feeds us training
        examples, do the actual training using OpenNLP's Maxent classifier.</para></callout>
    </calloutlist>
    */
    //<end id="atc.train"/>
}
Example #7
Source File: FocusNoun.java From wiseowl with MIT License
public static void main(String[] args) throws IOException {
    String wordnetDir = System.getProperty("wordnet.dir");
    //wordnetDir = "WordNet-3.0/dict/";
    String question = "Who is Abraham Lincoln?";
    AnswerTypeContextGenerator atcg = new AnswerTypeContextGenerator(new File(wordnetDir));
    String q = null;
    String modelsDirProp = System.getProperty("model.dir");
    //modelsDirProp = "opennlp-models/";
    File modelsDir = new File(modelsDirProp);

    InputStream chunkerStream = new FileInputStream(new File(modelsDir, "en-chunker.bin"));
    ChunkerModel chunkerModel = new ChunkerModel(chunkerStream);
    ChunkerME chunker = new ChunkerME(chunkerModel);
    InputStream posStream = new FileInputStream(new File(modelsDir, "en-pos-maxent.bin"));
    POSModel posModel = new POSModel(posStream);
    POSTaggerME tagger = new POSTaggerME(posModel);
    Parser parser = new ChunkParser(chunker, tagger);

    Parse query = ParserTool.parseLine(question, parser, 1)[0];
    String[] context = atcg.getContext(query);
    for (int i = 0; i < context.length; i++) {
        if (context[i].startsWith("hw=") || context[i].startsWith("mw=")) {
            System.out.println(context[i].substring(3));
        }
    }
}
Example #8
Source File: FocusNoun.java From wiseowl with MIT License
public String[] getFocusNoun(String question) throws IOException {
    String wordnetDir = System.getProperty("wordnet.dir");
    wordnetDir = "WordNet-3.0/dict/";
    AnswerTypeContextGenerator atcg = new AnswerTypeContextGenerator(new File(wordnetDir));
    String q = null;
    String modelsDirProp = System.getProperty("model.dir");
    modelsDirProp = "opennlp-models/";
    File modelsDir = new File(modelsDirProp);

    InputStream chunkerStream = new FileInputStream(new File(modelsDir, "en-chunker.bin"));
    ChunkerModel chunkerModel = new ChunkerModel(chunkerStream);
    ChunkerME chunker = new ChunkerME(chunkerModel);
    InputStream posStream = new FileInputStream(new File(modelsDir, "en-pos-maxent.bin"));
    POSModel posModel = new POSModel(posStream);
    POSTaggerME tagger = new POSTaggerME(posModel);
    Parser parser = new ChunkParser(chunker, tagger);

    Parse query = ParserTool.parseLine(question, parser, 1)[0];
    String[] context = atcg.getContext(query);
    String[] focus = new String[2];
    int p = 0;
    for (int i = 0; i < context.length; i++) {
        if (context[i].startsWith("hw=") || context[i].startsWith("mw=")) {
            //System.out.println(context[i].substring(3));
            focus[p++] = context[i].substring(3);
        }
    }
    return focus;
}
Example #9
Source File: OpenNlpChunkerConceptProvider.java From bioasq with Apache License 2.0
@Override
public boolean initialize(ResourceSpecifier aSpecifier, Map<String, Object> aAdditionalParams)
        throws ResourceInitializationException {
    boolean ret = super.initialize(aSpecifier, aAdditionalParams);
    String model = String.class.cast(getParameterValue("chunker-model"));
    try (InputStream ois = getClass().getResourceAsStream(model)) {
        chunker = new ChunkerME(new ChunkerModel(ois));
        Streams.closeQuietly(ois);
    } catch (Exception e) {
        throw new ResourceInitializationException(e);
    }
    type = Arrays.asList(String.class.cast(getParameterValue("type")).split(","));
    minLength = Integer.class.cast(getParameterValue("min-length"));
    return ret;
}
Example #10
Source File: OpenNlpModule.java From SciGraph with Apache License 2.0
@CheckedProvides(ChunkerProvider.class)
ChunkerME getChunker() throws IOException {
    try (InputStream is = getClass().getResourceAsStream("/opennlp/en-chunker.bin")) {
        ChunkerModel model = new ChunkerModel(is);
        return new ChunkerME(model);
    }
}
Example #11
Source File: Chapter5.java From Natural-Language-Processing-with-Java-Second-Edition with MIT License
private static void usingOpenNLPChunker() {
    try (InputStream posModelStream = new FileInputStream(getModelDir() + "\\en-pos-maxent.bin");
         InputStream chunkerStream = new FileInputStream(getModelDir() + "\\en-chunker.bin")) {
        POSModel model = new POSModel(posModelStream);
        POSTaggerME tagger = new POSTaggerME(model);

        // Used to create sample data for trainer
        // for (String sentence : sentences) {
        //     String sen[] = tokenizeSentence(sentence);
        //     String tags[] = tagger.tag(sen);
        //     for (int i = 0; i < tags.length; i++) {
        ////         for (String token : sentence) {
        //         System.out.print(sen[i] + "/" + tags[i] + " ");
        //     }
        //     System.out.println();
        // }
        // System.out.println();

        String[] tags = tagger.tag(sentence);
        for (int i = 0; i < tags.length; i++) {
            // for (String token : sentence) {
            System.out.print(sentence[i] + "/" + tags[i] + " ");
        }
        System.out.println();

        // chunker
        System.out.println("------------Chunker -----------");
        ChunkerModel chunkerModel = new ChunkerModel(chunkerStream);
        ChunkerME chunkerME = new ChunkerME(chunkerModel);
        String[] result = chunkerME.chunk(sentence, tags);
        for (int i = 0; i < result.length; i++) {
            System.out.println("[" + sentence[i] + "] " + result[i]);
        }

        System.out.println("------------Chunker Spans -----------");
        Span[] spans = chunkerME.chunkAsSpans(sentence, tags);
        for (Span span : spans) {
            System.out.print("Type: " + span.getType() + " - " + " Begin: " + span.getStart()
                    + " End:" + span.getEnd() + " Length: " + span.length() + " [");
            for (int j = span.getStart(); j < span.getEnd(); j++) {
                System.out.print(sentence[j] + " ");
            }
            System.out.println("]");
        }
    } catch (IOException ex) {
        ex.printStackTrace();
    }
}
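Example #11 walks the chunkAsSpans result manually to print each covered token. When only the surface text of each chunk is needed, Span.spansToStrings can do the slicing instead. The helper below is a brief sketch; the class and method names are illustrative, and it assumes a ChunkerME plus tokens and POS tags prepared as in Example #11.

import opennlp.tools.chunker.ChunkerME;
import opennlp.tools.util.Span;

final class ChunkPhrases {
    // Turn chunkAsSpans output into plain phrase strings (e.g. the noun phrases
    // of a sentence) instead of printing token offsets by hand.
    static void printPhrases(ChunkerME chunker, String[] tokens, String[] tags) {
        Span[] spans = chunker.chunkAsSpans(tokens, tags);
        String[] phrases = Span.spansToStrings(spans, tokens);
        for (int i = 0; i < spans.length; i++) {
            System.out.println(spans[i].getType() + ": " + phrases[i]);
        }
    }
}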
Example #12
Source File: ChunkParser.java From wiseowl with MIT License
public ChunkParser(ChunkerME chunker, POSTaggerME tagger) {
    this.chunker = chunker;
    this.tagger = tagger;
}
Example #13
Source File: NLPChunkerOp.java From lucene-solr with Apache License 2.0
public NLPChunkerOp(ChunkerModel chunkerModel) throws IOException {
    chunker = new ChunkerME(chunkerModel);
}
Example #14
Source File: OpenNlpModule.java From SciGraph with Apache License 2.0
@Override
ChunkerME get() throws IOException;
Example #15
Source File: ChunkerOpenNLP.java From jate with GNU Lesser General Public License v3.0
public ChunkerOpenNLP(InputStream model) throws IOException {
    chunker = new ChunkerME(new ChunkerModel(model));
}
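All of the examples above load a pre-trained en-chunker.bin. ChunkerME also exposes a static train method for building a custom model from CoNLL-2000-style data (one word, POS tag, and chunk tag per line). The sketch below is an assumed outline against a recent OpenNLP 1.9.x API; the file names train.txt and en-custom-chunker.bin are placeholders.

import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.OutputStream;
import java.nio.charset.StandardCharsets;

import opennlp.tools.chunker.ChunkSample;
import opennlp.tools.chunker.ChunkSampleStream;
import opennlp.tools.chunker.ChunkerFactory;
import opennlp.tools.chunker.ChunkerME;
import opennlp.tools.chunker.ChunkerModel;
import opennlp.tools.util.MarkableFileInputStreamFactory;
import opennlp.tools.util.ObjectStream;
import opennlp.tools.util.PlainTextByLineStream;
import opennlp.tools.util.TrainingParameters;

public class ChunkerTrainingSketch {
    public static void main(String[] args) throws Exception {
        // CoNLL-2000-style training data; the path is a placeholder.
        File trainFile = new File("train.txt");

        try (ObjectStream<String> lines = new PlainTextByLineStream(
                     new MarkableFileInputStreamFactory(trainFile), StandardCharsets.UTF_8);
             ObjectStream<ChunkSample> samples = new ChunkSampleStream(lines)) {

            // Train with default maxent parameters; tune TrainingParameters as needed.
            ChunkerModel model = ChunkerME.train("en", samples,
                    TrainingParameters.defaultParams(), new ChunkerFactory());

            // Persist the model so it can be loaded later like en-chunker.bin above.
            try (OutputStream out = new BufferedOutputStream(
                    new FileOutputStream("en-custom-chunker.bin"))) {
                model.serialize(out);
            }
        }
    }
}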