opennlp.tools.namefind.NameFinderME Java Examples

Source File: NamedEntityRecognitionUnitTest.java From tutorials with MIT License

6 votes

/**
 * Checks that the English person model finds the three person names in a short text.
 *
 * @throws Exception if the model resource cannot be loaded
 */
@Test
public void givenEnglishPersonModel_whenNER_thenPersonsAreDetected() throws Exception {

    SimpleTokenizer tokenizer = SimpleTokenizer.INSTANCE;
    String[] tokens = tokenizer.tokenize("John is 26 years old. His best friend's name is Leonard. He has a sister named Penny.");

    // Load the model inside try-with-resources so the classpath stream is always released.
    TokenNameFinderModel model;
    try (InputStream inputStreamNameFinder = getClass().getResourceAsStream("/models/en-ner-person.bin")) {
        model = new TokenNameFinderModel(inputStreamNameFinder);
    }

    NameFinderME nameFinderME = new NameFinderME(model);
    Span[] foundSpans = nameFinderME.find(tokens);
    List<Span> spans = Arrays.asList(foundSpans);
    assertThat(spans.toString()).isEqualTo("[[0..1) person, [13..14) person, [20..21) person]");

    // Let OpenNLP turn the token-index spans back into text instead of concatenating by hand.
    List<String> names = Arrays.asList(Span.spansToStrings(foundSpans, tokens));
    assertThat(names).contains("John","Leonard","Penny");
}

Source File: Chapter1.java From Natural-Language-Processing-with-Java-Second-Edition with MIT License

6 votes

/**
 * Tokenizes a fixed sample text, runs the person name finder over it and prints
 * the detected names to standard output.
 *
 * The model file {@code en-ner-person.bin} is read from a hard-coded directory;
 * an {@link IOException} while loading it is printed and otherwise ignored.
 */
private static void nameFinderExample() {
    final String[] sentences = {
        "Tim was a good neighbor. Perhaps not as good a Bob "
        + "Haywood, but still pretty good. Of course Mr. Adam "
        + "took the cake!"};
    try {
        Tokenizer tokenizer = SimpleTokenizer.INSTANCE;
        File modelFile = new File("C:\\OpenNLP Models", "en-ner-person.bin");
        NameFinderME finder = new NameFinderME(new TokenNameFinderModel(modelFile));

        for (int i = 0; i < sentences.length; i++) {
            // Tokens are the unit the name finder works on
            String[] tokens = tokenizer.tokenize(sentences[i]);

            // Each span holds the token index range of one detected name
            Span[] nameSpans = finder.find(tokens);

            // Map the spans back onto the tokens and print the resulting names
            String[] names = Span.spansToStrings(nameSpans, tokens);
            System.out.println(Arrays.toString(names));
        }
    } catch (IOException ex) {
        ex.printStackTrace();
    }
}

Source File: OpenNlpDoccatRecommender.java From inception with Apache License 2.0

6 votes

/**
 * Trains a document categorization model from the given CASes and stores it in the
 * context under {@code KEY_MODEL}. Returns without storing anything when fewer than
 * two samples could be extracted.
 *
 * @param aContext the context that receives the trained model
 * @param aCasses the CASes the training samples are extracted from
 * @throws RecommendationException declared by the recommender contract
 */
@Override
public void train(RecommenderContext aContext, List<CAS> aCasses)
    throws RecommendationException
{
    List<DocumentSample> samples = extractSamples(aCasses);
    int sampleCount = samples.size();

    if (sampleCount < 2) {
        LOG.info("Not enough training data: [{}] items", sampleCount);
        return;
    }

    // The beam size caps how many results can be returned. Even when the user asks for
    // only a few results, never go below the default beam size recommended by OpenNLP.
    String beamSize = Integer
            .toString(Math.max(maxRecommendations, NameFinderME.DEFAULT_BEAM_SIZE));

    TrainingParameters params = traits.getParameters();
    params.put(BeamSearch.BEAM_SIZE_PARAMETER, beamSize);

    aContext.put(KEY_MODEL, train(samples, params));
}

Source File: OpenNlpNerRecommender.java From inception with Apache License 2.0

6 votes

/**
 * Trains a name finder model from the given CASes and stores it in the context under
 * {@code KEY_MODEL}. Returns without storing anything when fewer than two samples
 * could be extracted.
 *
 * @param aContext the context that receives the trained model
 * @param aCasses the CASes the name samples are extracted from
 * @throws RecommendationException if the delegated training fails
 */
@Override
public void train(RecommenderContext aContext, List<CAS> aCasses)
    throws RecommendationException
{
    List<NameSample> samples = extractNameSamples(aCasses);
    int sampleCount = samples.size();

    if (sampleCount < 2) {
        LOG.info("Not enough training data: [{}] items", sampleCount);
        return;
    }

    // The beam size caps how many results can be returned. Even when the user asks for
    // only a few results, never go below the default beam size recommended by OpenNLP.
    String beamSize = Integer
            .toString(Math.max(maxRecommendations, NameFinderME.DEFAULT_BEAM_SIZE));

    TrainingParameters params = traits.getParameters();
    params.put(BeamSearch.BEAM_SIZE_PARAMETER, beamSize);

    aContext.put(KEY_MODEL, train(samples, params));
}

Source File: TestNER.java From Mutters with Apache License 2.0

6 votes

/**
 * Verifies that the address model finds exactly one address in the sample utterance.
 *
 * @throws Exception if the model cannot be loaded
 */
@Test
public void testAddressNER() throws Exception
{
  ClassLoader loader = Thread.currentThread().getContextClassLoader();
  URL modelUrl = loader.getResource("models/en-ner-address.bin");
  assertThat(modelUrl, is(notNullValue()));

  TokenNameFinderModel addressModel = new TokenNameFinderModel(modelUrl);
  assertThat(addressModel, is(notNullValue()));

  NameFinderME addressFinder = new NameFinderME(addressModel);
  String utterance = "Send a taxi to 12 Pleasent Street";
  String[] tokens = SimpleTokenizer.INSTANCE.tokenize(utterance);
  Span[] found = addressFinder.find(tokens);
  assertThat(found.length, is(1));

  String[] addresses = Span.spansToStrings(found, tokens);
  assertThat(addresses.length, is(1));
  assertThat(addresses[0], is("12 Pleasent Street"));
}

Source File: TestNER.java From Mutters with Apache License 2.0

6 votes

/**
 * Verifies that the dates model finds the single date expression "today" in the
 * sample sentence.
 *
 * @throws Exception if the model cannot be loaded
 */
@Test
public void testDateNER() throws Exception
{
  ClassLoader loader = Thread.currentThread().getContextClassLoader();
  URL modelUrl = loader.getResource("models/en-ner-dates.bin");
  assertThat(modelUrl, is(notNullValue()));

  TokenNameFinderModel dateModel = new TokenNameFinderModel(modelUrl);
  assertThat(dateModel, is(notNullValue()));

  NameFinderME dateFinder = new NameFinderME(dateModel);
  String sentence = "Mr. John Smith of New York, married Anne Green of London today.";
  String[] tokens = SimpleTokenizer.INSTANCE.tokenize(sentence);
  assertThat(tokens.length, is(15));

  Span[] found = dateFinder.find(tokens);
  assertThat(found.length, is(1));

  String[] dates = Span.spansToStrings(found, tokens);
  assertThat(dates.length, is(1));
  assertThat(dates[0], is("today"));
}

Source File: TestNER.java From Mutters with Apache License 2.0

6 votes

/**
 * Verifies that the locations model finds "New York" and "London", in that order,
 * in the sample sentence.
 *
 * @throws Exception if the model cannot be loaded
 */
@Test
public void testLocationNER() throws Exception
{
  ClassLoader loader = Thread.currentThread().getContextClassLoader();
  URL modelUrl = loader.getResource("models/en-ner-locations.bin");
  assertThat(modelUrl, is(notNullValue()));

  TokenNameFinderModel locationModel = new TokenNameFinderModel(modelUrl);
  assertThat(locationModel, is(notNullValue()));

  NameFinderME locationFinder = new NameFinderME(locationModel);
  String sentence = "Mr. John Smith of New York, married Anne Green of London today.";
  String[] tokens = SimpleTokenizer.INSTANCE.tokenize(sentence);
  assertThat(tokens.length, is(15));

  Span[] found = locationFinder.find(tokens);
  assertThat(found.length, is(2));

  String[] places = Span.spansToStrings(found, tokens);
  assertThat(places.length, is(2));
  assertThat(places[0], is("New York"));
  assertThat(places[1], is("London"));
}

Source File: NameFinderFactory.java From wiseowl with MIT License

6 votes

/**
 * Locates the name finder model files for a language and loads one
 * {@link NameFinderME} per file. On return {@code modelNames} and {@code finders}
 * are index-aligned arrays with one entry per model file.
 *
 * @param language language passed to the model lookup and to the model name derivation
 * @param modelDirectory directory passed to the model lookup
 * @throws IOException if a model file cannot be opened or read
 */
protected void loadNameFinders(String language, String modelDirectory) throws IOException {
  //<start id="maxent.examples.namefinder.setup"/> 
  File[] models //<co id="nfe.findmodels"/>
    = findNameFinderModels(language, modelDirectory);
  modelNames = new String[models.length];
  finders = new NameFinderME[models.length];

  for (int fi = 0; fi < models.length; fi++) {
    File modelFile = models[fi];
    modelNames[fi] = modelNameFromFile(language, modelFile); //<co id="nfe.modelname"/>

    log.info("Loading model {}", modelFile);
    // try-with-resources: the file handle is released even when loading the model fails
    try (InputStream modelStream = new FileInputStream(modelFile)) {
      TokenNameFinderModel model = //<co id="nfe.modelreader"/>
          new TokenNameFinderModel(modelStream);
      finders[fi] = new NameFinderME(model);
    }
  }

}

Source File: TestNER.java From Mutters with Apache License 2.0

6 votes

/**
 * Verifies that the persons model finds "John Smith" and "Anne Green", in that
 * order, in the sample sentence.
 *
 * @throws Exception if the model cannot be loaded
 */
@Test
public void testPersonNER() throws Exception
{
  ClassLoader loader = Thread.currentThread().getContextClassLoader();
  URL modelUrl = loader.getResource("models/en-ner-persons.bin");
  assertThat(modelUrl, is(notNullValue()));

  TokenNameFinderModel personModel = new TokenNameFinderModel(modelUrl);
  assertThat(personModel, is(notNullValue()));

  NameFinderME personFinder = new NameFinderME(personModel);
  String sentence = "Mr. John Smith of New York, married Anne Green of London today.";
  String[] tokens = SimpleTokenizer.INSTANCE.tokenize(sentence);
  assertThat(tokens.length, is(15));

  Span[] found = personFinder.find(tokens);
  assertThat(found.length, is(2));

  String[] persons = Span.spansToStrings(found, tokens);
  assertThat(persons.length, is(2));
  assertThat(persons[0], is("John Smith"));
  assertThat(persons[1], is("Anne Green"));
}

Source File: OpenNlpService.java From elasticsearch-ingest-opennlp with Apache License 2.0

6 votes

/**
 * Tokenizes {@code content} and runs the name finder model registered for
 * {@code field} over the tokens.
 *
 * The thread-local model reference is updated when it differs from the model in use
 * and is always cleared again before this method returns.
 *
 * @param content the text to analyse
 * @param field the key of the name finder model to use
 * @return the tokens together with the spans found in them
 * @throws ElasticsearchException if no model is registered for {@code field}
 */
public ExtractedEntities find(String content, String field) {
    try {
        boolean knownField = nameFinderModels.containsKey(field);
        if (!knownField) {
            throw new ElasticsearchException("Could not find field [{}], possible values {}", field, nameFinderModels.keySet());
        }

        final TokenNameFinderModel finderModel = nameFinderModels.get(field);
        final Object current = threadLocal.get();
        if (current == null || !current.equals(finderModel)) {
            threadLocal.set(finderModel);
        }

        String[] tokens = SimpleTokenizer.INSTANCE.tokenize(content);
        NameFinderME nameFinder = new NameFinderME(finderModel);
        Span[] spans = nameFinder.find(tokens);

        return new ExtractedEntities(tokens, spans);
    } finally {
        threadLocal.remove();
    }
}

Source File: Chapter4.java From Natural-Language-Processing-with-Java-Second-Edition with MIT License

5 votes

/**
 * Runs the person, location and organization name finder models over every entry of
 * {@code sentences} and prints one line per detected entity.
 *
 * Each printed line shows the sentence index, the span and the first token of the
 * span only. Any exception is printed and otherwise ignored.
 */
private static void usingMultipleNERModels() {
    // Models - en-ner-person.bin en-ner-location.bin en-ner-money.bin 
    // en-ner-organization.bin en-ner-time.bin
    try {
        // Model streams are opened in try-with-resources so the file handles are released.
        TokenizerModel tokenModel;
        try (InputStream tokenStream = new FileInputStream(
                new File(getModelDir(), "en-token.bin"))) {
            tokenModel = new TokenizerModel(tokenStream);
        }
        Tokenizer tokenizer = new TokenizerME(tokenModel);

        String modelNames[] = {"en-ner-person.bin", "en-ner-location.bin",
            "en-ner-organization.bin"};
        ArrayList<String> list = new ArrayList<>();
        for (String name : modelNames) {
            TokenNameFinderModel entityModel;
            try (InputStream modelStream = new FileInputStream(
                    new File(getModelDir(), name))) {
                entityModel = new TokenNameFinderModel(modelStream);
            }
            NameFinderME nameFinder = new NameFinderME(entityModel);
            for (int index = 0; index < sentences.length; index++) {
                String tokens[] = tokenizer.tokenize(sentences[index]);
                Span nameSpans[] = nameFinder.find(tokens);
                for (Span span : nameSpans) {
                    // Only the first token of the span is reported as the entity text
                    list.add("Sentence: " + index
                            + " Span: " + span.toString() + " Entity: "
                            + tokens[span.getStart()]);
                }
            }
        }
        System.out.println("Multiple Entities");
        for (String element : list) {
            System.out.println(element);
        }
    } catch (Exception ex) {
        ex.printStackTrace();
    }
}

Source File: NERScorer.java From uncc2014watsonsim with GNU General Public License v2.0

5 votes

/**
 * Splits a passage into sentences, prints the named entities found in each sentence
 * and returns the top parse of every sentence.
 *
 * @param p the passage text
 * @return one parse per detected sentence, in sentence order
 * @throws InvalidFormatException declared for the lazy model initialisation
 */
public Parse[] parsePassageText(String p) throws InvalidFormatException{
	if (!modelsAreInitialized)init();
	//initialize
	SentenceDetectorME sentenceDetector = new SentenceDetectorME(this.sentenceModel);
	NameFinderME nameFinder = new NameFinderME(this.nerModel);
	Parser parser = ParserFactory.create(
			this.parserModel,
			20, // beam size
			0.95); // advance percentage
	//There are several tokenizers available. SimpleTokenizer works best
	Tokenizer tokenizer = SimpleTokenizer.INSTANCE;

	//find sentences, tokenize each, parse each, return top parse for each
	String[] sentences = sentenceDetector.sentDetect(p);
	Parse[] results = new Parse[sentences.length];
	for (int i = 0; i < sentences.length; i++) {
		String sentence = sentences[i];
		// Token spans carry character offsets into the sentence
		Span[] tokenSpans = tokenizer.tokenizePos(sentence);
		String[] tokens = Span.spansToStrings(tokenSpans, sentence);

		// Name spans are token indexes; map them back to character offsets for printing
		Span[] names = nameFinder.find(tokens);
		for (int ni = 0; ni < names.length; ni++) {
			int nameStart = tokenSpans[names[ni].getStart()].getStart();
			int nameEnd = tokenSpans[names[ni].getEnd() - 1].getEnd();
			System.out.println(sentence.substring(nameStart, nameEnd));
		}

		// Hand the parser this sentence's tokens, separated by single spaces
		String sent = StringUtils.join(tokens, " ");
		System.out.println("Found sentence " + sent);
		Parse[] sentResults = ParserTool.parseLine(sent, parser, 1);
		results[i] = sentResults[0];
	}
	return results;
}

Source File: BasicActions.java From knowledge-extraction with Apache License 2.0

5 votes

/**
 * Loads the NER model named by {@code Consts.EN_NER_MODEL} from the classpath, runs
 * it over the tokens returned by {@code testTokenizer()} and prints the spans found.
 * An {@link IOException} is printed and otherwise ignored.
 */
@Test
public void testNameFinder(){
	ClassLoader loader = BasicActions.class.getClassLoader();
	try (InputStream modelIn = loader.getResourceAsStream(Consts.EN_NER_MODEL)) {

		NameFinderME nameFinder = new NameFinderME(new TokenNameFinderModel(modelIn));
		Span[] nameSpans = nameFinder.find(testTokenizer());
		String rendered = Arrays.toString(nameSpans);
		System.out.println(rendered);

	} catch (IOException e) {
		e.printStackTrace();
	}
}

Source File: NameFilter.java From wiseowl with MIT License

5 votes

public NameFilter(TokenStream in,String[] modelNames, NameFinderME[] finders) {
  super(in);
  this.tokenizer = SimpleTokenizer.INSTANCE;
  this.finders = finders;
  this.tokenTypeNames = new String[modelNames.length];
  for (int i=0; i < modelNames.length; i++) {
    tokenTypeNames[i] = NE_PREFIX + modelNames[i];
  }
}

Source File: OpenNlpNerRecommender.java From inception with Apache License 2.0

5 votes

/**
 * Trains an OpenNLP name finder model from the given samples.
 *
 * @param aNameSamples the training samples
 * @param aParameters the training parameters handed to OpenNLP
 * @return the trained model
 * @throws RecommendationException if reading the samples or training fails with an I/O error
 */
private TokenNameFinderModel train(List<NameSample> aNameSamples,
        TrainingParameters aParameters)
    throws RecommendationException
{
    try (NameSampleStream stream = new NameSampleStream(aNameSamples)) {
        TokenNameFinderFactory finderFactory = new TokenNameFinderFactory();
        return NameFinderME.train("unknown", null, stream, aParameters, finderFactory);
    } catch (IOException e) {
        LOG.error("Exception during training the OpenNLP Named Entity Recognizer model.", e);
        // The message previously said "pos", copied from the POS recommender.
        throw new RecommendationException("Error while training OpenNLP NER", e);
    }
}

Source File: Chapter4.java From Natural-Language-Processing-with-Java-Second-Edition with MIT License

5 votes

/**
 * Trains a person name finder model from {@code en-ner-person.train} and serializes
 * it to the file {@code modelFile}. An {@link IOException} is printed and otherwise
 * ignored.
 */
private static void trainingOpenNLPNERModel() {
    // Both streams are managed by try-with-resources. The training data is opened first
    // so a missing training file does not leave an empty model file behind.
    try (InputStream trainingData = new FileInputStream("en-ner-person.train");
            OutputStream modelOutputStream = new BufferedOutputStream(
                    new FileOutputStream(new File("modelFile")))) {
        ObjectStream<String> lineStream = new PlainTextByLineStream(
                trainingData, "UTF-8");
        ObjectStream<NameSample> sampleStream = new NameSampleDataStream(lineStream);

        // NOTE(review): the trailing 100 and 5 are presumably iterations and cutoff - confirm
        TokenNameFinderModel model = NameFinderME.train("en", "person", sampleStream,
                null, 100, 5);

        model.serialize(modelOutputStream);
    } catch (IOException ex) {
        ex.printStackTrace();
    }
}

Source File: Chapter4.java From Natural-Language-Processing-with-Java-Second-Edition with MIT License

5 votes

/**
 * Runs the person, location and organization name finder models over every entry of
 * {@code sentences} and prints one line per detected entity.
 *
 * Each printed line shows the sentence index, the span and the first token of the
 * span only. Any exception is printed and otherwise ignored.
 */
private static void usingMultipleNERModels() {
    // Models - en-ner-person.bin en-ner-location.bin en-ner-money.bin 
    // en-ner-organization.bin en-ner-time.bin
    try {
        // Model streams are opened in try-with-resources so the file handles are released.
        TokenizerModel tokenModel;
        try (InputStream tokenStream = new FileInputStream(
                new File(getModelDir(), "en-token.bin"))) {
            tokenModel = new TokenizerModel(tokenStream);
        }
        Tokenizer tokenizer = new TokenizerME(tokenModel);

        String modelNames[] = {"en-ner-person.bin", "en-ner-location.bin",
            "en-ner-organization.bin"};
        ArrayList<String> list = new ArrayList<>();
        for (String name : modelNames) {
            TokenNameFinderModel entityModel;
            try (InputStream modelStream = new FileInputStream(
                    new File(getModelDir(), name))) {
                entityModel = new TokenNameFinderModel(modelStream);
            }
            NameFinderME nameFinder = new NameFinderME(entityModel);
            for (int index = 0; index < sentences.length; index++) {
                String tokens[] = tokenizer.tokenize(sentences[index]);
                Span nameSpans[] = nameFinder.find(tokens);
                for (Span span : nameSpans) {
                    // Only the first token of the span is reported as the entity text
                    list.add("Sentence: " + index
                            + " Span: " + span.toString() + " Entity: "
                            + tokens[span.getStart()]);
                }
            }
        }
        System.out.println("Multiple Entities");
        for (String element : list) {
            System.out.println(element);
        }
    } catch (Exception ex) {
        ex.printStackTrace();
    }
}

Source File: NLPNERTaggerOp.java From lucene-solr with Apache License 2.0

4 votes

/**
 * Creates a tagger operation backed by a new {@link NameFinderME} for the given model.
 *
 * @param model the name finder model to wrap
 */
public NLPNERTaggerOp(TokenNameFinderModel model) {
  nameFinder = new NameFinderME(model);
}

Source File: OpenNlpService.java From elasticsearch-ingest-opennlp with Apache License 2.0

4 votes

/**
 * Rebuilds {@code content} with every detected entity rewritten as
 * {@code [entity text](Type_entity text)}.
 *
 * Spans of all given entity sets are merged and overlapping spans are dropped. The
 * token array of the first entity set is used to render the text. When there are no
 * entity sets or no spans, {@code content} is returned unchanged.
 *
 * @param content the original text
 * @param extractedEntities the entity sets extracted from {@code content}
 * @return the annotated text, or {@code content} itself when there is nothing to annotate
 */
static String createAnnotatedText(String content, List<ExtractedEntities> extractedEntities) {
    // without any entity set there is no token array to work on and nothing to enrich
    if (extractedEntities.isEmpty()) {
        return content;
    }

    List<Span> spansList = new ArrayList<>();
    for (ExtractedEntities entities : extractedEntities) {
        spansList.addAll(Arrays.asList(entities.getSpans()));
    }

    Span[] spans = NameFinderME.dropOverlappingSpans(spansList.toArray(new Span[0]));

    // shortcut if there is no enrichment to be done
    if (spans.length == 0) {
        return content;
    }

    String[] tokens = extractedEntities.get(0).getTokens();

    // these spans contain the real offset of each word in start/end variables!
    // the spans of the method argument contain the offset of each token, as mentioned in tokens!
    Span[] spansWithRealOffsets = SimpleTokenizer.INSTANCE.tokenizePos(content);

    StringBuilder builder = new StringBuilder();
    for (int i = 0; i < tokens.length; i++) {
        final int idx = i;
        String token = tokens[i];

        final Optional<Span> optionalSpan = Arrays.stream(spans).filter(s -> s.getStart() == idx).findFirst();
        if (optionalSpan.isPresent()) {
            Span span = optionalSpan.get();
            int start = span.getStart();
            int end = span.getEnd();
            String type = span.getType();

            String[] spanTokens = Arrays.copyOfRange(tokens, start, end);
            String entityString = Strings.arrayToDelimitedString(spanTokens, " ");

            builder.append("[");
            builder.append(entityString);
            builder.append("](");
            builder.append(Strings.capitalize(type));
            builder.append("_");
            builder.append(entityString);
            builder.append(")");
            // continue after the last token of the entity that was just written
            i = end - 1;
        } else {
            builder.append(token);
        }

        // only append a whitespace, if the offsets actually differ
        if (i < tokens.length - 1) {
            if (spansWithRealOffsets[i].getEnd() != spansWithRealOffsets[i+1].getStart()) {
                builder.append(" ");
            }
        }
    }

    return builder.toString();
}

Source File: NameFinderFactory.java From wiseowl with MIT License

4 votes

/**
 * Obtain a reference to the array of NameFinderME's loaded by the engine.
 *
 * @return the internal {@code finders} array itself, not a copy
 */
public NameFinderME[] getNameFinders() {
  return this.finders;
}

Source File: OpenNlpNerRecommender.java From inception with Apache License 2.0

4 votes

/**
 * Evaluates the name finder on the given CASes.
 *
 * The extracted samples are assigned to a training or a test set by the data
 * splitter; a model is trained on the training set and its predictions on the test
 * set are compared with the gold names. When either set has fewer than two samples
 * the evaluation is skipped and the result carries an explanatory message.
 *
 * @param aCasses the CASes the samples are extracted from
 * @param aDataSplitter decides for each sample which set it belongs to
 * @return the evaluation result, possibly marked as skipped
 * @throws RecommendationException if training fails
 */
@Override
public EvaluationResult evaluate(List<CAS> aCasses, DataSplitter aDataSplitter)
    throws RecommendationException
{
    List<NameSample> allSamples = extractNameSamples(aCasses);
    List<NameSample> trainingSet = new ArrayList<>();
    List<NameSample> testSet = new ArrayList<>();

    // Samples assigned to neither set are left out
    for (NameSample candidate : allSamples) {
        switch (aDataSplitter.getTargetSet(candidate)) {
        case TRAIN:
            trainingSet.add(candidate);
            break;
        case TEST:
            testSet.add(candidate);
            break;
        default:
            // Do nothing
            break;
        }
    }

    final int totalSize = allSamples.size();
    final int testSetSize = testSet.size();
    final int trainingSetSize = trainingSet.size();
    final double overallTrainingSize = totalSize - testSetSize;
    double trainRatio = 0.0;
    if (overallTrainingSize > 0) {
        trainRatio = trainingSetSize / overallTrainingSize;
    }

    boolean enoughData = trainingSetSize >= 2 && testSetSize >= 2;
    if (!enoughData) {
        String info = String.format(
                "Not enough evaluation data: training set [%s] items, test set [%s] of total [%s]",
                trainingSetSize, testSetSize, totalSize);
        LOG.info(info);

        EvaluationResult skipped = new EvaluationResult(trainingSetSize,
                testSetSize, trainRatio);
        skipped.setEvaluationSkipped(true);
        skipped.setErrorMsg(info);
        return skipped;
    }

    LOG.info("Training on [{}] items, predicting on [{}] of total [{}]", trainingSetSize,
            testSetSize, totalSize);

    // Train model
    NameFinderME nameFinder = new NameFinderME(train(trainingSet, traits.getParameters()));

    // Evaluate
    List<LabelPair> labelPairs = new ArrayList<>();
    for (NameSample sample : testSet) {
        // clear adaptive data from feature generators if necessary
        if (sample.isClearAdaptiveDataSet()) {
            nameFinder.clearAdaptiveData();
        }

        // One span per named entity; the arrays hold all entities of one sentence
        String[] sentence = sample.getSentence();
        Span[] predictedNames = nameFinder.find(sentence);
        Span[] goldNames = sample.getNames();

        List<LabelPair> sentencePairs = determineLabelsForASentence(sentence,
                predictedNames, goldNames);
        labelPairs.addAll(sentencePairs);
    }

    return labelPairs.stream().collect(EvaluationResult
            .collector(trainingSetSize, testSetSize, trainRatio, NO_NE_TAG));
}

Source File: OpenNlpNerRecommender.java From inception with Apache License 2.0

4 votes

/**
 * Runs the trained name finder over the sentences of the CAS and adds one prediction
 * annotation per detected name whose type is not {@code NameSample.DEFAULT_TYPE}.
 *
 * Processing stops once {@code traits.getPredictionLimit()} sentences have been handled.
 *
 * @param aContext the context holding the trained model under {@code KEY_MODEL}
 * @param aCas the CAS to read tokens from and to add predictions to
 * @throws RecommendationException if the context holds no model
 */
@Override
public void predict(RecommenderContext aContext, CAS aCas) throws RecommendationException
{
    TokenNameFinderModel model = aContext.get(KEY_MODEL).orElseThrow(() -> 
            new RecommendationException("Key [" + KEY_MODEL + "] not found in context"));

    NameFinderME finder = new NameFinderME(model);

    Type sentenceType = getType(aCas, Sentence.class);
    Type tokenType = getType(aCas, Token.class);
    Type predictedType = getPredictedType(aCas);

    Feature predictedFeature = getPredictedFeature(aCas);
    Feature isPredictionFeature = getIsPredictionFeature(aCas);
    Feature scoreFeature = getScoreFeature(aCas);

    int handledSentences = 0;
    for (AnnotationFS sentence : select(aCas, sentenceType)) {
        if (handledSentences >= traits.getPredictionLimit()) {
            break;
        }
        handledSentences++;

        List<AnnotationFS> tokenAnnotations = selectCovered(tokenType, sentence);
        String[] tokens = new String[tokenAnnotations.size()];
        int slot = 0;
        for (AnnotationFS tokenAnnotation : tokenAnnotations) {
            tokens[slot++] = tokenAnnotation.getCoveredText();
        }

        Span[] predictions = finder.find(tokens);
        for (Span prediction : predictions) {
            String label = prediction.getType();
            if (!NameSample.DEFAULT_TYPE.equals(label)) {
                // Span indexes refer to tokens; translate them into document offsets
                AnnotationFS firstToken = tokenAnnotations.get(prediction.getStart());
                int begin = firstToken.getBegin();
                AnnotationFS lastToken = tokenAnnotations.get(prediction.getEnd() - 1);
                int end = lastToken.getEnd();

                AnnotationFS annotation = aCas.createAnnotation(predictedType, begin, end);
                annotation.setStringValue(predictedFeature, label);
                annotation.setDoubleValue(scoreFeature, prediction.getProb());
                annotation.setBooleanValue(isPredictionFeature, true);

                aCas.addFsToIndexes(annotation);
            }
        }
    }
}

Source File: NERDemo.java From Natural-Language-Processing-with-Java-Second-Edition with MIT License

4 votes

/**
 * Demonstrates person name detection, first on a single sentence and then on several
 * sentences, printing every span together with its first token.
 *
 * Any exception is printed to standard output and otherwise ignored.
 *
 * @param args unused
 */
public static void main(String args[]){
    String sentences[] = {"Joe was the last person to see Fred. ", 
        "He saw him in Boston at McKenzie's pub at 3:00 where he " 
        + " paid $2.45 for an ale. ", 
        "Joe wanted to go to Vermont for the day to visit a cousin who " 
        + "works at IBM, but Sally and he had to look for Fred"}; 
    String sentence = "He was the last person to see Fred."; 
    // Both model streams are managed by try-with-resources so the file handles are released.
    try (InputStream tokenStream = new FileInputStream(new File(getResourcePath()+ "en-token.bin"));
            InputStream modelStream = new FileInputStream(new File(getResourcePath() + "en-ner-person.bin")))
    {
        TokenizerModel tokenModel = new TokenizerModel(tokenStream);
        Tokenizer tokenizer = new TokenizerME(tokenModel);
        TokenNameFinderModel entityModel = new TokenNameFinderModel(modelStream);
        NameFinderME nameFinder = new NameFinderME(entityModel);
        String tokens1[] = tokenizer.tokenize(sentence);
        Span nameSpans1[] = nameFinder.find(tokens1);
        for (Span span : nameSpans1) {
            System.out.println("Span: " + span.toString());
            // Only the first token of the span is printed as the entity
            System.out.println("Entity: "
                + tokens1[span.getStart()]);
        }

        System.out.println("---------- Multiple Sentences -----------");
        for (String sentence1 : sentences) {
            String tokens[] = tokenizer.tokenize(sentence1);
            Span nameSpans[] = nameFinder.find(tokens);
            for (Span span : nameSpans) {
                System.out.println("Span: " + span.toString());
                System.out.println("Entity: "
                    + tokens[span.getStart()]);
            }
            System.out.println();
        }

    }
    catch(Exception e){
        System.out.println(e);
    }
}

Source File: Discoverer.java From DataDefender with Apache License 2.0

4 votes

/**
 * Wraps the given OpenNLP model in a {@code Model} that uses this instance's tokenizer.
 *
 * @param tnf the name finder model to build a {@link NameFinderME} from
 * @param modelName the name given to the resulting model
 * @return the new model
 */
private Model createModelFrom(TokenNameFinderModel tnf, String modelName) {
    return new Model(tokenizer, new NameFinderME(tnf), modelName);
}

Source File: Model.java From DataDefender with Apache License 2.0

4 votes

/**
 * Creates a model from its three parts.
 *
 * @param tokenizer the tokenizer to keep
 * @param nameFinder the name finder to keep
 * @param name the model name
 */
public Model(final Tokenizer tokenizer, final NameFinderME nameFinder, final String name) {
    this.tokenizer = tokenizer;
    this.nameFinder = nameFinder;
    this.name = name;
}

Source File: Model.java From DataDefender with Apache License 2.0

4 votes

/**
 * @return the name finder held by this model
 */
public NameFinderME getNameFinder() {
    return nameFinder;
}

Source File: NETagger.java From OpenEphyra with GNU General Public License v2.0

4 votes

/**
 * Performs named entity tagging on an array of full parses of sentences.
 *
 * For every finder, the tags assigned to the tokens of earlier parses are passed on
 * when tagging later parses. The detected token ranges are handed to
 * {@code addNames} together with the finder's name.
 *
 * @param parses array of full parses of sentences
 */
// TODO only works with OpenNLP taggers so far
@SuppressWarnings("unchecked")
public static void tagNes(Parse[] parses) {
	// initialize prevTokenMaps (one token -> tag map per finder)
	Map[] prevTokenMaps = new HashMap[finders.length];
	for (int i = 0; i < finders.length; i++) {
		prevTokenMaps[i] = new HashMap();
	}

	for (Parse parse : parses) {
		// get tokens
		Parse[] tokens = parse.getTagNodes();

		// find named entities
		String[][] finderTags = new String[finders.length][];
		for (int i = 0; i < finders.length; i++) {
			finderTags[i] = finders[i].find(tokens, prevTokenMaps[i]);
		}

		// update prevTokenMaps
		for (int i = 0; i < prevTokenMaps.length; i++) {
			for (int j = 0; j < tokens.length; j++) {
				prevTokenMaps[i].put(tokens[j], finderTags[i][j]);
			}
		}

		for (int i = 0; i < finders.length; i++) {
			int start = -1;
			List<Span> names = new ArrayList<Span>(5);

			// determine spans of tokens that are named entities
			for (int j = 0; j < tokens.length; j++) {
				String tag = finderTags[i][j];
				boolean startsName = tag.equals(NameFinderME.START);

				// a START or OTHER tag ends the name that is currently open, if any;
				// the span end is the index of the name's last token
				if (startsName || tag.equals(NameFinderME.OTHER)) {
					if (start != -1) {
						names.add(new Span(start, j - 1));
					}
					start = -1;
				}
				if (startsName) {
					start = j;
				}
			}
			// a name still open at the end of the sentence runs up to the last token
			if (start != -1) {
				names.add(new Span(start, tokens.length - 1));
			}

			// add name entity information to parse
			addNames(finderNames[i], names, tokens);
		}
	}
}

opennlp.tools.namefind.NameFinderME Java Examples