Java Code Examples for opennlp.tools.namefind.NameFinderME#find()
The following examples show how to use
opennlp.tools.namefind.NameFinderME#find().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: Chapter1.java From Natural-Language-Processing-with-Java-Second-Edition with MIT License | 6 votes |
private static void nameFinderExample() { try { String[] sentences = { "Tim was a good neighbor. Perhaps not as good a Bob " + "Haywood, but still pretty good. Of course Mr. Adam " + "took the cake!"}; Tokenizer tokenizer = SimpleTokenizer.INSTANCE; TokenNameFinderModel model = new TokenNameFinderModel(new File( "C:\\OpenNLP Models", "en-ner-person.bin")); NameFinderME finder = new NameFinderME(model); for (String sentence : sentences) { // Split the sentence into tokens String[] tokens = tokenizer.tokenize(sentence); // Find the names in the tokens and return Span objects Span[] nameSpans = finder.find(tokens); // Print the names extracted from the tokens using the Span data System.out.println(Arrays.toString( Span.spansToStrings(nameSpans, tokens))); } } catch (IOException ex) { ex.printStackTrace(); } }
Example 2
Source File: TestNER.java From Mutters with Apache License 2.0 | 6 votes |
/**
 * Checks that the model at {@code models/en-ner-persons.bin} finds exactly the two
 * names "John Smith" and "Anne Green" in the sample sentence.
 */
@Test
public void testPersonNER() throws Exception {
    ClassLoader loader = Thread.currentThread().getContextClassLoader();
    URL resource = loader.getResource("models/en-ner-persons.bin");
    assertThat(resource, is(notNullValue()));

    TokenNameFinderModel personModel = new TokenNameFinderModel(resource);
    assertThat(personModel, is(notNullValue()));

    NameFinderME finder = new NameFinderME(personModel);

    String text = "Mr. John Smith of New York, married Anne Green of London today.";
    String[] words = SimpleTokenizer.INSTANCE.tokenize(text);
    assertThat(words.length, is(15));

    Span[] found = finder.find(words);
    assertThat(found.length, is(2));

    String[] people = Span.spansToStrings(found, words);
    assertThat(people.length, is(2));
    assertThat(people[0], is("John Smith"));
    assertThat(people[1], is("Anne Green"));
}
Example 3
Source File: TestNER.java From Mutters with Apache License 2.0 | 6 votes |
/**
 * Checks that the model at {@code models/en-ner-locations.bin} finds exactly the two
 * locations "New York" and "London" in the sample sentence.
 */
@Test
public void testLocationNER() throws Exception {
    ClassLoader loader = Thread.currentThread().getContextClassLoader();
    URL resource = loader.getResource("models/en-ner-locations.bin");
    assertThat(resource, is(notNullValue()));

    TokenNameFinderModel locationModel = new TokenNameFinderModel(resource);
    assertThat(locationModel, is(notNullValue()));

    NameFinderME finder = new NameFinderME(locationModel);

    String text = "Mr. John Smith of New York, married Anne Green of London today.";
    String[] words = SimpleTokenizer.INSTANCE.tokenize(text);
    assertThat(words.length, is(15));

    Span[] found = finder.find(words);
    assertThat(found.length, is(2));

    String[] places = Span.spansToStrings(found, words);
    assertThat(places.length, is(2));
    assertThat(places[0], is("New York"));
    assertThat(places[1], is("London"));
}
Example 4
Source File: TestNER.java From Mutters with Apache License 2.0 | 6 votes |
/**
 * Checks that the model at {@code models/en-ner-dates.bin} finds exactly one span,
 * "today", in the sample sentence.
 */
@Test
public void testDateNER() throws Exception {
    ClassLoader loader = Thread.currentThread().getContextClassLoader();
    URL resource = loader.getResource("models/en-ner-dates.bin");
    assertThat(resource, is(notNullValue()));

    TokenNameFinderModel dateModel = new TokenNameFinderModel(resource);
    assertThat(dateModel, is(notNullValue()));

    NameFinderME finder = new NameFinderME(dateModel);

    String text = "Mr. John Smith of New York, married Anne Green of London today.";
    String[] words = SimpleTokenizer.INSTANCE.tokenize(text);
    assertThat(words.length, is(15));

    Span[] found = finder.find(words);
    assertThat(found.length, is(1));

    String[] dates = Span.spansToStrings(found, words);
    assertThat(dates.length, is(1));
    assertThat(dates[0], is("today"));
}
Example 5
Source File: TestNER.java From Mutters with Apache License 2.0 | 6 votes |
/**
 * Checks that the model at {@code models/en-ner-address.bin} finds exactly one span,
 * "12 Pleasent Street", in the sample request.
 */
@Test
public void testAddressNER() throws Exception {
    ClassLoader loader = Thread.currentThread().getContextClassLoader();
    URL resource = loader.getResource("models/en-ner-address.bin");
    assertThat(resource, is(notNullValue()));

    TokenNameFinderModel addressModel = new TokenNameFinderModel(resource);
    assertThat(addressModel, is(notNullValue()));

    NameFinderME finder = new NameFinderME(addressModel);

    String text = "Send a taxi to 12 Pleasent Street";
    String[] words = SimpleTokenizer.INSTANCE.tokenize(text);

    Span[] found = finder.find(words);
    assertThat(found.length, is(1));

    String[] addresses = Span.spansToStrings(found, words);
    assertThat(addresses.length, is(1));
    assertThat(addresses[0], is("12 Pleasent Street"));
}
Example 6
Source File: Chapter4.java From Natural-Language-Processing-with-Java-Second-Edition with MIT License | 5 votes |
private static void usingMultipleNERModels() { // Models - en-ner-person.bin en-ner-location.bin en-ner-money.bin // en-ner-organization.bin en-ner-time.bin try { InputStream tokenStream = new FileInputStream( new File(getModelDir(), "en-token.bin")); TokenizerModel tokenModel = new TokenizerModel(tokenStream); Tokenizer tokenizer = new TokenizerME(tokenModel); String modelNames[] = {"en-ner-person.bin", "en-ner-location.bin", "en-ner-organization.bin"}; ArrayList<String> list = new ArrayList(); for (String name : modelNames) { TokenNameFinderModel entityModel = new TokenNameFinderModel( new FileInputStream( new File(getModelDir(), name))); NameFinderME nameFinder = new NameFinderME(entityModel); for (int index = 0; index < sentences.length; index++) { String tokens[] = tokenizer.tokenize(sentences[index]); Span nameSpans[] = nameFinder.find(tokens); for (Span span : nameSpans) { list.add("Sentence: " + index + " Span: " + span.toString() + " Entity: " + tokens[span.getStart()]); } } } System.out.println("Multiple Entities"); for (String element : list) { System.out.println(element); } } catch (Exception ex) { ex.printStackTrace(); } }
Example 7
Source File: Chapter4.java From Natural-Language-Processing-with-Java-Second-Edition with MIT License | 5 votes |
private static void usingMultipleNERModels() { // Models - en-ner-person.bin en-ner-location.bin en-ner-money.bin // en-ner-organization.bin en-ner-time.bin try { InputStream tokenStream = new FileInputStream( new File(getModelDir(), "en-token.bin")); TokenizerModel tokenModel = new TokenizerModel(tokenStream); Tokenizer tokenizer = new TokenizerME(tokenModel); String modelNames[] = {"en-ner-person.bin", "en-ner-location.bin", "en-ner-organization.bin"}; ArrayList<String> list = new ArrayList(); for (String name : modelNames) { TokenNameFinderModel entityModel = new TokenNameFinderModel( new FileInputStream( new File(getModelDir(), name))); NameFinderME nameFinder = new NameFinderME(entityModel); for (int index = 0; index < sentences.length; index++) { String tokens[] = tokenizer.tokenize(sentences[index]); Span nameSpans[] = nameFinder.find(tokens); for (Span span : nameSpans) { list.add("Sentence: " + index + " Span: " + span.toString() + " Entity: " + tokens[span.getStart()]); } } } System.out.println("Multiple Entities"); for (String element : list) { System.out.println(element); } } catch (Exception ex) { ex.printStackTrace(); } }
Example 8
Source File: BasicActions.java From knowledge-extraction with Apache License 2.0 | 5 votes |
/**
 * Loads the name-finder model named by {@code Consts.EN_NER_MODEL} from the class path,
 * runs it over the tokens returned by {@code testTokenizer()}, and prints the spans.
 *
 * <p>An {@link IOException} is reported with {@code printStackTrace()} and does not
 * fail the test.
 */
@Test
public void testNameFinder() {
    ClassLoader loader = BasicActions.class.getClassLoader();
    try (InputStream modelIn = loader.getResourceAsStream(Consts.EN_NER_MODEL)) {
        TokenNameFinderModel nerModel = new TokenNameFinderModel(modelIn);
        NameFinderME finder = new NameFinderME(nerModel);
        Span[] found = finder.find(testTokenizer());
        String rendered = Arrays.toString(found);
        System.out.println(rendered);
    } catch (IOException e) {
        e.printStackTrace();
    }
}
Example 9
Source File: NERScorer.java From uncc2014watsonsim with GNU General Public License v2.0 | 5 votes |
public Parse[] parsePassageText(String p) throws InvalidFormatException{ if (!modelsAreInitialized)init(); //initialize SentenceDetectorME sentenceDetector = new SentenceDetectorME(this.sentenceModel); NameFinderME nameFinder = new NameFinderME(this.nerModel); Parser parser = ParserFactory.create( this.parserModel, 20, // beam size 0.95); // advance percentage //find sentences, tokenize each, parse each, return top parse for each String[] sentences = sentenceDetector.sentDetect(p); Parse[] results = new Parse[sentences.length]; for (int i=0;i<sentences.length;i++){ //String[] tks = SimpleTokenizer.INSTANCE.tokenize(sentences[i]); //StringTokenizer st = new StringTokenizer(tks[i]); //There are several tokenizers available. SimpleTokenizer works best Tokenizer tokenizer = SimpleTokenizer.INSTANCE; for (int si = 0; si < sentences.length; si++) { Span[] tokenSpans = tokenizer.tokenizePos(sentences[si]); String[] tokens = Span.spansToStrings(tokenSpans, sentences[si]); Span[] names = nameFinder.find(tokens); for (int ni = 0; ni < names.length; ni++) { Span startSpan = tokenSpans[names[ni].getStart()]; int nameStart = startSpan.getStart(); Span endSpan = tokenSpans[names[ni].getEnd() - 1]; int nameEnd = endSpan.getEnd(); String name = sentences[si].substring(nameStart, nameEnd); System.out.println(name); } } String sent= StringUtils.join(tokenizer," "); System.out.println("Found sentence " + sent); Parse[] sentResults = ParserTool.parseLine(sent,parser, 1); results[i]=sentResults[0]; } return results; }
Example 10
Source File: NERDemo.java From Natural-Language-Processing-with-Java-Second-Edition with MIT License | 4 votes |
/**
 * Demonstrates person-name detection on a single sentence and then on several sentences.
 *
 * <p>The tokenizer model {@code en-token.bin} and the name-finder model
 * {@code en-ner-person.bin} are read from the location returned by
 * {@code getResourcePath()}. Both streams are closed when the demo finishes or fails.
 * For each detected span the span itself and its first token are printed. Any exception
 * is printed to standard output.
 *
 * @param args command-line arguments; not used
 */
public static void main(String args[]) {
    String sentences[] = {"Joe was the last person to see Fred. ",
        "He saw him in Boston at McKenzie's pub at 3:00 where he "
        + " paid $2.45 for an ale. ",
        "Joe wanted to go to Vermont for the day to visit a cousin who "
        + "works at IBM, but Sally and he had to look for Fred"};
    String sentence = "He was the last person to see Fred.";

    try (InputStream tokenStream = new FileInputStream(
                 new File(getResourcePath() + "en-token.bin"));
         InputStream modelStream = new FileInputStream(
                 new File(getResourcePath() + "en-ner-person.bin"))) {
        TokenizerModel tokenModel = new TokenizerModel(tokenStream);
        Tokenizer tokenizer = new TokenizerME(tokenModel);
        TokenNameFinderModel entityModel = new TokenNameFinderModel(modelStream);
        NameFinderME nameFinder = new NameFinderME(entityModel);

        String tokens1[] = tokenizer.tokenize(sentence);
        Span nameSpans1[] = nameFinder.find(tokens1);
        for (int i = 0; i < nameSpans1.length; i++) {
            System.out.println("Span: " + nameSpans1[i].toString());
            // Only the first token of the span is shown as the entity.
            System.out.println("Entity: " + tokens1[nameSpans1[i].getStart()]);
        }

        System.out.println("---------- Multiple Sentences -----------");
        for (String sentence1 : sentences) {
            String tokens[] = tokenizer.tokenize(sentence1);
            Span nameSpans[] = nameFinder.find(tokens);
            for (int i = 0; i < nameSpans.length; i++) {
                System.out.println("Span: " + nameSpans[i].toString());
                System.out.println("Entity: " + tokens[nameSpans[i].getStart()]);
            }
            System.out.println();
        }
    } catch (Exception e) {
        System.out.println(e);
    }
}
Example 11
Source File: OpenNlpNerRecommender.java From inception with Apache License 2.0 | 4 votes |
/**
 * Runs the name finder stored in the context over the sentences of a CAS and adds one
 * prediction annotation per span the model returns.
 *
 * <p>Processing stops once {@code traits.getPredictionLimit()} sentences have been
 * handled. Spans whose type equals {@code NameSample.DEFAULT_TYPE} are skipped.
 *
 * @param aContext context that must hold the trained model under {@code KEY_MODEL}
 * @param aCas the CAS to read sentences and tokens from and to add predictions to
 * @throws RecommendationException if the context holds no model under {@code KEY_MODEL}
 */
@Override
public void predict(RecommenderContext aContext, CAS aCas) throws RecommendationException {
    TokenNameFinderModel nerModel = aContext.get(KEY_MODEL).orElseThrow(() ->
            new RecommendationException("Key [" + KEY_MODEL + "] not found in context"));
    NameFinderME nameFinder = new NameFinderME(nerModel);

    Type sentenceType = getType(aCas, Sentence.class);
    Type tokenType = getType(aCas, Token.class);
    Type targetType = getPredictedType(aCas);
    Feature labelFeature = getPredictedFeature(aCas);
    Feature isPredictionFeature = getIsPredictionFeature(aCas);
    Feature scoreFeature = getScoreFeature(aCas);

    int sentencesSeen = 0;
    for (AnnotationFS sentence : select(aCas, sentenceType)) {
        if (sentencesSeen >= traits.getPredictionLimit()) {
            break;
        }
        sentencesSeen++;

        List<AnnotationFS> covered = selectCovered(tokenType, sentence);
        String[] words = new String[covered.size()];
        for (int i = 0; i < words.length; i++) {
            words[i] = covered.get(i).getCoveredText();
        }

        for (Span hit : nameFinder.find(words)) {
            String label = hit.getType();
            if (!NameSample.DEFAULT_TYPE.equals(label)) {
                // Span indices are token positions with an exclusive end, so the last
                // covered token sits at getEnd() - 1.
                AnnotationFS firstToken = covered.get(hit.getStart());
                AnnotationFS lastToken = covered.get(hit.getEnd() - 1);
                int begin = firstToken.getBegin();
                int end = lastToken.getEnd();

                AnnotationFS suggestion = aCas.createAnnotation(targetType, begin, end);
                suggestion.setStringValue(labelFeature, label);
                suggestion.setDoubleValue(scoreFeature, hit.getProb());
                suggestion.setBooleanValue(isPredictionFeature, true);
                aCas.addFsToIndexes(suggestion);
            }
        }
    }
}
Example 12
Source File: OpenNlpNerRecommender.java From inception with Apache License 2.0 | 4 votes |
@Override public EvaluationResult evaluate(List<CAS> aCasses, DataSplitter aDataSplitter) throws RecommendationException { List<NameSample> data = extractNameSamples(aCasses); List<NameSample> trainingSet = new ArrayList<>(); List<NameSample> testSet = new ArrayList<>(); for (NameSample nameSample : data) { switch (aDataSplitter.getTargetSet(nameSample)) { case TRAIN: trainingSet.add(nameSample); break; case TEST: testSet.add(nameSample); break; default: // Do nothing break; } } int testSetSize = testSet.size(); int trainingSetSize = trainingSet.size(); double overallTrainingSize = data.size() - testSetSize; double trainRatio = (overallTrainingSize > 0) ? trainingSetSize / overallTrainingSize : 0.0; if (trainingSetSize < 2 || testSetSize < 2) { String info = String.format( "Not enough evaluation data: training set [%s] items, test set [%s] of total [%s]", trainingSetSize, testSetSize, data.size()); LOG.info(info); EvaluationResult result = new EvaluationResult(trainingSetSize, testSetSize, trainRatio); result.setEvaluationSkipped(true); result.setErrorMsg(info); return result; } LOG.info("Training on [{}] items, predicting on [{}] of total [{}]", trainingSet.size(), testSet.size(), data.size()); // Train model TokenNameFinderModel model = train(trainingSet, traits.getParameters()); NameFinderME nameFinder = new NameFinderME(model); // Evaluate List<LabelPair> labelPairs = new ArrayList<>(); for (NameSample sample : testSet) { // clear adaptive data from feature generators if necessary if (sample.isClearAdaptiveDataSet()) { nameFinder.clearAdaptiveData(); } // Span contains one NE, Array of them all in one sentence String[] sentence = sample.getSentence(); Span[] predictedNames = nameFinder.find(sentence); Span[] goldNames = sample.getNames(); labelPairs.addAll(determineLabelsForASentence(sentence, predictedNames, goldNames)); } return labelPairs.stream().collect(EvaluationResult .collector(trainingSetSize, testSetSize, trainRatio, NO_NE_TAG)); }