org.grobid.core.GrobidModels Java Examples
The following examples show how to use
org.grobid.core.GrobidModels.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: NERFrParser.java From grobid-ner with Apache License 2.0 | 6 votes |
/** * Extract all occurrences of named entities from a list of LayoutToken * coming from a document with fixed/preserved layout, e.g. PDF. * The positions of the recognized entities are given with coordinates in * the input document. */ public List<Entity> extractNE(List<LayoutToken> tokens) { if (tokens == null) return null; LexiconPositionsIndexes positionsIndexes = new LexiconPositionsIndexes(lexicon); positionsIndexes.computeIndexes(tokens); String res = NERParserCommon.toFeatureVectorLayout(tokens, positionsIndexes); String result = label(res); //List<Pair<String, String>> labeled = GenericTaggerUtils.getTokensAndLabels(result); //String text = LayoutTokensUtil.toText(tokens); List<Entity> entities = nerParserCommon.resultExtraction(GrobidModels.ENTITIES_NERFR, result, tokens); // we use now the sense tagger for the recognized named entity //List<Sense> senses = senseTagger.extractSenses(labeled, tokens, positionsIndexes); //NERParserCommon.merge(entities, senses); return entities; }
Example #2
Source File: NEREnParser.java From grobid-ner with Apache License 2.0 | 6 votes |
/** * Extract all occurrences of named entities from a list of LayoutToken * coming from a document with fixed/preserved layout, e.g. PDF. * The positions of the recognized entities are given with coordinates in * the input document. */ public List<Entity> extractNE(List<LayoutToken> tokens) { if (tokens == null) return null; LexiconPositionsIndexes positionsIndexes = new LexiconPositionsIndexes(lexicon); positionsIndexes.computeIndexes(tokens); String res = NERParserCommon.toFeatureVectorLayout(tokens, positionsIndexes); String result = label(res); //List<Pair<String, String>> labeled = GenericTaggerUtils.getTokensAndLabels(result); //String text = LayoutTokensUtil.toText(tokens); List<Entity> entities = nerParserCommon.resultExtraction(GrobidModels.ENTITIES_NER, result, tokens); // we use now the sense tagger for the recognized named entity //List<Sense> senses = senseTagger.extractSenses(labeled, tokens, positionsIndexes); //NERParserCommon.merge(entities, senses); return entities; }
Example #3
Source File: NERParserCommonTest.java From grobid-ner with Apache License 2.0 | 6 votes |
/**
 * Checks that a B-LOCATION token directly followed by a LOCATION token is extracted as
 * one entity ("Austria Hungary") whose offsets point back into the input text.
 */
@Test
public void testresultExtraction_clusteror_simple2() throws Exception {
    final String input = "Austria Hungary fought the enemies with Germany.";

    // One labelled feature line per token; the last column is the label.
    final String labelledFeatures =
            "Austria\taustria\tA\tAu\tAus\tAust\tAustr\ta\tia\tria\ttria\tstria\tINITCAP\tNODIGIT\t0\t0\t0\t0\t1\t0\t0\t1\t1\t1\t1\tXxxx\tXx\t0\tB-LOCATION\n" +
            "Hungary\thungary\tA\tAu\tAus\tAust\tAustr\ta\tia\tria\ttria\tstria\tINITCAP\tNODIGIT\t0\t0\t0\t0\t1\t0\t0\t1\t1\t1\t1\tXxxx\tXx\t0\tLOCATION\n" +
            "fought\tfought\tf\tfo\tfou\tfoug\tfough\tt\tht\tght\tught\tought\tNOCAPS\tNODIGIT\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\txxxx\tx\t0\tO\n" +
            "the\tthe\tt\tth\tthe\tthe\tthe\te\the\tthe\tthe\tthe\tNOCAPS\tNODIGIT\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\txxx\tx\t0\tO\n" +
            "enemies\tenemies\te\ten\tene\tenem\tenemi\ts\tes\ties\tmies\temies\tNOCAPS\tNODIGIT\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\txxxx\tx\t0\tO\n" +
            "with\twith\tw\twi\twit\twith\twith\th\tth\tith\twith\twith\tNOCAPS\tNODIGIT\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\txxxx\tx\t0\tO\n" +
            "Germany\tgermany\tG\tGe\tGer\tGerm\tGerma\ty\tny\tany\tmany\trmany\tINITCAP\tNODIGIT\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\tXxxx\tXx\t0\tB-LOCATION\n" +
            ".\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\tALLCAPS\tNODIGIT\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t.\t.\t0\tO";

    final List<LayoutToken> layoutTokens = GrobidAnalyzer.getInstance().tokenizeWithLayoutToken(input);
    final List<Entity> entities =
            target.resultExtraction(GrobidModels.ENTITIES_NER, labelledFeatures, layoutTokens);

    assertThat(entities, hasSize(2));

    // Only the first (merged) entity is inspected in detail.
    final Entity merged = entities.get(0);
    assertThat(merged.getRawName(), is("Austria Hungary"));
    assertThat(merged.getType(), is(LOCATION));
    assertThat(merged.getOffsetStart(), is(0));
    assertThat(merged.getOffsetEnd(), is(15));
    assertThat(input.substring(merged.getOffsetStart(), merged.getOffsetEnd()), is("Austria Hungary"));
}
Example #4
Source File: NERFrenchTrainer.java From grobid-ner with Apache License 2.0 | 5 votes |
public NERFrenchTrainer() { super(GrobidModels.ENTITIES_NERFR); // adjusting CRF training parameters for this model this.epsilon = 0.000001; this.window = 20; this.nbMaxIterations = 1000; // read additional properties for this sub-project to get the paths to the resources Properties prop = new Properties(); InputStream input = null; try { input = new FileInputStream("src/main/resources/grobid-ner.properties"); // load the properties file prop.load(input); // get the property value leMondeCorpusPath = prop.getProperty("grobid.ner.leMondeCorpus.path"); } catch (IOException ex) { throw new GrobidResourceException( "An exception occured when accessing/reading the grobid-ner property file.", ex); } finally { if (input != null) { try { input.close(); } catch (IOException e) { e.printStackTrace(); } } } }
Example #5
Source File: SenseTrainer.java From grobid-ner with Apache License 2.0 | 5 votes |
public SenseTrainer() { super(GrobidModels.ENTITIES_NERSense); descriptions = new TreeMap<String, String>(); // we read first the module specific property file to get the paths to the resources Properties prop = new Properties(); InputStream input = null; try { input = new FileInputStream("src/main/resources/grobid-ner.properties"); // load the properties file prop.load(input); // get the property value reutersPath = prop.getProperty("grobid.ner.reuters.paths"); conllPath = prop.getProperty("grobid.ner.reuters.conll_path"); idiliaPath = prop.getProperty("grobid.ner.reuters.idilia_path"); nerCorpusPath = prop.getProperty("grobid.ner.extra_corpus"); } catch (IOException ex) { throw new GrobidResourceException( "An exception occured when accessing/reading the grobid-ner property file.", ex); } finally { if (input != null) { try { input.close(); } catch (IOException e) { e.printStackTrace(); } } } }
Example #6
Source File: NERTrainer.java From grobid-ner with Apache License 2.0 | 5 votes |
public NERTrainer() { super(GrobidModels.ENTITIES_NER); // adjusting CRF training parameters for this model this.epsilon = 0.000001; this.window = 20; this.nbMaxIterations = 200; // read additional properties for this sub-project to get the paths to the resources Properties prop = new Properties(); InputStream input = null; try { input = new FileInputStream("src/main/resources/grobid-ner.properties"); // load the properties file prop.load(input); // get the property value reutersPath = prop.getProperty("grobid.ner.reuters.paths"); idiliaPath = prop.getProperty("grobid.ner.reuters.idilia_path"); nerCorpusPath = prop.getProperty("grobid.ner.extra_corpus"); } catch (IOException ex) { throw new GrobidResourceException( "An exception occured when accessing/reading the grobid-ner property file.", ex); } finally { IOUtils.closeQuietly(input); } }
Example #7
Source File: NERParserCommonTest.java From grobid-ner with Apache License 2.0 | 5 votes |
/**
 * Checks that two separate single-token entities ("Austria" labelled B-UNKNOWN and
 * "Germany" labelled B-LOCATION) are extracted with the expected raw names and offsets.
 */
@Test
public void testresultExtraction_clusteror_simple() throws Exception {
    final String input = "Austria fought the enemies with Germany.";

    // One labelled feature line per token; the last column is the label.
    final String labelledFeatures =
            "Austria\taustria\tA\tAu\tAus\tAust\tAustr\ta\tia\tria\ttria\tstria\tINITCAP\tNODIGIT\t0\t0\t0\t0\t1\t0\t0\t1\t1\t1\t1\tXxxx\tXx\t0\tB-UNKNOWN\n" +
            "fought\tfought\tf\tfo\tfou\tfoug\tfough\tt\tht\tght\tught\tought\tNOCAPS\tNODIGIT\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\txxxx\tx\t0\tO\n" +
            "the\tthe\tt\tth\tthe\tthe\tthe\te\the\tthe\tthe\tthe\tNOCAPS\tNODIGIT\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\txxx\tx\t0\tO\n" +
            "enemies\tenemies\te\ten\tene\tenem\tenemi\ts\tes\ties\tmies\temies\tNOCAPS\tNODIGIT\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\txxxx\tx\t0\tO\n" +
            "with\twith\tw\twi\twit\twith\twith\th\tth\tith\twith\twith\tNOCAPS\tNODIGIT\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\txxxx\tx\t0\tO\n" +
            "Germany\tgermany\tG\tGe\tGer\tGerm\tGerma\ty\tny\tany\tmany\trmany\tINITCAP\tNODIGIT\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\tXxxx\tXx\t0\tB-LOCATION\n" +
            ".\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\tALLCAPS\tNODIGIT\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t.\t.\t0\tO";

    final List<LayoutToken> layoutTokens = GrobidAnalyzer.getInstance().tokenizeWithLayoutToken(input);
    final List<Entity> entities =
            target.resultExtraction(GrobidModels.ENTITIES_NER, labelledFeatures, layoutTokens);

    assertThat(entities, hasSize(2));

    // First entity: "Austria" at the very start of the input.
    final Entity austria = entities.get(0);
    assertThat(austria.getRawName(), is("Austria"));
    assertThat(austria.getOffsetStart(), is(0));
    assertThat(austria.getOffsetEnd(), is(7));

    // Second entity: "Germany", just before the final period.
    final Entity germany = entities.get(1);
    assertThat(germany.getRawName(), is("Germany"));
    assertThat(germany.getOffsetStart(), is(32));
    assertThat(germany.getOffsetEnd(), is(39));
}
Example #8
Source File: NLPLeaderboardFigParser.java From science-result-extractor with Apache License 2.0 | 4 votes |
/**
 * Package-private constructor: passes {@code GrobidModels.FIGURE} to the superclass
 * constructor, so this parser is bound to the figure model.
 */
NLPLeaderboardFigParser() { super(GrobidModels.FIGURE); }
Example #9
Source File: NEREvaluation.java From grobid-ner with Apache License 2.0 | 4 votes |
/**
 * Sets up an evaluation for the {@code GrobidModels.ENTITIES_NER} model.
 *
 * Steps, in order: calls {@code GrobidProperties.getInstance()} and discards the result
 * (presumably to force the GROBID properties to be initialised first — TODO confirm),
 * stores the model in the {@code model} field, then calls {@code loadAdditionalProperties()}.
 */
public NEREvaluation() { GrobidProperties.getInstance(); model = GrobidModels.ENTITIES_NER; loadAdditionalProperties(); }
Example #10
Source File: SenseTagger.java From grobid-ner with Apache License 2.0 | 4 votes |
/**
 * Default constructor: delegates to the constructor taking a model, using
 * {@code GrobidModels.ENTITIES_NERSense}.
 */
public SenseTagger() { this(GrobidModels.ENTITIES_NERSense); }
Example #11
Source File: NEREnParser.java From grobid-ner with Apache License 2.0 | 4 votes |
/**
 * Default constructor: delegates to the constructor taking a model, using
 * {@code GrobidModels.ENTITIES_NER}.
 */
public NEREnParser() { this(GrobidModels.ENTITIES_NER); }
Example #12
Source File: NERFrParser.java From grobid-ner with Apache License 2.0 | 2 votes |
/**
 * Default constructor: delegates to the constructor taking a model, using
 * {@code GrobidModels.ENTITIES_NERFR}.
 */
public NERFrParser() { this(GrobidModels.ENTITIES_NERFR); }