org.grobid.core.GrobidModels Java Examples

Source File: NERFrParser.java From grobid-ner with Apache License 2.0

6 votes

/**
 * Recognizes the named entities present in a sequence of layout tokens, i.e.
 * tokens coming from a document with fixed/preserved layout such as a PDF.
 * The positions of the recognized entities are given with coordinates in
 * the input document.
 * <p>
 * The tokens are matched against the lexicon, turned into a feature vector,
 * labeled, and the labeled output is converted into entities for the
 * {@code GrobidModels.ENTITIES_NERFR} model.
 *
 * @param tokens the layout tokens to analyse, may be {@code null}
 * @return the entities extracted from the labeled tokens, or {@code null}
 *         when {@code tokens} is {@code null}
 */
public List<Entity> extractNE(List<LayoutToken> tokens) {
    if (tokens == null) {
        return null;
    }

    // locate the lexicon matches within the token sequence
    LexiconPositionsIndexes lexiconPositions = new LexiconPositionsIndexes(lexicon);
    lexiconPositions.computeIndexes(tokens);

    // build the feature vector and run the labeling on it
    String featureVector = NERParserCommon.toFeatureVectorLayout(tokens, lexiconPositions);
    String labeledOutput = label(featureVector);

    // note: the sense tagging of the recognized entities and the merge of the
    // senses into the entities are currently switched off in this method
    return nerParserCommon.resultExtraction(GrobidModels.ENTITIES_NERFR, labeledOutput, tokens);
}

Source File: NEREnParser.java From grobid-ner with Apache License 2.0

6 votes

/**
 * Recognizes the named entities present in a sequence of layout tokens, i.e.
 * tokens coming from a document with fixed/preserved layout such as a PDF.
 * The positions of the recognized entities are given with coordinates in
 * the input document.
 * <p>
 * The tokens are matched against the lexicon, turned into a feature vector,
 * labeled, and the labeled output is converted into entities for the
 * {@code GrobidModels.ENTITIES_NER} model.
 *
 * @param tokens the layout tokens to analyse, may be {@code null}
 * @return the entities extracted from the labeled tokens, or {@code null}
 *         when {@code tokens} is {@code null}
 */
public List<Entity> extractNE(List<LayoutToken> tokens) {
    if (tokens == null) {
        return null;
    }

    // locate the lexicon matches within the token sequence
    LexiconPositionsIndexes lexiconPositions = new LexiconPositionsIndexes(lexicon);
    lexiconPositions.computeIndexes(tokens);

    // build the feature vector and run the labeling on it
    String featureVector = NERParserCommon.toFeatureVectorLayout(tokens, lexiconPositions);
    String labeledOutput = label(featureVector);

    // note: the sense tagging of the recognized entities and the merge of the
    // senses into the entities are currently switched off in this method
    return nerParserCommon.resultExtraction(GrobidModels.ENTITIES_NER, labeledOutput, tokens);
}

Source File: NERParserCommonTest.java From grobid-ner with Apache License 2.0

6 votes

/**
 * Two consecutive tokens labeled {@code B-LOCATION} and {@code LOCATION}
 * ("Austria Hungary") are expected to come back as one single LOCATION entity
 * covering the offsets 0 to 15 of the input sentence; two entities are
 * expected overall.
 */
@Test
public void testresultExtraction_clusteror_simple2() throws Exception {
    final String sentence = "Austria Hungary fought the enemies with Germany.";
    // labeled output as produced by the tagger: one token per line, label in the last column
    String labeled = "Austria\taustria\tA\tAu\tAus\tAust\tAustr\ta\tia\tria\ttria\tstria\tINITCAP\tNODIGIT\t0\t0\t0\t0\t1\t0\t0\t1\t1\t1\t1\tXxxx\tXx\t0\tB-LOCATION\n" +
            "Hungary\thungary\tA\tAu\tAus\tAust\tAustr\ta\tia\tria\ttria\tstria\tINITCAP\tNODIGIT\t0\t0\t0\t0\t1\t0\t0\t1\t1\t1\t1\tXxxx\tXx\t0\tLOCATION\n" +
            "fought\tfought\tf\tfo\tfou\tfoug\tfough\tt\tht\tght\tught\tought\tNOCAPS\tNODIGIT\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\txxxx\tx\t0\tO\n" +
            "the\tthe\tt\tth\tthe\tthe\tthe\te\the\tthe\tthe\tthe\tNOCAPS\tNODIGIT\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\txxx\tx\t0\tO\n" +
            "enemies\tenemies\te\ten\tene\tenem\tenemi\ts\tes\ties\tmies\temies\tNOCAPS\tNODIGIT\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\txxxx\tx\t0\tO\n" +
            "with\twith\tw\twi\twit\twith\twith\th\tth\tith\twith\twith\tNOCAPS\tNODIGIT\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\txxxx\tx\t0\tO\n" +
            "Germany\tgermany\tG\tGe\tGer\tGerm\tGerma\ty\tny\tany\tmany\trmany\tINITCAP\tNODIGIT\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\tXxxx\tXx\t0\tB-LOCATION\n" +
            ".\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\tALLCAPS\tNODIGIT\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t.\t.\t0\tO";
    List<LayoutToken> layoutTokens = GrobidAnalyzer.getInstance().tokenizeWithLayoutToken(sentence);

    final List<Entity> extracted = target.resultExtraction(GrobidModels.ENTITIES_NER, labeled, layoutTokens);

    assertThat(extracted, hasSize(2));

    // the first entity must cover both "Austria" and "Hungary"
    final Entity first = extracted.get(0);
    assertThat(first.getRawName(), is("Austria Hungary"));
    assertThat(first.getType(), is(LOCATION));
    assertThat(first.getOffsetStart(), is(0));
    assertThat(first.getOffsetEnd(), is(15));

    // the offsets must point back to the same surface form in the input sentence
    final String covered = sentence.substring(first.getOffsetStart(), first.getOffsetEnd());
    assertThat(covered, is("Austria Hungary"));
}

Source File: NERFrenchTrainer.java From grobid-ner with Apache License 2.0

5 votes

/**
 * Creates a trainer for the {@code GrobidModels.ENTITIES_NERFR} model.
 * <p>
 * The CRF training parameters are adjusted for this model, then the
 * sub-project property file {@code src/main/resources/grobid-ner.properties}
 * (a relative path, hence resolved against the current working directory) is
 * read to get the path to the Le Monde corpus.
 *
 * @throws GrobidResourceException if the property file cannot be opened,
 *                                 read or closed
 */
public NERFrenchTrainer() {
    super(GrobidModels.ENTITIES_NERFR);

    // adjusting CRF training parameters for this model
    this.epsilon = 0.000001;
    this.window = 20;
    this.nbMaxIterations = 1000;

    // read additional properties for this sub-project to get the paths to the resources
    Properties prop = new Properties();
    // try-with-resources closes the stream on every exit path; a failure while
    // closing is reported through the same exception instead of being printed
    // to stderr
    try (InputStream input = new FileInputStream("src/main/resources/grobid-ner.properties")) {
        // load the properties file
        prop.load(input);

        // get the property value
        leMondeCorpusPath = prop.getProperty("grobid.ner.leMondeCorpus.path");
    } catch (IOException ex) {
        throw new GrobidResourceException(
                "An exception occured when accessing/reading the grobid-ner property file.", ex);
    }
}

Source File: SenseTrainer.java From grobid-ner with Apache License 2.0

5 votes

/**
 * Creates a trainer for the {@code GrobidModels.ENTITIES_NERSense} model.
 * <p>
 * The description map is initialised empty, then the module property file
 * {@code src/main/resources/grobid-ner.properties} (a relative path, hence
 * resolved against the current working directory) is read to get the paths
 * to the Reuters, CoNLL, Idilia and extra NER corpus resources.
 *
 * @throws GrobidResourceException if the property file cannot be opened,
 *                                 read or closed
 */
public SenseTrainer() {
    super(GrobidModels.ENTITIES_NERSense);
    descriptions = new TreeMap<String, String>();

    // we read first the module specific property file to get the paths to the resources
    Properties prop = new Properties();

    // try-with-resources closes the stream on every exit path; a failure while
    // closing is reported through the same exception instead of being printed
    // to stderr
    try (InputStream input = new FileInputStream("src/main/resources/grobid-ner.properties")) {
        // load the properties file
        prop.load(input);

        // get the property value
        reutersPath = prop.getProperty("grobid.ner.reuters.paths");
        conllPath = prop.getProperty("grobid.ner.reuters.conll_path");
        idiliaPath = prop.getProperty("grobid.ner.reuters.idilia_path");
        nerCorpusPath = prop.getProperty("grobid.ner.extra_corpus");
    } catch (IOException ex) {
        throw new GrobidResourceException(
                "An exception occured when accessing/reading the grobid-ner property file.", ex);
    }
}

Source File: NERTrainer.java From grobid-ner with Apache License 2.0

5 votes

/**
 * Creates a trainer for the {@code GrobidModels.ENTITIES_NER} model.
 * <p>
 * The CRF training parameters are adjusted for this model, then the
 * sub-project property file {@code src/main/resources/grobid-ner.properties}
 * is read to get the paths to the Reuters, Idilia and extra NER corpus
 * resources.
 *
 * @throws GrobidResourceException if the property file cannot be opened or read
 */
public NERTrainer() {
    super(GrobidModels.ENTITIES_NER);

    // CRF training parameters specific to this model
    this.epsilon = 0.000001;
    this.window = 20;
    this.nbMaxIterations = 200;

    // the resource paths come from the property file of this sub-project
    Properties nerProperties = new Properties();
    InputStream propertyStream = null;
    try {
        propertyStream = new FileInputStream("src/main/resources/grobid-ner.properties");
        nerProperties.load(propertyStream);

        reutersPath = nerProperties.getProperty("grobid.ner.reuters.paths");
        idiliaPath = nerProperties.getProperty("grobid.ner.reuters.idilia_path");
        nerCorpusPath = nerProperties.getProperty("grobid.ner.extra_corpus");
    } catch (IOException ex) {
        throw new GrobidResourceException(
                "An exception occured when accessing/reading the grobid-ner property file.", ex);
    } finally {
        // the stream is closed on every exit path
        IOUtils.closeQuietly(propertyStream);
    }
}

Source File: NERParserCommonTest.java From grobid-ner with Apache License 2.0

5 votes

/**
 * Two single-token entities ("Austria" labeled {@code B-UNKNOWN} and
 * "Germany" labeled {@code B-LOCATION}) are expected to come back as two
 * separate entities, each with the character offsets of its token in the
 * input sentence.
 */
@Test
public void testresultExtraction_clusteror_simple() throws Exception {
    final String sentence = "Austria fought the enemies with Germany.";
    // labeled output as produced by the tagger: one token per line, label in the last column
    String labeled = "Austria\taustria\tA\tAu\tAus\tAust\tAustr\ta\tia\tria\ttria\tstria\tINITCAP\tNODIGIT\t0\t0\t0\t0\t1\t0\t0\t1\t1\t1\t1\tXxxx\tXx\t0\tB-UNKNOWN\n" +
            "fought\tfought\tf\tfo\tfou\tfoug\tfough\tt\tht\tght\tught\tought\tNOCAPS\tNODIGIT\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\txxxx\tx\t0\tO\n" +
            "the\tthe\tt\tth\tthe\tthe\tthe\te\the\tthe\tthe\tthe\tNOCAPS\tNODIGIT\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\txxx\tx\t0\tO\n" +
            "enemies\tenemies\te\ten\tene\tenem\tenemi\ts\tes\ties\tmies\temies\tNOCAPS\tNODIGIT\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\txxxx\tx\t0\tO\n" +
            "with\twith\tw\twi\twit\twith\twith\th\tth\tith\twith\twith\tNOCAPS\tNODIGIT\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\txxxx\tx\t0\tO\n" +
            "Germany\tgermany\tG\tGe\tGer\tGerm\tGerma\ty\tny\tany\tmany\trmany\tINITCAP\tNODIGIT\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\tXxxx\tXx\t0\tB-LOCATION\n" +
            ".\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\tALLCAPS\tNODIGIT\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t.\t.\t0\tO";
    List<LayoutToken> layoutTokens = GrobidAnalyzer.getInstance().tokenizeWithLayoutToken(sentence);

    final List<Entity> extracted = target.resultExtraction(GrobidModels.ENTITIES_NER, labeled, layoutTokens);

    assertThat(extracted, hasSize(2));

    // first entity: "Austria" at the very beginning of the sentence
    final Entity austria = extracted.get(0);
    assertThat(austria.getRawName(), is("Austria"));
    assertThat(austria.getOffsetStart(), is(0));
    assertThat(austria.getOffsetEnd(), is(7));

    // second entity: "Germany" right before the final dot
    final Entity germany = extracted.get(1);
    assertThat(germany.getRawName(), is("Germany"));
    assertThat(germany.getOffsetStart(), is(32));
    assertThat(germany.getOffsetEnd(), is(39));
}

Source File: NLPLeaderboardFigParser.java From science-result-extractor with Apache License 2.0

4 votes

/**
 * Creates the parser by handing the {@code GrobidModels.FIGURE} model to the
 * superclass constructor. The constructor is package-private.
 */
NLPLeaderboardFigParser() {
    super(GrobidModels.FIGURE);
}

Source File: NEREvaluation.java From grobid-ner with Apache License 2.0

4 votes

/**
 * Creates an evaluation set up for the {@code GrobidModels.ENTITIES_NER} model
 * and loads the additional properties.
 */
public NEREvaluation() {
    // NOTE(review): the returned instance is discarded; presumably this call is
    // only here to make sure the GROBID properties are initialised first - confirm
    GrobidProperties.getInstance();
    model = GrobidModels.ENTITIES_NER;
    // loadAdditionalProperties() is defined outside of this excerpt
    loadAdditionalProperties();
}

Source File: SenseTagger.java From grobid-ner with Apache License 2.0

4 votes

/**
 * Creates a sense tagger for the {@code GrobidModels.ENTITIES_NERSense} model
 * by delegating to the constructor taking the model as argument.
 */
public SenseTagger() {
    this(GrobidModels.ENTITIES_NERSense);
}

Source File: NEREnParser.java From grobid-ner with Apache License 2.0

4 votes

/**
 * Creates a parser for the {@code GrobidModels.ENTITIES_NER} model by
 * delegating to the constructor taking the model as argument.
 */
public NEREnParser() {
    this(GrobidModels.ENTITIES_NER);
}

Source File: NERFrParser.java From grobid-ner with Apache License 2.0

2 votes

/**
 * Creates a parser for the {@code GrobidModels.ENTITIES_NERFR} model by
 * delegating to the constructor taking the model as argument.
 */
public NERFrParser() {
    this(GrobidModels.ENTITIES_NERFR);
}

org.grobid.core.GrobidModels Java Examples