Java Code Examples for de.tudarmstadt.ukp.dkpro.core.api.ner.type.NamedEntity#setValue()

The following examples show how to use de.tudarmstadt.ukp.dkpro.core.api.ner.type.NamedEntity#setValue(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: SingleLabelAnnotator.java    From ambiverse-nlu with Apache License 2.0 6 votes vote down vote up
/**
 * Collapses the type labels of all annotations in the CAS into the single label
 * {@code "ENTITY"}.
 *
 * <p>For every {@code NamedEntity}, the substrings PERSON/PER, LOCATION/LOC,
 * ORGANIZATION/ORG and MISC of its value are replaced by {@code "ENTITY"}. Entities
 * without a value are left untouched. Every {@code TextClassificationOutcome} whose
 * outcome is not {@code "OTH"} has its outcome set to {@code "ENTITY"}.
 *
 * @param aJCas the CAS whose annotations are rewritten in place
 * @throws AnalysisEngineProcessException declared by the overridden method
 */
@Override
public void process(JCas aJCas) throws AnalysisEngineProcessException {

	for (NamedEntity entity : JCasUtil.select(aJCas, NamedEntity.class)) {
		String value = entity.getValue();
		if (value == null) {
			// An unlabelled entity has nothing to rewrite; calling replace() on it would
			// fail with a NullPointerException.
			continue;
		}
		// Long forms are replaced before their abbreviations so that e.g. "PERSON"
		// does not end up as "ENTITYSON".
		entity.setValue(value.replace("PERSON", "ENTITY")
				.replace("PER", "ENTITY")
				.replace("LOCATION", "ENTITY")
				.replace("LOC", "ENTITY")
				.replace("ORGANIZATION", "ENTITY")
				.replace("ORG", "ENTITY")
				.replace("MISC", "ENTITY"));
	}

	for (TextClassificationOutcome outcome : JCasUtil.select(aJCas, TextClassificationOutcome.class)) {
		// "OTH" outcomes keep their label; everything else becomes "ENTITY".
		if (!outcome.getOutcome().equals("OTH")) {
			outcome.setOutcome("ENTITY");
		}
	}
}
 
Example 2
Source File: Tcf2DKPro.java    From inception with Apache License 2.0 6 votes vote down vote up
/**
 * Creates one {@code NamedEntity} annotation in the JCas for each entity found in the
 * named-entities layer of the given corpus.
 *
 * <p>Begin and end offsets are taken from elements 0 and 1 of the result of
 * {@code getOffsets(...)}; the annotation value is the entity's type. Does nothing when
 * the corpus has no named-entities layer.
 *
 * @param aJCas       the JCas receiving the new annotations
 * @param aCorpusData the corpus whose named-entities layer is read
 * @param aTokens     token lookup passed through to {@code getOffsets(...)}
 */
public void convertNamedEntities(JCas aJCas, TextCorpus aCorpusData,
        Map<String, Token> aTokens)
{
    // Without a named-entities layer there is nothing to convert.
    if (aCorpusData.getNamedEntitiesLayer() == null) {
        return;
    }

    for (int index = 0; index < aCorpusData.getNamedEntitiesLayer().size(); index++) {
        // Source entity and the tokens it covers
        eu.clarin.weblicht.wlfxb.tc.api.NamedEntity sourceEntity =
                aCorpusData.getNamedEntitiesLayer().getEntity(index);
        eu.clarin.weblicht.wlfxb.tc.api.Token[] coveredTokens =
                aCorpusData.getNamedEntitiesLayer().getTokens(sourceEntity);

        // Target annotation: span from the token offsets, value from the entity type
        NamedEntity target = new NamedEntity(aJCas);
        target.setBegin(getOffsets(coveredTokens, aTokens)[0]);
        target.setEnd(getOffsets(coveredTokens, aTokens)[1]);
        target.setValue(sourceEntity.getType());
        target.addToIndexes();
    }
}
 
Example 3
Source File: VisibilityCalculationTests.java    From inception with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a German test CAS containing two {@code NamedEntity} annotations: one spanning
 * offsets 0-3 with the value {@code "LOC"} and one spanning offsets 13-20 with no value
 * set.
 *
 * @return the CAS backing the prepared JCas
 * @throws Exception if the JCas cannot be created
 */
private CAS getTestCas() throws Exception
{
    JCas jcas = JCasFactory.createText(
            "Dies ist ein Testtext, ach ist der schoen, der schoenste von allen Testtexten.",
            "de");

    // Entity with an explicit label
    NamedEntity labelled = new NamedEntity(jcas, 0, 3);
    labelled.setValue("LOC");
    labelled.addToIndexes();

    // Entity without a label: its value feature is never set and therefore stays null
    NamedEntity unlabelled = new NamedEntity(jcas, 13, 20);
    unlabelled.addToIndexes();

    return jcas.getCas();
}
 
Example 4
Source File: DocumentAnnotations.java    From ambiverse-nlu with Apache License 2.0 5 votes vote down vote up
/**
 * Adds one {@code NamedEntity} annotation with the value {@code "manual"} to the JCas
 * for every entry in {@code mentions}. The entry key is used as the first offset and
 * key + value as the second offset of the annotation.
 *
 * @param jCas the JCas receiving the annotations
 */
public void addMentionsToJCas(JCas jCas) {
  for (Entry<Integer, Integer> mention : mentions) {
    int begin = mention.getKey();
    int end = begin + mention.getValue();

    NamedEntity annotation = new NamedEntity(jCas, begin, end);
    annotation.setValue("manual");
    annotation.addToIndexes();
  }
}
 
Example 5
Source File: MtasUimaParserTest.java    From inception with Apache License 2.0 5 votes vote down vote up
/**
 * Verifies that a {@code NamedEntity} whose begin and end offsets are equal produces no
 * token with a {@code "Named_Entity"} prefix in the token collection created by
 * {@code MtasUimaParser}.
 */
@Test
public void testZeroWidthSpanNotIndexed() throws Exception
{
    TokenBuilder<Token, Sentence> tokenBuilder = TokenBuilder.create(Token.class,
            Sentence.class);
    tokenBuilder.buildTokens(jcas, "This is a test . \n This is sentence two .");

    // Annotation with begin == end
    NamedEntity emptySpan = new NamedEntity(jcas, 4, 4);
    emptySpan.setValue("OTH");
    emptySpan.addToIndexes();

    AnnotationLayer neLayer = new AnnotationLayer(NamedEntity.class.getName(),
            "Named Entity", SPAN_TYPE, project, true, TOKENS, NO_OVERLAP);
    when(annotationSchemaService.listAnnotationLayer(any(Project.class)))
            .thenReturn(asList(neLayer));

    AnnotationFeature valueFeature = new AnnotationFeature(1L, neLayer, "value",
            CAS.TYPE_NAME_STRING);
    AnnotationFeature identifierFeature = new AnnotationFeature(2L, neLayer, "identifier",
            CAS.TYPE_NAME_STRING);
    MtasUimaParser sut = new MtasUimaParser(asList(valueFeature, identifierFeature),
            annotationSchemaService, featureIndexingSupportRegistry);

    MtasTokenCollection collection = sut.createTokenCollection(jcas.getCas());

    MtasUtils.print(collection);

    // Materialize the collection so it can be inspected with AssertJ
    List<MtasToken> indexed = new ArrayList<>();
    collection.iterator().forEachRemaining(token -> indexed.add(token));

    assertThat(indexed)
        .filteredOn(token -> token.getPrefix().startsWith("Named_Entity"))
        .extracting(MtasToken::getPrefix)
        .isEmpty();
}
 
Example 6
Source File: MtasDocumentIndexTest.java    From inception with Apache License 2.0 5 votes vote down vote up
/**
 * Builds the annotated text "The capital of Galicia is Santiago de Compostela ." with a
 * {@code "LOC"} named entity over "Galicia", writes it as the annotation CAS of the given
 * user and source document, and then blocks until the search index of the project is
 * valid and no longer in progress (at most 60 seconds, polled every 5 seconds).
 *
 * @param aProject        the project whose index state is awaited
 * @param aUser           the user owning the annotation document
 * @param aSourceDocument the source document the annotation document belongs to
 * @throws Exception if building or writing the CAS fails
 */
private void annotateDocument(Project aProject, User aUser, SourceDocument aSourceDocument)
    throws Exception
{
    log.info("Preparing annotated document....");

    // Manually build annotated CAS
    JCas jCas = JCasFactory.createJCas();
    JCasBuilder builder = new JCasBuilder(jCas);

    // Tokens before the entity, each followed by a separating space
    for (String word : new String[] { "The", "capital", "of" }) {
        builder.add(word, Token.class);
        builder.add(" ");
    }

    // The entity spans exactly the token "Galicia"
    int begin = builder.getPosition();
    builder.add("Galicia", Token.class);

    NamedEntity ne = new NamedEntity(jCas, begin, builder.getPosition());
    ne.setValue("LOC");
    ne.addToIndexes();

    // Tokens after the entity, each preceded by a separating space
    for (String word : new String[] { "is", "Santiago", "de", "Compostela", "." }) {
        builder.add(" ");
        builder.add(word, Token.class);
    }

    // Create annotation document
    AnnotationDocument annotationDocument = documentService
            .createOrGetAnnotationDocument(aSourceDocument, aUser);

    // Write annotated CAS to annotated document; the storage session stays open for the
    // duration of the write and is closed afterwards.
    try (CasStorageSession casStorageSession = CasStorageSession.open()) {
        log.info("Writing annotated document using documentService.writeAnnotationCas");
        documentService.writeAnnotationCas(jCas.getCas(), annotationDocument, false);
    }

    // This step only waits for the index; nothing is written here.
    log.info("Waiting for annotated document to be indexed");
    await("Waiting for indexing process to complete")
            .atMost(60, SECONDS)
            .pollInterval(5, SECONDS)
            .until(() -> searchService.isIndexValid(aProject)
                    && !searchService.isIndexInProgress(aProject));
    log.info("Indexing complete!");
}
 
Example 7
Source File: DataMajorityNerRecommenderTest.java    From inception with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a German JCas for the given text and adds one {@code NamedEntity} per entry of
 * {@code aVals}, using {@code aIndices[i][0]} and {@code aIndices[i][1]} as its offsets.
 *
 * @param aText    the document text
 * @param aVals    the value of each named entity
 * @param aIndices the offset pair of each named entity, parallel to {@code aVals}
 * @return a mutable list containing the single CAS that was built
 * @throws Exception if the JCas cannot be created
 */
private List<CAS> getTestNECas(String aText, String[] aVals, int[][] aIndices) throws Exception
{
    JCas jcas = JCasFactory.createText(aText, "de");

    for (int n = 0; n < aVals.length; n++) {
        int[] span = aIndices[n];
        NamedEntity entity = new NamedEntity(jcas, span[0], span[1]);
        entity.setValue(aVals[n]);
        entity.addToIndexes();
    }

    List<CAS> result = new ArrayList<>();
    result.add(jcas.getCas());
    return result;
}
 
Example 8
Source File: StringMatchingRecommenderTest.java    From inception with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a German JCas for the given text and populates it with sentences, tokens and
 * named entities at the given offsets.
 *
 * @param aText         the document text
 * @param aVals         the value of each named entity
 * @param aNEIndices    the offset pair of each named entity, parallel to {@code aVals}
 * @param aSentIndices  the offset pair of each sentence
 * @param aTokenIndices the offset pair of each token
 * @return a mutable list containing the single CAS that was built
 * @throws Exception if the JCas cannot be created
 */
private List<CAS> getTestNECas(String aText, String[] aVals, int[][] aNEIndices,
        int[][] aSentIndices, int[][] aTokenIndices)
    throws Exception
{
    JCas jcas = JCasFactory.createText(aText, "de");

    // Sentences
    for (int[] span : aSentIndices) {
        new Sentence(jcas, span[0], span[1]).addToIndexes();
    }

    // Tokens
    for (int[] span : aTokenIndices) {
        new Token(jcas, span[0], span[1]).addToIndexes();
    }

    // Named entities, driven by the number of values
    for (int n = 0; n < aVals.length; n++) {
        int[] span = aNEIndices[n];
        NamedEntity entity = new NamedEntity(jcas, span[0], span[1]);
        entity.setValue(aVals[n]);
        entity.addToIndexes();
    }

    List<CAS> result = new ArrayList<>();
    result.add(jcas.getCas());
    return result;
}
 
Example 9
Source File: Tsv3XSerializerTest.java    From webanno with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a {@code NamedEntity} with the given offsets, assigns it the given value and
 * adds it to the indexes.
 *
 * @param aJCas  the JCas the annotation is created in
 * @param aBegin the begin offset
 * @param aEnd   the end offset
 * @param aValue the value to set on the annotation
 * @return the annotation that was created and indexed
 */
private NamedEntity addNamedEntity(JCas aJCas, int aBegin, int aEnd, String aValue)
{
    NamedEntity created = new NamedEntity(aJCas, aBegin, aEnd);
    created.setValue(aValue);
    created.addToIndexes();
    return created;
}
 
Example 10
Source File: ConstraintsGeneratorTest.java    From webanno with Apache License 2.0 5 votes vote down vote up
/**
 * Links a governor entity ("Animal") and a dependent entity ("NotWeight") through a
 * {@code webanno.custom.Relation} annotation, evaluates the rules in
 * {@code twoConditions.rules} for the feature {@code "label"} and expects that no
 * possible values are generated.
 */
@Test
public void testTwoConditions()
    throws Exception
{
    JCas jcas = makeJCasOneSentence();
    CAS cas = jcas.getCas();

    List<Token> tokens = new ArrayList<>(select(jcas, Token.class));

    // Governor sits on the first token, dependent on the last one
    Token t1 = tokens.get(0);
    Token t2 = tokens.get(tokens.size() - 1);

    NamedEntity gov = new NamedEntity(jcas, t1.getBegin(), t1.getEnd());
    gov.setValue("Animal");
    gov.addToIndexes();
    NamedEntity dep = new NamedEntity(jcas, t2.getBegin(), t2.getEnd());
    dep.setValue("NotWeight");
    dep.addToIndexes();

    Type relationType = cas.getTypeSystem().getType("webanno.custom.Relation");

    // The relation annotation is anchored on the dependent's span
    AnnotationFS fs1 = cas.createAnnotation(relationType, dep.getBegin(), dep.getEnd());
    FSUtil.setFeature(fs1, "Governor", gov);
    FSUtil.setFeature(fs1, "Dependent", dep);
    cas.addFsToIndexes(fs1);

    ParsedConstraints constraints;
    // try-with-resources releases the rules file handle once parsing is finished
    try (FileInputStream rules = new FileInputStream(
            "src/test/resources/rules/twoConditions.rules")) {
        ConstraintsGrammar parser = new ConstraintsGrammar(rules);
        Parse p = parser.Parse();
        constraints = p.accept(new ParserVisitor());
    }

    Evaluator constraintsEvaluator = new ValuesGenerator();
    List<PossibleValue> possibleValues = constraintsEvaluator.generatePossibleValues(
            fs1, "label", constraints);

    System.out.println(possibleValues);

    // "Weight" != "NotWeight", so the rule should not match
    assertEquals(0, possibleValues.size());
}
 
Example 11
Source File: MtasUimaParserTest.java    From inception with Apache License 2.0 4 votes vote down vote up
/**
 * Verifies that a {@code "PER"} named entity over "John Smith" yields exactly two tokens
 * with a {@code "Named_Entity"} prefix: one for the annotation itself (empty postfix) and
 * one for its {@code value} feature (postfix {@code "PER"}).
 */
@Test
public void testNamedEnity() throws Exception
{
    JCasBuilder textBuilder = new JCasBuilder(jcas);
    textBuilder.add("I", Token.class);
    textBuilder.add(" ");
    textBuilder.add("am", Token.class);
    textBuilder.add(" ");

    // The entity covers the two tokens "John" and "Smith"
    int nameStart = textBuilder.getPosition();
    textBuilder.add("John", Token.class);
    textBuilder.add(" ");
    textBuilder.add("Smith", Token.class);
    NamedEntity person = new NamedEntity(jcas, nameStart, textBuilder.getPosition());
    person.setValue("PER");
    person.addToIndexes();

    textBuilder.add(" ");
    textBuilder.add(".", Token.class);

    AnnotationLayer neLayer = new AnnotationLayer(NamedEntity.class.getName(),
            "Named Entity", SPAN_TYPE, project, true, TOKENS, NO_OVERLAP);
    when(annotationSchemaService.listAnnotationLayer(any(Project.class)))
            .thenReturn(asList(neLayer));

    AnnotationFeature valueFeature = new AnnotationFeature(1L, neLayer, "value",
            CAS.TYPE_NAME_STRING);
    AnnotationFeature identifierFeature = new AnnotationFeature(2L, neLayer, "identifier",
            CAS.TYPE_NAME_STRING);
    MtasUimaParser sut = new MtasUimaParser(asList(valueFeature, identifierFeature),
            annotationSchemaService, featureIndexingSupportRegistry);

    MtasTokenCollection collection = sut.createTokenCollection(jcas.getCas());

    MtasUtils.print(collection);

    // Materialize the collection so it can be inspected with AssertJ
    List<MtasToken> indexed = new ArrayList<>();
    collection.iterator().forEachRemaining(token -> indexed.add(token));

    assertThat(indexed)
        .filteredOn(token -> token.getPrefix().startsWith("Named_Entity"))
        .extracting(MtasToken::getPrefix)
        .containsExactly("Named_Entity", "Named_Entity.value");

    assertThat(indexed)
        .filteredOn(token -> token.getPrefix().startsWith("Named_Entity"))
        .extracting(MtasToken::getPostfix)
        .containsExactly("", "PER");
}
 
Example 12
Source File: OpenNlpDoccatRecommenderTest.java    From inception with Apache License 2.0 4 votes vote down vote up
/**
 * Reads the next resource into the given JCas, one sentence per line.
 *
 * <p>Each data line is split at its first whitespace character into a label and the
 * sentence text. The text is appended to the document (lines are joined with
 * {@code "\n"}), and both a {@code NamedEntity} carrying the label and a
 * {@code Sentence} are created over the appended span. Lines starting with {@code "#"}
 * and blank lines are skipped.
 *
 * @param aJCas the JCas to populate
 * @throws IOException if the resource cannot be read or a data line has no text after
 *                     its label
 * @throws CollectionException declared by the overridden method
 */
@Override
public void getNext(JCas aJCas) throws IOException, CollectionException
{
    Resource res = nextFile();
    initCas(aJCas, res);

    StringBuilder text = new StringBuilder();

    try (InputStream is = new BufferedInputStream(
            CompressionUtils.getInputStream(res.getLocation(), res.getInputStream()))) {
        LineIterator i = IOUtils.lineIterator(is, "UTF-8");
        try {
            while (i.hasNext()) {
                String line = i.next();

                // Comment lines carry no data
                if (line.startsWith("#")) {
                    continue;
                }

                // Blank lines carry neither label nor text
                if (line.trim().isEmpty()) {
                    continue;
                }

                // Limit 2: only the first whitespace separates label from text
                String[] fields = line.split("\\s", 2);
                if (fields.length < 2) {
                    // Fail with a readable message instead of an index-out-of-bounds error
                    throw new IOException(
                            "Expected a label followed by whitespace and text but found ["
                                    + line + "]");
                }

                if (text.length() > 0) {
                    text.append("\n");
                }

                int sentenceBegin = text.length();
                text.append(fields[1]);

                NamedEntity ne = new NamedEntity(aJCas, sentenceBegin, text.length());
                ne.setValue(fields[0]);
                ne.addToIndexes();

                new Sentence(aJCas, sentenceBegin, text.length()).addToIndexes();
            }
        }
        finally {
            LineIterator.closeQuietly(i);
        }

        // The document text is only complete after all lines have been consumed
        aJCas.setDocumentText(text.toString());
    }
}