Java Code Examples for de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token#getBegin()
The following examples show how to use
de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token#getBegin().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: WebAnnoTsv3WriterTestBase.java From webanno with Apache License 2.0 | 6 votes |
/**
 * Covers every token with a {@code Span} whose value is {@code NOTSTACKED[n]} (n being the
 * running token index) and passes the CAS to {@code writeAndAssertEquals} with the span
 * layer configured.
 */
@Test
public void testTokenBoundedStackedLookAlike() throws Exception
{
    JCas jcas = makeJCasOneSentence();

    int index = 0;
    for (Token token : select(jcas, Token.class)) {
        Span span = new Span(jcas, token.getBegin(), token.getEnd());
        // The value only looks like a stacking suffix; it is plain feature text.
        span.setValue("NOTSTACKED[" + (index++) + "]");
        span.addToIndexes();
    }

    writeAndAssertEquals(jcas,
            WebannoTsv3Writer.PARAM_SPAN_LAYERS, asList(Span.class));
}
Example 2
Source File: WebAnnoTsv3WriterTestBase.java From webanno with Apache License 2.0 | 6 votes |
/**
 * Covers every token with a {@code Span} whose value looks like a BIO tag
 * ({@code B-NOTBIO!} on the first token, {@code I-NOTBIO!} on the rest) and passes the CAS
 * to {@code writeAndAssertEquals} with the span layer configured.
 */
@Test
public void testTokenBoundedBioLookAlike() throws Exception
{
    JCas jcas = makeJCasOneSentence();

    boolean firstToken = true;
    for (Token token : select(jcas, Token.class)) {
        String prefix;
        if (firstToken) {
            prefix = "B-";
        }
        else {
            prefix = "I-";
        }
        firstToken = false;

        Span span = new Span(jcas, token.getBegin(), token.getEnd());
        span.setValue(prefix + "NOTBIO!");
        span.addToIndexes();
    }

    writeAndAssertEquals(jcas,
            WebannoTsv3Writer.PARAM_SPAN_LAYERS, asList(Span.class));
}
Example 3
Source File: RelationRendererTest.java From webanno with Apache License 2.0 | 5 votes |
/**
 * Adds a relation between the first and the last POS annotation while the layer permits
 * cross-sentence relations, then switches the permission off and renders. The rendered
 * document is expected to carry an ERROR comment for that relation.
 */
@Test
public void thatRelationCrossSentenceBehaviorOnRenderGeneratesErrors() throws Exception
{
    TokenBuilder<Token, Sentence> tokenBuilder = new TokenBuilder<>(Token.class,
            Sentence.class);
    tokenBuilder.buildTokens(jcas, "This is a test .\nThis is sentence two .");

    // Attach one POS annotation to every token.
    for (Token token : select(jcas, Token.class)) {
        POS tag = new POS(jcas, token.getBegin(), token.getEnd());
        token.setPos(tag);
        tag.addToIndexes();
    }

    RelationAdapter adapter = new RelationAdapter(layerSupportRegistry,
            featureSupportRegistry, null, depLayer, FEAT_REL_TARGET, FEAT_REL_SOURCE,
            () -> asList(dependencyLayerGovernor, dependencyLayerDependent), behaviors);

    List<POS> tags = new ArrayList<>(select(jcas, POS.class));
    POS firstTag = tags.get(0);
    POS lastTag = tags.get(tags.size() - 1);

    // The relation is created while crossing sentences is still allowed ...
    depLayer.setCrossSentence(true);
    AnnotationFS relation = adapter.add(document, username, firstTag, lastTag,
            jcas.getCas());

    // ... and rendered after it has been forbidden.
    depLayer.setCrossSentence(false);
    RelationRenderer sut = new RelationRenderer(adapter, layerSupportRegistry,
            featureSupportRegistry, asList(new RelationCrossSentenceBehavior()));

    VDocument rendered = new VDocument();
    sut.render(jcas.getCas(), asList(), rendered, 0, jcas.getDocumentText().length());

    assertThat(rendered.comments())
            .usingFieldByFieldElementComparator()
            .contains(new VComment(relation, ERROR,
                    "Crossing sentence boundaries is not permitted."));
}
Example 4
Source File: WebAnnoTsv3WriterTestBase.java From webanno with Apache License 2.0 | 5 votes |
@Test public void testSingleStackedNonTokenRelationWithoutFeatureValue2() throws Exception { JCas jcas = makeJCasOneSentence(); CAS cas = jcas.getCas(); List<Token> tokens = new ArrayList<>(select(jcas, Token.class)); Token t1 = tokens.get(0); Token t2 = tokens.get(tokens.size() - 1); Span gov = new Span(jcas, t1.getBegin(), t1.getEnd()); gov.addToIndexes(); Span dep = new Span(jcas, t2.getBegin(), t2.getEnd()); dep.addToIndexes(); new Span(jcas, t2.getBegin(), t2.getEnd()).addToIndexes(); Type relationType = cas.getTypeSystem().getType("webanno.custom.Relation"); // One at the beginning // WebAnno legacy conventions // AnnotationFS fs1 = cas.createAnnotation(relationType, // min(dep.getBegin(), gov.getBegin()), // max(dep.getEnd(), gov.getEnd())); // DKPro Core conventions AnnotationFS fs1 = cas.createAnnotation(relationType, dep.getBegin(), dep.getEnd()); FSUtil.setFeature(fs1, "Governor", gov); FSUtil.setFeature(fs1, "Dependent", dep); cas.addFsToIndexes(fs1); writeAndAssertEquals(jcas, WebannoTsv3Writer.PARAM_SPAN_LAYERS, asList(Span.class), WebannoTsv3Writer.PARAM_RELATION_LAYERS, asList("webanno.custom.Relation")); }
Example 5
Source File: ConstraintsGeneratorTest.java From webanno with Apache License 2.0 | 5 votes |
@Test public void testTwoConditions() throws Exception { JCas jcas = makeJCasOneSentence(); CAS cas = jcas.getCas(); List<Token> tokens = new ArrayList<>(select(jcas, Token.class)); Token t1 = tokens.get(0); Token t2 = tokens.get(tokens.size() - 1); NamedEntity gov = new NamedEntity(jcas, t1.getBegin(), t1.getEnd()); gov.setValue("Animal"); gov.addToIndexes(); NamedEntity dep = new NamedEntity(jcas, t2.getBegin(), t2.getEnd()); dep.setValue("NotWeight"); dep.addToIndexes(); Type relationType = cas.getTypeSystem().getType("webanno.custom.Relation"); AnnotationFS fs1 = cas.createAnnotation(relationType, dep.getBegin(), dep.getEnd()); FSUtil.setFeature(fs1, "Governor", gov); FSUtil.setFeature(fs1, "Dependent", dep); cas.addFsToIndexes(fs1); ConstraintsGrammar parser = new ConstraintsGrammar(new FileInputStream( "src/test/resources/rules/twoConditions.rules")); Parse p = parser.Parse(); ParsedConstraints constraints = p.accept(new ParserVisitor()); Evaluator constraintsEvaluator = new ValuesGenerator(); List<PossibleValue> possibleValues = constraintsEvaluator.generatePossibleValues( fs1, "label", constraints); System.out.println(possibleValues); // "Weight" != "NotWeight", so the rule should not match assertEquals(0, possibleValues.size()); }
Example 6
Source File: WebAnnoTsv3WriterTestBase.java From webanno with Apache License 2.0 | 5 votes |
@Test public void testSingleStackedNonTokenRelationWithoutFeatureValue3() throws Exception { JCas jcas = makeJCasOneSentence(); CAS cas = jcas.getCas(); List<Token> tokens = new ArrayList<>(select(jcas, Token.class)); Token t1 = tokens.get(0); Token t2 = tokens.get(tokens.size() - 1); Span gov = new Span(jcas, t1.getBegin(), t1.getEnd()); gov.addToIndexes(); new Span(jcas, t1.getBegin(), t1.getEnd()).addToIndexes(); Span dep = new Span(jcas, t2.getBegin(), t2.getEnd()); dep.addToIndexes(); Type relationType = cas.getTypeSystem().getType("webanno.custom.Relation"); // One at the beginning // WebAnno legacy conventions // AnnotationFS fs1 = cas.createAnnotation(relationType, // min(dep.getBegin(), gov.getBegin()), // max(dep.getEnd(), gov.getEnd())); // DKPro Core conventions AnnotationFS fs1 = cas.createAnnotation(relationType, dep.getBegin(), dep.getEnd()); FSUtil.setFeature(fs1, "Governor", gov); FSUtil.setFeature(fs1, "Dependent", dep); cas.addFsToIndexes(fs1); writeAndAssertEquals(jcas, WebannoTsv3Writer.PARAM_SPAN_LAYERS, asList(Span.class), WebannoTsv3Writer.PARAM_RELATION_LAYERS, asList("webanno.custom.Relation")); }
Example 7
Source File: WebAnnoTsv3WriterTestBase.java From webanno with Apache License 2.0 | 5 votes |
/**
 * Covers every token with a {@code Span} whose value is a literal asterisk and passes the
 * CAS to {@code writeAndAssertEquals} with the span layer configured.
 */
@Test
public void testTokenBoundedSpanWithAsteriskFeatureValue() throws Exception
{
    JCas jcas = makeJCasOneSentence();

    final String asterisk = "*";
    for (Token token : select(jcas, Token.class)) {
        Span span = new Span(jcas, token.getBegin(), token.getEnd());
        span.setValue(asterisk);
        span.addToIndexes();
    }

    writeAndAssertEquals(jcas,
            WebannoTsv3Writer.PARAM_SPAN_LAYERS, asList(Span.class));
}
Example 8
Source File: WebAnnoTsv3WriterTestBase.java From webanno with Apache License 2.0 | 5 votes |
/**
 * Covers every token with a {@code Span} whose value is a string of punctuation, quote,
 * bracket, tab and newline characters and passes the CAS to {@code writeAndAssertEquals}
 * with the span layer configured.
 */
@Test
public void testTokenBoundedSpanWithSpecialSymbolsValue() throws Exception
{
    JCas jcas = makeJCasOneSentence();

    final String specialSymbols = "#*'\"`´\t:;{}|[ ]()\\§$%?=&_\n";
    for (Token token : select(jcas, Token.class)) {
        Span span = new Span(jcas, token.getBegin(), token.getEnd());
        span.setValue(specialSymbols);
        span.addToIndexes();
    }

    writeAndAssertEquals(jcas,
            WebannoTsv3Writer.PARAM_SPAN_LAYERS, asList(Span.class));
}
Example 9
Source File: ArgumentPrinterUtils.java From argument-reasoning-comprehension-task with Apache License 2.0 | 5 votes |
/**
 * Looks up the sentences covering the given token and returns the first of them, provided
 * it begins at the same offset as the token.
 *
 * @param t
 *            token
 * @return the first covering sentence if it begins exactly where the token begins;
 *         {@code null} if there is no covering sentence or it begins elsewhere
 */
public static Sentence sentenceStartsOnToken(Token t)
{
    List<Sentence> covering = JCasUtil.selectCovering(Sentence.class, t);
    if (covering.isEmpty()) {
        return null;
    }

    Sentence candidate = covering.get(0);
    if (candidate.getBegin() != t.getBegin()) {
        return null;
    }
    return candidate;
}
Example 10
Source File: WebAnnoTsv3WriterTestBase.java From webanno with Apache License 2.0 | 5 votes |
@Test public void testSingleNonMultiTokenRelationWithoutFeatureValue() throws Exception { JCas jcas = makeJCasOneSentence(); CAS cas = jcas.getCas(); List<Token> tokens = new ArrayList<>(select(jcas, Token.class)); Token t1 = tokens.get(0); Token t2 = tokens.get(1); Token t3 = tokens.get(2); Token t4 = tokens.get(3); Span gov = new Span(jcas, t1.getBegin(), t2.getEnd()); gov.addToIndexes(); Span dep = new Span(jcas, t3.getBegin(), t4.getEnd()); dep.addToIndexes(); Type relationType = cas.getTypeSystem().getType("webanno.custom.Relation"); // One at the beginning // WebAnno legacy conventions // AnnotationFS fs1 = cas.createAnnotation(relationType, // min(dep.getBegin(), gov.getBegin()), // max(dep.getEnd(), gov.getEnd())); // DKPro Core conventions AnnotationFS fs1 = cas.createAnnotation(relationType, dep.getBegin(), dep.getEnd()); FSUtil.setFeature(fs1, "Governor", gov); FSUtil.setFeature(fs1, "Dependent", dep); cas.addFsToIndexes(fs1); writeAndAssertEquals(jcas, WebannoTsv3Writer.PARAM_SPAN_LAYERS, asList(Span.class), WebannoTsv3Writer.PARAM_RELATION_LAYERS, asList("webanno.custom.Relation")); }
Example 11
Source File: ArgumentPrinterUtils.java From argument-reasoning-comprehension-task with Apache License 2.0 | 5 votes |
/** * Returns true if the token has a preceding whitespace in the original document * * @param token token * @param jCas jcas * @return boolen */ public static boolean hasSpaceBefore(Token token, JCas jCas) { // select previous token(s) List<Token> prevTokens = JCasUtil.selectPreceding(jCas, Token.class, token, 1); Paragraph paragraph = JCasUtil.selectCovering(jCas, Paragraph.class, token).iterator() .next(); return !prevTokens.isEmpty() && (prevTokens.iterator().next().getEnd() != token.getBegin()) && (token.getBegin() != paragraph.getBegin()); }
Example 12
Source File: WebAnnoTsv3WriterTestBase.java From webanno with Apache License 2.0 | 5 votes |
@Test public void testSingleStackedNonTokenRelationWithoutFeatureValue() throws Exception { JCas jcas = makeJCasOneSentence(); CAS cas = jcas.getCas(); List<Token> tokens = new ArrayList<>(select(jcas, Token.class)); Token t1 = tokens.get(0); Token t2 = tokens.get(tokens.size() - 1); Span gov = new Span(jcas, t1.getBegin(), t1.getEnd()); gov.addToIndexes(); new Span(jcas, t1.getBegin(), t1.getEnd()).addToIndexes(); Span dep = new Span(jcas, t2.getBegin(), t2.getEnd()); dep.addToIndexes(); new Span(jcas, t2.getBegin(), t2.getEnd()).addToIndexes(); Type relationType = cas.getTypeSystem().getType("webanno.custom.Relation"); // One at the beginning // WebAnno legacy conventions // AnnotationFS fs1 = cas.createAnnotation(relationType, // min(dep.getBegin(), gov.getBegin()), // max(dep.getEnd(), gov.getEnd())); // DKPro Core conventions AnnotationFS fs1 = cas.createAnnotation(relationType, dep.getBegin(), dep.getEnd()); FSUtil.setFeature(fs1, "Governor", gov); FSUtil.setFeature(fs1, "Dependent", dep); cas.addFsToIndexes(fs1); writeAndAssertEquals(jcas, WebannoTsv3Writer.PARAM_SPAN_LAYERS, asList(Span.class), WebannoTsv3Writer.PARAM_RELATION_LAYERS, asList("webanno.custom.Relation")); }
Example 13
Source File: DictionaryMatchAnnotator.java From ambiverse-nlu with Apache License 2.0 | 5 votes |
/**
 * Adds one {@code YagoDictionaryMatch} annotation to the CAS indexes for every token of the
 * given sequence, using the token's offsets, and trace-logs each addition.
 *
 * @param jCas
 *            the CAS the annotations are created in
 * @param currentTokenSequence
 *            the tokens to annotate
 */
private void addYagoDictionaryMatch(JCas jCas, List<Token> currentTokenSequence)
{
    // The rendered sequence is the same for every token; build it once, and only when the
    // trace message will actually be emitted.
    boolean traceEnabled = logger.isTraceEnabled();
    String sequenceText = traceEnabled ? tokenSequenceToString(currentTokenSequence) : null;

    for (Token token : currentTokenSequence) {
        YagoDictionaryMatch match = new YagoDictionaryMatch(jCas, token.getBegin(),
                token.getEnd());
        match.addToIndexes();

        if (traceEnabled) {
            logger.trace("Yago match added for token: " + token.getCoveredText()
                    + " from sequence: " + sequenceText);
        }
    }
}
Example 14
Source File: WebAnnoTsv3WriterTestBase.java From webanno with Apache License 2.0 | 5 votes |
@Ignore("Relations between different layers not supported in WebAnno TSV 3 atm") @Test public void testSingleMixedRelationWithoutFeatureValue() throws Exception { JCas jcas = makeJCasOneSentence(); CAS cas = jcas.getCas(); List<Token> tokens = new ArrayList<>(select(jcas, Token.class)); Token gov = tokens.get(0); Token t2 = tokens.get(tokens.size() - 1); Span dep = new Span(jcas, t2.getBegin(), t2.getEnd()); dep.addToIndexes(); Type relationType = cas.getTypeSystem().getType("webanno.custom.Relation"); // One at the beginning // WebAnno legacy conventions // AnnotationFS fs1 = cas.createAnnotation(relationType, // min(dep.getBegin(), gov.getBegin()), // max(dep.getEnd(), gov.getEnd())); // DKPro Core conventions AnnotationFS fs1 = cas.createAnnotation(relationType, dep.getBegin(), dep.getEnd()); FSUtil.setFeature(fs1, "Governor", gov); FSUtil.setFeature(fs1, "Dependent", dep); cas.addFsToIndexes(fs1); writeAndAssertEquals(jcas, WebannoTsv3Writer.PARAM_SPAN_LAYERS, asList(Span.class), WebannoTsv3Writer.PARAM_RELATION_LAYERS, asList("webanno.custom.Relation")); }
Example 15
Source File: WebannoTsv2Writer.java From webanno with Apache License 2.0 | 4 votes |
private void setTokenAnnos(CAS aCas, Map<Integer, String> aTokenAnnoMap, Type aType, Feature aFeature) { LowLevelCAS llCas = aCas.getLowLevelCAS(); for (AnnotationFS annoFs : CasUtil.select(aCas, aType)) { boolean first = true; boolean previous = false; // exists previous annotation, place-holed O-_ should be kept for (Token token : selectCovered(Token.class, annoFs)) { if (annoFs.getBegin() <= token.getBegin() && annoFs.getEnd() >= token.getEnd()) { String annotation = annoFs.getFeatureValueAsString(aFeature); if (annotation == null) { annotation = aType.getName() + "_"; } if (aTokenAnnoMap.get(llCas.ll_getFSRef(token)) == null) { if (previous) { if (!multipleSpans.contains(aType.getName())) { aTokenAnnoMap.put(llCas.ll_getFSRef(token), annotation); } else { aTokenAnnoMap.put(llCas.ll_getFSRef(token), "O-_|" + (first ? "B-" : "I-") + annotation); first = false; } } else { if (!multipleSpans.contains(aType.getName())) { aTokenAnnoMap.put(llCas.ll_getFSRef(token), annotation); } else { aTokenAnnoMap.put(llCas.ll_getFSRef(token), (first ? "B-" : "I-") + annotation); first = false; } } } else { if (!multipleSpans.contains(aType.getName())) { aTokenAnnoMap.put(llCas.ll_getFSRef(token), aTokenAnnoMap.get(llCas.ll_getFSRef(token)) + "|" + annotation); previous = true; } else { aTokenAnnoMap.put(llCas.ll_getFSRef(token), aTokenAnnoMap.get(llCas.ll_getFSRef(token)) + "|" + (first ? "B-" : "I-") + annotation); first = false; previous = true; } } } } } }
Example 16
Source File: MtasUimaParserTest.java From inception with Apache License 2.0 | 4 votes |
@Test public void testDependencyRelation() throws Exception { // Set up document with a dummy dependency relation jcas.setDocumentText("a b"); Token t1 = new Token(jcas, 0, 1); t1.addToIndexes(); POS p1 = new POS(jcas, t1.getBegin(), t1.getEnd()); p1.setPosValue("A"); t1.setPos(p1); p1.addToIndexes(); Token t2 = new Token(jcas, 2, 3); t2.addToIndexes(); POS p2 = new POS(jcas, t2.getBegin(), t2.getEnd()); p2.setPosValue("B"); t2.setPos(p2); p2.addToIndexes(); Dependency d1 = new Dependency(jcas, t2.getBegin(), t2.getEnd()); d1.setDependent(t2); d1.setGovernor(t1); d1.addToIndexes(); // Set up annotation schema with POS and Dependency AnnotationLayer tokenLayer = new AnnotationLayer(Token.class.getName(), "Token", SPAN_TYPE, project, true, SINGLE_TOKEN, NO_OVERLAP); tokenLayer.setId(1l); AnnotationFeature tokenLayerPos = new AnnotationFeature(1l, tokenLayer, "pos", POS.class.getName()); AnnotationLayer posLayer = new AnnotationLayer(POS.class.getName(), "POS", SPAN_TYPE, project, true, SINGLE_TOKEN, NO_OVERLAP); posLayer.setId(2l); AnnotationFeature posLayerValue = new AnnotationFeature(1l, posLayer, "PosValue", CAS.TYPE_NAME_STRING); AnnotationLayer depLayer = new AnnotationLayer(Dependency.class.getName(), "Dependency", RELATION_TYPE, project, true, SINGLE_TOKEN, NO_OVERLAP); depLayer.setId(3l); depLayer.setAttachType(tokenLayer); depLayer.setAttachFeature(tokenLayerPos); AnnotationFeature dependencyLayerGovernor = new AnnotationFeature(2l, depLayer, "Governor", Token.class.getName()); AnnotationFeature dependencyLayerDependent = new AnnotationFeature(3l, depLayer, "Dependent", Token.class.getName()); when(annotationSchemaService.listAnnotationLayer(any(Project.class))) .thenReturn(asList(tokenLayer, posLayer, depLayer)); when(annotationSchemaService.getAdapter(posLayer)).thenReturn(new SpanAdapter( layerSupportRegistry, featureSupportRegistry, null, posLayer, () -> asList(posLayerValue), null)); when(annotationSchemaService.getAdapter(depLayer)) .thenReturn(new 
RelationAdapter( layerSupportRegistry, featureSupportRegistry, null, depLayer, FEAT_REL_TARGET, FEAT_REL_SOURCE, () -> asList(dependencyLayerGovernor, dependencyLayerDependent), emptyList())); MtasUimaParser sut = new MtasUimaParser( asList(tokenLayerPos, posLayerValue, dependencyLayerGovernor, dependencyLayerDependent), annotationSchemaService, featureIndexingSupportRegistry); MtasTokenCollection tc = sut.createTokenCollection(jcas.getCas()); MtasUtils.print(tc); List<MtasToken> tokens = new ArrayList<>(); tc.iterator().forEachRemaining(tokens::add); assertThat(tokens) .filteredOn(t -> t.getPrefix().startsWith("Dependency")) .extracting(t -> t.getPrefix() + "=" + t.getPostfix()) .containsExactly( "Dependency=b", "Dependency-source=a", "Dependency-source.PosValue=A", "Dependency-target=b", "Dependency-target.PosValue=B"); }
Example 17
Source File: NerMentionAnnotator.java From ambiverse-nlu with Apache License 2.0 | 4 votes |
@Override public void process(JCas jCas) throws AnalysisEngineProcessException { Collection<Sentence> sentences = JCasUtil.select(jCas, Sentence.class); for(Sentence sentence : sentences){ List<Token> tokens = JCasUtil.selectCovered(jCas, Token.class, sentence); TextClassificationOutcome previous = null; int begin = 0; int end = 0; for (int i = 0; i < tokens.size(); i++) { Token token = tokens.get(i); List<TextClassificationOutcome> classOutcomes = JCasUtil.selectCovered(jCas, TextClassificationOutcome.class, token); TextClassificationOutcome classOutcome = classOutcomes.get(0); String outcomeClassType = classOutcome.getOutcome().replaceAll(".-", ""); if(i == tokens.size()-1){ // we reached the end of the sentence. if(previous != null){ if(outcomeClassType.equals(previous.getOutcome().replaceAll(".-", ""))){ end = token.getEnd(); addMentionToCas(jCas, previous, begin, end); } else { addMentionToCas(jCas, previous, begin, end); addMentionToCas(jCas, classOutcome, token.getBegin(), token.getEnd()); } } else { addMentionToCas(jCas, classOutcome, token.getBegin(), token.getEnd()); } break; } if(previous == null){ previous = classOutcome; begin = token.getBegin(); end = token.getEnd(); continue; } if(outcomeClassType.equals(previous.getOutcome().replaceAll(".-", ""))){ previous = classOutcome; end = token.getEnd(); } else { addMentionToCas(jCas, previous, begin, end); previous = classOutcome; begin = token.getBegin(); end = token.getEnd(); } } } }
Example 18
Source File: RelationAdapterTest.java From webanno with Apache License 2.0 | 4 votes |
@Test public void thatRelationOverlapBehaviorOnCreateWorks() throws Exception { TokenBuilder<Token, Sentence> builder = new TokenBuilder<>(Token.class, Sentence.class); builder.buildTokens(jcas, "This is a test .\nThis is sentence two ."); for (Token t : select(jcas, Token.class)) { POS pos = new POS(jcas, t.getBegin(), t.getEnd()); t.setPos(pos); pos.addToIndexes(); } RelationAdapter sut = new RelationAdapter(layerSupportRegistry, featureSupportRegistry, null, depLayer, FEAT_REL_TARGET, FEAT_REL_SOURCE, () -> asList(dependencyLayerGovernor, dependencyLayerDependent), behaviors); List<POS> posAnnotations = new ArrayList<>(select(jcas, POS.class)); POS source = posAnnotations.get(0); POS target = posAnnotations.get(1); // First annotation should work depLayer.setOverlapMode(ANY_OVERLAP); sut.add(document, username, source, target, jcas.getCas()); // Adding another annotation at the same place DOES NOT work depLayer.setOverlapMode(NO_OVERLAP); assertThatExceptionOfType(AnnotationException.class) .isThrownBy(() -> sut.add(document, username, source, target, jcas.getCas())) .withMessageContaining("no overlap or stacking"); depLayer.setOverlapMode(OverlapMode.OVERLAP_ONLY); assertThatExceptionOfType(AnnotationException.class) .isThrownBy(() -> sut.add(document, username, source, target, jcas.getCas())) .withMessageContaining("stacking is not allowed"); // Adding another annotation at the same place DOES work depLayer.setOverlapMode(OverlapMode.STACKING_ONLY); assertThatCode(() -> sut.add(document, username, source, target, jcas.getCas())) .doesNotThrowAnyException(); depLayer.setOverlapMode(OverlapMode.ANY_OVERLAP); assertThatCode(() -> sut.add(document, username, source, target, jcas.getCas())) .doesNotThrowAnyException(); }
Example 19
Source File: DKPro2CoreNlp.java From ambiverse-nlu with Apache License 2.0 | 4 votes |
public static Tree createStanfordTree(org.apache.uima.jcas.tcas.Annotation root, TreeFactory tFact, Map<Token, IndexedWord> aIdxTokens) { JCas aJCas; try { aJCas = root.getCAS().getJCas(); } catch (CASException e) { throw new IllegalStateException("Unable to get JCas from JCas wrapper"); } // define the new (root) node Tree rootNode; // before we can create a node, we must check if we have any children (we have to know // whether to create a node or a leaf - not very dynamic) if (root instanceof Constituent && !isLeaf((Constituent) root)) { Constituent node = (Constituent) root; List<Tree> childNodes = new ArrayList<Tree>(); // get childNodes from child annotations FSArray children = node.getChildren(); for (int i = 0; i < children.size(); i++) { childNodes.add(createStanfordTree(node.getChildren(i), tFact, aIdxTokens)); } // now create the node with its children rootNode = tFact.newTreeNode(node.getConstituentType(), childNodes); } else { // Handle leaf annotations // Leafs are always Token-annotations // We also have to insert a Preterminal node with the value of the // POS-Annotation on the token // because the POS is not directly stored within the treee Token wordAnnotation = (Token) root; // create leaf-node for the tree Tree wordNode; if (aIdxTokens != null) { wordNode = tFact.newLeaf(aIdxTokens.get(wordAnnotation)); } else { wordNode = tFact.newLeaf(wordAnnotation.getCoveredText()); } // create information about preceding and trailing whitespaces in the leaf node StringBuilder preWhitespaces = new StringBuilder(); StringBuilder trailWhitespaces = new StringBuilder(); List<Token> precedingTokenList = selectPreceding(aJCas, Token.class, wordAnnotation, 1); List<Token> followingTokenList = selectFollowing(aJCas, Token.class, wordAnnotation, 1); if (precedingTokenList.size() > 0) { Token precedingToken = precedingTokenList.get(0); int precedingWhitespaces = wordAnnotation.getBegin() - precedingToken.getEnd(); for (int i = 0; i < precedingWhitespaces; i++) { 
preWhitespaces.append(" "); } } if (followingTokenList.size() > 0) { Token followingToken = followingTokenList.get(0); int trailingWhitespaces = followingToken.getBegin() - wordAnnotation.getEnd(); for (int i = 0; i < trailingWhitespaces; i++) { trailWhitespaces.append(" "); } } // write whitespace information as CoreAnnotation.BeforeAnnotation and // CoreAnnotation.AfterAnnotation to the node add annotation to list and write back to // node label ((CoreLabel) wordNode.label()).set(CoreAnnotations.BeforeAnnotation.class, preWhitespaces.toString()); ((CoreLabel) wordNode.label()).set(CoreAnnotations.AfterAnnotation.class, trailWhitespaces.toString()); // get POS-annotation POS pos = wordAnnotation.getPos(); // create POS-Node in the tree and attach word-node to it rootNode = tFact.newTreeNode(pos.getPosValue(), Arrays.asList((new Tree[] { wordNode }))); } return rootNode; }
Example 20
Source File: CasDiffTest.java From webanno with Apache License 2.0 | 4 votes |
@Test public void relationStackedSpansTest() throws Exception { TypeSystemDescription global = TypeSystemDescriptionFactory.createTypeSystemDescription(); TypeSystemDescription local = TypeSystemDescriptionFactory .createTypeSystemDescriptionFromPath( "src/test/resources/desc/type/webannoTestTypes.xml"); TypeSystemDescription merged = CasCreationUtils.mergeTypeSystems(asList(global, local)); TokenBuilder<Token, Sentence> tb = new TokenBuilder<>(Token.class, Sentence.class); JCas jcasA = JCasFactory.createJCas(merged); { CAS casA = jcasA.getCas(); tb.buildTokens(jcasA, "This is a test ."); List<Token> tokensA = new ArrayList<>(select(jcasA, Token.class)); Token t1A = tokensA.get(0); Token t2A = tokensA.get(tokensA.size() - 1); NamedEntity govA = new NamedEntity(jcasA, t1A.getBegin(), t1A.getEnd()); govA.addToIndexes(); // Here we add a stacked named entity! new NamedEntity(jcasA, t1A.getBegin(), t1A.getEnd()).addToIndexes(); NamedEntity depA = new NamedEntity(jcasA, t2A.getBegin(), t2A.getEnd()); depA.addToIndexes(); Type relationTypeA = casA.getTypeSystem().getType("webanno.custom.Relation"); AnnotationFS fs1A = casA.createAnnotation(relationTypeA, depA.getBegin(), depA.getEnd()); FSUtil.setFeature(fs1A, "Governor", govA); FSUtil.setFeature(fs1A, "Dependent", depA); FSUtil.setFeature(fs1A, "value", "REL"); casA.addFsToIndexes(fs1A); } JCas jcasB = JCasFactory.createJCas(merged); { CAS casB = jcasB.getCas(); tb.buildTokens(jcasB, "This is a test ."); List<Token> tokensB = new ArrayList<>(select(jcasB, Token.class)); Token t1B = tokensB.get(0); Token t2B = tokensB.get(tokensB.size() - 1); NamedEntity govB = new NamedEntity(jcasB, t1B.getBegin(), t1B.getEnd()); govB.addToIndexes(); NamedEntity depB = new NamedEntity(jcasB, t2B.getBegin(), t2B.getEnd()); depB.addToIndexes(); Type relationTypeB = casB.getTypeSystem().getType("webanno.custom.Relation"); AnnotationFS fs1B = casB.createAnnotation(relationTypeB, depB.getBegin(), depB.getEnd()); FSUtil.setFeature(fs1B, 
"Governor", govB); FSUtil.setFeature(fs1B, "Dependent", depB); FSUtil.setFeature(fs1B, "value", "REL"); casB.addFsToIndexes(fs1B); } Map<String, List<CAS>> casByUser = new LinkedHashMap<>(); casByUser.put("user1", asList(jcasA.getCas())); casByUser.put("user2", asList(jcasB.getCas())); List<? extends DiffAdapter> diffAdapters = asList(new RelationDiffAdapter( "webanno.custom.Relation", WebAnnoConst.FEAT_REL_TARGET, WebAnnoConst.FEAT_REL_SOURCE, "value")); CasDiff diff = doDiff(diffAdapters, LINK_TARGET_AS_LABEL, casByUser); DiffResult result = diff.toResult(); // result.print(System.out); assertEquals(1, result.size()); assertEquals(0, result.getDifferingConfigurationSets().size()); assertEquals(0, result.getIncompleteConfigurationSets().size()); // Todo: Agreement has moved to separate project - should create agreement test there // CodingAgreementResult agreement = AgreementUtils.getCohenKappaAgreement(diff, // "webanno.custom.Relation", "value", casByUser); // // // Asserts // System.out.printf("Agreement: %s%n", agreement.toString()); // AgreementUtils.dumpAgreementStudy(System.out, agreement); // // assertEquals(1, agreement.getPluralitySets().size()); }