de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token Java Examples
The following examples show how to use
de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: Tcf2DKPro.java From inception with Apache License 2.0 | 6 votes |
public void convertNamedEntities(JCas aJCas, TextCorpus aCorpusData, Map<String, Token> aTokens) { if (aCorpusData.getNamedEntitiesLayer() == null) { // No layer to read from. return; } for (int i = 0; i < aCorpusData.getNamedEntitiesLayer().size(); i++) { // get the named entity eu.clarin.weblicht.wlfxb.tc.api.NamedEntity entity = aCorpusData .getNamedEntitiesLayer().getEntity(i); eu.clarin.weblicht.wlfxb.tc.api.Token[] namedEntityTokens = aCorpusData .getNamedEntitiesLayer().getTokens(entity); NamedEntity outNamedEntity = new NamedEntity(aJCas); outNamedEntity.setBegin(getOffsets(namedEntityTokens, aTokens)[0]); outNamedEntity.setEnd(getOffsets(namedEntityTokens, aTokens)[1]); outNamedEntity.setValue(entity.getType()); outNamedEntity.addToIndexes(); } }
Example #2
Source File: WebAnnoTsv3WriterTestBase.java From webanno with Apache License 2.0 | 6 votes |
/**
 * Builds a two-link chain in which each link spans two tokens (first..second and
 * third..fourth) and checks the writer output with the chain layer enabled.
 */
@Test
public void testMultiTokenChain() throws Exception
{
    JCas jcas = makeJCasOneSentence();
    CAS cas = jcas.getCas();

    List<Token> tokenList = new ArrayList<>(select(jcas, Token.class));
    Token first = tokenList.get(0);
    Token second = tokenList.get(1);
    Token third = tokenList.get(2);
    Token fourth = tokenList.get(3);

    Type chainType = cas.getTypeSystem().getType("webanno.custom.SimpleChain");
    Type linkType = cas.getTypeSystem().getType("webanno.custom.SimpleLink");

    // The last argument of makeChainLink is the next link; the tail link gets null.
    makeChainHead(chainType,
            makeChainLink(linkType, cas, first.getBegin(), second.getEnd(), null, null,
                    makeChainLink(linkType, cas, third.getBegin(), fourth.getEnd(), null,
                            null, null)));

    writeAndAssertEquals(jcas,
            WebannoTsv3Writer.PARAM_CHAIN_LAYERS, asList("webanno.custom.Simple"));
}
Example #3
Source File: WebAnnoTsv3WriterTestBase.java From webanno with Apache License 2.0 | 6 votes |
/**
 * Builds a three-link chain with one link per token (tokens 0, 1 and 2) and checks the
 * writer output with the chain layer enabled.
 */
@Test
public void testSimpleChain() throws Exception
{
    JCas jcas = makeJCasOneSentence();
    CAS cas = jcas.getCas();

    List<Token> tokenList = new ArrayList<>(select(jcas, Token.class));
    Token first = tokenList.get(0);
    Token second = tokenList.get(1);
    Token third = tokenList.get(2);

    Type chainType = cas.getTypeSystem().getType("webanno.custom.SimpleChain");
    Type linkType = cas.getTypeSystem().getType("webanno.custom.SimpleLink");

    // Links are nested from head to tail; the tail link has no successor (null).
    makeChainHead(chainType,
            makeChainLink(linkType, cas, first.getBegin(), first.getEnd(), null, null,
                    makeChainLink(linkType, cas, second.getBegin(), second.getEnd(), null,
                            null,
                            makeChainLink(linkType, cas, third.getBegin(), third.getEnd(),
                                    null, null, null))));

    writeAndAssertEquals(jcas,
            WebannoTsv3Writer.PARAM_CHAIN_LAYERS, asList("webanno.custom.Simple"));
}
Example #4
Source File: SpanAdapterTest.java From webanno with Apache License 2.0 | 6 votes |
/**
 * Two named entities that touch at offset 4 ([0,4] and [4,5]) must produce no validation
 * messages when the layer is set to NO_OVERLAP.
 */
@Test
public void thatAdjacentAnnotationsDoNotOverlap() throws AnnotationException
{
    jcas.setDocumentText("Test.");

    // Segmentation: one sentence containing the tokens "Test" and ".".
    new Sentence(jcas, 0, 5).addToIndexes();
    new Token(jcas, 0, 4).addToIndexes();
    new Token(jcas, 4, 5).addToIndexes();

    // One named entity per token, sharing the boundary at offset 4.
    new NamedEntity(jcas, 0, 4).addToIndexes();
    new NamedEntity(jcas, 4, 5).addToIndexes();

    SpanAdapter adapter = new SpanAdapter(layerSupportRegistry, featureSupportRegistry, null,
            neLayer, () -> asList(), behaviors);

    neLayer.setOverlapMode(NO_OVERLAP);

    assertThat(adapter.validate(jcas.getCas())).isEmpty();
}
Example #5
Source File: Step0bTextSegmenterA.java From argument-reasoning-comprehension-task with Apache License 2.0 | 6 votes |
/**
 * Copies the offsets of all {@link Paragraph} and {@link Token} annotations from the source
 * JCas into new annotations in the target JCas.
 *
 * @param source
 *            JCas to read the annotations from.
 * @param target
 *            JCas that receives the copies.
 * @throws IllegalArgumentException
 *             if the two JCas objects do not have equal document text.
 */
private static void copyParagraphAndTokenAnnotations(JCas source, JCas target)
{
    boolean sameText = source.getDocumentText().equals(target.getDocumentText());
    if (!sameText) {
        throw new IllegalArgumentException("Source and target have different content");
    }

    for (Paragraph original : JCasUtil.select(source, Paragraph.class)) {
        Paragraph copy = new Paragraph(target);
        copy.setBegin(original.getBegin());
        copy.setEnd(original.getEnd());
        copy.addToIndexes();
    }

    for (Token original : JCasUtil.select(source, Token.class)) {
        Token copy = new Token(target);
        copy.setBegin(original.getBegin());
        copy.setEnd(original.getEnd());
        copy.addToIndexes();
    }
}
Example #6
Source File: WebAnnoTsv3WriterTestBase.java From webanno with Apache License 2.0 | 6 votes |
@Test public void testAnnotationWithLeadingWhitespaceAtStart() throws Exception { JCas jcas = JCasFactory.createJCas(); DocumentMetaData.create(jcas).setDocumentId("doc"); jcas.setDocumentText(" one two"); new Token(jcas, 1, 4).addToIndexes(); new Token(jcas, 5, 8).addToIndexes(); new Sentence(jcas, 1, 8).addToIndexes(); // NE has leading whitespace - on export this should be silently dropped new NamedEntity(jcas, 0, 4).addToIndexes(); writeAndAssertEquals(jcas); }
Example #7
Source File: WebAnnoTsv3WriterTestBase.java From webanno with Apache License 2.0 | 6 votes |
@Test public void testAnnotationWithLeadingWhitespace() throws Exception { JCas jcas = JCasFactory.createJCas(); DocumentMetaData.create(jcas).setDocumentId("doc"); jcas.setDocumentText("one two"); new Token(jcas, 0, 3).addToIndexes(); new Token(jcas, 5, 8).addToIndexes(); new Sentence(jcas, 0, 8).addToIndexes(); // NE has leading whitespace - on export this should be silently dropped new NamedEntity(jcas, 4, 8).addToIndexes(); writeAndAssertEquals(jcas); }
Example #8
Source File: DiffTestUtils.java From webanno with Apache License 2.0 | 6 votes |
public static TypeSystemDescription createMultiLinkWithRoleTestTypeSytem() throws Exception { List<TypeSystemDescription> typeSystems = new ArrayList<>(); TypeSystemDescription tsd = new TypeSystemDescription_impl(); // Link type TypeDescription linkTD = tsd.addType(LINK_TYPE, "", CAS.TYPE_NAME_TOP); linkTD.addFeature("role", "", CAS.TYPE_NAME_STRING); linkTD.addFeature("target", "", Token.class.getName()); // Link host TypeDescription hostTD = tsd.addType(HOST_TYPE, "", CAS.TYPE_NAME_ANNOTATION); hostTD.addFeature("links", "", CAS.TYPE_NAME_FS_ARRAY, linkTD.getName(), false); typeSystems.add(tsd); typeSystems.add(TypeSystemDescriptionFactory.createTypeSystemDescription()); return CasCreationUtils.mergeTypeSystems(typeSystems); }
Example #9
Source File: LemmaLayerInitializer.java From webanno with Apache License 2.0 | 6 votes |
/**
 * Sets up lemma annotation for the project: adds a {@code lemma} feature to the Token
 * layer, creates the Lemma span layer attached to that feature, and adds the string
 * feature {@code value} to the Lemma layer.
 *
 * @param aProject
 *            the project to configure.
 * @throws IOException
 *             declared by the overridden method.
 */
@Override
public void configure(Project aProject) throws IOException
{
    AnnotationLayer tokenLayer = annotationSchemaService.findLayer(aProject,
            Token.class.getName());

    // Feature on the Token layer that points to the lemma.
    AnnotationFeature lemmaOnToken = new AnnotationFeature(aProject, tokenLayer, "lemma",
            "lemma", Lemma.class.getName());
    annotationSchemaService.createFeature(lemmaOnToken);

    // The Lemma layer itself, attached to the token via the feature created above.
    AnnotationLayer lemmaLayer = new AnnotationLayer(Lemma.class.getName(), "Lemma",
            SPAN_TYPE, aProject, true, SINGLE_TOKEN, NO_OVERLAP);
    lemmaLayer.setAttachType(tokenLayer);
    lemmaLayer.setAttachFeature(lemmaOnToken);
    annotationSchemaService.createLayer(lemmaLayer);

    // The string feature that carries the lemma value.
    AnnotationFeature lemmaValue = new AnnotationFeature();
    lemmaValue.setProject(aProject);
    lemmaValue.setLayer(lemmaLayer);
    lemmaValue.setName("value");
    lemmaValue.setUiName("Lemma");
    lemmaValue.setDescription("lemma Annotation");
    lemmaValue.setType(CAS.TYPE_NAME_STRING);
    annotationSchemaService.createFeature(lemmaValue);
}
Example #10
Source File: OOVFilter.java From argument-reasoning-comprehension-task with Apache License 2.0 | 6 votes |
@Override boolean keepArgument(JCas jCas) { Collection<Token> tokens = JCasUtil.select(jCas, Token.class); int oovWords = 0; for (Token token : tokens) { if (!vocabulary.contains(token.getCoveredText())) { oovWords++; } } frequency.addValue(oovWords); // System.out.println(frequency); return oovWords <= THRESHOLD; }
Example #11
Source File: CasDiffTest.java From webanno with Apache License 2.0 | 6 votes |
@Test public void singleEmptyCasTest() throws Exception { String text = ""; CAS user1Cas = JCasFactory.createJCas().getCas(); user1Cas.setDocumentText(text); Map<String, List<CAS>> casByUser = new LinkedHashMap<>(); casByUser.put("user1", asList(user1Cas)); List<SpanDiffAdapter> diffAdapters = asList(new SpanDiffAdapter(Token.class.getName())); DiffResult result = doDiff(diffAdapters, LINK_TARGET_AS_LABEL, casByUser).toResult(); // result.print(System.out); assertEquals(0, result.size()); assertEquals(0, result.getDifferingConfigurationSets().size()); }
Example #12
Source File: TRExReader.java From ambiverse-nlu with Apache License 2.0 | 6 votes |
/**
 * Obtains a constituent instance via {@code getInstancedConstitient}, fills in its
 * boundaries (when present), URI and covered tokens, adds it to the JCas indexes and
 * returns it cast to the requested class.
 *
 * @param constituent
 *            the source annotation providing boundaries and URI.
 * @param clazz
 *            the concrete constituent class to return.
 * @param jCas
 *            the JCas to select tokens from and to index the result in.
 * @return the populated constituent, cast to {@code clazz}.
 */
private <K extends Constituent> K getConstituent(TAnnotation constituent, Class<K> clazz,
        JCas jCas)
{
    Constituent result = getInstancedConstitient(jCas, constituent, clazz);

    // Only constituents with known boundaries are marked explicit and given offsets.
    if (constituent.boundaries != null) {
        result.setExplicit(true);
        result.setBegin(constituent.boundaries[0]);
        result.setEnd(constituent.boundaries[1]);
    }
    result.setUri(constituent.uri);

    List<Token> covered = JCasUtil.selectCovered(jCas, Token.class, result.getBegin(),
            result.getEnd());

    FSArray tokenArray = new FSArray(jCas, covered.size());
    int slot = 0;
    for (Token token : covered) {
        tokenArray.set(slot, token);
        slot++;
    }
    tokenArray.addToIndexes();
    result.setTokens(tokenArray);

    K typedResult = clazz.cast(result);
    jCas.addFsToIndexes(typedResult);
    return typedResult;
}
Example #13
Source File: LappsGridRecommenderConformityTest.java From inception with Apache License 2.0 | 6 votes |
/**
 * Runs a prediction against the given service and checks that the resulting CAS contains
 * both Token and POS annotations. Both checks are evaluated before any failure is reported.
 */
@Test
@Parameters(method = "getPosServices")
public void testPosConformity(LappsGridService aService) throws Exception
{
    CAS cas = loadData();

    predict(aService.getUrl(), cas);

    SoftAssertions checks = new SoftAssertions();

    checks.assertThat(JCasUtil.select(cas.getJCas(), Token.class))
            .as("Prediction should contain Tokens")
            .isNotEmpty();

    checks.assertThat(JCasUtil.select(cas.getJCas(), POS.class))
            .as("Prediction should contain POS tags")
            .isNotEmpty();

    checks.assertAll();
}
Example #14
Source File: PredictionsWriter.java From ambiverse-nlu with Apache License 2.0 | 6 votes |
private <T extends Annotation> String getTag(Class<T> clazz, JCas jCas, Token token) { List<T> annotationList = JCasUtil.selectCovering(jCas, clazz, token); String value = annotationList.isEmpty()? "OTH" : clazz == NamedEntity.class? ((NamedEntity)annotationList.get(0)).getValue(): ((TextClassificationOutcome)annotationList.get(0)).getOutcome(); if (!"OTH".equals(value)) { // spanish uses different signs for class labels... if ("es".equals(language)) { value = value.replace("LUG", "LOC") .replace("OTROS", "MISC") .replace("PERS", "PER"); } value = value.replace("ORGANIZATION", "ORG") .replace("LOCATION", "LOC") .replace("PERSON", "PER"); // todo add chunk tags if they are not predicted rethink about spanish! // if (!value.contains("-")) { // // value = ("OTH".equals(prevNE) || !prevNE.contains(value) ? "B-" : "I-") + value; // } } return value; }
Example #15
Source File: WebAnnoTsv3WriterTestBase.java From webanno with Apache License 2.0 | 6 votes |
@Test public void testAnnotationWithTrailingWhitespaceAtEnd() throws Exception { JCas jcas = JCasFactory.createJCas(); DocumentMetaData.create(jcas).setDocumentId("doc"); jcas.setDocumentText("one two "); new Token(jcas, 0, 3).addToIndexes(); new Token(jcas, 4, 7).addToIndexes(); new Sentence(jcas, 0, 7).addToIndexes(); // NE has trailing whitespace - on export this should be silently dropped new NamedEntity(jcas, 4, 8).addToIndexes(); writeAndAssertEquals(jcas); }
Example #16
Source File: Tcf2DKPro.java From inception with Apache License 2.0 | 6 votes |
public void convertSentences(JCas aJCas, TextCorpus aCorpusData, Map<String, Token> aTokens) { if (aCorpusData.getSentencesLayer() == null) { // No layer to read from. return; } for (int i = 0; i < aCorpusData.getSentencesLayer().size(); i++) { eu.clarin.weblicht.wlfxb.tc.api.Token[] sentencesTokens = aCorpusData .getSentencesLayer().getTokens(aCorpusData.getSentencesLayer().getSentence(i)); Sentence outSentence = new Sentence(aJCas); outSentence.setBegin(aTokens.get(sentencesTokens[0].getID()).getBegin()); outSentence.setEnd(aTokens.get(sentencesTokens[sentencesTokens.length - 1].getID()) .getEnd()); outSentence.addToIndexes(); } }
Example #17
Source File: Tcf2DKPro.java From inception with Apache License 2.0 | 6 votes |
/**
 * Creates one {@link SofaChangeAnnotation} per correction in the corpus' orthography
 * layer, placed on the first token of the correction and carrying the correction string
 * and the operation name (or {@code null} when the correction has no operation).
 * Does nothing when the corpus has no orthography layer.
 *
 * @param aJCas
 *            the JCas that receives the annotations.
 * @param aCorpusData
 *            the corpus whose orthography layer is read.
 * @param aTokens
 *            lookup from token id to the already created token annotation.
 */
public void convertOrthoGraphy(JCas aJCas, TextCorpus aCorpusData, Map<String, Token> aTokens)
{
    if (aCorpusData.getOrthographyLayer() == null) {
        return;
    }

    for (int i = 0; i < aCorpusData.getOrthographyLayer().size(); i++) {
        eu.clarin.weblicht.wlfxb.tc.api.Token[] correctedTokens = aCorpusData
                .getOrthographyLayer()
                .getTokens(aCorpusData.getOrthographyLayer().getCorrection(i));

        String correctedText = aCorpusData.getOrthographyLayer().getCorrection(i)
                .getString();

        String operationName = Optional
                .ofNullable(aCorpusData.getOrthographyLayer().getCorrection(i).getOperation())
                .map(CorrectionOperation::name)
                .orElse(null);

        SofaChangeAnnotation change = new SofaChangeAnnotation(aJCas);

        // Only the first token of the correction determines the annotation span.
        String anchorId = correctedTokens[0].getID();
        change.setBegin(aTokens.get(anchorId).getBegin());
        change.setEnd(aTokens.get(anchorId).getEnd());

        change.setValue(correctedText);
        change.setOperation(operationName);
        change.addToIndexes();
    }
}
Example #18
Source File: Tcf2DKPro.java From inception with Apache License 2.0 | 6 votes |
public void convertLemma(JCas aJCas, TextCorpus aCorpusData, Map<String, Token> aTokens) { if (aCorpusData.getLemmasLayer() == null) { return; } for (int i = 0; i < aCorpusData.getLemmasLayer().size(); i++) { eu.clarin.weblicht.wlfxb.tc.api.Token[] lemmaTokens = aCorpusData.getLemmasLayer() .getTokens(aCorpusData.getLemmasLayer().getLemma(i)); String value = aCorpusData.getLemmasLayer().getLemma(i).getString(); Lemma outLemma = new Lemma(aJCas); outLemma.setBegin(aTokens.get(lemmaTokens[0].getID()).getBegin()); outLemma.setEnd(aTokens.get(lemmaTokens[0].getID()).getEnd()); outLemma.setValue(value); outLemma.addToIndexes(); // Set the lemma to the token aTokens.get(lemmaTokens[0].getID()).setLemma(outLemma); } }
Example #19
Source File: Tcf2DKPro.java From inception with Apache License 2.0 | 6 votes |
public void convertPos(JCas aJCas, TextCorpus aCorpusData, Map<String, Token> aTokens) { if (aCorpusData.getPosTagsLayer() == null) { return; } for (int i = 0; i < aCorpusData.getPosTagsLayer().size(); i++) { eu.clarin.weblicht.wlfxb.tc.api.Token[] posTokens = aCorpusData.getPosTagsLayer() .getTokens(aCorpusData.getPosTagsLayer().getTag(i)); String value = aCorpusData.getPosTagsLayer().getTag(i).getString(); POS outPos = new POS(aJCas); outPos.setBegin(aTokens.get(posTokens[0].getID()).getBegin()); outPos.setEnd(aTokens.get(posTokens[0].getID()).getEnd()); outPos.setPosValue(value); POSUtils.assignCoarseValue(outPos); outPos.addToIndexes(); // Set the POS to the token aTokens.get(posTokens[0].getID()).setPos(outPos); } }
Example #20
Source File: HtmlAnnotationEditor.java From inception with Apache License 2.0 | 6 votes |
/**
 * Collects the annotation layers that should be rendered: enabled layers that are neither
 * the Token/Sentence layers nor chain layers while in AUTOMATION, CORRECTION or CURATION
 * mode.
 *
 * @return the layers to render, in the order returned by the annotator state.
 */
private List<AnnotationLayer> getLayersToRender()
{
    AnnotatorState state = getModelObject();
    List<AnnotationLayer> selected = new ArrayList<>();

    for (AnnotationLayer candidate : state.getAnnotationLayers()) {
        boolean segmentation = candidate.getName().equals(Token.class.getName())
                || candidate.getName().equals(Sentence.class.getName());

        boolean unsupported = candidate.getType().equals(CHAIN_TYPE)
                && (state.getMode().equals(Mode.AUTOMATION)
                        || state.getMode().equals(Mode.CORRECTION)
                        || state.getMode().equals(Mode.CURATION));

        if (!candidate.isEnabled() || segmentation || unsupported) {
            continue;
        }

        selected.add(candidate);
    }

    return selected;
}
Example #21
Source File: SpanAdapterTest.java From webanno with Apache License 2.0 | 6 votes |
/**
 * With cross-sentence annotations disabled on the layer, adding a span that covers the
 * whole two-sentence document must fail with a MultipleSentenceCoveredException.
 */
@Test
public void thatSpanCrossSentenceBehaviorOnCreateThrowsException()
{
    neLayer.setCrossSentence(false);

    TokenBuilder<Token, Sentence> tokenBuilder = new TokenBuilder<>(Token.class,
            Sentence.class);
    tokenBuilder.buildTokens(jcas, "This is a test .\nThis is sentence two .");

    SpanAdapter adapter = new SpanAdapter(layerSupportRegistry, featureSupportRegistry, null,
            neLayer, () -> asList(), behaviors);

    // The span runs from offset 0 to the end of the document text.
    assertThatExceptionOfType(MultipleSentenceCoveredException.class)
            .isThrownBy(() -> adapter.add(document, username, jcas.getCas(), 0,
                    jcas.getDocumentText().length()))
            .withMessageContaining("covers multiple sentences");
}
Example #22
Source File: SuggestionViewPanel.java From webanno with Apache License 2.0 | 6 votes |
/**
 * Renders the enabled, non-segmentation, non-chain layers of the CAS within the window
 * offsets of the annotator state and returns the result as a JSON string.
 *
 * @param aCas
 *            the CAS to render.
 * @param aBratAnnotatorModel
 *            the annotator state providing the layers and the window offsets.
 * @param aCurationColoringStrategy
 *            the coloring strategy handed to the renderer.
 * @return the JSON string produced from the rendered response.
 * @throws IOException
 *             declared for the rendering / serialization calls.
 */
private String render(CAS aCas, AnnotatorState aBratAnnotatorModel,
        ColoringStrategy aCurationColoringStrategy)
    throws IOException
{
    List<AnnotationLayer> selected = new ArrayList<>();
    for (AnnotationLayer candidate : aBratAnnotatorModel.getAnnotationLayers()) {
        boolean segmentation = candidate.getName().equals(Token.class.getName())
                || candidate.getName().equals(Sentence.class.getName());
        boolean unsupported = candidate.getType().equals(CHAIN_TYPE);

        if (!candidate.isEnabled() || segmentation || unsupported) {
            continue;
        }
        selected.add(candidate);
    }

    // First pass: fill the intermediate visual document for the current window.
    VDocument visualDocument = new VDocument();
    preRenderer.render(visualDocument, aBratAnnotatorModel.getWindowBeginOffset(),
            aBratAnnotatorModel.getWindowEndOffset(), aCas, selected);

    // Second pass: turn the visual document into the response object.
    GetDocumentResponse documentResponse = new GetDocumentResponse();
    BratRenderer bratRenderer = new BratRenderer(schemaService, coloringService);
    bratRenderer.render(documentResponse, aBratAnnotatorModel, visualDocument, aCas,
            aCurationColoringStrategy);

    return JSONUtil.toInterpretableJsonString(documentResponse);
}
Example #23
Source File: WebannoTsv3Writer.java From webanno with Apache License 2.0 | 6 votes |
/**
 * Creates an {@link AnnotationUnit} for every token of every sentence, records it in
 * {@code units}, maps it to its {@code "<sentence>-<token>"} position (both counted from
 * 1) in {@code unitsLineNumber}, and maps the first unit of each sentence to the sentence
 * text in {@code sentenceUnits}.
 *
 * @param aJCas
 *            the JCas to read sentences and tokens from.
 */
private void setTokenSentenceAddress(JCas aJCas)
{
    int sentenceIndex = 1;
    for (Sentence sentence : select(aJCas, Sentence.class)) {
        int tokenIndex = 1;
        for (Token token : selectCovered(Token.class, sentence)) {
            AnnotationUnit unit = new AnnotationUnit(token.getBegin(), token.getEnd(), false,
                    token.getCoveredText());
            units.add(unit);

            // The first token of a sentence also carries the sentence text.
            boolean firstInSentence = tokenIndex == 1;
            if (firstInSentence) {
                sentenceUnits.put(unit, sentence.getCoveredText());
            }

            unitsLineNumber.put(unit, sentenceIndex + "-" + tokenIndex);
            tokenIndex++;
        }
        sentenceIndex++;
    }
}
Example #24
Source File: ArgumentPrinterUtils.java From argument-reasoning-comprehension-task with Apache License 2.0 | 6 votes |
/**
 * Tells whether an argument component ends at the given token. Only the first covering
 * component found (looked up in the order Claim, Backing, Premise, Rebuttal, Refutation)
 * is compared.
 *
 * @param t    token
 * @param jCas jcas
 * @return {@code true} if a covering component exists and the first one ends where the
 *         token ends
 */
public static boolean argAnnotationEnds(Token t, JCas jCas)
{
    int begin = t.getBegin();
    int end = t.getEnd();

    List<ArgumentComponent> covering = new ArrayList<>();
    covering.addAll(JCasUtil.selectCovering(jCas, Claim.class, begin, end));
    covering.addAll(JCasUtil.selectCovering(jCas, Backing.class, begin, end));
    covering.addAll(JCasUtil.selectCovering(jCas, Premise.class, begin, end));
    covering.addAll(JCasUtil.selectCovering(jCas, Rebuttal.class, begin, end));
    covering.addAll(JCasUtil.selectCovering(jCas, Refutation.class, begin, end));

    if (covering.isEmpty()) {
        return false;
    }

    return covering.get(0).getEnd() == t.getEnd();
}
Example #25
Source File: ArgumentPrinterUtils.java From argument-reasoning-comprehension-task with Apache License 2.0 | 6 votes |
/**
 * Returns the argument component that begins at the given token, if any. Only the first
 * covering component found (looked up in the order Claim, Backing, Premise, Rebuttal,
 * Refutation) is considered.
 *
 * @param t    token
 * @param jCas jcas
 * @return the first covering argument component if it begins where the token begins;
 *         {@code null} if no component covers the token or the first one begins elsewhere
 */
public static ArgumentComponent argAnnotationBegins(Token t, JCas jCas)
{
    int begin = t.getBegin();
    int end = t.getEnd();

    List<ArgumentComponent> covering = new ArrayList<>();
    covering.addAll(JCasUtil.selectCovering(jCas, Claim.class, begin, end));
    covering.addAll(JCasUtil.selectCovering(jCas, Backing.class, begin, end));
    covering.addAll(JCasUtil.selectCovering(jCas, Premise.class, begin, end));
    covering.addAll(JCasUtil.selectCovering(jCas, Rebuttal.class, begin, end));
    covering.addAll(JCasUtil.selectCovering(jCas, Refutation.class, begin, end));

    if (covering.isEmpty()) {
        return null;
    }

    ArgumentComponent first = covering.get(0);
    return first.getBegin() == t.getBegin() ? first : null;
}
Example #26
Source File: WebAnnoTsv3WriterTestBase.java From webanno with Apache License 2.0 | 6 votes |
@Test public void testZeroWidthAnnotationBeyondLastTokenIsMovedToEndOfLastToken() throws Exception { JCas jcas = JCasFactory.createJCas(); DocumentMetaData.create(jcas).setDocumentId("doc"); jcas.setDocumentText("one two "); new Token(jcas, 0, 3).addToIndexes(); new Token(jcas, 4, 7).addToIndexes(); new Sentence(jcas, 0, 7).addToIndexes(); // NE is after the end of the last token and should be moved to the end of the last token // otherwise it could not be represented in the TSV3 format. new NamedEntity(jcas, 8, 8).addToIndexes(); writeAndAssertEquals(jcas); }
Example #27
Source File: WebannoTsv3Writer.java From webanno with Apache License 2.0 | 5 votes |
private void setAmbiguity(JCas aJCas) { List<String> spanAndTokenLayers = spanLayers; spanAndTokenLayers.add(Token.class.getName()); for (String l : spanAndTokenLayers) { Type type = getType(aJCas.getCas(), l); ambigUnits.putIfAbsent(type.getName(), new HashMap<>()); for (AnnotationFS fs : CasUtil.select(aJCas.getCas(), type)) { AnnotationUnit unit = getFirstUnit(fs); // multiple token anno if (isMultipleTokenAnnotation(fs.getBegin(), fs.getEnd())) { SubTokenAnno sta = new SubTokenAnno(); sta.setBegin(fs.getBegin()); sta.setEnd(fs.getEnd()); sta.setText(fs.getCoveredText()); Set<AnnotationUnit> sus = new LinkedHashSet<>(); for (AnnotationUnit newUnit : getSubUnits(sta, sus)) { ambigUnits.get(type.getName()).put(newUnit, true); } } // stacked anno else if (ambigUnits.get(type.getName()).get(unit) != null) { ambigUnits.get(type.getName()).put(unit, true); } // single or first occurrence of stacked anno else { ambigUnits.get(type.getName()).put(unit, false); } } } }
Example #28
Source File: OpenNlpPosRecommender.java From inception with Apache License 2.0 | 5 votes |
/**
 * Builds POS training samples from the sentences of the given CASes. Extraction stops
 * across all CASes as soon as the number of collected samples reaches
 * {@code traits.getTrainingSetSizeLimit()}.
 *
 * @param aCasses
 *            the CASes to extract samples from.
 * @return the collected samples.
 */
private List<POSSample> extractPosSamples(List<CAS> aCasses)
{
    List<POSSample> posSamples = new ArrayList<>();

    casses: for (CAS cas : aCasses) {
        Type sentenceType = getType(cas, Sentence.class);
        Type tokenType = getType(cas, Token.class);

        Map<AnnotationFS, List<AnnotationFS>> sentences = indexCovered(cas, sentenceType,
                tokenType);

        for (Map.Entry<AnnotationFS, List<AnnotationFS>> e : sentences.entrySet()) {
            // The limit applies to the whole extraction, so leave the outer loop too.
            if (posSamples.size() >= traits.getTrainingSetSizeLimit()) {
                break casses;
            }

            AnnotationFS sentence = e.getKey();
            Collection<AnnotationFS> tokens = e.getValue();

            // createPosSample may yield no sample; only add one when it is present.
            createPosSample(cas, sentence, tokens).ifPresent(posSamples::add);
        }
    }

    LOG.debug("Extracted {} POS samples", posSamples.size());

    return posSamples;
}
Example #29
Source File: Tcf2DKPro.java From inception with Apache License 2.0 | 5 votes |
/**
 * Computes the span covered by a set of tokens: the smallest begin offset and the largest
 * end offset among them.
 *
 * @param aSpanTokens
 *            ids of the tokens that make up the span, in any order.
 * @param aAllTokens
 *            lookup from token id to token annotation.
 * @return a two-element array: {@code [minimum begin, maximum end]}.
 */
public int[] getOffsets(String[] aSpanTokens, Map<String, Token> aAllTokens)
{
    List<Integer> begins = new ArrayList<>();
    List<Integer> ends = new ArrayList<>();

    for (String tokenId : aSpanTokens) {
        Token spanToken = aAllTokens.get(tokenId);
        begins.add(spanToken.getBegin());
        ends.add(spanToken.getEnd());
    }

    int spanBegin = Collections.min(begins);
    int spanEnd = Collections.max(ends);

    return new int[] { spanBegin, spanEnd };
}
Example #30
Source File: ArgumentPrinterUtils.java From argument-reasoning-comprehension-task with Apache License 2.0 | 5 votes |
/**
 * Returns the first sentence covering the token, provided that sentence begins at the
 * token's begin offset.
 *
 * @param t token
 * @return the sentence, or {@code null} if no sentence covers the token or the first
 *         covering sentence begins elsewhere
 */
public static Sentence sentenceStartsOnToken(Token t)
{
    List<Sentence> covering = JCasUtil.selectCovering(Sentence.class, t);

    if (covering.isEmpty()) {
        return null;
    }

    Sentence first = covering.get(0);
    if (first.getBegin() == t.getBegin()) {
        return first;
    }

    return null;
}