de.tudarmstadt.ukp.dkpro.core.api.metadata.type.DocumentMetaData Java Examples
The following examples show how to use
de.tudarmstadt.ukp.dkpro.core.api.metadata.type.DocumentMetaData.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: CasMergeSuiteTest.java From webanno with Apache License 2.0 | 8 votes |
/**
 * Writes the given CAS with {@code WebannoTsv3XWriter} and compares the written
 * {@code curator.tsv} against the one in the reference folder.
 *
 * <p>The comparison is skipped (JUnit assumption) when no reference file exists.
 *
 * @param curatorCas the CAS to export; its document ID is set to {@code "curator"}
 * @throws Exception if the pipeline or file access fails
 */
private void writeAndAssertEquals(JCas curatorCas) throws Exception {
    String outputDir = "target/test-output/" + testContext.getClassName() + "/"
            + referenceFolder.getName();

    // Presumably the writer derives the output file name from the document ID - the
    // file read back below is "curator.tsv".
    DocumentMetaData metaData = DocumentMetaData.get(curatorCas);
    metaData.setDocumentId("curator");

    runPipeline(curatorCas,
            createEngineDescription(WebannoTsv3XWriter.class,
                    WebannoTsv3XWriter.PARAM_TARGET_LOCATION, outputDir,
                    WebannoTsv3XWriter.PARAM_OVERWRITE, true));

    File expectedFile = new File(referenceFolder, "curator.tsv");
    assumeTrue("No reference data available for this test.", expectedFile.exists());

    File producedFile = new File(outputDir, "curator.tsv");
    String expected = FileUtils.readFileToString(expectedFile, "UTF-8");
    String produced = FileUtils.readFileToString(producedFile, "UTF-8");

    assertEquals(expected, produced);
}
Example #2
Source File: WebAnnoTsv3WriterTestBase.java From webanno with Apache License 2.0 | 6 votes |
@Test public void testAnnotationWithLeadingWhitespaceAtStart() throws Exception { JCas jcas = JCasFactory.createJCas(); DocumentMetaData.create(jcas).setDocumentId("doc"); jcas.setDocumentText(" one two"); new Token(jcas, 1, 4).addToIndexes(); new Token(jcas, 5, 8).addToIndexes(); new Sentence(jcas, 1, 8).addToIndexes(); // NE has leading whitespace - on export this should be silently dropped new NamedEntity(jcas, 0, 4).addToIndexes(); writeAndAssertEquals(jcas); }
Example #3
Source File: CasPersistenceUtils.java From webanno with Apache License 2.0 | 6 votes |
public static void readSerializedCas(CAS aCas, File aFile) throws IOException { CAS realCas = getRealCas(aCas); // UIMA-6162 Workaround: synchronize CAS during de/serialization synchronized (((CASImpl) realCas).getBaseCAS()) { try (ObjectInputStream is = new ObjectInputStream(new FileInputStream(aFile))) { CASCompleteSerializer serializer = (CASCompleteSerializer) is.readObject(); deserializeCASComplete(serializer, (CASImpl) realCas); // Workaround for UIMA adding back deleted DocumentAnnotations // https://issues.apache.org/jira/browse/UIMA-6199 // If there is a DocumentMetaData annotation, then we can drop any of the default // UIMA DocumentAnnotation instances (excluding the DocumentMetaData of course) if (!aCas.select(DocumentMetaData.class.getName()).isEmpty()) { aCas.select(CAS.TYPE_NAME_DOCUMENT_ANNOTATION) .filter(fs -> !DocumentMetaData.class.getName().equals( fs.getType().getName())) .forEach(aCas::removeFsFromIndexes); } } catch (ClassNotFoundException e) { throw new IOException(e); } } }
Example #4
Source File: ConstraintsGeneratorTest.java From webanno with Apache License 2.0 | 6 votes |
/**
 * Creates a JCas over the default type system merged with the test types, sets the
 * document ID to {@code "doc"} and fills it with the tokens of "This is a test ." using a
 * {@code TokenBuilder}.
 *
 * @return the populated JCas
 * @throws UIMAException if the type systems or the JCas cannot be created
 */
private JCas makeJCasOneSentence() throws UIMAException {
    TypeSystemDescription builtInTypes =
            TypeSystemDescriptionFactory.createTypeSystemDescription();
    TypeSystemDescription testTypes =
            TypeSystemDescriptionFactory.createTypeSystemDescriptionFromPath(
                    "src/test/resources/desc/types/webannoTestTypes.xml");
    TypeSystemDescription allTypes =
            CasCreationUtils.mergeTypeSystems(asList(builtInTypes, testTypes));

    JCas cas = JCasFactory.createJCas(allTypes);
    DocumentMetaData metaData = DocumentMetaData.create(cas);
    metaData.setDocumentId("doc");

    TokenBuilder<Token, Sentence> builder = new TokenBuilder<>(Token.class, Sentence.class);
    builder.buildTokens(cas, "This is a test .");

    return cas;
}
Example #5
Source File: WebAnnoTsv3WriterTestBase.java From webanno with Apache License 2.0 | 6 votes |
@Test public void testElevatedType() throws Exception { JCas jcas = JCasFactory.createJCas(); DocumentMetaData.create(jcas).setDocumentId("doc"); jcas.setDocumentText("John"); // Add an elevated type which is not a direct subtype of Annotation. This type not be picked // up by the schema analyzer but should still be serialized as the POS type which is in fact // picked up. POS_NOUN pos = new POS_NOUN(jcas, 0, 4); pos.setPosValue("NN"); pos.setCoarseValue("NOUN"); pos.addToIndexes(); Token t = new Token(jcas, 0, 4); t.setPos(pos); t.addToIndexes(); new Sentence(jcas, 0, 4).addToIndexes(); writeAndAssertEquals(jcas); }
Example #6
Source File: WebAnnoTsv3WriterTestBase.java From webanno with Apache License 2.0 | 6 votes |
@Test public void testZeroWidthAnnotationBeforeFirstTokenIsMovedToBeginOfFirstToken() throws Exception { JCas jcas = JCasFactory.createJCas(); DocumentMetaData.create(jcas).setDocumentId("doc"); jcas.setDocumentText(" one two"); new Token(jcas, 2, 5).addToIndexes(); new Token(jcas, 6, 9).addToIndexes(); new Sentence(jcas, 2, 9).addToIndexes(); // NE is after the end of the last token and should be moved to the end of the last token // otherwise it could not be represented in the TSV3 format. new NamedEntity(jcas, 1, 1).addToIndexes(); writeAndAssertEquals(jcas); }
Example #7
Source File: WebAnnoTsv3WriterTestBase.java From webanno with Apache License 2.0 | 6 votes |
@Test public void testZeroWidthAnnotationBeyondLastTokenIsMovedToEndOfLastToken() throws Exception { JCas jcas = JCasFactory.createJCas(); DocumentMetaData.create(jcas).setDocumentId("doc"); jcas.setDocumentText("one two "); new Token(jcas, 0, 3).addToIndexes(); new Token(jcas, 4, 7).addToIndexes(); new Sentence(jcas, 0, 7).addToIndexes(); // NE is after the end of the last token and should be moved to the end of the last token // otherwise it could not be represented in the TSV3 format. new NamedEntity(jcas, 8, 8).addToIndexes(); writeAndAssertEquals(jcas); }
Example #8
Source File: WebAnnoTsv3WriterTestBase.java From webanno with Apache License 2.0 | 6 votes |
@Test public void testZeroWidthAnnotationBetweenTokenIsMovedToEndOfPreviousToken() throws Exception { JCas jcas = JCasFactory.createJCas(); DocumentMetaData.create(jcas).setDocumentId("doc"); jcas.setDocumentText("one two"); new Token(jcas, 0, 3).addToIndexes(); new Token(jcas, 5, 8).addToIndexes(); new Sentence(jcas, 0, 8).addToIndexes(); // NE is after the end of the last token and should be moved to the end of the last token // otherwise it could not be represented in the TSV3 format. new NamedEntity(jcas, 4, 4).addToIndexes(); writeAndAssertEquals(jcas); }
Example #9
Source File: WebAnnoTsv3WriterTestBase.java From webanno with Apache License 2.0 | 6 votes |
@Test public void testAnnotationWithLeadingWhitespace() throws Exception { JCas jcas = JCasFactory.createJCas(); DocumentMetaData.create(jcas).setDocumentId("doc"); jcas.setDocumentText("one two"); new Token(jcas, 0, 3).addToIndexes(); new Token(jcas, 5, 8).addToIndexes(); new Sentence(jcas, 0, 8).addToIndexes(); // NE has leading whitespace - on export this should be silently dropped new NamedEntity(jcas, 4, 8).addToIndexes(); writeAndAssertEquals(jcas); }
Example #10
Source File: WebAnnoTsv3WriterTestBase.java From webanno with Apache License 2.0 | 6 votes |
@Test public void testAnnotationWithTrailingWhitespaceAtEnd() throws Exception { JCas jcas = JCasFactory.createJCas(); DocumentMetaData.create(jcas).setDocumentId("doc"); jcas.setDocumentText("one two "); new Token(jcas, 0, 3).addToIndexes(); new Token(jcas, 4, 7).addToIndexes(); new Sentence(jcas, 0, 7).addToIndexes(); // NE has trailing whitespace - on export this should be silently dropped new NamedEntity(jcas, 4, 8).addToIndexes(); writeAndAssertEquals(jcas); }
Example #11
Source File: WebAnnoTsv3WriterTestBase.java From webanno with Apache License 2.0 | 6 votes |
@Test public void testAnnotationWithTrailingWhitespace() throws Exception { JCas jcas = JCasFactory.createJCas(); DocumentMetaData.create(jcas).setDocumentId("doc"); jcas.setDocumentText("one two"); new Token(jcas, 0, 3).addToIndexes(); new Token(jcas, 5, 8).addToIndexes(); new Sentence(jcas, 0, 8).addToIndexes(); // NE has trailing whitespace - on export this should be silently dropped new NamedEntity(jcas, 0, 4).addToIndexes(); writeAndAssertEquals(jcas); }
Example #12
Source File: WebAnnoTsv3WriterTestBase.java From webanno with Apache License 2.0 | 6 votes |
/**
 * Exports two single-token sentences that directly touch each other ("one" at 0-3 and
 * "two" at 3-6) without any separating whitespace.
 */
@Test
public void testTwoSentencesWithNoSpaceInBetween() throws Exception {
    TypeSystemDescription builtInTypes =
            TypeSystemDescriptionFactory.createTypeSystemDescription();
    TypeSystemDescription testTypes =
            TypeSystemDescriptionFactory.createTypeSystemDescriptionFromPath(
                    "src/test/resources/desc/type/webannoTestTypes.xml");
    TypeSystemDescription allTypes =
            CasCreationUtils.mergeTypeSystems(asList(builtInTypes, testTypes));

    JCas cas = JCasFactory.createJCas(allTypes);
    DocumentMetaData metaData = DocumentMetaData.create(cas);
    metaData.setDocumentId("doc");
    cas.setDocumentText("onetwo");

    // First sentence: "one"
    Token firstToken = new Token(cas, 0, 3);
    firstToken.addToIndexes();
    Sentence firstSentence = new Sentence(cas, 0, 3);
    firstSentence.addToIndexes();

    // Second sentence: "two", starting exactly where the first one ends
    Token secondToken = new Token(cas, 3, 6);
    secondToken.addToIndexes();
    Sentence secondSentence = new Sentence(cas, 3, 6);
    secondSentence.addToIndexes();

    writeAndAssertEquals(cas);
}
Example #13
Source File: Conll2003AidaReader.java From ambiverse-nlu with Apache License 2.0 | 6 votes |
@Override protected void initCas(CAS aCas, Resource aResource) { try { // Set the document metadata DocumentMetaData docMetaData = DocumentMetaData.create(aCas); docMetaData.setLanguage(language); // docMetaData.setDocumentTitle(new File(aResource.getPath()).getName()); // docMetaData.setDocumentUri(aResource.getResolvedUri().toString() + qualifier); // docMetaData.setDocumentId("doc id"); // if (aResource.getBase() != null) { // docMetaData.setDocumentBaseUri(aResource.getResolvedBase()); // docMetaData.setCollectionId(aResource.getResolvedBase()); // } // Set the document language aCas.setDocumentLanguage(language); } catch (CASException e) { // This should not happen. throw new RuntimeException(e); } }
Example #14
Source File: CompressedXmiWriter.java From argument-reasoning-comprehension-task with Apache License 2.0 | 5 votes |
@Override public void process(JCas aJCas) throws AnalysisEngineProcessException { try { java.io.ByteArrayOutputStream jCasOutputStream = new java.io.ByteArrayOutputStream(); XmiCasSerializer.serialize(aJCas.getCas(), jCasOutputStream); // get name = id + .xmi String singleEntryName = DocumentMetaData.get(aJCas).getDocumentId() + ".xmi"; // convert output stream to input stream // InputStream inputStream = new ByteArrayInputStream(jCasOutputStream.toByteArray()); // add to the tar addSingleEntryToTar(jCasOutputStream.toByteArray(), singleEntryName); if (!typeSystemWritten) { writeTypeSystem(aJCas); typeSystemWritten = true; } counter++; } catch (IOException | SAXException ex) { throw new AnalysisEngineProcessException(ex); } }
Example #15
Source File: CasMerge.java From webanno with Apache License 2.0 | 5 votes |
private static void clearAnnotations(CAS aCas) throws UIMAException { CAS backup = CasFactory.createCas((TypeSystemDescription) null); // Copy the CAS - basically we do this just to keep the full type system information CASCompleteSerializer serializer = serializeCASComplete((CASImpl) getRealCas(aCas)); deserializeCASComplete(serializer, (CASImpl) getRealCas(backup)); // Remove all annotations from the target CAS but we keep the type system! aCas.reset(); // Copy over essential information if (exists(backup, getType(backup, DocumentMetaData.class))) { copyDocumentMetadata(backup, aCas); } else { WebAnnoCasUtil.createDocumentMetadata(aCas); } aCas.setDocumentLanguage(backup.getDocumentLanguage()); // DKPro Core Issue 435 aCas.setDocumentText(backup.getDocumentText()); // Transfer token boundaries for (AnnotationFS t : selectTokens(backup)) { aCas.addFsToIndexes(createToken(aCas, t.getBegin(), t.getEnd())); } // Transfer sentence boundaries for (AnnotationFS s : selectSentences(backup)) { aCas.addFsToIndexes(createSentence(aCas, s.getBegin(), s.getEnd())); } }
Example #16
Source File: SparkUimaUtils.java From ambiverse-nlu with Apache License 2.0 | 5 votes |
public static void createSequenceFile(Object[] params, String uri) throws URISyntaxException, IOException, UIMAException, NoSuchMethodException, MissingSettingException, ClassNotFoundException { Configuration conf = new Configuration(); Path path = new Path(uri); Writer writer = SequenceFile.createWriter( conf, Writer.file(path), Writer.keyClass(Text.class), Writer.valueClass(SCAS.class)); int count = 0; CollectionReaderDescription readerDescription = Reader.getCollectionReaderDescription(Reader.COLLECTION_FORMAT.NYT, params); for (JCas jCas : SimplePipelineCasPoolIterator.iteratePipeline(20, readerDescription)) { if(JCasUtil.exists(jCas, DocumentMetaData.class)) { ++count; // Get the ID. DocumentMetaData dmd = JCasUtil.selectSingle(jCas, DocumentMetaData.class); String docId = "NULL"; if (dmd != null) { docId = dmd.getDocumentId(); } else { throw new IOException("No Document ID for xml: " + jCas.getView("xml").getDocumentText()); } Text docIdText = new Text(docId); SCAS scas = new SCAS(jCas.getCas()); writer.append(docIdText, scas); } jCas.release(); } logger.info("Wrote " + count + " documents to " + uri); IOUtils.closeStream(writer); }
Example #17
Source File: TeiReaderTest.java From webanno with Apache License 2.0 | 5 votes |
/**
 * Reads the TEI files under {@code classpath:/local/} and checks the annotation counts
 * and the first sentence of every document. Ignored because the test data is not
 * available.
 */
@Test
@Ignore("No TEI yet to opensource ")
public void testTeiReader() throws Exception {
    CollectionReaderDescription readerDescription = createReaderDescription(
            TeiReader.class,
            TeiReader.PARAM_LANGUAGE, "en",
            TeiReader.PARAM_SOURCE_LOCATION, "classpath:/local/",
            TeiReader.PARAM_PATTERNS, new String[] { "[+]*.xml" });

    String expectedFirstSentence = "70 I DAG.";

    for (JCas cas : new JCasIterable(readerDescription)) {
        DocumentMetaData metaData = DocumentMetaData.get(cas);
        String documentText = cas.getDocumentText();

        // Print id, text length and language of the document being checked
        System.out.printf("%s - %d%n", metaData.getDocumentId(), documentText.length());
        System.out.println(cas.getDocumentLanguage());

        assertEquals(2235, JCasUtil.select(cas, Token.class).size());
        assertEquals(745, JCasUtil.select(cas, POS.class).size());
        assertEquals(745, JCasUtil.select(cas, Lemma.class).size());
        assertEquals(0, JCasUtil.select(cas, NamedEntity.class).size());
        assertEquals(30, JCasUtil.select(cas, Sentence.class).size());

        Sentence firstSentence = JCasUtil.select(cas, Sentence.class).iterator().next();
        assertEquals(expectedFirstSentence, firstSentence.getCoveredText());
    }
}
Example #18
Source File: NYTEntitySalienceFeatureExtractor.java From ambiverse-nlu with Apache License 2.0 | 5 votes |
@Override public List<TrainingInstance> getTrainingInstances(JCas jCas, TrainingSettings.FeatureExtractor featureExtractor, int positiveInstanceScalingFactor) throws Exception { List<TrainingInstance> trainingInstances = new ArrayList<>(); Collection<SalientEntity> salientEntities = JCasUtil.select(jCas, SalientEntity.class); Map<String, SalientEntity> salientEntityMap = new HashMap<>(); //The salient entities at this point don't have IDs. ITs better if we find the ids from the Aida Entities for(SalientEntity salientEntity : salientEntities) { salientEntityMap.put(salientEntity.getID(), salientEntity); } Logger logger = LoggerFactory.getLogger(NYTEntitySalienceFeatureExtractor.class); String docId = JCasUtil.selectSingle(jCas, DocumentMetaData.class).getDocumentId(); logger.info("[{}] Document entities: {}.", docId, salientEntityMap.size()); List<EntityInstance> entityInstances = getEntityInstances(jCas, featureExtractor); // Extract features for entities. for (EntityInstance ei : entityInstances) { String entityId = ei.getEntityId(); if(salientEntityMap.containsKey(entityId)) { Double label = salientEntityMap.get(entityId).getLabel(); // Generate the training instance with boolean label. TrainingInstance ti = new TrainingInstance(label, ei.getFeatureValues(), entityId, docId); logger.debug("[{}] for entity {} ti: {}.", docId, entityId, ti); trainingInstances.add(ti); // Scale positive examples if necessary. int addCount = (label == 1.0) ? positiveInstanceScalingFactor : 1; for (int i = 1; i < addCount; ++i) { trainingInstances.add(ti); } } } return trainingInstances; }
Example #19
Source File: WebAnnoCasUtilTest.java From webanno with Apache License 2.0 | 5 votes |
/**
 * Checks that {@code WebAnnoCasUtil.createDocumentMetadata} replaces the plain
 * {@code DocumentAnnotation} of a CAS with a {@code DocumentMetaData} and that the
 * language set beforehand is still present afterwards.
 */
@Test
public void thatCreateDocumentMetadataUpgradesExistingDocumentAnnotation() throws Exception {
    TypeSystemDescription typeSystem = createTypeSystemDescription();
    CAS cas = getRealCas(createCas(typeSystem));
    String metaDataTypeName = DocumentMetaData.class.getName();

    // Step 1: a fresh CAS has no document annotation at all
    assertThat(cas.select(DocumentAnnotation.class).asList())
            .as("CAS has no DocumentAnnotation")
            .isEmpty();

    // Step 2: setting the language brings the default document annotation into existence
    cas.setDocumentLanguage("en");

    assertThat(cas.select(DocumentAnnotation.class).asList())
            .as("CAS initialized with DocumentAnnotation")
            .extracting(fs -> fs.getType().getName())
            .containsExactly(TYPE_NAME_DOCUMENT_ANNOTATION);

    assertThat(cas.select(DocumentAnnotation.class).asList())
            .as("Language has been set")
            .extracting(DocumentAnnotation::getLanguage)
            .containsExactly("en");

    // Step 3: creating the metadata replaces the default document annotation
    WebAnnoCasUtil.createDocumentMetadata(cas);

    assertThat(cas.select(DocumentAnnotation.class).asList())
            .as("DocumentAnnotation has been upgraded to DocumentMetaData")
            .extracting(fs -> fs.getType().getName())
            .containsExactly(metaDataTypeName);

    assertThat(cas.select(DocumentAnnotation.class).asList())
            .as("Language survived upgrade")
            .extracting(DocumentAnnotation::getLanguage)
            .containsExactly("en");
}
Example #20
Source File: WebAnnoCasUtil.java From webanno with Apache License 2.0 | 5 votes |
/**
 * Returns the {@code documentTitle} feature of the document metadata in the CAS.
 *
 * @param aCas the CAS to inspect
 * @return the title, or {@code null} if the lookup fails with an
 *         {@code IllegalArgumentException}
 */
public static String getDocumentTitle(CAS aCas) {
    try {
        FeatureStructure metaData = selectSingle(aCas, getType(aCas, DocumentMetaData.class));
        return FSUtil.getFeature(metaData, "documentTitle", String.class);
    } catch (IllegalArgumentException ignored) {
        // Presumably raised when the CAS holds no single DocumentMetaData - report "no value"
        return null;
    }
}
Example #21
Source File: WebAnnoCasUtil.java From webanno with Apache License 2.0 | 5 votes |
/**
 * Returns the {@code documentUri} feature of the document metadata in the CAS.
 *
 * @param aCas the CAS to inspect
 * @return the URI, or {@code null} if the lookup fails with an
 *         {@code IllegalArgumentException}
 */
public static String getDocumentUri(CAS aCas) {
    try {
        FeatureStructure metaData = selectSingle(aCas, getType(aCas, DocumentMetaData.class));
        return FSUtil.getFeature(metaData, "documentUri", String.class);
    } catch (IllegalArgumentException ignored) {
        // Presumably raised when the CAS holds no single DocumentMetaData - report "no value"
        return null;
    }
}
Example #22
Source File: WebAnnoCasUtil.java From webanno with Apache License 2.0 | 5 votes |
/**
 * Returns the {@code documentId} feature of the document metadata in the CAS.
 *
 * @param aCas the CAS to inspect
 * @return the ID, or {@code null} if the lookup fails with an
 *         {@code IllegalArgumentException}
 */
public static String getDocumentId(CAS aCas) {
    try {
        FeatureStructure metaData = selectSingle(aCas, getType(aCas, DocumentMetaData.class));
        return FSUtil.getFeature(metaData, "documentId", String.class);
    } catch (IllegalArgumentException ignored) {
        // Presumably raised when the CAS holds no single DocumentMetaData - report "no value"
        return null;
    }
}
Example #23
Source File: WebAnnoCasUtil.java From webanno with Apache License 2.0 | 5 votes |
/**
 * Returns the document metadata of the CAS, creating it via
 * {@code createDocumentMetadata} when selecting it fails with an
 * {@code IllegalArgumentException}.
 *
 * @param aCas the CAS to inspect and possibly modify
 * @return the existing or newly created metadata feature structure
 */
public static FeatureStructure getDocumentMetadata(CAS aCas) {
    Type metaDataType = getType(aCas, DocumentMetaData.class);
    try {
        return selectSingle(aCas, metaDataType);
    } catch (IllegalArgumentException e) {
        // Selection failed - fall back to creating the metadata
        return createDocumentMetadata(aCas);
    }
}
Example #24
Source File: WebAnnoCasUtil.java From webanno with Apache License 2.0 | 5 votes |
public static FeatureStructure createDocumentMetadata(CAS aCas) { Type type = getType(aCas, DocumentMetaData.class); FeatureStructure dmd; if (aCas.getDocumentText() != null) { dmd = aCas.createAnnotation(type, 0, aCas.getDocumentText().length()); } else { dmd = aCas.createAnnotation(type, 0, 0); } // If there is already a DocumentAnnotation copy it's information and delete it FeatureStructure da = aCas.getDocumentAnnotation(); if (da != null) { FSUtil.setFeature(dmd, FEATURE_BASE_NAME_LANGUAGE, FSUtil.getFeature(da, FEATURE_BASE_NAME_LANGUAGE, String.class)); FSUtil.setFeature(dmd, FEATURE_BASE_NAME_BEGIN, FSUtil.getFeature(da, FEATURE_BASE_NAME_BEGIN, Integer.class)); FSUtil.setFeature(dmd, FEATURE_BASE_NAME_END, FSUtil.getFeature(da, FEATURE_BASE_NAME_END, Integer.class)); aCas.removeFsFromIndexes(da); } else if (aCas.getDocumentText() != null) { FSUtil.setFeature(dmd, FEATURE_BASE_NAME_BEGIN, 0); FSUtil.setFeature(dmd, FEATURE_BASE_NAME_END, aCas.getDocumentText().length()); } aCas.addFsToIndexes(dmd); return dmd; }
Example #25
Source File: BratAnnotatorUtility.java From webanno with Apache License 2.0 | 5 votes |
public static CAS clearAnnotations(CAS aCas) throws IOException { CAS target; try { target = CasFactory.createCas((TypeSystemDescription) null); } catch (UIMAException e) { throw new IOException(e); } // Copy the CAS - basically we do this just to keep the full type system information CASCompleteSerializer serializer = serializeCASComplete((CASImpl) getRealCas(aCas)); deserializeCASComplete(serializer, (CASImpl) getRealCas(target)); // Remove all annotations from the target CAS but we keep the type system! target.reset(); // Copy over essential information if (exists(aCas, getType(aCas, DocumentMetaData.class))) { copyDocumentMetadata(aCas, target); } else { WebAnnoCasUtil.createDocumentMetadata(aCas); } target.setDocumentLanguage(aCas.getDocumentLanguage()); // DKPro Core Issue 435 target.setDocumentText(aCas.getDocumentText()); // Transfer token boundaries for (AnnotationFS t : selectTokens(aCas)) { target.addFsToIndexes(createToken(target, t.getBegin(), t.getEnd())); } // Transfer sentence boundaries for (AnnotationFS s : selectSentences(aCas)) { target.addFsToIndexes(createSentence(target, s.getBegin(), s.getEnd())); } return target; }
Example #26
Source File: Tsv3XSerializerTest.java From webanno with Apache License 2.0 | 5 votes |
private JCas makeJCasOneSentence(String aText) throws UIMAException { TypeSystemDescription global = TypeSystemDescriptionFactory.createTypeSystemDescription(); TypeSystemDescription local = TypeSystemDescriptionFactory .createTypeSystemDescriptionFromPath( "src/test/resources/desc/type/webannoTestTypes.xml"); TypeSystemDescription merged = CasCreationUtils.mergeTypeSystems(asList(global, local)); JCas jcas = JCasFactory.createJCas(merged); DocumentMetaData.create(jcas).setDocumentId("doc"); TokenBuilder<Token, Sentence> tb = new TokenBuilder<>(Token.class, Sentence.class); tb.buildTokens(jcas, aText); // Remove the sentences generated by the token builder which treats the line break as a // sentence break for (Sentence s : select(jcas, Sentence.class)) { s.removeFromIndexes(); } // Add a new sentence covering the whole text new Sentence(jcas, 0, jcas.getDocumentText().length()).addToIndexes(); return jcas; }
Example #27
Source File: WebAnnoTsv3WriterTestBase.java From webanno with Apache License 2.0 | 5 votes |
/**
 * Creates an empty JCas over the default type system merged with the test types and sets
 * its document ID to {@code "doc"}.
 *
 * @return the new JCas
 * @throws UIMAException if the type systems or the JCas cannot be created
 */
private static JCas makeJCas() throws UIMAException {
    TypeSystemDescription builtInTypes =
            TypeSystemDescriptionFactory.createTypeSystemDescription();
    TypeSystemDescription testTypes =
            TypeSystemDescriptionFactory.createTypeSystemDescriptionFromPath(
                    "src/test/resources/desc/type/webannoTestTypes.xml");
    TypeSystemDescription allTypes =
            CasCreationUtils.mergeTypeSystems(asList(builtInTypes, testTypes));

    JCas cas = JCasFactory.createJCas(allTypes);
    DocumentMetaData metaData = DocumentMetaData.create(cas);
    metaData.setDocumentId("doc");
    return cas;
}
Example #28
Source File: NYTEntitySalienceFeatureExtractor.java From ambiverse-nlu with Apache License 2.0 | 5 votes |
@Override public List<EntityInstance> getEntityInstances(JCas jCas, TrainingSettings.FeatureExtractor featureExtractor) throws Exception { Collection<AidaEntity> aidaEntities = JCasUtil.select(jCas, AidaEntity.class); ListMultimap<String, AidaEntity> entitiesMentions = ArrayListMultimap.create(); // Group by actual entity (uima.Entity is a mention). for (AidaEntity aidaEntity : aidaEntities) { entitiesMentions.put(aidaEntity.getID(), aidaEntity); } Logger logger = LoggerFactory.getLogger(NYTEntitySalienceFeatureExtractor.class); String docId = JCasUtil.selectSingle(jCas, DocumentMetaData.class).getDocumentId(); logger.debug("[" + docId + "] AIDA entities: " + entitiesMentions.keySet()); List<EntityInstance> entityInstances = new ArrayList<>(entitiesMentions.size()); // Extract features for entities. for (Map.Entry<String, Collection<AidaEntity>> entry : entitiesMentions.asMap().entrySet()) { String entityId = entry.getKey(); Collection<AidaEntity> entityMentions = entry.getValue(); // Generate feature 8. Map<Integer, Double> entityFeatureValues = getEntityFeatureValues(jCas, entityMentions, featureExtractor); EntityInstance ei = new EntityInstance(entityId, entityFeatureValues); entityInstances.add(ei); } return entityInstances; }
Example #29
Source File: SynchronizedTcuLookUpTable.java From ambiverse-nlu with Apache License 2.0 | 5 votes |
/**
 * Compares the document ID of the given view with the one stored in
 * {@code lastSeenDocumentIdTL} and then stores the current ID there.
 *
 * @param aView the view whose document metadata is read
 * @return {@code true} if the current ID equals the previously stored one
 */
private boolean isTheSameDocument(JCas aView) {
    final String currentId =
            JCasUtil.selectSingle(aView, DocumentMetaData.class).getDocumentId();

    // Compare first, then remember the current ID for the next call
    final boolean unchanged = currentId.equals(lastSeenDocumentIdTL.get());
    lastSeenDocumentIdTL.set(currentId);

    return unchanged;
}
Example #30
Source File: LoadFactAnnotations.java From ambiverse-nlu with Apache License 2.0 | 5 votes |
/**
 * Sets the document ID of the CAS to the given fact number and adds the text
 * {@code "fact"} through a {@code JCasBuilder}.
 *
 * @param aCAS the CAS to fill
 * @param fact the fact number used as document ID
 * @throws CollectionException if the JCas cannot be obtained from the CAS
 */
private void convert(CAS aCAS, int fact) throws CollectionException, SQLException {
    final JCas jcas;
    try {
        jcas = aCAS.getJCas();
    } catch (CASException e) {
        throw new CollectionException(e);
    }

    JCasBuilder builder = new JCasBuilder(jcas);

    DocumentMetaData metaData = JCasUtil.selectSingle(jcas, DocumentMetaData.class);
    metaData.setDocumentId(Integer.toString(fact));

    builder.add("fact");
    builder.close();
}