org.apache.uima.jcas.tcas.DocumentAnnotation Java Examples

The following examples show how to use org.apache.uima.jcas.tcas.DocumentAnnotation. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: DocumentTypeByLocationTest.java    From baleen with Apache License 2.0 6 votes vote down vote up
/**
 * With {@code topDir} configured as the base directory, the document type must equal the part of
 * the source path that lies between the base directory and the file name.
 */
@Test
public void testBaseDirectoryTwoLayers() throws Exception {
  DocumentAnnotation da = getDocumentAnnotation();
  da.setSourceUri(tmp.getAbsolutePath());

  processJCas(BASE_DIRECTORY, topDir.getAbsolutePath());

  // Strip "<base dir>/" from the front and "/<file name>" from the end of the source path
  String sourcePath = tmp.getAbsolutePath();
  int start = topDir.getAbsolutePath().length() + 1;
  int end = sourcePath.length() - tmp.getName().length() - 1;
  String expectedDocType = sourcePath.substring(start, end);

  assertEquals(expectedDocType, da.getDocType());
}
 
Example #2
Source File: ActiveMQTest.java    From baleen with Apache License 2.0 6 votes vote down vote up
/**
 * Processes a single document and expects exactly one text message on the topic whose payload
 * contains the document content.
 */
@Test
public void testMessagePersisted() throws AnalysisEngineProcessException, JMSException {
  // Build the document to publish
  jCas.setDocumentText("Hello World!");
  DocumentAnnotation docAnnotation = (DocumentAnnotation) jCas.getDocumentAnnotationFs();
  docAnnotation.setSourceUri("hello.txt");

  // Run the engine (i.e. send the document to ActiveMQ)
  ae.process(jCas);

  // A message must arrive and carry the document content
  Message received = topicConsumer.receive(receiveTimeout);
  assertNotNull(received);
  String payload = ((TextMessage) received).getText();
  assertTrue(payload.contains("\"content\":\"Hello World!\""));

  // Nothing else may be waiting on the queue
  assertNull(topicConsumer.receive(receiveTimeout));
}
 
Example #3
Source File: FeatureUtilsTest.java    From baleen with Apache License 2.0 6 votes vote down vote up
/** A string-array feature holding plain text values is converted to an array of Strings. */
@Test
public void testStringArray() {
  String[] expected = {"ENG", "WAL", "SCO"};

  DocumentAnnotation da = (DocumentAnnotation) jCas.getDocumentAnnotationFs();
  StringArray releasability = new StringArray(jCas, expected.length);
  for (int i = 0; i < expected.length; i++) {
    releasability.set(i, expected[i]);
  }
  da.setDocumentReleasability(releasability);

  Feature feature = da.getType().getFeatureByBaseName(DOCUMENT_RELEASABILITY);
  Object[] actual = FeatureUtils.featureToArray(feature, da);

  assertEquals(expected.length, actual.length);
  for (int i = 0; i < expected.length; i++) {
    // Each element must stay a String and keep its position
    assertTrue(actual[i] instanceof String);
    assertEquals(expected[i], (String) actual[i]);
  }
}
 
Example #4
Source File: JCasSerialiser.java    From baleen with Apache License 2.0 6 votes vote down vote up
/**
 * Copy the fields of a document annotation into a map keyed by the {@code JsonJCas.DA_*}
 * constants.
 *
 * <p>Caveats and releasability are always written as {@code String[]}; an unset (null) array
 * feature is written as an empty array.
 *
 * @param da the document annotation to serialise
 * @return a new map containing the serialised fields
 */
private Map<String, Object> serialiseDocumentAnnotation(final DocumentAnnotation da) {
  final String[] caveats;
  if (da.getDocumentCaveats() == null) {
    caveats = new String[0];
  } else {
    caveats = da.getDocumentCaveats().toArray();
  }

  final String[] releasability;
  if (da.getDocumentReleasability() == null) {
    releasability = new String[0];
  } else {
    releasability = da.getDocumentReleasability().toArray();
  }

  final Map<String, Object> serialised = new HashMap<>();
  serialised.put(JsonJCas.DA_DOCUMENT_TYPE, da.getDocType());
  serialised.put(JsonJCas.DA_LANGUAGE, da.getLanguage());
  serialised.put(JsonJCas.DA_SOURCE_URI, da.getSourceUri());
  serialised.put(JsonJCas.DA_CLASSIFICATION, da.getDocumentClassification());
  serialised.put(JsonJCas.DA_CAVEATS, caveats);
  serialised.put(JsonJCas.DA_RELEASABILITY, releasability);
  return serialised;
}
 
Example #5
Source File: FeatureUtilsTest.java    From baleen with Apache License 2.0 6 votes vote down vote up
/**
 * String values that look like a boolean, an integer or a decimal number must be converted to
 * {@link Boolean}, {@link Integer} and {@link Double} respectively.
 */
@Test
public void testStringArrayToObject() {
  DocumentAnnotation da = (DocumentAnnotation) jCas.getDocumentAnnotationFs();
  StringArray rel = new StringArray(jCas, 3);
  rel.set(0, "true");
  rel.set(1, "2");
  rel.set(2, "0.45");
  da.setDocumentReleasability(rel);

  Feature f = da.getType().getFeatureByBaseName(DOCUMENT_RELEASABILITY);

  Object[] o = FeatureUtils.featureToArray(f, da);
  assertEquals(3, o.length);
  assertTrue(o[0] instanceof Boolean);
  assertTrue((Boolean) o[0]);
  assertTrue(o[1] instanceof Integer);
  // valueOf instead of the boxed-type constructors, which are deprecated for removal
  assertEquals(Integer.valueOf(2), (Integer) o[1]);
  assertTrue(o[2] instanceof Double);
  assertEquals(Double.valueOf(0.45), (Double) o[2]);
}
 
Example #6
Source File: FeatureUtilsTest.java    From baleen with Apache License 2.0 6 votes vote down vote up
/** A string-array feature holding plain text values is converted to a list of Strings. */
@Test
public void testStringArrayToList() {
  String[] expected = {"ENG", "WAL", "SCO"};

  DocumentAnnotation da = (DocumentAnnotation) jCas.getDocumentAnnotationFs();
  StringArray releasability = new StringArray(jCas, expected.length);
  for (int i = 0; i < expected.length; i++) {
    releasability.set(i, expected[i]);
  }
  da.setDocumentReleasability(releasability);

  Feature feature = da.getType().getFeatureByBaseName(DOCUMENT_RELEASABILITY);
  List<Object> actual = FeatureUtils.featureToList(feature, da);

  assertEquals(expected.length, actual.size());
  for (int i = 0; i < expected.length; i++) {
    // Each element must stay a String and keep its position
    assertTrue(actual.get(i) instanceof String);
    assertEquals(expected[i], (String) actual.get(i));
  }
}
 
Example #7
Source File: Html5Test.java    From baleen with Apache License 2.0 6 votes vote down vote up
/**
 * With {@code PARAM_USE_EXTERNAL_ID} enabled and {@code PARAM_CONTENT_HASH_AS_ID} disabled, the
 * output file must be named after the expected id rather than the source file name.
 */
@Test
public void testCreateExternalIdFile() throws UIMAException {
  final String expectedFileName =
      "734cad14909bedfafb5b273b6b0eb01fbfa639587d217f78ce9639bba41f4415.html";

  AnalysisEngine html5Consumer =
      AnalysisEngineFactory.createEngine(
          Html5.class,
          TypeSystemSingleton.getTypeSystemDescriptionInstance(),
          Html5.PARAM_OUTPUT_FOLDER,
          outputFolder.getPath(),
          Html5.PARAM_USE_EXTERNAL_ID,
          true,
          Html5.PARAM_CONTENT_HASH_AS_ID,
          false);

  jCas.setDocumentText("Hello World!");
  DocumentAnnotation docAnnotation = (DocumentAnnotation) jCas.getDocumentAnnotationFs();
  docAnnotation.setSourceUri("hello.txt");

  html5Consumer.process(jCas);

  File expectedOutput = new File(outputFolder, expectedFileName);
  assertTrue(expectedOutput.exists());
}
 
Example #8
Source File: SharedIdGenerator.java    From baleen with Apache License 2.0 6 votes vote down vote up
/**
 * Clear the id mappings when the given JCas holds a different document from the one last seen.
 *
 * <p>Documents are compared by the hash of their document annotation. When no document has been
 * seen yet, the given one always counts as new.
 *
 * @param jCas the JCas to check
 * @return true if the mappings were reset, false if the document is unchanged
 */
public boolean resetIfNewJCas(final JCas jCas) {
  final String documentId = UimaSupport.getDocumentAnnotation(jCas).getHash();

  // Same document as last time: keep the existing mappings
  if (currentDocumentId != null && currentDocumentId.equals(documentId)) {
    return false;
  }

  getMonitor().debug("Reset id cache for document {}", documentId);
  clearMappings();
  currentDocumentId = documentId;
  return true;
}
 
Example #9
Source File: JCasDeserialiser.java    From baleen with Apache License 2.0 6 votes vote down vote up
/**
 * Populate the given JCas from a deserialised JSON map.
 *
 * <p>Sets the document text and language (each defaulting to the empty string when absent), then
 * processes the document annotation node and the annotations node, and finally resolves the
 * references between annotations that were collected along the way.
 *
 * @param jCas to populate
 * @param input to deserialise
 */
public void deseralize(final JCas jCas, final Map<String, Object> input) {

  // Top-level document properties
  final String text = (String) input.getOrDefault(JsonJCas.DOCUMENT_TEXT, "");
  jCas.setDocumentText(text);
  final String language = (String) input.getOrDefault(JsonJCas.DOCUMENT_LANGUAGE, "");
  jCas.setDocumentLanguage(language);

  // Document annotation
  final DocumentAnnotation da = UimaSupport.getDocumentAnnotation(jCas);
  final Map<String, Object> daJson =
      (Map<String, Object>) input.get(JsonJCas.DOCUMENT_ANNOTATION);
  processDocumentAnnotation(jCas, da, daJson);

  // Annotations; features that refer to other annotations are returned for later resolution
  final List<Map<String, Object>> annotationsJson =
      (List<Map<String, Object>>) input.get(JsonJCas.ANNOTATIONS);
  final List<ReferencedFeatures> pendingReferences = processAnnotations(jCas, annotationsJson);

  // Resolve the references now that the annotation index can be built
  final Map<Long, BaleenAnnotation> annotationsById = buildAnnotationIndex(jCas);
  for (final ReferencedFeatures pending : pendingReferences) {
    pending.rehydrate(jCas, annotationsById);
  }
}
 
Example #10
Source File: ElasticsearchTestBase.java    From baleen with Apache License 2.0 6 votes vote down vote up
/**
 * Reset the shared JCas and fill it with a small test document that has document metadata but
 * no entities.
 *
 * @return the timestamp (milliseconds) stored on the document annotation
 */
protected long createNoEntitiesDocument() {
  jCas.reset();
  jCas.setDocumentText("Hello World");
  jCas.setDocumentLanguage("en");

  final long now = System.currentTimeMillis();

  DocumentAnnotation docAnnotation = getDocumentAnnotation(jCas);
  docAnnotation.setTimestamp(now);
  docAnnotation.setSourceUri("test/no_entities");
  docAnnotation.setDocType("test");
  docAnnotation.setDocumentClassification("OFFICIAL");
  docAnnotation.setDocumentCaveats(
      UimaTypesUtils.toArray(jCas, Arrays.asList("TEST_A", "TEST_B")));
  docAnnotation.setDocumentReleasability(
      UimaTypesUtils.toArray(jCas, Arrays.asList("ENG", "SCO", "WAL")));

  return now;
}
 
Example #11
Source File: Html5Test.java    From baleen with Apache License 2.0 6 votes vote down vote up
/** With only the output folder configured, the output file is named after the source URI. */
@Test
public void testCreateFile() throws UIMAException {
  AnalysisEngine html5Consumer =
      AnalysisEngineFactory.createEngine(
          Html5.class,
          TypeSystemSingleton.getTypeSystemDescriptionInstance(),
          Html5.PARAM_OUTPUT_FOLDER,
          outputFolder.getPath());

  jCas.setDocumentText("Hello World!");
  DocumentAnnotation docAnnotation = (DocumentAnnotation) jCas.getDocumentAnnotationFs();
  docAnnotation.setSourceUri("hello.txt");

  html5Consumer.process(jCas);

  File expectedOutput = new File(outputFolder, "hello.txt.html");
  assertTrue(expectedOutput.exists());
}
 
Example #12
Source File: DocumentTypeByLocationTest.java    From baleen with Apache License 2.0 6 votes vote down vote up
/**
 * With {@code parentDir} configured as the base directory, the document type must equal the part
 * of the source path that lies between the base directory and the file name.
 */
@Test
public void testBaseDirectoryOneLayers() throws Exception {
  DocumentAnnotation da = getDocumentAnnotation();
  da.setSourceUri(tmp.getAbsolutePath());

  processJCas(BASE_DIRECTORY, parentDir.getAbsolutePath());

  // Strip "<base dir>/" from the front and "/<file name>" from the end of the source path
  String sourcePath = tmp.getAbsolutePath();
  int start = parentDir.getAbsolutePath().length() + 1;
  int end = sourcePath.length() - tmp.getName().length() - 1;
  String expectedDocType = sourcePath.substring(start, end);

  assertEquals(expectedDocType, da.getDocType());
}
 
Example #13
Source File: DocumentTypeByLocationTest.java    From baleen with Apache License 2.0 6 votes vote down vote up
/**
 * Without a configured base directory, the document type must equal the absolute path of
 * {@code childDir} minus any leading separator. The temporary file is deleted afterwards.
 */
@Test
public void test() throws Exception {

  try {
    DocumentAnnotation da = getDocumentAnnotation();
    da.setSourceUri(tmp.getAbsolutePath());

    processJCas();

    // Drop the leading separator (required for unix paths)
    String dirPath = childDir.getAbsolutePath();
    String expectedDocType =
        dirPath.startsWith(File.separator) ? dirPath.substring(1) : dirPath;

    assertEquals(expectedDocType, da.getDocType());
  } finally {
    tmp.delete();
  }
}
 
Example #14
Source File: MongoParagraph.java    From baleen with Apache License 2.0 6 votes vote down vote up
/**
 * Store one Mongo document per {@code Paragraph} annotation found in the JCas.
 *
 * <p>Each stored document holds the document id, the paragraph's covered text, the source URI
 * of the document and the paragraph's begin/end offsets. All paragraphs are written in a single
 * batch; nothing is written when there are no paragraphs.
 *
 * @param jCas the JCas whose paragraphs are persisted
 * @throws AnalysisEngineProcessException declared by the overridden method
 */
@Override
protected void doProcess(JCas jCas) throws AnalysisEngineProcessException {
  String documentId = getUniqueId(jCas);

  // The source URI is the same for every paragraph, so look it up once instead of per iteration
  String documentSource = getDocumentAnnotation(jCas).getSourceUri();

  List<Document> batchInsert = new ArrayList<>();

  for (Paragraph paragraph : JCasUtil.select(jCas, Paragraph.class)) {
    Document doc =
        new Document()
            .append(FIELD_DOCUMENT_ID, documentId)
            .append(FIELD_CONTENT, paragraph.getCoveredText())
            .append(FIELD_DOCUMENT_SOURCE, documentSource)
            .append(FIELD_BEGIN, paragraph.getBegin())
            .append(FIELD_END, paragraph.getEnd());

    batchInsert.add(doc);
  }

  if (!batchInsert.isEmpty()) {
    paragraphsCollection.insertMany(batchInsert);
  }
}
 
Example #15
Source File: Postgres.java    From baleen with Apache License 2.0 6 votes vote down vote up
/**
 * Bind the document-level fields to the prepared insert statement, execute it and return the
 * key obtained for the inserted row.
 *
 * @param jCas the document to insert
 * @return the key returned by {@code getKey} for the insert statement; never null
 * @throws SQLException if binding or executing the statement fails
 * @throws BaleenException if no key is returned for the inserted document
 */
private Integer executeDocInsert(JCas jCas) throws SQLException, BaleenException {
  DocumentAnnotation docAnnotation = getDocumentAnnotation(jCas);
  String externalId = ConsumerUtils.getExternalId(docAnnotation, contentHashAsId);

  insertDocStatement.clearParameters();

  // Scalar columns
  insertDocStatement.setString(1, externalId);
  insertDocStatement.setString(2, docAnnotation.getDocType());
  insertDocStatement.setString(3, docAnnotation.getSourceUri());
  insertDocStatement.setString(4, jCas.getDocumentText());
  insertDocStatement.setString(5, jCas.getDocumentLanguage());
  insertDocStatement.setTimestamp(6, new Timestamp(docAnnotation.getTimestamp()));
  insertDocStatement.setString(7, docAnnotation.getDocumentClassification());

  // Array columns
  insertDocStatement.setArray(
      8,
      createVarcharArray(postgresResource.getConnection(), docAnnotation.getDocumentCaveats()));
  insertDocStatement.setArray(
      9,
      createVarcharArray(
          postgresResource.getConnection(), docAnnotation.getDocumentReleasability()));

  insertDocStatement.executeUpdate();

  Integer generatedKey = getKey(insertDocStatement);
  if (generatedKey != null) {
    return generatedKey;
  }
  throw new BaleenException("No document key returned");
}
 
Example #16
Source File: AbstractBaleenFileConsumerTest.java    From baleen with Apache License 2.0 6 votes vote down vote up
/**
 * With no base path configured, the consumer must write its output to {@code FILENAME}
 * resolved against the working directory.
 */
@Test
public void testNullBasePath() throws Exception {
  AnalysisEngine consumer =
      AnalysisEngineFactory.createEngine(
          TestFileConsumer.class, TypeSystemSingleton.getTypeSystemDescriptionInstance());

  DocumentAnnotation da = (DocumentAnnotation) jCas.getDocumentAnnotationFs();
  da.setSourceUri(FILENAME);

  File f = new File(FILENAME);
  try {
    consumer.process(jCas);

    assertTrue(f.exists());
  } finally {
    // Clean up even when processing or the assertion fails, so no stray file is left behind
    f.delete();
  }
}
 
Example #17
Source File: DocumentTypeByFilename.java    From baleen with Apache License 2.0 6 votes vote down vote up
/**
 * Derive the document type from the file name of the document's source URI.
 *
 * <p>The file name is matched against {@code typePattern}. On a full match the configured
 * capture group becomes the type; otherwise, or when that group did not take part in the match,
 * {@code defaultType} is used. The type is optionally lower-cased, trimmed and prefixed with
 * {@code prefix} before being stored on the document annotation.
 *
 * @param aJCas the JCas whose document annotation is updated
 * @throws AnalysisEngineProcessException declared by the overridden method
 */
@Override
public void doProcess(JCas aJCas) throws AnalysisEngineProcessException {
  DocumentAnnotation da = getDocumentAnnotation(aJCas);

  File f = new File(da.getSourceUri());

  String type = defaultType;

  Matcher m = typePattern.matcher(f.getName());
  if (m.matches()) {
    // group(..) returns null for a group that did not participate; keep the default then
    String matched = m.group(group);
    if (matched != null) {
      type = matched;
    }
  }

  if (lowerCase) {
    // Locale.ROOT keeps the result independent of the JVM's default locale (e.g. Turkish 'I')
    type = type.toLowerCase(java.util.Locale.ROOT);
  }

  da.setDocType(prefix + type.trim());
}
 
Example #18
Source File: Html5Test.java    From baleen with Apache License 2.0 6 votes vote down vote up
/**
 * An entity whose covered text spans a blank line must be written to the HTML output with its
 * line breaks intact inside the annotation span.
 */
@Test
public void testLineBreak() throws UIMAException, IOException {
  AnalysisEngine html5Consumer =
      AnalysisEngineFactory.createEngine(
          Html5.class,
          TypeSystemSingleton.getTypeSystemDescriptionInstance(),
          Html5.PARAM_OUTPUT_FOLDER,
          outputFolder.getPath());

  DocumentAnnotation docAnnotation = (DocumentAnnotation) jCas.getDocumentAnnotationFs();
  docAnnotation.setSourceUri("multiline.txt");

  // Offsets 13-24 cover "James\n\nBond"
  jCas.setDocumentText("His name was James\n\nBond.");
  Person person = new Person(jCas, 13, 24);
  person.addToIndexes();

  html5Consumer.process(jCas);

  File output = new File(outputFolder, "multiline.txt.html");
  assertTrue(output.exists());

  String html = Files.asCharSource(output, StandardCharsets.UTF_8).read();
  assertTrue(html.contains("data-referent=\"\">James\n\nBond</span>"));
}
 
Example #19
Source File: Mongo.java    From baleen with Apache License 2.0 6 votes vote down vote up
/**
 * Build the Mongo document for the given JCas and insert it into the documents collection.
 *
 * <p>The stored document holds the external id, a nested document with the document
 * annotation's fields, the published ids and metadata, and the document text when
 * {@code outputContent} is set.
 *
 * @param documentId the external id to store
 * @param jCas the JCas to persist
 */
private void saveDocument(String documentId, JCas jCas) {
  Document mongoDoc = new Document();

  DocumentAnnotation docAnnotation = getDocumentAnnotation(jCas);

  mongoDoc.append(fields.getExternalId(), documentId);

  // Nested document mirroring the document annotation
  Document documentInfo = new Document();
  documentInfo.append(FIELD_DOCUMENT_TYPE, docAnnotation.getDocType());
  documentInfo.append(FIELD_DOCUMENT_SOURCE, docAnnotation.getSourceUri());
  documentInfo.append(FIELD_DOCUMENT_LANGUAGE, docAnnotation.getLanguage());
  documentInfo.append(FIELD_DOCUMENT_TIMESTAMP, new Date(docAnnotation.getTimestamp()));
  documentInfo.append(
      FIELD_DOCUMENT_CLASSIFICATION, docAnnotation.getDocumentClassification());
  documentInfo.append(FIELD_DOCUMENT_CAVEATS, toList(docAnnotation.getDocumentCaveats()));
  documentInfo.append(
      FIELD_DOCUMENT_RELEASABILITY, toList(docAnnotation.getDocumentReleasability()));
  mongoDoc.append(FIELD_DOCUMENT, documentInfo);

  addPublishedIds(jCas, mongoDoc);
  addMetadata(jCas, mongoDoc);

  if (outputContent) {
    mongoDoc.append(FIELD_CONTENT, jCas.getDocumentText());
  }

  documentsCollection.insertOne(mongoDoc);
}
 
Example #20
Source File: FeatureUtilsTest.java    From baleen with Apache License 2.0 5 votes vote down vote up
/** A releasability feature that this test never sets must convert to an empty list. */
@Test
public void testEmptyToList() {
  DocumentAnnotation docAnnotation = (DocumentAnnotation) jCas.getDocumentAnnotationFs();
  Feature releasability =
      docAnnotation.getType().getFeatureByBaseName(DOCUMENT_RELEASABILITY);

  List<Object> converted = FeatureUtils.featureToList(releasability, docAnnotation);

  assertEquals(Collections.emptyList(), converted);
}
 
Example #21
Source File: FeatureUtilsTest.java    From baleen with Apache License 2.0 5 votes vote down vote up
/** An array feature explicitly set to null must convert to an empty array. */
@Test
public void testNullArrayValue() {
  DocumentAnnotation docAnnotation = (DocumentAnnotation) jCas.getDocumentAnnotationFs();
  docAnnotation.setDocumentReleasability(null);

  Feature releasability =
      docAnnotation.getType().getFeatureByBaseName(DOCUMENT_RELEASABILITY);
  Object[] converted = FeatureUtils.featureToArray(releasability, docAnnotation);

  assertEquals(0, converted.length);
}
 
Example #22
Source File: FeatureUtilsTest.java    From baleen with Apache License 2.0 5 votes vote down vote up
/** Asking for a single object from a populated string-array feature must yield null. */
@Test
public void testNull() {
  String[] values = {"ENG", "WAL", "SCO"};

  DocumentAnnotation docAnnotation = (DocumentAnnotation) jCas.getDocumentAnnotationFs();
  StringArray releasability = new StringArray(jCas, values.length);
  for (int i = 0; i < values.length; i++) {
    releasability.set(i, values[i]);
  }
  docAnnotation.setDocumentReleasability(releasability);

  Feature feature = docAnnotation.getType().getFeatureByBaseName(DOCUMENT_RELEASABILITY);

  assertNull(FeatureUtils.featureToObject(feature, docAnnotation));
}
 
Example #23
Source File: DocumentGraphFactoryTest.java    From baleen with Apache License 2.0 5 votes vote down vote up
/**
 * Assert that the given property map reflects the JCas's document annotation, published ids and
 * metadata, that it has no releasability or content entries, and that it carries the external
 * id.
 *
 * @param jCas the JCas the properties were derived from
 * @param variables the properties to check
 */
@SuppressWarnings("unchecked")
private void assertMetadata(JCas jCas, Map<String, Object> variables) {
  DocumentAnnotation docAnnotation = UimaSupport.getDocumentAnnotation(jCas);
  String expectedId = ConsumerUtils.getExternalId(docAnnotation, false);

  // Fields copied from the document annotation
  assertEquals(docAnnotation.getDocType(), variables.get(FIELD_DOCUMENT_TYPE));
  assertEquals(docAnnotation.getSourceUri(), variables.get(FIELD_DOCUMENT_SOURCE));
  assertEquals(docAnnotation.getLanguage(), variables.get(FIELD_DOCUMENT_LANGUAGE));
  assertEquals(
      new Date(docAnnotation.getTimestamp()), variables.get(FIELD_DOCUMENT_TIMESTAMP));
  assertEquals(
      docAnnotation.getDocumentClassification(), variables.get(FIELD_DOCUMENT_CLASSIFICATION));
  assertEquals(
      UimaTypesUtils.toList(docAnnotation.getDocumentCaveats()),
      variables.get(FIELD_DOCUMENT_CAVEATS));

  assertFalse(variables.containsKey(FIELD_DOCUMENT_RELEASABILITY));

  // First published id
  List<Map<String, String>> publishedIds =
      (List<Map<String, String>>) variables.get(FIELD_PUBLISHEDIDS);
  Map<String, String> firstPublishedId = publishedIds.get(0);
  assertEquals("12", firstPublishedId.get(FIELD_PUBLISHEDIDS_ID));
  assertEquals("test", firstPublishedId.get(FIELD_PUBLISHEDIDS_TYPE));

  // Metadata values stored under the "test" key
  Map<String, Collection<Object>> metadata =
      (Map<String, Collection<Object>>) variables.get(FIELD_METADATA);
  assertTrue(metadata.get("test").contains("1"));
  assertTrue(metadata.get("test").contains("2"));
  assertEquals(2, metadata.get("test").size());

  assertNull(variables.get(FIELD_CONTENT));
  assertEquals(expectedId, variables.get("externalId"));
}
 
Example #24
Source File: DocumentTypeTest.java    From baleen with Apache License 2.0 5 votes vote down vote up
/** With a confidence threshold of 0.99, the document type must be left unset. */
@Test
public void testThrehold() throws Exception {
  jCas.setDocumentText("This text isn't going to score above the threshold.");

  // Model trained on IOM and BBC reporting, and is OFFICIAL
  String modelPath = getClass().getResource(DOCUMENTTYPE_BIN).getPath();
  processJCas(
      DocumentType.PARAM_MODEL, modelPath, DocumentType.PARAM_CONFIDENCE_THRESHOLD, "0.99");

  DocumentAnnotation docAnnotation = getDocumentAnnotation();

  assertEquals(null, docAnnotation.getDocType());
}
 
Example #25
Source File: DocumentGraphFactoryTest.java    From baleen with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a document graph with the document vertex enabled and checks the number of vertices
 * and edges per label, the overall totals, and the properties of the document vertex.
 */
@Test
public void testDocumentGraphWithDocument() throws UIMAException {

  DocumentGraphOptions graphOptions = DocumentGraphOptions.builder().withDocument(true).build();
  DocumentGraphFactory graphFactory = createfactory(graphOptions);

  JCas jCas = JCasFactory.createJCas();
  JCasTestGraphUtil.populateJcas(jCas);

  Graph graph = graphFactory.create(jCas);

  // Document and relation structure
  assertEquals(1, graph.traversal().V().hasLabel(DOCUMENT).count().next().intValue());
  assertEquals(10, graph.traversal().E().hasLabel(MENTION_IN).count().next().intValue());
  assertEquals(2, graph.traversal().V().hasLabel(RELATION).count().next().intValue());
  assertEquals(2, graph.traversal().E().hasLabel(SOURCE).count().next().intValue());
  assertEquals(2, graph.traversal().E().hasLabel(TARGET).count().next().intValue());

  // Reference targets, events and mentions
  assertEquals(3, graph.traversal().V().hasLabel(REFERENCE_TARGET).count().next().intValue());
  assertEquals(1, graph.traversal().V().hasLabel(EVENT).count().next().intValue());
  assertEquals(4, graph.traversal().V().hasLabel(MENTION).count().next().intValue());
  assertEquals(4, graph.traversal().E().hasLabel(MENTION_OF).count().next().intValue());
  assertEquals(0, graph.traversal().E().hasLabel(RELATION).count().next().intValue());
  assertEquals(2, graph.traversal().E().hasLabel(PARTICIPANT_IN).count().next().intValue());

  // Overall totals
  assertEquals(11, IteratorUtils.count(graph.vertices()));
  assertEquals(20, IteratorUtils.count(graph.edges()));

  // The document vertex is looked up by the external id and must carry the document metadata
  DocumentAnnotation docAnnotation = UimaSupport.getDocumentAnnotation(jCas);
  String externalId = ConsumerUtils.getExternalId(docAnnotation, false);

  Vertex documentVertex = graph.traversal().V(externalId).next();
  Map<String, Object> vertexProperties = new HashMap<>();
  documentVertex
      .properties()
      .forEachRemaining(property -> vertexProperties.put(property.key(), property.value()));

  assertMetadata(jCas, vertexProperties);
}
 
Example #26
Source File: DocumentTypeByFilenameTest.java    From baleen with Apache License 2.0 5 votes vote down vote up
/**
 * A lower-case-only pattern must still match the upper-case "T" in the file name, and the
 * resulting type is expected in lower case.
 */
@Test
public void testPatternCaseSensitiveFalse() throws Exception {
  final String pattern = "\\d{8}-([a-z]).*";
  final String fallbackType = "unknown";

  DocumentAnnotation docAnnotation = getDocumentAnnotation();
  docAnnotation.setSourceUri("20170127-Test_Document.docx");

  processJCas(
      DocumentTypeByFilename.PARAM_PATTERN,
      pattern,
      DocumentTypeByFilename.PARAM_DEFAULT,
      fallbackType);

  assertEquals("t", docAnnotation.getDocType());
}
 
Example #27
Source File: DocumentTypeByFilenameTest.java    From baleen with Apache License 2.0 5 votes vote down vote up
/** When the pattern does not match the file name, the configured default type is used. */
@Test
public void testPatternNoMatch() throws Exception {
  final String pattern = "([a-z]{2}).*";
  final String fallbackType = "unknown";

  DocumentAnnotation docAnnotation = getDocumentAnnotation();
  docAnnotation.setSourceUri("20170127-Test_Document.docx");

  processJCas(
      DocumentTypeByFilename.PARAM_PATTERN,
      pattern,
      DocumentTypeByFilename.PARAM_DEFAULT,
      fallbackType);

  assertEquals(fallbackType, docAnnotation.getDocType());
}
 
Example #28
Source File: WebAnnoCasUtilTest.java    From webanno with Apache License 2.0 5 votes vote down vote up
/**
 * Verifies that {@code WebAnnoCasUtil.createDocumentMetadata} replaces an existing
 * {@code DocumentAnnotation} with a {@code DocumentMetaData} while keeping the document language.
 */
@Test
public void thatCreateDocumentMetadataUpgradesExistingDocumentAnnotation() throws Exception
{
    TypeSystemDescription tsd = createTypeSystemDescription();
    
    CAS cas = getRealCas(createCas(tsd));
    
    // Precondition: a fresh CAS has no document annotation at all
    assertThat(cas.select(DocumentAnnotation.class).asList())
            .as("CAS has no DocumentAnnotation")
            .isEmpty();
    
    // Setting the language is what brings the plain DocumentAnnotation into the CAS
    cas.setDocumentLanguage("en");
    
    assertThat(cas.select(DocumentAnnotation.class).asList())
            .as("CAS initialized with DocumentAnnotation")
            .extracting(fs -> fs.getType().getName())
            .containsExactly(TYPE_NAME_DOCUMENT_ANNOTATION);
    assertThat(cas.select(DocumentAnnotation.class).asList())
            .as("Language has been set")
            .extracting(DocumentAnnotation::getLanguage)
            .containsExactly("en");

    // Operation under test
    WebAnnoCasUtil.createDocumentMetadata(cas);

    // Exactly one annotation remains; its type is now DocumentMetaData and the language is kept
    assertThat(cas.select(DocumentAnnotation.class).asList())
            .as("DocumentAnnotation has been upgraded to DocumentMetaData")
            .extracting(fs -> fs.getType().getName())
            .containsExactly(DocumentMetaData.class.getName());
    assertThat(cas.select(DocumentAnnotation.class).asList())
            .as("Language survived upgrade")
            .extracting(DocumentAnnotation::getLanguage)
            .containsExactly("en");
}
 
Example #29
Source File: DocumentTypeByFilenameTest.java    From baleen with Apache License 2.0 5 votes vote down vote up
/** The configured prefix must be prepended to the type derived from the file name. */
@Test
public void testPrefix() throws Exception {
  final String prefix = "filetype_";

  DocumentAnnotation docAnnotation = getDocumentAnnotation();
  docAnnotation.setSourceUri("20170127-Test_Document.docx");

  processJCas(DocumentTypeByFilename.PARAM_PREFIX, prefix);

  assertEquals("filetype_docx", docAnnotation.getDocType());
}
 
Example #30
Source File: TemporalElasticsearch.java    From baleen with Apache License 2.0 5 votes vote down vote up
/**
 * Persist every {@code Temporal} annotation with exact precision as its own document.
 *
 * <p>Each document holds the owning document's id, the temporal's external id, offsets, value,
 * temporal type and confidence. Temporals with range scope additionally get a date range built
 * from their start and stop timestamps; all others get a single date from the start timestamp.
 *
 * @param jCas the JCas whose temporals are persisted
 * @throws AnalysisEngineProcessException declared by the overridden method
 */
@Override
protected void doProcess(JCas jCas) throws AnalysisEngineProcessException {
  DocumentAnnotation docAnnotation = UimaSupport.getDocumentAnnotation(jCas);
  String docId = ConsumerUtils.getExternalId(docAnnotation, contentHashAsId);

  for (Temporal temporal : JCasUtil.select(jCas, Temporal.class)) {

    // Skip anything that is not exactly resolved
    if (!TemporalUtils.PRECISION_EXACT.equals(temporal.getPrecision())) {
      continue;
    }

    String externalId = temporal.getExternalId();

    Map<String, Object> source = new HashMap<>();
    source.put(FIELD_DOC_ID, docId);
    source.put(FIELD_EXTERNAL_ID, externalId);
    source.put(FIELD_BEGIN, temporal.getBegin());
    source.put(FIELD_END, temporal.getEnd());
    source.put(FIELD_VALUE, temporal.getValue());
    source.put(FIELD_TEMPORAL_TYPE, temporal.getTemporalType());
    source.put(FIELD_CONFIDENCE, temporal.getConfidence());

    if (!TemporalUtils.SCOPE_RANGE.equals(temporal.getScope())) {
      // Single point in time
      source.put(FIELD_DATE, temporal.getTimestampStart());
    } else {
      // Range: inclusive lower and upper bounds
      source.put(
          FIELD_DATE_RANGE,
          ImmutableMap.of(
              FIELD_GTE, temporal.getTimestampStart(), FIELD_LTE, temporal.getTimestampStop()));
    }

    // Persist to ElasticSearch
    addDocument(externalId, source);
  }
}