Java Code Examples for org.apache.uima.jcas.tcas.DocumentAnnotation#setLanguage()

The following examples show how to use org.apache.uima.jcas.tcas.DocumentAnnotation#setLanguage(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: JCasDeserialiser.java    From baleen with Apache License 2.0 5 votes vote down vote up
/**
 * Copies document-level properties from a deserialised map onto the document annotation.
 *
 * <p>String properties whose key is absent from the map are set to the empty string; a timestamp
 * that is absent or mapped to {@code null} is set to 0. Caveats and releasability are converted
 * with {@code UimaTypesUtils.toArray}; when they are absent, {@code null} is handed to that
 * helper.
 *
 * @param jCas the JCas handed to {@code UimaTypesUtils.toArray} when building the arrays
 * @param da the document annotation to populate
 * @param map property values keyed by the {@code JsonJCas.DA_*} constants
 */
private void processDocumentAnnotation(
    final JCas jCas, final DocumentAnnotation da, final Map<String, Object> map) {
  da.setDocType((String) map.getOrDefault(JsonJCas.DA_DOCUMENT_TYPE, ""));
  da.setDocumentClassification((String) map.getOrDefault(JsonJCas.DA_CLASSIFICATION, ""));
  da.setLanguage((String) map.getOrDefault(JsonJCas.DA_LANGUAGE, ""));
  da.setSourceUri((String) map.getOrDefault(JsonJCas.DA_SOURCE_URI, ""));

  // getOrDefault only falls back when the key is absent: a key explicitly mapped to null
  // (for example a JSON null) is returned as null and would fail on longValue().
  final Number timestamp = (Number) map.get(JsonJCas.DA_TIMESTAMP);
  da.setTimestamp(timestamp == null ? 0L : timestamp.longValue());

  // The map is untyped, so these casts cannot be checked by the compiler.
  @SuppressWarnings("unchecked")
  final Collection<String> caveats = (Collection<String>) map.get(JsonJCas.DA_CAVEATS);
  da.setDocumentCaveats(UimaTypesUtils.toArray(jCas, caveats));

  @SuppressWarnings("unchecked")
  final Collection<String> releasability =
      (Collection<String>) map.get(JsonJCas.DA_RELEASABILITY);
  da.setDocumentReleasability(UimaTypesUtils.toArray(jCas, releasability));
}
 
Example 2
Source File: MimeReader.java    From baleen with Apache License 2.0 4 votes vote down vote up
/**
 * Reads the next queued file as a MIME message and loads it into the JCas.
 *
 * <p>The document annotation is filled with fixed values for doc type ("email"), classification
 * ("O") and language ("en"), plus a timestamp from {@code calculateBestDate} and a source URI
 * that is the file's absolute path with the leading {@code rootFolder.length()} characters
 * removed. Every message header is added as metadata under an "email." prefix, followed by the
 * from/to/cc/bcc/subject values and a "title" entry that repeats the subject. The plain-text
 * body, a separator and a generated header block are then passed to {@code extractContent}.
 *
 * <p>Any exception raised while opening or processing the file is logged as a warning and the
 * message is discarded rather than propagated.
 *
 * @param jCas the JCas to populate
 * @throws IOException declared by the overridden method
 * @throws CollectionException declared by the overridden method
 */
@Override
protected void doGetNext(final JCas jCas) throws IOException, CollectionException {
  final Path path = files.pop();
  final File file = path.toFile();

  // Number of files still queued after taking this one; used only for the progress message.
  final int left = files.size();

  getMonitor()
      .info(
          "Processing {} ({} %)",
          file.getAbsolutePath(), String.format("%.2f", 100 * (total - left) / (double) total));

  try (FileInputStream is = new FileInputStream(file)) {
    final Session s = Session.getDefaultInstance(new Properties());
    final MimeMessageParser parser = new MimeMessageParser(new MimeMessage(s, is));
    parser.parse();
    final MimeMessage message = parser.getMimeMessage();

    final DocumentAnnotation da = UimaSupport.getDocumentAnnotation(jCas);
    da.setTimestamp(calculateBestDate(message, file));
    da.setDocType("email");
    da.setDocumentClassification("O");
    String source = file.getAbsolutePath().substring(rootFolder.length());
    da.setSourceUri(source);
    da.setLanguage("en");

    // Add all headers as metadata, with email prefix
    final Enumeration<Header> allHeaders = message.getAllHeaders();
    while (allHeaders.hasMoreElements()) {
      final Header header = allHeaders.nextElement();
      addMetadata(jCas, "email." + header.getName(), header.getValue());
    }

    addMetadata(jCas, "from", parser.getFrom());
    addMetadata(jCas, "to", parser.getTo());
    addMetadata(jCas, "cc", parser.getCc());
    addMetadata(jCas, "bcc", parser.getBcc());
    addMetadata(jCas, "subject", parser.getSubject());

    // Add fake title
    addMetadata(jCas, "title", parser.getSubject());

    String actualContent = parser.getPlainContent();

    // A message without a plain-text part is treated as having an empty body.
    if (actualContent == null) {
      actualContent = "";
    }

    // TODO: At this point we could create a representation of the addresses, etc in the content
    // eg a table of to, from, and etc
    // then annotate them a commsidentifier, date, person.
    // We could also create relations between sender and receiver

    String content = actualContent + "\n\n---\n\n";

    // The header block is appended after the body and separator; it is given the length of the
    // text that precedes it.
    final String headerBlock = createHeaderBlock(content.length(), jCas, parser);
    content = content + headerBlock;

    // Mark the span covering only the original message body.
    final Text text = new Text(jCas);
    text.setBegin(0);
    text.setEnd(actualContent.length());
    text.addToIndexes();

    // Encode with an explicit charset: the no-argument getBytes() uses the platform default,
    // which makes the bytes handed to extractContent depend on the host configuration and can
    // lose non-ASCII characters.
    final byte[] contentBytes = content.getBytes(java.nio.charset.StandardCharsets.UTF_8);
    extractContent(new ByteArrayInputStream(contentBytes), source, jCas);
  } catch (final Exception e) {
    // Deliberate best-effort: a message that cannot be read or parsed is skipped.
    getMonitor().warn("Discarding message", e);
  }
}
 
Example 3
Source File: JCasTestGraphUtil.java    From baleen with Apache License 2.0 4 votes vote down vote up
/**
 * Fills the given JCas with a fixed fixture: the {@code CONTENT} text, document annotation
 * properties, two metadata entries, a published id, a reference target, three people, one
 * location, two relations and one event.
 *
 * @param jCas the JCas to populate
 */
public static void populateJcas(final JCas jCas) {
  jCas.setDocumentText(CONTENT);

  // Document-level properties.
  final DocumentAnnotation docAnnotation = (DocumentAnnotation) jCas.getDocumentAnnotationFs();
  docAnnotation.setDocumentClassification("CLASS");
  docAnnotation.setDocType("MANUAL");
  docAnnotation.setSourceUri("http://test.com");
  docAnnotation.setLanguage("en");
  docAnnotation.setTimestamp(System.currentTimeMillis());
  docAnnotation.setDocumentCaveats(new StringArray(jCas, 2));
  docAnnotation.setDocumentCaveats(0, "GITHUB");
  docAnnotation.setDocumentCaveats(1, "CAVEAT");

  // Two metadata entries sharing the key "test" but with different values.
  final Metadata firstMetadata = new Metadata(jCas);
  firstMetadata.setKey("test");
  firstMetadata.setValue("1");
  firstMetadata.addToIndexes(jCas);

  final Metadata secondMetadata = new Metadata(jCas);
  secondMetadata.setKey("test");
  secondMetadata.setValue("2");
  secondMetadata.addToIndexes(jCas);

  final PublishedId publishedId = new PublishedId(jCas);
  publishedId.setPublishedIdType("test");
  publishedId.setValue("12");
  publishedId.addToIndexes(jCas);

  // Reference target shared by the "John Smith" and "He" mentions below.
  final ReferenceTarget sharedTarget = new ReferenceTarget(jCas);
  sharedTarget.setLinking("testLinking");
  sharedTarget.addToIndexes(jCas);

  // People.
  final Person johnSmith = new Person(jCas);
  johnSmith.setBegin(25);
  johnSmith.setEnd(35);
  johnSmith.setGender("Male");
  johnSmith.setValue("John Smith");
  johnSmith.setConfidence(0.9d);
  johnSmith.setReferent(sharedTarget);
  johnSmith.addToIndexes(jCas);

  final Person janeDoe = new Person(jCas);
  janeDoe.setBegin(50);
  janeDoe.setEnd(58);
  janeDoe.setGender("Female");
  janeDoe.setValue("Jane Doe");
  janeDoe.setConfidence(0.8d);
  janeDoe.addToIndexes(jCas);

  final Person pronoun = new Person(jCas);
  pronoun.setBegin(60);
  pronoun.setEnd(62);
  pronoun.setGender("Male");
  pronoun.setValue("He");
  pronoun.setConfidence(0.9d);
  pronoun.setReferent(sharedTarget);
  pronoun.addToIndexes(jCas);

  // Location.
  final Location islands = new Location(jCas);
  islands.setBegin(72);
  islands.setEnd(87);
  islands.setGeoJson(GEO_JSON);
  islands.setValue("Dinagat Islands");
  islands.setConfidence(0.9d);
  islands.addToIndexes(jCas);

  // Relations: person-to-person, then person-to-location.
  final Relation relatedTo = new Relation(jCas);
  relatedTo.setBegin(36);
  relatedTo.setEnd(49);
  relatedTo.setValue("is related to");
  relatedTo.setRelationshipType(RELATED_TYPE);
  relatedTo.setSource(johnSmith);
  relatedTo.setTarget(janeDoe);
  relatedTo.addToIndexes(jCas);

  final Relation livesAt = new Relation(jCas);
  livesAt.setBegin(63);
  livesAt.setEnd(71);
  livesAt.setValue("lives at");
  livesAt.setRelationshipType(LIVES_TYPE);
  livesAt.setSource(johnSmith);
  livesAt.setTarget(islands);
  livesAt.addToIndexes(jCas);

  // Event with one type, two participating entities and two arguments.
  final Event meeting = new Event(jCas);
  meeting.setBegin(0);
  meeting.setEnd(10);
  meeting.setValue("test event");
  meeting.setEventType(new StringArray(jCas, 1));
  meeting.setEventType(0, "MEETING");
  meeting.setEntities(new FSArray(jCas, 2));
  meeting.setEntities(0, johnSmith);
  meeting.setEntities(1, janeDoe);
  meeting.setArguments(new StringArray(jCas, 2));
  meeting.setArguments(0, "argument");
  meeting.setArguments(1, "Other");
  meeting.addToIndexes(jCas);
}