org.apache.uima.fit.util.JCasUtil#indexCovering

Source File: DictionariesExtractor.java From ambiverse-nlu with Apache License 2.0

6 votes

/**
 * Builds one boolean {@link Feature} per dictionary listed for the document language, flagging
 * whether the first token covered by {@code unit} carries a {@link DictionaryFeatureAnnotation}
 * from that dictionary.
 *
 * <p>The token-to-covering-annotation index is rebuilt only when {@code isTheSameDocument}
 * reports that {@code jcas} is not the document seen previously.
 *
 * @param jcas the CAS holding the tokens and dictionary annotations
 * @param unit the classification target; its first covered token is inspected
 * @return the set of features, one per dictionary name of the document language
 * @throws TextClassificationException if {@code unit} does not cover any token
 */
public Set<Feature> extract(JCas jcas, TextClassificationTarget unit) throws TextClassificationException {
    if (!isTheSameDocument(jcas)) {
        logger.trace("Building index of covering dictionaries annotations...");
        dictionaryMap = JCasUtil.indexCovering(jcas, Token.class, DictionaryFeatureAnnotation.class);
    }

    // Fail with a descriptive, declared exception instead of a bare NoSuchElementException
    // when the target does not cover any token.
    Token token = JCasUtil.selectCovered(jcas, Token.class, unit).stream()
            .findFirst()
            .orElseThrow(() -> new TextClassificationException(
                    "No token covered by classification target at offsets ["
                            + unit.getBegin() + ", " + unit.getEnd() + "]"));

    // Names of all dictionaries that have an annotation covering the token.
    Set<String> dictionaryFeatureAnnotations = dictionaryMap.get(token)
            .stream()
            .map(DictionaryFeatureAnnotation::getDictionary)
            .collect(Collectors.toSet());

    // NOTE(review): assumes 'dictionaries' has an entry for the document language - confirm.
    return dictionaries.get(jcas.getDocumentLanguage())
            .stream()
            .map(d -> new Feature(d, dictionaryFeatureAnnotations.contains(d)))
            .collect(Collectors.toSet());
}

Source File: Coreference.java From baleen with Apache License 2.0

6 votes

/**
 * Writes one row per {@link Entity} in the document, skipping entities whose converted row is
 * empty.
 *
 * @param jCas the document to export
 */
@Override
protected void write(JCas jCas) {
  final String sourceUri = getDocumentAnnotation(jCas).getSourceUri();

  // Build every lookup once up front so the per-entity conversion never rescans the CAS.
  // Sentence-centric lookups:
  final Map<Entity, List<Sentence>> sentencesByEntity =
      JCasUtil.indexCovering(jCas, Entity.class, Sentence.class);
  final Map<Sentence, List<Entity>> entitiesBySentence =
      JCasUtil.indexCovered(jCas, Sentence.class, Entity.class);
  final Map<Sentence, List<WordToken>> tokensBySentence =
      JCasUtil.indexCovered(jCas, Sentence.class, WordToken.class);
  // Token-centric lookup:
  final Map<WordToken, List<Entity>> entitiesByToken =
      JCasUtil.indexCovering(jCas, WordToken.class, Entity.class);

  JCasUtil.select(jCas, Entity.class).stream()
      .map(
          entity ->
              convertEntityToRow(
                  sourceUri,
                  sentencesByEntity,
                  entitiesBySentence,
                  tokensBySentence,
                  entitiesByToken,
                  entity))
      .filter(row -> row.length != 0)
      .forEach(this::write);
}

Source File: ProperNounInformationCollector.java From baleen with Apache License 2.0

6 votes

/**
 * Collects, for every reference target, the sentences covering its mentions together with only
 * those mentions that contain at least one token tagged {@code NNP}.
 *
 * @param jCas the document to read from
 * @param clazz the entity type to collect
 * @return one {@link EntityInformation} per reference target
 */
@Override
public <T extends Entity> Set<EntityInformation<T>> getEntityInformation(
    JCas jCas, Class<T> clazz) {
  Multimap<ReferenceTarget, T> mentionsByTarget = ReferentUtils.createReferentMap(jCas, clazz);
  Map<T, List<Sentence>> sentencesByMention =
      JCasUtil.indexCovering(jCas, clazz, Sentence.class);
  Map<T, List<WordToken>> tokensByMention = JCasUtil.indexCovered(jCas, clazz, WordToken.class);

  Set<EntityInformation<T>> result = new HashSet<>();
  mentionsByTarget
      .asMap()
      .forEach(
          (target, mentions) -> {
            // Every sentence that covers any mention of this target.
            Collection<Sentence> sentences =
                mentions.stream()
                    .map(sentencesByMention::get)
                    .flatMap(List::stream)
                    .collect(Collectors.toSet());

            // Keep only the mentions with at least one NNP-tagged token.
            List<T> properNouns =
                mentions.stream()
                    .filter(
                        mention ->
                            tokensByMention.get(mention).stream()
                                .anyMatch(token -> "NNP".equals(token.getPartOfSpeech())))
                    .collect(toList());

            result.add(new EntityInformation<T>(target, properNouns, sentences));
          });

  return result;
}

Source File: JCasInformationCollector.java From baleen with Apache License 2.0

6 votes

/**
 * Collects, for every reference target, all of its mentions and the sentences covering them.
 *
 * @param jCas the document to read from
 * @param clazz the entity type to collect
 * @return one {@link EntityInformation} per reference target
 */
@Override
public <T extends Entity> Set<EntityInformation<T>> getEntityInformation(
    JCas jCas, Class<T> clazz) {
  Multimap<ReferenceTarget, T> mentionsByTarget = ReferentUtils.createReferentMap(jCas, clazz);
  Map<T, List<Sentence>> sentencesByMention =
      JCasUtil.indexCovering(jCas, clazz, Sentence.class);

  Set<EntityInformation<T>> result = new HashSet<>();
  mentionsByTarget
      .asMap()
      .forEach(
          (target, mentions) -> {
            // Every sentence that covers any mention of this target.
            Collection<Sentence> sentences =
                mentions.stream()
                    .map(sentencesByMention::get)
                    .flatMap(List::stream)
                    .collect(Collectors.toSet());

            result.add(new EntityInformation<T>(target, mentions, sentences));
          });

  return result;
}

Source File: RemoveInteractionInEntities.java From baleen with Apache License 2.0

5 votes

/**
 * Removes from the index every {@link Interaction} that appears as a key of the
 * Interaction-to-covering-Entity index.
 *
 * @param jCas the document to process
 */
@Override
protected void doProcess(JCas jCas) throws AnalysisEngineProcessException {
  // Only the key set of the covering index is needed; the covering entities are not used.
  removeFromJCasIndex(
      JCasUtil.indexCovering(jCas, Interaction.class, Entity.class).keySet());
}

Source File: PartOfSpeechRelationshipAnnotator.java From baleen with Apache License 2.0

5 votes

/**
 * Processes each sentence of the document with its sorted word tokens and a lookup of the
 * entities covering each token.
 *
 * @param jCas the document to process
 */
@Override
protected void extract(JCas jCas) throws AnalysisEngineProcessException {

  // Entities covering each word token.
  Map<WordToken, List<Entity>> entitiesByToken =
      JCasUtil.indexCovering(jCas, WordToken.class, Entity.class);

  // Word tokens contained in each sentence.
  Map<Sentence, List<WordToken>> tokensBySentence =
      JCasUtil.indexCovered(jCas, Sentence.class, WordToken.class);

  for (Map.Entry<Sentence, List<WordToken>> entry : tokensBySentence.entrySet()) {
    processSentence(jCas, entry.getKey(), sort(entry.getValue()), entitiesByToken);
  }
}

Source File: UbmreConstituent.java From baleen with Apache License 2.0

5 votes

/**
 * Prepares per-document state after the superclass has run its own preparation: the parse tree
 * and the index of word tokens covering each interaction.
 *
 * @param jCas the document about to be processed
 */
@Override
protected void preExtract(JCas jCas) {
  super.preExtract(jCas);

  // Parse tree for the whole document.
  this.parseTree = ParseTree.build(jCas);

  // For each interaction, the word tokens that cover it.
  this.interactionCoveringTokens =
      JCasUtil.indexCovering(jCas, Interaction.class, WordToken.class);
}

Source File: TextBlocks.java From baleen with Apache License 2.0

5 votes

/**
 * Adds {@link Text} annotations to the document: a single one spanning the whole text when there
 * are no {@link Structure} annotations, otherwise one per structure of a configured class, after
 * which overlapping {@link Text} annotations are pruned according to {@code keepSmallest}.
 *
 * @param jCas the document to process
 */
@Override
protected void doProcess(final JCas jCas) throws AnalysisEngineProcessException {

  final Collection<Structure> structures = JCasUtil.select(jCas, Structure.class);

  // Without any structural annotations the entire text is marked as one text block.
  if (structures.isEmpty()) {
    addToJCasIndex(new Text(jCas, 0, jCas.getDocumentText().length()));
    return;
  }

  // Otherwise create a text block for each structure of a wanted type.
  for (final Structure structure : structures) {
    if (structuralClasses.contains(structure.getClass())) {
      addToJCasIndex(new Text(jCas, structure.getBegin(), structure.getEnd()));
    }
  }

  // Now remove any that cover others, so we keep only biggest/most detailed as per request.
  final Map<Text, List<Text>> related =
      keepSmallest
          ? JCasUtil.indexCovering(jCas, Text.class, Text.class)
          : JCasUtil.indexCovered(jCas, Text.class, Text.class);

  // Drop each key from its own list, in case an annotation has been reported as covering
  // itself (potential bug introduced in UIMAfit 2.3.0).
  for (final Map.Entry<Text, List<Text>> entry : related.entrySet()) {
    entry.getValue().remove(entry.getKey());
  }

  // Whatever is still listed is removed from the index.
  for (final List<Text> group : related.values()) {
    for (final Text text : group) {
      removeFromJCasIndex(text);
    }
  }
}

Source File: DocumentFactory.java From baleen with Apache License 2.0

5 votes

/**
 * Construct the document factory for the given jCas and {@link SentenceFactory}.
 *
 * <p>Delegates to the four-argument constructor, supplying an index of the sentences covering
 * each {@link Entity} and the referent map for {@link Entity} annotations, both built from the
 * given jCas.
 *
 * @param jCas to base the document on
 * @param sentenceFactory to use
 */
public DocumentFactory(JCas jCas, SentenceFactory sentenceFactory) {
  this(
      jCas,
      JCasUtil.indexCovering(jCas, Entity.class, Sentence.class),
      ReferentUtils.createReferentMap(jCas, Entity.class),
      sentenceFactory);
}

Source File: SentenceFactory.java From baleen with Apache License 2.0

5 votes

/**
 * Construct the sentence factory for the given jCas.
 *
 * <p>Delegates to the four-argument constructor with indexes built from the jCas, in this order:
 * word tokens covered by each sentence, entities covering each word token, phrase chunks
 * covering each word token, and dependencies covered by each sentence.
 *
 * @param jCas to create sentences from
 */
public SentenceFactory(JCas jCas) {
  this(
      JCasUtil.indexCovered(jCas, Sentence.class, WordToken.class),
      JCasUtil.indexCovering(jCas, WordToken.class, Entity.class),
      JCasUtil.indexCovering(jCas, WordToken.class, PhraseChunk.class),
      JCasUtil.indexCovered(jCas, Sentence.class, Dependency.class));
}

Source File: CsvEvent.java From baleen with Apache License 2.0

5 votes

/**
 * Writes one row per {@link Event} in the document, skipping events whose extracted row is
 * empty.
 *
 * @param jCas the document to export
 */
@Override
protected void write(JCas jCas) {
  final String sourceUri = getDocumentAnnotation(jCas).getSourceUri();

  // Sentences covering each event, indexed once for the whole document.
  final Map<Event, List<Sentence>> sentencesByEvent =
      JCasUtil.indexCovering(jCas, Event.class, Sentence.class);

  JCasUtil.select(jCas, Event.class).stream()
      .map(event -> extracted(sourceUri, sentencesByEvent, event))
      .filter(row -> row.length != 0)
      .forEach(this::write);
}

Source File: CsvRelation.java From baleen with Apache License 2.0

5 votes

/**
 * Writes one row per {@link Relation} in the document. Each row holds the source URI, the text
 * of the first covering sentence (empty when there is none), the relation type and sub type,
 * normalized values and covered texts of source and target, their type short names, and the
 * confidence.
 *
 * @param jCas the document to export
 */
@Override
protected void write(JCas jCas) {
  final String sourceUri = getDocumentAnnotation(jCas).getSourceUri();

  // Sentences covering each relation, indexed once for the whole document.
  final Map<Relation, List<Sentence>> sentencesByRelation =
      JCasUtil.indexCovering(jCas, Relation.class, Sentence.class);

  for (final Relation relation : JCasUtil.select(jCas, Relation.class)) {
    // Use the first covering sentence, falling back to an empty string.
    final List<Sentence> covering = sentencesByRelation.get(relation);
    final String sentenceText =
        covering.isEmpty() ? "" : covering.iterator().next().getCoveredText();

    write(
        new String[] {
          sourceUri,
          sentenceText,
          relation.getRelationshipType(),
          relation.getRelationSubType(),
          normalize(relation.getSource().getValue()),
          normalize(relation.getTarget().getValue()),
          normalize(relation.getSource().getCoveredText()),
          normalize(relation.getTarget().getCoveredText()),
          relation.getSource().getType().getShortName(),
          relation.getTarget().getType().getShortName(),
          Double.toString(relation.getConfidence())
        });
  }
}

Source File: ParseTree.java From baleen with Apache License 2.0

4 votes

/**
 * Builds the parse tree of a document from its phrase chunks and word tokens.
 *
 * <p>Each {@link PhraseChunk} becomes a node. A chunk that no other chunk covers is a root;
 * otherwise its parent is the smallest covering chunk. Each node is then given the word tokens
 * that lie within its chunk but not within any of its children's chunks.
 *
 * @param jCas the document to build the tree from
 * @return the parse tree
 */
public static ParseTree build(JCas jCas) {

  // Build a tree phrase to phrase

  final Map<PhraseChunk, List<PhraseChunk>> index =
      JCasUtil.indexCovering(jCas, PhraseChunk.class, PhraseChunk.class);

  final Collection<PhraseChunk> phrases = JCasUtil.select(jCas, PhraseChunk.class);

  // Roots are only appended to and sorted, so an ArrayList is the natural choice.
  final List<ParseTreeNode> roots = new ArrayList<>();
  final Map<PhraseChunk, ParseTreeNode> chunkToNode = new HashMap<>();

  for (final PhraseChunk chunk : phrases) {

    // The node may already exist if this chunk was seen earlier as another chunk's parent.
    final ParseTreeNode treeNode = chunkToNode.computeIfAbsent(chunk, ParseTreeNode::new);

    final Collection<PhraseChunk> covering = index.get(chunk);
    if (covering == null || covering.isEmpty()) {
      // Nothing is covering this chunk, so it is a root
      roots.add(treeNode);
    } else {
      // This is covered, so we add the smallest one as our parent
      final PhraseChunk parent = findSmallest(covering);
      final ParseTreeNode parentNode = chunkToNode.computeIfAbsent(parent, ParseTreeNode::new);

      treeNode.setParent(parentNode);
      parentNode.addChild(treeNode);
    }
  }

  // Add words to the tree

  final Map<PhraseChunk, List<WordToken>> wordIndex =
      JCasUtil.indexCovered(jCas, PhraseChunk.class, WordToken.class);

  final Map<WordToken, ParseTreeNode> wordToNode = new HashMap<>();

  chunkToNode
      .values()
      .forEach(
          n -> {
            // Sort all tree nodes by sentence order
            n.getChildren().sort(SENTENCE_ORDER);

            // Get all the words which are within this chunk, and then remove those which are in
            // children
            final Collection<WordToken> allWords = wordIndex.get(n.getChunk());
            if (allWords != null) {
              // Work on a copy so the index's own list is left untouched
              final List<WordToken> words = new ArrayList<>(allWords);

              // Remove the words which are covered by our children, leaving just our words
              if (n.hasChildren()) {
                n.getChildren().stream()
                    .map(t -> wordIndex.get(t.getChunk()))
                    .filter(Objects::nonNull)
                    .forEach(words::removeAll);
              }

              // Add the words into the tree node
              n.addWords(words);
              words.forEach(w -> wordToNode.put(w, n));
            }
          });

  // Sort roots

  roots.sort(SENTENCE_ORDER);

  return new ParseTree(roots, chunkToNode, wordToNode);
}

Source File: MongoEvents.java From baleen with Apache License 2.0

4 votes

/**
 * Converts every {@link Event} in the document into a Mongo {@link Document} and inserts them
 * all in one call; nothing is inserted when the document has no events.
 *
 * @param documentId identifier stored on every event document
 * @param jCas the document to read events from
 * @param textClass annotation type whose covering instances provide the event text
 */
private <T extends Base> void saveEvents(String documentId, JCas jCas, Class<T> textClass) {

  // Annotations of the requested type covering each event.
  final Map<Event, List<T>> textsByEvent = JCasUtil.indexCovering(jCas, Event.class, textClass);

  List<Document> toInsert =
      JCasUtil.select(jCas, Event.class).stream()
          .map(
              event -> {
                // Covered texts of all covering annotations, space separated.
                String coveredText =
                    textsByEvent.get(event).stream()
                        .map(covering -> covering.getCoveredText())
                        .collect(Collectors.joining(" "));

                // @formatter:off
                Document eventDocument =
                    new Document()
                        .append(FIELD_TEXT, coveredText)
                        .append(FIELD_ENTITIES, getEntityDocuments(event))
                        .append(FIELD_DOCUMENT_ID, documentId)
                        .append(FIELD_TYPES, getEventTypes(event))
                        .append(FIELD_VALUE, event.getValue())
                        .append(FIELD_TOKENS, getEventTokens(event))
                        .append(FIELD_BEGIN, event.getBegin())
                        .append(FIELD_END, event.getEnd())
                        .append(FIELD_CONFIDENCE, event.getConfidence());
                // @formatter:on

                // History is attached only when configured.
                if (outputHistory) {
                  eventDocument.append(
                      FIELD_HISTORY,
                      new HistoryConverter(
                              event, fields, getSupport().getDocumentHistory(jCas), getMonitor())
                          .convert());
                }

                return eventDocument;
              })
          .collect(Collectors.toList());

  if (toInsert.isEmpty()) {
    return;
  }
  eventsCollection.insertMany(toInsert);
}

Source File: MongoRelations.java From baleen with Apache License 2.0

4 votes

/**
 * Converts every {@link Relation} in the document into a Mongo {@link Document} and inserts them
 * all in one call; nothing is inserted when the document has no relations.
 *
 * @param documentId identifier stored on every relation document
 * @param jCas the document to read relations from
 */
private void saveRelations(String documentId, JCas jCas) {
  // Sentences covering each relation, indexed once for the whole document.
  final Map<Relation, List<Sentence>> sentencesByRelation =
      JCasUtil.indexCovering(jCas, Relation.class, Sentence.class);

  List<Document> toInsert =
      JCasUtil.select(jCas, Relation.class).stream()
          .map(
              relation -> {
                // Texts of all covering sentences, joined with ". ".
                String sentenceText =
                    sentencesByRelation.get(relation).stream()
                        .map(covering -> covering.getCoveredText())
                        .collect(Collectors.joining(". "));

                // @formatter:off
                Document relationDocument =
                    new Document()
                        .append(fields.getExternalId(), relation.getExternalId())
                        .append(FIELD_RELATIONSHIP_TYPE, relation.getRelationshipType())
                        .append(FIELD_RELATIONSHIP_SUBTYPE, relation.getRelationSubType())
                        .append(FIELD_SOURCE_VALUE, relation.getSource().getValue())
                        .append(FIELD_SOURCE_TYPE, relation.getSource().getType().getShortName())
                        .append(FIELD_SOURCE_TYPE_FULL, relation.getSource().getType().getName())
                        .append(FIELD_VALUE, relation.getValue())
                        .append(FIELD_TARGET_VALUE, relation.getTarget().getValue())
                        .append(FIELD_TARGET_TYPE, relation.getTarget().getType().getShortName())
                        .append(FIELD_TARGET_TYPE_FULL, relation.getTarget().getType().getName())
                        .append(FIELD_SENTENCE, sentenceText)
                        .append(FIELD_DOCUMENT_ID, documentId)
                        .append(FIELD_SOURCE, relation.getSource().getExternalId())
                        .append(FIELD_TARGET, relation.getTarget().getExternalId())
                        .append(FIELD_BEGIN, relation.getBegin())
                        .append(FIELD_END, relation.getEnd())
                        .append(FIELD_CONFIDENCE, relation.getConfidence())
                        .append(FIELD_SENTENCE_DISTANCE, relation.getSentenceDistance())
                        .append(
                            FIELD_NORMAL_SENTENCE_DISTANCE,
                            normalize(relation.getSentenceDistance()))
                        .append(FIELD_WORD_DISTANCE, relation.getWordDistance())
                        .append(FIELD_NORMAL_WORD_DISTANCE, normalize(relation.getWordDistance()))
                        .append(FIELD_DEPENDENCY_DISTANCE, relation.getDependencyDistance())
                        .append(
                            FIELD_NORMAL_DEPENDENCY_DISTANCE,
                            normalize(relation.getDependencyDistance()));
                // @formatter:on

                return relationDocument;
              })
          .collect(Collectors.toList());

  if (toInsert.isEmpty()) {
    return;
  }
  relationsCollection.insertMany(toInsert);
}

Source File: AbstractReNounRelationshipAnnotator.java From baleen with Apache License 2.0

3 votes

/**
 * Runs {@code processTree} for every seed pattern against the document's dependency graph and
 * the index of entities covering each word token.
 *
 * @param jCas the document to process
 */
@Override
protected void extract(JCas jCas) throws AnalysisEngineProcessException {

  // Entities covering each word token.
  Map<WordToken, List<Entity>> entitiesByToken =
      JCasUtil.indexCovering(jCas, WordToken.class, Entity.class);

  // Dependency graph of the whole document, shared by all patterns.
  DependencyGraph graph = DependencyGraph.build(jCas);

  patterns
      .get()
      .forEach(pattern -> processTree(pattern, jCas, graph, entitiesByToken));
}

Java Code Examples for org.apache.uima.fit.util.JCasUtil#indexCovering()