Java Code Examples for org.apache.uima.fit.util.JCasUtil#indexCovering()
The following examples show how to use
org.apache.uima.fit.util.JCasUtil#indexCovering().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: DictionariesExtractor.java From ambiverse-nlu with Apache License 2.0 | 6 votes |
/**
 * Produces one feature per dictionary registered for the document language, each flagged with
 * whether the first token covered by {@code unit} has a covering dictionary annotation that
 * names that dictionary.
 *
 * <p>The token-to-dictionary-annotation index is rebuilt whenever {@code isTheSameDocument}
 * reports that {@code jcas} is a different document.
 *
 * @param jcas the CAS being processed
 * @param unit the classification target whose first covered token is inspected; at least one
 *     token is assumed to be covered, since the first one is read unconditionally
 * @return the set of dictionary features for the target
 * @throws TextClassificationException declared by the extractor contract
 */
public Set<Feature> extract(JCas jcas, TextClassificationTarget unit)
    throws TextClassificationException {
  final boolean indexIsStale = !isTheSameDocument(jcas);
  if (indexIsStale) {
    logger.trace("Building index of covering dictionaries annotations...");
    dictionaryMap = JCasUtil.indexCovering(jcas, Token.class, DictionaryFeatureAnnotation.class);
  }

  // Only the first token covered by the target is considered.
  final Token firstToken = JCasUtil.selectCovered(jcas, Token.class, unit).iterator().next();

  // Names of all dictionaries whose annotations cover that token.
  final Set<String> matchedDictionaries =
      dictionaryMap.get(firstToken).stream()
          .map(annotation -> annotation.getDictionary())
          .collect(Collectors.toSet());

  return dictionaries.get(jcas.getDocumentLanguage()).stream()
      .map(name -> new Feature(name, matchedDictionaries.contains(name)))
      .collect(Collectors.toSet());
}
Example 2
Source File: Coreference.java From baleen with Apache License 2.0 | 6 votes |
@Override protected void write(JCas jCas) { final String source = getDocumentAnnotation(jCas).getSourceUri(); // For each entity we need to find all the other sentences they are contained in // This should be all entities and sentences final Map<Entity, List<Sentence>> coveringSentence = JCasUtil.indexCovering(jCas, Entity.class, Sentence.class); final Map<Sentence, List<Entity>> coveredEntities = JCasUtil.indexCovered(jCas, Sentence.class, Entity.class); final Map<Sentence, List<WordToken>> coveredTokens = JCasUtil.indexCovered(jCas, Sentence.class, WordToken.class); final Map<WordToken, List<Entity>> coveringEntity = JCasUtil.indexCovering(jCas, WordToken.class, Entity.class); JCasUtil.select(jCas, Entity.class).stream() .map( e -> convertEntityToRow( source, coveringSentence, coveredEntities, coveredTokens, coveringEntity, e)) .filter(s -> s.length > 0) .forEach(this::write); }
Example 3
Source File: ProperNounInformationCollector.java From baleen with Apache License 2.0 | 6 votes |
/**
 * Collects, for each reference target, the mentions that contain a proper noun together with
 * the sentences covering any mention of that target.
 *
 * <p>A mention counts as a proper noun when at least one of the word tokens it covers has the
 * part-of-speech tag {@code "NNP"}.
 *
 * @param jCas the CAS to read from
 * @param clazz the entity type to collect
 * @param <T> the entity type
 * @return one {@link EntityInformation} per reference target
 */
@Override
public <T extends Entity> Set<EntityInformation<T>> getEntityInformation(
    JCas jCas, Class<T> clazz) {
  Multimap<ReferenceTarget, T> mentionsByTarget = ReferentUtils.createReferentMap(jCas, clazz);
  Map<T, List<Sentence>> sentencesByMention =
      JCasUtil.indexCovering(jCas, clazz, Sentence.class);
  Map<T, List<WordToken>> tokensByMention = JCasUtil.indexCovered(jCas, clazz, WordToken.class);

  Set<EntityInformation<T>> result = new HashSet<>();
  for (Map.Entry<ReferenceTarget, Collection<T>> group : mentionsByTarget.asMap().entrySet()) {
    Collection<T> mentions = group.getValue();

    // Sentences are gathered from every mention, not only the proper-noun ones.
    Collection<Sentence> sentences =
        mentions.stream()
            .flatMap(mention -> sentencesByMention.get(mention).stream())
            .collect(Collectors.toSet());

    List<T> properNouns =
        mentions.stream()
            .filter(
                mention ->
                    tokensByMention.get(mention).stream()
                        .map(token -> token.getPartOfSpeech())
                        .anyMatch(tag -> "NNP".equals(tag)))
            .collect(toList());

    result.add(new EntityInformation<T>(group.getKey(), properNouns, sentences));
  }
  return result;
}
Example 4
Source File: JCasInformationCollector.java From baleen with Apache License 2.0 | 6 votes |
/**
 * Collects, for each reference target, all of its mentions together with the sentences
 * covering any of those mentions.
 *
 * @param jCas the CAS to read from
 * @param clazz the entity type to collect
 * @param <T> the entity type
 * @return one {@link EntityInformation} per reference target
 */
@Override
public <T extends Entity> Set<EntityInformation<T>> getEntityInformation(
    JCas jCas, Class<T> clazz) {
  Multimap<ReferenceTarget, T> mentionsByTarget = ReferentUtils.createReferentMap(jCas, clazz);
  Map<T, List<Sentence>> sentencesByMention =
      JCasUtil.indexCovering(jCas, clazz, Sentence.class);

  Set<EntityInformation<T>> result = new HashSet<>();
  for (Map.Entry<ReferenceTarget, Collection<T>> group : mentionsByTarget.asMap().entrySet()) {
    Collection<T> mentions = group.getValue();

    // Collected into a set, so a sentence shared by several mentions appears once.
    Collection<Sentence> sentences =
        mentions.stream()
            .flatMap(mention -> sentencesByMention.get(mention).stream())
            .collect(Collectors.toSet());

    result.add(new EntityInformation<T>(group.getKey(), mentions, sentences));
  }
  return result;
}
Example 5
Source File: RemoveInteractionInEntities.java From baleen with Apache License 2.0 | 5 votes |
/**
 * Removes from the CAS index every {@link Interaction} that appears as a key of the
 * Interaction-to-covering-Entity index.
 *
 * @param jCas the CAS to clean up
 * @throws AnalysisEngineProcessException declared by the annotator contract
 */
@Override
protected void doProcess(JCas jCas) throws AnalysisEngineProcessException {
  // Only the keys matter here; the covering entities themselves are left untouched.
  // NOTE(review): presumably the index holds a key only for interactions that have at least
  // one covering entity - confirm against the uimaFIT version in use.
  removeFromJCasIndex(JCasUtil.indexCovering(jCas, Interaction.class, Entity.class).keySet());
}
Example 6
Source File: PartOfSpeechRelationshipAnnotator.java From baleen with Apache License 2.0 | 5 votes |
/**
 * Runs {@code processSentence} on every sentence of the CAS, passing the sentence's sorted
 * word tokens and an index of the entities covering each token.
 *
 * @param jCas the CAS to process
 * @throws AnalysisEngineProcessException declared by the annotator contract
 */
@Override
protected void extract(JCas jCas) throws AnalysisEngineProcessException {
  // Entities that cover each word token.
  Map<WordToken, List<Entity>> entitiesByToken =
      JCasUtil.indexCovering(jCas, WordToken.class, Entity.class);
  // Word tokens contained in each sentence.
  Map<Sentence, List<WordToken>> tokensBySentence =
      JCasUtil.indexCovered(jCas, Sentence.class, WordToken.class);

  for (Map.Entry<Sentence, List<WordToken>> entry : tokensBySentence.entrySet()) {
    processSentence(jCas, entry.getKey(), sort(entry.getValue()), entitiesByToken);
  }
}
Example 7
Source File: UbmreConstituent.java From baleen with Apache License 2.0 | 5 votes |
/**
 * Prepares per-document state before extraction: after the superclass hook has run, builds
 * the parse tree and the index of word tokens covering each interaction.
 *
 * @param jCas the CAS about to be processed
 */
@Override
protected void preExtract(JCas jCas) {
  super.preExtract(jCas);

  parseTree = ParseTree.build(jCas);
  interactionCoveringTokens =
      JCasUtil.indexCovering(jCas, Interaction.class, WordToken.class);
}
Example 8
Source File: TextBlocks.java From baleen with Apache License 2.0 | 5 votes |
@Override protected void doProcess(final JCas jCas) throws AnalysisEngineProcessException { final Collection<Structure> structures = JCasUtil.select(jCas, Structure.class); if (structures.isEmpty()) { // If the jCas has no structural annotations then the entire text should be marked as a text // block final int end = jCas.getDocumentText().length(); final Text t = new Text(jCas, 0, end); addToJCasIndex(t); } else { // Otherwise add the types we want... structures.stream() .filter(s -> structuralClasses.contains(s.getClass())) .map(s -> new Text(jCas, s.getBegin(), s.getEnd())) .forEach(this::addToJCasIndex); // Now remove any that cover others, so we keep only biggest/most detailed as per request final Map<Text, List<Text>> cover; if (keepSmallest) { cover = JCasUtil.indexCovering(jCas, Text.class, Text.class); } else { cover = JCasUtil.indexCovered(jCas, Text.class, Text.class); } cover.forEach( (t, c) -> c.remove(t)); // Remove where x has been pulled out as covering itself (potential bug // introduced in UIMAfit 2.3.0) cover.values().stream().flatMap(Collection::stream).forEach(this::removeFromJCasIndex); } }
Example 9
Source File: DocumentFactory.java From baleen with Apache License 2.0 | 5 votes |
/**
 * Creates a document factory for the given CAS, deriving the entity-to-covering-sentence index
 * and the entity referent map from that CAS before delegating to the full constructor.
 *
 * @param jCas the CAS the document is based on
 * @param sentenceFactory the {@link SentenceFactory} to use
 */
public DocumentFactory(JCas jCas, SentenceFactory sentenceFactory) {
  this(
      jCas,
      JCasUtil.indexCovering(jCas, Entity.class, Sentence.class),
      ReferentUtils.createReferentMap(jCas, Entity.class),
      sentenceFactory);
}
Example 10
Source File: SentenceFactory.java From baleen with Apache License 2.0 | 5 votes |
/**
 * Creates a sentence factory for the given CAS.
 *
 * <p>Four indexes are built from the CAS and passed to the full constructor: word tokens
 * covered by each sentence, entities covering each word token, phrase chunks covering each
 * word token, and dependencies covered by each sentence.
 *
 * @param jCas the CAS to create sentences from
 */
public SentenceFactory(JCas jCas) {
  this(
      JCasUtil.indexCovered(jCas, Sentence.class, WordToken.class),
      JCasUtil.indexCovering(jCas, WordToken.class, Entity.class),
      JCasUtil.indexCovering(jCas, WordToken.class, PhraseChunk.class),
      JCasUtil.indexCovered(jCas, Sentence.class, Dependency.class));
}
Example 11
Source File: CsvEvent.java From baleen with Apache License 2.0 | 5 votes |
/**
 * Converts every {@link Event} in the CAS to a row via {@code extracted} and writes each
 * non-empty row.
 *
 * @param jCas the CAS to export
 */
@Override
protected void write(JCas jCas) {
  final String source = getDocumentAnnotation(jCas).getSourceUri();

  // Sentences covering each event, built once for the whole document.
  final Map<Event, List<Sentence>> sentencesByEvent =
      JCasUtil.indexCovering(jCas, Event.class, Sentence.class);

  JCasUtil.select(jCas, Event.class).stream()
      .map(event -> extracted(source, sentencesByEvent, event))
      // Rows of length zero are dropped rather than written.
      .filter(row -> row.length > 0)
      .forEach(this::write);
}
Example 12
Source File: CsvRelation.java From baleen with Apache License 2.0 | 5 votes |
/**
 * Writes one row per {@link Relation} in the CAS.
 *
 * <p>Each row holds, in order: the document source URI, the text of the first sentence covering
 * the relation (empty when there is none), relationship type and sub-type, the normalized
 * source and target values, the normalized source and target covered text, the short type
 * names of source and target, and the confidence.
 *
 * @param jCas the CAS to export
 */
@Override
protected void write(JCas jCas) {
  final String source = getDocumentAnnotation(jCas).getSourceUri();

  final Map<Relation, List<Sentence>> sentencesByRelation =
      JCasUtil.indexCovering(jCas, Relation.class, Sentence.class);

  for (final Relation relation : JCasUtil.select(jCas, Relation.class)) {
    final List<Sentence> covering = sentencesByRelation.get(relation);
    // Only the first covering sentence is reported.
    final String sentence =
        covering.isEmpty() ? "" : covering.iterator().next().getCoveredText();

    final String[] row =
        new String[] {
          source,
          sentence,
          relation.getRelationshipType(),
          relation.getRelationSubType(),
          normalize(relation.getSource().getValue()),
          normalize(relation.getTarget().getValue()),
          normalize(relation.getSource().getCoveredText()),
          normalize(relation.getTarget().getCoveredText()),
          relation.getSource().getType().getShortName(),
          relation.getTarget().getType().getShortName(),
          Double.toString(relation.getConfidence())
        };
    write(row);
  }
}
Example 13
Source File: ParseTree.java From baleen with Apache License 2.0 | 4 votes |
/** * Builds the tree. * * @param jCas the j cas * @return the parses the tree */ public static ParseTree build(JCas jCas) { // Build a tree phrase to phrase final Map<PhraseChunk, List<PhraseChunk>> index = JCasUtil.indexCovering(jCas, PhraseChunk.class, PhraseChunk.class); final Collection<PhraseChunk> phrases = JCasUtil.select(jCas, PhraseChunk.class); final List<ParseTreeNode> roots = new LinkedList<>(); final Map<PhraseChunk, ParseTreeNode> chunkToNode = new HashMap<>(); for (final PhraseChunk chunk : phrases) { ParseTreeNode treeNode = chunkToNode.computeIfAbsent(chunk, ParseTreeNode::new); final Collection<PhraseChunk> covering = index.get(chunk); if (covering == null || covering.isEmpty()) { // Nothing is covering this Jcas, so its a root roots.add(treeNode); } else { // This is covered, so we add the smallest one as out parent final PhraseChunk parent = findSmallest(covering); ParseTreeNode parentNode = chunkToNode.get(parent); if (parentNode == null) { parentNode = new ParseTreeNode(parent); chunkToNode.put(parent, parentNode); } treeNode.setParent(parentNode); parentNode.addChild(treeNode); } } // Add words to the tree final Map<PhraseChunk, List<WordToken>> wordIndex = JCasUtil.indexCovered(jCas, PhraseChunk.class, WordToken.class); final Map<WordToken, ParseTreeNode> wordToNode = new HashMap<>(); chunkToNode .values() .forEach( n -> { // Sort all tree nodes by sentence order n.getChildren().sort(SENTENCE_ORDER); // Get all the words which are within this chunk, and then remove those which are in // children final Collection<WordToken> allWords = wordIndex.get(n.getChunk()); if (allWords != null) { final List<WordToken> words = new ArrayList<>(allWords); // Remove the words which are covered by our children, leaving just our words if (n.hasChildren()) { n.getChildren().stream() .map(t -> wordIndex.get(t.getChunk())) .filter(Objects::nonNull) .forEach(words::removeAll); } // Add the words into the treenode n.addWords(words); words.stream().forEach(w -> 
wordToNode.put(w, n)); } }); // Sort roots roots.sort(SENTENCE_ORDER); return new ParseTree(roots, chunkToNode, wordToNode); }
Example 14
Source File: MongoEvents.java From baleen with Apache License 2.0 | 4 votes |
private <T extends Base> void saveEvents(String documentId, JCas jCas, Class<T> textClass) { final Map<Event, List<T>> coveringText = JCasUtil.indexCovering(jCas, Event.class, textClass); List<Document> eventDocuments = JCasUtil.select(jCas, Event.class).stream() .map( e -> { String text = coveringText.get(e).stream() .map(T::getCoveredText) .collect(Collectors.joining(" ")); // @formatter:off Document document = new Document() .append(FIELD_TEXT, text) .append(FIELD_ENTITIES, getEntityDocuments(e)) .append(FIELD_DOCUMENT_ID, documentId) .append(FIELD_TYPES, getEventTypes(e)) .append(FIELD_VALUE, e.getValue()) .append(FIELD_TOKENS, getEventTokens(e)) .append(FIELD_BEGIN, e.getBegin()) .append(FIELD_END, e.getEnd()) .append(FIELD_CONFIDENCE, e.getConfidence()); if (outputHistory) { HistoryConverter converter = new HistoryConverter( e, fields, getSupport().getDocumentHistory(jCas), getMonitor()); Map<String, Object> historyMap = converter.convert(); document.append(FIELD_HISTORY, historyMap); } return document; // @formatter:on }) .collect(Collectors.toList()); if (!eventDocuments.isEmpty()) { eventsCollection.insertMany(eventDocuments); } }
Example 15
Source File: MongoRelations.java From baleen with Apache License 2.0 | 4 votes |
private void saveRelations(String documentId, JCas jCas) { final Map<Relation, List<Sentence>> coveringSentence = JCasUtil.indexCovering(jCas, Relation.class, Sentence.class); List<Document> rels = JCasUtil.select(jCas, Relation.class).stream() .map( r -> { String sentence = coveringSentence.get(r).stream() .map(Sentence::getCoveredText) .collect(Collectors.joining(". ")); // @formatter:off return new Document() .append(fields.getExternalId(), r.getExternalId()) .append(FIELD_RELATIONSHIP_TYPE, r.getRelationshipType()) .append(FIELD_RELATIONSHIP_SUBTYPE, r.getRelationSubType()) .append(FIELD_SOURCE_VALUE, r.getSource().getValue()) .append(FIELD_SOURCE_TYPE, r.getSource().getType().getShortName()) .append(FIELD_SOURCE_TYPE_FULL, r.getSource().getType().getName()) .append(FIELD_VALUE, r.getValue()) .append(FIELD_TARGET_VALUE, r.getTarget().getValue()) .append(FIELD_TARGET_TYPE, r.getTarget().getType().getShortName()) .append(FIELD_TARGET_TYPE_FULL, r.getTarget().getType().getName()) .append(FIELD_SENTENCE, sentence) .append(FIELD_DOCUMENT_ID, documentId) .append(FIELD_SOURCE, r.getSource().getExternalId()) .append(FIELD_TARGET, r.getTarget().getExternalId()) .append(FIELD_BEGIN, r.getBegin()) .append(FIELD_END, r.getEnd()) .append(FIELD_CONFIDENCE, r.getConfidence()) .append(FIELD_SENTENCE_DISTANCE, r.getSentenceDistance()) .append(FIELD_NORMAL_SENTENCE_DISTANCE, normalize(r.getSentenceDistance())) .append(FIELD_WORD_DISTANCE, r.getWordDistance()) .append(FIELD_NORMAL_WORD_DISTANCE, normalize(r.getWordDistance())) .append(FIELD_DEPENDENCY_DISTANCE, r.getDependencyDistance()) .append( FIELD_NORMAL_DEPENDENCY_DISTANCE, normalize(r.getDependencyDistance())); // @formatter:on }) .collect(Collectors.toList()); if (!rels.isEmpty()) { relationsCollection.insertMany(rels); } }
Example 16
Source File: AbstractReNounRelationshipAnnotator.java From baleen with Apache License 2.0 | 3 votes |
/**
 * Runs {@code processTree} for every pattern supplied by {@code patterns}, sharing one
 * dependency graph and one token-to-covering-entity index built from the CAS.
 *
 * @param jCas the CAS to process
 * @throws AnalysisEngineProcessException declared by the annotator contract
 */
@Override
protected void extract(JCas jCas) throws AnalysisEngineProcessException {
  // Entities that cover each word token.
  Map<WordToken, List<Entity>> entitiesByToken =
      JCasUtil.indexCovering(jCas, WordToken.class, Entity.class);
  DependencyGraph graph = DependencyGraph.build(jCas);

  patterns.get().forEach(pattern -> processTree(pattern, jCas, graph, entitiesByToken));
}