org.apache.uima.cas.text.AnnotationFS#getFeatureValueAsString

Source File: DataMajorityNerRecommender.java From inception with Apache License 2.0

6 votes

/**
 * Collects the labeled annotations of the configured layer from all given CASes.
 * <p>
 * For every annotation of type {@code layerName}, the value of the feature
 * {@code featureName} is read as a string. Annotations for which {@code isNotEmpty}
 * rejects that value are skipped.
 *
 * @param aCasses
 *            the CASes to scan.
 * @return one {@code Annotation} (label, begin, end) per labeled annotation, in the order in
 *         which the CASes and their annotations are iterated.
 */
private List<Annotation> extractAnnotations(List<CAS> aCasses)
{
    List<Annotation> result = new ArrayList<>();

    for (CAS currentCas : aCasses) {
        Type layerType = CasUtil.getType(currentCas, layerName);
        Feature labelFeature = layerType.getFeatureByBaseName(featureName);

        for (AnnotationFS fs : CasUtil.select(currentCas, layerType)) {
            String value = fs.getFeatureValueAsString(labelFeature);
            // Unlabeled annotations carry no information for the recommender
            if (!isNotEmpty(value)) {
                continue;
            }

            result.add(new Annotation(value, fs.getBegin(), fs.getEnd()));
        }
    }

    return result;
}

Source File: StringMatchingRecommender.java From inception with Apache License 2.0

5 votes

/**
 * Builds one {@code Sample} per sentence of every given CAS.
 * <p>
 * Each sample carries the index of the CAS in {@code aCasses}, the document text of that CAS,
 * the tokens covered by the sentence and a {@code Span} for every covered annotation of the
 * given layer whose feature value passes {@code isNotEmpty}. Spans are created with a score
 * of {@code -1.0}.
 *
 * @param aCasses
 *            the CASes to extract from.
 * @param aLayerName
 *            name of the annotation type to read.
 * @param aFeatureName
 *            base name of the feature holding the label.
 * @return the samples in CAS order, then sentence order.
 */
private List<Sample> extractData(List<CAS> aCasses, String aLayerName, String aFeatureName)
{
    long start = System.currentTimeMillis();

    List<Sample> data = new ArrayList<>();

    int docNo = 0;
    for (CAS cas : aCasses) {
        Type sentenceType = getType(cas, Sentence.class);
        Type tokenType = getType(cas, Token.class);
        Type annotationType = getType(cas, aLayerName);
        Feature predictedFeature = annotationType.getFeatureByBaseName(aFeatureName);
        // The document text is the same for every sentence of this CAS
        String documentText = cas.getDocumentText();

        for (AnnotationFS sentence : select(cas, sentenceType)) {
            List<Span> spans = new ArrayList<>();

            for (AnnotationFS annotation : selectCovered(annotationType, sentence)) {
                String label = annotation.getFeatureValueAsString(predictedFeature);
                if (isNotEmpty(label)) {
                    // Reuse the label read above instead of fetching the feature value again
                    spans.add(new Span(annotation.getBegin(), annotation.getEnd(),
                            annotation.getCoveredText(), label, -1.0));
                }
            }

            Collection<AnnotationFS> tokens = selectCovered(tokenType, sentence);

            data.add(new Sample(docNo, documentText, tokens, spans));
        }

        docNo++;
    }

    log.trace("Extracting data took {}ms", System.currentTimeMillis() - start);

    return data;
}

Source File: OpenNlpDoccatRecommender.java From inception with Apache License 2.0

5 votes

/**
 * Creates document categorization samples from the annotations of the configured layer.
 * <p>
 * For every annotation covered by a sentence, a {@code DocumentSample} is created from the
 * covered texts of the sentence's tokens. Its category is the annotation's feature value, or
 * {@code NO_CATEGORY} if the annotation has no value. Extraction stops across all CASes as
 * soon as the number of samples reaches {@code traits.getTrainingSetSizeLimit()}.
 *
 * @param aCasses
 *            the CASes to extract from.
 * @return the collected samples.
 */
private List<DocumentSample> extractSamples(List<CAS> aCasses)
{
    List<DocumentSample> collected = new ArrayList<>();

    allCasses: for (CAS cas : aCasses) {
        Type sentenceType = getType(cas, Sentence.class);
        Type tokenType = getType(cas, Token.class);

        Map<AnnotationFS, List<AnnotationFS>> tokensBySentence = indexCovered(
                cas, sentenceType, tokenType);
        for (Entry<AnnotationFS, List<AnnotationFS>> entry : tokensBySentence.entrySet()) {
            AnnotationFS sentence = entry.getKey();

            // The token texts of the sentence are the "document" of every sample created for it
            Collection<AnnotationFS> tokens = entry.getValue();
            String[] tokenTexts = new String[tokens.size()];
            int next = 0;
            for (AnnotationFS token : tokens) {
                tokenTexts[next] = token.getCoveredText();
                next++;
            }

            Type annotationType = getType(cas, layerName);
            Feature feature = annotationType.getFeatureByBaseName(featureName);

            for (AnnotationFS annotation : selectCovered(annotationType, sentence)) {
                // Stop everything once the training set size limit has been reached
                if (collected.size() >= traits.getTrainingSetSizeLimit()) {
                    break allCasses;
                }

                String category = annotation.getFeatureValueAsString(feature);
                if (category == null) {
                    category = NO_CATEGORY;
                }

                DocumentSample sample = new DocumentSample(category, tokenTexts);
                if (sample.getCategory() != null) {
                    collected.add(sample);
                }
            }
        }
    }

    return collected;
}

Source File: AutomationUtil.java From webanno with Apache License 2.0

5 votes

/**
 * Checks whether two annotations agree on the features of the given type.
 * <p>
 * The begin and end features are ignored, as are non-primitive features whose value in
 * {@code aMFs} is a {@code SofaFS}. A feature is also ignored if its string value is
 * {@code null} in either annotation. All remaining features must have equal string values.
 *
 * @param aType
 *            the type whose features are compared.
 * @param aMFs
 *            the first annotation.
 * @param aFs
 *            the second annotation.
 * @return {@code false} as soon as a compared feature differs, {@code true} otherwise.
 */
private static boolean isSamAnno(Type aType, AnnotationFS aMFs, AnnotationFS aFs)
{
    for (Feature feature : aType.getFeatures()) {
        // The position of the annotations does not matter for the comparison
        boolean isOffsetFeature = feature.getName().equals(CAS.FEATURE_FULL_NAME_BEGIN)
                || feature.getName().equals(CAS.FEATURE_FULL_NAME_END);
        if (isOffsetFeature) {
            continue;
        }

        if (!feature.getRange().isPrimitive()
                && aMFs.getFeatureValue(feature) instanceof SofaFS) {
            continue;
        }

        // Features without a value on either side are not compared
        String expected = aMFs.getFeatureValueAsString(feature);
        if (expected == null) {
            continue;
        }

        String actual = aFs.getFeatureValueAsString(feature);
        if (actual == null) {
            continue;
        }

        if (!expected.equals(actual)) {
            return false;
        }
    }

    return true;
}

Source File: OpenNlpNerRecommender.java From inception with Apache License 2.0

4 votes

/**
 * Converts the annotations of the configured layer inside a sentence into token-based spans.
 * <p>
 * Annotation character offsets are mapped to token indices. Annotations that do not begin at
 * the begin of a token or do not end at the end of a token are skipped with a warning.
 * Annotations overlapping a previously accepted span are skipped as well, and so are
 * annotations with a blank label.
 *
 * @param aCas
 *            the CAS containing the annotations.
 * @param aSentence
 *            the sentence whose covered annotations are converted.
 * @param aTokens
 *            the tokens of the sentence, in the order defining the token indices.
 * @return the spans with token-index offsets, where the end index is exclusive.
 */
private Span[] extractAnnotatedSpans(CAS aCas, AnnotationFS aSentence,
                                     Collection<AnnotationFS> aTokens) {
    // Convert character offsets to token indices. Begin and end offsets are indexed
    // separately: if two tokens touch (end of one == begin of the next), a shared map would
    // keep only one of them for that offset and an annotation ending there would be resolved
    // to the following token.
    Int2ObjectMap<AnnotationFS> tokenByBegin = new Int2ObjectOpenHashMap<>();
    Int2ObjectMap<AnnotationFS> tokenByEnd = new Int2ObjectOpenHashMap<>();
    Object2IntMap<AnnotationFS> idxToken = new Object2IntOpenHashMap<>();
    int idx = 0;
    for (AnnotationFS t : aTokens) {
        tokenByBegin.put(t.getBegin(), t);
        tokenByEnd.put(t.getEnd(), t);
        idxToken.put(t, idx);
        idx++;
    }

    // Create spans from target annotations
    Type annotationType = getType(aCas, layerName);
    Feature feature = annotationType.getFeatureByBaseName(featureName);
    List<AnnotationFS> annotations = selectCovered(annotationType, aSentence);
    List<Span> result = new ArrayList<>();

    int highestEndTokenPositionObserved = 0;
    for (AnnotationFS annotation : annotations) {
        String label = annotation.getFeatureValueAsString(feature);

        AnnotationFS beginToken = tokenByBegin.get(annotation.getBegin());
        AnnotationFS endToken = tokenByEnd.get(annotation.getEnd());
        if (beginToken == null || endToken == null) {
            LOG.warn("Skipping annotation not starting/ending at token boundaries: [{}-{}, {}]",
                    annotation.getBegin(), annotation.getEnd(), label);
            continue;
        }

        int begin = idxToken.get(beginToken);
        int end = idxToken.get(endToken);

        // If the begin offset of the current annotation is lower than the highest offset so far
        // observed, then it is overlapping with some annotation that we have seen before.
        // Because OpenNLP NER does not support overlapping annotations, we skip it.
        if (begin < highestEndTokenPositionObserved) {
            LOG.debug("Skipping overlapping annotation: [{}-{}, {}]", begin, end + 1, label);
            continue;
        }

        if (isNotBlank(label)) {
            // The end token index is inclusive here, the span end is exclusive
            result.add(new Span(begin, end + 1, label));
            highestEndTokenPositionObserved = end + 1;
        }
    }
    return result.toArray(new Span[0]);
}

Source File: DL4JSequenceRecommender.java From inception with Apache License 2.0

4 votes

/**
 * Produces one label per token by aligning label annotations with tokens.
 * <p>
 * The result has the same size as {@code aTokens}. A token that lies within the label currently
 * being processed receives that label's feature value (or {@code NO_LABEL} if the value is
 * empty); every other token receives {@code NO_LABEL}.
 * <p>
 * NOTE(review): both lists are walked once from front to back, so they are presumably expected
 * to be sorted by offset - confirm against the callers.
 *
 * @param aTokens
 *            the tokens to label. The CAS is obtained from the first element, so the list must
 *            not be empty.
 * @param aLabels
 *            the label annotations to align with the tokens.
 * @return the labels, index-aligned with {@code aTokens}.
 * @throws IllegalArgumentException
 *             if a label starts before the end of the previously processed label, if a
 *             processed label did not start and end at token boundaries, or if labels are left
 *             over after all tokens have been visited.
 */
public List<String> extractTokenLabels(List<AnnotationFS> aTokens,
        List<AnnotationFS> aLabels)
{
    Type annotationType = getType(aTokens.get(0).getCAS(), layerName);
    Feature feature = annotationType.getFeatureByBaseName(featureName);
    
    String[] labels = new String[aTokens.size()];
    int tokenIdx = 0;
    int labelIdx = 0;
    
    // Track whether the current label's begin/end coincided with a token begin/end
    boolean seenBeginMatch = false;
    boolean seenEndMatch = false;
    // Largest offset of the label processed last; -1 until the first label is completed
    int maxOffset = -1;

    // This loop assumes that labels start and end at token offsets. Labels that span over
    // multiple tokens are supported as well.
    while (tokenIdx < aTokens.size() && labelIdx < aLabels.size()) {
        AnnotationFS token = aTokens.get(tokenIdx);
        AnnotationFS label = aLabels.get(labelIdx);
        
        // The current label must not reach back into the previously completed label
        if (Math.min(label.getBegin(), label.getEnd()) < maxOffset) {
            throw new IllegalArgumentException("Overlapping labels are not supported!");
        }
        
        // Check if we have seen the begin/end of the label matching a token boundary
        seenBeginMatch |= label.getBegin() == token.getBegin();
        seenEndMatch |= label.getEnd() == token.getEnd();
        
        // First step: collect the label
        if (label.getBegin() <= token.getBegin() && token.getEnd() <= label.getEnd()) {
            String value = label.getFeatureValueAsString(feature);
            labels[tokenIdx] = StringUtils.defaultIfEmpty(value, NO_LABEL);
        }
        else {
            labels[tokenIdx] = NO_LABEL;
        }
        
        // Second step: move to next label (if necessary)
        if (label.getEnd() <= token.getEnd()) {
            labelIdx++;
            
            // The label is complete now, so both of its boundaries must have been matched
            if (!seenBeginMatch || !seenEndMatch) {
                throw new IllegalArgumentException("Labels must start/end at token boundaries!");
            }
            
            // Reset the per-label state and remember where the completed label ended
            seenBeginMatch = false;
            seenEndMatch = false;
            maxOffset = Math.max(label.getBegin(), label.getEnd());
        }
            
        // In any case, we move to the next token
        tokenIdx++;
    }
    
    // Labels left over after the last token could not be aligned with any token
    if (labelIdx < aLabels.size()) {
        throw new IllegalArgumentException("Overlapping labels are not supported!");
    }
    
    // If we ran out of labels before seeing all tokens, set the label for the remaining 
    // tokens here.
    while (tokenIdx < aTokens.size()) {
        labels[tokenIdx] = NO_LABEL;
        tokenIdx++;
    }
    
    return asList(labels);
}

Source File: WebannoTsv2Writer.java From webanno with Apache License 2.0

4 votes

/**
 * Records, per token, the values of the given feature for all annotations of the given type.
 * <p>
 * The map is keyed by the low-level FS reference of the token. If a token already has an entry,
 * the new value is appended to it, separated by {@code |}. For types contained in
 * {@code multipleSpans}, values are prefixed with {@code B-} for the first token written for an
 * annotation and {@code I-} for the following ones. Annotations without a feature value are
 * written as the type name followed by {@code _}.
 *
 * @param aCas
 *            the CAS to read the annotations from.
 * @param aTokenAnnoMap
 *            the map to update.
 * @param aType
 *            the annotation type to export.
 * @param aFeature
 *            the feature whose value is exported.
 */
private void setTokenAnnos(CAS aCas, Map<Integer, String> aTokenAnnoMap, Type aType,
        Feature aFeature)
{
    LowLevelCAS lowLevelCas = aCas.getLowLevelCAS();
    for (AnnotationFS fs : CasUtil.select(aCas, aType)) {
        boolean atFirstToken = true;
        // Set once a value of this annotation was appended to an existing entry; from then on
        // the place-holder "O-_" is kept in front of values written to fresh entries
        boolean appendedBefore = false;
        for (Token token : selectCovered(Token.class, fs)) {
            // Only tokens lying completely within the annotation are considered
            if (fs.getBegin() > token.getBegin() || fs.getEnd() < token.getEnd()) {
                continue;
            }

            String value = fs.getFeatureValueAsString(aFeature);
            if (value == null) {
                value = aType.getName() + "_";
            }

            int tokenRef = lowLevelCas.ll_getFSRef(token);
            String existing = aTokenAnnoMap.get(tokenRef);
            boolean multiSpan = multipleSpans.contains(aType.getName());

            // Multi-span types get a begin/inside marker in front of the value
            String tagged = value;
            if (multiSpan) {
                tagged = (atFirstToken ? "B-" : "I-") + value;
                atFirstToken = false;
            }

            String entry;
            if (existing != null) {
                entry = existing + "|" + tagged;
                appendedBefore = true;
            }
            else if (appendedBefore && multiSpan) {
                entry = "O-_|" + tagged;
            }
            else {
                entry = tagged;
            }
            aTokenAnnoMap.put(tokenRef, entry);
        }
    }
}

Source File: WebannoTsv3Writer.java From webanno with Apache License 2.0

4 votes

/**
 * Adds the feature values of a chain link annotation to the annotations of the given unit.
 * <p>
 * The features of the type are processed in order of their short names. The sofa, begin and
 * end features as well as the features named by {@code GOVERNOR}, {@code DEPENDENT},
 * {@code FIRST} and {@code NEXT} are skipped. A missing value is written as {@code *}, other
 * values are escaped. The value of the {@code REF_REL} feature is suffixed with
 * {@code ->chainNo-linkNo}, every other value with {@code [chainNo]}. The unit is also marked
 * as ambiguous for the type.
 *
 * @param aAnnotationsPertype
 *            the per-unit annotation rows to append to.
 * @param aType
 *            the type of the chain link.
 * @param aFs
 *            the chain link annotation.
 * @param aUnit
 *            the unit the annotation is attached to.
 * @param aLinkNo
 *            the number of the link within its chain.
 * @param achainNo
 *            the number of the chain.
 * @param aMultiUnit
 *            not evaluated; both values produce the same output.
 * @param aFirst
 *            not evaluated.
 */
private void setChainAnnoPerFeature(Map<AnnotationUnit, List<List<String>>> aAnnotationsPertype,
        Type aType, AnnotationFS aFs, AnnotationUnit aUnit, int aLinkNo, int achainNo,
        boolean aMultiUnit, boolean aFirst)
{
    List<String> annoPerFeatures = new ArrayList<>();
    // Sort a private copy so that this method does not depend on the returned list being
    // modifiable
    List<Feature> features = new ArrayList<>(aType.getFeatures());
    features.sort((a, b) -> StringUtils.compare(a.getShortName(), b.getShortName()));
    for (Feature feature : features) {
        if (feature.getName().equals(CAS.FEATURE_FULL_NAME_SOFA)
                || feature.getName().equals(CAS.FEATURE_FULL_NAME_BEGIN)
                || feature.getName().equals(CAS.FEATURE_FULL_NAME_END)
                || feature.getShortName().equals(GOVERNOR)
                || feature.getShortName().equals(DEPENDENT)
                || feature.getShortName().equals(FIRST)
                || feature.getShortName().equals(NEXT)) {
            continue;
        }
        String annotation = aFs.getFeatureValueAsString(feature);

        if (annotation == null) {
            annotation = "*";
        }
        else {
            annotation = replaceEscapeChars(annotation);
        }

        if (feature.getShortName().equals(REF_REL)) {
            annotation = annotation + "->" + achainNo + "-" + aLinkNo;
        }
        else {
            annotation = annotation + "[" + achainNo + "]";
        }
        // Create the feature set of the layer on demand instead of assuming that a caller has
        // already registered it
        featurePerLayer.computeIfAbsent(aType.getName(), key -> new LinkedHashSet<>())
                .add(feature.getShortName());

        annoPerFeatures.add(annotation);
    }
    aAnnotationsPertype.putIfAbsent(aUnit, new ArrayList<>());
    ambigUnits.putIfAbsent(aType.getName(), new HashMap<>());
    ambigUnits.get(aType.getName()).put(aUnit, true); // coref are always ambig

    // A link without any exported feature still gets a place-holder carrying the chain number
    if (annoPerFeatures.isEmpty()) {
        annoPerFeatures.add("*" + "[" + achainNo + "]");
    }
    aAnnotationsPertype.get(aUnit).add(annoPerFeatures);
}

Source File: WebannoTsv3Writer.java From webanno with Apache License 2.0

4 votes

/**
 * Adds the feature values of a relation annotation to the annotations of its dependent unit.
 * <p>
 * The features of the type are processed in order of their short names. The sofa, begin and
 * end features as well as the features named by {@code GOVERNOR}, {@code DEPENDENT},
 * {@code FIRST} and {@code NEXT} are skipped. A missing value is written as {@code *}, other
 * values are escaped. After the feature values, a column referring to the governor unit is
 * appended: the line number of the governor unit, followed by {@code [govRef_depRef]} if at
 * least one of the two references is greater than zero.
 *
 * @param annotationsPertype
 *            the per-unit annotation rows to append to.
 * @param type
 *            the type of the relation.
 * @param fs
 *            the relation annotation.
 * @param depUnit
 *            the unit of the dependent; the row is attached to this unit.
 * @param govUnit
 *            the unit of the governor.
 * @param aGovRef
 *            the reference number of the governor annotation.
 * @param aDepRef
 *            the reference number of the dependent annotation.
 * @param aDepType
 *            the type of the attached annotations; its name is recorded in the column header.
 */
private void setRelationAnnoPerFeature(
        Map<AnnotationUnit, List<List<String>>> annotationsPertype, Type type, AnnotationFS fs,
        AnnotationUnit depUnit, AnnotationUnit govUnit, int aGovRef, int aDepRef, Type aDepType)
{
    List<String> annoPerFeatures = new ArrayList<>();
    featurePerLayer.putIfAbsent(type.getName(), new LinkedHashSet<>());
    // Sort a private copy so that this method does not depend on the returned list being
    // modifiable
    List<Feature> features = new ArrayList<>(type.getFeatures());
    features.sort((a, b) -> StringUtils.compare(a.getShortName(), b.getShortName()));
    for (Feature feature : features) {
        if (feature.getName().equals(CAS.FEATURE_FULL_NAME_SOFA)
                || feature.getName().equals(CAS.FEATURE_FULL_NAME_BEGIN)
                || feature.getName().equals(CAS.FEATURE_FULL_NAME_END)
                || feature.getShortName().equals(GOVERNOR)
                || feature.getShortName().equals(DEPENDENT)
                || feature.getShortName().equals(FIRST)
                || feature.getShortName().equals(NEXT)) {
            continue;
        }
        // NOTE(review): the result is not used. The call is kept because it cannot be seen
        // from here whether getRefId has side effects or may throw - confirm and remove.
        getRefId(type, fs, depUnit);
        String annotation = fs.getFeatureValueAsString(feature);
        if (annotation == null) {
            annotation = "*";
        }
        else {
            annotation = replaceEscapeChars(annotation);
        }
        annoPerFeatures.add(annotation);
        featurePerLayer.get(type.getName()).add(feature.getShortName());
    }
    // add the governor unit address, followed by the governor and dependent references
    // (separated by _)
    String govRef = unitsLineNumber.get(govUnit)
            + ((aDepRef > 0 || aGovRef > 0) ? "[" + aGovRef + "_" + aDepRef + "]" : "");
    annoPerFeatures.add(govRef);
    featurePerLayer.get(type.getName()).add(BT + aDepType.getName());
    // The row always contains at least the governor column, so no place-holder is needed
    annotationsPertype.putIfAbsent(depUnit, new ArrayList<>());
    annotationsPertype.get(depUnit).add(annoPerFeatures);
}

Java Code Examples for org.apache.uima.cas.text.AnnotationFS#getFeatureValueAsString()