org.apache.uima.jcas.tcas.Annotation#setBegin

Source File: AnnotationUtilsTest.java From baleen with Apache License 2.0

5 votes

/**
 * Checks that {@code AnnotationUtils.getSingleCovered} returns an annotation whose covered text
 * is {@code "012"} when queried with a probe annotation spanning offsets 0 to 4.
 */
@Test
public void testGetSingleCovered() {
  final Annotation a = new Annotation(jCas);
  a.setBegin(0);
  a.setEnd(4);

  final Optional<Annotation> single = AnnotationUtils.getSingleCovered(Annotation.class, a);
  // Fail with a readable assertion instead of a NoSuchElementException from Optional.get().
  Assert.assertTrue("Expected a covered annotation to be present", single.isPresent());
  Assert.assertEquals("012", single.get().getCoveredText());
}

Source File: AnnotationUtilsTest.java From baleen with Apache License 2.0

5 votes

/**
 * Verifies that {@code AnnotationUtils.getSingleCovered} yields an empty result for a probe
 * annotation spanning offsets 1 to 12.
 */
@Test
public void testGetSingleCoveredMissing() {
  final Annotation probe = new Annotation(jCas);
  probe.setBegin(1);
  probe.setEnd(12);

  // The lookup result is only inspected for presence, so it is asserted on directly.
  Assert.assertFalse(AnnotationUtils.getSingleCovered(Annotation.class, probe).isPresent());
}

Source File: DummyAnnotator2.java From baleen with Apache License 2.0

5 votes

/**
 * Adds a plain {@link Annotation} to the indexes for every run of digits found in the document
 * text.
 *
 * @param aJCas the CAS whose document text is scanned and which receives the annotations
 * @throws AnalysisEngineProcessException declared by the overridden signature; this
 *     implementation does not throw it itself
 */
@Override
public void process(JCas aJCas) throws AnalysisEngineProcessException {
  final String text = aJCas.getDocumentText();
  if (text == null) {
    // No document text available: Pattern.matcher(null) would throw a NullPointerException.
    return;
  }

  final Matcher digits = Pattern.compile("\\d+").matcher(text);
  while (digits.find()) {
    final Annotation a = new Annotation(aJCas);
    a.setBegin(digits.start());
    a.setEnd(digits.end());

    a.addToIndexes();
  }
}

Source File: AbbreviationsExpanderAnnotator.java From bluima with Apache License 2.0

5 votes

/**
 * For every {@code Abbreviation} whose text reference is itself an {@code Abbreviation}, clones
 * the annotations returned by {@code getCovered} for that reference, moves each clone onto the
 * span of the referring abbreviation and adds it to the indexes. A warning is logged whenever
 * the text covered by a clone differs from the text covered by the reference.
 *
 * @param jCas the CAS to read abbreviations from and to add the cloned annotations to
 */
public static void expandAbbreviations(JCas jCas) {
    String pmId = getHeaderDocId(jCas);

    // per-reference memo of getCovered results; recomputing them each time was very slow
    Map<Abbreviation, List<Annotation>> coveredByReference = newHashMap();

    // iterate over a copy of the selection (presumably because clones are indexed while looping)
    List<Abbreviation> snapshot = newLinkedList(select(jCas, Abbreviation.class));
    for (Abbreviation shortForm : snapshot) {

        Annotation target = shortForm.getTextReference();
        if (!(target instanceof Abbreviation)) {
            // instanceof is false for null as well, so no separate null test is required
            continue;
        }
        Abbreviation referenced = (Abbreviation) target;

        if (!coveredByReference.containsKey(referenced)) {
            coveredByReference.put(referenced, getCovered(jCas, referenced, pmId));
        }
        List<Annotation> toCopy = coveredByReference.get(referenced);

        // replicate each covered annotation onto this abbreviation's span
        for (Annotation original : toCopy) {
            Annotation copy = (Annotation) original.clone();
            copy.setBegin(shortForm.getBegin());
            copy.setEnd(shortForm.getEnd());
            copy.addToIndexes(jCas);

            boolean sameText = copy.getCoveredText().equals(referenced.getCoveredText());
            if (!sameText) {
                LOG.warn("'{}' not matching2 '{}' in " + pmId,
                        copy.getCoveredText(), referenced.getCoveredText());
            }
        }
    }
}

Source File: CASImpl.java From uima-uimaj with Apache License 2.0

5 votes

/**
 * Creates a feature structure of the given type, casts it to {@link Annotation} and sets its
 * begin and end offsets.
 *
 * @param type the type passed to {@code createFS}
 * @param begin value stored as the annotation's begin offset
 * @param end value stored as the annotation's end offset
 * @return the newly created annotation
 */
@Override
  public Annotation createAnnotation(Type type, int begin, int end) {
    // No explicit base-CAS guard here: per the original author's note it would only duplicate
    // a check that is performed later.
    final Annotation annotation = (Annotation) createFS(type);
    annotation.setBegin(begin);
    annotation.setEnd(end);
    return annotation;
  }

Source File: IndexCorruptionReportingTest.java From uima-uimaj with Apache License 2.0

5 votes

/**
 * Changes the begin offset of an annotation that has already been added to the indexes and, if
 * that raises a {@link UIMARuntimeException}, checks that its message key is
 * {@code ILLEGAL_FS_FEAT_UPDATE}.
 *
 * NOTE(review): the test also passes when no exception is thrown at all; confirm that this is
 * the intended behavior.
 */
public void testReport() throws Exception {
  JCas view = cas.getJCas();
  Annotation indexed = new Annotation(view, 0, 10);
  indexed.addToIndexes();
  try {
    // updating a feature of an FS that is currently in the indexes
    indexed.setBegin(2);
  } catch (UIMARuntimeException ex) {
    String key = ex.getMessageKey();
    assertTrue(key.equals(UIMARuntimeException.ILLEGAL_FS_FEAT_UPDATE));
  }
}

Source File: AnnotationUtilsTest.java From baleen with Apache License 2.0

4 votes

/**
 * Creates a {@code WordToken} in the test CAS with the given begin and end offsets and adds it
 * to the indexes.
 *
 * @param start value used as the begin offset
 * @param end value used as the end offset
 */
private void addAnnotation(final int start, final int end) {
  final Annotation token = new WordToken(jCas);
  token.setBegin(start);
  token.setEnd(end);
  token.addToIndexes();
}

Source File: MongoCollectionReader.java From bluima with Apache License 2.0

4 votes

/**
 * Fills {@code jCas} from the next document of the cursor: sets the document text, adds a
 * {@code Header} carrying the document id and title, and then, for every document key that
 * matches the short name of a configured mapping, creates one annotation per list entry and
 * populates its mapped features from the database object.
 *
 * Failures while building a single annotation are logged and do not abort the document.
 *
 * @param jCas the CAS to populate
 */
@Override
public void getNext(JCas jCas) throws IOException, CollectionException {

    DBObject doc = cur.next();

    // document text; a missing text field results in an empty document
    Object text = doc.get(TEXT);
    jCas.setDocumentText(text == null ? "" : text.toString());

    // header with id and (possibly empty) title
    Header header = new Header(jCas);
    header.setDocId(doc.get(ID).toString());
    boolean hasTitle = doc.containsField(TITLE) && doc.get(TITLE) != null;
    header.setTitle(hasTitle ? doc.get(TITLE).toString() : "");
    header.addToIndexes();

    // remaining annotations, driven by the configured mappings
    for (String dbListsName : doc.keySet()) {
        for (String annotClass : ALL_MAPPINGS_KEYS) {

            MongoFieldMapping mapping = ALL_MAPPINGS.get(annotClass);
            if (!mapping.shortName.equals(dbListsName)) {
                continue;
            }

            BasicDBList entries = (BasicDBList) doc.get(dbListsName);
            for (Object entry : entries) {
                BasicDBObject dbO = (BasicDBObject) entry;

                try {
                    Annotation annotation = getAnnotationByClassName(jCas, annotClass);
                    annotation.setBegin(dbO.getInt(BEGIN));// LATER maybe opt.
                    annotation.setEnd(dbO.getInt(END));

                    // copy over every feature that has a field mapping
                    for (Feature feature : annotation.getType().getFeatures()) {
                        String featureName = feature.getShortName();
                        if (!mapping.fieldMappings.containsKey(featureName)) {
                            continue;
                        }
                        String fieldKey = mapping.fieldMappings.get(featureName);
                        String range = feature.getRange().getShortName();

                        MongoFieldMapping.readFieldFromDb(fieldKey, range, annotation,
                                feature, dbO, jCas);
                    }
                    annotation.addToIndexes();

                } catch (Exception e) {
                    LOG.error("while processing docId " + doc.get(ID), e);
                }
            }
        }
    }
}

Source File: FeatureStructureTest.java From uima-uimaj with Apache License 2.0

4 votes

/**
 * Tests V2 backwards compatibility for low-level CAS access; the goal is to match what V2 did.
 * The area under test is the use of the low-level int operations to change the type of an
 * existing feature structure.
 *
 * The test walks through four type changes of the same id: token -> Annotation (supertype),
 * Annotation -> token, token -> token_type_type (unrelated type) and back to token, checking
 * after each step the identity, id, feature values and the references held by an FSArray and
 * an FS list.
 */
public void testLLsetType() {
  LowLevelCAS llc = cas.getLowLevelCAS();
  FSArray fsa = new FSArray(ts.getType(CAS.TYPE_NAME_FS_ARRAY), cas, 3);
  fsa.addToIndexes();  // otherwise won't be replaced later
  NonEmptyFSList  fsl = new NonEmptyFSList(ts.getType(CAS.TYPE_NAME_NON_EMPTY_FS_LIST), cas);
  fsl.addToIndexes(); // otherwise won't be replaced later

  Annotation token = this.cas.createFS(tokenType);
  cas.setId2FSsMaybeUnconditionally(token);

  // set up some refs; these must be updated if the type changes in a way to require a new FS
  fsa.set(0, token);   // set the 0th  element of a FS Array to point to the "token"
  fsl.setHead(token);  // set the head element of a FS List to point to the "token"
  int tokId = token._id();

  // set some feature values; some of these are copied (if there's room, etc.)
  TOP ttfv = cas.createFS(tokenTypeType);
  token.setFeatureValue(tokenTypeFeat, ttfv);
  token.setFloatValue(tokenFloatFeat, 1.1f);
  assertEquals(1.1f, token.getFloatValue(tokenFloatFeat));
  token.setDoubleValue(tokenDoubleFeat, 1.7d);
  assertEquals(1.7d, token.getDoubleValue(tokenDoubleFeat));
  token.setBegin(3);
  token.setEnd(5);

  Sofa sofa = (Sofa) token.getSofa();
  assertTrue(sofa != null);
  assertTrue(fsa.get(0) == token);
  assertTrue(fsl.getHead() == token);

  // change the type to just Annotation
  // because this is a supertype, it should not create a new FS

  llc.ll_setIntValue(tokId, 0, TypeSystemConstants.annotTypeCode);
  Annotation fs = cas.getFsFromId(tokId);
  assertTrue(fs == token);
  assertTrue(fs._id() == token._id());
  assertEquals(ts.annotType, fs._getTypeImpl());
  // expected value first, so that a failure message reports expected/actual correctly
  assertEquals(3, fs.getBegin());
  assertEquals(5, fs.getEnd());
  assertEquals(sofa, fs.getSofa());
  assertTrue(fsa.get(0) == fs);
  assertTrue(fsl.getHead() == fs);

  // Change Annotation back to Token type

  llc.ll_setIntValue(tokId, 0, tokenType.getCode());
  token = cas.getFsFromId(tokId);
  assertTrue(fs == token);
  assertTrue(fs._id() == token._id());
  assertEquals(3, fs.getBegin());
  assertEquals(5, fs.getEnd());
  assertEquals(sofa, fs.getSofa());
  assertEquals(1.1f, token.getFloatValue(tokenFloatFeat));
  assertEquals(ttfv, token.getFeatureValue(tokenTypeFeat));
  assertTrue(fsa.get(0) == token);
  assertTrue(fsl.getHead() == token);

  // change type where the type forces a copy
  // token -> token_type_type
  //  These types are completely orthogonal, one doesn't subsume the other

  llc.ll_setIntValue(tokId,  0,  tokenTypeType.getCode());
  TOP ttt = cas.getFsFromId(tokId);
  assertTrue(ttt != token);
  assertTrue(ttt._id() == tokId);
  assertEquals(tokenTypeType, ttt._getTypeImpl());
  assertTrue(fsa.get(0) == ttt);
  assertTrue(fsl.getHead() == ttt);

  // change back to the token type: the assertions below expect a fresh FS under the same id,
  // with begin/end, the float feature and the FS-valued feature reset to their defaults

  llc.ll_setIntValue(tokId,  0,  tokenType.getCode());
  token = cas.getFsFromId(tokId);
  assertTrue(ttt != token);
  assertTrue(ttt._id() == token._id());
  assertEquals(0, token.getBegin());
  assertEquals(0, token.getEnd());
  assertEquals(sofa, token.getSofa());
  assertEquals(0.0f, token.getFloatValue(tokenFloatFeat));
  assertEquals(null, token.getFeatureValue(tokenTypeFeat));
  assertTrue(fsa.get(0) == token);
  assertTrue(fsl.getHead() == token);

}

Source File: SimpleTextMerger.java From uima-uimaj with Apache License 2.0

4 votes

/**
 * Merges one incoming CAS segment into the CAS being assembled: appends its document text to
 * the text buffer, copies the annotations of the configured types with their offsets shifted
 * by the length of the text buffered so far, and, once the segment flagged as last arrives,
 * finalizes the merged CAS and marks it ready for output.
 *
 * @param aJCas the incoming segment
 * @throws AnalysisEngineProcessException if the segment has no SourceDocumentInformation
 */
public void process(JCas aJCas) throws AnalysisEngineProcessException {
  // lazily obtain the CAS that accumulates the merged result
  if (mMergedCas == null) {
    mMergedCas = getEmptyJCas();
  }

  // remember where this segment starts within the merged text, then append its text
  String segmentText = aJCas.getDocumentText();
  int offset = mDocBuf.length();
  mDocBuf.append(segmentText);

  // copy the configured annotation types into the merged CAS
  CasCopier copier = new CasCopier(aJCas.getCas(), mMergedCas.getCas());
  // tracks what was already copied, in case one annotation is in two indexes (could happen if
  // the specified annotation types overlap)
  Set alreadyCopied = new HashSet();
  for (String typeName : mAnnotationTypesToCopy) {
    Type type = mMergedCas.getTypeSystem().getType(typeName);
    FSIndex index = aJCas.getCas().getAnnotationIndex(type);
    for (Iterator cursor = index.iterator(); cursor.hasNext();) {
      FeatureStructure source = (FeatureStructure) cursor.next();
      if (alreadyCopied.contains(source)) {
        continue;
      }
      Annotation shifted = (Annotation) copier.copyFs(source);
      // move begin and end by the length of the previously buffered text
      shifted.setBegin(shifted.getBegin() + offset);
      shifted.setEnd(shifted.getEnd() + offset);
      mMergedCas.addFsToIndexes(shifted);
      alreadyCopied.add(source);
    }
  }

  // the SourceDocumentInformation FS carries the source URI of the document and tells whether
  // the incoming CAS is the last segment
  FSIterator sdiIterator = aJCas.getAnnotationIndex(SourceDocumentInformation.type).iterator();
  if (!sdiIterator.hasNext()) {
    throw new AnalysisEngineProcessException(MESSAGE_DIGEST, MISSING_SOURCE_DOCUMENT_INFO,
            new Object[0]);
  }
  SourceDocumentInformation sourceDocInfo = (SourceDocumentInformation) sdiIterator.next();
  if (!sourceDocInfo.getLastSegment()) {
    // more segments to come; nothing to output yet
    return;
  }

  // last segment: time to produce an output CAS, starting with the accumulated text
  mMergedCas.setDocumentText(mDocBuf.toString());

  // describe the source document in the destination CAS
  SourceDocumentInformation destSDI = new SourceDocumentInformation(mMergedCas);
  destSDI.setUri(sourceDocInfo.getUri());
  destSDI.setOffsetInSource(0);
  destSDI.setLastSegment(true);
  destSDI.addToIndexes();

  // start a fresh buffer for the next document and signal that output is available
  mDocBuf = new StringBuffer();
  mReadyToOutput = true;
}

Java Code Examples for org.apache.uima.jcas.tcas.Annotation#setBegin()