org.apache.uima.cas.CAS#reset

Source File: CasIOUtilsTest.java From uima-uimaj with Apache License 2.0

6 votes

/**
 * Round-trips the shared {@code cas} through a file in the given serial format and verifies the
 * result.
 * <p>
 * The CAS is saved to {@code target/temp-test-output/simpleCas.<fileEnding>}, the receiving CAS
 * is reset, the file is loaded back, and the format reported by the loader is compared with the
 * requested one before {@code assertCorrectlyLoaded} checks the content.
 *
 * @param format
 *          serial format to save in, and expected to be reported on load
 * @param fileEnding
 *          extension appended to the output file name
 * @param leniently
 *          when {@code true}, load into {@code cas2} with {@link CasLoadMode#LENIENT}; otherwise
 *          load back into {@code cas} with {@link CasLoadMode#DEFAULT}
 * @throws Exception
 *           if saving, loading, or closing a stream fails
 */
private void testFormat(SerialFormat format, String fileEnding, boolean leniently) throws Exception {
  File casFile = new File("target/temp-test-output/simpleCas."+ fileEnding);
  casFile.getParentFile().mkdirs();
  // try-with-resources so the stream is closed even when save() throws
  try (FileOutputStream docOS = new FileOutputStream(casFile)) {
    CasIOUtils.save(cas, docOS, format);
  }
  
  // Use a CAS initialized with the "correct" type system or with a different type system?
  CAS casToUse = leniently ? cas2 : cas;
  casToUse.reset();
  
  SerialFormat loadedFormat;
  try (FileInputStream casInputStream = new FileInputStream(casFile)) {
    loadedFormat = CasIOUtils.load(casInputStream, null, casToUse, leniently ? CasLoadMode.LENIENT : CasLoadMode.DEFAULT);
  }
  Assert.assertEquals(format, loadedFormat);
  assertCorrectlyLoaded(casToUse, leniently);
}

Source File: AnalysisEngine_implTest.java From uima-uimaj with Apache License 2.0

6 votes

/**
 * Auxiliary method used by testProcess(). Runs both forms of the process method on the given
 * engine and checks, through the static fields that TestAnnotator records, that the document
 * text and the ResultSpecification reached the annotator. The engine is destroyed before this
 * method returns, whether or not the checks pass.
 * 
 * @param ae
 *          analysis engine to test; destroyed by this method
 * @param tcas
 *          CAS used for both process calls; reset after each round whose checks pass
 * @param resultSpec
 *          result specification passed to process(CAS, ResultSpecification)
 * @param expectedLastResultSpec
 *          value that TestAnnotator.lastResultSpec is expected to equal after that call
 * @throws UIMAException
 *           if one of the analysis engine calls fails
 */
protected void _testProcessInner(AnalysisEngine ae, CAS tcas, ResultSpecification resultSpec,
    ResultSpecification expectedLastResultSpec) throws UIMAException {
  try {
    // Test each form of the process method. When TestAnnotator executes, it
    // stores in static fields the document text and the ResultSpecification.
    // We use these to make sure the information propagates correctly to the annotator.

    // process(CAS)
    //   Calls with the Result spec set to default to that of the outer annotator output capabilities
    tcas.setDocumentText("new test");
    ae.process(tcas);
    assertEquals("new test", TestAnnotator.lastDocument);
    tcas.reset();

    // process(CAS,ResultSpecification)
    tcas.setDocumentText("testing...");
    ae.process(tcas, resultSpec);
    assertEquals("testing...", TestAnnotator.lastDocument);
    assertEquals(expectedLastResultSpec, TestAnnotator.lastResultSpec);
    tcas.reset();
  } finally {
    // release the engine even when a process call or an assertion above fails
    ae.destroy();
  }
}

Source File: ExampleApplication.java From uima-uimaj with Apache License 2.0

6 votes

/**
 * Processes a single file and prints annotations to System.out. The file content is read into a
 * string, trimmed, and used as the document text of the CAS. The CAS is always reset before this
 * method returns, so it can be reused for the next document even when processing fails.
 * 
 * @param aFile
 *          file to process
 * @param aAE
 *          Analysis Engine that will process the file
 * @param aCAS
 *          CAS that will be used to hold analysis results
 * @throws IOException
 *           if the file cannot be read
 * @throws AnalysisEngineProcessException
 *           if the analysis engine fails on the document
 */
private static void processFile(File aFile, AnalysisEngine aAE, CAS aCAS) throws IOException,
        AnalysisEngineProcessException {
  System.out.println("Processing file " + aFile.getName());

  // read before touching the CAS: a read failure leaves the CAS untouched
  String document = FileUtils.file2String(aFile).trim();

  try {
    // put document text in CAS
    aCAS.setDocumentText(document);

    // process
    aAE.process(aCAS);

    // print annotations to System.out
    PrintAnnotations.printAnnotations(aCAS, System.out);
  } finally {
    // reset the CAS to prepare it for processing the next document, also after a failure
    aCAS.reset();
  }
}

Source File: BratAnnotatorUtility.java From webanno with Apache License 2.0

5 votes

/**
 * Builds a new CAS that carries the type system, document metadata, language, text, and the
 * token and sentence boundaries of the given CAS, and nothing else.
 * <p>
 * The new CAS is first filled with a complete serialized copy of {@code aCas} and then reset;
 * the copy exists only to bring the type system across. Tokens and sentences are re-created in
 * the new CAS from the begin/end offsets of those found in {@code aCas}.
 *
 * @param aCas
 *          the CAS to take type system, text and boundaries from
 * @return the newly created CAS
 * @throws IOException
 *           wrapping the {@code UIMAException} raised when the new CAS cannot be created
 */
public static CAS clearAnnotations(CAS aCas)
    throws IOException
{
    CAS target;
    try {
        target = CasFactory.createCas((TypeSystemDescription) null);
    }
    catch (UIMAException e) {
        throw new IOException(e);
    }
    
    // Copy the CAS - basically we do this just to keep the full type system information
    CASCompleteSerializer serializer = serializeCASComplete((CASImpl) getRealCas(aCas));
    deserializeCASComplete(serializer, (CASImpl) getRealCas(target));

    // Remove all annotations from the target CAS but we keep the type system!
    target.reset();
    
    // Copy over essential information
    if (exists(aCas, getType(aCas, DocumentMetaData.class))) {
        copyDocumentMetadata(aCas, target);
    }
    else {
        // NOTE(review): this creates the metadata on the source CAS (aCas), not on target, so
        // the input is modified and target gets no DocumentMetaData here - confirm intended.
        WebAnnoCasUtil.createDocumentMetadata(aCas);
    }
    target.setDocumentLanguage(aCas.getDocumentLanguage()); // DKPro Core Issue 435
    target.setDocumentText(aCas.getDocumentText());
    
    // Transfer token boundaries
    for (AnnotationFS t : selectTokens(aCas)) {
        target.addFsToIndexes(createToken(target, t.getBegin(), t.getEnd()));
    }

    // Transfer sentence boundaries
    for (AnnotationFS s : selectSentences(aCas)) {
        target.addFsToIndexes(createSentence(target, s.getBegin(), s.getEnd()));
    }

    return target;
}

Source File: CasIOUtilsTest.java From uima-uimaj with Apache License 2.0

5 votes

/**
 * Saves the shared {@code cas} as XCAS to {@code target/temp-test-output/simpleCas.xcas}, loads
 * the file back through its URL, and checks the result with {@code assertCorrectlyLoaded}.
 *
 * @param leniently
 *          when {@code true}, load into {@code cas2} in lenient mode; otherwise load back into
 *          {@code cas} in default mode
 * @throws Exception
 *           if saving or loading fails
 */
public void testXCAS(boolean leniently) throws Exception {
  final File xcasFile = new File("target/temp-test-output/simpleCas.xcas");
  xcasFile.getParentFile().mkdirs();
  try (FileOutputStream out = new FileOutputStream(xcasFile)) {
    CasIOUtils.save(cas, out, SerialFormat.XCAS);
  }

  // Pick the receiving CAS (same or different type system) together with the matching load mode.
  final CAS receivingCas;
  final CasLoadMode loadMode;
  if (leniently) {
    receivingCas = cas2;
    loadMode = CasLoadMode.LENIENT;
  } else {
    receivingCas = cas;
    loadMode = CasLoadMode.DEFAULT;
  }

  receivingCas.reset();
  CasIOUtils.load(xcasFile.toURI().toURL(), null, receivingCas, loadMode);
  assertCorrectlyLoaded(receivingCas, leniently);
}

Source File: AnalysisEnginePoolTest.java From uima-uimaj with Apache License 2.0

5 votes

/**
 * Auxiliary method used by testProcess(). Checks an analysis engine out of the pool, runs both
 * forms of the process method on a fresh CAS, and returns the engine to the pool afterwards,
 * also when processing fails.
 * 
 * @param aPool
 *          pool to take the analysis engine from
 * @param i
 *          not read by this method
 * @throws UIMAException
 *           if one of the analysis engine calls fails
 */
protected void _testProcess(AnalysisEnginePool aPool, int i)
        throws UIMAException {
  final AnalysisEngine pooledEngine = aPool.getAnalysisEngine(0);
  try {
    final CAS workCas = pooledEngine.newCAS();
    // remember the type system of the CAS that was handed out
    mLastTypeSystem = workCas.getTypeSystem();

    // first form: process(CAS)
    workCas.setDocumentText("new test");
    pooledEngine.process(workCas);
    workCas.reset();

    // second form: process(CAS,ResultSpecification), asking for NamedEntity
    final ResultSpecification namedEntitySpec =
            new ResultSpecification_impl(workCas.getTypeSystem());
    namedEntitySpec.addResultType("NamedEntity", true);

    workCas.setDocumentText("testing...");
    pooledEngine.process(workCas, namedEntitySpec);
    workCas.reset();
  } finally {
    aPool.releaseAnalysisEngine(pooledEngine);
  }
}

Source File: BinaryCasSerDesPerformance.java From uima-uimaj with Apache License 2.0

5 votes

/**
 * Manual performance measurement: deserializes every file of a data directory into one CAS and
 * re-serializes it with compression, ten passes over the directory, then prints the accumulated
 * times. The directory path is blank as checked in, so the method returns immediately unless a
 * real path is filled in.
 *
 * @throws Exception
 *           if the type system or one of the data files cannot be read or processed
 */
public void testBinaryCasDeserialization6Performance() throws Exception {
    
    File dir = new File("" /*"/au/t/data/bin-compr-6/shakespeare.txt_40_processed"*/);
    
    if (!dir.exists()) return;
    
    File typeSystemFile = new File(dir, "typesystem.xml");
    XMLInputSource in = new XMLInputSource(typeSystemFile);
    TypeSystemDescription typeSystemDescription = UIMAFramework.getXMLParser().parseTypeSystemDescription(in);
    CAS cas = CasCreationUtils.createCas(typeSystemDescription, null, null);
    
    // listFiles() returns null when the path is not a directory or cannot be listed
    File[] dataFiles = dir.listFiles();
    if (dataFiles == null) return;
    
    long accumDeser = 0;
    long accumSer = 0;
    for (int i = 0; i < 10; i++) {
        for (final File f : dataFiles) {
            if (f.getName().equals("typesystem.xml")) {
                continue;
            }
            cas.reset();
            // close each input file; the loop opens every file ten times
            try (InputStream inputStream = new BufferedInputStream(new FileInputStream(f))) {
                long deserStart = System.nanoTime();
                Serialization.deserializeCAS(cas, inputStream);
                accumDeser += System.nanoTime() - deserStart;
            }
            
            ByteArrayOutputStream baos = new ByteArrayOutputStream(1024*512);
            long serStart = System.nanoTime();
            Serialization.serializeWithCompression(cas, baos, cas.getTypeSystem());
            accumSer += System.nanoTime() - serStart;
        }
    }
    System.out.format("Time to deserialize all files was %,d milliseconds%n", accumDeser / 1000000);
    System.out.format("Time to serialize   all files was %,d milliseconds%n", accumSer / 1000000);
}

Source File: XmiCasDeserializerTest.java From uima-uimaj with Apache License 2.0

5 votes

/**
 * Checks that out-of-type-system data survives an XMI round trip for a complex CAS.
 * <p>
 * The example XCAS is loaded with the full type system and serialized to XMI. That XMI is then
 * passed through a CAS with an empty type system, and separately through a CAS with a partial
 * type system. In both cases the re-serialized XMI, loaded into a CAS with the full type system,
 * must compare equal to the original CAS.
 *
 * @throws Exception
 *           if reading, serializing, or deserializing fails
 */
public void testOutOfTypeSystemDataComplexCas() throws Exception {
   // deserialize a complex XCAS
   CAS originalCas = CasCreationUtils.createCas(typeSystem, null, indexes);
   // try-with-resources so the file is closed even when deserialization throws
   try (InputStream serCasStream = new FileInputStream(JUnitExtension.getFile("ExampleCas/cas.xml"))) {
      XCASDeserializer.deserialize(serCasStream, originalCas);
   }
   
   //serialize to XMI
   String xmiStr = serialize(originalCas, null);
   
   //deserialize into a CAS with no type system
   CAS casWithNoTs = CasCreationUtils.createCas(new TypeSystemDescription_impl(),
           new TypePriorities_impl(), new FsIndexDescription[0]);
   XmiSerializationSharedData sharedData = new XmiSerializationSharedData();
   deserialize(xmiStr, casWithNoTs, sharedData, true, -1);
       
   // now reserialize including OutOfTypeSystem data
   String xmiStr2 = serialize(casWithNoTs, sharedData);
   
   //deserialize into a new CAS that has the full type system
   CAS newCas = CasCreationUtils.createCas(typeSystem, null, indexes);
   deserialize(xmiStr2, newCas, null, false, -1);
   
   //compare
   CasComparer.assertEquals(originalCas, newCas);
   
   //Test a partial type system with some missing features and a
   //missing "Organization" type
   File partialTypeSystemFile = JUnitExtension.getFile("ExampleCas/partialTestTypeSystem.xml");
   TypeSystemDescription partialTypeSystem = UIMAFramework.getXMLParser().parseTypeSystemDescription(
           new XMLInputSource(partialTypeSystemFile));
   CAS partialTsCas = CasCreationUtils.createCas(partialTypeSystem, null, indexes);
   XmiSerializationSharedData sharedData2 = new XmiSerializationSharedData();
   deserialize(xmiStr, partialTsCas, sharedData2, true, -1);
       
   String xmiStr3 = serialize(partialTsCas, sharedData2);
   // reuse the full-type-system CAS for the second comparison
   newCas.reset();
   deserialize(xmiStr3, newCas, null, false, -1);
   CasComparer.assertEquals(originalCas, newCas);    
}

Source File: MultiprocessingAnalysisEngine_implTest.java From uima-uimaj with Apache License 2.0

5 votes

/**
 * Auxiliary method used by testProcess(). Creates a multiprocessing analysis engine from the
 * description, runs both forms of the process method, and checks through the static fields
 * recorded by TestAnnotator that the document text and the ResultSpecification reached the
 * annotator.
 * 
 * @param aTaeDesc
 *          description of the analysis engine to test
 * @param i
 *          thread identifier for multithreaded testing (not read by this method)
 * @throws UIMAException
 *           if initializing or running the engine fails
 */
protected void _testProcess(AnalysisEngineDescription aTaeDesc, int i) throws UIMAException {
  final MultiprocessingAnalysisEngine_impl engine = new MultiprocessingAnalysisEngine_impl();
  engine.initialize(aTaeDesc, null);

  final CAS workCas = engine.newCAS();

  // first form: process(CAS)
  final String firstText = "new test";
  workCas.setDocumentText(firstText);
  engine.process(workCas);
  assertEquals(firstText, TestAnnotator.lastDocument);
  workCas.reset();

  // second form: process(CAS,ResultSpecification), asking for NamedEntity
  final ResultSpecification namedEntitySpec = new ResultSpecification_impl(workCas.getTypeSystem());
  namedEntitySpec.addResultType("NamedEntity", true);

  final String secondText = "testing...";
  workCas.setDocumentText(secondText);
  engine.process(workCas, namedEntitySpec);
  assertEquals(secondText, TestAnnotator.lastDocument);
  assertEquals(namedEntitySpec, TestAnnotator.lastResultSpec);
  workCas.reset();
}

Source File: CasPool.java From uima-uimaj with Apache License 2.0

5 votes

/**
 * Checks in a CAS to the pool. This automatically calls the {@link CAS#reset()} method, to ensure
 * that when the CAS is later retrieved from the pool it will be ready to use. Also notifies other
 * Threads that may be waiting for an instance to become available.
 * 
 * The given CAS is first mapped to its initial view, because that is what the pool stores; the
 * check-in itself is synchronized on that view to avoid the unnatural case where multiple threads
 * attempt to return the same CAS to the pool at the same time.
 * 
 * If the CAS does not belong to this pool, or is already among the free instances, a warning is
 * logged and the pool is left unchanged; waiting threads are still notified.
 * 
 * @param aCas
 *          the Cas to release
 */
public void releaseCas(CAS aCas) {
  // note the pool stores references to the InitialView of each CAS
  aCas.setCurrentComponentInfo(null);  // https://issues.apache.org/jira/browse/UIMA-3655
  CAS cas = aCas.getView(CAS.NAME_DEFAULT_SOFA);

  // make sure this CAS actually belongs to this pool and is checked out
  // synchronize to avoid the same CAS being released on 2 threads
  synchronized (cas) {
    if (!mAllInstances.contains(cas) || mFreeInstances.contains(cas)) {
      // unknown CAS, or one that was already returned: warn and leave the pool untouched
      UIMAFramework.getLogger(CLASS_NAME).logrb(Level.WARNING, CLASS_NAME.getName(), "releaseCas",
              LOG_RESOURCE_BUNDLE, "UIMA_return_cas_to_pool__WARNING");
    } else {
      // restore the ClassLoader and unlock the CAS, since release() can be called 
      // from within a CAS Multiplier.
      ((CASImpl)cas).restoreClassLoaderUnlockCas(); 
      
      // reset CAS
      cas.reset();
      
      // Add the CAS to the end of the free instances List
      mFreeInstances.add(cas);
      // presumably a semaphore counting free instances - TODO confirm against the field declaration
      permits.release();  // should follow adding cas back to mFreeInstances
    }
  }

  // Notify any threads waiting on this object
  // not needed by UIMA Core - other users may need.
  synchronized (this) {
    notifyAll();
  }
}

Source File: CPECasPool.java From uima-uimaj with Apache License 2.0

4 votes

/**
 * Returns a CAS to the pool. A CAS that is checked out from this pool is reset via
 * {@link CAS#reset()}, appended to the free instances, removed from the checked-out list if it
 * is found there, and threads waiting on this pool are notified. A CAS that does not belong to
 * this pool, or that is already free, only produces a warning (when WARNING logging is enabled)
 * and leaves the pool unchanged.
 * 
 * @param aCas
 *          the CAS to release
 */
public synchronized void releaseCas(CAS aCas) {
  final boolean checkedOutFromThisPool =
          mAllInstances.contains(aCas) && !mFreeInstances.contains(aCas);

  // Guard: reject a foreign or already-returned CAS without touching the pool.
  if (!checkedOutFromThisPool) {
    if (UIMAFramework.getLogger().isLoggable(Level.WARNING)) {
      UIMAFramework.getLogger(this.getClass()).logrb(Level.WARNING, this.getClass().getName(),
              "process", CPMUtils.CPM_LOG_RESOURCE_BUNDLE, "UIMA_CPM_invalid_checkin__WARNING",
              new Object[] { Thread.currentThread().getName() });
    }
    return;
  }

  // Make the CAS reusable and put it at the end of the free list.
  aCas.reset();
  mFreeInstances.add(aCas);

  // Drop it from the checked-out list, if it is tracked there.
  final int checkedOutPosition = checkedOutInstances.indexOf(aCas);
  if (checkedOutPosition != -1) {
    checkedOutInstances.remove(checkedOutPosition);
    if (UIMAFramework.getLogger().isLoggable(Level.FINEST)) {
      UIMAFramework.getLogger(this.getClass()).logrb(
              Level.FINEST,
              this.getClass().getName(),
              "process",
              CPMUtils.CPM_LOG_RESOURCE_BUNDLE,
              "UIMA_CPM_removed_from_checkedout_list__FINEST",
              new Object[] { Thread.currentThread().getName(),
                  String.valueOf(checkedOutInstances.size()) });
    }
  }

  if (UIMAFramework.getLogger().isLoggable(Level.FINEST)) {
    UIMAFramework.getLogger(this.getClass()).logrb(
            Level.FINEST,
            this.getClass().getName(),
            "process",
            CPMUtils.CPM_LOG_RESOURCE_BUNDLE,
            "UIMA_CPM_return_cas_to_pool__FINEST",
            new Object[] { Thread.currentThread().getName(),
                String.valueOf(checkedOutInstances.size()) });
  }

  // wake up threads waiting for a CAS to become available
  this.notifyAll();
}

Source File: SimplePipeline.java From uima-uimafit with Apache License 2.0

4 votes

/**
 * <p>
 * Runs a pipeline made of the given collection reader and sequence of analysis engines. One CAS
 * is created from the combined metadata of the reader and the engines and is reused, with a
 * reset in between, for every document the reader provides. After all CASes provided by the
 * reader have been processed, the method calls
 * {@link AnalysisEngine#collectionProcessComplete() collectionProcessComplete()} on the engines.
 * Note that {@link AnalysisEngine#destroy()} and {@link CollectionReader#destroy()} are
 * <b>NOT</b> called. As the components were instantiated by the caller, they must also be managed
 * (i.e. destroyed) by the caller.
 * </p>
 * <p>
 * External resources can only be shared between the reader and/or the analysis engines if the
 * reader/engines have been previously instantiated using a shared resource manager.
 * </p>
 * 
 * @param aResMgr
 *          a resource manager. Normally the same one used by the collection reader and analysis
 *          engines.
 * @param reader
 *          a collection reader
 * @param engines
 *          a sequence of analysis engines
 * @throws IOException
 *           if there is an I/O problem in the reader
 * @throws ResourceInitializationException 
 *           if there is a problem initializing or running the pipeline.
 * @throws CollectionException 
 *           if there is a problem initializing or running the pipeline.
 * @throws AnalysisEngineProcessException 
 *           if there is a problem initializing or running the pipeline.
 */
public static void runPipeline(final ResourceManager aResMgr, final CollectionReader reader,
        final AnalysisEngine... engines) throws IOException, ResourceInitializationException,
        AnalysisEngineProcessException, CollectionException {
  // Gather the metadata the CAS is built from: reader first, then the engines in order.
  final List<ResourceMetaData> componentMetaData = new ArrayList<ResourceMetaData>();
  componentMetaData.add(reader.getMetaData());
  for (int idx = 0; idx < engines.length; idx++) {
    componentMetaData.add(engines[idx].getMetaData());
  }

  final CAS sharedCas = CasCreationUtils.createCas(componentMetaData, null, aResMgr);
  reader.typeSystemInit(sharedCas.getTypeSystem());

  // One document per iteration; the CAS is cleared once the engines are done with it.
  while (reader.hasNext()) {
    reader.getNext(sharedCas);
    runPipeline(sharedCas, engines);
    sharedCas.reset();
  }

  collectionProcessComplete(engines);
}

Source File: XCASDeserializerTest.java From uima-uimaj with Apache License 2.0

4 votes

/**
 * Round-trips a two-view CAS through XCAS.
 * <p>
 * A CAS with the initial view and a view named "OtherSofa" is created, one instance of TOP is
 * indexed in both views, and the CAS is serialized to an XML string. The string is deserialized
 * into a second CAS twice, with a reset in between, to check that deserialization still works
 * after a reset. Each time the document texts and the number of indexed feature structures per
 * view are verified.
 *
 * @throws Exception
 *           if CAS creation, serialization, or parsing fails
 */
public void testMultipleSofas() throws Exception {
    final CAS initialView = CasCreationUtils.createCas(typeSystem, new TypePriorities_impl(), indexes);
    initialView.setDocumentText("This is a test");
    final CAS otherView = initialView.createView("OtherSofa");
    otherView.setDocumentText("This is only a test");

    // A TOP instance is used because an annotation cannot be added to a view other than the one
    // it was created in. https://issues.apache.org/jira/browse/UIMA-4099
    final Type topType = initialView.getTypeSystem().getTopType();
    final FeatureStructure sharedFs = initialView.createFS(topType);
    initialView.getIndexRepository().addFS(sharedFs);
    otherView.getIndexRepository().addFS(sharedFs);

    // exactly two indexed feature structures are expected in each view
    final FSIterator<FeatureStructure> sourceIt =
            initialView.getIndexRepository().getAllIndexedFS(topType);
    final FSIterator<FeatureStructure> sourceOtherIt =
            otherView.getIndexRepository().getAllIndexedFS(topType);
    sourceIt.next();
    sourceIt.next();
    sourceOtherIt.next();
    sourceOtherIt.next();
    assertFalse(sourceIt.hasNext());
    assertFalse(sourceOtherIt.hasNext());

    // serialize to an XML string
    final StringWriter xmlOut = new StringWriter();
    final XMLSerializer xmlSerializer = new XMLSerializer(xmlOut, false);
    final XCASSerializer xcasSerializer = new XCASSerializer(initialView.getTypeSystem());
    xcasSerializer.serialize(initialView, xmlSerializer.getContentHandler(), true);
    final String xml = xmlOut.toString();

    // deserialize into another CAS; the second pass runs on the CAS after it has been reset
    final CAS roundTripCas = CasCreationUtils.createCas(typeSystem, new TypePriorities_impl(), indexes);
    for (int pass = 0; pass < 2; pass++) {
        final ContentHandler xcasHandler =
                new XCASDeserializer(roundTripCas.getTypeSystem()).getXCASHandler(roundTripCas);
        final XMLReader xmlReader = SAXParserFactory.newInstance().newSAXParser().getXMLReader();
        xmlReader.setContentHandler(xcasHandler);
        xmlReader.parse(new InputSource(new StringReader(xml)));

        // both document texts must have come across
        assertEquals("This is a test", roundTripCas.getDocumentText());
        final CAS roundTripOtherView = roundTripCas.getView("OtherSofa");
        assertEquals("This is only a test", roundTripOtherView.getDocumentText());

        // and each view must again hold exactly two indexed feature structures
        final FSIterator<FeatureStructure> loadedIt =
                roundTripCas.getIndexRepository().getAllIndexedFS(topType);
        final FSIterator<FeatureStructure> loadedOtherIt =
                roundTripOtherView.getIndexRepository().getAllIndexedFS(topType);
        loadedIt.next();
        loadedIt.next();
        loadedOtherIt.next();
        loadedOtherIt.next();
        assertFalse(loadedIt.hasNext());
        assertFalse(loadedOtherIt.hasNext());

        roundTripCas.reset();
    }
}

Source File: XmiCasDeserializerTest.java From uima-uimaj with Apache License 2.0

4 votes

/**
 * Checks that out-of-type-system data recorded during XMI deserialization is kept and written
 * back out.
 * <p>
 * A simple XMI is loaded leniently into a CAS with an empty type system, and the recorded
 * out-of-type-system elements and view members are counted. The re-serialized XMI must load into
 * a full-type-system CAS equal to the one loaded from the original XMI. The same is then done
 * with a partial type system, checking in addition the out-of-type-system features recorded for
 * individual XMI ids.
 *
 * @throws Exception
 *           if reading, serializing, or deserializing fails
 */
public void testOutOfTypeSystemData() throws Exception {
   // deserialize a simple XMI into a CAS with no TypeSystem    
   CAS cas = CasCreationUtils.createCas(new TypeSystemDescription_impl(),
           new TypePriorities_impl(), new FsIndexDescription[0]);
   File xmiFile = JUnitExtension.getFile("ExampleCas/simpleCas.xmi");
   String xmiStr = FileUtils.file2String(xmiFile, "UTF-8");
   
   XmiSerializationSharedData sharedData = new XmiSerializationSharedData();
   deserialize(xmiStr, cas, sharedData, true, -1);
   
   //do some checks on the out-of-type system data
   // only the sizes are inspected, so an unbounded wildcard avoids raw types
   List<?> ootsElems = sharedData.getOutOfTypeSystemElements();
   assertEquals(9, ootsElems.size());
   List<?> ootsViewMembers = sharedData.getOutOfTypeSystemViewMembers("1");
   assertEquals(7, ootsViewMembers.size());
   
   // now reserialize including OutOfTypeSystem data
   String xmiStr2 = serialize(cas, sharedData);
   
   //deserialize both original and new XMI into CASes that do have the full typesystem
   CAS newCas1 = CasCreationUtils.createCas(typeSystem, null, indexes);
   TypeSystem ts = newCas1.getTypeSystem();
   deserialize(xmiStr, newCas1, null, false, -1);
   CAS newCas2 = CasCreationUtils.createCas(ts, null, indexes, null);
   deserialize(xmiStr2, newCas2, null, false, -1);
   CasComparer.assertEquals(newCas1, newCas2);  
   
   //Test a partial type system with some missing features and a
   //missing "Organization" type
   File partialTypeSystemFile = JUnitExtension.getFile("ExampleCas/partialTestTypeSystem.xml");
   TypeSystemDescription partialTypeSystem = UIMAFramework.getXMLParser().parseTypeSystemDescription(
           new XMLInputSource(partialTypeSystemFile));
   CAS partialTsCas = CasCreationUtils.createCas(partialTypeSystem, null, indexes);
   XmiSerializationSharedData sharedData2 = new XmiSerializationSharedData();
   deserialize(xmiStr, partialTsCas, sharedData2, true, -1);
   
   assertEquals(1,sharedData2.getOutOfTypeSystemElements().size());
   OotsElementData ootsFeats3 = sharedData2.getOutOfTypeSystemFeatures(sharedData2.getFsForXmiId(3));
   assertEquals(1, ootsFeats3.attributes.size());
   XmlAttribute ootsAttr = ootsFeats3.attributes.get(0);
   assertEquals("mentionType", ootsAttr.name);
   assertEquals("NAME", ootsAttr.value);
   OotsElementData ootsFeats5 = sharedData2.getOutOfTypeSystemFeatures(sharedData2.getFsForXmiId(5));
   assertEquals(0, ootsFeats5.attributes.size());
   assertEquals(1, ootsFeats5.childElements.size());
   XmlElementNameAndContents ootsChildElem = ootsFeats5.childElements.get(0);
   assertEquals("mentionType", ootsChildElem.name.qName);
   assertEquals("NAME", ootsChildElem.contents);
   
   OotsElementData ootsFeats8 = sharedData2.getOutOfTypeSystemFeatures(sharedData2.getFsForXmiId(8));
   assertEquals(1, ootsFeats8.attributes.size());
   OotsElementData ootsFeats10 = sharedData2.getOutOfTypeSystemFeatures(sharedData2.getFsForXmiId(10));
   assertEquals(1, ootsFeats10.attributes.size());
   OotsElementData ootsFeats11 = sharedData2.getOutOfTypeSystemFeatures(sharedData2.getFsForXmiId(11));
   assertEquals(4, ootsFeats11.childElements.size());
   
   String xmiStr3 = serialize(partialTsCas, sharedData2);
   // reuse the second full-type-system CAS for the partial-type-system comparison
   newCas2.reset();
   deserialize(xmiStr3, newCas2, null, false, -1);
   CasComparer.assertEquals(newCas1, newCas2);    
}

Source File: MultiprocessingAnalysisEngine_implTest.java From uima-uimaj with Apache License 2.0

4 votes

/**
 * Thread body: each time the go signal arrives, runs five rounds of both process forms on the
 * shared engine with short random pauses in between. Any failure is printed and stored in
 * {@code mFailure} for the main thread to check; the thread then waits for the next go signal.
 * The loop ends when {@code MultiThreadUtils.wait4go} returns {@code false}.
 */
public void run() {
  while (MultiThreadUtils.wait4go(this)) {
    try {
      final Random pause = new Random();

      // When TestAnnotator executes, it stores the document text and the
      // ResultSpecification in static fields. They cannot be checked here:
      // their state is nondeterministic while several threads are processing,
      // so the checks happen after the threads have finished.
      for (int round = 0; round < 5; round++) {
        final CAS workCas = mAE.newCAS();
        mLastTypeSystem = workCas.getTypeSystem();

        // first form: process(CAS)
        workCas.setDocumentText("new test");
        mAE.process(workCas);
        Thread.sleep(0, pause.nextInt(1000));  // under one microsecond
        workCas.reset();

        // second form: process(CAS,ResultSpecification), asking for NamedEntity
        final ResultSpecification namedEntitySpec =
                new ResultSpecification_impl(workCas.getTypeSystem());
        namedEntitySpec.addResultType("NamedEntity", true);

        workCas.setDocumentText("testing...");
        Thread.sleep(0, pause.nextInt(1000));  // under one microsecond
        mAE.process(workCas, namedEntitySpec);
        Thread.sleep(0, pause.nextInt(1000));  // under one microsecond
        workCas.reset();
      }
    } catch (Throwable t) {
      t.printStackTrace();
      // An exception thrown from this thread cannot fail the unit test;
      // record it so the main thread can check for it later.
      mFailure = t;
    }
  }
}

Source File: TreeParser.java From deeplearning4j with Apache License 2.0

4 votes

/**
 * Gets trees from text.
 * First a sentence segmenter is used to segment the training examples in to sentences.
 * Sentences are then turned in to trees and returned.
 *
 * This will also process sentences with the following label format:
 * {@code <YOURLABEL> some text </YOURLABEL>}
 *
 * This will allow you to iterate on and label sentences and label spans yourself.
 *
 * Sentences that produce no top level treebank node, or that carry a label which is not in
 * {@code labels}, are skipped. Both pooled CASes are released again even when processing fails.
 *
 * @param text the text to process
 * @param labels the allowed labels; they are lower-cased before being compared and passed on
 * @return the list of trees
 * @throws Exception if tokenizing or parsing fails
 */
public List<Tree> getTreesWithLabels(String text, List<String> labels) throws Exception {
    // Normalize the labels before a CAS is checked out, so a bad argument cannot leak one.
    List<String> lowerCaseLabels = new ArrayList<>();
    for (String s : labels)
        lowerCaseLabels.add(s.toLowerCase());

    List<Tree> ret = new ArrayList<>();
    CAS c = pool.getCas();
    try {
        c.setDocumentText(text);
        tokenizer.process(c);

        CAS c2 = pool.getCas();
        try {
            for (Sentence sentence : JCasUtil.select(c.getJCas(), Sentence.class)) {
                try {
                    Pair<String, MultiDimensionalMap<Integer, Integer, String>> stringsWithLabels =
                                    ContextLabelRetriever.stringWithLabels(sentence.getCoveredText(), tf);
                    c2.setDocumentText(stringsWithLabels.getFirst());

                    tokenizer.process(c2);
                    parser.process(c2);

                    //build the tree based on this
                    List<TopTreebankNode> nodes =
                                    new ArrayList<>(JCasUtil.select(c2.getJCas(), TopTreebankNode.class));
                    if (nodes.isEmpty()) {
                        continue;
                    }
                    if (nodes.size() > 1) {
                        log.warn("More than one top level node for a treebank parse. Only accepting first input node.");
                    }

                    Collection<String> labels2 = stringsWithLabels.getSecond().values();
                    Set<String> diff = SetUtils.difference(labels2, lowerCaseLabels);
                    if (!diff.isEmpty()) {
                        log.warn("Found invalid sentence. Skipping");
                        continue;
                    }

                    TopTreebankNode node = nodes.get(0);
                    ret.add(TreeFactory.buildTree(node, stringsWithLabels, lowerCaseLabels));
                } finally {
                    // the scratch CAS must be empty before the next sentence is put into it
                    c2.reset();
                }
            }
        } finally {
            pool.releaseCas(c2);
        }
    } finally {
        pool.releaseCas(c);
    }

    return ret;
}

Source File: TreeParser.java From deeplearning4j with Apache License 2.0

4 votes

/**
 * Gets trees from text.
 * First a sentence segmenter is used to segment the training examples in to sentences.
 * Sentences are then turned in to trees and returned.
 *
 * This will also process sentences with the following label format:
 * {@code <YOURLABEL> some text </YOURLABEL>}
 *
 * This will allow you to iterate on and label sentences and label spans yourself.
 *
 * The text is wrapped in an opening and closing tag built from {@code label} before it is
 * segmented. Empty sentences and sentences without a top level treebank node are skipped; a
 * sentence that fails to parse is logged and skipped. Both pooled CASes are released again even
 * when processing fails.
 *
 * @param text the text to process
 * @param label the label for the whole sentence
 * @param labels the possible labels for the sentence; lower-cased before being passed on
 * @return the list of trees; empty when {@code text} is empty
 * @throws Exception if segmenting the wrapped text fails
 */
public List<Tree> getTreesWithLabels(String text, String label, List<String> labels) throws Exception {
    if (text.isEmpty())
        return new ArrayList<>();

    // Normalize the labels before a CAS is checked out, so a bad argument cannot leak one.
    List<String> lowerCaseLabels = new ArrayList<>();
    for (String s : labels)
        lowerCaseLabels.add(s.toLowerCase());

    List<Tree> ret = new ArrayList<>();
    CAS c = pool.getCas();
    try {
        c.setDocumentText("<" + label + "> " + text + " </" + label + ">");
        tokenizer.process(c);

        CAS c2 = pool.getCas();
        try {
            for (Sentence sentence : JCasUtil.select(c.getJCas(), Sentence.class)) {
                if (sentence.getCoveredText().isEmpty())
                    continue;

                try {
                    Pair<String, MultiDimensionalMap<Integer, Integer, String>> stringsWithLabels =
                                    ContextLabelRetriever.stringWithLabels(sentence.getCoveredText(), tf);
                    c2.setDocumentText(stringsWithLabels.getFirst());
                    tokenizer.process(c2);
                    parser.process(c2);

                    //build the tree based on this
                    List<TopTreebankNode> nodes =
                                    new ArrayList<>(JCasUtil.select(c2.getJCas(), TopTreebankNode.class));
                    if (nodes.isEmpty()) {
                        continue;
                    }
                    if (nodes.size() > 1) {
                        log.warn("More than one top level node for a treebank parse. Only accepting first input node.");
                    }

                    TopTreebankNode node = nodes.get(0);
                    ret.add(TreeFactory.buildTree(node, stringsWithLabels, lowerCaseLabels));
                } catch (Exception e) {
                    // best effort: skip the sentence, but keep the cause in the log
                    log.warn("Unable to parse " + sentence.getCoveredText(), e);
                } finally {
                    // the scratch CAS must be empty before the next sentence is put into it
                    c2.reset();
                }
            }
        } finally {
            pool.releaseCas(c2);
        }
    } finally {
        pool.releaseCas(c);
    }

    return ret;
}

Source File: MultiprocessingAnalysisEngine_implTest.java From uima-uimaj with Apache License 2.0

4 votes

/**
 * Thread body: each time the go signal arrives, runs both process forms once on a CAS obtained
 * from the shared engine, pausing briefly at random when {@code doSleeps} is set. Any failure is
 * printed and stored in {@code mFailure} for the main thread to check; the thread then waits for
 * the next go signal. The loop ends when {@code MultiThreadUtils.wait4go} returns {@code false}.
 */
public void run() {
  final Random pause = new Random();

  // wait for the go signal after all threads are set up; false means it is time to terminate
  while (MultiThreadUtils.wait4go(this)) {
    try {
      // When TestAnnotator executes, it stores the document text and the
      // ResultSpecification in static fields. They cannot be checked here:
      // their state is nondeterministic while several threads are processing,
      // so the checks happen after the threads have finished.
      final CAS workCas = mAE.newCAS();
      mLastTypeSystem = workCas.getTypeSystem();

      // first form: process(CAS)
      workCas.setDocumentText("new test");
      mAE.process(workCas);
      if (doSleeps) {
        Thread.sleep(0, pause.nextInt(1000));  // under one microsecond
      }
      workCas.reset();

      // second form: process(CAS,ResultSpecification), asking for NamedEntity
      final ResultSpecification namedEntitySpec =
              new ResultSpecification_impl(workCas.getTypeSystem());
      namedEntitySpec.addResultType("NamedEntity", true);

      workCas.setDocumentText("testing...");
      if (doSleeps) {
        Thread.sleep(0, pause.nextInt(1000));  // under one microsecond
      }
      mAE.process(workCas, namedEntitySpec);
      if (doSleeps) {
        Thread.sleep(0, pause.nextInt(1000));  // under one microsecond
      }
      workCas.reset();
    } catch (Throwable t) {
      t.printStackTrace();
      // An exception thrown from this thread cannot fail the unit test;
      // record it so the main thread can check for it later.
      mFailure = t;
    }
  }
}

Source File: AnalysisEngine_implTest.java From uima-uimaj with Apache License 2.0

4 votes

/**
 * Runs the process() checks against several engine configurations that use
 * TestAnnotator: a primitive engine without and with a type system
 * description, a fixed-flow aggregate, an aggregate containing a CAS
 * Consumer, and an aggregate that uses ParallelStep.
 *
 * Any Exception raised along the way is handed to
 * JUnitExtension.handleException.
 */
public void testProcess() throws Exception {
  try {
    // --- Primitive engine, no type system description -------------------
    // The test is expected to work with or without a type system
    // description, so none is set on this first descriptor.
    AnalysisEngineDescription untypedDesc = new AnalysisEngineDescription_impl();
    untypedDesc.setPrimitive(true);
    untypedDesc.setAnnotatorImplementationName(
        "org.apache.uima.analysis_engine.impl.TestAnnotator");
    untypedDesc.getMetaData().setName("Test Primitive TAE");

    Capability untypedCap = new Capability_impl();
    untypedCap.addOutputType("NamedEntity", true);
    untypedCap.addOutputType("DocumentStructure", true);
    Capability[] untypedCaps = { untypedCap };
    untypedDesc.getAnalysisEngineMetaData().setCapabilities(untypedCaps);
    _testProcess(untypedDesc);

    // --- Primitive engine, explicit type system description -------------
    AnalysisEngineDescription typedDesc = new AnalysisEngineDescription_impl();
    typedDesc.setPrimitive(true);
    typedDesc.setAnnotatorImplementationName(
        "org.apache.uima.analysis_engine.impl.TestAnnotator");
    typedDesc.getMetaData().setName("Test Primitive TAE");

    TypeSystemDescription typeSystem = new TypeSystemDescription_impl();
    typeSystem.addType("NamedEntity", "", "uima.tcas.Annotation");
    typeSystem.addType("DocumentStructure", "", "uima.cas.TOP");
    typedDesc.getAnalysisEngineMetaData().setTypeSystem(typeSystem);

    Capability typedCap = new Capability_impl();
    typedCap.addOutputType("NamedEntity", true);
    typedCap.addOutputType("DocumentStructure", true);
    Capability[] typedCaps = { typedCap };
    typedDesc.getAnalysisEngineMetaData().setCapabilities(typedCaps);
    _testProcess(typedDesc);

    // --- Aggregate engine with a one-step fixed flow ---------------------
    // Wraps the typed primitive descriptor under the delegate key "Test".
    AnalysisEngineDescription aggregateDesc = new AnalysisEngineDescription_impl();
    aggregateDesc.setPrimitive(false);
    aggregateDesc.getMetaData().setName("Test Aggregate TAE");
    aggregateDesc.getDelegateAnalysisEngineSpecifiersWithImports().put("Test", typedDesc);
    FixedFlow_impl fixedFlow = new FixedFlow_impl();
    fixedFlow.setFixedFlow(new String[] { "Test" });
    aggregateDesc.getAnalysisEngineMetaData().setFlowConstraints(fixedFlow);
    aggregateDesc.getAnalysisEngineMetaData().setCapabilities(typedCaps);
    _testProcess(aggregateDesc);

    // --- Aggregate engine containing a CAS Consumer ----------------------
    File consumerOutput = JUnitExtension.getFile("CpmOutput.txt");
    if (consumerOutput != null && consumerOutput.exists()) {
      // File.delete() can't be relied upon here, so instead the file is
      // truncated to zero length by opening it for writing without append.
      new FileOutputStream(consumerOutput, false).close();
      assertEquals(0, consumerOutput.length());
    }

    XMLInputSource consumerAggregateXml = new XMLInputSource(
        JUnitExtension.getFile("TextAnalysisEngineImplTest/AggregateTaeWithCasConsumer.xml"));
    AnalysisEngineDescription consumerAggregateDesc =
        UIMAFramework.getXMLParser().parseAnalysisEngineDescription(consumerAggregateXml);

    _testProcess(consumerAggregateDesc, new String[] { "en" });

    // The CAS Consumer ran if the output file now exists and is non-empty.
    if (null == consumerOutput) {
      consumerOutput = JUnitExtension.getFile("CpmOutput.txt");
    }
    assertTrue(consumerOutput != null && consumerOutput.exists());
    assertTrue(consumerOutput.length() > 0);
    consumerOutput.delete();

    // --- Aggregate engine that uses ParallelStep -------------------------
    XMLInputSource parallelStepXml = new XMLInputSource(
        JUnitExtension.getFile("TextAnalysisEngineImplTest/AggregateForParallelStepTest.xml"));
    AnalysisEngineDescription parallelStepDesc =
        UIMAFramework.getXMLParser().parseAnalysisEngineDescription(parallelStepXml);
    AnalysisEngine parallelStepEngine = UIMAFramework.produceAnalysisEngine(parallelStepDesc);
    CAS parallelStepCas = parallelStepEngine.newCAS();
    parallelStepCas.setDocumentText("new test");
    parallelStepEngine.process(parallelStepCas);
    // Both annotators must have recorded the same document text.
    assertEquals("new test", TestAnnotator.lastDocument);
    assertEquals("new test", TestAnnotator2.lastDocument);
    parallelStepCas.reset();

  } catch (Exception e) {
    JUnitExtension.handleException(e);
  }
}

Source File: SimplePipeline.java From uima-uimafit with Apache License 2.0

3 votes

/**
 * <p>
 * Runs a CollectionReader and a sequence of AnalysisEngines as one pipeline. The given
 * descriptions are combined into a single engine description, from which one engine is
 * instantiated; every CAS supplied by the reader is processed by that engine and then reset
 * before the next document is read.
 * </p>
 * <p>
 * Once the reader has no more CASes,
 * {@link AnalysisEngine#collectionProcessComplete() collectionProcessComplete()} is called on
 * the engine. The engine is destroyed in all cases, including when processing fails. The
 * life-cycle methods are <b>NOT</b> called on the reader: since the reader was instantiated by
 * the caller, it must also be managed (i.e. destroyed) by the caller.
 * </p>
 * <p>
 * Note that with this method, external resources cannot be shared between the reader and the
 * analysis engines. They can be shared amongst the analysis engines.
 * </p>
 * <p>
 * The CAS is created from the merged metadata of the reader and the engine, using the resource
 * manager used by the collection reader.
 * </p>
 * 
 * @param reader
 *          The CollectionReader that loads the documents into the CAS.
 * @param descs
 *          Primitive AnalysisEngineDescriptions that process the CAS, in order. If you have a mix
 *          of primitive and aggregate engines, then please create the AnalysisEngines yourself
 *          and call the other runPipeline method.
 * @throws IOException
 *           if there is an I/O problem in the reader
 * @throws ResourceInitializationException 
 *           if there is a problem initializing or running the pipeline.
 * @throws CollectionException 
 *           if there is a problem initializing or running the pipeline.
 * @throws AnalysisEngineProcessException 
 *           if there is a problem initializing or running the pipeline.
 */
public static void runPipeline(final CollectionReader reader,
        final AnalysisEngineDescription... descs) throws IOException,
        ResourceInitializationException, AnalysisEngineProcessException, CollectionException {
  // Declared outside the try block so the finally clause can still reach it.
  AnalysisEngine engine = null;
  try {
    // Combine all descriptions into one and instantiate a single engine from it.
    AnalysisEngineDescription combinedDesc = createEngineDescription(descs);
    engine = createEngine(combinedDesc);

    // One CAS, built from the reader's and the engine's metadata, is reused for every document.
    CAS sharedCas = CasCreationUtils.createCas(
            asList(reader.getMetaData(), engine.getMetaData()),
            null,
            reader.getResourceManager());
    reader.typeSystemInit(sharedCas.getTypeSystem());

    // Read, process, reset - until the reader is exhausted.
    while (reader.hasNext()) {
      reader.getNext(sharedCas);
      engine.process(sharedCas);
      sharedCas.reset();
    }

    // Tell the engine that the whole collection has been processed.
    engine.collectionProcessComplete();
  } finally {
    // Runs on success and on failure; engine is still null if creation itself failed.
    LifeCycleUtil.destroy(engine);
  }
}

Java Code Examples for org.apache.uima.cas.CAS#reset()