Java Code Examples for org.apache.uima.cas.CAS#reset()
The following examples show how to use
org.apache.uima.cas.CAS#reset().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: CasIOUtilsTest.java From uima-uimaj with Apache License 2.0 | 6 votes |
private void testFormat(SerialFormat format, String fileEnding, boolean leniently) throws Exception { File casFile = new File("target/temp-test-output/simpleCas."+ fileEnding); casFile.getParentFile().mkdirs(); FileOutputStream docOS = new FileOutputStream(casFile); CasIOUtils.save(cas, docOS, format); docOS.close(); // Use a CAS initialized with the "correct" type system or with a different type system? CAS casToUse = leniently ? cas2 : cas; casToUse.reset(); FileInputStream casInputStream = new FileInputStream(casFile); SerialFormat loadedFormat = CasIOUtils.load(casInputStream, null, casToUse, leniently ? CasLoadMode.LENIENT : CasLoadMode.DEFAULT); casInputStream.close(); Assert.assertEquals(format, loadedFormat); assertCorrectlyLoaded(casToUse, leniently); }
Example 2
Source File: AnalysisEngine_implTest.java From uima-uimaj with Apache License 2.0 | 6 votes |
/** * Auxiliary method used by testProcess() * * @param aTaeDesc * description of TextAnalysisEngine to test */ protected void _testProcessInner(AnalysisEngine ae, CAS tcas, ResultSpecification resultSpec, ResultSpecification expectedLastResultSpec) throws UIMAException { // create and initialize TextAnalysisEngine // Test each form of the process method. When TestAnnotator executes, it // stores in static fields the document text and the ResultSpecification. // We use these to make sure the information propagates correctly to the annotator. // process(CAS) // Calls with the Result spec set to default to that of the outer annotator output capabilities tcas.setDocumentText("new test"); ae.process(tcas); assertEquals("new test", TestAnnotator.lastDocument); tcas.reset(); // process(CAS,ResultSpecification) tcas.setDocumentText("testing..."); ae.process(tcas, resultSpec); assertEquals("testing...", TestAnnotator.lastDocument); assertEquals(expectedLastResultSpec, TestAnnotator.lastResultSpec); tcas.reset(); ae.destroy(); }
Example 3
Source File: ExampleApplication.java From uima-uimaj with Apache License 2.0 | 6 votes |
/** * Processes a single XML file and prints annotations to System.out * * @param aFile * file to process * @param aAE * Analysis Engine that will process the file * @param aCAS * CAS that will be used to hold analysis results */ private static void processFile(File aFile, AnalysisEngine aAE, CAS aCAS) throws IOException, AnalysisEngineProcessException { System.out.println("Processing file " + aFile.getName()); String document = FileUtils.file2String(aFile); document = document.trim(); // put document text in CAS aCAS.setDocumentText(document); // process aAE.process(aCAS); // print annotations to System.out PrintAnnotations.printAnnotations(aCAS, System.out); // reset the CAS to prepare it for processing the next document aCAS.reset(); }
Example 4
Source File: BratAnnotatorUtility.java From webanno with Apache License 2.0 | 5 votes |
public static CAS clearAnnotations(CAS aCas) throws IOException { CAS target; try { target = CasFactory.createCas((TypeSystemDescription) null); } catch (UIMAException e) { throw new IOException(e); } // Copy the CAS - basically we do this just to keep the full type system information CASCompleteSerializer serializer = serializeCASComplete((CASImpl) getRealCas(aCas)); deserializeCASComplete(serializer, (CASImpl) getRealCas(target)); // Remove all annotations from the target CAS but we keep the type system! target.reset(); // Copy over essential information if (exists(aCas, getType(aCas, DocumentMetaData.class))) { copyDocumentMetadata(aCas, target); } else { WebAnnoCasUtil.createDocumentMetadata(aCas); } target.setDocumentLanguage(aCas.getDocumentLanguage()); // DKPro Core Issue 435 target.setDocumentText(aCas.getDocumentText()); // Transfer token boundaries for (AnnotationFS t : selectTokens(aCas)) { target.addFsToIndexes(createToken(target, t.getBegin(), t.getEnd())); } // Transfer sentence boundaries for (AnnotationFS s : selectSentences(aCas)) { target.addFsToIndexes(createSentence(target, s.getBegin(), s.getEnd())); } return target; }
Example 5
Source File: CasIOUtilsTest.java From uima-uimaj with Apache License 2.0 | 5 votes |
public void testXCAS(boolean leniently) throws Exception { File casFile = new File("target/temp-test-output/simpleCas.xcas"); casFile.getParentFile().mkdirs(); try (FileOutputStream docOS = new FileOutputStream(casFile)) { CasIOUtils.save(cas, docOS, SerialFormat.XCAS); } // Use a CAS initialized with the "correct" type system or with a different type system? CAS casToUse = leniently ? cas2 : cas; casToUse.reset(); CasIOUtils.load(casFile.toURI().toURL(), null, casToUse, leniently ? CasLoadMode.LENIENT : CasLoadMode.DEFAULT); assertCorrectlyLoaded(casToUse, leniently); }
Example 6
Source File: AnalysisEnginePoolTest.java From uima-uimaj with Apache License 2.0 | 5 votes |
/** * Auxilliary method used by testProcess() * * @param aTaeDesc * description of TextAnalysisEngine to test */ protected void _testProcess(AnalysisEnginePool aPool, int i) throws UIMAException { AnalysisEngine tae = aPool.getAnalysisEngine(0); try { // Test each form of the process method. When TestAnnotator executes, it // stores in static fields the document text and the ResultSpecification. // We use thse to make sure the information propogates correctly to the annotator. // process(CAS) CAS tcas = tae.newCAS(); mLastTypeSystem = tcas.getTypeSystem(); tcas.setDocumentText("new test"); tae.process(tcas); tcas.reset(); // process(CAS,ResultSpecification) ResultSpecification resultSpec = new ResultSpecification_impl(tcas.getTypeSystem()); resultSpec.addResultType("NamedEntity", true); tcas.setDocumentText("testing..."); tae.process(tcas, resultSpec); tcas.reset(); } finally { aPool.releaseAnalysisEngine(tae); } }
Example 7
Source File: BinaryCasSerDesPerformance.java From uima-uimaj with Apache License 2.0 | 5 votes |
public void testBinaryCasDeserialization6Performance() throws Exception { File dir = new File("" /*"/au/t/data/bin-compr-6/shakespeare.txt_40_processed"*/); if (!dir.exists()) return; File typeSystemFile = new File(dir, "typesystem.xml"); XMLInputSource in = new XMLInputSource(typeSystemFile); TypeSystemDescription typeSystemDescription = UIMAFramework.getXMLParser().parseTypeSystemDescription(in); CAS cas = CasCreationUtils.createCas(typeSystemDescription, null, null); long accumDeser = 0; long accumSer = 0; for (int i = 0; i <10; i++) { for (final File f : dir.listFiles()) { if (f.getName().equals("typesystem.xml")) { continue; } InputStream inputStream = new BufferedInputStream(new FileInputStream(f)); cas.reset(); long ist = System.nanoTime(); Serialization.deserializeCAS(cas, inputStream); accumDeser += System.nanoTime() - ist; ByteArrayOutputStream baos = new ByteArrayOutputStream(1024*512); ist = System.nanoTime(); Serialization.serializeWithCompression(cas, baos, cas.getTypeSystem()); accumSer += System.nanoTime() - ist; // System.out.format("Time to deserialize was %,d milliseconds, size = %d%n", // (System.nanoTime() - ist) / 1000000L, ((CASImpl)cas).getHeap().getHeapSize()); } } System.out.format("Time to deserialize all files was %,d milliseconds%n", accumDeser / 1000000); // (System.nanoTime() - startTime) / 1000000L); System.out.format("Time to serialize all files was %,d milliseconds%n", accumSer / 1000000); }
Example 8
Source File: XmiCasDeserializerTest.java From uima-uimaj with Apache License 2.0 | 5 votes |
public void testOutOfTypeSystemDataComplexCas() throws Exception { // deserialize a complex XCAS CAS originalCas = CasCreationUtils.createCas(typeSystem, null, indexes); InputStream serCasStream = new FileInputStream(JUnitExtension.getFile("ExampleCas/cas.xml")); XCASDeserializer.deserialize(serCasStream, originalCas); serCasStream.close(); //serialize to XMI String xmiStr = serialize(originalCas, null); //deserialize into a CAS with no type system CAS casWithNoTs = CasCreationUtils.createCas(new TypeSystemDescription_impl(), new TypePriorities_impl(), new FsIndexDescription[0]); XmiSerializationSharedData sharedData = new XmiSerializationSharedData(); deserialize(xmiStr, casWithNoTs, sharedData, true, -1); // now reserialize including OutOfTypeSystem data String xmiStr2 = serialize(casWithNoTs, sharedData); //deserialize into a new CAS that has the full type system CAS newCas = CasCreationUtils.createCas(typeSystem, null, indexes); deserialize(xmiStr2, newCas, null, false, -1); //compare CasComparer.assertEquals(originalCas, newCas); //Test a partial type system with a missing some missing features and //missing "Organization" type File partialTypeSystemFile = JUnitExtension.getFile("ExampleCas/partialTestTypeSystem.xml"); TypeSystemDescription partialTypeSystem = UIMAFramework.getXMLParser().parseTypeSystemDescription( new XMLInputSource(partialTypeSystemFile)); CAS partialTsCas = CasCreationUtils.createCas(partialTypeSystem, null, indexes); XmiSerializationSharedData sharedData2 = new XmiSerializationSharedData(); deserialize(xmiStr, partialTsCas, sharedData2, true, -1); String xmiStr3 = serialize(partialTsCas, sharedData2); newCas.reset(); deserialize(xmiStr3, newCas, null, false, -1); CasComparer.assertEquals(originalCas, newCas); }
Example 9
Source File: MultiprocessingAnalysisEngine_implTest.java From uima-uimaj with Apache License 2.0 | 5 votes |
/** * Auxilliary method used by testProcess() * * @param aTaeDesc * description of TextAnalysisEngine to test * @param i * thread identifier for multithreaded testing */ protected void _testProcess(AnalysisEngineDescription aTaeDesc, int i) throws UIMAException { // create and initialize MultiprocessingTextAnalysisEngine MultiprocessingAnalysisEngine_impl tae = new MultiprocessingAnalysisEngine_impl(); tae.initialize(aTaeDesc, null); // Test each form of the process method. When TestAnnotator executes, it // stores in static fields the document text and the ResultSpecification. // We use thse to make sure the information propogates correctly to the annotator. // process(CAS) CAS tcas = tae.newCAS(); tcas.setDocumentText("new test"); tae.process(tcas); assertEquals("new test", TestAnnotator.lastDocument); tcas.reset(); // process(CAS,ResultSpecification) ResultSpecification resultSpec = new ResultSpecification_impl(tcas.getTypeSystem()); resultSpec.addResultType("NamedEntity", true); tcas.setDocumentText("testing..."); tae.process(tcas, resultSpec); assertEquals("testing...", TestAnnotator.lastDocument); assertEquals(resultSpec, TestAnnotator.lastResultSpec); tcas.reset(); }
Example 10
Source File: CasPool.java From uima-uimaj with Apache License 2.0 | 5 votes |
/** * Checks in a CAS to the pool. This automatically calls the {@link CAS#reset()} method, to ensure * that when the CAS is later retrieved from the pool it will be ready to use. Also notifies other * Threads that may be waiting for an instance to become available. * * Synchronized on the CAS to avoid the unnatural case where * multiple threads attempt to return the same CAS to the pool * at the same time. * * @param aCas * the Cas to release */ public void releaseCas(CAS aCas) { // note the pool stores references to the InitialView of each CAS aCas.setCurrentComponentInfo(null); // https://issues.apache.org/jira/browse/UIMA-3655 CAS cas = aCas.getView(CAS.NAME_DEFAULT_SOFA); // make sure this CAS actually belongs to this pool and is checked out // synchronize to avoid the same CAS being released on 2 threads synchronized (cas) { if (!mAllInstances.contains(cas) || mFreeInstances.contains(cas)) { UIMAFramework.getLogger(CLASS_NAME).logrb(Level.WARNING, CLASS_NAME.getName(), "releaseCas", LOG_RESOURCE_BUNDLE, "UIMA_return_cas_to_pool__WARNING"); } else { // restore the ClassLoader and unlock the CAS, since release() can be called // from within a CAS Multiplier. ((CASImpl)cas).restoreClassLoaderUnlockCas(); // reset CAS cas.reset(); // Add the CAS to the end of the free instances List mFreeInstances.add(cas); permits.release(); // should follow adding cas back to mFreeInstances } } // Notify any threads waiting on this object // not needed by UIMA Core - other users may need. synchronized (this) { notifyAll(); } }
Example 11
Source File: CPECasPool.java From uima-uimaj with Apache License 2.0 | 4 votes |
/** * Checks in a CAS to the pool. This automatically calls the {@link CAS#reset()} method, to ensure * that when the CAS is later retrieved from the pool it will be ready to use. Also notifies other * Threads that may be waiting for an instance to become available. * * @param aCas * the CAS to release */ public synchronized void releaseCas(CAS aCas) { // make sure this CAS actually belongs to this pool and is checked out if (!mAllInstances.contains(aCas) || mFreeInstances.contains(aCas)) { if (UIMAFramework.getLogger().isLoggable(Level.WARNING)) { UIMAFramework.getLogger(this.getClass()).logrb(Level.WARNING, this.getClass().getName(), "process", CPMUtils.CPM_LOG_RESOURCE_BUNDLE, "UIMA_CPM_invalid_checkin__WARNING", new Object[] { Thread.currentThread().getName() }); } } else { // reset CAS aCas.reset(); // Add the CAS to the end of the free instances List mFreeInstances.add(aCas); // get the position of the CAS in the list. int index = checkedOutInstances.indexOf(aCas); // new code JC 05/11/2005 if (index != -1) { checkedOutInstances.remove(index); if (UIMAFramework.getLogger().isLoggable(Level.FINEST)) { UIMAFramework.getLogger(this.getClass()).logrb( Level.FINEST, this.getClass().getName(), "process", CPMUtils.CPM_LOG_RESOURCE_BUNDLE, "UIMA_CPM_removed_from_checkedout_list__FINEST", new Object[] { Thread.currentThread().getName(), String.valueOf(checkedOutInstances.size()) }); } } if (UIMAFramework.getLogger().isLoggable(Level.FINEST)) { UIMAFramework.getLogger(this.getClass()).logrb( Level.FINEST, this.getClass().getName(), "process", CPMUtils.CPM_LOG_RESOURCE_BUNDLE, "UIMA_CPM_return_cas_to_pool__FINEST", new Object[] { Thread.currentThread().getName(), String.valueOf(checkedOutInstances.size()) }); } this.notifyAll(); // when CAS becomes available } }
Example 12
Source File: SimplePipeline.java From uima-uimafit with Apache License 2.0 | 4 votes |
/**
 * <p>
 * Provides a simple way to run a pipeline for a given collection reader and sequence of analysis
 * engines. After processing all CASes provided by the reader, the method calls
 * {@link AnalysisEngine#collectionProcessComplete() collectionProcessComplete()} on the engines.
 * Note that {@link AnalysisEngine#destroy()} and {@link CollectionReader#destroy()} are
 * <b>NOT</b> called. As the components were instantiated by the caller, they must also be
 * managed (i.e. destroyed) by the caller.
 * </p>
 * <p>
 * External resources can only be shared between the reader and/or the analysis engines if the
 * reader/engines have been previously instantiated using a shared resource manager.
 * </p>
 *
 * @param aResMgr
 *          a resource manager. Normally the same one used by the collection reader and analysis
 *          engines.
 * @param reader
 *          a collection reader
 * @param engines
 *          a sequence of analysis engines
 * @throws IOException
 *           if there is an I/O problem in the reader
 * @throws ResourceInitializationException
 *           if there is a problem initializing or running the pipeline.
 * @throws CollectionException
 *           if there is a problem initializing or running the pipeline.
 * @throws AnalysisEngineProcessException
 *           if there is a problem initializing or running the pipeline.
 */
public static void runPipeline(final ResourceManager aResMgr, final CollectionReader reader,
    final AnalysisEngine... engines) throws IOException, ResourceInitializationException,
    AnalysisEngineProcessException, CollectionException {
  // Gather the metadata of the reader first, then of every engine in order
  final List<ResourceMetaData> componentMetaData = new ArrayList<ResourceMetaData>();
  componentMetaData.add(reader.getMetaData());
  for (int idx = 0; idx < engines.length; idx++) {
    componentMetaData.add(engines[idx].getMetaData());
  }

  // One CAS built from the merged metadata is reused for every document
  final CAS sharedCas = CasCreationUtils.createCas(componentMetaData, null, aResMgr);
  reader.typeSystemInit(sharedCas.getTypeSystem());

  while (reader.hasNext()) {
    reader.getNext(sharedCas);
    runPipeline(sharedCas, engines);
    sharedCas.reset();
  }

  collectionProcessComplete(engines);
}
Example 13
Source File: XCASDeserializerTest.java From uima-uimaj with Apache License 2.0 | 4 votes |
public void testMultipleSofas() throws Exception { /************************************************* * Make CAS with 2 sofas, initial and OtherSofa * * * * Add instance of TOP and index in both views * * * * Serialize to string "xml" * * * * Deserialize from string * *************************************************/ CAS cas = CasCreationUtils.createCas(typeSystem, new TypePriorities_impl(), indexes); // set document text for the initial view cas.setDocumentText("This is a test"); // create a new view and set its document text CAS cas2 = cas.createView("OtherSofa"); cas2.setDocumentText("This is only a test"); // Change this test to create an instance of TOP because you cannot add an annotation to other than // the view it is created in. https://issues.apache.org/jira/browse/UIMA-4099 // create a TOP and add to index of both views Type topType = cas.getTypeSystem().getTopType(); FeatureStructure aTOP = cas.createFS(topType); cas.getIndexRepository().addFS(aTOP); cas2.getIndexRepository().addFS(aTOP); FSIterator<FeatureStructure> it = cas.getIndexRepository().getAllIndexedFS(topType); FSIterator<FeatureStructure> it2 = cas2.getIndexRepository().getAllIndexedFS(topType); it.next(); it.next(); it2.next(); it2.next(); assertFalse(it.hasNext()); assertFalse(it2.hasNext()); // serialize StringWriter sw = new StringWriter(); XMLSerializer xmlSer = new XMLSerializer(sw, false); XCASSerializer xcasSer = new XCASSerializer(cas.getTypeSystem()); xcasSer.serialize(cas, xmlSer.getContentHandler(), true); String xml = sw.getBuffer().toString(); // deserialize into another CAS (repeat twice to check it still works after reset) CAS newCas = CasCreationUtils.createCas(typeSystem, new TypePriorities_impl(), indexes); for (int i = 0; i < 2; i++) { XCASDeserializer newDeser = new XCASDeserializer(newCas.getTypeSystem()); ContentHandler newDeserHandler = newDeser.getXCASHandler(newCas); SAXParserFactory fact = SAXParserFactory.newInstance(); SAXParser parser = fact.newSAXParser(); 
XMLReader xmlReader = parser.getXMLReader(); xmlReader.setContentHandler(newDeserHandler); xmlReader.parse(new InputSource(new StringReader(xml))); // check sofas assertEquals("This is a test", newCas.getDocumentText()); CAS newCas2 = newCas.getView("OtherSofa"); assertEquals("This is only a test", newCas2.getDocumentText()); // check that annotation is still indexed in both views it = newCas.getIndexRepository().getAllIndexedFS(topType); it2 = newCas2.getIndexRepository().getAllIndexedFS(topType); it.next(); it.next(); it2.next(); it2.next(); assertFalse(it.hasNext()); assertFalse(it2.hasNext()); // assertTrue(tIndex.size() == 2); // document annot and this one // assertTrue(t2Index.size() == 2); // ditto newCas.reset(); // testing if works after cas reset, go around loop 2nd time } }
Example 14
Source File: XmiCasDeserializerTest.java From uima-uimaj with Apache License 2.0 | 4 votes |
/**
 * Deserializes a simple XMI into a CAS without a type system and into a CAS with a partial type
 * system, checks the recorded out-of-type-system data, and verifies that reserializing with that
 * data yields a CAS equal to one loaded directly with the full type system.
 *
 * @throws Exception
 *           if (de)serialization or a check fails
 */
public void testOutOfTypeSystemData() throws Exception {
  // deserialize a simple XMI into a CAS with no TypeSystem
  CAS emptyTsCas = CasCreationUtils.createCas(new TypeSystemDescription_impl(),
      new TypePriorities_impl(), new FsIndexDescription[0]);
  File xmiFile = JUnitExtension.getFile("ExampleCas/simpleCas.xmi");
  String xmiStr = FileUtils.file2String(xmiFile, "UTF-8");

  XmiSerializationSharedData sharedData = new XmiSerializationSharedData();
  deserialize(xmiStr, emptyTsCas, sharedData, true, -1);

  // do some checks on the out-of-type system data
  assertEquals(9, sharedData.getOutOfTypeSystemElements().size());
  assertEquals(7, sharedData.getOutOfTypeSystemViewMembers("1").size());

  // now reserialize including OutOfTypeSystem data
  String xmiStr2 = serialize(emptyTsCas, sharedData);

  // deserialize both original and new XMI into CASes that do have the full typesystem
  CAS newCas1 = CasCreationUtils.createCas(typeSystem, null, indexes);
  TypeSystem ts = newCas1.getTypeSystem();
  deserialize(xmiStr, newCas1, null, false, -1);

  CAS newCas2 = CasCreationUtils.createCas(ts, null, indexes, null);
  deserialize(xmiStr2, newCas2, null, false, -1);

  CasComparer.assertEquals(newCas1, newCas2);

  // Test a partial type system that is missing some features and the "Organization" type
  File partialTypeSystemFile = JUnitExtension.getFile("ExampleCas/partialTestTypeSystem.xml");
  TypeSystemDescription partialTypeSystem = UIMAFramework.getXMLParser()
      .parseTypeSystemDescription(new XMLInputSource(partialTypeSystemFile));
  CAS partialTsCas = CasCreationUtils.createCas(partialTypeSystem, null, indexes);
  XmiSerializationSharedData sharedData2 = new XmiSerializationSharedData();
  deserialize(xmiStr, partialTsCas, sharedData2, true, -1);

  assertEquals(1, sharedData2.getOutOfTypeSystemElements().size());

  // xmi id 3: one out-of-type-system attribute
  OotsElementData ootsFeats3 =
      sharedData2.getOutOfTypeSystemFeatures(sharedData2.getFsForXmiId(3));
  assertEquals(1, ootsFeats3.attributes.size());
  XmlAttribute ootsAttr = ootsFeats3.attributes.get(0);
  assertEquals("mentionType", ootsAttr.name);
  assertEquals("NAME", ootsAttr.value);

  // xmi id 5: no attributes, one child element
  OotsElementData ootsFeats5 =
      sharedData2.getOutOfTypeSystemFeatures(sharedData2.getFsForXmiId(5));
  assertEquals(0, ootsFeats5.attributes.size());
  assertEquals(1, ootsFeats5.childElements.size());
  XmlElementNameAndContents ootsChildElem = ootsFeats5.childElements.get(0);
  assertEquals("mentionType", ootsChildElem.name.qName);
  assertEquals("NAME", ootsChildElem.contents);

  // xmi ids 8, 10 and 11
  OotsElementData ootsFeats8 =
      sharedData2.getOutOfTypeSystemFeatures(sharedData2.getFsForXmiId(8));
  assertEquals(1, ootsFeats8.attributes.size());
  OotsElementData ootsFeats10 =
      sharedData2.getOutOfTypeSystemFeatures(sharedData2.getFsForXmiId(10));
  assertEquals(1, ootsFeats10.attributes.size());
  OotsElementData ootsFeats11 =
      sharedData2.getOutOfTypeSystemFeatures(sharedData2.getFsForXmiId(11));
  assertEquals(4, ootsFeats11.childElements.size());

  String xmiStr3 = serialize(partialTsCas, sharedData2);
  newCas2.reset();
  deserialize(xmiStr3, newCas2, null, false, -1);
  CasComparer.assertEquals(newCas1, newCas2);
}
Example 15
Source File: MultiprocessingAnalysisEngine_implTest.java From uima-uimaj with Apache License 2.0 | 4 votes |
public void run() { while (true) { if (!MultiThreadUtils.wait4go(this)) { break; } try { Random r = new Random(); // Test each form of the process method. When TestAnnotator executes, it // stores in static fields the document text and the ResultSpecification. // We use thse to make sure the information propagates correctly to the // annotator. (However, we can't check these until after the threads are // finished, as their state is nondeterministic during multithreaded // processing.) // process(CAS) for (int i = 0; i < 5; i++) { CAS tcas = mAE.newCAS(); mLastTypeSystem = tcas.getTypeSystem(); tcas.setDocumentText("new test"); mAE.process(tcas); Thread.sleep(0, r.nextInt(1000)); // between 0 and 1 microseconds tcas.reset(); // process(CAS,ResultSpecification) ResultSpecification resultSpec = new ResultSpecification_impl(tcas.getTypeSystem()); resultSpec.addResultType("NamedEntity", true); tcas.setDocumentText("testing..."); Thread.sleep(0, r.nextInt(1000)); // between 0 and 1 microseconds mAE.process(tcas, resultSpec); Thread.sleep(0, r.nextInt(1000)); // between 0 and 1 microseconds tcas.reset(); } } catch (Throwable t) { t.printStackTrace(); //can't cause unit test to fail by throwing exception from thread. //record the failure and the main thread will check for it later. mFailure = t; } } }
Example 16
Source File: TreeParser.java From deeplearning4j with Apache License 2.0 | 4 votes |
/** * Gets trees from text. * First a sentence segmenter is used to segment the training examples in to sentences. * Sentences are then turned in to trees and returned. * * This will also process sentences with the following label format: * <YOURLABEL> some text </YOURLABEL> * * This will allow you to iterate on and label sentences and label spans yourself. * * @param text the text to process * @param labels * @return the list of trees * @throws Exception */ public List<Tree> getTreesWithLabels(String text, List<String> labels) throws Exception { CAS c = pool.getCas(); c.setDocumentText(text); tokenizer.process(c); List<String> lowerCaseLabels = new ArrayList<>(); for (String s : labels) lowerCaseLabels.add(s.toLowerCase()); labels = lowerCaseLabels; List<Tree> ret = new ArrayList<>(); CAS c2 = pool.getCas(); for (Sentence sentence : JCasUtil.select(c.getJCas(), Sentence.class)) { List<String> tokens = new ArrayList<>(); for (Token t : JCasUtil.selectCovered(Token.class, sentence)) tokens.add(t.getCoveredText()); Pair<String, MultiDimensionalMap<Integer, Integer, String>> stringsWithLabels = ContextLabelRetriever.stringWithLabels(sentence.getCoveredText(), tf); c2.setDocumentText(stringsWithLabels.getFirst()); tokenizer.process(c2); parser.process(c2); //build the tree based on this //damn it List<TopTreebankNode> nodes = new ArrayList<>(JCasUtil.select(c2.getJCas(), TopTreebankNode.class)); if (nodes.size() > 1) { log.warn("More than one top level node for a treebank parse. Only accepting first input node."); } else if (nodes.isEmpty()) { c2.reset(); continue; } Collection<String> labels2 = stringsWithLabels.getSecond().values(); Set<String> diff = SetUtils.difference(labels2, labels); if (!diff.isEmpty()) { log.warn("Found invalid sentence. Skipping"); c2.reset(); continue; } TopTreebankNode node = nodes.get(0); ret.add(TreeFactory.buildTree(node, stringsWithLabels, labels)); c2.reset(); } pool.releaseCas(c); pool.releaseCas(c2); return ret; }
Example 17
Source File: TreeParser.java From deeplearning4j with Apache License 2.0 | 4 votes |
/** * Gets trees from text. * First a sentence segmenter is used to segment the training examples in to sentences. * Sentences are then turned in to trees and returned. * * This will also process sentences with the following label format: * <YOURLABEL> some text </YOURLABEL> * * This will allow you to iterate on and label sentences and label spans yourself. * * @param text the text to process * @param label the label for the whole sentence * @param labels the possible labels for the sentence * @return the list of trees * @throws Exception */ public List<Tree> getTreesWithLabels(String text, String label, List<String> labels) throws Exception { if (text.isEmpty()) return new ArrayList<>(); CAS c = pool.getCas(); c.setDocumentText("<" + label + "> " + text + " </" + label + ">"); tokenizer.process(c); List<String> lowerCaseLabels = new ArrayList<>(); for (String s : labels) lowerCaseLabels.add(s.toLowerCase()); labels = lowerCaseLabels; List<Tree> ret = new ArrayList<>(); CAS c2 = pool.getCas(); for (Sentence sentence : JCasUtil.select(c.getJCas(), Sentence.class)) { if (sentence.getCoveredText().isEmpty()) continue; List<String> tokens = new ArrayList<>(); for (Token t : JCasUtil.selectCovered(Token.class, sentence)) tokens.add(t.getCoveredText()); try { Pair<String, MultiDimensionalMap<Integer, Integer, String>> stringsWithLabels = ContextLabelRetriever.stringWithLabels(sentence.getCoveredText(), tf); c2.setDocumentText(stringsWithLabels.getFirst()); tokenizer.process(c2); parser.process(c2); //build the tree based on this List<TopTreebankNode> nodes = new ArrayList<>(JCasUtil.select(c2.getJCas(), TopTreebankNode.class)); if (nodes.size() > 1) { log.warn("More than one top level node for a treebank parse. 
Only accepting first input node."); } else if (nodes.isEmpty()) { c2.reset(); continue; } TopTreebankNode node = nodes.get(0); ret.add(TreeFactory.buildTree(node, stringsWithLabels, labels)); c2.reset(); } catch (Exception e) { log.warn("Unable to parse " + sentence.getCoveredText()); c2.reset(); continue; } } pool.releaseCas(c); pool.releaseCas(c2); return ret; }
Example 18
Source File: MultiprocessingAnalysisEngine_implTest.java From uima-uimaj with Apache License 2.0 | 4 votes |
public void run() { Random r = new Random(); while (true) { if (!MultiThreadUtils.wait4go(this)) { // wait for go signal after all threads are setup. break; // time to terminate } try { // Test each form of the process method. When TestAnnotator executes, it // stores in static fields the document text and the ResultSpecification. // We use thse to make sure the information propogates correctly to the // annotator. (However, we can't check these until after the threads are // finished, as their state is nondeterministic during multithreaded // processing.) // process(CAS) CAS tcas = mAE.newCAS(); // for (int i = 0; i < 1000; i++) { // uncomment to debug mLastTypeSystem = tcas.getTypeSystem(); tcas.setDocumentText("new test"); mAE.process(tcas); // System.out.println("Debug finished processing a cas"); if (doSleeps) Thread.sleep(0, r.nextInt(1000)); // 0 to 1 microseconds tcas.reset(); // process(CAS,ResultSpecification) ResultSpecification resultSpec = new ResultSpecification_impl(tcas.getTypeSystem()); resultSpec.addResultType("NamedEntity", true); tcas.setDocumentText("testing..."); if (doSleeps) Thread.sleep(0, r.nextInt(1000)); // 0 to 1 microseconds mAE.process(tcas, resultSpec); if (doSleeps) Thread.sleep(0, r.nextInt(1000)); // 0 to 1 microseconds tcas.reset(); // } } catch (Throwable t) { t.printStackTrace(); //can't cause unit test to fail by throwing exception from thread. //record the failure and the main thread will check for it later. mFailure = t; } } }
Example 19
Source File: AnalysisEngine_implTest.java From uima-uimaj with Apache License 2.0 | 4 votes |
public void testProcess() throws Exception { try { // test simple primitive TextAnalysisEngine (using TestAnnotator class) // This test should work with or without a type system description AnalysisEngineDescription primitiveDesc = new AnalysisEngineDescription_impl(); primitiveDesc.setPrimitive(true); primitiveDesc .setAnnotatorImplementationName("org.apache.uima.analysis_engine.impl.TestAnnotator"); primitiveDesc.getMetaData().setName("Test Primitive TAE"); // TypeSystemDescription tsd = new TypeSystemDescription_impl(); // tsd.addType("NamedEntity", "", "uima.tcas.Annotation"); // tsd.addType("DocumentStructure", "", "uima.cas.TOP"); // primitiveDesc.getAnalysisEngineMetaData().setTypeSystem(tsd); Capability cap = new Capability_impl(); cap.addOutputType("NamedEntity", true); cap.addOutputType("DocumentStructure", true); Capability[] caps = new Capability[] {cap}; primitiveDesc.getAnalysisEngineMetaData().setCapabilities(caps); _testProcess(primitiveDesc); primitiveDesc = new AnalysisEngineDescription_impl(); primitiveDesc.setPrimitive(true); primitiveDesc .setAnnotatorImplementationName("org.apache.uima.analysis_engine.impl.TestAnnotator"); primitiveDesc.getMetaData().setName("Test Primitive TAE"); TypeSystemDescription tsd = new TypeSystemDescription_impl(); tsd.addType("NamedEntity", "", "uima.tcas.Annotation"); tsd.addType("DocumentStructure", "", "uima.cas.TOP"); primitiveDesc.getAnalysisEngineMetaData().setTypeSystem(tsd); cap = new Capability_impl(); cap.addOutputType("NamedEntity", true); cap.addOutputType("DocumentStructure", true); caps = new Capability[] {cap}; primitiveDesc.getAnalysisEngineMetaData().setCapabilities(caps); _testProcess(primitiveDesc); // test simple aggregate TextAnalysisEngine (again using TestAnnotator class) AnalysisEngineDescription aggDesc = new AnalysisEngineDescription_impl(); aggDesc.setPrimitive(false); aggDesc.getMetaData().setName("Test Aggregate TAE"); aggDesc.getDelegateAnalysisEngineSpecifiersWithImports().put("Test", 
primitiveDesc); FixedFlow_impl flow = new FixedFlow_impl(); flow.setFixedFlow(new String[] { "Test" }); aggDesc.getAnalysisEngineMetaData().setFlowConstraints(flow); aggDesc.getAnalysisEngineMetaData().setCapabilities(caps); _testProcess(aggDesc); // test aggregate TAE containing a CAS Consumer File outFile = JUnitExtension.getFile("CpmOutput.txt"); if(outFile != null && outFile.exists()) { //outFile.delete() //can't be relied upon. Instead set file to zero length. FileOutputStream fos = new FileOutputStream(outFile, false); fos.close(); assertEquals(0,outFile.length()); } AnalysisEngineDescription aggWithCcDesc = UIMAFramework.getXMLParser().parseAnalysisEngineDescription( new XMLInputSource(JUnitExtension .getFile("TextAnalysisEngineImplTest/AggregateTaeWithCasConsumer.xml"))); _testProcess(aggWithCcDesc, new String[] {"en"}); // test that CAS Consumer ran if (null == outFile) { outFile = JUnitExtension.getFile("CpmOutput.txt"); } assertTrue(outFile != null && outFile.exists()); assertTrue(outFile.length() > 0); outFile.delete(); //test aggregate that uses ParallelStep AnalysisEngineDescription desc = UIMAFramework.getXMLParser().parseAnalysisEngineDescription( new XMLInputSource(JUnitExtension.getFile("TextAnalysisEngineImplTest/AggregateForParallelStepTest.xml"))); AnalysisEngine ae = UIMAFramework.produceAnalysisEngine(desc); CAS cas = ae.newCAS(); cas.setDocumentText("new test"); ae.process(cas); assertEquals("new test", TestAnnotator.lastDocument); assertEquals("new test", TestAnnotator2.lastDocument); cas.reset(); } catch (Exception e) { JUnitExtension.handleException(e); } }
Example 20
Source File: SimplePipeline.java From uima-uimafit with Apache License 2.0 | 3 votes |
/** * <p> * Run the CollectionReader and AnalysisEngines as a pipeline. After processing all CASes provided * by the reader, the method calls the life-cycle methods * ({@link AnalysisEngine#collectionProcessComplete() collectionProcessComplete()} on the engines * and {@link Resource#destroy() destroy()}) on all engines. Note that the life-cycle methods are * <b>NOT</b> called on the reader. As the reader was instantiated by the caller, it must also be * managed (i.e. destroyed) the caller. * </p> * <p> * Note that with this method, external resources cannot be shared between the reader and the * analysis engines. They can be shared amongst the analysis engines. * </p> * <p> * The CAS is created using the resource manager used by the collection reader. * </p> * * @param reader * The CollectionReader that loads the documents into the CAS. * @param descs * Primitive AnalysisEngineDescriptions that process the CAS, in order. If you have a mix * of primitive and aggregate engines, then please create the AnalysisEngines yourself * and call the other runPipeline method. * @throws IOException * if there is an I/O problem in the reader * @throws ResourceInitializationException * if there is a problem initializing or running the pipeline. * @throws CollectionException * if there is a problem initializing or running the pipeline. * @throws AnalysisEngineProcessException * if there is a problem initializing or running the pipeline. */ public static void runPipeline(final CollectionReader reader, final AnalysisEngineDescription... 
descs) throws IOException, ResourceInitializationException, AnalysisEngineProcessException, CollectionException { AnalysisEngine aae = null; try { // Create AAE final AnalysisEngineDescription aaeDesc = createEngineDescription(descs); // Instantiate AAE aae = createEngine(aaeDesc); // Create CAS from merged metadata final CAS cas = CasCreationUtils.createCas(asList(reader.getMetaData(), aae.getMetaData()), null, reader.getResourceManager()); reader.typeSystemInit(cas.getTypeSystem()); // Process while (reader.hasNext()) { reader.getNext(cas); aae.process(cas); cas.reset(); } // Signal end of processing aae.collectionProcessComplete(); } finally { // Destroy LifeCycleUtil.destroy(aae); } }