org.apache.uima.collection.CollectionReaderDescription Java Examples
The following examples show how to use
org.apache.uima.collection.CollectionReaderDescription.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: FormatSupportDescription.java From webanno with Apache License 2.0 | 6 votes |
/**
 * Creates the reader description for the reader class configured for this format.
 *
 * @param aTSD
 *            type system description handed on to {@code createReaderDescription}
 * @return the description created for the class named by {@code readerClass}
 * @throws UnsupportedOperationException
 *             if {@code isReadable()} reports that this format cannot be read
 * @throws ResourceInitializationException
 *             if the configured class cannot be loaded or is not a {@code CollectionReader}
 */
@Override
public CollectionReaderDescription getReaderDescription(TypeSystemDescription aTSD)
    throws ResourceInitializationException
{
    if (!isReadable()) {
        throw new UnsupportedOperationException("The format [" + getName() + "] cannot be read");
    }

    Class<? extends CollectionReader> readerClazz;
    try {
        // asSubclass is a checked narrowing: a class that is not a CollectionReader is
        // rejected here instead of slipping through an unchecked cast.
        readerClazz = Class.forName(readerClass).asSubclass(CollectionReader.class);
    }
    catch (ClassNotFoundException | ClassCastException e) {
        throw new ResourceInitializationException(e);
    }

    return createReaderDescription(readerClazz, aTSD);
}
Example #2
Source File: UimaFactoryInjectionTest.java From uima-uimafit with Apache License 2.0 | 6 votes |
/**
 * Registers factory beans from the given application context with the resource factory of
 * a freshly created {@code UIMAFramework_impl}.
 *
 * @param aApplicationContext
 *          context from which the factory beans are looked up
 */
private static void initUimaApplicationContext(final ApplicationContext aApplicationContext) {
  new UIMAFramework_impl() {
    // Instance initializer: runs the registrations when the anonymous subclass is created.
    {
      CompositeResourceFactory_impl composite =
              (CompositeResourceFactory_impl) getResourceFactory();

      composite.registerFactory(
              CasConsumerDescription.class,
              aApplicationContext.getBean(CasConsumerFactory_impl.class));
      composite.registerFactory(
              CasInitializerDescription.class,
              aApplicationContext.getBean(CasInitializerFactory_impl.class));
      composite.registerFactory(
              CollectionReaderDescription.class,
              aApplicationContext.getBean(CollectionReaderFactory_impl.class));
      composite.registerFactory(
              ResourceCreationSpecifier.class,
              aApplicationContext.getBean(AnalysisEngineFactory_impl.class));
      composite.registerFactory(
              CustomResourceSpecifier.class,
              aApplicationContext.getBean(CustomResourceFactory_impl.class));
    }
  };
}
Example #3
Source File: CollectionReaderDescription_implTest.java From uima-uimaj with Apache License 2.0 | 6 votes |
public void testXMLization() throws Exception { try { // write objects to XML StringWriter writer = new StringWriter(); mTestDesc.toXML(writer); String testDescXml = writer.getBuffer().toString(); // System.out.println(testDescXml); // parse objects from XML (no schema validation) InputStream is = new ByteArrayInputStream(testDescXml.getBytes(encoding)); CollectionReaderDescription newDesc = (CollectionReaderDescription) UIMAFramework .getXMLParser().parse(new XMLInputSource(is, null)); // compare Assert.assertEquals(mTestDesc, newDesc); } catch (Exception e) { JUnitExtension.handleException(e); } }
Example #4
Source File: CasMultiplierTest.java From uima-uimafit with Apache License 2.0 | 6 votes |
/**
 * Iterates a pipeline of three text incrementors followed by a consumer and checks, for
 * every yielded JCas, that both the consumer result and the document text hold the
 * expected value (starting at 4 and growing by one per JCas).
 */
@Ignore("UIMA-3470 not fixed yet")
@Test
public void testIteratePipelineOnText() throws Exception {
  CollectionReaderDescription readerDesc = createReaderDescription(Reader.class);
  AnalysisEngineDescription incrementStep = createEngineDescription(TextIncrementor.class);
  AnalysisEngineDescription consumerStep = createEngineDescription(Consumer.class);

  final int firstExpected = 4;
  int processed = 0;
  for (JCas jcas : iteratePipeline(readerDesc, incrementStep, incrementStep, incrementStep,
          consumerStep)) {
    int expected = firstExpected + processed;
    assertEquals(expected, Consumer.textResult);
    assertEquals(expected, Integer.parseInt(jcas.getDocumentText()));
    processed++;
  }
}
Example #5
Source File: AggregateCollectionReader.java From bluima with Apache License 2.0 | 6 votes |
/**
 * Creates an aggregate reader over the given readers and positions it on the first one.
 *
 * @param readers
 *            the readers to aggregate
 * @param tsd
 *            type system used to build this reader's own description
 * @throws RuntimeException
 *             wrapping any {@code ResourceInitializationException} raised during set-up
 */
public AggregateCollectionReader(List<CollectionReader> readers, TypeSystemDescription tsd) {
    try {
        CollectionReaderDescription description = CollectionReaderFactory
                .createReaderDescription(AggregateCollectionReader.class, tsd);
        ConfigurationParameterSettings settings = description.getMetaData()
                .getConfigurationParameterSettings();

        // pass the description's own parameter settings on to initialize()
        Map<String, Object> extraParams = new HashMap<String, Object>();
        extraParams.put(CollectionReader.PARAM_CONFIG_PARAM_SETTINGS, settings);
        initialize(description, extraParams);

        this.readers = readers;
        this.readerIterator = readers.iterator();
        // NOTE(review): next() throws NoSuchElementException when the list is empty
        this.currentReader = this.readerIterator.next();
    } catch (ResourceInitializationException rie) {
        throw new RuntimeException(rie);
    }
}
Example #6
Source File: Conll2003AidaReaderTest.java From ambiverse-nlu with Apache License 2.0 | 6 votes |
private void callReader(int begin, int end) throws NoSuchMethodException, MissingSettingException, IOException, ClassNotFoundException, UIMAException { CollectionReaderDescription readerDescription = Reader.getCollectionReaderDescription(Reader.COLLECTION_FORMAT.AIDA, PARAM_SOURCE_LOCATION, "src/test/resources/ner/test_collections/", PARAM_PATTERNS, "CoNLL-YAGO_ext_small_en.tsv", // 5 docs total PARAM_LANGUAGE, "en", PARAM_SINGLE_FILE, true, PARAM_FIRSTDOCUMENT, begin, PARAM_LASTDOCUMENT, end, PARAM_ORDER, OrderType.WORD_POS_POSITION_MENTION_ENTITY_TYPE ); SimplePipeline.runPipeline(readerDescription, AnalysisEngineFactory.createEngineDescription(CasDumpWriter.class, PARAM_OUTPUT_FILE, "casdump.txt")); }
Example #7
Source File: CasMultiplierTest.java From uima-uimafit with Apache License 2.0 | 6 votes |
/** * Simulates a CPE with CAS multipliers that always read one CAS and always produce one CAS. * It actually appears to work despite CPE not supporting CAS multipliers. */ @SuppressWarnings("javadoc") @Test public void testRunPipeline() throws Exception { CollectionReaderDescription reader = createReaderDescription(Reader.class); AnalysisEngineDescription incrementor = createEngineDescription(Incrementor.class); AnalysisEngineDescription consumer = createEngineDescription(Consumer.class); AnalysisEngineDescription aggregate = createEngineDescription(incrementor, incrementor, incrementor, consumer); runPipeline(reader, aggregate); // The order in which the consumer sees the CASes is arbitrary, in particular because we never // tell the CPE that the aggregate which contains the consumer cannot be scaled out. assertFalse(aggregate.getAnalysisEngineMetaData().getOperationalProperties() .isMultipleDeploymentAllowed()); Collections.sort(Consumer.result); assertEquals(asList(4,5,6,7,8,9,10,11,12,13), Consumer.result); }
Example #8
Source File: JCasIterator.java From uima-uimafit with Apache License 2.0 | 6 votes |
/** * Iterate over the documents loaded by the given reader, running the analysis engines on each * one before yielding them. By default, components <b>DO get</b> life-cycle events, such as * collectionProcessComplete or destroy when this constructor is used. * * @param aResMgr * The {@link ResourceManager} used to create the components and the JCas. If this * parameter is {@code null} then {@link ResourceManagerFactory#newResourceManager()} * will be used to obtain a resource manager. If a new resource manager was internally * created, it is destroyed at the end of the pipeline (if {@link #isSelfDestroy()}). * @param aReader * The CollectionReader for loading documents. * @param aEngines * The AnalysisEngines for processing documents. * @throws ResourceInitializationException * if a failure occurs during initialization of the components * @throws CASException * if the JCas could not be initialized */ public JCasIterator(final ResourceManager aResMgr, final CollectionReaderDescription aReader, final AnalysisEngineDescription... aEngines) throws CASException, ResourceInitializationException { selfComplete = true; selfDestroy = true; if (aResMgr == null) { resMgr = newResourceManager(); resourceManagerCreatedInternally = true; } else { resMgr = aResMgr; resourceManagerCreatedInternally = false; } collectionReader = produceCollectionReader(aReader, resMgr, null); analysisEngines = new AnalysisEngine[] { produceAnalysisEngine(createEngineDescription(aEngines), resMgr, null) }; jCas = createCas(resMgr, collectionReader, analysisEngines); collectionReader.typeSystemInit(jCas.getTypeSystem()); }
Example #9
Source File: CpePipeline.java From uima-uimafit with Apache License 2.0 | 5 votes |
/** * Run the CollectionReader and AnalysisEngines as a multi-threaded pipeline. * * @param parallelism * Number of threads to use when running the analysis engines in the CPE. * @param readerDesc * The CollectionReader that loads the documents into the CAS. * @param descs * Primitive AnalysisEngineDescriptions that process the CAS, in order. If you have a mix * of primitive and aggregate engines, then please create the AnalysisEngines yourself * and call the other runPipeline method. * @throws SAXException * if there was a XML-related problem materializing the component descriptors that are * referenced from the CPE descriptor * @throws IOException * if there was a I/O-related problem materializing the component descriptors that are * referenced from the CPE descriptor * @throws CpeDescriptorException * if there was a problem configuring the CPE descriptor * @throws ResourceInitializationException * if there was a problem initializing or running the CPE. * @throws InvalidXMLException * if there was a problem initializing or running the CPE. * @throws AnalysisEngineProcessException * if there was a problem running the CPE. */ public static void runPipeline(final int parallelism, final CollectionReaderDescription readerDesc, final AnalysisEngineDescription... 
descs) throws SAXException, CpeDescriptorException, IOException, ResourceInitializationException, InvalidXMLException, AnalysisEngineProcessException { // Create AAE final AnalysisEngineDescription aaeDesc = createEngineDescription(descs); CpeBuilder builder = new CpeBuilder(); builder.setReader(readerDesc); builder.setAnalysisEngine(aaeDesc); builder.setMaxProcessingUnitThreadCount(Runtime.getRuntime().availableProcessors() - 1); StatusCallbackListenerImpl status = new StatusCallbackListenerImpl(); CollectionProcessingEngine engine = builder.createCpe(status); engine.process(); try { synchronized (status) { while (status.isProcessing) { status.wait(); } } } catch (InterruptedException e) { // Do nothing } if (status.exceptions.size() > 0) { throw new AnalysisEngineProcessException(status.exceptions.get(0)); } }
Example #10
Source File: MultiPageEditor.java From uima-uimaj with Apache License 2.0 | 5 votes |
/** * Link local processing descriptors from ae. * * @param d the d */ // ************************************************************** private void linkLocalProcessingDescriptorsFromAe(CollectionReaderDescription d) { d.setImplementationName(aeDescription.getAnnotatorImplementationName()); d.setFrameworkImplementation(aeDescription.getFrameworkImplementation()); linkCommonCollectionDescriptorsFromAe(d); }
Example #11
Source File: Conll2003ReaderTcBmeow.java From ambiverse-nlu with Apache License 2.0 | 5 votes |
/**
 * Initializes this component and produces the wrapped collection reader from a description
 * built for {@code readerClassName} with the parameters derived from the context.
 *
 * @param aContext the UIMA context passed on to the superclass and used to obtain the
 *                 configuration parameters
 * @throws ResourceInitializationException if initialization or reader creation fails
 */
@Override
public void initialize(UimaContext aContext) throws ResourceInitializationException {
  super.initialize(aContext);

  CollectionReaderDescription description =
      createReaderDescription(readerClassName, getConfigurationParams(aContext));

  reader = UIMAFramework.produceCollectionReader(description, getResourceManager(), null);
}
Example #12
Source File: XMLParser_impl.java From uima-uimaj with Apache License 2.0 | 5 votes |
public CollectionReaderDescription parseCollectionReaderDescription(XMLInputSource aInput, ParsingOptions aOptions) throws InvalidXMLException { // attempt to locate resource specifier schema XMLizable object = parse(aInput, RESOURCE_SPECIFIER_NAMESPACE, SCHEMA_URL, aOptions); if (object instanceof CollectionReaderDescription) { return (CollectionReaderDescription) object; } else { throw new InvalidXMLException(InvalidXMLException.INVALID_CLASS, new Object[] { CollectionReaderDescription.class.getName(), object.getClass().getName() }); } }
Example #13
Source File: ManualEvaluation.java From ambiverse-nlu with Apache License 2.0 | 5 votes |
private static void evaluateTrainedFile(String fileName) throws IOException, UIMAException { Path directoryPath; String trainedFile = directory + fileName; if (singleLabelling) { directoryPath = Paths.get(directory, fileName + "-singleLabel-evaluation"); Path trainedPath = generateSingleLabeledFile(trainedFile); trainedFile = trainedPath.toString(); } else { directoryPath = Paths.get(trainedFile + "-evaluation"); } Files.createDirectory(directoryPath); // produce jcas with trained file CollectionReaderDescription reader = createReaderDescription(Conll2003AidaReader.class, PARAM_LANGUAGE, language, Conll2003AidaReader.PARAM_SINGLE_FILE, true, Conll2003AidaReader.PARAM_ORDER, WORD_POSITION_TYPE, PARAM_SOURCE_LOCATION, trainedFile, Conll2003AidaReader.PARAM_MANUAL_TOKENS_NER, false, Conll2003AidaReader.PARAM_NAMED_ENTITY_PER_TOKEN, true); AnalysisEngineDescription mentionSpansWriter = createEngineDescription(MentionSpansEvaluationWriter.class, MentionSpansEvaluationWriter.PARAM_OUTPUT_FILE, directoryPath.toString() + "/ManualSpanEvaluation.txt"); System.out.println("Running mention spans evaluation"); SimplePipeline.runPipeline(reader, manualAnnotatorPerMentionDescription, nerMentionAnnotatorDescription, mentionSpansWriter); AnalysisEngineDescription predictionsWriter = createEngineDescription(PredictionsWriter.class, PredictionsWriter.PARAM_LANGUAGE, language, PredictionsWriter.PARAM_MENTION_OUTPUT_FILE, directoryPath.toString() + "/ConllMentionEvaluation.txt", PredictionsWriter.PARAM_TOKEN_OUTPUT_FILE, directoryPath.toString() + "/ConllTokenEvaluation.txt", PredictionsWriter.PARAM_KNOW_NER, true, PredictionsWriter.PARAM_POSITION_TYPE, ConllEvaluation.TrainedPositionType.ORIGINAL); System.out.println("Running tokens and mentions evaluation"); SimplePipeline.runPipeline(reader, manualAnnotatorPerTokenDescription, predictionsWriter); }
Example #14
Source File: CollectionReaderDescription_implTest.java From uima-uimaj with Apache License 2.0 | 5 votes |
public void testSerialization() throws Exception { try { // serialize objects to byte array byte[] testDescBytes = SerializationUtils.serialize(mTestDesc); // deserialize CollectionReaderDescription newDesc = (CollectionReaderDescription) SerializationUtils .deserialize(testDescBytes); Assert.assertEquals(mTestDesc, newDesc); } catch (Exception e) { JUnitExtension.handleException(e); } }
Example #15
Source File: ExternalResourceFactory.java From uima-uimafit with Apache License 2.0 | 5 votes |
/**
 * Sets the external resource dependencies on a resource specifier. The setter lives on
 * different sub-types, so the specifier is dispatched by its runtime type.
 *
 * @param aDesc
 *          the specifier to update; must be a collection reader or analysis engine
 *          description
 * @param aDependencies
 *          the dependencies to set
 * @throws IllegalArgumentException
 *           if the sub-class passed is not supported.
 */
private static void setResourceDependencies(ResourceSpecifier aDesc,
        ExternalResourceDependency[] aDependencies) {
  if (aDesc instanceof CollectionReaderDescription) {
    CollectionReaderDescription readerDesc = (CollectionReaderDescription) aDesc;
    readerDesc.setExternalResourceDependencies(aDependencies);
    return;
  }

  if (aDesc instanceof AnalysisEngineDescription) {
    AnalysisEngineDescription engineDesc = (AnalysisEngineDescription) aDesc;
    engineDesc.setExternalResourceDependencies(aDependencies);
    return;
  }

  throw new IllegalArgumentException(
          "Resource specified cannot have external resource dependencies");
}
Example #16
Source File: SparkUimaUtils.java From ambiverse-nlu with Apache License 2.0 | 5 votes |
public static void createSequenceFile(Object[] params, String uri) throws URISyntaxException, IOException, UIMAException, NoSuchMethodException, MissingSettingException, ClassNotFoundException { Configuration conf = new Configuration(); Path path = new Path(uri); Writer writer = SequenceFile.createWriter( conf, Writer.file(path), Writer.keyClass(Text.class), Writer.valueClass(SCAS.class)); int count = 0; CollectionReaderDescription readerDescription = Reader.getCollectionReaderDescription(Reader.COLLECTION_FORMAT.NYT, params); for (JCas jCas : SimplePipelineCasPoolIterator.iteratePipeline(20, readerDescription)) { if(JCasUtil.exists(jCas, DocumentMetaData.class)) { ++count; // Get the ID. DocumentMetaData dmd = JCasUtil.selectSingle(jCas, DocumentMetaData.class); String docId = "NULL"; if (dmd != null) { docId = dmd.getDocumentId(); } else { throw new IOException("No Document ID for xml: " + jCas.getView("xml").getDocumentText()); } Text docIdText = new Text(docId); SCAS scas = new SCAS(jCas.getCas()); writer.append(docIdText, scas); } jCas.release(); } logger.info("Wrote " + count + " documents to " + uri); IOUtils.closeStream(writer); }
Example #17
Source File: ExternalResourceFactory.java From uima-uimafit with Apache License 2.0 | 5 votes |
/**
 * Reads the external resource dependencies from a resource specifier. The getter lives on
 * different sub-types, so the specifier is dispatched by its runtime type.
 *
 * @param aDesc
 *          the specifier to read from; must be a collection reader or analysis engine
 *          description
 * @return the external resource dependencies declared on the specifier
 * @throws IllegalArgumentException
 *           if the sub-class passed is not supported.
 */
private static ExternalResourceDependency[] getResourceDependencies(
        ResourceSpecifier aDesc) {
  if (aDesc instanceof CollectionReaderDescription) {
    CollectionReaderDescription readerDesc = (CollectionReaderDescription) aDesc;
    return readerDesc.getExternalResourceDependencies();
  }

  if (aDesc instanceof AnalysisEngineDescription) {
    AnalysisEngineDescription engineDesc = (AnalysisEngineDescription) aDesc;
    return engineDesc.getExternalResourceDependencies();
  }

  throw new IllegalArgumentException(
          "Resource specified cannot have external resource dependencies");
}
Example #18
Source File: CpeBuilder.java From bluima with Apache License 2.0 | 5 votes |
@Override public void setReader(CollectionReaderDescription aDesc) throws IOException, SAXException, CpeDescriptorException { // Remove all collection readers cpeDesc.setAllCollectionCollectionReaders(new CpeCollectionReader[0]); URL descUrl = materializeDescriptor(aDesc).toURI().toURL(); CpeCollectionReader reader = produceCollectionReader(descUrl.toString()); cpeDesc.addCollectionReader(reader); }
Example #19
Source File: MultiPageEditor.java From uima-uimaj with Apache License 2.0 | 5 votes |
/**
 * Creates a new analysis engine description for the editor, copies the implementation name
 * and framework implementation from the given collection reader description into it, and
 * then links the local processing descriptors to it.
 *
 * @param d the collection reader description to copy from
 * @throws ResourceInitializationException the resource initialization exception
 */
private void createAndLinkLocalProcessingDescriptorsToAe(CollectionReaderDescription d)
        throws ResourceInitializationException {
  aeDescription = UIMAFramework.getResourceSpecifierFactory().createAnalysisEngineDescription();

  String implementationName = d.getImplementationName();
  aeDescription.setAnnotatorImplementationName(implementationName);

  String frameworkImplementation = d.getFrameworkImplementation();
  aeDescription.setFrameworkImplementation(frameworkImplementation);

  linkLocalProcessingDescriptorsToAe(d);
}
Example #20
Source File: CpeBuilder.java From bluima with Apache License 2.0 | 5 votes |
/**
 * Creates a builder with the given reader and maximum processing unit thread count.
 *
 * @param aMaxProcessingUnitThreatCount
 *            value passed to {@code setMaxProcessingUnitThreatCount}
 * @param aDesc
 *            reader description passed to {@code setReader}
 * @deprecated use default ctor and setters instead
 */
@Deprecated
public CpeBuilder(int aMaxProcessingUnitThreatCount, CollectionReaderDescription aDesc)
        throws IOException, SAXException, CpeDescriptorException {
    // the reader is configured first, then the thread count
    this.setReader(aDesc);
    this.setMaxProcessingUnitThreatCount(aMaxProcessingUnitThreatCount);
}
Example #21
Source File: UimaHelpers.java From biomedicus with Apache License 2.0 | 5 votes |
/**
 * Loads a collection reader description from the XML descriptor at the given path.
 *
 * @param path location of the descriptor file
 * @return the parsed collection reader description
 * @throws BiomedicusException wrapping any {@code IOException} or
 *                             {@code InvalidXMLException} raised while reading or parsing
 */
public static CollectionReaderDescription loadCollectionReaderDescription(Path path)
    throws BiomedicusException {
  try {
    XMLInputSource source = new XMLInputSource(path.toFile());
    return UIMAFramework.getXMLParser().parseCollectionReaderDescription(source);
  } catch (IOException | InvalidXMLException e) {
    throw new BiomedicusException(e);
  }
}
Example #22
Source File: CollectionReaderFactory_implTest.java From uima-uimaj with Apache License 2.0 | 5 votes |
/**
 * Checks that producing a collection reader from a description with an unknown framework
 * implementation fails with a properly localized
 * {@code UNSUPPORTED_FRAMEWORK_IMPLEMENTATION} error.
 */
public void testInvalidFrameworkImplementation() {
  CollectionReaderDescription desc = new CollectionReaderDescription_impl();
  desc.setFrameworkImplementation("foo");
  try {
    // typed empty map instead of the raw Collections.EMPTY_MAP constant
    ccFactory.produceResource(CollectionReader.class, desc,
            Collections.<String, Object>emptyMap());
    fail();
  } catch (ResourceInitializationException e) {
    assertNotNull(e.getMessage());
    assertFalse(e.getMessage().startsWith("EXCEPTION MESSAGE LOCALIZATION FAILED"));
    // expected value first, actual second, so a failure report reads the right way round
    assertEquals(ResourceInitializationException.UNSUPPORTED_FRAMEWORK_IMPLEMENTATION,
            e.getMessageKey());
  }
}
Example #23
Source File: CasDataCollectionReader_ImplBase.java From uima-uimaj with Apache License 2.0 | 5 votes |
/** * Called by the framework to initialize this Collection Reader. Subclasses should generally NOT * override this method; instead they should override the zero-argument {@link #initialize()} * method and access metadata via the {@link #getProcessingResourceMetaData()} method. This method * is non-final only for legacy reasons. * * @see org.apache.uima.resource.Resource#initialize(org.apache.uima.resource.ResourceSpecifier, * java.util.Map) */ public boolean initialize(ResourceSpecifier aSpecifier, Map<String, Object> aAdditionalParams) throws ResourceInitializationException { // aSpecifier must be a CollectionReaderDescription if (aSpecifier instanceof CollectionReaderDescription) { // do framework intitialiation if (super.initialize(aSpecifier, aAdditionalParams)) { // do user initialization initialize(); return true; } } return false; }
Example #24
Source File: CollectionReaderFactoryTest.java From uima-uimafit with Apache License 2.0 | 5 votes |
/**
 * Checks the auto-detected types, type priorities and FS indexes: the Token, Sentence and
 * AnalyzedText types must be present, there must be exactly one priority list in the
 * expected order, and the reader description must carry one sorted index on Token.
 */
@Test
public void thatCreateReaderDescriptorAutoDetectionWorks() throws Exception {
  CollectionReaderDescription readerDesc = createReaderDescription(TestCR.class);

  final String tokenType = Token.class.getName();
  final String sentenceType = Sentence.class.getName();
  final String analyzedTextType = AnalyzedText.class.getName();

  // type system
  TypeSystemDescription tsd = createTypeSystemDescription();
  assertThat(tsd.getType(tokenType))
      .as("Token type auto-detection")
      .isNotNull();
  assertThat(tsd.getType(sentenceType))
      .as("Sentence type auto-detection")
      .isNotNull();
  assertThat(tsd.getType(analyzedTextType))
      .as("AnalyzedText type auto-detection")
      .isNotNull();

  // type priorities
  TypePriorityList[] priorityLists = typePriorities.getPriorityLists();
  assertThat(priorityLists.length).isEqualTo(1);
  assertThat(priorityLists[0].getTypes())
      .as("Type priorities auto-detection")
      .containsExactly(sentenceType, analyzedTextType, tokenType);

  // FS indexes declared on the reader description
  FsIndexDescription[] fsIndexes = readerDesc.getCollectionReaderMetaData()
      .getFsIndexCollection().getFsIndexes();
  assertThat(fsIndexes.length).isEqualTo(1);
  assertThat(fsIndexes[0])
      .extracting(FsIndexDescription::getLabel, FsIndexDescription::getTypeName,
          FsIndexDescription::getKind)
      .containsExactly("Automatically Scanned Index", tokenType,
          FsIndexDescription.KIND_SORTED);
}
Example #25
Source File: CollectionReaderFactoryTest.java From uima-uimafit with Apache License 2.0 | 5 votes |
/**
 * Checks the name, version, description, copyright and vendor in the metadata of the
 * description created for {@code TestCR}.
 */
@Test
public void testResourceMetaData() throws Exception {
  CollectionReaderDescription readerDesc =
      CollectionReaderFactory.createReaderDescription(TestCR.class);

  org.apache.uima.resource.metadata.ResourceMetaData metaData = readerDesc.getMetaData();

  assertEquals("dummy", metaData.getName());
  assertEquals("1.0", metaData.getVersion());
  assertEquals("Just a dummy", metaData.getDescription());
  assertEquals("ASL 2.0", metaData.getCopyright());
  assertEquals("uimaFIT", metaData.getVendor());
}
Example #26
Source File: DescriptorMakeUtil.java From uima-uimaj with Apache License 2.0 | 5 votes |
public static String makeCollectionReader(String descFileName, boolean shouldCrash, String functionName, int errorCount, String exceptionName, int documentCount) throws Exception { XMLInputSource in = new XMLInputSource(descFileName); CollectionReaderDescription crd = UIMAFramework.getXMLParser() .parseCollectionReaderDescription(in); crd.getCollectionReaderMetaData().getConfigurationParameterSettings().setParameterValue( "DocumentCount", documentCount); // set the function to crash, if desired if (shouldCrash) { crd.getCollectionReaderMetaData().getConfigurationParameterSettings().setParameterValue( "ErrorFunction", functionName); crd.getCollectionReaderMetaData().getConfigurationParameterSettings().setParameterValue( "ErrorCount", errorCount); crd.getCollectionReaderMetaData().getConfigurationParameterSettings().setParameterValue( "ErrorException", exceptionName); } File baseDir = JUnitExtension.getFile("CpmTests" + FS + "CpeDesc"); if (!baseDir.exists()) { baseDir.mkdir(); } File tmpFileName = new File(baseDir, "TmpCollectionReader.xml"); OutputStream out = new FileOutputStream(tmpFileName); serializeDescriptor(crd, out); return tmpFileName.getAbsolutePath(); }
Example #27
Source File: Conll2012FormatSupport.java From webanno with Apache License 2.0 | 5 votes |
@Override public CollectionReaderDescription getReaderDescription(TypeSystemDescription aTSD) throws ResourceInitializationException { return createReaderDescription(Conll2012Reader.class, aTSD, // Constituents are not supported by WebAnno and trying to read a file which does // not have them triggers an NPE in DKPro Core 1.11.0 Conll2012Reader.PARAM_READ_CONSTITUENT, false); }
Example #28
Source File: XmiFormatSupport.java From webanno with Apache License 2.0 | 5 votes |
/**
 * Creates the description of the XMI reader with lenient mode enabled.
 *
 * @param aTSD type system description; not used by this implementation
 * @return the reader description
 */
@Override
public CollectionReaderDescription getReaderDescription(TypeSystemDescription aTSD)
    throws ResourceInitializationException
{
    // NOTE(review): aTSD is not forwarded to createReaderDescription here -- confirm
    // that leaving the type system out is intended.
    CollectionReaderDescription description = createReaderDescription(
            XmiReader.class,
            XmiReader.PARAM_LENIENT, true);
    return description;
}
Example #29
Source File: AggregateWithReaderTest.java From uima-uimafit with Apache License 2.0 | 5 votes |
/**
 * Demonstrates a collection reader running as a delegate of an aggregate engine: the
 * aggregate is declared to output new CASes, and the produced CASes are consumed directly
 * from the iterator instead of being written to files.
 */
@Test
public void demoAggregateWithReader() throws UIMAException {
  ResourceSpecifierFactory specFactory = UIMAFramework.getResourceSpecifierFactory();

  // delegate 1: the reader
  CollectionReaderDescription readerDesc = specFactory.createCollectionReaderDescription();
  readerDesc.getMetaData().setName("reader");
  readerDesc.setImplementationName(SimpleReader.class.getName());

  // delegate 2: the analyzer
  AnalysisEngineDescription analyzerDesc = specFactory.createAnalysisEngineDescription();
  analyzerDesc.getMetaData().setName("analyzer");
  analyzerDesc.setPrimitive(true);
  analyzerDesc.setImplementationName(SimpleAnalyzer.class.getName());

  // fixed flow: reader first, then analyzer
  FixedFlow fixedFlow = specFactory.createFixedFlow();
  fixedFlow.setFixedFlow(new String[] { "reader", "analyzer" });

  // the aggregate wiring both delegates together
  AnalysisEngineDescription aggregateDesc = specFactory.createAnalysisEngineDescription();
  aggregateDesc.getMetaData().setName("aggregate");
  aggregateDesc.getAnalysisEngineMetaData().setFlowConstraints(fixedFlow);
  aggregateDesc.getAnalysisEngineMetaData().getOperationalProperties()
          .setOutputsNewCASes(true);
  aggregateDesc.getAnalysisEngineMetaData().getOperationalProperties()
          .setMultipleDeploymentAllowed(false);
  aggregateDesc.setPrimitive(false);
  aggregateDesc.getDelegateAnalysisEngineSpecifiersWithImports().put("reader", readerDesc);
  aggregateDesc.getDelegateAnalysisEngineSpecifiersWithImports().put("analyzer", analyzerDesc);

  AnalysisEngine pipeline = UIMAFramework.produceAnalysisEngine(aggregateDesc);

  // print text and language of every CAS the aggregate produces
  for (CasIterator produced = pipeline.processAndOutputNewCASes(pipeline.newCAS());
          produced.hasNext();) {
    CAS cas = produced.next();
    System.out.printf("[%s] is [%s]%n", cas.getDocumentText(), cas.getDocumentLanguage());
  }
}
Example #30
Source File: TeiReaderTest.java From webanno with Apache License 2.0 | 5 votes |
/**
 * Reads the TEI documents under "classpath:/local/" and checks, for each JCas, the number
 * of tokens, POS tags, lemmas, named entities and sentences as well as the text of the
 * first sentence. Ignored because no TEI test data is available.
 */
@Test
@Ignore("No TEI yet to opensource ")
public void testTeiReader() throws Exception {
  CollectionReaderDescription teiReader = createReaderDescription(
      TeiReader.class,
      TeiReader.PARAM_LANGUAGE, "en",
      TeiReader.PARAM_SOURCE_LOCATION, "classpath:/local/",
      TeiReader.PARAM_PATTERNS, new String[] { "[+]*.xml" });

  final String expectedFirstSentence = "70 I DAG.";

  for (JCas jcas : new JCasIterable(teiReader)) {
    DocumentMetaData metaData = DocumentMetaData.get(jcas);
    String documentText = jcas.getDocumentText();

    System.out.printf("%s - %d%n", metaData.getDocumentId(), documentText.length());
    System.out.println(jcas.getDocumentLanguage());

    // annotation counts
    assertEquals(2235, JCasUtil.select(jcas, Token.class).size());
    assertEquals(745, JCasUtil.select(jcas, POS.class).size());
    assertEquals(745, JCasUtil.select(jcas, Lemma.class).size());
    assertEquals(0, JCasUtil.select(jcas, NamedEntity.class).size());
    assertEquals(30, JCasUtil.select(jcas, Sentence.class).size());

    // covered text of the first sentence
    assertEquals(expectedFirstSentence,
        JCasUtil.select(jcas, Sentence.class).iterator().next().getCoveredText());
  }
}