Java Code Examples for org.apache.uima.fit.factory.CollectionReaderFactory#createReader()
The following examples show how to use
org.apache.uima.fit.factory.CollectionReaderFactory#createReader().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: BioNLPGeniaEventsCollectionReaderTest.java From bluima with Apache License 2.0 | 6 votes |
@Test public void test() throws Exception { CollectionReader cr = CollectionReaderFactory.createReader( BioNLPGeniaEventsCollectionReader.class); int i = 0; while (cr.hasNext()) { CAS cas = CasCreationUtils.createCas(cr .getProcessingResourceMetaData()); cr.getNext(cas); // if (createHtml) // viewer.createHtml(cas.getJCas(), cas.getTypeSystem(), // styleMapFile, new File("target/" + i)); i++; } cr.close(); assertEquals(259, i); }
Example 2
Source File: BioNLPGeniaEventsReaderTest.java From bluima with Apache License 2.0 | 6 votes |
/**
 * Reads every document delivered by a {@link BioNLPGeniaEventsCollectionReader} that is
 * configured with {@code TEST_DIR} as its input directory parameter, and checks that three
 * documents are read.
 *
 * @throws Exception if the reader cannot be created or a document cannot be read
 */
@Test
public void testCount() throws Exception {
    CollectionReader cr = CollectionReaderFactory.createReader(
            BioNLPGeniaEventsCollectionReader.class,
            BlueUima.PARAM_INPUT_DIRECTORY, TEST_DIR);
    int i = 0;
    try {
        while (cr.hasNext()) {
            // A new CAS is created for each document from the reader's own metadata.
            CAS cas = CasCreationUtils.createCas(cr.getProcessingResourceMetaData());
            cr.getNext(cas);
            LOG.debug(To.string("cas nr " + i, cas.getJCas()));
            i++;
        }
    } finally {
        // Release the reader even when reading a document fails.
        cr.close();
    }
    assertEquals(3, i);
}
Example 3
Source File: BaleenCollectionReaderTest.java From baleen with Apache License 2.0 | 6 votes |
/**
 * Creates a {@link DummyBaleenCollectionReader} wired to a fake content extractor, pulls
 * documents for as long as {@code hasNext()} returns true, and finally destroys the reader.
 */
@Test
public void testHasNextLooping() throws Exception {
    ExternalResourceDescription extractorDescription =
            ExternalResourceFactory.createNamedResourceDescription(
                    KEY_CONTENT_EXTRACTOR, FakeBaleenContentExtractor.class);

    DummyBaleenCollectionReader reader =
            (DummyBaleenCollectionReader) CollectionReaderFactory.createReader(
                    DummyBaleenCollectionReader.class,
                    BaleenCollectionReader.KEY_CONTENT_EXTRACTOR,
                    extractorDescription);

    while (reader.hasNext()) {
        // The shared JCas instance is fetched anew on every iteration.
        JCas sharedJCas = JCasSingleton.getJCasInstance();
        reader.getNext(sharedJCas.getCas());
    }

    reader.destroy();
}
Example 4
Source File: Biocreative2GeneCollectionReaderTest.java From bluima with Apache License 2.0 | 6 votes |
/**
 * Reads the first document from a {@link Biocreative2GeneCollectionReader} running in
 * {@code "test"} mode and checks that it carries two {@link BioEntityMention} annotations.
 */
@Test
public void testTestCorpus() throws Exception {
    CollectionReader reader = CollectionReaderFactory.createReader(
            Biocreative2GeneCollectionReader.class,
            BlueUima.PARAM_MODE, "test");

    CAS firstDocument = CasCreationUtils.createCas(reader.getProcessingResourceMetaData());
    reader.getNext(firstDocument);

    Collection<BioEntityMention> mentions =
            JCasUtil.select(firstDocument.getJCas(), BioEntityMention.class);
    int mentionCount = mentions.size();
    assertEquals(2, mentionCount);

    reader.close();
}
Example 5
Source File: ActiveMQTransportsTest.java From baleen with Apache License 2.0 | 5 votes |
/**
 * Builds the ActiveMQ transport receiver used by the test, bound to the shared ActiveMQ
 * resource ({@code mqerd}) and the content extractor resource ({@code ceerd}).
 *
 * @return the created reader, cast to {@link BaleenCollectionReader}
 * @throws ResourceInitializationException if the reader cannot be created
 */
private BaleenCollectionReader createReciever() throws ResourceInitializationException {
    BaleenCollectionReader receiver =
            (BaleenCollectionReader) CollectionReaderFactory.createReader(
                    ActiveMQTransportReceiver.class,
                    TypeSystemSingleton.getTypeSystemDescriptionInstance(),
                    SharedActiveMQResource.RESOURCE_KEY,
                    mqerd,
                    KEY_CONTENT_EXTRACTOR,
                    ceerd);
    return receiver;
}
Example 6
Source File: PubmedArchiveCollectionReader.java From bluima with Apache License 2.0 | 5 votes |
/**
 * Creates a {@link PubmedArchiveCollectionReader} whose input directory parameter is the
 * absolute path of the file that {@code ResourceHelper.getFile(path)} resolves.
 *
 * @param path location handed to {@code ResourceHelper.getFile}
 * @return the configured collection reader
 * @throws ResourceInitializationException if the reader cannot be created
 * @throws FileNotFoundException declared by this method; presumably raised by
 *         {@code ResourceHelper.getFile} when the path cannot be resolved (verify there)
 */
public static CollectionReader getCR(String path)
        throws ResourceInitializationException, FileNotFoundException {
    String inputDirectory = ResourceHelper.getFile(path).getAbsolutePath();
    return CollectionReaderFactory.createReader(
            PubmedArchiveCollectionReader.class,
            BlueUima.PARAM_INPUT_DIRECTORY,
            inputDirectory);
}
Example 7
Source File: PubmedWebServiceCollectionReader.java From bluima with Apache License 2.0 | 5 votes |
public static CollectionReader getCR(String query, int nrResults) throws ResourceInitializationException { return CollectionReaderFactory.createReader( PubmedWebServiceCollectionReader.class, BlueUima.PARAM_MAX_NR_RESULTS, nrResults,// BlueUima.PARAM_QUERY, query); }
Example 8
Source File: RedisTransportsTest.java From baleen with Apache License 2.0 | 5 votes |
/**
 * Builds the Redis transport receiver used by the test, bound to the shared Redis resource
 * ({@code erd}) and a fake content extractor.
 *
 * @return the created reader, cast to {@link BaleenCollectionReader}
 * @throws ResourceInitializationException if the reader cannot be created
 */
private BaleenCollectionReader createReciever() throws ResourceInitializationException {
    BaleenCollectionReader receiver =
            (BaleenCollectionReader) CollectionReaderFactory.createReader(
                    RedisTransportReceiver.class,
                    TypeSystemSingleton.getTypeSystemDescriptionInstance(),
                    SharedRedisResource.RESOURCE_KEY,
                    erd,
                    KEY_CONTENT_EXTRACTOR,
                    ExternalResourceFactory.createNamedResourceDescription(
                            KEY_CONTENT_EXTRACTOR, FakeBaleenContentExtractor.class));
    return receiver;
}
Example 9
Source File: RabbitMQTransportsTest.java From baleen with Apache License 2.0 | 5 votes |
/**
 * Builds the RabbitMQ transport receiver used by the test, bound to the shared RabbitMQ
 * resource ({@code erd}) and a fake content extractor.
 *
 * @return the created reader, cast to {@link BaleenCollectionReader}
 * @throws ResourceInitializationException if the reader cannot be created
 */
private BaleenCollectionReader createReciever() throws ResourceInitializationException {
    BaleenCollectionReader receiver =
            (BaleenCollectionReader) CollectionReaderFactory.createReader(
                    RabbitMQTransportReceiver.class,
                    TypeSystemSingleton.getTypeSystemDescriptionInstance(),
                    SharedRabbitMQResource.RESOURCE_KEY,
                    erd,
                    KEY_CONTENT_EXTRACTOR,
                    ExternalResourceFactory.createNamedResourceDescription(
                            KEY_CONTENT_EXTRACTOR, FakeBaleenContentExtractor.class));
    return receiver;
}
Example 10
Source File: KafkaTransportsTest.java From baleen with Apache License 2.0 | 5 votes |
/**
 * Builds the Kafka transport receiver used by the test, bound to the shared Kafka resource
 * ({@code erd}) and a fake content extractor.
 *
 * @return the created reader, cast to {@link BaleenCollectionReader}
 * @throws ResourceInitializationException if the reader cannot be created
 */
private BaleenCollectionReader createReciever() throws ResourceInitializationException {
    BaleenCollectionReader receiver =
            (BaleenCollectionReader) CollectionReaderFactory.createReader(
                    KafkaTransportReceiver.class,
                    TypeSystemSingleton.getTypeSystemDescriptionInstance(),
                    SharedKafkaResource.RESOURCE_KEY,
                    erd,
                    KEY_CONTENT_EXTRACTOR,
                    ExternalResourceFactory.createNamedResourceDescription(
                            KEY_CONTENT_EXTRACTOR, FakeBaleenContentExtractor.class));
    return receiver;
}
Example 11
Source File: MemoryTransportsTest.java From baleen with Apache License 2.0 | 5 votes |
/**
 * Builds the in-memory transport receiver used by the test, bound to a fake content
 * extractor and the shared memory queue resource ({@code erd}).
 *
 * @return the created reader, cast to {@link MemoryTransportReceiver}
 * @throws ResourceInitializationException if the reader cannot be created
 */
private MemoryTransportReceiver createReciever() throws ResourceInitializationException {
    MemoryTransportReceiver receiver =
            (MemoryTransportReceiver) CollectionReaderFactory.createReader(
                    MemoryTransportReceiver.class,
                    TypeSystemSingleton.getTypeSystemDescriptionInstance(),
                    KEY_CONTENT_EXTRACTOR,
                    ExternalResourceFactory.createNamedResourceDescription(
                            KEY_CONTENT_EXTRACTOR, FakeBaleenContentExtractor.class),
                    SharedMemoryQueueResource.RESOURCE_KEY,
                    erd);
    return receiver;
}
Example 12
Source File: AbstractReaderTest.java From baleen with Apache License 2.0 | 5 votes |
/**
 * Creates a reader of type {@code readerClass}, prepending the content extractor key/value
 * pair to the caller-supplied configuration arguments.
 *
 * @param args further configuration arguments, appended after the content extractor pair
 * @return the created reader, cast to {@link BaleenCollectionReader}
 * @throws ResourceInitializationException if the reader cannot be created
 */
protected BaleenCollectionReader getCollectionReader(Object... args)
        throws ResourceInitializationException {
    ImmutableList.Builder<Object> parameters = ImmutableList.builder();
    parameters.add(KEY_CONTENT_EXTRACTOR);
    parameters.add(contentExtractor);
    parameters.addAll(Arrays.asList(args));
    Object[] argumentWithExtractor = parameters.build().toArray();

    return (BaleenCollectionReader) CollectionReaderFactory.createReader(
            readerClass,
            TypeSystemSingleton.getTypeSystemDescriptionInstance(),
            argumentWithExtractor);
}
Example 13
Source File: PdfAnnoRendererTest.java From inception with Apache License 2.0 | 5 votes |
/**
 * Tests if anno file is correctly rendered for a given document
 *
 * @throws Exception if a test resource cannot be read or rendering fails
 */
@Test
public void testRender() throws Exception {
    String file = "src/test/resources/tcf04-karin-wl.xml";

    String pdftxt;
    // try-with-resources closes the scanner, and the file it opened, once the text is read
    try (Scanner scanner = new Scanner(
            new File("src/test/resources/rendererTestPdfExtract.txt"))) {
        // The end-of-input anchor is the delimiter, so next() returns the content as one token
        pdftxt = scanner.useDelimiter("\\Z").next();
    }

    CAS cas = JCasFactory.createJCas().getCas();
    CollectionReader reader = CollectionReaderFactory.createReader(TcfReader.class,
            TcfReader.PARAM_SOURCE_LOCATION, file);
    reader.getNext(cas);

    AnnotatorState state = new AnnotatorStateImpl(Mode.ANNOTATION);
    state.setPagingStrategy(new SentenceOrientedPagingStrategy());
    state.getPreferences().setWindowSize(10);
    state.setProject(project);

    // Pre-render the whole document text into the visual document model
    VDocument vdoc = new VDocument();
    preRenderer.render(vdoc, 0, cas.getDocumentText().length(), cas,
            schemaService.listAnnotationLayer(project));

    PdfExtractFile pdfExtractFile = new PdfExtractFile(pdftxt, new HashMap<>());
    PdfAnnoRenderer renderer = new PdfAnnoRenderer(schemaService,
            new ColoringServiceImpl(schemaService));
    PdfAnnoModel annoFile = renderer.render(state, vdoc, cas.getDocumentText(),
            pdfExtractFile, 0);

    assertThat(annoFile.getAnnoFileContent())
            .isEqualToNormalizingNewlines(contentOf(
                    new File("src/test/resources/rendererTestAnnoFile.anno"), UTF_8));
}
Example 14
Source File: AbstractSchedulerTest.java From baleen with Apache License 2.0 | 4 votes |
/**
 * Creates an instance of {@code clazz} through {@link CollectionReaderFactory} using the
 * given configuration arguments.
 *
 * @param args configuration arguments forwarded to {@code createReader}
 * @return the created reader, cast to {@code T}
 * @throws ResourceInitializationException if the reader cannot be created
 */
public T create(Object... args) throws ResourceInitializationException {
    T created = (T) CollectionReaderFactory.createReader(clazz, args);
    return created;
}
Example 15
Source File: RunExperiment.java From uima-uimafit with Apache License 2.0 | 4 votes |
public static void main(String[] args) throws UIMAException, IOException { // Choosing different location depending on whether we are in the actual uimaFIT source tree // or in the extracted examples from the binary distribution. String samplePosFileName; if (new File("src/main/resources").exists()) { samplePosFileName = "src/main/resources/org/apache/uima/fit/examples/pos/sample-gold.txt"; } else { samplePosFileName = "src/org/apache/uima/fit/examples/pos/sample-gold.txt"; } // The lineReader simply copies the lines from the input file into the // default view - one line per CAS CollectionReader lineReader = CollectionReaderFactory.createReader(LineReader.class, LineReader.PARAM_INPUT_FILE, samplePosFileName); AggregateBuilder builder = new AggregateBuilder(); // The goldTagger parses the data in the default view into Token objects // along with their part-of-speech tags which will be added to the // GOLD_VIEW AnalysisEngineDescription goldTagger = AnalysisEngineFactory.createEngineDescription( GoldTagger.class); builder.add(goldTagger); // The textCopier creates the SYSTEM_VIEW and set the text of this view // to that of the text found in GOLD_VIEW AnalysisEngineDescription textCopier = AnalysisEngineFactory.createEngineDescription( ViewTextCopierAnnotator.class, ViewTextCopierAnnotator.PARAM_SOURCE_VIEW_NAME, ViewNames.GOLD_VIEW, ViewTextCopierAnnotator.PARAM_DESTINATION_VIEW_NAME, ViewNames.SYSTEM_VIEW); builder.add(textCopier); // The sentenceAndTokenCopier copies Token and Sentence annotations in // the GOLD_VIEW into the SYSTEM_VIEW AnalysisEngineDescription sentenceAndTokenCopier = AnalysisEngineFactory .createEngineDescription(SentenceAndTokenCopier.class); builder.add(sentenceAndTokenCopier, ViewNames.VIEW1, ViewNames.GOLD_VIEW, ViewNames.VIEW2, ViewNames.SYSTEM_VIEW); // The baselineTagger is run on the SYSTEM_VIEW AnalysisEngineDescription baselineTagger = AnalysisEngineFactory.createEngineDescription( BaselineTagger.class); builder.add(baselineTagger, 
CAS.NAME_DEFAULT_SOFA, ViewNames.SYSTEM_VIEW); // The evaluator will compare the part-of-speech tags in the SYSTEM_VIEW // with those in the GOLD_VIEW AnalysisEngineDescription evaluator = AnalysisEngineFactory.createEngineDescription( Evaluator.class); builder.add(evaluator); // The xWriter writes out the contents of each CAS (one per sentence) to // an XMI file. It is instructive to open one of these // XMI files in the CAS Visual Debugger and look at the contents of each // view. AnalysisEngineDescription xWriter = AnalysisEngineFactory.createEngineDescription( XmiWriter.class, XmiWriter.PARAM_OUTPUT_DIRECTORY, "target/examples/pos/xmi"); builder.add(xWriter); // runs the collection reader and the aggregate AE. SimplePipeline.runPipeline(lineReader, builder.createAggregate()); }
Example 16
Source File: AbstractSchedulerTest.java From baleen with Apache License 2.0 | 4 votes |
/**
 * Creates an instance of {@code clazz} through {@link CollectionReaderFactory} without any
 * configuration arguments.
 *
 * @return the created reader, cast to {@code T}
 * @throws ResourceInitializationException if the reader cannot be created
 */
public T create() throws ResourceInitializationException {
    T created = (T) CollectionReaderFactory.createReader(clazz);
    return created;
}
Example 17
Source File: WebAnnoTsv3ReaderWriterTest.java From webanno with Apache License 2.0 | 4 votes |
/**
 * Round-trips {@code coref.tsv} through {@link WebannoTsv3Writer} and checks that the file
 * read back from the target folder has the same number of annotations per type as the
 * original. Only the first CAS delivered by each reader is compared.
 *
 * @throws Exception if a reader or writer cannot be created or the pipeline fails
 */
@Test
public void test() throws Exception {
    String targetFolder = "target/test-output/" + testContext.getTestOutputFolderName();

    CollectionReader reader = CollectionReaderFactory.createReader(
            WebannoTsv3Reader.class,
            WebannoTsv3Reader.PARAM_SOURCE_LOCATION, "src/test/resources/tsv3/",
            WebannoTsv3Reader.PARAM_PATTERNS, "coref.tsv");

    List<String> slotFeatures = new ArrayList<>();
    List<String> slotTargets = new ArrayList<>();
    List<String> linkTypes = new ArrayList<>();

    List<String> spanLayers = new ArrayList<>();
    spanLayers.add(NamedEntity.class.getName());
    spanLayers.add(POS.class.getName());
    spanLayers.add(Lemma.class.getName());

    List<String> chainLayers = new ArrayList<>();
    chainLayers.add("de.tudarmstadt.ukp.dkpro.core.api.coref.type.Coreference");

    List<String> relationLayers = new ArrayList<>();
    relationLayers.add(Dependency.class.getName());

    AnalysisEngineDescription writer = createEngineDescription(
            WebannoTsv3Writer.class,
            WebannoTsv3Writer.PARAM_TARGET_LOCATION, targetFolder,
            WebannoTsv3Writer.PARAM_STRIP_EXTENSION, true,
            WebannoTsv3Writer.PARAM_OVERWRITE, true,
            WebannoTsv3Writer.PARAM_SPAN_LAYERS, spanLayers,
            WebannoTsv3Writer.PARAM_SLOT_FEATS, slotFeatures,
            WebannoTsv3Writer.PARAM_SLOT_TARGETS, slotTargets,
            WebannoTsv3Writer.PARAM_LINK_TYPES, linkTypes,
            WebannoTsv3Writer.PARAM_CHAIN_LAYERS, chainLayers,
            WebannoTsv3Writer.PARAM_RELATION_LAYERS, relationLayers);

    runPipeline(reader, writer);

    // reader1 re-reads the original input; reader2 reads what the writer just produced
    CollectionReader reader1 = CollectionReaderFactory.createReader(
            WebannoTsv3Reader.class,
            WebannoTsv3Reader.PARAM_SOURCE_LOCATION, "src/test/resources/tsv3/",
            WebannoTsv3Reader.PARAM_PATTERNS, "coref.tsv");
    CollectionReader reader2 = CollectionReaderFactory.createReader(
            WebannoTsv3Reader.class,
            WebannoTsv3Reader.PARAM_SOURCE_LOCATION, targetFolder,
            WebannoTsv3Reader.PARAM_PATTERNS, "coref.tsv");

    CAS cas1 = JCasFactory.createJCas().getCas();
    reader1.getNext(cas1);
    CAS cas2 = JCasFactory.createJCas().getCas();
    reader2.getNext(cas2);

    // assertEquals(expected, actual): the original (cas1) is the expectation and the
    // round-tripped output (cas2) is the value under test, so failure messages read correctly.
    assertEquals(JCasUtil.select(cas1.getJCas(), Token.class).size(),
            JCasUtil.select(cas2.getJCas(), Token.class).size());
    assertEquals(JCasUtil.select(cas1.getJCas(), POS.class).size(),
            JCasUtil.select(cas2.getJCas(), POS.class).size());
    assertEquals(JCasUtil.select(cas1.getJCas(), Lemma.class).size(),
            JCasUtil.select(cas2.getJCas(), Lemma.class).size());
    assertEquals(JCasUtil.select(cas1.getJCas(), NamedEntity.class).size(),
            JCasUtil.select(cas2.getJCas(), NamedEntity.class).size());
    assertEquals(JCasUtil.select(cas1.getJCas(), Sentence.class).size(),
            JCasUtil.select(cas2.getJCas(), Sentence.class).size());
    assertEquals(JCasUtil.select(cas1.getJCas(), Dependency.class).size(),
            JCasUtil.select(cas2.getJCas(), Dependency.class).size());
}
Example 18
Source File: WebAnnoTsv3XReaderWriterTest.java From webanno with Apache License 2.0 | 4 votes |
/**
 * Round-trips {@code coref.tsv} through {@link WebannoTsv3XWriter} and checks that the file
 * read back from the target folder has the same number of annotations per type as the
 * original. Only the first CAS delivered by each reader is compared.
 *
 * @throws Exception if a reader or writer cannot be created or the pipeline fails
 */
@Test
public void test() throws Exception {
    String targetFolder = "target/test-output/" + testContext.getTestOutputFolderName();

    CollectionReader reader = CollectionReaderFactory.createReader(
            WebannoTsv3XReader.class,
            WebannoTsv3XReader.PARAM_SOURCE_LOCATION, "src/test/resources/tsv3/",
            WebannoTsv3XReader.PARAM_PATTERNS, "coref.tsv");

    AnalysisEngineDescription writer = createEngineDescription(
            WebannoTsv3XWriter.class,
            WebannoTsv3XWriter.PARAM_TARGET_LOCATION, targetFolder,
            WebannoTsv3XWriter.PARAM_STRIP_EXTENSION, true,
            WebannoTsv3XWriter.PARAM_OVERWRITE, true);

    runPipeline(reader, writer);

    // reader1 re-reads the original input; reader2 reads what the writer just produced
    CollectionReader reader1 = CollectionReaderFactory.createReader(
            WebannoTsv3XReader.class,
            WebannoTsv3XReader.PARAM_SOURCE_LOCATION, "src/test/resources/tsv3/",
            WebannoTsv3XReader.PARAM_PATTERNS, "coref.tsv");
    CollectionReader reader2 = CollectionReaderFactory.createReader(
            WebannoTsv3XReader.class,
            WebannoTsv3XReader.PARAM_SOURCE_LOCATION, targetFolder,
            WebannoTsv3XReader.PARAM_PATTERNS, "coref.tsv");

    CAS cas1 = JCasFactory.createJCas().getCas();
    reader1.getNext(cas1);
    CAS cas2 = JCasFactory.createJCas().getCas();
    reader2.getNext(cas2);

    // assertEquals(expected, actual): the original (cas1) is the expectation and the
    // round-tripped output (cas2) is the value under test, so failure messages read correctly.
    assertEquals(JCasUtil.select(cas1.getJCas(), Token.class).size(),
            JCasUtil.select(cas2.getJCas(), Token.class).size());
    assertEquals(JCasUtil.select(cas1.getJCas(), POS.class).size(),
            JCasUtil.select(cas2.getJCas(), POS.class).size());
    assertEquals(JCasUtil.select(cas1.getJCas(), Lemma.class).size(),
            JCasUtil.select(cas2.getJCas(), Lemma.class).size());
    assertEquals(JCasUtil.select(cas1.getJCas(), NamedEntity.class).size(),
            JCasUtil.select(cas2.getJCas(), NamedEntity.class).size());
    assertEquals(JCasUtil.select(cas1.getJCas(), Sentence.class).size(),
            JCasUtil.select(cas2.getJCas(), Sentence.class).size());
    assertEquals(JCasUtil.select(cas1.getJCas(), Dependency.class).size(),
            JCasUtil.select(cas2.getJCas(), Dependency.class).size());
}
Example 19
Source File: Biocreative2GeneCollectionReaderTest.java From bluima with Apache License 2.0 | 4 votes |
/** * <pre> * GENE.eval * P00001606T0076|14 33|alkaline phosphatases * P00001606T0076|37 50|5-nucleotidase * * train.in: * P00001606T0076 Comparison with alkaline phosphatases and 5-nucleotidase * </pre> */ @Test public void testTrainCorpus() throws Exception { CollectionReader cr = CollectionReaderFactory.createReader( Biocreative2GeneCollectionReader.class, BlueUima.PARAM_MODE, "train"); CAS cas = CasCreationUtils .createCas(cr.getProcessingResourceMetaData()); cr.getNext(cas); Collection<BioEntityMention> genes = JCasUtil.select(cas.getJCas(), BioEntityMention.class); assertEquals(2, genes.size()); Iterator<BioEntityMention> iterator = genes.iterator(); BioEntityMention gene = iterator.next(); assertEquals("alkaline phosphatases", gene.getCoveredText()); gene = iterator.next(); assertEquals("5-nucleotidase", gene.getCoveredText()); // move to 'P00027739T0000 Serum gamma glutamyltransferase in the // diagnosis of liver disease in cattle.' to test ALTGENE annotations for (int i = 0; i < 11; i++) { cas = CasCreationUtils .createCas(cr.getProcessingResourceMetaData()); cr.hasNext(); cr.getNext(cas); Header header = JCasUtil.selectSingle(cas.getJCas(), Header.class); LOG.debug("docid:{}, text:{}", header.getDocId(), cas.getDocumentText()); } genes = JCasUtil.select(cas.getJCas(), BioEntityMention.class); iterator = genes.iterator(); // check the 2 alternate forms assertEquals(2, genes.size()); gene = iterator.next(); LOG.debug(gene.getCoveredText()); assertEquals("Serum gamma glutamyltransferase", gene.getCoveredText()); gene = iterator.next(); LOG.debug(gene.getCoveredText()); assertEquals("gamma glutamyltransferase", gene.getCoveredText()); }
Example 20
Source File: PdfAnnoRendererTest.java From inception with Apache License 2.0 | 4 votes |
/**
 * Tests if given offsets for PDFAnno can be converted to offsets for the document in INCEpTION
 *
 * @throws Exception if a test resource cannot be read or the conversion fails
 */
@Test
public void testConvertToDocumentOffset() throws Exception {
    String file = "src/test/resources/tcf04-karin-wl.xml";

    String pdftxt;
    // try-with-resources closes the scanner, and the file it opened, once the text is read
    try (Scanner scanner = new Scanner(
            new File("src/test/resources/rendererTestPdfExtract.txt"))) {
        // The end-of-input anchor is the delimiter, so next() returns the content as one token
        pdftxt = scanner.useDelimiter("\\Z").next();
    }
    PdfExtractFile pdfExtractFile = new PdfExtractFile(pdftxt, new HashMap<>());

    CAS cas = JCasFactory.createJCas().getCas();
    CollectionReader reader = CollectionReaderFactory.createReader(TcfReader.class,
            TcfReader.PARAM_SOURCE_LOCATION, file);
    reader.getNext(cas);

    AnnotatorState state = new AnnotatorStateImpl(Mode.ANNOTATION);
    state.setPagingStrategy(new SentenceOrientedPagingStrategy());
    state.getPreferences().setWindowSize(10);
    state.setProject(project);

    DocumentModel documentModel = new DocumentModel(cas.getDocumentText());

    // List of PDFAnno offsets
    // indices represent line numbers in the PDFExtractFile for the according character
    List<Offset> offsets = new ArrayList<>();
    offsets.add(new Offset(3, 3));
    offsets.add(new Offset(3, 4));
    offsets.add(new Offset(3, 5));
    offsets.add(new Offset(3, 6));
    offsets.add(new Offset(3, 7));
    offsets.add(new Offset(3, 8));
    offsets.add(new Offset(6, 8));
    offsets.add(new Offset(7, 7));
    offsets.add(new Offset(7, 8));
    offsets.add(new Offset(8, 8));
    offsets.add(new Offset(8, 13));
    offsets.add(new Offset(28, 28));
    offsets.add(new Offset(28, 30));
    offsets.add(new Offset(35, 38));

    // convert to offsets for document in INCEpTION
    List<Offset> docOffsets = PdfAnnoRenderer.convertToDocumentOffsets(offsets,
            documentModel, pdfExtractFile);

    List<Offset> expectedOffsets = new ArrayList<>();
    expectedOffsets.add(new Offset(0, 0));
    expectedOffsets.add(new Offset(0, 1));
    expectedOffsets.add(new Offset(0, 2));
    expectedOffsets.add(new Offset(0, 3));
    expectedOffsets.add(new Offset(0, 4));
    expectedOffsets.add(new Offset(0, 6));
    expectedOffsets.add(new Offset(3, 6));
    expectedOffsets.add(new Offset(4, 4));
    expectedOffsets.add(new Offset(4, 6));
    expectedOffsets.add(new Offset(6, 6));
    expectedOffsets.add(new Offset(6, 11));
    expectedOffsets.add(new Offset(29, 29));
    expectedOffsets.add(new Offset(29, 31));
    expectedOffsets.add(new Offset(38, 41));

    assertThat(docOffsets).isEqualTo(expectedOffsets);
}