org.apache.uima.collection.CollectionReader Java Examples
The following examples show how to use
org.apache.uima.collection.CollectionReader.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: CurationTestUtils.java From webanno with Apache License 2.0 | 6 votes |
/**
 * Reads the XMI resource {@code src/test/resources/<aPath>} into a newly created CAS.
 *
 * @param aPath path of the XMI file, relative to {@code src/test/resources/}
 * @param aType optional additional type system; when non-null it is merged with the
 *        description returned by {@code TypeSystemDescriptionFactory} before the CAS is
 *        created, otherwise a default JCas is created
 * @return the CAS after one {@code getNext} call on the reader
 * @throws UIMAException if the reader or the CAS cannot be created
 * @throws IOException if reading fails
 */
public static CAS readXMI(String aPath, TypeSystemDescription aType)
        throws UIMAException, IOException {
    CollectionReader xmiReader = createReader(XmiReader.class,
            XmiReader.PARAM_SOURCE_LOCATION, "src/test/resources/" + aPath);

    CAS target;
    if (aType == null) {
        target = JCasFactory.createJCas().getCas();
    } else {
        // Combine the built-in types with the caller-supplied ones.
        List<TypeSystemDescription> typeSystems = new ArrayList<>();
        typeSystems.add(TypeSystemDescriptionFactory.createTypeSystemDescription());
        typeSystems.add(aType);
        target = JCasFactory.createJCas(CasCreationUtils.mergeTypeSystems(typeSystems))
                .getCas();
    }

    xmiReader.getNext(target);
    return target;
}
Example #2
Source File: BioNLPGeniaEventsCollectionReaderTest.java From bluima with Apache License 2.0 | 6 votes |
@Test public void test() throws Exception { CollectionReader cr = CollectionReaderFactory.createReader( BioNLPGeniaEventsCollectionReader.class); int i = 0; while (cr.hasNext()) { CAS cas = CasCreationUtils.createCas(cr .getProcessingResourceMetaData()); cr.getNext(cas); // if (createHtml) // viewer.createHtml(cas.getJCas(), cas.getTypeSystem(), // styleMapFile, new File("target/" + i)); i++; } cr.close(); assertEquals(259, i); }
Example #3
Source File: LineOrientedTextReaderTest.java From webanno with Apache License 2.0 | 6 votes |
@Test public void test() throws Exception { JCas doc = JCasFactory.createJCas(); CollectionReader reader = createReader(LineOrientedTextReader.class, LineOrientedTextReader.PARAM_SOURCE_LOCATION, "LICENSE.txt"); reader.getNext(doc.getCas()); // select(doc, Sentence.class).forEach(s -> System.out.println(s.getCoveredText())); assertEquals(169, select(doc, Sentence.class).size()); assertEquals(0, select(doc, Token.class).size()); }
Example #4
Source File: BaleenPipeline.java From baleen with Apache License 2.0 | 6 votes |
/**
 * Creates a pipeline from a raw YAML string.
 *
 * <p>The YAML is wrapped in a {@link YamlPipelineConfiguration} and passed, together with all
 * other arguments, to the configuration-based constructor.
 *
 * @param name Pipeline name
 * @param originalYaml The original YAML string that was used to build the pipeline
 * @param orderer The IPipelineOrderer to use to order the pipeline
 * @param collectionReader The collection reader
 * @param annotators The annotators to be ordered and used
 * @param consumers The consumers to be ordered and used
 * @throws IOException if error reading config
 * @deprecated Use {@link BaleenPipeline#BaleenPipeline(String, PipelineConfiguration,
 *     IPipelineOrderer, CollectionReader, List, List)}
 */
@Deprecated
public BaleenPipeline(String name, String originalYaml, IPipelineOrderer orderer,
        CollectionReader collectionReader, List<AnalysisEngine> annotators,
        List<AnalysisEngine> consumers) throws IOException {
    this(name, new YamlPipelineConfiguration(originalYaml), orderer, collectionReader,
            annotators, consumers);
}
Example #5
Source File: BioNLPGeniaEventsReaderTest.java From bluima with Apache License 2.0 | 6 votes |
/**
 * Reads every document from {@code TEST_DIR} with {@code BioNLPGeniaEventsCollectionReader},
 * logs each CAS at debug level, and checks that exactly 3 documents are returned.
 *
 * @throws Exception if the reader cannot be created or a document cannot be read
 */
@Test
public void testCount() throws Exception {
    CollectionReader cr = CollectionReaderFactory.createReader(
            BioNLPGeniaEventsCollectionReader.class,
            BlueUima.PARAM_INPUT_DIRECTORY, TEST_DIR);
    int docCount = 0;
    try {
        while (cr.hasNext()) {
            // A fresh CAS per document, typed from the reader's own metadata.
            CAS cas = CasCreationUtils.createCas(cr.getProcessingResourceMetaData());
            cr.getNext(cas);
            LOG.debug(To.string("cas nr " + docCount, cas.getJCas()));
            docCount++;
        }
    } finally {
        // Close the reader even if reading or logging fails part-way through.
        cr.close();
    }
    assertEquals(3, docCount);
}
Example #6
Source File: FormatSupportDescription.java From webanno with Apache License 2.0 | 6 votes |
/**
 * Builds the reader description for this format from the configured reader class name.
 *
 * @param aTSD type system description passed on to {@code createReaderDescription}
 * @return the description created for the class named by {@code readerClass}
 * @throws UnsupportedOperationException if {@code isReadable()} returns false
 * @throws ResourceInitializationException if the class named by {@code readerClass} cannot be
 *         found or is not a {@code CollectionReader}
 */
@Override
public CollectionReaderDescription getReaderDescription(TypeSystemDescription aTSD)
        throws ResourceInitializationException {
    if (!isReadable()) {
        throw new UnsupportedOperationException("The format [" + getName() + "] cannot be read");
    }

    Class<? extends CollectionReader> readerClazz;
    try {
        // asSubclass performs a checked narrowing, so a misconfigured class name that does
        // not denote a CollectionReader is rejected here instead of slipping through an
        // unchecked cast.
        readerClazz = Class.forName(readerClass).asSubclass(CollectionReader.class);
    } catch (ClassNotFoundException | ClassCastException e) {
        throw new ResourceInitializationException(e);
    }

    return createReaderDescription(readerClazz, aTSD);
}
Example #7
Source File: PubmedDatabaseCRTest.java From bluima with Apache License 2.0 | 6 votes |
@Test public void testAuthors() throws Exception { // http://www.ncbi.nlm.nih.gov/pubmed/?term=1&report=xml&format=text CollectionReader cr = createReader(PubmedDatabaseCR.class, BlueUima.PARAM_BETWEEN, new int[] { 0, 1 }, BlueUima.PARAM_SKIP_EMPTY_DOCS, false); String[] lastNames = { "Makar", "McMartin", "Palese", "Tephly" }; String[] foreNames = { "A B", "K E", "M", "T R" }; // AB___A B___Makar__-__KE___K // E___McMartin__-__M___M___Palese__-__TR___T R___Tephly for (JCas jCas : asList(cr)) { Header header = JCasUtil.selectSingle(jCas, Header.class); FSArray authors = header.getAuthors(); for (int i = 0; i < authors.size(); i++) { AuthorInfo a = (AuthorInfo) authors.get(i); assertEquals(foreNames[i], a.getForeName()); assertEquals(lastNames[i], a.getLastName()); } assertEquals("1976-01-16", header.getCopyright()); } }
Example #8
Source File: NamedEntityLinkerTest.java From inception with Apache License 2.0 | 6 votes |
/**
 * Reads the given files with a {@code Conll2002Reader} and returns one CAS per document.
 *
 * @param ds dataset whose language is passed to the reader
 * @param files files passed to the reader as its patterns parameter
 * @return the CASes in the order the reader produced them
 * @throws UIMAException if the reader or a CAS cannot be created
 * @throws IOException if reading fails
 */
private List<CAS> loadData(Dataset ds, File... files) throws UIMAException, IOException {
    CollectionReader conllReader = createReader(Conll2002Reader.class,
            Conll2002Reader.PARAM_PATTERNS, files,
            Conll2002Reader.PARAM_LANGUAGE, ds.getLanguage(),
            Conll2002Reader.PARAM_COLUMN_SEPARATOR,
            Conll2002Reader.ColumnSeparators.TAB.getName(),
            Conll2002Reader.PARAM_HAS_TOKEN_NUMBER, true,
            Conll2002Reader.PARAM_HAS_HEADER, true,
            Conll2002Reader.PARAM_HAS_EMBEDDED_NAMED_ENTITY, true);

    List<CAS> loaded = new ArrayList<>();
    for (; conllReader.hasNext();) {
        // Each document gets its own freshly created CAS.
        CAS documentCas = JCasFactory.createJCas().getCas();
        conllReader.getNext(documentCas);
        loaded.add(documentCas);
    }
    return loaded;
}
Example #9
Source File: DL4JSequenceRecommenderTest.java From inception with Apache License 2.0 | 6 votes |
/**
 * Loads the "germeval2014-de" dataset and reads the first document of its development files
 * with a {@code Conll2002Reader}.
 *
 * @return a JCas filled by a single {@code getNext} call on the reader
 * @throws IOException if loading or reading fails
 * @throws UIMAException if the reader or the JCas cannot be created
 */
private JCas loadNerDevelopmentData() throws IOException, UIMAException {
    Dataset germEval = loader.load("germeval2014-de", CONTINUE);

    CollectionReader conllReader = createReader(Conll2002Reader.class,
            Conll2002Reader.PARAM_PATTERNS, germEval.getDefaultSplit().getDevelopmentFiles(),
            Conll2002Reader.PARAM_LANGUAGE, germEval.getLanguage(),
            Conll2002Reader.PARAM_COLUMN_SEPARATOR,
            Conll2002Reader.ColumnSeparators.TAB.getName(),
            Conll2002Reader.PARAM_HAS_TOKEN_NUMBER, true,
            Conll2002Reader.PARAM_HAS_HEADER, true,
            Conll2002Reader.PARAM_HAS_EMBEDDED_NAMED_ENTITY, true);

    JCas developmentCas = JCasFactory.createJCas();
    conllReader.getNext(developmentCas.getCas());
    return developmentCas;
}
Example #10
Source File: XmiCollectionReaderTest.java From bluima with Apache License 2.0 | 6 votes |
@Test public void testSerializeDeserializeXmi() throws Exception { // serialize CollectionReader cr = createReader(TextArrayReader.class, PARAM_INPUT, new String[] { "this is a test" }); AnalysisEngine serializer = createEngine(XWriter.class, PARAM_OUTPUT_DIR, testDir + "/serdeser"); runPipeline(cr, serializer); // deserialize cr = createReader(XCollectionReader.class, PARAM_INPUT_DIRECTORY, testDir + "/serdeser"); List<JCas> deserializedCases = asList(cr); assertEquals(1, deserializedCases.size()); assertEquals("this is a test", deserializedCases.get(0) .getDocumentText()); }
Example #11
Source File: ExternalRecommenderIntegrationTest.java From inception with Apache License 2.0 | 6 votes |
private List<CAS> loadData(Dataset ds, File ... files) throws UIMAException, IOException { CollectionReader reader = createReader(Conll2002Reader.class, Conll2002Reader.PARAM_PATTERNS, files, Conll2002Reader.PARAM_LANGUAGE, ds.getLanguage(), Conll2002Reader.PARAM_COLUMN_SEPARATOR, Conll2002Reader.ColumnSeparators.TAB.getName(), Conll2002Reader.PARAM_HAS_TOKEN_NUMBER, true, Conll2002Reader.PARAM_HAS_HEADER, true, Conll2002Reader.PARAM_HAS_EMBEDDED_NAMED_ENTITY, true); List<CAS> casList = new ArrayList<>(); while (reader.hasNext()) { // Add the CasMetadata type to the CAS List<TypeSystemDescription> typeSystems = new ArrayList<>(); typeSystems.add(createTypeSystemDescription()); typeSystems.add(CasMetadataUtils.getInternalTypeSystem()); JCas cas = JCasFactory.createJCas(mergeTypeSystems(typeSystems)); reader.getNext(cas.getCas()); casList.add(cas.getCas()); } return casList; }
Example #12
Source File: DiffTestUtils.java From webanno with Apache License 2.0 | 6 votes |
/**
 * Reads the WebAnno TSV resource {@code src/test/resources/<aPath>} into a newly created JCas
 * using {@code WebannoTsv2Reader}.
 *
 * @param aPath path of the file, relative to {@code src/test/resources/}
 * @param aType optional additional type system; when non-null it is merged with the
 *        description returned by {@code TypeSystemDescriptionFactory} before the JCas is
 *        created, otherwise a default JCas is created
 * @return the JCas after one {@code getNext} call on the reader
 * @throws UIMAException if the reader or the JCas cannot be created
 * @throws IOException if reading fails
 */
public static JCas readWebAnnoTSV(String aPath, TypeSystemDescription aType)
        throws UIMAException, IOException {
    CollectionReader tsvReader = createReader(WebannoTsv2Reader.class,
            WebannoTsv2Reader.PARAM_SOURCE_LOCATION, "src/test/resources/" + aPath);

    JCas target;
    if (aType == null) {
        target = JCasFactory.createJCas();
    } else {
        // Combine the built-in types with the caller-supplied ones.
        List<TypeSystemDescription> typeSystems = new ArrayList<>();
        typeSystems.add(TypeSystemDescriptionFactory.createTypeSystemDescription());
        typeSystems.add(aType);
        target = JCasFactory.createJCas(CasCreationUtils.mergeTypeSystems(typeSystems));
    }

    tsvReader.getNext(target.getCas());
    return target;
}
Example #13
Source File: DiffTestUtils.java From webanno with Apache License 2.0 | 6 votes |
/**
 * Reads the XMI resource {@code src/test/resources/<aPath>} into a newly created JCas using
 * {@code XmiReader}.
 *
 * @param aPath path of the XMI file, relative to {@code src/test/resources/}
 * @param aType optional additional type system; when non-null it is merged with the
 *        description returned by {@code TypeSystemDescriptionFactory} before the JCas is
 *        created, otherwise a default JCas is created
 * @return the JCas after one {@code getNext} call on the reader
 * @throws UIMAException if the reader or the JCas cannot be created
 * @throws IOException if reading fails
 */
public static JCas readXMI(String aPath, TypeSystemDescription aType)
        throws UIMAException, IOException {
    CollectionReader xmiReader = createReader(XmiReader.class,
            XmiReader.PARAM_SOURCE_LOCATION, "src/test/resources/" + aPath);

    JCas target;
    if (aType == null) {
        target = JCasFactory.createJCas();
    } else {
        // Combine the built-in types with the caller-supplied ones.
        List<TypeSystemDescription> typeSystems = new ArrayList<>();
        typeSystems.add(TypeSystemDescriptionFactory.createTypeSystemDescription());
        typeSystems.add(aType);
        target = JCasFactory.createJCas(CasCreationUtils.mergeTypeSystems(typeSystems));
    }

    xmiReader.getNext(target.getCas());
    return target;
}
Example #14
Source File: OpenNlpDoccatRecommenderTest.java From inception with Apache License 2.0 | 6 votes |
/**
 * Reads the given files with {@code Reader}, runs a {@code BreakIteratorSegmenter}
 * (configured with PARAM_WRITE_SENTENCE = false) over each document, and returns the CASes.
 *
 * @param ds dataset whose language is passed to the reader
 * @param files files passed to the reader as its patterns parameter
 * @return the segmented CASes in the order the reader produced them
 * @throws UIMAException if a component or CAS cannot be created, or processing fails
 * @throws IOException if reading fails
 */
private List<CAS> loadData(Dataset ds, File... files) throws UIMAException, IOException {
    CollectionReader corpusReader = createReader(Reader.class,
            Reader.PARAM_PATTERNS, files,
            Reader.PARAM_LANGUAGE, ds.getLanguage());

    AnalysisEngine tokenizer = createEngine(BreakIteratorSegmenter.class,
            BreakIteratorSegmenter.PARAM_WRITE_SENTENCE, false);

    List<CAS> loaded = new ArrayList<>();
    for (; corpusReader.hasNext();) {
        JCas document = JCasFactory.createJCas();
        CAS documentCas = document.getCas();
        corpusReader.getNext(documentCas);
        tokenizer.process(document);
        loaded.add(documentCas);
    }
    return loaded;
}
Example #15
Source File: StringMatchingRecommenderTest.java From inception with Apache License 2.0 | 6 votes |
/**
 * Reads the given files with a {@code Conll2002Reader}, registers every CAS with
 * {@code casStorageSession} under the name {@code "testDataCas" + n} with exclusive write
 * access, and returns the CASes.
 *
 * @param ds dataset whose language is passed to the reader
 * @param files files passed to the reader as its patterns parameter
 * @return the CASes in the order the reader produced them
 * @throws UIMAException if the reader or a CAS cannot be created
 * @throws IOException if reading fails
 */
private List<CAS> loadData(Dataset ds, File... files) throws UIMAException, IOException {
    CollectionReader reader = createReader(Conll2002Reader.class,
            Conll2002Reader.PARAM_PATTERNS, files,
            Conll2002Reader.PARAM_LANGUAGE, ds.getLanguage(),
            Conll2002Reader.PARAM_COLUMN_SEPARATOR,
            Conll2002Reader.ColumnSeparators.TAB.getName(),
            Conll2002Reader.PARAM_HAS_TOKEN_NUMBER, true,
            Conll2002Reader.PARAM_HAS_HEADER, true,
            Conll2002Reader.PARAM_HAS_EMBEDDED_NAMED_ENTITY, true);

    List<CAS> casList = new ArrayList<>();
    int n = 1;
    while (reader.hasNext()) {
        JCas cas = JCasFactory.createJCas();
        reader.getNext(cas.getCas());
        casList.add(cas.getCas());
        casStorageSession.add("testDataCas" + n, EXCLUSIVE_WRITE_ACCESS, cas.getCas());
        // Advance the counter so each CAS is registered under its own name; without this
        // every document would be added as "testDataCas1".
        n++;
    }
    return casList;
}
Example #16
Source File: PubmedCentralCollectionReaderTest.java From bluima with Apache License 2.0 | 6 votes |
@Test @Ignore // FIXME public void testCount() throws Exception { CollectionReader cr = PubmedCentralCollectionReader.getCR("pmc_test_archive"); int i = 0; while (cr.hasNext()) { CAS cas = CasCreationUtils.createCas(cr .getProcessingResourceMetaData()); cr.getNext(cas); i++; } cr.close(); assertEquals(6, i); }
Example #17
Source File: JobBuilderTest.java From baleen with Apache License 2.0 | 5 votes |
/**
 * Builds a job from {@code jobConfig2.yaml} and verifies its name, its original and ordered
 * configuration, the collection reader's name and "key" parameter, the two annotators' names
 * and "key" parameters, and that there are no consumers.
 *
 * @throws Exception if the configuration cannot be read or the job cannot be built
 */
@Test
public void testValid2() throws Exception {
    String yaml = Files.asCharSource(getFile("jobConfig2.yaml"), Charset.defaultCharset())
            .read();

    JobBuilder builder = new JobBuilder("Test Job", new YamlPipelineConfiguration(yaml));
    BaleenJob job = (BaleenJob) builder.createNewPipeline();

    // Job-level properties
    assertEquals("Test Job", job.getName());
    assertEquals(yaml, job.originalConfig());
    assertEquals(yaml, job.orderedConfig());

    // Collection reader
    CollectionReader reader = job.collectionReader();
    assertEquals(BaleenDefaults.DEFAULT_SCHEDULER, reader.getMetaData().getName());
    assertEquals("Foo", reader.getConfigParameterValue("key"));

    // Annotators
    List<AnalysisEngine> annotators = job.annotators();
    assertEquals(2, annotators.size());

    AnalysisEngine firstTask = annotators.get(0);
    assertEquals("uk.gov.dstl.baleen.testing.DummyTask", firstTask.getMetaData().getName());
    assertEquals("Foo", firstTask.getConfigParameterValue("key"));

    AnalysisEngine secondTask = annotators.get(1);
    assertEquals(
            "uk.gov.dstl.baleen.testing.DummyTaskParams", secondTask.getMetaData().getName());
    assertEquals("Bar", secondTask.getConfigParameterValue("key"));

    // Consumers
    List<AnalysisEngine> consumers = job.consumers();
    assertEquals(0, consumers.size());
}
Example #18
Source File: DL4JSequenceRecommenderTest.java From inception with Apache License 2.0 | 5 votes |
/**
 * Loads the "conll2000-en" dataset and reads the first document of its test files with a
 * {@code Conll2000Reader}.
 *
 * @return a JCas filled by a single {@code getNext} call on the reader
 * @throws IOException if loading or reading fails
 * @throws UIMAException if the reader or the JCas cannot be created
 */
private JCas loadPosDevelopmentData() throws IOException, UIMAException {
    Dataset conll2000 = loader.load("conll2000-en", CONTINUE);

    // NOTE(review): despite the method name, this reads the split's test files rather than
    // development files - confirm this is intentional.
    CollectionReader conllReader = createReader(Conll2000Reader.class,
            Conll2000Reader.PARAM_PATTERNS, conll2000.getDefaultSplit().getTestFiles(),
            Conll2000Reader.PARAM_LANGUAGE, conll2000.getLanguage());

    JCas developmentCas = JCasFactory.createJCas();
    conllReader.getNext(developmentCas.getCas());
    return developmentCas;
}
Example #19
Source File: PubmedDatabaseCRTest.java From bluima with Apache License 2.0 | 5 votes |
/**
 * Reads documents from {@code PubmedDatabaseCR} (parameter BETWEEN = {0, 9}, empty documents
 * not skipped), expects 9 of them, and prints each document id to standard error.
 *
 * @throws Exception if the reader cannot be created or reading fails
 */
@Test
public void testLimit10() throws Exception {
    CollectionReader cr = createReader(PubmedDatabaseCR.class,
            BlueUima.PARAM_BETWEEN, new int[] { 0, 9 },
            BlueUima.PARAM_SKIP_EMPTY_DOCS, false);

    ArrayList<JCas> documents = asList(cr);
    assertEquals(9, documents.size());

    for (JCas document : documents) {
        int pubmedId = getHeaderIntDocId(document);
        System.err.println(pubmedId);
    }
}
Example #20
Source File: LappsGridRecommenderConformityTest.java From inception with Apache License 2.0 | 5 votes |
/**
 * Reads the given file with an {@code XmiReader} and returns the first CAS it produces.
 * All documents offered by the reader are read; only the first one is returned.
 *
 * @param aFile file passed to the reader as its patterns parameter
 * @return the first CAS read
 * @throws UIMAException if the reader or a CAS cannot be created
 * @throws IOException if reading fails
 * @throws IllegalStateException if the reader produced no document at all
 */
private static CAS loadData(File aFile) throws UIMAException, IOException {
    // Configure the reader with its own parameter constant instead of borrowing the one
    // declared on ConllUReader.
    CollectionReader reader = createReader(XmiReader.class, XmiReader.PARAM_PATTERNS, aFile);

    List<CAS> casList = new ArrayList<>();
    while (reader.hasNext()) {
        JCas cas = JCasFactory.createJCas();
        reader.getNext(cas.getCas());
        casList.add(cas.getCas());
    }

    // Fail with a message that names the file rather than an opaque index error.
    if (casList.isEmpty()) {
        throw new IllegalStateException("No CAS could be read from [" + aFile + "]");
    }
    return casList.get(0);
}
Example #21
Source File: ZipXmiCollectionReaderTest.java From bluima with Apache License 2.0 | 5 votes |
/**
 * Round-trip test: writes one text document with {@code ZipXWriter} into
 * {@code testDir + "/serdeser"}, reads the first document back with
 * {@code ZipXmiCollectionReader}, and checks that its text is unchanged.
 *
 * @throws Exception if any pipeline component cannot be created or run
 */
@Test
public void testSerializeDeserializeXmi() throws Exception {
    // Write
    CollectionReader cr = createReader(TextArrayReader.class,
            PARAM_INPUT, new String[] { "this is a test" });
    AnalysisEngine zipWriter = createEngine(ZipXWriter.class,
            BlueUima.PARAM_OUTPUT_DIR, testDir + "/serdeser");
    SimplePipeline.runPipeline(cr, zipWriter);

    // Read back
    cr = createReader(ZipXmiCollectionReader.class,
            PARAM_INPUT_DIRECTORY, testDir + "/serdeser");
    CAS restored = createCas(cr.getProcessingResourceMetaData());
    cr.getNext(restored);

    assertEquals("this is a test", restored.getDocumentText());
}
Example #22
Source File: SparkUimaUtils.java From ambiverse-nlu with Apache License 2.0 | 5 votes |
/**
 * Drains the given reader, wrapping the CAS of every document in an {@code SCAS}.
 *
 * @param reader reader to consume until {@code hasNext()} returns false
 * @return one {@code SCAS} per document, in reading order
 * @throws IOException if reading fails
 * @throws UIMAException if a JCas cannot be created or the reader reports an error
 */
public static List<SCAS> readFrom(CollectionReader reader) throws IOException, UIMAException {
    List<SCAS> wrapped = new ArrayList<>();
    for (; reader.hasNext();) {
        // Each document is read into its own freshly created CAS.
        CAS documentCas = JCasFactory.createJCas().getCas();
        reader.getNext(documentCas);
        wrapped.add(new SCAS(documentCas));
    }
    return wrapped;
}
Example #23
Source File: PdfCollectionReaderTest.java From bluima with Apache License 2.0 | 5 votes |
/**
 * Ignored manual test: runs a {@code PdfCollectionReader} over the "pdf_srikanth" input
 * directory and writes the output of {@code DocumentTextWriter} to a hard-coded desktop
 * directory.
 *
 * @throws Exception if a component cannot be created or the pipeline fails
 */
@Test
@Ignore
public void testOnSampleForSrikanth() throws Exception {
    CollectionReader pdfReader = createReader(
            PdfCollectionReader.class,
            PARAM_INPUT_DIRECTORY, "pdf_srikanth");

    AnalysisEngine textDumper = createEngine(
            DocumentTextWriter.class,
            PARAM_OUTPUT_DIR, "/Users/richarde/Desktop/");

    SimplePipeline.runPipeline(pdfReader, textDumper);
}
Example #24
Source File: PdfCollectionReaderTest.java From bluima with Apache License 2.0 | 5 votes |
/**
 * Ignored manual test: runs a {@code PdfCollectionReader} with PARAM_EXPAND_ABBREVIATIONS
 * set to true over a hard-coded sample directory and writes the output of
 * {@code DocumentTextWriter} to a hard-coded desktop directory.
 *
 * @throws Exception if a component cannot be created or the pipeline fails
 */
@Test
@Ignore
public void testOnSampleForExtractionQuality() throws Exception {
    CollectionReader pdfReader = createReader(PdfCollectionReader.class,
            PARAM_INPUT_DIRECTORY,
            "/Users/richarde/data_hdd/_papers_etc/pubmed/sample_pdfs_68/pdfs",
            PARAM_EXPAND_ABBREVIATIONS, true);

    AnalysisEngine textDumper = createEngine(DocumentTextWriter.class,
            PARAM_OUTPUT_DIR, "/Users/richarde/Desktop/");

    runPipeline(pdfReader, textDumper);
}
Example #25
Source File: PdfCollectionReaderTest.java From bluima with Apache License 2.0 | 5 votes |
/**
 * Ignored manual test: runs a {@code PdfCollectionReader} over a hard-coded sample directory
 * and passes the documents to a {@code TableWriter} engine.
 *
 * @throws Exception if a component cannot be created or the pipeline fails
 */
@Test
@Ignore
public void testExtractTablesOnSample() throws Exception {
    CollectionReader pdfReader = createReader(
            PdfCollectionReader.class,
            PARAM_INPUT_DIRECTORY, "/Users/richarde/data/_papers_etc/pmc_pdfs_sample");

    AnalysisEngine tableDumper = createEngine(TableWriter.class);

    SimplePipeline.runPipeline(pdfReader, tableDumper);
}
Example #26
Source File: AgreementTestUtils.java From webanno with Apache License 2.0 | 5 votes |
/**
 * Reads the resource {@code src/test/resources/<aPath>} with a {@code Conll2006Reader} into a
 * newly created CAS.
 *
 * @param aPath path of the file, relative to {@code src/test/resources/}
 * @return the CAS after one {@code getNext} call on the reader
 * @throws UIMAException if the reader or the CAS cannot be created
 * @throws IOException if reading fails
 */
public static CAS readConll2006(String aPath) throws UIMAException, IOException {
    CollectionReader conllReader = createReader(
            Conll2006Reader.class,
            Conll2006Reader.PARAM_SOURCE_LOCATION, "src/test/resources/" + aPath);

    CAS target = JCasFactory.createJCas().getCas();
    conllReader.getNext(target);
    return target;
}
Example #27
Source File: Txt2PubmedIdIndexer.java From bluima with Apache License 2.0 | 5 votes |
public static void main(String[] args) throws Exception { // cr CollectionReader cr = CollectionReaderFactory.createReader( PubmedWholeDatabaseCR.class, PARAM_DB_CONNECTION, new String[] { "localhost", "bb_pubmed", "root", "" }); SimplePipeline.runPipeline( cr, createEngineDescription(MyIndexer.class), createEngineDescription(StatsAnnotatorPlus.class, PARAM_PRINT_EVERY, 50000)); }
Example #28
Source File: AgreementTestUtils.java From webanno with Apache License 2.0 | 5 votes |
/**
 * Reads the given file with a {@code WebannoTsv3XReader} into a newly created JCas.
 *
 * @param aPath file passed to the reader as its source location
 * @return the JCas after one {@code getNext} call on the reader
 * @throws UIMAException if the reader or the JCas cannot be created
 * @throws IOException if reading fails
 */
public static JCas loadWebAnnoTsv3(File aPath) throws UIMAException, IOException {
    CollectionReader tsvReader = createReader(
            WebannoTsv3XReader.class,
            WebannoTsv3XReader.PARAM_SOURCE_LOCATION, aPath);

    JCas target = JCasFactory.createJCas();
    tsvReader.getNext(target.getCas());
    return target;
}
Example #29
Source File: AgreementTestUtils.java From webanno with Apache License 2.0 | 5 votes |
/**
 * Reads the resource {@code src/test/resources/<aPath>} with a {@code Conll2006Reader} into a
 * newly created CAS.
 *
 * @param aPath path of the file, relative to {@code src/test/resources/}
 * @return the CAS after one {@code getNext} call on the reader
 * @throws UIMAException if the reader or the CAS cannot be created
 * @throws IOException if reading fails
 */
public static CAS read(String aPath) throws UIMAException, IOException {
    String location = "src/test/resources/" + aPath;
    CollectionReader conllReader = createReader(
            Conll2006Reader.class,
            Conll2006Reader.PARAM_SOURCE_LOCATION, location);

    CAS target = JCasFactory.createJCas().getCas();
    conllReader.getNext(target);
    return target;
}
Example #30
Source File: AgreementTestUtils.java From webanno with Apache License 2.0 | 5 votes |
/**
 * Reads the resource {@code src/test/resources/<aPath>} with a {@code WebannoTsv3XReader}
 * into a newly created JCas.
 *
 * @param aPath path of the file, relative to {@code src/test/resources/}
 * @return the JCas after one {@code getNext} call on the reader
 * @throws UIMAException if the reader or the JCas cannot be created
 * @throws IOException if reading fails
 */
public static JCas loadWebAnnoTsv3(String aPath) throws UIMAException, IOException {
    String location = "src/test/resources/" + aPath;
    CollectionReader tsvReader = createReader(
            WebannoTsv3XReader.class,
            WebannoTsv3XReader.PARAM_SOURCE_LOCATION, location);

    JCas target = JCasFactory.createJCas();
    tsvReader.getNext(target.getCas());
    return target;
}