Java Code Examples for org.apache.uima.collection.CollectionReader#getNext()

The following examples show how to use org.apache.uima.collection.CollectionReader#getNext(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: BioNLPGeniaEventsCollectionReaderTest.java    From bluima with Apache License 2.0 6 votes vote down vote up
/**
 * Drains a {@link BioNLPGeniaEventsCollectionReader} and checks that it
 * yields exactly 259 documents.
 *
 * @throws Exception
 *             if the reader cannot be created or a document cannot be read
 */
@Test
public void test() throws Exception {

    CollectionReader cr = CollectionReaderFactory.createReader(
            BioNLPGeniaEventsCollectionReader.class);

    int i = 0;
    try {
        while (cr.hasNext()) {
            // A new CAS is built from the reader's own metadata for every document.
            CAS cas = CasCreationUtils.createCas(cr
                    .getProcessingResourceMetaData());
            cr.getNext(cas);

            // if (createHtml)
            // viewer.createHtml(cas.getJCas(), cas.getTypeSystem(),
            // styleMapFile, new File("target/" + i));

            i++;
        }
    } finally {
        // Close the reader even when reading fails part-way through.
        cr.close();
    }
    assertEquals(259, i);
}
 
Example 2
Source File: OpenNlpDoccatRecommenderTest.java    From inception with Apache License 2.0 6 votes vote down vote up
/**
 * Reads every document the reader produces for {@code files} into its own CAS
 * and runs a {@link BreakIteratorSegmenter} (configured with
 * {@code PARAM_WRITE_SENTENCE = false}) over each of them.
 *
 * @param ds
 *            dataset whose language is passed to the reader
 * @param files
 *            files handed to the reader as its patterns parameter
 * @return one CAS per document, in reading order
 */
private List<CAS> loadData(Dataset ds, File ... files) throws UIMAException, IOException
{
    CollectionReader corpusReader = createReader(Reader.class,
            Reader.PARAM_PATTERNS, files,
            Reader.PARAM_LANGUAGE, ds.getLanguage());

    AnalysisEngine tokenizer = createEngine(BreakIteratorSegmenter.class,
            BreakIteratorSegmenter.PARAM_WRITE_SENTENCE, false);

    List<CAS> documents = new ArrayList<>();
    while (corpusReader.hasNext()) {
        JCas document = JCasFactory.createJCas();
        CAS documentCas = document.getCas();
        corpusReader.getNext(documentCas);
        tokenizer.process(document);
        documents.add(documentCas);
    }
    return documents;
}
 
Example 3
Source File: Biocreative2GeneCollectionReaderTest.java    From bluima with Apache License 2.0 6 votes vote down vote up
/**
 * Reads the first document of the "test" mode corpus and checks that it
 * contains exactly two {@link BioEntityMention} annotations.
 *
 * @throws Exception
 *             if the reader cannot be created or the document cannot be read
 */
@Test
public void testTestCorpus() throws Exception {

    CollectionReader cr = CollectionReaderFactory.createReader(
            Biocreative2GeneCollectionReader.class, BlueUima.PARAM_MODE,
            "test");

    try {
        CAS cas = CasCreationUtils
                .createCas(cr.getProcessingResourceMetaData());
        cr.getNext(cas);

        Collection<BioEntityMention> genes = JCasUtil.select(cas.getJCas(),
                BioEntityMention.class);
        assertEquals(2, genes.size());
    } finally {
        // Close the reader even if reading or the assertion fails.
        cr.close();
    }
}
 
Example 4
Source File: AgreementTestUtils.java    From webanno with Apache License 2.0 6 votes vote down vote up
/**
 * Reads the first document of {@code src/test/resources/<aPath>} with a
 * {@link WebannoTsv2Reader}.
 *
 * @param aPath
 *            path of the file, relative to {@code src/test/resources/}
 * @param aType
 *            extra type system to merge with the one returned by
 *            {@code TypeSystemDescriptionFactory.createTypeSystemDescription()};
 *            when {@code null} a default JCas is created instead
 * @return the JCas the document was read into
 */
public static JCas readWebAnnoTSV(String aPath, TypeSystemDescription aType)
    throws UIMAException, IOException
{
    CollectionReader tsvReader = createReader(WebannoTsv2Reader.class,
            WebannoTsv2Reader.PARAM_SOURCE_LOCATION, "src/test/resources/" + aPath);

    JCas target;
    if (aType == null) {
        target = JCasFactory.createJCas();
    }
    else {
        List<TypeSystemDescription> typeSystems = new ArrayList<>();
        typeSystems.add(TypeSystemDescriptionFactory.createTypeSystemDescription());
        typeSystems.add(aType);
        target = JCasFactory.createJCas(CasCreationUtils.mergeTypeSystems(typeSystems));
    }

    tsvReader.getNext(target.getCas());
    return target;
}
 
Example 5
Source File: DiffTestUtils.java    From webanno with Apache License 2.0 6 votes vote down vote up
/**
 * Loads the first document of a WebAnno TSV2 test resource into a JCas.
 *
 * @param aPath
 *            file location below {@code src/test/resources/}
 * @param aType
 *            optional additional type system; if non-{@code null} it is merged
 *            with the description obtained from
 *            {@code TypeSystemDescriptionFactory.createTypeSystemDescription()}
 * @return the populated JCas
 */
public static JCas readWebAnnoTSV(String aPath, TypeSystemDescription aType)
    throws UIMAException, IOException
{
    CollectionReader source = createReader(WebannoTsv2Reader.class,
            WebannoTsv2Reader.PARAM_SOURCE_LOCATION, "src/test/resources/" + aPath);

    JCas document;
    if (aType == null) {
        // No extra types requested: a default JCas is enough.
        document = JCasFactory.createJCas();
    }
    else {
        TypeSystemDescription defaults = TypeSystemDescriptionFactory
                .createTypeSystemDescription();
        List<TypeSystemDescription> combined = new ArrayList<>();
        combined.add(defaults);
        combined.add(aType);
        TypeSystemDescription merged = CasCreationUtils.mergeTypeSystems(combined);
        document = JCasFactory.createJCas(merged);
    }

    source.getNext(document.getCas());
    return document;
}
 
Example 6
Source File: NamedEntityLinkerTest.java    From inception with Apache License 2.0 6 votes vote down vote up
/**
 * Reads all documents a {@link Conll2002Reader} produces for {@code files},
 * each into a freshly created CAS.
 *
 * @param ds
 *            dataset whose language is passed to the reader
 * @param files
 *            files handed to the reader as its patterns parameter
 * @return one CAS per document, in reading order
 */
private List<CAS> loadData(Dataset ds, File ... files) throws UIMAException, IOException
{
    CollectionReader conllReader = createReader(
        Conll2002Reader.class,
        Conll2002Reader.PARAM_PATTERNS, files,
        Conll2002Reader.PARAM_LANGUAGE, ds.getLanguage(),
        Conll2002Reader.PARAM_COLUMN_SEPARATOR, Conll2002Reader.ColumnSeparators.TAB.getName(),
        Conll2002Reader.PARAM_HAS_TOKEN_NUMBER, true,
        Conll2002Reader.PARAM_HAS_HEADER, true,
        Conll2002Reader.PARAM_HAS_EMBEDDED_NAMED_ENTITY, true);

    List<CAS> documents = new ArrayList<>();
    while (conllReader.hasNext()) {
        CAS document = JCasFactory.createJCas().getCas();
        conllReader.getNext(document);
        documents.add(document);
    }
    return documents;
}
 
Example 7
Source File: DL4JSequenceRecommenderTest.java    From inception with Apache License 2.0 6 votes vote down vote up
/**
 * Loads the "germeval2014-de" dataset and reads the first document of its
 * default split's development files with a {@link Conll2002Reader}.
 *
 * @return the JCas holding that first document
 */
private JCas loadNerDevelopmentData() throws IOException, UIMAException
{
    Dataset germeval = loader.load("germeval2014-de", CONTINUE);

    CollectionReader devReader = createReader(Conll2002Reader.class,
        Conll2002Reader.PARAM_PATTERNS, germeval.getDefaultSplit().getDevelopmentFiles(),
        Conll2002Reader.PARAM_LANGUAGE, germeval.getLanguage(),
        Conll2002Reader.PARAM_COLUMN_SEPARATOR, Conll2002Reader.ColumnSeparators.TAB.getName(),
        Conll2002Reader.PARAM_HAS_TOKEN_NUMBER, true,
        Conll2002Reader.PARAM_HAS_HEADER, true,
        Conll2002Reader.PARAM_HAS_EMBEDDED_NAMED_ENTITY, true);

    JCas firstDocument = JCasFactory.createJCas();
    devReader.getNext(firstDocument.getCas());
    return firstDocument;
}
 
Example 8
Source File: AgreementTestUtils.java    From webanno with Apache License 2.0 5 votes vote down vote up
/**
 * Reads the first document of {@code src/test/resources/<aPath>} with a
 * {@link Conll2006Reader}.
 *
 * @param aPath
 *            file location below {@code src/test/resources/}
 * @return the CAS the document was read into
 */
public static CAS read(String aPath)
    throws UIMAException, IOException
{
    CollectionReader conllReader = createReader(Conll2006Reader.class,
            Conll2006Reader.PARAM_SOURCE_LOCATION, "src/test/resources/" + aPath);

    JCas holder = JCasFactory.createJCas();
    CAS document = holder.getCas();
    conllReader.getNext(document);
    return document;
}
 
Example 9
Source File: SymbolicRulesTest.java    From webanno with Apache License 2.0 5 votes vote down vote up
/**
 * Parses {@code symbolic1.rules}, reads {@code 1.conll}, adds a POS
 * annotation with value "pronoun" at offsets 8..9 and checks that the values
 * generated for feature "value" of the first {@link Lemma} are exactly
 * {@code [("good", true)]}.
 *
 * @throws Exception
 *             if the rules or the document cannot be read or parsed
 */
@Test
public void testSimpleSymbolicRules()
    throws Exception
{
    Parse p;
    // try-with-resources: the rules stream used to be left open.
    try (FileInputStream rulesStream = new FileInputStream(
            "src/test/resources/rules/symbolic1.rules")) {
        ConstraintsGrammar parser = new ConstraintsGrammar(rulesStream);
        p = parser.Parse();
    }

    ParsedConstraints constraints = p.accept(new ParserVisitor());

    JCas jcas = JCasFactory.createJCas();

    CollectionReader reader = createReader(Conll2006Reader.class,
            Conll2006Reader.PARAM_SOURCE_LOCATION, "src/test/resources/text/1.conll");

    reader.getNext(jcas.getCas());

    POS pos = new POS(jcas, 8, 9);
    pos.setPosValue("pronoun");
    pos.addToIndexes();

    Evaluator constraintsEvaluator = new ValuesGenerator();

    Lemma lemma = select(jcas, Lemma.class).iterator().next();

    List<PossibleValue> possibleValues = constraintsEvaluator.generatePossibleValues(lemma,
            "value", constraints);

    List<PossibleValue> expectedOutput = new ArrayList<>();
    expectedOutput.add(new PossibleValue("good", true));

    assertEquals(expectedOutput, possibleValues);
}
 
Example 10
Source File: SymbolicRulesTest.java    From webanno with Apache License 2.0 5 votes vote down vote up
/**
 * Same scenario as the first symbolic-rules test but driven by
 * {@code symbolic2.rules}: reads {@code 1.conll}, adds a POS annotation with
 * value "pronoun" at offsets 8..9 and expects {@code [("good", true)]} for
 * feature "value" of the first {@link Lemma}.
 *
 * @throws Exception
 *             if the rules or the document cannot be read or parsed
 */
@Test
public void testSimpleSymbolicRules2()
    throws Exception
{
    Parse p;
    // try-with-resources: the rules stream used to be left open.
    try (FileInputStream rulesStream = new FileInputStream(
            "src/test/resources/rules/symbolic2.rules")) {
        ConstraintsGrammar parser = new ConstraintsGrammar(rulesStream);
        p = parser.Parse();
    }

    ParsedConstraints constraints = p.accept(new ParserVisitor());

    JCas jcas = JCasFactory.createJCas();

    CollectionReader reader = createReader(Conll2006Reader.class,
            Conll2006Reader.PARAM_SOURCE_LOCATION, "src/test/resources/text/1.conll");

    reader.getNext(jcas.getCas());

    POS pos = new POS(jcas, 8, 9);
    pos.setPosValue("pronoun");
    pos.addToIndexes();

    Evaluator constraintsEvaluator = new ValuesGenerator();

    Lemma lemma = select(jcas, Lemma.class).iterator().next();

    List<PossibleValue> possibleValues = constraintsEvaluator.generatePossibleValues(lemma,
            "value", constraints);

    List<PossibleValue> expectedOutput = new ArrayList<>();
    expectedOutput.add(new PossibleValue("good", true));

    assertEquals(expectedOutput, possibleValues);
}
 
Example 11
Source File: DL4JSequenceRecommenderTest.java    From inception with Apache License 2.0 5 votes vote down vote up
/**
 * Loads the "conll2000-en" dataset and reads the first document of its
 * default split's test files with a {@link Conll2000Reader}.
 *
 * @return the JCas holding that first document
 */
private JCas loadPosDevelopmentData() throws IOException, UIMAException
{
    Dataset conll2000 = loader.load("conll2000-en", CONTINUE);

    CollectionReader posReader = createReader(Conll2000Reader.class,
            Conll2000Reader.PARAM_PATTERNS, conll2000.getDefaultSplit().getTestFiles(),
            Conll2000Reader.PARAM_LANGUAGE, conll2000.getLanguage());

    JCas firstDocument = JCasFactory.createJCas();
    posReader.getNext(firstDocument.getCas());
    return firstDocument;
}
 
Example 12
Source File: CurationTestUtils.java    From webanno with Apache License 2.0 5 votes vote down vote up
/**
 * Reads the first document of {@code src/test/resources/<aPath>} with a
 * {@link WebannoTsv3XReader} into a newly created JCas.
 *
 * @param aPath
 *            file location below {@code src/test/resources/}
 * @return the populated JCas
 */
public static JCas loadWebAnnoTsv3(String aPath) throws UIMAException, IOException
{
    String location = "src/test/resources/" + aPath;
    CollectionReader tsvReader = createReader(WebannoTsv3XReader.class,
            WebannoTsv3XReader.PARAM_SOURCE_LOCATION, location);

    JCas document = JCasFactory.createJCas();
    tsvReader.getNext(document.getCas());
    return document;
}
 
Example 13
Source File: AgreementTestUtils.java    From webanno with Apache License 2.0 5 votes vote down vote up
/**
 * Loads a WebAnno TSV3 test resource.
 *
 * @param aPath
 *            path of the resource, relative to {@code src/test/resources/}
 * @return a new JCas containing the first document produced by the reader
 */
public static JCas loadWebAnnoTsv3(String aPath) throws UIMAException, IOException
{
    CollectionReader source = createReader(
            WebannoTsv3XReader.class,
            WebannoTsv3XReader.PARAM_SOURCE_LOCATION, "src/test/resources/" + aPath);

    JCas loaded = JCasFactory.createJCas();
    CAS target = loaded.getCas();
    source.getNext(target);

    return loaded;
}
 
Example 14
Source File: ZipXmiCollectionReaderTest.java    From bluima with Apache License 2.0 5 votes vote down vote up
/**
 * Round-trip check: writes the single text "this is a test" through a
 * {@link ZipXWriter} into {@code testDir + "/serdeser"}, reads it back with a
 * {@link ZipXmiCollectionReader} and compares the document text.
 */
@Test
public void testSerializeDeserializeXmi() throws Exception {

    // Write phase
    CollectionReader textSource = createReader(TextArrayReader.class,
             PARAM_INPUT, new String[] { "this is a test" });
    AnalysisEngine zipWriter = createEngine(ZipXWriter.class,
            BlueUima.PARAM_OUTPUT_DIR, testDir + "/serdeser");
    SimplePipeline.runPipeline(textSource, zipWriter);

    // Read phase
    CollectionReader zipSource = createReader(ZipXmiCollectionReader.class,
            PARAM_INPUT_DIRECTORY, testDir + "/serdeser");
    CAS restored = createCas(zipSource.getProcessingResourceMetaData());
    zipSource.getNext(restored);

    assertEquals("this is a test", restored.getDocumentText());
}
 
Example 15
Source File: PdfAnnoRendererTest.java    From inception with Apache License 2.0 5 votes vote down vote up
/**
 * Tests if anno file is correctly rendered for a given document.
 * <p>
 * Reads {@code tcf04-karin-wl.xml} with a {@link TcfReader}, pre-renders the
 * whole document text, renders it against the PDF extract in
 * {@code rendererTestPdfExtract.txt} and compares the result with
 * {@code rendererTestAnnoFile.anno} (newlines normalized).
 * </p>
 *
 * @throws Exception
 *             if a resource cannot be read or rendering fails
 */
@Test
public void testRender() throws Exception
{
    String file = "src/test/resources/tcf04-karin-wl.xml";

    // "\\Z" as delimiter makes next() return the file content in one token.
    // try-with-resources: the Scanner (and its file handle) used to be left open.
    String pdftxt;
    try (Scanner pdfExtractScanner = new Scanner(
            new File("src/test/resources/rendererTestPdfExtract.txt"))) {
        pdftxt = pdfExtractScanner.useDelimiter("\\Z").next();
    }

    CAS cas = JCasFactory.createJCas().getCas();
    CollectionReader reader = CollectionReaderFactory.createReader(TcfReader.class,
        TcfReader.PARAM_SOURCE_LOCATION, file);
    reader.getNext(cas);

    AnnotatorState state = new AnnotatorStateImpl(Mode.ANNOTATION);
    state.setPagingStrategy(new SentenceOrientedPagingStrategy());
    state.getPreferences().setWindowSize(10);
    state.setProject(project);

    VDocument vdoc = new VDocument();
    preRenderer.render(vdoc, 0, cas.getDocumentText().length(), cas,
            schemaService.listAnnotationLayer(project));

    PdfExtractFile pdfExtractFile = new PdfExtractFile(pdftxt, new HashMap<>());
    PdfAnnoRenderer renderer = new PdfAnnoRenderer(schemaService,
            new ColoringServiceImpl(schemaService));
    PdfAnnoModel annoFile = renderer.render(state, vdoc, cas.getDocumentText(), pdfExtractFile,
            0);

    assertThat(annoFile.getAnnoFileContent())
        .isEqualToNormalizingNewlines(contentOf(
                new File("src/test/resources/rendererTestAnnoFile.anno"), UTF_8));
}
 
Example 16
Source File: AgreementTestUtils.java    From webanno with Apache License 2.0 5 votes vote down vote up
/**
 * Reads the first document of the given file with a
 * {@link WebannoTsv3XReader} into a newly created JCas.
 *
 * @param aPath
 *            file passed to the reader as its source location
 * @return the populated JCas
 */
public static JCas loadWebAnnoTsv3(File aPath) throws UIMAException, IOException
{
    CollectionReader tsvReader = createReader(
            WebannoTsv3XReader.class,
            WebannoTsv3XReader.PARAM_SOURCE_LOCATION, aPath);

    JCas document = JCasFactory.createJCas();
    CAS target = document.getCas();
    tsvReader.getNext(target);

    return document;
}
 
Example 17
Source File: CurationTestUtils.java    From webanno with Apache License 2.0 5 votes vote down vote up
/**
 * Loads a WebAnno TSV3 file.
 *
 * @param aPath
 *            the file handed to {@link WebannoTsv3XReader} as source location
 * @return a new JCas containing the first document produced by the reader
 */
public static JCas loadWebAnnoTsv3(File aPath) throws UIMAException, IOException
{
    CollectionReader source = createReader(WebannoTsv3XReader.class,
            WebannoTsv3XReader.PARAM_SOURCE_LOCATION, aPath);

    JCas loaded = JCasFactory.createJCas();
    source.getNext(loaded.getCas());
    return loaded;
}
 
Example 18
Source File: SimplePipeline.java    From uima-uimafit with Apache License 2.0 4 votes vote down vote up
/**
 * <p>
 * Runs a pipeline made of an already instantiated collection reader and a sequence of already
 * instantiated analysis engines. A single CAS is created from the combined metadata of all
 * components and re-used (reset) for every document the reader provides. Once the reader is
 * exhausted, {@link AnalysisEngine#collectionProcessComplete() collectionProcessComplete()} is
 * invoked on the engines. Note that {@link AnalysisEngine#destroy()} and
 * {@link CollectionReader#destroy()} are <b>NOT</b> called. As the components were instantiated
 * by the caller, they must also be managed (i.e. destroyed) by the caller.
 * </p>
 * <p>
 * External resources can only be shared between the reader and/or the analysis engines if the
 * reader/engines have been previously instantiated using a shared resource manager.
 * </p>
 * 
 * @param aResMgr
 *          a resource manager. Normally the same one used by the collection reader and analysis
 *          engines.
 * @param reader
 *          a collection reader
 * @param engines
 *          a sequence of analysis engines
 * @throws IOException
 *           if there is an I/O problem in the reader
 * @throws ResourceInitializationException 
 *           if there is a problem initializing or running the pipeline.
 * @throws CollectionException 
 *           if there is a problem initializing or running the pipeline.
 * @throws AnalysisEngineProcessException 
 *           if there is a problem initializing or running the pipeline.
 */
public static void runPipeline(final ResourceManager aResMgr, final CollectionReader reader,
        final AnalysisEngine... engines) throws IOException, ResourceInitializationException,
        AnalysisEngineProcessException, CollectionException {
  // Gather the metadata of the reader first, then of each engine in order.
  final List<ResourceMetaData> componentMetaData = new ArrayList<ResourceMetaData>();
  componentMetaData.add(reader.getMetaData());
  for (int idx = 0; idx < engines.length; idx++) {
    componentMetaData.add(engines[idx].getMetaData());
  }

  // One CAS for the whole run, created from the combined metadata.
  final CAS sharedCas = CasCreationUtils.createCas(componentMetaData, null, aResMgr);
  reader.typeSystemInit(sharedCas.getTypeSystem());

  while (reader.hasNext()) {
    reader.getNext(sharedCas);
    runPipeline(sharedCas, engines);
    // Clear the CAS so it can receive the next document.
    sharedCas.reset();
  }

  collectionProcessComplete(engines);
}
 
Example 19
Source File: WebAnnoTsv2ReaderWriterTest.java    From webanno with Apache License 2.0 4 votes vote down vote up
/**
 * Round-trip test for the WebAnno TSV2 reader/writer: reads
 * {@code example2.tsv}, writes it to the test output folder, reads the
 * written file back and checks that the numbers of tokens, POS tags, lemmas,
 * named entities and sentences match those of the original file.
 *
 * @throws Exception
 *             if reading, writing or pipeline execution fails
 */
@Test
public void test()
    throws Exception
{
    String targetFolder = "target/test-output/" + testContext.getTestOutputFolderName();

    CollectionReader reader = createCollectionReader(
            WebannoTsv2Reader.class,
            WebannoTsv2Reader.PARAM_PATH, "src/test/resources/tsv2/",
            WebannoTsv2Reader.PARAM_PATTERNS, "example2.tsv");

    List<String> multipleSpans = new ArrayList<>();
    multipleSpans.add("de.tudarmstadt.ukp.dkpro.core.api.ner.type.NamedEntity");
    multipleSpans.add("de.tudarmstadt.ukp.dkpro.core.api.coref.type.Coreference");
    // PARAM_OVERWRITE was previously listed twice with the same value; once is enough.
    AnalysisEngineDescription writer = createPrimitiveDescription(
            WebannoTsv2Writer.class,
            WebannoTsv2Writer.PARAM_TARGET_LOCATION, targetFolder,
            WebannoTsv2Writer.PARAM_OVERWRITE, true,
            WebannoTsv2Writer.PARAM_STRIP_EXTENSION, true,
            WebannoTsv2Writer.MULTIPLE_SPAN_ANNOTATIONS, multipleSpans);

    runPipeline(reader, writer);

    // cas1: the original input file (reference).
    CollectionReader reader1 = createCollectionReader(
            WebannoTsv2Reader.class,
            WebannoTsv2Reader.PARAM_PATH, "src/test/resources/tsv2/",
            WebannoTsv2Reader.PARAM_PATTERNS, "example2.tsv");
    CAS cas1 = JCasFactory.createJCas().getCas();
    reader1.getNext(cas1);

    // cas2: the file produced by the writer (value under test).
    CollectionReader reader2 = createCollectionReader(WebannoTsv2Reader.class,
            WebannoTsv2Reader.PARAM_PATH, targetFolder,
            WebannoTsv2Reader.PARAM_PATTERNS, "example2.tsv");

    CAS cas2 = JCasFactory.createJCas().getCas();
    reader2.getNext(cas2);

    // assertEquals(expected, actual): the original file is the expectation,
    // so a failure message reports the round-tripped value as "actual".
    assertEquals(JCasUtil.select(cas1.getJCas(), Token.class).size(),
            JCasUtil.select(cas2.getJCas(), Token.class).size());
    assertEquals(JCasUtil.select(cas1.getJCas(), POS.class).size(),
            JCasUtil.select(cas2.getJCas(), POS.class).size());
    assertEquals(JCasUtil.select(cas1.getJCas(), Lemma.class).size(),
            JCasUtil.select(cas2.getJCas(), Lemma.class).size());
    assertEquals(JCasUtil.select(cas1.getJCas(), NamedEntity.class).size(),
            JCasUtil.select(cas2.getJCas(), NamedEntity.class).size());
    assertEquals(JCasUtil.select(cas1.getJCas(), Sentence.class).size(),
            JCasUtil.select(cas2.getJCas(), Sentence.class).size());
}
 
Example 20
Source File: SimplePipeline.java    From uima-uimafit with Apache License 2.0 3 votes vote down vote up
/**
 * <p>
 * Instantiate the CollectionReader and the AnalysisEngines from their descriptions and run them
 * as a pipeline. After processing all CASes provided by the reader, the method calls
 * {@link AnalysisEngine#collectionProcessComplete() collectionProcessComplete()} on the
 * aggregate engine. In all cases (success or failure) the reader, the aggregate engine and the
 * resource manager are passed to {@code LifeCycleUtil.destroy(...)}.
 * </p>
 * <p>
 * External resources can be shared between the reader and the analysis engines, as all
 * components are produced with the same resource manager.
 * </p>
 * <p>
 * This method is suitable for the batch-processing of sets of documents where the overhead
 * of instantiating the pipeline components does not significantly impact the overall runtime
 * of the pipeline. If you need to avoid this overhead, e.g. because you wish to run a pipeline
 * on individual documents, then you should not use this method. Instead, create a CAS using
 * {@link JCasFactory}, create a reader instance using {@link CollectionReaderFactory#createReader},
 * create an engine instance using {@link AnalysisEngineFactory#createEngine} and then use
 * a loop to process the data, resetting the CAS after each step.
 * </p>
 * <pre><code>
 *   while (reader.hasNext()) {
 *     reader.getNext(cas);
 *     engine.process(cas);
 *     cas.reset();
 *   }
 * </code></pre>
 * 
 * @param readerDesc
 *          The CollectionReader that loads the documents into the CAS.
 * @param descs
 *          Primitive AnalysisEngineDescriptions that process the CAS, in order. If you have a mix
 *          of primitive and aggregate engines, then please create the AnalysisEngines yourself
 *          and call the other runPipeline method.
 * @throws IOException
 *           if there is an I/O problem in the reader
 * @throws ResourceInitializationException 
 *           if there is a problem initializing or running the pipeline.
 * @throws CollectionException 
 *           if there is a problem initializing or running the pipeline.
 * @throws AnalysisEngineProcessException 
 *           if there is a problem initializing or running the pipeline.
 */
public static void runPipeline(final CollectionReaderDescription readerDesc,
        final AnalysisEngineDescription... descs) throws IOException,
        ResourceInitializationException, AnalysisEngineProcessException, CollectionException {
  // Declared outside the try so the finally block can destroy whatever was created.
  ResourceManager sharedResMgr = null;
  CollectionReader sourceReader = null;
  AnalysisEngine aggregate = null;
  try {
    sharedResMgr = ResourceManagerFactory.newResourceManager();

    // Reader instance
    sourceReader = UIMAFramework.produceCollectionReader(readerDesc, sharedResMgr, null);

    // All engine descriptions are combined into one description and instantiated once
    aggregate = UIMAFramework.produceAnalysisEngine(createEngineDescription(descs),
            sharedResMgr, null);

    // One CAS built from the metadata of reader and aggregate, re-used for all documents
    final CAS sharedCas = CasCreationUtils.createCas(
            asList(sourceReader.getMetaData(), aggregate.getMetaData()), null, sharedResMgr);
    sourceReader.typeSystemInit(sharedCas.getTypeSystem());

    // Read, process, reset - until the reader has nothing left
    while (sourceReader.hasNext()) {
      sourceReader.getNext(sharedCas);
      aggregate.process(sharedCas);
      sharedCas.reset();
    }

    // Tell the aggregate that the collection is complete
    aggregate.collectionProcessComplete();
  } finally {
    // Clean up in the same order as before: reader, engine, resource manager
    LifeCycleUtil.destroy(sourceReader);
    LifeCycleUtil.destroy(aggregate);
    LifeCycleUtil.destroy(sharedResMgr);
  }
}