org.apache.uima.fit.factory.CollectionReaderFactory Java Examples

The following examples show how to use org.apache.uima.fit.factory.CollectionReaderFactory. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: ExternalResourceFactoryTest.java    From uima-uimafit with Apache License 2.0 6 votes vote down vote up
/**
 * Verifies that a resource list handed to two engines inside one aggregate is shared by both.
 */
@SuppressWarnings("javadoc")
@Test
public void testMultiValue2() throws Exception {
  MultiValuedResourceAE.resources.clear();

  ExternalResourceDescription firstResource = createResourceDescription(ResourceWithAssert.class);
  ExternalResourceDescription secondResource = createResourceDescription(ResourceWithAssert.class);

  // Both delegates receive the very same pair of resource descriptions.
  AnalysisEngineDescription firstDelegate = createEngineDescription(MultiValuedResourceAE.class,
          MultiValuedResourceAE.RES_RESOURCE_ARRAY, asList(firstResource, secondResource));
  AnalysisEngineDescription secondDelegate = createEngineDescription(MultiValuedResourceAE.class,
          MultiValuedResourceAE.RES_RESOURCE_ARRAY, asList(firstResource, secondResource));
  AnalysisEngineDescription aggregate = createEngineDescription(firstDelegate, secondDelegate);

  CpePipeline.runPipeline(CollectionReaderFactory.createReaderDescription(Reader.class),
          aggregate);

  // Entries 0/1 and 2/3 of the recorded resources are expected to be pairwise equal,
  // i.e. the second delegate saw the same resources as the first one.
  assertEquals(MultiValuedResourceAE.resources.get(0), MultiValuedResourceAE.resources.get(2));
  assertEquals(MultiValuedResourceAE.resources.get(1), MultiValuedResourceAE.resources.get(3));
}
 
Example #2
Source File: BioNLPGeniaEventsCollectionReaderTest.java    From bluima with Apache License 2.0 6 votes vote down vote up
/**
 * Reads every document from a {@link BioNLPGeniaEventsCollectionReader} created without explicit
 * parameters and checks that exactly 259 documents are produced.
 */
@Test
public void test() throws Exception {

    CollectionReader cr = CollectionReaderFactory.createReader(
            BioNLPGeniaEventsCollectionReader.class);

    int i = 0;
    try {
        while (cr.hasNext()) {
            // a fresh CAS is created for every document, based on the reader's metadata
            CAS cas = CasCreationUtils.createCas(cr
                    .getProcessingResourceMetaData());
            cr.getNext(cas);
            i++;
        }
    } finally {
        // release the reader even when reading a document fails
        cr.close();
    }
    assertEquals(259, i);
}
 
Example #3
Source File: BioNLPGeniaEventsReaderTest.java    From bluima with Apache License 2.0 6 votes vote down vote up
/**
 * Reads every document found under {@code TEST_DIR} and checks that exactly 3 documents are
 * produced; each CAS is dumped to the debug log on the way.
 */
@Test
public void testCount() throws Exception {

    CollectionReader cr = CollectionReaderFactory.createReader(
            BioNLPGeniaEventsCollectionReader.class,
            BlueUima.PARAM_INPUT_DIRECTORY, TEST_DIR);

    int i = 0;
    try {
        while (cr.hasNext()) {
            // a fresh CAS is created for every document, based on the reader's metadata
            CAS cas = CasCreationUtils.createCas(cr
                    .getProcessingResourceMetaData());
            cr.getNext(cas);
            LOG.debug(To.string("cas nr " + i, cas.getJCas()));
            i++;
        }
    } finally {
        // release the reader even when reading or logging a document fails
        cr.close();
    }
    assertEquals(3, i);
}
 
Example #4
Source File: Biocreative2GeneCollectionReaderTest.java    From bluima with Apache License 2.0 6 votes vote down vote up
/**
 * Reads the first document of the corpus selected by mode {@code "test"} and checks that it
 * contains exactly two {@link BioEntityMention} annotations.
 */
@Test
public void testTestCorpus() throws Exception {

    CollectionReader cr = CollectionReaderFactory.createReader(
            Biocreative2GeneCollectionReader.class, BlueUima.PARAM_MODE,
            "test");

    try {
        CAS cas = CasCreationUtils
                .createCas(cr.getProcessingResourceMetaData());
        cr.getNext(cas);

        Collection<BioEntityMention> genes = JCasUtil.select(cas.getJCas(),
                BioEntityMention.class);
        assertEquals(2, genes.size());
    } finally {
        // close the reader even when the read or the assertion fails
        cr.close();
    }
}
 
Example #5
Source File: AggregateCollectionReader.java    From bluima with Apache License 2.0 6 votes vote down vote up
/**
 * Creates a reader that is initialized from a uimaFIT description of this class and that
 * starts out positioned on the first of the given readers.
 *
 * @param readers
 *            the readers to aggregate; the first element is fetched immediately, so the list
 *            must not be empty
 * @param tsd
 *            the type system used for the reader description
 * @throws RuntimeException
 *             wrapping the {@link ResourceInitializationException} raised during setup
 */
public AggregateCollectionReader(List<CollectionReader> readers,
        TypeSystemDescription tsd) {
    try {
        CollectionReaderDescription description = CollectionReaderFactory
                .createReaderDescription(AggregateCollectionReader.class, tsd);
        ConfigurationParameterSettings settings = description.getMetaData()
                .getConfigurationParameterSettings();

        // hand the parameter settings of the description over to initialize()
        Map<String, Object> initParams = new HashMap<String, Object>();
        initParams.put(CollectionReader.PARAM_CONFIG_PARAM_SETTINGS, settings);
        initialize(description, initParams);
    } catch (ResourceInitializationException rie) {
        throw new RuntimeException(rie);
    }

    this.readers = readers;
    this.readerIterator = this.readers.iterator();
    currentReader = this.readerIterator.next();
}
 
Example #6
Source File: BaleenCollectionReaderTest.java    From baleen with Apache License 2.0 6 votes vote down vote up
/**
 * Drains a {@link DummyBaleenCollectionReader} by looping on {@code hasNext()} and then
 * destroys it; the test passes when the loop terminates without an exception.
 */
@Test
public void testHasNextLooping() throws Exception {

  ExternalResourceDescription extractorDescription =
      ExternalResourceFactory.createNamedResourceDescription(
          KEY_CONTENT_EXTRACTOR, FakeBaleenContentExtractor.class);

  // key/value configuration passed to uimaFIT as a ready-made argument array
  Object[] readerConfig = {BaleenCollectionReader.KEY_CONTENT_EXTRACTOR, extractorDescription};
  DummyBaleenCollectionReader reader =
      (DummyBaleenCollectionReader)
          CollectionReaderFactory.createReader(DummyBaleenCollectionReader.class, readerConfig);

  while (reader.hasNext()) {
    // the JCas is requested from the singleton again on every iteration
    JCas target = JCasSingleton.getJCasInstance();
    reader.getNext(target.getCas());
  }

  reader.destroy();
}
 
Example #7
Source File: CompressedXmiWriter.java    From argument-reasoning-comprehension-task with Apache License 2.0 6 votes vote down vote up
/**
 * Manual smoke run: reads all {@code *.xmi} files from a fixed temp directory, passes them
 * through a no-op annotator and writes them with {@link CompressedXmiWriter} to a fixed archive.
 *
 * @param args ignored
 * @throws Exception if the pipeline cannot be built or run
 */
public static void main(String[] args)
        throws Exception
{
    String sourceLocation = "/tmp/temp-in";
    String targetArchive = "/tmp/out2.tar.gz";

    SimplePipeline.runPipeline(
            // reader: every XMI file below the source location
            CollectionReaderFactory.createReaderDescription(XmiReader.class,
                    XmiReader.PARAM_SOURCE_LOCATION, sourceLocation,
                    XmiReader.PARAM_PATTERNS, XmiReader.INCLUDE_PREFIX + "*.xmi"),
            // engine 1: does nothing
            AnalysisEngineFactory.createEngineDescription(NoOpAnnotator.class),
            // engine 2: the writer under test
            AnalysisEngineFactory.createEngineDescription(CompressedXmiWriter.class,
                    CompressedXmiWriter.PARAM_OUTPUT_FILE, targetArchive));
}
 
Example #8
Source File: JCasPoolIterable.java    From ambiverse-nlu with Apache License 2.0 6 votes vote down vote up
/**
 * Instantiates the reader and all engines and defines a CAS pool whose type system covers the
 * metadata of every created component.
 *
 * @param casPoolSize number of CASes in the pool named {@code "iterableJcas"}
 * @param aReader description of the collection reader to instantiate
 * @param aEngines descriptions of the analysis engines to instantiate, in order
 * @throws CASException declared for callers; not raised by the statements visible here
 * @throws ResourceInitializationException if a component or the CAS pool cannot be created
 */
public JCasPoolIterable(int casPoolSize, CollectionReaderDescription aReader, AnalysisEngineDescription... aEngines)
    throws CASException, ResourceInitializationException {
  this.selfComplete = false;
  this.selfDestroy = false;
  this.destroyed = false;
  this.casPoolSize = casPoolSize;
  this.collectionReader = CollectionReaderFactory.createReader(aReader);
  analysisEngines = new AnalysisEngine[aEngines.length];
  for (int i = 0; i < aEngines.length; i++) {
    analysisEngines[i] = createEngine(aEngines[i]);
  }

  // Look the CAS manager up once and register the metadata of every component with it
  // before the pool is defined.
  ResourceManager rm = ResourceManagerFactory.newResourceManager();
  casManager = rm.getCasManager();
  casManager.addMetaData(collectionReader.getProcessingResourceMetaData());
  for (AnalysisEngine engine : analysisEngines) {
    casManager.addMetaData(engine.getProcessingResourceMetaData());
  }
  casManager.defineCasPool("iterableJcas", casPoolSize, null);
}
 
Example #9
Source File: ExternalResourceFactoryTest.java    From uima-uimafit with Apache License 2.0 6 votes vote down vote up
/**
 * Verifies that a resource list stays shared when one of the two consuming engines is wrapped
 * in an additional aggregate.
 */
@SuppressWarnings("javadoc")
@Test
public void testMultiValue3() throws Exception {
  MultiValuedResourceAE.resources.clear();

  ExternalResourceDescription firstResource = createResourceDescription(ResourceWithAssert.class);
  ExternalResourceDescription secondResource = createResourceDescription(ResourceWithAssert.class);

  // One engine sits directly in the outer aggregate ...
  AnalysisEngineDescription directDelegate = createEngineDescription(MultiValuedResourceAE.class,
          MultiValuedResourceAE.RES_RESOURCE_ARRAY, asList(firstResource, secondResource));
  // ... the other one is nested one aggregate level deeper.
  AnalysisEngineDescription wrappedDelegate = createEngineDescription(MultiValuedResourceAE.class,
          MultiValuedResourceAE.RES_RESOURCE_ARRAY, asList(firstResource, secondResource));
  AnalysisEngineDescription innerAggregate = createEngineDescription(wrappedDelegate);
  AnalysisEngineDescription outerAggregate = createEngineDescription(directDelegate,
          innerAggregate);

  CpePipeline.runPipeline(CollectionReaderFactory.createReaderDescription(Reader.class),
          outerAggregate);

  // Entries 0/1 and 2/3 of the recorded resources are expected to be pairwise equal.
  assertEquals(MultiValuedResourceAE.resources.get(0), MultiValuedResourceAE.resources.get(2));
  assertEquals(MultiValuedResourceAE.resources.get(1), MultiValuedResourceAE.resources.get(3));
}
 
Example #10
Source File: ExternalResourceFactoryTest.java    From uima-uimafit with Apache License 2.0 6 votes vote down vote up
/**
 * Runs a pipeline whose engine is bound to two multi-valued resources, each of which in turn
 * holds a list of two plain resources. The test passes when the pipeline completes.
 */
@SuppressWarnings("javadoc")
@Test
public void testMultiValue4() throws Exception {
  // leaves of the first nested list
  ExternalResourceDescription leafA1 = createResourceDescription(ResourceWithAssert.class);
  ExternalResourceDescription leafA2 = createResourceDescription(ResourceWithAssert.class);

  // leaves of the second nested list
  ExternalResourceDescription leafB1 = createResourceDescription(ResourceWithAssert.class);
  ExternalResourceDescription leafB2 = createResourceDescription(ResourceWithAssert.class);

  ExternalResourceDescription nestedListA = createResourceDescription(MultiValuedResource.class,
          MultiValuedResource.RES_RESOURCE_LIST,
          new ExternalResourceDescription[] { leafA1, leafA2 });

  ExternalResourceDescription nestedListB = createResourceDescription(MultiValuedResource.class,
          MultiValuedResource.RES_RESOURCE_LIST,
          new ExternalResourceDescription[] { leafB1, leafB2 });

  AnalysisEngineDescription engine = createEngineDescription(MultiValuedResourceAE.class,
          MultiValuedResourceAE.RES_RESOURCE_ARRAY, asList(nestedListA, nestedListB));

  CpePipeline.runPipeline(CollectionReaderFactory.createReaderDescription(Reader.class), engine);
}
 
Example #11
Source File: Step09AnnotatedDataHTMLExporter.java    From argument-reasoning-comprehension-task with Apache License 2.0 6 votes vote down vote up
/**
 * Converts the input file to an intermediate compressed XMI archive and then runs a pipeline
 * over that archive with {@link ArgumentsToHTMLExporter} and {@link ArgumentDumpWriter}.
 * The intermediate archive is a temp file that is removed afterwards, also on failure.
 *
 * @param inputFile file passed to {@code SingleXMLToXMIExporter.exportToXMIs}
 * @param outputFile value of {@code ArgumentsToHTMLExporter.PARAM_OUTPUT_FILE}
 * @throws Exception if the conversion, the pipeline or the clean-up fails
 */
public static void exportToHTML(File inputFile, File outputFile)
        throws Exception
{
    File intermediateXMIsFile = File.createTempFile("temp", ".xmi.tar.gz");

    try {
        SingleXMLToXMIExporter.exportToXMIs(inputFile, intermediateXMIsFile);

        SimplePipeline.runPipeline(
                CollectionReaderFactory.createReaderDescription(
                        CompressedXmiReader.class,
                        CompressedXmiReader.PARAM_SOURCE_LOCATION, intermediateXMIsFile
                ),
                AnalysisEngineFactory.createEngineDescription(ArgumentsToHTMLExporter.class,
                        ArgumentsToHTMLExporter.PARAM_OUTPUT_FILE, outputFile),
                AnalysisEngineFactory.createEngineDescription(
                        ArgumentDumpWriter.class
                )
        );
    }
    finally {
        // Always clean up the temp file; deleteIfExists avoids a secondary failure when an
        // earlier step already removed it.
        Files.deleteIfExists(intermediateXMIsFile.toPath());
    }
}
 
Example #12
Source File: PipelineBuilder.java    From baleen with Apache License 2.0 5 votes vote down vote up
/**
 * Builds the collection reader named in {@code collectionReaderConfig}.
 *
 * @return the collection reader produced by the UIMA framework
 * @throws BaleenException if no class name is configured or the reader cannot be initialised
 */
private CollectionReader createCollectionReader() throws BaleenException {
  String className = BuilderUtils.getClassNameFromConfig(collectionReaderConfig);
  Map<String, Object> flattened =
      BuilderUtils.flattenConfig(null, BuilderUtils.getParamsFromConfig(collectionReaderConfig));

  if (className == null || className.isEmpty()) {
    throw new InvalidParameterException("Collection Reader class not specified");
  }

  // a missing parameter section is treated as "no parameters"
  Map<String, Object> readerParams =
      flattened != null ? flattened : Collections.<String, Object>emptyMap();

  try {
    Class<? extends CollectionReader> readerClass =
        BuilderUtils.getClassFromString(className, getDefaultReaderPackage());
    Map<String, ExternalResourceDescription> readerResources = getOrCreateResources(readerClass);
    Object[] configuration =
        BuilderUtils.mergeAndExtractParams(
            globalConfig, readerParams, ignoreParams, readerResources);

    return UIMAFramework.produceCollectionReader(
        CollectionReaderFactory.createReaderDescription(readerClass, configuration),
        resourceManager,
        null);
  } catch (ResourceInitializationException e) {
    throw new BaleenException("Couldn't initialize collection reader", e);
  }
}
 
Example #13
Source File: PdfAnnoRendererTest.java    From inception with Apache License 2.0 5 votes vote down vote up
/**
 * Tests if anno file is correctly rendered for a given document: a TCF document is loaded,
 * pre-rendered and rendered against a stored PDF text extract, and the result is compared
 * with a stored reference anno file.
 */
@Test
public void testRender() throws Exception
{
    String file = "src/test/resources/tcf04-karin-wl.xml";

    // "\\Z" as delimiter makes next() return the whole file content in one token; the scanner
    // is closed right after reading so the underlying file handle is not leaked.
    String pdftxt;
    try (Scanner scanner = new Scanner(
            new File("src/test/resources/rendererTestPdfExtract.txt"))) {
        pdftxt = scanner.useDelimiter("\\Z").next();
    }

    CAS cas = JCasFactory.createJCas().getCas();
    CollectionReader reader = CollectionReaderFactory.createReader(TcfReader.class,
        TcfReader.PARAM_SOURCE_LOCATION, file);
    reader.getNext(cas);

    AnnotatorState state = new AnnotatorStateImpl(Mode.ANNOTATION);
    state.setPagingStrategy(new SentenceOrientedPagingStrategy());
    state.getPreferences().setWindowSize(10);
    state.setProject(project);

    // pre-render the complete document text
    VDocument vdoc = new VDocument();
    preRenderer.render(vdoc, 0, cas.getDocumentText().length(), cas,
            schemaService.listAnnotationLayer(project));

    PdfExtractFile pdfExtractFile = new PdfExtractFile(pdftxt, new HashMap<>());
    PdfAnnoRenderer renderer = new PdfAnnoRenderer(schemaService,
            new ColoringServiceImpl(schemaService));
    PdfAnnoModel annoFile = renderer.render(state, vdoc, cas.getDocumentText(), pdfExtractFile,
            0);

    assertThat(annoFile.getAnnoFileContent())
        .isEqualToNormalizingNewlines(contentOf(
                new File("src/test/resources/rendererTestAnnoFile.anno"), UTF_8));
}
 
Example #14
Source File: PubmedCentralCollectionReader.java    From bluima with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a {@link PubmedCentralCollectionReader} that reads from the given directory.
 *
 * @param path value used for {@code BlueUima.PARAM_INPUT_DIRECTORY}
 * @return the configured reader
 */
public static CollectionReader getCR(String path)
        throws ResourceInitializationException, FileNotFoundException {

    CollectionReader pmcReader = CollectionReaderFactory.createReader(
            PubmedCentralCollectionReader.class,
            BlueUima.PARAM_INPUT_DIRECTORY, path);
    return pmcReader;
}
 
Example #15
Source File: PubmedArchiveCollectionReader.java    From bluima with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a {@link PubmedArchiveCollectionReader} for the given location.
 *
 * @param path location that is resolved to a file through {@code ResourceHelper.getFile}
 * @return a reader whose input directory is the absolute path of the resolved file
 */
public static CollectionReader getCR(String path)
		throws ResourceInitializationException, FileNotFoundException {
	// resolve first, so the reader always receives an absolute path
	String inputDirectory = ResourceHelper.getFile(path).getAbsolutePath();

	return CollectionReaderFactory.createReader(
			PubmedArchiveCollectionReader.class,
			BlueUima.PARAM_INPUT_DIRECTORY, inputDirectory);
}
 
Example #16
Source File: PubmedWebServiceCollectionReader.java    From bluima with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a {@link PubmedWebServiceCollectionReader} for a query.
 *
 * @param query value used for {@code BlueUima.PARAM_QUERY}
 * @param nrResults value used for {@code BlueUima.PARAM_MAX_NR_RESULTS}
 * @return the configured reader
 */
public static CollectionReader getCR(String query, int nrResults)
        throws ResourceInitializationException {
    CollectionReader webServiceReader = CollectionReaderFactory.createReader(
            PubmedWebServiceCollectionReader.class,
            BlueUima.PARAM_MAX_NR_RESULTS, nrResults,
            BlueUima.PARAM_QUERY, query);
    return webServiceReader;
}
 
Example #17
Source File: CpePipelineTest.java    From uima-uimafit with Apache License 2.0 5 votes vote down vote up
/**
 * Runs a reader, an annotator and a writer through {@link CpePipeline} and checks that the
 * writer recorded the expected marker.
 */
@Test
public void test() throws Exception {
	CpePipeline.runPipeline(
			CollectionReaderFactory.createReaderDescription(Reader.class),
			AnalysisEngineFactory.createEngineDescription(Annotator.class),
			AnalysisEngineFactory.createEngineDescription(Writer.class));

	// the writer publishes what it has seen through a static field
	Assert.assertEquals(MARKER, Writer.MARKER_SEEN);
}
 
Example #18
Source File: Txt2PubmedIdIndexer.java    From bluima with Apache License 2.0 5 votes vote down vote up
/**
 * Runs {@code MyIndexer} and a statistics annotator over the documents delivered by
 * {@link PubmedWholeDatabaseCR}, using a hard-coded database connection.
 *
 * @param args ignored
 * @throws Exception if the reader or the pipeline fails
 */
public static void main(String[] args) throws Exception {

    // connection settings, passed as one array-valued parameter
    // NOTE(review): element meaning (host, database, user, password) is assumed from the
    // values; confirm against PubmedWholeDatabaseCR.
    String[] dbConnection = { "localhost", "bb_pubmed", "root", "" };

    CollectionReader cr = CollectionReaderFactory.createReader(
            PubmedWholeDatabaseCR.class, PARAM_DB_CONNECTION, dbConnection);

    SimplePipeline.runPipeline(
            cr,
            createEngineDescription(MyIndexer.class),
            createEngineDescription(StatsAnnotatorPlus.class, PARAM_PRINT_EVERY, 50000));
}
 
Example #19
Source File: ExternalResourceFactoryTest.java    From uima-uimafit with Apache License 2.0 5 votes vote down vote up
/**
 * Binds one and the same external resource description to two {@link MultiBindAE} engines and
 * runs them together in an aggregate. The test passes when the pipeline completes.
 */
@Test
public void testMultiBinding() throws Exception {
  ExternalResourceDescription sharedResource = createResourceDescription(ResourceWithAssert.class);

  // each annotator gets its own binding to the shared description
  AnalysisEngineDescription firstEngine = createEngineDescription(MultiBindAE.class,
          MultiBindAE.RES_KEY, sharedResource);
  AnalysisEngineDescription secondEngine = createEngineDescription(MultiBindAE.class,
          MultiBindAE.RES_KEY, sharedResource);

  // reset the engine's static state before the run
  MultiBindAE.reset();
  AnalysisEngineDescription aggregate = createEngineDescription(firstEngine, secondEngine);
  CpePipeline.runPipeline(CollectionReaderFactory.createReaderDescription(Reader.class),
          aggregate);
}
 
Example #20
Source File: RedisTransportsTest.java    From baleen with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a {@link RedisTransportReceiver} that is bound to the shared Redis resource
 * {@code erd} and to a fake content extractor.
 */
private BaleenCollectionReader createReciever() throws ResourceInitializationException {
  // key/value pairs in the order uimaFIT receives them
  Object[] receiverConfig = {
    SharedRedisResource.RESOURCE_KEY,
    erd,
    KEY_CONTENT_EXTRACTOR,
    ExternalResourceFactory.createNamedResourceDescription(
        KEY_CONTENT_EXTRACTOR, FakeBaleenContentExtractor.class)
  };

  return (BaleenCollectionReader)
      CollectionReaderFactory.createReader(
          RedisTransportReceiver.class,
          TypeSystemSingleton.getTypeSystemDescriptionInstance(),
          receiverConfig);
}
 
Example #21
Source File: RabbitMQTransportsTest.java    From baleen with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a {@link RabbitMQTransportReceiver} that is bound to the shared RabbitMQ resource
 * {@code erd} and to a fake content extractor.
 */
private BaleenCollectionReader createReciever() throws ResourceInitializationException {
  // key/value pairs in the order uimaFIT receives them
  Object[] receiverConfig = {
    SharedRabbitMQResource.RESOURCE_KEY,
    erd,
    KEY_CONTENT_EXTRACTOR,
    ExternalResourceFactory.createNamedResourceDescription(
        KEY_CONTENT_EXTRACTOR, FakeBaleenContentExtractor.class)
  };

  return (BaleenCollectionReader)
      CollectionReaderFactory.createReader(
          RabbitMQTransportReceiver.class,
          TypeSystemSingleton.getTypeSystemDescriptionInstance(),
          receiverConfig);
}
 
Example #22
Source File: ActiveMQTransportsTest.java    From baleen with Apache License 2.0 5 votes vote down vote up
/**
 * Creates an {@link ActiveMQTransportReceiver} that is bound to the shared ActiveMQ resource
 * {@code mqerd} and to the content extractor resource {@code ceerd}.
 */
private BaleenCollectionReader createReciever() throws ResourceInitializationException {
  // key/value pairs in the order uimaFIT receives them
  Object[] receiverConfig = {
    SharedActiveMQResource.RESOURCE_KEY, mqerd,
    KEY_CONTENT_EXTRACTOR, ceerd
  };

  return (BaleenCollectionReader)
      CollectionReaderFactory.createReader(
          ActiveMQTransportReceiver.class,
          TypeSystemSingleton.getTypeSystemDescriptionInstance(),
          receiverConfig);
}
 
Example #23
Source File: ExternalResourceFactoryTest.java    From uima-uimafit with Apache License 2.0 5 votes vote down vote up
/**
 * Runs a pipeline whose single engine is bound to a list of two resources. The test passes
 * when the pipeline completes.
 */
@SuppressWarnings("javadoc")
@Test
public void testMultiValue() throws Exception {
  ExternalResourceDescription firstResource = createResourceDescription(ResourceWithAssert.class);
  ExternalResourceDescription secondResource = createResourceDescription(ResourceWithAssert.class);

  AnalysisEngineDescription engine = createEngineDescription(MultiValuedResourceAE.class,
          MultiValuedResourceAE.RES_RESOURCE_ARRAY, asList(firstResource, secondResource));

  CpePipeline.runPipeline(CollectionReaderFactory.createReaderDescription(Reader.class), engine);
}
 
Example #24
Source File: BaleenCollectionReaderTest.java    From baleen with Apache License 2.0 5 votes vote down vote up
/**
 * Walks a {@link FakeCollectionReader} through initialise, progress, hasNext, getNext and
 * destroy, checking after each step the flag the fake reader exposes for it.
 */
@Test
public void test() throws Exception {

  ExternalResourceDescription extractorDescription =
      ExternalResourceFactory.createNamedResourceDescription(
          KEY_CONTENT_EXTRACTOR, FakeBaleenContentExtractor.class);

  FakeCollectionReader reader =
      (FakeCollectionReader)
          CollectionReaderFactory.createReader(
              FakeCollectionReader.class,
              BaleenCollectionReader.KEY_CONTENT_EXTRACTOR,
              extractorDescription);

  // initialise explicitly with a fresh context
  UimaContext uimaContext = UimaContextFactory.createUimaContext();
  reader.initialize(uimaContext);
  assertTrue(reader.initialised);

  // the accessors must all be populated
  assertNotNull(reader.getSupport());
  assertNotNull(reader.getMonitor());
  assertNotNull(reader.getProgress());

  Progress[] reported = reader.getProgress();
  assertEquals("testunits", reported[0].getUnit());

  // hasNext() reports false, but the call itself must have been recorded
  assertFalse(reader.hasNext());
  assertTrue(reader.hasNext);

  reader.getNext((JCas) null);
  assertTrue(reader.getNext);

  reader.destroy();
  assertTrue(reader.closed);
}
 
Example #25
Source File: KafkaTransportsTest.java    From baleen with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a {@link KafkaTransportReceiver} that is bound to the shared Kafka resource
 * {@code erd} and to a fake content extractor.
 */
private BaleenCollectionReader createReciever() throws ResourceInitializationException {
  // key/value pairs in the order uimaFIT receives them
  Object[] receiverConfig = {
    SharedKafkaResource.RESOURCE_KEY,
    erd,
    KEY_CONTENT_EXTRACTOR,
    ExternalResourceFactory.createNamedResourceDescription(
        KEY_CONTENT_EXTRACTOR, FakeBaleenContentExtractor.class)
  };

  return (BaleenCollectionReader)
      CollectionReaderFactory.createReader(
          KafkaTransportReceiver.class,
          TypeSystemSingleton.getTypeSystemDescriptionInstance(),
          receiverConfig);
}
 
Example #26
Source File: MemoryTransportsTest.java    From baleen with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a {@link MemoryTransportReceiver} that is bound to a fake content extractor and to
 * the shared memory queue resource {@code erd}.
 */
private MemoryTransportReceiver createReciever() throws ResourceInitializationException {
  // key/value pairs in the order uimaFIT receives them
  Object[] receiverConfig = {
    KEY_CONTENT_EXTRACTOR,
    ExternalResourceFactory.createNamedResourceDescription(
        KEY_CONTENT_EXTRACTOR, FakeBaleenContentExtractor.class),
    SharedMemoryQueueResource.RESOURCE_KEY,
    erd
  };

  return (MemoryTransportReceiver)
      CollectionReaderFactory.createReader(
          MemoryTransportReceiver.class,
          TypeSystemSingleton.getTypeSystemDescriptionInstance(),
          receiverConfig);
}
 
Example #27
Source File: AbstractReaderTest.java    From baleen with Apache License 2.0 5 votes vote down vote up
/**
 * Creates the reader under test, always binding the content extractor first and then
 * appending the caller's own configuration.
 *
 * @param args additional key/value configuration entries, appended unchanged
 * @return the reader created for {@code readerClass}
 * @throws ResourceInitializationException if uimaFIT cannot create the reader
 */
protected BaleenCollectionReader getCollectionReader(Object... args)
    throws ResourceInitializationException {
  ImmutableList.Builder<Object> configuration = ImmutableList.builder();
  configuration.add(KEY_CONTENT_EXTRACTOR);
  configuration.add(contentExtractor);
  // addAll (not add) so that every caller argument becomes its own entry
  configuration.addAll(Arrays.asList(args));
  Object[] argumentWithExtractor = configuration.build().toArray();

  return (BaleenCollectionReader)
      CollectionReaderFactory.createReader(
          readerClass,
          TypeSystemSingleton.getTypeSystemDescriptionInstance(),
          argumentWithExtractor);
}
 
Example #28
Source File: InformationExtraction2Postgres.java    From newsleak with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Gets the UIMA reader according to the current configuration.
 * <p>
 * For type "hoover" the metadata file of this instance is switched to the temporary hoover
 * metadata file before the reader description is built. Any other type, including
 * {@code null}, is logged as an error and terminates the JVM with exit code 1.
 *
 * @param type
 *            The reader type (e.g. "csv" for externally preprocessed fulltexts
 *            and metadata, or "hoover" for the Hoover text extraction system)
 * @return the reader
 * @throws ResourceInitializationException
 *             the resource initialization exception
 */
public CollectionReaderDescription getReader(String type) throws ResourceInitializationException {
	CollectionReaderDescription reader = null;
	// constant-first comparison: a null type falls through to the "unknown type" branch
	// instead of raising a NullPointerException
	if ("csv".equals(type)) {
		reader = CollectionReaderFactory.createReaderDescription(NewsleakCsvStreamReader.class, this.typeSystem,
				NewsleakCsvStreamReader.PARAM_DOCUMENT_FILE, this.documentFile,
				NewsleakCsvStreamReader.PARAM_METADATA_FILE, this.metadataFile,
				NewsleakCsvStreamReader.PARAM_INPUTDIR, this.dataDirectory,
				NewsleakCsvStreamReader.PARAM_DEFAULT_LANG, this.defaultLanguage,
				NewsleakReader.PARAM_DEBUG_MAX_DOCS, this.debugMaxDocuments, NewsleakReader.PARAM_MAX_DOC_LENGTH,
				this.maxDocumentLength);
	} else if ("hoover".equals(type)) {
		// must happen before getMetadataResourceDescription() is called below
		this.metadataFile = this.hooverTmpMetadata;
		ExternalResourceDescription hooverResource = ExternalResourceFactory.createExternalResourceDescription(
				HooverResource.class, HooverResource.PARAM_HOST, this.hooverHost, HooverResource.PARAM_CLUSTERNAME,
				this.hooverClustername, HooverResource.PARAM_INDEX, this.hooverIndex, HooverResource.PARAM_PORT,
				this.hooverPort, HooverResource.PARAM_SEARCHURL, this.hooverSearchUrl);
		reader = CollectionReaderFactory.createReaderDescription(HooverElasticsearchReader.class, this.typeSystem,
				HooverElasticsearchReader.RESOURCE_HOOVER, hooverResource,
				HooverElasticsearchReader.RESOURCE_METADATA, this.getMetadataResourceDescription(),
				NewsleakReader.PARAM_DEBUG_MAX_DOCS, this.debugMaxDocuments, NewsleakReader.PARAM_MAX_DOC_LENGTH,
				this.maxDocumentLength);
	} else {
		this.logger.log(Level.SEVERE, "Unknown reader type: " + type);
		System.exit(1);
	}
	return reader;
}
 
Example #29
Source File: CompressedXmiReader.java    From argument-reasoning-comprehension-task with Apache License 2.0 5 votes vote down vote up
/**
 * Manual smoke run: reads a fixed compressed XMI archive with {@link CompressedXmiReader} in a
 * pipeline that has no analysis engines.
 *
 * @param args ignored
 * @throws Exception if the pipeline cannot be built or run
 */
public static void main(String[] args)
        throws Exception
{
    String archiveLocation = "/tmp/out.tar.gz";

    SimplePipeline.runPipeline(
            CollectionReaderFactory.createReaderDescription(CompressedXmiReader.class,
                    CompressedXmiReader.PARAM_SOURCE_LOCATION, archiveLocation));
}
 
Example #30
Source File: RunExperiment.java    From uima-uimafit with Apache License 2.0 4 votes vote down vote up
/**
 * Runs the part-of-speech tagging example: a line reader feeds an aggregate that builds a gold
 * view and a system view, tags the system view with a baseline tagger, evaluates it against
 * the gold view and writes every CAS to XMI.
 *
 * @param args ignored
 * @throws UIMAException if a component cannot be created or the pipeline fails
 * @throws IOException if reading the input or writing the output fails
 */
public static void main(String[] args) throws UIMAException, IOException {
  // The sample file lives in a different place depending on whether we run inside the
  // uimaFIT source tree or inside the examples extracted from the binary distribution.
  String samplePosFileName = new File("src/main/resources").exists()
          ? "src/main/resources/org/apache/uima/fit/examples/pos/sample-gold.txt"
          : "src/org/apache/uima/fit/examples/pos/sample-gold.txt";

  // Reader: copies each line of the input file into the default view, one line per CAS.
  CollectionReader lineReader = CollectionReaderFactory.createReader(LineReader.class,
          LineReader.PARAM_INPUT_FILE, samplePosFileName);

  AggregateBuilder builder = new AggregateBuilder();

  // 1. Gold tagger: parses the default view into Token objects with their part-of-speech
  //    tags, which are added to the GOLD_VIEW.
  builder.add(AnalysisEngineFactory.createEngineDescription(GoldTagger.class));

  // 2. Text copier: creates the SYSTEM_VIEW and sets its text to that of the GOLD_VIEW.
  builder.add(AnalysisEngineFactory.createEngineDescription(
          ViewTextCopierAnnotator.class,
          ViewTextCopierAnnotator.PARAM_SOURCE_VIEW_NAME, ViewNames.GOLD_VIEW,
          ViewTextCopierAnnotator.PARAM_DESTINATION_VIEW_NAME, ViewNames.SYSTEM_VIEW));

  // 3. Sentence and token copier: copies Token and Sentence annotations from the GOLD_VIEW
  //    into the SYSTEM_VIEW (VIEW1 is mapped to gold, VIEW2 to system).
  builder.add(AnalysisEngineFactory.createEngineDescription(SentenceAndTokenCopier.class),
          ViewNames.VIEW1, ViewNames.GOLD_VIEW,
          ViewNames.VIEW2, ViewNames.SYSTEM_VIEW);

  // 4. Baseline tagger: runs on the SYSTEM_VIEW, which is mapped to its default sofa.
  builder.add(AnalysisEngineFactory.createEngineDescription(BaselineTagger.class),
          CAS.NAME_DEFAULT_SOFA, ViewNames.SYSTEM_VIEW);

  // 5. Evaluator: compares the part-of-speech tags in the SYSTEM_VIEW with those in the
  //    GOLD_VIEW.
  builder.add(AnalysisEngineFactory.createEngineDescription(Evaluator.class));

  // 6. XMI writer: writes the contents of each CAS (one per sentence) to an XMI file. It is
  //    instructive to open one of these files in the CAS Visual Debugger and look at the
  //    contents of each view.
  builder.add(AnalysisEngineFactory.createEngineDescription(
          XmiWriter.class, XmiWriter.PARAM_OUTPUT_DIRECTORY, "target/examples/pos/xmi"));

  // Run the collection reader and the aggregate AE.
  SimplePipeline.runPipeline(lineReader, builder.createAggregate());
}