org.apache.uima.collection.CollectionReaderDescription Java Examples

Source File: FormatSupportDescription.java From webanno with Apache License 2.0

6 votes

/**
 * Creates a reader description for the reader class whose name is held in {@code readerClass}.
 *
 * @param aTSD
 *            the type system description passed on to the reader description factory
 * @return the reader description built for the configured reader class
 * @throws ResourceInitializationException
 *             if the class named by {@code readerClass} cannot be found
 * @throws UnsupportedOperationException
 *             if {@code isReadable()} returns {@code false}
 */
@SuppressWarnings("unchecked")
@Override
public CollectionReaderDescription getReaderDescription(TypeSystemDescription aTSD)
    throws ResourceInitializationException
{
    // Formats that cannot be read have no reader to describe.
    if (!isReadable()) {
        throw new UnsupportedOperationException("The format [" + getName() + "] cannot be read");
    }

    final Class<? extends CollectionReader> readerType;
    try {
        // Unchecked cast: the configured name is trusted to denote a CollectionReader.
        readerType = (Class<? extends CollectionReader>) Class.forName(readerClass);
    }
    catch (ClassNotFoundException notFound) {
        throw new ResourceInitializationException(notFound);
    }

    return createReaderDescription(readerType, aTSD);
}

Source File: UimaFactoryInjectionTest.java From uima-uimafit with Apache License 2.0

6 votes

/**
 * Registers factory beans taken from the given application context with the composite
 * resource factory, one per supported specifier type.
 *
 * @param aApplicationContext
 *          the context the factory beans are looked up in
 */
private static void initUimaApplicationContext(final ApplicationContext aApplicationContext) {
  // The registration happens in the instance initializer of an anonymous framework subclass;
  // the created instance itself is discarded.
  new UIMAFramework_impl() {
    {
      final CompositeResourceFactory_impl registry =
              (CompositeResourceFactory_impl) getResourceFactory();

      registry.registerFactory(
              CasConsumerDescription.class,
              aApplicationContext.getBean(CasConsumerFactory_impl.class));
      registry.registerFactory(
              CasInitializerDescription.class,
              aApplicationContext.getBean(CasInitializerFactory_impl.class));
      registry.registerFactory(
              CollectionReaderDescription.class,
              aApplicationContext.getBean(CollectionReaderFactory_impl.class));
      registry.registerFactory(
              ResourceCreationSpecifier.class,
              aApplicationContext.getBean(AnalysisEngineFactory_impl.class));
      registry.registerFactory(
              CustomResourceSpecifier.class,
              aApplicationContext.getBean(CustomResourceFactory_impl.class));
    }
  };
}

Source File: CollectionReaderDescription_implTest.java From uima-uimaj with Apache License 2.0

6 votes

/**
 * Round-trips the test descriptor through XML and checks that the parsed copy equals the
 * original. Any exception is routed to {@code JUnitExtension.handleException}.
 */
public void testXMLization() throws Exception {
  try {
    // Serialize the descriptor into an in-memory XML string.
    final StringWriter xmlOut = new StringWriter();
    mTestDesc.toXML(xmlOut);
    final String xml = xmlOut.toString();

    // Parse the XML back into a descriptor (no schema validation).
    final InputStream xmlIn = new ByteArrayInputStream(xml.getBytes(encoding));
    final XMLInputSource source = new XMLInputSource(xmlIn, null);
    final CollectionReaderDescription reparsed =
            (CollectionReaderDescription) UIMAFramework.getXMLParser().parse(source);

    // The round-tripped descriptor must equal the one we started with.
    Assert.assertEquals(mTestDesc, reparsed);
  } catch (Exception e) {
    JUnitExtension.handleException(e);
  }
}

Source File: CasMultiplierTest.java From uima-uimafit with Apache License 2.0

6 votes

/**
 * Iterates a pipeline of three text incrementors followed by a consumer and checks that each
 * yielded CAS carries the next expected value, starting at 4.
 */
@Ignore("UIMA-3470 not fixed yet")
@Test
public void testIteratePipelineOnText() throws Exception {
  final CollectionReaderDescription readerDesc = createReaderDescription(Reader.class);
  final AnalysisEngineDescription increment = createEngineDescription(TextIncrementor.class);
  final AnalysisEngineDescription sink = createEngineDescription(Consumer.class);

  // The same incrementor description is used three times in the pipeline.
  int expected = 4;
  for (JCas current : iteratePipeline(readerDesc, increment, increment, increment, sink)) {
    assertEquals(expected, Consumer.textResult);
    assertEquals(expected, Integer.parseInt(current.getDocumentText()));
    expected += 1;
  }
}

Source File: AggregateCollectionReader.java From bluima with Apache License 2.0

6 votes

/**
 * Creates an aggregate over the given readers.
 * <p>
 * The reader is initialized from a description created for this class, with that description's
 * configuration parameter settings passed along as an additional parameter. The first
 * reader is fetched from the list immediately, so the list is expected to be non-empty.
 *
 * @param readers
 *            the readers to aggregate
 * @param tsd
 *            the type system description used to create the reader description
 * @throws RuntimeException
 *             wrapping any {@link ResourceInitializationException} raised during setup
 */
public AggregateCollectionReader(List<CollectionReader> readers,
        TypeSystemDescription tsd) {
    try {
        final CollectionReaderDescription description = CollectionReaderFactory
                .createReaderDescription(AggregateCollectionReader.class, tsd);
        final ConfigurationParameterSettings settings = description.getMetaData()
                .getConfigurationParameterSettings();

        final Map<String, Object> extraParams = new HashMap<String, Object>();
        extraParams.put(CollectionReader.PARAM_CONFIG_PARAM_SETTINGS, settings);
        initialize(description, extraParams);

        this.readers = readers;
        this.readerIterator = this.readers.iterator();
        this.currentReader = this.readerIterator.next();
    } catch (ResourceInitializationException rie) {
        throw new RuntimeException(rie);
    }
}

Source File: Conll2003AidaReaderTest.java From ambiverse-nlu with Apache License 2.0

6 votes

/**
 * Runs the AIDA reader over the small CoNLL-YAGO test collection, restricted to the given
 * document range, and dumps the resulting CASes to {@code casdump.txt}.
 *
 * @param begin value passed as {@code PARAM_FIRSTDOCUMENT}
 * @param end   value passed as {@code PARAM_LASTDOCUMENT}
 */
private void callReader(int begin, int end) throws NoSuchMethodException, MissingSettingException, IOException, ClassNotFoundException, UIMAException {
    final CollectionReaderDescription aidaReader = Reader.getCollectionReaderDescription(
            Reader.COLLECTION_FORMAT.AIDA,
            PARAM_SOURCE_LOCATION, "src/test/resources/ner/test_collections/",
            PARAM_PATTERNS, "CoNLL-YAGO_ext_small_en.tsv", // 5 docs total
            PARAM_LANGUAGE, "en",
            PARAM_SINGLE_FILE, true,
            PARAM_FIRSTDOCUMENT, begin,
            PARAM_LASTDOCUMENT, end,
            PARAM_ORDER, OrderType.WORD_POS_POSITION_MENTION_ENTITY_TYPE);

    final AnalysisEngineDescription casDump = AnalysisEngineFactory.createEngineDescription(
            CasDumpWriter.class,
            PARAM_OUTPUT_FILE, "casdump.txt");

    SimplePipeline.runPipeline(aidaReader, casDump);
}

Source File: CasMultiplierTest.java From uima-uimafit with Apache License 2.0

6 votes

/**
 * Runs a reader through an aggregate of three incrementors and a consumer, then checks the
 * values the consumer collected.
 * <p>
 * Simulates a CPE with CAS multipliers that always read one CAS and always produce one CAS.
 * It actually appears to work despite CPE not supporting CAS multipliers.
 */
@SuppressWarnings("javadoc")
@Test
public void testRunPipeline() throws Exception {
  final CollectionReaderDescription readerDesc = createReaderDescription(Reader.class);
  final AnalysisEngineDescription increment = createEngineDescription(Incrementor.class);
  final AnalysisEngineDescription sink = createEngineDescription(Consumer.class);

  // One aggregate: the same incrementor three times, then the consumer.
  final AnalysisEngineDescription pipeline =
          createEngineDescription(increment, increment, increment, sink);

  runPipeline(readerDesc, pipeline);

  // The order in which the consumer sees the CASes is arbitrary, in particular because we never
  // tell the CPE that the aggregate which contains the consumer cannot be scaled out.
  final boolean multipleDeploymentAllowed = pipeline.getAnalysisEngineMetaData()
          .getOperationalProperties().isMultipleDeploymentAllowed();
  assertFalse(multipleDeploymentAllowed);

  // Sort before comparing, since arrival order is not guaranteed.
  Collections.sort(Consumer.result);
  assertEquals(asList(4,5,6,7,8,9,10,11,12,13), Consumer.result);
}

Source File: JCasIterator.java From uima-uimafit with Apache License 2.0

6 votes

/**
 * Iterate over the documents loaded by the given reader, running the analysis engines on each
 * one before yielding them. By default, components <b>DO get</b> life-cycle events, such as
 * collectionProcessComplete or destroy when this constructor is used.
 * 
 * @param aResMgr
 *          The {@link ResourceManager} used to create the components and the JCas. If this
 *          parameter is {@code null} then {@link ResourceManagerFactory#newResourceManager()}
 *          will be used to obtain a resource manager. If a new resource manager was internally
 *          created, it is destroyed at the end of the pipeline (if {@link #isSelfDestroy()}).
 * @param aReader
 *          The CollectionReader for loading documents.
 * @param aEngines
 *          The AnalysisEngines for processing documents.
 * @throws ResourceInitializationException
 *           if a failure occurs during initialization of the components
 * @throws CASException
 *           if the JCas could not be initialized
 */
public JCasIterator(final ResourceManager aResMgr, final CollectionReaderDescription aReader,
        final AnalysisEngineDescription... aEngines)
        throws CASException, ResourceInitializationException {
  selfComplete = true;
  selfDestroy = true;
  
  if (aResMgr == null) {
    resMgr = newResourceManager();
    resourceManagerCreatedInternally = true;
  }
  else {
    resMgr = aResMgr;
    resourceManagerCreatedInternally = false;
  }
  
  collectionReader = produceCollectionReader(aReader, resMgr, null);

  analysisEngines = new AnalysisEngine[] {
      produceAnalysisEngine(createEngineDescription(aEngines), resMgr, null) };
  
  jCas = createCas(resMgr, collectionReader, analysisEngines);
  collectionReader.typeSystemInit(jCas.getTypeSystem());
}

Source File: CpePipeline.java From uima-uimafit with Apache License 2.0

5 votes

/**
 * Run the CollectionReader and AnalysisEngines as a multi-threaded pipeline.
 * <p>
 * The call blocks until the status listener reports that processing has finished. If the
 * waiting thread is interrupted, the wait is abandoned and the thread's interrupt flag is
 * restored so that callers can observe the interruption.
 * 
 * @param parallelism
 *          Number of threads to use when running the analysis engines in the CPE.
 * @param readerDesc
 *          The CollectionReader that loads the documents into the CAS.
 * @param descs
 *          Primitive AnalysisEngineDescriptions that process the CAS, in order. If you have a mix
 *          of primitive and aggregate engines, then please create the AnalysisEngines yourself
 *          and call the other runPipeline method.
 * @throws SAXException
 *           if there was a XML-related problem materializing the component descriptors that are
 *           referenced from the CPE descriptor
 * @throws IOException
 *           if there was a I/O-related problem materializing the component descriptors that are
 *           referenced from the CPE descriptor
 * @throws CpeDescriptorException
 *           if there was a problem configuring the CPE descriptor
 * @throws ResourceInitializationException 
 *           if there was a problem initializing or running the CPE.
 * @throws InvalidXMLException 
 *           if there was a problem initializing or running the CPE.
 * @throws AnalysisEngineProcessException 
 *           if there was a problem running the CPE; wraps the first exception recorded by the
 *           status listener.
 */
public static void runPipeline(final int parallelism,
        final CollectionReaderDescription readerDesc, final AnalysisEngineDescription... descs)
        throws SAXException, CpeDescriptorException, IOException, ResourceInitializationException,
        InvalidXMLException, AnalysisEngineProcessException {
  // Create AAE
  final AnalysisEngineDescription aaeDesc = createEngineDescription(descs);

  CpeBuilder builder = new CpeBuilder();
  builder.setReader(readerDesc);
  builder.setAnalysisEngine(aaeDesc);
  // Honor the caller-supplied thread count instead of deriving one from the CPU count.
  builder.setMaxProcessingUnitThreadCount(parallelism);

  StatusCallbackListenerImpl status = new StatusCallbackListenerImpl();
  CollectionProcessingEngine engine = builder.createCpe(status);

  engine.process();
  try {
    // Wait in a loop to guard against spurious wake-ups.
    synchronized (status) {
      while (status.isProcessing) {
        status.wait();
      }
    }
  } catch (InterruptedException e) {
    // Stop waiting, but keep the interruption visible to the caller.
    Thread.currentThread().interrupt();
  }

  if (status.exceptions.size() > 0) {
    throw new AnalysisEngineProcessException(status.exceptions.get(0));
  }
}

Source File: MultiPageEditor.java From uima-uimaj with Apache License 2.0

5 votes

/**
 * Copies the implementation name and framework implementation from the current
 * {@code aeDescription} into the given collection reader description, then links the common
 * collection descriptors.
 *
 * @param d the collection reader description to update
 */
// **************************************************************
private void linkLocalProcessingDescriptorsFromAe(CollectionReaderDescription d) {
  final String implementationName = aeDescription.getAnnotatorImplementationName();
  d.setImplementationName(implementationName);

  final String frameworkImplementation = aeDescription.getFrameworkImplementation();
  d.setFrameworkImplementation(frameworkImplementation);

  linkCommonCollectionDescriptorsFromAe(d);
}

Source File: Conll2003ReaderTcBmeow.java From ambiverse-nlu with Apache License 2.0

5 votes

/**
 * Initializes this component and creates the wrapped reader from {@code readerClassName} and
 * the configuration parameters derived from the given context.
 *
 * @param aContext the UIMA context passed to the superclass and used to collect parameters
 * @throws ResourceInitializationException if initialization or reader creation fails
 */
@Override
public void initialize(UimaContext aContext) throws ResourceInitializationException {
    super.initialize(aContext);

    final Object[] readerParams = getConfigurationParams(aContext);
    final CollectionReaderDescription description =
            createReaderDescription(readerClassName, readerParams);

    reader = UIMAFramework.produceCollectionReader(description, getResourceManager(), null);
}

Source File: XMLParser_impl.java From uima-uimaj with Apache License 2.0

5 votes

/**
 * Parses the given input and returns the result as a {@link CollectionReaderDescription}.
 *
 * @param aInput
 *          the XML input to parse
 * @param aOptions
 *          the parsing options passed through to the generic parse call
 * @return the parsed collection reader description
 * @throws InvalidXMLException
 *           with key {@code INVALID_CLASS} if the parsed object is not a
 *           {@link CollectionReaderDescription}
 */
public CollectionReaderDescription parseCollectionReaderDescription(XMLInputSource aInput,
        ParsingOptions aOptions) throws InvalidXMLException {
  // attempt to locate resource specifier schema
  final XMLizable parsed = parse(aInput, RESOURCE_SPECIFIER_NAMESPACE, SCHEMA_URL, aOptions);

  if (!(parsed instanceof CollectionReaderDescription)) {
    // Report the expected class name alongside the class that was actually parsed.
    final Object[] messageArgs = new Object[] {
        CollectionReaderDescription.class.getName(), parsed.getClass().getName() };
    throw new InvalidXMLException(InvalidXMLException.INVALID_CLASS, messageArgs);
  }

  return (CollectionReaderDescription) parsed;
}

Source File: ManualEvaluation.java From ambiverse-nlu with Apache License 2.0

5 votes

/**
 * Evaluates one trained file: creates an evaluation directory next to it, then runs a mention
 * span evaluation pipeline followed by a token/mention evaluation pipeline, both writing
 * their reports into that directory.
 *
 * @param fileName name of the trained file, relative to {@code directory}
 * @throws IOException   if the evaluation directory cannot be created
 * @throws UIMAException if building or running a pipeline fails
 */
private static void evaluateTrainedFile(String fileName) throws IOException, UIMAException {
    String trainedFile = directory + fileName;

    final Path directoryPath;
    if (!singleLabelling) {
        directoryPath = Paths.get(trainedFile + "-evaluation");
    } else {
        directoryPath = Paths.get(directory, fileName + "-singleLabel-evaluation");
        // Evaluate against the single-labelled variant of the trained file.
        trainedFile = generateSingleLabeledFile(trainedFile).toString();
    }

    Files.createDirectory(directoryPath);
    final String outputDir = directoryPath.toString();

    // produce jcas with trained file
    final CollectionReaderDescription reader = createReaderDescription(
            Conll2003AidaReader.class,
            PARAM_LANGUAGE, language,
            Conll2003AidaReader.PARAM_SINGLE_FILE, true,
            Conll2003AidaReader.PARAM_ORDER, WORD_POSITION_TYPE,
            PARAM_SOURCE_LOCATION, trainedFile,
            Conll2003AidaReader.PARAM_MANUAL_TOKENS_NER, false,
            Conll2003AidaReader.PARAM_NAMED_ENTITY_PER_TOKEN, true);

    final AnalysisEngineDescription mentionSpansWriter = createEngineDescription(
            MentionSpansEvaluationWriter.class,
            MentionSpansEvaluationWriter.PARAM_OUTPUT_FILE, outputDir + "/ManualSpanEvaluation.txt");

    System.out.println("Running mention spans evaluation");
    SimplePipeline.runPipeline(reader,
            manualAnnotatorPerMentionDescription,
            nerMentionAnnotatorDescription,
            mentionSpansWriter);

    final AnalysisEngineDescription predictionsWriter = createEngineDescription(
            PredictionsWriter.class,
            PredictionsWriter.PARAM_LANGUAGE, language,
            PredictionsWriter.PARAM_MENTION_OUTPUT_FILE, outputDir + "/ConllMentionEvaluation.txt",
            PredictionsWriter.PARAM_TOKEN_OUTPUT_FILE, outputDir + "/ConllTokenEvaluation.txt",
            PredictionsWriter.PARAM_KNOW_NER, true,
            PredictionsWriter.PARAM_POSITION_TYPE, ConllEvaluation.TrainedPositionType.ORIGINAL);

    System.out.println("Running tokens and mentions evaluation");
    SimplePipeline.runPipeline(reader, manualAnnotatorPerTokenDescription, predictionsWriter);
}

Source File: CollectionReaderDescription_implTest.java From uima-uimaj with Apache License 2.0

5 votes

/**
 * Round-trips the test descriptor through serialization and checks that the deserialized copy
 * equals the original. Any exception is routed to {@code JUnitExtension.handleException}.
 */
public void testSerialization() throws Exception {
  try {
    // Serialize the descriptor to a byte array ...
    final byte[] serialized = SerializationUtils.serialize(mTestDesc);

    // ... and read it back.
    final Object restored = SerializationUtils.deserialize(serialized);
    final CollectionReaderDescription copy = (CollectionReaderDescription) restored;

    Assert.assertEquals(mTestDesc, copy);
  } catch (Exception e) {
    JUnitExtension.handleException(e);
  }
}

Source File: ExternalResourceFactory.java From uima-uimafit with Apache License 2.0

5 votes

/**
 * Stores the given external resource dependencies on a resource specifier. The setter is
 * declared separately on each supported sub-type, so the specifier is dispatched by type.
 * 
 * @param aDesc
 *          the specifier to update; must be a collection reader or analysis engine description
 * @param aDependencies
 *          the dependencies to store
 * @throws IllegalArgumentException
 *           if the sub-class passed is not supported.
 */
private static void setResourceDependencies(ResourceSpecifier aDesc,
        ExternalResourceDependency[] aDependencies) {
  if (aDesc instanceof CollectionReaderDescription) {
    final CollectionReaderDescription readerDesc = (CollectionReaderDescription) aDesc;
    readerDesc.setExternalResourceDependencies(aDependencies);
    return;
  }

  if (aDesc instanceof AnalysisEngineDescription) {
    final AnalysisEngineDescription engineDesc = (AnalysisEngineDescription) aDesc;
    engineDesc.setExternalResourceDependencies(aDependencies);
    return;
  }

  throw new IllegalArgumentException(
          "Resource specified cannot have external resource dependencies");
}

Source File: SparkUimaUtils.java From ambiverse-nlu with Apache License 2.0

5 votes

/**
 * Reads documents with the NYT collection reader and writes each one that carries document
 * metadata to a sequence file, keyed by its document ID.
 * <p>
 * The writer is closed in a {@code finally} block so that it is released even when reading or
 * appending fails part-way through.
 *
 * @param params configuration parameters passed to the NYT reader description
 * @param uri    location of the sequence file to create
 * @throws IOException if writing fails or a document has no metadata to take an ID from
 */
public static void createSequenceFile(Object[] params, String uri)
    throws URISyntaxException, IOException, UIMAException, NoSuchMethodException, MissingSettingException, ClassNotFoundException {
  Configuration conf = new Configuration();
  Path path = new Path(uri);
  Writer writer =
      SequenceFile.createWriter(
          conf, Writer.file(path),
          Writer.keyClass(Text.class),
          Writer.valueClass(SCAS.class));

  try {
    int count = 0;

    CollectionReaderDescription readerDescription = Reader.getCollectionReaderDescription(Reader.COLLECTION_FORMAT.NYT, params);
    for (JCas jCas : SimplePipelineCasPoolIterator.iteratePipeline(20, readerDescription)) {
      // Documents without metadata are skipped, but still released below.
      if (JCasUtil.exists(jCas, DocumentMetaData.class)) {
        ++count;
        // Get the ID.
        DocumentMetaData dmd = JCasUtil.selectSingle(jCas, DocumentMetaData.class);
        if (dmd == null) {
          throw new IOException("No Document ID for xml: " + jCas.getView("xml").getDocumentText());
        }
        Text docIdText = new Text(dmd.getDocumentId());
        SCAS scas = new SCAS(jCas.getCas());
        writer.append(docIdText, scas);
      }
      jCas.release();
    }
    logger.info("Wrote " + count + " documents to " + uri);
  } finally {
    // Always release the writer, including on failure.
    IOUtils.closeStream(writer);
  }
}

Source File: ExternalResourceFactory.java From uima-uimafit with Apache License 2.0

5 votes

/**
 * Reads the external resource dependencies from a resource specifier. The getter is declared
 * separately on each supported sub-type, so the specifier is dispatched by type.
 * 
 * @param aDesc
 *          the specifier to query; must be a collection reader or analysis engine description
 * @return the dependencies reported by the specifier
 * @throws IllegalArgumentException
 *           if the sub-class passed is not supported.
 */
private static ExternalResourceDependency[] getResourceDependencies(
        ResourceSpecifier aDesc) {
  if (aDesc instanceof CollectionReaderDescription) {
    final CollectionReaderDescription readerDesc = (CollectionReaderDescription) aDesc;
    return readerDesc.getExternalResourceDependencies();
  }

  if (aDesc instanceof AnalysisEngineDescription) {
    final AnalysisEngineDescription engineDesc = (AnalysisEngineDescription) aDesc;
    return engineDesc.getExternalResourceDependencies();
  }

  throw new IllegalArgumentException(
          "Resource specified cannot have external resource dependencies");
}

Source File: CpeBuilder.java From bluima with Apache License 2.0

5 votes

/**
 * Replaces any collection readers on the CPE descriptor with one produced from the given
 * description. The description is materialized first and then referenced by URL.
 *
 * @param aDesc the collection reader description to install
 */
@Override
public void setReader(CollectionReaderDescription aDesc)
        throws IOException, SAXException, CpeDescriptorException {
    // Remove all collection readers
    cpeDesc.setAllCollectionCollectionReaders(new CpeCollectionReader[0]);

    final String descriptorLocation = materializeDescriptor(aDesc).toURI().toURL().toString();
    final CpeCollectionReader cpeReader = produceCollectionReader(descriptorLocation);
    cpeDesc.addCollectionReader(cpeReader);
}

Source File: MultiPageEditor.java From uima-uimaj with Apache License 2.0

5 votes

/**
 * Creates a fresh analysis engine description, copies the implementation name and framework
 * implementation from the given collection reader description into it, and then links the
 * local processing descriptors to it.
 *
 * @param d the collection reader description to copy from
 * @throws ResourceInitializationException the resource initialization exception
 */
private void createAndLinkLocalProcessingDescriptorsToAe(CollectionReaderDescription d)
        throws ResourceInitializationException {
  aeDescription = UIMAFramework.getResourceSpecifierFactory().createAnalysisEngineDescription();

  final String implementationName = d.getImplementationName();
  aeDescription.setAnnotatorImplementationName(implementationName);

  final String frameworkImplementation = d.getFrameworkImplementation();
  aeDescription.setFrameworkImplementation(frameworkImplementation);

  linkLocalProcessingDescriptorsToAe(d);
}

Source File: CpeBuilder.java From bluima with Apache License 2.0

5 votes

/**
 * Creates a builder with the given reader and maximum processing unit thread count.
 *
 * @param aMaxProcessingUnitThreatCount
 *            the value passed to the thread count setter
 * @param aDesc
 *            the collection reader description passed to {@code setReader}
 * @deprecated use default ctor and setters instead
 */
@Deprecated
public CpeBuilder(int aMaxProcessingUnitThreatCount,
        CollectionReaderDescription aDesc) throws IOException,
        SAXException, CpeDescriptorException {
    // Reader first, then thread count.
    this.setReader(aDesc);
    this.setMaxProcessingUnitThreatCount(aMaxProcessingUnitThreatCount);
}

Source File: UimaHelpers.java From biomedicus with Apache License 2.0

5 votes

/**
 * Parses a collection reader description from the XML descriptor at the given path.
 *
 * @param path location of the descriptor file
 * @return the parsed collection reader description
 * @throws BiomedicusException wrapping any I/O or invalid-XML failure
 */
public static CollectionReaderDescription loadCollectionReaderDescription(Path path)
    throws BiomedicusException {
  try {
    final XMLInputSource descriptorSource = new XMLInputSource(path.toFile());
    return UIMAFramework.getXMLParser().parseCollectionReaderDescription(descriptorSource);
  } catch (IOException | InvalidXMLException e) {
    throw new BiomedicusException(e);
  }
}

Source File: CollectionReaderFactory_implTest.java From uima-uimaj with Apache License 2.0

5 votes

/**
 * Checks that producing a collection reader from a description with an unknown framework
 * implementation fails with a localized
 * {@code UNSUPPORTED_FRAMEWORK_IMPLEMENTATION} error.
 */
public void testInvalidFrameworkImplementation() {
  CollectionReaderDescription desc = new CollectionReaderDescription_impl();
  desc.setFrameworkImplementation("foo");
  try {
    // Typed empty map instead of the raw Collections.EMPTY_MAP constant.
    ccFactory.produceResource(CollectionReader.class, desc,
            Collections.<String, Object>emptyMap());
    fail();
  } catch (ResourceInitializationException e) {
    assertNotNull(e.getMessage());
    assertFalse(e.getMessage().startsWith("EXCEPTION MESSAGE LOCALIZATION FAILED"));
    // Expected value first, so a failure report labels the two values correctly.
    assertEquals(ResourceInitializationException.UNSUPPORTED_FRAMEWORK_IMPLEMENTATION,
            e.getMessageKey());
  }
}

Source File: CasDataCollectionReader_ImplBase.java From uima-uimaj with Apache License 2.0

5 votes

/**
 * Called by the framework to initialize this Collection Reader. Subclasses should generally NOT
 * override this method; instead they should override the zero-argument {@link #initialize()}
 * method and access metadata via the {@link #getProcessingResourceMetaData()} method. This method
 * is non-final only for legacy reasons.
 * 
 * @return {@code true} if the specifier is a {@link CollectionReaderDescription} and the
 *         framework initialization succeeded; {@code false} otherwise
 * @see org.apache.uima.resource.Resource#initialize(org.apache.uima.resource.ResourceSpecifier,
 *      java.util.Map)
 */
public boolean initialize(ResourceSpecifier aSpecifier, Map<String, Object> aAdditionalParams)
        throws ResourceInitializationException {
  // Only collection reader descriptions are accepted.
  if (!(aSpecifier instanceof CollectionReaderDescription)) {
    return false;
  }

  // Framework initialization must succeed before user initialization runs.
  if (!super.initialize(aSpecifier, aAdditionalParams)) {
    return false;
  }

  // User initialization hook.
  initialize();
  return true;
}

Source File: CollectionReaderFactoryTest.java From uima-uimafit with Apache License 2.0

5 votes

/**
 * Verifies auto-detection when creating a reader description: the Token, Sentence and
 * AnalyzedText types are found, there is exactly one type priority list in the expected
 * order, and the reader metadata carries exactly one scanned index on Token.
 */
@Test
public void thatCreateReaderDescriptorAutoDetectionWorks() throws Exception
{
  final CollectionReaderDescription readerDesc = createReaderDescription(TestCR.class);

  final String tokenType = Token.class.getName();
  final String sentenceType = Sentence.class.getName();
  final String analyzedTextType = AnalyzedText.class.getName();

  // Type system auto-detection.
  final TypeSystemDescription detectedTypes = createTypeSystemDescription();
  assertThat(detectedTypes.getType(tokenType))
      .as("Token type auto-detection")
      .isNotNull();
  assertThat(detectedTypes.getType(sentenceType))
      .as("Sentence type auto-detection")
      .isNotNull();
  assertThat(detectedTypes.getType(analyzedTextType))
      .as("AnalyzedText type auto-detection")
      .isNotNull();

  // Type priorities auto-detection.
  final TypePriorityList[] priorityLists = typePriorities.getPriorityLists();
  assertThat(priorityLists.length).isEqualTo(1);
  assertThat(priorityLists[0].getTypes())
      .as("Type priorities auto-detection")
      .containsExactly(sentenceType, analyzedTextType, tokenType);

  // Index auto-detection on the reader's own metadata.
  final FsIndexDescription[] scannedIndexes =
      readerDesc.getCollectionReaderMetaData().getFsIndexCollection().getFsIndexes();
  assertThat(scannedIndexes.length).isEqualTo(1);
  assertThat(scannedIndexes[0])
      .extracting(FsIndexDescription::getLabel, FsIndexDescription::getTypeName, FsIndexDescription::getKind)
      .containsExactly("Automatically Scanned Index", tokenType, FsIndexDescription.KIND_SORTED);
}

Source File: CollectionReaderFactoryTest.java From uima-uimafit with Apache License 2.0

5 votes

/**
 * Checks the name, version, description, copyright and vendor reported in the metadata of the
 * reader description created for {@code TestCR}.
 */
@Test
public void testResourceMetaData() throws Exception
{
  final CollectionReaderDescription readerDesc =
          CollectionReaderFactory.createReaderDescription(TestCR.class);

  final org.apache.uima.resource.metadata.ResourceMetaData metaData = readerDesc.getMetaData();

  assertEquals("dummy", metaData.getName());
  assertEquals("1.0", metaData.getVersion());
  assertEquals("Just a dummy", metaData.getDescription());
  assertEquals("ASL 2.0", metaData.getCopyright());
  assertEquals("uimaFIT", metaData.getVendor());
}

Source File: DescriptorMakeUtil.java From uima-uimaj with Apache License 2.0

5 votes

/**
 * Parses a collection reader descriptor, overrides its document-count and (optionally)
 * error-injection parameters, and writes the result to {@code TmpCollectionReader.xml} in the
 * {@code CpmTests/CpeDesc} test directory.
 * <p>
 * The output stream is closed in a {@code finally} block so the file handle is released even
 * if serialization fails.
 *
 * @param descFileName  path of the descriptor to start from
 * @param shouldCrash   whether to set the error-injection parameters
 * @param functionName  value for {@code ErrorFunction} (used only if {@code shouldCrash})
 * @param errorCount    value for {@code ErrorCount} (used only if {@code shouldCrash})
 * @param exceptionName value for {@code ErrorException} (used only if {@code shouldCrash})
 * @param documentCount value for {@code DocumentCount}
 * @return absolute path of the temporary descriptor that was written
 * @throws Exception if parsing or writing the descriptor fails
 */
public static String makeCollectionReader(String descFileName, boolean shouldCrash,
        String functionName, int errorCount, String exceptionName, int documentCount)
        throws Exception {

  XMLInputSource in = new XMLInputSource(descFileName);
  CollectionReaderDescription crd = UIMAFramework.getXMLParser()
          .parseCollectionReaderDescription(in);
  crd.getCollectionReaderMetaData().getConfigurationParameterSettings().setParameterValue(
          "DocumentCount", documentCount);
  // set the function to crash, if desired
  if (shouldCrash) {
    crd.getCollectionReaderMetaData().getConfigurationParameterSettings().setParameterValue(
            "ErrorFunction", functionName);
    crd.getCollectionReaderMetaData().getConfigurationParameterSettings().setParameterValue(
            "ErrorCount", errorCount);
    crd.getCollectionReaderMetaData().getConfigurationParameterSettings().setParameterValue(
            "ErrorException", exceptionName);
  }
  File baseDir = JUnitExtension.getFile("CpmTests" + FS + "CpeDesc");

  if (!baseDir.exists()) {
    baseDir.mkdir();
  }

  File tmpFileName = new File(baseDir, "TmpCollectionReader.xml");
  OutputStream out = new FileOutputStream(tmpFileName);
  try {
    serializeDescriptor(crd, out);
  } finally {
    // Closing again is harmless if serializeDescriptor has already closed the stream.
    out.close();
  }
  return tmpFileName.getAbsolutePath();
}

Source File: Conll2012FormatSupport.java From webanno with Apache License 2.0

5 votes

/**
 * Creates the reader description for the CoNLL 2012 format, with constituent reading
 * switched off.
 *
 * @param aTSD the type system description passed to the reader description factory
 * @return the description of a {@code Conll2012Reader}
 * @throws ResourceInitializationException if the description cannot be created
 */
@Override
public CollectionReaderDescription getReaderDescription(TypeSystemDescription aTSD)
    throws ResourceInitializationException
{
    // Constituents are not supported by WebAnno and trying to read a file which does
    // not have them triggers an NPE in DKPro Core 1.11.0
    final boolean readConstituents = false;

    return createReaderDescription(
            Conll2012Reader.class,
            aTSD,
            Conll2012Reader.PARAM_READ_CONSTITUENT, readConstituents);
}

Source File: XmiFormatSupport.java From webanno with Apache License 2.0

5 votes

/**
 * Creates the reader description for the XMI format, with lenient reading switched on.
 * <p>
 * The supplied type system description is passed to the factory, so the reader is created
 * with the caller's types rather than silently ignoring the argument.
 *
 * @param aTSD the type system description passed to the reader description factory
 * @return the description of an {@code XmiReader}
 * @throws ResourceInitializationException if the description cannot be created
 */
@Override
public CollectionReaderDescription getReaderDescription(TypeSystemDescription aTSD)
    throws ResourceInitializationException
{
    return createReaderDescription(XmiReader.class, aTSD,
            XmiReader.PARAM_LENIENT, true);
}

Source File: AggregateWithReaderTest.java From uima-uimafit with Apache License 2.0

5 votes

/**
 * Demo of running a collection reader as part of an aggregate engine. This allows running a
 * pipeline and accessing the output CASes directly - no need to write the data to files.
 * <p>
 * The aggregate uses a fixed flow of "reader" then "analyzer", declares that it outputs new
 * CASes, and disallows multiple deployment. Each CAS it produces is printed to standard out.
 */
@Test
public void demoAggregateWithReader() throws UIMAException {
  final ResourceSpecifierFactory specs = UIMAFramework.getResourceSpecifierFactory();

  // Delegate 1: the collection reader.
  final CollectionReaderDescription readerDesc = specs.createCollectionReaderDescription();
  readerDesc.getMetaData().setName("reader");
  readerDesc.setImplementationName(SimpleReader.class.getName());

  // Delegate 2: a primitive analysis engine.
  final AnalysisEngineDescription analyzerDesc = specs.createAnalysisEngineDescription();
  analyzerDesc.getMetaData().setName("analyzer");
  analyzerDesc.setPrimitive(true);
  analyzerDesc.setImplementationName(SimpleAnalyzer.class.getName());

  // Run the reader first, then the analyzer.
  final FixedFlow readerThenAnalyzer = specs.createFixedFlow();
  readerThenAnalyzer.setFixedFlow(new String[] { "reader", "analyzer" });

  // The aggregate that hosts both delegates.
  final AnalysisEngineDescription aggregateDesc = specs.createAnalysisEngineDescription();
  aggregateDesc.getMetaData().setName("aggregate");
  aggregateDesc.getAnalysisEngineMetaData().setFlowConstraints(readerThenAnalyzer);
  aggregateDesc.getAnalysisEngineMetaData().getOperationalProperties().setOutputsNewCASes(true);
  aggregateDesc.getAnalysisEngineMetaData().getOperationalProperties()
          .setMultipleDeploymentAllowed(false);
  aggregateDesc.setPrimitive(false);
  aggregateDesc.getDelegateAnalysisEngineSpecifiersWithImports().put("reader", readerDesc);
  aggregateDesc.getDelegateAnalysisEngineSpecifiersWithImports().put("analyzer", analyzerDesc);

  final AnalysisEngine engine = UIMAFramework.produceAnalysisEngine(aggregateDesc);
  for (CasIterator produced = engine.processAndOutputNewCASes(engine.newCAS());
          produced.hasNext();) {
    final CAS output = produced.next();
    System.out.printf("[%s] is [%s]%n", output.getDocumentText(), output.getDocumentLanguage());
  }
}

Source File: TeiReaderTest.java From webanno with Apache License 2.0

5 votes

/**
 * Reads TEI documents from {@code classpath:/local/} and checks the annotation counts and the
 * text of the first sentence of each document. Ignored because no TEI test data is available
 * as open source.
 */
@Test
@Ignore("No TEI yet to opensource ")
public void testTeiReader()
    throws Exception
{
    final String expectedFirstSentence = "70 I DAG.";

    final CollectionReaderDescription teiReader = createReaderDescription(
            TeiReader.class,
            TeiReader.PARAM_LANGUAGE, "en",
            TeiReader.PARAM_SOURCE_LOCATION, "classpath:/local/",
            TeiReader.PARAM_PATTERNS, new String[] { "[+]*.xml" });

    for (JCas document : new JCasIterable(teiReader)) {
        // Print the document ID, text length and language.
        final DocumentMetaData metaData = DocumentMetaData.get(document);
        final String documentText = document.getDocumentText();
        System.out.printf("%s - %d%n", metaData.getDocumentId(), documentText.length());
        System.out.println(document.getDocumentLanguage());

        // Expected annotation counts per type.
        assertEquals(2235, JCasUtil.select(document, Token.class).size());
        assertEquals(745, JCasUtil.select(document, POS.class).size());
        assertEquals(745, JCasUtil.select(document, Lemma.class).size());
        assertEquals(0, JCasUtil.select(document, NamedEntity.class).size());
        assertEquals(30, JCasUtil.select(document, Sentence.class).size());

        final Sentence first = JCasUtil.select(document, Sentence.class).iterator().next();
        assertEquals(expectedFirstSentence, first.getCoveredText());
    }
}

org.apache.uima.collection.CollectionReaderDescription Java Examples