org.apache.uima.cas.CAS#setDocumentLanguage

Source File: Conll2003AidaReader.java From ambiverse-nlu with Apache License 2.0

6 votes

/**
 * Initializes the given CAS with document metadata and the document language.
 *
 * <p>A {@code DocumentMetaData} annotation is created for the CAS and its language is set from
 * the {@code language} field; the same value is then set as the CAS document language. The
 * {@code aResource} argument is not consulted by this implementation.
 *
 * @param aCas the CAS to initialize
 * @param aResource the resource being read (unused here)
 * @throws RuntimeException wrapping a {@code CASException} raised while preparing the CAS
 */
@Override
    protected void initCas(CAS aCas, Resource aResource) {
        try {
            // Attach the metadata annotation and record the language on it.
            final DocumentMetaData metaData = DocumentMetaData.create(aCas);
            metaData.setLanguage(language);

            // Mirror the language onto the CAS itself.
            aCas.setDocumentLanguage(language);
        } catch (CASException casFailure) {
            // Not expected in practice; surface it as an unchecked failure.
            throw new RuntimeException(casFailure);
        }
    }

Source File: SerDesTest6.java From uima-uimaj with Apache License 2.0

6 votes

/**
 * Round-trips a CAS holding only a document language and text through compressed
 * serialization and checks that both values are present in the deserialized CAS.
 */
public void testDocText() {
  try {
    // Source CAS carrying the values that must survive the round trip.
    CAS source = CasCreationUtils.createCas((TypeSystemDescription) null, null, null);
    source.setDocumentLanguage("latin");
    source.setDocumentText("test");

    ByteArrayOutputStream serialized = new ByteArrayOutputStream(1024);
    Serialization.serializeWithCompression(source, serialized, source.getTypeSystem());

    // Fresh target CAS populated from the serialized bytes.
    CAS restored = CasCreationUtils.createCas((TypeSystemDescription) null, null, null);
    Serialization.deserializeCAS(restored, new ByteArrayInputStream(serialized.toByteArray()));

    assertEquals("latin", restored.getDocumentLanguage());
    assertEquals("test", restored.getDocumentText());
  } catch (Exception e) {
    throw new RuntimeException(e);
  }
}

Source File: AnnotatorTester.java From uima-uimaj with Apache License 2.0

6 votes

/**
 * Runs the initialized annotator over a single document.
 * A new CAS is obtained from the analysis engine, filled with the given text
 * and language, and then processed.
 * 
 * @param text
 *           the document text to analyze
 * @param language
 *           the language to set on the document
 * @return the processed CAS, or {@code null} if an exception occurred and
 *         {@code JUnitExtension.handleException} returned normally
 * @throws Exception passthru
 */
public CAS performTest(String text, String language) throws Exception {
   CAS processed = null;
   try {
      // Obtain a fresh CAS and populate it with the document.
      CAS document = this.ae.newCAS();
      document.setDocumentText(text);
      document.setDocumentLanguage(language);

      // Run the annotator on it.
      this.ae.process(document);
      processed = document;
   } catch (Exception ex) {
      JUnitExtension.handleException(ex);
   }

   return processed;
}

Source File: AbstractTermSuiteCollectionReader.java From termsuite-core with Apache License 2.0

6 votes

/**
 * Fills the given CAS with the content of a file and a
 * {@code SourceDocumentInformation} annotation describing it.
 *
 * <p>The file is read with {@code getDocumentText} using the configured encoding, passed
 * through {@code preparator.prepare} and stored as the CAS document text. The document
 * language is taken from {@code mLanguage}. The annotation spans the stored (prepared)
 * document text, so its end offset always matches the text actually held by the CAS.
 *
 * @param cas the CAS to populate
 * @param file the file whose content becomes the document text
 * @throws IOException declared for reading the file content
 * @throws CollectionException wrapping a {@code CASException} raised while accessing the JCas
 */
protected void fillCas(CAS cas, File file) throws IOException, CollectionException {
	String uri = file.toURI().toString();
	SourceDocumentInformation sdi;
	try {
		sdi = new SourceDocumentInformation(cas.getJCas());
		sdi.setUri(uri);
		String text = getDocumentText(file.getAbsolutePath(), this.mEncoding);
		cas.setDocumentLanguage(mLanguage.getCode());
		// Keep a handle on the prepared text: offsets below must refer to what is stored
		// in the CAS, not to the raw file content, whose length may differ.
		String preparedText = preparator.prepare(text);
		cas.setDocumentText(preparedText);
		sdi.setDocumentSize((int)file.length());
		sdi.setCumulatedDocumentSize(this.currentFileByteSize);
		sdi.setCorpusSize(this.totalFileByteSize);
		sdi.setBegin(0);
		sdi.setEnd(preparedText.length());
		sdi.setOffsetInSource(0);
		sdi.setDocumentIndex(mCurrentIndex);
		sdi.setNbDocuments(this.mFiles.size());
		
		// Flag the final file of the collection.
		sdi.setLastSegment(mCurrentIndex == mFiles.size() - 1);
		sdi.addToIndexes();
	} catch (CASException e) {
		throw new CollectionException(e);
	}
}

Source File: AnalysisEngineFactoryTest.java From uima-uimafit with Apache License 2.0

6 votes

/**
 * Installs the DateTime PEAR package, builds an analysis engine from the installed
 * package's PEAR descriptor and runs it over a short sample document.
 */
@Test
public void testPear() throws Exception {
  // Install the PEAR package into the test output directory
  File installDir = new File("target/test-output/AnalysisEngineFactoryTest/testPear");
  File pearFile = new File("src/test/resources/pear/DateTime.pear");
  PackageBrowser installedPear = PackageInstaller.installPackage(installDir, pearFile, true);

  // Parse the PEAR specifier of the installed package and build the engine from it
  XMLInputSource pearDescriptor = new XMLInputSource(installedPear.getComponentPearDescPath());
  PearSpecifier pearSpecifier = UIMAFramework.getXMLParser().parsePearSpecifier(pearDescriptor);
  AnalysisEngine engine = createEngine(createEngineDescription(pearSpecifier));

  // Process a sample document containing a date and a time
  CAS sample = engine.newCAS();
  sample.setDocumentText("Sample text to process with a date 05/29/07 and a time 9:45 AM");
  sample.setDocumentLanguage("en");
  engine.process(sample);
}

Source File: XmlDetagger.java From uima-uimaj with Apache License 2.0

5 votes

/**
 * Parses the XML held in the "xmlDocument" view with a {@code DetagHandler} and stores the
 * text it collected in a newly created "plainTextDocument" view.
 *
 * <p>The new view receives the document language of the "_InitialView" view. If the XML view
 * contains a feature structure of {@code sourceDocInfoType}, it is also indexed in the new
 * view.
 *
 * @param aCAS the CAS containing the "xmlDocument" view
 * @throws AnalysisEngineProcessException if creating the parser, parsing the XML, or closing
 *         the XML stream fails
 */
public void process(CAS aCAS) throws AnalysisEngineProcessException {
    // get handle to CAS view containing XML document
    CAS xmlCas = aCAS.getView("xmlDocument");
    InputStream xmlStream = xmlCas.getSofa().getSofaDataStream();

    // parse with detag handler; try-with-resources makes sure the sofa stream is closed
    // even when the parser cannot be created or parsing fails part-way
    DetagHandler handler = new DetagHandler();
    try (InputStream in = xmlStream) {
      SAXParser parser = parserFactory.newSAXParser();
      parser.parse(in, handler);
    } catch (Exception e) {
      throw new AnalysisEngineProcessException(e);
    }

    // create the plain text view and set its document text
    CAS plainTextView = aCAS.createView("plainTextDocument");
    plainTextView.setDocumentText(handler.getDetaggedText());
    plainTextView.setDocumentLanguage(aCAS.getView("_InitialView").getDocumentLanguage());

    // Index the SourceDocumentInformation object, if there is one, in the new sofa.
    // This is needed by the SemanticSearchCasIndexer
    FeatureStructure sourceDocInfoFs = xmlCas.select(sourceDocInfoType).singleOrNull();
    if (null != sourceDocInfoFs) {
      plainTextView.addFsToIndexes(sourceDocInfoFs);
    }
  }

Source File: PearRuntimeTest.java From uima-uimaj with Apache License 2.0

5 votes

/**
 * Produces an analysis engine from the given description and runs it over a fixed
 * English sample document.
 *
 * @param desc the analysis engine description to instantiate
 * @return the CAS after processing
 * @throws Exception passthru
 */
private CAS runDesc(AnalysisEngineDescription desc) throws Exception {
  // Instantiate the engine and make sure it was actually produced
  AnalysisEngine engine = UIMAFramework.produceAnalysisEngine(desc, null, null);
  assertNotNull(engine);

  // Sample document with a date, a time and two room numbers
  String sampleText = "Sample text to process with a date 05/29/07 and a time 9:45 AM and a Room number GN-K35 or two GN-K37";

  CAS sampleCas = engine.newCAS();
  sampleCas.setDocumentText(sampleText);
  sampleCas.setDocumentLanguage("en");
  engine.process(sampleCas);

  return sampleCas;
}

Source File: AnnotatorTester.java From uima-uimaj with Apache License 2.0

5 votes

/**
 * Checks that an analysis engine can be produced from a descriptor file and can
 * process a short English sample document.
 *
 * @param configDescFilePath path of the descriptor file to load
 * @return the produced AnalysisEngine, or {@code null} if an exception occurred and
 *         {@code JUnitExtension.handleException} returned normally
 * @throws Exception passthru
 */
public static AnalysisEngine doConfigurationTest(String configDescFilePath)
      throws Exception {
   AnalysisEngine configured = null;
   try {
      // Load and parse the specifier from the descriptor file.
      XMLInputSource descriptor = new XMLInputSource(configDescFilePath);
      ResourceSpecifier specifier = UIMAFramework.getXMLParser()
            .parseResourceSpecifier(descriptor);

      // Build the engine described by the specifier.
      AnalysisEngine engine = UIMAFramework.produceAnalysisEngine(specifier, null, null);

      // Run it once over a sample document to exercise the configuration.
      CAS sample = engine.newCAS();
      sample.setDocumentText("This is a simple text to check if the configuration works");
      sample.setDocumentLanguage("en");
      engine.process(sample);

      configured = engine;
   } catch (Exception ex) {
      JUnitExtension.handleException(ex);
   }

   return configured;
}

Source File: CasMultiplierTest.java From uima-uimafit with Apache License 2.0

5 votes

/**
 * Treats the document language of the CAS as a decimal counter: parses it as an integer,
 * adds one and writes the result back as the new document language.
 */
@Override
    public void process(CAS aCAS) throws AnalysisEngineProcessException {
      // The incoming language string is expected to hold an integer.
      final int incoming = Integer.parseInt(aCAS.getDocumentLanguage());

      // Store the incremented counter back on the CAS.
      aCAS.setDocumentLanguage(Integer.toString(incoming + 1));
    }

Source File: CasMerge.java From webanno with Apache License 2.0

5 votes

/**
 * Resets the given CAS and then restores its document metadata, language, text, tokens and
 * sentences from a copy taken before the reset.
 *
 * @param aCas the CAS to clear
 * @throws UIMAException if creating or populating the copy fails
 */
private static void clearAnnotations(CAS aCas)
    throws UIMAException
{
    // Take a complete copy first; going through the complete serializer carries the
    // full type system information along with the data.
    CAS snapshot = CasFactory.createCas((TypeSystemDescription) null);
    CASCompleteSerializer fullState = serializeCASComplete((CASImpl) getRealCas(aCas));
    deserializeCASComplete(fullState, (CASImpl) getRealCas(snapshot));

    // Wipe the target CAS; its type system stays in place.
    aCas.reset();

    // Restore the essentials: metadata (or a fresh one if the copy has none) ...
    if (!exists(snapshot, getType(snapshot, DocumentMetaData.class))) {
        WebAnnoCasUtil.createDocumentMetadata(aCas);
    }
    else {
        copyDocumentMetadata(snapshot, aCas);
    }
    // ... then language and text (language explicitly, see DKPro Core Issue 435).
    aCas.setDocumentLanguage(snapshot.getDocumentLanguage());
    aCas.setDocumentText(snapshot.getDocumentText());

    // Re-create every token at its original offsets.
    for (AnnotationFS token : selectTokens(snapshot)) {
        int tokenBegin = token.getBegin();
        int tokenEnd = token.getEnd();
        aCas.addFsToIndexes(createToken(aCas, tokenBegin, tokenEnd));
    }

    // Re-create every sentence at its original offsets.
    for (AnnotationFS sentence : selectSentences(snapshot)) {
        int sentenceBegin = sentence.getBegin();
        int sentenceEnd = sentence.getEnd();
        aCas.addFsToIndexes(createSentence(aCas, sentenceBegin, sentenceEnd));
    }
}

Source File: WebAnnoCasUtilTest.java From webanno with Apache License 2.0

5 votes

/**
 * Checks that {@code WebAnnoCasUtil.createDocumentMetadata} replaces an existing
 * {@code DocumentAnnotation} with a {@code DocumentMetaData} and that the language set
 * beforehand is still reported afterwards.
 */
@Test
public void thatCreateDocumentMetadataUpgradesExistingDocumentAnnotation() throws Exception
{
    TypeSystemDescription typeSystem = createTypeSystemDescription();
    CAS cas = getRealCas(createCas(typeSystem));

    // Precondition: a fresh CAS carries no document annotation at all.
    assertThat(cas.select(DocumentAnnotation.class).asList())
            .as("CAS has no DocumentAnnotation")
            .isEmpty();

    // Setting the language is what brings the plain document annotation into existence.
    cas.setDocumentLanguage("en");

    assertThat(cas.select(DocumentAnnotation.class).asList())
            .as("CAS initialized with DocumentAnnotation")
            .extracting(annotation -> annotation.getType().getName())
            .containsExactly(TYPE_NAME_DOCUMENT_ANNOTATION);
    assertThat(cas.select(DocumentAnnotation.class).asList())
            .as("Language has been set")
            .extracting(DocumentAnnotation::getLanguage)
            .containsExactly("en");

    // Operation under test.
    WebAnnoCasUtil.createDocumentMetadata(cas);

    // Afterwards the single document annotation is of the metadata type, language intact.
    assertThat(cas.select(DocumentAnnotation.class).asList())
            .as("DocumentAnnotation has been upgraded to DocumentMetaData")
            .extracting(annotation -> annotation.getType().getName())
            .containsExactly(DocumentMetaData.class.getName());
    assertThat(cas.select(DocumentAnnotation.class).asList())
            .as("Language survived upgrade")
            .extracting(DocumentAnnotation::getLanguage)
            .containsExactly("en");
}