org.apache.uima.cas.SerialFormat Java Examples
The following examples show how to use
org.apache.uima.cas.SerialFormat.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: DocumentImportStructureProvider.java From uima-uimaj with Apache License 2.0 | 6 votes |
/**
 * Serializes the given text into an in-memory CAS document.
 *
 * @param fileName
 *          name of the file being imported; only used in the error message
 * @param text
 *          document text; it is passed through removeNonXmlChars before being set on the CAS
 * @param language
 *          language that is set on the CAS
 * @param format
 *          serial format handed to CasIOUtils.save
 * @return a stream over the serialized bytes
 * @throws TaeError
 *           if saving the CAS raises an IOException
 */
private InputStream getDocument(String fileName, String text, String language,
        SerialFormat format) {
  CAS cas = createEmtpyCAS();
  cas.setDocumentText(removeNonXmlChars(text));
  cas.setDocumentLanguage(language);

  ByteArrayOutputStream buffer = new ByteArrayOutputStream(40000);
  try {
    CasIOUtils.save(cas, buffer, format);
  } catch (IOException e) {
    // Report which file failed and keep the original exception as the cause.
    throw new TaeError("Failed to import: " + fileName + "\n\n" + e.getMessage(), e);
  }
  return new ByteArrayInputStream(buffer.toByteArray());
}
Example #2
Source File: CasIOUtilsTest.java From uima-uimaj with Apache License 2.0 | 6 votes |
public void testXMI(boolean leniently) throws Exception { File casFile = new File("target/temp-test-output/simpleCas.xmi"); casFile.getParentFile().mkdirs(); FileOutputStream docOS = new FileOutputStream(casFile); CasIOUtils.save(cas, docOS, SerialFormat.XMI); docOS.close(); // NOTE - when Saxon saves the cas it omits the prefixes. // e.g. produces: <NULL id="0"/> instead of: <cas:NULL xmi:id="0"/> // This causes JUnit test failure "unknown type NULL" // Use a CAS initialized with the "correct" type system or with a different type system? CAS casToUse = leniently ? cas2 : cas; casToUse.reset(); try (FileInputStream casInputStream = new FileInputStream(casFile)) { CasIOUtils.load(casInputStream, null, casToUse, leniently ? CasLoadMode.LENIENT : CasLoadMode.DEFAULT); } assertCorrectlyLoaded(casToUse, leniently); casToUse.reset(); CasIOUtils.load(casFile.toURI().toURL(), null, casToUse, leniently ? CasLoadMode.LENIENT : CasLoadMode.DEFAULT); assertCorrectlyLoaded(casToUse, leniently); }
Example #3
Source File: DocumentAnnotationTest.java From uima-uimaj with Apache License 2.0 | 6 votes |
/**
 * Creates the source and target CASes from the doc-metadata test type system and runs the
 * serialize/deserialize check once per serial format.
 *
 * @throws Exception
 *           if the type system cannot be parsed, a CAS cannot be created, or a check fails
 */
public void testDocMeta() throws Exception {
  File typeSystemFile = JUnitExtension.getFile("ExampleCas/testTypeSystem_docmetadata.xml");
  XMLInputSource typeSystemSource = new XMLInputSource(typeSystemFile);
  TypeSystemDescription typeSystem =
          UIMAFramework.getXMLParser().parseTypeSystemDescription(typeSystemSource);

  source = CasCreationUtils.createCas(typeSystem, new TypePriorities_impl(), null);
  target = CasCreationUtils.createCas(typeSystem, new TypePriorities_impl(), null);
  jcas = source.getJCas();

  // Same formats, in the same order, as the original sequence of calls.
  SerialFormat[] formats = {
      SerialFormat.XMI,
      SerialFormat.XCAS,
      SerialFormat.BINARY,
      SerialFormat.COMPRESSED,
      SerialFormat.COMPRESSED_FILTERED,
  };
  for (SerialFormat format : formats) {
    tstSerdesB4Sofa(format);
  }
}
Example #4
Source File: DocumentAnnotationTest.java From uima-uimaj with Apache License 2.0 | 6 votes |
/**
 * Resets both CASes, populates the source, serializes it in the given format, loads the bytes
 * into the target, and asserts that the two CASes compare equal.
 *
 * @param format
 *          serial format used for saving
 * @throws IOException
 *           if saving or loading fails
 */
private void tstSerdesB4Sofa(SerialFormat format) throws IOException {
  source.reset();
  target.reset();

  // Populate the source through its JCas view.
  new DocMeta(jcas).addToIndexes();
  jcas.setDocumentText("something");
  new Annotation(jcas); // created but never added to the indexes

  ByteArrayOutputStream buffer = new ByteArrayOutputStream();
  CasIOUtils.save(source, buffer, format);
  buffer.close();

  byte[] serialized = buffer.toByteArray();
  CasIOUtils.load(new ByteArrayInputStream(serialized), target);

  AnnotationFS docAnnotation = target.getDocumentAnnotation();
  System.out.println(docAnnotation);
  System.out.println(target.<DocMeta>getDocumentAnnotation());

  assertTrue(CasCompare.compareCASes((CASImpl) source, (CASImpl) target));
}
Example #5
Source File: CasIOUtilsTest.java From uima-uimaj with Apache License 2.0 | 6 votes |
private void testFormat(SerialFormat format, String fileEnding, boolean leniently) throws Exception { File casFile = new File("target/temp-test-output/simpleCas."+ fileEnding); casFile.getParentFile().mkdirs(); FileOutputStream docOS = new FileOutputStream(casFile); CasIOUtils.save(cas, docOS, format); docOS.close(); // Use a CAS initialized with the "correct" type system or with a different type system? CAS casToUse = leniently ? cas2 : cas; casToUse.reset(); FileInputStream casInputStream = new FileInputStream(casFile); SerialFormat loadedFormat = CasIOUtils.load(casInputStream, null, casToUse, leniently ? CasLoadMode.LENIENT : CasLoadMode.DEFAULT); casInputStream.close(); Assert.assertEquals(format, loadedFormat); assertCorrectlyLoaded(casToUse, leniently); }
Example #6
Source File: BinaryCasSerDes.java From uima-uimaj with Apache License 2.0 | 6 votes |
/**
 * ---------------------------------------------------------------------
 * see Blob Format in CASSerializer
 *
 * Reads and deserializes CAS data from a stream. Byte swapping may be needed if the blob is from
 * C++ -- C++ blob serialization writes data in native byte order.
 *
 * Supports delta deserialization. For that, the csds from the serialization event must be used.
 *
 * @param istream -
 * @return - the format of the input stream detected
 * @throws CASRuntimeException wraps IOException
 */
public SerialFormat reinit(InputStream istream) throws CASRuntimeException {
  final DataInputStream dis = CommonSerDes.maybeWrapToDataInputStream(istream);
  try {
    Header header = CommonSerDes.readHeader(dis);
    return reinit(header, istream, null, CasLoadMode.DEFAULT, null, AllowPreexistingFS.allow,
            null);
  } catch (IOException e) {
    // Fall back to toString() when the exception carries no message.
    final String reported = e.getMessage();
    throw new CASRuntimeException(CASRuntimeException.BLOB_DESERIALIZATION,
            (reported != null) ? reported : e.toString());
  }
}
Example #7
Source File: CasIOUtilsTest.java From uima-uimaj with Apache License 2.0 | 5 votes |
public void testXCAS(boolean leniently) throws Exception { File casFile = new File("target/temp-test-output/simpleCas.xcas"); casFile.getParentFile().mkdirs(); try (FileOutputStream docOS = new FileOutputStream(casFile)) { CasIOUtils.save(cas, docOS, SerialFormat.XCAS); } // Use a CAS initialized with the "correct" type system or with a different type system? CAS casToUse = leniently ? cas2 : cas; casToUse.reset(); CasIOUtils.load(casFile.toURI().toURL(), null, casToUse, leniently ? CasLoadMode.LENIENT : CasLoadMode.DEFAULT); assertCorrectlyLoaded(casToUse, leniently); }
Example #8
Source File: CasIOUtilsTest.java From uima-uimaj with Apache License 2.0 | 5 votes |
public void testWrongFormat() throws Exception { File casFile = new File("target/temp-test-output/simpleCas.wrong"); try { CasIOUtils.save(cas, new FileOutputStream(casFile), SerialFormat.UNKNOWN); } catch (Exception e) { // Assert.assertTrue(e instanceof IllegalArgumentException); return; } Assert.fail("An exception should have been thrown for wrong format."); }
Example #9
Source File: DocumentImportStructureProvider.java From uima-uimaj with Apache License 2.0 | 5 votes |
/**
 * Constructs a new DocumentImportStructureProvider object.
 *
 * @param language
 *          value stored in the language field
 * @param importEncoding
 *          value stored in the importEncoding field
 * @param casFormat
 *          value stored in the casFormat field
 */
public DocumentImportStructureProvider(String language, String importEncoding,
        SerialFormat casFormat) {
  // https://issues.apache.org/jira/browse/UIMA-1808
  this.casFormat = casFormat;
  this.importEncoding = importEncoding;
  this.language = language;
}
Example #10
Source File: CasIOUtilsTest.java From uima-uimaj with Apache License 2.0 | 4 votes |
/**
 * Replaces the default document annotation with one of a custom subtype while a separate feature
 * structure still references the original, then round-trips the CAS through
 * SerialFormat.SERIALIZED_TSI and asserts that only the custom document annotation is selected
 * afterwards.
 *
 * @throws Exception
 *           if CAS creation, serialization, or deserialization fails
 */
public void testDocumentAnnotationIsNotResurrected() throws Exception {
  String refererAnnoTypeName = "org.apache.uima.testing.Referer";
  String customDocAnnoTypeName = "org.apache.uima.testing.CustomDocumentAnnotation";

  // Type system: a custom document annotation subtype plus a type that can reference one.
  TypeSystemDescription typeSystemDesc =
          UIMAFramework.getResourceSpecifierFactory().createTypeSystemDescription();
  typeSystemDesc.addType(customDocAnnoTypeName, "", CAS.TYPE_NAME_DOCUMENT_ANNOTATION);
  TypeDescription refererType = typeSystemDesc.addType(refererAnnoTypeName, "", CAS.TYPE_NAME_TOP);
  refererType.addFeature("ref", "", CAS.TYPE_NAME_DOCUMENT_ANNOTATION);

  CAS cas = CasCreationUtils.createCas(typeSystemDesc, null, null);

  // Initialize the default document annotation
  // ... then immediately remove it from the indexes.
  FeatureStructure defaultDocAnno = cas.getDocumentAnnotation();

  assertThat(cas.select(cas.getTypeSystem().getType(CAS.TYPE_NAME_DOCUMENT_ANNOTATION)).asList())
          .extracting(fs -> fs.getType().getName())
          .containsExactly(CAS.TYPE_NAME_DOCUMENT_ANNOTATION);

  // Add a feature structure that references the original document annotation before we remove
  // it from the indexes
  FeatureStructure refererFs = cas.createFS(cas.getTypeSystem().getType(refererAnnoTypeName));
  refererFs.setFeatureValue(refererFs.getType().getFeatureByBaseName("ref"), defaultDocAnno);
  cas.addFsToIndexes(refererFs);

  cas.removeFsFromIndexes(defaultDocAnno);

  // Now add a new document annotation of our custom type
  FeatureStructure customDocAnno =
          cas.createFS(cas.getTypeSystem().getType(customDocAnnoTypeName));
  cas.addFsToIndexes(customDocAnno);

  assertThat(cas.select(cas.getTypeSystem().getType(CAS.TYPE_NAME_DOCUMENT_ANNOTATION)).asList())
          .extracting(fs -> fs.getType().getName())
          .containsExactly(customDocAnnoTypeName);

  // Serialize to a buffer
  ByteArrayOutputStream buffer = new ByteArrayOutputStream();
  CasIOUtils.save(cas, buffer, SerialFormat.SERIALIZED_TSI);

  // Deserialize from the buffer
  CasIOUtils.load(new ByteArrayInputStream(buffer.toByteArray()), cas);

  assertThat(cas.select(cas.getTypeSystem().getType(CAS.TYPE_NAME_DOCUMENT_ANNOTATION)).asList())
          .extracting(fs -> fs.getType().getName())
          .containsExactly(customDocAnnoTypeName);
}
Example #11
Source File: CasIOUtilsTest.java From uima-uimaj with Apache License 2.0 | 4 votes |
/**
 * Runs testFormat with SerialFormat.COMPRESSED_FILTERED, file ending "bins6", and the leniently
 * flag set to false.
 *
 * @throws Exception
 *           propagated from testFormat
 */
public void testS6() throws Exception {
  testFormat(SerialFormat.COMPRESSED_FILTERED, "bins6", false);
}
Example #12
Source File: CasIOUtilsTest.java From uima-uimaj with Apache License 2.0 | 4 votes |
/**
 * Runs testFormat with SerialFormat.COMPRESSED_TSI, file ending "bins4", and the leniently flag
 * set to false.
 *
 * @throws Exception
 *           propagated from testFormat
 */
public void testS4tsi() throws Exception {
  testFormat(SerialFormat.COMPRESSED_TSI, "bins4", false);
}
Example #13
Source File: CasIOUtilsTest.java From uima-uimaj with Apache License 2.0 | 4 votes |
/**
 * Runs testFormat with SerialFormat.COMPRESSED, file ending "bins4", and the leniently flag set
 * to false.
 *
 * @throws Exception
 *           propagated from testFormat
 */
public void testS4() throws Exception {
  testFormat(SerialFormat.COMPRESSED, "bins4", false);
}
Example #14
Source File: CasIOUtilsTest.java From uima-uimaj with Apache License 2.0 | 4 votes |
/**
 * Runs testFormat with SerialFormat.BINARY_TSI, file ending "bins0", and the leniently flag set
 * to false.
 *
 * @throws Exception
 *           propagated from testFormat
 */
public void testS0tsi() throws Exception {
  testFormat(SerialFormat.BINARY_TSI, "bins0", false);
}
Example #15
Source File: CasIOUtilsTest.java From uima-uimaj with Apache License 2.0 | 4 votes |
/**
 * Runs testFormat with SerialFormat.BINARY, file ending "bins0", and the leniently flag set to
 * false.
 *
 * @throws Exception
 *           propagated from testFormat
 */
public void testS0() throws Exception {
  testFormat(SerialFormat.BINARY, "bins0", false);
}
Example #16
Source File: CasIOUtilsTest.java From uima-uimaj with Apache License 2.0 | 4 votes |
/**
 * Runs testFormat with SerialFormat.COMPRESSED_FILTERED_TSI, file ending "bins6", and the
 * leniently flag set to true.
 *
 * @throws Exception
 *           propagated from testFormat
 */
public void testS6pLenient() throws Exception {
  testFormat(SerialFormat.COMPRESSED_FILTERED_TSI, "bins6", true);
}
Example #17
Source File: CasIOUtilsTest.java From uima-uimaj with Apache License 2.0 | 4 votes |
/**
 * Runs testFormat with SerialFormat.COMPRESSED_FILTERED_TS, file ending "bins6pTs", and the
 * leniently flag set to false.
 *
 * @throws Exception
 *           propagated from testFormat
 */
public void testS6pTs() throws Exception {
  testFormat(SerialFormat.COMPRESSED_FILTERED_TS, "bins6pTs", false);
}
Example #18
Source File: CasIOUtilsTest.java From uima-uimaj with Apache License 2.0 | 4 votes |
/**
 * Runs testFormat with SerialFormat.COMPRESSED_FILTERED_TSI, file ending "bins6p", and the
 * leniently flag set to false.
 *
 * @throws Exception
 *           propagated from testFormat
 */
public void testS6p() throws Exception {
  testFormat(SerialFormat.COMPRESSED_FILTERED_TSI, "bins6p", false);
}
Example #19
Source File: CasIOUtilsTest.java From uima-uimaj with Apache License 2.0 | 4 votes |
/**
 * Runs testFormat with SerialFormat.SERIALIZED_TSI, file ending "binsp", and the leniently flag
 * set to false.
 *
 * @throws Exception
 *           propagated from testFormat
 */
public void testSp() throws Exception {
  testFormat(SerialFormat.SERIALIZED_TSI, "binsp", false);
}
Example #20
Source File: CasIOUtilsAlwaysHoldOnTest.java From uima-uimaj with Apache License 2.0 | 4 votes |
@Test public void thatDocumentAnnotationIsNotResurrected() throws Exception { // Must set this to true, otherwise the test will not fail. Setting it to true will cause // FSes which are not in any index to still be serialized out. When reading this data back, // UIMA will find the non-indexed DocumentAnnotation and add it back without checking whether // is was actually indexed or not. System.setProperty(CASImpl.ALWAYS_HOLD_ONTO_FSS, "true"); String customDocAnnoTypeName = "org.apache.uima.testing.CustomDocumentAnnotation"; TypeSystemDescription tsd = UIMAFramework.getResourceSpecifierFactory().createTypeSystemDescription(); tsd.addType(customDocAnnoTypeName, "", CAS.TYPE_NAME_DOCUMENT_ANNOTATION); CAS cas = CasCreationUtils.createCas(tsd, null, null); // Initialize the default document annotation // ... then immediately remove it from the indexes. FeatureStructure da = cas.getDocumentAnnotation(); assertThat(cas.select(cas.getTypeSystem().getType(CAS.TYPE_NAME_DOCUMENT_ANNOTATION)).asList()) .extracting(fs -> fs.getType().getName()) .containsExactly(CAS.TYPE_NAME_DOCUMENT_ANNOTATION); cas.removeFsFromIndexes(da); // Now add a new document annotation of our custom type FeatureStructure cda = cas.createFS(cas.getTypeSystem().getType(customDocAnnoTypeName)); cas.addFsToIndexes(cda); assertThat(cas.select(cas.getTypeSystem().getType(CAS.TYPE_NAME_DOCUMENT_ANNOTATION)).asList()) .extracting(fs -> fs.getType().getName()) .containsExactly(customDocAnnoTypeName); // Serialize to a buffer ByteArrayOutputStream bos = new ByteArrayOutputStream(); CasIOUtils.save(cas, bos, SerialFormat.SERIALIZED_TSI); // Deserialize from the buffer ByteArrayInputStream bis = new ByteArrayInputStream(bos.toByteArray()); CasIOUtils.load(bis, cas); assertThat(cas.select(cas.getTypeSystem().getType(CAS.TYPE_NAME_DOCUMENT_ANNOTATION)).asList()) .extracting(fs -> fs.getType().getName()) .containsExactly(customDocAnnoTypeName); }
Example #21
Source File: CasIOUtilsTest.java From uima-uimaj with Apache License 2.0 | 4 votes |
/**
 * Runs testFormat with SerialFormat.SERIALIZED, file ending "bins", and the leniently flag set to
 * false.
 *
 * @throws Exception
 *           propagated from testFormat
 */
public void testS() throws Exception {
  testFormat(SerialFormat.SERIALIZED, "bins", false);
}
Example #22
Source File: ImportDocumentWizardPage.java From uima-uimaj with Apache License 2.0 | 4 votes |
/**
 * Returns the current value of the documentFormat field.
 *
 * @return the SerialFormat held in documentFormat
 */
SerialFormat getCasFormat() {
  return documentFormat;
}
Example #23
Source File: XmlCasDeserializer.java From uima-uimaj with Apache License 2.0 | 3 votes |
/**
 * Deserializes a CAS from XMI or XCAS and reports which of the two formats was read.
 *
 * @param aStream
 *          input stream from which to read the XML document
 * @param aCAS
 *          CAS into which to deserialize. This CAS must be set up with a type system that is
 *          compatible with that in the XML
 * @param aLenient
 *          if true, unknown Types will be ignored. If false, unknown Types will cause an
 *          exception. The default is false.
 * @return SerialFormat.XMI if the handler delegated to an XMI handler, otherwise
 *         SerialFormat.XCAS
 *
 * @throws SAXException
 *           if an XML Parsing error occurs
 * @throws IOException
 *           if an I/O failure occurs
 */
static SerialFormat deserializeR(InputStream aStream, CAS aCAS, boolean aLenient)
        throws SAXException, IOException {
  XmlCasDeserializerHandler handler = new XmlCasDeserializerHandler(aCAS, aLenient);

  XMLReader xmlReader = XMLUtils.createXMLReader();
  xmlReader.setContentHandler(handler);
  xmlReader.parse(new InputSource(aStream));

  // The delegate handler chosen during parsing tells us which format the document was in.
  if (handler.mDelegateHandler instanceof XmiCasDeserializer.XmiCasDeserializerHandler) {
    return SerialFormat.XMI;
  }
  return SerialFormat.XCAS;
}
Example #24
Source File: CasIOUtils.java From uima-uimaj with Apache License 2.0 | 3 votes |
/**
 * Loads a CAS from a URL source. The format is determined from the content.
 *
 * If the value of tsiUrl is null it is ignored.
 *
 * @param casUrl
 *          The url to deserialize the CAS from
 * @param tsiUrl
 *          null or an optional url to deserialize the type system and index definitions from
 * @param aCAS
 *          The CAS that should be filled
 * @param casLoadMode
 *          specifies how to handle reinitialization and lenient loading; see the Javadocs for
 *          CasLoadMode
 * @return the SerialFormat of the loaded CAS
 * @throws IOException
 *           Problem loading
 */
public static SerialFormat load(URL casUrl, URL tsiUrl, CAS aCAS, CasLoadMode casLoadMode)
        throws IOException {
  InputStream casIS = new BufferedInputStream(casUrl.openStream());
  InputStream tsIS = null;
  try {
    // Open the TSI stream inside the try block so that casIS is still closed
    // if opening the second stream fails.
    if (tsiUrl != null) {
      tsIS = new BufferedInputStream(tsiUrl.openStream());
    }
    return load(casIS, tsIS, aCAS, casLoadMode);
  } finally {
    closeQuitely(casIS);
    closeQuitely(tsIS);
  }
}
Example #25
Source File: CasIOUtils.java From uima-uimaj with Apache License 2.0 | 2 votes |
/**
 * Writes the CAS in the specified format.
 *
 * Delegates to the four-argument save, passing null as the third argument.
 *
 * @param aCas
 *          The CAS that should be serialized and stored
 * @param docOS
 *          The output stream for the CAS
 * @param format
 *          The SerialFormat in which the CAS should be stored.
 * @throws IOException
 *           - Problem saving to the given OutputStream
 */
public static void save(CAS aCas, OutputStream docOS, SerialFormat format) throws IOException {
  save(aCas, docOS, null, format);
}
Example #26
Source File: CasIOUtils.java From uima-uimaj with Apache License 2.0 | 2 votes |
/**
 * This load variant can be used for loading Form 6 compressed CASes where the
 * type system to use to deserialize is provided as an argument. It can also load other formats,
 * where its behavior is identical to load(casInputStream, aCas).
 *
 * Loads a CAS from an Input Stream. The format is determined from the content.
 * For SerialFormats ending in _TSI (for example SERIALIZED_TSI or COMPRESSED_FILTERED_TSI),
 * the type system and index definitions are read from the cas input source;
 * the value of typeSystem is ignored.
 *
 * For COMPRESSED_FILTERED_xxx formats, if the typeSystem is not null,
 * the typeSystem is used for decoding.
 *
 * If embedded TSI information is available, the CAS's type system and indexes definition are
 * replaced, except for SerialFormats COMPRESSED_FILTERED, COMPRESSED_FILTERED_TS, and
 * COMPRESSED_FILTERED_TSI.
 *
 * To replace the CAS's type system and indexes definition for these, use a load form which
 * has the CasLoadMode argument, and set this to REINIT.
 *
 * This overload passes null for the TSI input stream and CasLoadMode.DEFAULT for the load mode,
 * and casts typeSystem to TypeSystemImpl.
 *
 * @param casInputStream
 *          The input stream containing the CAS, appropriately buffered.
 * @param aCAS
 *          The CAS that should be filled
 * @param typeSystem
 *          the type system to use for decoding the serialized form, must be non-null
 * @return the SerialFormat of the loaded CAS
 * @throws IOException
 *           Problem loading from given InputStream
 */
public static SerialFormat load(InputStream casInputStream, CAS aCAS, TypeSystem typeSystem) throws IOException {
  return load(casInputStream, null, aCAS, CasLoadMode.DEFAULT, (TypeSystemImpl) typeSystem);
}
Example #27
Source File: CasIOUtils.java From uima-uimaj with Apache License 2.0 | 2 votes |
/**
 * Loads a CAS from an Input Stream. The format is determined from the content.
 * For formats ending in _TSI (for example SERIALIZED_TSI or COMPRESSED_FILTERED_TSI),
 * the type system and index definitions are read from the cas input source;
 * the value of tsiInputStream is ignored.
 *
 * For other formats, if the tsiInputStream is not null,
 * type system and index definitions are read from that source.
 *
 * If TSI information is available, the CAS's type system and indexes definition are replaced,
 * except for SerialFormats COMPRESSED_FILTERED, COMPRESSED_FILTERED_TS, and
 * COMPRESSED_FILTERED_TSI.
 *
 * If the CasLoadMode == REINIT, then the TSI information is also used for these 3 formats to
 * replace the CAS's definitions.
 *
 * This overload delegates to the five-argument load, passing null as the last argument.
 *
 * @param casInputStream
 *          The input stream containing the CAS, appropriately buffered.
 * @param tsiInputStream
 *          The optional input stream containing the type system, appropriately buffered.
 *          This is only used if it is non null and
 *            - the casInputStream does not already come with an embedded CAS Type System and
 *              Index Definition, or
 *            - the serial format is COMPRESSED_FILTERED_TSI
 * @param aCAS
 *          The CAS that should be filled
 * @param casLoadMode
 *          specifies loading alternatives like lenient and reinit, see CasLoadMode.
 * @return the SerialFormat of the loaded CAS
 * @throws IOException
 *           - Problem loading from given InputStream
 */
public static SerialFormat load(InputStream casInputStream, InputStream tsiInputStream, CAS aCAS, CasLoadMode casLoadMode) throws IOException {
  return load(casInputStream, tsiInputStream, aCAS, casLoadMode, null);
}
Example #28
Source File: CasIOUtils.java From uima-uimaj with Apache License 2.0 | 2 votes |
/**
 * Loads a CAS from an Input Stream. The format is determined from the content.
 *
 * For SerialFormats ending with _TSI the embedded value is used instead of any supplied external
 * TSI information. TSI information is available either via embedded value, or if a non-null
 * input is passed for tsiInputStream.
 *
 * If TSI information is available, the CAS's type system and indexes definition are replaced,
 * except for SerialFormats COMPRESSED_FILTERED, COMPRESSED_FILTERED_TS, and
 * COMPRESSED_FILTERED_TSI.
 *
 * The CasLoadMode is set to LENIENT if the leniently flag is true; otherwise it is set to
 * DEFAULT.
 *
 * @param casInputStream -
 * @param tsiInputStream -
 * @param aCAS -
 * @param leniently -
 * @return -
 * @throws IOException -
 */
public static SerialFormat load(InputStream casInputStream, InputStream tsiInputStream, CAS aCAS,
        boolean leniently) throws IOException {
  // Translate the boolean flag into the corresponding load mode.
  final CasLoadMode mode;
  if (leniently) {
    mode = CasLoadMode.LENIENT;
  } else {
    mode = CasLoadMode.DEFAULT;
  }
  return load(casInputStream, tsiInputStream, aCAS, mode);
}
Example #29
Source File: CasIOUtils.java From uima-uimaj with Apache License 2.0 | 2 votes |
/**
 * Loads a CAS from an Input Stream. The format is determined from the content.
 *
 * For SerialFormats ending with _TSI the embedded value is used instead of any supplied external
 * TSI information. TSI information is available either via embedded value, or if a non-null
 * input is passed for tsiInputStream.
 *
 * If TSI information is available, the CAS's type system and indexes definition are replaced,
 * except for SerialFormats COMPRESSED_FILTERED, COMPRESSED_FILTERED_TS, and
 * COMPRESSED_FILTERED_TSI.
 *
 * The CasLoadMode is DEFAULT.
 *
 * @param casInputStream -
 * @param tsiInputStream -
 * @param aCAS -
 * @return -
 * @throws IOException -
 */
public static SerialFormat load(InputStream casInputStream, InputStream tsiInputStream, CAS aCAS) throws IOException {
  return load(casInputStream, tsiInputStream, aCAS, CasLoadMode.DEFAULT);
}
Example #30
Source File: CasIOUtils.java From uima-uimaj with Apache License 2.0 | 2 votes |
/**
 * Loads a CAS from an Input Stream. The format is determined from the content.
 * For SerialFormats ending with _TSI except for COMPRESSED_FILTERED_TSI,
 * the CAS's type system and indexes definition are replaced.
 * CasLoadMode is DEFAULT.
 *
 * This overload passes null for the TSI input stream.
 *
 * @param casInputStream
 *          The input stream containing the CAS. Caller should buffer this appropriately.
 * @param aCAS
 *          The CAS that should be filled
 * @return the SerialFormat of the loaded CAS
 * @throws IOException
 *           - Problem loading from given InputStream
 */
public static SerialFormat load(InputStream casInputStream, CAS aCAS) throws IOException {
  return load(casInputStream, null, aCAS, CasLoadMode.DEFAULT);
}