org.apache.uima.util.XMLSerializer Java Examples
The following examples show how to use
org.apache.uima.util.XMLSerializer.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: CTAKESClinicalPipelineFactory.java From ctakes-clinical-pipeline with Apache License 2.0 | 6 votes |
/**
 * Serializes the CAS behind the given JCas to a file in XMI format.
 *
 * @param jcas
 *          the JCas whose CAS and type system are serialized
 * @param file
 *          the destination file
 * @throws SAXException
 *           if generating the XMI fails
 * @throws IOException
 *           if the file cannot be opened, written, or closed
 */
private static void serialize(JCas jcas, File file) throws SAXException, IOException {
  // try-with-resources closes the stream on every path. The previous finally block
  // dereferenced a null stream when the file could not be opened, and the catch blocks
  // re-created exceptions from their message only, discarding cause and stack trace.
  try (OutputStream outputStream = new BufferedOutputStream(new FileOutputStream(file))) {
    XmiCasSerializer xmiSerializer = new XmiCasSerializer(jcas.getTypeSystem());
    XMLSerializer xmlSerializer = new XMLSerializer(outputStream, true);
    xmiSerializer.serialize(jcas.getCas(), xmlSerializer.getContentHandler());
  }
}
Example #2
Source File: CpeBuilder.java From uima-uimafit with Apache License 2.0 | 6 votes |
/** * Writes a temporary file containing a XML descriptor of the given resource. Returns the file. * * @param resource * A resource specifier that should we materialized. * @return The file containing the XML representation of the given resource. */ private static File materializeDescriptor(ResourceSpecifier resource) throws IOException, SAXException { File tempDesc = File.createTempFile("desc", ".xml"); tempDesc.deleteOnExit(); // Write the descriptor using XML 1.1 to allow a wider range of characters for parameter values try (OutputStream os = Files.newOutputStream(tempDesc.toPath())) { XMLSerializer sax2xml = new XMLSerializer(os, true); sax2xml.setOutputProperty(OutputKeys.VERSION, "1.1"); ContentHandler contentHandler = sax2xml.getContentHandler(); contentHandler.startDocument(); resource.toXML(sax2xml.getContentHandler(), true); contentHandler.endDocument(); } return tempDesc; }
Example #3
Source File: CreateSampleXCASFile.java From uima-uimafit with Apache License 2.0 | 6 votes |
public static void main(String[] args) throws UIMAException, SAXException, IOException { TokenBuilder<Token, Sentence> tokenBuilder = new TokenBuilder<Token, Sentence>(Token.class, Sentence.class, "pos", "stem"); JCas jCas = JCasFactory.createJCas(); // quote from http://www.gutenberg.org/files/20417/20417-h/20417-h.htm String text = "... the more knowledge advances the more it becomes possible to condense it into little books."; tokenBuilder .buildTokens( jCas, text, "... the more knowledge advances the more it becomes possible to condense it into little books . ", ". T M K A T M I B P T C I I L B .", "... the more knowledge advance the more it become possible to condense it into little book . "); FileOutputStream out = new FileOutputStream("src/test/resources/data/docs/test.xcas"); XCASSerializer ser = new XCASSerializer(jCas.getTypeSystem()); XMLSerializer xmlSer = new XMLSerializer(out, false); ser.serialize(jCas.getCas(), xmlSer.getContentHandler()); out.close(); }
Example #4
Source File: JsonCasSerializerTest.java From uima-uimaj with Apache License 2.0 | 6 votes |
/**
 * Serializes the test CAS either as JSON (when {@code doJson} is set) or as XMI, and returns
 * the result as a string.
 *
 * @return the serialized form of the CAS
 * @throws Exception
 *           whatever the underlying serializer throws; before rethrowing, the output produced
 *           so far is printed to {@code System.err}
 */
private String serialize() throws Exception {
  StringWriter sw = null;
  ByteArrayOutputStream baos = null;
  try {
    if (doJson) {
      sw = new StringWriter();
      jcs.serialize(cas, sw);
      return sw.toString();
    } else {
      XmiCasSerializer xcs = new XmiCasSerializer(jcs.getCss().getFilterTypes());
      baos = new ByteArrayOutputStream();
      XMLSerializer sax2xml = new XMLSerializer(baos, jcs.getCss().isFormattedOutput);
      xcs.serialize(cas, sax2xml.getContentHandler(), null);
      return baos.toString("UTF-8");
    }
  } catch (Exception e) {
    // Both buffers may still be null if the failure happened before either was assigned
    // (e.g. while constructing the XmiCasSerializer). Guard against that so the diagnostic
    // output cannot raise a NullPointerException that hides the original exception.
    String producedSoFar;
    if (sw != null) {
      producedSoFar = sw.toString();
    } else if (baos != null) {
      producedSoFar = baos.toString("UTF-8");
    } else {
      producedSoFar = "";
    }
    System.err.format("Exception occurred. The string produced so far was: %n%s%n",
            producedSoFar);
    throw e;
  }
}
Example #5
Source File: NewPrimitiveTypesTest.java From uima-uimaj with Apache License 2.0 | 6 votes |
public void testXCASSerialization() throws Exception { // create FS createExampleFS(cas); // serialize XCASSerializer ser = new XCASSerializer(cas.getTypeSystem()); OutputStream outputXCAS = new FileOutputStream(JUnitExtension .getFile("ExampleCas/newprimitives.xcas")); XMLSerializer xmlSer = new XMLSerializer(outputXCAS); ser.serialize(cas, xmlSer.getContentHandler()); // reset cas.reset(); // deserialize InputStream inputXCAS = new FileInputStream(JUnitExtension .getFile("ExampleCas/newprimitives.xcas")); XCASDeserializer.deserialize(inputXCAS, cas, false); // check values validateFSData(cas); }
Example #6
Source File: XCASDeserializerTest.java From uima-uimaj with Apache License 2.0 | 6 votes |
public void testOutOfTypeSystem3() throws Exception { // deserialize an XCAS using the implicit value feature into a CAS with no TypeSystem CAS cas = CasCreationUtils.createCas(new TypeSystemDescription_impl(), new TypePriorities_impl(), new FsIndexDescription[0]); String xcas = "<?xml version=\"1.0\" encoding=\"UTF-8\"?><CAS>" + "<uima.tcas.Document _content=\"text\">Test Document</uima.tcas.Document>" + "<uima.tcas.DocumentAnnotation _indexed=\"1\" _id=\"8\" sofa=\"1\" begin=\"0\" end=\"13\" language=\"en\"/>" + "<foo.Bar _indexed=\"1\" _id=\"2\" sofa=\"1\" begin=\"0\" end=\"0\" baz=\"blah\">this is the value feature</foo.Bar></CAS>"; OutOfTypeSystemData ootsd = new OutOfTypeSystemData(); XMLReader xmlReader = XMLUtils.createXMLReader(); XCASDeserializer deser = new XCASDeserializer(cas.getTypeSystem()); ContentHandler handler = deser.getXCASHandler(cas, ootsd); xmlReader.setContentHandler(handler); xmlReader.parse(new InputSource(new StringReader(xcas))); // now reserialize including OutOfTypeSystem data XCASSerializer xcasSer = new XCASSerializer(cas.getTypeSystem()); StringWriter sw = new StringWriter(); XMLSerializer xmlSer = new XMLSerializer(sw, false); xcasSer.serialize(cas, xmlSer.getContentHandler(), true, ootsd); String xml = sw.getBuffer().toString(); // System.out.println(xml); // make sure the value feature was not lost (it will be serialized as an attribute however) assertTrue(xml.indexOf("value=\"this is the value feature\"") != -1); }
Example #7
Source File: XCasWriterCasConsumer.java From uima-uimaj with Apache License 2.0 | 5 votes |
/**
 * Writes the given CAS to a file as XCAS.
 *
 * @param aCas the CAS to write
 * @param name the file to write to
 * @throws IOException if the file cannot be opened, written or closed
 * @throws SAXException if producing the XML fails
 */
private void writeXCas(CAS aCas, File name) throws IOException, SAXException {
  try (OutputStream fileStream = new FileOutputStream(name)) {
    XCASSerializer casSerializer = new XCASSerializer(aCas.getTypeSystem());
    casSerializer.serialize(aCas, new XMLSerializer(fileStream, false).getContentHandler());
  }
}
Example #8
Source File: ExternalRecommender.java From inception with Apache License 2.0 | 5 votes |
private String serializeCas(CAS aCas) throws RecommendationException { try (StringWriter out = new StringWriter()) { // Passing "null" as the type system to the XmiCasSerializer means that we want // to serialize all types (i.e. no filtering for a specific target type system). XmiCasSerializer xmiCasSerializer = new XmiCasSerializer(null); XMLSerializer sax2xml = new XMLSerializer(out, true); xmiCasSerializer.serialize(getRealCas(aCas), sax2xml.getContentHandler(), null, null, null); return out.toString(); } catch (CASRuntimeException | SAXException | IOException e) { throw new RecommendationException("Error while serializing CAS!", e); } }
Example #9
Source File: NewPrimitiveTypesTest.java From uima-uimaj with Apache License 2.0 | 5 votes |
public void testXmiSerialization() throws Exception { // create FS createExampleFS(cas); // serialize StringWriter sw = new StringWriter(); XMLSerializer xmlSer = new XMLSerializer(sw, false); XmiCasSerializer xmiSer = new XmiCasSerializer(cas.getTypeSystem()); xmiSer.serialize(cas, xmlSer.getContentHandler()); String xml = sw.getBuffer().toString(); // System.out.println(xml); // reset cas.reset(); // deserialize XmiCasDeserializer deser = new XmiCasDeserializer(cas.getTypeSystem()); ContentHandler deserHandler = deser.getXmiCasHandler(cas); SAXParserFactory fact = SAXParserFactory.newInstance(); SAXParser parser = fact.newSAXParser(); XMLReader xmlReader = parser.getXMLReader(); xmlReader.setContentHandler(deserHandler); xmlReader.parse(new InputSource(new StringReader(xml))); // check values validateFSData(cas); }
Example #10
Source File: CreateSampleXMIFile.java From uima-uimafit with Apache License 2.0 | 5 votes |
/**
 * Builds a small tokenized sample document and writes it as an XMI file to
 * {@code src/test/resources/data/docs/test.xmi}.
 *
 * @param args
 *          not used
 * @throws UIMAException
 *           if the JCas cannot be created
 * @throws SAXException
 *           if XMI generation fails
 * @throws IOException
 *           if the output file cannot be written
 */
public static void main(String[] args) throws UIMAException, SAXException, IOException {
  TokenBuilder<Token, Sentence> tokenBuilder = new TokenBuilder<Token, Sentence>(Token.class,
          Sentence.class, "pos", "stem");
  JCas jCas = JCasFactory.createJCas();
  String text = "Me and all my friends are non-conformists.";
  tokenBuilder.buildTokens(jCas, text, "Me and all my friends are non - conformists .",
          "M A A M F A N - C .", "me and all my friend are non - conformist .");

  // try-with-resources so the file handle is released even when serialization throws;
  // previously close() was only reached on the success path.
  try (FileOutputStream out = new FileOutputStream("src/test/resources/data/docs/test.xmi")) {
    XmiCasSerializer ser = new XmiCasSerializer(jCas.getTypeSystem());
    XMLSerializer xmlSer = new XMLSerializer(out, false);
    ser.serialize(jCas.getCas(), xmlSer.getContentHandler());
  }
}
Example #11
Source File: DATACasUtils.java From uima-uimaj with Apache License 2.0 | 5 votes |
/**
 * Renders the given CasData as an XCAS XML string.
 *
 * @param aCasData
 *          the CasData to convert
 * @param keysToFilter
 *          passed to {@code CasDataToXCas.setTypesToFilter} before generation
 * @return the generated XCAS document as a string
 * @throws Exception
 *           if XCAS generation or decoding of the produced bytes fails
 */
public static String getXCASasString(CasData aCasData, String[] keysToFilter) throws Exception {
  CasDataToXCas generator = new CasDataToXCas();
  generator.setTypesToFilter(keysToFilter);
  ByteArrayOutputStream baos = new ByteArrayOutputStream();
  XMLSerializer sax2xml = new XMLSerializer(baos);
  generator.setContentHandler(sax2xml.getContentHandler());
  generator.generateXCas(aCasData);
  // Decode explicitly as UTF-8 rather than with the platform default charset, which would
  // corrupt non-ASCII text on platforms whose default differs.
  // NOTE(review): assumes the serializer's byte output is UTF-8 (its default) - confirm.
  return baos.toString("UTF-8");
}
Example #12
Source File: XmiCasDeserializerTest.java From uima-uimaj with Apache License 2.0 | 4 votes |
public void testMultipleSofas() throws Exception { try { CAS cas = CasCreationUtils.createCas(typeSystem, new TypePriorities_impl(), new FsIndexDescription[0]); // set document text for the initial view cas.setDocumentText("This is a test"); // create a new view and set its document text CAS cas2 = cas.createView("OtherSofa"); cas2.setDocumentText("This is only a test"); // Change this test to create an instance of TOP because you cannot add an annotation to other than // the view it is created in. https://issues.apache.org/jira/browse/UIMA-4099 // create a TOP and add to index of both views Type topType = cas.getTypeSystem().getTopType(); FeatureStructure aTOP = cas.createFS(topType); cas.getIndexRepository().addFS(aTOP); cas2.getIndexRepository().addFS(aTOP); FSIterator<FeatureStructure> it = cas.getIndexRepository().getAllIndexedFS(topType); FSIterator<FeatureStructure> it2 = cas2.getIndexRepository().getAllIndexedFS(topType); it.next(); it.next(); it2.next(); it2.next(); assertFalse(it.hasNext()); assertFalse(it2.hasNext()); // serialize StringWriter sw = new StringWriter(); XMLSerializer xmlSer = new XMLSerializer(sw, false); XmiCasSerializer xmiSer = new XmiCasSerializer(cas.getTypeSystem()); xmiSer.serialize(cas, xmlSer.getContentHandler()); String xml = sw.getBuffer().toString(); // deserialize into another CAS (repeat twice to check it still works after reset) CAS newCas = CasCreationUtils.createCas(typeSystem, new TypePriorities_impl(), new FsIndexDescription[0]); for (int i = 0; i < 2; i++) { XmiCasDeserializer newDeser = new XmiCasDeserializer(newCas.getTypeSystem()); ContentHandler newDeserHandler = newDeser.getXmiCasHandler(newCas); SAXParserFactory fact = SAXParserFactory.newInstance(); SAXParser parser = fact.newSAXParser(); XMLReader xmlReader = parser.getXMLReader(); xmlReader.setContentHandler(newDeserHandler); xmlReader.parse(new InputSource(new StringReader(xml))); // check sofas assertEquals("This is a test", newCas.getDocumentText()); CAS 
newCas2 = newCas.getView("OtherSofa"); assertEquals("This is only a test", newCas2.getDocumentText()); // check that annotation is still indexed in both views // check that annotation is still indexed in both views it = newCas.getIndexRepository().getAllIndexedFS(topType); it2 = newCas2.getIndexRepository().getAllIndexedFS(topType); it.next(); it.next(); it2.next(); it2.next(); assertFalse(it.hasNext()); // assertFalse(it2.hasNext()); assertTrue(tIndex.size() == 2); // document annot and this one // assertTrue(t2Index.size() == 2); // ditto newCas.reset(); } } catch (Exception e) { JUnitExtension.handleException(e); } }
Example #13
Source File: DotCorpusSerializer.java From uima-uimaj with Apache License 2.0 | 4 votes |
/**
 * Writes the <code>DotCorpus</code> instance to the given <code>OutputStream</code> as an
 * XML document: one root configuration element containing one attribute-only child element
 * per corpus folder, annotation style, shown type, type system file, CAS processor folder
 * and (when it differs from the default) the editor line length hint.
 *
 * @param dotCorpus
 *          the {@link DotCorpus} object to serialize.
 * @param out
 *          the stream to write the current <code>DotCorpus</code> instance to.
 * @throws CoreException
 *           wrapping any {@link SAXException} raised while emitting the document
 */
public static void serialize(DotCorpus dotCorpus, OutputStream out) throws CoreException {

  XMLSerializer xmlSerializer = new XMLSerializer(out, true);
  ContentHandler xmlSerHandler = xmlSerializer.getContentHandler();

  try {
    xmlSerHandler.startDocument();
    xmlSerHandler.startElement("", CONFIG_ELEMENT, CONFIG_ELEMENT, new AttributesImpl());

    for (String corpusFolder : dotCorpus.getCorpusFolderNameList()) {
      AttributesImpl corpusFolderAttributes = new AttributesImpl();
      corpusFolderAttributes.addAttribute("", "", CORPUS_FOLDER_ATTRIBUTE, "", corpusFolder);

      writeEmptyElement(xmlSerHandler, CORPUS_ELEMENT, corpusFolderAttributes);
    }

    for (AnnotationStyle style : dotCorpus.getAnnotationStyles()) {
      AttributesImpl styleAttributes = new AttributesImpl();
      styleAttributes.addAttribute("", "", STYLE_TYPE_ATTRIBUTE, "", style.getAnnotation());
      styleAttributes.addAttribute("", "", STYLE_STYLE_ATTRIBUTE, "", style.getStyle().name());

      // the colour is stored as the RGB int built from its red/green/blue components only
      Color color = style.getColor();
      int colorInt = new Color(color.getRed(), color.getGreen(), color.getBlue()).getRGB();
      styleAttributes.addAttribute("", "", STYLE_COLOR_ATTRIBUTE, "", Integer.toString(colorInt));
      styleAttributes.addAttribute("", "", STYLE_LAYER_ATTRIBUTE, "",
              Integer.toString(style.getLayer()));

      // the configuration attribute is optional
      if (style.getConfiguration() != null) {
        styleAttributes.addAttribute("", "", STYLE_CONFIG_ATTRIBUTE, "",
                style.getConfiguration());
      }

      writeEmptyElement(xmlSerHandler, STYLE_ELEMENT, styleAttributes);
    }

    for (String type : dotCorpus.getShownTypes()) {
      AttributesImpl shownAttributes = new AttributesImpl();
      shownAttributes.addAttribute("", "", SHOWN_TYPE_ATTRIBUTE, "", type);
      shownAttributes.addAttribute("", "", SHOWN_IS_VISISBLE_ATTRIBUTE, "", "true");

      writeEmptyElement(xmlSerHandler, SHOWN_ELEMENT, shownAttributes);
    }

    if (dotCorpus.getTypeSystemFileName() != null) {
      AttributesImpl typeSystemFileAttributes = new AttributesImpl();
      typeSystemFileAttributes.addAttribute("", "", TYPESYTEM_FILE_ATTRIBUTE, "",
              dotCorpus.getTypeSystemFileName());

      writeEmptyElement(xmlSerHandler, TYPESYSTEM_ELEMENT, typeSystemFileAttributes);
    }

    for (String folder : dotCorpus.getCasProcessorFolderNames()) {
      AttributesImpl taggerConfigAttributes = new AttributesImpl();
      taggerConfigAttributes.addAttribute("", "", CAS_PROCESSOR_FOLDER_ATTRIBUTE, "", folder);

      writeEmptyElement(xmlSerHandler, CAS_PROCESSOR_ELEMENT, taggerConfigAttributes);
    }

    // only written when the hint differs from the default
    if (dotCorpus.getEditorLineLengthHint() != DotCorpus.EDITOR_LINE_LENGTH_HINT_DEFAULT) {
      AttributesImpl editorLineLengthHintAttributes = new AttributesImpl();
      editorLineLengthHintAttributes.addAttribute("", "", EDITOR_LINE_LENGTH_ATTRIBUTE, "",
              Integer.toString(dotCorpus.getEditorLineLengthHint()));

      writeEmptyElement(xmlSerHandler, EDITOR_ELEMENT, editorLineLengthHintAttributes);
    }

    xmlSerHandler.endElement("", CONFIG_ELEMENT, CONFIG_ELEMENT);
    xmlSerHandler.endDocument();
  } catch (SAXException e) {
    // Status requires a non-null message
    String message = e.getMessage() != null ? e.getMessage() : "";

    IStatus s = new Status(IStatus.ERROR, CasEditorPlugin.ID, IStatus.OK, message, e);
    throw new CoreException(s);
  }
}

/**
 * Emits a start/end event pair for a content-less element carrying the given attributes.
 *
 * @param handler
 *          the handler receiving the SAX events
 * @param elementName
 *          used as both local name and qualified name, with an empty namespace URI
 * @param attributes
 *          the attributes of the element
 * @throws SAXException
 *           if the handler rejects either event
 */
private static void writeEmptyElement(ContentHandler handler, String elementName,
        AttributesImpl attributes) throws SAXException {
  handler.startElement("", elementName, elementName, attributes);
  handler.endElement("", elementName, elementName);
}
Example #14
Source File: AnalysisEngine_implTest.java From uima-uimaj with Apache License 2.0 | 4 votes |
private void manyDelegatesCommon() throws Exception { // Test that an aggregate can be copied preserving all comments and ordering of delegates XMLParser.ParsingOptions parsingOptions = new XMLParser.ParsingOptions(false); parsingOptions.preserveComments = true; XMLParser parser = UIMAFramework.getXMLParser(); File inFile = JUnitExtension.getFile("TextAnalysisEngineImplTest/AggregateWithManyDelegates.xml"); AnalysisEngineDescription desc = parser.parseAnalysisEngineDescription(new XMLInputSource(inFile), parsingOptions); // Write out descriptor File cloneFile = new File(inFile.getParentFile(), "CopyOfAggregateWithManyDelegates.xml"); try (BufferedOutputStream os = new BufferedOutputStream(new FileOutputStream(cloneFile))) { XMLSerializer xmlSerializer = new XMLSerializer(false); xmlSerializer.setOutputStream(os); // set the amount to a value which will show up if used // indent should not be used because we're using a parser mode which preserves // comments and ignorable white space. // NOTE: Saxon appears to force the indent to be 3 - which is what the input file now uses. xmlSerializer.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", "4"); ContentHandler contentHandler = xmlSerializer.getContentHandler(); contentHandler.startDocument(); desc.toXML(contentHandler, true); contentHandler.endDocument(); } String inXml = FileCompare.file2String(inFile); String cloneXml = FileCompare.file2String(cloneFile); XMLAssert.assertXMLEqual(inXml, cloneXml); // When building from a source distribution the descriptor may not have // appropriate line-ends so compute the length as if always 1 byte. int diff = fileLength(cloneFile) - fileLength(inFile); // One platform inserts a blank line and a final newline, so don't insist on perfection // NOTE: This fails with Saxon as it omits the xmlns attribute (why?) and omits the newlines between adjacent comments. 
// It also produces many differences in indentation if the input is not indented by 3 assertTrue("File size changed by "+diff+" should be no more than 2", diff >= -2 && diff <= 2); // Initialize all delegates and check the initialization order (should be declaration order) TestAnnotator2.allContexts = ""; UIMAFramework.produceAnalysisEngine(desc); assertEquals("D/C/B/A/F/E/", TestAnnotator2.allContexts); // Check that copying aggregate preserved the order of the delegates desc = parser.parseAnalysisEngineDescription(new XMLInputSource(cloneFile), parsingOptions); TestAnnotator2.allContexts = ""; UIMAFramework.produceAnalysisEngine(desc); assertEquals("D/C/B/A/F/E/", TestAnnotator2.allContexts); cloneFile.delete(); }
Example #15
Source File: XCasToCasDataSaxHandlerTest.java From uima-uimaj with Apache License 2.0 | 4 votes |
private void _testConversions(CAS aCAS) throws IOException, ParserConfigurationException, SAXException, ResourceInitializationException, CASRuntimeException { // generate XCAS events and pipe them to XCasToCasDataSaxHandler CasData casData = new CasDataImpl(); XCasToCasDataSaxHandler handler = new XCasToCasDataSaxHandler(casData); XCASSerializer xcasSer = new XCASSerializer(aCAS.getTypeSystem()); xcasSer.serialize(aCAS, handler); Assert.assertNotNull(casData); assertValidCasData(casData, aCAS.getTypeSystem()); // System.out.println(casData); // now generate XCAS from the CasData CasDataToXCas generator = new CasDataToXCas(); StringWriter sw = new StringWriter(); XMLSerializer xmlSer = new XMLSerializer(sw, false); generator.setContentHandler(xmlSer.getContentHandler()); generator.generateXCas(casData); String xml = sw.getBuffer().toString(); //workaround for XML serializatioj problem on Sun Java 1.4 if (!builtInXmlSerializationSupportsCRs()) { xml = xml.replaceAll(" ", " "); } UIMAFramework.getLogger(XCasToCasDataSaxHandlerTest.class).log(Level.FINE, xml); // deserialize back into CAS for comparison // CASMgr tcasMgr = CASFactory.createCAS(aCAS.getTypeSystem()); // tcasMgr.initCASIndexes(); // tcasMgr.getIndexRepositoryMgr().commit(); CAS cas2 = CasCreationUtils.createCas(null, aCAS.getTypeSystem(), null); XCASDeserializer deser = new XCASDeserializer(cas2.getTypeSystem()); ContentHandler deserHandler = deser.getXCASHandler(cas2); SAXParserFactory fact = SAXParserFactory.newInstance(); SAXParser parser = fact.newSAXParser(); XMLReader xmlReader = parser.getXMLReader(); xmlReader.setContentHandler(deserHandler); xmlReader.parse(new InputSource(new StringReader(xml))); // CASes should be identical CasComparer.assertEquals(aCAS, cas2); }
Example #16
Source File: XmiCasDeserializerTest.java From uima-uimaj with Apache License 2.0 | 4 votes |
public void testTypeSystemFiltering() throws Exception { try { // deserialize a complex CAS from XCAS CAS cas = CasCreationUtils.createCas(typeSystem, new TypePriorities_impl(), indexes); InputStream serCasStream = new FileInputStream(JUnitExtension.getFile("ExampleCas/cas.xml")); XCASDeserializer deser = new XCASDeserializer(cas.getTypeSystem()); ContentHandler deserHandler = deser.getXCASHandler(cas); SAXParserFactory fact = SAXParserFactory.newInstance(); SAXParser parser = fact.newSAXParser(); XMLReader xmlReader = parser.getXMLReader(); xmlReader.setContentHandler(deserHandler); xmlReader.parse(new InputSource(serCasStream)); serCasStream.close(); // now read in a TypeSystem that's a subset of those types TypeSystemDescription partialTypeSystemDesc = UIMAFramework.getXMLParser() .parseTypeSystemDescription( new XMLInputSource(JUnitExtension .getFile("ExampleCas/partialTestTypeSystem.xml"))); TypeSystem partialTypeSystem = CasCreationUtils.createCas(partialTypeSystemDesc, null, null) .getTypeSystem(); // reserialize as XMI, filtering out anything that doesn't fit in the // partialTypeSystem StringWriter sw = new StringWriter(); XMLSerializer xmlSer = new XMLSerializer(sw, false); XmiCasSerializer xmiSer = new XmiCasSerializer(partialTypeSystem); xmiSer.serialize(cas, xmlSer.getContentHandler()); String xml = sw.getBuffer().toString(); // System.out.println(xml); // deserialize into another CAS (which has the whole type system) CAS cas2 = CasCreationUtils.createCas(typeSystem, new TypePriorities_impl(), indexes); XmiCasDeserializer deser2 = new XmiCasDeserializer(cas2.getTypeSystem()); ContentHandler deserHandler2 = deser2.getXmiCasHandler(cas2); xmlReader.setContentHandler(deserHandler2); xmlReader.parse(new InputSource(new StringReader(xml))); // check that types have been filtered out Type orgType = cas2.getTypeSystem().getType("org.apache.uima.testTypeSystem.Organization"); assertNotNull(orgType); assertTrue(cas2.getAnnotationIndex(orgType).size() == 0); 
assertTrue(cas.getAnnotationIndex(orgType).size() > 0); // but that some types are still there Type personType = cas2.getTypeSystem().getType("org.apache.uima.testTypeSystem.Person"); FSIndex personIndex = cas2.getAnnotationIndex(personType); assertTrue(personIndex.size() > 0); // check that mentionType has been filtered out (set to null) FeatureStructure somePlace = personIndex.iterator().get(); Feature mentionTypeFeat = personType.getFeatureByBaseName("mentionType"); assertNotNull(mentionTypeFeat); assertNull(somePlace.getStringValue(mentionTypeFeat)); } catch (Exception e) { JUnitExtension.handleException(e); } }
Example #17
Source File: XCASDeserializerTest.java From uima-uimaj with Apache License 2.0 | 4 votes |
public void testMultipleSofas() throws Exception { /************************************************* * Make CAS with 2 sofas, initial and OtherSofa * * * * Add instance of TOP and index in both views * * * * Serialize to string "xml" * * * * Deserialize from string * *************************************************/ CAS cas = CasCreationUtils.createCas(typeSystem, new TypePriorities_impl(), indexes); // set document text for the initial view cas.setDocumentText("This is a test"); // create a new view and set its document text CAS cas2 = cas.createView("OtherSofa"); cas2.setDocumentText("This is only a test"); // Change this test to create an instance of TOP because you cannot add an annotation to other than // the view it is created in. https://issues.apache.org/jira/browse/UIMA-4099 // create a TOP and add to index of both views Type topType = cas.getTypeSystem().getTopType(); FeatureStructure aTOP = cas.createFS(topType); cas.getIndexRepository().addFS(aTOP); cas2.getIndexRepository().addFS(aTOP); FSIterator<FeatureStructure> it = cas.getIndexRepository().getAllIndexedFS(topType); FSIterator<FeatureStructure> it2 = cas2.getIndexRepository().getAllIndexedFS(topType); it.next(); it.next(); it2.next(); it2.next(); assertFalse(it.hasNext()); assertFalse(it2.hasNext()); // serialize StringWriter sw = new StringWriter(); XMLSerializer xmlSer = new XMLSerializer(sw, false); XCASSerializer xcasSer = new XCASSerializer(cas.getTypeSystem()); xcasSer.serialize(cas, xmlSer.getContentHandler(), true); String xml = sw.getBuffer().toString(); // deserialize into another CAS (repeat twice to check it still works after reset) CAS newCas = CasCreationUtils.createCas(typeSystem, new TypePriorities_impl(), indexes); for (int i = 0; i < 2; i++) { XCASDeserializer newDeser = new XCASDeserializer(newCas.getTypeSystem()); ContentHandler newDeserHandler = newDeser.getXCASHandler(newCas); SAXParserFactory fact = SAXParserFactory.newInstance(); SAXParser parser = fact.newSAXParser(); 
XMLReader xmlReader = parser.getXMLReader(); xmlReader.setContentHandler(newDeserHandler); xmlReader.parse(new InputSource(new StringReader(xml))); // check sofas assertEquals("This is a test", newCas.getDocumentText()); CAS newCas2 = newCas.getView("OtherSofa"); assertEquals("This is only a test", newCas2.getDocumentText()); // check that annotation is still indexed in both views it = newCas.getIndexRepository().getAllIndexedFS(topType); it2 = newCas2.getIndexRepository().getAllIndexedFS(topType); it.next(); it.next(); it2.next(); it2.next(); assertFalse(it.hasNext()); assertFalse(it2.hasNext()); // assertTrue(tIndex.size() == 2); // document annot and this one // assertTrue(t2Index.size() == 2); // ditto newCas.reset(); // testing if works after cas reset, go around loop 2nd time } }
Example #18
Source File: XCASDeserializerTest.java From uima-uimaj with Apache License 2.0 | 4 votes |
public void testOutOfTypeSystem2() throws Exception { // deserialize a complex CAS into one with no TypeSystem CAS cas = CasCreationUtils.createCas(new TypeSystemDescription_impl(), new TypePriorities_impl(), new FsIndexDescription[0]); OutOfTypeSystemData ootsd = new OutOfTypeSystemData(); InputStream serCasStream = new FileInputStream(JUnitExtension.getFile("ExampleCas/cas.xml")); XCASDeserializer deser = new XCASDeserializer(cas.getTypeSystem()); ContentHandler deserHandler = deser.getXCASHandler(cas, ootsd); SAXParserFactory fact = SAXParserFactory.newInstance(); SAXParser parser = fact.newSAXParser(); XMLReader xmlReader = parser.getXMLReader(); xmlReader.setContentHandler(deserHandler); xmlReader.parse(new InputSource(serCasStream)); serCasStream.close(); // now reserialize including OutOfTypeSystem data XCASSerializer xcasSer = new XCASSerializer(cas.getTypeSystem()); StringWriter sw = new StringWriter(); XMLSerializer xmlSer = new XMLSerializer(sw, false); xcasSer.serialize(cas, xmlSer.getContentHandler(), true, ootsd); String xml = sw.getBuffer().toString(); // System.out.println("debug writing temp/xmlv3.xml"); // FileUtils.saveString2File(xml, new File("c:/temp/xmlv3.xml")); // System.out.println(xml); // deserialize into a CAS that accepts the full typesystem CAS cas2 = CasCreationUtils.createCas(typeSystem, new TypePriorities_impl(), indexes); XCASDeserializer deser2 = new XCASDeserializer(cas2.getTypeSystem()); ContentHandler deserHandler2 = deser2.getXCASHandler(cas2); xmlReader = parser.getXMLReader(); xmlReader.setContentHandler(deserHandler2); xmlReader.parse(new InputSource(new StringReader(xml))); // check that array refs are not null Type entityType = cas2.getTypeSystem().getType("org.apache.uima.testTypeSystem.Entity"); Feature classesFeat = entityType.getFeatureByBaseName("classes"); Iterator<FeatureStructure> iter = cas2.getIndexRepository().getIndex("testEntityIndex").iterator(); assertTrue(iter.hasNext()); while (iter.hasNext()) { 
FeatureStructure fs = iter.next(); StringArrayFS arrayFS = (StringArrayFS) fs.getFeatureValue(classesFeat); assertNotNull(arrayFS); for (int i = 0; i < arrayFS.size(); i++) { assertNotNull(arrayFS.get(i)); } } }
Example #19
Source File: MetaDataObject_impl.java From uima-uimaj with Apache License 2.0 | 4 votes |
private void toXML(XMLSerializer sax2xml) throws SAXException, IOException { ContentHandler contentHandler = sax2xml.getContentHandler(); contentHandler.startDocument(); toXML(contentHandler, true); // no reason to create a new content handler contentHandler.endDocument(); }
Example #20
Source File: XCasWriterCasConsumer.java From uima-uimaj with Apache License 2.0 | 3 votes |
/**
 * Writes the given CAS to a file as XCAS.
 *
 * @param aCas
 *          the CAS to write
 * @param name
 *          the file to write to
 * @throws IOException
 *           if the file cannot be opened, written or closed
 * @throws SAXException
 *           if producing the XML fails
 */
private void writeXCas(CAS aCas, File name) throws IOException, SAXException {
  try (OutputStream target = new FileOutputStream(name)) {
    XCASSerializer xcasWriter = new XCASSerializer(aCas.getTypeSystem());
    XMLSerializer xmlWriter = new XMLSerializer(target, false);

    xcasWriter.serialize(aCas, xmlWriter.getContentHandler());
  }
}
Example #21
Source File: XmiCasSerializer.java From uima-uimaj with Apache License 2.0 | 3 votes |
/**
 * Serializes a (Delta) CAS to an XMI stream, with all options configurable.
 *
 * @param aCAS
 *          the CAS to serialize
 * @param aTargetTypeSystem
 *          the type system the produced XMI conforms to; types or features outside it are
 *          not serialized. {@code null} means all types and features are serialized.
 * @param aStream
 *          the output stream receiving the XMI document
 * @param aPrettyPrint
 *          {@code true} for output formatted with newlines and indenting, {@code false} for
 *          unformatted output
 * @param aSharedData
 *          optional container for data shared between the {@link XmiCasSerializer} and the
 *          {@link XmiCasDeserializer}; see {@link XmiSerializationSharedData}
 * @param aMarker
 *          optional marker used to filter and serialize a Delta CAS containing only FSs and
 *          views created after the marker was set, plus pre-existing ones that were
 *          modified; see {@link Marker}
 * @param useXml_1_1
 *          if {@code true}, the output serializer's {@code OutputKeys.VERSION} is set to
 *          "1.1"
 * @throws SAXException
 *           if a problem occurs during XMI serialization
 */
public static void serialize(CAS aCAS, TypeSystem aTargetTypeSystem, OutputStream aStream,
        boolean aPrettyPrint, XmiSerializationSharedData aSharedData, Marker aMarker,
        boolean useXml_1_1) throws SAXException {
  XmiCasSerializer casSerializer = new XmiCasSerializer(aTargetTypeSystem);
  XMLSerializer xmlOut = new XMLSerializer(aStream, aPrettyPrint);

  // switch the XML declaration to 1.1 only on request
  if (useXml_1_1) {
    xmlOut.setOutputProperty(OutputKeys.VERSION,"1.1");
  }

  casSerializer.serialize(aCAS, xmlOut.getContentHandler(), null, aSharedData, aMarker);
}
Example #22
Source File: XmiWriterCasConsumer.java From uima-uimaj with Apache License 2.0 | 3 votes |
/** * Serialize a CAS to a file in XMI format * * @param aCas * CAS to serialize * @param name * output file * @throws SAXException - * @throws Exception - * * @throws ResourceProcessException - */ private void writeXmi(CAS aCas, File name, String modelFileName) throws IOException, SAXException { try (FileOutputStream out = new FileOutputStream(name)) { // write XMI XmiCasSerializer ser = new XmiCasSerializer(aCas.getTypeSystem()); XMLSerializer xmlSer = new XMLSerializer(out, false); ser.serialize(aCas, xmlSer.getContentHandler()); } }
Example #23
Source File: XmiWriterCasConsumer.java From uima-uimaj with Apache License 2.0 | 3 votes |
/** * Serialize a CAS to a file in XMI format * * @param aCas * CAS to serialize * @param name * output file * @throws SAXException - * @throws Exception - * * @throws ResourceProcessException - */ private void writeXmi(CAS aCas, File name, String modelFileName) throws IOException, SAXException { try (OutputStream out = new FileOutputStream(name)) { // write XMI XmiCasSerializer ser = new XmiCasSerializer(aCas.getTypeSystem()); XMLSerializer xmlSer = new XMLSerializer(out, false); ser.serialize(aCas, xmlSer.getContentHandler()); } }
Example #24
Source File: XCASSerializer.java From uima-uimaj with Apache License 2.0 | 3 votes |
/**
 * Serializes a CAS as XCAS to a stream.
 *
 * @param aCAS
 *          the CAS to serialize
 * @param aStream
 *          the output stream receiving the XCAS XML document
 * @param isFormattedOutput
 *          if {@code true} the XCAS is serialized formatted
 * @param useXml_1_1
 *          if {@code true}, the output serializer's {@code OutputKeys.VERSION} is set to
 *          "1.1"
 * @throws SAXException
 *           if a problem occurs during XCAS serialization
 * @throws IOException
 *           if an I/O failure occurs
 */
public static void serialize(CAS aCAS, OutputStream aStream, boolean isFormattedOutput,
        boolean useXml_1_1) throws SAXException, IOException {
  XCASSerializer casSerializer = new XCASSerializer(aCAS.getTypeSystem());
  XMLSerializer xmlOut = new XMLSerializer(aStream, isFormattedOutput);

  // switch the XML declaration to 1.1 only on request
  if (useXml_1_1) {
    xmlOut.setOutputProperty(OutputKeys.VERSION,"1.1");
  }

  casSerializer.serialize(aCAS, xmlOut.getContentHandler());
}
Example #25
Source File: MetaDataObject_impl.java From uima-uimaj with Apache License 2.0 | 2 votes |
/**
 * Writes this object's XML representation to a byte stream.
 *
 * @param aOutputStream
 *          the OutputStream that receives the XML
 * @throws SAXException
 *           if producing the XML fails
 * @throws IOException
 *           if writing fails
 */
public void toXML(OutputStream aOutputStream) throws SAXException, IOException {
  XMLSerializer streamSerializer = new XMLSerializer(aOutputStream);
  toXML(streamSerializer);
}
Example #26
Source File: MetaDataObject_impl.java From uima-uimaj with Apache License 2.0 | 2 votes |
/**
 * Writes this object's XML representation to a character writer.
 *
 * @param aWriter
 *          the Writer that receives the XML
 * @throws SAXException
 *           if producing the XML fails
 * @throws IOException
 *           if writing fails
 */
public void toXML(Writer aWriter) throws SAXException, IOException {
  XMLSerializer writerSerializer = new XMLSerializer(aWriter);
  toXML(writerSerializer);
}
Example #27
Source File: TypeSystem2Xml.java From uima-uimaj with Apache License 2.0 | 2 votes |
/**
 * Writes a TypeSystem as XML to the given stream by delegating to the content-handler
 * variant of this method. Built-in types and array types are not included.
 *
 * @param aTypeSystem
 *          the TypeSystem to convert
 * @param aOutputStream
 *          the stream that receives the XML output
 * @throws IOException
 *           if there is a problem writing to the provided OutputStream
 * @throws SAXException
 *           if an error occurs while translating the type system to XML
 */
public static void typeSystem2Xml(TypeSystem aTypeSystem, OutputStream aOutputStream)
        throws SAXException, IOException {
  typeSystem2Xml(aTypeSystem, new XMLSerializer(aOutputStream).getContentHandler());
}