org.apache.tika.io.IOUtils Java Examples
The following examples show how to use
org.apache.tika.io.IOUtils.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: TikaTest.java From tika-server with Apache License 2.0 | 7 votes |
@Override public void handle(String filename, MediaType mediaType, InputStream stream) { ByteArrayOutputStream os = new ByteArrayOutputStream(); if (! stream.markSupported()) { stream = TikaInputStream.get(stream); } stream.mark(0); try { IOUtils.copy(stream, os); bytes.add(os.toByteArray()); stream.reset(); } catch (IOException e) { //swallow } }
Example #2
Source File: UserWrapper.java From wandora with GNU General Public License v3.0 | 6 votes |
/**
 * Downloads the picture found at {@code URL_ROOT + userId + "/picture"} and stores
 * it on the given user topic as a data URL, typed "Profile Picture" and scoped with
 * the language-independent topic.
 *
 * <p>I/O and topic map failures are passed to the class logger and not rethrown.
 *
 * @param tm        the topic map in which the type and scope topics are looked up or created
 * @param userTopic the topic that receives the picture data
 */
private void addPicture(TopicMap tm, Topic userTopic) {
    try {
        URL imageUrl = new URL(AbstractFBGraphExtractor.URL_ROOT + this.user.getId() + "/picture");

        // Use a single connection for both the content type and the body, so the
        // picture is not requested twice.
        java.net.URLConnection connection = imageUrl.openConnection();
        String contentType = connection.getContentType();

        byte[] data;
        // Close the response stream even if reading it fails.
        try (java.io.InputStream in = connection.getInputStream()) {
            data = IOUtils.toByteArray(in);
        }

        DataURL u = new DataURL(contentType, data);
        Topic picType = getOrCreateType(tm, "Profile Picture");
        Topic langTopic = getOrCreateTopic(tm, XTMPSI.LANG_INDEPENDENT);
        userTopic.setData(picType, langTopic, u.toExternalForm());
    } catch (IOException | TopicMapException e) {
        UserWrapper.logger.log(e);
    }
}
Example #3
Source File: RepositoryRefactor.java From urule with Apache License 2.0 | 6 votes |
/**
 * Returns the paths of all files under {@code path} whose content contains both
 * {@code path} and {@code searchText}.
 *
 * @param rootNode   node passed to {@code getFiles} to enumerate candidate files
 * @param path       path passed to {@code getFiles}; it must also occur in a file's content
 * @param searchText text that must occur in a file's content
 * @return the matching file paths, in the order {@code getFiles} returned them
 * @throws RuleException if a file's content cannot be read or its stream cannot be closed
 * @throws Exception     any other failure raised while listing or opening files
 */
public List<String> getReferenceFiles(Node rootNode,String path,String searchText) throws Exception{
    List<String> referenceFiles=new ArrayList<String>();
    List<String> files=getFiles(rootNode, path);
    for(String nodePath:files){
        InputStream inputStream=repositoryService.readFile(nodePath,null);
        try {
            String content;
            try {
                content = IOUtils.toString(inputStream);
            } finally {
                // Close the stream on the failure path too; the original leaked it
                // whenever reading threw.
                inputStream.close();
            }
            if(content.contains(path) && content.contains(searchText)){
                referenceFiles.add(nodePath);
            }
        } catch (IOException e) {
            throw new RuleException(e);
        }
    }
    return referenceFiles;
}
Example #4
Source File: PDFGenerationItemProcessor.java From CogStack-Pipeline with Apache License 2.0 | 6 votes |
/**
 * Starts a new thread that reads the given stream to its end as UTF-8 text and
 * writes the collected text to the debug log. The stream is closed when reading
 * ends, whether or not it succeeded.
 *
 * @param stream the stream to drain and log
 */
private void logStream(final InputStream stream) {
    new Thread() {
        @Override
        public void run() {
            final Reader reader = new InputStreamReader(stream, IOUtils.UTF_8);
            final StringBuilder collected = new StringBuilder();
            final char[] chunk = new char[1024];
            try {
                int count = reader.read(chunk);
                while (count != -1) {
                    collected.append(chunk, 0, count);
                    count = reader.read(chunk);
                }
            } catch (Exception e) {
                LOG.error(e.getMessage());
            } finally {
                IOUtils.closeQuietly(stream);
                IOUtils.closeQuietly(reader);
            }
            // Whatever was read before a failure is still logged.
            LOG.debug(collected.toString());
        }
    }.start();
}
Example #5
Source File: ThumbnailGenerationItemProcessor.java From CogStack-Pipeline with Apache License 2.0 | 6 votes |
/**
 * Drains the given stream on a newly started thread, decoding it as UTF-8, and
 * logs the accumulated text at debug level. Both the stream and the reader
 * wrapping it are closed quietly once reading stops.
 *
 * @param stream the stream whose content is logged
 */
private void logStream(final InputStream stream) {
    new Thread() {
        @Override
        public void run() {
            final Reader utf8Reader = new InputStreamReader(stream, IOUtils.UTF_8);
            final StringBuilder text = new StringBuilder();
            final char[] block = new char[1024];
            try {
                int read;
                while ((read = utf8Reader.read(block)) != -1) {
                    text.append(block, 0, read);
                }
            } catch (Exception e) {
                LOG.error(e.getMessage());
            } finally {
                IOUtils.closeQuietly(stream);
                IOUtils.closeQuietly(utf8Reader);
            }
            LOG.debug(text.toString());
        }
    }.start();
}
Example #6
Source File: PDFPreprocessorParser.java From CogStack-Pipeline with Apache License 2.0 | 6 votes |
/**
 * Starts a thread that reads the contents of the standard output or error
 * stream of the given process to not block the process. The stream is
 * closed once fully processed, and whatever was read is logged at debug level.
 *
 * @param logType not used by this implementation
 * @param stream  the stream to drain; decoded as UTF-8
 * @param file    not used by this implementation
 */
private void logStream(final String logType, final InputStream stream, final File file) {
    new Thread() {
        @Override
        public void run() {
            // Decode with an explicit charset so the logged text does not depend on
            // the platform default.
            Reader reader = new InputStreamReader(stream, java.nio.charset.StandardCharsets.UTF_8);
            StringBuilder out = new StringBuilder();
            char[] buffer = new char[1024];
            try {
                for (int n = reader.read(buffer); n != -1; n = reader.read(buffer)) {
                    out.append(buffer, 0, n);
                }
            } catch (IOException e) {
                // Still best effort, but record the failure instead of dropping it silently.
                LogFactory.getLog(PDFPreprocessorParser.class).debug("Failed to read process stream", e);
            } finally {
                IOUtils.closeQuietly(stream);
            }

            String msg = out.toString();
            LogFactory.getLog(PDFPreprocessorParser.class).debug(msg);
        }
    }.start();
}
Example #7
Source File: XMLContentExporterTest.java From syncope with Apache License 2.0 | 6 votes |
/**
 * Exports the "Master" content and verifies that exactly four {@code <Realm} rows
 * are produced, in the order "/", "odd", "even", "two".
 * Also checks for SYNCOPE-1307.
 *
 * @throws Exception exception thrown when dealing with IO.
 */
@Test
public void issueSYNCOPE1128() throws Exception {
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    exporter.export("Master", baos, null, null, null);

    String exported = baos.toString(Charset.defaultCharset());
    assertTrue(StringUtils.isNotBlank(exported));

    List<String> exportedRows = IOUtils.readLines(
            IOUtils.toInputStream(exported), StandardCharsets.UTF_8.name());
    List<String> realms = exportedRows.stream().
            filter(row -> row.trim().startsWith("<Realm")).
            collect(Collectors.toList());
    assertEquals(4, realms.size());

    // Expected name attribute of each realm row, in export order.
    String[] expectedNames = {"name=\"/\"", "name=\"odd\"", "name=\"even\"", "name=\"two\""};
    for (int i = 0; i < expectedNames.length; i++) {
        assertTrue(realms.get(i).contains(expectedNames[i]));
    }
}
Example #8
Source File: TestPackageInstall.java From jackrabbit-filevault with Apache License 2.0 | 6 votes |
/**
 * Installs a package containing a binary property and verifies that the
 * property at {@code /tmp/binary/test/jcr:data} is of type BINARY and holds
 * the string "0123456789abcdef" repeated up to 8192 characters.
 */
@Test
public void testBinaryProperties() throws RepositoryException, IOException, PackageException {
    JcrPackage pack = packMgr.upload(getStream("/test-packages/tmp_binary.zip"), false);
    assertNotNull(pack);
    pack.install(getDefaultOptions());

    Property p = admin.getProperty("/tmp/binary/test/jcr:data");
    assertEquals(PropertyType.BINARY, p.getType());

    StringBuilder buffer = new StringBuilder(8192);
    while (buffer.length() < 8192) {
        buffer.append("0123456789abcdef");
    }

    String result;
    // Close the binary stream after reading; the original left it open.
    try (java.io.InputStream in = p.getBinary().getStream()) {
        result = IOUtils.toString(in);
    }
    assertEquals(buffer.toString(), result);
}
Example #9
Source File: CSVDetector.java From data-prep with Apache License 2.0 | 6 votes |
/**
 * Reads an input stream and checks if it has a CSV format.
 *
 * <p>Detection is first attempted on the stream itself. If that yields nothing, up to
 * {@code FormatUtils.META_TAG_BUFFER_SIZE} bytes are read from the stream, the stream is
 * reset to its marked position, and detection is retried on the text decoded from those
 * bytes with a fresh {@link Metadata}.
 *
 * <p>The general contract of a detector is to not close the specified stream before
 * returning; closing it is the responsibility of the caller. The mark/reset feature of the
 * specified {@link TikaInputStream} is used so that the stream keeps returning the same bytes.
 *
 * @param metadata the specified TIKA {@link Metadata}
 * @param inputStream the specified input stream
 * @return either null or a CSV format
 * @throws IOException if reading from or resetting the stream fails
 */
@Override
public Format detect(Metadata metadata, TikaInputStream inputStream) throws IOException {
    Format result = detectText(metadata, inputStream);
    if (result != null) {
        return result;
    }

    inputStream.mark(FormatUtils.META_TAG_BUFFER_SIZE);
    byte[] buffer = new byte[FormatUtils.META_TAG_BUFFER_SIZE];

    // Fill the buffer until it is full or the stream ends.
    int n = 0;
    int m = inputStream.read(buffer);
    while (m != -1 && n < buffer.length) {
        n += m;
        m = inputStream.read(buffer, n, buffer.length - n);
    }
    inputStream.reset();

    String head = FormatUtils.readFromBuffer(buffer, 0, n);
    try (InputStream stream = TikaInputStream.get(IOUtils.toInputStream(head))) {
        result = detectText(new Metadata(), stream);
    }
    return result;
}
Example #10
Source File: DynRealmITCase.java From syncope with Apache License 2.0 | 6 votes |
/**
 * Queries {@code http://localhost:9200/master_user/_search} for the document whose
 * {@code _id} matches the given user key and returns the {@code dynRealms} array of the
 * first hit's {@code _source}.
 *
 * @param userKey the key matched against the document {@code _id}
 * @return the {@code dynRealms} node of the first hit
 * @throws Exception if the HTTP client cannot be started or stopped, the request fails,
 * or the response cannot be parsed
 */
private static ArrayNode fetchDynRealmsFromElasticsearch(final String userKey) throws Exception {
    String body =
            '{'
            + " \"query\": {"
            + " \"match\": {\"_id\": \"" + userKey + "\"}"
            + " }"
            + '}';

    HttpClient httpClient = new HttpClient();
    httpClient.start();
    try {
        ContentResponse response = httpClient.newRequest("http://localhost:9200/master_user/_search").
                method(HttpMethod.GET).
                header(HttpHeader.CONTENT_TYPE, MediaType.APPLICATION_JSON).
                content(new InputStreamContentProvider(IOUtils.toInputStream(body))).
                send();
        assertEquals(HttpStatus.OK_200, response.getStatus());

        return (ArrayNode) OBJECT_MAPPER.readTree(response.getContent()).
                get("hits").get("hits").get(0).get("_source").get("dynRealms");
    } finally {
        // The client was started above and was never stopped in the original.
        httpClient.stop();
    }
}
Example #11
Source File: ResultsUtils.java From allure-java with Apache License 2.0 | 5 votes |
/**
 * Loads the named resource through the given class loader and returns its
 * content decoded as UTF-8.
 *
 * @param classLoader  the class loader used to locate the resource
 * @param resourceName the name of the resource to read
 * @return the resource content, or an empty {@link Optional} when the resource
 *         is not found or reading it fails (the failure is logged as a warning)
 */
private static Optional<String> readResource(final ClassLoader classLoader, final String resourceName) {
    try (InputStream resource = classLoader.getResourceAsStream(resourceName)) {
        if (resource != null) {
            final String content = new String(IOUtils.toByteArray(resource), StandardCharsets.UTF_8);
            return Optional.of(content);
        }
    } catch (IOException e) {
        LOGGER.warn("Unable to process description resource file", e);
    }
    // Reached when the resource is missing or an I/O error was logged above.
    return Optional.empty();
}
Example #12
Source File: SAML2SPMetadataTest.java From syncope with Apache License 2.0 | 5 votes |
/**
 * Creates a {@code SAML2SPMetadata} entity for the given owner, fills its metadata from
 * the {@code sp-metadata.xml} classpath resource, saves it, and checks that it has a key
 * and can be found again by owner.
 *
 * @param owner the owner set on the new entity
 * @return the entity instance that was passed to the DAO's {@code save}
 * @throws Exception if the metadata resource cannot be read
 */
private SAML2SPMetadata create(final String owner) throws Exception {
    SAML2SPMetadata saml2SPMetadata = entityFactory.newEntity(SAML2SPMetadata.class);
    saml2SPMetadata.setOwner(owner);

    String metadata;
    // Close the resource stream after reading; the original left it open.
    try (java.io.InputStream in = new ClassPathResource("sp-metadata.xml").getInputStream()) {
        metadata = IOUtils.toString(in);
    }
    saml2SPMetadata.setMetadata(metadata);
    saml2SPMetadataDAO.save(saml2SPMetadata);

    assertNotNull(saml2SPMetadata);
    assertNotNull(saml2SPMetadata.getKey());
    assertNotNull(saml2SPMetadataDAO.findByOwner(saml2SPMetadata.getOwner()));
    return saml2SPMetadata;
}
Example #13
Source File: TestPackageInstall.java From jackrabbit-filevault with Apache License 2.0 | 5 votes |
/**
 * Installs a package with no properties. The package is first copied from the
 * test resources into a temporary file, which is then uploaded and installed.
 */
@Test
public void testNoProperties() throws RepositoryException, IOException, PackageException {
    File tmpFile = File.createTempFile("vlttest", "zip");
    // Close both ends of the copy before the file is uploaded; the original
    // left the source and the file output stream open.
    try (java.io.InputStream in = getStream("/test-packages/tmp_no_properties.zip");
         java.io.OutputStream out = FileUtils.openOutputStream(tmpFile)) {
        IOUtils.copy(in, out);
    }

    JcrPackage pack = packMgr.upload(tmpFile, true, true, "testpackage", false);
    assertNotNull(pack);
    pack.install(getDefaultOptions());
}
Example #14
Source File: TestPackageInstall.java From jackrabbit-filevault with Apache License 2.0 | 5 votes |
/** * Installs a binary properties twice to check if it doesn't report an update. * TODO: this is not implemented yet. see JCRVLT-110 */ @Test @Ignore public void testBinaryPropertyTwice() throws RepositoryException, IOException, PackageException { JcrPackage pack = packMgr.upload(getStream("/test-packages/tmp_binary.zip"), false); assertNotNull(pack); pack.install(getDefaultOptions()); Property p = admin.getProperty("/tmp/binary/test/jcr:data"); assertEquals(PropertyType.BINARY, p.getType()); StringBuilder buffer = new StringBuilder(8192); while (buffer.length() < 8192) { buffer.append("0123456789abcdef"); } String result = IOUtils.toString(p.getBinary().getStream()); assertEquals(buffer.toString(), result); // install again to check if binary data is not updated ImportOptions opts = getDefaultOptions(); TrackingListener listener = new TrackingListener(opts.getListener()); opts.setListener(listener); pack.install(opts); //TODO: assertEquals("-", listener.getActions().get("/tmp/binary/test")); assertEquals("U", listener.getActions().get("/tmp/binary/test")); }
Example #15
Source File: HtmlDetector.java From data-prep with Apache License 2.0 | 5 votes |
/**
 * Reads an input stream and checks if it has a HTML format.
 *
 * <p>Up to {@code FormatUtils.META_TAG_BUFFER_SIZE} bytes are read from the stream, the
 * stream is reset to its marked position, and the text decoded from those bytes is handed
 * to the HTML encoding detector. A format is reported only when a charset is detected.
 *
 * <p>The general contract of a detector is to not close the specified stream before
 * returning; closing it is the responsibility of the caller. The mark/reset feature of the
 * specified {@link TikaInputStream} is used so that the stream keeps returning the same bytes.
 *
 * @param metadata the specified TIKA {@link Metadata}
 * @param inputStream the specified input stream
 * @return either null or an HTML format
 * @throws IOException if reading from or resetting the stream fails
 */
@Override
public Format detect(Metadata metadata, TikaInputStream inputStream) throws IOException {
    if (inputStream == null) {
        return null;
    }

    inputStream.mark(FormatUtils.META_TAG_BUFFER_SIZE);
    byte[] buffer = new byte[FormatUtils.META_TAG_BUFFER_SIZE];

    // Fill the buffer until it is full or the stream ends.
    int n = 0;
    int m = inputStream.read(buffer);
    while (m != -1 && n < buffer.length) {
        n += m;
        m = inputStream.read(buffer, n, buffer.length - n);
    }
    inputStream.reset();

    String head = FormatUtils.readFromBuffer(buffer, 0, n);
    try (InputStream stream = TikaInputStream.get(IOUtils.toInputStream(head))) {
        Charset charset = htmlEncodingDetector.detect(stream, metadata);
        return charset == null ? null : new Format(htmlFormatFamily, charset.name());
    }
}
Example #16
Source File: HelmITSupport.java From nexus-repository-helm with Eclipse Public License 1.0 | 4 votes |
/**
 * Reads the given stream fully as text and asserts that the text contains the
 * expected content.
 *
 * @param is              the stream holding the downloaded data
 * @param expectedContent the substring that must be present
 * @throws Exception if the stream cannot be read
 */
protected void checkYamlIncludesContent(InputStream is, String expectedContent) throws Exception {
    assertThat(IOUtils.toString(is), containsString(expectedContent));
}
Example #17
Source File: XMLContentExporter.java From syncope with Apache License 2.0 | 4 votes |
/**
 * Reads the named column from the current row and renders it as a string
 * according to its SQL type:
 * <ul>
 * <li>BINARY, VARBINARY, LONGVARBINARY and BLOB values become a hexadecimal string;</li>
 * <li>BIT and BOOLEAN values become "1" or "0";</li>
 * <li>DATE, TIME and TIMESTAMP values are formatted through {@code FormatUtils.format};</li>
 * <li>any other type is read with {@code getString}.</li>
 * </ul>
 *
 * @param rs         the result set positioned on the row to read
 * @param columnName the column to read
 * @param columnType the {@link Types} constant describing the column
 * @return the rendered value; {@code null} when the binary, blob or timestamp value is
 *         absent, or when reading binary data fails (the failure is logged)
 * @throws SQLException if the column cannot be read
 */
private static String getValues(final ResultSet rs, final String columnName, final Integer columnType)
        throws SQLException {

    String res = null;

    try {
        switch (columnType) {
            case Types.BINARY:
            case Types.VARBINARY:
            case Types.LONGVARBINARY:
                // try-with-resources skips a null resource, so the null check stays inside.
                try (InputStream is = rs.getBinaryStream(columnName)) {
                    if (is != null) {
                        res = DatatypeConverter.printHexBinary(IOUtils.toByteArray(is));
                    }
                }
                break;

            case Types.BLOB:
                final Blob blob = rs.getBlob(columnName);
                if (blob != null) {
                    // Close the blob stream explicitly; the original never closed it.
                    try (InputStream blobStream = blob.getBinaryStream()) {
                        res = DatatypeConverter.printHexBinary(IOUtils.toByteArray(blobStream));
                    }
                }
                break;

            case Types.BIT:
            case Types.BOOLEAN:
                if (rs.getBoolean(columnName)) {
                    res = "1";
                } else {
                    res = "0";
                }
                break;

            case Types.DATE:
            case Types.TIME:
            case Types.TIMESTAMP:
                final Timestamp timestamp = rs.getTimestamp(columnName);
                if (timestamp != null) {
                    res = FormatUtils.format(new Date(timestamp.getTime()));
                }
                break;

            default:
                res = rs.getString(columnName);
        }
    } catch (IOException e) {
        LOG.error("Error retrieving hexadecimal string", e);
    }

    return res;
}
Example #18
Source File: TestSubPackages.java From jackrabbit-filevault with Apache License 2.0 | 4 votes |
/** * Test if installing and re-creating a package with sub-packages on an alternative path results in the same package again. */ @Test public void testRoundTrip() throws IOException, RepositoryException, PackageException { JcrPackage pack = packMgr.upload(getStream("/test-packages/subtest.zip"), false); assertNotNull(pack); // install ImportOptions opts = getDefaultOptions(); opts.setNonRecursive(true); pack.install(opts); // create new package JcrPackage pkg = packMgr.open(PACKAGE_ID_SUB_TEST); packMgr.assemble(pkg, new DefaultProgressListener()); try (ZipInputStream in = new ZipInputStream(pkg.getData().getBinary().getStream())) { ZipEntry e; List<String> entries = new ArrayList<>(); String filter = ""; while ((e = in.getNextEntry()) != null) { entries.add(e.getName()); if ("META-INF/vault/filter.xml".equals(e.getName())) { filter = IOUtils.toString(in, "utf-8"); } } Collections.sort(entries); StringBuffer result = new StringBuffer(); for (String name: entries) { // exclude some of the entries that depend on the repository setup if ("jcr_root/etc/.content.xml".equals(name) || "jcr_root/etc/packages/my_packages/.content.xml".equals(name) || "jcr_root/etc/packages/.content.xml".equals(name)) { continue; } result.append(name).append("\n"); } assertEquals("Filter must be correct", "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" + "<workspaceFilter version=\"1.0\">\n" + " <filter root=\"/etc/packages/my_packages/sub_a.zip\"/>\n" + " <filter root=\"/etc/packages/my_packages/sub_b.zip\"/>\n" + "</workspaceFilter>\n", filter); assertEquals("Package must contain proper entries.", "META-INF/\n" + "META-INF/MANIFEST.MF\n" + "META-INF/vault/\n" + "META-INF/vault/config.xml\n" + "META-INF/vault/definition/\n" + "META-INF/vault/definition/.content.xml\n" + "META-INF/vault/filter.xml\n" + "META-INF/vault/nodetypes.cnd\n" + "META-INF/vault/properties.xml\n" + "jcr_root/.content.xml\n" + "jcr_root/etc/\n" + "jcr_root/etc/packages/\n" + "jcr_root/etc/packages/my_packages/\n" + 
"jcr_root/etc/packages/my_packages/sub_a.zip\n" + "jcr_root/etc/packages/my_packages/sub_a.zip.dir/\n" + "jcr_root/etc/packages/my_packages/sub_a.zip.dir/.content.xml\n" + "jcr_root/etc/packages/my_packages/sub_a.zip.dir/_jcr_content/\n" + "jcr_root/etc/packages/my_packages/sub_a.zip.dir/_jcr_content/_vlt_definition/\n" + "jcr_root/etc/packages/my_packages/sub_a.zip.dir/_jcr_content/_vlt_definition/.content.xml\n" + "jcr_root/etc/packages/my_packages/sub_b.zip\n" + "jcr_root/etc/packages/my_packages/sub_b.zip.dir/\n" + "jcr_root/etc/packages/my_packages/sub_b.zip.dir/.content.xml\n" + "jcr_root/etc/packages/my_packages/sub_b.zip.dir/_jcr_content/\n" + "jcr_root/etc/packages/my_packages/sub_b.zip.dir/_jcr_content/_vlt_definition/\n" + "jcr_root/etc/packages/my_packages/sub_b.zip.dir/_jcr_content/_vlt_definition/.content.xml\n", result.toString()); } }
Example #19
Source File: RITSupport.java From nexus-repository-r with Eclipse Public License 1.0 | 4 votes |
/**
 * Decompresses the given stream as GZIP, reads the result fully as text, and
 * asserts that the text satisfies the supplied matcher.
 *
 * @param expectedContent the matcher the decompressed text must satisfy
 * @param is              the GZIP-compressed stream to verify
 * @throws Exception if the stream cannot be decompressed or read
 */
protected void verifyTextGzipContent(Matcher<String> expectedContent, InputStream is) throws Exception {
    final CompressorStreamFactory streamFactory = new CompressorStreamFactory();
    try (InputStream decompressed = streamFactory.createCompressorInputStream(GZIP, is)) {
        assertThat(IOUtils.toString(decompressed), expectedContent);
    }
}
Example #20
Source File: TestPackageInstall.java From jackrabbit-filevault with Apache License 2.0 | 3 votes |
/**
 * Installs a package with non-child filter doesn't remove the root.
 *
 * <pre>
 * &lt;workspaceFilter version="1.0"&gt;
 *   &lt;filter root="/etc"&gt;
 *     &lt;include pattern="/etc"/&gt;
 *     &lt;include pattern="/etc/clientlibs"/&gt;
 *     &lt;include pattern="/etc/clientlibs/granite"/&gt;
 *     &lt;include pattern="/etc/clientlibs/granite/test(/.*)?"/&gt;
 *   &lt;/filter&gt;
 * &lt;/workspaceFilter&gt;
 * </pre>
 */
@Test
public void testNoChildFilter() throws RepositoryException, IOException, PackageException {
    File tmpFile = File.createTempFile("vlttest", "zip");
    // Close both ends of the copy before the file is uploaded; the original
    // left the source and the file output stream open.
    try (java.io.InputStream in = getStream("/test-packages/test-package-with-etc.zip");
         java.io.OutputStream out = FileUtils.openOutputStream(tmpFile)) {
        IOUtils.copy(in, out);
    }

    JcrPackage pack = packMgr.upload(tmpFile, true, true, "test-package-with-etc", false);
    assertNodeExists("/etc");

    // Add a node the package does not contain; it must still exist after the install.
    admin.getNode("/etc").addNode("foo", NodeType.NT_FOLDER);
    admin.save();

    pack.install(getDefaultOptions());
    assertNodeExists("/etc/foo");
}